From 95aca0a0831cddaec59940238594c1da0a09af96 Mon Sep 17 00:00:00 2001
From: Ludovico Coveri <105851039+ludovicolc@users.noreply.github.com>
Date: Mon, 30 Mar 2026 13:11:42 +0200
Subject: [PATCH 1/6] Add Split class and wire it into HypothesisTest and AnalysisPlan

---
 cluster_experiments/__init__.py               |  2 +
 .../inference/analysis_plan.py                | 54 ++++++-----
 .../inference/analysis_results.py             | 12 ++-
 cluster_experiments/inference/dimension.py    |  2 +
 .../inference/hypothesis_test.py              | 97 +++++++++++++++++--
 cluster_experiments/inference/split.py        | 33 +++++++
 6 files changed, 169 insertions(+), 31 deletions(-)
 create mode 100644 cluster_experiments/inference/split.py

diff --git a/cluster_experiments/__init__.py b/cluster_experiments/__init__.py
index 0ee86bb..e15b568 100644
--- a/cluster_experiments/__init__.py
+++ b/cluster_experiments/__init__.py
@@ -14,6 +14,7 @@
 )
 from cluster_experiments.inference.analysis_plan import AnalysisPlan
 from cluster_experiments.inference.dimension import Dimension
+from cluster_experiments.inference.split import Split
 from cluster_experiments.inference.hypothesis_test import HypothesisTest
 from cluster_experiments.inference.metric import Metric, RatioMetric, SimpleMetric
 from cluster_experiments.inference.variant import Variant
@@ -88,6 +89,7 @@
     "SimpleMetric",
     "RatioMetric",
     "Dimension",
+    "Split",
     "Variant",
     "HypothesisTest",
     "RelativeMixedPerturbator",
diff --git a/cluster_experiments/inference/analysis_plan.py b/cluster_experiments/inference/analysis_plan.py
index 308a823..8cff587 100644
--- a/cluster_experiments/inference/analysis_plan.py
+++ b/cluster_experiments/inference/analysis_plan.py
@@ -10,6 +10,7 @@
 )
 from cluster_experiments.inference.analysis_results import AnalysisPlanResults
 from cluster_experiments.inference.dimension import Dimension
+from cluster_experiments.inference.split import DefaultSplit
 from cluster_experiments.inference.hypothesis_test import HypothesisTest
 from cluster_experiments.inference.metric import Metric
 from cluster_experiments.inference.variant import Variant
@@ -171,7 +172,6 @@ def analyze(
         Method to run the experiment analysis.
         """
 
-        # Validate input data at the beginning
         self._validate_data(exp_data, pre_exp_data)
 
         analysis_results = AnalysisPlanResults()
@@ -179,29 +179,37 @@ def analyze(
         for test in self.tests:
             exp_data = test.add_covariates(exp_data, pre_exp_data)
 
+            splits_to_iterate = test.splits if test.splits else [DefaultSplit()]
+
             for treatment_variant in self.treatment_variants:
-                for dimension in test.dimensions:
-                    for dimension_value in dimension.iterate_dimension_values():
-
-                        if verbose:
-                            logger.info(
-                                f"Metric: {test.metric.alias}, "
-                                f"Treatment: {treatment_variant.name}, "
-                                f"Dimension: {dimension.name}, "
-                                f"Value: {dimension_value}"
-                            )
-
-                        test_results = test.get_test_results(
-                            exp_data=exp_data,
-                            control_variant=self.control_variant,
-                            treatment_variant=treatment_variant,
-                            variant_col=self.variant_col,
-                            dimension=dimension,
-                            dimension_value=dimension_value,
-                            alpha=self.alpha,
-                        )
-
-                        analysis_results = analysis_results + test_results
+                for split in splits_to_iterate:
+                    for split_value in split.iterate_dimension_values():
+                        for dimension in test.dimensions:
+                            for dimension_value in dimension.iterate_dimension_values():
+
+                                if verbose:
+                                    logger.info(
+                                        f"Metric: {test.metric.alias}, "
+                                        f"Treatment: {treatment_variant.name}, "
+                                        f"Split: {split.name}, "
+                                        f"Value: {split_value}, "
+                                        f"Dimension: {dimension.name}, "
+                                        f"Value: {dimension_value}"
+                                    )
+                                # Without splits pass None: no cluster aggregation
+                                test_results = test.get_test_results(
+                                    exp_data=exp_data,
+                                    control_variant=self.control_variant,
+                                    treatment_variant=treatment_variant,
+                                    variant_col=self.variant_col,
+                                    dimension=dimension,
+                                    dimension_value=dimension_value,
+                                    alpha=self.alpha,
+                                    split=split if test.splits else None,
+                                    split_value=split_value if test.splits else None,
+                                )
+
+                                analysis_results = analysis_results + test_results
 
         return analysis_results
 
diff --git a/cluster_experiments/inference/analysis_results.py b/cluster_experiments/inference/analysis_results.py
index 8ce08fa..8c7439f 100644
--- a/cluster_experiments/inference/analysis_results.py
+++ b/cluster_experiments/inference/analysis_results.py
@@ -63,6 +63,8 @@ class AnalysisPlanResults:
     std_error: List[float] = field(default_factory=lambda: [])
     dimension_name: List[str] = field(default_factory=lambda: [])
     dimension_value: List[str] = field(default_factory=lambda: [])
+    split_name: List[str] = field(default_factory=lambda: [])
+    split_value: List[str] = field(default_factory=lambda: [])
     alpha: List[float] = field(default_factory=lambda: [])
 
     def __add__(self, other):
@@ -85,11 +87,19 @@ def __add__(self, other):
             std_error=self.std_error + other.std_error,
             dimension_name=self.dimension_name + other.dimension_name,
             dimension_value=self.dimension_value + other.dimension_value,
+            split_name=self.split_name + other.split_name,
+            split_value=self.split_value + other.split_value,
             alpha=self.alpha + other.alpha,
         )
 
     def to_dataframe(self):
-        return pd.DataFrame(asdict(self))
+        df = pd.DataFrame(asdict(self))
+        
+        cols_to_hide = ["dimension_name", "dimension_value", "split_name", "split_value"]
+        for col in cols_to_hide:
+            if col in df.columns and (df[col] == "").all():
+                df = df.drop(columns=[col])
+        return df
 
     def __str__(self) -> str:
         n = len(self.ate)
diff --git a/cluster_experiments/inference/dimension.py b/cluster_experiments/inference/dimension.py
index 9793a2c..096cb84 100644
--- a/cluster_experiments/inference/dimension.py
+++ b/cluster_experiments/inference/dimension.py
@@ -7,6 +7,8 @@ class Dimension:
     """
     A class used to represent a Dimension with a name and values.
 
+    Dimensions describe unit attributes that stay stable for the whole experiment.
+
     Attributes
     ----------
     name : str
diff --git a/cluster_experiments/inference/hypothesis_test.py b/cluster_experiments/inference/hypothesis_test.py
index 2dac959..e3745b4 100644
--- a/cluster_experiments/inference/hypothesis_test.py
+++ b/cluster_experiments/inference/hypothesis_test.py
@@ -7,6 +7,7 @@
 from cluster_experiments.experiment_analysis import ExperimentAnalysis, InferenceResults
 from cluster_experiments.inference.analysis_results import AnalysisPlanResults
 from cluster_experiments.inference.dimension import DefaultDimension, Dimension
+from cluster_experiments.inference.split import DefaultSplit, Split
 from cluster_experiments.inference.metric import Metric, RatioMetric
 from cluster_experiments.inference.variant import Variant
 from cluster_experiments.power_config import analysis_mapping
@@ -25,7 +26,9 @@ class HypothesisTest:
     analysis_config : Optional[dict]
         An optional dictionary representing the configuration for the analysis
     dimensions : Optional[List[Dimension]]
-        An optional list of Dimension instances
+        An optional list of Dimension instances. Dimensions describe stable unit attributes.
+    splits : Optional[List[Split]]
+        An optional list of Split instances. Splits describe attributes that can change during the experiment.
     cupac_config : Optional[dict]
         An optional dictionary representing the configuration for the cupac model
     custom_analysis_type_mapper : Optional[Dict[str, ExperimentAnalysis]]
@@ -38,6 +41,7 @@ def __init__(
         analysis_type: str,
         analysis_config: Optional[dict] = None,
         dimensions: Optional[List[Dimension]] = None,
+        splits: Optional[List[Split]] = None,
         cupac_config: Optional[dict] = None,
         custom_analysis_type_mapper: Optional[Dict[str, ExperimentAnalysis]] = None,
     ):
@@ -52,6 +56,8 @@ def __init__(
             An optional dictionary representing the configuration for the analysis
         dimensions : Optional[List[Dimension]]
             An optional list of Dimension instances
+        splits : Optional[List[Split]]
+            An optional list of Split instances
         cupac_config : Optional[dict]
             An optional dictionary representing the configuration for the cupac model
         custom_analysis_type_mapper : Optional[Dict[str, ExperimentAnalysis]]
@@ -62,6 +68,7 @@ def __init__(
             analysis_type,
             analysis_config,
             dimensions,
+            splits,
             cupac_config,
             custom_analysis_type_mapper,
         )
@@ -69,6 +76,7 @@ def __init__(
         self.analysis_type = analysis_type
         self.analysis_config = analysis_config or {}
         self.dimensions = [DefaultDimension()] + (dimensions or [])
+        self.splits = [DefaultSplit()] + splits if splits else []
         self.cupac_config = cupac_config or {}
         self.custom_analysis_type_mapper = custom_analysis_type_mapper or {}
 
@@ -141,6 +149,7 @@ def _validate_inputs(
         analysis_type: str,
         analysis_config: Optional[dict],
         dimensions: Optional[List[Dimension]],
+        splits: Optional[List[Split]] = None,
         cupac_config: Optional[dict] = None,
         custom_analysis_type_mapper: Optional[Dict[str, ExperimentAnalysis]] = None,
     ):
@@ -157,6 +166,8 @@ def _validate_inputs(
             An optional dictionary representing the configuration for the analysis
         dimensions : Optional[List[Dimension]]
             An optional list of Dimension instances
+        splits : Optional[List[Split]]
+            An optional list of Split instances
         cupac_config : Optional[dict]
             An optional dictionary representing the configuration for the cupac model
         custom_analysis_type_mapper : Optional[dict[str, ExperimentAnalysis]]
@@ -187,6 +198,14 @@ def _validate_inputs(
                 f"Dimensions must be a list of Dimension instances if provided, got {dimensions}"
             )
 
+        # Check if splits is a list of Split instances when provided
+        if splits is not None and (
+            not isinstance(splits, list) or not all(isinstance(split, Split) for split in splits)
+        ):
+            raise TypeError(
+                f"Splits must be a list of Split instances if provided, got {splits}"
+            )
+
         # Validate custom_analysis_type_mapper if provided
         if custom_analysis_type_mapper:
             # Ensure it's a dictionary
@@ -277,6 +296,34 @@ def _prepare_analysis_config(self, treatment_col: str, treatment: str) -> None:
 
         self.new_analysis_config = new_analysis_config
 
+    @staticmethod
+    def _aggregate_by_cluster(
+        df: pd.DataFrame,
+        cluster_cols: List[str],
+        treatment_col: str,
+        metric: Metric,
+        covariates: Optional[List[str]] = None,
+    ) -> pd.DataFrame:
+        """
+        Aggregate metric values by cluster.
+        """
+        agg_cols = {}
+        if isinstance(metric, RatioMetric):
+            agg_cols[metric.target_column] = "sum"
+            agg_cols[metric.scale_column] = "sum"
+        else:
+            agg_cols[metric.target_column] = "sum"
+
+        if covariates:
+            for covariate in covariates:
+                if covariate not in df.columns:
+                    raise ValueError(
+                        f"Covariate '{covariate}' is not present in the data for cluster aggregation"
+                    )
+                agg_cols[covariate] = "mean"
+
+        return df.groupby(cluster_cols + [treatment_col], as_index=False).agg(agg_cols)
+
     @staticmethod
     def prepare_data(
         data: pd.DataFrame,
@@ -285,18 +332,37 @@ def prepare_data(
         control_variant: Variant,
         dimension_name: str,
         dimension_value: str,
+        split_name: Optional[str] = None,
+        split_value: Optional[str] = None,
+        cluster_cols: Optional[List[str]] = None,
+        metric: Optional[Metric] = None,
+        covariates: Optional[List[str]] = None,
     ) -> pd.DataFrame:
-        """
-        Prepares the data for the experiment analysis pipeline
-        """
         prepared_df = data.copy()
 
         prepared_df = prepared_df.assign(__total_dimension="total")
-
         prepared_df = prepared_df.query(
             f"{variant_col}.isin(['{treatment_variant.name}','{control_variant.name}'])"
         ).query(f"{dimension_name} == '{dimension_value}'")
 
+        if split_name is not None:
+            prepared_df = prepared_df.assign(__total_split="total")
+            if split_value is None:
+                raise ValueError("split_value must be provided when split_name is used")
+            
+            prepared_df = prepared_df.query(f"{split_name} == '{split_value}'")
+
+            if not cluster_cols:
+                raise ValueError(f"Split '{split_name}' requires 'cluster_cols' for aggregation.")
+            
+            prepared_df = HypothesisTest._aggregate_by_cluster(
+                df=prepared_df,
+                cluster_cols=cluster_cols,
+                treatment_col=variant_col,
+                metric=metric,
+                covariates=covariates,
+            )
+
         return prepared_df
 
     def add_covariates(
@@ -321,6 +387,8 @@ def get_test_results(
         dimension: Dimension,
         dimension_value: str,
         alpha: float,
+        split: Optional[Split] = None,
+        split_value: Optional[str] = None,
     ) -> AnalysisPlanResults:
         """
         Performs the hypothesis test on the provided data, for the given dimension value.
@@ -359,6 +427,11 @@ def get_test_results(
             control_variant=control_variant,
             dimension_name=dimension.name,
             dimension_value=dimension_value,
+            split_name=split.name if split else None,
+            split_value=split_value,
+            cluster_cols=self.analysis_config.get("cluster_cols"),
+            metric=self.metric,
+            covariates=self.analysis_config.get("covariates", []),
         )
 
         inference_results = self.get_inference_results(df=prepared_df, alpha=alpha)
@@ -370,6 +443,9 @@ def get_test_results(
             prepared_df.query(f"{variant_col}=='{treatment_variant.name}'")
         )
 
+        has_real_dimensions = any(not isinstance(d, DefaultDimension) for d in self.dimensions)
+        has_real_splits = any(not isinstance(s, DefaultSplit) for s in self.splits)
+
         test_results = AnalysisPlanResults(
             metric_alias=[self.metric.alias],
             control_variant_name=[control_variant.name],
@@ -382,9 +458,11 @@ def get_test_results(
             ate_ci_upper=[inference_results.conf_int.upper],
             p_value=[inference_results.p_value],
             std_error=[inference_results.std_error],
-            dimension_name=[dimension.name],
-            dimension_value=[dimension_value],
             alpha=[alpha],
+            dimension_name=[dimension.name] if has_real_dimensions else [""],
+            dimension_value=[dimension_value] if has_real_dimensions else [""],
+            split_name=[split.name if split else "total"] if has_real_splits else [""],
+            split_value=[split_value if split_value else "total"] if has_real_splits else [""],
         )
 
         return test_results
@@ -409,11 +487,16 @@ def from_config(cls, config: dict) -> "HypothesisTest":
             Dimension.from_metrics_config(dimension_config)
             for dimension_config in config.get("dimensions", [])
         ]
+        splits = [
+            Split.from_metrics_config(split_config)
+            for split_config in config.get("splits", [])
+        ]
         return cls(
             metric=metric,
             analysis_type=config["analysis_type"],
             analysis_config=config.get("analysis_config"),
             dimensions=dimensions,
+            splits=splits,
             cupac_config=config.get("cupac_config"),
             custom_analysis_type_mapper=config.get("custom_analysis_type_mapper"),
         )
diff --git a/cluster_experiments/inference/split.py b/cluster_experiments/inference/split.py
new file mode 100644
index 0000000..990d2c3
--- /dev/null
+++ b/cluster_experiments/inference/split.py
@@ -0,0 +1,33 @@
+from dataclasses import dataclass
+from typing import List
+
+from cluster_experiments.inference.dimension import Dimension
+
+
+@dataclass
+class Split(Dimension):
+    """
+    A class used to represent a Split with a name and values.
+
+    Splits describe attributes that may change during the course of the experiment.
+    """
+
+    @classmethod
+    def from_metrics_config(cls, config: dict) -> "Split":
+        return cls(name=config["name"], values=config["values"])
+
+
+@dataclass
+class DefaultSplit(Split):
+    """
+    A Split with a single default value representing total, i.e. no slicing.
+
+    DefaultSplit is used when no explicit split grouping is requested and the analysis
+    should consider the total population or aggregated cluster values.
+    """
+
+    def __init__(self):
+        super().__init__(name="__total_split", values=["total"])
+
+    def __str__(self) -> str:
+        return "DefaultSplit(total)"

From 1363caaa5010e888bf60db7a1c7fec95dacdbf0c Mon Sep 17 00:00:00 2001
From: Ludovico Coveri <105851039+ludovicolc@users.noreply.github.com>
Date: Mon, 30 Mar 2026 13:11:53 +0200
Subject: [PATCH 2/6] add split test

---
 tests/inference/test_split.py | 55 +++++++++++++++++++++++++++++++++++
 1 file changed, 55 insertions(+)
 create mode 100644 tests/inference/test_split.py

diff --git a/tests/inference/test_split.py b/tests/inference/test_split.py
new file mode 100644
index 0000000..fbce1b4
--- /dev/null
+++ b/tests/inference/test_split.py
@@ -0,0 +1,55 @@
+import pytest
+from cluster_experiments.inference.split import Split, DefaultSplit
+
+def test_split_initialization():
+    """Test Split initialization with valid inputs."""
+    # Using 'Status' as a split example
+    s = Split(name="Status", values=["Prime", "Non-Prime"])
+    assert s.name == "Status"
+    assert s.values == ["Prime", "Non-Prime"]
+
+
+def test_split_name_type():
+    """Test that Split raises TypeError if name is not a string."""
+    with pytest.raises(TypeError, match="Dimension name must be a string"):
+        Split(name=123, values=["A", "B"])
+
+
+def test_split_values_type():
+    """Test that Split raises TypeError if values is not a list of strings."""
+    # Values should be a list
+    with pytest.raises(TypeError, match="Dimension values must be a list of strings"):
+        Split(name="Status", values="Prime, Non-Prime")
+
+    # Values should be a list of strings
+    with pytest.raises(TypeError, match="Dimension values must be a list of strings"):
+        Split(name="Status", values=["Prime", 123])
+
+
+def test_split_iterate_values():
+    """Test Split iterate_dimension_values method to ensure unique values are returned."""
+    # Duplicate values must be collapsed, keeping first-seen order
+    s = Split(name="Status", values=["Prime", "Non-Prime", "Prime", "Other"])
+    unique_values = list(s.iterate_dimension_values())
+    assert unique_values == ["Prime", "Non-Prime", "Other"]
+
+
+def test_default_split_initialization():
+    """Test DefaultSplit initialization."""
+    default_s = DefaultSplit()
+    # This checks our specific implementation of DefaultSplit
+    assert default_s.name == "__total_split"
+    assert default_s.values == ["total"]
+
+
+def test_default_split_iterate_dimension_values():
+    """Test that DefaultSplit's iterate_dimension_values yields 'total'."""
+    default_s = DefaultSplit()
+    values = list(default_s.iterate_dimension_values())
+    assert values == ["total"]
+
+
+def test_default_split_str():
+    """Test the __str__ method of DefaultSplit."""
+    default_s = DefaultSplit()
+    assert str(default_s) == "DefaultSplit(total)"

From 5287d41d592b0fe4db7d0debb001b695d4d19dfd Mon Sep 17 00:00:00 2001
From: Ludovico Coveri <105851039+ludovicolc@users.noreply.github.com>
Date: Mon, 30 Mar 2026 13:15:29 +0200
Subject: [PATCH 3/6] Document split and split_value in get_test_results docstring

---
 cluster_experiments/inference/hypothesis_test.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/cluster_experiments/inference/hypothesis_test.py b/cluster_experiments/inference/hypothesis_test.py
index e3745b4..2f90360 100644
--- a/cluster_experiments/inference/hypothesis_test.py
+++ b/cluster_experiments/inference/hypothesis_test.py
@@ -409,6 +409,11 @@ def get_test_results(
             The value of the dimension
         alpha : float
             The significance level to be used in the inference analysis.
+        split : Optional[Split], optional
+            The split instance to use for segmented analysis and cluster aggregation, 
+            by default None
+        split_value : Optional[str], optional
+            The specific value of the split to filter on, by default None
 
         Returns
         -------

From 53887b70a70c31e7317d14dae2becbe70f2171af Mon Sep 17 00:00:00 2001
From: Ludovico Coveri <105851039+ludovicolc@users.noreply.github.com>
Date: Mon, 30 Mar 2026 13:46:01 +0200
Subject: [PATCH 4/6] Add drop_empty option to AnalysisPlanResults.to_dataframe

---
 .../inference/analysis_results.py             | 26 +++++++++++-----
 .../inference/hypothesis_test.py              |  8 ++---
 tests/inference/test_analysis_results.py      | 31 +++++++++++++++++++
 3 files changed, 54 insertions(+), 11 deletions(-)

diff --git a/cluster_experiments/inference/analysis_results.py b/cluster_experiments/inference/analysis_results.py
index 8c7439f..0c213ea 100644
--- a/cluster_experiments/inference/analysis_results.py
+++ b/cluster_experiments/inference/analysis_results.py
@@ -92,13 +92,25 @@ def __add__(self, other):
             alpha=self.alpha + other.alpha,
         )
 
-    def to_dataframe(self):
-        df = pd.DataFrame(asdict(self))
-        
-        cols_to_hide = ["dimension_name", "dimension_value", "split_name", "split_value"]
-        for col in cols_to_hide:
-            if col in df.columns and (df[col] == "").all():
-                df = df.drop(columns=[col])
+    def to_dataframe(self, drop_empty: bool = False):
+        data_dict = asdict(self)
+        max_len = max(len(v) for v in data_dict.values()) if data_dict else 0
+        for k, v in data_dict.items():
+            if len(v) < max_len:
+                data_dict[k] = v + [""] * (max_len - len(v))
+
+        df = pd.DataFrame(data_dict)
+
+        if drop_empty:
+            defaults = {
+                "dimension_name": "__total_dimension",
+                "dimension_value": "total",
+                "split_name": "__total_split",
+                "split_value": "total"
+            }
+            for col, val in defaults.items():
+                if col in df.columns and (df[col] == val).all():
+                    df = df.drop(columns=[col])
         return df
 
     def __str__(self) -> str:
diff --git a/cluster_experiments/inference/hypothesis_test.py b/cluster_experiments/inference/hypothesis_test.py
index 2f90360..c19597d 100644
--- a/cluster_experiments/inference/hypothesis_test.py
+++ b/cluster_experiments/inference/hypothesis_test.py
@@ -463,11 +463,11 @@ def get_test_results(
             ate_ci_upper=[inference_results.conf_int.upper],
             p_value=[inference_results.p_value],
             std_error=[inference_results.std_error],
+            dimension_name=[dimension.name] if has_real_dimensions else ["__total_dimension"],
+            dimension_value=[dimension_value] if has_real_dimensions else ["total"],
+            split_name=[split.name if split else "total"] if has_real_splits else ["__total_split"],
+            split_value=[split_value if split_value else "total"] if has_real_splits else ["total"],
             alpha=[alpha],
-            dimension_name=[dimension.name] if has_real_dimensions else [""],
-            dimension_value=[dimension_value] if has_real_dimensions else [""],
-            split_name=[split.name if split else "total"] if has_real_splits else [""],
-            split_value=[split_value if split_value else "total"] if has_real_splits else [""],
         )
 
         return test_results
diff --git a/tests/inference/test_analysis_results.py b/tests/inference/test_analysis_results.py
index 8677465..d2a84ee 100644
--- a/tests/inference/test_analysis_results.py
+++ b/tests/inference/test_analysis_results.py
@@ -22,6 +22,8 @@ def test_analysis_plan_results_initialization():
     assert results.std_error == []
     assert results.dimension_name == []
     assert results.dimension_value == []
+    assert results.split_name == []
+    assert results.split_value == []
     assert results.alpha == []
 
 
@@ -75,6 +77,8 @@ def test_analysis_plan_results_addition():
         std_error=[0.02],
         dimension_name=["Country"],
         dimension_value=["US"],
+        split_name=["Status"],
+        split_value=["Prime"],
         alpha=[0.05],
     )
     results2 = AnalysisPlanResults(
@@ -91,6 +95,8 @@ def test_analysis_plan_results_addition():
         std_error=[0.01],
         dimension_name=["Country"],
         dimension_value=["CA"],
+        split_name=["Status"],
+        split_value=["Non-Prime"],
         alpha=[0.05],
     )
     combined_results = results1 + results2
@@ -108,6 +114,8 @@ def test_analysis_plan_results_addition():
     assert combined_results.std_error == [0.02, 0.01]
     assert combined_results.dimension_name == ["Country", "Country"]
     assert combined_results.dimension_value == ["US", "CA"]
+    assert combined_results.split_name == ["Status", "Status"]
+    assert combined_results.split_value == ["Prime", "Non-Prime"]
     assert combined_results.alpha == [0.05, 0.05]
 
 
@@ -154,3 +162,26 @@ def test_analysis_plan_results_to_dataframe():
     assert df["dimension_name"].iloc[0] == "Country"
     assert df["dimension_value"].iloc[0] == "US"
     assert df["alpha"].iloc[0] == 0.05
+
+def test_analysis_plan_results_to_dataframe_drop_empty():
+    """Test that AnalysisPlanResults drops empty columns when drop_empty=True."""
+    results = AnalysisPlanResults(
+        metric_alias=["metric1"],
+        ate=[0.1],
+        # dimension_name/value and split_name/value are not provided, 
+        # so they will be empty lists []
+    )
+    
+    # Case 1: drop_empty=False (default behavior, all columns present)
+    df_full = results.to_dataframe(drop_empty=False)
+    assert "split_name" in df_full.columns
+    assert "dimension_name" in df_full.columns
+    assert df_full.shape[1] == len(asdict(results).keys())
+
+    # Case 2: drop_empty=True (empty columns should be dropped)
+    df_clean = results.to_dataframe(drop_empty=True)
+    assert "split_name" not in df_clean.columns
+    assert "dimension_name" not in df_clean.columns
+    # Check that we still have core columns
+    assert "metric_alias" in df_clean.columns
+    assert "ate" in df_clean.columns
\ No newline at end of file

From 144e5af6f6bbb67463b414f8235ddc7d5dccb05a Mon Sep 17 00:00:00 2001
From: Ludovico Coveri <105851039+ludovicolc@users.noreply.github.com>
Date: Mon, 30 Mar 2026 13:49:42 +0200
Subject: [PATCH 5/6] Make drop_empty test use default dimension/split values

---
 tests/inference/test_analysis_results.py | 23 +++++++++++++++--------
 1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/tests/inference/test_analysis_results.py b/tests/inference/test_analysis_results.py
index d2a84ee..54976a1 100644
--- a/tests/inference/test_analysis_results.py
+++ b/tests/inference/test_analysis_results.py
@@ -164,24 +164,31 @@ def test_analysis_plan_results_to_dataframe():
     assert df["alpha"].iloc[0] == 0.05
 
 def test_analysis_plan_results_to_dataframe_drop_empty():
-    """Test that AnalysisPlanResults drops empty columns when drop_empty=True."""
+    """Test that AnalysisPlanResults drops default columns when drop_empty=True."""
     results = AnalysisPlanResults(
         metric_alias=["metric1"],
         ate=[0.1],
-        # dimension_name/value and split_name/value are not provided, 
-        # so they will be empty lists []
+        # Initialize with default values, simulating get_test_results behavior
+        dimension_name=["__total_dimension"],
+        dimension_value=["total"],
+        split_name=["__total_split"],
+        split_value=["total"]
     )
     
-    # Case 1: drop_empty=False (default behavior, all columns present)
+    # Case 1: drop_empty=False (default behavior, all columns should be present)
     df_full = results.to_dataframe(drop_empty=False)
     assert "split_name" in df_full.columns
     assert "dimension_name" in df_full.columns
-    assert df_full.shape[1] == len(asdict(results).keys())
+    assert "split_value" in df_full.columns
+    assert "dimension_value" in df_full.columns
 
-    # Case 2: drop_empty=True (empty columns should be dropped)
+    # Case 2: drop_empty=True (default columns should be dropped)
     df_clean = results.to_dataframe(drop_empty=True)
     assert "split_name" not in df_clean.columns
     assert "dimension_name" not in df_clean.columns
-    # Check that we still have core columns
+    assert "split_value" not in df_clean.columns
+    assert "dimension_value" not in df_clean.columns
+    
+    # Core columns should still be there
     assert "metric_alias" in df_clean.columns
-    assert "ate" in df_clean.columns
\ No newline at end of file
+    assert "ate" in df_clean.columns

From 941e5f1cb0648a7fff4429e8e63879b5249d0f54 Mon Sep 17 00:00:00 2001
From: Ludovico Coveri <105851039+ludovicolc@users.noreply.github.com>
Date: Mon, 30 Mar 2026 13:52:53 +0200
Subject: [PATCH 6/6] run pre-commit

---
 cluster_experiments/__init__.py               |  2 +-
 .../inference/analysis_plan.py                |  2 +-
 .../inference/analysis_results.py             |  2 +-
 .../inference/hypothesis_test.py              | 35 +++++++++++++------
 cluster_experiments/inference/split.py        |  1 -
 tests/inference/test_analysis_results.py      |  7 ++--
 tests/inference/test_split.py                 |  4 ++-
 7 files changed, 35 insertions(+), 18 deletions(-)

diff --git a/cluster_experiments/__init__.py b/cluster_experiments/__init__.py
index e15b568..96925ee 100644
--- a/cluster_experiments/__init__.py
+++ b/cluster_experiments/__init__.py
@@ -14,9 +14,9 @@
 )
 from cluster_experiments.inference.analysis_plan import AnalysisPlan
 from cluster_experiments.inference.dimension import Dimension
-from cluster_experiments.inference.split import Split
 from cluster_experiments.inference.hypothesis_test import HypothesisTest
 from cluster_experiments.inference.metric import Metric, RatioMetric, SimpleMetric
+from cluster_experiments.inference.split import Split
 from cluster_experiments.inference.variant import Variant
 from cluster_experiments.perturbator import (
     BetaRelativePerturbator,
diff --git a/cluster_experiments/inference/analysis_plan.py b/cluster_experiments/inference/analysis_plan.py
index 8cff587..fca1c7a 100644
--- a/cluster_experiments/inference/analysis_plan.py
+++ b/cluster_experiments/inference/analysis_plan.py
@@ -10,9 +10,9 @@
 )
 from cluster_experiments.inference.analysis_results import AnalysisPlanResults
 from cluster_experiments.inference.dimension import Dimension
-from cluster_experiments.inference.split import DefaultSplit
 from cluster_experiments.inference.hypothesis_test import HypothesisTest
 from cluster_experiments.inference.metric import Metric
+from cluster_experiments.inference.split import DefaultSplit
 from cluster_experiments.inference.variant import Variant
 
 logger = logging.getLogger(__name__)
diff --git a/cluster_experiments/inference/analysis_results.py b/cluster_experiments/inference/analysis_results.py
index 0c213ea..0e87dbd 100644
--- a/cluster_experiments/inference/analysis_results.py
+++ b/cluster_experiments/inference/analysis_results.py
@@ -106,7 +106,7 @@ def to_dataframe(self, drop_empty: bool = False):
                 "dimension_name": "__total_dimension",
                 "dimension_value": "total",
                 "split_name": "__total_split",
-                "split_value": "total"
+                "split_value": "total",
             }
             for col, val in defaults.items():
                 if col in df.columns and (df[col] == val).all():
diff --git a/cluster_experiments/inference/hypothesis_test.py b/cluster_experiments/inference/hypothesis_test.py
index c19597d..9eefd61 100644
--- a/cluster_experiments/inference/hypothesis_test.py
+++ b/cluster_experiments/inference/hypothesis_test.py
@@ -7,8 +7,8 @@
 from cluster_experiments.experiment_analysis import ExperimentAnalysis, InferenceResults
 from cluster_experiments.inference.analysis_results import AnalysisPlanResults
 from cluster_experiments.inference.dimension import DefaultDimension, Dimension
-from cluster_experiments.inference.split import DefaultSplit, Split
 from cluster_experiments.inference.metric import Metric, RatioMetric
+from cluster_experiments.inference.split import DefaultSplit, Split
 from cluster_experiments.inference.variant import Variant
 from cluster_experiments.power_config import analysis_mapping
 
@@ -200,7 +200,8 @@ def _validate_inputs(
 
         # Check if splits is a list of Split instances when provided
         if splits is not None and (
-            not isinstance(splits, list) or not all(isinstance(split, Split) for split in splits)
+            not isinstance(splits, list)
+            or not all(isinstance(split, Split) for split in splits)
         ):
             raise TypeError(
                 f"Splits must be a list of Split instances if provided, got {splits}"
@@ -349,12 +350,14 @@ def prepare_data(
             prepared_df = prepared_df.assign(__total_split="total")
             if split_value is None:
                 raise ValueError("split_value must be provided when split_name is used")
-            
+
             prepared_df = prepared_df.query(f"{split_name} == '{split_value}'")
 
             if not cluster_cols:
-                raise ValueError(f"Split '{split_name}' requires 'cluster_cols' for aggregation.")
-            
+                raise ValueError(
+                    f"Split '{split_name}' requires 'cluster_cols' for aggregation."
+                )
+
             prepared_df = HypothesisTest._aggregate_by_cluster(
                 df=prepared_df,
                 cluster_cols=cluster_cols,
@@ -410,7 +413,7 @@ def get_test_results(
         alpha : float
             The significance level to be used in the inference analysis.
         split : Optional[Split], optional
-            The split instance to use for segmented analysis and cluster aggregation, 
+            The split instance to use for segmented analysis and cluster aggregation,
             by default None
         split_value : Optional[str], optional
             The specific value of the split to filter on, by default None
@@ -448,7 +451,9 @@ def get_test_results(
             prepared_df.query(f"{variant_col}=='{treatment_variant.name}'")
         )
 
-        has_real_dimensions = any(not isinstance(d, DefaultDimension) for d in self.dimensions)
+        has_real_dimensions = any(
+            not isinstance(d, DefaultDimension) for d in self.dimensions
+        )
         has_real_splits = any(not isinstance(s, DefaultSplit) for s in self.splits)
 
         test_results = AnalysisPlanResults(
@@ -463,10 +468,20 @@ def get_test_results(
             ate_ci_upper=[inference_results.conf_int.upper],
             p_value=[inference_results.p_value],
             std_error=[inference_results.std_error],
-            dimension_name=[dimension.name] if has_real_dimensions else ["__total_dimension"],
+            dimension_name=(
+                [dimension.name] if has_real_dimensions else ["__total_dimension"]
+            ),
             dimension_value=[dimension_value] if has_real_dimensions else ["total"],
-            split_name=[split.name if split else "total"] if has_real_splits else ["__total_split"],
-            split_value=[split_value if split_value else "total"] if has_real_splits else ["total"],
+            split_name=(
+                [split.name if split else "total"]
+                if has_real_splits
+                else ["__total_split"]
+            ),
+            split_value=(
+                [split_value if split_value else "total"]
+                if has_real_splits
+                else ["total"]
+            ),
             alpha=[alpha],
         )
 
diff --git a/cluster_experiments/inference/split.py b/cluster_experiments/inference/split.py
index 990d2c3..2d13a0e 100644
--- a/cluster_experiments/inference/split.py
+++ b/cluster_experiments/inference/split.py
@@ -1,5 +1,4 @@
 from dataclasses import dataclass
-from typing import List
 
 from cluster_experiments.inference.dimension import Dimension
 
diff --git a/tests/inference/test_analysis_results.py b/tests/inference/test_analysis_results.py
index 54976a1..13d4dea 100644
--- a/tests/inference/test_analysis_results.py
+++ b/tests/inference/test_analysis_results.py
@@ -163,6 +163,7 @@ def test_analysis_plan_results_to_dataframe():
     assert df["dimension_value"].iloc[0] == "US"
     assert df["alpha"].iloc[0] == 0.05
 
+
 def test_analysis_plan_results_to_dataframe_drop_empty():
     """Test that AnalysisPlanResults drops default columns when drop_empty=True."""
     results = AnalysisPlanResults(
@@ -172,9 +173,9 @@ def test_analysis_plan_results_to_dataframe_drop_empty():
         dimension_name=["__total_dimension"],
         dimension_value=["total"],
         split_name=["__total_split"],
-        split_value=["total"]
+        split_value=["total"],
     )
-    
+
     # Case 1: drop_empty=False (default behavior, all columns should be present)
     df_full = results.to_dataframe(drop_empty=False)
     assert "split_name" in df_full.columns
@@ -188,7 +189,7 @@ def test_analysis_plan_results_to_dataframe_drop_empty():
     assert "dimension_name" not in df_clean.columns
     assert "split_value" not in df_clean.columns
     assert "dimension_value" not in df_clean.columns
-    
+
     # Core columns should still be there
     assert "metric_alias" in df_clean.columns
     assert "ate" in df_clean.columns
diff --git a/tests/inference/test_split.py b/tests/inference/test_split.py
index fbce1b4..db7edf4 100644
--- a/tests/inference/test_split.py
+++ b/tests/inference/test_split.py
@@ -1,5 +1,7 @@
 import pytest
-from cluster_experiments.inference.split import Split, DefaultSplit
+
+from cluster_experiments.inference.split import DefaultSplit, Split
+
 
 def test_split_initialization():
     """Test Split initialization with valid inputs."""