Source code for abacus.splitter.params

from __future__ import annotations
from typing import List, Dict, ClassVar, Optional
from pydantic.dataclasses import dataclass
from pydantic import Field
from pydantic import validator


[docs]@dataclass class SplitBuilderParams: """Split experiment parameters class. Args: map_group_names_to_sizes(Dict): dictionary with group names and sizes. Key with name "control" is obligatory main_strata_col (str): the name of the column to be used first for splitting split_metric_col (str): the name of the column to be binning data for splitting id_col (str): the name of the column with id cols: columns for stratification data cat_cols: categorical columns that are using for stratification. These columns will be encoded as category features n_bins: number of bins to be created based on split_metric_col min_cluster_size: min count of samples in HDBSCAN cluster strata_outliers_frac: frequency of outliers in strata alpha: significance level for A/A test for split """ min_unique_values_in_col: ClassVar[int] = 3 control_group_name: ClassVar[str] = "control" map_group_names_to_sizes: Dict[str, Optional[int]] main_strata_col: str split_metric_col: str metric_type: str = "continuous" # continuous, binary, ratio id_col: str = "customer_id" cols: List[str] = Field(default_factory=list) cat_cols: List[str] = Field(default_factory=list) n_bins: int = 3 min_cluster_size: int = 100 strata_outliers_frac: float = 0.01 alpha: float = 0.05 def __post_init_post_parse__(self): self.cols.extend([self.split_metric_col]) self.cols = list(set(self.cols)) @validator("alpha", always=True, allow_reuse=True) @classmethod def alpha_validator(cls, alpha: float): assert 0 < alpha < 1 return alpha @validator("metric_type", always=True, allow_reuse=True) @classmethod def metric_type_validator(cls, metric_type: str) -> str: assert metric_type in [ "continuous", "binary", "ratio", ], "metric_type is not in ['continuous', 'binary', 'ratio']" return metric_type