Source code for abacus.auto_ab.params

from __future__ import annotations
from typing import Dict, Any, Optional

from pydantic.dataclasses import dataclass
from pydantic import validator, Field
import numpy as np

from abacus.types import (
    ColumnNameType,
    ColumnNamesType,
    ArrayNumType,
    ArrayStrType,
    MetricType,
    MetricTransformType,
)


class ValidationConfig:
    """Pydantic configuration passed as ``config=`` to the parameter dataclasses."""

    # Let fields be annotated with types pydantic has no built-in validator for.
    arbitrary_types_allowed = True
    # Run field validation again whenever an attribute is reassigned.
    validate_assignment = True


@dataclass(config=ValidationConfig)
class DataParams:
    """Describe the experiment dataset through its column names.

    Parameters:
        id_col (str): ID of observations.
        group_col (str): Group of experiment.
        control_name (str): Name of control group in ``group_col``.
        treatment_name (str): Name of treatment group in ``group_col``.
        is_grouped (bool, Optional): Flag that shows whether observations
            are grouped.
        strata_col (str, Optional): Name of stratification column.
            Stratification column must be categorical.
        target (str, Optional): Target column name of continuous or binary
            metric.
        numerator (str, Optional): Numerator for ratio metric.
        denominator (str, Optional): Denominator for ratio metric.
        covariate (str, Optional): Covariate column for CUPED.
        target_prev (str, Optional): Target column name for previous period
            of continuous metric.
        predictors_now (List[str], Optional): List of columns to predict
            covariate.
        predictors_prev (List[str], Optional): List of columns to create
            linear model for covariate prediction.
        control (ArrayNumType, Optional): Control group data used for quick
            access and excluding querying dataset.
        treatment (ArrayNumType, Optional): Treatment group data used for
            quick access and excluding querying dataset.
        transforms (ArrayStrType, Optional): List of transformations applied
            to experiment.
    """

    # Identification of observations and experiment groups.
    id_col: ColumnNameType = "id"
    group_col: ColumnNameType = "groups"
    control_name: str = "A"
    treatment_name: str = "B"
    is_grouped: Optional[bool] = True

    # Metric-related columns; an empty string is the default for each.
    strata_col: Optional[ColumnNameType] = ""
    target: Optional[ColumnNameType] = ""
    numerator: Optional[ColumnNameType] = ""
    denominator: Optional[ColumnNameType] = ""
    covariate: Optional[ColumnNameType] = ""
    target_prev: Optional[ColumnNameType] = ""

    # Collection-valued fields use a factory so every instance gets its own list.
    predictors_now: Optional[ColumnNamesType] = Field(default_factory=list)
    predictors_prev: Optional[ColumnNamesType] = Field(default_factory=list)
    control: Optional[ArrayNumType] = Field(default_factory=list)
    treatment: Optional[ArrayNumType] = Field(default_factory=list)
    transforms: Optional[ArrayStrType] = Field(default_factory=list)
@dataclass(config=ValidationConfig)
class HypothesisParams:
    """Description of hypothesis parameters.

    Parameters:
        alpha (float): type I error. Must be strictly between 0 and 1.
        beta (float): type II error. Must be strictly between 0 and 1.
        alternative (str): directionality of hypothesis: less, greater,
            two-sided.
        metric_type (str): metric type: continuous, binary, ratio.
        metric_name (str): metric name: mean, median. If custom metric,
            then use here appropriate name.
        metric (Callable[[Iterable[float]], np.ndarray], Optional): if
            metric_name is custom, then you must define metric function.
            Overwritten with ``np.mean`` / ``np.median`` when ``metric_name``
            is ``"mean"`` / ``"median"``.
        metric_transform (Callable[[np.ndarray], np.ndarray], Optional):
            applied transformations to experiment.
        metric_transform_info (Dict[str, Dict[str, Any]], Optional):
            information of applied transformations.
        filter_method (str, Optional): method for filtering outliers:
            top_5, isolation_forest.
        n_boot_samples (int, Optional): number of bootstrap iterations.
        n_buckets (int, Optional): number of buckets.
        strata (str, Optional): stratification column.
        strata_weights (Dict[str, float], Optional): historical strata
            weights.

    Raises:
        pydantic.ValidationError: if ``alpha`` or ``beta`` is outside the
            open interval (0, 1), or if ``alternative`` / ``metric_type`` is
            not one of the supported values.
    """

    alpha: Optional[float] = 0.05
    beta: Optional[float] = 0.2
    alternative: Optional[str] = "two-sided"  # less, greater, two-sided
    metric_type: Optional[str] = "continuous"  # continuous, binary, ratio
    metric_name: Optional[str] = "mean"  # mean, median
    metric: Optional[MetricType] = np.mean
    metric_transform: Optional[MetricTransformType] = np.mean
    metric_transform_info: Optional[Dict[str, Dict[str, Any]]] = Field(
        default_factory=dict
    )
    filter_method: Optional[str] = "top_5"  # top_5, isolation_forest
    n_boot_samples: Optional[int] = 200
    n_buckets: Optional[int] = 100
    strata: Optional[str] = ""
    strata_weights: Optional[Dict[str, float]] = Field(default_factory=dict)

    def __post_init__(self):
        """Bind ``metric`` to the NumPy function named by ``metric_name``."""
        # Any other metric_name leaves the supplied ``metric`` untouched.
        if self.metric_name == "mean":
            self.metric = np.mean
        elif self.metric_name == "median":
            self.metric = np.median

    # The validators raise ValueError instead of using ``assert``: assert
    # statements are removed under ``python -O``, which would silently disable
    # the checks. Pydantic wraps ValueError in a ValidationError just as it
    # does AssertionError. Message texts are kept unchanged.

    @validator("alpha", always=True, allow_reuse=True)
    @classmethod
    def alpha_validator(cls, alpha: float) -> float:
        """Reject ``alpha`` unless it lies strictly between 0 and 1."""
        if not 0 < alpha < 1:
            raise ValueError("alpha is not in range [0, 1]")
        return alpha

    @validator("beta", always=True, allow_reuse=True)
    @classmethod
    def beta_validator(cls, beta: float) -> float:
        """Reject ``beta`` unless it lies strictly between 0 and 1."""
        if not 0 < beta < 1:
            raise ValueError("beta is not in range [0, 1]")
        return beta

    @validator("alternative", always=True, allow_reuse=True)
    @classmethod
    def alternative_validator(cls, alternative: str) -> str:
        """Reject ``alternative`` unless it is a supported direction."""
        if alternative not in ("two-sided", "less", "greater"):
            raise ValueError(
                "alternative is not in ['two-sided', 'less', 'greater']"
            )
        return alternative

    @validator("metric_type", always=True, allow_reuse=True)
    @classmethod
    def metric_type_validator(cls, metric_type: str) -> str:
        """Reject ``metric_type`` unless it is a supported metric kind."""
        if metric_type not in ("continuous", "binary", "ratio"):
            raise ValueError(
                "metric_type is not in ['continuous', 'binary', 'ratio']"
            )
        return metric_type
@dataclass
class ABTestParams:
    """Bundle the data description and hypothesis settings of one A/B test.

    Parameters:
        data_params (DataParams): dataset description. Defaults to a new
            ``DataParams()``.
        hypothesis_params (HypothesisParams): hypothesis settings. Defaults
            to a new ``HypothesisParams()``.
    """

    # default_factory builds a fresh default per instance instead of creating
    # one DataParams/HypothesisParams at import time that could end up shared
    # between ABTestParams objects.
    data_params: DataParams = Field(default_factory=DataParams)
    hypothesis_params: HypothesisParams = Field(default_factory=HypothesisParams)