Skip to content

Bibat's Python API

Bibat provides some Python classes and functions that you can use! This page describes them.

bibat.inference_configuration

The inference_configuration module.

This module provides the class InferenceConfiguration and the function load_inference_configuration.

InferenceConfiguration

Configuration for a statistical inference.

Parameters:

Name Type Description Default
name

A name identifying the model configuration

required
stan_file

Path to a Stan program, with "/" even on Windows

required
prepared_data

Name of the prepared data for this inference

required
stan_input_function

name of a function from src.stan_input_functions used to get a Stan input dictionary from a PreparedData object.

required
sample_kwargs

dictionary of keyword arguments to cmdstanpy.CmdStanModel.sample.

required
modes

which modes to run the model in. Choose one or more of the AVAILABLE_MODES.

required
dims

map from parameter names to lists of coordinate names.

required
cpp_options

valid choices for the cpp_options argument to CmdStanModel

required
stanc_options

valid choices for the stanc_options argument to CmdStanModel

required
Source code in bibat/inference_configuration.py
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
class InferenceConfiguration(BaseModel):
    """Configuration for a statistical inference.

    :param name: A name identifying the model configuration

    :param stan_file: Path to a Stan program, with "/" even on windows. The
    path is resolved relative to the directory `src/stan` and must exist.

    :param prepared_data: Name of the prepared data for this inference

    :param stan_input_function: name of a function from src.stan_input_functions
    used to get a Stan input dictionary from a PreparedData object.

    :param sample_kwargs: dictionary of keyword arguments to
    cmdstanpy.CmdStanModel.sample. Defaults to a copy of DEFAULT_SAMPLE_KWARGS.

    :param modes: which modes to run the model in. Choose one or more of the
    AVAILABLE_MODES. Stored on the model as `fitting_modes`.

    :param dims: map from parameter names to lists of coordinate names.
    Defaults to a copy of DEFAULT_DIMS.

    :param mode_options: map from a mode name to a table of options for that
    mode. If "kfold" is one of the modes, this must contain a "kfold" table
    with an "n_folds" entry.

    :param cpp_options: valid choices for the `cpp_options` argument to
    CmdStanModel

    :param stanc_options: valid choices for the `stanc_options` argument to
    CmdStanModel
    """

    name: str
    stan_file: str
    prepared_data: str
    stan_input_function: str
    fitting_modes: list[str] = Field(alias="modes")
    # Hand each instance its own top-level copy of the defaults so that
    # mutating one configuration cannot alter the module-level constants
    # (and through them every other configuration).
    sample_kwargs: dict = Field(
        default_factory=lambda: dict(DEFAULT_SAMPLE_KWARGS),
    )
    dims: dict[str, list[str]] = Field(
        default_factory=lambda: dict(DEFAULT_DIMS),
    )
    mode_options: dict[str, dict] = Field(default_factory=dict)
    cpp_options: dict | None = None
    stanc_options: dict | None = None

    @model_validator(mode="after")
    def check_folds(self: InferenceConfiguration) -> InferenceConfiguration:
        """Check that there is a number of folds if required.

        :raises ValueError: if "kfold" is a fitting mode and there is no
        `mode_options.kfold` table, the table has no "n_folds" entry, or
        "n_folds" is a string that is not made up of digits.
        """
        if "kfold" not in self.fitting_modes:
            return self
        # An empty mode_options dictionary is covered by this membership
        # test, so no separate emptiness check is needed.
        if "kfold" not in self.mode_options:
            msg = "Mode 'kfold' requires a mode_options.kfold table."
            raise ValueError(msg)
        kfold_options = self.mode_options["kfold"]
        if "n_folds" not in kfold_options:
            msg = "Set 'n_folds' field in kfold mode options."
            raise ValueError(msg)
        n_folds = kfold_options["n_folds"]
        # Only string values are inspected here; other types pass through.
        if isinstance(n_folds, str) and not n_folds.isdigit():
            msg = f"Could not coerce n_folds choice {n_folds} to int."
            raise ValueError(msg)
        return self

    @field_validator("stan_file")
    @classmethod
    def check_stan_file_exists(
        cls: type[InferenceConfiguration],
        v: str,
    ) -> str:
        """Check that the stan file exists.

        :param v: the configured Stan file path, relative to `src/stan`.

        :raises ValueError: if `src/stan/<v>` does not exist.
        """
        # The lookup is relative to the current working directory.
        stan_dir = Path("src") / "stan"
        file = stan_dir / v
        if not file.exists():
            msg = f"{v} is not a file in {stan_dir}."
            raise ValueError(msg)
        return v

load_inference_configuration(path)

Load an inference configuration object from a toml file.

Parameters:

Name Type Description Default
path Path

Path to directory containing a suitable config.toml file

required
Source code in bibat/inference_configuration.py
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
def load_inference_configuration(path: Path) -> InferenceConfiguration:
    """Build an InferenceConfiguration from a directory's config.toml file.

    For the keys "dims" and "sample_kwargs", values present in the file are
    merged on top of the package defaults, with the file's entries taking
    precedence. When one of these keys is missing from the file it is left
    out of the constructor call entirely.

    :param path: Path to directory containing a suitable config.toml file

    """
    config = toml.load(path / "config.toml")
    fallbacks = {
        "dims": DEFAULT_DIMS,
        "sample_kwargs": DEFAULT_SAMPLE_KWARGS,
    }
    for key, fallback in fallbacks.items():
        if key not in config:
            continue
        # Right-hand operand wins, so user-supplied entries override defaults.
        config[key] = fallback | config[key]
    return InferenceConfiguration(**config)

bibat.fitting_mode

A general definition of a fitting mode, plus some mode instances.

FittingMode

A way of fitting a statistical model.

Inferences can be configured to use any of the fitting modes defined here by including them by name in the top-level list 'modes'. For example:

  ...
  modes = ['prior', 'posterior', 'kfold']
  ...

Parameters:

Name Type Description Default
name

A string identifying the fitting mode

required
idata_target

A string identifying the InferenceData group that the mode writes to. Must be one of "prior", "posterior" or "log_likelihood".

required
fit

A function that takes in an InferenceConfiguration object, a PreparedData object and a dictionary of local functions, and returns either a CmdStanMCMC object (if the idata_target is "prior" or "posterior") or an xarray DataArray object (if the idata_target is "log_likelihood")

required
Source code in bibat/fitting_mode.py
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
class FittingMode(BaseModel):
    """A named strategy for fitting a statistical model.

    An inference opts in to a fitting mode by listing the mode's name in the
    top-level 'modes' list of its configuration. For example:

    ```toml
      ...
      modes = ['prior', 'posterior', 'kfold']
      ...
    ```

    :param name: A string identifying the fitting mode

    :param idata_target: The
    [`InferenceData`](https://python.arviz.org/en/stable/api/inference_data.html)
    group that the mode writes to: one of "prior", "posterior" or
    "log_likelihood".

    :param fit: A function that performs the fit. It takes an
    `InferenceConfiguration` object, a `PreparedData` object and a dictionary
    of local functions. It returns a CmdStanMCMC object when the
    `idata_target` is "prior" or "posterior", or an xarray DataArray object
    when the `idata_target` is "log_likelihood".
    """

    # Identifier used to select this mode from a configuration's 'modes' list.
    name: str
    # InferenceData group that receives the output of `fit`.
    idata_target: IdataTarget
    # Callable that carries out the fit; see the class docstring for its
    # expected arguments and return value.
    fit: Callable[
        [InferenceConfiguration, PreparedData, dict[str, Callable]],
        CmdStanMCMC | xr.DataArray,
    ]

prior_mode = FittingMode(name='prior', idata_target=IdataTarget.prior, fit=sample_hmc_prior) module-attribute

posterior_mode = FittingMode(name='posterior', idata_target=IdataTarget.posterior, fit=sample_hmc_posterior) module-attribute

kfold_mode = FittingMode(name='kfold', idata_target=IdataTarget.log_likelihood, fit=sample_hmc_kfold) module-attribute

bibat.util

A module that provides some Bayesian analysis oriented utility code.

validate_df_or_string(v)

Load a dataframe even if it is in json string form.

Source code in bibat/util.py
23
24
25
26
27
def validate_df_or_string(v: pd.DataFrame | str) -> pd.DataFrame:
    """Return a dataframe, parsing the input as json if it is a string.

    :param v: either a DataFrame, which is returned unchanged, or a json
    string, which is read with pandas.read_json.
    """
    if not isinstance(v, str):
        return v
    # Wrap the text in a file-like buffer before handing it to read_json.
    return pd.read_json(StringIO(v))

returns_stan_input(func)

Decorate a function so it returns a json-serialisable dictionary.

Source code in bibat/util.py
39
40
41
42
43
44
45
46
47
48
def returns_stan_input(
    func: Callable[P, Mapping[str, Any]],
) -> Callable[P, Mapping[str, Any]]:
    """Wrap a function so its result is passed through process_dictionary.

    :param func: a function returning a mapping from names to values.

    The returned wrapper accepts the same arguments as `func` and keeps its
    metadata via functools.wraps.
    """

    @wraps(func)
    def _with_processed_output(
        *args: P.args,
        **kwargs: P.kwargs,
    ) -> Mapping[str, Any]:
        raw_output = func(*args, **kwargs)
        return process_dictionary(raw_output)

    return _with_processed_output

one_encode(s)

Replace a series's values with 1-indexed integer factors.

Parameters:

Name Type Description Default
s Series

a pandas Series that you want to factorise.

required
Source code in bibat/util.py
51
52
53
54
55
56
57
def one_encode(s: pd.Series) -> pd.Series:
    """Map each distinct value of a series to an integer code starting at 1.

    Codes are assigned in order of first appearance and the result keeps the
    index of the input.

    :param s: a pandas Series that you want to factorise.

    """
    zero_based_codes, _uniques = pd.factorize(s)
    # pandas.factorize numbers from 0, so shift every code up by one.
    one_based_codes = zero_based_codes + 1
    return pd.Series(one_based_codes, index=s.index)

make_columns_lower_case(df)

Make a DataFrame's columns lower case.

Parameters:

Name Type Description Default
df DataFrame

a pandas DataFrame

required
Source code in bibat/util.py
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
def make_columns_lower_case(df: pd.DataFrame) -> pd.DataFrame:
    """Make a DataFrame's columns lower case.

    The input is not modified; a copy with renamed columns is returned.
    String labels are lower-cased, labels of any other type (for example
    integers) are kept as they are. The name(s) of the column index are
    preserved, for both flat and MultiIndex columns.

    :param df: a pandas DataFrame
    """

    def _lower(label: object) -> object:
        # Only strings have a lower-case form; pass other labels through
        # instead of failing on a missing .lower() method.
        return label.lower() if isinstance(label, str) else label

    new = df.copy()
    columns = new.columns
    if isinstance(columns, pd.MultiIndex):
        new.columns = pd.MultiIndex.from_arrays(
            [
                [_lower(c) for c in columns.get_level_values(i)]
                for i in range(columns.nlevels)
            ],
            # Without this the rebuilt index would lose its level names.
            names=columns.names,
        )
    else:
        new.columns = pd.Index(
            [_lower(c) for c in columns],
            name=columns.name,
        )
    return new