diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1d1327a4..4d12d40d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,37 @@
 # PEtab changelog
 
+## 0.5 series
+
+### 0.5.0
+
+**Fixes**
+* Circumvent `SettingWithCopyWarning`
+
+  by @m-philipps in https://github.com/PEtab-dev/libpetab-python/pull/306
+
+* If `flatten_timepoint_specific_output_overrides` makes the visualization
+  table invalid, remove it from `Problem`
+
+  by @m-philipps in https://github.com/PEtab-dev/libpetab-python/pull/316
+
+**Features**
+
+* Added `petab.v2.models` by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/302
+* Added `petab.v1.priors.priors_to_measurements(...)` for replacing
+  `objectivePrior*` by observables/measurements
+
+  by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/309, https://github.com/PEtab-dev/libpetab-python/pull/315, https://github.com/PEtab-dev/libpetab-python/pull/317
+
+* Make model id optional for `PySBModel`
+
+  by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/318
+
+* Implemented `Model.__repr__`
+
+  by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/319
+
+**Full Changelog**: https://github.com/PEtab-dev/libpetab-python/compare/v0.4.1...v0.5.0
+
 ## 0.4 series
 
 This series contains many changes related to the new `petab.v2` subpackage. `petab.v2` should not be considered stable; the `petab.v2` API may change rapidly until we release libpetab-python v1.0.0.
diff --git a/doc/modules.rst b/doc/modules.rst
index 6a747809..8d6335c8 100644
--- a/doc/modules.rst
+++ b/doc/modules.rst
@@ -20,6 +20,7 @@ API Reference
    petab.v1.observables
    petab.v1.parameter_mapping
    petab.v1.parameters
+   petab.v1.priors
    petab.v1.problem
    petab.v1.sampling
    petab.v1.sbml
diff --git a/petab/v1/core.py b/petab/v1/core.py
index 5004141f..10274b8c 100644
--- a/petab/v1/core.py
+++ b/petab/v1/core.py
@@ -299,6 +299,20 @@ def flatten_timepoint_specific_output_overrides(
     petab_problem.observable_df.index.name = OBSERVABLE_ID
     petab_problem.measurement_df = pd.concat(new_measurement_dfs)
 
+    # remove visualization df if it uses observables that are not in the
+    # flattened PEtab problem
+    if petab_problem.visualization_df is not None:
+        assert petab_problem.observable_df.index.name == OBSERVABLE_ID
+        if not all(
+            petab_problem.observable_df.index.isin(
+                petab_problem.visualization_df[Y_VALUES]
+            )
+        ):
+            petab_problem.visualization_df = None
+            logger.warning(
+                "Removing visualization table from flattened PEtab problem."
+            )
+
 
 def unflatten_simulation_df(
     simulation_df: pd.DataFrame,
diff --git a/petab/v1/models/model.py b/petab/v1/models/model.py
index de1ebf3a..795c7f0b 100644
--- a/petab/v1/models/model.py
+++ b/petab/v1/models/model.py
@@ -16,6 +16,9 @@ class Model(abc.ABC):
     def __init__(self):
         ...
 
+    def __repr__(self):
+        return f"<{self.__class__.__name__} {self.model_id!r}>"
+
     @staticmethod
     @abc.abstractmethod
     def from_file(filepath_or_buffer: Any, model_id: str) -> Model:
diff --git a/petab/v1/models/pysb_model.py b/petab/v1/models/pysb_model.py
index 7355669e..f0147990 100644
--- a/petab/v1/models/pysb_model.py
+++ b/petab/v1/models/pysb_model.py
@@ -9,6 +9,7 @@
 
 import pysb
 
+from .. import is_valid_identifier
 from . import MODEL_TYPE_PYSB
 from .model import Model
 
@@ -53,14 +54,21 @@ class PySBModel(Model):
 
     type_id = MODEL_TYPE_PYSB
 
-    def __init__(self, model: pysb.Model, model_id: str):
+    def __init__(self, model: pysb.Model, model_id: str = None):
         super().__init__()
         self.model = model
-        self._model_id = model_id
+        self._model_id = model_id or self.model.name
+
+        if not is_valid_identifier(self._model_id):
+            raise ValueError(
+                f"Model ID '{self._model_id}' is not a valid identifier. "
+                "Either provide a valid identifier or change the model name "
+                "to a valid PEtab model identifier."
+            )
 
     @staticmethod
-    def from_file(filepath_or_buffer, model_id: str):
+    def from_file(filepath_or_buffer, model_id: str = None):
         return PySBModel(
             model=_pysb_model_from_path(filepath_or_buffer), model_id=model_id
         )
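Taken together, the `Model.__repr__` and `PySBModel` changes above let a PySB model be wrapped without an explicit ID while still printing readably. A minimal usage sketch, assuming only what the diff shows (the model name `test` is illustrative):

```python
import pysb

from petab.v1.models.pysb_model import PySBModel

# model_id is now optional; it falls back to the PySB model's name,
# which must be a valid PEtab identifier (otherwise ValueError is raised)
model = pysb.Model(name="test")
petab_model = PySBModel(model)

assert repr(petab_model) == "<PySBModel 'test'>"
```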
diff --git a/petab/v1/parameters.py b/petab/v1/parameters.py
index 382e6b57..8f252988 100644
--- a/petab/v1/parameters.py
+++ b/petab/v1/parameters.py
@@ -458,7 +458,7 @@ def get_priors_from_df(
     # get types and parameters of priors from dataframe
     par_to_estimate = parameter_df.loc[parameter_df[ESTIMATE] == 1]
 
-    if parameter_ids:
+    if parameter_ids is not None:
         try:
             par_to_estimate = par_to_estimate.loc[parameter_ids, :]
         except KeyError as e:
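The one-line `parameters.py` fix removes a truthiness pitfall: `if parameter_ids:` treats an explicitly passed empty selection like the default `None`, and raises outright for multi-element NumPy arrays (whose truth value is ambiguous) — such as the index array the new `tests/v1/test_priors.py` below passes. A standalone sketch of the distinction, not libpetab code:

```python
import numpy as np


def subset(items, selection=None):
    # with `if selection:` instead, np.array(["p1", "p2"]) would raise
    # "truth value ... is ambiguous", and [] would silently select all
    if selection is not None:
        return [i for i in items if i in selection]
    return list(items)


print(subset(["p1", "p2"]))                    # ['p1', 'p2']
print(subset(["p1", "p2"], np.array(["p1"])))  # ['p1']
print(subset(["p1", "p2"], []))                # []
```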
+ """ + new_problem = copy.deepcopy(problem) + + # we only need to consider parameters that are estimated + par_df_tmp = problem.parameter_df.loc[problem.parameter_df[ESTIMATE] == 1] + + if ( + OBJECTIVE_PRIOR_TYPE not in par_df_tmp + or par_df_tmp.get(OBJECTIVE_PRIOR_TYPE).isna().all() + or OBJECTIVE_PRIOR_PARAMETERS not in par_df_tmp + or par_df_tmp.get(OBJECTIVE_PRIOR_PARAMETERS).isna().all() + ): + # nothing to do + return new_problem + + def scaled_observable_formula(parameter_id, parameter_scale): + if parameter_scale == LIN: + return parameter_id + if parameter_scale == LOG: + return f"ln({parameter_id})" + if parameter_scale == LOG10: + return f"log10({parameter_id})" + raise ValueError(f"Unknown parameter scale {parameter_scale}.") + + new_measurement_dicts = [] + new_observable_dicts = [] + for _, row in par_df_tmp.iterrows(): + prior_type = row[OBJECTIVE_PRIOR_TYPE] + parameter_scale = row.get(PARAMETER_SCALE, LIN) + if pd.isna(prior_type): + if not pd.isna(row[OBJECTIVE_PRIOR_PARAMETERS]): + raise AssertionError( + "Objective prior parameters are set, but prior type is " + "not specified." + ) + continue + + if "uniform" in prior_type.lower(): + # for measurements, "uniform" is not supported yet + # if necessary, this could still be implemented by adding another + # observable/measurement that will produce a constant objective + # offset + raise NotImplementedError("Uniform priors are not supported.") + + parameter_id = row.name + prior_parameters = tuple( + map( + float, + row[OBJECTIVE_PRIOR_PARAMETERS].split(PARAMETER_SEPARATOR), + ) + ) + if len(prior_parameters) != 2: + raise AssertionError( + "Expected two objective prior parameters for parameter " + f"{parameter_id}, but got {prior_parameters}." + ) + + # create new observable + new_obs_id = f"prior_{parameter_id}" + if new_obs_id in new_problem.observable_df.index: + raise ValueError( + f"Observable ID {new_obs_id}, which is to be " + "created, already exists." + ) + new_observable = { + OBSERVABLE_ID: new_obs_id, + OBSERVABLE_FORMULA: scaled_observable_formula( + parameter_id, + parameter_scale if "parameterScale" in prior_type else LIN, + ), + NOISE_FORMULA: f"noiseParameter1_{new_obs_id}", + } + if prior_type in (LOG_NORMAL, LOG_LAPLACE): + new_observable[OBSERVABLE_TRANSFORMATION] = LOG + elif OBSERVABLE_TRANSFORMATION in new_problem.observable_df: + # only set default if the column is already present + new_observable[OBSERVABLE_TRANSFORMATION] = LIN + + if prior_type in (NORMAL, PARAMETER_SCALE_NORMAL, LOG_NORMAL): + new_observable[NOISE_DISTRIBUTION] = NORMAL + elif prior_type in (LAPLACE, PARAMETER_SCALE_LAPLACE, LOG_LAPLACE): + new_observable[NOISE_DISTRIBUTION] = LAPLACE + else: + raise NotImplementedError( + f"Objective prior type {prior_type} is not implemented." + ) + + new_observable_dicts.append(new_observable) + + # add measurement + # we could just use any condition and time point since the parameter + # value is constant. however, using an existing timepoint and + # (preequilibrationConditionId+)simulationConditionId will avoid + # requiring extra simulations and solver stops in tools that do not + # check for time dependency of the observable. 
diff --git a/petab/v1/visualize/plotting.py b/petab/v1/visualize/plotting.py
index e690df2c..b607350b 100644
--- a/petab/v1/visualize/plotting.py
+++ b/petab/v1/visualize/plotting.py
@@ -64,6 +64,8 @@ def __init__(
         self.data_to_plot.sort_index(inplace=True)
 
         self.conditions = conditions_
+        if self.conditions is not None:
+            self.conditions = self.conditions.copy()
         self.inf_point = (
             np.inf in self.conditions if self.conditions is not None else False
         )
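The `plotting.py` change — presumably the `SettingWithCopyWarning` fix credited in the changelog above — stores an owned copy of the conditions rather than a potential view of a caller's DataFrame, so later in-place edits neither warn nor write through. A generic pandas illustration, not libpetab code:

```python
import pandas as pd

df = pd.DataFrame({"condition": [1.0, 2.0, 3.0]})

subset = df[df["condition"] > 1.0]  # may be a view or a copy
# subset["condition"] = 0.0         # -> SettingWithCopyWarning

owned = df[df["condition"] > 1.0].copy()
owned["condition"] = 0.0  # safe: an owned copy, df stays untouched
```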
diff --git a/petab/v2/models/__init__.py b/petab/v2/models/__init__.py
new file mode 100644
index 00000000..a387c27b
--- /dev/null
+++ b/petab/v2/models/__init__.py
@@ -0,0 +1,2 @@
+"""Handling of different model types supported by PEtab."""
+from ...v1.models import *  # noqa: F401, F403
diff --git a/petab/v2/models/model.py b/petab/v2/models/model.py
new file mode 100644
index 00000000..403a03e2
--- /dev/null
+++ b/petab/v2/models/model.py
@@ -0,0 +1,2 @@
+"""PEtab model abstraction"""
+from ...v1.models.model import *  # noqa: F401, F403
diff --git a/petab/v2/models/pysb_model.py b/petab/v2/models/pysb_model.py
new file mode 100644
index 00000000..111c9864
--- /dev/null
+++ b/petab/v2/models/pysb_model.py
@@ -0,0 +1,2 @@
+"""Functions for handling PySB models"""
+from ...v1.models.pysb_model import *  # noqa: F401, F403
diff --git a/petab/v2/models/sbml_model.py b/petab/v2/models/sbml_model.py
new file mode 100644
index 00000000..2a0eadc7
--- /dev/null
+++ b/petab/v2/models/sbml_model.py
@@ -0,0 +1,2 @@
+"""Functions for handling SBML models"""
+from ...v1.models.sbml_model import *  # noqa: F401, F403
diff --git a/petab/version.py b/petab/version.py
index 05784482..c59cab99 100644
--- a/petab/version.py
+++ b/petab/version.py
@@ -1,2 +1,2 @@
 """PEtab library version"""
-__version__ = "0.4.1"
+__version__ = "0.5.0"
diff --git a/tests/v1/test_model_pysb.py b/tests/v1/test_model_pysb.py
index 972b3e25..922dab2f 100644
--- a/tests/v1/test_model_pysb.py
+++ b/tests/v1/test_model_pysb.py
@@ -87,3 +87,9 @@ def test_pattern_parsing(uses_pysb):
     pattern = pysb.as_complex_pattern(B(s="a") ** c1)
     assert pattern_from_string(str(pattern), model).matches(pattern)
     assert str(pattern) == str(pattern_from_string("B(s='a') ** c1", model))
+
+
+def test_pysb_model_repr(uses_pysb):
+    model = pysb.Model(name="test")
+    petab_model = PySBModel(model)
+    assert repr(petab_model) == "<PySBModel 'test'>"
diff --git a/tests/v1/test_priors.py b/tests/v1/test_priors.py
new file mode 100644
index 00000000..ea47e54f
--- /dev/null
+++ b/tests/v1/test_priors.py
@@ -0,0 +1,137 @@
+from copy import deepcopy
+from pathlib import Path
+
+import benchmark_models_petab
+import numpy as np
+import pandas as pd
+import pytest
+from scipy.stats import norm
+
+import petab.v1
+from petab.v1 import get_simulation_conditions
+from petab.v1.priors import priors_to_measurements
+
+
+@pytest.mark.parametrize(
+    "problem_id", ["Schwen_PONE2014", "Isensee_JCB2018", "Raimundez_PCB2020"]
+)
+def test_priors_to_measurements(problem_id):
+    """Test the conversion of priors to measurements."""
+    petab_problem_priors: petab.v1.Problem = (
+        benchmark_models_petab.get_problem(problem_id)
+    )
+    petab_problem_priors.visualization_df = None
+    assert petab.v1.lint_problem(petab_problem_priors) is False
+
+    if problem_id == "Isensee_JCB2018":
+        # required to match the stored simulation results below
+        petab.v1.flatten_timepoint_specific_output_overrides(
+            petab_problem_priors
+        )
+        assert petab.v1.lint_problem(petab_problem_priors) is False
+    original_problem = deepcopy(petab_problem_priors)
+
+    petab_problem_measurements = priors_to_measurements(petab_problem_priors)
+
+    # check that the original problem is not modified
+    for attr in [
+        "condition_df",
+        "parameter_df",
+        "observable_df",
+        "measurement_df",
+    ]:
+        assert (
+            diff := getattr(petab_problem_priors, attr).compare(
+                getattr(original_problem, attr)
+            )
+        ).empty, diff
+    # check that measurements and observables were added
+    assert petab.v1.lint_problem(petab_problem_measurements) is False
+    assert (
+        petab_problem_measurements.parameter_df.shape[0]
+        == petab_problem_priors.parameter_df.shape[0]
+    )
+    assert (
+        petab_problem_measurements.observable_df.shape[0]
+        > petab_problem_priors.observable_df.shape[0]
+    )
+    assert (
+        petab_problem_measurements.measurement_df.shape[0]
+        > petab_problem_priors.measurement_df.shape[0]
+    )
+    # ensure we didn't introduce any new conditions
+    assert len(
+        get_simulation_conditions(petab_problem_measurements.measurement_df)
+    ) == len(get_simulation_conditions(petab_problem_priors.measurement_df))
+
+    # verify that the objective function value is the same
+
+    # load/construct the simulation results
+    simulation_df_priors = petab.v1.get_simulation_df(
+        Path(
+            benchmark_models_petab.MODELS_DIR,
+            problem_id,
+            f"simulatedData_{problem_id}.tsv",
+        )
+    )
+    simulation_df_measurements = pd.concat(
+        [
+            petab_problem_measurements.measurement_df.rename(
+                columns={petab.v1.MEASUREMENT: petab.v1.SIMULATION}
+            )[
+                petab_problem_measurements.measurement_df[
+                    petab.v1.C.OBSERVABLE_ID
+                ].str.startswith("prior_")
+            ],
+            simulation_df_priors,
+        ]
+    )
+
+    llh_priors = petab.v1.calculate_llh_for_table(
+        petab_problem_priors.measurement_df,
+        simulation_df_priors,
+        petab_problem_priors.observable_df,
+        petab_problem_priors.parameter_df,
+    )
+    llh_measurements = petab.v1.calculate_llh_for_table(
+        petab_problem_measurements.measurement_df,
+        simulation_df_measurements,
+        petab_problem_measurements.observable_df,
+        petab_problem_measurements.parameter_df,
+    )
+
+    # get prior objective function contribution
+    parameter_ids = petab_problem_priors.parameter_df.index.values[
+        (petab_problem_priors.parameter_df[petab.v1.ESTIMATE] == 1)
+        & petab_problem_priors.parameter_df[
+            petab.v1.OBJECTIVE_PRIOR_TYPE
+        ].notna()
+    ]
+    priors = petab.v1.get_priors_from_df(
+        petab_problem_priors.parameter_df,
+        mode="objective",
+        parameter_ids=parameter_ids,
+    )
+    prior_contrib = 0
+    for parameter_id, prior in zip(parameter_ids, priors, strict=True):
+        prior_type, prior_pars, par_scale, par_bounds = prior
+        if prior_type == petab.v1.PARAMETER_SCALE_NORMAL:
+            prior_contrib += norm.logpdf(
+                petab_problem_priors.x_nominal_free_scaled[
+                    petab_problem_priors.x_free_ids.index(parameter_id)
+                ],
+                loc=prior_pars[0],
+                scale=prior_pars[1],
+            )
+        else:
+            # enable other models, once libpetab has proper support for
+            # evaluating the prior contribution. until then, two test
+            # problems should suffice
+            assert problem_id == "Raimundez_PCB2020"
+            pytest.skip(f"Prior type {prior_type} not implemented")
+
+    assert np.isclose(
+        llh_priors + prior_contrib, llh_measurements, rtol=1e-3, atol=1e-16
+    ), (llh_priors + prior_contrib, llh_measurements)
+    # check that the tolerance is not too high
+    assert np.abs(prior_contrib) > 1e-3 * np.abs(llh_priors)
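Why the test's final `np.isclose` holds (normal case; the Laplace case is analogous): the Gaussian density is symmetric in its argument and its location, so evaluating the prior at the (scaled) parameter value equals the likelihood of a "measurement" placed at the prior location for an observable that reads out the parameter itself:

```latex
\log \pi(\theta)
  = \log \mathcal{N}(\theta;\, \mu,\, \sigma^2)
  = \log \mathcal{N}(\mu;\, \theta,\, \sigma^2)
```

This is exactly the log-likelihood term contributed by the new `prior_*` measurement, so `llh_priors + prior_contrib` matches `llh_measurements` up to numerical tolerance.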
diff --git a/tests/v1/test_sbml.py b/tests/v1/test_sbml.py
index dfa92dad..350a2f0d 100644
--- a/tests/v1/test_sbml.py
+++ b/tests/v1/test_sbml.py
@@ -1,9 +1,12 @@
 import os
 import sys
 
+import libsbml
 import pandas as pd
 import pytest
 
+from petab.v1.models.sbml_model import SbmlModel
+
 sys.path.append(os.getcwd())
 
 import petab  # noqa: E402
@@ -122,3 +125,11 @@ def test_get_condition_specific_models():
     )
 
     check_model(condition_model)
+
+
+def test_sbml_model_repr():
+    sbml_document = libsbml.SBMLDocument()
+    sbml_model = sbml_document.createModel()
+    sbml_model.setId("test")
+    petab_model = SbmlModel(sbml_model)
+    assert repr(petab_model) == "<SbmlModel 'test'>"