Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Functions for adding conditions/observables/parameter to Problem #328

Merged
merged 2 commits into from
Dec 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions petab/v1/mapping.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Functionality related to the PEtab entity mapping table"""
# TODO: Move to petab.v2.mapping
from pathlib import Path

import pandas as pd
Expand Down
181 changes: 180 additions & 1 deletion petab/v1/problem.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@

import os
import tempfile
from collections.abc import Iterable
from collections.abc import Iterable, Sequence
from math import nan
from numbers import Number
from pathlib import Path, PurePosixPath
from typing import TYPE_CHECKING
from warnings import warn
Expand Down Expand Up @@ -1005,3 +1006,181 @@ def n_priors(self) -> int:
return 0

return self.parameter_df[OBJECTIVE_PRIOR_PARAMETERS].notna().sum()

def add_condition(self, id_: str, name: str = None, **kwargs):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To match column headers/C.py? Not exactly the same due to underscores... but we could decide whether to go for consistent or context-specific IDs everywhere.

Suggested change
def add_condition(self, id_: str, name: str = None, **kwargs):
def add_condition(self, condition_id: str, condition_name: str = None, **kwargs):

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Alternative: change all of these add_* methods to take **kwargs that are used to create a pd.Series, which is then validated and concatenated. get_*_df can be used to set the index. Then no table-specific code, and no need to redefine column names here?

  • if not isinstance(kwarg, str) and isinstance(kwarg, list): kwarg = PARAMETER_SEPARATOR.join(map(str, kwarg))

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I was struggling with what would be preferable. In add_condition, it feels redundant to prefix everything with condition_. Then again, it might be considered confusing if the arguments don't match the table columns. For me, the former felt more important. Also with regards to potentially introducing a proper object model, I think we'd want things to be more pythonic and less petaby.

I don't think the kwargs-solution would be very convenient. That would mean you'd have to write add_observable(**{petab.OBSERVABLE_ID:"foo", petab.SIMULATION_CONDITION_ID: "bar"}); in that case, I'd rather go directly back to constructing dataframes.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll leave it as is for now. As discussed elsewhere, the v2 API is likely to change drastically overall, and these points will be addressed then.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Alright, fine for v1, we can revisit v2 in case we change the column names there

"""Add a simulation condition to the problem.

Arguments:
id_: The condition id
name: The condition name
kwargs: Parameter, value pairs to add to the condition table.
"""
record = {CONDITION_ID: [id_], **kwargs}
if name is not None:
record[CONDITION_NAME] = name
tmp_df = pd.DataFrame(record).set_index([CONDITION_ID])
self.condition_df = (
pd.concat([self.condition_df, tmp_df])
if self.condition_df is not None
else tmp_df
)

def add_observable(
self,
id_: str,
formula: str | float | int,
noise_formula: str | float | int = None,
noise_distribution: str = None,
transform: str = None,
name: str = None,
Comment on lines +1030 to +1035
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As above

Suggested change
id_: str,
formula: str | float | int,
noise_formula: str | float | int = None,
noise_distribution: str = None,
transform: str = None,
name: str = None,
observable_id: str,
observable_formula: str | float | int,
noise_formula: str | float | int = None,
noise_distribution: str = None,
transform: str = None,
observable_name: str = None,

**kwargs,
):
"""Add an observable to the problem.

Arguments:
id_: The observable id
formula: The observable formula
noise_formula: The noise formula
noise_distribution: The noise distribution
transform: The observable transformation
name: The observable name
kwargs: additional columns/values to add to the observable table

"""
record = {
OBSERVABLE_ID: [id_],
OBSERVABLE_FORMULA: [formula],
}
if name is not None:
record[OBSERVABLE_NAME] = [name]
if noise_formula is not None:
record[NOISE_FORMULA] = [noise_formula]
if noise_distribution is not None:
record[NOISE_DISTRIBUTION] = [noise_distribution]
if transform is not None:
record[OBSERVABLE_TRANSFORMATION] = [transform]
record.update(kwargs)

tmp_df = pd.DataFrame(record).set_index([OBSERVABLE_ID])
self.observable_df = (
pd.concat([self.observable_df, tmp_df])
if self.observable_df is not None
else tmp_df
)

def add_parameter(
    self,
    id_: str,
    estimated: bool | str | int = True,
    nominal_value=None,
    scale: str = None,
    lb: Number = None,
    ub: Number = None,
    init_prior_type: str = None,
    init_prior_pars: str | Sequence = None,
    obj_prior_type: str = None,
    obj_prior_pars: str | Sequence = None,
    **kwargs,
):
    """Append a single row to the parameter table of this problem.

    Every optional argument that is ``None`` is left out of the new row,
    so no column is created for it by this call.

    Arguments:
        id_: The parameter id
        estimated: Whether the parameter is estimated. Booleans and
            integers are stored as ``int``; any other value is stored
            unchanged.
        nominal_value: The nominal value of the parameter
        scale: The parameter scale
        lb: The lower bound of the parameter
        ub: The upper bound of the parameter
        init_prior_type: The type of the initialization prior distribution
        init_prior_pars: The parameters of the initialization prior
            distribution. A non-string value is converted element-wise
            to ``str`` and joined with ``PARAMETER_SEPARATOR``.
        obj_prior_type: The type of the objective prior distribution
        obj_prior_pars: The parameters of the objective prior
            distribution; handled like ``init_prior_pars``.
        kwargs: additional columns/values to add to the parameter table
    """

    def _as_cell(pars):
        # Strings (and None) are passed through untouched; anything else
        # is collapsed into one separator-delimited string.
        if pars is None or isinstance(pars, str):
            return pars
        return PARAMETER_SEPARATOR.join(str(par) for par in pars)

    # bool is a subclass of int, so this single check covers both.
    if isinstance(estimated, int):
        estimated = int(estimated)

    # Column order here determines the column order of the new row.
    optional_cells = (
        (ESTIMATE, estimated),
        (NOMINAL_VALUE, nominal_value),
        (PARAMETER_SCALE, scale),
        (LOWER_BOUND, lb),
        (UPPER_BOUND, ub),
        (INITIALIZATION_PRIOR_TYPE, init_prior_type),
        (INITIALIZATION_PRIOR_PARAMETERS, _as_cell(init_prior_pars)),
        (OBJECTIVE_PRIOR_TYPE, obj_prior_type),
        (OBJECTIVE_PRIOR_PARAMETERS, _as_cell(obj_prior_pars)),
    )

    record = {PARAMETER_ID: [id_]}
    for column, value in optional_cells:
        if value is not None:
            record[column] = [value]
    record.update(kwargs)

    new_row = pd.DataFrame(record).set_index([PARAMETER_ID])
    if self.parameter_df is None:
        self.parameter_df = new_row
    else:
        self.parameter_df = pd.concat([self.parameter_df, new_row])

def add_measurement(
    self,
    obs_id: str,
    sim_cond_id: str,
    time: float,
    measurement: float,
    observable_parameters: str | Sequence[str | float] = None,
    noise_parameters: str | Sequence[str | float] = None,
    preeq_cond_id: str = None,
):
    """Add a measurement to the problem.

    Arguments:
        obs_id: The observable ID
        sim_cond_id: The simulation condition ID
        time: The measurement time
        measurement: The measurement value
        observable_parameters: The observable parameters. Either an
            already-joined string, which is stored unchanged, or a
            sequence whose elements are converted to ``str`` and joined
            with ``PARAMETER_SEPARATOR``.
        noise_parameters: The noise parameters; handled like
            ``observable_parameters``.
        preeq_cond_id: The pre-equilibration condition ID
    """

    def _join(pars):
        # A plain string is itself an iterable of characters; joining it
        # would insert a separator between every character, so pass it
        # through unchanged.
        if isinstance(pars, str):
            return pars
        # map(str, ...) allows numeric overrides, consistent with how
        # add_parameter treats prior parameters.
        return PARAMETER_SEPARATOR.join(map(str, pars))

    record = {
        OBSERVABLE_ID: [obs_id],
        SIMULATION_CONDITION_ID: [sim_cond_id],
        TIME: [time],
        MEASUREMENT: [measurement],
    }
    if observable_parameters is not None:
        record[OBSERVABLE_PARAMETERS] = [_join(observable_parameters)]
    if noise_parameters is not None:
        record[NOISE_PARAMETERS] = [_join(noise_parameters)]
    if preeq_cond_id is not None:
        record[PREEQUILIBRATION_CONDITION_ID] = [preeq_cond_id]

    tmp_df = pd.DataFrame(record)
    self.measurement_df = (
        pd.concat([self.measurement_df, tmp_df])
        if self.measurement_df is not None
        else tmp_df
    )
21 changes: 15 additions & 6 deletions petab/v2/petab1to2.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import shutil
from itertools import chain
from pathlib import Path
from urllib.parse import urlparse

from pandas.io.common import get_handle, is_url

Expand Down Expand Up @@ -76,7 +77,7 @@ def petab1to2(yaml_config: Path | str, output_dir: Path | str = None):
# condition tables, observable tables, SBML files, parameter table:
# no changes - just copy
file = yaml_config[C.PARAMETER_FILE]
_copy_file(get_src_path(file), get_dest_path(file))
_copy_file(get_src_path(file), Path(get_dest_path(file)))

for problem_config in yaml_config[C.PROBLEMS]:
for file in chain(
Expand All @@ -89,7 +90,7 @@ def petab1to2(yaml_config: Path | str, output_dir: Path | str = None):
problem_config.get(C.MEASUREMENT_FILES, []),
problem_config.get(C.VISUALIZATION_FILES, []),
):
_copy_file(get_src_path(file), get_dest_path(file))
_copy_file(get_src_path(file), Path(get_dest_path(file)))

# TODO: Measurements: preequilibration to experiments/timecourses once
# finalized
Expand Down Expand Up @@ -131,15 +132,23 @@ def _update_yaml(yaml_config: dict) -> dict:
return yaml_config


def _copy_file(src: Path | str, dest: Path | str):
def _copy_file(src: Path | str, dest: Path):
"""Copy file."""
src = str(src)
dest = str(dest)
# src might be a URL - convert to Path if local
src_url = urlparse(src)
if not src_url.scheme:
src = Path(src)
elif src_url.scheme == "file" and not src_url.netloc:
src = Path(src.removeprefix("file:/"))

if is_url(src):
with get_handle(src, mode="r") as src_handle:
with open(dest, "w") as dest_handle:
dest_handle.write(src_handle.handle.read())
return

shutil.copy(str(src), str(dest))
try:
if dest.samefile(src):
return
except FileNotFoundError:
shutil.copy(str(src), str(dest))
Loading
Loading