From 9a4efb46f91f0af06f9e857ab1656f103281fbbf Mon Sep 17 00:00:00 2001
From: Daniel Weindl <dweindl@users.noreply.github.com>
Date: Tue, 3 Dec 2024 22:27:50 +0100
Subject: [PATCH] Enable passing the base path to Problem.from_yaml (#327)

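When the problem configuration is passed to `Problem.from_yaml` as a
`dict`, relative paths are used as-is because there is no YAML file
location to derive a path prefix from. Add an optional `base_path`
argument so that callers can specify the base path for resolving
relative paths. See #324.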

Closes #324
---
 petab/v1/problem.py      | 15 ++++++--
 petab/v2/problem.py      | 19 +++++++---
 tests/v1/test_petab.py   | 13 +++++--
 tests/v2/test_problem.py | 80 ++++++++++++++++++++++++++++++++++++++++
 4 files changed, 113 insertions(+), 14 deletions(-)

diff --git a/petab/v1/problem.py b/petab/v1/problem.py
index 4a5577eb..6145656f 100644
--- a/petab/v1/problem.py
+++ b/petab/v1/problem.py
@@ -251,21 +251,28 @@ def from_files(
         )
 
     @staticmethod
-    def from_yaml(yaml_config: dict | Path | str) -> Problem:
+    def from_yaml(
+        yaml_config: dict | Path | str, base_path: str | Path = None
+    ) -> Problem:
         """
         Factory method to load model and tables as specified by YAML file.
 
         Arguments:
             yaml_config: PEtab configuration as dictionary or YAML file name
+            base_path: Base directory or URL to resolve relative paths
         """
         if isinstance(yaml_config, Path):
             yaml_config = str(yaml_config)
 
-        get_path = lambda filename: filename  # noqa: E731
         if isinstance(yaml_config, str):
-            path_prefix = get_path_prefix(yaml_config)
+            if base_path is None:
+                base_path = get_path_prefix(yaml_config)
             yaml_config = yaml.load_yaml(yaml_config)
-            get_path = lambda filename: f"{path_prefix}/{filename}"  # noqa: E731
+
+        def get_path(filename):
+            if base_path is None:
+                return filename
+            return f"{base_path}/{filename}"
 
         if yaml.is_composite_problem(yaml_config):
             raise ValueError(
diff --git a/petab/v2/problem.py b/petab/v2/problem.py
index 612f2571..4c36d791 100644
--- a/petab/v2/problem.py
+++ b/petab/v2/problem.py
@@ -117,24 +117,31 @@ def __str__(self):
         )
 
     @staticmethod
-    def from_yaml(yaml_config: dict | Path | str) -> Problem:
+    def from_yaml(
+        yaml_config: dict | Path | str, base_path: str | Path = None
+    ) -> Problem:
         """
         Factory method to load model and tables as specified by YAML file.
 
         Arguments:
             yaml_config: PEtab configuration as dictionary or YAML file name
+            base_path: Base directory or URL to resolve relative paths
         """
         if isinstance(yaml_config, Path):
             yaml_config = str(yaml_config)
 
         if isinstance(yaml_config, str):
             yaml_file = yaml_config
-            path_prefix = get_path_prefix(yaml_file)
-            yaml_config = yaml.load_yaml(yaml_config)
-            get_path = lambda filename: f"{path_prefix}/{filename}"  # noqa: E731
+            if base_path is None:
+                base_path = get_path_prefix(yaml_file)
+            yaml_config = yaml.load_yaml(yaml_file)
         else:
             yaml_file = None
-            get_path = lambda filename: filename  # noqa: E731
+
+        def get_path(filename):
+            if base_path is None:
+                return filename
+            return f"{base_path}/{filename}"
 
         if yaml_config[FORMAT_VERSION] not in {"2.0.0"}:
             # If we got a path to a v1 yaml file, try to auto-upgrade
@@ -186,7 +193,7 @@ def from_yaml(yaml_config: dict | Path | str) -> Problem:
                 else None
             )
 
-        if len(problem0[MODEL_FILES]) > 1:
+        if len(problem0[MODEL_FILES] or []) > 1:
             # TODO https://github.com/PEtab-dev/libpetab-python/issues/6
             raise NotImplementedError(
                 "Support for multiple models is not yet implemented."
diff --git a/tests/v1/test_petab.py b/tests/v1/test_petab.py
index 65700af5..1a3f3344 100644
--- a/tests/v1/test_petab.py
+++ b/tests/v1/test_petab.py
@@ -862,11 +862,16 @@ def test_problem_from_yaml_v1_multiple_files():
                 observables_df, Path(tmpdir, f"observables{i}.tsv")
             )
 
-        petab_problem = petab.Problem.from_yaml(yaml_path)
+        petab_problem1 = petab.Problem.from_yaml(yaml_path)
 
-    assert petab_problem.measurement_df.shape[0] == 2
-    assert petab_problem.observable_df.shape[0] == 2
-    assert petab_problem.condition_df.shape[0] == 2
+        # test that we can load the problem from a dict with a custom base path
+        yaml_config = petab.v1.load_yaml(yaml_path)
+        petab_problem2 = petab.Problem.from_yaml(yaml_config, base_path=tmpdir)
+
+    for petab_problem in (petab_problem1, petab_problem2):
+        assert petab_problem.measurement_df.shape[0] == 2
+        assert petab_problem.observable_df.shape[0] == 2
+        assert petab_problem.condition_df.shape[0] == 2
 
 
 def test_get_required_parameters_for_parameter_table(petab_problem):
diff --git a/tests/v2/test_problem.py b/tests/v2/test_problem.py
index 334dc86a..418f7818 100644
--- a/tests/v2/test_problem.py
+++ b/tests/v2/test_problem.py
@@ -1,4 +1,19 @@
+import tempfile
+from pathlib import Path
+
+import pandas as pd
+
+import petab.v2 as petab
 from petab.v2 import Problem
+from petab.v2.C import (
+    CONDITION_ID,
+    MEASUREMENT,
+    NOISE_FORMULA,
+    OBSERVABLE_FORMULA,
+    OBSERVABLE_ID,
+    SIMULATION_CONDITION_ID,
+    TIME,
+)
 
 
 def test_load_remote():
@@ -25,3 +40,68 @@ def test_auto_upgrade():
     problem = Problem.from_yaml(yaml_url)
     # TODO check something specifically different in a v2 problem
     assert isinstance(problem, Problem)
+
+
+def test_problem_from_yaml_multiple_files():
+    """Test loading PEtab version 2 yaml with multiple condition / measurement
+    / observable files
+    """
+    yaml_config = """
+    format_version: 2.0.0
+    parameter_file:
+    problems:
+    - condition_files: [conditions1.tsv, conditions2.tsv]
+      measurement_files: [measurements1.tsv, measurements2.tsv]
+      observable_files: [observables1.tsv, observables2.tsv]
+      model_files:
+    """
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        yaml_path = Path(tmpdir, "problem.yaml")
+        with open(yaml_path, "w") as f:
+            f.write(yaml_config)
+
+        for i in (1, 2):
+            condition_df = pd.DataFrame(
+                {
+                    CONDITION_ID: [f"condition{i}"],
+                }
+            )
+            condition_df.set_index([CONDITION_ID], inplace=True)
+            petab.write_condition_df(
+                condition_df, Path(tmpdir, f"conditions{i}.tsv")
+            )
+
+            measurement_df = pd.DataFrame(
+                {
+                    SIMULATION_CONDITION_ID: [f"condition{i}"],
+                    OBSERVABLE_ID: [f"observable{i}"],
+                    TIME: [i],
+                    MEASUREMENT: [1],
+                }
+            )
+            petab.write_measurement_df(
+                measurement_df, Path(tmpdir, f"measurements{i}.tsv")
+            )
+
+            observables_df = pd.DataFrame(
+                {
+                    OBSERVABLE_ID: [f"observable{i}"],
+                    OBSERVABLE_FORMULA: [1],
+                    NOISE_FORMULA: [1],
+                }
+            )
+            petab.write_observable_df(
+                observables_df, Path(tmpdir, f"observables{i}.tsv")
+            )
+
+        petab_problem1 = petab.Problem.from_yaml(yaml_path)
+
+        # test that we can load the problem from a dict with a custom base path
+        yaml_config = petab.load_yaml(yaml_path)
+        petab_problem2 = petab.Problem.from_yaml(yaml_config, base_path=tmpdir)
+
+    for petab_problem in (petab_problem1, petab_problem2):
+        assert petab_problem.measurement_df.shape[0] == 2
+        assert petab_problem.observable_df.shape[0] == 2
+        assert petab_problem.condition_df.shape[0] == 2