datalad · bpinsard · Mar 14, 2023 · Mar 15, 2023 · Mar 21, 2023 · Mar 21, 2023
diff --git a/changelog.d/pr-205.md b/changelog.d/pr-205.md
@@ -0,0 +1,3 @@
+### 🚀 Enhancements and New Features
+
+- Add --assume-ready for image and extra-inputs.  [PR #205](https://github.com/datalad/datalad-container/pull/205) (by [@bpinsard](https://github.com/bpinsard))
diff --git a/datalad_container/containers_run.py b/datalad_container/containers_run.py
@@ -26,6 +26,7 @@
 from datalad.utils import ensure_iter
 
 from datalad_container.find_container import find_container_
+from datalad.support.constraints import EnsureChoice
 
 lgr = logging.getLogger("datalad.containers.containers_run")
 
@@ -39,8 +40,17 @@
     container_name=Parameter(
         args=('-n', '--container-name',),
         metavar="NAME",
-        doc="""Specify the name of or a path to a known container to use 
+        doc="""Specify the name of or a path to a known container to use
         for execution, in case multiple containers are configured."""),
+    assume_ready=Parameter(
+        args=("--assume-ready",),
+        nargs="*",
+        constraints=EnsureChoice(None, "image", "inputs", "outputs", "extra-inputs"),
+        doc="""Assume that inputs do not need to be retrieved, outputs do not
+        need to be unlocked or removed, and/or the container image and
+        extra-inputs do not need to be retrieved before running the command.
+        This option allows you to avoid the expense of these preparation steps
+        if you know that they are unnecessary."""),
 )
 
 
@@ -79,7 +89,7 @@
     @eval_results
     def __call__(cmd, container_name=None, dataset=None,
                  inputs=None, outputs=None, message=None, expand=None,
-                 explicit=False, sidecar=None):
+                 explicit=False, sidecar=None, assume_ready=None):
         from unittest.mock import \
             patch  # delayed, since takes long (~600ms for yoh)
         pwd, _ = get_command_pwds(dataset)
@@ -154,39 +164,59 @@
             # just prepend and pray
             cmd = container['path'] + ' ' + cmd
 
+        assume_ready = assume_ready or []
         extra_inputs = []
-        for extra_input in ensure_iter(container.get("extra-input",[]), set):
-            try:
-                xi_kwargs = dict(
-                    img_dspath=image_dspath,
-                    img_dirpath=op.dirname(image_path) or ".",
-                )
-                extra_inputs.append(extra_input.format(**xi_kwargs))
-            except KeyError as exc:
-                yield get_status_dict(
-                    'run',
-                    ds=ds,
-                    status='error',
-                    message=(
-                        'Unrecognized extra_input placeholder: %s. '
-                        'See containers-add for information on known ones: %s',
-                        exc,
-                        ", ".join(xi_kwargs)))
-                return
+        if "extra-inputs" not in assume_ready:
+            for extra_input in ensure_iter(container.get("extra-input",[]), set):
+                try:
+                    xi_kwargs = dict(
+                        img_dspath=image_dspath,
+                        img_dirpath=op.dirname(image_path) or ".",
+                    )
+                    extra_inputs.append(extra_input.format(**xi_kwargs))
+                except KeyError as exc:
+                    yield get_status_dict(
+                        'run',
+                        ds=ds,
+                        status='error',
+                        message=(
+                            'Unrecognized extra_input placeholder: %s. '
+                            'See containers-add for information on known ones: %s',
+                            exc,
+                            ", ".join(xi_kwargs)))
+                    return
+        else:
+            # filter the whole list as .remove only removes first instance
+            assume_ready = [ar for ar in assume_ready if ar != 'extra-inputs']
+
+        if "image" not in assume_ready:
+            extra_inputs.append(image_path)
+        else:
+            assume_ready = [ar for ar in assume_ready if ar != 'image']
 
         lgr.debug("extra_inputs = %r", extra_inputs)
 
+        if not assume_ready:
+            assume_ready = None
+        elif len(assume_ready) == 1:
+            assume_ready = assume_ready[0]
+        elif  "inputs" in assume_ready and "outputs" in assume_ready:
+            assume_ready = "both"
+        else:
+            raise ValueError(f"Ended up with assume_ready={assume_ready!r} which should not have happened")
+
         with patch.dict('os.environ',
                         {CONTAINER_NAME_ENVVAR: container['name']}):
             # fire!
             for r in run_command(
                     cmd=cmd,
                     dataset=dataset or (ds if ds.path == pwd else None),
                     inputs=inputs,
-                    extra_inputs=[image_path] + extra_inputs,
+                    extra_inputs=extra_inputs,
                     outputs=outputs,
                     message=message,
                     expand=expand,
                     explicit=explicit,
-                    sidecar=sidecar):
+                    sidecar=sidecar,
+                    assume_ready=assume_ready):
                 yield r
diff --git a/datalad_container/tests/test_run.py b/datalad_container/tests/test_run.py
@@ -276,6 +276,79 @@ def test_extra_inputs(path=None):
     ) == set(runinfo.get("extra_inputs", set()))
 
 
+@with_tree(
+    tree={
+        "container.img": "image file",
+        "input.txt": "input data",
+        "overlay1.img": "overlay1",
+    }
+)
+def test_assume_ready(path=None):
+    ds = Dataset(path).create(force=True, **common_kwargs)
+    ds.containers_add(
+        "mycontainer",
+        image="container.img",
+        call_fmt="echo image={img} cmd={cmd} img_dspath={img_dspath} img_dirpath={img_dirpath} > out.log",
+        extra_input=["overlay1.img"],
+        **common_kwargs
+    )
+    ds.save(**common_kwargs)
+    # assume image is ready
+    ds.containers_run(
+        "XXX",
+        container_name="mycontainer",
+        assume_ready=['image'],
+        **common_kwargs)
+    ok_file_has_content(
+        os.path.join(ds.repo.path, "out.log"),
+        "image=container.img",
+        re_=True,
+    )
+    commit_msg = ds.repo.call_git(["show", "--format=%B"])
+    cmd, runinfo = get_run_info(ds, commit_msg)
+    assert "container.img"  not in runinfo.get("extra_inputs", [])
+
+    # fails if erroneous assume_ready value
+    with pytest.raises(ValueError):
+        ds.containers_run(
+            "XXX",
+            inputs=['input.txt'],
+            container_name="mycontainer",
+            assume_ready=['inputsssstypo', 'outputs'],
+            **common_kwargs)
+
+    # fails when outputs are assumed ready but are not unlocked
+    with pytest.raises(IncompleteResultsError):
+        ds.containers_run(
+            "XXX",
+            inputs=['input.txt'],
+            outputs=['out.log'],
+            container_name="mycontainer",
+            assume_ready=['inputs', 'outputs'],
+            **common_kwargs)
+
+    # assume inputs as ready, pass to regular `run`
+    ds.containers_run(
+        "YYY",
+        inputs=['input.txt'],
+        outputs=['out.log'],
+        container_name="mycontainer",
+        assume_ready=['inputs'],
+        **common_kwargs)
+    commit_msg = ds.repo.call_git(["show", "--format=%B"])
+    cmd, runinfo = get_run_info(ds, commit_msg)
+
+    ds.containers_run(
+        "ZZZ",
+        container_name="mycontainer",
+        outputs=['out.log'],
+        assume_ready=['extra-inputs'],
+        **common_kwargs)
+    commit_msg = ds.repo.call_git(["show", "--format=%B"])
+    cmd, runinfo = get_run_info(ds, commit_msg)
+    assert 'overlay1.img' not in runinfo.get("extra_inputs", [])
+
+
 @skip_if_no_network
 @with_tree(tree={"subdir": {"in": "innards"}})
 def test_run_no_explicit_dataset(path=None):
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		### 🚀 Enhancements and New Features

		- Add --assume-ready for image and extra-inputs. [PR #205](https://github.com/datalad/datalad-container/pull/205) (by [@bpinsard](https://github.com/bpinsard))