From dfd4d8592033a307bad1259bc07d69dd9a46568c Mon Sep 17 00:00:00 2001 From: Ben Clifford Date: Mon, 9 Oct 2023 11:32:14 +0000 Subject: [PATCH 01/23] rough implementation --- .../test_python_apps/test_lifted_dict.py | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 parsl/tests/test_python_apps/test_lifted_dict.py diff --git a/parsl/tests/test_python_apps/test_lifted_dict.py b/parsl/tests/test_python_apps/test_lifted_dict.py new file mode 100644 index 0000000000..acf3d6389f --- /dev/null +++ b/parsl/tests/test_python_apps/test_lifted_dict.py @@ -0,0 +1,23 @@ +from parsl import python_app + + +@python_app +def returns_a_dict(): + return {"a": "X", "b": "Y"} + + +def test_returns_a_dict(): + + # precondition that returns_a_dict behaves + # correctly + assert returns_a_dict().result()["a"] == "X" + + # check that the deferred __getitem__ functionality works, + # allowing [] to be used on an AppFuture + assert returns_a_dict()["a"].result() == "X" + + # other things to test: when the result is a sequence, so that + # [] is a position + + # when the result is not indexable, a sensible error should + # appear in the appropriate future From 2c9d8f9a28e518289c3872efc1a302cf95832e25 Mon Sep 17 00:00:00 2001 From: Ben Clifford Date: Mon, 9 Oct 2023 11:33:24 +0000 Subject: [PATCH 02/23] add functionality --- parsl/dataflow/futures.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/parsl/dataflow/futures.py b/parsl/dataflow/futures.py index 610932af4c..d07ff97b90 100644 --- a/parsl/dataflow/futures.py +++ b/parsl/dataflow/futures.py @@ -5,11 +5,12 @@ 2. AppFutures which represent the futures on App/Leaf tasks. """ +from __future__ import annotations from concurrent.futures import Future import logging import threading -from typing import Optional, Sequence +from typing import Any, Optional, Sequence from parsl.app.futures import DataFuture from parsl.dataflow.taskrecord import TaskRecord @@ -118,3 +119,17 @@ def task_status(self) -> str: @property def outputs(self) -> Sequence[DataFuture]: return self._outputs + + def __getitem__(self, key: Any) -> AppFuture: + + # hack around circular imports for python_app + from parsl.app.app import python_app + deferred_getitem_app = python_app(deferred_getitem) + + return deferred_getitem_app(self, key) + +# this needs python_app to be importable, but three's an import loop +# if so... so hack around it for prototyping. +# @python_app +def deferred_getitem(o:Any, k: Any) -> Any: + return o[k] From 6efedb521cbe12bf1e87aae55af140312b503477 Mon Sep 17 00:00:00 2001 From: Ben Clifford Date: Mon, 9 Oct 2023 11:49:12 +0000 Subject: [PATCH 03/23] Fix whitespace --- parsl/dataflow/futures.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/parsl/dataflow/futures.py b/parsl/dataflow/futures.py index d07ff97b90..017b1994f3 100644 --- a/parsl/dataflow/futures.py +++ b/parsl/dataflow/futures.py @@ -121,15 +121,15 @@ def outputs(self) -> Sequence[DataFuture]: return self._outputs def __getitem__(self, key: Any) -> AppFuture: - # hack around circular imports for python_app from parsl.app.app import python_app - deferred_getitem_app = python_app(deferred_getitem) + deferred_getitem_app = python_app(deferred_getitem) return deferred_getitem_app(self, key) + # this needs python_app to be importable, but three's an import loop # if so... so hack around it for prototyping. # @python_app -def deferred_getitem(o:Any, k: Any) -> Any: +def deferred_getitem(o: Any, k: Any) -> Any: return o[k] From 9036a7e5a72ead1806c5467952424f97f06d69a6 Mon Sep 17 00:00:00 2001 From: Ben Clifford Date: Mon, 9 Oct 2023 11:52:58 +0000 Subject: [PATCH 04/23] Execute on _parsl_internal, and add a TODO about choosing which DFK to execute on --- parsl/dataflow/futures.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/parsl/dataflow/futures.py b/parsl/dataflow/futures.py index 017b1994f3..912d909af1 100644 --- a/parsl/dataflow/futures.py +++ b/parsl/dataflow/futures.py @@ -123,7 +123,12 @@ def outputs(self) -> Sequence[DataFuture]: def __getitem__(self, key: Any) -> AppFuture: # hack around circular imports for python_app from parsl.app.app import python_app - deferred_getitem_app = python_app(deferred_getitem) + + # TODO: this should be run on the same DFK as is executing the + # task that is associated with this future. That value isn't + # easily available here (although probably the right thing to + # do is add it to self.task_def) + deferred_getitem_app = python_app(deferred_getitem, executors=['_parsl_internal']) return deferred_getitem_app(self, key) From d68664ed6e5f31bf87ebfa01251434b4abe1a617 Mon Sep 17 00:00:00 2001 From: Ben Clifford Date: Mon, 9 Oct 2023 11:55:36 +0000 Subject: [PATCH 05/23] Add some discussion on DFK choice --- parsl/dataflow/futures.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/parsl/dataflow/futures.py b/parsl/dataflow/futures.py index 912d909af1..d12a3a7db7 100644 --- a/parsl/dataflow/futures.py +++ b/parsl/dataflow/futures.py @@ -124,6 +124,13 @@ def __getitem__(self, key: Any) -> AppFuture: # hack around circular imports for python_app from parsl.app.app import python_app + # TODO: it would be nice to avoid redecorating this each time, + # which was done to avoid import loops here -- but the DFK + # is not defined at import time, and so this decoration needs + # to happen at least once per DFK. So perhaps for implementation + # simplicity, this redecoration should always happen, as happens + # for example with the globus data provider. + # TODO: this should be run on the same DFK as is executing the # task that is associated with this future. That value isn't # easily available here (although probably the right thing to From e6ff04bcd81aa9eeaa128c5e8da8ab68b5da98bb Mon Sep 17 00:00:00 2001 From: Ben Clifford Date: Thu, 12 Oct 2023 12:38:04 +0000 Subject: [PATCH 06/23] Add some more tests for lifted getitem --- .../test_python_apps/test_lifted_dict.py | 48 +++++++++++++++---- 1 file changed, 40 insertions(+), 8 deletions(-) diff --git a/parsl/tests/test_python_apps/test_lifted_dict.py b/parsl/tests/test_python_apps/test_lifted_dict.py index acf3d6389f..f6cd0a27cc 100644 --- a/parsl/tests/test_python_apps/test_lifted_dict.py +++ b/parsl/tests/test_python_apps/test_lifted_dict.py @@ -1,4 +1,5 @@ from parsl import python_app +from concurrent.futures import Future @python_app @@ -6,18 +7,49 @@ def returns_a_dict(): return {"a": "X", "b": "Y"} -def test_returns_a_dict(): +@python_app +def returns_a_list(): + return [5, 6, 7, 8, 9] + + +@python_app +def passthrough(v): + return v - # precondition that returns_a_dict behaves - # correctly - assert returns_a_dict().result()["a"] == "X" +def test_lifted_getitem_on_dict(): # check that the deferred __getitem__ functionality works, # allowing [] to be used on an AppFuture assert returns_a_dict()["a"].result() == "X" - # other things to test: when the result is a sequence, so that - # [] is a position - # when the result is not indexable, a sensible error should - # appear in the appropriate future +def test_lifted_getitem_on_dict_bad_key(): + assert isinstance(returns_a_dict()["invalid"].exception(), KeyError) + + +def test_lifted_getitem_on_list(): + assert returns_a_list()[2].result() == 7 + + +def test_lifted_getitem_ordering(): + # this should test that lifting getitem has the correct execution + # order: that it does not defer the execution of following code + + f_prereq = Future() + + f_top = passthrough(f_prereq) + + f_a = f_top['a'] + + # lifted ['a'] should not defer execution here (so it should not + # implicitly call result() on f_top). If it does, this test will + # hang at this point, waiting for f_top to get a value, which + # will not happen until f_prereq gets a value.. + # which doesn't happen until: + + f_prereq.set_result({"a": "X"}) + + # now at this point it should be safe to wait for f_a to get a result + # while passthrough and lifted getitem run... + + assert f_a.result() == "X" From 8bbc054a0b5e95d483ffd550c12c26abc73188dc Mon Sep 17 00:00:00 2001 From: Ben Clifford Date: Thu, 12 Oct 2023 12:38:49 +0000 Subject: [PATCH 07/23] Rename test case as it tests more than dicts now --- .../{test_lifted_dict.py => test_lifted_getitem.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename parsl/tests/test_python_apps/{test_lifted_dict.py => test_lifted_getitem.py} (100%) diff --git a/parsl/tests/test_python_apps/test_lifted_dict.py b/parsl/tests/test_python_apps/test_lifted_getitem.py similarity index 100% rename from parsl/tests/test_python_apps/test_lifted_dict.py rename to parsl/tests/test_python_apps/test_lifted_getitem.py From b72216ef8df9120ca6a9a1cfebeb5796287fc3a6 Mon Sep 17 00:00:00 2001 From: Ben Clifford Date: Thu, 12 Oct 2023 13:40:38 +0000 Subject: [PATCH 08/23] Store DFK into task record, for use by follow on tasks that should be bound to the same DFK --- parsl/dataflow/dflow.py | 1 + parsl/dataflow/taskrecord.py | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/parsl/dataflow/dflow.py b/parsl/dataflow/dflow.py index c42d536c70..0b72d5ed88 100644 --- a/parsl/dataflow/dflow.py +++ b/parsl/dataflow/dflow.py @@ -969,6 +969,7 @@ def submit(self, task_def: TaskRecord task_def = {'depends': [], + 'dfk': self, 'executor': executor, 'func_name': func.__name__, 'memoize': cache, diff --git a/parsl/dataflow/taskrecord.py b/parsl/dataflow/taskrecord.py index a5df6f144d..34d5ef4ca5 100644 --- a/parsl/dataflow/taskrecord.py +++ b/parsl/dataflow/taskrecord.py @@ -5,18 +5,25 @@ from typing_extensions import TypedDict from concurrent.futures import Future + # only for type checking: from typing import Any, Callable, Dict, Optional, List, Sequence, TYPE_CHECKING, Union if TYPE_CHECKING: from parsl.dataflow.futures import AppFuture +import parsl.dataflow.dflow as dflow + from parsl.dataflow.states import States class TaskRecord(TypedDict, total=False): """This stores most information about a Parsl task""" + dfk: dflow.DataFlowKernel + """The DataFlowKernel which is managing this task. + """ + func_name: str status: States From 2ec311629a26df5cc1e46718d40f817771c44068 Mon Sep 17 00:00:00 2001 From: Ben Clifford Date: Thu, 12 Oct 2023 13:46:49 +0000 Subject: [PATCH 09/23] Bind to same DFK as parent task --- parsl/dataflow/futures.py | 24 ++++++------------------ 1 file changed, 6 insertions(+), 18 deletions(-) diff --git a/parsl/dataflow/futures.py b/parsl/dataflow/futures.py index d12a3a7db7..ca8eb94c49 100644 --- a/parsl/dataflow/futures.py +++ b/parsl/dataflow/futures.py @@ -12,6 +12,8 @@ import threading from typing import Any, Optional, Sequence +import parsl.app.app as app + from parsl.app.futures import DataFuture from parsl.dataflow.taskrecord import TaskRecord @@ -121,27 +123,13 @@ def outputs(self) -> Sequence[DataFuture]: return self._outputs def __getitem__(self, key: Any) -> AppFuture: - # hack around circular imports for python_app - from parsl.app.app import python_app - - # TODO: it would be nice to avoid redecorating this each time, - # which was done to avoid import loops here -- but the DFK - # is not defined at import time, and so this decoration needs - # to happen at least once per DFK. So perhaps for implementation - # simplicity, this redecoration should always happen, as happens - # for example with the globus data provider. - - # TODO: this should be run on the same DFK as is executing the - # task that is associated with this future. That value isn't - # easily available here (although probably the right thing to - # do is add it to self.task_def) - deferred_getitem_app = python_app(deferred_getitem, executors=['_parsl_internal']) + # This is decorated on each invocation because the getitem task + # should be bound to the same DFK as the task associated with this + # Future. + deferred_getitem_app = app.python_app(deferred_getitem, executors=['_parsl_internal'], data_flow_kernel=self.task_def['dfk']) return deferred_getitem_app(self, key) -# this needs python_app to be importable, but three's an import loop -# if so... so hack around it for prototyping. -# @python_app def deferred_getitem(o: Any, k: Any) -> Any: return o[k] From e52c3f385321157ad6129614cf3b32713a5b8e37 Mon Sep 17 00:00:00 2001 From: Ben Clifford Date: Thu, 12 Oct 2023 13:54:00 +0000 Subject: [PATCH 10/23] Add brief documentation on lifted [] --- docs/userguide/index.rst | 1 + docs/userguide/lifted_ops.rst | 24 ++++++++++++++++++++++++ 2 files changed, 25 insertions(+) create mode 100644 docs/userguide/lifted_ops.rst diff --git a/docs/userguide/index.rst b/docs/userguide/index.rst index 21de9eb704..3d667816a5 100644 --- a/docs/userguide/index.rst +++ b/docs/userguide/index.rst @@ -16,6 +16,7 @@ User guide monitoring workflow modularizing + lifted_ops joins usage_tracking plugins diff --git a/docs/userguide/lifted_ops.rst b/docs/userguide/lifted_ops.rst new file mode 100644 index 0000000000..a15748b52b --- /dev/null +++ b/docs/userguide/lifted_ops.rst @@ -0,0 +1,24 @@ +.. _label-liftedops: + +Lifted [] operator +================== + +When an app returns a complex structure such as a ``dict`` or a ``list``, +it is sometimes useful to pass an element of that structure to a subsequent +task, without waiting for that subsequent task to complete. + +To help with this, Parsl allows the ``[]`` operator to be used on and +`AppFuture`. This operator will return return another `AppFuture` that will +complete after the initial future, with the result of `[]` on the value +of the initial future. + +The end result is that this assertion will hold: + +.. code-block:: python + + fut = my_app() + assert fut['x'].result() == fut.result()[x] + +but more concurrency will be available, as execution of the main workflow +code will not stop to wait for ``result()`` to complete on the initial +future. From 3af9963039ece90a4cc9e60afa58c80f64f180b4 Mon Sep 17 00:00:00 2001 From: Ben Clifford Date: Thu, 12 Oct 2023 13:56:38 +0000 Subject: [PATCH 11/23] Add more notes to doc --- docs/userguide/lifted_ops.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/docs/userguide/lifted_ops.rst b/docs/userguide/lifted_ops.rst index a15748b52b..8e8c022538 100644 --- a/docs/userguide/lifted_ops.rst +++ b/docs/userguide/lifted_ops.rst @@ -22,3 +22,13 @@ The end result is that this assertion will hold: but more concurrency will be available, as execution of the main workflow code will not stop to wait for ``result()`` to complete on the initial future. + +`AppFuture` does not implement other methods commonly associated with +dicts and lists, such as ``len``, because those methods should return a +specific type of result immediately, and that is not possible when the +results are not available until the future. + +If a key does not exist in the returned result, then the exception will +appear in the Future returned by ``[]``, rather than at the point that +the ``[]`` operator is applied. This is because the valid values that can +be used are not known until the underlying result is available. From 90c0c177af8695ab7f367df508f8242e8026a5ae Mon Sep 17 00:00:00 2001 From: "Andrew S. Rosen" Date: Thu, 12 Oct 2023 18:13:49 -0700 Subject: [PATCH 12/23] Add `__getattr__` support to PR 2904 (#2906) * Add support for `__getattr__` * Formatting patch * E302 lint fix * Add a test for a set * Rename test * Fix code comment --- parsl/dataflow/futures.py | 26 +++++++ parsl/tests/test_python_apps/test_lifted.py | 75 +++++++++++++++++++ .../test_python_apps/test_lifted_dict.py | 23 ------ 3 files changed, 101 insertions(+), 23 deletions(-) create mode 100644 parsl/tests/test_python_apps/test_lifted.py delete mode 100644 parsl/tests/test_python_apps/test_lifted_dict.py diff --git a/parsl/dataflow/futures.py b/parsl/dataflow/futures.py index d12a3a7db7..9dbbf6237b 100644 --- a/parsl/dataflow/futures.py +++ b/parsl/dataflow/futures.py @@ -139,9 +139,35 @@ def __getitem__(self, key: Any) -> AppFuture: return deferred_getitem_app(self, key) + def __getattr__(self, name: str) -> AppFuture: + # hack around circular imports for python_app + from parsl.app.app import python_app + + # TODO: it would be nice to avoid redecorating this each time, + # which was done to avoid import loops here -- but the DFK + # is not defined at import time, and so this decoration needs + # to happen at least once per DFK. So perhaps for implementation + # simplicity, this redecoration should always happen, as happens + # for example with the globus data provider. + + # TODO: this should be run on the same DFK as is executing the + # task that is associated with this future. That value isn't + # easily available here (although probably the right thing to + # do is add it to self.task_def) + deferred_getattr_app = python_app(deferred_getattr, executors=['_parsl_internal']) + + return deferred_getattr_app(self, name) + # this needs python_app to be importable, but three's an import loop # if so... so hack around it for prototyping. # @python_app def deferred_getitem(o: Any, k: Any) -> Any: return o[k] + + +# this needs python_app to be importable, but three's an import loop +# if so... so hack around it for prototyping. +# @python_app +def deferred_getattr(o: Any, name: str) -> Any: + return getattr(o, name) diff --git a/parsl/tests/test_python_apps/test_lifted.py b/parsl/tests/test_python_apps/test_lifted.py new file mode 100644 index 0000000000..d83c48815c --- /dev/null +++ b/parsl/tests/test_python_apps/test_lifted.py @@ -0,0 +1,75 @@ +from parsl import python_app + + +@python_app +def returns_a_dict(): + return {"a": "X", "b": "Y"} + + +@python_app +def returns_a_list(): + return ["X", "Y"] + + +@python_app +def returns_a_tuple(): + return ("X", "Y") + + +@python_app +def returns_a_class(): + from dataclasses import dataclass + + @dataclass + class MyClass: + a: str = "X" + b: str = "Y" + + return MyClass + + +def test_returns_a_dict(): + + # precondition that returns_a_dict behaves + # correctly + assert returns_a_dict().result()["a"] == "X" + + # check that the deferred __getitem__ functionality works, + # allowing [] to be used on an AppFuture + assert returns_a_dict()["a"].result() == "X" + + +def test_returns_a_list(): + + # precondition that returns_a_list behaves + # correctly + assert returns_a_list().result()[0] == "X" + + # check that the deferred __getitem__ functionality works, + # allowing [] to be used on an AppFuture + assert returns_a_list()[0].result() == "X" + + +def test_returns_a_tuple(): + + # precondition that returns_a_tuple behaves + # correctly + assert returns_a_tuple().result()[0] == "X" + + # check that the deferred __getitem__ functionality works, + # allowing [] to be used on an AppFuture + assert returns_a_tuple()[0].result() == "X" + + +def test_returns_a_class(): + + # precondition that returns_a_class behaves + # correctly + assert returns_a_class().result().a == "X" + + # check that the deferred __getattr__ functionality works, + # allowing [] to be used on an AppFuture + assert returns_a_class().a.result() == "X" + + # when the result is not indexable, a sensible error should + # appear in the appropriate future diff --git a/parsl/tests/test_python_apps/test_lifted_dict.py b/parsl/tests/test_python_apps/test_lifted_dict.py deleted file mode 100644 index acf3d6389f..0000000000 --- a/parsl/tests/test_python_apps/test_lifted_dict.py +++ /dev/null @@ -1,23 +0,0 @@ -from parsl import python_app - - -@python_app -def returns_a_dict(): - return {"a": "X", "b": "Y"} - - -def test_returns_a_dict(): - - # precondition that returns_a_dict behaves - # correctly - assert returns_a_dict().result()["a"] == "X" - - # check that the deferred __getitem__ functionality works, - # allowing [] to be used on an AppFuture - assert returns_a_dict()["a"].result() == "X" - - # other things to test: when the result is a sequence, so that - # [] is a position - - # when the result is not indexable, a sensible error should - # appear in the appropriate future From fcde3ea6bb46a7f531a1635a4d156e1df7c0d375 Mon Sep 17 00:00:00 2001 From: Ben Clifford Date: Sat, 14 Oct 2023 09:11:54 +0000 Subject: [PATCH 13/23] Skip test that conflicts with wq/taskvine return handling --- parsl/tests/test_python_apps/test_lifted.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/parsl/tests/test_python_apps/test_lifted.py b/parsl/tests/test_python_apps/test_lifted.py index d83c48815c..19ce3fde04 100644 --- a/parsl/tests/test_python_apps/test_lifted.py +++ b/parsl/tests/test_python_apps/test_lifted.py @@ -1,3 +1,5 @@ +import pytest + from parsl import python_app @@ -61,6 +63,7 @@ def test_returns_a_tuple(): assert returns_a_tuple()[0].result() == "X" +@pytest.mark.skip("returning classes is not supported in WorkQueue or Task Vine - see issue #2908") def test_returns_a_class(): # precondition that returns_a_class behaves From fe4d3512d5e3ca0e7dc83d8a8ddd1e35c196090b Mon Sep 17 00:00:00 2001 From: Ben Clifford Date: Sat, 14 Oct 2023 09:17:30 +0000 Subject: [PATCH 14/23] More docs: comments from Andrew Rosen and document his . implementation --- docs/userguide/lifted_ops.rst | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/docs/userguide/lifted_ops.rst b/docs/userguide/lifted_ops.rst index 8e8c022538..f843694883 100644 --- a/docs/userguide/lifted_ops.rst +++ b/docs/userguide/lifted_ops.rst @@ -1,5 +1,9 @@ .. _label-liftedops: +Parsl allows some operators (``[]`` and ``.``) to be used on an AppFuture in +a way that makes sense with those operators on the eventually returned +result. + Lifted [] operator ================== @@ -7,8 +11,8 @@ When an app returns a complex structure such as a ``dict`` or a ``list``, it is sometimes useful to pass an element of that structure to a subsequent task, without waiting for that subsequent task to complete. -To help with this, Parsl allows the ``[]`` operator to be used on and -`AppFuture`. This operator will return return another `AppFuture` that will +To help with this, Parsl allows the ``[]`` operator to be used on an +`AppFuture`. This operator will return another `AppFuture` that will complete after the initial future, with the result of `[]` on the value of the initial future. @@ -32,3 +36,13 @@ If a key does not exist in the returned result, then the exception will appear in the Future returned by ``[]``, rather than at the point that the ``[]`` operator is applied. This is because the valid values that can be used are not known until the underlying result is available. + +Lifted . operator +================= + +The ``.`` operator works similarly to ``[]`` described above: + +.. code-block:: python + + fut = my_app + assert fut.x.result() == fut.result().x From ab9fcfc32433eedafacf5d0a0620fb8dcbda07c6 Mon Sep 17 00:00:00 2001 From: Ben Clifford Date: Sat, 14 Oct 2023 09:18:59 +0000 Subject: [PATCH 15/23] Bring my recent [] changes to Andrew's . implementation --- parsl/dataflow/futures.py | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/parsl/dataflow/futures.py b/parsl/dataflow/futures.py index 60a598ae4f..bc4beedf9f 100644 --- a/parsl/dataflow/futures.py +++ b/parsl/dataflow/futures.py @@ -131,21 +131,9 @@ def __getitem__(self, key: Any) -> AppFuture: return deferred_getitem_app(self, key) def __getattr__(self, name: str) -> AppFuture: - # hack around circular imports for python_app from parsl.app.app import python_app - # TODO: it would be nice to avoid redecorating this each time, - # which was done to avoid import loops here -- but the DFK - # is not defined at import time, and so this decoration needs - # to happen at least once per DFK. So perhaps for implementation - # simplicity, this redecoration should always happen, as happens - # for example with the globus data provider. - - # TODO: this should be run on the same DFK as is executing the - # task that is associated with this future. That value isn't - # easily available here (although probably the right thing to - # do is add it to self.task_def) - deferred_getattr_app = python_app(deferred_getattr, executors=['_parsl_internal']) + deferred_getattr_app = python_app(deferred_getattr, executors=['_parsl_internal'], data_flow_kernel=self.task_def['dfk']) return deferred_getattr_app(self, name) @@ -154,8 +142,5 @@ def deferred_getitem(o: Any, k: Any) -> Any: return o[k] -# this needs python_app to be importable, but three's an import loop -# if so... so hack around it for prototyping. -# @python_app def deferred_getattr(o: Any, name: str) -> Any: return getattr(o, name) From de56ce97ed5c4c67e03b5d3e376c146f69432d4d Mon Sep 17 00:00:00 2001 From: Ben Clifford Date: Sat, 14 Oct 2023 09:23:34 +0000 Subject: [PATCH 16/23] Merge tests into one file --- parsl/tests/test_python_apps/test_lifted.py | 34 ++++++++++++ .../test_python_apps/test_lifted_getitem.py | 55 ------------------- 2 files changed, 34 insertions(+), 55 deletions(-) delete mode 100644 parsl/tests/test_python_apps/test_lifted_getitem.py diff --git a/parsl/tests/test_python_apps/test_lifted.py b/parsl/tests/test_python_apps/test_lifted.py index 19ce3fde04..7f6dea481a 100644 --- a/parsl/tests/test_python_apps/test_lifted.py +++ b/parsl/tests/test_python_apps/test_lifted.py @@ -1,5 +1,6 @@ import pytest +from concurrent.futures import Future from parsl import python_app @@ -63,6 +64,10 @@ def test_returns_a_tuple(): assert returns_a_tuple()[0].result() == "X" +def test_lifted_getitem_on_dict_bad_key(): + assert isinstance(returns_a_dict()["invalid"].exception(), KeyError) + + @pytest.mark.skip("returning classes is not supported in WorkQueue or Task Vine - see issue #2908") def test_returns_a_class(): @@ -76,3 +81,32 @@ def test_returns_a_class(): # when the result is not indexable, a sensible error should # appear in the appropriate future + + +@python_app +def passthrough(v): + return v + + +def test_lifted_getitem_ordering(): + # this should test that lifting getitem has the correct execution + # order: that it does not defer the execution of following code + + f_prereq = Future() + + f_top = passthrough(f_prereq) + + f_a = f_top['a'] + + # lifted ['a'] should not defer execution here (so it should not + # implicitly call result() on f_top). If it does, this test will + # hang at this point, waiting for f_top to get a value, which + # will not happen until f_prereq gets a value.. + # which doesn't happen until: + + f_prereq.set_result({"a": "X"}) + + # now at this point it should be safe to wait for f_a to get a result + # while passthrough and lifted getitem run... + + assert f_a.result() == "X" diff --git a/parsl/tests/test_python_apps/test_lifted_getitem.py b/parsl/tests/test_python_apps/test_lifted_getitem.py deleted file mode 100644 index f6cd0a27cc..0000000000 --- a/parsl/tests/test_python_apps/test_lifted_getitem.py +++ /dev/null @@ -1,55 +0,0 @@ -from parsl import python_app -from concurrent.futures import Future - - -@python_app -def returns_a_dict(): - return {"a": "X", "b": "Y"} - - -@python_app -def returns_a_list(): - return [5, 6, 7, 8, 9] - - -@python_app -def passthrough(v): - return v - - -def test_lifted_getitem_on_dict(): - # check that the deferred __getitem__ functionality works, - # allowing [] to be used on an AppFuture - assert returns_a_dict()["a"].result() == "X" - - -def test_lifted_getitem_on_dict_bad_key(): - assert isinstance(returns_a_dict()["invalid"].exception(), KeyError) - - -def test_lifted_getitem_on_list(): - assert returns_a_list()[2].result() == 7 - - -def test_lifted_getitem_ordering(): - # this should test that lifting getitem has the correct execution - # order: that it does not defer the execution of following code - - f_prereq = Future() - - f_top = passthrough(f_prereq) - - f_a = f_top['a'] - - # lifted ['a'] should not defer execution here (so it should not - # implicitly call result() on f_top). If it does, this test will - # hang at this point, waiting for f_top to get a value, which - # will not happen until f_prereq gets a value.. - # which doesn't happen until: - - f_prereq.set_result({"a": "X"}) - - # now at this point it should be safe to wait for f_a to get a result - # while passthrough and lifted getitem run... - - assert f_a.result() == "X" From 892759a19e8bb468e38c4d6bb0717ef619e4fcaf Mon Sep 17 00:00:00 2001 From: Ben Clifford Date: Sat, 14 Oct 2023 09:27:28 +0000 Subject: [PATCH 17/23] Add class instance .-operator test, which is less extreme than broken class definition returning test --- parsl/tests/test_python_apps/test_lifted.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/parsl/tests/test_python_apps/test_lifted.py b/parsl/tests/test_python_apps/test_lifted.py index 7f6dea481a..72dfcc14e4 100644 --- a/parsl/tests/test_python_apps/test_lifted.py +++ b/parsl/tests/test_python_apps/test_lifted.py @@ -31,6 +31,17 @@ class MyClass: return MyClass +class MyOuterClass(): + def __init__(self): + self.q = "A" + self.r = "B" + + +@python_app +def returns_a_class_instance(): + return MyOuterClass() + + def test_returns_a_dict(): # precondition that returns_a_dict behaves @@ -68,6 +79,14 @@ def test_lifted_getitem_on_dict_bad_key(): assert isinstance(returns_a_dict()["invalid"].exception(), KeyError) +def test_returns_a_class_instance(): + # precondition + assert returns_a_class_instance().result().q == "A" + + # test of commuting . and result() + assert returns_a_class_instance().q.result() == "A" + + @pytest.mark.skip("returning classes is not supported in WorkQueue or Task Vine - see issue #2908") def test_returns_a_class(): From f57d19737acb6a18a8ef33b24ef509587dbf7f7d Mon Sep 17 00:00:00 2001 From: Ben Clifford Date: Sat, 14 Oct 2023 09:41:39 +0000 Subject: [PATCH 18/23] Avoid lifting attribute references on _underscored attributes --- docs/userguide/lifted_ops.rst | 5 +++++ parsl/dataflow/futures.py | 6 ++++++ parsl/tests/test_python_apps/test_lifted.py | 6 ++++++ 3 files changed, 17 insertions(+) diff --git a/docs/userguide/lifted_ops.rst b/docs/userguide/lifted_ops.rst index f843694883..d0fffacdcb 100644 --- a/docs/userguide/lifted_ops.rst +++ b/docs/userguide/lifted_ops.rst @@ -46,3 +46,8 @@ The ``.`` operator works similarly to ``[]`` described above: fut = my_app assert fut.x.result() == fut.result().x + +Attributes beginning with ``_`` are not lifted as thus usually indicates an +attribute that is used for internal purposes, and to try to avoid mixing +protocols (such as iteration in for loops) defined on AppFutures vs protocols +defined on the underlying result object. diff --git a/parsl/dataflow/futures.py b/parsl/dataflow/futures.py index bc4beedf9f..429fb1ca0e 100644 --- a/parsl/dataflow/futures.py +++ b/parsl/dataflow/futures.py @@ -133,6 +133,12 @@ def __getitem__(self, key: Any) -> AppFuture: def __getattr__(self, name: str) -> AppFuture: from parsl.app.app import python_app + # this will avoid lifting behaviour on private methods and attributes, + # including __double_underscore__ methods which implement other + # Python syntax (such as iterators in for loops) + if name.startswith("_"): + raise AttributeError() + deferred_getattr_app = python_app(deferred_getattr, executors=['_parsl_internal'], data_flow_kernel=self.task_def['dfk']) return deferred_getattr_app(self, name) diff --git a/parsl/tests/test_python_apps/test_lifted.py b/parsl/tests/test_python_apps/test_lifted.py index 72dfcc14e4..644792205a 100644 --- a/parsl/tests/test_python_apps/test_lifted.py +++ b/parsl/tests/test_python_apps/test_lifted.py @@ -87,6 +87,12 @@ def test_returns_a_class_instance(): assert returns_a_class_instance().q.result() == "A" +def test_returns_a_class_instance_no_underscores(): + # test that _underscore attribute references are not lifted + with pytest.raises(AttributeError): + returns_a_class_instance()._nosuchattribute.result() + + @pytest.mark.skip("returning classes is not supported in WorkQueue or Task Vine - see issue #2908") def test_returns_a_class(): From 47f0b3f31464a4a7421ce12c23655b05b702e5fc Mon Sep 17 00:00:00 2001 From: Ben Clifford Date: Sat, 14 Oct 2023 09:53:06 +0000 Subject: [PATCH 19/23] Fix up headings --- docs/userguide/lifted_ops.rst | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/docs/userguide/lifted_ops.rst b/docs/userguide/lifted_ops.rst index d0fffacdcb..69404821dd 100644 --- a/docs/userguide/lifted_ops.rst +++ b/docs/userguide/lifted_ops.rst @@ -1,11 +1,14 @@ .. _label-liftedops: +Lifted operators +================ + Parsl allows some operators (``[]`` and ``.``) to be used on an AppFuture in a way that makes sense with those operators on the eventually returned result. Lifted [] operator -================== +------------------ When an app returns a complex structure such as a ``dict`` or a ``list``, it is sometimes useful to pass an element of that structure to a subsequent @@ -38,7 +41,7 @@ the ``[]`` operator is applied. This is because the valid values that can be used are not known until the underlying result is available. Lifted . operator -================= +----------------- The ``.`` operator works similarly to ``[]`` described above: From b526da10240948a11089c475bf4a696f5b460302 Mon Sep 17 00:00:00 2001 From: Ben Clifford Date: Sat, 14 Oct 2023 11:30:45 +0000 Subject: [PATCH 20/23] Fix quotation style around [] --- docs/userguide/lifted_ops.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/userguide/lifted_ops.rst b/docs/userguide/lifted_ops.rst index 69404821dd..abaeca7915 100644 --- a/docs/userguide/lifted_ops.rst +++ b/docs/userguide/lifted_ops.rst @@ -16,7 +16,7 @@ task, without waiting for that subsequent task to complete. To help with this, Parsl allows the ``[]`` operator to be used on an `AppFuture`. This operator will return another `AppFuture` that will -complete after the initial future, with the result of `[]` on the value +complete after the initial future, with the result of ``[]`` on the value of the initial future. The end result is that this assertion will hold: From 09865b43a7e10082b4e50f70072203bdf8e81a11 Mon Sep 17 00:00:00 2001 From: Ben Clifford Date: Sat, 14 Oct 2023 13:09:22 +0000 Subject: [PATCH 21/23] Remove in-function import and use package level import like in __getitem__ --- parsl/dataflow/futures.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/parsl/dataflow/futures.py b/parsl/dataflow/futures.py index 429fb1ca0e..0d9e22d72a 100644 --- a/parsl/dataflow/futures.py +++ b/parsl/dataflow/futures.py @@ -131,15 +131,13 @@ def __getitem__(self, key: Any) -> AppFuture: return deferred_getitem_app(self, key) def __getattr__(self, name: str) -> AppFuture: - from parsl.app.app import python_app - # this will avoid lifting behaviour on private methods and attributes, # including __double_underscore__ methods which implement other # Python syntax (such as iterators in for loops) if name.startswith("_"): raise AttributeError() - deferred_getattr_app = python_app(deferred_getattr, executors=['_parsl_internal'], data_flow_kernel=self.task_def['dfk']) + deferred_getattr_app = app.python_app(deferred_getattr, executors=['_parsl_internal'], data_flow_kernel=self.task_def['dfk']) return deferred_getattr_app(self, name) From 1f3e820a3432792d38cf1f69d789d98937ca0750 Mon Sep 17 00:00:00 2001 From: Ben Clifford Date: Sat, 14 Oct 2023 16:53:15 +0000 Subject: [PATCH 22/23] Fix typo --- docs/userguide/lifted_ops.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/userguide/lifted_ops.rst b/docs/userguide/lifted_ops.rst index abaeca7915..6e258b9b62 100644 --- a/docs/userguide/lifted_ops.rst +++ b/docs/userguide/lifted_ops.rst @@ -50,7 +50,7 @@ The ``.`` operator works similarly to ``[]`` described above: fut = my_app assert fut.x.result() == fut.result().x -Attributes beginning with ``_`` are not lifted as thus usually indicates an +Attributes beginning with ``_`` are not lifted as this usually indicates an attribute that is used for internal purposes, and to try to avoid mixing protocols (such as iteration in for loops) defined on AppFutures vs protocols defined on the underlying result object. From 6188e2132cae15f85a727fe79c36fdc8c3384b85 Mon Sep 17 00:00:00 2001 From: Ben Clifford Date: Sat, 21 Oct 2023 21:25:44 +0000 Subject: [PATCH 23/23] Use renamed task_def=>task_record in this PR --- parsl/dataflow/futures.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/parsl/dataflow/futures.py b/parsl/dataflow/futures.py index 81ce39fc38..7beac4e599 100644 --- a/parsl/dataflow/futures.py +++ b/parsl/dataflow/futures.py @@ -126,7 +126,7 @@ def __getitem__(self, key: Any) -> AppFuture: # This is decorated on each invocation because the getitem task # should be bound to the same DFK as the task associated with this # Future. - deferred_getitem_app = app.python_app(deferred_getitem, executors=['_parsl_internal'], data_flow_kernel=self.task_def['dfk']) + deferred_getitem_app = app.python_app(deferred_getitem, executors=['_parsl_internal'], data_flow_kernel=self.task_record['dfk']) return deferred_getitem_app(self, key) @@ -137,7 +137,7 @@ def __getattr__(self, name: str) -> AppFuture: if name.startswith("_"): raise AttributeError() - deferred_getattr_app = app.python_app(deferred_getattr, executors=['_parsl_internal'], data_flow_kernel=self.task_def['dfk']) + deferred_getattr_app = app.python_app(deferred_getattr, executors=['_parsl_internal'], data_flow_kernel=self.task_record['dfk']) return deferred_getattr_app(self, name)