From bbcd64d6ab289a67ebd984ecf161e96c398dde3a Mon Sep 17 00:00:00 2001 From: Logan Ward Date: Tue, 17 Oct 2023 10:17:48 -0400 Subject: [PATCH] Add explanation about imports and global variables --- docs/teaching_scripts/README.md | 3 ++ docs/teaching_scripts/test_apps.py | 60 ++++++++++++++++++++++++++++++ docs/userguide/apps.rst | 58 +++++++++++++++++++++++++++++ docs/userguide/workflow.rst | 2 +- 4 files changed, 122 insertions(+), 1 deletion(-) create mode 100644 docs/teaching_scripts/README.md create mode 100644 docs/teaching_scripts/test_apps.py diff --git a/docs/teaching_scripts/README.md b/docs/teaching_scripts/README.md new file mode 100644 index 0000000000..67f80899af --- /dev/null +++ b/docs/teaching_scripts/README.md @@ -0,0 +1,3 @@ +# Example Scripts + +Scripts which illustrate examples from the documentation that do not run well as part of the pytest suite diff --git a/docs/teaching_scripts/test_apps.py b/docs/teaching_scripts/test_apps.py new file mode 100644 index 0000000000..68fb6add62 --- /dev/null +++ b/docs/teaching_scripts/test_apps.py @@ -0,0 +1,60 @@ +"""Tests documentation related to building apps. 
Must reside outside the Parsl library to be effective""" +from typing import List, Union + +import numpy as np + +from parsl import python_app, HighThroughputExecutor, Config +import parsl + +parsl.load(Config(executors=[HighThroughputExecutor(label='htex_spawn', max_workers=1, start_method='spawn', address='127.0.0.1')])) + + +# Part 1: Explain imports +# BAD: Assumes library has been imported +@python_app(executors=['htex_spawn']) +def bad_imports(x: Union[List[float], np.ndarray], m: float, b: float): + return np.multiply(x, m) + b + + +# GOOD: Imports libraries itself +@python_app(executors=['htex_spawn']) +def good_imports(x: Union[List[float], 'np.ndarray'], m: float, b: float): + import numpy as np + return np.multiply(x, m) + b + + +future = bad_imports([1.], 1, 0) + +try: + future.result() + raise ValueError() +except NameError as e: + print('Failed, as expected. Error:', e) + +future = good_imports([1.], 1, 0) +print(f'Passed, as expected: {future.result()}') + +# Part 2: Test other types of globals +# BAD: Uses global variables +global_var = {'a': 0} + + +@python_app +def bad_global(string: str, character: str = 'a'): + global_var[character] += string.count(character) # `global_var` will not be accessible + + +# GOOD +@python_app +def good_global(string: str, character: str = 'a'): + return {character: string.count(character)} + + +try: + bad_global('parsl').result() +except NameError as e: + print(f'Failed, as expected: {e}') + +for ch, co in good_global('parsl', 'a').result().items(): + global_var[ch] += co + diff --git a/docs/userguide/apps.rst b/docs/userguide/apps.rst index 66a415ec0b..56b8f75463 100644 --- a/docs/userguide/apps.rst +++ b/docs/userguide/apps.rst @@ -83,6 +83,64 @@ as in following code snippet, which copies the contents of one file (``in.txt``) echo(inputs=[in.txt], outputs=[out.txt]) + +Imports and Global Variables +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Parsl apps have access to less information from the script that defined them +than 
functions run via Python's native multiprocessing libraries. +The reason is that functions are executed on workers that +do not have access to the same global variables as the script that defined them. +Practically, this means: + +1. *Functions may need to re-import libraries.* + Place the import statements that define functions or classes inside the function. + Type annotations should also not use libraries that must be defined later. + + .. code-block:: python + + import numpy as np + + # BAD: Assumes library has been imported + @python_app + def linear_model(x: list[float] | np.ndarray, m: float, b: float): + return np.multiply(x, m) + b + + # GOOD: Function imports libraries on remote worker + @python_app + def linear_model(x: 'list[float] | np.ndarray', m: float, b: float): + import numpy as np + return np.multiply(x, m) + b + + +2. *Global variables are inaccessible*. + Functions should not use variables defined outside the function. + Likewise, do not assume that variables created inside the function are visible elsewhere. + + +.. code-block:: python + + # BAD: Uses global variables + global_var = {'a': 0} + + @python_app + def counter_func(string: str, character: str = 'a'): + global_var[character] += string.count(character) # `global_var` will not be accessible + + + # GOOD + @python_app + def counter_func(string: str, character: str = 'a'): + return {character: string.count(character)} + + for ch, co in counter_func('parsl', 'a').result().items(): + global_var[ch] += co + +.. note:: + + These rules do not apply to functions which are imported from libraries. + Library functions are sent to workers differently than functions defined in a script. 
+ Special Keyword Arguments ^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/docs/userguide/workflow.rst b/docs/userguide/workflow.rst index cb047ed762..4ab41e8fbf 100644 --- a/docs/userguide/workflow.rst +++ b/docs/userguide/workflow.rst @@ -59,7 +59,7 @@ Sequential workflows can be created by passing an AppFuture from one task to ano def generate(limit): from random import randint """Generate a random integer and return it""" - return randint(1,limit) + return randint(1, limit) # Write a message to a file @bash_app