From c3afd050bdb9df93f87ad29571e948bc02d4b8f5 Mon Sep 17 00:00:00 2001 From: Ben Clifford Date: Wed, 10 May 2023 05:48:15 +0000 Subject: [PATCH 1/4] Switch configuration executor List type to Sequence The list of executors in configuration is intended to be read only. Using Sequence for this makes that more explicit, as Sequences cannot be modified. This is part of a move towards using Sequence rather than List in many places (although the runtime type remains a list) because Sequence has better behaviour with subclasses of its parameter type: it is covariant, rather than invariant. This PR rearranges the validation logic a bit so that there is no longer an arbitrary property setter for the executor property - it was always a bug to replace the executor list in a config once parsl has been configured, and removing this property setter enforces that more strongly. --- parsl/config.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/parsl/config.py b/parsl/config.py index 0f56c5b9cb..b924c32bd4 100644 --- a/parsl/config.py +++ b/parsl/config.py @@ -1,7 +1,7 @@ import logging import typeguard -from typing import Callable, List, Optional, Sequence, Union +from typing import Callable, Optional, Sequence, Union from typing_extensions import Literal from parsl.utils import RepresentationMixin @@ -20,8 +20,8 @@ class Config(RepresentationMixin): Parameters ---------- - executors : list of ParslExecutor, optional - List of `ParslExecutor` instances to use for executing tasks. + executors : sequence of ParslExecutor, optional + List (or other sequence) of `ParslExecutor` instances to use for executing tasks. Default is [:class:`~parsl.executors.threads.ThreadPoolExecutor()`]. app_cache : bool, optional Enable app caching. Default is True. @@ -73,7 +73,7 @@ class Config(RepresentationMixin): @typeguard.typechecked def __init__(self, - executors: Optional[List[ParslExecutor]] = None, + executors: Optional[Sequence[ParslExecutor]] = None, app_cache: bool = True, checkpoint_files: Optional[Sequence[str]] = None, checkpoint_mode: Union[None, @@ -92,9 +92,12 @@ def __init__(self, monitoring: Optional[MonitoringHub] = None, usage_tracking: bool = False, initialize_logging: bool = True) -> None: + self._executors: Sequence[ParslExecutor] if executors is None: - executors = [ThreadPoolExecutor()] - self.executors = executors + self._executors = [ThreadPoolExecutor()] + else: + self._validate_executors(executors) + self._executors = executors self.app_cache = app_cache self.checkpoint_files = checkpoint_files self.checkpoint_mode = checkpoint_mode @@ -125,11 +128,9 @@ def __init__(self, def executors(self) -> Sequence[ParslExecutor]: return self._executors - @executors.setter - def executors(self, executors: Sequence[ParslExecutor]): + def _validate_executors(self, executors: Sequence[ParslExecutor]) -> None: labels = [e.label for e in executors] duplicates = [e for n, e in enumerate(labels) if e in labels[:n]] if len(duplicates) > 0: raise ConfigurationError('Executors must have unique labels ({})'.format( ', '.join(['label={}'.format(repr(d)) for d in duplicates]))) - self._executors = executors From db9d890cee7f2653f3e20a7aa5b1612fcd96fc89 Mon Sep 17 00:00:00 2001 From: Ben Clifford Date: Wed, 8 Nov 2023 12:24:47 +0000 Subject: [PATCH 2/4] Raise configuration error if no executors are specified I think this is probably the right thing to do. Breaking change: There's a corner case where you would only use the implicit _parsl_internal executor: eg. if you were only doing join_apps and file staging, and if you were co-ordinating non-Parsl futures and not running any python_ or bash_ apps. But I think that is sufficiently obscure, this check is the right thing to do and if anyone complains, it can be removed again. --- parsl/config.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/parsl/config.py b/parsl/config.py index b924c32bd4..f4a51a5a65 100644 --- a/parsl/config.py +++ b/parsl/config.py @@ -129,6 +129,10 @@ def executors(self) -> Sequence[ParslExecutor]: return self._executors def _validate_executors(self, executors: Sequence[ParslExecutor]) -> None: + + if len(executors) == 0: + raise ConfigurationError('At least one executor must be specified') + labels = [e.label for e in executors] duplicates = [e for n, e in enumerate(labels) if e in labels[:n]] if len(duplicates) > 0: From bbc3ddae7710775cad34e6340113610b6805d8f0 Mon Sep 17 00:00:00 2001 From: Ben Clifford Date: Wed, 8 Nov 2023 13:38:19 +0000 Subject: [PATCH 3/4] Accept an iterable for executor config This is a supertype of the previous-commit Sequence, itself a supertype of List that is current in master. It subtly emphasises that the collection of iterators will be read over once by Parsl and not touched again. This drives a change in the validation call to validate after copying into an internal structure. --- parsl/config.py | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/parsl/config.py b/parsl/config.py index f4a51a5a65..c857b75ad1 100644 --- a/parsl/config.py +++ b/parsl/config.py @@ -1,7 +1,7 @@ import logging import typeguard -from typing import Callable, Optional, Sequence, Union +from typing import Callable, Iterable, Optional, Sequence, Union from typing_extensions import Literal from parsl.utils import RepresentationMixin @@ -21,8 +21,8 @@ class Config(RepresentationMixin): Parameters ---------- executors : sequence of ParslExecutor, optional - List (or other sequence) of `ParslExecutor` instances to use for executing tasks. - Default is [:class:`~parsl.executors.threads.ThreadPoolExecutor()`]. + List (or other iterable) of `ParslExecutor` instances to use for executing tasks. + Default is (:class:`~parsl.executors.threads.ThreadPoolExecutor()`,). app_cache : bool, optional Enable app caching. Default is True. checkpoint_files : sequence of str, optional @@ -73,7 +73,7 @@ class Config(RepresentationMixin): @typeguard.typechecked def __init__(self, - executors: Optional[Sequence[ParslExecutor]] = None, + executors: Optional[Iterable[ParslExecutor]] = None, app_cache: bool = True, checkpoint_files: Optional[Sequence[str]] = None, checkpoint_mode: Union[None, @@ -92,12 +92,15 @@ def __init__(self, monitoring: Optional[MonitoringHub] = None, usage_tracking: bool = False, initialize_logging: bool = True) -> None: + + if not executors: + executors = (ThreadPoolExecutor(),) + self._executors: Sequence[ParslExecutor] - if executors is None: - self._executors = [ThreadPoolExecutor()] - else: - self._validate_executors(executors) - self._executors = executors + self._executors = tuple(executors) + + self._validate_executors() + self.app_cache = app_cache self.checkpoint_files = checkpoint_files self.checkpoint_mode = checkpoint_mode @@ -128,12 +131,12 @@ def __init__(self, def executors(self) -> Sequence[ParslExecutor]: return self._executors - def _validate_executors(self, executors: Sequence[ParslExecutor]) -> None: + def _validate_executors(self) -> None: - if len(executors) == 0: + if len(self.executors) == 0: raise ConfigurationError('At least one executor must be specified') - labels = [e.label for e in executors] + labels = [e.label for e in self.executors] duplicates = [e for n, e in enumerate(labels) if e in labels[:n]] if len(duplicates) > 0: raise ConfigurationError('Executors must have unique labels ({})'.format( From ab0bba32e523ec12d12b2a0ffcefa00b15d52dba Mon Sep 17 00:00:00 2001 From: Kevin Hunter Kesling Date: Wed, 8 Nov 2023 15:23:48 -0500 Subject: [PATCH 4/4] Executor definition logic to match expectation Per a Slack conversation, the behavior to preserve is "_if you don't specify any executors, you get a default local thread one_". Accordingly, concretize the iterator if it's not `None`, and then act accordingly. --- parsl/config.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/parsl/config.py b/parsl/config.py index c857b75ad1..4f3b7be23a 100644 --- a/parsl/config.py +++ b/parsl/config.py @@ -93,12 +93,11 @@ def __init__(self, usage_tracking: bool = False, initialize_logging: bool = True) -> None: + executors = tuple(executors or []) if not executors: executors = (ThreadPoolExecutor(),) - self._executors: Sequence[ParslExecutor] - self._executors = tuple(executors) - + self._executors: Sequence[ParslExecutor] = executors self._validate_executors() self.app_cache = app_cache