From e35d6f6ec008a930d740504affcce1534086678f Mon Sep 17 00:00:00 2001 From: Christian Monch Date: Sat, 14 Dec 2024 14:42:33 +0100 Subject: [PATCH 01/10] feat: use protected config for remote configuration This commit uses `ConfigManager.get_from_protected_sources` to check whether untrusted execution is allowed on a certain dataset from within the git annex remote. --- datalad_remake/annexremotes/remake_remote.py | 5 --- datalad_remake/utils/getconfig.py | 39 ++++++++++++++++++++ datalad_remake/utils/getkeys.py | 20 ---------- datalad_remake/utils/remake_remote.py | 8 +--- 4 files changed, 40 insertions(+), 32 deletions(-) create mode 100644 datalad_remake/utils/getconfig.py delete mode 100644 datalad_remake/utils/getkeys.py diff --git a/datalad_remake/annexremotes/remake_remote.py b/datalad_remake/annexremotes/remake_remote.py index 254cc3a..ef29507 100644 --- a/datalad_remake/annexremotes/remake_remote.py +++ b/datalad_remake/annexremotes/remake_remote.py @@ -53,11 +53,6 @@ class RemakeRemote(SpecialRemote): def __init__(self, annex: Master): super().__init__(annex) - self.configs = { - 'allow-untrusted-execution': 'Allow execution of untrusted code ' - 'with untrusted parameters. set to "true" to enable. THIS IS ' - 'DANGEROUS and might lead to remote code execution.', - } self._config_manager: ConfigManager | None = None @property diff --git a/datalad_remake/utils/getconfig.py b/datalad_remake/utils/getconfig.py new file mode 100644 index 0000000..0a075a9 --- /dev/null +++ b/datalad_remake/utils/getconfig.py @@ -0,0 +1,39 @@ +from __future__ import annotations + +from datalad_core.config import ( + ConfigManager, + get_manager, +) + +from datalad_remake import ( + allow_untrusted_execution_key, + trusted_keys_config_key, +) + + +def get_trusted_keys(config_manager: ConfigManager | None = None) -> list[str]: + value = get_protected_config(trusted_keys_config_key, config_manager) + if value is None: + return [] + return [key.strip() for key in value.split(',')] + + +def get_allow_untrusted_execution( + dataset_id: str, + config_manager: ConfigManager | None = None +) -> bool: + """Get an allow-untrusted-execution indicator for a dataset.""" + value = get_protected_config( + allow_untrusted_execution_key + dataset_id, + config_manager, + ) + return value == 'true' + + +def get_protected_config( + config_key: str, + config_manager: ConfigManager | None = None +) -> str: + if config_manager is None: + config_manager = get_manager() + return config_manager.get_from_protected_sources(config_key).value diff --git a/datalad_remake/utils/getkeys.py b/datalad_remake/utils/getkeys.py deleted file mode 100644 index 93b9590..0000000 --- a/datalad_remake/utils/getkeys.py +++ /dev/null @@ -1,20 +0,0 @@ -from __future__ import annotations - -from datalad_core.config import ( - ConfigManager, - get_manager, -) - -from datalad_remake import trusted_keys_config_key - - -def get_trusted_keys(config_manager: ConfigManager | None = None) -> list[str]: - if config_manager is None: - config_manager = get_manager() - - trusted_key_items = config_manager.get_from_protected_sources( - trusted_keys_config_key - ) - if trusted_key_items.value is None: - return [] - return [key.strip() for key in trusted_key_items.value.split(',')] diff --git a/datalad_remake/utils/remake_remote.py b/datalad_remake/utils/remake_remote.py index 8e0d255..49cca6f 100644 --- a/datalad_remake/utils/remake_remote.py +++ b/datalad_remake/utils/remake_remote.py @@ -9,18 +9,12 @@ logger = logging.getLogger('datalad.remake.utils.remake_remote') -def add_remake_remote( - dataset_root: str, - *, - allow_untrusted_execution: bool = False, -): - aue = 'true' if allow_untrusted_execution else 'false' +def add_remake_remote(dataset_root: str): options = [ 'type=external', 'externaltype=datalad-remake', 'encryption=none', 'autoenable=true', - f'allow-untrusted-execution={aue}', ] # Create a `Dataset`-instance to use the `AnnexRepo`-methods for special From 2c1412f24cdc22e0067d02f3a5dea7b506af4f16 Mon Sep 17 00:00:00 2001 From: Christian Monch Date: Mon, 16 Dec 2024 15:10:52 +0100 Subject: [PATCH 02/10] doc: adapt README.md to trusted execution --- README.md | 41 +++++++++++++++++++---------------------- 1 file changed, 19 insertions(+), 22 deletions(-) diff --git a/README.md b/README.md index ed7acbf..8dcd450 100644 --- a/README.md +++ b/README.md @@ -26,7 +26,11 @@ data curators, and infrastructure administrators. ## Requirements -This extension requires Python >= `3.9`. +This extension requires Python >= `3.9`. It also requires GPG to be installed +as well as a GPG key-pair to sign and verify commits. In addition, +git has to be configured to sign commits. For more information on how to sign +commits, refer to the +[Git documentation](https://git-scm.com/book/en/v2/Git-Tools-Signing-Your-Work). ## Installation @@ -50,7 +54,7 @@ To check your installation, run: ## Example usage -Create a dataset: +Ensure that your commits are signed (see the [Git documentation](https://git-scm.com/book/en/v2/Git-Tools-Signing-Your-Work)), and create a dataset: ```bash @@ -74,10 +78,14 @@ EOF > datalad save -m "add 'one-to-many' remake method" ``` +Before the computation can be executed, `datalad-make` has to be told to trust +the public key of the signer. How this is done is described in the section +[Trusted Keys](#trusted-keys). + Execute a computation and save the result: ```bash > datalad make -p first=bob -p second=alice -p output=name -o name-1.txt \ --o name-2.txt --allow-untrusted-execution one-to-many +-o name-2.txt one-to-many ``` The method `one-to-many` will create two files with the names `-1.txt` and `-2.txt`. Thus, the two files `name-1.txt` and `name-2.txt` need to @@ -93,22 +101,11 @@ content: alice ### Recomputation DataLad REMAKE can recompute dropped content. To demonstrate this, we will -drop a file and then recreate it via `datalad get`. Before we can do that in -this example we have to make a small adjustement. This is due to the fact that -we use "untrusted" execution in this example. It makes the example easier -because no signing keys are required. However, the git annex special remote that -was created by the `datalad make` command does not allow untrusted execution by -default (for security reasons we never automatically create a datalad-remake -remote that supports untrusted execution). To instruct the special remote to -allow untrusted execution, we have to reconfigure it. This can be done via the -following command: - -```bash -> git annex enableremote datalad-remake-auto allow-untrusted-execution=true -``` +drop a file and then recreate it via `datalad get`. -Now we drop the content of `name-1.txt`, verify it is gone, and recreate it via -`datalad get`, which "fetches" it from the `datalad-remake` remote. +Drop the content of `name-1.txt`, verify it is gone, and recreate it via +`datalad get`, which "fetches" it from the `datalad-remake` remote. Note: the +`datalad-remake` remote was automatically created by the command `datalad make`. ```bash > datalad drop name-1.txt @@ -126,17 +123,17 @@ The prospective computation can be initiated by using the ```bash > datalad make -p first=john -p second=susan -p output=person \ --o person-1.txt -o person-2.txt --prospective-execution --allow-untrusted-execution one-to-many +-o person-1.txt -o person-2.txt --prospective-execution one-to-many ``` The following command will fail, because no computation has been performed, -and the file content is unavailable: +and the file content is not yet available: ```bash > cat person-1.txt # this will fail, because the computation has not yet been performed ``` -We can further inspect this with `git annex info`: +We can further inspect `person-1.txt` with `git annex info`: ```bash > git annex info person-1.txt @@ -160,7 +157,7 @@ content: john Please note, to use this feature, the following configuration variable `remote.datalad-remake-auto.annex-security-allow-unverified-downloads` is set -to `ACKTHPPT` for each automatically created git-annex special remote +to `ACKTHPPT` for each automatically created git-annex special remote.
Why does the configuration variable have to be set? From 741530a1cdfde7e973e4f1abb653ec38e2f0ee54 Mon Sep 17 00:00:00 2001 From: Christian Monch Date: Mon, 16 Dec 2024 15:11:53 +0100 Subject: [PATCH 03/10] test: adapt tests to new untrusted execution spec --- datalad_remake/annexremotes/tests/test_priority.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/datalad_remake/annexremotes/tests/test_priority.py b/datalad_remake/annexremotes/tests/test_priority.py index a05946b..c2e8a76 100644 --- a/datalad_remake/annexremotes/tests/test_priority.py +++ b/datalad_remake/annexremotes/tests/test_priority.py @@ -4,7 +4,9 @@ from datalad_core.tests.fixtures import cfgman # noqa: F401 from datalad_next.tests import skip_if_on_windows +import datalad_core.config.manager from datalad_remake import ( + allow_untrusted_execution_key, priority_config_key, specification_dir, template_dir, @@ -74,9 +76,15 @@ def test_compute_remote_priority(tmp_path, cfgman, monkeypatch, priority): # no for label in ['alpha', 'beta'] ] - # Run the special remote with the given priority configuration. - with cfgman.overrides({priority_config_key: ConfigItem(','.join(priority))}): - run_remake_remote(tmp_path, urls, False) + with cfgman.overrides( + { + # Run the special remote with the given priority configuration. + priority_config_key: ConfigItem(','.join(priority)), + # Allow the special remote to execute untrusted operations on this dataset + allow_untrusted_execution_key + dataset.id: ConfigItem('true'), + } + ): + run_remake_remote(tmp_path, urls) # At this point the datalad-remake remote should have executed the # prioritized template and written the result. From 8cab21d14806ee5aeb841f5534680ab1684dce27 Mon Sep 17 00:00:00 2001 From: Christian Monch Date: Mon, 16 Dec 2024 15:54:02 +0100 Subject: [PATCH 04/10] test: adapt complex case tests This commit adapts the tests for complex cases to the new way in which the remote gets allowance to perform untrusted computations. --- datalad_remake/tests/test_complex_cases.py | 30 ++++++++++++++++++---- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/datalad_remake/tests/test_complex_cases.py b/datalad_remake/tests/test_complex_cases.py index e3530d9..1d4650e 100644 --- a/datalad_remake/tests/test_complex_cases.py +++ b/datalad_remake/tests/test_complex_cases.py @@ -1,6 +1,12 @@ from pathlib import Path -from datalad_remake import template_dir +from datalad_core.config import ConfigItem +from datalad_core.tests.fixtures import cfgman + +from datalad_remake import ( + allow_untrusted_execution_key, + template_dir, +) from datalad_remake.commands.tests.create_datasets import ( create_simple_computation_dataset, ) @@ -21,7 +27,7 @@ """ -def test_input_is_output(tmp_path: Path): +def test_input_is_output(tmp_path: Path, cfgman): root_dataset = create_simple_computation_dataset(tmp_path, 'ds1', 0, template) line = 'the second line' @@ -41,12 +47,19 @@ def test_input_is_output(tmp_path: Path): root_dataset.drop('a.txt', result_renderer='disabled') assert (root_dataset.pathobj / 'a.txt').exists() is False - root_dataset.get('a.txt', result_renderer='disabled') + with cfgman.overrides( + { + # Allow the special remote to execute untrusted operations on this + # dataset + allow_untrusted_execution_key + root_dataset.id: ConfigItem('true'), + } + ): + root_dataset.get('a.txt', result_renderer='disabled') assert (root_dataset.pathobj / 'a.txt').exists() assert (root_dataset.pathobj / 'a.txt').read_text() == f'a\n{line}\n' -def test_chain_dependency(tmp_path: Path): +def test_chain_dependency(tmp_path: Path, cfgman): # c2.txt -> c1.txt -> a.txt # Create a simple dataset that can create c1.txt from a.txt root_dataset = create_simple_computation_dataset( @@ -88,5 +101,12 @@ def test_chain_dependency(tmp_path: Path): root_dataset.drop(file, result_renderer='disabled') assert (root_dataset.pathobj / file).exists() is False - root_dataset.get('c2.txt', result_renderer='disabled') + with cfgman.overrides( + { + # Allow the special remote to execute untrusted operations on this + # dataset + allow_untrusted_execution_key + root_dataset.id: ConfigItem('true'), + } + ): + root_dataset.get('c2.txt', result_renderer='disabled') assert (root_dataset.pathobj / 'c2.txt').read_text() == f'a\n{c1_line}\n{c2_line}\n' From f0cda8e4a794b321d3e9dbbb4ade6b674bb68120 Mon Sep 17 00:00:00 2001 From: Christian Monch Date: Mon, 16 Dec 2024 15:58:54 +0100 Subject: [PATCH 05/10] test: adapt remake remote tests This commit adapts the basic tests of annex remote to the new way in which the remote gets allowance to perform untrusted computations. --- datalad_remake/annexremotes/tests/test_remake_remote.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/datalad_remake/annexremotes/tests/test_remake_remote.py b/datalad_remake/annexremotes/tests/test_remake_remote.py index 8f5281e..1c3409b 100644 --- a/datalad_remake/annexremotes/tests/test_remake_remote.py +++ b/datalad_remake/annexremotes/tests/test_remake_remote.py @@ -3,6 +3,7 @@ from datalad_core.tests.fixtures import cfgman # noqa: F401 from datalad_next.tests import skip_if_on_windows +from datalad_remake import allow_untrusted_execution_key from datalad_remake.commands.tests.create_datasets import create_ds_hierarchy from ... import ( @@ -69,9 +70,11 @@ def test_compute_remote_main(tmp_path, cfgman, monkeypatch, trusted): # noqa: F with cfgman.overrides( { trusted_keys_config_key: ConfigItem(signing_key), + allow_untrusted_execution_key + dataset.id: ConfigItem('true'), + } ): - run_remake_remote(tmp_path, [url], trusted) + run_remake_remote(tmp_path, [url]) # At this point the datalad-remake remote should have executed the # computation and written the result. From 758b3ed330316ad8e0d678034328d29145c0cd67 Mon Sep 17 00:00:00 2001 From: Christian Monch Date: Tue, 17 Dec 2024 11:46:57 +0100 Subject: [PATCH 06/10] feat: add config for dataset based trust exception --- datalad_remake/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/datalad_remake/__init__.py b/datalad_remake/__init__.py index 01bf4b0..0f939b7 100644 --- a/datalad_remake/__init__.py +++ b/datalad_remake/__init__.py @@ -6,6 +6,7 @@ __all__ = [ '__version__', + 'allow_untrusted_execution_key', 'auto_remote_name', 'command_suite', 'priority_config_key', @@ -52,5 +53,6 @@ template_dir = '.datalad/make/methods' specification_dir = '.datalad/make/specifications' trusted_keys_config_key = 'datalad.make.trusted-keys' +allow_untrusted_execution_key = 'datalad.make.allow-untrusted-execution.d-' priority_config_key = 'datalad.make.priority' auto_remote_name = 'datalad-remake-auto' From cf8eb4eb6bc447b4ff9a9061ae9deef42f32d8b9 Mon Sep 17 00:00:00 2001 From: Christian Monch Date: Tue, 17 Dec 2024 11:49:51 +0100 Subject: [PATCH 07/10] feat: remove allow_untrusted_execution from remote This commit removes the annex remote configuration `allow_untrusted_execution`. The remote can still perform untrusted execution for a dataset with dataset ID ``, if the protected git configuration contains the key: `datalad.make.allow-untrusted-execution.d-` with value: `true`. This is mainly done to support tests without creating GPG-keys. --- datalad_remake/annexremotes/remake_remote.py | 10 ++++- .../annexremotes/tests/test_hierarchies.py | 42 +++++++++++-------- .../annexremotes/tests/test_priority.py | 1 - datalad_remake/annexremotes/tests/utils.py | 5 +-- datalad_remake/commands/make_cmd.py | 4 +- .../commands/tests/create_datasets.py | 19 +++++---- .../commands/tests/test_collection.py | 3 +- datalad_remake/commands/tests/test_make.py | 16 +++++-- .../commands/tests/test_provision.py | 9 ++-- .../utils/tests/test_verification.py | 3 +- 10 files changed, 69 insertions(+), 43 deletions(-) diff --git a/datalad_remake/annexremotes/remake_remote.py b/datalad_remake/annexremotes/remake_remote.py index ef29507..5850f0d 100644 --- a/datalad_remake/annexremotes/remake_remote.py +++ b/datalad_remake/annexremotes/remake_remote.py @@ -37,7 +37,10 @@ get_file_dataset, provide_context, ) -from datalad_remake.utils.getkeys import get_trusted_keys +from datalad_remake.utils.getconfig import ( + get_allow_untrusted_execution, + get_trusted_keys, +) from datalad_remake.utils.glob import resolve_patterns from datalad_remake.utils.verify import verify_file @@ -156,7 +159,10 @@ def get_assigned_value(assignment: str) -> str: def transfer_retrieve(self, key: str, file_name: str) -> None: self.annex.debug(f'TRANSFER RETRIEVE key: {key!r}, file_name: {file_name!r}') - if self.annex.getconfig('allow-untrusted-execution') == 'true': + dataset_id = self.config_manager.get('datalad.dataset.id').value + self.annex.debug(f'TRANSFER RETRIEVE dataset_id: {dataset_id!r}') + self.annex.debug(f'TRANSFER RETRIEVE get_allow_untrusted_execution: {get_allow_untrusted_execution(dataset_id)}') + if get_allow_untrusted_execution(dataset_id): trusted_key_ids = None else: trusted_key_ids = get_trusted_keys() diff --git a/datalad_remake/annexremotes/tests/test_hierarchies.py b/datalad_remake/annexremotes/tests/test_hierarchies.py index 94aaee7..684be80 100644 --- a/datalad_remake/annexremotes/tests/test_hierarchies.py +++ b/datalad_remake/annexremotes/tests/test_hierarchies.py @@ -2,10 +2,13 @@ from pathlib import Path import pytest +from datalad_core.config import ConfigItem +from datalad_core.tests.fixtures import cfgman # noqa: F401 from datalad.distribution.get import Get as datalad_Get from datalad_next.datasets import Dataset from datalad_next.tests import skip_if_on_windows +from datalad_remake import allow_untrusted_execution_key from datalad_remake.commands.tests.create_datasets import ( create_simple_computation_dataset, ) @@ -78,7 +81,7 @@ def _check_content(dataset, file_content: Iterable[tuple[str, str]]): @skip_if_on_windows @pytest.mark.parametrize('output_pattern', [output_pattern_static, output_pattern_glob]) -def test_end_to_end(tmp_path, monkeypatch, output_pattern): +def test_end_to_end(tmp_path, cfgman, monkeypatch, output_pattern): root_dataset = create_simple_computation_dataset(tmp_path, 'd2', 3, test_method) # run `make` command @@ -103,21 +106,26 @@ def test_end_to_end(tmp_path, monkeypatch, output_pattern): # check computation success _check_content(root_dataset, test_file_content) - # Drop all computed content - _drop_files(root_dataset, collected_output) - # Go to the subdataset `d2_subds0/d2_subds1` and fetch the content of `a1.txt` # from a datalad-remake remote. - monkeypatch.chdir(root_dataset.pathobj / 'd2_subds0' / 'd2_subds1') - datalad_Get()('a1.txt') - - # check that all known files that were computed are added to the annex - _check_content(root_dataset, test_file_content) - - _drop_files(root_dataset, collected_output) - - # check get in subdatasets - monkeypatch.chdir(root_dataset.pathobj) - datalad_Get()('d2_subds0/d2_subds1/a1.txt') - - _check_content(root_dataset, test_file_content) + with cfgman.overrides( + { + # Allow the special remote to execute untrusted operations on the + # dataset `root_dataset/d2_subds0/d2_subds1` + allow_untrusted_execution_key + + Dataset(root_dataset.pathobj / 'd2_subds0' / 'd2_subds1').id: ConfigItem('true'), + } + ): + # Drop all computed content + _drop_files(root_dataset, collected_output) + + monkeypatch.chdir(root_dataset.pathobj / 'd2_subds0' / 'd2_subds1') + datalad_Get()('a1.txt') + # check that all known files that were computed are added to the annex + _check_content(root_dataset, test_file_content) + + _drop_files(root_dataset, collected_output) + + monkeypatch.chdir(root_dataset.pathobj) + datalad_Get()('d2_subds0/d2_subds1/a1.txt') + _check_content(root_dataset, test_file_content) diff --git a/datalad_remake/annexremotes/tests/test_priority.py b/datalad_remake/annexremotes/tests/test_priority.py index c2e8a76..62ee343 100644 --- a/datalad_remake/annexremotes/tests/test_priority.py +++ b/datalad_remake/annexremotes/tests/test_priority.py @@ -4,7 +4,6 @@ from datalad_core.tests.fixtures import cfgman # noqa: F401 from datalad_next.tests import skip_if_on_windows -import datalad_core.config.manager from datalad_remake import ( allow_untrusted_execution_key, priority_config_key, diff --git a/datalad_remake/annexremotes/tests/utils.py b/datalad_remake/annexremotes/tests/utils.py index 8f7213f..436eaf9 100644 --- a/datalad_remake/annexremotes/tests/utils.py +++ b/datalad_remake/annexremotes/tests/utils.py @@ -130,7 +130,7 @@ def create_keypair(gpg_dir: Path, name: bytes = b'Test User'): )[0] -def run_remake_remote(dest_path, urls, trusted): +def run_remake_remote(dest_path, urls): input_ = MockedInput() annex_key = 'some-fake-annex-key' @@ -139,8 +139,7 @@ def run_remake_remote(dest_path, urls, trusted): # below. input_.send('PREPARE\n') input_.send(f'TRANSFER RETRIEVE {annex_key} {dest_path / "remade.txt"!s}\n') - # The next line is the answer to `GETCONFIG allow-untrusted-execution` - input_.send(f'VALUE {"false" if trusted else "true"}\n') + input_.send('VALUE .git\n') # The next two lines assemble the answer to # `GETURLS datalad-remake:` for url in urls: diff --git a/datalad_remake/commands/make_cmd.py b/datalad_remake/commands/make_cmd.py index bdd04ce..148761f 100644 --- a/datalad_remake/commands/make_cmd.py +++ b/datalad_remake/commands/make_cmd.py @@ -42,7 +42,7 @@ ) from datalad_remake.utils.chdir import chdir from datalad_remake.utils.compute import compute -from datalad_remake.utils.getkeys import get_trusted_keys +from datalad_remake.utils.getconfig import get_trusted_keys from datalad_remake.utils.glob import resolve_patterns from datalad_remake.utils.remake_remote import add_remake_remote from datalad_remake.utils.verify import verify_file @@ -534,7 +534,7 @@ def initialize_remotes(dataset: Dataset, files: Iterable[str]) -> None: } for dataset_dir in touched_dataset_dirs: - add_remake_remote(str(dataset_dir), allow_untrusted_execution=False) + add_remake_remote(str(dataset_dir)) def unlock_files(dataset: Dataset, files: Iterable[str]) -> None: diff --git a/datalad_remake/commands/tests/create_datasets.py b/datalad_remake/commands/tests/create_datasets.py index 36ae6be..e797dc1 100644 --- a/datalad_remake/commands/tests/create_datasets.py +++ b/datalad_remake/commands/tests/create_datasets.py @@ -4,7 +4,10 @@ from datalad_next.datasets import Dataset -from datalad_remake import template_dir +from datalad_remake import ( + template_dir, + trusted_keys_config_key, +) from datalad_remake.utils.remake_remote import add_remake_remote @@ -19,7 +22,7 @@ def create_ds_hierarchy( root_dataset.create(force=True, result_renderer='disabled') (root_dataset.pathobj / 'a.txt').write_text('a\n') (root_dataset.pathobj / 'b.txt').write_text('b\n') - _enable_signing(root_dataset, signing_key) + _enable_verification(root_dataset, signing_key) root_dataset.save(result_renderer='disabled') datasets = [(name, tmp_path / name, root_dataset)] @@ -31,7 +34,7 @@ def create_ds_hierarchy( (subdataset.pathobj / f'a{level}.txt').write_text(f'a{level}\n') (subdataset.pathobj / f'b{level}.txt').write_text(f'b{level}\n') subdataset.save(result_renderer='disabled') - _enable_signing(subdataset, signing_key) + _enable_verification(subdataset, signing_key) datasets.append((f'{name}_subds{level}', subdataset_path, subdataset)) # Link the datasets @@ -47,22 +50,20 @@ def create_ds_hierarchy( root_dataset.get(recursive=True, result_renderer='disabled') # Add datalad-remake remotes to the root dataset and all subdatasets - add_remake_remote(root_dataset.path, allow_untrusted_execution=signing_key is None) + add_remake_remote(root_dataset.path) subdataset_path = Path() for index in range(subdataset_levels): subdataset_path /= f'{name}_subds{index}' - add_remake_remote( - str(root_dataset.pathobj / subdataset_path), - allow_untrusted_execution=signing_key is None, - ) + add_remake_remote(str(root_dataset.pathobj / subdataset_path)) return datasets -def _enable_signing(dataset: Dataset, key: str | None): +def _enable_verification(dataset: Dataset, key: str | None): if key is not None: dataset.config.set('commit.gpgsign', 'true', scope='local') dataset.config.set('user.signingkey', key, scope='local') + dataset.config.set(trusted_keys_config_key, key, scope='global') def create_simple_computation_dataset( diff --git a/datalad_remake/commands/tests/test_collection.py b/datalad_remake/commands/tests/test_collection.py index 743180d..a1f71cd 100644 --- a/datalad_remake/commands/tests/test_collection.py +++ b/datalad_remake/commands/tests/test_collection.py @@ -1,5 +1,6 @@ from pathlib import Path +from datalad_core.tests.fixtures import cfgman # noqa: F401 from datalad_next.tests import skip_if_on_windows from ..make_cmd import collect @@ -8,7 +9,7 @@ @skip_if_on_windows -def test_collect(tmp_path): +def test_collect(tmp_path, cfgman): dataset = create_ds_hierarchy(tmp_path, 'ds1', 1)[0][2] worktree_dir = tmp_path / 'ds1_worktree' diff --git a/datalad_remake/commands/tests/test_make.py b/datalad_remake/commands/tests/test_make.py index c28ef3e..035bd41 100644 --- a/datalad_remake/commands/tests/test_make.py +++ b/datalad_remake/commands/tests/test_make.py @@ -1,10 +1,13 @@ from unittest.mock import MagicMock from urllib.parse import urlparse +from datalad_core.config import ConfigItem +from datalad_core.tests.fixtures import cfgman from datalad_next.datasets import Dataset from datalad_next.tests import skip_if_on_windows import datalad_remake.commands.make_cmd +from datalad_remake import allow_untrusted_execution_key from datalad_remake.commands.make_cmd import get_url from datalad_remake.commands.tests.create_datasets import ( create_simple_computation_dataset, @@ -28,7 +31,7 @@ def test_duplicated_computation(tmp_path): @skip_if_on_windows -def test_speculative_computation(tmp_path): +def test_speculative_computation(tmp_path, cfgman): root_dataset = create_simple_computation_dataset(tmp_path, 'ds1', 0, test_method) root_dataset.make( @@ -39,8 +42,15 @@ def test_speculative_computation(tmp_path): result_renderer='disabled', ) - # Perform the speculative computation - root_dataset.get('spec.txt', result_renderer='disabled') + with cfgman.overrides( + { + # Allow the special remote to execute untrusted operations on this + # dataset + allow_untrusted_execution_key + root_dataset.id: ConfigItem('true'), + } + ): + # Perform the speculative computation + root_dataset.get('spec.txt', result_renderer='disabled') assert (root_dataset.pathobj / 'spec.txt').read_text() == 'Hello Robert\n' diff --git a/datalad_remake/commands/tests/test_provision.py b/datalad_remake/commands/tests/test_provision.py index c928cfc..0212209 100644 --- a/datalad_remake/commands/tests/test_provision.py +++ b/datalad_remake/commands/tests/test_provision.py @@ -4,6 +4,7 @@ from typing import TYPE_CHECKING from datalad.core.distributed.clone import Clone +from datalad_core.tests.fixtures import cfgman # noqa: F401 from datalad_next.datasets import Dataset from datalad_next.runners import call_git_lines from datalad_next.tests import skip_if_on_windows @@ -32,7 +33,7 @@ @skip_if_on_windows -def test_worktree_basic(tmp_path): +def test_worktree_basic(tmp_path, cfgman): dataset = create_ds_hierarchy(tmp_path, 'ds1', 3)[0][2] inputs = [ 'a.txt', @@ -69,7 +70,7 @@ def check_deleted_worktrees(ds: Dataset): @skip_if_on_windows -def test_worktree_globbing(tmp_path): +def test_worktree_globbing(tmp_path, cfgman): dataset = create_ds_hierarchy(tmp_path, 'ds1', 3)[0][2] result = dataset.provision( worktree_dir=tmp_path / 'ds1_worktree2', @@ -122,7 +123,7 @@ def get_file_list( @skip_if_on_windows -def test_provision_context(tmp_path): +def test_provision_context(tmp_path, cfgman): dataset = create_ds_hierarchy(tmp_path, 'ds1')[0][2] with provide_context(dataset, branch=None, input_patterns=['**']) as worktree: files = set(get_file_list(worktree)) @@ -131,7 +132,7 @@ def test_provision_context(tmp_path): @skip_if_on_windows -def test_branch_deletion_after_provision(tmp_path): +def test_branch_deletion_after_provision(tmp_path, cfgman): dataset = create_ds_hierarchy(tmp_path, 'ds1', 3)[0][2] with provide_context( dataset=dataset, branch=None, input_patterns=['a.txt'] diff --git a/datalad_remake/utils/tests/test_verification.py b/datalad_remake/utils/tests/test_verification.py index 80cc86a..946b998 100644 --- a/datalad_remake/utils/tests/test_verification.py +++ b/datalad_remake/utils/tests/test_verification.py @@ -1,13 +1,14 @@ from pathlib import Path import pytest +from datalad_core.tests.fixtures import cfgman # noqa: F401 from datalad_remake.annexremotes.tests.test_remake_remote import create_keypair from datalad_remake.commands.tests.create_datasets import create_ds_hierarchy from datalad_remake.utils.verify import verify_file -def test_whitelist(tmp_path, monkeypatch): +def test_whitelist(tmp_path, cfgman, monkeypatch): gpg_dir = tmp_path / 'gpg' tmp_home = tmp_path / 'tmp_home' From ef9da9652c41e7292cd647852f9a8517fb922442 Mon Sep 17 00:00:00 2001 From: Christian Monch Date: Tue, 17 Dec 2024 12:22:56 +0100 Subject: [PATCH 08/10] rf: import `cfgman` in `conftest.py` --- conftest.py | 2 +- datalad_remake/annexremotes/tests/test_hierarchies.py | 3 +-- datalad_remake/annexremotes/tests/test_priority.py | 1 - datalad_remake/annexremotes/tests/test_remake_remote.py | 1 - datalad_remake/commands/tests/test_collection.py | 1 - datalad_remake/commands/tests/test_make.py | 1 - datalad_remake/commands/tests/test_provision.py | 1 - datalad_remake/tests/test_complex_cases.py | 1 - datalad_remake/utils/tests/test_verification.py | 1 - 9 files changed, 2 insertions(+), 10 deletions(-) diff --git a/conftest.py b/conftest.py index d86d75c..49c6288 100644 --- a/conftest.py +++ b/conftest.py @@ -3,6 +3,6 @@ # its tooling from datalad_next.conftest import setup_package -pytest_plugins = 'datalad_next.tests.fixtures' +pytest_plugins = ('datalad_core.tests.fixtures', 'datalad_next.tests.fixtures') __all__ = ['setup_package'] diff --git a/datalad_remake/annexremotes/tests/test_hierarchies.py b/datalad_remake/annexremotes/tests/test_hierarchies.py index 684be80..c9390f5 100644 --- a/datalad_remake/annexremotes/tests/test_hierarchies.py +++ b/datalad_remake/annexremotes/tests/test_hierarchies.py @@ -2,9 +2,8 @@ from pathlib import Path import pytest -from datalad_core.config import ConfigItem -from datalad_core.tests.fixtures import cfgman # noqa: F401 from datalad.distribution.get import Get as datalad_Get +from datalad_core.config import ConfigItem from datalad_next.datasets import Dataset from datalad_next.tests import skip_if_on_windows diff --git a/datalad_remake/annexremotes/tests/test_priority.py b/datalad_remake/annexremotes/tests/test_priority.py index 62ee343..2f4456e 100644 --- a/datalad_remake/annexremotes/tests/test_priority.py +++ b/datalad_remake/annexremotes/tests/test_priority.py @@ -1,7 +1,6 @@ import pytest from annexremote import Master from datalad_core.config import ConfigItem -from datalad_core.tests.fixtures import cfgman # noqa: F401 from datalad_next.tests import skip_if_on_windows from datalad_remake import ( diff --git a/datalad_remake/annexremotes/tests/test_remake_remote.py b/datalad_remake/annexremotes/tests/test_remake_remote.py index 1c3409b..ee9bcdc 100644 --- a/datalad_remake/annexremotes/tests/test_remake_remote.py +++ b/datalad_remake/annexremotes/tests/test_remake_remote.py @@ -1,6 +1,5 @@ import pytest from datalad_core.config import ConfigItem -from datalad_core.tests.fixtures import cfgman # noqa: F401 from datalad_next.tests import skip_if_on_windows from datalad_remake import allow_untrusted_execution_key diff --git a/datalad_remake/commands/tests/test_collection.py b/datalad_remake/commands/tests/test_collection.py index a1f71cd..53c16ca 100644 --- a/datalad_remake/commands/tests/test_collection.py +++ b/datalad_remake/commands/tests/test_collection.py @@ -1,6 +1,5 @@ from pathlib import Path -from datalad_core.tests.fixtures import cfgman # noqa: F401 from datalad_next.tests import skip_if_on_windows from ..make_cmd import collect diff --git a/datalad_remake/commands/tests/test_make.py b/datalad_remake/commands/tests/test_make.py index 035bd41..f0987c0 100644 --- a/datalad_remake/commands/tests/test_make.py +++ b/datalad_remake/commands/tests/test_make.py @@ -2,7 +2,6 @@ from urllib.parse import urlparse from datalad_core.config import ConfigItem -from datalad_core.tests.fixtures import cfgman from datalad_next.datasets import Dataset from datalad_next.tests import skip_if_on_windows diff --git a/datalad_remake/commands/tests/test_provision.py b/datalad_remake/commands/tests/test_provision.py index 0212209..9fc4480 100644 --- a/datalad_remake/commands/tests/test_provision.py +++ b/datalad_remake/commands/tests/test_provision.py @@ -4,7 +4,6 @@ from typing import TYPE_CHECKING from datalad.core.distributed.clone import Clone -from datalad_core.tests.fixtures import cfgman # noqa: F401 from datalad_next.datasets import Dataset from datalad_next.runners import call_git_lines from datalad_next.tests import skip_if_on_windows diff --git a/datalad_remake/tests/test_complex_cases.py b/datalad_remake/tests/test_complex_cases.py index 1d4650e..3236df3 100644 --- a/datalad_remake/tests/test_complex_cases.py +++ b/datalad_remake/tests/test_complex_cases.py @@ -1,7 +1,6 @@ from pathlib import Path from datalad_core.config import ConfigItem -from datalad_core.tests.fixtures import cfgman from datalad_remake import ( allow_untrusted_execution_key, diff --git a/datalad_remake/utils/tests/test_verification.py b/datalad_remake/utils/tests/test_verification.py index 946b998..08e9743 100644 --- a/datalad_remake/utils/tests/test_verification.py +++ b/datalad_remake/utils/tests/test_verification.py @@ -1,7 +1,6 @@ from pathlib import Path import pytest -from datalad_core.tests.fixtures import cfgman # noqa: F401 from datalad_remake.annexremotes.tests.test_remake_remote import create_keypair from datalad_remake.commands.tests.create_datasets import create_ds_hierarchy From 3c2a528335c7a19f149771462483b2f55772d9af Mon Sep 17 00:00:00 2001 From: Christian Monch Date: Tue, 17 Dec 2024 12:52:36 +0100 Subject: [PATCH 09/10] chore: fix format problems --- datalad_remake/annexremotes/remake_remote.py | 5 ++++- datalad_remake/annexremotes/tests/test_hierarchies.py | 4 +++- datalad_remake/annexremotes/tests/test_priority.py | 4 ++-- .../annexremotes/tests/test_remake_remote.py | 3 +-- datalad_remake/commands/tests/test_collection.py | 2 +- datalad_remake/commands/tests/test_make.py | 10 +++++----- datalad_remake/commands/tests/test_provision.py | 8 ++++---- datalad_remake/utils/getconfig.py | 6 ++---- datalad_remake/utils/tests/test_verification.py | 2 +- 9 files changed, 23 insertions(+), 21 deletions(-) diff --git a/datalad_remake/annexremotes/remake_remote.py b/datalad_remake/annexremotes/remake_remote.py index 5850f0d..46ba6e4 100644 --- a/datalad_remake/annexremotes/remake_remote.py +++ b/datalad_remake/annexremotes/remake_remote.py @@ -161,7 +161,10 @@ def transfer_retrieve(self, key: str, file_name: str) -> None: dataset_id = self.config_manager.get('datalad.dataset.id').value self.annex.debug(f'TRANSFER RETRIEVE dataset_id: {dataset_id!r}') - self.annex.debug(f'TRANSFER RETRIEVE get_allow_untrusted_execution: {get_allow_untrusted_execution(dataset_id)}') + self.annex.debug( + 'TRANSFER RETRIEVE get_allow_untrusted_execution: ' + f'{get_allow_untrusted_execution(dataset_id)}' + ) if get_allow_untrusted_execution(dataset_id): trusted_key_ids = None else: diff --git a/datalad_remake/annexremotes/tests/test_hierarchies.py b/datalad_remake/annexremotes/tests/test_hierarchies.py index c9390f5..f9c943a 100644 --- a/datalad_remake/annexremotes/tests/test_hierarchies.py +++ b/datalad_remake/annexremotes/tests/test_hierarchies.py @@ -112,7 +112,9 @@ def test_end_to_end(tmp_path, cfgman, monkeypatch, output_pattern): # Allow the special remote to execute untrusted operations on the # dataset `root_dataset/d2_subds0/d2_subds1` allow_untrusted_execution_key - + Dataset(root_dataset.pathobj / 'd2_subds0' / 'd2_subds1').id: ConfigItem('true'), + + Dataset(root_dataset.pathobj / 'd2_subds0' / 'd2_subds1').id: ConfigItem( + 'true' + ), } ): # Drop all computed content diff --git a/datalad_remake/annexremotes/tests/test_priority.py b/datalad_remake/annexremotes/tests/test_priority.py index 2f4456e..66d3c15 100644 --- a/datalad_remake/annexremotes/tests/test_priority.py +++ b/datalad_remake/annexremotes/tests/test_priority.py @@ -29,7 +29,7 @@ @skip_if_on_windows @pytest.mark.parametrize('priority', [['alpha', 'beta'], ['beta', 'alpha']]) -def test_compute_remote_priority(tmp_path, cfgman, monkeypatch, priority): # noqa: F811 +def test_compute_remote_priority(tmp_path, cfgman, monkeypatch, priority): dataset = create_ds_hierarchy( tmp_path=tmp_path, name='ds1', @@ -91,7 +91,7 @@ def test_compute_remote_priority(tmp_path, cfgman, monkeypatch, priority): # no ).read_text().strip() == f'from {priority[0]}: {priority[0]}_parameter' -def test_config_precedence(existing_dataset, tmp_path, cfgman, monkeypatch): # noqa: F811 +def test_config_precedence(existing_dataset, tmp_path, cfgman, monkeypatch): existing_dataset.config.add('datalad.make.priority', '1', scope='branch') monkeypatch.setattr( diff --git a/datalad_remake/annexremotes/tests/test_remake_remote.py b/datalad_remake/annexremotes/tests/test_remake_remote.py index ee9bcdc..0b5def1 100644 --- a/datalad_remake/annexremotes/tests/test_remake_remote.py +++ b/datalad_remake/annexremotes/tests/test_remake_remote.py @@ -24,7 +24,7 @@ @skip_if_on_windows @pytest.mark.parametrize('trusted', [True, False]) -def test_compute_remote_main(tmp_path, cfgman, monkeypatch, trusted): # noqa: F811 +def test_compute_remote_main(tmp_path, cfgman, monkeypatch, trusted): if trusted: gpg_homedir = tmp_path / 'tmp_gpg_dir' tmp_home = tmp_path / 'tmp_home' @@ -70,7 +70,6 @@ def test_compute_remote_main(tmp_path, cfgman, monkeypatch, trusted): # noqa: F { trusted_keys_config_key: ConfigItem(signing_key), allow_untrusted_execution_key + dataset.id: ConfigItem('true'), - } ): run_remake_remote(tmp_path, [url]) diff --git a/datalad_remake/commands/tests/test_collection.py b/datalad_remake/commands/tests/test_collection.py index 53c16ca..743180d 100644 --- a/datalad_remake/commands/tests/test_collection.py +++ b/datalad_remake/commands/tests/test_collection.py @@ -8,7 +8,7 @@ @skip_if_on_windows -def test_collect(tmp_path, cfgman): +def test_collect(tmp_path): dataset = create_ds_hierarchy(tmp_path, 'ds1', 1)[0][2] worktree_dir = tmp_path / 'ds1_worktree' diff --git a/datalad_remake/commands/tests/test_make.py b/datalad_remake/commands/tests/test_make.py index f0987c0..a52d9a7 100644 --- a/datalad_remake/commands/tests/test_make.py +++ b/datalad_remake/commands/tests/test_make.py @@ -42,11 +42,11 @@ def test_speculative_computation(tmp_path, cfgman): ) with cfgman.overrides( - { - # Allow the special remote to execute untrusted operations on this - # dataset - allow_untrusted_execution_key + root_dataset.id: ConfigItem('true'), - } + { + # Allow the special remote to execute untrusted operations on this + # dataset + allow_untrusted_execution_key + root_dataset.id: ConfigItem('true'), + } ): # Perform the speculative computation root_dataset.get('spec.txt', result_renderer='disabled') diff --git a/datalad_remake/commands/tests/test_provision.py b/datalad_remake/commands/tests/test_provision.py index 9fc4480..c928cfc 100644 --- a/datalad_remake/commands/tests/test_provision.py +++ b/datalad_remake/commands/tests/test_provision.py @@ -32,7 +32,7 @@ @skip_if_on_windows -def test_worktree_basic(tmp_path, cfgman): +def test_worktree_basic(tmp_path): dataset = create_ds_hierarchy(tmp_path, 'ds1', 3)[0][2] inputs = [ 'a.txt', @@ -69,7 +69,7 @@ def check_deleted_worktrees(ds: Dataset): @skip_if_on_windows -def test_worktree_globbing(tmp_path, cfgman): +def test_worktree_globbing(tmp_path): dataset = create_ds_hierarchy(tmp_path, 'ds1', 3)[0][2] result = dataset.provision( worktree_dir=tmp_path / 'ds1_worktree2', @@ -122,7 +122,7 @@ def get_file_list( @skip_if_on_windows -def test_provision_context(tmp_path, cfgman): +def test_provision_context(tmp_path): dataset = create_ds_hierarchy(tmp_path, 'ds1')[0][2] with provide_context(dataset, branch=None, input_patterns=['**']) as worktree: files = set(get_file_list(worktree)) @@ -131,7 +131,7 @@ def test_provision_context(tmp_path, cfgman): @skip_if_on_windows -def test_branch_deletion_after_provision(tmp_path, cfgman): +def test_branch_deletion_after_provision(tmp_path): dataset = create_ds_hierarchy(tmp_path, 'ds1', 3)[0][2] with provide_context( dataset=dataset, branch=None, input_patterns=['a.txt'] diff --git a/datalad_remake/utils/getconfig.py b/datalad_remake/utils/getconfig.py index 0a075a9..c380623 100644 --- a/datalad_remake/utils/getconfig.py +++ b/datalad_remake/utils/getconfig.py @@ -19,8 +19,7 @@ def get_trusted_keys(config_manager: ConfigManager | None = None) -> list[str]: def get_allow_untrusted_execution( - dataset_id: str, - config_manager: ConfigManager | None = None + dataset_id: str, config_manager: ConfigManager | None = None ) -> bool: """Get an allow-untrusted-execution indicator for a dataset.""" value = get_protected_config( @@ -31,8 +30,7 @@ def get_allow_untrusted_execution( def get_protected_config( - config_key: str, - config_manager: ConfigManager | None = None + config_key: str, config_manager: ConfigManager | None = None ) -> str: if config_manager is None: config_manager = get_manager() diff --git a/datalad_remake/utils/tests/test_verification.py b/datalad_remake/utils/tests/test_verification.py index 08e9743..ba8d664 100644 --- a/datalad_remake/utils/tests/test_verification.py +++ b/datalad_remake/utils/tests/test_verification.py @@ -7,7 +7,7 @@ from datalad_remake.utils.verify import verify_file -def test_whitelist(tmp_path, cfgman, monkeypatch): +def test_whitelist(tmp_path, monkeypatch, cfgman): # noqa ARG001 gpg_dir = tmp_path / 'gpg' tmp_home = tmp_path / 'tmp_home' From 7cbbe0bd3570f4d0296ccc95f6aa59f58db31a76 Mon Sep 17 00:00:00 2001 From: Christian Monch Date: Tue, 17 Dec 2024 13:05:05 +0100 Subject: [PATCH 10/10] fix: add stub for datalad_next.conftest --- resources/type_stubs/datalad_next/conftest.pyi | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 resources/type_stubs/datalad_next/conftest.pyi diff --git a/resources/type_stubs/datalad_next/conftest.pyi b/resources/type_stubs/datalad_next/conftest.pyi new file mode 100644 index 0000000..c036518 --- /dev/null +++ b/resources/type_stubs/datalad_next/conftest.pyi @@ -0,0 +1,4 @@ +from datalad.conftest import setup_package as setup_package +from datalad_next.iter_collections.tests.test_itertar import sample_tar_xz as sample_tar_xz +from datalad_next.iter_collections.tests.test_iterzip import sample_zip as sample_zip +from datalad_next.tests.fixtures import check_gitconfig_global as check_gitconfig_global, check_plaintext_keyring as check_plaintext_keyring, credman as credman, datalad_cfg as datalad_cfg, datalad_interactive_ui as datalad_interactive_ui, datalad_noninteractive_ui as datalad_noninteractive_ui, dataset as dataset, existing_dataset as existing_dataset, existing_noannex_dataset as existing_noannex_dataset, http_credential as http_credential, http_server as http_server, http_server_with_basicauth as http_server_with_basicauth, httpbin as httpbin, httpbin_service as httpbin_service, modified_dataset as modified_dataset, no_result_rendering as no_result_rendering, reduce_logging as reduce_logging, sshserver as sshserver, sshserver_setup as sshserver_setup, tmp_keyring as tmp_keyring, webdav_credential as webdav_credential, webdav_server as webdav_server