Skip to content

Commit

Permalink
Merge branch 'main' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
christian-monch authored Nov 11, 2024
2 parents a8e0eb9 + d492add commit a2214de
Show file tree
Hide file tree
Showing 38 changed files with 490 additions and 57 deletions.
12 changes: 9 additions & 3 deletions .appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ environment:
APPVEYOR_BUILD_WORKER_IMAGE: Ubuntu2204
PY: 3.11
INSTALL_GITANNEX: git-annex -m snapshot
KEYWORDS: not no_such_test

# Windows core tests
- job_name: test-win
Expand All @@ -80,6 +81,7 @@ environment:
COVERAGE_ROOT: C:\DLTMP
HATCH_DATA_DIR: C:\hatch-data-dir
PIP_CACHE: C:\Users\appveyor\AppData\Local\pip\Cache
KEYWORDS: not test_whitelist

# MacOS core tests
- job_name: test-mac
Expand All @@ -89,7 +91,9 @@ environment:
COVERAGE_ROOT: /Users/appveyor/DLTMP
HATCH_DATA_DIR: /Users/appveyor/hatch-data-dir
PIP_CACHE: /Users/appveyor/.cache/pip

KEYWORDS: >
not test_compute_remote_main[True]
and not test_whitelist
# only run the CI if there are code or tooling changes
only_commits:
Expand Down Expand Up @@ -140,6 +144,8 @@ for:
# verify that a PY variable is declared that identifies the desired Python version
# for this run
- "[ \"x$PY\" != x ]"
# create a dedicated socket directory to keep socket names from becoming too long
- tools/appveyor/setup-gpg
# Missing system software
- tools/appveyor/install-syspkgs $INSTALL_SYSPKGS
# activate Python env solely to get `python` to become available consistently
Expand All @@ -150,7 +156,7 @@ for:
- "[ -f ${HOME}/dlinstaller_env.sh ] && . ${HOME}/dlinstaller_env.sh || true"

test_script:
- 'hatch run tests.py${PY}:run-cov --doctest-modules --durations 10'
- 'hatch run tests.py${PY}:run-cov --doctest-modules --durations 10 -k "$KEYWORDS"'

after_test:
- 'hatch run tests.py${PY}:cov-combine'
Expand Down Expand Up @@ -208,7 +214,7 @@ for:
- cmd: IF DEFINED INSTALL_GITANNEX datalad-installer --sudo ok %INSTALL_GITANNEX%

test_script:
- cmd: 'hatch run tests.py%PY%:run-cov --doctest-modules --durations 10'
- cmd: 'hatch run tests.py%PY%:run-cov --doctest-modules -k "%KEYWORDS%" --durations 10'

after_test:
- cmd: 'hatch run tests.py%PY%:cov-combine'
Expand Down
2 changes: 2 additions & 0 deletions .readthedocs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,5 @@ python:
install:
- method: pip
path: .
extra_requirements:
- docs
10 changes: 6 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -75,14 +75,15 @@ EOF

Create a `datalad-remake` git-annex special remote:
```bash
> git annex initremote datalad-remake encryption=none type=external externaltype=datalad-remake
> git annex initremote datalad-remake encryption=none type=external externaltype=datalad-remake allow_untrusted_execution=true
```

Execute a computation and save the result:
```bash
> datalad make -p first=bob -p second=alice -p output=name \
-o name-1.txt -o name-2.txt one-to-many
> datalad make -p first=bob -p second=alice -p output=name -o name-1.txt \
-o name-2.txt --allow-untrusted-execution one-to-many
```

The method `one-to-many` will create two files with the names `<output>-1.txt`
and `<output>-2.txt`. Thus, the two files `name-1.txt` and `name-2.txt` need to
be specified as outputs in the command above.
Expand Down Expand Up @@ -117,7 +118,8 @@ Afterwards, a prospective computation can be initiated by using the

```bash
> datalad make -p first=john -p second=susan -p output=person \
-o person-1.txt -o person-2.txt -u one-to-many
-o person-1.txt -o person-2.txt -u --allow-untrusted-execution one-to-many
> cat person-1.txt # this will fail, because the computation has not yet been performed
```

The following command will fail, because no computation has been performed,
Expand Down
2 changes: 2 additions & 0 deletions datalad_remake/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
'command_suite',
'specification_dir',
'template_dir',
'trusted_keys_config_key',
]


Expand Down Expand Up @@ -47,3 +48,4 @@
url_scheme = 'datalad-remake'
template_dir = '.datalad/make/methods'
specification_dir = '.datalad/make/specifications'
trusted_keys_config_key = 'datalad.trusted-keys'
1 change: 1 addition & 0 deletions datalad_remake/annexremotes/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""The DataLad remake special remote"""
23 changes: 21 additions & 2 deletions datalad_remake/annexremotes/remake_remote.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,9 @@
get_file_dataset,
provide_context,
)
from datalad_remake.utils.getkeys import get_trusted_keys
from datalad_remake.utils.glob import resolve_patterns
from datalad_remake.utils.verify import verify_file

if TYPE_CHECKING:
from collections.abc import Iterable
Expand All @@ -41,6 +43,11 @@
class RemakeRemote(SpecialRemote):
def __init__(self, annex: Master):
    """Initialize the remote and declare its configuration settings.

    Registers a single setting, ``allow_untrusted_execution``, together
    with its human-readable description in ``self.configs``.
    """
    super().__init__(annex)
    # Description of the setting; kept as one string assembled from
    # adjacent literals so the wording stays in a single place.
    untrusted_execution_help = (
        'Allow execution of untrusted code with untrusted parameters. '
        'set to "true" to enable. THIS IS DANGEROUS and might lead to '
        'remote code execution.'
    )
    self.configs = {'allow_untrusted_execution': untrusted_execution_help}

def __del__(self):
self.close()
Expand Down Expand Up @@ -85,7 +92,11 @@ def get_url_for_key(self, key: str) -> str:
self.annex.debug(f'get_url_for_key: key: {key!r}, urls: {urls!r}')
return urls[0]

def get_compute_info(self, key: str) -> tuple[dict[str, Any], Dataset]:
def get_compute_info(
self,
key: str,
trusted_key_ids: list[str] | None,
) -> tuple[dict[str, Any], Dataset]:
def get_assigned_value(assignment: str) -> str:
return assignment.split('=', 1)[1]

Expand All @@ -96,6 +107,8 @@ def get_assigned_value(assignment: str) -> str:

dataset = self._find_dataset(root_version)
spec_path = dataset.pathobj / specification_dir / spec_name
if trusted_key_ids is not None:
verify_file(dataset.pathobj, spec_path, trusted_key_ids)
with open(spec_path, 'rb') as f:
spec = json.load(f)

Expand All @@ -108,7 +121,12 @@ def get_assigned_value(assignment: str) -> str:
def transfer_retrieve(self, key: str, file_name: str) -> None:
self.annex.debug(f'TRANSFER RETRIEVE key: {key!r}, file_name: {file_name!r}')

compute_info, dataset = self.get_compute_info(key)
if self.annex.getconfig('allow_untrusted_execution') == 'true':
trusted_key_ids = None
else:
trusted_key_ids = get_trusted_keys()

compute_info, dataset = self.get_compute_info(key, trusted_key_ids)
self.annex.debug(f'TRANSFER RETRIEVE compute_info: {compute_info!r}')

# Perform the computation, and collect the results
Expand All @@ -124,6 +142,7 @@ def transfer_retrieve(self, key: str, file_name: str) -> None:
compute_info['method'],
compute_info['parameter'],
compute_info['output'],
trusted_key_ids,
)
lgr.debug('Starting collection')
self.annex.debug('Starting collection')
Expand Down
1 change: 1 addition & 0 deletions datalad_remake/annexremotes/tests/test_hierarchies.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ def test_end_to_end(tmp_path, monkeypatch, output_pattern):
],
output=output_pattern,
result_renderer='disabled',
allow_untrusted_execution=True,
)

collected_output = [
Expand Down
103 changes: 97 additions & 6 deletions datalad_remake/annexremotes/tests/test_remake_remote.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,20 @@
import re
import subprocess
from io import TextIOBase
from pathlib import Path
from queue import Queue
from typing import cast

import pytest
from annexremote import Master
from datalad_next.tests import skip_if_on_windows

from datalad_remake.commands.tests.create_datasets import create_ds_hierarchy

from ... import specification_dir
from ... import (
specification_dir,
template_dir,
)
from ...commands.make_cmd import build_json
from ..remake_remote import RemakeRemote

Expand Down Expand Up @@ -64,11 +70,30 @@ def send(self, value):


@skip_if_on_windows
def test_compute_remote_main(tmp_path, monkeypatch):
dataset = create_ds_hierarchy(tmp_path, 'ds1', 0)[0][2]
@pytest.mark.parametrize('trusted', [True, False])
def test_compute_remote_main(tmp_path, datalad_cfg, monkeypatch, trusted):
if trusted:
gpg_homedir = tmp_path / 'tmp_gpg_dir'
tmp_home = tmp_path / 'tmp_home'

# make sure that the user's keystore is not overwritten
monkeypatch.setenv('HOME', str(tmp_home))

# Generate a keypair
signing_key = create_keypair(gpg_dir=gpg_homedir)

# Activate the new keys
monkeypatch.setenv('GNUPGHOME', str(gpg_homedir))

datalad_cfg.add('datalad.trusted-keys', signing_key, where='global')

else:
signing_key = None

dataset = create_ds_hierarchy(tmp_path, 'ds1', 0, signing_key)[0][2]
monkeypatch.chdir(dataset.path)

template_path = dataset.pathobj / '.datalad' / 'make' / 'methods'
template_path = dataset.pathobj / template_dir
template_path.mkdir(parents=True)
(template_path / 'echo').write_text(template)
dataset.save()
Expand All @@ -84,10 +109,13 @@ def test_compute_remote_main(tmp_path, monkeypatch):
)
).split(b': ')[1]

(dataset.pathobj / specification_dir).mkdir(parents=True, exist_ok=True)
(dataset.pathobj / specification_dir / '000001111122222').write_text(
specification_path = dataset.pathobj / specification_dir
spec_name = '000001111122222'
specification_path.mkdir(parents=True, exist_ok=True)
(specification_path / spec_name).write_text(
build_json('echo', [], ['a.txt'], {'content': 'some_string'})
)
dataset.save()

input_ = MockedInput()

Expand All @@ -96,12 +124,16 @@ def test_compute_remote_main(tmp_path, monkeypatch):
# below.
input_.send('PREPARE\n')
input_.send(f'TRANSFER RETRIEVE {key.decode()} {tmp_path / "remade.txt"!s}\n')
# The next line is the answer to `GETCONFIG allow_untrusted_execution`
input_.send(f'VALUE {"false" if trusted else "true"}\n')
url = (
'datalad-make:///?'
f'root_version={dataset.repo.get_hexsha()}'
'&specification=000001111122222'
'&this=a.txt'
)
# The next line is the answer to
# `GETURLS MD5E-s2--60b725f10c9c85c70d97880dfe8191b3.txt datalad-remake:`
input_.send(f'VALUE {url}\n')
input_.send('VALUE\n')
input_.send('VALUE .git\n')
Expand All @@ -117,3 +149,62 @@ def test_compute_remote_main(tmp_path, monkeypatch):
# At this point the datalad-remake remote should have executed the
# computation and written the result.
assert (tmp_path / 'remade.txt').read_text().strip() == 'content: some_string'


def create_keypair(gpg_dir: Path, name: bytes = b'Test User') -> str:
    """Generate a GPG keypair in ``gpg_dir`` and return its long key ID.

    The key is an RSA-4096 key with an RSA-4096 subkey, no expiry date and
    no passphrase protection (``%no-protection``), created via ``gpg --batch``.

    Parameters
    ----------
    gpg_dir:
        Directory used as the GnuPG home directory. It is created if
        necessary and restricted to mode ``0o700``.
    name:
        Real name (as bytes) to store in the key's user ID.

    Returns
    -------
    str
        The long key ID of the first secret key listed for ``name``.

    Raises
    ------
    subprocess.CalledProcessError
        If one of the ``gpg`` invocations exits with a non-zero status.
    RuntimeError
        If no matching secret key is found in the ``gpg`` listing.
    """
    import os

    gpg_dir.mkdir(parents=True, exist_ok=True)
    gpg_dir.chmod(0o700)
    private_keys_dir = gpg_dir / 'private-keys-v1.d'
    private_keys_dir.mkdir(exist_ok=True)
    private_keys_dir.chmod(0o700)
    template = b"""
Key-Type: RSA
Key-Length: 4096
Subkey-Type: RSA
Subkey-Length: 4096
Name-Real: $NAME
Name-Email: test@example.com
Expire-Date: 0
%no-protection
#%transient-key
%commit
"""
    script = template.replace(b'$NAME', name)

    # Point $HOME at /dev/null to prevent accidental changes to the user's
    # keyring. PATH is passed through explicitly: without it the `gpg`
    # executable would only be looked up in the OS default search path.
    environment = {
        'HOME': '/dev/null',
        'PATH': os.environ.get('PATH', os.defpath),
    }

    # use gpg to generate a keypair
    subprocess.run(
        [  # noqa: S607
            'gpg',
            '--batch',
            '--homedir',
            str(gpg_dir),
            '--gen-key',
            '--keyid-format',
            'long',
        ],
        input=script,
        capture_output=True,
        check=True,
        env=environment,
    )

    result = subprocess.run(
        [  # noqa: S607
            'gpg',
            '--homedir',
            str(gpg_dir),
            '--list-secret-keys',
            '--keyid-format',
            'long',
        ],
        capture_output=True,
        check=True,
        env=environment,
    )
    listing = result.stdout.decode()

    # The key ID follows `rsa4096/` on the `sec` line; the user name is
    # expected two lines further down. The name is escaped so that regex
    # metacharacters in it are matched literally.
    match = re.search(
        r'(?m)sec.*rsa4096/([A-Z0-9]+).*\n.*\n.*' + re.escape(name.decode()),
        listing,
    )
    if match is None:
        msg = (
            f'no secret key for {name.decode()!r} found in gpg output:\n'
            f'{listing}'
        )
        raise RuntimeError(msg)
    return match.group(1)
1 change: 1 addition & 0 deletions datalad_remake/commands/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Commands provided by the datalad-remake extension."""
Loading

0 comments on commit a2214de

Please sign in to comment.