Skip to content

Commit

Permalink
feat: add automated datalad-remake remote creation
Browse files Browse the repository at this point in the history
  • Loading branch information
christian-monch committed Dec 6, 2024
1 parent f320830 commit 41b149f
Show file tree
Hide file tree
Showing 5 changed files with 136 additions and 36 deletions.
2 changes: 2 additions & 0 deletions datalad_remake/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

__all__ = [
'__version__',
'auto_remote_name',
'command_suite',
'priority_config_key',
'specification_dir',
Expand Down Expand Up @@ -52,3 +53,4 @@
specification_dir = '.datalad/make/specifications'
trusted_keys_config_key = 'datalad.make.trusted-keys'
priority_config_key = 'datalad.make.priority'
auto_remote_name = 'datalad-remake-auto'
58 changes: 58 additions & 0 deletions datalad_remake/commands/make_cmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
from datalad_remake.utils.compute import compute
from datalad_remake.utils.getkeys import get_trusted_keys
from datalad_remake.utils.glob import resolve_patterns
from datalad_remake.utils.remake_remote import add_remake_remote
from datalad_remake.utils.verify import verify_file

if TYPE_CHECKING:
Expand Down Expand Up @@ -262,6 +263,8 @@ def __call__(
else:
resolved_output = set(output_pattern)

initialize_remotes(ds, resolved_output)

for out in resolved_output:
url = add_url(ds, out, url_base, url_only=prospective_execution)
yield get_status_dict(
Expand Down Expand Up @@ -469,6 +472,10 @@ def collect(
) -> set[str]:
output = resolve_patterns(root_dir=worktree, patterns=output_pattern)

# Ensure that all subdatasets that are touched by paths in `output` are
# installed.
install_containing_subdatasets(dataset, output)

# Unlock output files in the dataset-directory and copy the result
unlock_files(dataset, output)
for o in output:
Expand All @@ -482,6 +489,57 @@ def collect(
return output


def _subdataset_presence(dataset: Dataset) -> dict[Path, bool]:
    """Map every known subdataset path to whether it is installed.

    Keys are paths relative to the root of `dataset`, so that they can be
    compared directly with prefixes of file paths that are relative to the
    same root. Values are `True` if the reported state is `'present'`.
    """
    root = dataset.pathobj
    presence = {}
    for result in dataset.subdatasets(recursive=True):
        path = Path(result['path'])
        try:
            # A recursive listing reports nested subdatasets with `parentds`
            # set to their immediate parent (see the DataLad `subdatasets`
            # documentation). Keying by the parent-relative path would turn
            # `sub0/sub1` into `sub1`, which never matches a file prefix.
            relative_path = path.relative_to(root)
        except ValueError:
            # The reported path is not located below `dataset.pathobj`
            # (presumably a differently resolved root path -- TODO confirm).
            # Fall back to the parent-relative path, which is correct for
            # direct subdatasets.
            relative_path = path.relative_to(Path(result['parentds']))
        presence[relative_path] = result['state'] == 'present'
    return presence


def install_containing_subdatasets(dataset: Dataset, files: Iterable[str]) -> None:
    """Install all subdatasets that contain a file from `files`.

    Parameters
    ----------
    dataset:
        The root dataset. Subdataset paths are compared relative to its root.
    files:
        File paths relative to the root of `dataset`.

    Every proper directory prefix of every file path is compared with the
    paths of the known subdatasets. Prefixes are processed from shortest to
    longest, so that a parent subdataset is installed before the subdatasets
    nested in it are looked up.
    """
    subdataset_infos = _subdataset_presence(dataset)

    # Get the prefixes of all required paths sorted by length
    required_paths = sorted(
        {
            prefix
            for file in files
            for prefix in Path(file).parents
            if prefix != Path('.')
        },
        key=lambda p: len(p.parts),
    )

    for path in required_paths:
        if path in subdataset_infos and not subdataset_infos[path]:
            dataset.install(path=str(path), result_renderer='disabled')
            # Refresh the mapping, the installation may have made nested
            # subdatasets visible that were unknown before.
            subdataset_infos = _subdataset_presence(dataset)


def initialize_remotes(dataset: Dataset, files: Iterable[str]) -> None:
    """Add a remake remote to every dataset that holds one of `files`.

    `files` are joined with the root path of `dataset`. For each resulting
    path, the first element returned by `get_file_dataset` is used as the
    directory of the dataset to which the remote is added (presumably the
    root of the dataset that contains the file -- confirm against
    `get_file_dataset`). Each directory is handled once, and untrusted
    execution is never enabled for the created remotes.
    """
    # Collect all directories first, so that every dataset is visited once,
    # even if it contains many of the generated files.
    dataset_dirs = set()
    for file in files:
        containing_dir = get_file_dataset(dataset.pathobj / file)[0]
        dataset_dirs.add(containing_dir)

    for containing_dir in dataset_dirs:
        add_remake_remote(str(containing_dir), allow_untrusted_execution=False)


def unlock_files(dataset: Dataset, files: Iterable[str]) -> None:
"""Use datalad to resolve subdatasets and unlock files in the dataset."""
# TODO: for some reason `dataset unlock` does not operate in the
Expand Down
5 changes: 3 additions & 2 deletions datalad_remake/commands/provision_cmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,7 @@ def resolve_patterns(
This method will resolve relative path-patterns in the dataset. It will
install all subdatasets that are matched by the patterns. Patterns are
described as outline in `glob.glob`. The method support recursive globbing
described as outlined in `glob.glob`. The method supports recursive globbing
of zero or more directories with the pattern: `**`.
Parameters
Expand Down Expand Up @@ -342,7 +342,8 @@ def glob_pattern(
# Match all elements at the current position with the first part of the
# pattern.
for rec_match in glob(
'*' if pattern[0] == '**' else pattern[0], root_dir=root.pathobj / position
'*' if pattern[0] == '**' else pattern[0],
root_dir=root.pathobj / position,
):
match = position / rec_match

Expand Down
38 changes: 4 additions & 34 deletions datalad_remake/commands/tests/create_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,39 +3,9 @@
from pathlib import Path

from datalad_next.datasets import Dataset
from datalad_next.runners import call_git_success

from datalad_remake import template_dir


def update_config_for_remake(dataset: Dataset):
    """Allow unverified downloads from the remote named `remake`.

    The setting is written with local scope and `recursive=True` via
    `dataset.configuration`. It is the annex security setting that permits
    remake-URLs.
    """
    security_setting = (
        'remote.remake.annex-security-allow-unverified-downloads',
        'ACKTHPPT',
    )
    dataset.configuration(
        action='set',
        scope='local',
        recursive=True,
        spec=[security_setting],
        result_renderer='disabled',
    )


def add_remake_remote(dataset: Dataset, signing_key: str | None = None):
    """Initialize a `remake` special remote in `dataset` via `git annex`.

    Untrusted execution is allowed by the remote if, and only if, no
    `signing_key` is given. After the remote is initialized, the local
    configuration is updated through `update_config_for_remake`.
    """
    # Without a signing key nothing can be verified, so the remote has to
    # accept untrusted execution.
    if signing_key:
        untrusted_execution = 'false'
    else:
        untrusted_execution = 'true'

    git_arguments = ['-C', dataset.path, 'annex', 'initremote', 'remake']
    git_arguments += [
        'type=external',
        'externaltype=datalad-remake',
        'encryption=none',
        f'allow-untrusted-execution={untrusted_execution}',
    ]
    call_git_success(git_arguments, capture_output=True)
    update_config_for_remake(dataset)
from datalad_remake.utils.remake_remote import add_remake_remote


def create_ds_hierarchy(
Expand Down Expand Up @@ -77,13 +47,13 @@ def create_ds_hierarchy(
root_dataset.get(recursive=True, result_renderer='disabled')

# Add datalad-remake remotes to the root dataset and all subdatasets
add_remake_remote(root_dataset, signing_key)
add_remake_remote(root_dataset.path, allow_untrusted_execution=signing_key is None)
subdataset_path = Path()
for index in range(subdataset_levels):
subdataset_path /= f'{name}_subds{index}'
add_remake_remote(
Dataset(root_dataset.pathobj / subdataset_path),
signing_key,
str(root_dataset.pathobj / subdataset_path),
allow_untrusted_execution=signing_key is None,
)

return datasets
Expand Down
69 changes: 69 additions & 0 deletions datalad_remake/utils/remake_remote.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
from __future__ import annotations

import logging

from datalad_next.datasets import Dataset

from datalad_remake import auto_remote_name

logger = logging.getLogger('datalad.remake.utils.remake_remote')


def add_remake_remote(
    dataset_root: str,
    *,
    allow_untrusted_execution: bool = False,
):
    """Ensure that the dataset at `dataset_root` has a `datalad-remake` remote.

    Parameters
    ----------
    dataset_root:
        Path of the dataset in which the special remote should exist.
    allow_untrusted_execution:
        Value of the `allow-untrusted-execution` option of a newly created
        remote. It has no effect if a `datalad-remake` remote already exists.

    If the dataset has no external special remote of type `datalad-remake`,
    one named `auto_remote_name` is initialized with `autoenable=true`. An
    existing remote is left untouched and only reported in the log. In both
    cases the local configuration for `auto_remote_name` is updated to allow
    unverified downloads.
    """
    if allow_untrusted_execution:
        untrusted_execution = 'true'
    else:
        untrusted_execution = 'false'

    # A `Dataset` instance gives access to the `AnnexRepo` methods for
    # special remote handling.
    ds = Dataset(dataset_root)

    if get_remake_auto_remote(ds):
        # Never modify a remote that somebody else has set up.
        logger.info(
            'Found already existing `datalad-remake` remote in %s. '
            'Leaving it unmodified, please check its configuration.',
            dataset_root,
        )
    else:
        ds.repo.init_remote(
            auto_remote_name,
            [
                'type=external',
                'externaltype=datalad-remake',
                'encryption=none',
                'autoenable=true',
                f'allow-untrusted-execution={untrusted_execution}',
            ],
        )

    # Prospective computation requires that unverified downloads from the
    # remake remote are allowed.
    update_config_for_remake(dataset_root, auto_remote_name)


def get_remake_auto_remote(dataset: Dataset) -> list:
    """Return the info records of all `datalad-remake` special remotes.

    The values reported by `dataset.repo.get_special_remotes()` are scanned.
    A record is returned if its `type` is `external` and its `externaltype`
    is `datalad-remake`. The result is empty if no such remote exists.
    """
    matching_remotes = []
    for info in dataset.repo.get_special_remotes().values():
        # Only external special remotes are asked for an `externaltype`.
        if info['type'] != 'external':
            continue
        if info['externaltype'] == 'datalad-remake':
            matching_remotes.append(info)
    return matching_remotes


def update_config_for_remake(dataset_root: str, remote_name: str) -> None:
    """Allow unverified downloads from the remote `remote_name`.

    The annex security setting for `remote_name` is written with local scope
    in the dataset at `dataset_root`. It is required to allow remake-URLs in
    prospective computation.
    """
    config_key = f'remote.{remote_name}.annex-security-allow-unverified-downloads'
    Dataset(dataset_root).configuration(
        action='set',
        scope='local',
        spec=[(config_key, 'ACKTHPPT')],
        result_renderer='disabled',
    )

0 comments on commit 41b149f

Please sign in to comment.