Skip to content

Commit

Permalink
Merge branch 'main' into improve-git-oneline
Browse files Browse the repository at this point in the history
  • Loading branch information
mih authored Oct 28, 2024
2 parents 53accc0 + 6393bec commit 7573cf7
Show file tree
Hide file tree
Showing 28 changed files with 2,587 additions and 25 deletions.
80 changes: 80 additions & 0 deletions datalad_core/commands/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
"""Components for implementing DataLad commands
At the very core, a DataLad command is a function that documents and validates
its parameters in a particular way, and yields return values that follow
particular conventions.
In its simplest form, a DataLad command can be created like this:
>>> @datalad_command()
... def my_command(some_arg):
... # do processing ...
... return # typically one or more results are returned/yielded
The :func:`datalad_command` decorator wraps a function, and automatically
establishes the conventions expected for a DataLad command (see its
documentation for details).
Beyond this simplest use, the decorator can be used to customize parameter
validation, and result handling. Both aspects are handled by customizable
classes.
.. currentmodule:: datalad_core.commands
.. autosummary::
:toctree: generated
datalad_command
ResultHandler
StandardResultHandler
PassthroughHandler
get_default_result_handler
set_default_result_handler
Dataset
EnsureDataset
ParamProcessor
JointParamProcessor
ParamConstraintContext
ParamErrors
ParamSetConstraint
"""

# Public API of datalad_core.commands.
# Kept sorted alphabetically (uppercase first) per isort/ruff RUF022
# convention so additions produce minimal, readable diffs.
__all__ = [
    'Dataset',
    'EnsureDataset',
    'JointParamProcessor',
    'ParamConstraintContext',
    'ParamErrors',
    'ParamProcessor',
    'ParamSetConstraint',
    'PassthroughHandler',
    'ResultHandler',
    'StandardResultHandler',
    'datalad_command',
    'get_default_result_handler',
    'set_default_result_handler',
]


from .dataset import (
Dataset,
EnsureDataset,
)
from .decorator import datalad_command
from .default_result_handler import (
StandardResultHandler,
get_default_result_handler,
set_default_result_handler,
)
from .exceptions import (
ParamConstraintContext,
ParamErrors,
)
from .param_constraint import ParamSetConstraint
from .preproc import (
JointParamProcessor,
ParamProcessor,
)
from .result_handler import (
PassthroughHandler,
ResultHandler,
)
248 changes: 248 additions & 0 deletions datalad_core/commands/dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,248 @@
from __future__ import annotations

from pathlib import Path
from typing import TYPE_CHECKING

from datalad_core.constraints import Constraint
from datalad_core.repo import (
Repo,
Worktree,
)


class Dataset:
    """Dataset parameter type for DataLad command implementations

    Many DataLad commands operate on datasets, which are typically Git
    repositories. The main purpose of this class is to relay the semantics
    of the original parameter specification all the way to a command
    implementation: a dataset may be identified by a path, an existing
    :class:`~datalad_core.repo.Repo` or
    :class:`~datalad_core.repo.Worktree` instance, or ``None``
    (auto-anchoring on the process working directory), and individual
    commands may want to behave differently depending on how -- or whether
    -- a dataset was identified.

    It equally serves commands that can work on bare repositories and
    worktrees alike: both entities are discoverable from this single type
    without code duplication. A further use case are to-be-created
    datasets, for which no repository or worktree exists on the file
    system yet, and for which the :class:`~datalad_core.repo.Repo` and
    :class:`~datalad_core.repo.Worktree` classes consequently cannot be
    used directly.

    .. note::
       Despite the name, this class is very different from the ``Dataset``
       class in legacy DataLad. It is not a convenience interface for
       commands that operate on datasets, but merely a type for
       implementing individual DataLad commands with uniform semantics
       for this key parameter.
    """

    def __init__(
        self,
        spec: str | Path | Repo | Worktree | None,
    ):
        """
        A ``spec`` is required, even if the given value is ``None``.
        """
        self._spec = spec
        # lazily resolved and cached by the respective properties below
        self._repo: Repo | None = None
        self._worktree: Worktree | None = None
        self._path: Path | None = None

    @property
    def pristine_spec(self) -> str | Path | Repo | Worktree | None:
        """The unaltered dataset specification

        This is the exact value that was given to the constructor.
        """
        return self._spec

    @property
    def path(self) -> Path:
        """Local path associated with any (non-)existing dataset

        If an associated Git repository exists on the file system, this is
        the worktree root path for non-bare repositories (and their
        worktree), or the repository root path for bare repositories.

        If no repository exists, the path is derived from the given
        ``spec``, regardless of a corresponding directory existing on the
        file system. For a ``None`` spec, the process working directory
        is reported.
        """
        if self._path is None:
            self._path = self._path_from_spec()
        return self._path

    def _path_from_spec(self) -> Path:
        # one-time resolution of the dataset location; result is cached
        # by the `path` property
        spec = self._spec
        if spec is None:
            return Path.cwd()
        # prefer the (absolute) root of an existing worktree or repo
        wt = self.worktree
        if wt is not None:
            return wt.path
        repo = self.repo
        if repo is not None:
            return repo.path
        # nothing on the file system: we can only interpret the pristine
        # spec itself
        if isinstance(spec, Path):
            return spec
        if TYPE_CHECKING:
            assert isinstance(spec, (Path, str))
        # could be a str-path or some magic label.
        # for now we only support a path specification
        return Path(spec)

    @property
    def repo(self) -> Repo | None:
        """Repository associated with the dataset (if one exists)

        This property is mostly useful for datasets without a worktree.
        For datasets with a worktree it is generally more appropriate to
        access the ``repo`` property of the :attr:`worktree` property.

        Returns ``None`` if there is no associated repository, e.g. when
        a repository is yet to be created.
        """
        if self._repo is None:
            spec = self._spec
            wt = self.worktree
            if wt is not None:
                self._repo = wt.repo
            elif isinstance(spec, Repo):
                self._repo = spec
            elif isinstance(spec, (Path, str)):
                # a str could also be some magic label eventually;
                # for now we only support a path specification
                self._repo = get_gitmanaged_from_pathlike(Repo, spec)
        return self._repo

    @property
    def worktree(self) -> Worktree | None:
        """Worktree associated with the dataset (if one exists)

        Returns ``None`` if there is no associated worktree, e.g. when
        the dataset is associated with a bare Git repository, or the
        worktree (and repository) is yet to be created.
        """
        if self._worktree is None:
            spec = self._spec
            if isinstance(spec, Worktree):
                # usable as-is
                self._worktree = spec
            elif isinstance(spec, (Path, str)):
                # a str could also be some magic label eventually;
                # for now we only support a path specification
                self._worktree = get_gitmanaged_from_pathlike(Worktree, spec)
        return self._worktree

    def __repr__(self) -> str:
        return f'{type(self).__name__}({self._spec!r})'


class EnsureDataset(Constraint):
    """Ensure an absent/present :class:`Dataset` from a path or instance

    Regardless of the nature of the input (local path, ``Repo``/``Worktree``
    instance, or ``None``), a :class:`Dataset` instance is created from it
    and, depending on ``installed``, verified to be absent or present on
    the local file system. On success, :meth:`__call__` returns that
    :class:`Dataset` directly; callers can recover the original input
    value via its :attr:`Dataset.pristine_spec` property.

    In addition to any value representing an explicit path, the special
    value ``None`` is recognized, yielding a dataset anchored on the
    process working directory (PWD).

    .. note::
       NOTE(review): with a ``None`` spec, :class:`Dataset` presently
       reports neither a worktree nor a repository, so ``installed=True``
       combined with ``None`` always fails validation here. Discovery of
       a dataset rooted in a *parent* directory of PWD (as performed by
       legacy DataLad's ``require_dataset()``) is not implemented by the
       visible code -- confirm whether that is intended.
    """

    def __init__(self, installed: bool | str | None = None):
        """
        Parameters
        ----------
        installed: bool or 'with-id', optional
          If ``True``, a dataset must exist on the file system; if
          ``False``, it must not. The special value ``'with-id'``
          additionally requires an existing dataset to have a
          ``datalad.dataset.id`` in its branch-committed configuration.
          If ``None``, the installation state is not inspected.
        """
        self._installed = installed
        super().__init__()

    @property
    def input_synopsis(self) -> str:
        # one-line summary of acceptable input, phrased according to the
        # configured installation-state requirement (any truthy value,
        # including 'with-id', implies an existing dataset)
        return '(path to) {}dataset'.format(
            'an existing '
            if self._installed
            else 'a non-existing '
            if self._installed is False
            else 'a '
        )

    def __call__(self, value) -> Dataset:
        """Validate ``value`` and return the corresponding :class:`Dataset`

        Fails (via ``self.raise_for()``) when no ``Dataset`` can be
        created from ``value``, or when the dataset's presence on the
        file system does not match the ``installed`` requirement.
        """
        ds = Dataset(value)
        try:
            # trigger path resolution to reject specs that cannot be
            # interpreted as a dataset location at all
            ds.path  # noqa: B018
        except (ValueError, TypeError) as e:
            self.raise_for(
                value,
                'cannot create Dataset from {type}: {__caused_by__}',
                type=type(value),
                __caused_by__=e,
            )
        if self._installed is False and (ds.worktree or ds.repo):
            self.raise_for(ds, 'already exists locally')
        if self._installed and not (ds.worktree or ds.repo):
            self.raise_for(ds, 'not installed')
        if self._installed != 'with-id':
            return ds

        # 'with-id' mode: the dataset must carry a datalad-id in the
        # 'datalad-branch' configuration source
        to_query = ds.worktree or ds.repo
        if TYPE_CHECKING:
            # the 'not installed' check above guarantees one of them exists
            assert to_query is not None
        if 'datalad.dataset.id' not in to_query.config.sources['datalad-branch']:
            self.raise_for(ds, 'does not have a datalad-id')
        return ds


def get_gitmanaged_from_pathlike(cls, path):
    """Instantiate the Git-managed entity class ``cls`` for ``path``

    ``path`` may be a :class:`~pathlib.Path` or anything the ``Path``
    constructor accepts (e.g. a ``str``). Returns the ``cls`` instance,
    or ``None`` when the constructor rejects the location with a
    ``ValueError`` (i.e. it is not an instance of the requested class).
    """
    location = path if isinstance(path, Path) else Path(path)
    # the constructor itself decides whether the location qualifies
    # as an instance of the requested class
    try:
        return cls(location)
    except ValueError:
        return None
Loading

0 comments on commit 7573cf7

Please sign in to comment.