Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Experimental] multiflavor ORA-remote #106

Draft
wants to merge 24 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
b5af7ec
Lay a base structure for the ora special remote (just empty required …
adswa Sep 1, 2023
0106481
Create a space for legacy ria code to move to
adswa Sep 1, 2023
99956d3
Add a logger
adswa Sep 1, 2023
ea72958
Port over RIARemoteError and RemoteCommandFailedError
adswa Sep 1, 2023
d03f06e
Lay some foundations for the fallback on legacy ORA
adswa Sep 1, 2023
988555c
Port over a first small set of class attributes
adswa Sep 1, 2023
6f75d96
Port over retrieval of datalad id with modern methods
adswa Sep 1, 2023
87b908e
typo
adswa Sep 5, 2023
2b00096
Merge branch 'main' into ora
adswa Sep 12, 2023
4dbabc1
Don't have a legacy module, import directly from datalad for now
adswa Sep 12, 2023
d229fe9
Merge branch 'main' into ora
adswa Sep 20, 2023
a722e2e
add a very simple demo git annex special remote
christian-monch Sep 26, 2023
1e53178
[temp] store some work on uncurl-based ora-remote
christian-monch Sep 28, 2023
9b268e8
[temp] store ssh-demo work
christian-monch Feb 29, 2024
27fda18
[temp] store some work
christian-monch Mar 1, 2024
7f5329e
[temp] store some work
christian-monch Mar 9, 2024
39dc809
[temp] add progress for upload and other work
christian-monch Mar 11, 2024
862af96
[temp] add tests, remove commented code
christian-monch Mar 12, 2024
b93260b
[temp] use "real" annex paths
christian-monch Mar 15, 2024
31fe151
[temp] add layout-version handling
christian-monch Mar 18, 2024
f24b9bb
[temp] make _remote_tempfile a method, improve _ensure_writable
christian-monch Mar 22, 2024
129cc94
[temp] add a test for ssh_riahandler_ensure_writable
christian-monch Mar 23, 2024
3edae86
remove usage of `_remote_temp_file()`
christian-monch Mar 26, 2024
b7dc13b
Merge branch 'main' into demo-ora
christian-monch Mar 26, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file.
89 changes: 89 additions & 0 deletions datalad_ria/annexremotes/file_riahandler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
from __future__ import annotations

import logging
import os
import shutil
from pathlib import (
PurePath,
)
from threading import (
Lock,
)

from datalad_next.annexremotes import SpecialRemote, RemoteError

from .riahandler import RIAHandler


lgr = logging.getLogger('datalad.ria.riahandler.file')


class FileRIAHandlerPosix(RIAHandler):
def __init__(self,
special_remote: SpecialRemote,
base_path: PurePath,
dataset_id: str,
) -> None:
super().__init__(special_remote, base_path, dataset_id)
self.debug = special_remote.annex.debug
self.base_path = base_path
self.command_lock = Lock()

def initremote(self):
dataset_path = self.base_path / self.dataset_id
lgr.debug(f'initremote: making dir: {dataset_path}')
os.makedirs(self.base_path / self.dataset_id, exist_ok=True)

def prepare(self):
dataset_path = self.base_path / self.dataset_id
lgr.debug(f'prepare: making dir: {dataset_path}')
os.makedirs(self.base_path / self.dataset_id, exist_ok=True)

def transfer_store(self, key: str, local_file: str):
with self.command_lock:
# If we already have the key, we will not transfer anything
if not self._locked_checkpresent(key):
try:
transfer_path = self.get_ria_path(key, '.transfer')
destination_path = self.get_ria_path(key)
shutil.copy(local_file, transfer_path)
shutil.move(transfer_path, destination_path)
except OSError as e:
shutil.rmtree(transfer_path, ignore_errors=True)
raise RemoteError(
f'Failed to store key {key} in special remote'
) from e

def transfer_retrieve(self, key: str, local_file: str):
with self.command_lock:
if not self._locked_checkpresent(key):
raise RemoteError(f'Key {key} not present in special remote')
shutil.copy(self.get_ria_path(key), local_file)

def remove(self, key: str):
with self.command_lock:
if self._locked_checkpresent(key):
try:
shutil.move(
self.get_ria_path(key),
self.get_ria_path(key, '.deleted')
)
except OSError as e:
raise RemoteError(
f'Failed to remove key {key} from special remote'
) from e

def checkpresent(self, key: str) -> bool:
with self.command_lock:
return self._locked_checkpresent(key)

def _locked_checkpresent(self, key: str) -> bool:
try:
os.stat(self.get_ria_path(key))
except OSError as e:
lgr.debug(
f'_locked_checkpresent: os.stat({self.get_ria_path(key)}) '
f'failed, reason: {e!r}'
)
return False
return True
70 changes: 70 additions & 0 deletions datalad_ria/annexremotes/riahandler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
from __future__ import annotations

import logging
from abc import (
ABCMeta,
abstractmethod,
)
from pathlib import (
PurePath,
PurePosixPath,
)

from datalad.customremotes import SpecialRemote


lgr = logging.getLogger('datalad.ria.riahandler')


class RIAHandler(metaclass=ABCMeta):
def __init__(self,
special_remote: SpecialRemote,
base_path: PurePath,
dataset_id: str,
) -> None:
self.special_remote = special_remote
self.base_path = PurePosixPath(base_path)
self.dataset_id = dataset_id
self.annex = special_remote.annex

def get_ria_repo_path(self) -> PurePosixPath:
return (
self.base_path
/ self.dataset_id[:3] / self.dataset_id[3:]
)

def get_ria_key_path(self,
key: str,
extension: str = ''
) -> PurePosixPath:
return (
self.get_ria_repo_path()
/ 'annex' / 'objects'
/ self.special_remote.annex.dirhash(key)
/ key / (key + extension)
)

def initremote(self):
pass

def prepare(self):
pass

def progress_handler(self, size: int, total_size: int):
self.annex.progress(size)

@abstractmethod
def transfer_store(self, key: str, local_file: str):
raise NotImplementedError

@abstractmethod
def transfer_retrieve(self, key: str, local_file: str):
raise NotImplementedError

@abstractmethod
def remove(self, key: str):
raise NotImplementedError

@abstractmethod
def checkpresent(self, key: str) -> bool:
raise NotImplementedError
104 changes: 104 additions & 0 deletions datalad_ria/annexremotes/ssh_demo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
from __future__ import annotations

import logging
from pathlib import PurePosixPath
from threading import Lock
from typing import Any
from urllib.parse import urlparse

from annexremote import ProtocolError
from datalad.customremotes.main import main as super_main
from datalad.customremotes import (
RemoteError,
SpecialRemote as _SpecialRemote,
)

from .ssh_riahandler import SshRIAHandlerPosix
from .file_riahandler import FileRIAHandlerPosix


lgr = logging.getLogger('datalad.ria.ssh')


g_supported_schemes = {
'ria2+ssh': SshRIAHandlerPosix,
'ria2+file': FileRIAHandlerPosix,
'ria2+http': None,
'ria2+https': None,
}


class DemoSshRemote(_SpecialRemote):
def __init__(self, annex: Any):
super().__init__(annex)
self.configs = {
'url': 'url of the RIA store',
'id': 'ID of the dataset',
}
self.url = None
self.dataset_id = None
self.handler = None
self.initialization_lock = Lock()

def __del__(self):
pass

def message(self, msg, type='debug'):
output_msg = f'DemoSshRemote: ' + msg
try:
self.annex.info('INFO: ' + output_msg)
except (ProtocolError, AttributeError):
pass

def _get_handler(self):
with self.initialization_lock:
if self.handler:
return

self.url = urlparse(self.annex.getconfig('url'))
self.dataset_id = self.annex.getconfig('id')
handler_class = g_supported_schemes.get(self.url.scheme, None)
if handler_class is None:
self.message(f'unsupported scheme: {self.url.scheme!r}')
raise RemoteError(f'unsupported scheme: {self.url.scheme!r}')
if handler_class is SshRIAHandlerPosix:
self.handler = handler_class(
self,
PurePosixPath(self.url.path),
self.dataset_id,
[b'ssh', b'-i', b'/home/cristian/.ssh/gitlab-metadata-key', self.url.netloc.encode()],
)
elif handler_class is FileRIAHandlerPosix:
self.handler = handler_class(
self,
PurePosixPath(self.url.path),
self.dataset_id,
)

def initremote(self):
self._get_handler()
self.handler.initremote()

def prepare(self):
self._get_handler()
self.handler.prepare()

def transfer_store(self, key: str, local_file: str):
self.handler.transfer_store(key, local_file)

def transfer_retrieve(self, key: str, local_file: str):
self.handler.transfer_retrieve(key, local_file)

def remove(self, key):
self.handler.remove(key)

def checkpresent(self, key: str) -> bool:
return self.handler.checkpresent(key)


def main():
super_main(
cls=DemoSshRemote,
remote_name='demo',
description='a demo special remote',
)
Loading
Loading