From 1ba8d8ab5c70b64a6f39e1acd363679ac8de73b4 Mon Sep 17 00:00:00 2001 From: yanghua Date: Mon, 19 Aug 2024 17:50:08 +0800 Subject: [PATCH] [TOSFS #10] Implement rmdir API --- tosfs/core.py | 86 ++++++++++++++++++++++++++++++++++++++- tosfs/tests/conftest.py | 10 +++-- tosfs/tests/test_tosfs.py | 30 +++++++++++++- 3 files changed, 118 insertions(+), 8 deletions(-) diff --git a/tosfs/core.py b/tosfs/core.py index 8a49e76..7fb6604 100644 --- a/tosfs/core.py +++ b/tosfs/core.py @@ -203,6 +203,87 @@ def info( return self._try_dir_info(bucket, key, path, fullpath) + def rmdir(self, path: str) -> None: + """Remove a directory if it is empty. + + Parameters + ---------- + path : str + The path of the directory to remove. The path should be in the format + `tos://bucket_name/directory_path`. + + Raises + ------ + FileNotFoundError + If the directory does not exist. + NotADirectoryError + If the path is not a directory. + TosfsError + If the directory is not empty, + or if there is an error during the removal process. + + Examples + -------- + >>> fs = TosFileSystem() + >>> fs.rmdir("tos://mybucket/mydir/") + + """ + path = self._strip_protocol(path).rstrip("/") + "/" + bucket, key, _ = self._split_path(path) + if not key: + return self._rm_bucket(path) + + if not self.exists(path): + raise FileNotFoundError(f"Directory {path} not found.") + + if not self.isdir(path): + raise NotADirectoryError(f"{path} is not a directory.") + + if len(self.ls(path, refresh=True, detail=False)) > 0: + raise TosfsError(f"Directory {path} is not empty.") + + try: + self.tos_client.delete_object(bucket, key.rstrip("/") + "/") + self.invalidate_cache(path.rstrip("/")) + except tos.exceptions.TosClientError as e: + logger.error("Tosfs failed with client error: %s", e) + raise e + except tos.exceptions.TosServerError as e: + logger.error("Tosfs failed with server error: %s", e) + raise e + except Exception as e: + logger.error("Tosfs failed with unknown error: %s", e) + raise TosfsError(f"Tosfs failed with unknown error: {e}") from e + + def _rm_bucket(self, path: str) -> None: + """Remove a bucket.""" + bucket, _, _ = self._split_path(path) + + try: + # if the bucket is not empty, raise an error + if len(self.ls(bucket, refresh=True, detail=False)) > 0: + logger.warning( + "Try to delete bucket %s, " + "but delete failed due to bucket is not empty.", + bucket, + ) + raise TosfsError(f"Bucket {bucket} is not empty.") + + logger.warning("Deleting an empty bucket %s", bucket) + self.tos_client.delete_bucket(bucket) + except tos.exceptions.TosClientError as e: + logger.error("Tosfs failed with client error: %s", e) + raise e + except tos.exceptions.TosServerError as e: + logger.error("Tosfs failed with server error: %s", e) + raise e + except Exception as e: + logger.error("Tosfs failed with unknown error: %s", e) + raise TosfsError(f"Tosfs failed with unknown error: {e}") from e + + self.invalidate_cache(path.rstrip("/")) + self.invalidate_cache("") + def _info_from_cache( self, path: str, fullpath: str, version_id: Optional[str] ) -> dict: @@ -643,9 +724,10 @@ def _split_path(self, path: str) -> Tuple[str, str, Optional[str]]: @staticmethod def _fill_common_prefix_info(common_prefix: CommonPrefixInfo, bucket: str) -> dict: + name = "/".join([bucket, common_prefix.prefix[:-1]]) return { - "name": common_prefix.prefix[:-1], - "Key": "/".join([bucket, common_prefix.prefix]), + "name": name, + "Key": name, "Size": 0, "type": "directory", } diff --git a/tosfs/tests/conftest.py b/tosfs/tests/conftest.py index 488ed31..9833631 100644 --- a/tosfs/tests/conftest.py +++ b/tosfs/tests/conftest.py @@ -18,7 +18,7 @@ import pytest from tos import EnvCredentialsProvider -from tosfs.core import TosFileSystem +from tosfs.core import TosFileSystem, logger from tosfs.utils import random_path @@ -54,6 +54,8 @@ def temporary_workspace( # will replace with tosfs.mkdir in the future tosfs.tos_client.put_object(bucket=bucket, key=f"{workspace}/") yield workspace - # currently, remove dir via purely tos python client, - # will replace with tosfs.rmdir in the future - tosfs.tos_client.delete_object(bucket=bucket, key=f"{workspace}/") + try: + tosfs.rmdir(f"{bucket}/{workspace}/") + except Exception: + logger.error("Ignore exception.") + assert not tosfs.exists(f"{bucket}/{workspace}/") diff --git a/tosfs/tests/test_tosfs.py b/tosfs/tests/test_tosfs.py index 6c35db5..725862a 100644 --- a/tosfs/tests/test_tosfs.py +++ b/tosfs/tests/test_tosfs.py @@ -18,6 +18,7 @@ from tos.exceptions import TosServerError from tosfs.core import TosFileSystem +from tosfs.exceptions import TosfsError from tosfs.utils import random_path @@ -35,11 +36,11 @@ def test_ls_bucket(tosfs: TosFileSystem, bucket: str) -> None: def test_ls_dir(tosfs: TosFileSystem, bucket: str, temporary_workspace: str) -> None: - assert temporary_workspace in tosfs.ls(bucket, detail=False) + assert f"{bucket}/{temporary_workspace}" in tosfs.ls(bucket, detail=False) detailed_list = tosfs.ls(bucket, detail=True) assert detailed_list for item in detailed_list: - if item["name"] == temporary_workspace: + if item["name"] == f"{bucket}/{temporary_workspace}": assert item["type"] == "directory" break else: @@ -105,3 +106,28 @@ def test_info(tosfs: TosFileSystem, bucket: str, temporary_workspace: str) -> No with pytest.raises(FileNotFoundError): tosfs.info(f"{bucket}/nonexistent") + + +def test_rmdir(tosfs: TosFileSystem, bucket: str, temporary_workspace: str) -> None: + with pytest.raises(TosfsError): + tosfs.rmdir(bucket) + + file_name = random_path() + tosfs.tos_client.put_object(bucket=bucket, key=f"{temporary_workspace}/{file_name}") + assert f"{bucket}/{temporary_workspace}/{file_name}" in tosfs.ls( + f"{bucket}/{temporary_workspace}", detail=False + ) + + with pytest.raises(TosfsError): + tosfs.rmdir(f"{bucket}/{temporary_workspace}") + + with pytest.raises(NotADirectoryError): + tosfs.rmdir(f"{bucket}/{temporary_workspace}/{file_name}") + + tosfs._rm(f"{bucket}/{temporary_workspace}/{file_name}") + assert tosfs.ls(f"{bucket}/{temporary_workspace}", detail=False) == [] + + tosfs.rmdir(f"{bucket}/{temporary_workspace}") + assert f"{bucket}/{temporary_workspace}" not in tosfs.ls( + bucket, detail=False, refresh=True + )