From 3bbd0062234cfb839a0989a6d016292d57987646 Mon Sep 17 00:00:00 2001
From: yanghua
Date: Sun, 18 Aug 2024 09:01:14 +0800
Subject: [PATCH] [TOSFS #18] Override fsspec#exists default implementation to optimize performance

---
 tosfs/core.py             | 72 +++++++++++++++++++++++++++++++++++++++
 tosfs/tests/test_tosfs.py | 19 +++++++++++
 2 files changed, 91 insertions(+)

diff --git a/tosfs/core.py b/tosfs/core.py
index d2c1049..e6f054b 100644
--- a/tosfs/core.py
+++ b/tosfs/core.py
@@ -46,6 +46,9 @@ def setup_logging() -> None:
         "The tosfs's log level is set to be %s", logging.getLevelName(logger.level)
     )
 
+# constants
+SERVER_RESPONSE_CODE_NOT_FOUND = 404
+
 
 class TosFileSystem(AbstractFileSystem):
     """Tos file system.
@@ -124,6 +127,75 @@ def ls(
 
         return files if detail else sorted([o["name"] for o in files])
 
+    def exists(self, path: str, **kwargs: Union[str, bool, float, None]) -> bool:
+        """Check whether a bucket, object or directory-style key exists at path.
+
+        A path with no key part is checked as a bucket. Otherwise the key is
+        probed as given and, only if that is missing, once more with a trailing
+        "/" appended, so a key stored in its trailing-slash form also counts.
+
+        Returns True as soon as one probe succeeds and False when every probe
+        answers "not found"; any other error raised by the probes propagates.
+        The extra keyword arguments are accepted for interface compatibility
+        and are not used.
+        """
+        bucket, key, _ = self._split_path(path)
+        if not key:
+            return self._exists_bucket(bucket)
+
+        if self._exists_object(bucket, key):
+            return True
+        # Fall back to the trailing-slash form of the key.
+        return self._exists_object(bucket, key + "/")
+
+    def _exists_bucket(self, bucket: str) -> bool:
+        """Check whether the bucket exists.
+
+        Sends a head_bucket request for the bucket. A server error with status
+        code 404 means the bucket is absent and yields False. Client errors and
+        other server errors are logged and re-raised unchanged; any other
+        exception is logged and wrapped in TosfsError.
+        """
+        try:
+            self.tos_client.head_bucket(bucket)
+            return True
+        except tos.exceptions.TosClientError as e:
+            logger.error("Tosfs failed with client error: %s", e)
+            raise
+        except tos.exceptions.TosServerError as e:
+            # A 404 is the expected answer for a missing bucket, not a failure.
+            if e.status_code == SERVER_RESPONSE_CODE_NOT_FOUND:
+                return False
+            logger.error("Tosfs failed with server error: %s", e)
+            raise
+        except Exception as e:
+            logger.error("Tosfs failed with unknown error: %s", e)
+            raise TosfsError(f"Tosfs failed with unknown error: {e}") from e
+
+    def _exists_object(self, bucket: str, key: str) -> bool:
+        """Check whether the object exists.
+
+        Sends a head_object request for the key. A server error with status
+        code 404 means the object is absent and yields False. Client errors and
+        other server errors are logged and re-raised unchanged; any other
+        exception is logged and wrapped in TosfsError.
+        """
+        try:
+            self.tos_client.head_object(bucket, key)
+            return True
+        except tos.exceptions.TosClientError as e:
+            logger.error("Tosfs failed with client error: %s", e)
+            raise
+        except tos.exceptions.TosServerError as e:
+            # A 404 is the expected answer for a missing object, not a failure.
+            if e.status_code == SERVER_RESPONSE_CODE_NOT_FOUND:
+                return False
+            logger.error("Tosfs failed with server error: %s", e)
+            raise
+        except Exception as e:
+            logger.error("Tosfs failed with unknown error: %s", e)
+            raise TosfsError(f"Tosfs failed with unknown error: {e}") from e
+
     def _lsbuckets(self, refresh: bool = False) -> List[dict]:
         """List all buckets in the account.
 
diff --git a/tosfs/tests/test_tosfs.py b/tosfs/tests/test_tosfs.py
index b242fb6..a63625b 100644
--- a/tosfs/tests/test_tosfs.py
+++ b/tosfs/tests/test_tosfs.py
@@ -83,3 +83,22 @@ def test_inner_rm(tosfs: TosFileSystem, bucket: str, temporary_workspace: str) -
     assert tosfs.ls(f"{bucket}/{temporary_workspace}", detail=False) == []
 
     tosfs._rm(f"{bucket}/{temporary_workspace}/{file_name}")
+
+
+def test_exists_bucket(
+    tosfs: TosFileSystem, bucket: str, temporary_workspace: str
+) -> None:
+    assert tosfs.exists(bucket)
+    assert not tosfs.exists("nonexistent")
+
+
+def test_exists_object(
+    tosfs: TosFileSystem, bucket: str, temporary_workspace: str
+) -> None:
+    file_name = random_path()
+    tosfs.tos_client.put_object(bucket=bucket, key=f"{temporary_workspace}/{file_name}")
+    assert tosfs.exists(f"{bucket}/{temporary_workspace}")
+    assert tosfs.exists(f"{bucket}/{temporary_workspace}/")
+    assert tosfs.exists(f"{bucket}/{temporary_workspace}/{file_name}")
+    assert not tosfs.exists(f"{bucket}/{temporary_workspace}/nonexistent")
+    assert not tosfs.exists(f"{bucket}/nonexistent")