Skip to content

Commit

Permalink
[TOSFS #18] Override fsspec#exists default implementation to optimize…
Browse files Browse the repository at this point in the history
… performance
  • Loading branch information
yanghua committed Aug 18, 2024
1 parent c62f50a commit 3bbd006
Show file tree
Hide file tree
Showing 2 changed files with 70 additions and 0 deletions.
50 changes: 50 additions & 0 deletions tosfs/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ def setup_logging() -> None:
"The tosfs's log level is set to be %s", logging.getLevelName(logger.level)
)

# HTTP status code the TOS server reports for a missing resource; the
# existence checks treat it as "not found" rather than as an error.
SERVER_RESPONSE_CODE_NOT_FOUND = 404


class TosFileSystem(AbstractFileSystem):
"""Tos file system.
Expand Down Expand Up @@ -124,6 +127,53 @@ def ls(

return files if detail else sorted([o["name"] for o in files])

def exists(self, path: str, **kwargs: Union[str, bool, float, None]) -> bool:
    """Return whether a bucket or object exists at the given path.

    The path is split with ``_split_path``. When it carries no key, only the
    bucket is checked. Otherwise the key is probed as-is and, if that misses,
    probed once more with a trailing "/" appended.

    ``kwargs`` is accepted but not used by this implementation.
    """
    bucket, key, _ = self._split_path(path)
    if key:
        # `or` short-circuits, so the second probe only runs after a miss.
        return self._exists_object(bucket, key) or self._exists_object(
            bucket, key + "/"
        )
    return self._exists_bucket(bucket)

def _exists_bucket(self, bucket: str) -> bool:
    """Report whether ``bucket`` exists, using the client's ``head_bucket``.

    Returns True when the call succeeds and False when the server error
    carries the not-found status code. Client errors and all other server
    errors are logged and re-raised; any other exception is logged and
    wrapped in ``TosfsError``.
    """
    try:
        self.tos_client.head_bucket(bucket)
    except tos.exceptions.TosClientError as client_err:
        logger.error("Tosfs failed with client error: %s", client_err)
        raise client_err
    except tos.exceptions.TosServerError as server_err:
        # Not-found is the expected "missing" answer, not a failure.
        if server_err.status_code != SERVER_RESPONSE_CODE_NOT_FOUND:
            logger.error("Tosfs failed with server error: %s", server_err)
            raise server_err
        return False
    except Exception as e:
        logger.error("Tosfs failed with unknown error: %s", e)
        raise TosfsError(f"Tosfs failed with unknown error: {e}") from e
    else:
        return True

def _exists_object(self, bucket: str, key: str) -> bool:
    """Report whether ``key`` exists in ``bucket``, using ``head_object``.

    Returns True when the call succeeds and False when the server error
    carries the not-found status code. Client errors and all other server
    errors are logged and re-raised; any other exception is logged and
    wrapped in ``TosfsError``.
    """
    try:
        self.tos_client.head_object(bucket, key)
    except tos.exceptions.TosClientError as client_err:
        logger.error("Tosfs failed with client error: %s", client_err)
        raise client_err
    except tos.exceptions.TosServerError as server_err:
        # Not-found is the expected "missing" answer, not a failure.
        if server_err.status_code != SERVER_RESPONSE_CODE_NOT_FOUND:
            logger.error("Tosfs failed with server error: %s", server_err)
            raise server_err
        return False
    except Exception as e:
        logger.error("Tosfs failed with unknown error: %s", e)
        raise TosfsError(f"Tosfs failed with unknown error: {e}") from e
    else:
        return True

def _lsbuckets(self, refresh: bool = False) -> List[dict]:
"""List all buckets in the account.
Expand Down
20 changes: 20 additions & 0 deletions tosfs/tests/test_tosfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,3 +83,23 @@ def test_inner_rm(tosfs: TosFileSystem, bucket: str, temporary_workspace: str) -
assert tosfs.ls(f"{bucket}/{temporary_workspace}", detail=False) == []

tosfs._rm(f"{bucket}/{temporary_workspace}/{file_name}")


def test_exists_bucket(
    tosfs: TosFileSystem, bucket: str, temporary_workspace: str
) -> None:
    """exists() is true for the fixture bucket and false for an unknown one."""
    unknown_bucket = "nonexistent"
    assert tosfs.exists(bucket)
    assert not tosfs.exists(unknown_bucket)


def test_exists_object(
    tosfs: TosFileSystem, bucket: str, temporary_workspace: str
) -> None:
    """Check exists() for an uploaded file, its workspace and missing paths."""
    file_name = random_path()
    tosfs.tos_client.put_object(bucket=bucket, key=f"{temporary_workspace}/{file_name}")
    # The workspace must be found both with and without a trailing slash.
    assert tosfs.exists(f"{bucket}/{temporary_workspace}")
    assert tosfs.exists(f"{bucket}/{temporary_workspace}/")
    assert tosfs.exists(f"{bucket}/{temporary_workspace}/{file_name}")
    # Missing keys, inside the workspace and at bucket level, must be absent.
    assert not tosfs.exists(f"{bucket}/{temporary_workspace}/nonexistent")
    assert not tosfs.exists(f"{bucket}/nonexistent")

0 comments on commit 3bbd006

Please sign in to comment.