Skip to content

Commit

Permalink
[TOSFS #18] Override fsspec#exists default implementation to optimize…
Browse files Browse the repository at this point in the history
… performance
  • Loading branch information
yanghua committed Aug 21, 2024
1 parent a81701d commit 0277b77
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 0 deletions.
49 changes: 49 additions & 0 deletions tosfs/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,9 @@ def setup_logging() -> None:
"The tosfs's log level is set to be %s", logging.getLevelName(logger.level)
)

# Status code compared against ``TosServerError.status_code`` by the
# existence checks below: a match is reported as "does not exist" instead
# of being re-raised.
SERVER_RESPONSE_CODE_NOT_FOUND = 404


class TosFileSystem(AbstractFileSystem):
"""Tos file system.
Expand Down Expand Up @@ -413,6 +416,52 @@ def _try_dir_info(self, bucket: str, key: str, path: str, fullpath: str) -> dict
except Exception as e:
raise TosfsError(f"Tosfs failed with unknown error: {e}") from e

def exists(self, path: str, **kwargs: Union[str, bool, float, None]) -> bool:
    """Report whether something exists at ``path``.

    The path is split into a bucket and a key. With an empty key only the
    bucket is checked. Otherwise the key is probed as given and, if that
    probe misses, probed once more with a trailing ``"/"`` appended.

    Parameters
    ----------
    path : str
        The path to check, in any form ``_split_path`` accepts.
    **kwargs
        Accepted for signature compatibility; not used by this method.

    Returns
    -------
    bool
        True if the bucket (empty key) or one of the two key probes is
        found, False otherwise.

    """
    bucket, key, _ = self._split_path(path)

    if key:
        # Short-circuit: the "key/" probe is only issued when the plain
        # key was not found.
        return self._exists_object(bucket, key) or self._exists_object(
            bucket, key + "/"
        )
    return self._exists_bucket(bucket)

def _exists_bucket(self, bucket: str) -> bool:
    """Check whether the given bucket exists.

    Issues ``head_bucket`` through the TOS client. A server error whose
    status code equals ``SERVER_RESPONSE_CODE_NOT_FOUND`` is reported as
    ``False``; every other failure is logged and raised.

    Parameters
    ----------
    bucket : str
        Name of the bucket to look up.

    Returns
    -------
    bool
        True if the head request succeeds, False on a not-found response.

    Raises
    ------
    tos.exceptions.TosClientError
        Re-raised unchanged after logging.
    tos.exceptions.TosServerError
        Re-raised unchanged after logging, unless it is a not-found.
    TosfsError
        Wraps any other exception.

    """
    try:
        self.tos_client.head_bucket(bucket)
    except tos.exceptions.TosClientError as client_err:
        logger.error("Tosfs failed with client error: %s", client_err)
        raise client_err
    except tos.exceptions.TosServerError as server_err:
        if server_err.status_code != SERVER_RESPONSE_CODE_NOT_FOUND:
            logger.error("Tosfs failed with server error: %s", server_err)
            raise server_err
        # A not-found response is the expected "no such bucket" answer.
        return False
    except Exception as unknown_err:
        logger.error("Tosfs failed with unknown error: %s", unknown_err)
        raise TosfsError(
            f"Tosfs failed with unknown error: {unknown_err}"
        ) from unknown_err
    else:
        return True

def _exists_object(self, bucket: str, key: str) -> bool:
    """Check whether an object with the given key exists in the bucket.

    Issues ``head_object`` through the TOS client. A server error whose
    status code equals ``SERVER_RESPONSE_CODE_NOT_FOUND`` is reported as
    ``False``; every other failure is logged and raised.

    Parameters
    ----------
    bucket : str
        Name of the bucket holding the object.
    key : str
        Object key to look up.

    Returns
    -------
    bool
        True if the head request succeeds, False on a not-found response.

    Raises
    ------
    tos.exceptions.TosClientError
        Re-raised unchanged after logging.
    tos.exceptions.TosServerError
        Re-raised unchanged after logging, unless it is a not-found.
    TosfsError
        Wraps any other exception.

    """
    try:
        self.tos_client.head_object(bucket, key)
        return True
    except tos.exceptions.TosClientError as e:
        logger.error("Tosfs failed with client error: %s", e)
        raise
    except tos.exceptions.TosServerError as e:
        if e.status_code == SERVER_RESPONSE_CODE_NOT_FOUND:
            # A not-found response is the expected "no such object" answer.
            return False
        logger.error("Tosfs failed with server error: %s", e)
        raise
    except Exception as e:
        # Log before wrapping so unknown failures are recorded the same
        # way as in the bucket existence check.
        logger.error("Tosfs failed with unknown error: %s", e)
        raise TosfsError(f"Tosfs failed with unknown error: {e}") from e

def _lsbuckets(self, refresh: bool = False) -> List[dict]:
"""List all buckets in the account.
Expand Down
20 changes: 20 additions & 0 deletions tosfs/tests/test_tosfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,3 +131,23 @@ def test_rmdir(tosfs: TosFileSystem, bucket: str, temporary_workspace: str) -> N
assert f"{bucket}/{temporary_workspace}" not in tosfs.ls(
bucket, detail=False, refresh=True
)


def test_exists_bucket(
    tosfs: TosFileSystem, bucket: str, temporary_workspace: str
) -> None:
    """``exists`` is true for the fixture bucket and false for an unknown one."""
    known_bucket_found = tosfs.exists(bucket)
    assert known_bucket_found

    unknown_bucket_found = tosfs.exists("nonexistent")
    assert not unknown_bucket_found


def test_exists_object(
    tosfs: TosFileSystem, bucket: str, temporary_workspace: str
) -> None:
    """``exists`` finds the workspace and an uploaded object, and misses others.

    An empty object is uploaded under the temporary workspace. The
    workspace is then checked with and without a trailing slash, the
    object itself is checked, and several keys that were never created
    are checked for absence.
    """
    file_name = random_path()
    tosfs.tos_client.put_object(bucket=bucket, key=f"{temporary_workspace}/{file_name}")

    # Paths that must be found.
    assert tosfs.exists(f"{bucket}/{temporary_workspace}")
    assert tosfs.exists(f"{bucket}/{temporary_workspace}/")
    assert tosfs.exists(f"{bucket}/{temporary_workspace}/{file_name}")

    # Paths that were never created. The trailing-slash case replaces a
    # verbatim duplicate of the first assertion in this group.
    assert not tosfs.exists(f"{bucket}/{temporary_workspace}/nonexistent")
    assert not tosfs.exists(f"{bucket}/nonexistent")
    assert not tosfs.exists(f"{bucket}/{temporary_workspace}/nonexistent/")

0 comments on commit 0277b77

Please sign in to comment.