Skip to content

Commit

Permalink
Core: Implement exists api
Browse files Browse the repository at this point in the history
  • Loading branch information
yanghua committed Aug 21, 2024
1 parent 0277b77 commit 45cf338
Show file tree
Hide file tree
Showing 2 changed files with 167 additions and 15 deletions.
178 changes: 163 additions & 15 deletions tosfs/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -417,51 +417,199 @@ def _try_dir_info(self, bucket: str, key: str, path: str, fullpath: str) -> dict
raise TosfsError(f"Tosfs failed with unknown error: {e}") from e

def exists(self, path: str, **kwargs: Union[str, bool, float, None]) -> bool:
    """Check if a path exists in the TOS file system.

    The root (``""`` or ``"/"``) is always reported as existing. A path
    without a key is checked as a bucket. Any other path is checked first
    as an object under its key and then, if that fails, under the key with
    exactly one trailing ``"/"``.

    Parameters
    ----------
    path : str
        The path to check for existence.
    **kwargs : dict, optional
        Additional arguments. They are accepted for interface compatibility
        and are not used by this method.

    Returns
    -------
    bool
        True if the path exists, False otherwise.

    Raises
    ------
    tos.exceptions.TosClientError
        If there is a client error while checking the path.
    tos.exceptions.TosServerError
        If there is a server error while checking the path.
    TosfsError
        If there is an unknown error while checking the path.

    Examples
    --------
    >>> fs = TosFileSystem()
    >>> fs.exists("tos://bucket/to/file")
    True
    >>> fs.exists("tos://mybucket/nonexistentfile")
    False

    """
    if path in ["", "/"]:
        # the root always exists, even if anon
        return True

    path = self._strip_protocol(path)
    bucket, key, version_id = self._split_path(path)
    if not key:
        # the path is a bucket
        return self._exists_bucket(bucket)

    if self._exists_object(bucket, key, path, version_id):
        return True

    # Fall back to the directory form of the key (single trailing slash).
    dir_key = key.rstrip("/") + "/"
    if dir_key == key:
        # The key was already in directory form, so the fallback would only
        # repeat the lookup that just failed.
        return False
    return self._exists_object(bucket, dir_key, path, version_id)

def _exists_bucket(self, bucket: str) -> bool:
    """Check if a bucket exists in the TOS file system.

    It will first check the dircache,
    then check the bucket using the TOS client.

    Parameters
    ----------
    bucket : str
        The name of the bucket to check for existence.

    Returns
    -------
    bool
        True if the bucket exists, False otherwise.

    Raises
    ------
    tos.exceptions.TosClientError
        If there is a client error while checking the bucket.
    tos.exceptions.TosServerError
        If there is a server error while checking the bucket
        (other than a "not found" response, which yields False).
    TosfsError
        If there is an unknown error while checking the bucket.

    Examples
    --------
    >>> fs = TosFileSystem()
    >>> fs._exists_bucket("mybucket")
    True
    >>> fs._exists_bucket("nonexistentbucket")
    False

    """
    if self.dircache.get(bucket, False):
        return True

    try:
        if self._ls_from_cache(bucket):
            return True
    except FileNotFoundError:
        # might still be a bucket we can access but don't own
        pass

    # The cache could not confirm the bucket, so ask the service.
    try:
        self.tos_client.head_bucket(bucket)
        return True
    except tos.exceptions.TosClientError as e:
        logger.error("Tosfs failed with client error: %s", e)
        raise
    except tos.exceptions.TosServerError as e:
        if e.status_code == SERVER_RESPONSE_CODE_NOT_FOUND:
            return False
        logger.error("Tosfs failed with server error: %s", e)
        raise
    except Exception as e:
        logger.error("Tosfs failed with unknown error: %s", e)
        raise TosfsError(f"Tosfs failed with unknown error: {e}") from e

def _exists_object(
    self, bucket: str, key: str, path: str, version_id: Optional[str] = None
) -> bool:
    """Check if an object exists in the TOS file system.

    It will first check the dircache,
    then check the object using the TOS client.

    Parameters
    ----------
    bucket : str
        The name of the bucket.
    key : str
        The key of the object.
    path : str
        The full path of the object.
    version_id : str, optional
        The version ID of the object (default is None). It is used for
        both the cache lookup and the request to the service.

    Returns
    -------
    bool
        True if the object exists, False otherwise.

    Raises
    ------
    tos.exceptions.TosClientError
        If there is a client error while checking the object.
    tos.exceptions.TosServerError
        If there is a server error while checking the object
        (other than a "not found" response, which yields False).
    TosfsError
        If there is an unknown error while checking the object.

    Examples
    --------
    >>> fs = TosFileSystem()
    >>> fs._exists_object("mybucket", "myfile", "tos://mybucket/myfile")
    True
    >>> fs._exists_object("mybucket", "nonexistentfile", "tos://mybucket/nonexistentfile")
    False

    """
    # None from the cache means "cannot tell"; only then ask the service.
    exists_in_cache = self._exists_in_cache(path, bucket, key, version_id)
    if exists_in_cache is not None:
        return exists_in_cache

    try:
        # Forward the version so a versioned path is not answered with the
        # state of the latest version.
        self.tos_client.head_object(bucket, key, version_id=version_id)
        return True
    except tos.exceptions.TosClientError as e:
        logger.error("Tosfs failed with client error: %s", e)
        raise
    except tos.exceptions.TosServerError as e:
        if e.status_code == SERVER_RESPONSE_CODE_NOT_FOUND:
            return False
        logger.error("Tosfs failed with server error: %s", e)
        raise
    except Exception as e:
        logger.error("Tosfs failed with unknown error: %s", e)
        raise TosfsError(f"Tosfs failed with unknown error: {e}") from e

def _exists_in_cache(
    self,
    path: str,
    bucket: str,
    key: str,
    version_id: Optional[str] = None,
) -> Optional[bool]:
    """Try to answer an existence check from the directory cache alone.

    Parameters
    ----------
    path : str
        The full path of the object. It is not used by the lookup, which
        is keyed on ``bucket`` and ``key`` joined with ``"/"``.
    bucket : str
        The name of the bucket.
    key : str
        The key of the object.
    version_id : str, optional
        The version ID of the object (default is None).

    Returns
    -------
    bool or None
        False if the cache lookup raises ``FileNotFoundError``.
        True if the cache returns entries and either the file system is
        not version aware, no version was requested, or an entry matches
        both the path and the requested version.
        None if the cache cannot decide, in which case the caller has to
        query the service.

    """
    fullpath = "/".join((bucket, key))

    try:
        entries = self._ls_from_cache(fullpath)
    except FileNotFoundError:
        return False

    if entries is None:
        return None

    if not self.version_aware or version_id is None:
        return True

    if any(
        entry["name"] == fullpath and entry.get("VersionId") == version_id
        for entry in entries
    ):
        return True

    # dircache doesn't support multiple versions, so we really can't tell if
    # the one we want exists.
    return None

def _lsbuckets(self, refresh: bool = False) -> List[dict]:
"""List all buckets in the account.
Expand Down
4 changes: 4 additions & 0 deletions tosfs/tests/test_tosfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,8 @@ def test_rmdir(tosfs: TosFileSystem, bucket: str, temporary_workspace: str) -> N
def test_exists_bucket(
tosfs: TosFileSystem, bucket: str, temporary_workspace: str
) -> None:
assert tosfs.exists("")
assert tosfs.exists("/")
assert tosfs.exists(bucket)
assert not tosfs.exists("nonexistent")

Expand All @@ -151,3 +153,5 @@ def test_exists_object(
assert not tosfs.exists(f"{bucket}/{temporary_workspace}/nonexistent")
assert not tosfs.exists(f"{bucket}/nonexistent")
assert not tosfs.exists(f"{bucket}/{temporary_workspace}/nonexistent")
tosfs.rm_file(f"{bucket}/{temporary_workspace}/{file_name}")
assert not tosfs.exists(f"{bucket}/{temporary_workspace}/{file_name}")

0 comments on commit 45cf338

Please sign in to comment.