def exists(self, path: str, **kwargs: Union[str, bool, float, None]) -> bool:
    """Check if a path exists in the TOS file system.

    The root always exists. A path without a key is treated as a bucket.
    Any other path is looked up as an object and, if that fails, as a
    directory marker (the same key with a single trailing ``/``).

    Parameters
    ----------
    path : str
        The path to check for existence.
    **kwargs : dict, optional
        Accepted for interface compatibility; not used by this method.

    Returns
    -------
    bool
        True if the path exists, False otherwise.

    Raises
    ------
    tos.exceptions.TosClientError
        If there is a client error while checking the path.
    tos.exceptions.TosServerError
        If there is a server error (other than "not found") while
        checking the path.
    TosfsError
        If there is an unknown error while checking the path.

    Examples
    --------
    >>> fs = TosFileSystem()
    >>> fs.exists("tos://bucket/to/file")
    True
    >>> fs.exists("tos://mybucket/nonexistentfile")
    False

    """
    if path in ("", "/"):
        # the root always exists, even if anon
        return True

    path = self._strip_protocol(path)
    if path in ("", "/"):
        # a bare protocol prefix also denotes the root; without this check
        # it would be looked up as a bucket with an empty name
        return True

    bucket, key, version_id = self._split_path(path)
    if not key:
        # the path is a bucket
        return self._exists_bucket(bucket)

    if self._exists_object(bucket, key, path, version_id):
        return True

    # Fall back to the directory-marker form of the key.
    dir_key = key.rstrip("/") + "/"
    if dir_key == key:
        # The key already was the directory form; asking again would only
        # repeat the lookup that has just failed.
        return False
    return self._exists_object(bucket, dir_key, path, version_id)


def _exists_bucket(self, bucket: str) -> bool:
    """Check if a bucket exists in the TOS file system.

    It will first check the dircache,
    then check the bucket using the TOS client.

    Parameters
    ----------
    bucket : str
        The name of the bucket to check for existence.

    Returns
    -------
    bool
        True if the bucket exists, False otherwise.

    Raises
    ------
    tos.exceptions.TosClientError
        If there is a client error while checking the bucket.
    tos.exceptions.TosServerError
        If there is a server error (other than "not found") while
        checking the bucket.
    TosfsError
        If there is an unknown error while checking the bucket.

    Examples
    --------
    >>> fs = TosFileSystem()
    >>> fs._exists_bucket("mybucket")
    True
    >>> fs._exists_bucket("nonexistentbucket")
    False

    """
    if self.dircache.get(bucket, False):
        return True

    try:
        if self._ls_from_cache(bucket):
            return True
    except FileNotFoundError:
        # might still be a bucket we can access but don't own
        pass

    try:
        self.tos_client.head_bucket(bucket)
    except tos.exceptions.TosClientError:
        # re-raised untouched so the generic handler below does not wrap it
        raise
    except tos.exceptions.TosServerError as e:
        if e.status_code == SERVER_RESPONSE_CODE_NOT_FOUND:
            return False
        raise
    except Exception as e:
        raise TosfsError(f"Tosfs failed with unknown error: {e}") from e
    return True


def _exists_object(
    self, bucket: str, key: str, path: str, version_id: Optional[str] = None
) -> bool:
    """Check if an object exists in the TOS file system.

    It will first check the dircache,
    then check the object using the TOS client.

    Parameters
    ----------
    bucket : str
        The name of the bucket.
    key : str
        The key of the object.
    path : str
        The full path of the object.
    version_id : str, optional
        The version ID of the object (default is None). It is forwarded to
        the TOS client so that a specific version is checked.

    Returns
    -------
    bool
        True if the object exists, False otherwise.

    Raises
    ------
    tos.exceptions.TosClientError
        If there is a client error while checking the object.
    tos.exceptions.TosServerError
        If there is a server error (other than "not found") while
        checking the object.
    TosfsError
        If there is an unknown error while checking the object.

    Examples
    --------
    >>> fs = TosFileSystem()
    >>> fs._exists_object("mybucket", "myfile", "tos://mybucket/myfile")
    True
    >>> fs._exists_object("mybucket", "nofile", "tos://mybucket/nofile")
    False

    """
    cached = self._exists_in_cache(path, bucket, key, version_id)
    if cached is not None:
        return cached

    try:
        # Pass the version through: without it the service answers for the
        # latest version, which may differ from the one requested.
        self.tos_client.head_object(bucket, key, version_id=version_id)
    except tos.exceptions.TosClientError:
        # re-raised untouched so the generic handler below does not wrap it
        raise
    except tos.exceptions.TosServerError as e:
        if e.status_code == SERVER_RESPONSE_CODE_NOT_FOUND:
            return False
        raise
    except Exception as e:
        raise TosfsError(f"Tosfs failed with unknown error: {e}") from e
    return True


def _exists_in_cache(
    self,
    path: str,
    bucket: str,
    key: str,
    version_id: Optional[str] = None,
) -> Optional[bool]:
    """Answer an existence query from the directory cache, if possible.

    Parameters
    ----------
    path : str
        The full path of the object. Not used by the lookup itself; the
        cache is queried with ``bucket/key``.
    bucket : str
        The name of the bucket.
    key : str
        The key of the object.
    version_id : str, optional
        The version ID of the object (default is None).

    Returns
    -------
    bool or None
        False if the cache lookup raises ``FileNotFoundError``.
        True if the cache has a listing and either no specific version
        is requested (or the file system is not version aware) or a cached
        entry matches both the name and the version.
        None if the cache cannot decide and the caller has to ask the
        service.

    """
    fullpath = f"{bucket}/{key}"

    try:
        entries = self._ls_from_cache(fullpath)
    except FileNotFoundError:
        return False

    if entries is None:
        # nothing cached for this path
        return None

    if not self.version_aware or version_id is None:
        return True

    if any(
        entry["name"] == fullpath and entry.get("VersionId") == version_id
        for entry in entries
    ):
        return True

    # dircache doesn't support multiple versions, so we really can't tell if
    # the one we want exists.
    return None
diff --git a/tosfs/tests/test_tosfs.py b/tosfs/tests/test_tosfs.py index 25748b7..c008324 100644 --- a/tosfs/tests/test_tosfs.py +++ b/tosfs/tests/test_tosfs.py @@ -136,6 +136,8 @@ def test_rmdir(tosfs: TosFileSystem, bucket: str, temporary_workspace: str) -> N def test_exists_bucket( tosfs: TosFileSystem, bucket: str, temporary_workspace: str ) -> None: + assert tosfs.exists("") + assert tosfs.exists("/") assert tosfs.exists(bucket) assert not tosfs.exists("nonexistent") @@ -151,3 +153,5 @@ def test_exists_object( assert not tosfs.exists(f"{bucket}/{temporary_workspace}/nonexistent") assert not tosfs.exists(f"{bucket}/nonexistent") assert not tosfs.exists(f"{bucket}/{temporary_workspace}/nonexistent") + tosfs.rm_file(f"{bucket}/{temporary_workspace}/{file_name}") + assert not tosfs.exists(f"{bucket}/{temporary_workspace}/{file_name}")