Core: Implement find api (#48)
* Core: Implement find api

* Core: Implement find api
yanghua authored Sep 2, 2024
1 parent c8a03c1 commit 037788b
Showing 2 changed files with 175 additions and 8 deletions.
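For context, a minimal usage sketch of the new API, assuming an already-constructed TosFileSystem instance named tosfs (as in the test fixture further down); the bucket and path names are illustrative placeholders, not values from this commit:

# Sketch only: `tosfs` is an already-constructed TosFileSystem instance.
# Bucket and path names below are placeholders.

# All files below a path, like the posix `find` command without conditions.
files = tosfs.find("my-bucket/my/dir")

# Include directory entries and return a {path: info_dict} mapping instead of a list.
info = tosfs.find("my-bucket/my/dir", withdirs=True, detail=True)

# Only entries whose names start with the given prefix; per this change,
# `prefix` cannot be combined with `maxdepth`.
parts = tosfs.find("my-bucket/my/dir", prefix="part-")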
112 changes: 104 additions & 8 deletions tosfs/core.py
@@ -713,6 +713,93 @@ def walk(
path, maxdepth=maxdepth, topdown=topdown, on_error=on_error, **kwargs
)

def find(
self,
path: str,
maxdepth: Optional[int] = None,
withdirs: bool = False,
detail: bool = False,
prefix: str = "",
**kwargs: Any,
) -> Union[List[str], dict]:
"""Find all files or dirs with conditions.
Like posix ``find`` command without conditions
Parameters
----------
path : str
The path to search.
maxdepth: int, optional
If not None, the maximum number of levels to descend
withdirs: bool
Whether to include directory paths in the output. This is True
when used by glob, but users usually only want files.
prefix: str
Only return files that match ``^{path}/{prefix}`` (if there is an
exact match ``filename == {path}/{prefix}``, it will also be included)
detail: bool
If True, return a dict with file information, else just the path
**kwargs: Any
Additional arguments.
"""
if path in ["", "*"] + ["{}://".format(p) for p in self.protocol]:
raise ValueError("Cannot access all of TOS via path {}.".format(path))

path = self._strip_protocol(path)
bucket, key, _ = self._split_path(path)
if not bucket:
raise ValueError("Cannot access all of TOS without specify a bucket.")

if maxdepth and prefix:
raise ValueError(
"Can not specify 'prefix' option alongside 'maxdepth' options."
)
if maxdepth:
return super().find(
bucket + "/" + key,
maxdepth=maxdepth,
withdirs=withdirs,
detail=detail,
**kwargs,
)

out = self._find_file_dir(key, path, prefix, withdirs, kwargs)

if detail:
return {o["name"]: o for o in out}
else:
return [o["name"] for o in out]

def _find_file_dir(
self, key: str, path: str, prefix: str, withdirs: bool, kwargs: Any
) -> List[dict]:
out = self._lsdir(
path, delimiter="", include_self=True, prefix=prefix, **kwargs
)
if not out and key:
try:
out = [self.info(path)]
except FileNotFoundError:
out = []
dirs = []
for o in out:
par = self._parent(o["name"])
if len(path) <= len(par):
d = {
"Key": self._split_path(par)[1],
"Size": 0,
"name": par,
"type": "directory",
}
dirs.append(d)
if withdirs:
out = sorted(out + dirs, key=lambda x: x["name"])
else:
out = [o for o in out if o["type"] == "file"]
return out

def _open_remote_file(
self,
bucket: str,
@@ -1059,6 +1146,7 @@ def _lsdir(
max_items: int = 1000,
delimiter: str = "/",
prefix: str = "",
include_self: bool = False,
versions: bool = False,
) -> List[dict]:
"""List objects in a directory.
@@ -1073,6 +1161,8 @@ def _lsdir(
The delimiter to use for grouping objects (default is '/').
prefix : str, optional
The prefix to use for filtering objects (default is '').
include_self : bool, optional
Whether to include the directory itself in the listing (default is False).
versions : bool, optional
Whether to list object versions (default is False).
@@ -1107,12 +1197,15 @@ def _lsdir(
max_items=max_items,
delimiter=delimiter,
prefix=prefix,
include_self=include_self,
versions=versions,
):
if isinstance(obj, CommonPrefixInfo):
dirs.append(self._fill_common_prefix_info(obj, bucket))
dirs.append(self._fill_dir_info(bucket, obj))
elif obj.key.endswith("/"):
dirs.append(self._fill_dir_info(bucket, None, obj.key))
else:
files.append(self._fill_object_info(obj, bucket, versions))
files.append(self._fill_file_info(obj, bucket, versions))
files += dirs

return files
@@ -1123,6 +1216,7 @@ def _listdir(
max_items: int = 1000,
delimiter: str = "/",
prefix: str = "",
include_self: bool = False,
versions: bool = False,
) -> List[Union[CommonPrefixInfo, ListedObject, ListedObjectVersion]]:
"""List objects in a bucket.
@@ -1137,6 +1231,8 @@ def _listdir(
The delimiter to use for grouping objects (default is '/').
prefix : str, optional
The prefix to use for filtering objects (default is '').
include_self : bool, optional
Whether to include the bucket itself in the listing (default is False).
versions : bool, optional
Whether to list object versions (default is False).
@@ -1194,7 +1290,7 @@ def _listdir(
resp = self.tos_client.list_objects_type2(
bucket,
prefix,
start_after=prefix,
start_after=prefix if not include_self else None,
delimiter=delimiter,
max_keys=max_items,
continuation_token=continuation_token,
@@ -1255,8 +1351,10 @@ def _split_path(self, path: str) -> Tuple[str, str, Optional[str]]:
)

@staticmethod
def _fill_common_prefix_info(common_prefix: CommonPrefixInfo, bucket: str) -> dict:
name = "/".join([bucket, common_prefix.prefix[:-1]])
def _fill_dir_info(
bucket: str, common_prefix: Optional[CommonPrefixInfo], key: str = ""
) -> dict:
name = "/".join([bucket, common_prefix.prefix[:-1] if common_prefix else key])
return {
"name": name,
"Key": name,
@@ -1265,9 +1363,7 @@ def _fill_common_prefix_info(common_prefix: CommonPrefixInfo, bucket: str) -> dict:
}

@staticmethod
def _fill_object_info(
obj: ListedObject, bucket: str, versions: bool = False
) -> dict:
def _fill_file_info(obj: ListedObject, bucket: str, versions: bool = False) -> dict:
result = {
"Key": f"{bucket}/{obj.key}",
"size": obj.size,
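Two notes on the core.py changes above. First, the new include_self flag threaded through _lsdir/_listdir suppresses the start_after argument to list_objects_type2, so an object whose key exactly equals the listing prefix is not skipped; this is what lets find(path, prefix=...) include an exact match. Second, to make the directory-synthesis loop in _find_file_dir concrete, here is a small standalone sketch on toy listing data (the simplified parent() helper and the sample names are illustrative, not part of the diff):

import posixpath
from typing import List

def parent(name: str) -> str:
    # Simplified stand-in for fsspec's AbstractFileSystem._parent.
    return posixpath.dirname(name.rstrip("/"))

def synthesize_dirs(path: str, listing: List[dict], withdirs: bool) -> List[dict]:
    # Mirrors the core loop in _find_file_dir: derive directory entries from
    # the parents of listed objects, then either merge them in or keep files only.
    dirs = []
    for o in listing:
        par = parent(o["name"])
        if len(path) <= len(par):  # parent lies at or below the search path
            dirs.append({"name": par, "Size": 0, "type": "directory"})
    if withdirs:
        return sorted(listing + dirs, key=lambda x: x["name"])
    return [o for o in listing if o["type"] == "file"]

listing = [
    {"name": "bucket/dir/a.txt", "type": "file"},
    {"name": "bucket/dir/sub/b.txt", "type": "file"},
]
print(synthesize_dirs("bucket/dir", listing, withdirs=True))
# -> files plus synthesized 'bucket/dir' and 'bucket/dir/sub' directory entries
print(synthesize_dirs("bucket/dir", listing, withdirs=False))
# -> only the two file entries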
71 changes: 71 additions & 0 deletions tosfs/tests/test_tosfs.py
@@ -416,6 +416,77 @@ def test_walk(tosfs: TosFileSystem, bucket: str, temporary_workspace: str) -> None:
tosfs.rmdir(f"{bucket}/{temporary_workspace}")


def test_find(tosfs: TosFileSystem, bucket: str, temporary_workspace: str) -> None:
with pytest.raises(ValueError, match="Cannot access all of TOS via path ."):
tosfs.find("")

with pytest.raises(ValueError, match="Cannot access all of TOS via path *."):
tosfs.find("*")

with pytest.raises(ValueError, match="Cannot access all of TOS via path tos://."):
tosfs.find("tos://")

with pytest.raises(
ValueError, match="Cannot access all of TOS without specify a bucket."
):
tosfs.find("/")

assert len(tosfs.find(bucket, maxdepth=1)) > 0

with pytest.raises(
ValueError,
match="Can not specify 'prefix' option " "alongside 'maxdepth' options.",
):
tosfs.find(bucket, maxdepth=1, withdirs=True, prefix=temporary_workspace)

result = tosfs.find(bucket, prefix=temporary_workspace)
assert len(result) == 0

result = tosfs.find(bucket, prefix=random_str())
assert len(result) == 0

result = tosfs.find(
bucket, prefix=temporary_workspace + "/", withdirs=True, detail=True
)
assert len(result) == len([bucket, f"{bucket}/{temporary_workspace}/"])
assert (
result[f"{bucket}/{temporary_workspace}/"]["name"]
== f"{bucket}/{temporary_workspace}/"
)
assert result[f"{bucket}/{temporary_workspace}/"]["type"] == "directory"

result = tosfs.find(
f"{bucket}/{temporary_workspace}", withdirs=True, maxdepth=1, detail=True
)
assert len(result) == 1

dir_name = random_str()
sub_dir_name = random_str()
file_name = random_str()
sub_file_name = random_str()

tosfs.makedirs(f"{bucket}/{temporary_workspace}/{dir_name}/{sub_dir_name}")
result = tosfs.find(
f"{bucket}/{temporary_workspace}", prefix=dir_name, withdirs=False
)
assert len(result) == 0

tosfs.touch(f"{bucket}/{temporary_workspace}/{dir_name}/{file_name}")
assert tosfs.exists(f"{bucket}/{temporary_workspace}/{dir_name}/{file_name}")
result = tosfs.find(
f"{bucket}/{temporary_workspace}/{dir_name}", prefix=file_name, withdirs=False
)
assert len(result) == 1

tosfs.rm_file(
f"{bucket}/{temporary_workspace}/{dir_name}/{sub_dir_name}/{sub_file_name}"
)
tosfs.rmdir(f"{bucket}/{temporary_workspace}/{dir_name}/{sub_dir_name}")
tosfs.rm_file(f"{bucket}/{temporary_workspace}/{dir_name}/{file_name}")
tosfs.rmdir(f"{bucket}/{temporary_workspace}/{dir_name}")
tosfs.rmdir(f"{bucket}/{temporary_workspace}")


###########################################################
# File operation tests #
###########################################################