Skip to content

Commit

Permalink
performance optimize
Browse files Browse the repository at this point in the history
  • Loading branch information
yanghua committed Sep 19, 2024
1 parent cf79efc commit 93b8304
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 5 deletions.
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ pytest-cov = "==5.0.0"
coverage = "==7.5.0"
ruff = "==0.6.0"
types-requests = "==2.32.0.20240907"
FastWARC = "==0.14.9"

[tool.pydocstyle]
convention = "numpy"
Expand Down
44 changes: 39 additions & 5 deletions tosfs/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,9 @@ def __init__(
socket_timeout=socket_timeout,
high_latency_log_threshold=high_latency_log_threshold,
credentials_provider=credentials_provider,
enable_crc=False,
enable_verify_ssl=False,
disable_encoding_meta=True,
)
self.version_aware = version_aware
self.default_block_size = (
Expand Down Expand Up @@ -1970,6 +1973,7 @@ def __init__(
self.fs = fs
self.bucket = bucket
self.key = key
self.version_id = path_version_id
self.path = path
self.mode = mode
self.autocommit = autocommit
Expand Down Expand Up @@ -2130,25 +2134,55 @@ def _call_upload_part(
)

def _fetch_range(self, start: int, end: int) -> bytes:
    """Fetch the bytes between ``start`` and ``end`` of this object.

    The bucket, key and version id cached on the file object are used
    directly, so the path is not split again on every range request.

    Parameters
    ----------
    start : int
        Offset of the first byte to fetch.
    end : int
        End offset of the requested range.

    Returns
    -------
    bytes
        The payload read from the ``get_object`` response, or ``b""``
        when ``start == end``.

    """
    if start == end:
        # Empty range: nothing to download, so skip the remote call.
        logger.debug(
            "skip fetch for negative range - bucket=%s,key=%s,start=%d,end=%d",
            self.bucket,
            self.key,
            start,
            end,
        )
        return b""
    logger.debug("Fetch: %s/%s, %s-%s", self.bucket, self.key, start, end)

    def fetch() -> bytes:
        # NOTE(review): `end` is forwarded unchanged as `range_end`; confirm
        # whether the client treats `range_end` as inclusive or exclusive.
        return self.fs.tos_client.get_object(
            self.bucket, self.key, self.version_id, range_start=start, range_end=end
        ).read()

    # The download is wrapped so it is retried up to `max_retry_num` times.
    return retryable_func_executor(fetch, max_retry_num=self.fs.max_retry_num)

def read(self, length: int = -1) -> bytes:
    """Return data from cache, or fetch pieces as necessary.

    Parameters
    ----------
    length : int, optional
        Number of bytes to read; if negative or None, all remaining bytes.

    Returns
    -------
    bytes
        The bytes returned by ``self.cache._fetch``; empty when ``length``
        is 0 or when no bytes remain after the current position.

    Raises
    ------
    ValueError
        If the file is not in ``"rb"`` mode, or if it is closed.

    """
    length = -1 if length is None else int(length)
    if self.mode != "rb":
        raise ValueError("File not in read mode")
    if self.closed:
        raise ValueError("I/O operation on closed file.")
    if length < 0:
        # Clamp at zero: when the position is past the end of the file the
        # difference is negative, and an inverted range must not reach the
        # cache.
        length = max(self.size - self.loc, 0)
    if length == 0:
        # don't even bother calling fetch
        return b""

    end = self.loc + length
    out = self.cache._fetch(self.loc, end)

    logger.debug(
        "%s read: %i - %i %s",
        self,
        self.loc,
        end,
        self.cache._log_stats(),
    )
    # Advance by what was actually returned, which may be less than asked.
    self.loc += len(out)
    return out

def commit(self) -> None:
"""Complete multipart upload or PUT."""
logger.debug("Commit %s", self)
Expand Down

0 comments on commit 93b8304

Please sign in to comment.