Skip to content

Commit

Permalink
Version bump
Browse files Browse the repository at this point in the history
  • Loading branch information
shinzlet committed Jul 18, 2024
1 parent f65249b commit 0a0bee0
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 8 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "czpeedy"
version = "0.1.0"
version = "0.2.0"
description = "A command-line tool used to determine the tensorstore settings which yield the fastest write speed on a given machine."
authors = [
{ name = "Seth Hinz", email = "[email protected]" }
Expand Down
7 changes: 6 additions & 1 deletion src/czpeedy/czpeedy.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,11 @@ def main() -> None:
type=list_type(zarr_version),
help="The version of zarr to use. (Supported: 2, 3.)",
)
parser.add_argument(
"--fullxy",
action='store_true',
help="If specified, the chunk size will be the full x and y dimensions of the data. Useful if you plan to stream xy slices over the network.",
)
args = parser.parse_args()

if args.dest:
Expand All @@ -219,7 +224,7 @@ def main() -> None:
data = load_input(args.source, args.shape, args.dtype)
if args.chunk_size is None:
args.chunk_size = ParameterSpace.suggest_chunk_sizes(
data.shape, data.itemsize
data.shape, data.itemsize, args.fullxy
)

parameter_space = ParameterSpace(
Expand Down
27 changes: 21 additions & 6 deletions src/czpeedy/parameter_space.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,10 +176,22 @@ def to_trial_parameters(
def suggest_chunk_sizes(
shape: ArrayLike,
itemsize: int,
max_bytes=2**31 - 17,
size_ratio=1.5,
volume_ratio=1.5,
full_xy: bool = False,
max_bytes: int = 2**31 - 17,
size_ratio: float | None = None,
volume_ratio: float | None = None,
) -> list[list[int]]:

# This is just a heuristic: for full xy frames, we have fewer variables to play with (usually just the
# z-axis chunk length). So, to give the user several chunk options, we make the geometric sequence a bit
# tighter.
if full_xy:
size_ratio = size_ratio or 1.25
volume_ratio = volume_ratio or 1.25
else:
size_ratio = size_ratio or 1.5
volume_ratio = volume_ratio or 1.5

# Concept: The smallest size we reasonably want along an axis is min(axis_size, 100). 100 is already small,
# so we use 100 as the minimum unless axis_size is even smaller.
# Figure out an integer n such that 100 ~= axis_size / n. Then compute the sequence
Expand Down Expand Up @@ -230,7 +242,11 @@ def break_axis(axis: int) -> list[int]:

return chunk_lengths

chunks = list(product(*[break_axis(axis) for axis in shape]))
if full_xy:
chunks = list(product(*[break_axis(axis) for axis in shape[:-2]], [shape[-2]], [shape[-1]]))
else:
chunks = list(product(*[break_axis(axis) for axis in shape]))

chunks_with_volumes = map(lambda chunk: (chunk, np.prod(chunk)), chunks)
chunks_with_volumes = sorted(chunks_with_volumes, key=lambda item: item[1])

Expand Down Expand Up @@ -262,6 +278,5 @@ def break_axis(axis: int) -> list[int]:
# return np.prod(grid_size) - np.prod(shape)

# for chunk in suggested_chunks:
# print(100 % waste(shape, chunk) / np.prod(shape))

# print(100 % waste(shape, chunk) / np.prod(shape)))
return suggested_chunks

0 comments on commit 0a0bee0

Please sign in to comment.