Version bump

royerlab · Jul 18, 2024 · 0a0bee0 · 0a0bee0
1 parent f65249b
commit 0a0bee0
Show file tree

Hide file tree

Showing 3 changed files with 28 additions and 8 deletions.
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "czpeedy"
-version = "0.1.0"
+version = "0.2.0"
 description = "A command-line tool used to determine the tensorstore settings which yield the fastest write speed on a given machine."
 authors = [
     { name = "Seth Hinz", email = "[email protected]" }

diff --git a/src/czpeedy/czpeedy.py b/src/czpeedy/czpeedy.py
@@ -210,6 +210,11 @@ def main() -> None:
         type=list_type(zarr_version),
         help="The version of zarr to use. (Supported: 2, 3.)",
     )
+    parser.add_argument(
+        "--fullxy",
+        action='store_true',
+        help="If specified, the chunk size will be the full x and y dimensions of the data. Useful if you plan to stream xy slices over the network.",
+    )
     args = parser.parse_args()
 
     if args.dest:
@@ -219,7 +224,7 @@ def main() -> None:
         data = load_input(args.source, args.shape, args.dtype)
         if args.chunk_size is None:
             args.chunk_size = ParameterSpace.suggest_chunk_sizes(
-                data.shape, data.itemsize
+                data.shape, data.itemsize, args.fullxy
             )
 
         parameter_space = ParameterSpace(

diff --git a/src/czpeedy/parameter_space.py b/src/czpeedy/parameter_space.py
@@ -176,10 +176,22 @@ def to_trial_parameters(
     def suggest_chunk_sizes(
         shape: ArrayLike,
         itemsize: int,
-        max_bytes=2**31 - 17,
-        size_ratio=1.5,
-        volume_ratio=1.5,
+        full_xy: bool = False,
+        max_bytes: int = 2**31 - 17,
+        size_ratio: float | None = None,
+        volume_ratio: float | None = None,
     ) -> list[list[int]]:
+
+        # This is just a heuristic: for full xy frames, we have fewer variables to play with (usually just the
+        # z-axis chunk length). So, to give the user several chunk options, we make the geometric sequence a
+        # bit tighter (a default ratio of 1.25 instead of 1.5).
+        if full_xy:
+            size_ratio = size_ratio or 1.25
+            volume_ratio = volume_ratio or 1.25
+        else:
+            size_ratio = size_ratio or 1.5
+            volume_ratio = volume_ratio or 1.5
+
         # Concept: The smallest size we reasonably want along an axis is min(axis_size, 100). 100 is small,
         # so we use 100 as the minimum unless axis_size is even smaller.
         # Figure out an integer n such that 100 ~= axis_size / n. Then compute the sequence
@@ -230,7 +242,11 @@ def break_axis(axis: int) -> list[int]:
 
                 return chunk_lengths
 
-        chunks = list(product(*[break_axis(axis) for axis in shape]))
+        if full_xy:
+            chunks = list(product(*[break_axis(axis) for axis in shape[:-2]], [shape[-2]], [shape[-1]]))
+        else:
+            chunks = list(product(*[break_axis(axis) for axis in shape]))
+
         chunks_with_volumes = map(lambda chunk: (chunk, np.prod(chunk)), chunks)
         chunks_with_volumes = sorted(chunks_with_volumes, key=lambda item: item[1])
 
@@ -262,6 +278,5 @@ def break_axis(axis: int) -> list[int]:
         #     return np.prod(grid_size) - np.prod(shape)
 
         # for chunk in suggested_chunks:
-        #     print(100 % waste(shape, chunk) / np.prod(shape))
-
+        #     print(100 % waste(shape, chunk) / np.prod(shape)))
         return suggested_chunks