Added proper support for various datatypes (rather than the stubs I had before...)
shinzlet committed Sep 6, 2024
1 parent db40864 commit 5cf9c4b
Showing 2 changed files with 61 additions and 42 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "czpeedy"
version = "0.2.2"
version = "0.2.3"
description = "A command-line tool used to determine the tensorstore settings which yield the fastest write speed on a given machine."
authors = [
{ name = "Seth Hinz", email = "[email protected]" }
101 changes: 60 additions & 41 deletions src/czpeedy/trial_parameters.py
@@ -1,4 +1,5 @@
from __future__ import annotations
from typing import Union
from pathlib import Path
import numpy as np
from numpy.typing import ArrayLike
@@ -48,13 +49,67 @@ def __init__(
# Returns a zarr v2 dtype string based on the numpy data type of this `TrialParameters`.
# Reference: https://zarr-specs.readthedocs.io/en/latest/v2/v2.0.html#data-type-encoding
def dtype_json_v2(self) -> str:
    endianness_char = ("|", ">", "<")[self.endianness]
    return endianness_char + "u2"
def dtype_json_v2(self) -> Union[list, str]:
    # Helper function to parse a regular (not structured) dtype into a zarr v2 dtype string.
    def field_dtype(dtype: np.dtype) -> str:
        kind = dtype.kind
        itemsize = dtype.itemsize

        # If the dtype doesn't specify a byteorder (i.e. it is still on the native
        # byteorder), we let the user's endianness setting decide. Otherwise, we use
        # the byteorder provided by this specific field - in a structured dtype, it
        # might very well be important.
        if dtype.byteorder == "=":
            endianness_char = ("|", ">", "<")[self.endianness]
        else:
            endianness_char = dtype.byteorder

        if kind in 'biufc':
            return f"{endianness_char}{kind}{itemsize}"
        elif kind == 'M':
            return f"{endianness_char}M8[{dtype.str[4:-1]}]"
        elif kind == 'm':
            return f"{endianness_char}m8[{dtype.str[4:-1]}]"
        elif kind in 'SU':
            # numpy reports unicode itemsize in bytes (4 per character), but the
            # zarr v2 type string counts characters.
            count = itemsize // 4 if kind == 'U' else itemsize
            return f"{endianness_char}{kind}{count}"
        elif kind == 'V':
            return f"{endianness_char}V{itemsize}"
        else:
            raise ValueError(f"Unsupported dtype: {dtype}")

    # Helper function to parse a structured dtype into a zarr v2 compatible dtype list.
    def structured_dtype(dtype: np.dtype) -> list:
        result = []
        # TODO: Figure out how to pass byte offsets to tensorstore (if at all)?
        for name, (field_dt, _offset) in dtype.fields.items():
            # For subarray fields, `.base` is the element dtype and `.shape` is the
            # subarray shape; for plain fields, `.base` is the dtype itself.
            field = [name, field_dtype(field_dt.base)]
            if field_dt.shape:
                field.append(list(field_dt.shape))
            result.append(field)
        return result

    def dtype_str(dtype: np.dtype) -> Union[str, list]:
        if dtype.fields is not None:
            # For the time being, we only support regular dtypes. There isn't really
            # a way to pass a structured dtype to this via the CLI, but when there
            # is, this should return structured_dtype(dtype) instead of raising.
            raise ValueError("Structured dtypes are not supported yet.")
        else:
            return field_dtype(dtype)

    return dtype_str(self.dtype)
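
For illustration, a minimal standalone sketch (not part of the diff) of the v2 strings this encoding produces, assuming the user selected little-endian (so endianness_char is "<"); the expected values follow the zarr v2 spec linked above:

    import numpy as np

    # Each entry pairs a numpy dtype with the zarr v2 string the helper above
    # should produce when the user-selected endianness is little-endian ("<").
    examples = [
        (np.dtype(np.uint16), "<u2"),             # unsigned 16-bit integer
        (np.dtype(np.float64), "<f8"),            # 64-bit float
        (np.dtype("datetime64[ns]"), "<M8[ns]"),  # datetime, nanosecond unit
        (np.dtype("S10"), "|S10"),                # bytestrings have no byteorder ("|")
        (np.dtype(bool), "|b1"),                  # single-byte types also use "|"
    ]
    for dt, expected in examples:
        print(f"{str(dt):>15} -> {expected}")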

# Returns a zarr v3 data type name based on the numpy data type of this `TrialParameters`.
def dtype_json_v3(self) -> str:
return "uint16"
    if self.dtype.kind == 'V':
        return f'r{self.dtype.itemsize * 8}'
    name = self.dtype.name
    if name in ('bool', 'int8', 'int16', 'int32', 'int64',
                'uint8', 'uint16', 'uint32', 'uint64',
                'float16', 'float32', 'float64',
                'complex64', 'complex128'):
        return name
    raise ValueError(f"Unsupported dtype: {self.dtype}")
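
Similarly, a quick sketch of the v3 side: zarr v3 spells data types as plain names, while void dtypes fall back to the raw-bits form r<N> (N in bits):

    import numpy as np

    # Named types map straight through; opaque void types become "r<bits>".
    print(np.dtype(np.uint16).name)           # uint16
    print(np.dtype(np.complex64).name)        # complex64
    print(f"r{np.dtype('V6').itemsize * 8}")  # r48 (6 raw bytes = 48 bits)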

# Produces a jsonable dict that communicates all the trial parameters to tensorstore.
# Usage: `ts.open(trial_parameters.to_spec()).result()`
@@ -65,6 +120,7 @@ def to_spec(self) -> dict:
"kvstore": {
"driver": "file",
"path": str(self.output_path.absolute()),
# "file_io_sync": False
},
"metadata": {
"compressor": {
@@ -81,43 +137,6 @@
"delete_existing": True,
}
elif self.zarr_version == 3:
# 'driver': 'zarr3',
# 'kvstore': {
# 'driver': 'file',
# 'path': 'D:\\Seth\\tmp/',
# },
# 'metadata': {
# "shape": shape,
# "chunk_grid": {
# "name": "regular",
# "configuration": {"chunk_shape": [400, 400, 1024]}
# # "configuration": {"chunk_shape": [482, 480, 512]} # doesn't write at all?????
# },
# "chunk_key_encoding": {"name": "default"},
# # "codecs": [{"name": "blosc", "configuration": {"cname": "lz4", "shuffle": "bitshuffle"}}],
# "data_type": "uint16",
# },
#
# # 'metadata': {
# # 'compressor': {
# # 'id': 'blosc',
# # 'cname': 'lz4',
# # 'shuffle': 2
# # },
# # 'dtype': '>u2',
# # 'shape': shape,
# # # 'blockSize': [100, 100, 100],
# # },
# 'create': True,
# 'delete_existing': True,
# # 'dtype': 'uint16'
# },
# # Due to an (I think) bug in tensorstore, you have to put the codec separately from
# # the other metadata, or it fails to merge the expected codecs ([]) and the given
# # codecs (a non-empty array)
# codec=ts.CodecSpec({
# "codecs": [{"name": "blosc", "configuration": {"cname": "lz4", "shuffle": "bitshuffle"}}],
# 'driver': 'zarr3',
# })).result()
return {
"driver": "zarr3",
"kvstore": {
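
As noted in the comment above to_spec, the returned spec feeds straight into tensorstore. A minimal usage sketch (assuming tensorstore is installed and trial_parameters is an already-constructed TrialParameters):

    import tensorstore as ts

    # Open (creating it if needed) the store described by the trial's spec.
    # Writes can then be benchmarked against the opened store.
    store = ts.open(trial_parameters.to_spec()).result()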
