Added proper support for various datatypes (rather than the stubs I had before...)
shinzlet committed Sep 6, 2024
1 parent db40864 commit 5cf9c4b
Showing 2 changed files with 61 additions and 42 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "czpeedy"
version = "0.2.2"
version = "0.2.3"
description = "A command-line tool used to determine the tensorstore settings which yield the fastest write speed on a given machine."
authors = [
{ name = "Seth Hinz", email = "[email protected]" }
101 changes: 60 additions & 41 deletions src/czpeedy/trial_parameters.py
@@ -1,4 +1,5 @@
from __future__ import annotations
from typing import Union
from pathlib import Path
import numpy as np
from numpy.typing import ArrayLike
@@ -48,13 +49,67 @@ def __init__(
# Returns a zarr v2 dtype string based on the numpy data type of this `TrialParameters`.
# Reference: https://zarr-specs.readthedocs.io/en/latest/v2/v2.0.html#data-type-encoding
def dtype_json_v2(self) -> str:
    endianness_char = ("|", ">", "<")[self.endianness]
    return endianness_char + "u2"
def dtype_json_v2(self) -> Union[list, str]:
    # Helper function to parse a regular (not structured) dtype into a zarr v2 dtype string.
    def field_dtype(dtype: np.dtype) -> str:
        kind = dtype.kind
        itemsize = dtype.itemsize

        # If the dtype doesn't specify a byteorder (i.e. it is still on the native
        # byteorder), we let the user's endianness setting decide. Otherwise, we use
        # the byteorder provided by this specific field - in a structured dtype, it
        # might very well be important.
        if dtype.byteorder == "=":
            endianness_char = ("|", ">", "<")[self.endianness]
        else:
            endianness_char = dtype.byteorder

        if kind in 'biufc':
            return f"{endianness_char}{kind}{itemsize}"
        elif kind == 'M':
            return f"{endianness_char}M8[{dtype.str[4:-1]}]"
        elif kind == 'm':
            return f"{endianness_char}m8[{dtype.str[4:-1]}]"
        elif kind in 'SU':
            # numpy reports unicode itemsize in bytes (4 per character), but the
            # zarr v2 type string counts characters.
            count = itemsize // 4 if kind == 'U' else itemsize
            return f"{endianness_char}{kind}{count}"
        elif kind == 'V':
            return f"{endianness_char}V{itemsize}"
        else:
            raise ValueError(f"Unsupported dtype: {dtype}")

    # Helper function to parse a structured dtype into a zarr v2 compatible dtype list.
    def structured_dtype(dtype: np.dtype) -> list:
        result = []
        # TODO: Figure out how to pass byte offsets to tensorstore (if at all)?
        for name, (field_dt, _offset) in dtype.fields.items():
            # For subarray fields, `.base` is the element dtype and `.shape` is the
            # subarray shape; for plain fields, `.base` is the dtype itself.
            field = [name, field_dtype(field_dt.base)]
            if field_dt.shape:
                field.append(list(field_dt.shape))
            result.append(field)
        return result

    def dtype_str(dtype: np.dtype) -> Union[str, list]:
        if dtype.fields is not None:
            # For the time being, we only support regular dtypes. There isn't really
            # a way to pass a structured dtype to this via the CLI, but when there
            # is, this should return structured_dtype(dtype) instead of raising.
            raise ValueError("Structured dtypes are not supported yet.")
        else:
            return field_dtype(dtype)

    return dtype_str(self.dtype)
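
For illustration, a minimal standalone sketch (not part of the diff) of the v2 strings this encoding produces, assuming the user selected little-endian (so endianness_char is "<"); the expected values follow the zarr v2 spec linked above:

    import numpy as np

    # Each entry pairs a numpy dtype with the zarr v2 string the helper above
    # should produce when the user-selected endianness is little-endian ("<").
    examples = [
        (np.dtype(np.uint16), "<u2"),             # unsigned 16-bit integer
        (np.dtype(np.float64), "<f8"),            # 64-bit float
        (np.dtype("datetime64[ns]"), "<M8[ns]"),  # datetime, nanosecond unit
        (np.dtype("S10"), "|S10"),                # bytestrings have no byteorder ("|")
        (np.dtype(bool), "|b1"),                  # single-byte types also use "|"
    ]
    for dt, expected in examples:
        print(f"{str(dt):>15} -> {expected}")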

# Returns a zarr v3 data type name based on the numpy data type of this `TrialParameters`.
def dtype_json_v3(self) -> str:
return "uint16"
    if self.dtype.kind == 'V':
        return f'r{self.dtype.itemsize * 8}'
    name = self.dtype.name
    if name in ('bool', 'int8', 'int16', 'int32', 'int64',
                'uint8', 'uint16', 'uint32', 'uint64',
                'float16', 'float32', 'float64',
                'complex64', 'complex128'):
        return name
    raise ValueError(f"Unsupported dtype: {self.dtype}")
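
Similarly, a quick sketch of the v3 side: zarr v3 spells data types as plain names, while void dtypes fall back to the raw-bits form r<N> (N in bits):

    import numpy as np

    # Named types map straight through; opaque void types become "r<bits>".
    print(np.dtype(np.uint16).name)           # uint16
    print(np.dtype(np.complex64).name)        # complex64
    print(f"r{np.dtype('V6').itemsize * 8}")  # r48 (6 raw bytes = 48 bits)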

# Produces a jsonable dict that communicates all the trial parameters to tensorstore.
# Usage: `ts.open(trial_parameters.to_spec()).result()`
@@ -65,6 +120,7 @@ def to_spec(self) -> dict:
"kvstore": {
"driver": "file",
"path": str(self.output_path.absolute()),
# "file_io_sync": False
},
"metadata": {
"compressor": {
@@ -81,43 +137,6 @@
"delete_existing": True,
}
elif self.zarr_version == 3:
# 'driver': 'zarr3',
# 'kvstore': {
# 'driver': 'file',
# 'path': 'D:\\Seth\\tmp/',
# },
# 'metadata': {
# "shape": shape,
# "chunk_grid": {
# "name": "regular",
# "configuration": {"chunk_shape": [400, 400, 1024]}
# # "configuration": {"chunk_shape": [482, 480, 512]} # doesn't write at all?????
# },
# "chunk_key_encoding": {"name": "default"},
# # "codecs": [{"name": "blosc", "configuration": {"cname": "lz4", "shuffle": "bitshuffle"}}],
# "data_type": "uint16",
# },
#
# # 'metadata': {
# # 'compressor': {
# # 'id': 'blosc',
# # 'cname': 'lz4',
# # 'shuffle': 2
# # },
# # 'dtype': '>u2',
# # 'shape': shape,
# # # 'blockSize': [100, 100, 100],
# # },
# 'create': True,
# 'delete_existing': True,
# # 'dtype': 'uint16'
# },
# # Due to an (I think) bug in tensorstore, you have to put the codec separately from
# # the other metadata, or it fails to merge the expected codecs ([]) and the given
# # codecs (a non-empty array)
# codec=ts.CodecSpec({
# "codecs": [{"name": "blosc", "configuration": {"cname": "lz4", "shuffle": "bitshuffle"}}],
# 'driver': 'zarr3',
# })).result()
return {
"driver": "zarr3",
"kvstore": {
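
As noted in the comment above to_spec, the returned spec feeds straight into tensorstore. A minimal usage sketch (assuming tensorstore is installed and trial_parameters is an already-constructed TrialParameters):

    import tensorstore as ts

    # Open (creating it if needed) the store described by the trial's spec.
    # Writes can then be benchmarked against the opened store.
    store = ts.open(trial_parameters.to_spec()).result()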
