Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Color by attribute #148

Open
wants to merge 30 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
2c35c08
initial commit with dropdown window and coloring based on position
TeunHuijben Nov 6, 2024
154276a
lint fixes
TeunHuijben Nov 6, 2024
1276f57
interpolate between two opposite colors + added extra test features
TeunHuijben Nov 6, 2024
3a8a6a7
replaced dropdown from mui to czi-sds/components
TeunHuijben Nov 6, 2024
90166f0
changed Button and InputText czi-sds/components elements, because of …
TeunHuijben Nov 7, 2024
f5601d4
categorical/continuous colormaps
TeunHuijben Nov 7, 2024
9866c1b
fixed czi-sds/components after new version
TeunHuijben Nov 7, 2024
9bd4b0f
attributes are saved in zarr and loaded when requested in the applica…
TeunHuijben Nov 11, 2024
1b2c730
fixed width of dropdownMenu
TeunHuijben Nov 11, 2024
254545f
Merge branch 'main' into color-by-attribute
TeunHuijben Nov 21, 2024
1c49f2d
Merge branch 'color-by-attribute' of github.com:royerlab/points-web-v…
TeunHuijben Nov 21, 2024
815ca92
fixed mistakes in convert after merge with main
TeunHuijben Nov 21, 2024
7a4c81e
replaced HTTPStore by .attrs.asObject
TeunHuijben Nov 21, 2024
9c492cb
updated conversion test
TeunHuijben Nov 21, 2024
f184bd1
reset dropdown after loading new dataset + removed hardcoded number o…
TeunHuijben Nov 21, 2024
601ac5f
unified colormaps, toggle for colorBy, second colormap legend
TeunHuijben Nov 22, 2024
6f4b851
added colorBy colorbar overlay + added flag for pre-normalized attrib…
TeunHuijben Nov 22, 2024
836b585
changed default dataset(s) + color legend only visible when relevant
TeunHuijben Nov 22, 2024
15ae251
changed text on colormap legend overlays
TeunHuijben Nov 22, 2024
f9fa997
demo ready PR for SI videos (removed some default attributes)
TeunHuijben Dec 3, 2024
2d5c94f
added conversion tests + conversion detects categorical/continuous
TeunHuijben Dec 5, 2024
76d4dae
trying to fix lint issues
TeunHuijben Dec 5, 2024
b70421b
fixed wrong dataset names
TeunHuijben Dec 6, 2024
a0f18cc
toggle colorBy to off when new dataset is loaded
TeunHuijben Dec 6, 2024
8835848
Merge branch 'main' into color-by-attribute
TeunHuijben Dec 6, 2024
8252941
colorBy toggle only visible when Zarr Store has attributes
TeunHuijben Dec 6, 2024
f555d56
saved colorBy toggle and field in ViewerState, so it is preserved whe…
TeunHuijben Dec 6, 2024
597b5db
config option to show/not show the default dropdown options like x-po…
TeunHuijben Dec 6, 2024
0eb31b4
config option to disable coloring cells (even if attributes are provi…
TeunHuijben Dec 7, 2024
16c4e55
changed example default dataset (includes attributes)
TeunHuijben Dec 7, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 16 additions & 2 deletions CONFIG.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,26 @@ const config = {
// When opening the viewer, or refreshing the page, the viewer will revert to the following default dataset
data:{
// Default dataset URL (must be publicly accessible)
default_dataset: "https://public.czbiohub.org/royerlab/zoo/Zebrafish/tracks_zebrafish_bundle.zarr/"
// default_dataset: "https://public.czbiohub.org/royerlab/zoo/Zebrafish/tracks_zebrafish_bundle.zarr/"
default_dataset: "https://public.czbiohub.org/royerlab/zoo/misc/tracks_drosophila_attributes_norm_bundle.zarr/"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this be the following?

Suggested change
default_dataset: "https://public.czbiohub.org/royerlab/zoo/misc/tracks_drosophila_attributes_norm_bundle.zarr/"
default_dataset: "https://public.czbiohub.org/royerlab/zoo/misc/tracks_zebrafish_displ_norm_bundle.zarr/"

},

// Default settings for certain parameters
settings:{
// Maximum number of cells a user can select without getting a warning
max_num_selected_cells: 100,

// Choose colormap for the tracks, options: viridis-inferno, magma-inferno, inferno-inferno, plasma-inferno, cividis-inferno [default]
// Choose colormap for the tracks
// options: viridis-inferno, magma-inferno, inferno-inferno, plasma-inferno, cividis-inferno [default]
colormap_tracks: "cividis-inferno",

// Choose colormap for coloring the cells, depending on whether the attribute is continuous or categorical
// options: HSL, viridis, plasma, inferno, magma, cividis
colormap_colorby_categorical: "HSL",
colormap_colorby_continuous: "plasma",

// Show default attributes in the left dropdown menu for coloring the cells
showDefaultAttributes: true,

// Point size (arbitrary units), if cell sizes not provided in zarr attributes
point_size: 0.1,
Expand All @@ -33,7 +43,11 @@ const config = {

// Point color (when selector hovers over)
preview_hightlight_point_color: [0.8, 0.8, 0], //yellow
},

permission:{
// Allow users to color cells by attributes
allowColorByAttribute: true
}
}

Expand Down
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,13 @@ intracktive convert --csv_file path/to/tracks.csv --add_radius

Or use `intracktive convert --help` for the documentation on the inputs and outputs

To include extra CSV columns as attributes for coloring the cells in the viewer, use one of the following:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this TODO still need to be done?

```
intracktive convert --csv_file path/to/tracks.csv --add_all_attributes
intracktive convert --csv_file path/to/file.csv --add_attribute cell_size
intracktive convert --csv_file path/to/file.csv --add_attribute cell_size,time,diameter,color
```


In order for the viewer to access the data, the data must be hosted at a location the browser can access. For testing and visualizing data on your own computer, the easiest way is to host the data via `localhost`. This repository contains a [tool](python/src/intracktive//server.py) to host the data locally:

Expand Down
81 changes: 80 additions & 1 deletion python/src/intracktive/_tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ def _run_command(command_and_args: List[str]) -> None:
assert exit.code == 0, f"{command_and_args} failed with exit code {exit.code}"


def test_convert_cli(
def test_convert_cli_simple(
tmp_path: Path,
make_sample_data: pd.DataFrame,
) -> None:
Expand All @@ -28,3 +28,82 @@ def test_convert_cli(
str(tmp_path),
]
)


def test_convert_cli_single_attribute(
    tmp_path: Path,
    make_sample_data: pd.DataFrame,
) -> None:
    """Run the convert CLI exposing a single CSV column ('z') as an attribute."""
    csv_path = tmp_path / "sample_data.csv"
    make_sample_data.to_csv(csv_path, index=False)

    # --add_attribute takes a comma-separated column list; here just one column.
    args = [
        "convert",
        "--csv_file",
        str(csv_path),
        "--out_dir",
        str(tmp_path),
        "--add_attribute",
        "z",
    ]
    _run_command(args)


def test_convert_cli_multiple_attributes(
    tmp_path: Path,
    make_sample_data: pd.DataFrame,
) -> None:
    """Run the convert CLI exposing several distinct CSV columns as attributes.

    Fix: the attribute list was "z,x,z", which duplicates 'z'. Duplicate names
    are collapsed into a single attribute array during conversion while still
    being listed twice in the stored attribute names, so the metadata and the
    attribute matrix disagree. Use three distinct columns instead.
    """
    df = make_sample_data
    df.to_csv(tmp_path / "sample_data.csv", index=False)

    _run_command(
        [
            "convert",
            "--csv_file",
            str(tmp_path / "sample_data.csv"),
            "--out_dir",
            str(tmp_path),
            "--add_attribute",
            "z,y,x",
        ]
    )


def test_convert_cli_all_attributes(
    tmp_path: Path,
    make_sample_data: pd.DataFrame,
) -> None:
    """Run the convert CLI with every non-required CSV column as an attribute."""
    csv_path = tmp_path / "sample_data.csv"
    make_sample_data.to_csv(csv_path, index=False)

    # --add_all_attributes is a flag: no column list is needed.
    _run_command(
        [
            "convert",
            "--csv_file",
            str(csv_path),
            "--out_dir",
            str(tmp_path),
            "--add_all_attributes",
        ]
    )


def test_convert_cli_all_attributes_prenormalized(
    tmp_path: Path,
    make_sample_data: pd.DataFrame,
) -> None:
    """Run the convert CLI with all attributes, marking them as pre-normalized.

    --pre_normalized tells the converter the attribute values already lie in
    [0, 1], so no normalization metadata is derived.
    """
    csv_path = tmp_path / "sample_data.csv"
    make_sample_data.to_csv(csv_path, index=False)

    args = [
        "convert",
        "--csv_file",
        str(csv_path),
        "--out_dir",
        str(tmp_path),
        "--add_all_attributes",
        "--pre_normalized",
    ]
    _run_command(args)
3 changes: 2 additions & 1 deletion python/src/intracktive/_tests/test_convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,8 @@ def test_actual_zarr_content(tmp_path: Path, make_sample_data: pd.DataFrame) ->
convert_dataframe_to_zarr(
df=df,
zarr_path=new_path,
extra_cols=["radius"],
add_radius=True,
extra_cols=(),
)

new_data = zarr.open(new_path)
Expand Down
105 changes: 97 additions & 8 deletions python/src/intracktive/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,9 @@ def get_unique_zarr_path(zarr_path: Path) -> Path:
def convert_dataframe_to_zarr(
df: pd.DataFrame,
zarr_path: Path,
add_radius: bool = False,
extra_cols: Iterable[str] = (),
pre_normalized: bool = False,
) -> Path:
"""
Convert a DataFrame of tracks to a sparse Zarr store
Expand Down Expand Up @@ -113,11 +115,18 @@ def convert_dataframe_to_zarr(
flag_2D = True
df["z"] = 0.0

points_cols = (
["z", "y", "x", "radius"] if add_radius else ["z", "y", "x"]
) # columns to store in the points array
extra_cols = list(extra_cols)
columns = REQUIRED_COLUMNS + extra_cols
points_cols = ["z", "y", "x"] + extra_cols # columns to store in the points array

for col in columns:
columns_to_check = (
REQUIRED_COLUMNS + ["radius"] if add_radius else REQUIRED_COLUMNS
) # columns to check for in the DataFrame
columns_to_check = columns_to_check + extra_cols
print("point_cols:", points_cols)
print("columns_to_check:", columns_to_check)

for col in columns_to_check:
if col not in df.columns:
raise ValueError(f"Column '{col}' not found in the DataFrame")

Expand All @@ -144,7 +153,7 @@ def convert_dataframe_to_zarr(

n_tracklets = df["track_id"].nunique()
# (z, y, x) + extra_cols
num_values_per_point = 3 + len(extra_cols)
num_values_per_point = 4 if add_radius else 3

# store the points in an array
points_array = (
Expand All @@ -154,6 +163,15 @@ def convert_dataframe_to_zarr(
)
* INF_SPACE
)
attribute_array_empty = (
np.ones(
(n_time_points, max_values_per_time_point),
dtype=np.float32,
)
* INF_SPACE
)
attribute_arrays = {}
attribute_types = [None] * len(extra_cols)

points_to_tracks = lil_matrix(
(n_time_points * max_values_per_time_point, n_tracklets), dtype=np.int32
Expand All @@ -165,10 +183,25 @@ def convert_dataframe_to_zarr(
points_array[t, : group_size * num_values_per_point] = (
group[points_cols].to_numpy().ravel()
)

points_ids = t * max_values_per_time_point + np.arange(group_size)

points_to_tracks[points_ids, group["track_id"] - 1] = 1

for index, col in enumerate(extra_cols):
attribute_array = attribute_array_empty.copy()
for t, group in df.groupby("t"):
group_size = len(group)
attribute_array[t, :group_size] = group[col].to_numpy().ravel()
# check if attribute is categorical or continuous
if (
len(np.unique(attribute_array[attribute_array != INF_SPACE])) <= 10
): # get number of unique values, excluding INF_SPACE
attribute_types[index] = "categorical"
else:
attribute_types[index] = "continuous"
attribute_arrays[col] = attribute_array

LOG.info(f"Munged {len(df)} points in {time.monotonic() - start} seconds")

# creating mapping of tracklets parent-child relationship
Expand Down Expand Up @@ -233,16 +266,32 @@ def convert_dataframe_to_zarr(
chunks=(1, points_array.shape[1]),
dtype=np.float32,
)
print("points shape:", points.shape)
points.attrs["values_per_point"] = num_values_per_point

if len(extra_cols) > 0:
attributes_matrix = np.hstack(
[attribute_arrays[attr] for attr in attribute_arrays]
)
attributes = top_level_group.create_dataset(
"attributes",
data=attributes_matrix,
chunks=(1, attribute_array.shape[1]),
dtype=np.float32,
)
attributes.attrs["attribute_names"] = extra_cols
attributes.attrs["attribute_types"] = attribute_types
attributes.attrs["pre_normalized"] = pre_normalized

mean = df[["z", "y", "x"]].mean()
extent = (df[["z", "y", "x"]] - mean).abs().max()
extent_xyz = extent.max()

for col in ("z", "y", "x"):
points.attrs[f"mean_{col}"] = mean[col]

points.attrs["extent_xyz"] = extent_xyz
points.attrs["fields"] = ["z", "y", "x"] + extra_cols
points.attrs["fields"] = points_cols
points.attrs["ndim"] = 2 if flag_2D else 3

top_level_group.create_groups(
Expand Down Expand Up @@ -355,10 +404,33 @@ def dataframe_to_browser(df: pd.DataFrame, zarr_dir: Path) -> None:
default=False,
type=bool,
)
@click.option(
"--add_all_attributes",
is_flag=True,
help="Boolean indicating whether to include extra columns of the CSV as attributes for colors the cells in the viewer",
default=False,
type=bool,
)
@click.option(
"--add_attribute",
type=str,
default=None,
help="Comma-separated list of column names to include as attributes (e.g., 'cell_size,diameter,type,label')",
)
@click.option(
"--pre_normalized",
is_flag=True,
help="Boolean indicating whether the extra column/columns with attributes are prenormalized to [0,1]",
default=False,
type=bool,
)
def convert_cli(
csv_file: Path,
out_dir: Path | None,
add_radius: bool,
add_all_attributes: bool,
add_attribute: str | None,
pre_normalized: bool,
) -> None:
"""
Convert a CSV of tracks to a sparse Zarr store
Expand All @@ -372,16 +444,33 @@ def convert_cli(

zarr_path = out_dir / f"{csv_file.stem}_bundle.zarr"

extra_cols = ["radius"] if add_radius else []

tracks_df = pd.read_csv(csv_file)

LOG.info(f"Read {len(tracks_df)} points in {time.monotonic() - start} seconds")

extra_cols = []
if add_all_attributes:
columns_standard = REQUIRED_COLUMNS
extra_cols = tracks_df.columns.difference(columns_standard).to_list()
print("extra columns included as attributes:", extra_cols)
elif add_attribute:
selected_columns = [col.strip() for col in add_attribute.split(",")]
missing_columns = [
col for col in selected_columns if col not in tracks_df.columns
]
if missing_columns:
raise ValueError(
f"Columns not found in the CSV file: {', '.join(missing_columns)}"
)
extra_cols = selected_columns
print(f"Selected columns included as attributes: {', '.join(extra_cols)}")

convert_dataframe_to_zarr(
tracks_df,
zarr_path,
add_radius,
extra_cols=extra_cols,
pre_normalized=pre_normalized,
)

LOG.info(f"Full conversion took {time.monotonic() - start} seconds")
Expand Down
Loading
Loading