diff --git a/src/luna/common/utils.py b/src/luna/common/utils.py index cc9b33b2..7b0d6a87 100644 --- a/src/luna/common/utils.py +++ b/src/luna/common/utils.py @@ -2,12 +2,13 @@ import json import os import re +import shutil import subprocess import tempfile import time import urllib import warnings -from contextlib import ExitStack +from contextlib import ExitStack, contextmanager from functools import wraps from importlib import import_module from io import BytesIO @@ -77,8 +78,18 @@ def wrapper(*args, **kwargs): return wrapper +@contextmanager +def make_temp_directory(): + temp_dir = tempfile.mkdtemp() + try: + yield temp_dir + finally: + shutil.rmtree(temp_dir) + + def local_cache_urlpath( - file_key_write_mode: dict[str, str] = {}, dir_key_write_mode: dict[str, str] = {} + file_key_write_mode: dict[str, str] = {}, + dir_key_write_mode: dict[str, str] = {}, ): """Decorator for caching url/paths locally""" diff --git a/src/luna/pathology/cli/dsa_upload.py b/src/luna/pathology/cli/dsa_upload.py index 0ebf9465..7a68c1fd 100644 --- a/src/luna/pathology/cli/dsa_upload.py +++ b/src/luna/pathology/cli/dsa_upload.py @@ -8,7 +8,9 @@ import requests from fsspec import open from loguru import logger +from pandera.typing import DataFrame +from luna.common.models import SlideSchema from luna.common.utils import get_config, save_metadata, timed from luna.pathology.dsa.dsa_api_handler import ( get_item_uuid, @@ -85,7 +87,7 @@ def cli( f"Unable to infer image_filename from {annotation_file_urlpath}" ) logger.info(f"Image filename inferred as {image_filename}") - dsa_uuid = upload_annotation_to_dsa( + dsa_uuid = _upload_annotation_to_dsa( config["dsa_endpoint_url"], annotation_file_urlpath, config["collection_name"], @@ -104,6 +106,34 @@ def cli( def upload_annotation_to_dsa( + dsa_endpoint_url: str, + slide_manifest: DataFrame[SlideSchema], + annotation_column: str, + collection_name: str, + image_filename: str, + username: str, + password: str, + force: bool = False, + insecure: bool = False, + storage_options: dict = {}, +): + uuids = [] + for slide in slide_manifest.itertuples(name="Slide"): + uuids += _upload_annotation_to_dsa( + dsa_endpoint_url, + slide[annotation_column], + collection_name, + image_filename, + username, + password, + force, + insecure, + storage_options, + ) + return uuids + + +def _upload_annotation_to_dsa( dsa_endpoint_url: str, annotation_file_urlpaths: Union[str, List[str]], collection_name: str, diff --git a/src/luna/pathology/cli/dsa_viz.py b/src/luna/pathology/cli/dsa_viz.py index 3cd18499..21611522 100644 --- a/src/luna/pathology/cli/dsa_viz.py +++ b/src/luna/pathology/cli/dsa_viz.py @@ -1,9 +1,10 @@ import copy import json +import os import re from decimal import Decimal from pathlib import Path -from typing import Optional +from typing import Dict, List, Optional import fire # type: ignore import fsspec # type: ignore @@ -12,13 +13,16 @@ import ijson # type: ignore import numpy as np import pandas as pd +from dask.distributed import progress from fsspec import open # type: ignore from loguru import logger +from pandera.typing import DataFrame from PIL import Image from shapely import MultiPolygon, box from typing_extensions import TypedDict -from luna.common.models import LabeledTileSchema +from luna.common.dask import get_or_create_dask_client +from luna.common.models import LabeledTileSchema, SlideSchema from luna.common.utils import get_config, save_metadata, timed from luna.pathology.common.utils import address_to_coord from luna.pathology.dsa.utils import 
vectorize_np_array_bitmask_by_pixel_value

@@ -162,7 +166,7 @@ def save_dsa_annotation(

 @timed
 @save_metadata
-def stardist_polygon(
+def stardist_polygon_cli(
     input_urlpath: str = "???",
     image_filename: str = "???",
     annotation_name: str = "???",
@@ -188,28 +192,68 @@ def stardist_polygon(
         dict[str,str]: annotation file path
     """
     config = get_config(vars())
-    dsa_annotation = stardist_polygon_main(
+    annotation_filepath = __stardist_polygon(
         config["input_urlpath"],
+        config["output_urlpath"],
+        config["image_filename"],
         config["annotation_name"],
         config["line_colors"],
         config["fill_colors"],
         config["storage_options"],
+        config["output_storage_options"],
     )
-    annotatation_filepath = save_dsa_annotation(
-        dsa_annotation,
-        config["output_urlpath"],
-        config["image_filename"],
-        config["storage_options"],
-    )
-    return {"dsa_annotation": annotatation_filepath}
+    return {"dsa_annotation": annotation_filepath}


-def stardist_polygon_main(
+def stardist_polygon(
+    slide_manifest: DataFrame[SlideSchema],
+    object_urlpath: str,
+    output_urlpath: str,
+    image_filename: str,
+    annotation_name: str,
+    line_colors: Dict[str, str],
+    fill_colors: Dict[str, str],
+    storage_options: Dict,
+    output_storage_options: Dict,
+    annotation_column: str = "stardist_polygon_geojson_url",
+    output_column: str = "regional_dsa_url",
+):
+    if annotation_column not in slide_manifest.columns:
+        raise ValueError(f"{annotation_column} not found in slide manifest")
+    client = get_or_create_dask_client()
+    futures = []
+    for row in slide_manifest.itertuples(name="Slide"):
+        image_filename = os.path.basename(row.url)
+        future = client.submit(
+            __stardist_polygon,
+            getattr(row, annotation_column),
+            output_urlpath,
+            image_filename,
+            annotation_name,
+            line_colors,
+            fill_colors,
+            storage_options,
+            output_storage_options,
+        )
+
+        futures.append(future)
+    progress(futures)
+    dsa_annotation_urls = client.gather(futures)
+    for idx, dsa_annotation_url in enumerate(dsa_annotation_urls):
+        slide_manifest.at[idx, output_column] = dsa_annotation_url
+
+    return slide_manifest
+
+
+def __stardist_polygon(
     input_urlpath: str,
+    output_urlpath: str,
+    image_filename: str,
     annotation_name: str,
-    line_colors: dict[str, str],
-    fill_colors: dict[str, str],
-    storage_options: dict,
+    line_colors: Dict[str, str],
+    fill_colors: Dict[str, str],
+    storage_options: Dict,
+    output_storage_options: Dict,
 ):
     """Build DSA annotation from stardist geojson classification results

@@ -258,12 +302,18 @@ def stardist_polygon_main(

         elements.append(element)

-    return get_dsa_annotation(elements, annotation_name)
+    dsa_annotation = get_dsa_annotation(elements, annotation_name)
+    return save_dsa_annotation(
+        dsa_annotation,
+        output_urlpath,
+        image_filename,
+        output_storage_options,
+    )


 @timed
 @save_metadata
-def stardist_polygon_tile(
+def stardist_polygon_tile_cli(
     object_urlpath: str = "???",
     tiles_urlpath: str = "???",
     image_filename: str = "???",
@@ -291,33 +341,74 @@ def stardist_polygon_tile(
         dict[str,str]: annotation file path
     """
     config = get_config(vars())
-    dsa_annotations = stardist_polygon_tile_main(
+    metadata = __stardist_polygon_tile(
         config["object_urlpath"],
         config["tiles_urlpath"],
+        config["output_urlpath"],
+        config["image_filename"],
         config["annotation_name_prefix"],
         config["line_colors"],
         config["fill_colors"],
         config["storage_options"],
+        config["output_storage_options"],
     )
-    metadata = {}
-    for tile_label, dsa_annotation in dsa_annotations.items():
-        annotation_filepath = save_dsa_annotation(
-            dsa_annotation,
-            config["output_urlpath"],
-            
config["image_filename"], - config["storage_options"], - ) - metadata["dsa_annotation_" + tile_label] = annotation_filepath return metadata -def stardist_polygon_tile_main( +def stardist_polygon_tile( + slide_manifest: DataFrame[SlideSchema], object_urlpath: str, tiles_urlpath: str, + output_urlpath: str, + image_filename: str, annotation_name_prefix: str, - line_colors: dict[str, str], - fill_colors: dict[str, str], - storage_options: dict, + line_colors: Dict[str, str], + fill_colors: Dict[str, str], + storage_options: Dict, + output_storage_options: Dict, + annotation_column: str = "stardist_polygon_geojson_url", + output_column_suffix: str = "regional_dsa_url", +): + if annotation_column not in slide_manifest.columns: + raise ValueError(f"{annotation_column} not found in slide manifest") + client = get_or_create_dask_client() + futures = [] + for row in slide_manifest.itertuples(name="Slide"): + image_filename = os.path.basename(row.url) + future = client.submit( + __stardist_polygon_tile, + row[annotation_column], + row.tiles_url, + output_urlpath, + image_filename, + annotation_name_prefix, + line_colors, + fill_colors, + storage_options, + output_storage_options, + ) + + futures.append(future) + progress(futures) + dsa_annotation_url_map = client.gather(futures) + for tile_label, dsa_annotation_url in dsa_annotation_url_map.iteritems(): + slide_manifest.at[ + row.Index, f"{tile_label}_{output_column_suffix}" + ] = dsa_annotation_url + + return slide_manifest + + +def __stardist_polygon_tile( + object_urlpath: str, + tiles_urlpath: str, + output_urlpath: str, + image_filename: str, + annotation_name_prefix: str, + line_colors: Dict[str, str], + fill_colors: Dict[str, str], + storage_options: Dict, + output_storage_options: Dict, ): """Build DSA annotation json from stardist geojson classification and labeled tiles @@ -386,18 +477,25 @@ def stardist_polygon_tile_main( tile_elements[tile_label].append(element) - dsa_annotations = {} + metadata = {} for tile_label, elements in tile_elements.items(): - dsa_annotations[tile_label] = get_dsa_annotation( + dsa_annotation = get_dsa_annotation( elements, annotation_name_prefix + "_" + tile_label ) + annotation_filepath = save_dsa_annotation( + dsa_annotation, + output_urlpath, + image_filename, + output_storage_options, + ) + metadata[tile_label] = annotation_filepath - return dsa_annotations + return metadata @timed @save_metadata -def stardist_cell( +def stardist_cell_cli( input_urlpath: str = "???", output_urlpath: str = "???", image_filename: str = "???", @@ -428,28 +526,67 @@ def stardist_cell( dict[str,str]: annotation file path """ config = get_config(vars()) - dsa_annotation = stardist_cell_main( + annotation_filepath = __stardist_cell( config["input_urlpath"], + config["output_urlpath"], + config["image_filename"], config["annotation_name"], config["line_colors"], config["fill_colors"], config["storage_options"], + config["output_storage_options"], ) - annotatation_filepath = save_dsa_annotation( - dsa_annotation, - config["output_urlpath"], - config["image_filename"], - config["storage_options"], - ) - return {"dsa_annotation": annotatation_filepath} + return {"dsa_annotation": annotation_filepath} + + +def stardist_cell( + slide_manifest: DataFrame[SlideSchema], + output_urlpath: str, + image_filename: str, + annotation_name: str, + line_colors: Optional[Dict[str, str]], + fill_colors: Optional[Dict[str, str]], + storage_options: Dict, + output_storage_options: Dict, + annotation_column: str = "stardist_cell_tsv_url", + 
output_column: str = "stardist_cell_dsa_url", +): + if annotation_column not in slide_manifest.columns: + raise ValueError(f"{annotation_column} not found in slide manifest") + client = get_or_create_dask_client() + futures = [] + for row in slide_manifest.itertuples(name="Slide"): + image_filename = os.path.basename(row.url) + future = client.submit( + __stardist_cell, + row[annotation_column], + output_urlpath, + image_filename, + annotation_name, + line_colors, + fill_colors, + storage_options, + output_storage_options, + ) + futures.append(future) + progress(futures) + dsa_annotation_urls = client.gather(futures) + for dsa_annotation_url in dsa_annotation_urls: + slide_manifest.at[row.Index, output_column] = dsa_annotation_url -def stardist_cell_main( + return slide_manifest + + +def __stardist_cell( input_urlpath: str, + output_urlpath: str, + image_filename: str, annotation_name: str, line_colors: Optional[dict[str, str]], fill_colors: Optional[dict[str, str]], storage_options: dict, + output_storage_options: dict, ): """Build DSA annotation json from TSV classification data generated by stardist @@ -515,12 +652,18 @@ def stardist_cell_main( elements.append(elements_entry) - return get_dsa_annotation(elements, annotation_name) + dsa_annotation = get_dsa_annotation(elements, annotation_name) + return save_dsa_annotation( + dsa_annotation, + output_urlpath, + image_filename, + output_storage_options, + ) @timed @save_metadata -def regional_polygon( +def regional_polygon_cli( input_urlpath: str = "???", output_urlpath: str = "???", image_filename: str = "???", @@ -547,29 +690,69 @@ def regional_polygon( config = get_config(vars()) - dsa_annotation = regional_polygon_main( + annotation_filepath = __regional_polygon( config["input_urlpath"], + config["output_urlpath"], + config["image_filename"], config["annotation_name"], config["line_colors"], config["fill_colors"], config["storage_options"], + config["output_storage_options"], ) - annotatation_filepath = save_dsa_annotation( - dsa_annotation, - config["output_urlpath"], - config["image_filename"], - config["storage_options"], - ) - return {"dsa_annotation": annotatation_filepath} + return {"dsa_annotation": annotation_filepath} + + +def regional_polygon( + slide_manifest: DataFrame[SlideSchema], + output_urlpath: str, + image_filename: str, + annotation_name: str, + classes_to_include: List, + line_colors: Optional[Dict[str, str]], + fill_colors: Optional[Dict[str, str]], + storage_options: Dict, + output_storage_options: Dict, + annotation_column: str = "regional_geojson_url", + output_column: str = "regional_dsa_url", +): + if annotation_column not in slide_manifest.columns: + raise ValueError(f"{annotation_column} not found in slide manifest") + client = get_or_create_dask_client() + futures = [] + for row in slide_manifest.itertuples(name="Slide"): + image_filename = os.path.basename(row.url) + future = client.submit( + __regional_polygon, + row[annotation_column], + output_urlpath, + image_filename, + annotation_name, + fill_colors, + line_colors, + storage_options, + output_storage_options, + ) + + futures.append(future) + progress(futures) + dsa_annotation_urls = client.gather(futures) + for dsa_annotation_url in dsa_annotation_urls: + slide_manifest.at[row.Index, output_column] = dsa_annotation_url + + return slide_manifest -def regional_polygon_main( +def __regional_polygon( input_urlpath: str, + output_urlpath: str, + image_filename: str, annotation_name: str, line_colors: Optional[dict[str, str]], fill_colors: 
Optional[dict[str, str]], storage_options: dict, + output_storage_options: dict, ): """Build DSA annotation json from regional annotation geojson @@ -609,12 +792,18 @@ def regional_polygon_main( element["points"] = coords elements.append(element) - return get_dsa_annotation(elements, annotation_name) + dsa_annotation = get_dsa_annotation(elements, annotation_name) + return save_dsa_annotation( + dsa_annotation, + output_urlpath, + image_filename, + output_storage_options, + ) @timed @save_metadata -def qupath_polygon( +def qupath_polygon_cli( input_urlpath: str = "???", output_urlpath: str = "???", image_filename: str = "???", @@ -638,37 +827,79 @@ def qupath_polygon( line_colors (dict, optional): line color map with {feature name:rgb values} fill_colors (dict, optional): fill color map with {feature name:rgba values} storage_options (dict): storage options to pass to read/write functions + output_storage_options (dict): storage options to pass to read/write functions local_config (string): local config yaml file Returns: dict: annotation file path """ config = get_config(vars()) - dsa_annotation = qupath_polygon_main( + annotation_filepath = __qupath_polygon( config["input_urlpath"], + config["output_urlpath"], + config["image_filename"], config["annotation_name"], config["classes_to_include"], config["line_colors"], config["fill_colors"], config["storage_options"], + config["output_storage_options"], ) - annotatation_filepath = save_dsa_annotation( - dsa_annotation, - config["output_urlpath"], - config["image_filename"], - config["storage_options"], - ) - return {"dsa_annotation": annotatation_filepath} + return {"dsa_annotation": annotation_filepath} + + +def qupath_polygon( + slide_manifest: DataFrame[SlideSchema], + output_urlpath: str, + image_filename: str, + annotation_name: str, + classes_to_include: List, + line_colors: Optional[Dict[str, str]], + fill_colors: Optional[Dict[str, str]], + storage_options: Dict, + output_storage_options: Dict, + annotation_column: str = "qupath_geojson_url", + output_column: str = "qupath_dsa_url", +): + if annotation_column not in slide_manifest.columns: + raise ValueError(f"{annotation_column} not found in slide manifest") + client = get_or_create_dask_client() + futures = [] + for row in slide_manifest.itertuples(name="Slide"): + image_filename = os.path.basename(row.url) + future = client.submit( + __qupath_polygon, + row[annotation_column], + output_urlpath, + image_filename, + annotation_name, + classes_to_include, + line_colors, + fill_colors, + storage_options, + output_storage_options, + ) + + futures.append(future) + progress(futures) + dsa_annotation_urls = client.gather(futures) + for dsa_annotation_url in dsa_annotation_urls: + slide_manifest.at[row.Index, output_column] = dsa_annotation_url + return slide_manifest -def qupath_polygon_main( + +def __qupath_polygon( input_urlpath: str, + output_urlpath: str, + image_filename: str, annotation_name: str, - classes_to_include: list, - line_colors: Optional[dict[str, str]], - fill_colors: Optional[dict[str, str]], - storage_options: dict, + classes_to_include: List, + line_colors: Optional[Dict[str, str]], + fill_colors: Optional[Dict[str, str]], + storage_options: Dict, + output_storage_options: Dict, ): """Build DSA annotation json from Qupath polygon geojson @@ -727,20 +958,27 @@ def qupath_polygon_main( "points" ] = connected_component_coords elements.append(connected_component_element) - return get_dsa_annotation(elements, annotation_name) + dsa_annotation = 
get_dsa_annotation(elements, annotation_name) + return save_dsa_annotation( + dsa_annotation, + output_urlpath, + image_filename, + output_storage_options, + ) @timed @save_metadata -def bitmask_polygon( - input_map: dict[str, str] = "???", # type: ignore +def bitmask_polygon_cli( + input_map: Dict[str, str] = "???", # type: ignore output_urlpath: str = "???", image_filename: str = "???", annotation_name: str = "???", - line_colors: Optional[dict[str, str]] = None, - fill_colors: Optional[dict[str, str]] = None, + line_colors: Optional[Dict[str, str]] = None, + fill_colors: Optional[Dict[str, str]] = None, scale_factor: Optional[int] = None, - storage_options: dict = {}, + storage_options: Dict = {}, + output_storage_options: Dict = {}, local_config: str = "", ): """Build DSA annotation json from bitmask PNGs @@ -762,30 +1000,30 @@ def bitmask_polygon( dict: annotation file path """ config = get_config(vars()) - dsa_annotation = bitmask_polygon_main( + annotation_filepath = bitmask_polygon( config["input_map"], + config["output_urlpath"], + config["image_filename"], config["annotation_name"], config["line_colors"], config["fill_colors"], config["scale_factor"], config["storage_options"], + config["output_storage_options"], ) - annotatation_filepath = save_dsa_annotation( - dsa_annotation, - config["output_urlpath"], - config["image_filename"], - config["storage_options"], - ) - return {"dsa_annotation": annotatation_filepath} + return {"dsa_annotation": annotation_filepath} -def bitmask_polygon_main( - input_map: dict[str, str], +def bitmask_polygon( + input_map: Dict[str, str], + output_urlpath: str, + image_filename: str, annotation_name: str, - line_colors: Optional[dict[str, str]], - fill_colors: Optional[dict[str, str]], + line_colors: Optional[Dict[str, str]], + fill_colors: Optional[Dict[str, str]], scale_factor: Optional[int] = 1, - storage_options: dict = {}, + storage_options: Dict = {}, + output_storage_options: Dict = {}, ): """Build DSA annotation json from bitmask PNGs @@ -830,12 +1068,18 @@ def bitmask_polygon_main( element["points"] = coords elements.append(element) - return get_dsa_annotation(elements, annotation_name) + dsa_annotation = get_dsa_annotation(elements, annotation_name) + return save_dsa_annotation( + dsa_annotation, + output_urlpath, + image_filename, + output_storage_options, + ) @timed @save_metadata -def heatmap( +def heatmap_cli( input_urlpath: str = "???", output_urlpath: str = "???", image_filename: str = "???", @@ -872,8 +1116,10 @@ def heatmap( dict: annotation file path. None if error in writing the file. 
""" config = get_config(vars()) - dsa_annotation = heatmap_main( + annotation_filepath = __heatmap( config["input_urlpath"], + config["output_urlpath"], + config["image_filename"], config["annotation_name"], config["column"], config["tile_size"], @@ -881,25 +1127,65 @@ def heatmap( config["fill_colors"], config["line_colors"], config["storage_options"], + config["output_storage_options"], ) - annotatation_filepath = save_dsa_annotation( - dsa_annotation, - config["output_urlpath"], - config["image_filename"], - config["storage_options"], - ) - return {"dsa_annotation": annotatation_filepath} + return {"dsa_annotation": annotation_filepath} + + +def heatmap( + slide_manifest: DataFrame[SlideSchema], + output_urlpath: str, + annotation_name: str, + column: List[str], + tile_size: int, + scale_factor: Optional[int], + fill_colors: Optional[Dict[str, str]], + line_colors: Optional[Dict[str, str]], + storage_options: Dict, + output_storage_options: Dict, +): + if "tiles_url" not in slide_manifest.columns: + raise ValueError("tiles_url not found in slide manifest") + client = get_or_create_dask_client() + futures = [] + for row in slide_manifest.itertuples(name="Slide"): + image_filename = os.path.basename(row.url) + future = client.submit( + __heatmap, + row.tiles_url, + output_urlpath, + image_filename, + annotation_name, + column, + tile_size, + scale_factor, + fill_colors, + line_colors, + storage_options, + output_storage_options, + ) + + futures.append(future) + progress(futures) + dsa_annotation_urls = client.gather(futures) + for dsa_annotation_url in dsa_annotation_urls: + slide_manifest.at[row.Index, "heatmap_url"] = dsa_annotation_url + return slide_manifest -def heatmap_main( + +def __heatmap( input_urlpath: str, + output_urlpath: str, + image_filename: str, annotation_name: str, - column: list[str], + column: List[str], tile_size: int, scale_factor: Optional[int], - fill_colors: Optional[dict[str, str]], - line_colors: Optional[dict[str, str]], - storage_options: dict, + fill_colors: Optional[Dict[str, str]], + line_colors: Optional[Dict[str, str]], + storage_options: Dict, + output_storage_options: Dict, ): """Generate heatmap based on the tile scores @@ -965,21 +1251,27 @@ def heatmap_main( if len(column) == 1: annotation_name = column[0] + "_" + annotation_name - return get_dsa_annotation(elements, annotation_name) + dsa_annotation = get_dsa_annotation(elements, annotation_name) + return save_dsa_annotation( + dsa_annotation, + output_urlpath, + image_filename, + output_storage_options, + ) @timed @save_metadata -def bmp_polygon( +def bmp_polygon_cli( input_urlpath: str = "???", output_urlpath: str = "???", - label_map: dict[int, str] = "???", # type: ignore + label_map: Dict[int, str] = "???", # type: ignore image_filename: str = "???", annotation_name: str = "???", - line_colors: Optional[dict[str, str]] = None, - fill_colors: Optional[dict[str, str]] = None, + line_colors: Optional[Dict[str, str]] = None, + fill_colors: Optional[Dict[str, str]] = None, scale_factor: Optional[int] = 1, - storage_options: dict = {}, + storage_options: Dict = {}, local_config: str = "", ): """Build DSA annotation json from a BMP with multiple labels. 
@@ -1002,32 +1294,74 @@ def bmp_polygon( dict: annotation file path """ config = get_config(vars()) - dsa_annotation = bmp_polygon_main( + annotation_filepath = __bmp_polygon( config["input_urlpath"], + config["output_urlpath"], + config["image_filename"], config["label_map"], config["annotation_name"], config["line_colors"], + config["fill_colors"], config["scale_factor"], config["storage_options"], + config["output_storage_options"], ) - annotatation_filepath = save_dsa_annotation( - dsa_annotation, - config["output_urlpath"], - config["image_filename"], - config["storage_options"], - ) - return {"dsa_annotation": annotatation_filepath} + return {"dsa_annotation": annotation_filepath} + + +def bmp_polygon( + slide_manifest: DataFrame[SlideSchema], + output_urlpath: str, + label_map: Dict[int, str], + annotation_name: str, + line_colors: Optional[Dict[str, str]], + fill_colors: Optional[Dict[str, str]], + scale_factor: Optional[int] = 1, + storage_options: Dict = {}, + output_storage_options: Dict = {}, + annotation_column: str = "bmp_polygon_url", + output_column: str = "bmp_polygon_dsa_url", +): + if annotation_column not in slide_manifest.columns: + raise ValueError(f"{annotation_column} not found in slide manifest") + client = get_or_create_dask_client() + futures = [] + for row in slide_manifest.itertuples(name="Slide"): + image_filename = os.path.basename(row.url) + future = client.submit( + __bmp_polygon, + row[annotation_column], + output_urlpath, + image_filename, + label_map, + annotation_name, + line_colors, + fill_colors, + scale_factor, + storage_options, + output_storage_options, + ) + futures.append(future) + progress(futures) + dsa_annotation_urls = client.gather(futures) + for dsa_annotation_url in dsa_annotation_urls: + slide_manifest.at[row.Index, output_column] = dsa_annotation_url + + return slide_manifest -def bmp_polygon_main( +def __bmp_polygon( input_urlpath: str, - label_map: dict[int, str], + output_urlpath: str, + image_filename: str, + label_map: Dict[int, str], annotation_name: str, - line_colors: Optional[dict[str, str]], - fill_colors: Optional[dict[str, str]], + line_colors: Optional[Dict[str, str]], + fill_colors: Optional[Dict[str, str]], scale_factor: Optional[int] = 1, - storage_options: dict = {}, + storage_options: Dict = {}, + output_storage_options: Dict = {}, ): """Build DSA annotation json from a BMP with multiple labels. 
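
Reviewer note: the intended end-to-end use of the new manifest API is to chain the wrappers, each step reading the URL column written by the previous one. A sketch under assumed inputs (the manifest, paths, and label_map below are illustrative; SlideSchema minimally needs id and url columns, and bmp_polygon reads bmp_polygon_url by default):

    import pandas as pd

    from luna.pathology.cli.dsa_viz import bmp_polygon
    from luna.pathology.cli.generate_tiles import generate_tiles

    # Illustrative manifest; real code would build this from a slide table.
    slide_manifest = pd.DataFrame(
        {
            "id": ["slide_001"],
            "url": ["/data/slides/slide_001.svs"],
            "bmp_polygon_url": ["/data/masks/slide_001.bmp"],
        }
    )

    # Adds a tiles_url column per slide.
    slide_manifest = generate_tiles(
        slide_manifest, tile_size=256, output_urlpath="/data/tiles"
    )

    # Converts each labeled BMP to DSA annotation json and records the
    # output path in the bmp_polygon_dsa_url column.
    slide_manifest = bmp_polygon(
        slide_manifest,
        output_urlpath="/data/dsa",
        label_map={1: "tumor", 2: "stroma"},
        annotation_name="tissue_regions",
        line_colors=None,
        fill_colors=None,
    )
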
@@ -1070,20 +1404,26 @@ def bmp_polygon_main( element["points"] = coords elements.append(element) - return get_dsa_annotation(elements, annotation_name) + dsa_annotation = get_dsa_annotation(elements, annotation_name) + return save_dsa_annotation( + dsa_annotation, + output_urlpath, + image_filename, + storage_options, + ) def fire_cli(): fire.Fire( { - "stardist-polygon-tile": stardist_polygon_tile, - "stardist-polygon": stardist_polygon, - "stardist-cell": stardist_cell, - "regional-polygon": regional_polygon, - "qupath-polygon": qupath_polygon, - "bitmask-polygon": bitmask_polygon, - "heatmap": heatmap, - "bmp-polygon": bmp_polygon, + "stardist-polygon-tile": stardist_polygon_tile_cli, + "stardist-polygon": stardist_polygon_cli, + "stardist-cell": stardist_cell_cli, + "regional-polygon": regional_polygon_cli, + "qupath-polygon": qupath_polygon_cli, + "bitmask-polygon": bitmask_polygon_cli, + "heatmap": heatmap_cli, + "bmp-polygon": bmp_polygon_cli, } ) diff --git a/src/luna/pathology/cli/extract_tile_shape_features.py b/src/luna/pathology/cli/extract_tile_shape_features.py index b9e88679..0ab7ff49 100644 --- a/src/luna/pathology/cli/extract_tile_shape_features.py +++ b/src/luna/pathology/cli/extract_tile_shape_features.py @@ -11,12 +11,15 @@ import numpy as np import pandas as pd import tiffslide +from dask.distributed import progress from fsspec import open from loguru import logger +from pandera.typing import DataFrame from scipy.stats import entropy, kurtosis from shapely import box -from luna.common.models import LabeledTileSchema +from luna.common.dask import get_or_create_dask_client +from luna.common.models import LabeledTileSchema, SlideSchema from luna.common.utils import get_config, save_metadata, timed from luna.pathology.cli.extract_shape_features import extract_shape_features from luna.pathology.cli.generate_tile_mask import convert_tiles_to_mask @@ -105,40 +108,17 @@ def cli( """ config = get_config(vars()) - fs, path = fsspec.core.url_to_fs( - config["output_urlpath"], **config["output_storage_options"] - ) - - output_fpath = Path(path) / "shape_features.parquet" - - if fs.exists(str(output_fpath)): - logger.info( - f"Output file already exist: {fs.unstrip_protocol(str(output_fpath))}" - ) - return {} - - with open(config["tiles_urlpath"], **config["storage_options"]) as of: - tiles_df = pd.read_parquet(of) - - with open(config["slide_urlpath"], **config["storage_options"]) as of: - slide = tiffslide.TiffSlide(of) - slide_width = slide.dimensions[0] - slide_height = slide.dimensions[1] - - with open(config["object_urlpath"], **config["storage_options"]) as of: - object_gdf = gpd.read_file(of) - slide_id = Path(config["slide_urlpath"]).stem statistical_descriptors = config["statistical_descriptors"].capitalize() cellular_features = config["cellular_features"].capitalize() property_type = config["property_type"].capitalize() - df = extract_tile_shape_features( - object_gdf, - tiles_df, - slide_width, - slide_height, + properties = __extract_tile_shape_features( + config["object_urlpath"], + config["tiles_urlpath"], + config["slide_urlpath"], + config["output_urlpath"], config["resize_factor"], config["detection_probability_threshold"], slide_id, @@ -147,25 +127,77 @@ def cli( property_type, config["include_smaller_regions"], config["label_cols"], + config["storage_options"], + config["output_storage_options"], ) + return properties - with fs.open(output_fpath, "wb") as of: - df.to_parquet(of) - properties = { - "shape_features": fs.unstrip_protocol(str(output_fpath)), - 
"num_features": len(df), - } +def extract_tile_shape_features( + slide_manifest: DataFrame[SlideSchema], + slide_urlpath: str, + output_urlpath: str, + resize_factor: int = 16, + detection_probability_threshold: Optional[float] = None, + statistical_descriptors: StatisticalDescriptors = StatisticalDescriptors.ALL, + cellular_features: CellularFeatures = CellularFeatures.ALL, + property_type: PropertyType = PropertyType.ALL, + include_smaller_regions: bool = False, + label_cols: List[str] = None, + storage_options: dict = {}, + output_storage_options: dict = {}, + objects_column="stardist_geojson_url", + properties: List[str] = [ + "area", + "convex_area", + "eccentricity", + "equivalent_diameter", + "euler_number", + "extent", + "label", + "major_axis_length", + "minor_axis_length", + "perimeter", + "solidity", + ], +): + client = get_or_create_dask_client() + + futures = [] + for row in slide_manifest.itertuples(name="Slide"): + future = client.submit( + __extract_tile_shape_features, + row[objects_column], + row.tiles_url, + row.url, + output_urlpath, + resize_factor, + detection_probability_threshold, + row.id, + statistical_descriptors, + cellular_features, + property_type, + include_smaller_regions, + label_cols, + storage_options, + output_storage_options, + properties, + ) + futures.append(future) - logger.info(properties) - return properties + progress(futures) + results = client.gather(futures) + for idx, result in results.enumerate(): + slide_manifest.at[idx, "tile_shape_features_url"] = result["shape_features_url"] + return slide_manifest -def extract_tile_shape_features( - object_gdf: gpd.GeoDataFrame, - tiles_df: pd.DataFrame, - slide_width: int, - slide_height: int, + +def __extract_tile_shape_features( + objects_urlpath: str, + tiles_urlpath: str, + slide_urlpath: str, + output_urlpath: str, resize_factor: int = 16, detection_probability_threshold: Optional[float] = None, slide_id: str = "", @@ -174,6 +206,8 @@ def extract_tile_shape_features( property_type: PropertyType = PropertyType.ALL, include_smaller_regions: bool = False, label_cols: List[str] = None, + storage_options: dict = {}, + output_storage_options: dict = {}, properties: List[str] = [ "area", "convex_area", @@ -191,10 +225,8 @@ def extract_tile_shape_features( """Extracts shape and spatial features (HIF features) from a slide mask. 
Args: - object_gdf (gpd.GeoDataFrame): URL/path to slide (tiffslide supported formats) - tiles_df (pd.DataFrame): URL/path to object file (geopandas supported formats) - slide_width (int): slide width - slide_height (int): slide height + objects (Union[str, gpd.GeoDataFrame]): URL/path to slide (tiffslide supported formats) + tiles (Union[str, pd.DataFrame]): URL/path to object file (geopandas supported formats) resize_factor (int): factor to downsample slide image detection_probability_threshold (Optional[float]): detection probability threshold @@ -208,6 +240,31 @@ def extract_tile_shape_features( Returns: dict: output paths and the number of features generated """ + + ofs, path = fsspec.core.url_to_fs( + output_urlpath, + **output_storage_options, + ) + + output_fpath = Path(path) / "shape_features.parquet" + + if ofs.exists(str(output_fpath)): + logger.info( + f"Output file already exist: {ofs.unstrip_protocol(str(output_fpath))}" + ) + return {} + + with open(tiles_urlpath, **storage_options) as of: + tiles_df = pd.read_parquet(of) + + with open(objects_urlpath, **storage_options) as of: + object_gdf = gpd.read_file(of) + + with open(slide_urlpath, **storage_options) as of: + slide = tiffslide.TiffSlide(of) + slide_width = slide.dimensions[0] + slide_height = slide.dimensions[1] + if label_cols: tiles_df["Classification"] = tiles_df[label_cols].idxmax(axis=1) LabeledTileSchema.validate(tiles_df.reset_index()) @@ -337,7 +394,17 @@ def extract_tile_shape_features( r"_", " ", regex=True ) - return mdf + with ofs.open(output_fpath, "wb") as of: + mdf.to_parquet(of) + + props = { + "shape_features_url": ofs.unstrip_protocol(str(output_fpath)), + "num_features": len(mdf), + } + + logger.info(props) + + return props def fire_cli(): diff --git a/src/luna/pathology/cli/generate_tiles.py b/src/luna/pathology/cli/generate_tiles.py index 22a16d6b..eb4207b7 100644 --- a/src/luna/pathology/cli/generate_tiles.py +++ b/src/luna/pathology/cli/generate_tiles.py @@ -1,15 +1,12 @@ # General imports import itertools from pathlib import Path -from typing import Optional, Union -from urllib.parse import urlparse +from typing import Optional import fire import fsspec import pandas as pd -from dask.distributed import progress from loguru import logger -from multimethod import multimethod from pandera.typing import DataFrame from tiffslide import TiffSlide @@ -55,91 +52,55 @@ def cli( configure_dask_client(**config["dask_options"]) - output_filesystem, output_urlpath_prefix = fsspec.core.url_to_fs( - config["output_urlpath"], **config["output_storage_options"] - ) - slide_id = Path(urlparse(config["slide_urlpath"]).path).stem - output_header_file = Path(output_urlpath_prefix) / f"{slide_id}.tiles.parquet" - - df = generate_tiles( + properties = __generate_tiles( config["slide_urlpath"], config["tile_size"], - config["storage_options"], + config["output_urlpath"], config["requested_magnification"], + config["storage_options"], + config["output_storage_options"], ) - with output_filesystem.open(output_header_file, "wb") as of: - print(f"saving to {output_header_file}") - df.to_parquet(of) - - properties = { - "slide_tiles": output_header_file, # "Tiles" are the metadata that describe them - "total_tiles": len(df), - "segment_keys": {"slide_id": str(slide_id)}, - } return properties -@multimethod def generate_tiles( slide_manifest: DataFrame[SlideSchema], tile_size: int, - storage_options: dict = {}, + output_urlpath: str, requested_magnification: Optional[int] = None, -) -> pd.DataFrame: - client = 
get_or_create_dask_client()
-    futures = {
-        row.id: client.submit(
-            _generate_tiles,
-            row.url,
-            tile_size,
-            storage_options,
-            requested_magnification,
-        )
-        for row in slide_manifest.itertuples()
-    }
-    progress(futures)
-    results = client.gather(futures)
-    for k, v in results.items():
-        v["id"] = str(k)
-    tiles = pd.concat(results)
-    return tiles.merge(slide_manifest, on="id")
-
-
-@multimethod
-def generate_tiles(
-    slide_urlpaths: Union[str, list[str]],
-    tile_size: int,
     storage_options: dict = {},
-    requested_magnification: Optional[int] = None,
+    output_storage_options: dict = {},
 ) -> pd.DataFrame:
-    if type(slide_urlpaths) == str:
-        slide_urlpaths = [slide_urlpaths]
-
     client = get_or_create_dask_client()
-    futures = {
-        Path(urlparse(slide_urlpath).path).stem: client.submit(
-            _generate_tiles,
-            slide_urlpath,
+
+    futures = []
+    for slide in slide_manifest.itertuples(name="Slide"):
+        future = client.submit(
+            __generate_tiles,
+            slide.url,
             tile_size,
-            storage_options,
+            output_urlpath,
             requested_magnification,
+            storage_options,
+            output_storage_options,
         )
-        for slide_urlpath in slide_urlpaths
-    }
-    progress(futures)
+        futures.append(future)
     results = client.gather(futures)
-    for k, v in results.items():
-        v["id"] = str(k)
-    return pd.concat(results)
+    for idx, result in enumerate(results):
+        slide_manifest.at[idx, "tiles_url"] = result["tiles_url"]
+
+    return slide_manifest


-def _generate_tiles(
+def __generate_tiles(
     slide_urlpath: str,
     tile_size: int,
-    storage_options: dict = {},
+    output_urlpath: str,
     requested_magnification: Optional[int] = None,
-) -> pd.DataFrame:
+    storage_options: dict = {},
+    output_storage_options: dict = {},
+) -> dict:
     """Rasterize a slide into smaller tiles

     Tiles addresses and arrays are saved as key-value pairs in (tiles.h5),
@@ -156,6 +117,13 @@ def _generate_tiles(
     Returns:
         dict: tile manifest properties (tiles_url, total_tiles)
     """
+    slide_id = Path(slide_urlpath).stem
+    ofs, output_path = fsspec.core.url_to_fs(output_urlpath, **output_storage_options)
+    output_file = str(Path(output_path) / f"{slide_id}.tiles.parquet")
+    if ofs.exists(output_file):
+        logger.info(f"Output file exists: {ofs.unstrip_protocol(output_file)}")
+        return
+
     with fsspec.open(slide_urlpath, "rb", **storage_options) as f:
         slide = TiffSlide(f)
         logger.info(f"Slide size = [{slide.dimensions[0]},{slide.dimensions[1]}]")
@@ -222,7 +190,17 @@ def _generate_tiles(
     #     ])
     #     logger.info(f"lazy tiles: {lazy_arrays.shape}")

-    return tiles
+    with ofs.open(output_file, mode="wb") as of:
+        tiles.to_parquet(of)
+
+    properties = {
+        "tiles_url": ofs.unstrip_protocol(
+            output_file
+        ),  # "Tiles" are the metadata that describe them
+        "total_tiles": len(tiles),
+    }
+
+    return properties


 def fire_cli():
diff --git a/src/luna/pathology/cli/infer_tile_labels.py b/src/luna/pathology/cli/infer_tile_labels.py
index 4d5bf596..259595fb 100644
--- a/src/luna/pathology/cli/infer_tile_labels.py
+++ b/src/luna/pathology/cli/infer_tile_labels.py
@@ -9,21 +9,22 @@
 import fsspec
 import pandas as pd
 import torch
-from dask.distributed import Client
-from fsspec import open
+from dask.distributed import progress
 from loguru import logger
+from pandera.typing import DataFrame
 from torch.utils.data import DataLoader
 from tqdm import tqdm

-from luna.common.utils import get_config, save_metadata, timed
-from luna.common.dask import configure_dask_client
+from luna.common.dask import configure_dask_client, get_or_create_dask_client
+from luna.common.utils import get_config, make_temp_directory, save_metadata, timed
 from 
luna.pathology.analysis.ml import ( HDF5Dataset, TorchTransformModel, post_transform_to_2d, ) -from luna.pathology.cli.run_tissue_detection import detect_tissue -from luna.pathology.cli.save_tiles import save_tiles +from luna.pathology.cli.generate_tiles import __generate_tiles +from luna.pathology.cli.run_tissue_detection import __detect_tissue, detect_tissue +from luna.pathology.cli.save_tiles import _save_tiles, save_tiles @timed @@ -78,63 +79,138 @@ def cli( if not config["tile_size"] and not config["tiles_urlpath"]: raise fire.core.FireError("Specify either tiles_urlpath or tile_size") - df_output = infer_tile_labels( - config["slide_urlpath"], - config["tiles_urlpath"], - config["tile_size"], - config["filter_query"], - config["requested_magnification"], - config["torch_model_repo_or_dir"], - config["model_name"], - config["num_cores"], - config["batch_size"], - config["output_urlpath"], - config["kwargs"], - config["use_gpu"], - config["dask_options"], - config["insecure"], - config["storage_options"], - config["output_storage_options"], - ) - - fs, output_path_prefix = fsspec.core.url_to_fs( - config["output_urlpath"], **config["output_storage_options"] - ) - if config['slide_urlpath']: + if config["slide_urlpath"]: slide_id = Path(config["slide_urlpath"]).stem else: - slide_id = Path(config["tiles_urlpath"]).stem.removesuffix('.tiles') + slide_id = Path(config["tiles_urlpath"]).stem.removesuffix(".tiles") + + tiles_urlpath = config["tiles_urlpath"] + with make_temp_directory() as temp_dir: + if not tiles_urlpath: + tiles_result = __generate_tiles( + config["slide_urlpath"], + config["tile_size"], + (Path(temp_dir) / "generate_tiles").as_uri(), + config["tile_magnification"], + config["storage_options"], + ) + detect_tissue_result = __detect_tissue( + config["slide_urlpath"], + tiles_result["tiles_url"], + slide_id, + config["thumbnail_magnification"], + config["filter_query"], + config["batch_size"], + (Path(temp_dir) / "detect_tissue").as_uri(), + config["storage_options"], + ) + save_tiles_result = _save_tiles( + detect_tissue_result["tiles_urlpath"], + config["slide_urlpath"], + (Path(temp_dir) / "save_tiles").as_uri(), + config["batch_size"], + config["storage_options"], + ) + tiles_urlpath = save_tiles_result["tiles_url"] + + return __infer_tile_labels( + tiles_urlpath, + slide_id, + config["output_urlpath"], + config["torch_model_repo_or_dir"], + config["model_name"], + config["num_cores"], + config["batch_size"], + config["kwargs"], + config["use_gpu"], + config["insecure"], + config["storage_options"], + config["output_storage_options"], + ) - output_file = str(Path(output_path_prefix) / f"{slide_id}.tiles.parquet") - # - with fs.open(output_file, "wb") as of: - df_output.to_parquet(of) - # Save our properties and params - properties = { - "slide_tiles": output_file, - "feature_data": output_file, - "total_tiles": len(df_output), - "available_labels": list(df_output.columns), - } +def infer_tile_labels( + slide_manifest: DataFrame, + tile_size: Optional[int] = None, + filter_query: str = "", + thumbnail_magnification: Optional[int] = None, + tile_magnification: Optional[int] = None, + torch_model_repo_or_dir: str = "", + model_name: str = "", + num_cores: int = 1, + batch_size: int = 2000, + output_urlpath: str = ".", + kwargs: dict = {}, + use_gpu: bool = False, + dask_options: dict = {}, + insecure: bool = False, + storage_options: dict = {}, + output_storage_options: dict = {}, +) -> pd.DataFrame: + client = get_or_create_dask_client() + 
configure_dask_client(**dask_options)
+
+    if "tiles_url" not in slide_manifest.columns:
+        if tile_size is None:
+            raise RuntimeError("Need to have generated tiles or specify tile_size")
+        # generate tiles
+        slide_manifest = detect_tissue(
+            slide_manifest,
+            None,
+            tile_size=tile_size,
+            thumbnail_magnification=thumbnail_magnification,
+            tile_magnification=tile_magnification,
+            filter_query=filter_query,
+            batch_size=batch_size,
+            storage_options=storage_options,
+            output_urlpath=output_urlpath,
+            output_storage_options=output_storage_options,
+        )

-    return properties
+        slide_manifest = save_tiles(
+            slide_manifest,
+            output_urlpath,
+            batch_size,
+            storage_options,
+            output_storage_options,
+        )

+    futures = []
+    for row in slide_manifest.itertuples(name="Slide"):
+        future = client.submit(
+            __infer_tile_labels,
+            row.tiles_url,
+            row.id,
+            output_urlpath,
+            torch_model_repo_or_dir,
+            model_name,
+            num_cores,
+            batch_size,
+            kwargs,
+            use_gpu,
+            insecure,
+            storage_options,
+            output_storage_options,
+        )
+        futures.append(future)

-def infer_tile_labels(
-    slide_urlpath: str,
+    progress(futures)
+    results = client.gather(futures)
+    for idx, result in enumerate(results):
+        slide_manifest.at[idx, "tiles_url"] = result["tiles_url"]
+    return slide_manifest
+
+
+def __infer_tile_labels(
     tiles_urlpath: str,
-    tile_size: Optional[int],
-    filter_query: str,
-    requested_magnification: Optional[int],
+    slide_id: str,
+    output_urlpath: str,
     torch_model_repo_or_dir: str,
     model_name: str,
     num_cores: int,
     batch_size: int,
-    output_urlpath: str,
     kwargs: dict,
     use_gpu: bool,
-    dask_options: dict,
     insecure: bool,
     storage_options: dict,
     output_storage_options: dict,
@@ -144,7 +220,6 @@ def infer_tile_labels(
     Decorates existing slide_tiles with additional columns corresponding to class prediction/scores from the model

     Args:
-        slide_urlpath (str): url/path to slide image (virtual slide formats compatible with TiffSlide, .svs, .tif, .scn, ...)
         
tiles_urlpath (str): path to a slide-tile manifest file (.tiles.parquet) tile_size (int): size of tiles to use (at the requested magnification) filter_query (str): pandas query by which to filter tiles based on their various tissue detection scores @@ -161,10 +236,26 @@ def infer_tile_labels( Returns: pd.DataFrame: augmented tiles dataframe """ - if insecure: ssl._create_default_https_context = ssl._create_unverified_context + ofs, output_path_prefix = fsspec.core.url_to_fs( + output_urlpath, + **output_storage_options, + ) + + output_file = str(Path(output_path_prefix) / f"{slide_id}.tiles.parquet") + + if ofs.exists(output_file): + logger.info(f"outputs already exist: {output_file}") + return + + tiles_df = ( + pd.read_parquet(tiles_urlpath, storage_options=storage_options) + .reset_index() + .set_index("address") + ) + # Get our model and transforms and construct the Tile Dataset and Classifier if os.path.exists(torch_model_repo_or_dir): source = "local" @@ -188,41 +279,6 @@ def infer_tile_labels( if not isinstance(ttm, TorchTransformModel): raise RuntimeError(f"Not a valid model, loaded model was of type {type(ttm)}") - # load/generate tiles - if tiles_urlpath: - with open(tiles_urlpath, **storage_options) as of: - df = pd.read_parquet(of).reset_index().set_index("address") - elif tile_size is not None: - configure_dask_client(**dask_options) - slide_id = Path(slide_urlpath).stem - tiles_h5_urlpath = str(Path(output_urlpath) / f"{slide_id}.tiles.h5") - - df = detect_tissue( - slide_urlpath, - tile_size=tile_size, - requested_magnification=requested_magnification, - filter_query=filter_query, - batch_size=batch_size, - storage_options=storage_options, - output_urlpath_prefix=output_urlpath + "/" + slide_id, - output_storage_options=output_storage_options, - ) - - df = save_tiles( - df, - slide_urlpath, - tiles_h5_urlpath, - batch_size, - storage_options, - output_storage_options, - ) - - df = df.reset_index().set_index("address") - else: - raise RuntimeError( - "Need to specify tiles_urlpath or both slide_urlpath and tile_size" - ) - pin_memory = False if use_gpu and torch.cuda.is_available(): pin_memory = True @@ -236,8 +292,10 @@ def infer_tile_labels( transform = ttm.transform ttm.model.to(device) - ds = HDF5Dataset(df, preprocess=preprocess, storage_options=storage_options) - loader = DataLoader(ds, num_workers=num_cores, batch_size=batch_size, pin_memory=pin_memory) + ds = HDF5Dataset(tiles_df, preprocess=preprocess, storage_options=storage_options) + loader = DataLoader( + ds, num_workers=num_cores, batch_size=batch_size, pin_memory=pin_memory + ) # Generate aggregate dataframe with torch.no_grad(): @@ -250,17 +308,27 @@ def infer_tile_labels( ] ) - if hasattr(ttm, "column_labels"): logger.info(f"Mapping column labels -> {ttm.column_labels}") df_scores = df_scores.rename(columns=ttm.column_labels) - df_output = df.join(df_scores) + df_output = tiles_df.join(df_scores) df_output.columns = df_output.columns.astype(str) df_output.index.name = "address" logger.info(df_output) - return df_output + + with ofs.open(output_file, "wb") as of: + df_output.to_parquet(of) + + # Save our properties and params + properties = { + "tiles_url": ofs.unstrip_protocol(output_file), + "total_tiles": len(df_output), + "available_labels": list(df_output.columns), + } + + return properties def fire_cli(): diff --git a/src/luna/pathology/cli/merge_shape_features.py b/src/luna/pathology/cli/merge_shape_features.py index 10cbd82b..cda169c7 100644 --- a/src/luna/pathology/cli/merge_shape_features.py +++ 
b/src/luna/pathology/cli/merge_shape_features.py
@@ -1,20 +1,20 @@
-import fire
-import glob
-from typing import List, Union
-import pandas as pd
 from pathlib import Path
+from typing import List, Union
+
+import fire
 import fsspec
-from fsspec import open
+import pandas as pd

-from luna.common.utils import get_config, timed, save_metadata
-from luna.common.models import ShapeFeaturesSchema
+from luna.common.utils import get_config, save_metadata, timed


 @timed
 @save_metadata
 def cli(
-    shape_features_urlpaths: Union[str,List[str]] = "???",
+    shape_features_urlpaths: Union[str, List[str]] = "???",
     output_urlpath: str = ".",
+    flatten_index: bool = True,
+    fraction_not_null: float = 0.5,
     storage_options: dict = {},
     output_storage_options: dict = {},
     local_config: str = "",
@@ -24,6 +24,7 @@ def cli(
     Args:
         shape_features_urlpaths (List[str]): URL/paths to shape features parquet files
         output_urlpath (str): URL/path to output parquet file
+        flatten_index (bool): flatten the multi-index column names in the wide format
+        fraction_not_null (float): minimum fraction of non-null values required to keep a column in the wide format
         storage_options (dict): storage options to pass to reading functions
         output_storage_options (dict): storage options to pass to writing functions
         local_config (str): local config yaml file
@@ -35,36 +36,49 @@ def cli(

     dfs = []  # type: list[str]
     if type(config["shape_features_urlpaths"]) == list:
-        for urlpath in config['shape_features_urlpaths']:
-            fs, path = fsspec.core.url_to_fs(urlpath, **config['storage_options'])
-            with fs.open(path, 'rb') as of:
+        for urlpath in config["shape_features_urlpaths"]:
+            fs, path = fsspec.core.url_to_fs(urlpath, **config["storage_options"])
+            with fs.open(path, "rb") as of:
                 df = pd.read_parquet(of)
-                ShapeFeaturesSchema.validate(df)
                 dfs.append(df)
     else:
-        fs, path_prefix = fsspec.core.url_to_fs(config['shape_features_urlpaths'],
-                                                **config['storage_options'])
+        fs, path_prefix = fsspec.core.url_to_fs(
+            config["shape_features_urlpaths"], **config["storage_options"]
+        )
         for path in fs.glob(f"{path_prefix}/**/shape_features.parquet"):
-            with fs.open(path, 'rb') as of:
+            with fs.open(path, "rb") as of:
                 df = pd.read_parquet(of)
-                ShapeFeaturesSchema.validate(df)
                 dfs.append(df)

     df = pd.concat(dfs)
-    fs, path_prefix = fsspec.core.url_to_fs(config['output_urlpath'],
-                                            **config['output_storage_options'])
-    path = Path(path_prefix) / 'long_shape_features.parquet'
+    fs, path_prefix = fsspec.core.url_to_fs(
+        config["output_urlpath"], **config["output_storage_options"]
+    )
+    path = Path(path_prefix) / "long_shape_features.parquet"

-    with fs.open(path, 'wb', **config["output_storage_options"]) as of:
+    with fs.open(path, "wb", **config["output_storage_options"]) as of:
         df.to_parquet(of)

-    df.variable = df.variable.str.replace('µ','u').replace(r'(: |:)', ' ', regex=True).replace('[^a-zA-Z0-9 \n]', '', regex=True)
-    wide_path = Path(path_prefix) / 'wide_shape_features.parquet'
-    wide_df = df.pivot(index="slide_id", columns=["Parent", "Class", "variable"], values="value")
-    with fs.open(wide_path, 'wb', **config["output_storage_options"]) as of:
+    df.variable = (
+        df.variable.str.replace("µ", "u")
+        .replace(r"(: |:)", " ", regex=True)
+        .replace("[^a-zA-Z0-9 \n]", "", regex=True)
+    )
+    wide_path = Path(path_prefix) / "wide_shape_features.parquet"
+    wide_df = df.pivot(
+        index="slide_id", columns=["Parent", "Class", "variable"], values="value"
+    )
+    wide_df = wide_df.loc[
+        :, wide_df.isna().sum() < len(wide_df) * config["fraction_not_null"]
+    ]
+    if config["flatten_index"]:
+        wide_df.columns = ["_".join(col).strip() for col in wide_df.columns.values]
+        
wide_df.columns = wide_df.columns.str.replace(" ", "_") + + with fs.open(wide_path, "wb", **config["output_storage_options"]) as of: wide_df.to_parquet(of) - return { + return { "long_shape_features": fs.unstrip_protocol(str(path)), "wide_shape_features": fs.unstrip_protocol(str(wide_path)), "num_features": len(wide_df.columns), @@ -74,7 +88,6 @@ def cli( def fire_cli(): fire.Fire(cli) + if __name__ == "__main__": fire_cli() - - diff --git a/src/luna/pathology/cli/run_stardist_cell_detection.py b/src/luna/pathology/cli/run_stardist_cell_detection.py index 5c3eef68..a9924850 100644 --- a/src/luna/pathology/cli/run_stardist_cell_detection.py +++ b/src/luna/pathology/cli/run_stardist_cell_detection.py @@ -5,14 +5,17 @@ import fsspec import pandas as pd from loguru import logger +from pandera.typing import DataFrame +from luna.common.dask import get_or_create_dask_client +from luna.common.models import SlideSchema from luna.common.runners import runner_provider from luna.common.utils import get_config, local_cache_urlpath, save_metadata, timed @timed @save_metadata -def stardist_simple( +def stardist_simple_cli( slide_urlpath: str = "???", cell_expansion_size: float = "???", # type: ignore image_type: str = "???", @@ -47,16 +50,8 @@ def stardist_simple( """ config = get_config(vars()) - fs, output_path = fsspec.core.url_to_fs( - config["output_urlpath"], **config["output_storage_options"] - ) - slide_id = Path(config["slide_urlpath"]).stem - output_header_file = Path(output_path) / f"{slide_id}_cell_objects.parquet" - if fs.exists(output_header_file): - logger.info(f"outputs already exist: {config['output_urlpath']}") - return - df = stardist_simple_main( + return __stardist_simple( config["slide_urlpath"], config["cell_expansion_size"], config["image_type"], @@ -65,36 +60,57 @@ def stardist_simple( config["num_cores"], config["image"], config["use_singularity"], - config['max_heap_size'], + config["max_heap_size"], config["storage_options"], config["output_storage_options"], ) - with fs.open(output_header_file, "wb") as of: - df.to_parquet(of) - - logger.info("generated cell data:") - logger.info(df) - - output_geojson_file = Path(output_path) / "cell_detections.geojson" - - properties = { - "cell_objects": str(output_header_file), - "feature_data": str(output_header_file), - "geojson_features": str(output_geojson_file), - "spatial": True, - "total_cells": len(df), - "segment_keys": {"slide_id": slide_id}, - } - return properties +def stardist_simple( + slide_manifest: DataFrame[SlideSchema], + cell_expansion_size: float, + image_type: str, + output_urlpath: str, + debug_opts: str, + num_cores: int, + image: str, + use_singularity: bool, + max_heap_size: str, + storage_options: dict, + output_storage_options: dict, + annotation_column: str = "stardist_geojson_url", +) -> pd.DataFrame: + client = get_or_create_dask_client() + + futures = [] + for row in slide_manifest.itertuples(name="Slide"): + future = client.submit( + __stardist_simple, + row.url, + cell_expansion_size, + image_type, + output_urlpath, + debug_opts, + num_cores, + image, + use_singularity, + max_heap_size, + storage_options, + output_storage_options, + ) + futures.append(future) + results = client.gather(futures) + for idx, result in enumerate(results): + slide_manifest.at[idx, annotation_column] = results["geojson_url"] + + return slide_manifest @local_cache_urlpath( file_key_write_mode={"slide_urlpath": "r"}, dir_key_write_mode={"output_urlpath": "w"}, ) -def stardist_simple_main( +def __stardist_simple( slide_urlpath: 
    str,
    cell_expansion_size: float,
    image_type: str,
@@ -128,7 +144,13 @@ def stardist_simple_main(
     fs, slide_path = fsspec.core.url_to_fs(slide_urlpath, **storage_options)
     ofs, output_path = fsspec.core.url_to_fs(output_urlpath, **output_storage_options)

-    if ofs.protocol == 'file' and not ofs.exists(output_path):
+    slide_id = Path(slide_urlpath).stem
+    output_header_file = Path(output_path) / f"{slide_id}_cell_objects.parquet"
+    if ofs.exists(output_header_file):
+        logger.info(f"outputs already exist: {output_header_file}")
+        return
+
+    if ofs.protocol == "file" and not ofs.exists(output_path):
         ofs.mkdir(output_path)

     runner_type = "DOCKER"
@@ -159,8 +181,11 @@ def stardist_simple_main(
     }
     runner = runner_provider.get(runner_type, **runner_config)
     executor = runner.run()
-    for line in executor:
-        print(line)
+    try:
+        for line in executor:
+            logger.info(line)
+    except TypeError:
+        logger.warning(f"{executor} is not iterable")

     stardist_output = Path(output_path) / "cell_detections.tsv"

@@ -172,12 +197,28 @@ def stardist_simple_main(
         columns={"Centroid X µm": "x_coord", "Centroid Y µm": "y_coord"}
     )  # x,ys follow this convention

-    return df
+    with ofs.open(output_header_file, "wb") as of:
+        df.to_parquet(of)
+
+    logger.info("generated cell data:")
+    logger.info(df)
+
+    output_geojson_file = Path(output_path) / "cell_detections.geojson"
+
+    properties = {
+        "geojson_url": ofs.unstrip_protocol(str(output_geojson_file)),
+        "tsv_url": ofs.unstrip_protocol(str(stardist_output)),
+        "parquet_url": ofs.unstrip_protocol(str(output_header_file)),
+        "spatial": True,
+        "total_cells": len(df),
+    }
+
+    return properties


 @timed
 @save_metadata
-def stardist_cell_lymphocyte(
+def stardist_cell_lymphocyte_cli(
     slide_urlpath: str = "???",
     output_urlpath: str = ".",
     num_cores: int = 1,
@@ -204,19 +245,11 @@ def stardist_cell_lymphocyte(
-        pd.DataFrame: cell detections
+        dict: metadata about function call
     """
     config = get_config(vars())
-
-    fs, output_path = fsspec.core.url_to_fs(
-        config["output_urlpath"], **config["output_storage_options"]
-    )
     slide_id = Path(config["slide_urlpath"]).stem
-    output_header_file = Path(output_path) / f"{slide_id}_cell_objects.parquet"
-    if fs.exists(output_header_file):
-        logger.info(f"outputs already exist: {config['output_urlpath']}")
-        return
-
-    df = stardist_cell_lymphocyte_main(
+    properties = __stardist_cell_lymphocyte(
         config["slide_urlpath"],
         config["output_urlpath"],
+        slide_id,
         config["num_cores"],
         config["use_gpu"],
         config["image"],
@@ -225,34 +258,54 @@ def stardist_cell_lymphocyte(
         config["storage_options"],
         config["output_storage_options"],
     )
+    return properties

-    with fs.open(output_header_file, "wb") as of:
-        df.to_parquet(of)
-
-    logger.info("generated cell data:")
-    logger.info(df)
-
-    output_geojson_file = Path(output_path) / "cell_detections.geojson"
-
-    properties = {
-        "cell_objects": str(output_header_file),
-        "feature_data": str(output_header_file),
-        "geojson_features": str(output_geojson_file),
-        "spatial": True,
-        "total_cells": len(df),
-        "segment_keys": {"slide_id": slide_id},
-    }
-    return properties
+
+def stardist_cell_lymphocyte(
+    slide_manifest: DataFrame[SlideSchema],
+    output_urlpath: str,
+    num_cores: int,
+    use_gpu: bool = False,
+    image: str = "mskmind/qupath-stardist:0.4.3",
+    use_singularity: bool = False,
+    max_heap_size: str = "64G",
+    storage_options: dict = {},
+    output_storage_options: dict = {},
+    annotation_column: str = "lymphocyte_geojson_url",
+):
+    client = get_or_create_dask_client()
+
+    futures = []
+    for row in slide_manifest.itertuples(name="Slide"):
+        future = client.submit(
+            __stardist_cell_lymphocyte,
+            row.url,
+            output_urlpath,
+            row.id,
+            num_cores,
+            use_gpu,
+            image,
+            use_singularity,
+            max_heap_size,
+            storage_options,
+            output_storage_options,
+        )
+        futures.append(future)
+    results = client.gather(futures)
+    for idx, result in enumerate(results):
+        slide_manifest.at[idx, annotation_column] = result["geojson_url"]
+
+    return slide_manifest


 @local_cache_urlpath(
     file_key_write_mode={"slide_urlpath": "r"},
     dir_key_write_mode={"output_urlpath": "w"},
 )
-def stardist_cell_lymphocyte_main(
+def __stardist_cell_lymphocyte(
     slide_urlpath: str,
     output_urlpath: str,
+    slide_id: str,
     num_cores: int,
     use_gpu: bool = False,
     image: str = "mskmind/qupath-stardist:0.4.3",
@@ -276,10 +329,14 @@ def stardist_cell_lymphocyte_main(
-        pd.DataFrame: cell detections
+        dict: metadata about function call
     """
     fs, slide_path = fsspec.core.url_to_fs(slide_urlpath, **storage_options)
-    ofs, output_path = fsspec.core.url_to_fs(output_urlpath, **output_storage_options)
-    if ofs.protocol == 'file' and not ofs.exists(output_path):
+    ofs, output_path = fsspec.core.url_to_fs(output_urlpath, **output_storage_options)
+
+    output_header_file = Path(output_path) / f"{slide_id}_cell_objects.parquet"
+    if ofs.exists(output_header_file):
+        logger.info(f"outputs already exist: {output_header_file}")
+        return
+
+    if ofs.protocol == "file" and not ofs.exists(output_path):
         ofs.mkdir(output_path)

     qupath_cmd = "QuPath-cpu"
@@ -290,7 +347,7 @@ def stardist_cell_lymphocyte_main(
     if use_singularity:
         runner_type = "SINGULARITY"

     slide_filename = Path(slide_path).name
     command = f"{qupath_cmd} script --image /inputs/{slide_filename} /scripts/stardist_nuclei_and_lymphocytes.groovy"
     logger.info(f"Launching {runner_type} container:")
@@ -316,8 +372,11 @@ def stardist_cell_lymphocyte_main(
     }
     runner = runner_provider.get(runner_type, **runner_config)
     executor = runner.run()
-    for line in executor:
-        print(line)
+    try:
+        for line in executor:
+            logger.info(line)
+    except TypeError:
+        logger.warning(f"{executor} is not iterable")

     stardist_output = Path(output_path) / "cell_detections.tsv"

@@ -329,14 +388,30 @@ def stardist_cell_lymphocyte_main(
         columns={"Centroid X µm": "x_coord", "Centroid Y µm": "y_coord"}
     )  # x,ys follow this convention

-    return df
+    with ofs.open(output_header_file, "wb") as of:
+        df.to_parquet(of)
+
+    logger.info("generated cell data:")
+    logger.info(df)
+
+    output_geojson_file = Path(output_path) / "cell_detections.geojson"
+
+    properties = {
+        "geojson_url": ofs.unstrip_protocol(str(output_geojson_file)),
+        "tsv_url": ofs.unstrip_protocol(str(stardist_output)),
+        "parquet_url": ofs.unstrip_protocol(str(output_header_file)),
+        "spatial": True,
+        "total_cells": len(df),
+    }
+
+    return properties


 def fire_cli():
     fire.Fire(
         {
-            "simple": stardist_simple,
-            "cell-lymphocyte": stardist_cell_lymphocyte,
+            "simple": stardist_simple_cli,
+            "cell-lymphocyte": stardist_cell_lymphocyte_cli,
         }
     )
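With this change the per-slide helpers are private and everything routes through the SlideSchema manifest. A minimal sketch of how the new API might be driven end to end (paths and project name are illustrative; slide_etl and stardist_cell_lymphocyte are the functions from this patch):

    # Illustrative sketch only -- paths and project names are made up.
    from luna.pathology.cli.run_stardist_cell_detection import stardist_cell_lymphocyte
    from luna.pathology.cli.slide_etl import slide_etl

    # Build a SlideSchema manifest; slide_etl now accepts a single URL or a list.
    slide_manifest = slide_etl(
        "tests/testdata/pathology/123.svs",
        project_name="demo",
        output_urlpath="/tmp/demo_project",
    )

    # Fan lymphocyte detection out over Dask workers; the returned manifest
    # gains a "lymphocyte_geojson_url" column pointing at each slide's geojson.
    slide_manifest = stardist_cell_lymphocyte(
        slide_manifest,
        output_urlpath="/tmp/demo_project/cells",
        num_cores=4,
    )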
diff --git a/src/luna/pathology/cli/run_tissue_detection.py b/src/luna/pathology/cli/run_tissue_detection.py
index e2accb9f..76088cdf 100644
--- a/src/luna/pathology/cli/run_tissue_detection.py
+++ b/src/luna/pathology/cli/run_tissue_detection.py
@@ -1,17 +1,15 @@
 # General imports
 from functools import partial
 from pathlib import Path
-from typing import Optional, Union
-from urllib.parse import urlparse
+from typing import Dict, Optional

+import dask.bag as db
 import fire  # type: ignore
 import fsspec  # type: ignore
 import numpy as np
 import pandas as pd
 from dask.distributed import progress
-from fsspec import open  # type: ignore
 from loguru import logger
-from multimethod import multimethod
 from pandera.typing import DataFrame
 from PIL import Image, ImageEnhance
 from skimage.color import rgb2gray  # type: ignore
@@ -19,15 +17,15 @@
 from tiffslide import TiffSlide

 from luna.common.dask import configure_dask_client, get_or_create_dask_client
-from luna.common.models import SlideSchema, Tile
+from luna.common.models import Tile
 from luna.common.utils import (
     get_config,
-    grouper,
     local_cache_urlpath,
+    make_temp_directory,
     save_metadata,
     timed,
 )
-from luna.pathology.cli.generate_tiles import generate_tiles
+from luna.pathology.cli.generate_tiles import __generate_tiles, generate_tiles
 from luna.pathology.common.utils import (
     get_array_from_tile,
     get_downscaled_thumbnail,
@@ -37,15 +35,16 @@
 )


-def compute_otsu_score(tile: Tile, slide: TiffSlide, otsu_threshold: float) -> float:
+def compute_otsu_score(tile: Tile, slide_path: str, otsu_threshold: float) -> float:
     """
     Return otsu score for the tile.
     Args:
-        row (pd.Series): row with tile metadata
-        slide_urlpath (str): path to slide
+        tile (Tile): tile with metadata
+        slide_path (str): path to slide
         otsu_threshold (float): otsu threshold value
     """
-    tile_arr = get_array_from_tile(tile, slide, 10)
+    with TiffSlide(slide_path) as slide:
+        tile_arr = get_array_from_tile(tile, slide, 10)
     score = np.mean((rgb2gray(tile_arr) < otsu_threshold).astype(int))
     return score
@@ -58,7 +57,7 @@ def get_purple_score(x):

 def compute_purple_score(
     tile: Tile,
-    slide: TiffSlide,
+    slide_path: str,
 ) -> float:
     """
     Return purple score for the tile.
     Args:
@@ -66,13 +65,14 @@ def compute_purple_score(
-        row (pd.Series): row with tile metadata
-        slide_url (str): path to slide
+        tile (Tile): tile with metadata
+        slide_path (str): path to slide
     """
-    tile_arr = get_array_from_tile(tile, slide, 10)
+    with TiffSlide(slide_path) as slide:
+        tile_arr = get_array_from_tile(tile, slide, 10)
     return get_purple_score(tile_arr)


 def compute_stain_score(
     tile: Tile,
-    slide: TiffSlide,
+    slide_path: str,
     vectors,
     channel,
     stain_threshold: float,
@@ -86,7 +86,8 @@ def compute_stain_score(
         channel (int): stain channel
         stain_threshold (float): stain threshold value
     """
-    tile_arr = get_array_from_tile(tile, slide, 10)
+    with TiffSlide(slide_path) as slide:
+        tile_arr = get_array_from_tile(tile, slide, 10)
     stain = pull_stain_channel(tile_arr, vectors=vectors, channel=channel)
     score = np.mean(stain > stain_threshold)
     return score
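A note on the signature change above: the scorers now take a slide path rather than an open TiffSlide handle, and each call opens the slide itself, so only a plain string ever crosses process boundaries and the functions stay easy to run on distributed workers. The same pattern in isolation (score_tile and its mean-intensity "score" are illustrative, not part of the patch):

    # Illustrative sketch of the open-per-call pattern used by the scorers above.
    from tiffslide import TiffSlide
    from luna.pathology.common.utils import get_array_from_tile

    def score_tile(tile, slide_path: str) -> float:
        # Open the slide inside the call so the function can run in any worker
        # process; an open slide handle generally cannot be pickled.
        with TiffSlide(slide_path) as slide:
            tile_arr = get_array_from_tile(tile, slide, 10)
        return float(tile_arr.mean())  # stand-in for a real tissue score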
config["output_urlpath"], - config["output_storage_options"], - ) - with output_filesystem.open(output_header_file, "wb") as of: - logger.info(f"saving to {output_header_file}") - df.to_parquet(of) - properties = { - "tiles_manifest": output_header_file, - "total_tiles": len(df), - } + tiles_urlpath = config["tiles_urlpath"] + + with make_temp_directory() as temp_dir: + if not tiles_urlpath: + result = __generate_tiles( + config["slide_urlpath"], + config["tile_size"], + temp_dir, + config["tile_magnification"], + config["storage_options"], + ) + tiles_urlpath = result["tiles_url"] + + properties = __detect_tissue( + config["slide_urlpath"], + tiles_urlpath, + slide_id, + config["thumbnail_magnification"], + config["filter_query"], + config["batch_size"], + config["output_urlpath"], + config["storage_options"], + config["output_storage_options"], + ) return properties -@multimethod def detect_tissue( slide_manifest: DataFrame, - tile_size: int, - thumbnail_magnification: Optional[int] = None, - tile_magnification: Optional[int] = None, - filter_query: str = "", - batch_size: int = 2000, - storage_options: dict = {}, - output_urlpath_prefix: str = "", - output_storage_options: dict = {}, -): - slide_manifest = SlideSchema(slide_manifest) - dfs = [] - for row in slide_manifest.itertuples(): - df = detect_tissue( - row.url, - tile_size, - thumbnail_magnification, - tile_magnification, - filter_query, - batch_size, - storage_options, - output_urlpath_prefix, - output_storage_options, - ) - df["id"] = row.id - dfs.append(df) - tiles = pd.concat(dfs) - return tiles.merge(slide_manifest, on="id") - - -# this doesn't work: -# client = get_client() -# futures = { -# row.id: client.submit(detect_tissue, -# row.url, -# tile_size, -# thumbnail_magnification, -# filter_query, -# storage_options, -# output_urlpath_prefix, -# output_storage_options, -# ) for row in slide_manifest.itertuples() -# } -# progress(futures) -# results = client.gather(futures) -# for k, v in results.items(): -# v['id'] = str(k) -# tiles = pd.concat(results) -# return tiles.merge(slide_manifest, on='id') - - -@multimethod -def detect_tissue( - slide_urlpaths: Union[str, list[str]], - tile_size: int, + tile_size: Optional[int] = None, thumbnail_magnification: Optional[int] = None, tile_magnification: Optional[int] = None, filter_query: str = "", batch_size: int = 2000, storage_options: dict = {}, - output_urlpath_prefix: str = "", + output_urlpath: str = ".", output_storage_options: dict = {}, ) -> pd.DataFrame: - if type(slide_urlpaths) == str: - slide_urlpaths = [slide_urlpaths] - dfs = [] - for slide_urlpath in slide_urlpaths: - df = detect_tissue( - slide_urlpath, - "", - tile_size, - thumbnail_magnification, - tile_magnification, - filter_query, - batch_size, - storage_options, - output_urlpath_prefix, - output_storage_options, - ) - o = urlparse(slide_urlpath) - df["id"] = Path(o.path).stem - dfs.append(df) - return pd.concat(dfs) + client = get_or_create_dask_client() + + with make_temp_directory() as temp_dir: + if "tiles_url" not in slide_manifest.columns: + slide_manifest = generate_tiles( + slide_manifest, + tile_size, + temp_dir, + tile_magnification, + storage_options, + ) + + futures = [] + for slide in slide_manifest.itertuples(name="Slide"): + future = client.submit( + __detect_tissue, + slide.url, + slide.tiles_url, + slide.id, + thumbnail_magnification, + filter_query, + batch_size, + output_urlpath, + storage_options, + output_storage_options, + ) + futures.append(future) + progress(futures) + + results = 
client.gather(futures) + + for idx, result in enumerate(results): + slide_manifest.at[idx, "tiles_url"] = result["tiles_url"] + + return slide_manifest -@multimethod @local_cache_urlpath( file_key_write_mode={ "slide_urlpath": "r", - }, + } ) -def detect_tissue( +def __detect_tissue( slide_urlpath: str, - tiles_urlpath: str = "", - tile_size: Optional[int] = None, + tiles_urlpath: str, + slide_id: str, thumbnail_magnification: Optional[int] = None, - tile_magnification: Optional[int] = None, filter_query: str = "", batch_size: int = 2000, + output_urlpath: str = ".", storage_options: dict = {}, - output_urlpath_prefix: str = "", output_storage_options: dict = {}, -) -> pd.DataFrame: - """Run simple/deterministic tissue detection algorithms based on a filter query, to reduce tiles to those (likely) to contain actual tissue - Args: - slide_urlpath (str): slide url/path - tile_size (int): size of tiles to use (at the requested magnification) - thumbnail_magnification (Optional[int]): Magnification scale at which to perform computation - filter_query (str): pandas query by which to filter tiles based on their various tissue detection scores - storage_options (dict): storage options to pass to reading functions - output_urlpath_prefix (str): output url/path prefix - output_storage_options (dict): output storage optoins - Returns: - pd.DataFrame - """ - - client = get_or_create_dask_client() +) -> Dict: + output_filesystem, output_path = fsspec.core.url_to_fs( + output_urlpath, **output_storage_options + ) - if tiles_urlpath: - with open(tiles_urlpath, **storage_options) as of: - tiles_df = pd.read_parquet(of) - elif type(tile_size) == int: - tiles_df = generate_tiles( - slide_urlpath, tile_size, storage_options, tile_magnification + tiles_output_path = str(Path(output_path) / f"{slide_id}.tiles.parquet") + if output_filesystem.exists(tiles_output_path): + logger.info( + "Outputs already exist: {output_filesystem.unstrip_protocol(tiles_output_path)}" ) - else: - raise RuntimeError("Specify tile_size or tile_urlpath") + return + + tiles_df = pd.read_parquet(tiles_urlpath, storage_options=storage_options) + + get_or_create_dask_client() with TiffSlide(slide_urlpath) as slide: logger.info(f"Slide dimensions {slide.dimensions}") @@ -311,63 +261,52 @@ def detect_tissue( sample_arr = get_downscaled_thumbnail(slide, to_mag_scale_factor) logger.info(f"Sample array size: {sample_arr.shape}") - if output_urlpath_prefix: - with open( - output_urlpath_prefix + "/sample_arr.png", "wb", **output_storage_options - ) as f: - Image.fromarray(sample_arr).save(f, format="png") + with output_filesystem.open(Path(output_path) / "sample_arr.png", "wb") as f: + Image.fromarray(sample_arr).save(f, format="png") logger.info("Enhancing image...") enhanced_sample_img = ImageEnhance.Contrast( ImageEnhance.Color(Image.fromarray(sample_arr)).enhance(10) ).enhance(10) - if output_urlpath_prefix: - with open( - output_urlpath_prefix + "/enhanced_sample_arr.png", - "wb", - **output_storage_options, - ) as f: - enhanced_sample_img.save(f, format="png") + with output_filesystem.open( + Path(output_path) / "enhanced_sample_arr.png", + "wb", + ) as f: + enhanced_sample_img.save(f, format="png") logger.info("HSV space conversion...") hsv_sample_arr = np.array(enhanced_sample_img.convert("HSV")) - if output_urlpath_prefix: - with open( - output_urlpath_prefix + "/hsv_sample_arr.png", - "wb", - **output_storage_options, - ) as f: - Image.fromarray(np.array(hsv_sample_arr)).save(f, "png") + with output_filesystem.open( + 
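With the multimethod overloads gone, detect_tissue has a single manifest-in, manifest-out signature. A sketch of a call (assuming a SlideSchema manifest from slide_etl; the query and output path are illustrative):

    # Illustrative sketch only.
    slide_manifest = detect_tissue(
        slide_manifest,
        tile_size=256,
        filter_query="otsu_score > 0.5 and purple_score > 0.1",
        output_urlpath="/tmp/demo_project/tiles",
    )
    # Each row's tiles_url now points at that slide's filtered tile manifest.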
Path(output_path) / "hsv_sample_arr.png", + "wb", + ) as f: + Image.fromarray(np.array(hsv_sample_arr)).save(f, "png") logger.info("Calculating max saturation...") hsv_max_sample_arr = np.max(hsv_sample_arr[:, :, 1:3], axis=2) - if output_urlpath_prefix: - with open( - output_urlpath_prefix + "/hsv_max_sample_arr.png", - "wb", - **output_storage_options, - ) as f: - Image.fromarray(hsv_max_sample_arr).save(f, "png") + with output_filesystem.open( + Path(output_path) / "hsv_max_sample_arr.png", + "wb", + ) as f: + Image.fromarray(hsv_max_sample_arr).save(f, "png") logger.info("Calculate and filter shadow mask...") shadow_mask = np.where(np.max(hsv_sample_arr, axis=2) < 10, 255, 0).astype(np.uint8) - if output_urlpath_prefix: - with open( - output_urlpath_prefix + "/shadow_mask.png", "wb", **output_storage_options - ) as f: - Image.fromarray(shadow_mask).save(f, "png") + with output_filesystem.open( + Path(output_path) / "shadow_mask.png", + "wb", + ) as f: + Image.fromarray(shadow_mask).save(f, "png") logger.info("Filter out shadow/dust/etc...") sample_arr_filtered = np.where( np.expand_dims(shadow_mask, 2) == 0, sample_arr, np.full(sample_arr.shape, 255) ).astype(np.uint8) - if output_urlpath_prefix: - with open( - output_urlpath_prefix + "/sample_arr_filtered.png", - "wb", - **output_storage_options, - ) as f: - Image.fromarray(sample_arr_filtered).save(f, "png") + with output_filesystem.open( + Path(output_path) / "sample_arr_filtered.png", + "wb", + ) as f: + Image.fromarray(sample_arr_filtered).save(f, "png") logger.info("Calculating otsu threshold...") threshold = threshold_otsu(rgb2gray(sample_arr_filtered)) @@ -390,104 +329,101 @@ def detect_tissue( ) # Get the otsu mask - if output_urlpath_prefix: - logger.info("Saving otsu mask") - otsu_mask = np.where(rgb2gray(sample_arr_filtered) < threshold, 255, 0).astype( - np.uint8 - ) - with open( - output_urlpath_prefix + "/otsu_mask.png", "wb", **output_storage_options - ) as f: - Image.fromarray(otsu_mask).save(f, "png") - - if output_urlpath_prefix: - logger.info("Saving stain thumbnail") - deconv_sample_arr = pull_stain_channel( - sample_arr_filtered, vectors=stain_vectors - ) - with open( - output_urlpath_prefix + "/deconv_sample_arr.png", - "wb", - **output_storage_options, - ) as f: - Image.fromarray(deconv_sample_arr).save(f, "png") - - logger.info("Saving stain masks") - stain0_mask = np.where( - deconv_sample_arr[..., 0] > threshold_stain0, 255, 0 - ).astype(np.uint8) - stain1_mask = np.where( - deconv_sample_arr[..., 1] > threshold_stain1, 255, 0 - ).astype(np.uint8) - with open( - output_urlpath_prefix + "/stain0_mask.png", "wb", **output_storage_options - ) as f: - Image.fromarray(stain0_mask).save(f, "png") - with open( - output_urlpath_prefix + "/stain1_mask.png", "wb", **output_storage_options - ) as f: - Image.fromarray(stain1_mask).save(f, "png") + logger.info("Saving otsu mask") + otsu_mask = np.where(rgb2gray(sample_arr_filtered) < threshold, 255, 0).astype( + np.uint8 + ) + with output_filesystem.open(Path(output_path) / "otsu_mask.png", "wb") as f: + Image.fromarray(otsu_mask).save(f, "png") + + logger.info("Saving stain thumbnail") + deconv_sample_arr = pull_stain_channel(sample_arr_filtered, vectors=stain_vectors) + with output_filesystem.open( + Path(output_path) / "deconv_sample_arr.png", + "wb", + ) as f: + Image.fromarray(deconv_sample_arr).save(f, "png") + + logger.info("Saving stain masks") + stain0_mask = np.where(deconv_sample_arr[..., 0] > threshold_stain0, 255, 0).astype( + np.uint8 + ) + stain1_mask = 
np.where(deconv_sample_arr[..., 1] > threshold_stain1, 255, 0).astype( + np.uint8 + ) + with output_filesystem.open( + Path(output_path) / "stain0_mask.png", + "wb", + ) as f: + Image.fromarray(stain0_mask).save(f, "png") + with output_filesystem.open( + Path(output_path) / "stain1_mask.png", + "wb", + ) as f: + Image.fromarray(stain1_mask).save(f, "png") if filter_query: - def f_many(iterator, tile_fn): - with TiffSlide(slide_urlpath) as slide: - return [tile_fn(tile=x, slide=slide) for x in iterator] + def f_many(iterator, tile_fn, **kwargs): + return [tile_fn(tile=x, **kwargs) for x in iterator] + chunks = db.from_sequence( + tiles_df.itertuples(name="Tile"), partition_size=batch_size + ) + results = {} if "otsu_score" in filter_query: logger.info(f"Starting otsu thresholding, threshold={threshold}") - chunks = grouper(tiles_df.itertuples(name="Tile"), batch_size) - otsu_tile_fn = partial(compute_otsu_score, otsu_threshold=threshold) - - futures = client.map(partial(f_many, tile_fn=otsu_tile_fn), chunks) - progress(futures) - tiles_df["otsu_score"] = np.concatenate(client.gather(futures)) + # chunks = grouper(tiles_df.itertuples(name="Tile"), batch_size) + results["otsu_score"] = chunks.map_partitions( + partial(f_many, tile_fn=compute_otsu_score), + slide_path=slide_urlpath, + otsu_threshold=threshold, + ) if "purple_score" in filter_query: logger.info("Starting purple scoring") - chunks = grouper(tiles_df.itertuples(name="Tile"), batch_size) - - futures = client.map(partial(f_many, tile_fn=compute_purple_score), chunks) - progress(futures) - tiles_df["purple_score"] = np.concatenate(client.gather(futures)) + results["purple_score"] = chunks.map_partitions( + partial(f_many, tile_fn=compute_purple_score), slide_path=slide_urlpath + ) if "stain0_score" in filter_query: logger.info( f"Starting stain thresholding, channel=0, threshold={threshold_stain0}" ) - - chunks = grouper(tiles_df.itertuples(name="Tile"), batch_size) - stain_tile_fn = partial( - compute_stain_score, + results["stain0_score"] = chunks.map_partitions( + partial(f_many, tile_fn=compute_stain_score), vectors=stain_vectors, channel=0, stain_threshold=threshold_stain0, + slide_path=slide_urlpath, ) - - futures = client.map(partial(f_many, tile_fn=stain_tile_fn), chunks) - progress(futures) - tiles_df["stain0_score"] = np.concatenate(client.gather(futures)) if "stain1_score" in filter_query: logger.info( f"Starting stain thresholding, channel=1, threshold={threshold_stain1}" ) - chunks = grouper(tiles_df.itertuples(name="Tile"), batch_size) - stain_tile_fn = partial( - compute_stain_score, + results["stain1_score"] = chunks.map_partitions( + partial(f_many, tile_fn=compute_stain_score), vectors=stain_vectors, channel=1, stain_threshold=threshold_stain1, + slide_path=slide_urlpath, ) - futures = client.map(partial(f_many, tile_fn=stain_tile_fn), chunks) - progress(futures) - tiles_df["stain1_score"] = np.concatenate(client.gather(futures)) - + for k, v in results.items(): + tiles_df[k] = v.compute() logger.info(f"Filtering based on query: {filter_query}") tiles_df = tiles_df.query(filter_query) logger.info(tiles_df) - return tiles_df + with output_filesystem.open(tiles_output_path, "wb") as of: + logger.info(f"saving to {tiles_output_path}") + tiles_df.to_parquet(of) + + properties = { + "tiles_url": output_filesystem.unstrip_protocol(tiles_output_path), + "total_tiles": len(tiles_df), + } + return properties def fire_cli(): diff --git a/src/luna/pathology/cli/save_tiles.py b/src/luna/pathology/cli/save_tiles.py index 
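The scoring loop above replaces grouper() plus client.map with a single dask.bag built once and shared across the score columns; map_partitions hands f_many one partition (a plain list) at a time, and extra kwargs pass straight through to the tile function. The same pattern in isolation (square_plus is a stand-in for the tile scorers):

    # Illustrative sketch of bag.map_partitions with forwarded kwargs.
    from functools import partial
    import dask.bag as db

    def f_many(iterator, tile_fn, **kwargs):
        # map_partitions calls this once per partition (a list of items)
        return [tile_fn(x, **kwargs) for x in iterator]

    def square_plus(x, offset):
        return x * x + offset

    chunks = db.from_sequence(range(8), partition_size=3)
    scores = chunks.map_partitions(partial(f_many, tile_fn=square_plus), offset=1)
    print(scores.compute())  # [1, 2, 5, 10, 17, 26, 37, 50]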
diff --git a/src/luna/pathology/cli/save_tiles.py b/src/luna/pathology/cli/save_tiles.py
index aa6f3509..4af71b87 100644
--- a/src/luna/pathology/cli/save_tiles.py
+++ b/src/luna/pathology/cli/save_tiles.py
@@ -1,23 +1,19 @@
 # General imports
 from pathlib import Path
-from typing import Optional

+import dask.bag as db
 import fire
 import fsspec
-import pandas as pd
 import h5py
-from dask.distributed import Client, as_completed, progress
-from fsspec import open
+import pandas as pd
+from dask.diagnostics import ProgressBar
 from loguru import logger
 from pandera.typing import DataFrame
 from tiffslide import TiffSlide

-from luna.common.models import TileSchema
-from luna.common.dask import get_or_create_dask_client, configure_dask_client
-from luna.common.utils import get_config, grouper, local_cache_urlpath, save_metadata, timed
-from luna.pathology.cli.generate_tiles import generate_tiles
-from luna.pathology.cli.run_tissue_detection import detect_tissue
+from luna.common.dask import configure_dask_client, get_or_create_dask_client
+from luna.common.models import SlideSchema
+from luna.common.utils import get_config, local_cache_urlpath, save_metadata, timed
 from luna.pathology.common.utils import get_array_from_tile

@@ -25,7 +21,7 @@
 @save_metadata
 def cli(
     slide_urlpath: str = "???",
     tiles_urlpath: str = "???",
     batch_size: int = 2000,
     output_urlpath: str = ".",
     storage_options: dict = {},
@@ -54,42 +50,97 @@ def cli(

     configure_dask_client(**config["dask_options"])

-    slide_id = Path(config["slide_urlpath"]).stem
-    fs, output_urlpath_prefix = fsspec.core.url_to_fs(
-        config["output_urlpath"], **config["output_storage_options"]
-    )
-    output_h5_path = str(Path(output_urlpath_prefix) / f"{slide_id}.tiles.h5")
-    output_h5_urlpath = fs.unstrip_protocol(output_h5_path)
-    output_header_path = str(Path(output_urlpath_prefix) / f"{slide_id}.tiles.parquet")
-    output_header_urlpath = fs.unstrip_protocol(output_h5_path)
-
-    if fs.exists(output_header_path) and fs.exists(output_h5_path):
-        logger.info(
-            f"outputs already exist: {output_h5_urlpath}, {output_header_urlpath}"
-        )
-        return
-
-    with open(config["tiles_urlpath"], **config['storage_options']) as of:
-        df = pd.read_parquet(of)
-
-    df = save_tiles(
-        df,
-        config["slide_urlpath"],
-        output_h5_urlpath,
-        config["batch_size"],
-        config["storage_options"],
-        config["output_storage_options"],
+    properties = _save_tiles(
+        config["tiles_urlpath"],
+        config["slide_urlpath"],
+        config["output_urlpath"],
+        config["batch_size"],
+        config["storage_options"],
+        config["output_storage_options"],
     )
-    logger.info(df)
-    with fs.open(output_header_path, "wb") as of:
-        df.to_parquet(of)
+    return properties
+
+
+def save_tiles(
+    slide_manifest: DataFrame[SlideSchema],
+    output_urlpath: str,
+    batch_size: int = 2000,
+    storage_options: dict = {},
+    output_storage_options: dict = {},
+):
+    client = get_or_create_dask_client()
+
+    if "tiles_url" not in slide_manifest.columns:
+        raise ValueError("Generate tiles first")
+
+    output_filesystem, output_path_prefix = fsspec.core.url_to_fs(
+        output_urlpath, **output_storage_options
+    )
+
+    if not output_filesystem.exists(output_path_prefix):
+        output_filesystem.mkdir(output_path_prefix)
+
+    futures = []
+    for slide in slide_manifest.itertuples(name="Slide"):
+        future = client.submit(
+            _save_tiles,
+            slide.tiles_url,
+            slide.url,
+            output_urlpath,
+            batch_size,
+            storage_options,
+            output_storage_options,
+        )
+        futures.append(future)
+
+    results = client.gather(futures)
+    for idx, result in enumerate(results):
+        slide_manifest.at[idx, "tiles_url"] = result["tiles_url"]
+
+    return slide_manifest
+
+
+def _save_tiles(
+    tiles_urlpath: str,
+    slide_urlpath: str,
+    output_urlpath: str,
+    batch_size: int = 2000,
+    storage_options: dict = {},
+    output_storage_options: dict = {},
+):
+    slide_id = Path(slide_urlpath).stem
+    ofs, output_urlpath_prefix = fsspec.core.url_to_fs(
+        output_urlpath, **output_storage_options
+    )
+
+    output_h5_path = str(Path(output_urlpath_prefix) / f"{slide_id}.tiles.h5")
+    output_h5_url = ofs.unstrip_protocol(output_h5_path)
+
+    output_tiles_path = str(Path(output_urlpath_prefix) / f"{slide_id}.tiles.parquet")
+    output_tiles_url = ofs.unstrip_protocol(output_tiles_path)
+
+    if ofs.exists(output_tiles_path) and ofs.exists(output_h5_path):
+        logger.info(f"outputs already exist: {output_h5_url}, {output_tiles_url}")
+        return
+
+    tiles_df = __save_tiles(
+        tiles_urlpath,
+        slide_urlpath,
+        output_h5_path,
+        batch_size,
+        storage_options,
+        output_storage_options,
+    )
+    tiles_df["tile_store"] = output_h5_url
+    logger.info(tiles_df)
+    with ofs.open(output_tiles_path, "wb") as of:
+        tiles_df.to_parquet(of)

     properties = {
-        "slide_tiles": output_header_urlpath,  # "Tiles" are the metadata that describe them
-        "feature_data": output_header_urlpath,  # Tiles can act like feature data
-        "total_tiles": len(df),
+        "tiles_url": output_tiles_url,  # "Tiles" are the metadata that describe them
+        "feature_data": output_h5_url,  # Tiles can act like feature data
+        "total_tiles": len(tiles_df),
     }

     return properties
@@ -98,12 +149,13 @@ def cli(
 @local_cache_urlpath(
     file_key_write_mode={
         "slide_urlpath": "r",
+        "output_h5_path": "w",
     },
 )
-def save_tiles(
-    df: DataFrame[TileSchema],
+def __save_tiles(
+    tiles_urlpath: str,
     slide_urlpath: str,
-    output_urlpath: str,
+    output_h5_path: str,
     batch_size: int = 2000,
     storage_options: dict = {},
     output_storage_options: dict = {},
@@ -114,62 +166,36 @@ def __save_tiles(
     """Saves tiles to disk

     Tiles addresses and arrays are saved as key-value pairs in (tiles.h5),
     and the corresponding manifest/header file (tiles.parquet) is also generated

     Args:
-        df (DataFrame[TileSchema]): tile manifest
+        tiles_urlpath (str): tile manifest
         slide_urlpath (str): url/path to slide image (virtual slide formats compatible with openslide, .svs, .tif, .scn, ...)
-        output_urlpath (str): output url/path
+        output_h5_path (str): output path for the hdf5 tile store
         batch_size (int): size in batch dimension to chunk jobs
         storage_options (dict): storage options to reading functions
         output_storage_options (dict): storage options to writing functions

     Returns:
         dict: metadata about function call
     """
-    logger.info(f"Now generating tiles with batch_size={batch_size}!")
-    df = _save_tiles(df, slide_urlpath, output_urlpath, batch_size, storage_options, output_storage_options)
-    df["tile_store"] = output_urlpath
-    return df
-
-
-@local_cache_urlpath(
-    file_key_write_mode={
-        "output_urlpath": "w",
-    },
-)
-def _save_tiles(
-    df,
-    slide_urlpath,
-    output_urlpath: str,
-    batch_size: int = 2000,
-    storage_options: dict = {},
-    output_storage_options: dict = {},
-):
-    """
-    save address:tile arrays key:value pair in hdf5
-    a separate function from save_tiles such that the tile_store in the tile df is the non-cached path
-    """
-    client = get_or_create_dask_client()
+    tiles_df = pd.read_parquet(tiles_urlpath, storage_options=storage_options)
+
+    get_or_create_dask_client()

     def f_many(iterator):
-        with open(slide_urlpath, **storage_options) as of:
-            slide = TiffSlide(of)
-            return [
-                (
-                    x.address,
-                    get_array_from_tile(x, slide),
-                )
-                for x in iterator
-            ]
+        with TiffSlide(slide_urlpath) as slide:
+            return [(x.address, get_array_from_tile(x, slide=slide)) for x in iterator]

-    chunks = grouper(df.itertuples(name="Tile"), batch_size)
-
-    futures = client.map(f_many, chunks)
-    progress(futures)
-
-    with h5py.File(output_urlpath, "w") as hfile:
-        for future in as_completed(futures):
-            for result in future.result():
-                address, tile_arr = result
-                hfile.create_dataset(address, data=tile_arr)
-
-    return df
+    chunks = db.from_sequence(
+        tiles_df.itertuples(name="Tile"), partition_size=batch_size
+    )
+
+    ProgressBar().register()
+    results = chunks.map_partitions(f_many)
+    with h5py.File(output_h5_path, "w") as hfile:
+        for result in results.compute():
+            address, tile_arr = result
+            hfile.create_dataset(address, data=tile_arr)
+
+    return tiles_df


 def fire_cli():
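For reference, the h5 layout written by __save_tiles is flat: one dataset per tile, keyed by the tile's address string, with the parquet manifest's tile_store column carrying the store's URL. Reading a tile back might look like this (the local filenames are illustrative):

    # Illustrative sketch: read one tile back out of the store.
    import h5py
    import pandas as pd

    tiles_df = pd.read_parquet("123.tiles.parquet")
    tile = next(tiles_df.itertuples(name="Tile"))
    with h5py.File("123.tiles.h5", "r") as hfile:  # url in tiles_df["tile_store"]
        tile_arr = hfile[tile.address][()]  # datasets are keyed by address
    print(tile_arr.shape)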
diff --git a/src/luna/pathology/cli/slide_etl.py b/src/luna/pathology/cli/slide_etl.py
index 458d299a..b02eeafc 100644
--- a/src/luna/pathology/cli/slide_etl.py
+++ b/src/luna/pathology/cli/slide_etl.py
@@ -1,6 +1,7 @@
 # General imports
 import uuid
 from pathlib import Path
+from typing import List, Union

 import fire
 import fsspec
@@ -8,7 +9,6 @@
 from dask.distributed import progress
 from fsspec import open  # type: ignore
 from loguru import logger
-from multimethod import multimethod
 from pandera.typing import DataFrame
 from tiffslide import TiffSlide

@@ -108,9 +108,8 @@ def cli(
         df.to_parquet(of)


-@multimethod
 def slide_etl(
-    slide_urls: list[str],
+    slide_urls: Union[str, List[str]],
     project_name: str,
     comment: str = "",
     storage_options: dict = {},
@@ -132,6 +131,8 @@ def slide_etl(
     Returns:
         df (DataFrame): dataframe containing the metadata of all the slides
     """
+    if isinstance(slide_urls, str):
+        slide_urls = [slide_urls]
     client = get_or_create_dask_client()
     sb = SlideBuilder(storage_options, output_storage_options=output_storage_options)

@@ -156,52 +157,13 @@ def slide_etl(
         )
         for slide in slides
     ]
+
     df = DataFrame[SlideSchema](
-        pd.json_normalize(
-            [
-                x.__dict__
-                | {"properties." + str(k): v for k, v in x.properties.items()}
-                for x in client.gather(futures)
-            ]
-        )
+        pd.json_normalize([x.__dict__ for x in client.gather(futures)])
     )
-
     return df


-@multimethod
-def slide_etl(
-    slide_url: str,
-    project_name: str,
-    comment: str = "",
-    storage_options: dict = {},
-    output_urlpath: str = "",
-    output_storage_options: dict = {},
-    no_copy: bool = False,
-) -> Slide:
-    """Ingest slide by adding them to a file or s3 based storage location and generating metadata about them
-
-    Args:
-        slide_url (str): path to slide image
-        project_name (str): project name underwhich the slides should reside
-        comment (str): comment and description of dataset
-        storage_options (dict): storage options to pass to reading functions
-        output_urlpath (str): url/path to output table
-        output_storage_options (dict): storage options to pass to writing functions
-
-
-    Returns:
-        slide (Slide): slide object
-    """
-
-    sb = SlideBuilder(storage_options, output_storage_options=output_storage_options)
-
-    slide = sb.get_slide(slide_url, project_name=project_name, comment=comment)
-    if not no_copy and output_urlpath:
-        slide = sb.copy_slide(slide, output_urlpath)
-    return slide
-
-
 class SlideBuilder:
     def __init__(self, storage_options: dict = {}, output_storage_options: dict = {}):
         self.storage_options = storage_options
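Since the single-slide overload is gone, the list branch now also covers scalar input, and the gathered Slide objects are flattened with a bare json_normalize (the properties.* column expansion is dropped). What that produces, on stand-in objects:

    # Illustrative sketch of pd.json_normalize over object __dict__s.
    import pandas as pd

    class FakeSlide:  # stand-in for the Slide model
        def __init__(self, id, url):
            self.id, self.url = id, url

    slides = [FakeSlide("123", "file:///data/123.svs"),
              FakeSlide("124", "file:///data/124.svs")]
    df = pd.json_normalize([s.__dict__ for s in slides])
    print(df.columns.tolist())  # ['id', 'url']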
diff --git a/tests/luna/pathology/cli/test_dsa_viz.py b/tests/luna/pathology/cli/test_dsa_viz.py
index fc4b4aba..b75af908 100644
--- a/tests/luna/pathology/cli/test_dsa_viz.py
+++ b/tests/luna/pathology/cli/test_dsa_viz.py
@@ -1,26 +1,16 @@
-import os
-from pathlib import Path
-
 import fire
 import pytest

 from luna.pathology.cli.dsa_viz import (  # bmp_polygon,
-    bitmask_polygon,
-    heatmap,
-    qupath_polygon,
-    regional_polygon,
-    stardist_cell,
-    stardist_polygon,
+    bitmask_polygon_cli,
+    heatmap_cli,
+    qupath_polygon_cli,
+    regional_polygon_cli,
+    stardist_cell_cli,
+    stardist_polygon_cli,
 )


-def verify_cleanup(output_file):
-    assert os.path.exists(output_file)
-    # cleanup
-    os.remove(output_file)
-    os.remove(str(Path(output_file).parent) + "/metadata.yml")
-
-
 def test_stardist_polygon_s3(s3fs_client):
     s3fs_client.mkdirs("dsatest", exist_ok=True)
     s3fs_client.put(
@@ -28,7 +18,7 @@ def test_stardist_polygon_s3(s3fs_client):
         "s3://dsatest/test/",
     )
     fire.Fire(
-        stardist_polygon,
+        stardist_polygon_cli,
         [
             "--local_config",
             "tests/testdata/pathology/stardist_polygon_s3.yml",
@@ -44,57 +34,58 @@ def test_stardist_polygon_s3(s3fs_client):
     )


-def test_stardist_polygon():
+def test_stardist_polygon(tmp_path):
     fire.Fire(
-        stardist_polygon,
+        stardist_polygon_cli,
         [
+            "--output_urlpath",
+            str(tmp_path),
             "--local_config",
             "tests/testdata/pathology/stardist_polygon.yml",
         ],
     )

     output_file = (
-        "tests/luna/pathology/cli/testouts"
-        "/StarDist_Segmentations_with_Lymphocyte_Classifications_123.json"
+        tmp_path / "StarDist_Segmentations_with_Lymphocyte_Classifications_123.json"
     )
-    verify_cleanup(output_file)
+    assert output_file.exists()


-def test_stardist_cell():
+def test_stardist_cell(tmp_path):
     fire.Fire(
-        stardist_cell,
+        stardist_cell_cli,
         [
+            "--output_urlpath",
+            str(tmp_path),
             "--local_config",
             "tests/testdata/pathology/stardist_cell.yml",
         ],
     )

-    output_file = (
-        "tests/luna/pathology/cli/testouts"
-        "/Points_of_Classsified_StarDist_Cells_123.json"
-    )
-    verify_cleanup(output_file)
+    output_file = tmp_path / "Points_of_Classsified_StarDist_Cells_123.json"
+    assert output_file.exists()


-def test_regional_polygon():
+def test_regional_polygon(tmp_path):
     fire.Fire(
-        regional_polygon,
+        regional_polygon_cli,
         [
+            "--output_urlpath",
+            str(tmp_path),
             "--local_config",
             "tests/testdata/pathology" "/regional_polygon.yml",
         ],
     )

-    output_file = (
-        "tests/luna/pathology/cli/testouts" "/Slideviewer_Regional_Annotations_123.json"
-    )
-    verify_cleanup(output_file)
+    output_file = tmp_path / "Slideviewer_Regional_Annotations_123.json"
+    assert output_file.exists()


 def test_bitmask_polygon_invalid():
     with pytest.raises(Exception):
         fire.Fire(
-            bitmask_polygon,
+            bitmask_polygon_cli,
             [
                 "--input_map",
                 '{"Tumor": "non/existing/path/to/png.png"}',
@@ -122,11 +113,13 @@
 """


-def test_heatmap():
+def test_heatmap(tmp_path):
     fire.Fire(
-        heatmap,
+        heatmap_cli,
         [
             "tests/testdata/pathology/tile_scores.parquet",
+            "--output_urlpath",
+            str(tmp_path),
             "--column",
             "otsu_score",
             "--local_config",
@@ -134,21 +127,21 @@ def test_heatmap(tmp_path):
         ],
     )

-    output_file = "tests/luna/pathology/cli/testouts" "/otsu_score_test_123.json"
-    verify_cleanup(output_file)
+    output_file = tmp_path / "otsu_score_test_123.json"
+    assert output_file.exists()


-def test_qupath_polygon():
+def test_qupath_polygon(tmp_path):
     fire.Fire(
-        qupath_polygon,
+        qupath_polygon_cli,
         [
             "tests/testdata/pathology/region_annotation_results.geojson",
+            "--output_urlpath",
+            str(tmp_path),
             "--local_config",
             "tests/testdata/pathology" "/qupath_polygon.yml",
         ],
     )

-    output_file = (
-        "tests/luna/pathology/cli/testouts" "/Qupath_Pixel_Classifier_Polygons_123.json"
-    )
-    verify_cleanup(output_file)
+    output_file = tmp_path / "Qupath_Pixel_Classifier_Polygons_123.json"
+    assert output_file.exists()
diff --git a/tests/luna/pathology/cli/test_run_stardist_cell_detection.py b/tests/luna/pathology/cli/test_run_stardist_cell_detection.py
index 5b8e028a..bbba4b36 100644
--- a/tests/luna/pathology/cli/test_run_stardist_cell_detection.py
+++ b/tests/luna/pathology/cli/test_run_stardist_cell_detection.py
@@ -4,7 +4,7 @@
 import fire
 import docker

-from luna.pathology.cli.run_stardist_cell_detection import stardist_simple
+from luna.pathology.cli.run_stardist_cell_detection import stardist_simple_cli

 tmppath = "tests/testdata/pathology/stardist_cell_detection"

@@ -45,7 +45,7 @@ def mock_client(*args, **kwargs):
     monkeypatch.setattr(docker.models.containers.Container, "logs", mock_container)

     fire.Fire(
-        stardist_simple,
+        stardist_simple_cli,
         [
             "--slide-urlpath",
             "tests/testdata/pathology/123.svs",
diff --git a/tests/testdata/pathology/heatmap_config.yml b/tests/testdata/pathology/heatmap_config.yml
index 23b4487f..a4fd844d 100644
--- a/tests/testdata/pathology/heatmap_config.yml
+++ b/tests/testdata/pathology/heatmap_config.yml
@@ -1,6 +1,5 @@
 input_urlpath: tests/testdata/pathology/tile_scores.csv
 image_filename: 123.svs
-output_urlpath: tests/luna/pathology/cli/testouts
 annotation_name: test
 tile_size: 128
 scale_factor: 8
diff --git a/tests/testdata/pathology/stardist_cell.yml b/tests/testdata/pathology/stardist_cell.yml
index 358ae566..1deac3f1 100644
--- a/tests/testdata/pathology/stardist_cell.yml
+++ b/tests/testdata/pathology/stardist_cell.yml
@@ -1,6 +1,5 @@
 input_urlpath: tests/testdata/pathology/test_object_detection.tsv
 image_filename: 123.svs
-output_urlpath: tests/luna/pathology/cli/testouts
 annotation_name: Points of Classsified StarDist Cells
 line_colors:
   Other: rgb(0, 255, 0)
diff --git a/tests/testdata/pathology/stardist_cell_detection/metadata.yml b/tests/testdata/pathology/stardist_cell_detection/metadata.yml
index 9fd63007..a560bebe 100644
--- a/tests/testdata/pathology/stardist_cell_detection/metadata.yml
+++ b/tests/testdata/pathology/stardist_cell_detection/metadata.yml
@@ -1,12 +1,19 @@
-cell_expansion_size: 8.0
-cell_objects: tests/testdata/pathology/stardist_cell_detection/123_cell_objects.parquet
+cell_expansion_size: 8
 debug_opts: ''
-feature_data: tests/testdata/pathology/stardist_cell_detection/123_cell_objects.parquet
+geojson_url: file:///gpfs/mskmind_ess/limr/repos/luna/tests/testdata/pathology/stardist_cell_detection/cell_detections.geojson
+image: mskmind/qupath-stardist:0.4.3
 image_type: BRIGHTFIELD_H_DAB
-input_slide_image: /Users/limr/repos/luna/tests/testdata/pathology/123.svs
-num_cores: 4
-output_dir: tests/testdata/pathology/stardist_cell_detection
-segment_keys:
-  slide_id: '123'
+local_config: ''
+max_heap_size: 64G
+num_cores: 1
+output_filesystem: file
+output_storage_options:
+  auto_mkdir: true
+output_urlpath: tests/testdata/pathology/stardist_cell_detection
+parquet_url: file:///gpfs/mskmind_ess/limr/repos/luna/tests/testdata/pathology/stardist_cell_detection/776c7630596bf071cf44e2f301e1502c986402251bdfbd8c1e5d7b2168001b8d_cell_objects.parquet
+slide_urlpath: tests/testdata/pathology/123.svs
 spatial: true
+storage_options: {}
 total_cells: 9
+tsv_url: file:///gpfs/mskmind_ess/limr/repos/luna/tests/testdata/pathology/stardist_cell_detection/cell_detections.tsv
+use_singularity: false
diff --git a/tests/testdata/pathology/stardist_polygon.yml b/tests/testdata/pathology/stardist_polygon.yml
index 3eb6da95..b94472e5 100644
--- a/tests/testdata/pathology/stardist_polygon.yml
+++ b/tests/testdata/pathology/stardist_polygon.yml
@@ -1,6 +1,5 @@
 input_urlpath: tests/testdata/pathology/test_object_classification.geojson
 image_filename: 123.svs
-output_urlpath: tests/luna/pathology/cli/testouts
 annotation_name: StarDist Segmentations with Lymphocyte Classifications
 line_colors:
   Other: rgb(0, 255, 0)
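The renamed *_cli entry points remain plain functions, so they can still be driven programmatically through fire with an argv-style list, exactly as the updated tests do; the output location now comes from --output_urlpath rather than the yml configs. A sketch (the output directory is hypothetical):

    # Illustrative sketch mirroring the updated tests.
    import fire
    from luna.pathology.cli.dsa_viz import regional_polygon_cli

    fire.Fire(
        regional_polygon_cli,
        [
            "--output_urlpath", "/tmp/out",  # hypothetical output dir
            "--local_config", "tests/testdata/pathology/regional_polygon.yml",
        ],
    )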