Skip to content

Commit

Permalink
Update core cropping process to include mask generation prior
Browse files Browse the repository at this point in the history
  • Loading branch information
alex-l-kong committed Nov 27, 2023
1 parent 9d8a6a6 commit 55ee0af
Show file tree
Hide file tree
Showing 3 changed files with 114 additions and 35 deletions.
43 changes: 33 additions & 10 deletions src/maldi_tools/extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from alpineer.io_utils import validate_paths
from pyimzml.ImzMLParser import ImzMLParser
from scipy import signal
from skimage.io import imread
from skimage.io import imread, imsave
from tqdm.notebook import tqdm

from maldi_tools import plotting
Expand Down Expand Up @@ -346,6 +346,29 @@ def library_matching(
return peak_df


def generate_glycan_mask(
    imz_data: ImzMLParser,
    glycan_img_path: Path,
    glycan_mask_path: Path,
) -> None:
    """Given a glycan image, generate and save an equivalent binary mask.

    The mask has the same shape as the glycan image. Every pixel listed in
    `imz_data.coordinates` is set to 255, all remaining pixels are left at 0. The result is
    written to `glycan_mask_path`; nothing is returned.

    Args:
    ---
        imz_data (ImzMLParser): The imzML object, needed for coordinate identification.
        glycan_img_path (Path): Location of the .tiff file containing the glycan scan.
        glycan_mask_path (Path): Location where the mask will be saved.
    """
    validate_paths([glycan_img_path])

    # the glycan image is only read to dimension the mask
    glycan_img: np.ndarray = imread(glycan_img_path)

    # a 0/255 mask fits in uint8; the default float64 would inflate the saved TIFF 8x
    glycan_mask: np.ndarray = np.zeros(glycan_img.shape, dtype=np.uint8)

    # keep only the first two entries (x, y) of each coordinate; the reshape keeps the array
    # two-dimensional even when no coordinates are present, so the indexing below stays valid
    coords: np.ndarray = np.array(
        [coord[:2] for coord in imz_data.coordinates], dtype=int
    ).reshape(-1, 2)

    # the image is indexed [row, column] = [y, x]; the -1 shifts coordinates to 0-based indices
    glycan_mask[coords[:, 1] - 1, coords[:, 0] - 1] = 255
    imsave(glycan_mask_path, glycan_mask)


def map_coordinates_to_core_name(
imz_data: ImzMLParser,
centroid_path: Path,
Expand Down Expand Up @@ -404,18 +427,18 @@ def map_coordinates_to_core_name(
return region_core_info


def generate_glycan_mask(
def crop_glycan_cores(
imz_data: ImzMLParser,
glycan_img_path: Path,
glycan_mask_path: Path,
region_core_info: pd.DataFrame,
cores_to_crop: Optional[List[str]] = None,
):
"""Generate a mask for the specified cores, provided a glycan image input.
"""Generate a mask for cropping out the specified cores.
Args:
---
imz_data (ImzMLParser): The imzML object, needed for coordinate identification.
glycan_img_path (Path): The path to the glycan image .tiff, needed to create the base mask.
glycan_mask_path (Path): The path to the glycan mask .tiff, needed to create the cropped mask.
region_core_info (pd.DataFrame): Defines the coordinates associated with each FOV.
cores_to_crop (Optional[List[str]]): Which cores to segment out. If None, use all.
Expand All @@ -424,14 +447,14 @@ def generate_glycan_mask(
np.ndarray:
The binary segmentation mask of the glycan image
"""
validate_paths([glycan_img_path])
validate_paths([glycan_mask_path])
if not cores_to_crop:
cores_to_crop = region_core_info["Core"].unique().tolist()

glycan_img = imread(glycan_img_path)
glycan_mask = np.zeros(glycan_img.shape)
glycan_mask = imread(glycan_mask_path)
core_cropped_mask = np.zeros(glycan_mask.shape)

coords = region_core_info.loc[region_core_info["Core"].isin(cores_to_crop), ["X", "Y"]].values
glycan_mask[coords[:, 1] - 1, coords[:, 0] - 1] = 255
core_cropped_mask[coords[:, 1] - 1, coords[:, 0] - 1] = 255

return glycan_mask
return core_cropped_mask
70 changes: 52 additions & 18 deletions templates/maldi-pipeline.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -554,16 +554,23 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## Core Cropping"
"## Core Naming and Cropping"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"After glycan matching, each core on the TMA should be appropriately named by the <a href=https://tsai.stanford.edu/research/maldi_tma/>TSAI MALDI tiler</a>. **Ensure that this step is completed before running the following section.**\n",
"For TMAs, all cores are extracted at once. However, this makes it difficult to locate the exact position of each core. Additionally, the default names assigned to each core aren't particularly useful because they don't contain any information about their position on the TMA.\n",
"\n",
"To extract FOV-level statistics, a mask will be generated to segment out individual cores on a TMA. This section first maps each acquired coordinate on the slide to their core as defined by the TSAI MALDI tiler, then generates a mask for specific cores on the TMA."
"This section will help you assign informative names to each core and afterwards, segment out the locations of specific cores to generate FOV-level statistics."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"It is helpful first to create an all-encompassing mask with all the cores. This will make it clear where the TMA was scanned for the naming step. You will need to provide the path to one of your extracted glycan images first."
]
},
{
Expand All @@ -572,14 +579,32 @@
"metadata": {},
"outputs": [],
"source": [
"# TSAI MALDI tiler output, contains name of each core mapped to respective centroid\n",
"centroid_path = \"path/to/centroids.json\"\n",
"# define path to one glycan image, needed to properly dimension the mask\n",
"glycan_img_path = \"path/to/glycan_img.tiff\"\n",
"\n",
"# contains all coordinates in the order of acquisition\n",
"poslog_path = \"path/to/poslog.txt\"\n",
"# define a save path for your mask\n",
"glycan_mask_path = \"path/to/glycan_mask.tiff\"\n",
"\n",
"# define path to one glycan image, needed to find dimensions of mask\n",
"glycan_img_path = \"path/to/glycan_img.tiff\""
"# generate and save the glycan mask\n",
"extraction.generate_glycan_mask(\n",
" imz_data=imz_data,\n",
" glycan_img_path=glycan_img_path,\n",
" glycan_mask_path=glycan_mask_path\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Each core on the TMA should be appropriately named by the <a href=https://tsai.stanford.edu/research/maldi_tma/>TSAI MALDI tiler</a>. You will need to provide the TIFF saved at `glycan_mask_path` as input. **Ensure that this step is completed before running the following sections.**"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The poslog file for your TMA run will contain each scanned coordinate in the exact order it was scanned. This, along with the tiler output, will be needed to map each coordinate to its respective core."
]
},
{
Expand All @@ -588,6 +613,15 @@
"metadata": {},
"outputs": [],
"source": [
"# TSAI MALDI tiler output, contains name of each core mapped to respective centroid\n",
"centroid_path = \"path/to/centroids.json\"\n",
"\n",
"# contains all coordinates in the order of acquisition\n",
"poslog_path = \"path/to/poslog.txt\"\n",
"\n",
"# define path to one glycan image, needed to find dimensions of mask\n",
"glycan_img_path = \"path/to/glycan_img.tiff\"\n",
"\n",
"# map coordinates to core names\n",
"region_core_info = extraction.map_coordinates_to_core_name(\n",
" imz_data=imz_data,\n",
Expand All @@ -596,6 +630,13 @@
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Specify which cores you want to crop out, and visualize the resulting mask. You can use the mask at `core_cropping_mask` to subset on specific cores."
]
},
{
"cell_type": "code",
"execution_count": null,
Expand All @@ -611,15 +652,8 @@
" glycan_mask_path=glycan_mask_path,\n",
" region_core_info=region_core_info,\n",
" cores_to_crop=cores_to_crop\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
")\n",
"\n",
"# visualize the mask\n",
"_ = plt.imshow(core_cropping_mask)"
]
Expand Down
36 changes: 29 additions & 7 deletions tests/extraction_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
import pytest
import xarray as xr
from pyimzml.ImzMLParser import ImzMLParser
from pytest import TempPathFactory
from skimage.io import imread

from maldi_tools import extraction

Expand Down Expand Up @@ -140,6 +142,26 @@ def test_library_matching(image_xr: xr.DataArray, library: pd.DataFrame, _ppm: i
assert row.peak in {30, 45}


def test_generate_glycan_mask(
    tmp_path_factory: TempPathFactory, imz_data: ImzMLParser, glycan_img_path: pathlib.Path
):
    """Check that the saved glycan mask is 255 on acquired pixels and 0 everywhere else."""
    # write the mask into a fresh temporary directory
    out_dir: pathlib.Path = tmp_path_factory.mktemp("glycan_mask")
    glycan_mask_path: pathlib.Path = out_dir / "glycan_mask.tiff"
    extraction.generate_glycan_mask(imz_data, glycan_img_path, glycan_mask_path)
    assert glycan_mask_path.exists()

    # every (x, y) coordinate in the imzML data must be flagged in the saved mask
    saved_mask: np.ndarray = imread(glycan_mask_path)
    acquired: np.ndarray = np.array([coord[:2] for coord in imz_data.coordinates])
    acquired_rows: np.ndarray = acquired[:, 1] - 1
    acquired_cols: np.ndarray = acquired[:, 0] - 1
    assert np.all(saved_mask[acquired_rows, acquired_cols] == 255)

    # enumerate every 1-based (x, y) position of the 10 x 10 grid
    grid_x, grid_y = np.meshgrid(np.arange(1, 11), np.arange(1, 11))
    grid_coords: np.ndarray = np.column_stack((grid_x.ravel(), grid_y.ravel()))

    # positions that were never acquired must remain background
    acquired_set: set = {tuple(xy) for xy in acquired}
    is_background: np.ndarray = np.array([tuple(xy) not in acquired_set for xy in grid_coords])
    background: np.ndarray = grid_coords[is_background]

    assert np.all(saved_mask[background[:, 1] - 1, background[:, 0] - 1] == 0)


def test_map_coordinates_to_core_name(
imz_data: ImzMLParser, centroid_path: pathlib.Path, poslog_path: pathlib.Path
):
Expand All @@ -162,7 +184,7 @@ def test_map_coordinates_to_core_name_malformed(
extraction.map_coordinates_to_core_name(imz_data, bad_centroid_path, poslog_path)


def test_generate_glycan_mask(
def test_crop_glycan_cores(
imz_data: ImzMLParser,
glycan_img_path: pathlib.Path,
centroid_path: pathlib.Path,
Expand All @@ -174,27 +196,27 @@ def test_generate_glycan_mask(
)
core_names: List[str] = list(region_core_info["Core"].unique())

glycan_mask: np.ndarray = extraction.generate_glycan_mask(
core_cropped_mask: np.ndarray = extraction.crop_glycan_cores(
imz_data, glycan_img_path, region_core_info, [core_names[0]]
)
coords: np.ndarray = region_core_info.loc[region_core_info["Core"] == core_names[0], ["X", "Y"]].values
assert np.all(glycan_mask[coords[:, 1] - 1, coords[:, 0] - 1] == 255)
assert np.all(core_cropped_mask[coords[:, 1] - 1, coords[:, 0] - 1] == 255)

all_coords_X, all_coords_Y = np.meshgrid(np.arange(1, 11), np.arange(1, 11))
all_coords: np.ndarray = np.vstack((all_coords_X.ravel(), all_coords_Y.ravel())).T
coords_set: set = set(map(tuple, coords))
non_hit_indices: np.ndarray = np.array([tuple(coord) not in coords_set for coord in all_coords])
non_hit_coords: np.ndarray = all_coords[non_hit_indices]

assert np.all(glycan_mask[non_hit_coords[:, 1] - 1, non_hit_coords[:, 0] - 1] == 0)
assert np.all(core_cropped_mask[non_hit_coords[:, 1] - 1, non_hit_coords[:, 0] - 1] == 0)

# test for all FOVs
glycan_mask = extraction.generate_glycan_mask(imz_data, glycan_img_path, region_core_info)
core_cropped_mask = extraction.crop_glycan_cores(imz_data, glycan_img_path, region_core_info)
coords = region_core_info.loc[:, ["X", "Y"]].values
assert np.all(glycan_mask[coords[:, 1] - 1, coords[:, 0] - 1] == 255)
assert np.all(core_cropped_mask[coords[:, 1] - 1, coords[:, 0] - 1] == 255)

coords_set = set(map(tuple, coords))
non_hit_indices = np.array([tuple(coord) not in coords_set for coord in all_coords])
non_hit_coords = all_coords[non_hit_indices]

assert np.all(glycan_mask[non_hit_coords[:, 1] - 1, non_hit_coords[:, 0] - 1] == 0)
assert np.all(core_cropped_mask[non_hit_coords[:, 1] - 1, non_hit_coords[:, 0] - 1] == 0)

0 comments on commit 55ee0af

Please sign in to comment.