Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: differential tissue position row/col max sizes for visium and visium 11 #1143

Merged
merged 2 commits into from
Dec 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 26 additions & 3 deletions cellxgene_schema_cli/cellxgene_schema/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import os
import re
from datetime import datetime
from typing import Dict, List, Mapping, Optional, Union
from typing import Dict, List, Mapping, Optional, Tuple, Union

import anndata
import matplotlib.colors as mcolors
Expand All @@ -29,6 +29,10 @@

# Matrix sizes for Visium datasets with uns['spatial']['is_single'] True; the 11MM variant is for Visium 11mm
# assays. NOTE(review): meaning is inferred from the constant names — confirm against where they are consumed.
VISIUM_AND_IS_SINGLE_TRUE_MATRIX_SIZE = 4992
VISIUM_11MM_AND_IS_SINGLE_TRUE_MATRIX_SIZE = 14336
# Inclusive upper bounds for obs['array_row'] / obs['array_col'] (the lower bound is 0).
# Visium 11mm assays get their own, larger limits than other Visium assays.
VISIUM_TISSUE_POSITION_MAX_ROW = 77
VISIUM_TISSUE_POSITION_MAX_COL = 127
VISIUM_11MM_TISSUE_POSITION_MAX_ROW = 127
VISIUM_11MM_TISSUE_POSITION_MAX_COL = 223
# Maximum dimension size of the spatial hires image; Visium 11mm assays get a larger limit.
SPATIAL_HIRES_IMAGE_MAX_DIMENSION_SIZE = 2000
SPATIAL_HIRES_IMAGE_MAX_DIMENSION_SIZE_VISIUM_11MM = 4000

Expand Down Expand Up @@ -57,6 +61,7 @@ def __init__(self, ignore_labels=False):
self._visium_and_is_single_true_matrix_size = None
self._hires_max_dimension_size = None
self._visium_error_suffix = None
self._visium_tissue_position_max = None

# Values will be instances of gencode.GeneChecker,
# keys will be one of gencode.SupportedOrganisms
Expand Down Expand Up @@ -122,6 +127,24 @@ def hires_max_dimension_size(self) -> Optional[int]:
self._hires_max_dimension_size = SPATIAL_HIRES_IMAGE_MAX_DIMENSION_SIZE
return self._hires_max_dimension_size

@property
def tissue_position_maxes(self) -> Optional[Tuple[int, int]]:
    """
    Maximum allowed obs['array_row'] and obs['array_col'] values for the dataset's Visium assay.

    The value is computed on first access and cached in self._visium_tissue_position_max.
    If any obs['assay_ontology_term_id'] value is reported by is_ontological_descendant_of as
    related to ASSAY_VISIUM_11M, the Visium 11mm limits are used; otherwise the standard
    Visium limits are used.

    :rtype Optional[Tuple[int, int]]: (max_row, max_col), or None when nothing is cached and
        self._is_visium_and_is_single_true is falsy (callers must not index the result then).
    """
    if self._visium_tissue_position_max is None and self._is_visium_and_is_single_true:
        # visium 11 has different (larger) limits than other visium assays
        is_visium_11mm = (
            self.adata.obs["assay_ontology_term_id"]
            .apply(lambda t: is_ontological_descendant_of(ONTOLOGY_PARSER, t, ASSAY_VISIUM_11M, True))
            .astype(bool)
            .any()
        )
        if is_visium_11mm:
            self._visium_tissue_position_max = (
                VISIUM_11MM_TISSUE_POSITION_MAX_ROW,
                VISIUM_11MM_TISSUE_POSITION_MAX_COL,
            )
        else:
            self._visium_tissue_position_max = (VISIUM_TISSUE_POSITION_MAX_ROW, VISIUM_TISSUE_POSITION_MAX_COL)
    return self._visium_tissue_position_max

def _is_single(self) -> bool | None:
"""
Determine value of uns.spatial.is_single. None if non-spatial.
Expand Down Expand Up @@ -1732,8 +1755,8 @@ def _validate_spatial_tissue_positions(self):

:rtype none
"""
self._validate_spatial_tissue_position("array_col", 0, 127)
self._validate_spatial_tissue_position("array_row", 0, 77)
self._validate_spatial_tissue_position("array_col", 0, self.tissue_position_maxes[1])
self._validate_spatial_tissue_position("array_row", 0, self.tissue_position_maxes[0])
self._validate_spatial_tissue_position("in_tissue", 0, 1)

def _check_spatial_uns(self):
Expand Down
65 changes: 43 additions & 22 deletions cellxgene_schema_cli/tests/test_validate.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import hashlib
import os
import re
import tempfile
from typing import Union
from unittest import mock
Expand Down Expand Up @@ -1011,21 +1012,32 @@ def test__validate_tissue_position_required(self, tissue_position_name):
validator.adata = adata_visium.copy()
validator.adata.obs.pop(tissue_position_name)

# check visium
validator.adata.obs["assay_ontology_term_id"] = "EFO:0010961"
validator._check_spatial_obs()
assert validator.errors
assert (
f"obs['{tissue_position_name}'] {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_REQUIRED}." in validator.errors[0]
)
validator.reset()

# check visium descendant
validator.adata.obs["assay_ontology_term_id"] = "EFO:0022860"
validator._check_spatial_obs()
assert validator.errors
assert (
f"obs['{tissue_position_name}'] {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_REQUIRED}." in validator.errors[0]
)
validator.reset()

@pytest.mark.parametrize("assay_ontology_term_id", ["EFO:0010961", "EFO:0030062"])
@pytest.mark.parametrize("assay_ontology_term_id", ["EFO:0010961", "EFO:0030062", "EFO:0022860"])
def test__validate_tissue_position_not_required(self, assay_ontology_term_id):
validator: Validator = Validator()
validator._set_schema_def()
validator.adata = adata_slide_seqv2.copy()
validator.adata.obs["assay_ontology_term_id"] = assay_ontology_term_id
validator.adata.uns["spatial"]["is_single"] = False
validator.adata.uns["spatial"]["is_single"] = False # setting to false removes the requirement
validator.adata.obs["is_primary_data"] = False

validator._check_spatial_obs()
assert not validator.errors

Expand All @@ -1041,43 +1053,52 @@ def test__validate_tissue_position_int_error(self, tissue_position_name):
assert validator.errors
assert f"obs['{tissue_position_name}'] must be of int type" in validator.errors[0]

@pytest.mark.parametrize("assay_ontology_term_id", ["EFO:0010961", "EFO:0022860", "EFO:0022859"])
@pytest.mark.parametrize(
    "tissue_position_name, tissue_position_min", [("array_col", 0), ("array_row", 0), ("in_tissue", 0)]
)
def test__validate_tissue_position_int_min_error(
    self, assay_ontology_term_id, tissue_position_name, tissue_position_min
):
    """
    A tissue position value one below its minimum must be reported as an error,
    for each of the parametrized Visium assay term ids.
    """
    validator: Validator = Validator()
    validator._set_schema_def()
    validator.adata = adata_visium.copy()
    validator.adata.obs["assay_ontology_term_id"] = assay_ontology_term_id
    validator.adata.obs[tissue_position_name] = tissue_position_min - 1

    # Confirm tissue_position is identified as invalid.
    validator._check_spatial_obs()
    assert validator.errors
    # Raw f-string: "\[" and "\]" are regex escapes, not valid Python string escapes.
    # The upper bound depends on the assay, so only the lower bound is pinned here.
    assert (
        re.match(
            rf"^obs\['{tissue_position_name}'\] must be (between )?{tissue_position_min} (and|or) [0-9]+",
            validator.errors[0],
        )
        is not None
    )

@pytest.mark.parametrize(
    "assay_ontology_term_id, tissue_position_name, tissue_position_max",
    [
        ("EFO:0010961", "array_col", 127),
        ("EFO:0010961", "array_row", 77),
        ("EFO:0022860", "array_col", 223),
        ("EFO:0022860", "array_row", 127),
        ("EFO:0022859", "array_col", 127),
        ("EFO:0022859", "array_row", 77),
        ("EFO:0022859", "in_tissue", 1),
    ],
)
def test__validate_tissue_position_int_max_error(
    self, assay_ontology_term_id, tissue_position_name, tissue_position_max
):
    """
    A tissue position value one above the assay-specific maximum must be reported as an
    error whose message names that maximum.
    """
    validator: Validator = Validator()
    validator._set_schema_def()
    validator.adata = adata_visium.copy()
    validator.adata.obs["assay_ontology_term_id"] = assay_ontology_term_id
    validator.adata.obs[tissue_position_name] = tissue_position_max + 1

    # Confirm tissue_position is identified as invalid.
    validator._check_spatial_obs()
    assert validator.errors
    # Raw f-string: "\[" and "\]" are regex escapes, not valid Python string escapes.
    # The negative lookahead stops a longer number (e.g. 771) from satisfying an expected 77.
    assert (
        re.match(
            rf"^obs\['{tissue_position_name}'\] must be (between )?[0-9]+ (and|or) {tissue_position_max}(?![0-9])",
            validator.errors[0],
        )
        is not None
    )

@pytest.mark.parametrize(
"cell_type_ontology_term_id, in_tissue, assay_ontology_term_id",
Expand Down
Loading