Skip to content

Commit

Permalink
lint
Browse files (browse the repository at this point in the history)
  • Loading branch information
Evan Molinelli authored and Evan Molinelli committed Nov 28, 2024
1 parent d8fe2e2 commit 1557253
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 40 deletions.
27 changes: 16 additions & 11 deletions cellxgene_schema_cli/cellxgene_schema/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import os
import re
from datetime import datetime
from typing import Dict, List, Mapping, Optional, Union, Tuple
from typing import Dict, List, Mapping, Optional, Union

import anndata
import matplotlib.colors as mcolors
Expand Down Expand Up @@ -89,12 +89,16 @@ def adata(self, adata: anndata.AnnData):

@property
def visium_and_is_single_true_matrix_size(self) -> Optional[int]:
'''
"""
Returns the required matrix size based on assay type, if applicable, else returns None.
'''
"""
if self._visium_and_is_single_true_matrix_size is None:
# Visium 11M's raw matrix size is distinct from other visium assays
if bool(self.adata.obs['assay_ontology_term_id'].apply(lambda t: is_ontological_descendant_of(ONTOLOGY_PARSER, t, ASSAY_VISIUM_11M, True)).any()):
if bool(
self.adata.obs["assay_ontology_term_id"]
.apply(lambda t: is_ontological_descendant_of(ONTOLOGY_PARSER, t, ASSAY_VISIUM_11M, True))
.any()
):
self._visium_error_suffix = ERROR_SUFFIX_VISIUM_11M
self._visium_and_is_single_true_matrix_size = VISIUM_11MM_AND_IS_SINGLE_TRUE_MATRIX_SIZE
elif self._is_visium_including_descendants():
Expand All @@ -104,21 +108,23 @@ def visium_and_is_single_true_matrix_size(self) -> Optional[int]:

@property
def hires_max_dimension_size(self) -> Optional[int]:
'''
"""
Returns the restricted hires image dimension based on assay type, if applicable, else returns None.
'''
"""
if self._hires_max_dimension_size is None:
# Visium 11M's max dimension size is distinct from other visium assays
if bool(self.adata.obs['assay_ontology_term_id'].apply(lambda t: is_ontological_descendant_of(ONTOLOGY_PARSER, t, ASSAY_VISIUM_11M, True)).any()):
if bool(
self.adata.obs["assay_ontology_term_id"]
.apply(lambda t: is_ontological_descendant_of(ONTOLOGY_PARSER, t, ASSAY_VISIUM_11M, True))
.any()
):
self._visium_error_suffix = ERROR_SUFFIX_VISIUM_11M
self._hires_max_dimension_size = SPATIAL_HIRES_IMAGE_MAX_DIMENSION_SIZE_VISIUM_11MM
elif self._is_visium_including_descendants():
self._visium_error_suffix = ERROR_SUFFIX_VISIUM
self._hires_max_dimension_size = SPATIAL_HIRES_IMAGE_MAX_DIMENSION_SIZE
return self._hires_max_dimension_size



def _is_single(self) -> bool | None:
"""
Determine value of uns.spatial.is_single. None if non-spatial.
Expand Down Expand Up @@ -1159,7 +1165,6 @@ def _has_valid_raw(self, force: bool = False) -> bool:
self._raw_layer_exists = True
is_sparse_matrix = matrix_format in SPARSE_MATRIX_TYPES


is_visium_and_is_single_true = self._is_visium_and_is_single_true()
if is_visium_and_is_single_true and x.shape[0] != self.visium_and_is_single_true_matrix_size:
self._raw_layer_exists = False
Expand Down Expand Up @@ -1849,7 +1854,7 @@ def _is_visium_including_descendants(self) -> bool:
.apply(lambda assay: is_ontological_descendant_of(ONTOLOGY_PARSER, assay, ASSAY_VISIUM, True))
.any()
)

return self.is_visium

def _validate_spatial_image_shape(self, image_name: str, image: np.ndarray, max_dimension: int = None):
Expand Down
73 changes: 44 additions & 29 deletions cellxgene_schema_cli/tests/test_schema_compliance.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,21 +15,18 @@
from cellxgene_schema.schema import get_schema_definition
from cellxgene_schema.utils import getattr_anndata
from cellxgene_schema.validate import (
ASSAY_VISIUM,
ASSAY_VISIUM_11M,
ERROR_SUFFIX_VISIUM,
ERROR_SUFFIX_VISIUM_11M,
ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE,
VISIUM_AND_IS_SINGLE_TRUE_MATRIX_SIZE,
VISIUM_11MM_AND_IS_SINGLE_TRUE_MATRIX_SIZE,
SPATIAL_HIRES_IMAGE_MAX_DIMENSION_SIZE,
SPATIAL_HIRES_IMAGE_MAX_DIMENSION_SIZE_VISIUM_11MM,
VISIUM_11MM_AND_IS_SINGLE_TRUE_MATRIX_SIZE,
VISIUM_AND_IS_SINGLE_TRUE_MATRIX_SIZE,
Validator,
)
from fixtures.examples_validate import (
visium_library_id
)
from cellxgene_schema.write_labels import AnnDataLabelAppender
from fixtures.examples_validate import visium_library_id

schema_def = get_schema_definition()

Expand All @@ -41,7 +38,6 @@ def validator() -> Validator:
# Override the schema definition here
validator._set_schema_def()


# lower threshold for low gene count warning
validator.schema_def["components"]["var"]["warn_if_less_than_rows"] = 1
return validator
Expand Down Expand Up @@ -259,7 +255,7 @@ def test_raw_values__contains_zero_row_in_tissue_1(self, validator_with_visium_a
"ERROR: Each cell must have at least one non-zero value in its row in the raw matrix.",
"ERROR: Raw data may be missing: data in 'raw.X' does not meet schema requirements.",
]

def test_raw_values__contains_zero_row_in_tissue_1_mixed_in_tissue_values(self, validator_with_visium_assay):
"""
Raw Matrix contains a row with all zeros and in_tissue is 1, and there are also values with in_tissue 0.
Expand Down Expand Up @@ -314,16 +310,22 @@ def test_raw_values__contains_some_zero_rows_in_tissue_0(self, validator_with_vi
"assay_ontology_term_id, req_matrix_size, image_size",
[
("EFO:0022858", VISIUM_AND_IS_SINGLE_TRUE_MATRIX_SIZE, SPATIAL_HIRES_IMAGE_MAX_DIMENSION_SIZE),
("EFO:0022860", VISIUM_11MM_AND_IS_SINGLE_TRUE_MATRIX_SIZE, SPATIAL_HIRES_IMAGE_MAX_DIMENSION_SIZE_VISIUM_11MM),
(
"EFO:0022860",
VISIUM_11MM_AND_IS_SINGLE_TRUE_MATRIX_SIZE,
SPATIAL_HIRES_IMAGE_MAX_DIMENSION_SIZE_VISIUM_11MM,
),
],
)
def test_raw_values__invalid_visium_and_is_single_true_row_length(self, validator_with_visium_assay, assay_ontology_term_id, req_matrix_size, image_size):
def test_raw_values__invalid_visium_and_is_single_true_row_length(
self, validator_with_visium_assay, assay_ontology_term_id, req_matrix_size, image_size
):
"""
Dataset is visium and uns['is_single'] is True, but raw.X is the wrong length.
"""
validator: Validator = validator_with_visium_assay
validator.adata.obs["assay_ontology_term_id"] = assay_ontology_term_id

# hires image size must be present in order to validate the raw.
validator._visium_and_is_single_true_matrix_size = None
validator._hires_max_dimension_size = image_size
Expand All @@ -333,27 +335,36 @@ def test_raw_values__invalid_visium_and_is_single_true_row_length(self, validato

validator.validate_adata()
if assay_ontology_term_id == ASSAY_VISIUM_11M:
_errors = [f"ERROR: When {ERROR_SUFFIX_VISIUM_11M}, the raw matrix must be the "
"unfiltered feature-barcode matrix 'raw_feature_bc_matrix'. It must have exactly "
f"{validator.visium_and_is_single_true_matrix_size} rows. Raw matrix row count is 2.",
"ERROR: Raw data may be missing: data in 'raw.X' does not meet schema requirements."]
_errors = [
f"ERROR: When {ERROR_SUFFIX_VISIUM_11M}, the raw matrix must be the "
"unfiltered feature-barcode matrix 'raw_feature_bc_matrix'. It must have exactly "
f"{validator.visium_and_is_single_true_matrix_size} rows. Raw matrix row count is 2.",
"ERROR: Raw data may be missing: data in 'raw.X' does not meet schema requirements.",
]
else:
_errors = [f"ERROR: When {ERROR_SUFFIX_VISIUM}, the raw matrix must be the "
"unfiltered feature-barcode matrix 'raw_feature_bc_matrix'. It must have exactly "
f"{validator.visium_and_is_single_true_matrix_size} rows. Raw matrix row count is 2.",
"ERROR: Raw data may be missing: data in 'raw.X' does not meet schema requirements."]
_errors = [
f"ERROR: When {ERROR_SUFFIX_VISIUM}, the raw matrix must be the "
"unfiltered feature-barcode matrix 'raw_feature_bc_matrix'. It must have exactly "
f"{validator.visium_and_is_single_true_matrix_size} rows. Raw matrix row count is 2.",
"ERROR: Raw data may be missing: data in 'raw.X' does not meet schema requirements.",
]

assert validator.errors == _errors


@pytest.mark.parametrize(
"assay_ontology_term_id, req_matrix_size, image_size",
[
("EFO:0022858", VISIUM_AND_IS_SINGLE_TRUE_MATRIX_SIZE, SPATIAL_HIRES_IMAGE_MAX_DIMENSION_SIZE),
("EFO:0022860", VISIUM_11MM_AND_IS_SINGLE_TRUE_MATRIX_SIZE, SPATIAL_HIRES_IMAGE_MAX_DIMENSION_SIZE_VISIUM_11MM),
(
"EFO:0022860",
VISIUM_11MM_AND_IS_SINGLE_TRUE_MATRIX_SIZE,
SPATIAL_HIRES_IMAGE_MAX_DIMENSION_SIZE_VISIUM_11MM,
),
],
)
def test_raw_values__multiple_invalid_in_tissue_errors(self, validator_with_visium_assay, assay_ontology_term_id, req_matrix_size, image_size):
def test_raw_values__multiple_invalid_in_tissue_errors(
self, validator_with_visium_assay, assay_ontology_term_id, req_matrix_size, image_size
):
"""
Dataset is visium and uns['is_single'] is True, in_tissue has both 0 and 1 values and there
are issues validating rows of both in the matrix.
Expand All @@ -375,14 +386,18 @@ def test_raw_values__multiple_invalid_in_tissue_errors(self, validator_with_visi
validator.adata.raw.var.drop("feature_is_filtered", axis=1, inplace=True)
validator.validate_adata()
if assay_ontology_term_id == ASSAY_VISIUM_11M:
assert validator.errors[0] == f"ERROR: When {ERROR_SUFFIX_VISIUM_11M}, the raw matrix must be the " \
"unfiltered feature-barcode matrix 'raw_feature_bc_matrix'. It must have exactly " \
f"{validator.visium_and_is_single_true_matrix_size} rows. Raw matrix row count is 2."
assert (
validator.errors[0] == f"ERROR: When {ERROR_SUFFIX_VISIUM_11M}, the raw matrix must be the "
"unfiltered feature-barcode matrix 'raw_feature_bc_matrix'. It must have exactly "
f"{validator.visium_and_is_single_true_matrix_size} rows. Raw matrix row count is 2."
)
else:
assert validator.errors[0] == f"ERROR: When {ERROR_SUFFIX_VISIUM}, the raw matrix must be the " \
"unfiltered feature-barcode matrix 'raw_feature_bc_matrix'. It must have exactly " \
f"{validator.visium_and_is_single_true_matrix_size} rows. Raw matrix row count is 2."

assert (
validator.errors[0] == f"ERROR: When {ERROR_SUFFIX_VISIUM}, the raw matrix must be the "
"unfiltered feature-barcode matrix 'raw_feature_bc_matrix'. It must have exactly "
f"{validator.visium_and_is_single_true_matrix_size} rows. Raw matrix row count is 2."
)

assert validator.errors[1:] == [
"ERROR: If obs['in_tissue'] contains at least one value 0, then there must be at least "
"one row with obs['in_tissue'] == 0 that has a non-zero value in the raw matrix.",
Expand Down

0 comments on commit 1557253

Please sign in to comment.