diff --git a/cellxgene_schema_cli/cellxgene_schema/validate.py b/cellxgene_schema_cli/cellxgene_schema/validate.py index 8827661a..fdad3d7d 100644 --- a/cellxgene_schema_cli/cellxgene_schema/validate.py +++ b/cellxgene_schema_cli/cellxgene_schema/validate.py @@ -50,6 +50,7 @@ ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_REQUIRED = f"is required for {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE}" ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_IN_TISSUE_0 = f"{ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE} and in_tissue is 0" +ERROR_SUFFIX_SPARSE_FORMAT = f"Please ensure it is either a dense array or one of the supported sparse matrix encodings ({','.join(SUPPORTED_SPARSE_MATRIX_TYPES)})" class Validator: """Handles validation of AnnData""" @@ -936,7 +937,8 @@ def _validate_uns_dict(self, uns_dict: dict) -> None: category_mapping[column_name] = column.nunique() for key, value in uns_dict.items(): - if isinstance(value, scipy.sparse.csr_matrix) and value.nnz == 0: + if isinstance(value, scipy.sparse.csr_matrix): + if value.nnz == 0: self.errors.append(f"uns['{key}'] cannot be an empty value.") elif value is not None and not isinstance(value, (np.bool_, bool, numbers.Number)) and len(value) == 0: self.errors.append(f"uns['{key}'] cannot be an empty value.") @@ -1019,7 +1021,7 @@ def _validate_sparsity(self): self.errors.append(f"Invalid sparse encoding for {x_name} with encoding {matrix_format}. Onle {','.join(SUPPORTED_SPARSE_MATRIX_TYPES)} sparse encodings are supported.") continue elif matrix_format == "unknown": - self.errors.append(f"Unknown encoding for matrix {x_name}. Please ensure it is either a dense array or one of {','.join(SUPPORTED_SPARSE_MATRIX_TYPES)} sparse encodings.") + self.errors.append(f"Unknown encoding for matrix {x_name}. {ERROR_SUFFIX_SPARSE_FORMAT}") continue # It seems silly to perform this test for 'coo' and 'csc' formats, @@ -1219,7 +1221,7 @@ def _has_valid_raw(self, force: bool = False) -> bool: matrix_format = get_matrix_format(x) if matrix_format == "unknown": - self.errors.append(f"Unknown encoding for matrix {xloc}. Please ensure it is either a dense array or one of {','.join(SUPPORTED_SPARSE_MATRIX_TYPES)} sparse encodings.") + self.errors.append(f"Unknown encoding for matrix {xloc}. {ERROR_SUFFIX_SPARSE_FORMAT}") self._raw_layer_exists = False return self._raw_layer_exists diff --git a/cellxgene_schema_cli/tests/test_schema_compliance.py b/cellxgene_schema_cli/tests/test_schema_compliance.py index 069620d3..b85c5128 100644 --- a/cellxgene_schema_cli/tests/test_schema_compliance.py +++ b/cellxgene_schema_cli/tests/test_schema_compliance.py @@ -2225,7 +2225,7 @@ def test_uns_bool_allowed(self, validator_with_adata): assert validator.errors == [] def test_uns_scipy_matrices_cannot_be_empty(self, validator_with_adata): - validator = validator_with_adata + validator: Validator = validator_with_adata validator.adata.uns["test"] = scipy.sparse.csr_matrix([[1]], dtype=int) validator.validate_adata() diff --git a/cellxgene_schema_cli/tests/test_validate.py b/cellxgene_schema_cli/tests/test_validate.py index 9e43076f..3b6fc7da 100644 --- a/cellxgene_schema_cli/tests/test_validate.py +++ b/cellxgene_schema_cli/tests/test_validate.py @@ -18,6 +18,7 @@ ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_REQUIRED, SPATIAL_HIRES_IMAGE_MAX_DIMENSION_SIZE, SPATIAL_HIRES_IMAGE_MAX_DIMENSION_SIZE_VISIUM_11MM, + ERROR_SUFFIX_SPARSE_FORMAT, Validator, validate, ) @@ -1276,7 +1277,10 @@ def test_has_valid_raw(self, data, matrix_format, expected_result): @mock.patch("cellxgene_schema.validate.get_matrix_format", return_value="unknown") def test_has_valid_raw_with_unknown_format(self, mock_get_matrix_format): + # a matrix with unknown format should be invalid data = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32) validator = self.create_validator(data, "unknown") - with pytest.raises(AssertionError): - validator._has_valid_raw() + assert validator._has_valid_raw() is False + assert validator.errors == [ + f'Unknown encoding for matrix X. {ERROR_SUFFIX_SPARSE_FORMAT}' + ]