From fc210dfc812f9631d95bc337f28a8f2fa751e2bf Mon Sep 17 00:00:00 2001
From: Bento007 <trent.smith007@gmail.com>
Date: Thu, 14 Nov 2024 13:35:49 -0800
Subject: [PATCH 01/28] =?UTF-8?q?Bump=20version:=205.2.0=20=E2=86=92=205.2?=
 =?UTF-8?q?.1-rc.0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .bumpversion.cfg                                  | 2 +-
 cellxgene_schema_cli/cellxgene_schema/__init__.py | 2 +-
 cellxgene_schema_cli/setup.py                     | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index 5485ee1c..3bd4ca9e 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 5.2.0
+current_version = 5.2.1-rc.0
 commit = True
 parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(?:-(?P<prerel>rc)\.(?P<prerelversion>\d+))?
 serialize = 
diff --git a/cellxgene_schema_cli/cellxgene_schema/__init__.py b/cellxgene_schema_cli/cellxgene_schema/__init__.py
index 6c235c59..f8407c1e 100644
--- a/cellxgene_schema_cli/cellxgene_schema/__init__.py
+++ b/cellxgene_schema_cli/cellxgene_schema/__init__.py
@@ -1 +1 @@
-__version__ = "5.2.0"
+__version__ = "5.2.1-rc.0"
diff --git a/cellxgene_schema_cli/setup.py b/cellxgene_schema_cli/setup.py
index 2053acd6..9a62ca6e 100644
--- a/cellxgene_schema_cli/setup.py
+++ b/cellxgene_schema_cli/setup.py
@@ -5,7 +5,7 @@
 
 setup(
     name="cellxgene-schema",
-    version="5.2.0",
+    version="5.2.1-rc.0",
     url="https://github.com/chanzuckerberg/single-cell-curation",
     license="MIT",
     author="Chan Zuckerberg Initiative",

From d03765b1c0fdcf3c0b654a5699841bf37063d36e Mon Sep 17 00:00:00 2001
From: Bento007 <trent.smith007@gmail.com>
Date: Thu, 14 Nov 2024 13:37:12 -0800
Subject: [PATCH 02/28] =?UTF-8?q?Bump=20version:=205.2.1-rc.0=20=E2=86=92?=
 =?UTF-8?q?=205.2.1?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .bumpversion.cfg                                  | 2 +-
 cellxgene_schema_cli/cellxgene_schema/__init__.py | 2 +-
 cellxgene_schema_cli/setup.py                     | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index 3bd4ca9e..13a12d63 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 5.2.1-rc.0
+current_version = 5.2.1
 commit = True
 parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(?:-(?P<prerel>rc)\.(?P<prerelversion>\d+))?
 serialize = 
diff --git a/cellxgene_schema_cli/cellxgene_schema/__init__.py b/cellxgene_schema_cli/cellxgene_schema/__init__.py
index f8407c1e..98886d26 100644
--- a/cellxgene_schema_cli/cellxgene_schema/__init__.py
+++ b/cellxgene_schema_cli/cellxgene_schema/__init__.py
@@ -1 +1 @@
-__version__ = "5.2.1-rc.0"
+__version__ = "5.2.1"
diff --git a/cellxgene_schema_cli/setup.py b/cellxgene_schema_cli/setup.py
index 9a62ca6e..ce67b058 100644
--- a/cellxgene_schema_cli/setup.py
+++ b/cellxgene_schema_cli/setup.py
@@ -5,7 +5,7 @@
 
 setup(
     name="cellxgene-schema",
-    version="5.2.1-rc.0",
+    version="5.2.1",
     url="https://github.com/chanzuckerberg/single-cell-curation",
     license="MIT",
     author="Chan Zuckerberg Initiative",

From b256322b342b267e1288cfc3a68fd69b04bd25e0 Mon Sep 17 00:00:00 2001
From: Brian Raymor <brianraymor@chanzuckerberg.com>
Date: Fri, 15 Nov 2024 16:24:16 -0800
Subject: [PATCH 03/28] removed strongly recommended from development stage
 (#1110)

---
 schema/drafts/5.2.1-experimental.md |  2 +-
 schema/drafts/5.3.0.md              | 54 +++--------------------------
 2 files changed, 6 insertions(+), 50 deletions(-)

diff --git a/schema/drafts/5.2.1-experimental.md b/schema/drafts/5.2.1-experimental.md
index 8a0d490b..4275dfe6 100644
--- a/schema/drafts/5.2.1-experimental.md
+++ b/schema/drafts/5.2.1-experimental.md
@@ -190,7 +190,7 @@ The following gene annotation dependencies are *pinned* for this version of the
             <td>
               <a href="https://www.ebi.ac.uk/ols4/ontologies/ncbitaxon/classes?obo_id=NCBITaxon%3A7227"><code>"NCBITaxon:7227"</code></a><br>for <i>Drosophila melanogaster</i>
             </td>
-            <td>MUST be ither the most accurate descendant of <a href="https://www.ebi.ac.uk/ols4/ontologies/fbbt/classes/http%253A%252F%252Fpurl.obolibrary.org%252Fobo%252FFBbt_00007002?lang=en"><code>FBbt:00007002</code></a> for <i>cell</i><br>or <code>"unknown"</code> when:
+            <td>MUST be either the most accurate descendant of <a href="https://www.ebi.ac.uk/ols4/ontologies/fbbt/classes/http%253A%252F%252Fpurl.obolibrary.org%252Fobo%252FFBbt_00007002?lang=en"><code>FBbt:00007002</code></a> for <i>cell</i><br>or <code>"unknown"</code> when:
               <ul>
                <li>
                  no appropriate term can be found (e.g. the cell type is unknown)
diff --git a/schema/drafts/5.3.0.md b/schema/drafts/5.3.0.md
index c6aa47b3..cbbdb46a 100644
--- a/schema/drafts/5.3.0.md
+++ b/schema/drafts/5.3.0.md
@@ -503,55 +503,11 @@ Curators MUST annotate the following columns in the `obs` dataframe:
     </tr>
     <tr>
       <th>Value</th>
-        <td>categorical with <code>str</code> categories. If unavailable, this MUST be <code>"unknown"</code>. 
-<br><br>If <code>organism_ontolology_term_id</code> is <a href="https://www.ebi.ac.uk/ols4/ontologies/ncbitaxon/classes?obo_id=NCBITaxon%3A9606"><code>"NCBITaxon:9606"</code></a> for <i>Homo sapiens</i>, this MUST be the most accurate descendant of <a href="https://www.ebi.ac.uk/ols4/ontologies/hsapdv/classes?obo_id=HsapDv%3A0000001"><code>HsapDv:0000001</code></a> for <i>life cycle</i> with the following STRONGLY RECOMMENDED:
-          <br><br>
-          <table>
-          <thead>
-          <tr>
-          <th>For</th>
-          <th>Use</th>
-          </tr>
-          </thead>
-          <tbody>
-            <tr>
-              <td>Embryonic stage</td>
-              <td>A term from the set of <a href="http://www.ontobee.org/search?ontology=HSAPDV&keywords=carnegie&submit=Search+terms">Carnegie stages 1-23</a><br>(up to 8 weeks after conception; e.g. <a href="https://www.ebi.ac.uk/ols4/ontologies/hsapdv/classes?obo_id=HsapDv%3A0000003">HsapDv:0000003</a>)</td>
-            </tr>
-            <tr>
-              <td>Fetal development</td>
-              <td>A term from the set of <a href="http://www.ontobee.org/search?ontology=HSAPDV&keywords=post-fertilization&submit=Search+terms">9 to 38 week post-fertilization human stages</a><br>(9 weeks after conception and before birth; e.g. <a href="https://www.ebi.ac.uk/ols4/ontologies/hsapdv/classes?obo_id=HsapDv%3A0000046">HsapDv:0000046</a>)</td>
-            </tr>
-            <tr>
-              <td>After birth for the<br>first 12 months</td>
-              <td>A term from the set of <a href="http://www.ontobee.org/search?ontology=HSAPDV&keywords=month-old&submit=Search+terms">1 to 12 month-old human stages</a><br>(e.g. <a href="https://www.ebi.ac.uk/ols4/ontologies/hsapdv/classes?obo_id=HsapDv%3A0000273">HsapDv:0000273)</a></td>
-            </tr>
-            <tr>
-              <td>After the first 12<br>months post-birth</td>
-              <td>A term from the set of <a href="http://www.ontobee.org/search?ontology=HSAPDV&keywords=year-old&submit=Search+terms">year-old human stages</a><br>(e.g. <a href="https://www.ebi.ac.uk/ols4/ontologies/hsapdv/classes?obo_id=HsapDv%3A0000246">HsapDv:0000246)</a></td>
-            </tr>
-          </tbody></table>
-          <br>If <code>organism_ontolology_term_id</code> is 
-          <a href="https://www.ebi.ac.uk/ols4/ontologies/ncbitaxon/classes?obo_id=NCBITaxon%3A10090"><code>"NCBITaxon:10090"</code></a> for <i>Mus musculus</i>, this MUST be the accurate descendant of <a href="https://www.ebi.ac.uk/ols4/ontologies/mmusdv/classes?obo_id=MmusDv%3A0000001"><code>MmusDv:0000001</code></a> for <i>life cycle</i> with the following STRONGLY RECOMMENDED:
-          <br><br>
-          <table>
-          <thead>
-          <tr>
-          <th>For</th>
-          <th>Use</th>
-          </tr>
-          </thead>
-          <tbody>
-            <tr>
-              <td>From the time of conception<br>to 1 month after birth</td>
-              <td>A term from the set of <a href="http://www.ontobee.org/search?ontology=MMUSDV&keywords=theiler+stage&submit=Search+terms">Theiler stages</a><br>(e.g. <a href="https://www.ebi.ac.uk/ols4/ontologies/mmusdv/classes?obo_id=MmusDv%3A0000003">MmusDv:0000003</a>)</td>
-            </tr>
-            <tr>
-              <td>From 2 months after birth</td>
-              <td>A term from the set of <a href="http://www.ontobee.org/search?ontology=MMUSDV&keywords=month-old&submit=Search+terms"> month-old stages</a><br>(e.g. <a href="https://www.ebi.ac.uk/ols4/ontologies/mmusdv/classes?obo_id=MmusDv%3A0000062">MmusDv:0000062)</a></td>
-            </tr>
-          </tbody></table>
-          <br> Otherwise, for all other organisms this MUST be the most accurate descendant of <a href="https://www.ebi.ac.uk/ols4/ontologies/uberon/classes?obo_id=UBERON%3A0000105"<code>UBERON:0000105</code></a> for <i>life cycle stage</i>, excluding <a href="https://www.ebi.ac.uk/ols4/ontologies/uberon/classes?obo_id=UBERON%3A0000071"<code>UBERON:0000071</code></a> for <i>death stage</i>.
+        <td>categorical with <code>str</code> categories. If unavailable, this MUST be <code>"unknown"</code>.<br><br> 
+        If <code>organism_ontology_term_id</code> is <a href="https://www.ebi.ac.uk/ols4/ontologies/ncbitaxon/classes?obo_id=NCBITaxon%3A9606"><code>"NCBITaxon:9606"</code></a> for <i>Homo sapiens</i>, this MUST be the most accurate descendant of <a href="https://www.ebi.ac.uk/ols4/ontologies/hsapdv/classes?obo_id=HsapDv%3A0000001"><code>HsapDv:0000001</code></a> for <i>life cycle</i>.<br><br>
+        If <code>organism_ontology_term_id</code> is
+        <a href="https://www.ebi.ac.uk/ols4/ontologies/ncbitaxon/classes?obo_id=NCBITaxon%3A10090"><code>"NCBITaxon:10090"</code></a> for <i>Mus musculus</i>, this MUST be the most accurate descendant of <a href="https://www.ebi.ac.uk/ols4/ontologies/mmusdv/classes?obo_id=MmusDv%3A0000001"><code>MmusDv:0000001</code></a> for <i>life cycle</i>.<br><br>
+        Otherwise, for all other organisms this MUST be the most accurate descendant of <a href="https://www.ebi.ac.uk/ols4/ontologies/uberon/classes?obo_id=UBERON%3A0000105"><code>UBERON:0000105</code></a> for <i>life cycle stage</i>, excluding <a href="https://www.ebi.ac.uk/ols4/ontologies/uberon/classes?obo_id=UBERON%3A0000071"><code>UBERON:0000071</code></a> for <i>death stage</i>.
         </td>
     </tr>
 </tbody></table>

From 30498f06c53950baf5ef0b65f4f95d2977a6f37d Mon Sep 17 00:00:00 2001
From: Evan Molinelli <ejmolinelli@users.noreply.github.com>
Date: Mon, 18 Nov 2024 16:12:45 -0500
Subject: [PATCH 04/28] chore: remove seurat references in validator (#1113)

Co-authored-by: Evan Molinelli <emolinelli@CZIMACOS4882.local>
---
 .../schema_definitions/schema_definition.yaml |  2 -
 .../cellxgene_schema/validate.py              | 79 ++-----------------
 cellxgene_schema_cli/tests/test_validate.py   | 64 +--------------
 3 files changed, 9 insertions(+), 136 deletions(-)

diff --git a/cellxgene_schema_cli/cellxgene_schema/schema_definitions/schema_definition.yaml b/cellxgene_schema_cli/cellxgene_schema/schema_definitions/schema_definition.yaml
index bca3da68..28a3fad5 100644
--- a/cellxgene_schema_cli/cellxgene_schema/schema_definitions/schema_definition.yaml
+++ b/cellxgene_schema_cli/cellxgene_schema/schema_definitions/schema_definition.yaml
@@ -2,8 +2,6 @@ title: Corpora schema version 5.X.X
 type: anndata
 # If sparsity of any expression matrix is greater than this and not csr sparse matrix, then there will be warning.
 sparsity: 0.5
-# If the R array will exceed this number in size, then Seurat conversion will fail
-max_size_for_seurat: 2147483647  # 2^31 - 1 (max value for 4-byte signed int)
 # Perform the checks for "raw" requirements IF:
 raw:
   obs:
diff --git a/cellxgene_schema_cli/cellxgene_schema/validate.py b/cellxgene_schema_cli/cellxgene_schema/validate.py
index 4aa81adc..6869a9df 100644
--- a/cellxgene_schema_cli/cellxgene_schema/validate.py
+++ b/cellxgene_schema_cli/cellxgene_schema/validate.py
@@ -54,7 +54,6 @@ def reset(self):
         self.is_valid = False
         self.h5ad_path = ""
         self._raw_layer_exists = None
-        self.is_seurat_convertible: bool = True
         self.is_spatial = None
         self.is_visium = None
         self.is_visium_and_is_single_true = None
@@ -926,65 +925,6 @@ def _validate_sparsity(self):
                     f"to use this type of matrix for the given sparsity."
                 )
 
-    def _validate_seurat_convertibility(self):
-        """
-        Use length of component matrices to determine if the anndata object will be unable to be converted to Seurat by
-        virtue of the R language's array size limit (4-byte signed int length). Add warning for each matrix which is
-        too large.
-        rtype: None
-        """
-        # Seurat conversion is not supported for Visium datasets.
-        if self._is_visium():
-            self.warnings.append(
-                "Datasets with assay_ontology_term_id 'EFO:0010961' (Visium Spatial Gene Expression) are not compatible with Seurat."
-            )
-            self.is_seurat_convertible = False
-            return
-
-        to_validate = [(self.adata.X, "X")]
-        # check if there's raw data
-        if self.adata.raw:
-            to_validate.append((self.adata.raw.X, "raw.X"))
-        # Check length of component arrays
-        for matrix, matrix_name in to_validate:
-            matrix_format = get_matrix_format(self.adata, matrix)
-            if matrix_format in SPARSE_MATRIX_TYPES:
-                effective_r_array_size = self._count_matrix_nonzero(matrix_name, matrix)
-                is_sparse = True
-            elif matrix_format == "dense":
-                effective_r_array_size = max(matrix.shape)
-                is_sparse = False
-            else:
-                self.warnings.append(
-                    f"Unable to verify seurat convertibility for matrix {matrix_name} " f"of type {type(matrix)}"
-                )
-                continue
-
-            if effective_r_array_size > self.schema_def["max_size_for_seurat"]:
-                if is_sparse:
-                    self.warnings.append(
-                        f"This dataset cannot be converted to the .rds (Seurat v4) format. "
-                        f"{effective_r_array_size} nonzero elements in matrix {matrix_name} exceed the "
-                        f"limitations in the R dgCMatrix sparse matrix class (2^31 - 1 nonzero "
-                        f"elements)."
-                    )
-                else:
-                    self.warnings.append(
-                        f"This dataset cannot be converted to the .rds (Seurat v4) format. "
-                        f"{effective_r_array_size} elements in at least one dimension of matrix "
-                        f"{matrix_name} exceed the limitations in the R dgCMatrix sparse matrix class "
-                        f"(2^31 - 1 nonzero elements)."
-                    )
-
-                self.is_seurat_convertible = False
-
-        if self.adata.raw and self.adata.raw.X.shape[1] != self.adata.raw.var.shape[0]:
-            self.errors.append(
-                "This dataset has a mismatch between 1) the number of features in raw.X and 2) the number of features "
-                "in raw.var. These counts must be identical."
-            )
-            self.is_seurat_convertible = False
-
     def _validate_obsm(self):
         """
         Validates the embedding dictionary -- it checks that all values of adata.obsm are numpy arrays with the correct
@@ -1887,10 +1827,6 @@ def _deep_check(self):
         # Checks spatial
         self._check_spatial()
 
-        # Checks Seurat convertibility
-        logger.debug("Validating Seurat convertibility...")
-        self._validate_seurat_convertibility()
-
         # Checks each component
         for component_name, component_def in self.schema_def["components"].items():
             logger.debug(f"Validating component: {component_name}")
@@ -1976,7 +1912,7 @@ def validate(
     add_labels_file: str = None,
     ignore_labels: bool = False,
     verbose: bool = False,
-) -> (bool, list, bool):
+) -> (bool, list):
     from .write_labels import AnnDataLabelAppender
 
     """
@@ -1985,8 +1921,7 @@ def validate(
     :param Union[str, bytes, os.PathLike] h5ad_path: Path to h5ad file to validate
     :param str add_labels_file: Path to new h5ad file with ontology/gene labels added
 
-    :return (True, [], <bool>) if successful validation, (False, [list_of_errors], <bool>) otherwise; last bool is for
-    seurat convertibility
+    :return (True, []) if successful validation, (False, [list_of_errors]) otherwise
     :rtype tuple
     """
 
@@ -2004,7 +1939,7 @@ def validate(
 
     # Stop if validation was unsuccessful
     if not validator.is_valid:
-        return False, validator.errors, validator.is_seurat_convertible
+        return False, validator.errors
 
     if add_labels_file:
         label_start = datetime.now()
@@ -2015,10 +1950,6 @@ def validate(
             f"{writer.was_writing_successful}"
         )
 
-        return (
-            validator.is_valid and writer.was_writing_successful,
-            validator.errors + writer.errors,
-            validator.is_seurat_convertible,
-        )
+        return (validator.is_valid and writer.was_writing_successful, validator.errors + writer.errors)
 
-    return True, validator.errors, validator.is_seurat_convertible
+    return True, validator.errors
diff --git a/cellxgene_schema_cli/tests/test_validate.py b/cellxgene_schema_cli/tests/test_validate.py
index cead4b33..819cde43 100644
--- a/cellxgene_schema_cli/tests/test_validate.py
+++ b/cellxgene_schema_cli/tests/test_validate.py
@@ -27,10 +27,8 @@
     adata_visium,
     adata_with_labels,
     good_obs,
-    good_obsm,
     good_uns,
     good_uns_with_visium_spatial,
-    good_var,
     h5ad_invalid,
     h5ad_valid,
     visium_library_id,
@@ -297,7 +295,7 @@ def test__validate_with_h5ad_valid_and_labels(self):
         with tempfile.TemporaryDirectory() as temp_dir:
             labels_path = "/".join([temp_dir, "labels.h5ad"])
 
-            success, errors, is_seurat_convertible = validate(h5ad_valid, labels_path)
+            success, errors = validate(h5ad_valid, labels_path)
 
             import anndata as ad
 
@@ -306,36 +304,32 @@ def test__validate_with_h5ad_valid_and_labels(self):
             assert adata.raw.X.has_canonical_format
             assert success
             assert not errors
-            assert is_seurat_convertible
             assert os.path.exists(labels_path)
             expected_hash = "55fbc095218a01cad33390f534d6690af0ecd6593f27d7cd4d26e91072ea8835"
             original_hash = self.hash_file(h5ad_valid)
             assert original_hash != expected_hash, "Writing labels did not change the dataset from the original."
 
     def test__validate_with_h5ad_valid_and_without_labels(self):
-        success, errors, is_seurat_convertible = validate(h5ad_valid)
+        success, errors = validate(h5ad_valid)
 
         assert success
         assert not errors
-        assert is_seurat_convertible
 
     def test__validate_with_h5ad_invalid_and_with_labels(self):
         with tempfile.TemporaryDirectory() as temp_dir:
             labels_path = "/".join([temp_dir, "labels.h5ad"])
 
-            success, errors, is_seurat_convertible = validate(h5ad_invalid, labels_path)
+            success, errors = validate(h5ad_invalid, labels_path)
 
             assert not success
             assert errors
-            assert is_seurat_convertible
             assert not os.path.exists(labels_path)
 
     def test__validate_with_h5ad_invalid_and_without_labels(self):
-        success, errors, is_seurat_convertible = validate(h5ad_invalid)
+        success, errors = validate(h5ad_invalid)
 
         assert not success
         assert errors
-        assert is_seurat_convertible
 
 
 class TestCheckSpatial:
@@ -1001,56 +995,6 @@ def test__validate_cell_type_ontology_term_id_error(self, cell_type_ontology_ter
         )
 
 
-class TestSeuratConvertibility:
-    def validation_helper(self, matrix, raw=None):
-        data = anndata.AnnData(X=matrix, obs=good_obs, uns=good_uns, obsm=good_obsm, var=good_var)
-        if raw:
-            data.raw = raw
-        self.validator: Validator = Validator()
-        self.validator._set_schema_def()
-        self.validator.schema_def["max_size_for_seurat"] = 2**3 - 1  # Reduce size required to fail (faster tests)
-        self.validator.adata = data
-
-    def test_determine_seurat_convertibility(self):
-        # Sparse matrix with too many nonzero values is not Seurat-convertible
-        sparse_matrix_too_large = sparse.csr_matrix(np.ones((good_obs.shape[0], good_var.shape[0]), dtype=np.float32))
-        self.validation_helper(sparse_matrix_too_large)
-        self.validator._validate_seurat_convertibility()
-        assert len(self.validator.warnings) == 1
-        assert not self.validator.is_seurat_convertible
-
-        # Reducing nonzero count by 1, to within limit, makes it Seurat-convertible
-        sparse_matrix_with_zero = sparse.csr_matrix(np.ones((good_obs.shape[0], good_var.shape[0]), dtype=np.float32))
-        sparse_matrix_with_zero[0, 0] = 0
-        self.validation_helper(sparse_matrix_with_zero)
-        self.validator._validate_seurat_convertibility()
-        assert len(self.validator.warnings) == 0
-        assert self.validator.is_seurat_convertible
-
-        # Dense matrices with a dimension that exceeds limit will fail -- zeros are irrelevant
-        dense_matrix_with_zero = np.zeros((good_obs.shape[0], good_var.shape[0]), dtype=np.float32)
-        self.validation_helper(dense_matrix_with_zero)
-        self.validator.schema_def["max_size_for_seurat"] = 2**2 - 1
-        self.validator._validate_seurat_convertibility()
-        assert len(self.validator.warnings) == 1
-        assert not self.validator.is_seurat_convertible
-
-        # Dense matrices with dimensions in bounds but total count over will succeed
-        dense_matrix = np.ones((good_obs.shape[0], good_var.shape[0]), dtype=np.float32)
-        self.validation_helper(dense_matrix)
-        self.validator.schema_def["max_size_for_seurat"] = 2**3 - 1
-        self.validator._validate_seurat_convertibility()
-        assert len(self.validator.warnings) == 0
-        assert self.validator.is_seurat_convertible
-
-        # Visium datasets are not Seurat-convertible
-        self.validation_helper(sparse_matrix_with_zero)
-        self.validator.adata.obs = adata_visium.obs.copy()
-        self.validator._validate_seurat_convertibility()
-        assert len(self.validator.warnings) == 1
-        assert not self.validator.is_seurat_convertible
-
-
 class TestValidatorValidateDataFrame:
     @pytest.mark.parametrize("_type", [np.int64, np.int32, int, np.float64, np.float32, float, str])
     def test_succeed_categorical_types(self, tmp_path, _type, valid_adata):

From dd97d55250a2bad3b57e179dccbfa79d3cc51651 Mon Sep 17 00:00:00 2001
From: Joyce Yan <5653616+joyceyan@users.noreply.github.com>
Date: Mon, 18 Nov 2024 16:26:44 -0800
Subject: [PATCH 05/28] chore: bump cog version for schema 5.3.0 (#1116)

---
 cellxgene_schema_cli/requirements.txt                   | 2 +-
 scripts/schema_bump_dry_run_ontologies/requirements.txt | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/cellxgene_schema_cli/requirements.txt b/cellxgene_schema_cli/requirements.txt
index c2bbdc73..d8b2bcdd 100644
--- a/cellxgene_schema_cli/requirements.txt
+++ b/cellxgene_schema_cli/requirements.txt
@@ -1,5 +1,5 @@
 anndata>=0.8,<0.11
-cellxgene-ontology-guide==1.2.0 # update before a schema migration
+cellxgene-ontology-guide==1.3.0 # update before a schema migration
 click<9
 Cython<4
 numpy<2
diff --git a/scripts/schema_bump_dry_run_ontologies/requirements.txt b/scripts/schema_bump_dry_run_ontologies/requirements.txt
index cb7c918d..373412b0 100644
--- a/scripts/schema_bump_dry_run_ontologies/requirements.txt
+++ b/scripts/schema_bump_dry_run_ontologies/requirements.txt
@@ -1,2 +1,2 @@
 requests<3
-cellxgene-ontology-guide==1.2.0
+cellxgene-ontology-guide==1.3.0

From 48cb1754fe01f38cf8c79cbc979d877a1998f3d0 Mon Sep 17 00:00:00 2001
From: Brian Raymor <brianraymor@chanzuckerberg.com>
Date: Tue, 19 Nov 2024 11:07:18 -0800
Subject: [PATCH 06/28] Added genetic ancestry (#1117)

---
 schema/drafts/5.3.0.md | 177 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 177 insertions(+)

diff --git a/schema/drafts/5.3.0.md b/schema/drafts/5.3.0.md
index cbbdb46a..a65f74a8 100644
--- a/schema/drafts/5.3.0.md
+++ b/schema/drafts/5.3.0.md
@@ -559,6 +559,177 @@ Curators MUST annotate the following columns in the `obs` dataframe:
 </tbody></table>
 <br>
 
+If <code>organism_ontology_term_id</code> is <code>"NCBITaxon:9606"</code> for <i>Homo sapiens</i>, then for each observation, either all values of the following fields MUST be <code>float("nan")</code> or the sum of their values MUST be <code>1.0</code>:
+
+* <code>genetic_ancestry_African</code>
+* <code>genetic_ancestry_East_Asian</code>
+* <code>genetic_ancestry_European</code>
+* <code>genetic_ancestry_Indigenous_American</code>
+* <code>genetic_ancestry_Oceanian</code>
+* <code>genetic_ancestry_South_Asian</code>
+
+### genetic_ancestry_African
+
+<table>
+  <tbody>
+    <tr>
+      <th>Key</th>
+      <td>genetic_ancestry_African</td>
+    </tr>
+    <tr>
+      <th>Annotator</th>
+      <td>Curator MUST annotate.</td>
+    </tr>
+    <tr>
+      <th>Value</th>
+      <td>
+        <code>str</code> or <code>float</code>. All observations with the same <code>donor_id</code> MUST contain the same value.<br><br>
+        If <code>organism_ontology_term_id</code> is NOT
+        <code>"NCBITaxon:9606"</code> for <i>Homo sapiens</i>, then the
+        value MUST be <code>"na"</code>.<br><br>If
+        <code>organism_ontology_term_id</code> is
+        <code>"NCBITaxon:9606"</code> for <i>Homo sapiens</i>, then the value MUST be <code>float("nan")</code> if unavailable; otherwise, the value MUST be the genetic ancestry percentage of <a href="https://www.ebi.ac.uk/ols4/ontologies/hancestro/classes/http%253A%252F%252Fpurl.obolibrary.org%252Fobo%252FHANCESTRO_0010"><code>"HANCESTRO:0010"</code></a> for <i>African</i> expressed as a <code>float</code> greater than or equal to <code>0.0</code> and less than or equal to <code>1.0</code>.
+      </td>
+    </tr>
+  </tbody>
+</table>
+<br />
+
+### genetic_ancestry_East_Asian
+
+<table>
+  <tbody>
+    <tr>
+      <th>Key</th>
+      <td>genetic_ancestry_East_Asian</td>
+    </tr>
+    <tr>
+      <th>Annotator</th>
+      <td>Curator MUST annotate.</td>
+    </tr>
+    <tr>
+      <th>Value</th>
+      <td>
+        <code>str</code> or <code>float</code>. All observations with the same <code>donor_id</code> MUST contain the same value.<br><br>
+        If <code>organism_ontology_term_id</code> is NOT
+        <code>"NCBITaxon:9606"</code> for <i>Homo sapiens</i>, then the
+        value MUST be <code>"na"</code>.<br><br>If
+        <code>organism_ontology_term_id</code> is
+        <code>"NCBITaxon:9606"</code> for <i>Homo sapiens</i>, then the value MUST be <code>float("nan")</code> if unavailable; otherwise, the value MUST be the genetic ancestry percentage of <a href="https://www.ebi.ac.uk/ols4/ontologies/hancestro/classes/http%253A%252F%252Fpurl.obolibrary.org%252Fobo%252FHANCESTRO_0009"><code>"HANCESTRO:0009"</code></a> for <i>East Asian</i> expressed as a <code>float</code> greater than or equal to <code>0.0</code> and less than or equal to <code>1.0</code>.
+      </td>
+    </tr>
+  </tbody>
+</table>
+<br />
+
+### genetic_ancestry_European
+
+<table>
+  <tbody>
+    <tr>
+      <th>Key</th>
+      <td>genetic_ancestry_European</td>
+    </tr>
+    <tr>
+      <th>Annotator</th>
+      <td>Curator MUST annotate.</td>
+    </tr>
+    <tr>
+      <th>Value</th>
+      <td>
+        <code>str</code> or <code>float</code>. All observations with the same <code>donor_id</code> MUST contain the same value.<br><br>
+        If <code>organism_ontology_term_id</code> is NOT
+        <code>"NCBITaxon:9606"</code> for <i>Homo sapiens</i>, then the
+        value MUST be <code>"na"</code>.<br><br>If
+        <code>organism_ontology_term_id</code> is
+        <code>"NCBITaxon:9606"</code> for <i>Homo sapiens</i>, then the value MUST be <code>float("nan")</code> if unavailable; otherwise, the value MUST be the genetic ancestry percentage of <a href="https://www.ebi.ac.uk/ols4/ontologies/hancestro/classes/http%253A%252F%252Fpurl.obolibrary.org%252Fobo%252FHANCESTRO_0005"><code>"HANCESTRO:0005"</code></a> for <i>European</i> expressed as a <code>float</code> greater than or equal to <code>0.0</code> and less than or equal to <code>1.0</code>.
+      </td>
+    </tr>
+  </tbody>
+</table>
+<br />
+
+### genetic_ancestry_Indigenous_American
+
+<table>
+  <tbody>
+    <tr>
+      <th>Key</th>
+      <td>genetic_ancestry_Indigenous_American</td>
+    </tr>
+    <tr>
+      <th>Annotator</th>
+      <td>Curator MUST annotate.</td>
+    </tr>
+    <tr>
+      <th>Value</th>
+      <td>
+        <code>str</code> or <code>float</code>. All observations with the same <code>donor_id</code> MUST contain the same value.<br><br>
+        If <code>organism_ontology_term_id</code> is NOT
+        <code>"NCBITaxon:9606"</code> for <i>Homo sapiens</i>, then the
+        value MUST be <code>"na"</code>.<br><br>If
+        <code>organism_ontology_term_id</code> is
+        <code>"NCBITaxon:9606"</code> for <i>Homo sapiens</i>, then the value MUST be <code>float("nan")</code> if unavailable; otherwise, the value MUST be the genetic ancestry percentage of <a href="https://www.ebi.ac.uk/ols4/ontologies/hancestro/classes/http%253A%252F%252Fpurl.obolibrary.org%252Fobo%252FHANCESTRO_0013"><code>"HANCESTRO:0013"</code></a> for <i>Indigenous American</i> expressed as a <code>float</code> greater than or equal to <code>0.0</code> and less than or equal to <code>1.0</code>.
+      </td>
+    </tr>
+  </tbody>
+</table>
+<br />
+
+### genetic_ancestry_Oceanian
+
+<table>
+  <tbody>
+    <tr>
+      <th>Key</th>
+      <td>genetic_ancestry_Oceanian</td>
+    </tr>
+    <tr>
+      <th>Annotator</th>
+      <td>Curator MUST annotate.</td>
+    </tr>
+    <tr>
+      <th>Value</th>
+      <td>
+        <code>str</code> or <code>float</code>. All observations with the same <code>donor_id</code> MUST contain the same value.<br><br>
+        If <code>organism_ontology_term_id</code> is NOT
+        <code>"NCBITaxon:9606"</code> for <i>Homo sapiens</i>, then the
+        value MUST be <code>"na"</code>.<br><br>If
+        <code>organism_ontology_term_id</code> is
+        <code>"NCBITaxon:9606"</code> for <i>Homo sapiens</i>, then the value MUST be <code>float("nan")</code> if unavailable; otherwise, the value MUST be the genetic ancestry percentage of <a href="https://www.ebi.ac.uk/ols4/ontologies/hancestro/classes/http%253A%252F%252Fpurl.obolibrary.org%252Fobo%252FHANCESTRO_0017"><code>"HANCESTRO:0017"</code></a> for <i>Oceanian</i> expressed as a <code>float</code> greater than or equal to <code>0.0</code> and less than or equal to <code>1.0</code>.
+      </td>
+    </tr>
+  </tbody>
+</table>
+<br />
+
+### genetic_ancestry_South_Asian
+
+<table>
+  <tbody>
+    <tr>
+      <th>Key</th>
+      <td>genetic_ancestry_South_Asian</td>
+    </tr>
+    <tr>
+      <th>Annotator</th>
+      <td>Curator MUST annotate.</td>
+    </tr>
+    <tr>
+      <th>Value</th>
+      <td>
+        <code>str</code> or <code>float</code>. All observations with the same <code>donor_id</code> MUST contain the same value.<br><br>
+        If <code>organism_ontology_term_id</code> is NOT
+        <code>"NCBITaxon:9606"</code> for <i>Homo sapiens</i>, then the
+        value MUST be <code>"na"</code>.<br><br>If
+        <code>organism_ontology_term_id</code> is
+        <code>"NCBITaxon:9606"</code> for <i>Homo sapiens</i>, then the value MUST be <code>float("nan")</code> if unavailable; otherwise, the value MUST be the genetic ancestry percentage of <a href="https://www.ebi.ac.uk/ols4/ontologies/hancestro/classes/http%253A%252F%252Fpurl.obolibrary.org%252Fobo%252FHANCESTRO_0006"><code>"HANCESTRO:0006"</code></a> for <i>South Asian</i> expressed as a <code>float</code> greater than or equal to <code>0.0</code> and less than or equal to <code>1.0</code>.
+      </td>
+    </tr>
+  </tbody>
+</table>
+<br />
+
 ### in_tissue
 
 <table><tbody>
@@ -1904,6 +2075,12 @@ When a dataset is uploaded, CELLxGENE Discover MUST automatically add the `schem
     * Added ranges for _Visium CytAssist Spatial Gene Expression, 6.5mm_ and _Visium CytAssist Spatial Gene Expression, 11mm_ 
   * Updated the requirements for `assay_ontology_term_id` to include descendants of  _Visium Spatial Gene Expression_. All observations must contain the same value. Also updated recommended values for assays.
   * Updated the requirements for `cell_type_ontology_term_id` to include descendants of  _Visium Spatial Gene Expression_.
+  * Added <code>genetic_ancestry_African</code>
+  * Added <code>genetic_ancestry_East_Asian</code>
+  * Added <code>genetic_ancestry_European</code>
+  * Added <code>genetic_ancestry_Indigenous_American</code>
+  * Added <code>genetic_ancestry_Oceanian</code>
+  * Added <code>genetic_ancestry_South_Asian</code>
   * Updated the requirements for `in_tissue` to include descendants of  _Visium Spatial Gene Expression_.
 * obsm (Embeddings)
   * Updated the requirements for `spatial` to include descendants of  _Visium Spatial Gene Expression_ and to prohibit 'Not a Number' values. 

From bb662b5a4227722e23725a3b849afbc6007261ea Mon Sep 17 00:00:00 2001
From: Trent Smith <1429913+Bento007@users.noreply.github.com>
Date: Tue, 19 Nov 2024 12:03:13 -0800
Subject: [PATCH 07/28] release: 5.2.2 (#1118)

---
 .bumpversion.cfg                                  | 2 +-
 cellxgene_schema_cli/cellxgene_schema/__init__.py | 2 +-
 cellxgene_schema_cli/setup.py                     | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index 13a12d63..87d0d865 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 5.2.1
+current_version = 5.2.2
 commit = True
 parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(?:-(?P<prerel>rc)\.(?P<prerelversion>\d+))?
 serialize = 
diff --git a/cellxgene_schema_cli/cellxgene_schema/__init__.py b/cellxgene_schema_cli/cellxgene_schema/__init__.py
index 98886d26..15cf1350 100644
--- a/cellxgene_schema_cli/cellxgene_schema/__init__.py
+++ b/cellxgene_schema_cli/cellxgene_schema/__init__.py
@@ -1 +1 @@
-__version__ = "5.2.1"
+__version__ = "5.2.2"
diff --git a/cellxgene_schema_cli/setup.py b/cellxgene_schema_cli/setup.py
index ce67b058..db4ff43c 100644
--- a/cellxgene_schema_cli/setup.py
+++ b/cellxgene_schema_cli/setup.py
@@ -5,7 +5,7 @@
 
 setup(
     name="cellxgene-schema",
-    version="5.2.1",
+    version="5.2.2",
     url="https://github.com/chanzuckerberg/single-cell-curation",
     license="MIT",
     author="Chan Zuckerberg Initiative",

From 34e76db0419d9c6961e1ba767f3cdbd8aa67102a Mon Sep 17 00:00:00 2001
From: Joyce Yan <5653616+joyceyan@users.noreply.github.com>
Date: Tue, 19 Nov 2024 15:47:14 -0800
Subject: [PATCH 08/28] feat: update "is visium" definition for
 cell_type_ontology_term_id (#1115)

---
 .../cellxgene_schema/validate.py              | 39 ++++++++++--
 cellxgene_schema_cli/tests/test_validate.py   | 60 ++++++++++++++++---
 2 files changed, 88 insertions(+), 11 deletions(-)

diff --git a/cellxgene_schema_cli/cellxgene_schema/validate.py b/cellxgene_schema_cli/cellxgene_schema/validate.py
index 6869a9df..98ce9e7d 100644
--- a/cellxgene_schema_cli/cellxgene_schema/validate.py
+++ b/cellxgene_schema_cli/cellxgene_schema/validate.py
@@ -21,7 +21,7 @@
 
 logger = logging.getLogger(__name__)
 
-ONTOLOGY_PARSER = OntologyParser(schema_version=f"v{schema.get_current_schema_version()}")
+ONTOLOGY_PARSER = OntologyParser(schema_version="v5.3.0")
 
 ASSAY_VISIUM = "EFO:0010961"
 ASSAY_SLIDE_SEQV2 = "EFO:0030062"
@@ -29,7 +29,7 @@
 VISIUM_AND_IS_SINGLE_TRUE_MATRIX_SIZE = 4992
 SPATIAL_HIRES_IMAGE_MAX_DIMENSION_SIZE = 2000
 
-ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE = "obs['assay_ontology_term_id'] 'EFO:0010961' (Visium Spatial Gene Expression) and uns['spatial']['is_single'] is True"
+ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE = "descendants of obs['assay_ontology_term_id'] 'EFO:0010961' (Visium Spatial Gene Expression) and uns['spatial']['is_single'] is True"
 ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_FORBIDDEN = f"is only allowed for {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE}"
 ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_REQUIRED = f"is required for {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE}"
 ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_IN_TISSUE_0 = f"{ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE} and in_tissue is 0"
@@ -1475,12 +1475,12 @@ def _validate_spatial_cell_type_ontology_term_id(self):
         # Exit if:
         # - not Visium and is_single is True as no further checks are necessary
         # - in_tissue is not specified as checks are dependent on this value
-        if not self._is_visium_and_is_single_true() or "in_tissue" not in self.adata.obs:
+        if not self._is_visium_including_descendants() and self._is_single() or "in_tissue" not in self.adata.obs:
             return
 
         # Validate cell type: must be "unknown" if Visium and is_single is True and in_tissue is 0.
         if (
-            (self.adata.obs["assay_ontology_term_id"] == ASSAY_VISIUM)
+            self._is_visium_including_descendants()
             & (self.adata.obs["in_tissue"] == 0)
             & (self.adata.obs["cell_type_ontology_term_id"] != "unknown")
         ).any():
@@ -1760,6 +1760,37 @@ def _is_visium(self) -> bool:
             self.is_visium = assay_ontology_term_id is not None and (assay_ontology_term_id == ASSAY_VISIUM).any()
         return self.is_visium
 
+    def _is_visium_including_descendants(self) -> bool:
+        """
+        Determine if the assay_ontology_term_id is Visium (descendant of EFO:0010961).
+
+        :return True if assay_ontology_term_id is Visium, False otherwise.
+        :rtype bool
+        """
+        if self.is_visium is None:
+            assay_ontology_term_id = self.adata.obs.get("assay_ontology_term_id")
+
+            if assay_ontology_term_id is not None:
+                # Convert to a regular Series if it's Categorical
+                assay_ontology_term_id = pd.Series(assay_ontology_term_id)
+
+                # Check if any term is a descendant of ASSAY_VISIUM
+                try:
+                    visium_results = assay_ontology_term_id.apply(
+                        lambda term: ASSAY_VISIUM
+                        in list(ONTOLOGY_PARSER.get_lowest_common_ancestors(ASSAY_VISIUM, term))
+                    )
+                    self.is_visium = visium_results.astype(bool).any()
+                except KeyError as e:
+                    # This generally means the assay_ontology_term_id is invalid, but we want the error to be raised
+                    # by our explicit validator checks, not this implicit one.
+                    logger.warning(f"KeyError processing assay_ontology_term_id ontology: {e}")
+                    self.is_visium = False
+            else:
+                self.is_visium = False
+
+        return self.is_visium
+
     def _validate_spatial_image_shape(self, image_name: str, image: np.ndarray, max_dimension: int = None):
         """
         Validate the spatial image is of shape (,,3 or 4) and has a max dimension, if specified. A spatial image
diff --git a/cellxgene_schema_cli/tests/test_validate.py b/cellxgene_schema_cli/tests/test_validate.py
index 819cde43..801bc7cc 100644
--- a/cellxgene_schema_cli/tests/test_validate.py
+++ b/cellxgene_schema_cli/tests/test_validate.py
@@ -333,6 +333,31 @@ def test__validate_with_h5ad_invalid_and_without_labels(self):
 
 
 class TestCheckSpatial:
+    @pytest.mark.parametrize(
+        "assay_ontology_term_id, expected_is_visium",
+        [
+            # Parent term for Visium Spatial Gene Expression. This term and all its descendants are Visium
+            ("EFO:0010961", True),
+            # Visium Spatial Gene Expression V1
+            ("EFO:0022857", True),
+            # Visium CytAssist Spatial Gene Expression V2
+            ("EFO:0022858", True),
+            # Visium CytAssist Spatial Gene Expression, 11mm
+            ("EFO:0022860", True),
+            # Visium CytAssist Spatial Gene Expression, 6.5mm
+            ("EFO:0022859", True),
+            # Random other EFO term
+            ("EFO:0003740", False),
+        ],
+    )
+    def test__is_visium_descendant(self, assay_ontology_term_id, expected_is_visium):
+        validator: Validator = Validator()
+        validator._set_schema_def()
+        validator.adata = adata_visium.copy()
+        validator.adata.obs["assay_ontology_term_id"] = assay_ontology_term_id
+
+        assert validator._is_visium_including_descendants() == expected_is_visium
+
     def test__validate_spatial_visium_ok(self):
         validator: Validator = Validator()
         validator._set_schema_def()
@@ -958,33 +983,54 @@ def test__validate_tissue_position_int_max_error(self, tissue_position_name, max
         assert f"obs['{tissue_position_name}'] must be {error_message_token}" in validator.errors[0]
 
     @pytest.mark.parametrize(
-        "cell_type_ontology_term_id, in_tissue",
-        [("unknown", 0), (["unknown", "CL:0000066"], [0, 1]), ("CL:0000066", 1)],
+        "cell_type_ontology_term_id, in_tissue, assay_ontology_term_id",
+        [
+            # MUST be unknown when in_tissue = 0 and assay_ontology_term_id = Visium Spatial Gene Expression
+            ("unknown", 0, "EFO:0010961"),
+            # MUST be unknown when in_tissue = 0 and assay_ontology_term_id = Visium CytAssist Spatial Gene Expression, 11mm
+            ("unknown", 0, "EFO:0022860"),
+            # MUST be unknown when in_tissue = 0 and assay_ontology_term_id = Visium Spatial Gene Expression V1
+            # valid CL term is ok when in_tissue = 1 and assay_ontology_term_id = Visium CytAssist Spatial Gene Expression, 11mm
+            (["unknown", "CL:0000066"], [0, 1], ["EFO:0022857", "EFO:0022860"]),
+            # normal CL term for in_tissue = 1 and assay_ontology_term_id = 10x 3' v2
+            ("CL:0000066", 1, "EFO:0009899"),
+        ],
     )
-    def test__validate_cell_type_ontology_term_id_ok(self, cell_type_ontology_term_id, in_tissue):
+    def test__validate_cell_type_ontology_term_id_ok(
+        self, cell_type_ontology_term_id, in_tissue, assay_ontology_term_id
+    ):
         validator: Validator = Validator()
         validator._set_schema_def()
         validator.adata = adata_visium.copy()
         validator.adata.obs.cell_type_ontology_term_id = cell_type_ontology_term_id
         validator.adata.obs.in_tissue = in_tissue
+        validator.adata.obs.assay_ontology_term_id = assay_ontology_term_id
 
         # Confirm cell type is valid.
         validator._validate_spatial_cell_type_ontology_term_id()
         assert not validator.errors
 
     @pytest.mark.parametrize(
-        "cell_type_ontology_term_id, in_tissue",
+        "cell_type_ontology_term_id, in_tissue, assay_ontology_term_id",
         [
-            ("CL:0000066", 0),
-            (["CL:0000066", "unknown"], [0, 1]),
+            # MUST be unknown when in_tissue = 0 and assay_ontology_term_id = Visium Spatial Gene Expression
+            ("CL:0000066", 0, "EFO:0010961"),
+            (["CL:0000066", "unknown"], [0, 1], ["EFO:0010961", "EFO:0010961"]),
+            # MUST be unknown when in_tissue = 0 and assay_ontology_term_id = Visium CytAssist Spatial Gene Expression, 11mm
+            ("CL:0000066", 0, "EFO:0022860"),
+            # MUST be unknown when in_tissue = 0 and assay_ontology_term_id = Visium Spatial Gene Expression V1
+            ("CL:0000066", 0, "EFO:0022857"),
         ],
     )
-    def test__validate_cell_type_ontology_term_id_error(self, cell_type_ontology_term_id, in_tissue):
+    def test__validate_cell_type_ontology_term_id_error(
+        self, cell_type_ontology_term_id, in_tissue, assay_ontology_term_id
+    ):
         validator: Validator = Validator()
         validator._set_schema_def()
         validator.adata = adata_visium.copy()
         validator.adata.obs.cell_type_ontology_term_id = cell_type_ontology_term_id
         validator.adata.obs.in_tissue = in_tissue
+        validator.adata.obs.assay_ontology_term_id = assay_ontology_term_id
 
         # Confirm errors.
         validator._validate_spatial_cell_type_ontology_term_id()

From 5e0a05dc7274ccf7655d6e5db66bcf53c489cc0d Mon Sep 17 00:00:00 2001
From: Evan Molinelli <ejmolinelli@users.noreply.github.com>
Date: Thu, 21 Nov 2024 10:10:12 -0500
Subject: [PATCH 09/28] chore: forbid any NaN in spatial embeddings # (#1119)

Co-authored-by: Evan Molinelli <emolinelli@CZIMACOS4882.local>
---
 .../cellxgene_schema/validate.py              |  9 +++++++--
 .../tests/test_schema_compliance.py           | 20 ++++++++++++++-----
 cellxgene_schema_cli/tests/test_validate.py   | 12 +++++++++++
 pyproject.toml                                |  5 +++++
 4 files changed, 39 insertions(+), 7 deletions(-)

diff --git a/cellxgene_schema_cli/cellxgene_schema/validate.py b/cellxgene_schema_cli/cellxgene_schema/validate.py
index 98ce9e7d..084e9769 100644
--- a/cellxgene_schema_cli/cellxgene_schema/validate.py
+++ b/cellxgene_schema_cli/cellxgene_schema/validate.py
@@ -944,6 +944,7 @@ def _validate_obsm(self):
             issue_list = self.errors
 
             regex_pattern = r"^[a-zA-Z][a-zA-Z0-9_.-]*$"
+            key_is_spatial = key.lower() == "spatial"
 
             unknown_key = False  # an unknown key does not match 'spatial' or 'X_{suffix}'
             if key.startswith("X_"):
@@ -954,7 +955,7 @@ def _validate_obsm(self):
                     self.errors.append(
                         f"Suffix for embedding key in 'adata.obsm' {key} does not match the regex pattern {regex_pattern}."
                     )
-            elif key.lower() != "spatial":
+            elif not key_is_spatial:
                 if not re.match(regex_pattern, key):
                     self.errors.append(
                         f"Embedding key in 'adata.obsm' {key} does not match the regex pattern {regex_pattern}."
@@ -1002,7 +1003,11 @@ def _validate_obsm(self):
                 # Check for inf/NaN values only if the dtype is numeric
                 if np.isinf(value).any():
                     issue_list.append(f"adata.obsm['{key}'] contains positive infinity or negative infinity values.")
-                if np.all(np.isnan(value)):
+
+                # spatial embeddings can't have any NaN; other embeddings can't be all NaNs
+                if key_is_spatial and np.any(np.isnan(value)):
+                    issue_list.append("adata.obs['spatial] contains at least one NaN value.")
+                elif np.all(np.isnan(value)):
                     issue_list.append(f"adata.obsm['{key}'] contains all NaN values.")
 
         if self._is_supported_spatial_assay() is False and obsm_with_x_prefix == 0:
diff --git a/cellxgene_schema_cli/tests/test_schema_compliance.py b/cellxgene_schema_cli/tests/test_schema_compliance.py
index 0d3d5d4b..3646bc43 100644
--- a/cellxgene_schema_cli/tests/test_schema_compliance.py
+++ b/cellxgene_schema_cli/tests/test_schema_compliance.py
@@ -2141,20 +2141,30 @@ def test_obsm_values_str(self, validator_with_visium_assay, key):
     @pytest.mark.parametrize("key", ["X_umap", "spatial"])
     def test_obsm_values_nan(self, validator_with_visium_assay, key):
         """
-        values in obsm cannot all be NaN
+        test obsm NaN restrictions for different embedding types.
+        feature embeddings: X_* cannot be all NaN
+        spatial embeddings: 'spatial' cannot have any NaNs
         """
         validator = validator_with_visium_assay
         obsm = validator.adata.obsm
-        # It's okay if only one value is NaN
+
+        # Check embedding has any NaN
         obsm[key][0:100, 1] = numpy.nan
         validator.validate_adata()
-        assert validator.errors == []
 
-        # It's not okay if all values are NaN
+        if key != "spatial":
+            assert validator.errors == []
+        else:
+            assert validator.errors == ["ERROR: adata.obs['spatial] contains at least one NaN value."]
+
+        # Check embedding has all NaNs
         all_nan = numpy.full(obsm[key].shape, numpy.nan)
         obsm[key] = all_nan
         validator.validate_adata()
-        assert validator.errors == [f"ERROR: adata.obsm['{key}'] contains all NaN values."]
+        if key != "spatial":
+            assert validator.errors == [f"ERROR: adata.obsm['{key}'] contains all NaN values."]
+        else:
+            assert validator.errors == ["ERROR: adata.obs['spatial] contains at least one NaN value."]
 
     def test_obsm_values_no_X_embedding__non_spatial_dataset(self, validator_with_adata):
         validator = validator_with_adata
diff --git a/cellxgene_schema_cli/tests/test_validate.py b/cellxgene_schema_cli/tests/test_validate.py
index 801bc7cc..accc6b86 100644
--- a/cellxgene_schema_cli/tests/test_validate.py
+++ b/cellxgene_schema_cli/tests/test_validate.py
@@ -1040,6 +1040,18 @@ def test__validate_cell_type_ontology_term_id_error(
             in validator.errors[0]
         )
 
+    def test__validate_embeddings_non_nans(self):
+        validator: Validator = Validator()
+        validator._set_schema_def()
+        validator.adata = adata_visium.copy()
+        validator.visium_and_is_single_true_matrix_size = 2
+
+        # invalidate spatial embeddings with NaN value
+        validator.adata.obsm["spatial"][0, 1] = np.nan
+        # Confirm the NaN in the spatial embedding is reported as an error.
+        validator.validate_adata()
+        assert validator.errors == ["ERROR: adata.obs['spatial] contains at least one NaN value."]
+
 
 class TestValidatorValidateDataFrame:
     @pytest.mark.parametrize("_type", [np.int64, np.int32, int, np.float64, np.float32, float, str])
diff --git a/pyproject.toml b/pyproject.toml
index 1dd902b4..01f9c970 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -41,3 +41,8 @@ show_error_codes = true
 ignore_missing_imports = true
 warn_unreachable = true
 warn_unused_configs = true
+
+[tool.pytest.ini_options]
+pythonpath = [
+  "cellxgene_schema_cli"
+]
\ No newline at end of file

From 0392f2b0c25bd78884abbb455eee623325bd37a3 Mon Sep 17 00:00:00 2001
From: Brian Raymor <brianraymor@chanzuckerberg.com>
Date: Fri, 22 Nov 2024 14:43:39 -0800
Subject: [PATCH 10/28] Added c. elegans (#1126)

---
 schema/drafts/5.2.1-experimental.md | 210 ++++++++++++++++++++--------
 1 file changed, 151 insertions(+), 59 deletions(-)

diff --git a/schema/drafts/5.2.1-experimental.md b/schema/drafts/5.2.1-experimental.md
index 4275dfe6..0190c884 100644
--- a/schema/drafts/5.2.1-experimental.md
+++ b/schema/drafts/5.2.1-experimental.md
@@ -8,7 +8,7 @@ Version: 5.2.1-experimental
 
 The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD", "SHOULD NOT", "RECOMMENDED", "NOT RECOMMENDED" "MAY", and "OPTIONAL" in this document are to be interpreted as described in [BCP 14](https://tools.ietf.org/html/bcp14), [RFC2119](https://www.rfc-editor.org/rfc/rfc2119.txt), and [RFC8174](https://www.rfc-editor.org/rfc/rfc8174.txt) when, and only when, they appear in all capitals, as shown here.
 
-This draft is limited to **additions** or **modifications** to [schema 5.2.0](https://github.com/chanzuckerberg/single-cell-curation/blob/main/schema/5.2.0/schema.md). If a 5.2.0 reference does not appear in this document, then no schema change is required. The following **temporary** constraints for *Danio rerio* and *Drosophila melanogaster* are specified:
+This draft is limited to **additions** or **modifications** to [schema 5.2.0](https://github.com/chanzuckerberg/single-cell-curation/blob/main/schema/5.2.0/schema.md). If a 5.2.0 reference does not appear in this document, then no schema change is required. The following **temporary** constraints are specified:
 
 * The `organism_ontology_term_id` MUST be the same for all observations.
 * The `tissue_type` MUST be `'tissue'` for all observations.
@@ -24,6 +24,8 @@ The following ontology dependencies are *pinned* for this version of the schema.
 
 | Ontology | OBO Prefix | Release | Download |
 |:--|:--|:--|:--|
+| [C. elegans Development Ontology] | WBls |  [2024-09-26 Wormbase WS295](https://github.com/obophenotype/c-elegans-development-ontology/blob/vWS295) | [wbls.owl] |
+| [C. elegans Gross Anatomy Ontology] | WBbt | [2024-09-24 Wormbase WS295](https://github.com/obophenotype/c-elegans-gross-anatomy-ontology/blob/v2024-09-24) | [wbbt.owl] |
 | [Cell Ontology] | CL |  [2024-08-16] | [cl.owl]|
 | [Drosophila Anatomy Ontology] | FBbt | [2024-08-08](https://github.com/FlyBase/drosophila-anatomy-developmental-ontology/releases/tag/v2024-08-08) | [fbbt.owl] |
 | [Drosophila Development Ontology] | FBdv | [2024-08-07](https://github.com/FlyBase/drosophila-developmental-ontology/releases/tag/v2024-08-07) | [fbdv.owl] |
@@ -38,6 +40,11 @@ The following ontology dependencies are *pinned* for this version of the schema.
 | [Zebrafish Anatomy Ontology] | ZFA<br>ZFS | [2022-12-09] | [zfa.owl] |
 | | | | |
 
+[C. elegans Development Ontology]: https://obofoundry.org/ontology/wbls.html
+[wbls.owl]: https://github.com/obophenotype/c-elegans-development-ontology/blob/vWS295/wbls.owl
+[C. elegans Gross Anatomy Ontology]: https://obofoundry.org/ontology/wbbt.html
+
+[wbbt.owl]: https://github.com/obophenotype/c-elegans-gross-anatomy-ontology/blob/v2024-09-24/wbbt.owl
 [Cell Ontology]: http://obofoundry.org/ontology/cl.html
 [2024-08-16]: https://github.com/obophenotype/cell-ontology/releases/tag/v2024-08-16
 [cl.owl]: https://github.com/obophenotype/cell-ontology/releases/download/v2024-08-16/cl.owl
@@ -97,8 +104,9 @@ The following gene annotation dependencies are *pinned* for this version of the
 | <a href="https://www.ebi.ac.uk/ols4/ontologies/ncbitaxon/classes?obo_id=NCBITaxon%3A9606"><code>NCBITaxon:9606</code></a><br>for <i>Homo sapiens</i> | [GENCODE (Human)] | Human reference GRCh38.p14<br>(GENCODE v44/Ensembl 110) | [gencode.v44.primary_assembly.annotation.gtf] |
 | <a href="https://www.ebi.ac.uk/ols4/ontologies/ncbitaxon/classes?obo_id=NCBITaxon%3A10090"><code>NCBITaxon:10090</code></a><br>for <i>Mus musculus</i> | [GENCODE (Mouse)] | Mouse reference GRCm39<br>(GENCODE vM33/Ensembl 110) | [gencode.vM33.primary_assembly.annotation.gtf] |
 | <a href="https://www.ebi.ac.uk/ols4/ontologies/ncbitaxon/classes?obo_id=NCBITaxon%3A2697049"><code>NCBITaxon:2697049</code></a><br>for <i>SARS-CoV-2</i>  | [ENSEMBL (COVID-19)] | SARS-CoV-2 reference (ENSEMBL assembly: ASM985889v3) | [Sars\_cov\_2.ASM985889v3.101.gtf] |
-| <a href="https://www.ebi.ac.uk/ols4/ontologies/ncbitaxon/classes?obo_id=NCBITaxon%3A7955"><code>NCBITaxon:7955</code></a><br>for <i>Danio rerio</i> |  [ENSEMBL (Zebrafish)] | GRCz11.112 (Ensembl 112) | [Danio_rerio.GRCz11.112.gtf] |
-| <a href="https://www.ebi.ac.uk/ols4/ontologies/ncbitaxon/classes?obo_id=NCBITaxon%3A7227"><code>"NCBITaxon:7227"</code></a><br>for <i>Drosophila melanogaster</i>| [ENSEMBL (Fruit fly)] | BDGP6.46 (Ensembl 112) | [Drosophila_melanogaster.BDGP6.46.112.gtf] | 
+| <a href="https://www.ebi.ac.uk/ols4/ontologies/ncbitaxon/classes?obo_id=NCBITaxon%3A6239"><code>"NCBITaxon:6239"</code></a><br>for <i>Caenorhabditis elegans</i>  | [ENSEMBL (Caenorhabditis elegans)] | WBcel235 (GCA_000002985.3)<br>Ensembl 113 | [Caenorhabditis_elegans.WBcel235.113.gtf] |
+| <a href="https://www.ebi.ac.uk/ols4/ontologies/ncbitaxon/classes?obo_id=NCBITaxon%3A7955"><code>NCBITaxon:7955</code></a><br>for <i>Danio rerio</i> |  [ENSEMBL (Zebrafish)] | GRCz11 (GCA_000002035.4)<br>Ensembl 113 | [Danio_rerio.GRCz11.113.gtf] |
+| <a href="https://www.ebi.ac.uk/ols4/ontologies/ncbitaxon/classes?obo_id=NCBITaxon%3A7227"><code>"NCBITaxon:7227"</code></a><br>for <i>Drosophila melanogaster</i>| [ENSEMBL (Fruit fly)] | BDGP6.46 (GCA_000001215.4)<br>Ensembl 113 | [Drosophila_melanogaster.BDGP6.46.113.gtf] | 
 | | [ThermoFisher ERCC Spike-Ins] | ThermoFisher ERCC RNA Spike-In Control Mixes (Cat # 4456740, 4456739) | [cms_095047.txt] |
 
 [RNA Spike-In Control Mixes]: https://www.thermofisher.com/document-connect/document-connect.html?url=https%3A%2F%2Fassets.thermofisher.com%2FTFS-Assets%2FLSG%2Fmanuals%2Fcms_086340.pdf&title=VXNlciBHdWlkZTogRVJDQyBSTkEgU3Bpa2UtSW4gQ29udHJvbCBNaXhlcyAoRW5nbGlzaCAp
@@ -112,11 +120,14 @@ The following gene annotation dependencies are *pinned* for this version of the
 [ENSEMBL (COVID-19)]: https://covid-19.ensembl.org/index.html
 [Sars\_cov\_2.ASM985889v3.101.gtf]: https://ftp.ensemblgenomes.org/pub/viruses/gtf/sars_cov_2/Sars_cov_2.ASM985889v3.101.gtf.gz
 
+[ENSEMBL (Caenorhabditis elegans)]: https://useast.ensembl.org/Caenorhabditis_elegans/Info/Index
+[Caenorhabditis_elegans.WBcel235.113.gtf]: https://ftp.ensembl.org/pub/release-113/gtf/caenorhabditis_elegans/Caenorhabditis_elegans.WBcel235.113.gtf.gz
+
 [ENSEMBL (Zebrafish)]: https://useast.ensembl.org/Danio_rerio/Info/Index
-[Danio_rerio.GRCz11.112.gtf]: https://ftp.ensembl.org/pub/release-112/gtf/danio_rerio/Danio_rerio.GRCz11.112.gtf.gz
+[Danio_rerio.GRCz11.113.gtf]: https://ftp.ensembl.org/pub/release-113/gtf/danio_rerio/Danio_rerio.GRCz11.113.gtf.gz
 
 [ENSEMBL (Fruit fly)]: https://www.ensembl.org/Drosophila_melanogaster/Info/Index
-[Drosophila_melanogaster.BDGP6.46.112.gtf]: https://ftp.ensembl.org/pub/release-112/gtf/drosophila_melanogaster/Drosophila_melanogaster.BDGP6.46.112.gtf.gz
+[Drosophila_melanogaster.BDGP6.46.113.gtf]: https://ftp.ensembl.org/pub/release-113/gtf/drosophila_melanogaster/Drosophila_melanogaster.BDGP6.46.113.gtf.gz
 
 [ThermoFisher ERCC Spike-Ins]: https://www.thermofisher.com/order/catalog/product/4456740#/4456740
 [cms_095047.txt]: https://assets.thermofisher.com/TFS-Assets/LSG/manuals/cms_095047.txt
@@ -128,27 +139,57 @@ The following gene annotation dependencies are *pinned* for this version of the
 ### development_stage_ontology_term_id
 
 <table><tbody>
-  <tr>
-    <th>Key</th>
-    <td>development_stage_ontology_term_id</td>
-  </tr>
-  <tr>
-    <th>Annotator</th>
-    <td>Curator MUST annotate.</td>
-  </tr>
-  <tr>
-    <th>Value</th>
-    <td>
-      categorical with <code>str</code> categories. If unavailable, this MUST be <code>"unknown"</code>.<br><br>
-      If <code>organism_ontolology_term_id</code> is <a href="https://www.ebi.ac.uk/ols4/ontologies/ncbitaxon/classes?obo_id=NCBITaxon%3A7955"><code>"NCBITaxon:7955"</code></a> for <i>Danio rerio</i>, then this MUST be the most accurate descendant of <a href="https://www.ebi.ac.uk/ols4/ontologies/zfs/classes?obo_id=ZFS%3A0100000"><code>ZFS:0100000</code></a> for <i>zebrafish stage</i> and MUST NOT be <a href="https://www.ebi.ac.uk/ols4/ontologies/zfs/classes?obo_id=ZFS%3A0000000"><code>ZFS:0000000</code></a> for <i>Unknown</i>.<br><br>If <code>organism_ontolology_term_id</code> is <a href="https://www.ebi.ac.uk/ols4/ontologies/ncbitaxon/classes?obo_id=NCBITaxon%3A7227"><code>"NCBITaxon:7227"</code></a> for <i>Drosophila melanogaster</i>, then this MUST be the most accurate FBdv term.
-      <br><br> Otherwise, for all other organisms this MUST be the most accurate descendant of <a href="https://www.ebi.ac.uk/ols4/ontologies/uberon/classes?obo_id=UBERON%3A0000105"><code>UBERON:0000105</code></a> for <i>life cycle stage</i>, excluding <a href="https://www.ebi.ac.uk/ols4/ontologies/uberon/classes?obo_id=UBERON%3A0000071"><code>UBERON:0000071</code></a> for <i>death stage</i>.
-    </td>
+    <tr>
+      <th>Key</th>
+      <td>development_stage_ontology_term_id</td>
+    </tr>
+    <tr>
+      <th>Annotator</th>
+      <td>Curator MUST annotate.</td>
+    </tr>
+    <tr>
+      <th>Value</th>
+      <td>
+        categorical with <code>str</code> categories. If unavailable, this MUST be <code>"unknown"</code>.<br><br>
+        <table>
+          <thead>
+            <tr>
+              <th>For <code>organism_ontology_term_id</code></th>
+              <th>Value</th>
+            </tr>
+          </thead>
+          <tbody>
+            <tr>
+              <td>
+                <a href="https://www.ebi.ac.uk/ols4/ontologies/ncbitaxon/classes?obo_id=NCBITaxon%3A6239"><code>"NCBITaxon:6239"</code></a><br>for <i>Caenorhabditis elegans</i>
+              </td>
+              <td>
+                MUST be the most accurate descendant of <a href="https://www.ebi.ac.uk/ols4/ontologies/wbls/classes?obo_id=WBls%3A0000075"><code>WBls:0000075</code></a><br>for <i>worm life stage</i>
+              </td>
+            </tr>
+            <tr>
+              <td>
+                <a href="https://www.ebi.ac.uk/ols4/ontologies/ncbitaxon/classes?obo_id=NCBITaxon%3A7955"><code>"NCBITaxon:7955"</code></a><br>for <i>Danio rerio</i>
+              </td>
+              <td>
+                MUST be the most accurate descendant of <a href="https://www.ebi.ac.uk/ols4/ontologies/zfs/classes?obo_id=ZFS%3A0100000"><code>ZFS:0100000</code></a><br>for <i>zebrafish stage</i> and MUST NOT be <a href="https://www.ebi.ac.uk/ols4/ontologies/zfs/classes?obo_id=ZFS%3A0000000"><code>ZFS:0000000</code></a> for <i>Unknown</i>
+              </td>
+            </tr>
+            <tr>
+              <td>
+                <a href="https://www.ebi.ac.uk/ols4/ontologies/ncbitaxon/classes?obo_id=NCBITaxon%3A7227"><code>"NCBITaxon:7227"</code></a><br>for <i>Drosophila melanogaster</i>
+              </td>
+              <td>
+                MUST be the most accurate FBdv term
+              </td>
+            </tr>
+          </tbody>
+        </table>
+      </td>
   </tr>
 </tbody></table>
 <br>
 
----
-
 ### organism_cell_type_ontology_term_id
 
 <table><tbody>
@@ -163,7 +204,15 @@ The following gene annotation dependencies are *pinned* for this version of the
   <tr>
     <th>Value</th>
     <td>
-      categorical with <code>str</code> categories.<br><br>
+      categorical with <code>str</code> categories. This MUST be <code>"unknown"</code> when:
+      <ul>
+        <li>
+          no appropriate term can be found (e.g. the cell type is unknown)
+        </li>
+        <li>
+          <code>assay_ontology_term_id</code> is <a href="https://www.ebi.ac.uk/ols4/ontologies/efo/classes?obo_id=EFO%3A0010961"><code>"EFO:0010961"</code></a> for <i>Visium Spatial Gene Expression</i>, <code>uns['spatial']['is_single']</code> is <code>True</code>, and the corresponding value of <code>in_tissue</code> is <code>0</code>
+        </li>
+      </ul>
       <table>
         <thead><tr>
           <th>For <code>organism_ontolology_term_id</code></th>
@@ -172,40 +221,27 @@ The following gene annotation dependencies are *pinned* for this version of the
         <tbody>
           <tr>
             <td>
-              <a href="https://www.ebi.ac.uk/ols4/ontologies/ncbitaxon/classes?obo_id=NCBITaxon%3A7955"><code>"NCBITaxon:7955"</code></a><br>for <i>Danio rerio</i>
+              <a href="https://www.ebi.ac.uk/ols4/ontologies/ncbitaxon/classes?obo_id=NCBITaxon%3A6239"><code>"NCBITaxon:6239"</code></a><br>for <i>Caenorhabditis elegans</i>
             </td>
             <td>
-              MUST be either the most accurate descendant of <a href="https://www.ebi.ac.uk/ols4/ontologies/zfa/classes?obo_id=ZFA%3A0009000"><code>ZFA:0009000</code></a> for <i>cell</i><br>or <code>"unknown"</code> when:
-              <ul>
-               <li>
-                 no appropriate term can be found (e.g. the cell type is unknown)
-               </li>
-               <li>
-                <code>assay_ontology_term_id</code> is <a href="https://www.ebi.ac.uk/ols4/ontologies/efo/classes?obo_id=EFO%3A0010961"><code>"EFO:0010961"</code></a> for<br><i>Visium Spatial Gene Expression</i>, <code>uns['spatial']['is_single']</code> is <code>True</code>,<br>and the corresponding value of <code>in_tissue</code> is <code>0</code>
-              </li>
-             </ul>
+              MUST be the most accurate descendant of <a href="https://www.ebi.ac.uk/ols4/ontologies/wbbt/classes?obo_id=WBbt%3A0004017"><code>WBbt:0004017</code></a> for <i>Cell</i>
             </td>
           </tr>
+          <tr>
           <tr>
             <td>
-              <a href="https://www.ebi.ac.uk/ols4/ontologies/ncbitaxon/classes?obo_id=NCBITaxon%3A7227"><code>"NCBITaxon:7227"</code></a><br>for <i>Drosophila melanogaster</i>
+              <a href="https://www.ebi.ac.uk/ols4/ontologies/ncbitaxon/classes?obo_id=NCBITaxon%3A7955"><code>"NCBITaxon:7955"</code></a><br>for <i>Danio rerio</i>
             </td>
-            <td>MUST be either the most accurate descendant of <a href="https://www.ebi.ac.uk/ols4/ontologies/fbbt/classes/http%253A%252F%252Fpurl.obolibrary.org%252Fobo%252FFBbt_00007002?lang=en"><code>FBbt:00007002</code></a> for <i>cell</i><br>or <code>"unknown"</code> when:
-              <ul>
-               <li>
-                 no appropriate term can be found (e.g. the cell type is unknown)
-               </li>
-               <li>
-                <code>assay_ontology_term_id</code> is <a href="https://www.ebi.ac.uk/ols4/ontologies/efo/classes?obo_id=EFO%3A0010961"><code>"EFO:0010961"</code></a> for<br><i>Visium Spatial Gene Expression</i>, <code>uns['spatial']['is_single']</code> is <code>True</code>,<br>and the corresponding value of <code>in_tissue</code> is <code>0</code>
-              </li>
-             </ul>
+            <td>
+              MUST be the most accurate descendant of <a href="https://www.ebi.ac.uk/ols4/ontologies/zfa/classes?obo_id=ZFA%3A0009000"><code>ZFA:0009000</code></a> for <i>cell</i>
             </td>
           </tr>
           <tr>
             <td>
-              All other values of<br><code>organism_ontology_term_id</code>
+              <a href="https://www.ebi.ac.uk/ols4/ontologies/ncbitaxon/classes?obo_id=NCBITaxon%3A7227"><code>"NCBITaxon:7227"</code></a><br>for <i>Drosophila melanogaster</i>
+            </td>
+            <td>MUST be the most accurate descendant of <a href="https://www.ebi.ac.uk/ols4/ontologies/fbbt/classes/http%253A%252F%252Fpurl.obolibrary.org%252Fobo%252FFBbt_00007002?lang=en"><code>FBbt:00007002</code></a> for <i>cell</i>
             </td>
-            <td>MUST be <code>"na"</code></td>
           </tr>
         </tbody>
       </table>
@@ -230,7 +266,12 @@ The following gene annotation dependencies are *pinned* for this version of the
   <tr>
     <th>Value</th>
     <td>
-      categorical with <code>str</code> categories. This MUST be a descendant of <a href="https://www.ebi.ac.uk/ols4/ontologies/ncbitaxon/classes?obo_id=NCBITaxon%3A33208"><code>NCBITaxon:33208</code></a> for <i>Metazoa</i>.<br><br>If <code>organism_ontology_term_id</code> is <a href="https://www.ebi.ac.uk/ols4/ontologies/ncbitaxon/classes?obo_id=NCBITaxon%3A7955"><code>"NCBITaxon:7955"</code></a> for <i>Danio rerio</i> or <a href="https://www.ebi.ac.uk/ols4/ontologies/ncbitaxon/classes?obo_id=NCBITaxon%3A7227"><code>"NCBITaxon:7227"</code></a> for <i>Drosophila melanogaster</i>, then all observations MUST contain the same value. 
+      categorical with <code>str</code> categories. This MUST be a descendant of <a href="https://www.ebi.ac.uk/ols4/ontologies/ncbitaxon/classes?obo_id=NCBITaxon%3A33208"><code>NCBITaxon:33208</code></a> for <i>Metazoa</i>.<br><br>All observations MUST contain the same value when the <code>organism_ontology_term_id</code> is:
+      <ul>
+       <li><a href="https://www.ebi.ac.uk/ols4/ontologies/ncbitaxon/classes?obo_id=NCBITaxon%3A6239"><code>"NCBITaxon:6239"</code></a> for <i>Caenorhabditis elegans</i> </li>
+       <li> <a href="https://www.ebi.ac.uk/ols4/ontologies/ncbitaxon/classes?obo_id=NCBITaxon%3A7955"><code>"NCBITaxon:7955"</code></a> for <i>Danio rerio</i></li>
+       <li><a href="https://www.ebi.ac.uk/ols4/ontologies/ncbitaxon/classes?obo_id=NCBITaxon%3A7227"><code>"NCBITaxon:7227"</code></a> for <i>Drosophila melanogaster</i></li>
+      </ul>
     </td>
   </tr>
 </tbody></table>
@@ -261,6 +302,14 @@ The following gene annotation dependencies are *pinned* for this version of the
             </tr>
           </thead>
           <tbody>
+            <tr>
+              <td>
+                <a href="https://www.ebi.ac.uk/ols4/ontologies/ncbitaxon/classes?obo_id=NCBITaxon%3A6239"><code>"NCBITaxon:6239"</code></a><br>for <i>Caenorhabditis elegans</i>
+              </td>
+              <td>
+                MUST be the most accurate descendant of <a href="https://www.ebi.ac.uk/ols4/ontologies/wbbt/classes?obo_id=WBBT%3A0005766"><code>WBbt:0005766</code></a> for <i>Anatomy</i>
+              </td>
+            </tr>
             <tr>
               <td>
                 <a href="https://www.ebi.ac.uk/ols4/ontologies/ncbitaxon/classes?obo_id=NCBITaxon%3A7955"><code>"NCBITaxon:7955"</code></a><br>for <i>Danio rerio</i>
@@ -277,12 +326,6 @@ The following gene annotation dependencies are *pinned* for this version of the
                 MUST be the most accurate descendant of <a href="https://www.ebi.ac.uk/ols4/ontologies/fbbt/classes?obo_id=FBBT%3A10000000"><code>FBbt:10000000</code></a> for<br><i>anatomical entity</i> and MUST NOT be <a href="https://www.ebi.ac.uk/ols4/ontologies/fbbt/classes?obo_id=FBbt%3A00007002"><code>FBbt:00007002</code></a><br>for <i>cell</i> or any of its descendants.
               </td>
             </tr>
-            <tr>
-            <td>
-              All other values of<br><code>organism_ontology_term_id</code>
-            </td>
-            <td>MUST be <code>"na"</code></td>
-          </tr>
           </tbody>
         </table>
       </td>
@@ -292,6 +335,27 @@ The following gene annotation dependencies are *pinned* for this version of the
 
 ---
 
+### sex_ontology_term_id
+
+<table><tbody>
+    <tr>
+      <th>Key</th>
+      <td>sex_ontology_term_id</td>
+    </tr>
+    <tr>
+      <th>Annotator</th>
+      <td>Curator MUST annotate.</td>
+    </tr>
+    <tr>
+      <th>Value</th>
+        <td>categorical with <code>str</code> categories. If unavailable, this MUST be <code>"unknown"</code>.<br><br>If <code>organism_ontology_term_id</code> is <a href="https://www.ebi.ac.uk/ols4/ontologies/ncbitaxon/classes?obo_id=NCBITaxon%3A6239"><code>"NCBITaxon:6239"</code></a> for <i>Caenorhabditis elegans</i>, this MUST be <a href="https://www.ebi.ac.uk/ols4/ontologies/pato/classes?obo_id=PATO%3A0000384"><code>PATO:0000384</code></a> for <i>male</i> or <a href="https://www.ebi.ac.uk/ols4/ontologies/pato/classes?obo_id=PATO%3A0001340"><code>PATO:0001340</code></a> for <i>hermaphrodite</i>.<br><br>Otherwise, this MUST be a descendant of <a href="https://www.ebi.ac.uk/ols4/ontologies/pato/classes?obo_id=PATO%3A0001894">PATO:0001894</a> for <i>phenotypic sex</i>.
+        </td>
+    </tr>
+</tbody></table>
+<br>
+
+---
+
 ### tissue_type
 
 <table><tbody>
@@ -306,12 +370,18 @@ The following gene annotation dependencies are *pinned* for this version of the
   <tr>
     <th>Value</th>
     <td>
-      categorical with <code>str</code> categories.<br><br>If <code>organism_ontology_term_id</code> is <a href="https://www.ebi.ac.uk/ols4/ontologies/ncbitaxon/classes?obo_id=NCBITaxon%3A7955"><code>"NCBITaxon:7955"</code></a> for <i>Danio rerio</i> or <a href="https://www.ebi.ac.uk/ols4/ontologies/ncbitaxon/classes?obo_id=NCBITaxon%3A7227"><code>"NCBITaxon:7227"</code></a> for <i>Drosophila melanogaster</i>, then the value MUST be <code>"tissue"</code>.<br><br>Otherwise, the value MUST be <code>"tissue"</code>, <code>"organoid"</code>, or <code>"cell culture"</code>.
+      categorical with <code>str</code> categories.<br><br>The value MUST be <code>"tissue"</code> when the <code>organism_ontology_term_id</code> is:
+      <ul>
+       <li><a href="https://www.ebi.ac.uk/ols4/ontologies/ncbitaxon/classes?obo_id=NCBITaxon%3A6239"><code>"NCBITaxon:6239"</code></a> for <i>Caenorhabditis elegans</i> </li>
+       <li> <a href="https://www.ebi.ac.uk/ols4/ontologies/ncbitaxon/classes?obo_id=NCBITaxon%3A7955"><code>"NCBITaxon:7955"</code></a> for <i>Danio rerio</i></li>
+       <li><a href="https://www.ebi.ac.uk/ols4/ontologies/ncbitaxon/classes?obo_id=NCBITaxon%3A7227"><code>"NCBITaxon:7227"</code></a> for <i>Drosophila melanogaster</i></li>
+      </ul>Otherwise, the value MUST be <code>"tissue"</code>, <code>"organoid"</code>, or <code>"cell culture"</code>.
     </td>
   </tr>
 </tbody></table>
 <br>
 
+
 ---
 
 ## var and raw.var (Gene Metadata)
@@ -355,6 +425,12 @@ The following gene annotation dependencies are *pinned* for this version of the
               <a href="https://www.ebi.ac.uk/ols4/ontologies/ncbitaxon/classes?obo_id=NCBITaxon%3A2697049"><code>"NCBITaxon:2697049"</code></a>
             </td>
           </tr>
+          <tr>
+            <td><i>Caenorhabditis elegans</i></td>
+            <td>
+              <a href="https://www.ebi.ac.uk/ols4/ontologies/ncbitaxon/classes?obo_id=NCBITaxon%3A6239"><code>"NCBITaxon:6239"</code></a>
+            </td>
+          </tr>
           <tr>
             <td><i>Danio rerio</i></td>
             <td>
@@ -388,18 +464,34 @@ The following gene annotation dependencies are *pinned* for this version of the
 * General Requirements
   * Updated requirements for supported organisms
 * Required Ontologies
+  * Added C. elegans Development Ontology (WBls) release 2024-09-26 Wormbase WS295
+  * Added C. elegans Gross Anatomy Ontology (WBbt) release 2024-09-24 Wormbase WS295
   * Added Drosophila Anatomy Ontology (FBbt) release 2024-08-08
   * Added Drosophila Development Ontology (FBdv) release 2024-08-07
   * Added Zebrafish Anatomy Ontology (ZFA+ZFS) release 2022-12-09
 * Required Gene Annotations
   * Refactored table to include NCBI Taxon for supported organisms
-  * Added  *Danio rerio* Reference GRCz11.112 (Ensembl 112)
-  * Added  *Drosophila melanogaster* Reference BDGP6.46 (Ensembl 112)
+  * Added *Caenorhabditis elegans* WBcel235 (GCA_000002985.3) Ensembl 113
+  * Added *Danio rerio* GRCz11 (GCA_000002035.4) Ensembl 113
+  * Added *Drosophila melanogaster* BDGP6.46 (GCA_000001215.4) Ensembl 113
 * obs (Cell metadata)
-  * Updated `development_stage_ontology_term_id` for *Danio rerio* and *Drosophila melanogaster* 
+  * Updated `development_stage_ontology_term_id` to include:
+    * *Caenorhabditis elegans*
+    * *Danio rerio*
+    * *Drosophila melanogaster* 
   * Added `organism_cell_type_ontology_term_id`
-  * Updated `organism_ontology_term_id` for *Danio rerio* and *Drosophila melanogaster* to require all observations to contain the same value
+  * Updated `organism_ontology_term_id` to require all observations to contain the same value for:
+    * *Caenorhabditis elegans*
+    * *Danio rerio*
+    * *Drosophila melanogaster* 
   * Added `organism_tissue_ontology_term_id`
-  * Updated `tissue_type` to require `"tissue"` for *Danio rerio* and *Drosophila melanogaster* 
+  * Updated `sex_ontology_term_id` for *Caenorhabditis elegans*
+  * Updated `tissue_type` to require `"tissue"` for:
+    * *Caenorhabditis elegans*
+    * *Danio rerio*
+    * *Drosophila melanogaster*
 * var and raw.var (Gene Metadata)
-  * Updated `feature_reference` for *Danio rerio* and *Drosophila melanogaster*
\ No newline at end of file
+  * Updated `feature_reference` to include:
+    * *Caenorhabditis elegans*
+    * *Danio rerio*
+    * *Drosophila melanogaster* 
\ No newline at end of file

From 0c9f9af72e41d25744cf5963f351169c13680c3c Mon Sep 17 00:00:00 2001
From: Evan Molinelli <ejmolinelli@users.noreply.github.com>
Date: Mon, 25 Nov 2024 09:50:07 -0500
Subject: [PATCH 11/28] feat: update validation for obs['in_tissue'] to include
 descendants of Visium (#1124)

Co-authored-by: Evan Molinelli <emolinelli@CZIMACOS4882.local>
---
 .../cellxgene_schema/utils.py                 | 14 +++
 .../cellxgene_schema/validate.py              | 88 +++++++++++--------
 .../tests/test_schema_compliance.py           | 21 +++++
 3 files changed, 86 insertions(+), 37 deletions(-)

diff --git a/cellxgene_schema_cli/cellxgene_schema/utils.py b/cellxgene_schema_cli/cellxgene_schema/utils.py
index fb8f58f4..e2b558f7 100644
--- a/cellxgene_schema_cli/cellxgene_schema/utils.py
+++ b/cellxgene_schema_cli/cellxgene_schema/utils.py
@@ -2,10 +2,12 @@
 import os
 import sys
 from base64 import b85encode
+from functools import lru_cache
 from typing import Dict, List, Union
 
 import anndata as ad
 import numpy as np
+from cellxgene_ontology_guide.ontology_parser import OntologyParser
 from scipy import sparse
 from xxhash import xxh3_64_intdigest
 
@@ -151,3 +153,15 @@ def get_hash_digest_column(dataframe):
         .astype(np.uint64)
         .apply(lambda v: b85encode(v.to_bytes(8, "big")).decode("ascii"))
     )
+
+
+@lru_cache()
+def is_ontological_descendant_of(onto: OntologyParser, term: str, target: str, include_self: bool = True) -> bool:
+    """
+    Determines if :term is an ontological descendant of :target and whether to include :term==:target.
+
+    This function is cached and is safe to call many times.
+
+    #TODO:[EM] needs testing
+    """
+    return term in set(onto.get_term_descendants(target, include_self))
diff --git a/cellxgene_schema_cli/cellxgene_schema/validate.py b/cellxgene_schema_cli/cellxgene_schema/validate.py
index 084e9769..f7892e6b 100644
--- a/cellxgene_schema_cli/cellxgene_schema/validate.py
+++ b/cellxgene_schema_cli/cellxgene_schema/validate.py
@@ -17,7 +17,7 @@
 from scipy import sparse
 
 from . import gencode, schema
-from .utils import SPARSE_MATRIX_TYPES, get_matrix_format, getattr_anndata, read_h5ad
+from .utils import SPARSE_MATRIX_TYPES, get_matrix_format, getattr_anndata, is_ontological_descendant_of, read_h5ad
 
 logger = logging.getLogger(__name__)
 
@@ -211,7 +211,7 @@ def _validate_curie_ancestors(
                 is_valid_term_id = ONTOLOGY_PARSER.is_valid_term_id(term_id)
                 is_valid_ancestor_id = ONTOLOGY_PARSER.is_valid_term_id(ancestor)
                 if is_valid_term_id & is_valid_ancestor_id:
-                    is_descendant = ancestor in ONTOLOGY_PARSER.get_term_ancestors(term_id)
+                    is_descendant = ancestor in ONTOLOGY_PARSER.get_term_ancestors(term_id, inclusive)
                     checks.append(is_descendant)
 
         if True not in checks:
@@ -1477,18 +1477,25 @@ def _validate_spatial_cell_type_ontology_term_id(self):
 
         :rtype none
         """
-        # Exit if:
-        # - not Visium and is_single is True as no further checks are necessary
-        # - in_tissue is not specified as checks are dependent on this value
-        if not self._is_visium_including_descendants() and self._is_single() or "in_tissue" not in self.adata.obs:
+        self._is_visium_including_descendants()
+        self._is_single()
+        self._is_visium_and_is_single_true()
+
+        # skip checks if not a valid spatial assay with a corresponding "in_tissue" column
+        if not self.is_visium_and_is_single_true:
+            # not a valid spatial assay
+            return
+        elif self.is_visium_and_is_single_true and "in_tissue" not in self.adata.obs.columns:
+            # valid spatial assay, but missing "in_tissue" column
             return
 
-        # Validate cell type: must be "unknown" if Visium and is_single is True and in_tissue is 0.
-        if (
-            self._is_visium_including_descendants()
-            & (self.adata.obs["in_tissue"] == 0)
-            & (self.adata.obs["cell_type_ontology_term_id"] != "unknown")
-        ).any():
+        # Validate all out of tissue (in_tissue==0) spatial spots have unknown cell ontology term
+        is_spatial = self.adata.obs["assay_ontology_term_id"].apply(
+            lambda assay: is_ontological_descendant_of(ONTOLOGY_PARSER, assay, ASSAY_VISIUM, True)
+        )
+        is_not_tissue = self.adata.obs["in_tissue"] == 0
+        is_not_unknown = self.adata.obs["cell_type_ontology_term_id"] != "unknown"
+        if (is_spatial & is_not_tissue & is_not_unknown).any():
             self.errors.append(
                 f"obs['cell_type_ontology_term_id'] must be 'unknown' when {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_IN_TISSUE_0}."
             )
@@ -1500,11 +1507,21 @@ def _validate_spatial_tissue_position(self, tissue_position_name: str, min: int,
 
         :rtype none
         """
+        # check for visium status and then is visium and single
+        # techdebt: the following lines are order dependent. Violates idempotence.
+        self._is_visium_including_descendants()
+        self._is_single()
+        self._is_visium_and_is_single_true()
+
         # Tissue position is foribidden if assay is not Visium and is_single is True.
         if tissue_position_name in self.adata.obs and (
-            not self._is_visium_and_is_single_true()
+            not (self.is_visium_and_is_single_true)
             or (
-                ~(self.adata.obs["assay_ontology_term_id"] == ASSAY_VISIUM)
+                ~(
+                    self.adata.obs["assay_ontology_term_id"].apply(
+                        lambda t: is_ontological_descendant_of(ONTOLOGY_PARSER, t, ASSAY_VISIUM, True)
+                    )
+                )
                 & (self.adata.obs[tissue_position_name].notnull())
             ).any()
         ):
@@ -1521,7 +1538,11 @@ def _validate_spatial_tissue_position(self, tissue_position_name: str, min: int,
         if (
             tissue_position_name not in self.adata.obs
             or (
-                (self.adata.obs["assay_ontology_term_id"] == ASSAY_VISIUM)
+                (
+                    self.adata.obs["assay_ontology_term_id"].apply(
+                        lambda t: is_ontological_descendant_of(ONTOLOGY_PARSER, t, ASSAY_VISIUM, True)
+                    )
+                )
                 & (self.adata.obs[tissue_position_name].isnull())
             ).any()
         ):
@@ -1767,34 +1788,27 @@ def _is_visium(self) -> bool:
 
     def _is_visium_including_descendants(self) -> bool:
         """
-        Determine if the assay_ontology_term_id is Visium (descendant of EFO:0010961).
+        Determine if the assay_ontology_term_id is Visium (inclusive descendant of EFO:0010961).
+        Returns True if ANY assay_ontology_term_id is a Visium descendant
 
         :return True if assay_ontology_term_id is Visium, False otherwise.
         :rtype bool
         """
-        if self.is_visium is None:
-            assay_ontology_term_id = self.adata.obs.get("assay_ontology_term_id")
-
-            if assay_ontology_term_id is not None:
-                # Convert to a regular Series if it's Categorical
-                assay_ontology_term_id = pd.Series(assay_ontology_term_id)
+        _assay_key = "assay_ontology_term_id"
+        includes_and_visium = False
 
-                # Check if any term is a descendant of ASSAY_VISIUM
-                try:
-                    visium_results = assay_ontology_term_id.apply(
-                        lambda term: ASSAY_VISIUM
-                        in list(ONTOLOGY_PARSER.get_lowest_common_ancestors(ASSAY_VISIUM, term))
-                    )
-                    self.is_visium = visium_results.astype(bool).any()
-                except KeyError as e:
-                    # This generally means the assay_ontology_term_id is invalid, but we want the error to be raised
-                    # by our explicit validator checks, not this implicit one.
-                    logger.warning(f"KeyError processing assay_ontology_term_id ontology: {e}")
-                    self.is_visium = False
-            else:
-                self.is_visium = False
+        # only compute if not already stored
+        if self.is_visium is None and _assay_key in self.adata.obs.columns:
+            # check if any assay_ontology_term_ids are descendants of VISIUM
+            includes_and_visium = (
+                self.adata.obs[_assay_key]
+                .apply(lambda assay: is_ontological_descendant_of(ONTOLOGY_PARSER, assay, ASSAY_VISIUM, True))
+                .any()
+            )
 
-        return self.is_visium
+        # save state and return
+        self.is_visium = includes_and_visium
+        return includes_and_visium
 
     def _validate_spatial_image_shape(self, image_name: str, image: np.ndarray, max_dimension: int = None):
         """
diff --git a/cellxgene_schema_cli/tests/test_schema_compliance.py b/cellxgene_schema_cli/tests/test_schema_compliance.py
index 3646bc43..7268f332 100644
--- a/cellxgene_schema_cli/tests/test_schema_compliance.py
+++ b/cellxgene_schema_cli/tests/test_schema_compliance.py
@@ -477,6 +477,27 @@ def test_column_presence_assay(self, validator_with_adata):
             "to missing dependent column in adata.obs.",
         ]
 
+    @pytest.mark.parametrize(
+        "assay_ontology_term_id, is_descendant",
+        [("EFO:0010961", True), ("EFO:0022858", True), ("EFO:0030029", False), ("EFO:0002697", False)],
+    )
+    def test_column_presence_in_tissue(self, validator_with_visium_assay, assay_ontology_term_id, is_descendant):
+        """
+        Spatial assays that are descendants of visium must have a valid "in_tissue" column.
+        """
+        validator: Validator = validator_with_visium_assay
+
+        # reset and test
+        validator.reset()
+        validator.adata.obs["assay_ontology_term_id"] = assay_ontology_term_id
+        validator._validate_spatial_tissue_position("in_tissue", 0, 1)
+        if is_descendant:
+            assert validator.errors == []
+        else:
+            assert validator.errors == [
+                "obs['in_tissue'] is only allowed for descendants of obs['assay_ontology_term_id'] 'EFO:0010961' (Visium Spatial Gene Expression) and uns['spatial']['is_single'] is True."
+            ]
+
     @pytest.mark.parametrize("reserved_column", schema_def["components"]["obs"]["reserved_columns"])
     def test_obs_reserved_columns_presence(self, validator_with_adata, reserved_column):
         """

From 478648ef138d4d6d26ee43bd68c84414fe2292fc Mon Sep 17 00:00:00 2001
From: Evan Molinelli <ejmolinelli@users.noreply.github.com>
Date: Tue, 26 Nov 2024 11:41:54 -0500
Subject: [PATCH 12/28] feat: update validation for uns['spatial'] (#1129)

Co-authored-by: Evan Molinelli <emolinelli@Evan-CZI-Laptop.local>
Co-authored-by: Nayib Gloria <55710092+nayib-jose-gloria@users.noreply.github.com>
---
 .../cellxgene_schema/validate.py              |  42 ++--
 .../tests/test_schema_compliance.py           |  11 +-
 cellxgene_schema_cli/tests/test_validate.py   | 201 +++++++++++++-----
 3 files changed, 179 insertions(+), 75 deletions(-)

diff --git a/cellxgene_schema_cli/cellxgene_schema/validate.py b/cellxgene_schema_cli/cellxgene_schema/validate.py
index f7892e6b..024a51ec 100644
--- a/cellxgene_schema_cli/cellxgene_schema/validate.py
+++ b/cellxgene_schema_cli/cellxgene_schema/validate.py
@@ -28,8 +28,15 @@
 
 VISIUM_AND_IS_SINGLE_TRUE_MATRIX_SIZE = 4992
 SPATIAL_HIRES_IMAGE_MAX_DIMENSION_SIZE = 2000
+SPATIAL_HIRES_IMAGE_MAX_DIMENSION_SIZE_VISIUM_11MM = 4000
 
-ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE = "descendants of obs['assay_ontology_term_id'] 'EFO:0010961' (Visium Spatial Gene Expression) and uns['spatial']['is_single'] is True"
+CONDITION_IS_VISIUM = "a descendant of 'EFO:0010961' (Visium Spatial Gene Expression)"
+CONDITION_IS_SEQV2 = f"'{ASSAY_SLIDE_SEQV2}' (Slide-seqV2)"
+
+
+ERROR_SUFFIX_SPATIAL = f"obs['assay_ontology_term_id'] is either {CONDITION_IS_VISIUM} or {CONDITION_IS_SEQV2}"
+ERROR_SUFFIX_VISIUM = f"obs['assay_ontology_term_id'] is {CONDITION_IS_VISIUM}"
+ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE = f"{ERROR_SUFFIX_VISIUM} and uns['spatial']['is_single'] is True"
 ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_FORBIDDEN = f"is only allowed for {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE}"
 ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_REQUIRED = f"is required for {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE}"
 ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_IN_TISSUE_0 = f"{ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE} and in_tissue is 0"
@@ -95,9 +102,11 @@ def _is_supported_spatial_assay(self) -> bool:
         """
         if self.is_spatial is None:
             try:
-                self.is_spatial = False
-                if self.adata.obs.assay_ontology_term_id.isin([ASSAY_VISIUM, ASSAY_SLIDE_SEQV2]).any():
-                    self.is_spatial = True
+                _spatial = (
+                    self._is_visium_including_descendants()
+                    or self.adata.obs.assay_ontology_term_id.isin([ASSAY_SLIDE_SEQV2]).any()
+                )
+                self.is_spatial = bool(_spatial)
             except AttributeError:
                 # specific error reporting will occur downstream in the validation
                 self.is_spatial = False
@@ -1466,10 +1475,7 @@ def _validate_spatial_assay_ontology_term_id(self):
         # Validate assay ontology term ids are identical.
         term_count = obs["assay_ontology_term_id"].nunique()
         if term_count > 1:
-            self.errors.append(
-                "When obs['assay_ontology_term_id'] is either 'EFO:0010961' (Visium Spatial Gene Expression) or "
-                "'EFO:0030062' (Slide-seqV2), all observations must contain the same value."
-            )
+            self.errors.append(f"When {ERROR_SUFFIX_SPATIAL}" ", all observations must contain the same value.")
 
     def _validate_spatial_cell_type_ontology_term_id(self):
         """
@@ -1599,10 +1605,7 @@ def _check_spatial_uns(self):
         uns_spatial = self.adata.uns.get("spatial")
         is_supported_spatial_assay = self._is_supported_spatial_assay()
         if uns_spatial is not None and not is_supported_spatial_assay:
-            self.errors.append(
-                "uns['spatial'] is only allowed for obs['assay_ontology_term_id'] values "
-                "'EFO:0010961' (Visium Spatial Gene Expression) and 'EFO:0030062' (Slide-seqV2)."
-            )
+            self.errors.append(f"uns['spatial'] is only allowed when {ERROR_SUFFIX_SPATIAL}")
             return
 
         # Exit if we aren't dealing with a supported spatial assay as no further checks are necessary.
@@ -1611,10 +1614,7 @@ def _check_spatial_uns(self):
 
         # spatial is required for supported spatial assays.
         if not isinstance(uns_spatial, dict):
-            self.errors.append(
-                "A dict in uns['spatial'] is required for obs['assay_ontology_term_id'] values "
-                "'EFO:0010961' (Visium Spatial Gene Expression) and 'EFO:0030062' (Slide-seqV2)."
-            )
+            self.errors.append("A dict in uns['spatial'] is required when " f"{ERROR_SUFFIX_SPATIAL}.")
             return
 
         # is_single is required.
@@ -1693,7 +1693,11 @@ def _check_spatial_uns(self):
                 self.errors.append("uns['spatial'][library_id]['images'] must contain the key 'hires'.")
             # hires is specified: proceed with validation of hires.
             else:
-                self._validate_spatial_image_shape("hires", uns_images["hires"], SPATIAL_HIRES_IMAGE_MAX_DIMENSION_SIZE)
+                _assay_term = self.adata.obs["assay_ontology_term_id"].values[0]
+                _max_size = SPATIAL_HIRES_IMAGE_MAX_DIMENSION_SIZE
+                if is_ontological_descendant_of(ONTOLOGY_PARSER, _assay_term, "EFO:0022860", True):
+                    _max_size = SPATIAL_HIRES_IMAGE_MAX_DIMENSION_SIZE_VISIUM_11MM
+                self._validate_spatial_image_shape("hires", uns_images["hires"], _max_size)
 
             # fullres is optional.
             uns_fullres = uns_images.get("fullres")
@@ -1802,12 +1806,12 @@ def _is_visium_including_descendants(self) -> bool:
             # check if any assay_ontology_term_ids are descendants of VISIUM
             includes_and_visium = (
                 self.adata.obs[_assay_key]
+                .astype("string")
                 .apply(lambda assay: is_ontological_descendant_of(ONTOLOGY_PARSER, assay, ASSAY_VISIUM, True))
                 .any()
             )
+            self.is_visium = includes_and_visium
 
-        # save state and return
-        self.is_visium = includes_and_visium
         return includes_and_visium
 
     def _validate_spatial_image_shape(self, image_name: str, image: np.ndarray, max_dimension: int = None):
diff --git a/cellxgene_schema_cli/tests/test_schema_compliance.py b/cellxgene_schema_cli/tests/test_schema_compliance.py
index 7268f332..425086fc 100644
--- a/cellxgene_schema_cli/tests/test_schema_compliance.py
+++ b/cellxgene_schema_cli/tests/test_schema_compliance.py
@@ -4,6 +4,7 @@
 
 import tempfile
 import unittest
+from copy import deepcopy
 
 import anndata
 import fixtures.examples_validate as examples
@@ -495,7 +496,7 @@ def test_column_presence_in_tissue(self, validator_with_visium_assay, assay_onto
             assert validator.errors == []
         else:
             assert validator.errors == [
-                "obs['in_tissue'] is only allowed for descendants of obs['assay_ontology_term_id'] 'EFO:0010961' (Visium Spatial Gene Expression) and uns['spatial']['is_single'] is True."
+                "obs['in_tissue'] is only allowed for obs['assay_ontology_term_id'] is a descendant of 'EFO:0010961' (Visium Spatial Gene Expression) and uns['spatial']['is_single'] is True."
             ]
 
     @pytest.mark.parametrize("reserved_column", schema_def["components"]["obs"]["reserved_columns"])
@@ -1673,11 +1674,16 @@ def test_should_warn_for_low_gene_count(self, validator_with_adata):
         Raise a warning if there are too few genes
         """
         validator = validator_with_adata
+        # NOTE:[EM] changing the schema def here is stateful and results in unpredictable test results.
+        #  Reset after mutating.
+        _old_schema = deepcopy(validator.schema_def.copy())
+
         validator.schema_def["components"]["var"]["warn_if_less_than_rows"] = 100
         validator.validate_adata()
         assert validator.warnings == [
             "WARNING: Dataframe 'var' only has 4 rows. Features SHOULD NOT be filtered from expression matrix."
         ]
+        validator.schema_def = _old_schema
 
     @pytest.mark.parametrize(
         "df,column",
@@ -2198,7 +2204,6 @@ def test_obsm_values_no_X_embedding__non_spatial_dataset(self, validator_with_ad
         ]
         assert validator.is_spatial is False
         assert validator.warnings == [
-            "WARNING: Dataframe 'var' only has 4 rows. Features SHOULD NOT be filtered from expression matrix.",
             "WARNING: Embedding key in 'adata.obsm' harmony is not 'spatial' nor does it start with 'X_'. "
             "Thus, it will not be available in Explorer",
             "WARNING: Validation of raw layer was not performed due to current errors, try again after fixing current errors.",
@@ -2248,7 +2253,6 @@ def test_obsm_values_warn_start_with_X(self, validator_with_adata):
         validator.adata.obsm["harmony"] = pd.DataFrame(validator.adata.obsm["X_umap"], index=validator.adata.obs_names)
         validator.validate_adata()
         assert validator.warnings == [
-            "WARNING: Dataframe 'var' only has 4 rows. Features SHOULD NOT be filtered from expression matrix.",
             "WARNING: Embedding key in 'adata.obsm' harmony is not 'spatial' nor does it start with 'X_'. "
             "Thus, it will not be available in Explorer",
             "WARNING: Validation of raw layer was not performed due to current errors, try again after fixing current errors.",
@@ -2282,7 +2286,6 @@ def test_obsm_values_key_start_with_number(self, validator_with_adata):
             "'pandas.core.frame.DataFrame'>').",
         ]
         assert validator.warnings == [
-            "WARNING: Dataframe 'var' only has 4 rows. Features SHOULD NOT be filtered from expression matrix.",
             "WARNING: Embedding key in 'adata.obsm' 3D is not 'spatial' nor does it start with 'X_'. "
             "Thus, it will not be available in Explorer",
             "WARNING: Validation of raw layer was not performed due to current errors, try again after fixing current errors.",
diff --git a/cellxgene_schema_cli/tests/test_validate.py b/cellxgene_schema_cli/tests/test_validate.py
index accc6b86..9ab30177 100644
--- a/cellxgene_schema_cli/tests/test_validate.py
+++ b/cellxgene_schema_cli/tests/test_validate.py
@@ -15,6 +15,8 @@
     ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_FORBIDDEN,
     ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_IN_TISSUE_0,
     ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_REQUIRED,
+    SPATIAL_HIRES_IMAGE_MAX_DIMENSION_SIZE,
+    SPATIAL_HIRES_IMAGE_MAX_DIMENSION_SIZE_VISIUM_11MM,
     Validator,
     validate,
 )
@@ -423,10 +425,9 @@ def test__validate_spatial_type_error(self, spatial):
 
         # Confirm key type dict is required.
         validator.validate_adata()
-        assert validator.errors
         assert (
-            "A dict in uns['spatial'] is required for obs['assay_ontology_term_id'] values 'EFO:0010961' (Visium Spatial Gene Expression) and 'EFO:0030062' (Slide-seqV2)."
-            in validator.errors[0]
+            validator.errors[0]
+            == "ERROR: A dict in uns['spatial'] is required when obs['assay_ontology_term_id'] is either a descendant of 'EFO:0010961' (Visium Spatial Gene Expression) or 'EFO:0030062' (Slide-seqV2)."
         )
 
     def test__validate_spatial_is_single_false_ok(self):
@@ -448,25 +449,42 @@ def test__validate_spatial_forbidden_if_not_visium_or_slide_seqv2(self):
 
         # Confirm spatial is not allowed for 10x 3' v2.
         validator._check_spatial_uns()
-        assert len(validator.errors) == 1
-        assert (
-            "uns['spatial'] is only allowed for obs['assay_ontology_term_id'] values "
-            "'EFO:0010961' (Visium Spatial Gene Expression) and 'EFO:0030062' (Slide-seqV2)." in validator.errors[0]
-        )
+        assert validator.errors == [
+            "uns['spatial'] is only allowed when obs['assay_ontology_term_id'] is either "
+            "a descendant of 'EFO:0010961' (Visium Spatial Gene Expression) or 'EFO:0030062' (Slide-seqV2)"
+        ]
 
-    def test__validate_spatial_required_if_visium(self):
+    @pytest.mark.parametrize(
+        "assay_ontology_term_id, is_descendant",
+        [("EFO:0010961", True), ("EFO:0022858", True), ("EFO:0030029", False), ("EFO:0002697", False)],
+    )
+    def test__validate_spatial_required_if_visium(self, assay_ontology_term_id, is_descendant):
         validator: Validator = Validator()
         validator._set_schema_def()
         validator.adata = adata_visium.copy()
-        validator.adata.uns = good_uns.copy()
+        validator.adata.obs["assay_ontology_term_id"] = assay_ontology_term_id
 
-        # Confirm spatial is required for Visium.
-        validator._check_spatial_uns()
-        assert len(validator.errors) == 1
-        assert (
-            "A dict in uns['spatial'] is required for obs['assay_ontology_term_id'] values "
-            "'EFO:0010961' (Visium Spatial Gene Expression) and 'EFO:0030062' (Slide-seqV2)." in validator.errors[0]
-        )
+        if is_descendant:
+            # check pass if 'spatial' included
+            validator.adata.uns = good_uns_with_visium_spatial.copy()
+            validator._check_spatial_uns()
+            assert len(validator.errors) == 0
+            validator.reset()
+
+            # check fail if 'spatial' not included
+            validator.adata.uns = good_uns.copy()
+            validator._check_spatial_uns()
+            assert validator.errors == [
+                "A dict in uns['spatial'] is required when obs['assay_ontology_term_id'] is "
+                "either a descendant of 'EFO:0010961' (Visium Spatial Gene Expression) or 'EFO:0030062' (Slide-seqV2)."
+            ]
+            validator.reset()
+        else:
+            # check fail if 'spatial' included
+            validator.adata.uns = good_uns_with_visium_spatial.copy()
+            validator._check_spatial_uns()
+            assert len(validator.errors) == 1
+            validator.reset()
 
     def test__validate_spatial_required_if_slide_seqV2(self):
         validator: Validator = Validator()
@@ -476,11 +494,9 @@ def test__validate_spatial_required_if_slide_seqV2(self):
 
         # Confirm spatial is required for Slide-seqV2.
         validator._check_spatial_uns()
-        assert len(validator.errors) == 1
-        assert (
-            "A dict in uns['spatial'] is required for obs['assay_ontology_term_id'] values "
-            "'EFO:0010961' (Visium Spatial Gene Expression) and 'EFO:0030062' (Slide-seqV2)." in validator.errors[0]
-        )
+        assert validator.errors == [
+            "A dict in uns['spatial'] is required when obs['assay_ontology_term_id'] is either a descendant of 'EFO:0010961' (Visium Spatial Gene Expression) or 'EFO:0030062' (Slide-seqV2)."
+        ]
 
     def test__validate_spatial_allowed_keys_error(self):
         validator: Validator = Validator()
@@ -496,16 +512,26 @@ def test__validate_spatial_allowed_keys_error(self):
             "More than two top-level keys detected:" in validator.errors[0]
         )
 
-    def test__validate_is_single_required_visium_error(self):
+    @pytest.mark.parametrize(
+        "assay_ontology_term_id, is_descendant",
+        [("EFO:0010961", True), ("EFO:0022858", True), ("EFO:0030029", False), ("EFO:0002697", False)],
+    )
+    def test__validate_is_single_required_visium_error(self, assay_ontology_term_id, is_descendant):
         validator: Validator = Validator()
         validator._set_schema_def()
         validator.adata = adata_visium.copy()
+        validator.adata.obs["assay_ontology_term_id"] = assay_ontology_term_id
         validator.adata.uns["spatial"].pop("is_single")
-
-        # Confirm is_single is identified as required.
         validator._check_spatial_uns()
-        assert validator.errors
-        assert "uns['spatial'] must contain the key 'is_single'." in validator.errors[0]
+
+        if is_descendant:
+            # if spatial, MUST specify `is_single`
+            assert "uns['spatial'] must contain the key 'is_single'." in validator.errors[0]
+        else:
+            # if not spatial, MUST NOT specify `is_single`
+            assert validator.errors == [
+                "uns['spatial'] is only allowed when obs['assay_ontology_term_id'] is either a descendant of 'EFO:0010961' (Visium Spatial Gene Expression) or 'EFO:0030062' (Slide-seqV2)"
+            ]
 
     def test__validate_is_single_required_slide_seqV2_error(self):
         validator: Validator = Validator()
@@ -560,19 +586,36 @@ def test__validate_library_id_forbidden_if_visium_or_is_single_false(self):
         assert len(validator.errors) == 1
         assert f"uns['spatial'][library_id] {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_FORBIDDEN}." in validator.errors[0]
 
-    def test__validate_library_id_required_if_visium(self):
+    @pytest.mark.parametrize(
+        "assay_ontology_term_id, is_descendant",
+        [("EFO:0010961", True), ("EFO:0022858", True), ("EFO:0030029", False), ("EFO:0002697", False)],
+    )
+    def test__validate_library_id_required_if_visium(self, assay_ontology_term_id, is_descendant):
         validator: Validator = Validator()
         validator._set_schema_def()
         validator.adata = adata_visium.copy()
-        validator.adata.uns["spatial"].pop(visium_library_id)
 
-        # Confirm library_id is identified as required.
-        validator._check_spatial_uns()
-        assert validator.errors
-        assert (
-            f"uns['spatial'] must contain at least one key representing the library_id when {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE}."
-            in validator.errors[0]
-        )
+        validator.adata.obs["assay_ontology_term_id"] = assay_ontology_term_id
+        if is_descendant:
+            # if spatial, `library_id` must exist
+            validator._check_spatial_uns()
+            assert len(validator.errors) == 0
+            validator.reset()
+
+            # if spatial, but missing from `uns`
+            validator.adata.uns["spatial"].pop(visium_library_id)
+            validator._check_spatial_uns()
+            assert validator.errors == [
+                f"uns['spatial'] must contain at least one key representing the library_id when {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE}."
+            ]
+        else:
+            # if not spatial, MUST NOT define `library_id`
+            validator.adata.uns["spatial"][visium_library_id] = {"images": []}
+            validator._check_spatial_uns()
+            # Report the most general top level error
+            assert validator.errors == [
+                "uns['spatial'] is only allowed when obs['assay_ontology_term_id'] is either a descendant of 'EFO:0010961' (Visium Spatial Gene Expression) or 'EFO:0030062' (Slide-seqV2)"
+            ]
 
     @pytest.mark.parametrize("library_id", [None, "invalid", 1, 1.0, True])
     def test__validate_library_id_type_error(self, library_id):
@@ -610,7 +653,11 @@ def test__validate_images_required_error(self):
         assert validator.errors
         assert "uns['spatial'][library_id] must contain the key 'images'." in validator.errors[0]
 
-    def test__validate_images_allowed_keys_error(self):
+    @pytest.mark.parametrize(
+        "assay_ontology_term_id, is_descendant",
+        [("EFO:0010961", True), ("EFO:0022858", True), ("EFO:0030029", False), ("EFO:0002697", False)],
+    )
+    def test__validate_images_allowed_keys_error(self, assay_ontology_term_id, is_descendant):
         validator: Validator = Validator()
         validator._set_schema_def()
         validator.adata = adata_visium.copy()
@@ -730,34 +777,84 @@ def test__validate_images_image_is_shape_error(self, image_name):
             "for example) or 4 (RGBA color model for example) for its last dimension" in validator.errors[0]
         )
 
-    def test__validate_images_hires_max_dimension_greater_than_error(self):
+    @pytest.mark.parametrize(
+        "assay_ontology_term_id, hi_res_size, image_max",
+        [
+            ("EFO:0022858", 2001, SPATIAL_HIRES_IMAGE_MAX_DIMENSION_SIZE),
+            ("EFO:0022860", 4001, SPATIAL_HIRES_IMAGE_MAX_DIMENSION_SIZE_VISIUM_11MM),
+        ],
+    )
+    def test__validate_images_hires_max_dimension_greater_than_error(
+        self, assay_ontology_term_id, hi_res_size, image_max
+    ):
         validator: Validator = Validator()
         validator._set_schema_def()
         validator.adata = adata_visium.copy()
-        validator.adata.uns["spatial"][visium_library_id]["images"]["hires"] = np.zeros((1, 2001, 3), dtype=np.uint8)
+        validator.adata.obs["assay_ontology_term_id"] = assay_ontology_term_id
+        validator.adata.uns["spatial"][visium_library_id]["images"]["hires"] = np.zeros(
+            (1, hi_res_size, 3), dtype=np.uint8
+        )
 
         # Confirm hires is identified as invalid.
         validator._check_spatial_uns()
-        assert validator.errors
-        assert (
-            "The largest dimension of uns['spatial'][library_id]['images']['hires'] must be 2000 pixels"
-            in validator.errors[0]
-        )
+        assert validator.errors == [
+            f"The largest dimension of uns['spatial'][library_id]['images']['hires'] must be {image_max} pixels, it has a largest dimension of {hi_res_size} pixels."
+        ]
 
-    def test__validate_images_hires_max_dimension_less_than_error(self):
+    @pytest.mark.parametrize(
+        "assay_ontology_term_id, hi_res_size, size_requirement",
+        [
+            ("EFO:0022858", SPATIAL_HIRES_IMAGE_MAX_DIMENSION_SIZE, SPATIAL_HIRES_IMAGE_MAX_DIMENSION_SIZE),
+            ("EFO:0022858", SPATIAL_HIRES_IMAGE_MAX_DIMENSION_SIZE_VISIUM_11MM, SPATIAL_HIRES_IMAGE_MAX_DIMENSION_SIZE),
+            ("EFO:0022860", SPATIAL_HIRES_IMAGE_MAX_DIMENSION_SIZE, SPATIAL_HIRES_IMAGE_MAX_DIMENSION_SIZE_VISIUM_11MM),
+            (
+                "EFO:0022860",
+                SPATIAL_HIRES_IMAGE_MAX_DIMENSION_SIZE_VISIUM_11MM,
+                SPATIAL_HIRES_IMAGE_MAX_DIMENSION_SIZE_VISIUM_11MM,
+            ),
+        ],
+    )
+    def test__validate_images_hires_max_dimension(self, assay_ontology_term_id, hi_res_size, size_requirement):
         validator: Validator = Validator()
         validator._set_schema_def()
         validator.adata = adata_visium.copy()
-        validator.adata.uns["spatial"][visium_library_id]["images"]["hires"] = np.zeros((1, 1999, 3), dtype=np.uint8)
+        validator.adata.obs["assay_ontology_term_id"] = assay_ontology_term_id
+        validator.adata.uns["spatial"][visium_library_id]["images"]["hires"] = np.zeros(
+            (1, hi_res_size, 3), dtype=np.uint8
+        )
 
         # Confirm hires is identified as invalid.
+        validator.reset()
         validator._check_spatial_uns()
-        assert validator.errors
-        assert (
-            "The largest dimension of uns['spatial'][library_id]['images']['hires'] must be 2000 pixels"
-            in validator.errors[0]
+        if hi_res_size == size_requirement:
+            assert validator.errors == []
+        else:
+            assert validator.errors == [
+                f"The largest dimension of uns['spatial'][library_id]['images']['hires'] must be {size_requirement} pixels, it has a largest dimension of {hi_res_size} pixels."
+            ]
+
+    @pytest.mark.parametrize(
+        "assay_ontology_term_id, hi_res_size, image_max",
+        [
+            ("EFO:0022858", 1999, SPATIAL_HIRES_IMAGE_MAX_DIMENSION_SIZE),
+            ("EFO:0022860", 3999, SPATIAL_HIRES_IMAGE_MAX_DIMENSION_SIZE_VISIUM_11MM),
+        ],
+    )
+    def test__validate_images_hires_max_dimension_less_than_error(self, assay_ontology_term_id, hi_res_size, image_max):
+        validator: Validator = Validator()
+        validator._set_schema_def()
+        validator.adata = adata_visium.copy()
+        validator.adata.obs["assay_ontology_term_id"] = assay_ontology_term_id
+        validator.adata.uns["spatial"][visium_library_id]["images"]["hires"] = np.zeros(
+            (1, hi_res_size, 3), dtype=np.uint8
         )
 
+        # Confirm hires is identified as invalid.
+        validator._check_spatial_uns()
+        assert validator.errors == [
+            f"The largest dimension of uns['spatial'][library_id]['images']['hires'] must be {image_max} pixels, it has a largest dimension of {hi_res_size} pixels."
+        ]
+
     def test__validate_scalefactors_required_error(self):
         validator: Validator = Validator()
         validator._set_schema_def()
@@ -861,8 +958,8 @@ def test__validate_assay_type_ontology_term_id_not_unique_error(self):
         validator._validate_spatial_assay_ontology_term_id()
         assert validator.errors
         assert (
-            "When obs['assay_ontology_term_id'] is either 'EFO:0010961' (Visium Spatial Gene Expression) or "
-            "'EFO:0030062' (Slide-seqV2), all observations must contain the same value."
+            "When obs['assay_ontology_term_id'] is either a descendant"
+            " of 'EFO:0010961' (Visium Spatial Gene Expression) or 'EFO:0030062' (Slide-seqV2), all observations must contain the same value."
         ) in validator.errors[0]
 
     def test__validate_assay_type_ontology_term_id_not_unique_ok(self, valid_adata):

From 7f840ce1796bc8286ed888bb6351e35fef646467 Mon Sep 17 00:00:00 2001
From: Joyce Yan <5653616+joyceyan@users.noreply.github.com>
Date: Wed, 27 Nov 2024 10:55:01 -0800
Subject: [PATCH 13/28] feat: add genetic ancestry fields for schema 5.3
 (#1132)

---
 .../schema_definitions/schema_definition.yaml |  12 ++
 .../cellxgene_schema/validate.py              | 107 ++++++++++++++++
 .../tests/fixtures/examples_validate.py       |  90 +++++++++++++
 .../tests/fixtures/h5ads/example_valid.h5ad   | Bin 575888 -> 593864 bytes
 .../tests/test_schema_compliance.py           | 118 ++++++++++++++++++
 5 files changed, 327 insertions(+)

diff --git a/cellxgene_schema_cli/cellxgene_schema/schema_definitions/schema_definition.yaml b/cellxgene_schema_cli/cellxgene_schema/schema_definitions/schema_definition.yaml
index 28a3fad5..d14a0442 100644
--- a/cellxgene_schema_cli/cellxgene_schema/schema_definitions/schema_definition.yaml
+++ b/cellxgene_schema_cli/cellxgene_schema/schema_definitions/schema_definition.yaml
@@ -582,3 +582,15 @@ components:
           - "cell culture"
           - "organoid"
           - "tissue"
+      genetic_ancestry_African:
+        type: genetic_ancestry_value
+      genetic_ancestry_East_Asian:
+        type: genetic_ancestry_value
+      genetic_ancestry_European:
+        type: genetic_ancestry_value
+      genetic_ancestry_Indigenous_American:
+        type: genetic_ancestry_value
+      genetic_ancestry_Oceanian:
+        type: genetic_ancestry_value
+      genetic_ancestry_South_Asian:
+        type: genetic_ancestry_value
diff --git a/cellxgene_schema_cli/cellxgene_schema/validate.py b/cellxgene_schema_cli/cellxgene_schema/validate.py
index 024a51ec..01a2d140 100644
--- a/cellxgene_schema_cli/cellxgene_schema/validate.py
+++ b/cellxgene_schema_cli/cellxgene_schema/validate.py
@@ -416,6 +416,109 @@ def _count_matrix_nonzero(self, matrix_name: str, matrix: Union[np.ndarray, spar
         self.number_non_zero[matrix_name] = nnz
         return nnz
 
+    def _validate_genetic_ancestry(self):
+        """
+        Performs row-based validation of the genetic_ancestry_* fields. A valid row must be either:
+        - all float('nan') (required if organism is not Homo sapiens; also used when info is unavailable), or
+        - all non-NaN numeric values that sum to 1.0 (Homo sapiens only)
+
+        Additionally, verifies that all rows with the same donor_id must have the same genetic ancestry values
+        """
+        ancestry_columns = [
+            "genetic_ancestry_African",
+            "genetic_ancestry_East_Asian",
+            "genetic_ancestry_European",
+            "genetic_ancestry_Indigenous_American",
+            "genetic_ancestry_Oceanian",
+            "genetic_ancestry_South_Asian",
+        ]
+
+        organism_column = "organism_ontology_term_id"
+        donor_id_column = "donor_id"
+
+        # Skip any additional validation if the genetic ancestry or organism columns are not present
+        # An error for missing columns will be raised at a different point
+        required_columns = ancestry_columns + [organism_column, donor_id_column]
+        for column in required_columns:
+            if column not in self.adata.obs.columns:
+                return
+
+        donor_id_to_ancestry_values = dict()
+
+        def is_valid_row(row):
+            ancestry_values = row[ancestry_columns]
+
+            # If ancestry values are different for the same donor id, then this row is invalid
+            donor_id = row[donor_id_column]
+            if donor_id in donor_id_to_ancestry_values:
+                if not donor_id_to_ancestry_values[donor_id].equals(ancestry_values):
+                    return False
+            else:
+                donor_id_to_ancestry_values[donor_id] = ancestry_values
+
+            # All values are NaN. This is always valid, regardless of organism
+            if ancestry_values.isna().all():
+                return True
+
+            # If any values are NaN, and we didn't return in the earlier all NaN check, then
+            # this is invalid
+            if ancestry_values.isna().any():
+                return False
+
+            # If organism is not homo sapiens, and we didn't return in the earlier all NaN check,
+            # then this row is invalid
+            if row[organism_column] != "NCBITaxon:9606":
+                return False
+
+            # The sum of genetic ancestry values should be approximately 1.0
+            if (
+                ancestry_values.apply(lambda x: isinstance(x, (float, int))).all()
+                and abs(ancestry_values.sum() - 1.0) <= 1e-6
+            ):
+                return True
+
+            return False
+
+        invalid_rows = ~self.adata.obs.apply(is_valid_row, axis=1)
+
+        if invalid_rows.any():
+            invalid_indices = self.adata.obs.index[invalid_rows].tolist()
+            self.errors.append(
+                f"obs rows with indices {invalid_indices} have invalid genetic_ancestry_* values. All "
+                f"observations with the same donor_id must contain the same genetic_ancestry_* values. If "
+                f"organism_ontolology_term_id is NOT 'NCBITaxon:9606' for Homo sapiens, then all genetic"
+                f"ancestry values MUST be float('nan'). If organism_ontolology_term_id is 'NCBITaxon:9606' "
+                f"for Homo sapiens, then the value MUST be a float('nan') if unavailable; otherwise, the "
+                f"sum of all genetic_ancestry_* fields must be equal to 1.0"
+            )
+
+    def _validate_individual_genetic_ancestry_value(self, column: pd.Series, column_name: str):
+        """
+        The following fields are valid for genetic_ancestry_value columns:
+        - float values between 0 and 1
+        - float('nan')
+        """
+        if column.dtype != float:
+            self.errors.append(f"Column '{column_name}' in obs must be float, not '{column.dtype.name}'.")
+            return
+
+        def is_individual_value_valid(value):
+            if isinstance(value, (float, int)) and 0 <= value <= 1:
+                return True
+            # Ensures only float('nan') or numpy.nan is valid, None is invalid
+            if isinstance(value, float) and pd.isna(value):
+                return True
+            return False
+
+        # Identify invalid values
+        invalid_values = column[~column.map(is_individual_value_valid)]
+
+        if not invalid_values.empty:
+            self.errors.append(
+                f"Column '{column_name}' in obs contains invalid values: {invalid_values.to_list()}. "
+                f"Valid values are floats between 0 and 1 or float('nan')."
+            )
+
     def _validate_column_feature_is_filtered(self, column: pd.Series, column_name: str, df_name: str):
         """
         Validates the "is_feature_filtered" in adata.var. This column must be bool, and for genes that are set to
@@ -505,6 +608,9 @@ def _validate_column(self, column: pd.Series, column_name: str, df_name: str, co
         if column_def.get("type") == "feature_is_filtered":
             self._validate_column_feature_is_filtered(column, column_name, df_name)
 
+        if column_def.get("type") == "genetic_ancestry_value":
+            self._validate_individual_genetic_ancestry_value(column, column_name)
+
         if "enum" in column_def:
             bad_enums = [v for v in column.drop_duplicates() if v not in column_def["enum"]]
             if bad_enums:
@@ -781,6 +887,7 @@ def _validate_dataframe(self, df_name: str):
                                 f"Column '{column_name}' in dataframe '{df_name}' contains a category '{category}' with "
                                 f"zero observations. These categories will be removed when `--add-labels` flag is present."
                             )
+                    self._validate_genetic_ancestry()
                 categorical_types = {type(x) for x in column.dtype.categories.values}
                 # Check for columns that have illegal categories, which are not supported by anndata 0.8.0
                 # TODO: check if this can be removed after upgading to anndata 0.10.0
diff --git a/cellxgene_schema_cli/tests/fixtures/examples_validate.py b/cellxgene_schema_cli/tests/fixtures/examples_validate.py
index 470c165c..accbecfc 100644
--- a/cellxgene_schema_cli/tests/fixtures/examples_validate.py
+++ b/cellxgene_schema_cli/tests/fixtures/examples_validate.py
@@ -48,6 +48,12 @@
             "HsapDv:0000003",
             "donor_1",
             "nucleus",
+            float("nan"),
+            float("nan"),
+            float("nan"),
+            float("nan"),
+            float("nan"),
+            float("nan"),
         ],
         [
             "CL:0000192",
@@ -62,6 +68,12 @@
             "MmusDv:0000003",
             "donor_2",
             "na",
+            float("nan"),
+            float("nan"),
+            float("nan"),
+            float("nan"),
+            float("nan"),
+            float("nan"),
         ],
     ],
     index=["X", "Y"],
@@ -78,6 +90,12 @@
         "development_stage_ontology_term_id",
         "donor_id",
         "suspension_type",
+        "genetic_ancestry_African",
+        "genetic_ancestry_East_Asian",
+        "genetic_ancestry_European",
+        "genetic_ancestry_Indigenous_American",
+        "genetic_ancestry_Oceanian",
+        "genetic_ancestry_South_Asian",
     ],
 )
 
@@ -144,6 +162,12 @@
             "donor_1",
             "na",
             0,
+            float("nan"),
+            float("nan"),
+            float("nan"),
+            float("nan"),
+            float("nan"),
+            float("nan"),
         ],
         [
             2,
@@ -161,6 +185,12 @@
             "donor_2",
             "na",
             1,
+            float("nan"),
+            float("nan"),
+            float("nan"),
+            float("nan"),
+            float("nan"),
+            float("nan"),
         ],
     ],
     index=["X", "Y"],
@@ -180,6 +210,12 @@
         "donor_id",
         "suspension_type",
         "in_tissue",
+        "genetic_ancestry_African",
+        "genetic_ancestry_East_Asian",
+        "genetic_ancestry_European",
+        "genetic_ancestry_Indigenous_American",
+        "genetic_ancestry_Oceanian",
+        "genetic_ancestry_South_Asian",
     ],
 )
 
@@ -203,6 +239,12 @@
             "HsapDv:0000003",
             "donor_1",
             "na",
+            float("nan"),
+            float("nan"),
+            float("nan"),
+            float("nan"),
+            float("nan"),
+            float("nan"),
         ],
         [
             "CL:0000192",
@@ -217,6 +259,12 @@
             "MmusDv:0000003",
             "donor_2",
             "na",
+            float("nan"),
+            float("nan"),
+            float("nan"),
+            float("nan"),
+            float("nan"),
+            float("nan"),
         ],
     ],
     index=["X", "Y"],
@@ -233,6 +281,12 @@
         "development_stage_ontology_term_id",
         "donor_id",
         "suspension_type",
+        "genetic_ancestry_African",
+        "genetic_ancestry_East_Asian",
+        "genetic_ancestry_European",
+        "genetic_ancestry_Indigenous_American",
+        "genetic_ancestry_Oceanian",
+        "genetic_ancestry_South_Asian",
     ],
 )
 
@@ -255,6 +309,12 @@
             "HsapDv:0000003",
             "donor_1",
             "na",
+            float("nan"),
+            float("nan"),
+            float("nan"),
+            float("nan"),
+            float("nan"),
+            float("nan"),
         ],
         [
             "CL:0000192",
@@ -269,6 +329,12 @@
             "MmusDv:0000003",
             "donor_2",
             "na",
+            float("nan"),
+            float("nan"),
+            float("nan"),
+            float("nan"),
+            float("nan"),
+            float("nan"),
         ],
     ],
     index=["X", "Y"],
@@ -285,6 +351,12 @@
         "development_stage_ontology_term_id",
         "donor_id",
         "suspension_type",
+        "genetic_ancestry_African",
+        "genetic_ancestry_East_Asian",
+        "genetic_ancestry_European",
+        "genetic_ancestry_Indigenous_American",
+        "genetic_ancestry_Oceanian",
+        "genetic_ancestry_South_Asian",
     ],
 )
 
@@ -493,6 +565,12 @@
             "tissue:1",
             "sre:1",
             "development_stage:1",
+            float("nan"),
+            float("nan"),
+            float("nan"),
+            float("nan"),
+            float("nan"),
+            float("nan"),
         ],
         [
             "cell_type:1",
@@ -503,6 +581,12 @@
             "tissue:1",
             "sre:1",
             "development_stage:1",
+            float("nan"),
+            float("nan"),
+            float("nan"),
+            float("nan"),
+            float("nan"),
+            float("nan"),
         ],
     ],
     index=["X", "Y"],
@@ -515,6 +599,12 @@
         "tissue_ontology_term_id",
         "self_reported_ethnicity_ontology_term_id",
         "development_stage_ontology_term_id",
+        "genetic_ancestry_African",
+        "genetic_ancestry_East_Asian",
+        "genetic_ancestry_European",
+        "genetic_ancestry_Indigenous_American",
+        "genetic_ancestry_Oceanian",
+        "genetic_ancestry_South_Asian",
     ],
 )
 
diff --git a/cellxgene_schema_cli/tests/fixtures/h5ads/example_valid.h5ad b/cellxgene_schema_cli/tests/fixtures/h5ads/example_valid.h5ad
index ec5f0aee29d9a739fe0048b38b37689e2ed5f6b0..a1b121bdf605779d0a34649907a8467a31fa1076 100644
GIT binary patch
delta 41271
zcmeHwd3aP+()YdH^zCjIP1ur<giZh<2$=4y9oZ7uI;;t6*b~+S1rinw2qug=f-Djd
zD&R)OQKJr;=x7IVNpK6}s4(+721m!>j3c-vu3^;St-5tj_wB>;J>PtPd~g5prk@Au
z{OZ&>r_NTl>U8Ja{Raj%KR9TAlC}9tXVtL1)&{Gy{)Ek9by%HM3*()RE1e2`8mpQz
zpGm;nSf9ciRp%>8se$)foCm(u(#It%w(B_}zvW}bb;p=`jdDrfhsybWa_BjE-`lI2
z4nFNT8KGKkeI?reO0-%rPqo}Ju2l7pE47TBt6KcGzHS*DQL#tU3s~^~?)b0Tz|?S8
z6ONN`EaEFn`9mZ;Si)CHcpzd@Jxeq=ND>$&;gN{1#D7BmT7S`dmiZ3s(}|BP{u3Wr
z_5O{_?BXApY18|6^IqHLh!P=Boii)k4Ka0E<kNhRLIP9gNk-t_67GeV21(>kI1p)#
z@|QkfJG~M1_tX;9e5miKMX594R8P&N&Vma_DrRE|`b0BPR<B&Qa#P9jrA6zOuiUt4
z!<MBvt2UG@FIqRp$fseZ&t-BGiZ*UqnzON_Xq}!vSN~s8Vhnn@{N@eoOIKDH;)*iQ
z5Ts$J&t(O2*R3c)=K9SWm*%WpsVhZt1@s?&{*MjKSiZ7oolr|r=Cim$>qk*=)-P*l
z*80txii2HQ06D$1A?iXX>7^yBiy(_eZ!uKuwMVH-pp1reDKt@DAvk+!nQ9RnrUJ{L
zozQX^+8fad$nTAkE1{&fmJAJjv<Rr}ttG&DO0`fbx-X_2eK6&tlzWBdgq%KF6qFM@
zy<F=8H}uispti3T35WY=E@+{QW-8%`Lf%E<d83e*)E9ZhQCd$}&{yjZ)x_ILyk<%@
zP^yDc=P{*j1ZNZ~zX`ICuo1w4iGUq$`#AUl?1TOPiq<-`9EZNT#5##kBJ=@lxSF?X
zn_(Dy6LRt>qmh;}ev%Bj^9nocuCdK@8r#{S`1d7f5e~HrT)I>1=N`e;RC6ii^q(15
z8r7b5u~}&`^4<QkN!mN=-O-NTdC#$Ev60nX*{MSJNc*S?+9C82lyt9lu=H|FCz<dv
zO!qb6B24!);iZ@!V8Vr%jxyl|n5HEt>R+fR6>J`i1{Yx_H7x`dOLz%lQdH#AD3a+E
zI3LrxxiRXSiC8xW2A+l3+`-w(3`L>cDk`8IY}Sx=C~Y>8PaDvjUx1i4iO8Q1zYNnR
z;X<DFh?WjphigZ!^x*7>g`gyB(5;t!BrF-Bb+y(G*Q%Q0D+;kZeF`7qQ8#8&*=gX~
zhnep%GD_bKm8&*z!+Xq+Fy!1HhRPKfxcnpLmvgh@m4qik1$_oCKF0iN7F1fF4wZ8}
z!#L?#?8h89dW|;QlA^*_?^|LaG8y;tQOHWxR;&A=CRtkwZSPy6oed9gnc}LZC@Ae^
z?*jL^HAjUTb&khos>gG7vV^aZ@Nfx_K%8JIKSat;lJbX!;tb{LP=+#0!toOBPZ*AS
zG}kn9yNQz60L12g4V3U8310=Txiy!~No>eX(Oi+{(%q3g2(}<wjfcZT2ElPgXH&GX
zOU$xSBx@RxXbHzixSxb$B^;;vZ%ow&SR&1pT1f~l;3%Lb&J?te32-r8()s{u5f4K%
zP$&U%k%aV#7FPmn$-uJl@K}a67KJXLP+|gRh~@AbOq)CR5aI+FnyI&&n~7y{STeN-
zv5FY4=JJ?jpOwl!Ct*Kfe~DL1wm@{MRtO!}YOSoIVKp|A1Y8)2N|`Grqix1kU2Nzm
zy(jZWX-^fIhpH#$nlX7IW=v+yjQdEK?5;VV>?*1~8r9AxH%$x`d0b{ZO2VThJVwG}
z3B!f4+Sh7-IGc?l+#fDxYfB@|m693k4<$J`)cr9j+9vyM7DM~4nDfcJ_lM(HEYZxS
z$A-CBp)PhYN1LJc_vc=x&9E#sXYZE8_eglJg!f7KE(!04gX6V<P#%TPc?%q-=e!k8
z5$OUKiP)g&Cao8Yiq-}}!vu5*N~5(TXq%uppl-aDq&nd2cr6nOCt_B@1k9?YQ~{+<
zPeh-iYJxTd9Fs70ic;B=w1`6!F|!;K;H6X@WtL6E%=5&nC0>FLQ>~a%d%~F;u>H76
z+90(T<V-?`sW-GwK-L~&^?`a~^@R&Wq9D<SBpPZ~YrSEuPm5J!U>C9aK@$@2O~!g1
zxma%;cyf^+4<$tULk$rZoF$R~k&}@mLjFRmX7OZgpgI6{APXE*v3&Dn)G!-Uho)df
zS3%JftuKt7q9v+>DV?bffx|?S;0%$W;GBx&YRDln47Ly%4)sJvzy%~wIvuNZPs3_!
zDOF6VR!SYERP+o?wNNULQW4WJwUbglN;Oidl2RR%N|=mp;0x1nw%mCL+bE-$Qb{u~
zb(B(tlxm?=HKii*F?AYK>PWaa1C<&Do;)O@p@hg7s39^I+81Ltzs|#MW<z8?vT`6d
zA6?bznK*yD^3k<yo{2Rz<dgGAsq>UdnuV$8nV2e|l#fz7DOFCX21?aZ>ijHpM&F-_
z&S>;(Om&@wDc@|YIcpZyTs|A+if5r*Ev2d{)k3+aFjt)c7iQs7&4ZZPNb(_<$V}Km
zWEMO|WHy{7G6$S<XuRiQC3$nOlB08RA(zk5+-d<lHU}l<!&xE=z&RJmLdcqnJzWH4
zgcd^$ktJ}3m`lMq4^bgx%|lcKWrUVN4I)UGj}4rkhhw?|A`38kCFBxW1v?5*YBkgo
zDuxS0N+5AQk~J`&$n{V~<OVoJWG$!*kgS6&B+$4JtF2sslh#40{flX?7vMH1gL5Rd
z0b&*+*+>gF6CL(lL^eYcksIM6k(<D?2uV4u#$K>(5f<J8HN@HqXNlYlk&BURgIprp
zVF!_0;5ZV<UW$u;2{vDj3Gh;?j#8zRI!~!uN+lFxsufe}ZQxvr#czikB+yL!olCW(
zLq#}sO-pghdyA0Ofh;Jcl)Dg9wUol*7|s-;db^>j5bM|jr-<wYwFt>R+M>N+Y7q+D
z1!ctAPmT0~7l>5_=ZbKg?}o@_xIzn;<0Si*p+5IO88Xy^P(%DS;<peVKwXZEycNjs
zE=R^;C_;vM1gePNx&k%A04o6^R-!})N!$;KD=@2cC1w?_pru5q{g^`6uO7418aPMf
zHxRiJ$wQDs<hM{p<YA~G@(7$+iK;&e&Q(}f@oKE0ZVh&P>MGp8M^|H8<*RUQJ`Rsz
z{-I*bKEDc^a#Jds0=~G_81NMm^sUB>{gf(Sjf2=ssXF3yP^y(uNhO$yD8^I)rLr-l
zJ_AL?sL1c3su&e{7ETd*4lWY$!_X2W4KSa`aoAOYBF{q;krPm}290BnHE0}P0M8m^
zy$B^lUZR{#^<_9qq!A*oN74kjL{7pEBCo)4BCkR_k=J184M<*xA|h|VVIs|B_cPT~
z;9QI350FFTP1r)@k5EtKEx17BZAe^)i>r1$uBnoBxTadyW9N>p!$F8HC8M%V8w`1+
z)b;hq+ey61>yg(;yh`GAP^y7a?lMfZQL4BM>l<2%^&O>D5v5uvx4IN_BQ_xK4D!^!
z!o^b5;sfxMA^8wWi2My|h<pTRiL^rG1|;Vom&nJkgUH``0N>i64TS7XxUV+iqWT1e
zZp6HQLJ<-;k8G$V{&_e<{0rdRgyb{GK?2R2F*|#WW{0~rp;i6@>WS;U5xIG5@oYfd
zjJEMBNZd@Lb0hY8&1P+|+6Gm~P}|`Yk#9h~5y`itOkbFBBi8dBlo9KDc#Oyoa1IF+
zmt(C-H=!yWkbe_0T8L4N42al*sneA5>8TQ2mfg#7{Z<mpF2{@pN|jTpZ3~WDeL0S0
z(pKc1N1jDdq30GY1FE-T-ux}v5Q{~HUm?e0RpEJ}T~zpns7-~dw`v)x21Q#@H#_Xu
zjywK$TXDxb;0&>x;Jlf3<u=?p**BxQ-Js-VRJWBFwZ!NFXON*@0nTm6uiTD&-!|k&
zKp8UBNO+7$PdG=U7sPDGrQaKJw<GF9?PVUSKs8To$ElBkcFK*0#9J^o2Idp#2USF3
z;S`ZLP%DtcLskV!qT5Ks1&4_wz$qjUy#woWR^nU@fT5Mh9|-e_41!%mu7cx42Ezp+
zLm*}ck|f9_G8DEDxf&iLG7QcV84k{0AsGQ#M6Q7nBFS)=h#O8Jfupx-+2FlZi%`>`
z=~gYuzyCJv3(FO$7}1DmJz^K!d51RLTD*(mbf`ONb3C^b3vi^Hrot7wv^Ok!G<fGd
z%ba*|q}T=B%Shwkr}{PyXDcE1PHn2gIM%Vj-Wpq;KpK4cur1b7x*M)d)>f&NaCon_
z^w2rmG$Amcs;SrAj%aWk(IS)5GCWwneg;ak2mObiX)~sepK-0*<955VG7wv#>1mq_
z9oTbE+oI5c-Ek1dR%2v`GGaP(6eA9^D4L<qx+11s;mkc+R3e?8(1#9T>Ep|pK5oLS
z*)y@R^vrZTeTKxlaZr13dslD^S8{t1jL@^C_Ha27=>_d~qb|K6@&J-PT&^$YMRB=k
zMloEjAEQ_}Lj~fXok+Z14vG(IhaDboN*WHzAYEZxD;MrTfx+OUE)RjBM3T6fp<LC~
z+{`dW!?~FeI)dzXZ4PL-M~ig0Q`}y+q9jB8JzBJm(iFulkjIm%C>{g3v(j-WQb@HZ
zs63=O)KpdzmljULp=x~G>^!JNgS%SGgFMip(a$UaRBRONA~Krw8>8!R=rHa_5=SSK
zh78=qD1X`(p<c%-k7Jd`>nLUd&y0dASO4S~bRv(@>BC0rYA#oIG}5RJ8Y3~wdl;AW
zG*)0bOVk|^8sssfl^D7PGj#^Kl(ag~rBsig`g0(W$Xu>u9*Y$4D9>lKfK^_oqeJ&$
zb?1*@yNlu65v`Baqby-%^X?PM7IC>{Ty8m|6;OY#en3pa3K91U;bJ`-3hx(cuF)AW
z*Be8@yHHU!m2BL&d8MM<06Xr(de*|>`%v$7toM3uqm=bVcaoOt25x^NqfLx9GrCbn
z&^XMVz=N|z=R?GByD?ffv*0!^ww=)}P!ox*RxqkOaLzGA-EqL_Opf`Lp)Mce;$B?3
zeB}){Y*cQAGxwvdz3spb=MeSw1NDw%_16dToyqDQu;T&D*m>X#f?WsHu0zy2A^)tc
zpXT-8;@u6A&tp0xE6bzof&Afi7y6E8-bKTJzGH_i#-5g*o|>ZEMZg6KZ)%B-qdZiO
zXWC<bRAK*K&FRnGr^Tt_NZPKzy1TRm*76$NCKkS<IV}%r^-Vogh}ebGkx-@m)+tP3
z58cqk8)lD8NWd`O$L-|phRJtoQ-rCzGQ=`Qz~T}1ILjj%+&sd5jlb+6Ey`kPewgiK
zp}+lMjBWdfwj!~1VYpi%@liZH@Iu+6+7Ro`M|AznCbcL0QmefY(6dj7o^;Y8^bDTC
zhUgg;qG$Ah53RB2;iylbB`olJo*+GU9@F)-Bs>k1_i2r=;Yn>lKv_CM7Rnxlb5EkO
z9pHHimGwP&nX>)h#X5Y>RPTW?&uer0iw@HXicm9nVyml(fx^F0G|;~UQqbDGTRZTR
zb%ZsdLGwQ`!rsSX^*xKn(T~^`n}=qj-nBSm&Tcj-nmvHmWhqoWWSiun2Zzw*7EG&n
z5>F;58voHp^sjgx7eix~gg#Hg+uCv%=GW%;6@?Q`!tOo%$SpV?c;s3YziE9{A2+S9
zq>y<ViuH&A2_8*{%-hu=^EM2JCC>5^jmkCt9VckkYG35ll@01kT9dW0QG@t9w1VEo
za11x8+6bPPao(a~$ICcx#V_ef`WqUx@#+%MHGD>dd>@;Fn6`n)r)RktUnAjU3FGrf
z!S8_gzoLy%{nf8%uUfAZRZYb#%&Tu2t@a=nMDSOHba@2yIHj$%C?{dtDXk!|K+-}6
z5}##(fh<138+GT4Ap@DA7r=G4<XvrmiUH1>xb(gLoHuC@i{Ij<ulz}0`qsjep`+-<
z`o-#~g!Z>_6k8zY9UR4^w=Y}$S@70r?F}n-2i7+`<^-1i7?YAoZ?clUw{#`RB}+c#
zm=ow;Hh&%S*`G9@_2?_1%FxqND1(P)f7UW*6(n<Yw3_J?@Zt<UeOW=Dw(mmaqQ7VC
z{gG**Ay?kRj{ak}N(1_05x!{NbJytx#0@*%#Zj#F*S|}n*zz7P#ZvIJ;8JXTHB|3$
zsDRMB4T{d<Ao`&GEDmCI%VmQ&9!CGwWbY=1O!p+LN$i{8cS#}B?F(5%xw^V-kn<s`
zTkJ3UkkqaIh}CsN`$wp5!t0^xPBy7q2Z`rUT?dq%Lv;&UFH?64#J_4=Yb|_3gT1fX
z<^|?)T8Q4$O?n5v+X~TpMu^^dy54o5eu8=@`8}VI-UXiuz3V?ky-Tr8^N!Cq>0Juv
z&ZFMV;JJW$N1wk;@0sx8XWCk8%jX)5KBLVGnESb=N?PFX=i11jVtdZPtnT<v;Mtfa
zdnxcN|1Y287m6_gFP8Eb)i;e({YSq-s}rbmEz0QMiW*ky2E?=?MIF~yHSGp>hP{ik
zc_rKCJL;S6RU!KuZBMT69cGO;3JL6!Fs&M@ROC;=^k5U7Ue$Dh*0^~3(^m#!l|_HT
z8^rr*$ZQpa%+`D{Ti^3+eXq~fk33s#Q2!&&mg9@i*`hBX#HPU6>fqVBNVC;(*=#L@
z9=mY1d|zs?eV0}se2^s}i;TXT2&f&{-yv!jhNxYnt6d6BKcm{`{p~-K+R>L-?K&vC
zglZRKljcRX%%ryaV2yo-y7OSWeTcQ?(q&37=dDrK%v*!Jf-8F1gI>WZlfErK^DuY(
zTh|w@_1h>K*iFT{rsc4si#=Iw^VfH=M_JGipVI8>D+=9QG++V){u=Y?+eG@b{UcN^
zIJ3;pHstCIRYZM3)mW}kuT4?1`Ehft?nw|Y`21nPJfnuvF1$=dh1CXbfOfn64r@bq
zJ51T9y=<-hiif(du@_3PPO(C~-SFoYcWBeB`@7jifUmat6T8|Ms)12jL(iYJ*7INU
zo(}NW>9a2Ur1e;>SSCSLn!4Mk1PYXfER!<*krWfHcQi6#tiEBf|4LdnCGC<`f!M#>
zTwQ&U+)Je^ENZpoi%x7Xb>b-caGx)?kM@^7s}-mfm;F%3_-PWB&_9>uOFv1vJYW9_
zQs5Uzm*t24;%H{zCr6hFN<T5WEMNLXQD9>67jYFG6|8-jXoV7{hK+Wy_hD6lMn9Yi
z=(8V21$gy^PXTT}j8hP11v>YC80Q}MK23l4RzJ5NP6ayme^?*CO6UF$=jspt{-+}{
z(+EJqtF{CveMIZ|fAyaShWO7S#M>0&if^WnYT$oX--wX<=%DFRsy%qnbkqMsRhgGn
z^>Ui2sxALnfRGbXiwF?5;&_QDr-Kf8@i)gkH07uWp5iFp>DFk#>UbTVH(+Cb9XgC}
z1fvsm*uf@1Y5WtN%nocZpkf@o33oks6o+DS_&b6BXr`uwbVSV5wvd_H4r_bZ<KgZa
z+ms|Do+HyNeokL#C?S3*-S{}x32!}Mo8c6v6eFsdhIU#L(_Z3f{2zU|cX|^v=>MZb
z9?~9rGyi)P=-mHdoO@h}o%_FY|A%t~I`@C){txE}@bkCx`5&$k=-mI|T>YKT|IYnS
zPyg^mK%M))bN`2P1UmPB=l&1p2;lk)m+PgsOY1+J3h4VUj0$w_|8TB;^gqKm_tf*w
zUlVoi|8R}~+n+F+;4mxDx&OmB_qzQH=j!M4hj5NS=kvew`5(>^!1Wha4~W)(I2F+M
zUl<kW-2dTR{hj`Q=l&0`0qWfUo%=tWBhb13JNJKBM}Q4r*nGgw_&;nzuY27z{b6*!
z_1_<cQ-RLse>k6ho&Nv-_UHc+T7Lik3=rf!XiJD`n9qODQow6nDQIPUzP{;GdLx6g
zU@tR@L5;L8tPpo|>_mCGfJ)#yFs;9!!1#NYUnA~g%D)ZM{Y?0FOzUq*Fyt!{>n|)Y
z@D9Y(u&AG|9K)-~y9OAy!dt(wjSbvdunR`?wBscii|r8qv29*oabw7>SWf(fk+KlF
z_Oeg39z92YDMYvWJBAf3<nD>T3u?Zdaizg;#`5$@_?=#+^0zjJb$_k?mqud9e2*YJ
z2&E6(rUou5xedN_X|c`@qn6?WiJHrLbfFzaCTlC54U4#uVyM}x&C^T93x#Rmgu=zU
zS&nbAf}^k5f>)v3S+zK};(QwmG?I$=q1VcYifF{HG+`Q1Gp>{{-Hm0=C-ZN{^tW1O
zj8|^)OEE-)+a!VQ5~kZA%oS{u@Fodwmhg=dzKNcY<_gOtfh`g)lJGJKFPHEN3FC!)
zA?>Y_hiJ7VP%L4()WfXs8VS=ifaZL<5XX$^!W=WMSSK;oOSn|RWfC4M;cN-#Ncg%?
zT%pkIPG*hhHXbvkJ0Hz>qJ$?&m~QVe%jZg%u4fA7<M`8cK0$_}(Dh<wJWaxM8Iw7G
zhJ@)&E#`c>SILZLnla7)EHeYsbTN{-09~A9#&pM$8Pk13W=vMajOR<3t_~9U)V@M5
zFAxmE^zs5Trkj||m>w%;yi~%41{Rk0P6_W0V!eH3PmqBrMcFIieG<OQz`|~j=`|~4
zk+4<5T>`j*t)DHxU`#jgm@6c%89OBGlrULQvwSxRcNe(AxR$DiU>KPG4%l1)rOh}(
z!jTg0DPer@g_NfUC)HOWhLE1L<_74=Y{pR%j+QVzdCl_lq&8!EvYT-n<BGt;CSDTg
zFJYI26C|7{;Q<mJDB(dm#`Pyg;3`RAu!M(5I7z}oC49Anhe>!iVOsw}ff16xH4;vi
zFkO*o9#W5lQzV=!;WWe*<_gj!feZ;}O4uvmED2vL;gJ#^Mej^8D==CT7z4+~*hkpC
zX{p{c#R6we*kWy2?hL&1x#~r_`86}mt#ko(EZ+8NgZjVVZLgYfCv3VWL$O0UUiM0N
zh5sIjlgh-{v$9g$X?V$0`}4NAE}5xHH#kSdyF=A`DAa>ctj&wJEMEa<E+XejsDBno
zgmH^(T4s8hg6}zb4x3MPdsCI3(3FkkdqG(XiuI<PxGtzIzN+M3$mmN97v3F?*VD$@
zGSb~jH0y!4(PB+8P?LkD`;pczyhU7BF(WG@BU6cks-KV-4`qHN{izfEv@EYTOL4K7
zJKf{W#CHSjxDIPb1kd{@HUJ|3itS{krz!)X>}TW*g8T*~SMmM0UT>OL8Em98P=O&(
zG!Ba=Q8^dhFpem$OGb)vHRS(-jA7t8j$}AAO~PTxNbxEoK%Hvur+Mh2-D@CcJQhxd
zb}Ee5A<`ARnVA_GcwMyT-^fa#mRxv6wBGxa)HEf{NV+{rIwVfO(iw2(1C+^x`WL8!
z9-LAyL|#Hx7FFm{uO)(g@}wvudD^nFlu@8g#KNPY{zDWQV~C_=D){CSDw|D<#f2{|
z!P$wmrQ;33*U=33(^9fhGn8>~j!KV*s=uMo1RA1#T3TwVTbXFAk_>l-TbV>Ux#+S<
zQq6<9DY-N*aa~f<l*u$EE_Di&eS|VosdikPcxU-E8jiRw9<MT;Dx(*caLehb9wm>4
zk47?0$%h&rRxp!SR%TjiT8c7@RvRwtbZ@#c8}eVm(sMZ9ot}>Qa~Y(%)3e;lJSfV=
z3JMJMGjY+*=ki%;DJjYVRy)(<abwtmqBssI8OkEABE{p)$Wj*bQb<e13(l87&Sb1^
zDOZ=3n&njrxtJFhL=kr)Jr%d#GLnq7rQm8_Zmh-BG<S-!g2o<O!3)DzLgEyxY!zkW
zn((G&DXV#Fre$K^i=q7)8q!SEvxIB*y4{(|8X8R;9$X&RL(wi=Xz3}L8F+~`j+zTE
zvDPnqPIu!{SjRPFxzoMMdPC*Rv@`|pwBCuLWn57@?$8Zfe@1$0mV%dCQ$?G&qEuX~
z%4S|W88|dIvScRShpya2&Aaf;Df--{XX0|$VpQZt+p-m!reQYTdc7XWHde-)fm5)Z
z$2TKAD>Fs81#<4h{0deuB^_68C9QW{&N#d~pk_Me{t9B~2K!t2ZtmOovh3UWBJE!@
zx&vxRWG9R4V&<KEefMrgc<XiGHtfATt@>@)`}j8OyZAQj{j43|hFwL#h3`$l$#AI$
zg7)<uo;KaS9%TD^i0vyFt&n}KX8U@W?JK@KC1_vo<&BE=^**+*_tV(9)CYq0^+C3;
z`1TaCuQi5Vx_$kP$-X|s_7z{GLiY9HfPH<0?dzj#Uu)T{kbQlO^<ew@INR4JNNbn+
zJI-SJdW`MslWbp~GTGNU7GwKb&-V3cwy)0w?d$K^zCKG+>%w=apozncOZL@oq{+TE
zuzfww_VxLIeLcbU72l;o_VvY}eSOJfUtebX+Q{~`iN@5huP52QzCtaz@Xabna6h7b
zea%SX26~<C>l<uen@#rh6x-K7P=zjh%?c8>uYcre!&Ukg+t;_*zP@9KpnW~f_Vo<g
z*FOdA>z{dd==Sx$*uK8Y_Vq8k`gQyIp0P@F``W_x^{mOhz8|o!e`Wjn0o&IPsdl!n
ze+$^xkEk*iUPm6ZujhjH^<%cLf9I9O_Vpho`}zsn*MD+8+t*JSuzfwx_Vt3HKH1mL
zO!oD2lYRXnXkWkND%ie$#Y;iAuV1r$ZR6^6``R9~uitPdbo=^k(7t|WtVP|vejl{2
zKd^oMk+Kc@imz%R1EAa2izfT}FRod)uRjIt>(3_p`U~6Fe{&7GeZ8dDVAt&{zOv=c
z?`-|y^h-9cwPUB<f8sVR-U9oZY}Z+%+wiu~zuV?T2`~0`^x?$mQ{f`x+R^Mg^|LQ}
zuZZ>h9roZQ`q{7ACR>kU9s2FZqK=*VRW^wN?!qd?m7^i2YkcF45ZevM&f#6k&3pL(
z&*?kF!CY_SVjKhZ(P6x}Cx^Udp>MYF_G&&YtDvY(h>^1_KIF2B%0*x$un4>a7J-?-
zRMuQx1acx@M2d(xHWs0x5D<}~z#>u<Sj4&li&$4+5$XyoLS2DHsOvjq#Kt1l6#^pG
z6<CgSzcG3!rsZ1+e<$JZCH#ZHA~ya}FpSu^L&6s&{4WXrB;lVW{ELMDZQwv`d`S{e
z@Vvo1hvN7_jEH>va9S#VM#6uR@Shn6W8?pl1m2bKUnKmVgj*zhR>JQ~_^&!P#l{~<
z0v}5FZxa4U!mScMC*hAJ{CC2%{>38tha~Wcg#RhwPbGX_!WSg`nS?(_91<IUAqjjb
z;jbk8wS?Ow+%Dlg65cDHjqj5L?vn6+30Fz@ZV4Zd@I4YfD4&fVk_14))e=4|;Uf~h
zSHky6_<ovy+Gb)6JRso*C45xEH4^@fgddXdZzWvuu*7&o!jDS0R>F@-_;Cq8A>rSJ
z;tJ(hC_{Nt!cR%KPQvvPep<rMNci_57{~wF5Qd^WCt<&Y8zg*O!p}?igoIx(W19aL
z%?wPxB;l7O+$iBD37?ekD-wRygsFYyH4}sI>k@uL!p#ysCE-6v_)Q7_F^C6aQc>Ot
zGIad5gx`S?>+y^%BQ*^VeJsJS7thG(yh9Ir@#Sa~_S)FL)5BhkU3v<8?JTK>y$%k0
z@vw{v>tSzKqaq4>yK&gtox|Q9rm*)4c6&MOy^_P;2zGV#uovH-D8k;J9QO8trczX@
zH^;Pn^e7f8@6o(aaMsp~U+WaZ{<9v);vpHm)(PK^Mz3{><Mud^jVC~dWO^X$3I?(X
z>>ulaY$6A;12~W!$YbH9Kz0xZvR82+JD6Q=J}4Z*5fKKmNgT)yrO|NVOZWl@g~K?I
z#drKtAUnd)OAlnPF$J>89LTykko5!t*%S_BQ#p_|&c`T_O=mqgkj>ygHj}h=;p?9C
zSepacEDmI^<v@0%DUcn-VjReh=0J7~2eM;>fowJhvN<$aE`0AA#*#)LJI+W`AUmD|
z*$EuTP7DOHlQ@v|>47XXPO``8$AptjVeAwRW2bT$i^pSuFm^hJu`{S07rxOK35T)y
zMiRr=nH<LAYuM<RaJDIo#gj2Q>YGayy3~2Rpy`;ffF}*Z*!djB;!zkK6D~AFFpOQq
zVeDcKWAT-2;+SwLuXa6*E#xp3kG|-baG4PY(lMcN?3Ja5vG}SsIwo9c3S(CV!r0Xu
z#ujrJtDkjo7`rAA#^O7GDU7|rXqk=)*9ODbbsWa7=XJ$lY^f=XE#okD1Lt!XyO9Bh
zv70!I-E62&VeE~jF!m->7+W3;W4CY>9L8?tg`kJAH**-fjjPkc*zLhE_7?7h9>!J#
z!`MoFCEDB+#=_En+OD_O8F6DEO6#kKWmfc|Vf}G!D4afrk)iKNj^!-P&%xi0;2S-f
zpQHFhQ4-<HBKu0Kx1FO;%75Tq`&=vL!==}38?DW+8$aU^0o-sbCE~o(1N@&Yw|A)4
zMp54P4m^3>F*i|2C!@3oIel}u%{qE3n6BkuAx7Vrh&x#eFioFmCd%n!u<QZ*lB(1A
z248#wO@&b1h55Q$V|1mzgySU~E8#c^M@g94H`gD-7^4ZJ!G4ke37ZQ<NH|i$Jtf>r
z!o4NjN5}Xy6$<n<1Vp@ai-hUHOA3knN&}1XBHj`>5N{bB6d|z4*F#{cR4_ygEU+du
zK=W-L5fK6lc@Y8&90-981w;@p^1EYw98DMkJ){P%FtF(HA_*^+@Dd3xm2jbii$pXb
z8dxR?ESK;K39pp!DhaQaaIu6-L^L53SR)BsFX0;`yjH^NB)ne2r4lZa7u5zyV55XL
zNqDn_Z<O#&5-yi;#TJRNRl+w*c$<W`OIYl4F`~vk59}JT&5isD{l|6YZ6yqes6ZGV
zfrVib*d-~DAmKy_4+z0H{sTi8_#RscUnOC3fXxaIk#LfPhf4TrGp6|;W@ccTo(bj#
zMo9P?2`5X~En$y@Q%soJS5i$3!f6tw=ZRUN3<+mS*el^I311t;di%=AAOlm1GD^au
zB|JvLV<ns|;T#EH7r@48;<x~VjI5%Jm+%A$Pn7T^3Hv0RE8)okR~+I7rwE3Dr%HI5
zgr`e*hJ^DZoG;;-2Cfj>XqF@}Tf%cBJXgZ=BwQfj`4V2hxNE>LEEEwVeNk)SFA|Jj
zROnyScHz)S|02O=`XWJN-%kG`0pF}GzNpnhM&pZGd<{JXnq9dD{fmTdT!a2aLU+Sy
zq%UfF@E5gL1S8ffxqdxjjo^qik}J|9)}HLWQ^eYfCH084H%F{}49AncsKp;$=%Ft~
ztWg}X;)}NFi`p1d#M+NNT#i^{Ibw}t1@(wEJ{Ym~=ZMv%N378Jv@M(8f;><UTK(>4
zY_;9==ezI=1BS+U#bCe^88~363>>g*1`b#@0|zXpfdiJ)z-T%7m$X8Gfax>@gcB+9
z1GeAD57>SK2W-EAU1CugI6=aRf#<s+5HS7*?kW}NCSh`sbjxSR3lB*s9Pp5ge3CWG
zN05CpGlbVFDiB_)z`|=4Sa_`hv)3B@0x00L8VU%vTgX=gp2vnj;CXCd@jMnzp>UA|
zE-^YF@EQqUFJa*!8S)j1aFIj>!bK8zy;Om4kwku(lrKCakuN+XfrW>}7{_0@NJ2ok
zNCFENN#Js+!7UQrDq-OviSoij(lO1yaFK+-Em8$yjfnh8BVWv+2rLA)NcmO?cQIkA
zTd|oKgf$7%Bgni+9TIjLSSTp=xxm6h5?E|=#CrRRFeE}i7!rYn;SpFECV_?F5?FXh
z0t*jGVBsM(KW9AOlcgRA7fBQlE|S2)MG{zeNCFEFNnqh22`oIM<_aUU5-ySu5H6Cy
z!bK8TxJUvE4@qF*AqgxzB!Pv86o~J~N+XahVc{Z)3WSR!u<(!s79Nto!b1|+@Q{M>
z{UoVDpM-NIJXyk1BrH56(V*~<1ZEEj*Pmd_kSY)^lE@b>lEA`65?FXh0t*jGVBsMN
ztb0hb{slw0NCFENNnqh332eAXf$|Hb4lI)JVhJxn91`Czl>`bUTqNOT5?(Ih6%t-4
z;Z^h+WHKz+BmZ6R*e=-ELGkc!!&3OiWT||{-U^xnY9qDiL)BU(`1aT%)R6~@98u~h
z$k}6$Qb+4x41=*c$YzkEgX<WK)4_P~?8Pz@plBbGiQL{KDC?<3;SVRPC@+^!_$PDT
z6gWrBsa#+hGpEDQefCWJrKDa<o}uRv)Kgb)CN%|(`|J+zJf*pwj@zX{9oUyyeV^Ll
zmuPKXl(5)jZp0~{f<FYgJ_cWUTU((2`lPMe?y%N9M(IQSG>2LYXKL*($UcnhmfgBY
z9`dcek1%}@+TOP~AmWI9gVpy9pE3sCt(0Q?qQrQzr^2*{w1HON*LFDeXD#;WxQ|t%
z^yBsfXnE8=)8hLPhS%B)0ySiW){qIu9<>hx_v7}NU48BJRKQPH_@K#_*H?6n44LTK
z{x&#LYad*7+VN=JgE%UUwe|>W$3OHw9Bs8YtnPpEk8J2q%3PubGJWDuv*lxY7AZ>I
zF*^)<(4HI6?AlPxM&cidS&P53``?V!CRi*H-@#|c?5nz+Zs+1{-%7<tg%%$TJ)U>O
zIlgCU{3Tnw=CYK2&z1Oo;-2PxFI6%ow34x?gLCI47B=cQ{R2#X%D$@Vbo3o{Pucx-
zPdO5_1C9N<_&<o%;w%Retd4HQzi^4VAEfHDL#xlBE-E%BchOpBal(|R>?@(=1xH&&
zqm7=)iqdLlnxG>^X?TvF?F#)AME`^w5+jEeB7JI&ff912fz$J|_5T%R=MOwYLe5aW
zi-Ou$HEq{BRI0EJ`%%M?IlC^jL*w-I=qz@!c%Hw0g#D6wpw1Fu%kF`^Q0Zw9f6}p7
zJq<UXbj%NIBznq-)mGQl37?;I%&`_Yob+dpg|9dqIKGtbMyrxO?(TY49c8uk!1Rlt
z|2d;rdlUH;<gK`s@D=3q0?REoRGG=TG8W}q*mlxU5YUDmK|&kFp=&b%T6@`@{>V2R
js%nW~`9Q5R^jZT8pB_MB;hQEjbP)6PGTnv3^icY5M?LWr

delta 26319
zcmZ`?33yc1^?z@Mc{2lu32VYO32Rt0S!W_3iGYMfNLUjPNn}ezAVFDzK!QsJ6-}ga
zKwAOVprs~Qo#2|dRM6TcwN_|dptcpPtzc~hm%smW?s<3S&H28LUp)7B@45G$yS(%6
z@7_DZ@dHEZ_YAI2bJw5nHH_Nru6x<n{G!L@Zhy(wuy}yK{kYGdPisS4=-Cu^i@RUK
z3mjvrc*6+Wxa1|@o_||8lZ>BvRf<kevvJ|W93Qsx`JRc_YkHQ;4L;*-XnWwO@3jQe
zxb5|%$m>aF?LyPIb5fNVnN;PPxWF_bxBc2RETML?rHTq-O<ggx&&KJ9J0niGpVJdD
z?8FI(yE)SP3*1lOz5*v9Ce!nD1<67oL*O9d%kfX^r$@>^aFzP7O)q}pioE!VTeWXo
zs8eKI$fMeK%Wltxgi5VXJxK=Z2IgUe%Yj3P$snB`M%)Fl&hN*t6H`A^nL7Q7J&Eo<
zk*bG1?KeTVr<G(DLvc^5k2wd5Kk)W3=R#h0Yq&WN%DZFae5mPe1<VDopTt6F?rDY0
z5;)ZZRZ79^X{DQs;9O5ci(v<$Yay{0sw{zNy)g1RsP2WOEQMXYFs%%Zktm0AB(8_#
z-YAwq@d_&m%6p^Ua=5VE>H+ukw)&eZ;4mgbOD`({&i1xa;4Fob`d}!jH--u+R7j!C
z6xvLoMhZ3dwi4k~A1eSSDbAOKalSqnmzRWb;XW8wL2(s*tnTo_WNRSoqqwt^G442p
zlBQtjB83VuWZnc9HltBnr&vSGo8izDD+E<-p7D`CBv}K!1zy#+G)d@_fdA2_Wh76=
zdg^%m6iz89B3Wi*TNoA3S@7LhtJIZiM($0q5`3l`W<PE9O&P=a_faPN9Dk4TNrpyR
z<XoEdJF}rZ;r=9lkIVScYPA!)8fhkUdc-rnmZlMXv?Me4`M9Vh7*2EGYcbs0ffr$z
zo&~MH7{h}ccp--SIB*Gu>Cw>orG`<fo&z009Y9rUTrBV$#62D9b1_VFSL@HiFg;fq
z&&M#$bBz~ZxT}$m|EKxx#j;@SXe$@z;L8KO<KXmY>!;mDVaMw+Iye>{40x}E)-jgP
zRX7?Bo%9Z@EmPUCXCCEV(gRgw<EqCw9v^4*sdztLuinN@A8<T+HT6mRC|<9~##Nti
z{AO00WSo5}UNPZm#=#>j_pzeUcr;$G-NxsCMGaZ7H{F`&8g1_R*xlFM2i58LEat=h
zbZeCv1~X_aRq10@x<CJ3E~=uTEdcw1R@c;#X!QWBNV}L30;j>78CIZnI7X*9)KZ1o
zAdx;;;2{DJ6}XSUN%44rk&F>eJNw2f82tq9FZ2fpJdiMyV%rj(#ky7Wv>fz+1E4a;
zN{>tRW9kq%N128=I-{$~l$?o~hCm((ESXVFrZo}5S6F3{q%14NRqpJ=yRiso-S-H*
zTi|;Iz7KH<{^_Bsr*znlwkJBH_h57ioZu#%lc_Yx20(Hyy5|&_mTOJKChW?!0;$dh
zK7vY4{HV%w0@bjk=O`~05wZ?o-eVyvkm1yQPUL-F;0WP}KWwGDmZ;vaoz_yS1ON0h
zUnX$5z}E}BOyK44z!lb~JZ^RsH=zo)cARLfi=y#Q_p4i}ofwDB90HfdS<jR^N3A=i
z4s~ev5SaX&GrgC<<cOW=<Y<RR{1dQ0iU(p?TQLs9PCQ=V2?Ad!@I=C}Yohg~ITUL0
zap;D^{(Ng`qO(x)^h3d4fMe*iiCz#+HF`lf)9HmW6iP8$sxz8~(3!1FsqHVYW}8DJ
z=TNu+TZgZEdiT1(e>JWq+)zNG6V#CKKr;$BI0fI(7q7O`OfRHP#dsf-qJaFXG5+9G
zD-AYN=p==juEzI&Vxcu0&QfR^g;J(sYF#0wmQW}-)#?N<6<R}}ek!V5AeG~kkT%Uq
z+gpf%G7OmAp`*g;2|u4^^)q|Gv1wRuPq;v$7mS#WqBoS2=mYynBtZv>Wbn^G(H9C(
zK-zRHt^Qie1G{HfgU$ZXOo{l=kQfN5Gf@PfVW!m^*3Gn1@fpN$$V`P3BnE+b4Hj~C
z1{P9y4Vs@a6GL?v!ofR?N#-y(Lt;22&q9#~#Uw_+E)pZ5jl?LpL}D~#%tkQ=Dp5eu
zEG(#LHWs;)LPayMN4}ViJ<>`rr3e!)QmBMN!Pyw9r%(ljj#KC$hV~X?;3P$bDU?uz
zp(+X$QmBzabrd>Hp%w}Sms(y3%&}78Ji($l*phKjGY227@o<>L1UN<FO7PD`F%j}e
z<U=Ki0@#lN8t34kotjIlzByQkZyrt|-&_oZDHNtq1%)aow2wmjD0G}c$0>A?LKo-a
znsDTNTob0v!_XoMmCVDEcg@F=cg{n-miegHLZP!1I!md^3-HOA4S5T&FN>gh0h(0|
zhX~DqQ-tP%e<6x_kVj%ZR4&Bsu3CVFG%dtJ4lb~KaB?BGy9CS<%vK6{C8)RvDhVxy
zLxiq{4nj-7Uy9;7C?K&EYDknpb1ABn!x=)jv|fZ2_)4*YvPD?za@d6txTbC+u@Wwk
zSOpo2QB*)BiAp#`;s$8G4p+NhEXLLDYDl~mqi%$0B-X$V5^LcY3OKkJhk6MP;YkcY
z7(-??lrKSr^-z5cmhg)uSi()vK~Xn@|2h;Kpn$|is3Eb5Y6_W~;T(w?NL`9z3zU+$
z1sX_fg%c#Ufmw!PJLI8&l%-f<T^UwgLZKE4)l=v^g^p7wtsEcOG7OYqz`UKZ_kf?3
zqd|8-TRE1|NQu7dQU7!qj$gs`_^>CHW1O1vTd&7CUqrBpRCZDbE5wHZ6U@6IaT#{?
zJy1YmH&m0j7Y>oQ4?0NHgSi|<17wi6AIecc)iNC9eaq3HpF$hOKR|3JeeZINPFsP|
zKujnd$|zzdMeK)Wj4&UBGZcS*Ihv8O5~oYb3e+fEi5fqL>Xn#Ow}Nb?q$UcT#*lds
z%vIQFhahhimhlL5tR@n(3LEe!9HNxR;1s1i4*m+XViguyvE1qkGb@mMJqeW<2^Fib
zw8jeTgM*ZQ5<})wU{+$1uL6_uE3wKjg{ml2L7{^b+DD=GO3c_!A>R#F%HB#0gfRd?
z3RO_3j6(Y;R9}hZx87g{&0oQV8_>Gv$l#tZVKq9)=b?NxMnzyhi55x;na99?BZ?QG
zfW(VXL*gZ9Ch;<yBk>BPu0ioCl#*zL1`=&>g2ZcJu0`=W<dJv-s!6;Fhe`Yz&X9Nu
zlGmX)4#gx+z%CMRLmP>A;1Y@7Kt>gcccGHRZ{ZLM*trg;)VV60QmyN-eJR!04Hwsu
zJE^vYL9hzr>M5?G3gh0UxPutCcRfa(q{J|V5~?v&MWMoKEbj1nEUu11XDHM{sYy3s
z>UoMQz6sa&e}<Zy(1;JAnZ!rje?Q!W{r54X-i%S7Kq-mS&_Lo0oFMTRFgKw16!K6&
z%FS4A-3A=yzfqs}hF3S>`{(a)h7ymf#I=?mGB%>WI}61dG3PnhMdEX4LjjE&umImC
zEZ_^s*n|;D8!_VaYV`26o3N;V!6Ay>xe=TG`6g?ac^(osWA+O$jl@^5gT%k#7>Tcm
zd56sZ(2VE-<7;s4{ujz?Fn{4@EVZ!)tGNgr6wyKvNn5BxDAZJiFW327tih00qq?>e
z+_MEA>M}&oN^1Kkq||;Owe}k9#*ACAYkgZVq2d-S+JJ^zu;|TOFzFN~;cCR(iuSl6
zZ!11HouHD02li8>1s#OEU~WU?gFF&`sNRN=o#7CnE^rD3v~R_!p0pjO`eiU}J0^F7
zn(Y{0wGHE2C_VwsVT73oskNBg9g1ra^?+R@dO{nCUT_HooZXJL71W{GDYclpst(Og
zg8g-vEg4Rb=nI!f^n(#QQ1pjV5(8itiGgs8L;%i_NP*;A@xe~pfkl?yiUT|dYB0ha
z42MY!fm0-KA$S{#VUR~+I8>5IgZ(5%zzGr~;SvhhC=;$Y>KT|k+QbzeempFU;MmNu
zK@5+9^4qOGeaHTf<a86By2l#m3YzdXN=Uoi%HR8}Cn-7me`Lr}<xS81AIY?BMju+p
z(`RitOWHo85AUHE#)h@4*RI>N*2pvAh27S`Nd4{B7p`t&P)lzd7(uw}PHUDcVVBx7
zfTq_xzUOyZ2`1#<<xPQ|yR64y%^0f-a_o}CM0-bQ-|bAYQ{d2D)=a;>d6{KGw_VmE
zX!ZD0{4MvfiZ8O`r0a5*E4&*XOt)5|C-1o1S_<d?<VyCvY*TgUSnN$VD<W+Vqql}@
z(k-tm$ArHhuok(DhL~Bd#(N{U9nj6BEh}`(hpfkX>wQ4#2;HQFM`$l3_kLdzoZgLd
z$OUajJpr8QXO4RM;7m{5jq}07$bxE$^fL0n;d?R74<|@;=6qc^tt;odj8QkvcR8a3
zNWCBZ%YpkWKTh<5`!REO&fJ3)dUEDo3c>k(-Y_&aSc$%jaAr1cxhBJ*-Bur*-iPWj
zYd>fs(I3u`7@!J*v-es1eZkCN7-!oMXluZfp>T%8FfggY;bcPqXRoS$1Xn+js~^SH
zk7hK6Z6C`h9n3wLF9@k5GN6D&CTGuLd+Xl!;#++;ZfqWW-&+UapW^fw#~S0gkO`^+
zsM~{Yzs*NI3Amo6&W83qmM_tslg3qQY8u8>oOKdsoy@jP;SBkS7Iw{UtY8`(djKt&
z4yQ=W;5;)q&ox|K$^&+FP;?UKYRg`$n>*8(1I>G}Gv{)Sd7NWD*Rp`oLhgYQg_3b|
zPklPsa@mR-Z=~QNRRYxQ#agKKxL8#)2={?$&MZ}EFQBVR+h-*t+mE2y=A-QE)~_mG
zyRLHi^-#PITd<7nS<Ypw;1(PNEW0rg%dSxIa2|`>bJmxFOV{!T(fzE3h6k}RH^Siu
z(PyvOv%^2!T)U^)mu{}xQ|wPStM(kmnCd+p7+$}}>^vM-sny@$Y`+=bSl$4xQk&mL
zv=K6H_x80yd3m|UCP;k6lkCmT$;rwzHZ#b{$jQw#YT(R^m}d)^52CmQGJb_ww?e~3
z6x-m!2cEuGFefkD*bW_k#Hd=R{@BycL&elVbp%s0^1@+b2jxrl=H+DuvyEH9d<e_B
z4T@{L{XAH|aXS=0hner7n1J~cR?f)C&do9Iq&foTPTRojP)@e73!0B%$yvd0mT?z!
z)O!0}7Rt<|{#Bm@XnWl=H=x}BZDeRCb@48CFG;&CSp24EnqMn*f_ESG%#K3Oo{!x9
z+-D!R_I&3a<1c$M&TrxBx9xG<XiSTYcmhds)ZdVj_BV}<>_UyhF<)gCMLM3e7Msyp
zyfEMm&tNDyY&{l@>#PhJpzWglUu}GJix#S%vSvhcbd5LtGG%(>5jH*LnRwHSV8;<_
zmFsMC<ntp|-<a*Rfv#<@Xi~PDt#I(9H=w+~nGk7s#%cQHY<g0&!}RX)rqh;?Ha)t@
z3mHeP8E$MH9Jt3jKiY;~%INytZ1l-UywM3z@SL@@q1_j$@8&Hr_eQLjqMc3~N!sv~
zV{G_lNNur(yN*AvUk#6YW~w@uKIt6+n~zzKxf1S&SDLK_(MduZHrm+OPBTvuQr>>l
zS`f811zz^}2m7mDjW_ck9C{i1HvBe>e8OsVx3pR?;7)5{)YMdmsV&FtSL_ML{3wew
zziQ2OmFmH#(+lTG-*CigpzM>tVFC{pn6~qr`a>g2UbDQOXgdlbYu>V^#^z&b{7f3e
zV;nwVP49fMjoVsgz&($8ilRk~h#&KjSflR7vKKi^9TdOqn69JZD;W(-f>wXmyFBv2
z-*Fy$g@CNT{9WwQiyhphC9jDt9fS4h(cj$8UE1ExU0Qa~@<*zF=bW`;VRskr8n~*%
zS`=+TW_$~>94$zBhpown#NS&pXc~Nc!Z8hU;N{<2+uZwJXQO)QVas(Gl>mp|vqHre
z$8t&fwHIA1rE@%;`O@d?*YSD@|FP4*VR@0&j276D;vJrR@$Go^n89r+-h~bAMNoxP
zKibrgddkvHa$g@j#hG<qw?oGV*w+;wQeU^Z-r!dC(X9$&)w-+C{>g$DcY0?bC8I+n
zb`kZFGpDe#shfV=ZjFTeH$2nf=${>3JubehX)jk#mgun(?D&wo`r!^oS6|8g_!G9g
z>`c7n_0aGMTAub-K&RH``n8xOv<Iv$Z~O~S65~^c<wcQ0XPna{pC<kqcgb58?0&<u
zkX?tzKR@b~Cpj#S9&|y$UmeqYa{M%(!e03^Za~Vp_y!a~@n_a5ciPt$-14K<>cWcS
zJjd0J7F2xBEolAR(Sn3X`8j6`rox&we6uul@lJQO{{z17;w_0z&l!%w@KO1PHEx6+
z<LQ`0R}VCvhGC)&8W%=9U!cQC)|gH-o$1BRZIjGM<G-xQX0*)fQHM@Bbskz0QR_7>
zYiPS04xYC<`F#VJqHJhxd&q>cw{Smd=2zCvX-#)>JiXy5r^fW0IdP7_d5DKO^v5@}
zy{I0D=s3>AR6PR|{zhdP$bAaF<{9|gPaQLG77ySz_5faDW&P~n0Dglr@a%W&r;IPe
z0M5pO^%Si5)*irb;|K7Pa{!Cj_afDr-Z}rDTv>|ny>sIqCptckHb(mmFu!+9%K7n=
za)D}7#gE*k(>6oYZAyT|A8}GPy1nquyErNTDI5Zwn&>vAxx8@qoz^w(Iv3t{8Nd#q
zJ(6mA9m|~(eykdg^J5itON-*$LB}k*9qr$9)6YYj+Z$W*TpQnxC2-_>e24hZ(C@9W
zK0i+v-466?mUmrkNsx=uF4(aY#0m|A&R*$r_OJ1J(RuM1$LHI6jkamJyr`-F$9k=*
zG{d;KTjBABR*b$EZK_*jCscG|yR}}ujhig&(rr-nBkoJ>@Oj(Z2mi$*5q+Ys!>aU1
z1fkjQosH{dv$J=&`=rm?+;$m~W9MsNsd@|IHraPwyrt1mSf=_kx*%GPW$ICISfW<V
z<l<+!y|D$+N|-RnI{?=2_FhG4-8y*}jM2|`1r|iN6{uNB_(vaY?D+(4az0Eu;2EBr
z--BTZYZ<Nk7@uu!`_hD#LEZ#U%5se2`O<VHr{3WxF#jQrYlWJH-o>@rhH6w%3q^b3
zRE4-7WwY@*4ATjv#%nN4S3@*ji(xv^)VLDEbQMJ78!${2YrGo6xcu!LjkposeZ(^{
z+Slvh$YJjq_s*Wk0RG}x*hhOlvQzhsZ;Tfn3gEqtS4zE|-M$CuxynjG1^SeY;dbh|
zveU+m=^THY)r`6o>T#@l%;R&PFIVBRMP8rVw-mz<)Y9To*G+wLxdT(5I5G8s6B8YF
zV(K#|rg3*->N6+CKI7{%+5qY_r$U*)<pR@-#i_qcV4A_s^c4cH#QNhDRtbgK0v8EP
z_j{Zb(A&_7=L)Rv_vi-C$NJ+kED)GHtg}FRE}WP)DV&%Nteu!VsT0$IwG-Da5fS7_
zoC-??rscLXeXPLg0^`0^oIY-AIdQE)uXJaDwEA}9Y=P+&?@Xs9fD?xVrbVkWot6YK
zjQxK_T!dka6PVVhu?&Vm_YIu*N`WT|oG);J6Vv$92AH!zT3R~sWPzs$e6_$+1uhhr
zmUOy&s^6eBpN=3*%R?uoHIfrwBk(L6Yu|U1z&8uLA%<1`#>Q9#h74nqz?%iG5qOKh
zw+Osd;B8S{%WvxK(Fn$57PVNns7~M=0^cg|Z35peFukx;`L*`z=O;SC#&-(5Q{Y_!
z-(_R%Sm>?b95R={ZX4I?44s66M_}p~XMyyRablmqeu3#d<J9lMxHkIoA=2SgxJ=+~
z0+X&&KSAI`fx8QgFK5h;1Fd_2-tNu<X*J-)w0dyjJ_08ROsfN@KCLF4m{uQ`(?rt-
z^cM;P1Rf}GK;RUCQw1I*@L<HXPK$;Jg`olu6L`45X#$TBc%;CiXm84C!00_cc>C>r
z#?#%Km5W=?h6}2<c>8w>XB%#)ejfLLJ5dCk(iDG)Tg<sRLBoQ?Q<&i8o$TCDM$qs<
z!OwA5ITN?8{gC=9>UXA?03J@_c67k(3LW2}xQzFhgE<-DkkJhi|AcXuL&u9)a4<X9
zNPvzPXzx0dlVv3GK6X|(Bh%;(1+7?C4=8^aMNepG!F;`-VJo8EcAl)Pu+axH{){O}
zaN#9S|4yMCJk3#?;aS<?OrtMcs6!KTGs8wdC_jK%`%?q^T4;0zUQ5Ky?EoHKHgCg}
z0B^BpXJ-fTIwLkbfVZSn`!ln$jX`!Wm|+a&4f2eftZ*(K2JJYAnTL`UeXYC<%sh;&
zqZ6Ty?U;gBranS6f>8fXxC1|uBKmpq(DqRriFroDj@MCn3>-RyVk~t}z)YtNcRCxw
zEqgRMFDuu`piO@|AW~6TA?&Cu9+K>=te}x?Z~o^7bAv_>9C`$^=dx)TXqyqDUJBs7
zUF?>Cna3%a;f%0x1(k?a<z^V;s9XAX!pQMd9JVSK!xL;Pa<VdvE1~UC%s-LyhqJIR
zapUe4EV%#@Z^e|WxGyt9*h7=JyMj46*_p;<23dHJ5j3XoFov>&S;p0ND?)f6Hx<r2
zhJ_Sz@u3WyYtwAioXlKfI%M32$unq5^tH0HaLi|d`2>!0Rz}FUh6~Ee$_pE_>|9}-
zqO;lloUCB3QAFwiyjP`;N`hJ0K{~g>GnxRNTdAJS4(Ei7dEER^I6Kdn&qZa2GP8{Z
zJclyqBxE7$hl9b8QDXOJE>4nCE<2o)8Oo)jtVhwj?98CC7>e(}YOduX@`5>GV~K5X
zC_CG@j>4#k55rQsv>ZHoDdSplbF%V`az2vDp+;TLR%L~-68!Q5&8uAOq2;U|!UIcV
zg>6W7F6yl0gq#pgk5zV=!His^f}fepOpLGO@<Vx8{tawqI2T6+Z*bAURc=mRDATx+
z2N=uAGS;yDnK?MQ*OEbW(iAq<*(%s&MinH}8E`cp!mZ~+I6Nw&gS?v=ZGc0hvXNCb
z@p)X;emdTx!#_M6qZ7@86tVdmPZFJv(FxmDK4II&HmDP}?QCeaI$^72z$a{V>Vys2
zU-2Y?;_3R8upX8?irX4NFWmAdZfj)FTNhs#e0c6Yo{s!5-CE%;%l5*kC#-Ey-{!f>
zop9cQ37<MRC9TIar#zkMg~d<e$$plif^@F$-Whak5$LLK=B=82SQzbqMz`iR@+N^+
zqotbOBydnrwMpPcC0IHT4|%k94YS^0uFZNifv5G8wy^0s#d>qpj$p0U&?_#D>o5ns
zIyBZRHjVWXPh-8p(^xMdHP%Z=jqN35ZL|WthSUl~)SU*b7g#TXwZ2{iYpfT+8tXN%
z#=Hi`X6XpM2-dhvR9G(X^#U&wc)7qU1YSv)x>Z-WN+?taTq*Dk0&8}!%hTHj8td%?
zjcL0<txt5sI#HnBuh8k5iD^7h=;sSuAn;WJPtyAny1>aoVT!<43p`ceLV>3VJYC=!
zdVfM2pqZG)nu%$wnV812MS*(%LZ|Ef3yt;ug~oH`V>M3{IA7oe0xuM}MBq|^7YSUu
zSVUYa@DhQq6L_hOwFlDAxyG6*YOEh~8`m1zk!S_&NHo?xq;Z`vz!caeux4Vq%QX>W
zjHP=-2F=8D26`tr9g${YI$bj{jWrX~*k)qU$6OOJl}@eGOiX9cOiW{{+-ZPjVme(j
zF^x47)0ml9%+F{dM(I?)+Lv`!pqZF1Kr=CoH51cVGck=d6Vq5RF|0>pO~eqZ`qkHQ
zoCXXOEe;5rB5<m}g9Iif<}6<mv8F3)`HeqRC}<|8Egmk?H51e6nu%$wnV81N#NrGX
zt#Pfr!#+kw*m$hK=>i7@&JZ|L;4Fc&Z5)07=Lm&dfkOg^1<n)r3W3K7Jf3mq=%knc
zZ?5o;@doqqaG~y^#Vg$%RF|MS@rqJig7WYsD7-+ZFF|>E4XZ9e`FPRGm!SN73957K
z5>yvj`Vv7^m!NEd%9o(JQ5{53`IeTt1eL%mVs!~BQC))a(9*v<r2Y(7pgp4GxhJib
ziKHs>+&e~|`!IP<;zg<=&&e_J+?UC7KPJyeP>DPbVDdbW8^Yu{z~nh4MxIl7{jbRL
zAe;9Rc^=H<c?gr|p-i5KF?mK_st7lc=QQFNL{2q%9%&as<atz#JdbAbj8_+<<asQ0
z3UO3Lo`XCziacj9c}9*(n+HUmvtr~qn>&`ta}JYdo1rT593tz8p(^s6$K?5nD0v<i
zBhTZb<aq*<=PQ{!Ph|3(&*Zs)S&kylS21~>#8HYoPv#*}<avrcX%u<Bn#uE2CSi&^
z7czOC#^f1UC^3FTo@Y>rOrB>_w<z+AL^MjCXW3Q|d7jPWxrp;C@?1=v62LF`DDpg)
z`%;nTdE8x0p64@Q^1OhDQIY3`b}NWH<E=^}&&WT?JVl-t*{VdI7c+Uj)<K?^#K^Ov
zpGf$LJTJ9#5zn;wry|egOrEc|XQm?0%gBHLel;jYo)zyz1Bg7Y<f0ULUd3}rk>?85
zSLC_U?oT4mH*ncZo>#}n^NmcN*KiSvJg?=Z=Mi~cM`0$<Rd#7ao~yYQMV{C5qpZj?
zexZmS1Vx^2j*;gLtgguOM%xfYo;PuVBF~%cGKoCb@H3;x^A;{&k>^|3Ohulz^3XAP
zwl8xk^1PkxSLC^t3}W(J$8#V=<aq~26TQBb+3{^mnQvzTeFvkTfVz2lC-dT+%!_w%
z@roDU#WrLsUX0&7A~w&w_#VD_x|?sF-b;-R;P;P|)2(Mql+$fsr+a_Q>F(jSE2sNY
z?ksk?53tkS8*{q&l_YYy``GF3XJvM}4^kZg^C8<n<#d0>J*k}T&tp#aVRpI)qE7c9
z^>P5eo1~oXBQdA@C_CN9xCfNeeLUuLpJ1o^Bs<+kYDU03%ue?ywvC<c)9iGA5p%jn
zc-|<d`%BwjlGA;Loo*95-DY;WN7?B<Yv&=S`zxB80sK~ycDm2og^<&Y#GGymJKbYE
zjnt0i3)CqA{DzWpx-aq2D5v`}JKa}ePWRQA({1IBWvAQ5PWLr_LX^{eovaJsH<y&t
zeUqK;ucJ=)t(en29(B4W*y+B_PWK&ly1!wk`z}8n%IW@=o$l{AN;%zj9unnrJM2lL
zobK=0>AuINDX04fcDnDg)BU6USksQ>2UH?E-ILTU%ITh>;_MyEKiO80)BQ6$-48jx
za=IT;rv&f|P0Hzh!hNZn?rH8WcDiR6u+#ku52JFrpW3Y;r~6lSx_{&1mDByZtx8V!
zGj_UX9ZvUL%;|p41u3Wd4?7n*-7naF<#hkaPWNB-%v4VIOUf58&$F7^vAn>|S5EgU
zE=oDwfAbttPWNlpS5Egoc7Kx7{V$izPWPLb)4j+}_YxPOobI=_#pHCqqcA(&@9oma
z>HfgAD5v{>{3t7@`=hd|6FFVD?+(nEhfU3eS6}zc_v=;iZFHq>g}YJjuj%;PajmF#
zul6N)VRf=K0n|R8zNglYZ!2jSfFoz{c45;>-Ujmr*YPsH8>?w}IqYuqu5br0@P1xY
zEg@bl(cV<6<<ffVT8QRovy#dpcRl63Xu4Z;VOpzyylDd(+IsNLo3=vJKaDj%)L8RJ
zjWyrYSo2MdHJ{a3^I45;K3i+(2uWyv5KHy{6Zl7g>E02sD_x#u<Qi*6t}!!mWPUm#
zo{}5J7oviH3j8mDzZCeqz!wDmiZC%xJ;DAh6uuVtKLY<(;BN%JDDWkLzeOCk^Y)!k
z_+H?*1%5~1-w6D!z`qsvcXH>gT_|)2{Ck1l6Zj7Tzc26~1^z(pyqy#Zrv(0!z<(C_
zLxDdM_+x=Tq4B3#tLMOJfzJs17lA(&_^$&0P2j%^T>F`bI4kfufj<}c9|HeG;5!B0
z8INm?UGWjdT>{@N@I3<W7WiI)?-RH_4rBi}#6=j!{Q~b1_@@FtAn;y+LEwE(Oyj@b
z8G+#k1%622p9%bPfgcw5fWQYGnCdqUIU)!@BJiUEKPK?w0zV<}lL9x!u&UoU9E-q^
zVLT=9(*plO;3ES6Qs8F<Zi-@i=dC#!LGLBQI4bb70{=?j=LCLU;E2F28ta|6V>-g#
zd3!<N7X^Mv;Fkq{Mc`KjZnbfA=dDdBye9DL0>2^fn*#q@;I{-m&N#O7cEY|D(!)~R
zSM9wed&7$Ry6v?n9UOPEmv+Q`JxpB`_qBMLr?@X}#?elZ;=Vp!K{NOD^Ez8`-_E>@
zRNS`<tqYm^cC}TA`(DP;#C^Ll_l<t%R~;VXZy3-Sa(CvwJ(&CUWbWIGi&xyYH`|b{
zxG!$U#kg-0bKhj<zI|yCOy2=k+_yhlqPQ<^$kDeh2gbN>fZMLPZwfD-nfs<P_r)tC
z`dgQSsmticyVQo`P*!H{JB;d}+ak)QY{h-kxF;3&#fu~OtxL7xIFh;Vs3`XxO}$LF
zMiloQ8{@v|%zcC01B(0NSBv#+olNGwS<HR2sTlz?hq-Sq+s52C#M~D*<MeHvJSG*2
z`(9zwR^q<nnEQ@r?mL0G@0HAbaYHV;;h0Ypfv$;Y?u&bJ^wm{$TW3;?`%Y%=JB6o_
z;=Wf?rv%KY%zbe)?gPxHxbHOPzSCpe7dPb8-XqeQncT6=eXn8eJBx{m;=Z%Vx&Z#v
zj^e(>%zfuXxi8)a(YJNxMY->M=DrJ<`z~beTf*G8l%Ed8eHStJ#XBE#fk$!QYk5c%
z_r;4J>Z`hn`(DS~*S__kxNjMA-*V=@xZxJvaKyVFn)@!NZc*IV-f>g6b?l2CnZ$ip
zG54+D{EGWlQl|v)Th5C6uI9c}-1kQAF6O>#7%=x$S37V+k8bOzoi^-!;=WbPeXF^6
z#eLWFh=+*#-o)JZW(W7(5aYfZxjPj1-DKw??rZO_Dehat+;@vTGZpv6+aJVzw{p)a
z?u%DIi25t;i(iqZDW|wEeovZQk>bAi1xXx<TE%_Q{i%;y{Mp}oO2!x`P7=6}z`X?S
zEpVd1-4(`Hu4aWjghEe&HObTZoke;Vfx8NPnZVr$(<@&WNUe3YIKjr6lWi8bM&K<1
z-y-l<fwv)6lt)*vT_})oWROm;v#~Z<vr3IMtJIiTWvqcwQfUt{(qjHHgw9}z3TXY|
z>=B=hwLuF6UMO&hz@-8&(v(LRxL7D$EASG5uM>Ewz-0oL3w*uo;$=c%xxgy~UMcV@
zfhz>A6u9;V5wTj}8wFk?@LGY_30x&`bv&*$*2hN}Hwk>Rz#9bKDDWnM`8khGV*Qx&
zkYoSr=UmSL?MO7%jznYacr@0INn`D}H0~=5>gU8X{{5X17#<+-K!F1SrwE)X@F0N)
zJ22I+egM<?oDX#<Q2H=|hYOr0@CboN3Op)?RsF{3SOkXfH-rQpD{#8NL4h*_&J;K+
zifzi19gQHDV;DIC=L#GWI4p3Uz*h)7PUBj88+5#mu<-<euM~Kq!1)3f2z-^mlWbh8
zXX#|2Fh$_21)eH!p}^Awo-XhV#<4BWnL^<jfoBOkTi_yriv^w|@LYxQVbnb^Pbkcf
z-pt3f6Ruu02~*dI-LXx1<Y07-80i<SZPcc`WiO>@Q(j%u=S_LM3Za&bv?=fB<&xTz
z@5~%SZOSY1WuJuNr3ihE_%c>kY_J=zzL*VO&I#%yG{G*D*r0uXU$MdNT)tw1cts-0
z27B_FlG$J{UU@1u*qiNFY_Jcl-<S>J#eQ1otCP@VjwUwPm)T%H=6`r;f@t6XM##|!
z1sJ6$1gDSUk5;MQ`WdX^A^5BZuM*R*CE+hQk^V5wHe8`(@-OtE^#gsDS1zNsf-z!G
zxvx($c^CS~)KMyxTnl|<>KK(uj)gulHC?5WTcM9k%}}Z2ROmyC7W%ARw|-4IzAm#M
zm40wPn_H5@Kfj;LKgJ*8t_pLz$V<>imr$~F!<uq@VP3%{j8j@r^$m88@1!?~zn4lS
ztKXJNls4tV4*b~~*Lh#0;Vsvv-sX0<`f;%FfWK!y%-wXr3qK@T3;XDm7QIvSK}l7a
zI={Kb9qdjQq`NdJqcD9L!M%MgpZj>3O3AlVQgYNy>LN;N0t`O@2hVu7xbwf_V;fh~
zoiMi1yC9&eqgRr?GF{S*ee7mBHrnl-M#{4edlwAX%JhCR@eZJgl3uR#(Y7WX@Twmt
zYHj2~66jvI*9EOdygpaO!|?VI?{z@uPy=*n^fJ@A^PlEiRnYLLXDZ}h@OJWRy|jiU
z{rrW$WQEp9-e=y+@t617Jl&!5-@Fx_o1Wnm<6HQ!%~KTZ$RUp6TOanq)9+cMVAhxT
z<>jU)RVN)h?ZR&!KgHM0v6obNy`kjq-Z}23!#?<^zjtBOh+*nvDplJw91GXo7WXf3
z#2;Xdh`f8jJI>`ge*`*z?Oo|_vYXism(F`<xUn?&<OK^4_3U-`C~9i^b3QEHFk0cl
z04v}sdIsM9uXp90h2MB1h2Qv6tUW1iU+36A=K<fI_fC(NFgo6VF<79x=$J3^PCsjk
z%YCuM2VK7QuISS8JlAm&BfdM&L;dkD@GJD%l61;={1u&X^Gm+S(=Pvfvv%jJoU^fw
zWs0&DowYrAXl~HQF_{$Bk8Irm9xkmH?UExLpRcqHqu#bq>)94}GSFoF{Q>HPE{=a2
zMYSSoeK5WkGWZ9Qg0Hj5MKm>kH1{;Q5<9(xQA$}2#+RRZ`oRr8f1&I6S8z{Pe@S#U
z(ZWPe*3EBVr*-vDf%f<CixyRWzt5fW8X7gAOPXzb`x^wIm69&LsgjE9q?Wf3zO0^v
zsJYpWU)MPK6+0dM>l%7MLh(%s!<erAC9aFYOj^!pGmp3VB53Ap6Mr4^?ymk))xrZE
e*8J$hPiq%Fiyh;t`ci2x{k=86tF|B?wf`Tqr}9Mr

diff --git a/cellxgene_schema_cli/tests/test_schema_compliance.py b/cellxgene_schema_cli/tests/test_schema_compliance.py
index 425086fc..7ee65a6d 100644
--- a/cellxgene_schema_cli/tests/test_schema_compliance.py
+++ b/cellxgene_schema_cli/tests/test_schema_compliance.py
@@ -1510,6 +1510,124 @@ def test_nan_values_must_be_rejected(self, validator_with_adata):
             in validator.errors
         )
 
+    @pytest.mark.parametrize(
+        "genetic_ancestry_African, genetic_ancestry_East_Asian, genetic_ancestry_European, "
+        "genetic_ancestry_Indigenous_American, genetic_ancestry_Oceanian, genetic_ancestry_South_Asian",
+        [
+            (0.0, 0.0, 0.0, 0.0, 0.0, 1.0),
+            (0.5, 0.5, 0.0, 0.0, 0.0, 0.0),
+            (0.0, 0.25, 0.25, 0.25, 0.25, 0.0),
+            (float("nan"), float("nan"), float("nan"), float("nan"), float("nan"), float("nan")),
+            (numpy.nan, numpy.nan, numpy.nan, numpy.nan, numpy.nan, numpy.nan),
+        ],
+    )
+    def test_genetic_ancestry__OK(
+        self,
+        validator_with_adata,
+        genetic_ancestry_African,
+        genetic_ancestry_East_Asian,
+        genetic_ancestry_European,
+        genetic_ancestry_Indigenous_American,
+        genetic_ancestry_Oceanian,
+        genetic_ancestry_South_Asian,
+    ):
+        """
+        genetic_ancestry_X fields must all be floats between 0 and 1 and sum to 1
+        OR they can all be NaN
+        """
+        validator = validator_with_adata
+        # Second organism in adata is not homo sapiens
+        validator.adata.obs["genetic_ancestry_African"] = [genetic_ancestry_African, float("nan")]
+        validator.adata.obs["genetic_ancestry_East_Asian"] = [genetic_ancestry_East_Asian, float("nan")]
+        validator.adata.obs["genetic_ancestry_European"] = [genetic_ancestry_European, float("nan")]
+        validator.adata.obs["genetic_ancestry_Indigenous_American"] = [
+            genetic_ancestry_Indigenous_American,
+            float("nan"),
+        ]
+        validator.adata.obs["genetic_ancestry_Oceanian"] = [genetic_ancestry_Oceanian, float("nan")]
+        validator.adata.obs["genetic_ancestry_South_Asian"] = [genetic_ancestry_South_Asian, float("nan")]
+        validator.validate_adata()
+        assert validator.errors == []
+
+    @pytest.mark.parametrize(
+        "genetic_ancestry_African, genetic_ancestry_East_Asian, genetic_ancestry_European, "
+        "genetic_ancestry_Indigenous_American, genetic_ancestry_Oceanian, genetic_ancestry_South_Asian",
+        [
+            # Non-float value of "random string"
+            (0.0, 0.0, 0.0, 1.0, 0.0, "random string"),
+            # Non-float value of True
+            (0.0, 0.0, 0.0, 1.0, 0.0, True),
+            # Non-float value of None
+            (0.0, 0.0, 0.0, 1.0, 0.0, None),
+            # Non-float value of numpy True
+            (0.0, 0.0, 0.0, 1.0, 0.0, numpy.True_),
+            # Non-float value of numpy NaN
+            (0.0, numpy.nan, 0.0, 1.0, 0.0, 0.0),
+            # One value is > 1
+            (0.0, 0.0, 1.1, 0.0, 0.0, 0.0),
+            # One value is < 0.0
+            (0.0, 0.0, -0.25, 1.0, 0.25, 0.0),
+            # Sum is > 1.0
+            (0.0, 0.1, 1.0, 0.0, 0.0, 0.0),
+            # Sum is < 1.0
+            (0.0, 0.25, 0.25, 0.25, 0.0, 0.0),
+            # Only all NaN is valid
+            (float("nan"), 0.0, 0.0, 0.0, 0.0, 0.0),
+            # Only all NaN is valid
+            (numpy.nan, 0.0, 0.0, 0.0, 0.0, 0.0),
+        ],
+    )
+    def test_genetic_ancestry__invalid(
+        self,
+        validator_with_adata,
+        genetic_ancestry_African,
+        genetic_ancestry_East_Asian,
+        genetic_ancestry_European,
+        genetic_ancestry_Indigenous_American,
+        genetic_ancestry_Oceanian,
+        genetic_ancestry_South_Asian,
+    ):
+        validator = validator_with_adata
+        # Second organism in adata is not homo sapiens
+        validator.adata.obs["genetic_ancestry_African"] = [genetic_ancestry_African, float("nan")]
+        validator.adata.obs["genetic_ancestry_East_Asian"] = [genetic_ancestry_East_Asian, float("nan")]
+        validator.adata.obs["genetic_ancestry_European"] = [genetic_ancestry_European, float("nan")]
+        validator.adata.obs["genetic_ancestry_Indigenous_American"] = [
+            genetic_ancestry_Indigenous_American,
+            float("nan"),
+        ]
+        validator.adata.obs["genetic_ancestry_Oceanian"] = [genetic_ancestry_Oceanian, float("nan")]
+        validator.adata.obs["genetic_ancestry_South_Asian"] = [genetic_ancestry_South_Asian, float("nan")]
+        validator.validate_adata()
+        assert len(validator.errors) > 0
+
+    def test_genetic_ancestry_same_donor_id(self, validator_with_adata):
+        """
+        genetic_ancestry_X fields must be the same when the donor id is the same
+        """
+        validator = validator_with_adata
+        original_donor_id_column = validator.adata.obs["donor_id"].copy()
+
+        # Second row should have identical donor id + genetic ancestry values, so this should pass validation
+        validator.adata.obs.iloc[1] = validator.adata.obs.iloc[0].values
+        validator.validate_adata()
+        assert validator.errors == []
+
+        # Update the genetic ancestry values to be different. This should now fail validation
+        validator.adata.obs["genetic_ancestry_African"] = [1.0, 0.0]
+        validator.adata.obs["genetic_ancestry_East_Asian"] = [0.0, 1.0]
+        validator.adata.obs["genetic_ancestry_European"] = [0.0, 0.0]
+        validator.adata.obs["genetic_ancestry_Indigenous_American"] = [0.0, 0.0]
+        validator.adata.obs["genetic_ancestry_Oceanian"] = [0.0, 0.0]
+        validator.adata.obs["genetic_ancestry_South_Asian"] = [0.0, 0.0]
+        validator.validate_adata()
+        assert len(validator.errors) > 0
+
+        # Change the donor id back to two different donor id's. Now, this should pass validation
+        validator.adata.obs["donor_id"] = original_donor_id_column
+        validator.validate_adata()
+        assert validator.errors == []
+
 
 class TestVar:
     """

From 4befffc49c2b10aa75dcf394b1c29ea040d67c9b Mon Sep 17 00:00:00 2001
From: Brian Raymor <brianraymor@chanzuckerberg.com>
Date: Mon, 2 Dec 2024 15:43:12 -0800
Subject: [PATCH 14/28] updated genetic ancestry values (#1141)

---
 schema/drafts/5.3.0.md | 32 +++++++++++++-------------------
 1 file changed, 13 insertions(+), 19 deletions(-)

diff --git a/schema/drafts/5.3.0.md b/schema/drafts/5.3.0.md
index a65f74a8..ba1cd0ab 100644
--- a/schema/drafts/5.3.0.md
+++ b/schema/drafts/5.3.0.md
@@ -583,10 +583,9 @@ If <code>organism_ontolology_term_id</code> is <code>"NCBITaxon:9606"</code> for
     <tr>
       <th>Value</th>
       <td>
-        <code>str</code> or <code>float</code>. All observations with the same <code>donor_id</code> MUST contain the same value.<br><br>
+        <code>float</code>. All observations with the same <code>donor_id</code> MUST contain the same value.<br><br>
         If <code>organism_ontolology_term_id</code> is NOT
-        <code>"NCBITaxon:9606"</code> for <i>Homo sapiens</i>, then the
-        value MUST be <code>"na"</code>.<br><br>If
+        <code>"NCBITaxon:9606"</code> for <i>Homo sapiens</i>, then the value MUST be a <code>float("nan")</code>.<br><br>If
         <code>organism_ontolology_term_id</code> is
         <code>"NCBITaxon:9606"</code> for <i>Homo sapiens</i>, then the value MUST be a <code>float("nan")</code> if unavailable; otherwise, the value MUST be the genetic ancestry percentage of <a href="https://www.ebi.ac.uk/ols4/ontologies/hancestro/classes/http%253A%252F%252Fpurl.obolibrary.org%252Fobo%252FHANCESTRO_0010"><code>"HANCESTRO:0010"</code></a> for <i>African</i> expressed as a <code>float</code> greater than or equal to <code>0.0</code> and less than or equal to <code>1.0</code>
       </td>
@@ -610,10 +609,9 @@ If <code>organism_ontolology_term_id</code> is <code>"NCBITaxon:9606"</code> for
     <tr>
       <th>Value</th>
       <td>
-        <code>str</code> or <code>float</code>. All observations with the same <code>donor_id</code> MUST contain the same value.<br><br>
-        If <code>organism_ontolology_term_id</code> is NOT
-        <code>"NCBITaxon:9606"</code> for <i>Homo sapiens</i>, then the
-        value MUST be <code>"na"</code>.<br><br>If
+        <code>float</code>. All observations with the same <code>donor_id</code> MUST contain the same value.<br><br>
+                If <code>organism_ontolology_term_id</code> is NOT
+        <code>"NCBITaxon:9606"</code> for <i>Homo sapiens</i>, then the value MUST be a <code>float("nan")</code>.<br><br>If
         <code>organism_ontolology_term_id</code> is
         <code>"NCBITaxon:9606"</code> for <i>Homo sapiens</i>, then the value MUST be a <code>float("nan")</code> if unavailable; otherwise, the value MUST be the genetic ancestry percentage of <a href="https://www.ebi.ac.uk/ols4/ontologies/hancestro/classes/http%253A%252F%252Fpurl.obolibrary.org%252Fobo%252FHANCESTRO_0009"><code>"HANCESTRO:0009"</code></a> for <i>East Asian</i> expressed as a <code>float</code> greater than or equal to <code>0.0</code> and less than or equal to <code>1.0</code>
       </td>
@@ -637,10 +635,9 @@ If <code>organism_ontolology_term_id</code> is <code>"NCBITaxon:9606"</code> for
     <tr>
       <th>Value</th>
       <td>
-        <code>str</code> or <code>float</code>. All observations with the same <code>donor_id</code> MUST contain the same value.<br><br>
+        <code>float</code>. All observations with the same <code>donor_id</code> MUST contain the same value.<br><br>
         If <code>organism_ontolology_term_id</code> is NOT
-        <code>"NCBITaxon:9606"</code> for <i>Homo sapiens</i>, then the
-        value MUST be <code>"na"</code>.<br><br>If
+        <code>"NCBITaxon:9606"</code> for <i>Homo sapiens</i>, then the value MUST be a <code>float("nan")</code>.<br><br>If
         <code>organism_ontolology_term_id</code> is
         <code>"NCBITaxon:9606"</code> for <i>Homo sapiens</i>, then the value MUST be a <code>float("nan")</code> if unavailable; otherwise, the value MUST be the genetic ancestry percentage of <a href="https://www.ebi.ac.uk/ols4/ontologies/hancestro/classes/http%253A%252F%252Fpurl.obolibrary.org%252Fobo%252FHANCESTRO_0005"><code>"HANCESTRO:0005"</code></a> for <i>European</i> expressed as a <code>float</code> greater than or equal to <code>0.0</code> and less than or equal to <code>1.0</code>
       </td>
@@ -664,10 +661,9 @@ If <code>organism_ontolology_term_id</code> is <code>"NCBITaxon:9606"</code> for
     <tr>
       <th>Value</th>
       <td>
-        <code>str</code> or <code>float</code>. All observations with the same <code>donor_id</code> MUST contain the same value.<br><br>
+        <code>float</code>. All observations with the same <code>donor_id</code> MUST contain the same value.<br><br>
         If <code>organism_ontolology_term_id</code> is NOT
-        <code>"NCBITaxon:9606"</code> for <i>Homo sapiens</i>, then the
-        value MUST be <code>"na"</code>.<br><br>If
+        <code>"NCBITaxon:9606"</code> for <i>Homo sapiens</i>, then the value MUST be a <code>float("nan")</code>.<br><br>If
         <code>organism_ontolology_term_id</code> is
         <code>"NCBITaxon:9606"</code> for <i>Homo sapiens</i>, then the value MUST be a <code>float("nan")</code> if unavailable; otherwise, the value MUST be the genetic ancestry percentage of <a href="https://www.ebi.ac.uk/ols4/ontologies/hancestro/classes/http%253A%252F%252Fpurl.obolibrary.org%252Fobo%252FHANCESTRO_0013"><code>"HANCESTRO:0013"</code></a> for <i>Indigenous American</i> expressed as a <code>float</code> greater than or equal to <code>0.0</code> and less than or equal to <code>1.0</code>
       </td>
@@ -691,10 +687,9 @@ If <code>organism_ontolology_term_id</code> is <code>"NCBITaxon:9606"</code> for
     <tr>
       <th>Value</th>
       <td>
-        <code>str</code> or <code>float</code>. All observations with the same <code>donor_id</code> MUST contain the same value.<br><br>
+        <code>float</code>. All observations with the same <code>donor_id</code> MUST contain the same value.<br><br>
         If <code>organism_ontolology_term_id</code> is NOT
-        <code>"NCBITaxon:9606"</code> for <i>Homo sapiens</i>, then the
-        value MUST be <code>"na"</code>.<br><br>If
+        <code>"NCBITaxon:9606"</code> for <i>Homo sapiens</i>, then the value MUST be a <code>float("nan")</code>.<br><br>If
         <code>organism_ontolology_term_id</code> is
         <code>"NCBITaxon:9606"</code> for <i>Homo sapiens</i>, then the value MUST be a <code>float("nan")</code> if unavailable; otherwise, the value MUST be the genetic ancestry percentage of <a href="https://www.ebi.ac.uk/ols4/ontologies/hancestro/classes/http%253A%252F%252Fpurl.obolibrary.org%252Fobo%252FHANCESTRO_0017"><code>"HANCESTRO:0017"</code></a> for <i>Oceanian</i> expressed as a <code>float</code> greater than or equal to <code>0.0</code> and less than or equal to <code>1.0</code>
       </td>
@@ -718,10 +713,9 @@ If <code>organism_ontolology_term_id</code> is <code>"NCBITaxon:9606"</code> for
     <tr>
       <th>Value</th>
       <td>
-        <code>str</code> or <code>float</code>. All observations with the same <code>donor_id</code> MUST contain the same value.<br><br>
+        <code>float</code>. All observations with the same <code>donor_id</code> MUST contain the same value.<br><br>
         If <code>organism_ontolology_term_id</code> is NOT
-        <code>"NCBITaxon:9606"</code> for <i>Homo sapiens</i>, then the
-        value MUST be <code>"na"</code>.<br><br>If
+        <code>"NCBITaxon:9606"</code> for <i>Homo sapiens</i>, then the value MUST be a <code>float("nan")</code>.<br><br>If
         <code>organism_ontolology_term_id</code> is
         <code>"NCBITaxon:9606"</code> for <i>Homo sapiens</i>, then the value MUST be a <code>float("nan")</code> if unavailable; otherwise, the value MUST be the genetic ancestry percentage of <a href="https://www.ebi.ac.uk/ols4/ontologies/hancestro/classes/http%253A%252F%252Fpurl.obolibrary.org%252Fobo%252FHANCESTRO_0006"><code>"HANCESTRO:0006"</code></a> for <i>South Asian</i> expressed as a <code>float</code> greater than or equal to <code>0.0</code> and less than or equal to <code>1.0</code>
       </td>

From 0a166c41342d876263b0f3a17a8526448167bdda Mon Sep 17 00:00:00 2001
From: Evan Molinelli <ejmolinelli@users.noreply.github.com>
Date: Tue, 3 Dec 2024 10:05:19 -0500
Subject: [PATCH 15/28] feat: cellxgene-schema must update validation for X
 (Matrix Layers) for descendants of Visium (#1133)

Co-authored-by: Evan Molinelli <emolinelli@Evan-CZI-Laptop.local>
---
 .../cellxgene_schema/validate.py              |  64 +++++++++--
 .../tests/test_schema_compliance.py           | 107 +++++++++++++++---
 cellxgene_schema_cli/tests/test_validate.py   |   6 +-
 3 files changed, 144 insertions(+), 33 deletions(-)

diff --git a/cellxgene_schema_cli/cellxgene_schema/validate.py b/cellxgene_schema_cli/cellxgene_schema/validate.py
index 01a2d140..7ce2d44a 100644
--- a/cellxgene_schema_cli/cellxgene_schema/validate.py
+++ b/cellxgene_schema_cli/cellxgene_schema/validate.py
@@ -24,19 +24,24 @@
 ONTOLOGY_PARSER = OntologyParser(schema_version="v5.3.0")
 
 ASSAY_VISIUM = "EFO:0010961"
+ASSAY_VISIUM_11M = "EFO:0022860"
 ASSAY_SLIDE_SEQV2 = "EFO:0030062"
 
 VISIUM_AND_IS_SINGLE_TRUE_MATRIX_SIZE = 4992
+VISIUM_11MM_AND_IS_SINGLE_TRUE_MATRIX_SIZE = 14336
 SPATIAL_HIRES_IMAGE_MAX_DIMENSION_SIZE = 2000
 SPATIAL_HIRES_IMAGE_MAX_DIMENSION_SIZE_VISIUM_11MM = 4000
 
 CONDITION_IS_VISIUM = "a descendant of 'EFO:0010961' (Visium Spatial Gene Expression)"
+CONDITION_IS_VISIUM_11M = f"'{ASSAY_VISIUM_11M} (Visium CytAssist Spatial Gene Expression, 11mm)"
 CONDITION_IS_SEQV2 = f"'{ASSAY_SLIDE_SEQV2}' (Slide-seqV2)"
 
-
 ERROR_SUFFIX_SPATIAL = f"obs['assay_ontology_term_id'] is either {CONDITION_IS_VISIUM} or {CONDITION_IS_SEQV2}"
 ERROR_SUFFIX_VISIUM = f"obs['assay_ontology_term_id'] is {CONDITION_IS_VISIUM}"
-ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE = f"{ERROR_SUFFIX_VISIUM} and uns['spatial']['is_single'] is True"
+ERROR_SUFFIX_VISIUM_11M = f"obs['assay_ontology_term_id'] is {CONDITION_IS_VISIUM_11M}"
+
+ERROR_SUFFIX_IS_SINGLE = "uns['spatial']['is_single'] is True"
+ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE = f"{ERROR_SUFFIX_VISIUM} and {ERROR_SUFFIX_IS_SINGLE}"
 ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_FORBIDDEN = f"is only allowed for {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE}"
 ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_REQUIRED = f"is required for {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE}"
 ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_IN_TISSUE_0 = f"{ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE} and in_tissue is 0"
@@ -49,7 +54,9 @@ def __init__(self, ignore_labels=False):
         self.schema_def = dict()
         self.schema_version: str = None
         self.ignore_labels = ignore_labels
-        self.visium_and_is_single_true_matrix_size = VISIUM_AND_IS_SINGLE_TRUE_MATRIX_SIZE
+        self._visium_and_is_single_true_matrix_size = None
+        self._hires_max_dimension_size = None
+        self._visium_error_suffix = None
 
         # Values will be instances of gencode.GeneChecker,
         # keys will be one of gencode.SupportedOrganisms
@@ -77,6 +84,44 @@ def adata(self, adata: anndata.AnnData):
         self.reset()
         self._adata = adata
 
+    @property
+    def visium_and_is_single_true_matrix_size(self) -> Optional[int]:
+        """
+        Returns the required matrix size based on assay type, if applicable, else returns None.
+        """
+        if self._visium_and_is_single_true_matrix_size is None:
+            # Visium 11M's raw matrix size is distinct from other visium assays
+            if bool(
+                self.adata.obs["assay_ontology_term_id"]
+                .apply(lambda t: is_ontological_descendant_of(ONTOLOGY_PARSER, t, ASSAY_VISIUM_11M, True))
+                .any()
+            ):
+                self._visium_error_suffix = f"{ERROR_SUFFIX_VISIUM_11M} and {ERROR_SUFFIX_IS_SINGLE}"
+                self._visium_and_is_single_true_matrix_size = VISIUM_11MM_AND_IS_SINGLE_TRUE_MATRIX_SIZE
+            elif self._is_visium_including_descendants():
+                self._visium_error_suffix = f"{ERROR_SUFFIX_VISIUM} and {ERROR_SUFFIX_IS_SINGLE}"
+                self._visium_and_is_single_true_matrix_size = VISIUM_AND_IS_SINGLE_TRUE_MATRIX_SIZE
+        return self._visium_and_is_single_true_matrix_size
+
+    @property
+    def hires_max_dimension_size(self) -> Optional[int]:
+        """
+        Returns the restricted hires image dimension based on assay type, if applicable, else returns None.
+        """
+        if self._hires_max_dimension_size is None:
+            # Visium 11M's max dimension size is distinct from other visium assays
+            if bool(
+                self.adata.obs["assay_ontology_term_id"]
+                .apply(lambda t: is_ontological_descendant_of(ONTOLOGY_PARSER, t, ASSAY_VISIUM_11M, True))
+                .any()
+            ):
+                self._visium_error_suffix = ERROR_SUFFIX_VISIUM_11M
+                self._hires_max_dimension_size = SPATIAL_HIRES_IMAGE_MAX_DIMENSION_SIZE_VISIUM_11MM
+            elif self._is_visium_including_descendants():
+                self._visium_error_suffix = ERROR_SUFFIX_VISIUM
+                self._hires_max_dimension_size = SPATIAL_HIRES_IMAGE_MAX_DIMENSION_SIZE
+        return self._hires_max_dimension_size
+
     def _is_single(self) -> bool | None:
         """
         Determine value of uns.spatial.is_single. None if non-spatial.
@@ -1228,7 +1273,7 @@ def _has_valid_raw(self, force: bool = False) -> bool:
             if is_visium_and_is_single_true and x.shape[0] != self.visium_and_is_single_true_matrix_size:
                 self._raw_layer_exists = False
                 self.errors.append(
-                    f"When {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE}, the raw matrix must be the "
+                    f"When {self._visium_error_suffix}, the raw matrix must be the "
                     f"unfiltered feature-barcode matrix 'raw_feature_bc_matrix'. It must have exactly "
                     f"{self.visium_and_is_single_true_matrix_size} rows. Raw matrix row count is "
                     f"{x.shape[0]}."
@@ -1800,10 +1845,7 @@ def _check_spatial_uns(self):
                 self.errors.append("uns['spatial'][library_id]['images'] must contain the key 'hires'.")
             # hires is specified: proceed with validation of hires.
             else:
-                _assay_term = self.adata.obs["assay_ontology_term_id"].values[0]
-                _max_size = SPATIAL_HIRES_IMAGE_MAX_DIMENSION_SIZE
-                if is_ontological_descendant_of(ONTOLOGY_PARSER, _assay_term, "EFO:0022860", True):
-                    _max_size = SPATIAL_HIRES_IMAGE_MAX_DIMENSION_SIZE_VISIUM_11MM
+                _max_size = self.hires_max_dimension_size
                 self._validate_spatial_image_shape("hires", uns_images["hires"], _max_size)
 
             # fullres is optional.
@@ -1906,20 +1948,18 @@ def _is_visium_including_descendants(self) -> bool:
         :rtype bool
         """
         _assay_key = "assay_ontology_term_id"
-        includes_and_visium = False
 
         # only compute if not already stored
         if self.is_visium is None and _assay_key in self.adata.obs.columns:
             # check if any assay_ontology_term_ids are descendants of VISIUM
-            includes_and_visium = (
+            self.is_visium = bool(
                 self.adata.obs[_assay_key]
                 .astype("string")
                 .apply(lambda assay: is_ontological_descendant_of(ONTOLOGY_PARSER, assay, ASSAY_VISIUM, True))
                 .any()
             )
-            self.is_visium = includes_and_visium
 
-        return includes_and_visium
+        return self.is_visium
 
     def _validate_spatial_image_shape(self, image_name: str, image: np.ndarray, max_dimension: int = None):
         """
diff --git a/cellxgene_schema_cli/tests/test_schema_compliance.py b/cellxgene_schema_cli/tests/test_schema_compliance.py
index 7ee65a6d..88fe4e2c 100644
--- a/cellxgene_schema_cli/tests/test_schema_compliance.py
+++ b/cellxgene_schema_cli/tests/test_schema_compliance.py
@@ -15,11 +15,19 @@
 from cellxgene_schema.schema import get_schema_definition
 from cellxgene_schema.utils import getattr_anndata
 from cellxgene_schema.validate import (
+    ASSAY_VISIUM_11M,
+    ERROR_SUFFIX_IS_SINGLE,
+    ERROR_SUFFIX_VISIUM,
+    ERROR_SUFFIX_VISIUM_11M,
     ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE,
+    SPATIAL_HIRES_IMAGE_MAX_DIMENSION_SIZE,
+    SPATIAL_HIRES_IMAGE_MAX_DIMENSION_SIZE_VISIUM_11MM,
+    VISIUM_11MM_AND_IS_SINGLE_TRUE_MATRIX_SIZE,
     VISIUM_AND_IS_SINGLE_TRUE_MATRIX_SIZE,
     Validator,
 )
 from cellxgene_schema.write_labels import AnnDataLabelAppender
+from fixtures.examples_validate import visium_library_id
 
 schema_def = get_schema_definition()
 
@@ -77,7 +85,8 @@ def validator_with_spatial_and_is_single_false(validator) -> Validator:
 @pytest.fixture
 def validator_with_visium_assay(validator) -> Validator:
     validator.adata = examples.adata_visium.copy()
-    validator.visium_and_is_single_true_matrix_size = 2
+    validator._visium_and_is_single_true_matrix_size = 2
+    validator._hires_max_dimension_size = None
     return validator
 
 
@@ -253,7 +262,7 @@ def test_raw_values__contains_zero_row_in_tissue_1_mixed_in_tissue_values(self,
         Raw Matrix contains a row with all zeros and in_tissue is 1, and there are also values with in_tissue 0.
         """
 
-        validator = validator_with_visium_assay
+        validator: Validator = validator_with_visium_assay
         validator.adata.X[1] = numpy.zeros(validator.adata.var.shape[0], dtype=numpy.float32)
         validator.adata.raw.X[1] = numpy.zeros(validator.adata.var.shape[0], dtype=numpy.float32)
         validator.validate_adata()
@@ -298,39 +307,101 @@ def test_raw_values__contains_some_zero_rows_in_tissue_0(self, validator_with_vi
         validator.validate_adata()
         assert validator.errors == []
 
-    def test_raw_values__invalid_visium_and_is_single_true_row_length(self, validator_with_visium_assay):
+    @pytest.mark.parametrize(
+        "assay_ontology_term_id, req_matrix_size, image_size",
+        [
+            ("EFO:0022858", VISIUM_AND_IS_SINGLE_TRUE_MATRIX_SIZE, SPATIAL_HIRES_IMAGE_MAX_DIMENSION_SIZE),
+            (
+                "EFO:0022860",
+                VISIUM_11MM_AND_IS_SINGLE_TRUE_MATRIX_SIZE,
+                SPATIAL_HIRES_IMAGE_MAX_DIMENSION_SIZE_VISIUM_11MM,
+            ),
+        ],
+    )
+    def test_raw_values__invalid_visium_and_is_single_true_row_length(
+        self, validator_with_visium_assay, assay_ontology_term_id, req_matrix_size, image_size
+    ):
         """
         Dataset is visium and uns['is_single'] is True, but raw.X is the wrong length.
         """
-        validator = validator_with_visium_assay
-        validator.visium_and_is_single_true_matrix_size = VISIUM_AND_IS_SINGLE_TRUE_MATRIX_SIZE
+        validator: Validator = validator_with_visium_assay
+        validator.adata.obs["assay_ontology_term_id"] = assay_ontology_term_id
+
+        # hires image size must be present in order to validate the raw.
+        validator._visium_and_is_single_true_matrix_size = None
+        validator._hires_max_dimension_size = image_size
+        validator.adata.uns["spatial"][visium_library_id]["images"]["hires"] = numpy.zeros(
+            (1, image_size, 3), dtype=numpy.uint8
+        )
 
         validator.validate_adata()
-        assert validator.errors == [
-            f"ERROR: When {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE}, the raw matrix must be the "
-            "unfiltered feature-barcode matrix 'raw_feature_bc_matrix'. It must have exactly "
-            f"{validator.visium_and_is_single_true_matrix_size} rows. Raw matrix row count is 2.",
-            "ERROR: Raw data may be missing: data in 'raw.X' does not meet schema requirements.",
-        ]
+        if assay_ontology_term_id == ASSAY_VISIUM_11M:
+            _errors = [
+                f"ERROR: When {ERROR_SUFFIX_VISIUM_11M} and {ERROR_SUFFIX_IS_SINGLE}, the raw matrix must be the "
+                "unfiltered feature-barcode matrix 'raw_feature_bc_matrix'. It must have exactly "
+                f"{validator.visium_and_is_single_true_matrix_size} rows. Raw matrix row count is 2.",
+                "ERROR: Raw data may be missing: data in 'raw.X' does not meet schema requirements.",
+            ]
+        else:
+            _errors = [
+                f"ERROR: When {ERROR_SUFFIX_VISIUM} and {ERROR_SUFFIX_IS_SINGLE}, the raw matrix must be the "
+                "unfiltered feature-barcode matrix 'raw_feature_bc_matrix'. It must have exactly "
+                f"{validator.visium_and_is_single_true_matrix_size} rows. Raw matrix row count is 2.",
+                "ERROR: Raw data may be missing: data in 'raw.X' does not meet schema requirements.",
+            ]
+
+        assert validator.errors == _errors
 
-    def test_raw_values__multiple_invalid_in_tissue_errors(self, validator_with_visium_assay):
+    @pytest.mark.parametrize(
+        "assay_ontology_term_id, req_matrix_size, image_size",
+        [
+            ("EFO:0022858", VISIUM_AND_IS_SINGLE_TRUE_MATRIX_SIZE, SPATIAL_HIRES_IMAGE_MAX_DIMENSION_SIZE),
+            (
+                "EFO:0022860",
+                VISIUM_11MM_AND_IS_SINGLE_TRUE_MATRIX_SIZE,
+                SPATIAL_HIRES_IMAGE_MAX_DIMENSION_SIZE_VISIUM_11MM,
+            ),
+        ],
+    )
+    def test_raw_values__multiple_invalid_in_tissue_errors(
+        self, validator_with_visium_assay, assay_ontology_term_id, req_matrix_size, image_size
+    ):
         """
         Dataset is visium and uns['is_single'] is True, in_tissue has both 0 and 1 values and there
         are issues validating rows of both in the matrix.
         """
 
         validator = validator_with_visium_assay
-        validator.visium_and_is_single_true_matrix_size = VISIUM_AND_IS_SINGLE_TRUE_MATRIX_SIZE
+
+        validator.adata.obs["assay_ontology_term_id"] = assay_ontology_term_id
+        # hires image size must be present in order to validate the raw.
+        validator._visium_and_is_single_true_matrix_size = None
+        validator._hires_max_dimension_size = image_size
+        validator.adata.uns["spatial"][visium_library_id]["images"]["hires"] = numpy.zeros(
+            (1, image_size, 3), dtype=numpy.uint8
+        )
         validator.adata.X = numpy.zeros(
             [validator.adata.obs.shape[0], validator.adata.var.shape[0]], dtype=numpy.float32
         )
         validator.adata.raw = validator.adata.copy()
         validator.adata.raw.var.drop("feature_is_filtered", axis=1, inplace=True)
         validator.validate_adata()
-        assert validator.errors == [
-            f"ERROR: When {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE}, the raw matrix must be the "
-            "unfiltered feature-barcode matrix 'raw_feature_bc_matrix'. It must have exactly "
-            f"{validator.visium_and_is_single_true_matrix_size} rows. Raw matrix row count is 2.",
+        if assay_ontology_term_id == ASSAY_VISIUM_11M:
+            assert (
+                validator.errors[0]
+                == f"ERROR: When {ERROR_SUFFIX_VISIUM_11M} and {ERROR_SUFFIX_IS_SINGLE}, the raw matrix must be the "
+                "unfiltered feature-barcode matrix 'raw_feature_bc_matrix'. It must have exactly "
+                f"{validator.visium_and_is_single_true_matrix_size} rows. Raw matrix row count is 2."
+            )
+        else:
+            assert (
+                validator.errors[0]
+                == f"ERROR: When {ERROR_SUFFIX_VISIUM} and {ERROR_SUFFIX_IS_SINGLE}, the raw matrix must be the "
+                "unfiltered feature-barcode matrix 'raw_feature_bc_matrix'. It must have exactly "
+                f"{validator.visium_and_is_single_true_matrix_size} rows. Raw matrix row count is 2."
+            )
+
+        assert validator.errors[1:] == [
             "ERROR: If obs['in_tissue'] contains at least one value 0, then there must be at least "
             "one row with obs['in_tissue'] == 0 that has a non-zero value in the raw matrix.",
             "ERROR: Each observation with obs['in_tissue'] == 1 must have at least one "
@@ -496,7 +567,7 @@ def test_column_presence_in_tissue(self, validator_with_visium_assay, assay_onto
             assert validator.errors == []
         else:
             assert validator.errors == [
-                "obs['in_tissue'] is only allowed for obs['assay_ontology_term_id'] is a descendant of 'EFO:0010961' (Visium Spatial Gene Expression) and uns['spatial']['is_single'] is True."
+                f"obs['in_tissue'] is only allowed for {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE}."
             ]
 
     @pytest.mark.parametrize("reserved_column", schema_def["components"]["obs"]["reserved_columns"])
diff --git a/cellxgene_schema_cli/tests/test_validate.py b/cellxgene_schema_cli/tests/test_validate.py
index 9ab30177..9ea024b1 100644
--- a/cellxgene_schema_cli/tests/test_validate.py
+++ b/cellxgene_schema_cli/tests/test_validate.py
@@ -364,7 +364,7 @@ def test__validate_spatial_visium_ok(self):
         validator: Validator = Validator()
         validator._set_schema_def()
         validator.adata = adata_visium.copy()
-        validator.visium_and_is_single_true_matrix_size = 2
+        validator._visium_and_is_single_true_matrix_size = 2
         # Confirm spatial is valid.
         validator.validate_adata()
         assert not validator.errors
@@ -384,7 +384,7 @@ def test__validate_spatial_visium_dense_matrix_ok(self):
         validator: Validator = Validator()
         validator._set_schema_def()
         validator.adata = adata_visium.copy()
-        validator.visium_and_is_single_true_matrix_size = 2
+        validator._visium_and_is_single_true_matrix_size = 2
         validator.adata.X = validator.adata.X.toarray()
         validator.adata.raw = validator.adata.copy()
         validator.adata.raw.var.drop("feature_is_filtered", axis=1, inplace=True)
@@ -1141,7 +1141,7 @@ def test__validate_embeddings_non_nans(self):
         validator: Validator = Validator()
         validator._set_schema_def()
         validator.adata = adata_visium.copy()
-        validator.visium_and_is_single_true_matrix_size = 2
+        validator._visium_and_is_single_true_matrix_size = 2
 
         # invalidate spatial embeddings with NaN value
         validator.adata.obsm["spatial"][0, 1] = np.nan

From 5293502f26362d940ab9ef754b57ba1083f8c770 Mon Sep 17 00:00:00 2001
From: Evan Molinelli <ejmolinelli@users.noreply.github.com>
Date: Tue, 3 Dec 2024 13:47:01 -0500
Subject: [PATCH 16/28] fix: handle both string and category encoded
 "assay_ontology_term_id" (#1142)

Co-authored-by: Evan Molinelli <emolinelli@Evan-CZI-Laptop.local>
---
 .../cellxgene_schema/validate.py               | 18 ++++++++++--------
 .../tests/test_schema_compliance.py            | 17 +++++++++++++++++
 2 files changed, 27 insertions(+), 8 deletions(-)

diff --git a/cellxgene_schema_cli/cellxgene_schema/validate.py b/cellxgene_schema_cli/cellxgene_schema/validate.py
index 7ce2d44a..434983bc 100644
--- a/cellxgene_schema_cli/cellxgene_schema/validate.py
+++ b/cellxgene_schema_cli/cellxgene_schema/validate.py
@@ -1648,8 +1648,10 @@ def _validate_spatial_cell_type_ontology_term_id(self):
             return
 
         # Validate all out of tissue (in_tissue==0) spatial spots have unknown cell ontology term
-        is_spatial = self.adata.obs["assay_ontology_term_id"].apply(
-            lambda assay: is_ontological_descendant_of(ONTOLOGY_PARSER, assay, ASSAY_VISIUM, True)
+        is_spatial = (
+            self.adata.obs["assay_ontology_term_id"]
+            .apply(lambda assay: is_ontological_descendant_of(ONTOLOGY_PARSER, assay, ASSAY_VISIUM, True))
+            .astype(bool)
         )
         is_not_tissue = self.adata.obs["in_tissue"] == 0
         is_not_unknown = self.adata.obs["cell_type_ontology_term_id"] != "unknown"
@@ -1676,9 +1678,9 @@ def _validate_spatial_tissue_position(self, tissue_position_name: str, min: int,
             not (self.is_visium_and_is_single_true)
             or (
                 ~(
-                    self.adata.obs["assay_ontology_term_id"].apply(
-                        lambda t: is_ontological_descendant_of(ONTOLOGY_PARSER, t, ASSAY_VISIUM, True)
-                    )
+                    self.adata.obs["assay_ontology_term_id"]
+                    .apply(lambda t: is_ontological_descendant_of(ONTOLOGY_PARSER, t, ASSAY_VISIUM, True))
+                    .astype(bool)
                 )
                 & (self.adata.obs[tissue_position_name].notnull())
             ).any()
@@ -1697,9 +1699,9 @@ def _validate_spatial_tissue_position(self, tissue_position_name: str, min: int,
             tissue_position_name not in self.adata.obs
             or (
                 (
-                    self.adata.obs["assay_ontology_term_id"].apply(
-                        lambda t: is_ontological_descendant_of(ONTOLOGY_PARSER, t, ASSAY_VISIUM, True)
-                    )
+                    self.adata.obs["assay_ontology_term_id"]
+                    .apply(lambda t: is_ontological_descendant_of(ONTOLOGY_PARSER, t, ASSAY_VISIUM, True))
+                    .astype(bool)
                 )
                 & (self.adata.obs[tissue_position_name].isnull())
             ).any()
diff --git a/cellxgene_schema_cli/tests/test_schema_compliance.py b/cellxgene_schema_cli/tests/test_schema_compliance.py
index 88fe4e2c..616da096 100644
--- a/cellxgene_schema_cli/tests/test_schema_compliance.py
+++ b/cellxgene_schema_cli/tests/test_schema_compliance.py
@@ -562,6 +562,7 @@ def test_column_presence_in_tissue(self, validator_with_visium_assay, assay_onto
         # reset and test
         validator.reset()
         validator.adata.obs["assay_ontology_term_id"] = assay_ontology_term_id
+        validator.adata.obs["assay_ontology_term_id"] = validator.adata.obs["assay_ontology_term_id"].astype("category")
         validator._validate_spatial_tissue_position("in_tissue", 0, 1)
         if is_descendant:
             assert validator.errors == []
@@ -632,6 +633,22 @@ def test_assay_ontology_term_id(self, validator_with_adata, assay_ontology_term_
         ]
         assert validator.errors == [self.get_format_error_message(error_message_suffix, error)]
 
+    def test_assay_ontology_term_id__as_categorical(self, validator_with_visium_assay):
+        """
+        Formally, assay_ontology_term_id is expected to be a categorical variable with string categories. However, validation should work whether the column is string-encoded or category-encoded.
+        """
+        validator: Validator = validator_with_visium_assay
+
+        # check encoding as string
+        validator._check_spatial_obs()
+        assert validator.errors == []
+        validator.reset()
+
+        # force encoding as 'categorical'
+        validator.adata.obs["assay_ontology_term_id"] = validator.adata.obs["assay_ontology_term_id"].astype("category")
+        validator._check_spatial_obs()
+        assert validator.errors == []
+
     def test_cell_type_ontology_term_id_invalid_term(self, validator_with_adata):
         validator = validator_with_adata
         validator.adata.obs.loc[validator.adata.obs.index[0], "cell_type_ontology_term_id"] = "EFO:0000001"

From bc905f4cfa7eb70955e411eaf8126996f30e41dc Mon Sep 17 00:00:00 2001
From: Trent Smith <1429913+Bento007@users.noreply.github.com>
Date: Tue, 3 Dec 2024 11:19:15 -0800
Subject: [PATCH 17/28] fix: clean up the cli (#1108)

---
 cellxgene_schema_cli/cellxgene_schema/cli.py  | 37 +++++++++----------
 .../cellxgene_schema/validate.py              |  4 --
 2 files changed, 18 insertions(+), 23 deletions(-)

diff --git a/cellxgene_schema_cli/cellxgene_schema/cli.py b/cellxgene_schema_cli/cellxgene_schema/cli.py
index 33fce82b..1254a7ba 100644
--- a/cellxgene_schema_cli/cellxgene_schema/cli.py
+++ b/cellxgene_schema_cli/cellxgene_schema/cli.py
@@ -1,7 +1,10 @@
+import logging
 import sys
 
 import click
 
+logger = logging.getLogger("cellxgene_schema")
+
 
 @click.group(
     name="schema",
@@ -9,11 +12,13 @@
     short_help="Apply and validate the cellxgene data integration schema to an h5ad file.",
     context_settings=dict(max_content_width=85, help_option_names=["-h", "--help"]),
 )
-def schema_cli():
-    pass
+@click.option("-v", "--verbose", help="When present will set logging level to debug", is_flag=True)
+def schema_cli(verbose):
+    logging.basicConfig(level=logging.ERROR)
+    logger.setLevel(logging.DEBUG if verbose else logging.INFO)
 
 
-@click.command(
+@schema_cli.command(
     name="validate",
     short_help="Check that an h5ad follows the cellxgene data integration schema.",
     help="Check that an h5ad follows the cellxgene data integration schema. If validation fails this command will "
@@ -31,27 +36,25 @@ def schema_cli():
     type=click.Path(exists=False, dir_okay=False, writable=True),
 )
 @click.option("-i", "--ignore-labels", help="Ignore ontology labels when validating", is_flag=True)
-@click.option("-v", "--verbose", help="When present will set logging level to debug", is_flag=True)
-def schema_validate(h5ad_file, add_labels_file, ignore_labels, verbose):
+def schema_validate(h5ad_file, add_labels_file, ignore_labels):
     # Imports are very slow so we defer loading until Click arg validation has passed
-
-    print("Loading dependencies")
+    logger.info("Loading dependencies")
     try:
         import anndata  # noqa: F401
     except ImportError:
         raise click.ClickException("[cellxgene] cellxgene-schema requires anndata") from None
 
-    print("Loading validator modules")
+    logger.info("Loading validator modules")
     from .validate import validate
 
-    is_valid, _, _ = validate(h5ad_file, add_labels_file, ignore_labels=ignore_labels, verbose=verbose)
+    is_valid, _, _ = validate(h5ad_file, add_labels_file, ignore_labels=ignore_labels)
     if is_valid:
         sys.exit(0)
     else:
         sys.exit(1)
 
 
-@click.command(
+@schema_cli.command(
     name="remove-labels",
     short_help="Create a copy of an h5ad without portal-added labels",
     help="Create a copy of an h5ad without portal-added labels.",
@@ -61,24 +64,24 @@ def schema_validate(h5ad_file, add_labels_file, ignore_labels, verbose):
 def remove_labels(input_file, output_file):
     from .remove_labels import AnnDataLabelRemover
 
-    print("Loading dependencies")
+    logger.info("Loading dependencies")
     try:
         import anndata  # noqa: F401
     except ImportError:
         raise click.ClickException("[cellxgene] cellxgene-schema requires anndata") from None
 
-    print(f"Loading h5ad from {input_file}")
+    logger.info(f"Loading h5ad from {input_file}")
     adata = anndata.read_h5ad(input_file)
     anndata_label_remover = AnnDataLabelRemover(adata)
     if not anndata_label_remover.schema_def:
         return
-    print("Removing labels")
+    logger.info("Removing labels")
     anndata_label_remover.remove_labels()
-    print(f"Labels have been removed. Writing to {output_file}")
+    logger.info(f"Labels have been removed. Writing to {output_file}")
     anndata_label_remover.adata.write(output_file, compression="gzip")
 
 
-@click.command(
+@schema_cli.command(
     name="migrate",
     short_help="Convert an h5ad to the latest schema version.",
     help="Convert an h5ad from the previous to latest minor schema version. No validation will be "
@@ -94,9 +97,5 @@ def migrate(input_file, output_file, collection_id, dataset_id):
     migrate(input_file, output_file, collection_id, dataset_id)
 
 
-schema_cli.add_command(schema_validate)
-schema_cli.add_command(migrate)
-schema_cli.add_command(remove_labels)
-
 if __name__ == "__main__":
     schema_cli()
diff --git a/cellxgene_schema_cli/cellxgene_schema/validate.py b/cellxgene_schema_cli/cellxgene_schema/validate.py
index 434983bc..4f92ef05 100644
--- a/cellxgene_schema_cli/cellxgene_schema/validate.py
+++ b/cellxgene_schema_cli/cellxgene_schema/validate.py
@@ -2130,10 +2130,6 @@ def validate(
 
     # Perform validation
     start = datetime.now()
-    if verbose:
-        logging.basicConfig(level=logging.DEBUG)
-    else:
-        logging.basicConfig(level=logging.INFO, format="%(message)s")
     validator = Validator(
         ignore_labels=ignore_labels,
     )

From be904b30f768679601aa65e74f1a4eb5d9275fab Mon Sep 17 00:00:00 2001
From: Brian Raymor <brianraymor@chanzuckerberg.com>
Date: Tue, 3 Dec 2024 13:48:47 -0800
Subject: [PATCH 18/28] updated sex_ontology_term_id (#1145)

---
 schema/drafts/5.3.0.md | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/schema/drafts/5.3.0.md b/schema/drafts/5.3.0.md
index ba1cd0ab..7090d268 100644
--- a/schema/drafts/5.3.0.md
+++ b/schema/drafts/5.3.0.md
@@ -1011,7 +1011,12 @@ If <code>organism_ontolology_term_id</code> is <code>"NCBITaxon:9606"</code> for
     </tr>
     <tr>
       <th>Value</th>
-        <td>categorical with <code>str</code> categories. This MUST be a descendant of <a href="https://www.ebi.ac.uk/ols4/ontologies/pato/classes?obo_id=PATO%3A0001894">PATO:0001894</a> for  <i>phenotypic sex</i> or <code>"unknown"</code> if unavailable.
+        <td>categorical with <code>str</code> categories. This MUST be <code>"unknown"</code> if unavailable; otherwise, this MUST be one of:<br><br>
+        <ul>
+        <li><a href="https://www.ebi.ac.uk/ols4/ontologies/pato/classes?obo_id=PATO%3A0000383">PATO:0000383</a> for  <i>female</i></li>
+        <li><a href="https://www.ebi.ac.uk/ols4/ontologies/pato/classes?obo_id=PATO%3A0000384">PATO:0000384</a> for  <i>male</i></li>
+        <li><a href="https://www.ebi.ac.uk/ols4/ontologies/pato/classes?obo_id=PATO%3A0001340">PATO:0001340</a> for  <i>hermaphrodite</i></li>
+        </ul>
         </td>
     </tr>
 </tbody></table>
@@ -2061,7 +2066,7 @@ When a dataset is uploaded, CELLxGENE Discover MUST automatically add the `schem
   * Updated _Visium Spatial Gene Expression_ table row to _Descendants of Visium Spatial Gene Expression_
   * Added matrix requirements for _Visium CytAssist Spatial Gene Expression, 11mm_.
 * obs (Cell metadata)
-  * Updated  the requirements for `array_col`:
+  * Updated the requirements for `array_col`:
     *  MUST be annotated if the `assay_ontology_term_id` is a descendant of _Visium Spatial Gene Expression_
     * Added ranges for _Visium CytAssist Spatial Gene Expression, 6.5mm_ and _Visium CytAssist Spatial Gene Expression, 11mm_ 
   * Updated the requirements for `array_row`:
@@ -2076,6 +2081,7 @@ When a dataset is uploaded, CELLxGENE Discover MUST automatically add the `schem
   * Added <code>genetic_ancestry_Oceanian</code>
   * Added <code>genetic_ancestry_South_Asian</code>
   * Updated the requirements for `in_tissue` to include descendants of  _Visium Spatial Gene Expression_.
+  * Updated the requirements for `sex_ontology_term_id` to limit values to <i>female</i>, <i>hermaphrodite</i>, <i>male</i>, or `"unknown"`
 * obsm (Embeddings)
   * Updated the requirements for `spatial` to include descendants of  _Visium Spatial Gene Expression_ and to prohibit 'Not a Number' values. 
   * Updated the requirements for `X_{suffix}` to include descendants of  _Visium Spatial Gene Expression_.

From 3d2fa6ddff1e2bcf2be99b04136fdfbed82fe2d8 Mon Sep 17 00:00:00 2001
From: Joyce Yan <5653616+joyceyan@users.noreply.github.com>
Date: Thu, 5 Dec 2024 13:20:43 -0800
Subject: [PATCH 19/28] chore: add back return value from is_seurat_convertible
 (#1147)

---
 cellxgene_schema_cli/cellxgene_schema/validate.py | 11 ++++++-----
 cellxgene_schema_cli/tests/test_validate.py       |  8 ++++----
 2 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/cellxgene_schema_cli/cellxgene_schema/validate.py b/cellxgene_schema_cli/cellxgene_schema/validate.py
index 4f92ef05..60d3a6ba 100644
--- a/cellxgene_schema_cli/cellxgene_schema/validate.py
+++ b/cellxgene_schema_cli/cellxgene_schema/validate.py
@@ -2115,7 +2115,7 @@ def validate(
     add_labels_file: str = None,
     ignore_labels: bool = False,
     verbose: bool = False,
-) -> (bool, list):
+) -> (bool, list, bool):
     from .write_labels import AnnDataLabelAppender
 
     """
@@ -2124,7 +2124,8 @@ def validate(
     :param Union[str, bytes, os.PathLike] h5ad_path: Path to h5ad file to validate
     :param str add_labels_file: Path to new h5ad file with ontology/gene labels added
 
-    :return (True, []) if successful validation, (False, [list_of_errors]) otherwise
+    :return (True, [], False) if successful validation, (False, [list_of_errors], False) otherwise;
+    last bool is for seurat convertibility which is deprecated / unused
     :rtype tuple
     """
 
@@ -2138,7 +2139,7 @@ def validate(
 
     # Stop if validation was unsuccessful
     if not validator.is_valid:
-        return False, validator.errors
+        return False, validator.errors, False
 
     if add_labels_file:
         label_start = datetime.now()
@@ -2149,6 +2150,6 @@ def validate(
             f"{writer.was_writing_successful}"
         )
 
-        return (validator.is_valid and writer.was_writing_successful, validator.errors + writer.errors)
+        return (validator.is_valid and writer.was_writing_successful, validator.errors + writer.errors, False)
 
-    return True, validator.errors
+    return True, validator.errors, False
diff --git a/cellxgene_schema_cli/tests/test_validate.py b/cellxgene_schema_cli/tests/test_validate.py
index 9ea024b1..b60f2a19 100644
--- a/cellxgene_schema_cli/tests/test_validate.py
+++ b/cellxgene_schema_cli/tests/test_validate.py
@@ -297,7 +297,7 @@ def test__validate_with_h5ad_valid_and_labels(self):
         with tempfile.TemporaryDirectory() as temp_dir:
             labels_path = "/".join([temp_dir, "labels.h5ad"])
 
-            success, errors = validate(h5ad_valid, labels_path)
+            success, errors, _ = validate(h5ad_valid, labels_path)
 
             import anndata as ad
 
@@ -312,7 +312,7 @@ def test__validate_with_h5ad_valid_and_labels(self):
             assert original_hash != expected_hash, "Writing labels did not change the dataset from the original."
 
     def test__validate_with_h5ad_valid_and_without_labels(self):
-        success, errors = validate(h5ad_valid)
+        success, errors, _ = validate(h5ad_valid)
 
         assert success
         assert not errors
@@ -321,14 +321,14 @@ def test__validate_with_h5ad_invalid_and_with_labels(self):
         with tempfile.TemporaryDirectory() as temp_dir:
             labels_path = "/".join([temp_dir, "labels.h5ad"])
 
-            success, errors = validate(h5ad_invalid, labels_path)
+            success, errors, _ = validate(h5ad_invalid, labels_path)
 
             assert not success
             assert errors
             assert not os.path.exists(labels_path)
 
     def test__validate_with_h5ad_invalid_and_without_labels(self):
-        success, errors = validate(h5ad_invalid)
+        success, errors, _ = validate(h5ad_invalid)
 
         assert not success
         assert errors

From b517b86b5d07bcf9520196990c856137ab743a6c Mon Sep 17 00:00:00 2001
From: Evan Molinelli <ejmolinelli@users.noreply.github.com>
Date: Fri, 6 Dec 2024 09:59:57 -0500
Subject: [PATCH 20/28] feat: differential tissue position row/col max sizes
 for visium and visium 11 (#1143)

Co-authored-by: Evan Molinelli <emolinelli@Evan-CZI-Laptop.local>
---
 .../cellxgene_schema/validate.py              | 29 ++++++++-
 cellxgene_schema_cli/tests/test_validate.py   | 65 ++++++++++++-------
 2 files changed, 69 insertions(+), 25 deletions(-)

diff --git a/cellxgene_schema_cli/cellxgene_schema/validate.py b/cellxgene_schema_cli/cellxgene_schema/validate.py
index 60d3a6ba..5510536e 100644
--- a/cellxgene_schema_cli/cellxgene_schema/validate.py
+++ b/cellxgene_schema_cli/cellxgene_schema/validate.py
@@ -4,7 +4,7 @@
 import os
 import re
 from datetime import datetime
-from typing import Dict, List, Mapping, Optional, Union
+from typing import Dict, List, Mapping, Optional, Tuple, Union
 
 import anndata
 import matplotlib.colors as mcolors
@@ -29,6 +29,10 @@
 
 VISIUM_AND_IS_SINGLE_TRUE_MATRIX_SIZE = 4992
 VISIUM_11MM_AND_IS_SINGLE_TRUE_MATRIX_SIZE = 14336
+VISIUM_TISSUE_POSITION_MAX_ROW = 77
+VISIUM_TISSUE_POSITION_MAX_COL = 127
+VISIUM_11MM_TISSUE_POSITION_MAX_ROW = 127
+VISIUM_11MM_TISSUE_POSITION_MAX_COL = 223
 SPATIAL_HIRES_IMAGE_MAX_DIMENSION_SIZE = 2000
 SPATIAL_HIRES_IMAGE_MAX_DIMENSION_SIZE_VISIUM_11MM = 4000
 
@@ -57,6 +61,7 @@ def __init__(self, ignore_labels=False):
         self._visium_and_is_single_true_matrix_size = None
         self._hires_max_dimension_size = None
         self._visium_error_suffix = None
+        self._visium_tissue_position_max = None
 
         # Values will be instances of gencode.GeneChecker,
         # keys will be one of gencode.SupportedOrganisms
@@ -122,6 +127,24 @@ def hires_max_dimension_size(self) -> Optional[int]:
                 self._hires_max_dimension_size = SPATIAL_HIRES_IMAGE_MAX_DIMENSION_SIZE
         return self._hires_max_dimension_size
 
+    @property
+    def tissue_position_maxes(self) -> Tuple[int, int]:
+        if self._visium_tissue_position_max is None and self._is_visium_and_is_single_true:
+            # visium 11 has different requirements than other visium
+            if (
+                self.adata.obs["assay_ontology_term_id"]
+                .apply(lambda t: is_ontological_descendant_of(ONTOLOGY_PARSER, t, ASSAY_VISIUM_11M, True))
+                .astype(bool)
+                .any()
+            ):
+                self._visium_tissue_position_max = (
+                    VISIUM_11MM_TISSUE_POSITION_MAX_ROW,
+                    VISIUM_11MM_TISSUE_POSITION_MAX_COL,
+                )
+            else:
+                self._visium_tissue_position_max = (VISIUM_TISSUE_POSITION_MAX_ROW, VISIUM_TISSUE_POSITION_MAX_COL)
+        return self._visium_tissue_position_max
+
     def _is_single(self) -> bool | None:
         """
         Determine value of uns.spatial.is_single. None if non-spatial.
@@ -1732,8 +1755,8 @@ def _validate_spatial_tissue_positions(self):
 
         :rtype none
         """
-        self._validate_spatial_tissue_position("array_col", 0, 127)
-        self._validate_spatial_tissue_position("array_row", 0, 77)
+        self._validate_spatial_tissue_position("array_col", 0, self.tissue_position_maxes[1])
+        self._validate_spatial_tissue_position("array_row", 0, self.tissue_position_maxes[0])
         self._validate_spatial_tissue_position("in_tissue", 0, 1)
 
     def _check_spatial_uns(self):
diff --git a/cellxgene_schema_cli/tests/test_validate.py b/cellxgene_schema_cli/tests/test_validate.py
index b60f2a19..cd7652bf 100644
--- a/cellxgene_schema_cli/tests/test_validate.py
+++ b/cellxgene_schema_cli/tests/test_validate.py
@@ -1,5 +1,6 @@
 import hashlib
 import os
+import re
 import tempfile
 from typing import Union
 from unittest import mock
@@ -1011,21 +1012,32 @@ def test__validate_tissue_position_required(self, tissue_position_name):
         validator.adata = adata_visium.copy()
         validator.adata.obs.pop(tissue_position_name)
 
+        # check visium
+        validator.adata.obs["assay_ontology_term_id"] = "EFO:0010961"
         validator._check_spatial_obs()
         assert validator.errors
         assert (
             f"obs['{tissue_position_name}'] {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_REQUIRED}." in validator.errors[0]
         )
+        validator.reset()
+
+        # check visium descendant
+        validator.adata.obs["assay_ontology_term_id"] = "EFO:0022860"
+        validator._check_spatial_obs()
+        assert validator.errors
+        assert (
+            f"obs['{tissue_position_name}'] {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_REQUIRED}." in validator.errors[0]
+        )
+        validator.reset()
 
-    @pytest.mark.parametrize("assay_ontology_term_id", ["EFO:0010961", "EFO:0030062"])
+    @pytest.mark.parametrize("assay_ontology_term_id", ["EFO:0010961", "EFO:0030062", "EFO:0022860"])
     def test__validate_tissue_position_not_required(self, assay_ontology_term_id):
         validator: Validator = Validator()
         validator._set_schema_def()
         validator.adata = adata_slide_seqv2.copy()
         validator.adata.obs["assay_ontology_term_id"] = assay_ontology_term_id
-        validator.adata.uns["spatial"]["is_single"] = False
+        validator.adata.uns["spatial"]["is_single"] = False  # setting to false removes the requirement
         validator.adata.obs["is_primary_data"] = False
-
         validator._check_spatial_obs()
         assert not validator.errors
 
@@ -1041,43 +1053,52 @@ def test__validate_tissue_position_int_error(self, tissue_position_name):
         assert validator.errors
         assert f"obs['{tissue_position_name}'] must be of int type" in validator.errors[0]
 
-    @pytest.mark.parametrize(
-        "tissue_position_name, min, error_message_token",
-        [
-            ("array_col", 0, "between 0 and 127"),
-            ("array_row", 0, "between 0 and 77"),
-            ("in_tissue", 0, "0 or 1"),
-        ],
-    )
-    def test__validate_tissue_position_int_min_error(self, tissue_position_name, min, error_message_token):
+    @pytest.mark.parametrize("assay_ontology_term_id", ["EFO:0010961", "EFO:0022860", "EFO:0022859"])
+    @pytest.mark.parametrize("tissue_position_name, min", [("array_col", 0), ("array_row", 0), ("in_tissue", 0)])
+    def test__validate_tissue_position_int_min_error(self, assay_ontology_term_id, tissue_position_name, min):
         validator: Validator = Validator()
         validator._set_schema_def()
         validator.adata = adata_visium.copy()
+        validator.adata.obs["assay_ontology_term_id"] = assay_ontology_term_id
         validator.adata.obs[tissue_position_name] = min - 1
 
         # Confirm tissue_position is identified as invalid.
         validator._check_spatial_obs()
-        assert validator.errors
-        assert f"obs['{tissue_position_name}'] must be {error_message_token}" in validator.errors[0]
+        assert (
+            re.match(f"^obs\['{tissue_position_name}'\] must be (between )?{min} (and|or) [0-9]+", validator.errors[0])
+            is not None
+        )
 
     @pytest.mark.parametrize(
-        "tissue_position_name, max, error_message_token",
+        "assay_ontology_term_id, tissue_position_name, tissue_position_max",
         [
-            ("array_col", 127, "between 0 and 127"),
-            ("array_row", 77, "between 0 and 77"),
-            ("in_tissue", 1, "0 or 1"),
+            ("EFO:0010961", "array_col", 127),
+            ("EFO:0010961", "array_row", 77),
+            ("EFO:0022860", "array_col", 223),
+            ("EFO:0022860", "array_row", 127),
+            ("EFO:0022859", "array_col", 127),
+            ("EFO:0022859", "array_row", 77),
+            ("EFO:0022859", "in_tissue", 1),
         ],
     )
-    def test__validate_tissue_position_int_max_error(self, tissue_position_name, max, error_message_token):
+    def test__validate_tissue_position_int_max_error(
+        self, assay_ontology_term_id, tissue_position_name, tissue_position_max
+    ):
         validator: Validator = Validator()
         validator._set_schema_def()
         validator.adata = adata_visium.copy()
-        validator.adata.obs[tissue_position_name] = max + 1
+        validator.adata.obs["assay_ontology_term_id"] = assay_ontology_term_id
+        validator.adata.obs[tissue_position_name] = tissue_position_max + 1
 
         # Confirm tissue_position is identified as invalid.
         validator._check_spatial_obs()
-        assert validator.errors
-        assert f"obs['{tissue_position_name}'] must be {error_message_token}" in validator.errors[0]
+        assert (
+            re.match(
+                f"^obs\['{tissue_position_name}'\] must be (between )?[0-9]+ (and|or) {tissue_position_max}",
+                validator.errors[0],
+            )
+            is not None
+        )
 
     @pytest.mark.parametrize(
         "cell_type_ontology_term_id, in_tissue, assay_ontology_term_id",

From 9f84b18fab6d5b58f008171469037ddca6cc6e9f Mon Sep 17 00:00:00 2001
From: Evan Molinelli <ejmolinelli@users.noreply.github.com>
Date: Fri, 6 Dec 2024 10:48:24 -0500
Subject: [PATCH 21/28] feat: support for visium descendants in
 obs['assay_ontology_term_id'] (#1148)

Co-authored-by: Evan Molinelli <emolinelli@Evan-CZI-Laptop.local>
---
 .../cellxgene_schema/validate.py              | 13 +++--
 .../tests/test_schema_compliance.py           | 54 ++++++++++++++++---
 2 files changed, 55 insertions(+), 12 deletions(-)

diff --git a/cellxgene_schema_cli/cellxgene_schema/validate.py b/cellxgene_schema_cli/cellxgene_schema/validate.py
index 5510536e..781b72d1 100644
--- a/cellxgene_schema_cli/cellxgene_schema/validate.py
+++ b/cellxgene_schema_cli/cellxgene_schema/validate.py
@@ -67,7 +67,7 @@ def __init__(self, ignore_labels=False):
         # keys will be one of gencode.SupportedOrganisms
         self.gene_checkers = dict()
 
-    def reset(self):
+    def reset(self, hi_res_size: Optional[int] = None, true_mat_size: Optional[int] = None):
         self.errors = []
         self.warnings = []
         self.is_valid = False
@@ -76,6 +76,8 @@ def reset(self):
         self.is_spatial = None
         self.is_visium = None
         self.is_visium_and_is_single_true = None
+        self._hires_max_dimension_size = hi_res_size
+        self._visium_and_is_single_true_matrix_size = true_mat_size
 
         # Matrix (e.g., X, raw.X, ...) number non-zero cache
         self.number_non_zero = dict()
@@ -99,6 +101,7 @@ def visium_and_is_single_true_matrix_size(self) -> Optional[int]:
             if bool(
                 self.adata.obs["assay_ontology_term_id"]
                 .apply(lambda t: is_ontological_descendant_of(ONTOLOGY_PARSER, t, ASSAY_VISIUM_11M, True))
+                .astype(bool)
                 .any()
             ):
                 self._visium_error_suffix = f"{ERROR_SUFFIX_VISIUM_11M} and {ERROR_SUFFIX_IS_SINGLE}"
@@ -118,6 +121,7 @@ def hires_max_dimension_size(self) -> Optional[int]:
             if bool(
                 self.adata.obs["assay_ontology_term_id"]
                 .apply(lambda t: is_ontological_descendant_of(ONTOLOGY_PARSER, t, ASSAY_VISIUM_11M, True))
+                .astype(bool)
                 .any()
             ):
                 self._visium_error_suffix = ERROR_SUFFIX_VISIUM_11M
@@ -172,7 +176,7 @@ def _is_supported_spatial_assay(self) -> bool:
             try:
                 _spatial = (
                     self._is_visium_including_descendants()
-                    or self.adata.obs.assay_ontology_term_id.isin([ASSAY_SLIDE_SEQV2]).any()
+                    or self.adata.obs.assay_ontology_term_id.isin([ASSAY_SLIDE_SEQV2]).astype(bool).any()
                 )
                 self.is_spatial = bool(_spatial)
             except AttributeError:
@@ -1981,6 +1985,7 @@ def _is_visium_including_descendants(self) -> bool:
                 self.adata.obs[_assay_key]
                 .astype("string")
                 .apply(lambda assay: is_ontological_descendant_of(ONTOLOGY_PARSER, assay, ASSAY_VISIUM, True))
+                .astype(bool)
                 .any()
             )
 
@@ -2099,8 +2104,6 @@ def validate_adata(self, h5ad_path: Union[str, bytes, os.PathLike] = None) -> bo
         :rtype bool
         """
         logger.info("Starting validation...")
-        # Re-start errors in case a new h5ad is being validated
-        self.reset()
 
         if h5ad_path:
             logger.debug("Reading the h5ad file...")
@@ -2108,6 +2111,8 @@ def validate_adata(self, h5ad_path: Union[str, bytes, os.PathLike] = None) -> bo
             self.h5ad_path = h5ad_path
             self._validate_encoding_version()
             logger.debug("Successfully read the h5ad file")
+            # Re-start errors in case a new h5ad is being validated
+            self.reset()
 
         # Fetches schema def for latest major schema version
         self._set_schema_def()
diff --git a/cellxgene_schema_cli/tests/test_schema_compliance.py b/cellxgene_schema_cli/tests/test_schema_compliance.py
index 616da096..aa69890e 100644
--- a/cellxgene_schema_cli/tests/test_schema_compliance.py
+++ b/cellxgene_schema_cli/tests/test_schema_compliance.py
@@ -17,6 +17,7 @@
 from cellxgene_schema.validate import (
     ASSAY_VISIUM_11M,
     ERROR_SUFFIX_IS_SINGLE,
+    ERROR_SUFFIX_SPATIAL,
     ERROR_SUFFIX_VISIUM,
     ERROR_SUFFIX_VISIUM_11M,
     ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE,
@@ -85,8 +86,8 @@ def validator_with_spatial_and_is_single_false(validator) -> Validator:
 @pytest.fixture
 def validator_with_visium_assay(validator) -> Validator:
     validator.adata = examples.adata_visium.copy()
-    validator._visium_and_is_single_true_matrix_size = 2
-    validator._hires_max_dimension_size = None
+    validator.reset(None, None)
+
     return validator
 
 
@@ -207,6 +208,7 @@ def test_raw_values__invalid_spatial(self, validator_with_visium_assay, invalid_
 
         validator = validator_with_visium_assay
         validator.adata.raw.X[0, 1] = invalid_value
+        validator.reset(None, 2)
         validator.validate_adata()
         assert validator.errors == [
             "ERROR: All non-zero values in raw matrix must be positive integers of type numpy.float32.",
@@ -247,7 +249,8 @@ def test_raw_values__contains_zero_row_in_tissue_1(self, validator_with_visium_a
         Raw Matrix contains a row with all zeros and in_tissue is 1, but no values are in_tissue 0.
         """
 
-        validator = validator_with_visium_assay
+        validator: Validator = validator_with_visium_assay
+        validator.reset(None, 2)
         validator.adata.obs["in_tissue"] = 1
         validator.adata.X[0] = numpy.zeros(validator.adata.var.shape[0], dtype=numpy.float32)
         validator.adata.raw.X[0] = numpy.zeros(validator.adata.var.shape[0], dtype=numpy.float32)
@@ -265,6 +268,7 @@ def test_raw_values__contains_zero_row_in_tissue_1_mixed_in_tissue_values(self,
         validator: Validator = validator_with_visium_assay
         validator.adata.X[1] = numpy.zeros(validator.adata.var.shape[0], dtype=numpy.float32)
         validator.adata.raw.X[1] = numpy.zeros(validator.adata.var.shape[0], dtype=numpy.float32)
+        validator.reset(None, 2)
         validator.validate_adata()
         assert validator.errors == [
             "ERROR: Each observation with obs['in_tissue'] == 1 must have at least one "
@@ -286,6 +290,7 @@ def test_raw_values__contains_all_zero_rows_in_tissue_0(self, validator_with_vis
         )
         validator.adata.raw = validator.adata.copy()
         validator.adata.raw.var.drop("feature_is_filtered", axis=1, inplace=True)
+        validator.reset(None, 2)
         validator.validate_adata()
         assert validator.errors == [
             "ERROR: If obs['in_tissue'] contains at least one value 0, then there must be at least "
@@ -304,6 +309,7 @@ def test_raw_values__contains_some_zero_rows_in_tissue_0(self, validator_with_vi
         validator.adata.obs["cell_type_ontology_term_id"] = "unknown"
         validator.adata.X[0] = numpy.zeros(validator.adata.var.shape[0], dtype=numpy.float32)
         validator.adata.raw.X[0] = numpy.zeros(validator.adata.var.shape[0], dtype=numpy.float32)
+        validator.reset(None, 2)
         validator.validate_adata()
         assert validator.errors == []
 
@@ -328,8 +334,6 @@ def test_raw_values__invalid_visium_and_is_single_true_row_length(
         validator.adata.obs["assay_ontology_term_id"] = assay_ontology_term_id
 
         # hires image size must be present in order to validate the raw.
-        validator._visium_and_is_single_true_matrix_size = None
-        validator._hires_max_dimension_size = image_size
         validator.adata.uns["spatial"][visium_library_id]["images"]["hires"] = numpy.zeros(
             (1, image_size, 3), dtype=numpy.uint8
         )
@@ -640,15 +644,40 @@ def test_assay_ontology_term_id__as_categorical(self, validator_with_visium_assa
         validator: Validator = validator_with_visium_assay
 
         # check encoding as string
-        validator._check_spatial_obs()
+        validator.reset(None, 2)
+        validator._check_spatial()
+        validator._validate_raw()
         assert validator.errors == []
-        validator.reset()
 
         # force encoding as 'categorical'
+        validator.reset(None, 2)
         validator.adata.obs["assay_ontology_term_id"] = validator.adata.obs["assay_ontology_term_id"].astype("category")
-        validator._check_spatial_obs()
+        validator._check_spatial()
+        validator._validate_raw()
         assert validator.errors == []
 
+    @pytest.mark.parametrize(
+        "assay_ontology_term_id, all_same",
+        [("EFO:0010961", True), ("EFO:0030062", True), ("EFO:0022860", True), ("EFO:0008995", False)],
+    )
+    def test_assay_ontology_term_id__all_same(self, validator_with_visium_assay, assay_ontology_term_id, all_same):
+        """
+        Spatial assays (descendants of Visium Spatial Gene Expression, or Slide-SeqV2) require all values in the column to be identical.
+        """
+        validator: Validator = validator_with_visium_assay
+
+        # mix values (with otherwise allowed values); single .loc assignment avoids chained-assignment no-ops
+        validator.adata.obs["assay_ontology_term_id"] = assay_ontology_term_id
+        validator.adata.obs.loc[validator.adata.obs.index[0], "assay_ontology_term_id"] = "EFO:0010183"
+
+        # mixed values must be reported only for the assays parametrized with all_same=True
+        validator._check_spatial_obs()
+        EXPECTED_ERROR = f"When {ERROR_SUFFIX_SPATIAL}, all observations must contain the same value."
+        if all_same:
+            assert EXPECTED_ERROR in validator.errors
+        else:
+            assert EXPECTED_ERROR not in validator.errors
+
     def test_cell_type_ontology_term_id_invalid_term(self, validator_with_adata):
         validator = validator_with_adata
         validator.adata.obs.loc[validator.adata.obs.index[0], "cell_type_ontology_term_id"] = "EFO:0000001"
@@ -1698,6 +1727,7 @@ def test_genetic_ancestry_same_donor_id(self, validator_with_adata):
 
         # Second row should have identical donor id + genetic ancestry values, so this should pass validation
         validator.adata.obs.iloc[1] = validator.adata.obs.iloc[0].values
+
         validator.validate_adata()
         assert validator.errors == []
 
@@ -1708,11 +1738,13 @@ def test_genetic_ancestry_same_donor_id(self, validator_with_adata):
         validator.adata.obs["genetic_ancestry_Indigenous_American"] = [0.0, 0.0]
         validator.adata.obs["genetic_ancestry_Oceanian"] = [0.0, 0.0]
         validator.adata.obs["genetic_ancestry_South_Asian"] = [0.0, 0.0]
+        validator.reset(None, 2)
         validator.validate_adata()
         assert len(validator.errors) > 0
 
         # Change the donor id back to two different donor id's. Now, this should pass validation
         validator.adata.obs["donor_id"] = original_donor_id_column
+        validator.reset(None, 2)
         validator.validate_adata()
         assert validator.errors == []
 
@@ -1795,6 +1827,7 @@ def test_feature_is_filtered(self, validator_with_adata):
             X[i, 0] = 0
         X[0, 0] = 1
 
+        validator.reset(None, 2)
         validator.validate_adata()
         assert validator.errors == [
             "ERROR: Some features are 'True' in 'feature_is_filtered' of dataframe 'var', "
@@ -1804,6 +1837,7 @@ def test_feature_is_filtered(self, validator_with_adata):
 
         # Test that feature_is_filtered is a bool and not a string
         var["feature_is_filtered"] = "string"
+        validator.reset(None, 2)
         validator.validate_adata()
         assert validator.errors == [
             "ERROR: Column 'feature_is_filtered' in dataframe 'var' must be boolean, not 'object'."
@@ -2383,6 +2417,7 @@ def test_obsm_values_nan(self, validator_with_visium_assay, key):
 
         # Check embedding has any NaN
         obsm[key][0:100, 1] = numpy.nan
+        validator.reset(None, 2)
         validator.validate_adata()
 
         if key != "spatial":
@@ -2393,6 +2428,7 @@ def test_obsm_values_nan(self, validator_with_visium_assay, key):
         # Check embedding has all NaNs
         all_nan = numpy.full(obsm[key].shape, numpy.nan)
         obsm[key] = all_nan
+        validator.reset(None, 2)
         validator.validate_adata()
         if key != "spatial":
             assert validator.errors == [f"ERROR: adata.obsm['{key}'] contains all NaN values."]
@@ -2419,6 +2455,7 @@ def test_obsm_values_no_X_embedding__visium_dataset(self, validator_with_visium_
         validator = validator_with_visium_assay
         validator.adata.uns["default_embedding"] = "spatial"
         del validator.adata.obsm["X_umap"]
+        validator.reset(None, 2)
         validator.validate_adata()
         assert validator.errors == []
         assert validator.is_spatial is True
@@ -2522,6 +2559,7 @@ def test_obsm_key_name_whitespace(self, validator_with_adata):
 
         del obsm["X_ umap"]
         obsm["u m a p"] = obsm["X_umap"]
+        validator.reset(None, 2)
         validator.validate_adata()
         assert validator.errors == [
             "ERROR: Embedding key in 'adata.obsm' u m a p does not match the regex pattern ^[a-zA-Z][a-zA-Z0-9_.-]*$."

From 6de64f64ab00c995cc067b786eb418d35ecea725 Mon Sep 17 00:00:00 2001
From: Evan Molinelli <ejmolinelli@users.noreply.github.com>
Date: Fri, 6 Dec 2024 16:49:20 -0500
Subject: [PATCH 22/28] feat: X_{suffix} to include descendants of Visium
 (#1144)

Co-authored-by: Evan Molinelli <emolinelli@Evan-CZI-Laptop.local>
---
 .../tests/test_schema_compliance.py           | 28 +++++++++++++++----
 1 file changed, 22 insertions(+), 6 deletions(-)

diff --git a/cellxgene_schema_cli/tests/test_schema_compliance.py b/cellxgene_schema_cli/tests/test_schema_compliance.py
index aa69890e..85baab4b 100644
--- a/cellxgene_schema_cli/tests/test_schema_compliance.py
+++ b/cellxgene_schema_cli/tests/test_schema_compliance.py
@@ -2436,7 +2436,10 @@ def test_obsm_values_nan(self, validator_with_visium_assay, key):
             assert validator.errors == ["ERROR: adata.obs['spatial] contains at least one NaN value."]
 
     def test_obsm_values_no_X_embedding__non_spatial_dataset(self, validator_with_adata):
-        validator = validator_with_adata
+        """
+        X_{suffix} embeddings MUST exist for non-spatial datasets
+        """
+        validator: Validator = validator_with_adata
         validator.adata.obsm["harmony"] = validator.adata.obsm["X_umap"]
         validator.adata.uns["default_embedding"] = "harmony"
         del validator.adata.obsm["X_umap"]
@@ -2451,14 +2454,27 @@ def test_obsm_values_no_X_embedding__non_spatial_dataset(self, validator_with_ad
             "WARNING: Validation of raw layer was not performed due to current errors, try again after fixing current errors.",
         ]
 
-    def test_obsm_values_no_X_embedding__visium_dataset(self, validator_with_visium_assay):
-        validator = validator_with_visium_assay
+    @pytest.mark.parametrize("assay_ontology_term_id", ["EFO:0010961", "EFO:0030062", "EFO:0022860"])
+    def test_obsm_values_no_X_embedding__visium_dataset(self, validator_with_visium_assay, assay_ontology_term_id):
+        """
+        X_{suffix} embeddings MAY exist for spatial datasets
+        """
+        validator: Validator = validator_with_visium_assay
         validator.adata.uns["default_embedding"] = "spatial"
-        del validator.adata.obsm["X_umap"]
-        validator.reset(None, 2)
-        validator.validate_adata()
+        validator.adata.obs["assay_ontology_term_id"] = assay_ontology_term_id
+
+        # may have X_{suffix} embedding
+        validator._validate_obsm()
+        assert validator.is_spatial is True
         assert validator.errors == []
+        validator.reset()
+
+        # may also have no X_{suffix} embedding
+        del validator.adata.obsm["X_umap"]
+        validator._validate_obsm()
         assert validator.is_spatial is True
+        assert validator.errors == []
+        validator.reset()
 
     def test_obsm_values_no_X_embedding__slide_seq_v2_dataset(self, validator_with_slide_seq_v2_assay):
         validator = validator_with_slide_seq_v2_assay

From a6c9086bc57d0d374701d5ac2a5941cf1517c2de Mon Sep 17 00:00:00 2001
From: Joyce Yan <5653616+joyceyan@users.noreply.github.com>
Date: Tue, 10 Dec 2024 15:13:00 -0800
Subject: [PATCH 23/28] chore: update logging to log by donor id instead
 (#1150)

---
 cellxgene_schema_cli/cellxgene_schema/validate.py    | 9 ++++++---
 cellxgene_schema_cli/tests/test_schema_compliance.py | 2 +-
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/cellxgene_schema_cli/cellxgene_schema/validate.py b/cellxgene_schema_cli/cellxgene_schema/validate.py
index 781b72d1..6b62f796 100644
--- a/cellxgene_schema_cli/cellxgene_schema/validate.py
+++ b/cellxgene_schema_cli/cellxgene_schema/validate.py
@@ -554,9 +554,10 @@ def is_valid_row(row):
         invalid_rows = ~self.adata.obs.apply(is_valid_row, axis=1)
 
         if invalid_rows.any():
-            invalid_indices = self.adata.obs.index[invalid_rows].tolist()
+            donor_ids = self.adata.obs[donor_id_column].tolist()
+            unique_donor_ids = list(set(donor_ids))
             self.errors.append(
-                f"obs rows with indices {invalid_indices} have invalid genetic_ancestry_* values. All "
+                f"obs rows with donor ids {unique_donor_ids} have invalid genetic_ancestry_* values. All "
                 f"observations with the same donor_id must contain the same genetic_ancestry_* values. If "
                 f"organism_ontolology_term_id is NOT 'NCBITaxon:9606' for Homo sapiens, then all genetic"
                 f"ancestry values MUST be float('nan'). If organism_ontolology_term_id is 'NCBITaxon:9606' "
@@ -959,7 +960,6 @@ def _validate_dataframe(self, df_name: str):
                                 f"Column '{column_name}' in dataframe '{df_name}' contains a category '{category}' with "
                                 f"zero observations. These categories will be removed when `--add-labels` flag is present."
                             )
-                    self._validate_genetic_ancestry()
                 categorical_types = {type(x) for x in column.dtype.categories.values}
                 # Check for columns that have illegal categories, which are not supported by anndata 0.8.0
                 # TODO: check if this can be removed after upgading to anndata 0.10.0
@@ -2058,6 +2058,9 @@ def _deep_check(self):
         # Checks spatial
         self._check_spatial()
 
+        # Validate genetic ancestry
+        self._validate_genetic_ancestry()
+
         # Checks each component
         for component_name, component_def in self.schema_def["components"].items():
             logger.debug(f"Validating component: {component_name}")
diff --git a/cellxgene_schema_cli/tests/test_schema_compliance.py b/cellxgene_schema_cli/tests/test_schema_compliance.py
index 85baab4b..bf83c97b 100644
--- a/cellxgene_schema_cli/tests/test_schema_compliance.py
+++ b/cellxgene_schema_cli/tests/test_schema_compliance.py
@@ -1740,7 +1740,7 @@ def test_genetic_ancestry_same_donor_id(self, validator_with_adata):
         validator.adata.obs["genetic_ancestry_South_Asian"] = [0.0, 0.0]
         validator.reset(None, 2)
         validator.validate_adata()
-        assert len(validator.errors) > 0
+        assert len(validator.errors) == 1
 
         # Change the donor id back to two different donor id's. Now, this should pass validation
         validator.adata.obs["donor_id"] = original_donor_id_column

From c2f497926bc8468bba6f196c0d90b0ae22789d5e Mon Sep 17 00:00:00 2001
From: Trent Smith <1429913+Bento007@users.noreply.github.com>
Date: Wed, 11 Dec 2024 15:37:17 -0800
Subject: [PATCH 24/28] fix(devop): code coverage failing (#1149)

---
 .github/workflows/push_tests.yml | 13 +++++++++----
 codecov.yaml                     | 27 +++++++++++++++++++++++++++
 2 files changed, 36 insertions(+), 4 deletions(-)
 create mode 100644 codecov.yaml

diff --git a/.github/workflows/push_tests.yml b/.github/workflows/push_tests.yml
index cd5577d4..86de15a3 100644
--- a/.github/workflows/push_tests.yml
+++ b/.github/workflows/push_tests.yml
@@ -57,8 +57,9 @@ jobs:
         uses: actions/upload-artifact@v4
         with:
           name: coverage-cli
-          path: ./.coverage*
+          path: .coverage*
           retention-days: 3
+          include-hidden-files: true
 
   unit-tests-migration-assistant:
     runs-on: ubuntu-latest
@@ -88,8 +89,9 @@ jobs:
         uses: actions/upload-artifact@v4
         with:
           name: coverage-migration-assisstant
-          path: ./.coverage*
+          path: .coverage*
           retention-days: 3
+          include-hidden-files: true
 
   unit-test-ontology-dry-run:
     runs-on: ubuntu-latest
@@ -119,8 +121,9 @@ jobs:
         uses: actions/upload-artifact@v4
         with:
           name: coverage-ontology-dry-run
-          path: ./.coverage*
+          path: .coverage*
           retention-days: 3
+          include-hidden-files: true
 
   unit-test-genes-dry-run:
     runs-on: ubuntu-latest
@@ -150,8 +153,9 @@ jobs:
         uses: actions/upload-artifact@v4
         with:
           name: coverage-genes-dry-run
-          path: ./.coverage*
+          path: .coverage*
           retention-days: 3
+          include-hidden-files: true
 
   submit-codecoverage:
     needs:
@@ -184,6 +188,7 @@ jobs:
       - name: Upload coverage to Codecov
         uses: codecov/codecov-action@v4
         with:
+          token: ${{ secrets.CODECOV_TOKEN }}
           env_vars: OS,PYTHON
           files: ./coverage.xml
           flags: unittests
diff --git a/codecov.yaml b/codecov.yaml
new file mode 100644
index 00000000..9dbaf5d1
--- /dev/null
+++ b/codecov.yaml
@@ -0,0 +1,27 @@
+comment:
+  layout: "header, diff, components"
+
+component_management:
+  default_rules:
+    statuses:
+      - type: project
+        target: auto
+        branches:
+          - "!main"
+  individual_components:
+    - component_id: module_cellxgene_schema_cli
+      name: cellxgene_schema_cli
+      paths:
+        - cellxgene_schema_cli/**
+    - component_id: module_migration_assistant
+      name: migration_assistant
+      paths:
+        - scripts/migration_assistant/**
+    - component_id: module_schema_bump_dry_run_genes
+      name: schema_bump_dry_run_genes
+      paths:
+        - scripts/schema_bump_dry_run_genes/**
+    - component_id: module_schema_bump_dry_run_ontologies
+      name: schema_bump_dry_run_ontologies
+      paths:
+        - scripts/schema_bump_dry_run_ontologies/**

From ee393cd4b77b7ff156b13c705f3d09f7f83c0133 Mon Sep 17 00:00:00 2001
From: Nayib Gloria <55710092+nayib-jose-gloria@users.noreply.github.com>
Date: Mon, 16 Dec 2024 10:11:49 -0500
Subject: [PATCH 25/28] feat: refactor suspension type validation logic to be
 simpler and more performant (#1155)

---
 .../schema_definitions/schema_definition.yaml | 286 +++++-------------
 .../cellxgene_schema/validate.py              |  86 ++----
 .../tests/test_schema_compliance.py           |  25 +-
 3 files changed, 117 insertions(+), 280 deletions(-)

diff --git a/cellxgene_schema_cli/cellxgene_schema/schema_definitions/schema_definition.yaml b/cellxgene_schema_cli/cellxgene_schema/schema_definitions/schema_definition.yaml
index d14a0442..28a153d7 100644
--- a/cellxgene_schema_cli/cellxgene_schema/schema_definitions/schema_definition.yaml
+++ b/cellxgene_schema_cli/cellxgene_schema/schema_definitions/schema_definition.yaml
@@ -186,7 +186,12 @@ components:
         type: curie
         dependencies:
           - # If tissue_type is tissue OR organoid
-            rule: "tissue_type == 'tissue' | tissue_type == 'organoid'"
+            rule:
+              column: tissue_type
+              match_exact:
+                terms:
+                  - tissue
+                  - organoid
             error_message_suffix: >-
               When 'tissue_type' is 'tissue' or 'organoid',
               'tissue_ontology_term_id' MUST be a descendant term id of 'UBERON:0001062' (anatomical entity).
@@ -199,7 +204,11 @@ components:
                   UBERON:
                     - UBERON:0001062
           - # If tissue_type is cell culture
-            rule: "tissue_type == 'cell culture'"
+            rule:
+              column: tissue_type
+              match_exact:
+                terms:
+                  - cell culture
             error_message_suffix: >-
               When 'tissue_type' is 'cell culture', 'tissue_ontology_term_id' MUST be either a CL term
               (excluding 'CL:0000255' (eukaryotic cell), 'CL:0000257' (Eumycetozoan cell),
@@ -222,7 +231,11 @@ components:
         type: curie
         dependencies:
           - # If organism is Human
-            rule: "organism_ontology_term_id == 'NCBITaxon:9606'"
+            rule:
+              column: organism_ontology_term_id
+              match_exact:
+                terms:
+                  - NCBITaxon:9606
             error_message_suffix: >-
               When 'organism_ontology_term_id' is 'NCBITaxon:9606' (Homo sapiens),
               self_reported_ethnicity_ontology_term_id MUST be formatted as one
@@ -285,7 +298,11 @@ components:
         type: curie
         dependencies:
           - # If organism is Human
-            rule: "organism_ontology_term_id == 'NCBITaxon:9606'"
+            rule:
+              column: organism_ontology_term_id
+              match_exact:
+                terms:
+                  - NCBITaxon:9606
             error_message_suffix: >-
               When 'organism_ontology_term_id' is 'NCBITaxon:9606' (Homo sapiens),
               'development_stage_ontology_term_id' MUST be the most accurate descendant of 'HsapDv:0000001' or unknown.
@@ -300,7 +317,11 @@ components:
               exceptions:
                 - unknown
           - # If organism is Mouse
-            rule: "organism_ontology_term_id == 'NCBITaxon:10090'"
+            rule:
+              column: organism_ontology_term_id
+              match_exact:
+                terms:
+                  - NCBITaxon:10090
             error_message_suffix: >-
               When 'organism_ontology_term_id' is 'NCBITaxon:10090' (Mus musculus),
               'development_stage_ontology_term_id' MUST be the most accurate descendant of 'MmusDv:0000001' or unknown.
@@ -353,227 +374,70 @@ components:
           selected the most appropriate value for the assay(s) between 'cell', 'nucleus', and 'na'. Please contact cellxgene@chanzuckerberg.com
           during submission so that the assay(s) can be added to the schema definition document.
         dependencies:
-          - # If assay_ontology_term_id is EFO:0030080 or its descendants, 'suspension_type' MUST be 'cell' or 'nucleus'
-            complex_rule:
-              match_ancestors:
-                column: assay_ontology_term_id
+          - # 'suspension_type' MUST be 'cell' or 'nucleus'
+            rule:
+              column: assay_ontology_term_id
+              match_ancestors_inclusive:
                 ancestors:
-                  EFO:
-                    - EFO:0030080
-                inclusive: True
+                  - EFO:0030080
+                  - EFO:0010184
+              match_exact:
+                terms:
+                  - EFO:0010010
+                  - EFO:0008722
+                  - EFO:0010550
+                  - EFO:0008780
+                  - EFO:0700010
+                  - EFO:0700011
+                  - EFO:0009919
+                  - EFO:0030060
+                  - EFO:0022490
+                  - EFO:0030028
             type: categorical
-            error_message_suffix: >-
-              when 'assay_ontology_term_id' is EFO:0030080 or its descendants
             enum:
               - "cell"
               - "nucleus"
-          - # If assay_ontology_term_id is EFO:0007045 or its descendants, 'suspension_type' MUST be 'nucleus'
-            complex_rule:
-              match_ancestors:
-                column: assay_ontology_term_id
+          - # 'suspension_type' MUST be 'nucleus'
+            rule:
+              column: assay_ontology_term_id
+              match_ancestors_inclusive:
                 ancestors:
-                  EFO:
-                    - EFO:0007045
-                inclusive: True
-            type: categorical
-            error_message_suffix: >-
-              when 'assay_ontology_term_id' is EFO:0007045 or its descendants
-            enum:
-              - "nucleus"
-          - # If assay_ontology_term_id is EFO:0010184 or its descendants, 'suspension_type' MUST be 'cell' or 'nucleus'
-            complex_rule:
-              match_ancestors:
-                column: assay_ontology_term_id
-                ancestors:
-                  EFO:
-                    - EFO:0010184
-                inclusive: True
+                  - EFO:0007045
+                  - EFO:0002761
+              match_exact:
+                terms:
+                  - EFO:0008720
+                  - EFO:0030026
             type: categorical
-            error_message_suffix: >-
-              when 'assay_ontology_term_id' is EFO:0010184 or its descendants
             enum:
-              - "cell"
               - "nucleus"
-          - # If assay_ontology_term_id is EFO:0008994 or its descendants, 'suspension_type' MUST be 'na'
-            complex_rule:
-              match_ancestors:
-                column: assay_ontology_term_id
+          - # 'suspension_type' MUST be 'cell'
+            rule:
+              column: assay_ontology_term_id
+              match_ancestors_inclusive:
                 ancestors:
-                  EFO:
-                    - EFO:0008994
-                inclusive: True
-            type: categorical
-            error_message_suffix: >-
-              when 'assay_ontology_term_id' is EFO:0008994 or its descendants
-            enum:
-              - "na"
-          - # If assay_ontology_term_id is EFO:0008919 or its descendants, 'suspension_type' MUST be 'cell'
-            complex_rule:
-              match_ancestors:
-                column: assay_ontology_term_id
-                ancestors:
-                  EFO:
-                    - EFO:0008919
-                inclusive: True
+                  - EFO:0008919
+              match_exact:
+                terms:
+                  - EFO:0030002
+                  - EFO:0008853
+                  - EFO:0008796
+                  - EFO:0700003
+                  - EFO:0700004
+                  - EFO:0008953
             type: categorical
-            error_message_suffix: >-
-              when 'assay_ontology_term_id' is EFO:0008919 or its descendants
             enum:
               - "cell"
-          - # If assay_ontology_term_id is EFO:0002761 or its descendants, 'suspension_type' MUST be 'nucleus'
-            complex_rule:
-              match_ancestors:
-                column: assay_ontology_term_id
+          - # 'suspension_type' MUST be 'na'
+            rule:
+              column: assay_ontology_term_id
+              match_ancestors_inclusive:
                 ancestors:
-                  EFO:
-                    - EFO:0002761
-                inclusive: True
-            type: categorical
-            error_message_suffix: >-
-              when 'assay_ontology_term_id' is EFO:0002761 or its descendants
-            enum:
-              - "nucleus"
-          - # If assay_ontology_term_id is EFO:0010010, 'suspension_type' MUST be 'cell' or 'nucleus'
-            rule: "assay_ontology_term_id == 'EFO:0010010'"
-            type: categorical
-            error_message_suffix: >-
-              when 'assay_ontology_term_id' is EFO:0010010
-            enum:
-              - "cell"
-              - "nucleus"
-          - # If assay_ontology_term_id is EFO:0008720, 'suspension_type' MUST be 'nucleus'
-            rule: "assay_ontology_term_id == 'EFO:0008720'"
-            type: categorical
-            error_message_suffix: >-
-              when 'assay_ontology_term_id' is EFO:0008720
-            enum:
-              - "nucleus"
-          - # If assay_ontology_term_id is EFO:0008722, 'suspension_type' MUST be 'cell' or 'nucleus'
-            rule: "assay_ontology_term_id == 'EFO:0008722'"
-            type: categorical
-            error_message_suffix: >-
-              when 'assay_ontology_term_id' is EFO:0008722
-            enum:
-              - "cell"
-              - "nucleus"
-          - # If assay_ontology_term_id is EFO:0030002, 'suspension_type' MUST be 'cell'
-            rule: "assay_ontology_term_id == 'EFO:0030002'"
-            type: categorical
-            error_message_suffix: >-
-              when 'assay_ontology_term_id' is EFO:0030002
-            enum:
-              - "cell"
-          - # If assay_ontology_term_id is EFO:0008853, 'suspension_type' MUST be 'cell'
-            rule: "assay_ontology_term_id == 'EFO:0008853'"
-            type: categorical
-            error_message_suffix: >-
-              when 'assay_ontology_term_id' is EFO:0008853
-            enum:
-              - "cell"
-          - # If assay_ontology_term_id is EFO:0030026, 'suspension_type' MUST be 'nucleus'
-            rule: "assay_ontology_term_id == 'EFO:0030026'"
-            type: categorical
-            error_message_suffix: >-
-              when 'assay_ontology_term_id' is EFO:0030026
-            enum:
-              - "nucleus"
-          - # If assay_ontology_term_id is EFO:0010550, 'suspension_type' MUST be 'cell' or 'nucleus'
-            rule: "assay_ontology_term_id == 'EFO:0010550'"
-            type: categorical
-            error_message_suffix: >-
-              when 'assay_ontology_term_id' is EFO:0010550
-            enum:
-              - "cell"
-              - "nucleus"
-          - # If assay_ontology_term_id is EFO:0008796, 'suspension_type' MUST be 'cell'
-            rule: "assay_ontology_term_id == 'EFO:0008796'"
-            type: categorical
-            error_message_suffix: >-
-              when 'assay_ontology_term_id' is EFO:0008796
-            enum:
-              - "cell"
-          - # If assay_ontology_term_id is EFO:0700003, 'suspension_type' MUST be 'cell'
-            rule: "assay_ontology_term_id == 'EFO:0700003'"
-            type: categorical
-            error_message_suffix: >-
-              when 'assay_ontology_term_id' is EFO:0700003
-            enum:
-              - "cell"
-          - # If assay_ontology_term_id is EFO:0700004, 'suspension_type' MUST be 'cell'
-            rule: "assay_ontology_term_id == 'EFO:0700004'"
-            type: categorical
-            error_message_suffix: >-
-              when 'assay_ontology_term_id' is EFO:0700004
-            enum:
-              - "cell"
-          - # If assay_ontology_term_id is EFO:0008780, 'suspension_type' MUST be 'cell' or 'nucleus'
-            rule: "assay_ontology_term_id == 'EFO:0008780'"
-            type: categorical
-            error_message_suffix: >-
-              when 'assay_ontology_term_id' is EFO:0008780
-            enum:
-              - "cell"
-              - "nucleus"
-          - # If assay_ontology_term_id is EFO:0008953, 'suspension_type' MUST be 'cell'
-            rule: "assay_ontology_term_id == 'EFO:0008953'"
-            type: categorical
-            error_message_suffix: >-
-              when 'assay_ontology_term_id' is EFO:0008953
-            enum:
-              - "cell"
-          - # If assay_ontology_term_id is EFO:0700010, 'suspension_type' MUST be 'cell' or 'nucleus'
-            rule: "assay_ontology_term_id == 'EFO:0700010'"
-            type: categorical
-            error_message_suffix: >-
-              when 'assay_ontology_term_id' is EFO:0700010
-            enum:
-              - "cell"
-              - "nucleus"
-          - # If assay_ontology_term_id is EFO:0700011, 'suspension_type' MUST be 'cell' or 'nucleus'
-            rule: "assay_ontology_term_id == 'EFO:0700011'"
-            type: categorical
-            error_message_suffix: >-
-              when 'assay_ontology_term_id' is EFO:0700011
-            enum:
-              - "cell"
-              - "nucleus"
-          - # If assay_ontology_term_id is EFO:0009919, 'suspension_type' MUST be 'cell' or 'nucleus'
-            rule: "assay_ontology_term_id == 'EFO:0009919'"
-            type: categorical
-            error_message_suffix: >-
-              when 'assay_ontology_term_id' is EFO:0009919
-            enum:
-              - "cell"
-              - "nucleus"
-          - # If assay_ontology_term_id is EFO:0030060, 'suspension_type' MUST be 'cell' or 'nucleus'
-            rule: "assay_ontology_term_id == 'EFO:0030060'"
-            type: categorical
-            error_message_suffix: >-
-              when 'assay_ontology_term_id' is EFO:0030060
-            enum:
-              - "cell"
-              - "nucleus"
-          - # If assay_ontology_term_id is EFO:0022490, 'suspension_type' MUST be 'cell' or 'nucleus'
-            rule: "assay_ontology_term_id == 'EFO:0022490'"
-            type: categorical
-            error_message_suffix: >-
-              when 'assay_ontology_term_id' is EFO:0022490
-            enum:
-              - "cell"
-              - "nucleus"
-          - # If assay_ontology_term_id is EFO:0030028, 'suspension_type' MUST be 'cell' or 'nucleus'
-            rule: "assay_ontology_term_id == 'EFO:0030028'"
-            type: categorical
-            error_message_suffix: >-
-              when 'assay_ontology_term_id' is EFO:0030028
-            enum:
-              - "cell"
-              - "nucleus"
-          - # If assay_ontology_term_id is EFO:0008992, 'suspension_type' MUST be 'na'
-            rule: "assay_ontology_term_id == 'EFO:0008992'"
+                  - EFO:0008994
+              match_exact:
+                terms:
+                  - EFO:0008992
             type: categorical
-            error_message_suffix: >-
-              when 'assay_ontology_term_id' is EFO:0008992
             enum:
               - "na"
       tissue_type:
diff --git a/cellxgene_schema_cli/cellxgene_schema/validate.py b/cellxgene_schema_cli/cellxgene_schema/validate.py
index 6b62f796..292cc8ec 100644
--- a/cellxgene_schema_cli/cellxgene_schema/validate.py
+++ b/cellxgene_schema_cli/cellxgene_schema/validate.py
@@ -13,7 +13,6 @@
 import scipy
 from anndata._core.sparse_dataset import SparseDataset
 from cellxgene_ontology_guide.ontology_parser import OntologyParser
-from pandas.errors import UndefinedVariableError
 from scipy import sparse
 
 from . import gencode, schema
@@ -630,7 +629,9 @@ def _validate_column_feature_is_filtered(self, column: pd.Series, column_name: s
                     f"these features must be 0."
                 )
 
-    def _validate_column(self, column: pd.Series, column_name: str, df_name: str, column_def: dict):
+    def _validate_column(
+        self, column: pd.Series, column_name: str, df_name: str, column_def: dict, default_error_message_suffix=None
+    ):
         """
         Given a schema definition and the column of a dataframe, verify that the column satisfies the schema.
         If there are any errors, it adds them to self.errors
@@ -640,6 +641,7 @@ def _validate_column(self, column: pd.Series, column_name: str, df_name: str, co
         :param str df_name: Name of the dataframe
         :param dict column_def: schema definition for this specific column,
         e.g. schema_def["obs"]["columns"]["cell_type_ontology_term_id"]
+        :param str default_error_message_suffix: default error message suffix to be added to errors found here
 
         :rtype None
         """
@@ -708,10 +710,11 @@ def _validate_column(self, column: pd.Series, column_name: str, df_name: str, co
                     self._validate_curie_str(term_str, column_name, column_def["curie_constraints"])
 
         # Add error suffix to errors found here
-        if "error_message_suffix" in column_def:
+        error_message_suffix = column_def.get("error_message_suffix", default_error_message_suffix)
+        if error_message_suffix:
             error_total_count = len(self.errors)
             for i in range(error_original_count, error_total_count):
-                self.errors[i] = self.errors[i] + " " + column_def["error_message_suffix"]
+                self.errors[i] = self.errors[i] + " " + error_message_suffix
 
     def _validate_column_dependencies(
         self, df: pd.DataFrame, df_name: str, column_name: str, dependencies: List[dict]
@@ -731,73 +734,38 @@ def _validate_column_dependencies(
         """
 
         all_rules = []
-
         for dependency_def in dependencies:
-            if "complex_rule" in dependency_def:
-                if "match_ancestors" in dependency_def["complex_rule"]:
-                    query_fn, args = self._generate_match_ancestors_query_fn(
-                        dependency_def["complex_rule"]["match_ancestors"]
-                    )
-                    term_id, ontologies, ancestors, ancestor_inclusive = args
-                    query_exp = f"@query_fn({term_id}, {ontologies}, {ancestors}, {ancestor_inclusive})"
-            elif "rule" in dependency_def:
-                query_exp = dependency_def["rule"]
-            else:
-                continue
-
+            terms_to_match = set()
+            column_to_match = dependency_def["rule"]["column"]
+            if "match_ancestors_inclusive" in dependency_def["rule"]:
+                ancestors = dependency_def["rule"]["match_ancestors_inclusive"]["ancestors"]
+                for ancestor in ancestors:
+                    terms_to_match.update(ONTOLOGY_PARSER.get_term_descendants(ancestor, include_self=True))
+            if "match_exact" in dependency_def["rule"]:
+                terms_to_match.update(dependency_def["rule"]["match_exact"]["terms"])
             try:
-                column = getattr(df.query(query_exp, engine="python"), column_name)
-            except UndefinedVariableError:
+                match_query = df[column_to_match].isin(terms_to_match)
+                match_df = df[match_query]
+                column = getattr(match_df, column_name)
+                error_message_suffix = dependency_def.get("error_message_suffix", None)
+                if not error_message_suffix:
+                    matched_values = list(getattr(match_df, column_to_match).unique())
+                    error_message_suffix = f"when '{column_to_match}' is in {matched_values}"
+            except KeyError:
                 self.errors.append(
                     f"Checking values with dependencies failed for adata.{df_name}['{column_name}'], "
                     f"this is likely due to missing dependent column in adata.{df_name}."
                 )
                 return pd.Series(dtype=np.float64)
 
-            all_rules.append(query_exp)
-
-            self._validate_column(column, column_name, df_name, dependency_def)
+            all_rules.append(match_query)
+            self._validate_column(column, column_name, df_name, dependency_def, error_message_suffix)
 
-        # Set column with the data that's left
-        all_rules = " | ".join(all_rules)
-        column = getattr(df.query("not (" + all_rules + " )", engine="python"), column_name)
+        # Return column of data that was not matched by any of the rules
+        column = getattr(df[~np.logical_or.reduce(all_rules)], column_name)
 
         return column
 
-    def _generate_match_ancestors_query_fn(self, rule_def: Dict):
-        """
-        Generates vectorized function and args to query a pandas dataframe. Function will determine whether values from
-        a specified column is a descendant term to a group of specified ancestors, returning a Bool.
-        :param rule_def: defines arguments to pass into vectorized ancestor match validation function
-        :return: Tuple(function, Tuple(str, List[str], List[str]))
-        """
-        validate_curie_ancestors_vectorized = np.vectorize(self._validate_curie_ancestors)
-        ancestor_map = rule_def["ancestors"]
-        inclusive = rule_def["inclusive"]
-
-        # hack: pandas dataframe query doesn't support Dict inputs
-        ontology_keys = []
-        ancestor_list = []
-        for key, val in ancestor_map.items():
-            ontology_keys.append(key)
-            ancestor_list.append(val)
-
-        def is_ancestor_match(
-            term_id: str,
-            ontologies: List[str],
-            ancestors: List[str],
-            ancestor_inclusive: bool,
-        ) -> bool:
-            allowed_ancestors = dict(zip(ontologies, ancestors))
-            return validate_curie_ancestors_vectorized(term_id, allowed_ancestors, inclusive=ancestor_inclusive)
-
-        return is_ancestor_match, (
-            rule_def["column"],
-            ontology_keys,
-            ancestor_list,
-            inclusive,
-        )
-
     def _validate_list(self, list_name: str, current_list: List[str], element_type: str):
         """
         Validates the elements of a list based on the type definition. Adds errors to self.errors if any
diff --git a/cellxgene_schema_cli/tests/test_schema_compliance.py b/cellxgene_schema_cli/tests/test_schema_compliance.py
index bf83c97b..f78ad6da 100644
--- a/cellxgene_schema_cli/tests/test_schema_compliance.py
+++ b/cellxgene_schema_cli/tests/test_schema_compliance.py
@@ -1484,13 +1484,15 @@ def test_suspension_type(self, validator, assay, suspension_types):
         if "na" in suspension_types:
             invalid_suspension_type = "nucleus" if "nucleus" not in suspension_types else "cell"
         obs = validator.adata.obs
-        obs.loc[obs.index[1], "suspension_type"] = invalid_suspension_type
-        obs.loc[obs.index[1], "assay_ontology_term_id"] = assay
+        obs["suspension_type"] = invalid_suspension_type
+        obs["assay_ontology_term_id"] = assay
+        obs["suspension_type"] = obs["suspension_type"].astype("category")
+        obs["assay_ontology_term_id"] = obs["assay_ontology_term_id"].astype("category")
         validator.validate_adata()
         assert validator.errors == [
             f"ERROR: Column 'suspension_type' in dataframe 'obs' contains invalid values "
             f"'['{invalid_suspension_type}']'. Values must be one of {suspension_types} when "
-            f"'assay_ontology_term_id' is {assay}"
+            f"'assay_ontology_term_id' is in ['{assay}']"
         ]
 
     @pytest.mark.parametrize(
@@ -1517,13 +1519,15 @@ def test_suspension_type_ancestors_inclusive(self, validator_with_adata, assay,
         if "na" in suspension_types:
             invalid_suspension_type = "nucleus" if "nucleus" not in suspension_types else "cell"
             obs["suspension_type"] = obs["suspension_type"].cat.remove_unused_categories()
-        obs.loc[obs.index[1], "assay_ontology_term_id"] = assay
-        obs.loc[obs.index[1], "suspension_type"] = invalid_suspension_type
+        obs["suspension_type"] = invalid_suspension_type
+        obs["assay_ontology_term_id"] = assay
+        obs["suspension_type"] = obs["suspension_type"].astype("category")
+        obs["assay_ontology_term_id"] = obs["assay_ontology_term_id"].astype("category")
         validator.validate_adata()
         assert validator.errors == [
             f"ERROR: Column 'suspension_type' in dataframe 'obs' contains invalid values "
             f"'['{invalid_suspension_type}']'. Values must be one of {suspension_types} when "
-            f"'assay_ontology_term_id' is {assay} or its descendants"
+            f"'assay_ontology_term_id' is in ['{assay}']"
         ]
 
     def test_suspension_type_with_descendant_term_id_failure(self, validator_with_adata):
@@ -1533,14 +1537,15 @@ def test_suspension_type_with_descendant_term_id_failure(self, validator_with_ad
         """
         validator = validator_with_adata
         obs = validator.adata.obs
-        obs.loc[obs.index[0], "assay_ontology_term_id"] = "EFO:0022615"  # descendant of EFO:0008994
-        obs.loc[obs.index[0], "suspension_type"] = "nucleus"
-
+        obs["suspension_type"] = "nucleus"
+        obs["assay_ontology_term_id"] = "EFO:0022615"  # descendant of EFO:0008994
+        obs["suspension_type"] = obs["suspension_type"].astype("category")
+        obs["assay_ontology_term_id"] = obs["assay_ontology_term_id"].astype("category")
         validator.validate_adata()
         assert validator.errors == [
             "ERROR: Column 'suspension_type' in dataframe 'obs' contains invalid values "
             "'['nucleus']'. Values must be one of ['na'] when "
-            "'assay_ontology_term_id' is EFO:0008994 or its descendants"
+            "'assay_ontology_term_id' is in ['EFO:0022615']"
         ]
 
     def test_suspension_type_with_descendant_term_id_success(self, validator_with_adata):

From 2fc4898c6267797740a794cf222e42fdd9b7508d Mon Sep 17 00:00:00 2001
From: Brian Raymor <brianraymor@chanzuckerberg.com>
Date: Tue, 17 Dec 2024 17:40:33 -0800
Subject: [PATCH 26/28] Add template for adding species

---
 .github/ISSUE_TEMPLATE/add-species.md | 228 ++++++++++++++++++++++++++
 .github/ISSUE_TEMPLATE/tech-issue.md  |   8 +-
 2 files changed, 233 insertions(+), 3 deletions(-)
 create mode 100644 .github/ISSUE_TEMPLATE/add-species.md

diff --git a/.github/ISSUE_TEMPLATE/add-species.md b/.github/ISSUE_TEMPLATE/add-species.md
new file mode 100644
index 00000000..31504d14
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/add-species.md
@@ -0,0 +1,228 @@
+---
+name: Add species
+about: Editor's template for adding new species
+title: Draft <species>
+labels: drafting, multispecies discovery, schema
+assignees: brianraymor
+
+---
+
+## Pending Issues
+
+1. Waiting on sscrdv to be submitted to OLS for use in references
+1. [FAANG](http://www.faang.org/) is the Functional Annotation of ANimal Genomes project. _We are working to understand the genotype to phenotype link in domesticated animals._ Per their [Ontology Improver](https://data.faang.org/ontology?sortTerm=key&sortDirection=asc), *Dv terms are not referenced. Both UBERON and CL are in use. Their [schema](https://github.com/FAANG/dcc-metadata/blob/9e7c1b5304fc57a724d197384e83243562bebbf4/json_schema/type/samples/faang_samples_specimen.metadata_rules.json#L154):
+
+```
+"name": "developmental stage",
+"description": "Ontology for Developmental stage, UBERON is preferred to EFO.",
+```
+
+
+## Design
+
+This draft design reflects additions to corresponding sections in [schema 5.2.0](https://github.com/chanzuckerberg/single-cell-curation/blob/main/schema/5.2.0/schema.md). Reviewers are expected to be familiar with the CELLxGENE schema.
+
+**Editorial Notes** that are inlined in the design below will not be surfaced in the schema. 
+
+---
+
+### Required Ontologies
+
+
+| Ontology | OBO Prefix | Release | Download |
+|:--|:--|:--|:--|
+| [Unavailable](https://github.com/OBOFoundry/OBOFoundry.github.io/tree/master/ontology) | SscrDv | [Releases](https://github.com/obophenotype/developmental-stage-ontologies/releases) | TBD |
+|||||
+
+
+#### Editorial Notes
+
+This ontology is under active development. CELLxGENE pins ontology releases in each version of the schema. A specific release of the ontology above must be selected in the future.
+
+
+---
+
+### Required Gene Annotations
+
+| Organism | Source | Required version | Download |
+|:--|:--|:--|:--|
+| <a href="https://www.ebi.ac.uk/ols4/ontologies/ncbitaxon/classes?obo_id=NCBITaxon%3A9823"><code>"NCBITaxon:9823"</code></a><br>for <i>Sus scrofa domesticus</i>  | [ENSEMBL (Sus scrofa domesticus)] | Sscrofa11.1 (GCA_000003025.6) | [Sus_scrofa.Sscrofa11.1.113.gtf] |
+
+
+[ENSEMBL (Sus scrofa domesticus)]: https://useast.ensembl.org/Sus_scrofa/Info/Index
+[Sus_scrofa.Sscrofa11.1.113.gtf]: https://ftp.ensembl.org/pub/release-113/gtf/sus_scrofa/Sus_scrofa.Sscrofa11.1.113.gtf.gz
+
+#### Editorial Notes
+
+
+---
+
+## `obs` (Cell Metadata)
+
+### cell_type_ontology_term_id
+
+No schema changes are required. 
+
+#### Editorial Notes
+
+---
+
+### development_stage_ontology_term_id
+
+<table><tbody>
+  <tr>
+    <th>Key</th>
+    <td>development_stage_ontology_term_id</td>
+  </tr>
+  <tr>
+    <th>Annotator</th>
+    <td>Curator MUST annotate.</td>
+  </tr>
+  <tr>
+    <th>Value</th>
+    <td>
+      categorical with <code>str</code> categories. If unavailable, this MUST be <code>"unknown"</code>.<br><br>
+      If <code>organism_ontology_term_id</code> is <a href="https://www.ebi.ac.uk/ols4/ontologies/ncbitaxon/classes?obo_id=NCBITaxon%3A9823"><code>"NCBITaxon:9823"</code></a> for <i>Sus scrofa domesticus</i>, this MUST be the most accurate descendant of <code>SscrDv:0000000</code> for <i>life cycle stage</i>.
+    </td>
+  </tr>
+</tbody></table>
+<br>
+
+#### Editorial Notes
+
+This may be outdated, but [potential recommendations](https://github.com/obophenotype/developmental-stage-ontologies/blob/master/external/bgee/report.md#sus-scrofa):
+
+```
+UBERON:0000104 life cycle
+    UBERON:0000068 embryo stage
+        UBERON:0000106 zygote stage
+        UBERON:0000107 cleavage stage
+            UBERON:0007232 2 cell stage
+            UBERON:0007233 4 cell stage
+            UBERON:0007236 8 cell stage
+        UBERON:0000108 blastula stage
+        UBERON:0000109 gastrula stage
+        UBERON:0000110 neurula stage
+        UBERON:0000111 organogenesis stage
+            SscrDv:0000081 ridge limb stage (pig)
+            SscrDv:0000082 bud limb stage (pig)
+            SscrDv:0000083 paddle limb stage (pig)
+        UBERON:0007220 late embryonic stage
+    UBERON:0000092 post-embryonic stage
+        UBERON:0000066 fully formed stage
+            UBERON:0000112 sexually immature stage
+                UBERON:0018685 nursing stage
+                    UBERON:0007221 neonate stage
+                        SscrDv:0000072 0-day-old stage (pig)
+                        SscrDv:0000073 1-day-old stage (pig)
+                        SscrDv:0000074 2-day-old stage (pig)
+                        SscrDv:0000075 3-day-old stage (pig)
+                        SscrDv:0000076 4-day-old stage (pig)
+                        SscrDv:0000077 5-day-old stage (pig)
+                        SscrDv:0000078 6-day-old stage (pig)
+                    UBERON:0034920 infant stage
+                        SscrDv:0000010 1-week-old stage (pig)
+                        SscrDv:0000011 2-week-old stage (pig)
+                        SscrDv:0000012 3-week-old stage (pig)
+                            SscrDv:0000018 21-day-old stage (pig)
+                            SscrDv:0000019 22-day-old stage (pig)
+                            SscrDv:0000020 23-day-old stage (pig)
+                            SscrDv:0000021 24-day-old stage (pig)
+                            SscrDv:0000022 25-day-old stage (pig)
+                            SscrDv:0000023 26-day-old stage (pig)
+                            SscrDv:0000024 27-day-old stage (pig)
+                        SscrDv:0000013 4-week-old stage (pig)
+                            SscrDv:0000025 28-day-old stage (pig)
+                            SscrDv:0000026 29-day-old stage (pig)
+                            SscrDv:0000027 30-day-old stage (pig)
+                            SscrDv:0000028 31-day-old stage (pig)
+                            SscrDv:0000029 32-day-old stage (pig)
+                            SscrDv:0000030 33-day-old stage (pig)
+                            SscrDv:0000031 34-day-old stage (pig)
+                        SscrDv:0000014 5-week-old stage (pig)
+                            SscrDv:0000032 35-day-old stage (pig)
+                            SscrDv:0000033 36-day-old stage (pig)
+                            SscrDv:0000034 37-day-old stage (pig)
+                            SscrDv:0000035 38-day-old stage (pig)
+                            SscrDv:0000036 39-day-old stage (pig)
+                            SscrDv:0000037 40-day-old stage (pig)
+                            SscrDv:0000038 41-day-old stage (pig)
+                        SscrDv:0000015 6-week-old stage (pig)
+                        SscrDv:0000016 7-week-old stage (pig)
+                UBERON:0034919 juvenile stage
+                    SscrDv:0000039 2-month-old stage (pig)
+                        SscrDv:0000017 8-week-old stage (pig)
+                        SscrDv:0000040 9-week-old stage (pig)
+                        SscrDv:0000041 10-week-old stage (pig)
+                        SscrDv:0000042 11-week-old stage (pig)
+                    SscrDv:0000043 3-month-old stage (pig)
+                        SscrDv:0000044 12-week-old stage (pig)
+                        SscrDv:0000045 13-week-old stage (pig)
+                        SscrDv:0000046 14-week-old stage (pig)
+                        SscrDv:0000047 15-week-old stage (pig)
+                    SscrDv:0000048 4-month-old stage (pig)
+                        SscrDv:0000049 16-week-old stage (pig)
+                        SscrDv:0000050 17-week-old stage (pig)
+                        SscrDv:0000051 18-week-old stage (pig)
+                        SscrDv:0000052 19-week-old stage (pig)
+                        SscrDv:0000053 20-week-old stage (pig)
+                    SscrDv:0000054 5-month-old stage (pig)
+                        SscrDv:0000055 21-week-old stage (pig)
+                        SscrDv:0000056 22-week-old stage (pig)
+                        SscrDv:0000057 23-week-old stage (pig)
+                        SscrDv:0000058 24-week-old stage (pig)
+                    SscrDv:0000059 6-month-old stage (pig)
+                    SscrDv:0000060 7-month-old stage (pig)
+                    SscrDv:0000061 8-month-old stage (pig)
+                    SscrDv:0000062 9-month-old stage (pig)
+                    SscrDv:0000063 10-month-old stage (pig)
+            UBERON:0000113 post-juvenile
+                UBERON:0018241 prime adult stage
+                    SscrDv:0000064 11-month-old stage (pig)
+                    SscrDv:0000065 1-year-old stage (pig)
+                    SscrDv:0000066 2-year-old stage (pig)
+                    SscrDv:0000067 3-year-old stage (pig)
+                    SscrDv:0000068 4-year-old stage (pig)
+                    SscrDv:0000069 5-year-old stage (pig)
+                    SscrDv:0000070 6-year-old stage (pig)
+                    SscrDv:0000071 7-year-old stage (pig)
+                UBERON:0007222 late adult stage
+```
+
+---
+
+### disease_ontology_term_id
+
+No schema changes are required.
+
+#### Editorial Notes
+
+---
+
+### organism_ontology_term_id
+
+<code>organism_ontology_term_id</code> is <a href="https://www.ebi.ac.uk/ols4/ontologies/ncbitaxon/classes?obo_id=NCBITaxon%3A9823"><code>"NCBITaxon:9823"</code></a> for <i>Sus scrofa domesticus</i>.
+
+---
+
+### sex_ontology_term_id
+
+No schema changes are required.
+
+#### Editorial Notes
+
+---
+
+### tissue_ontology_term_id
+
+No schema changes are required.
+
+
+#### Editorial Notes
+
+---
+
+## Reference
+
+
+[BGEE](https://www.bgee.org/species/9823)
diff --git a/.github/ISSUE_TEMPLATE/tech-issue.md b/.github/ISSUE_TEMPLATE/tech-issue.md
index 4965be8c..7b75dbb8 100644
--- a/.github/ISSUE_TEMPLATE/tech-issue.md
+++ b/.github/ISSUE_TEMPLATE/tech-issue.md
@@ -1,9 +1,11 @@
 ---
 name: Tech Issue
-about: Engineering-specific technical work that is not product-specific. Engineering team "owns" these issues.
-title: ""
+about: Engineering-specific technical work that is not product-specific. Engineering
+  team "owns" these issues.
+title: ''
 labels: tech
-assignees: ""
+assignees: ''
+
 ---
 
 ## Motivation

From b4b1a5f54534eed0d40135492be8a4cd3f28540c Mon Sep 17 00:00:00 2001
From: Brian Raymor <brianraymor@chanzuckerberg.com>
Date: Thu, 19 Dec 2024 15:23:39 -0800
Subject: [PATCH 27/28] updated X matrix requirement (#1157)

---
 schema/drafts/5.3.0.md | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/schema/drafts/5.3.0.md b/schema/drafts/5.3.0.md
index 7090d268..70de7b79 100644
--- a/schema/drafts/5.3.0.md
+++ b/schema/drafts/5.3.0.md
@@ -163,9 +163,8 @@ The types below are python3 types. Note that a python3 `str` is a sequence of Un
 
 ## `X` (Matrix Layers)
 
-The data stored in the `X` data matrix is the data that is viewable in CELLxGENE Explorer. CELLxGENE does not impose any additional constraints on the `X` data matrix.
+The data stored in the `AnnData.X` data matrix is the data that is viewable in CELLxGENE Explorer. For `AnnData.X`, `AnnData.raw.X`, and all layers, if a data matrix contains 50% or more values that are zeros, it MUST be encoded as a [`scipy.sparse.csr_matrix`](https://docs.scipy.org/doc/scipy/reference/generated/scipy.sparse.csr_matrix.html) with zero values encoded as <a href="https://docs.scipy.org/doc/scipy/tutorial/sparse.html#sparse-arrays-implicit-zeros-and-duplicates">implicit zeros</a>.
 
-In any layer, if a matrix has 50% or more values that are zeros, it is STRONGLY RECOMMENDED that the matrix be encoded as a [`scipy.sparse.csr_matrix`](https://docs.scipy.org/doc/scipy/reference/generated/scipy.sparse.csr_matrix.html) with zero values encoded as <a href="https://docs.scipy.org/doc/scipy/tutorial/sparse.html#sparse-arrays-implicit-zeros-and-duplicates">implicit zeros</a>.
 
 CELLxGENE's matrix layer requirements are tailored to optimize data reuse. Because each assay has different characteristics, the requirements differ by assay type. In general, CELLxGENE requires submission of "raw" data suitable for computational reuse when a standard raw matrix format exists for an assay. It is STRONGLY RECOMMENDED to also include a "normalized" matrix with processed values ready for data analysis and suitable for visualization in CELLxGENE Explorer. So that CELLxGENE's data can be provided in download formats suitable for both R and Python, the schema imposes the following requirements:
 
@@ -2097,6 +2096,8 @@ When a dataset is uploaded, CELLxGENE Discover MUST automatically add the `schem
   * Updated the requirements for  <code>spatial[<i>library_id</i>]['scalefactors']</code> to include descendants of  _Visium Spatial Gene Expression_.
   * Updated the requirements for  <code>spatial[<i>library_id</i>]['scalefactors']['spot_diameter_fullres']</code> to include descendants of  _Visium Spatial Gene Expression_.
   * Updated the requirements for  <code>spatial[<i>library_id</i>]['scalefactors']['tissue_hires_scalef']</code> to include descendants of  _Visium Spatial Gene Expression_.
+* X (Matrix Layers)
+  * Updated the STRONGLY RECOMMENDED requirement to a MUST. A matrix with 50% or more values that are zeros MUST be encoded as `scipy.sparse.csr_matrix`.
 
 ### schema v5.2.0
 

From e8c97c074bf5c49a9cbb4a08e3715c603cee4429 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 7 Jan 2025 14:16:00 -0800
Subject: [PATCH 28/28] chore(deps): update numpy requirement from <2 to <3 in
 /cellxgene_schema_cli (#1163)

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 cellxgene_schema_cli/requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cellxgene_schema_cli/requirements.txt b/cellxgene_schema_cli/requirements.txt
index d8b2bcdd..18b58ada 100644
--- a/cellxgene_schema_cli/requirements.txt
+++ b/cellxgene_schema_cli/requirements.txt
@@ -2,7 +2,7 @@ anndata>=0.8,<0.11
 cellxgene-ontology-guide==1.3.0 # update before a schema migration
 click<9
 Cython<4
-numpy<2
+numpy<3
 pandas>2,<3
 PyYAML<7
 scipy<2

For	Use
Embryonic stage	A term from the set of Carnegie stages 1-23 (up to 8 weeks after conception; e.g. HsapDv:0000003)
Fetal development	A term from the set of 9 to 38 week post-fertilization human stages (9 weeks after conception and before birth; e.g. HsapDv:0000046)
After birth for the first 12 months	A term from the set of 1 to 12 month-old human stages (e.g. HsapDv:0000273)
After the first 12 months post-birth	A term from the set of year-old human stages (e.g. HsapDv:0000246)
For	Use
From the time of conception to 1 month after birth	A term from the set of Theiler stages (e.g. MmusDv:0000003)
From 2 months after birth	A term from the set of month-old stages (e.g. MmusDv:0000062)
Key	genetic_ancestry_African
Annotator	Curator MUST annotate.
Value	`str` or `float`. All observations with the same `donor_id` MUST contain the same value. If `organism_ontology_term_id` is NOT `"NCBITaxon:9606"` for Homo sapiens, then the value MUST be `"na"`. If `organism_ontology_term_id` is `"NCBITaxon:9606"` for Homo sapiens, then the value MUST be a `float("nan")` if unavailable; otherwise, the value MUST be the genetic ancestry percentage of `"HANCESTRO:0010"` for African expressed as a `float` greater than or equal to `0.0` and less than or equal to `1.0`.
Key	genetic_ancestry_East_Asian
Annotator	Curator MUST annotate.
Value	`str` or `float`. All observations with the same `donor_id` MUST contain the same value. If `organism_ontology_term_id` is NOT `"NCBITaxon:9606"` for Homo sapiens, then the value MUST be `"na"`. If `organism_ontology_term_id` is `"NCBITaxon:9606"` for Homo sapiens, then the value MUST be a `float("nan")` if unavailable; otherwise, the value MUST be the genetic ancestry percentage of `"HANCESTRO:0009"` for East Asian expressed as a `float` greater than or equal to `0.0` and less than or equal to `1.0`.
Key	genetic_ancestry_European
Annotator	Curator MUST annotate.
Value	`str` or `float`. All observations with the same `donor_id` MUST contain the same value. If `organism_ontology_term_id` is NOT `"NCBITaxon:9606"` for Homo sapiens, then the value MUST be `"na"`. If `organism_ontology_term_id` is `"NCBITaxon:9606"` for Homo sapiens, then the value MUST be a `float("nan")` if unavailable; otherwise, the value MUST be the genetic ancestry percentage of `"HANCESTRO:0005"` for European expressed as a `float` greater than or equal to `0.0` and less than or equal to `1.0`.
Key	genetic_ancestry_Indigenous_American
Annotator	Curator MUST annotate.
Value	`str` or `float`. All observations with the same `donor_id` MUST contain the same value. If `organism_ontology_term_id` is NOT `"NCBITaxon:9606"` for Homo sapiens, then the value MUST be `"na"`. If `organism_ontology_term_id` is `"NCBITaxon:9606"` for Homo sapiens, then the value MUST be a `float("nan")` if unavailable; otherwise, the value MUST be the genetic ancestry percentage of `"HANCESTRO:0013"` for Indigenous American expressed as a `float` greater than or equal to `0.0` and less than or equal to `1.0`.
Key	genetic_ancestry_Oceanian
Annotator	Curator MUST annotate.
Value	`str` or `float`. All observations with the same `donor_id` MUST contain the same value. If `organism_ontology_term_id` is NOT `"NCBITaxon:9606"` for Homo sapiens, then the value MUST be `"na"`. If `organism_ontology_term_id` is `"NCBITaxon:9606"` for Homo sapiens, then the value MUST be a `float("nan")` if unavailable; otherwise, the value MUST be the genetic ancestry percentage of `"HANCESTRO:0017"` for Oceanian expressed as a `float` greater than or equal to `0.0` and less than or equal to `1.0`.
Key	genetic_ancestry_South_Asian
Annotator	Curator MUST annotate.
Value	`str` or `float`. All observations with the same `donor_id` MUST contain the same value. If `organism_ontology_term_id` is NOT `"NCBITaxon:9606"` for Homo sapiens, then the value MUST be `"na"`. If `organism_ontology_term_id` is `"NCBITaxon:9606"` for Homo sapiens, then the value MUST be a `float("nan")` if unavailable; otherwise, the value MUST be the genetic ancestry percentage of `"HANCESTRO:0006"` for South Asian expressed as a `float` greater than or equal to `0.0` and less than or equal to `1.0`.
Key	sex_ontology_term_id
Annotator	Curator MUST annotate.
Value	categorical with `str` categories. If unavailable, this MUST be `"unknown"`. If `organism_ontology_term_id` is `"NCBITaxon:6239"` for Caenorhabditis elegans, this MUST be `PATO:0000384` for male or `PATO:0001340` for hermaphrodite. Otherwise, this MUST be a descendant of PATO:0001894 for phenotypic sex.
Key	development_stage_ontology_term_id
Annotator	Curator MUST annotate.
Value	categorical with `str` categories. If unavailable, this MUST be `"unknown"`. If `organism_ontology_term_id` is `"NCBITaxon:9823"` for Sus scrofa domesticus, this MUST be the most accurate descendant of `SscrDv:0000000` for life cycle stage.