From dfcd2c26f10196adab8bc3233e0f25dc5a214534 Mon Sep 17 00:00:00 2001 From: Nayib Gloria <55710092+nayib-jose-gloria@users.noreply.github.com> Date: Tue, 9 Apr 2024 16:11:43 -0400 Subject: [PATCH] feat: add get_term_children and get_term_parents (#186) --- .../ontology_parser.py | 102 +++++++++++++----- api/python/tests/test_ontology_parser.py | 16 +++ 2 files changed, 94 insertions(+), 24 deletions(-) diff --git a/api/python/src/cellxgene_ontology_guide/ontology_parser.py b/api/python/src/cellxgene_ontology_guide/ontology_parser.py index da9889e6..62ba67c4 100644 --- a/api/python/src/cellxgene_ontology_guide/ontology_parser.py +++ b/api/python/src/cellxgene_ontology_guide/ontology_parser.py @@ -28,8 +28,7 @@ def __init__(self, schema_version: Optional[str] = None): def _parse_ontology_name(self, term_id: str) -> str: """ Parse the ontology name from a given term ID. If the term ID does not conform to the expected term format or - is not - from an ontology supported by cellxgene-ontology-guide, raise a ValueError. + is not from an ontology supported by cellxgene-ontology-guide, raise a ValueError. :param term_id: str ontology term to parse :return: str name of ontology that term belongs to @@ -67,7 +66,7 @@ def is_valid_term_id(self, term_id: str, ontology: Optional[str] = None) -> bool def get_term_ancestors(self, term_id: str, include_self: bool = False) -> List[str]: """ Get the ancestor ontology terms for a given term. If include_self is True, the term itself will be included as - an ancestor. + an ancestor. Raises ValueError if the term ID is not valid member of a supported ontology. Example >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser @@ -88,7 +87,7 @@ def get_term_ancestors(self, term_id: str, include_self: bool = False) -> List[s def map_term_ancestors(self, term_ids: Iterable[str], include_self: bool = False) -> Dict[str, List[str]]: """ Get the ancestor ontology terms for each term in a list. If include_self is True, the term itself will be - included as an ancestor. + included as an ancestor. Raises ValueError if the term ID is not valid member of a supported ontology. Example >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser @@ -110,7 +109,8 @@ def map_term_ancestors(self, term_ids: Iterable[str], include_self: bool = False def get_term_ancestors_with_distances(self, term_id: str, include_self: bool = False) -> Dict[str, int]: """ Get the ancestor ontology terms for a given term, and their distance from the term_id. If include_self is True, - the term itself will be included as an ancestor. + the term itself will be included as an ancestor. Raises ValueError if the term ID is not valid member of a + supported ontology. Example >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser @@ -135,7 +135,8 @@ def map_term_ancestors_with_distances( ) -> Dict[str, Dict[str, int]]: """ Get the ancestor ontology terms for each term in a list, and their distance from the term_id. If include_self is - True, the term itself will be included as an ancestor. + True, the term itself will be included as an ancestor. Raises ValueError if the term ID is not valid member of a + supported ontology. Example >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser @@ -151,10 +152,32 @@ def map_term_ancestors_with_distances( """ return {term_id: self.get_term_ancestors_with_distances(term_id, include_self) for term_id in term_ids} + def get_term_parents(self, term_id: str) -> List[str]: + """ + Get the direct parent ontology terms for a given term. Raises ValueError if the term ID is not valid member of + a supported ontology. + + Example + >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser + >>> ontology_parser = OntologyParser() + >>> ontology_parser.get_term_parents("CL:0000101") + ['CL:0000526'] + + :param term_id: str ontology term to find parents for + :return: List[str] of parent terms + """ + if term_id in VALID_NON_ONTOLOGY_TERMS: + return [] + ontology_name = self._parse_ontology_name(term_id) + ancestors: Dict[str, int] = self.cxg_schema.ontology(ontology_name)[term_id]["ancestors"] + parents: List[str] = [ancestor for ancestor, distance in ancestors.items() if distance == 1] + return parents + def get_distance_between_terms(self, term_id_1: str, term_id_2: str) -> int: """ Get the distance between two ontology terms. The distance is defined as the number of edges between the two terms. Terms must be from the same ontology. Returns -1 if terms are disjoint. + Raises ValueError if term IDs are not valid members of a supported ontology. :param term_id_1: str ontology term to find distance for :param term_id_2: str ontology term to find distance for @@ -172,6 +195,7 @@ def get_lowest_common_ancestors(self, term_id_1: str, term_id_2: str) -> List[st """ Get the lowest common ancestors between two ontology terms that is from the given ontology. Terms must be from the same ontology. Ontologies are DAGs, so there may be multiple lowest common ancestors. + Raises ValueError if term IDs are not valid members of a supported ontology. :param term_id_1: str ontology term to find LCA for :param term_id_2: str ontology term to find LCA for @@ -198,7 +222,8 @@ def get_lowest_common_ancestors(self, term_id_1: str, term_id_2: str) -> List[st def get_high_level_terms(self, term_id: str, high_level_terms: List[str]) -> List[str]: """ Get the high-level ontology terms for a given term. High-level terms are defined as the ancestors of the term - that are part of the high-level ontology terms supported by cellxgene-ontology-guide. + that are part of the high-level ontology terms supported by cellxgene-ontology-guide. Raises ValueError if + term ID is not valid member of a supported ontology. Example >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser @@ -223,7 +248,7 @@ def map_high_level_terms(self, term_ids: List[str], high_level_terms: List[str]) {"CL:0000003": ["CL:0000000", ...], "CL:0000005": ["CL:0000000", ...]} Where each term_id is mapped to a List[str] of high-level terms that it is a descendant of. Includes self - as a descendant. + as a descendant. Raises ValueError if term ID is not valid member of a supported ontology. :param term_ids: list of str ontology terms to map high level terms for :param high_level_terms: list of str ontology terms to be mapped to descendant term_ids @@ -235,7 +260,8 @@ def map_high_level_terms(self, term_ids: List[str], high_level_terms: List[str]) def get_highest_level_term(self, term_id: str, high_level_terms: List[str]) -> Union[str, None]: """ Get the highest level ontology term for a given term. The highest level term is defined as the ancestor of the - term that is part of the high-level ontology terms supported by cellxgene-ontology-guide. + term that is part of the high-level ontology terms supported by cellxgene-ontology-guide. Raises ValueError + if term ID is not valid member of a supported ontology. Example >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser @@ -263,7 +289,7 @@ def map_highest_level_term(self, term_ids: List[str], high_level_terms: List[str Where each term_id is mapped to the highest level term that it is a descendant of, from the list provided. Includes term itself as a descendant. Maps to None if term_id does not map to any high level terms among the - provided input. + provided input. Raises ValueError if term ID is not valid member of a supported ontology. :param term_ids: list of str ontology terms to map high level terms for :param high_level_terms: list of str ontology terms that can be mapped to descendant term_ids @@ -275,7 +301,7 @@ def map_highest_level_term(self, term_ids: List[str], high_level_terms: List[str def get_term_descendants(self, term_id: str, include_self: bool = False) -> List[str]: """ Get the descendant ontology terms for a given term. If include_self is True, the term itself will be included as - a descendant. + a descendant. Raises ValueError if term ID is not valid member of a supported ontology. Example >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser @@ -300,7 +326,7 @@ def get_term_descendants(self, term_id: str, include_self: bool = False) -> List def map_term_descendants(self, term_ids: Iterable[str], include_self: bool = False) -> Dict[str, List[str]]: """ Get the descendant ontology terms for each term in a list. If include_self is True, the term itself will be - included as a descendant. + included as a descendant. Raises ValueError if term ID is not valid member of a supported ontology. Example >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser @@ -335,10 +361,34 @@ def map_term_descendants(self, term_ids: Iterable[str], include_self: bool = Fal return descendants_dict + def get_term_children(self, term_id: str) -> List[str]: + """ + Get the direct children ontology terms for a given term. Raises ValueError if term ID is not valid member of a + supported ontology. + + Example + >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser + >>> ontology_parser = OntologyParser() + >>> ontology_parser.get_term_children("CL:0000526") + ['CL:0000101'] + + :param term_id: str ontology term to find children for + :return: List[str] of children terms + """ + if term_id in VALID_NON_ONTOLOGY_TERMS: + return [] + ontology_name = self._parse_ontology_name(term_id) + children = [] + for candidate_child, candidate_metadata in self.cxg_schema.ontology(ontology_name).items(): + for ancestor, distance in candidate_metadata["ancestors"].items(): + if ancestor == term_id and distance == 1: + children.append(candidate_child) + return children + def get_term_graph(self, term_id: str) -> OntologyNode: """ Get the DAG of OntologyNode relationships, with the input term as the root node. Only includes terms from the - same ontology as the root term ID. + same ontology as the root term ID. Raises ValueError if term ID is not valid member of a supported ontology. Example >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser @@ -370,18 +420,16 @@ def get_term_graph(self, term_id: str) -> OntologyNode: :param term_id: str ontology term to build subtree for :return: OntologyNode representation of graph with term_id as root. """ - ontology_name = self._parse_ontology_name(term_id) term_label = self.get_term_label(term_id) root = OntologyNode(term_id, term_label) - for candidate_descendant, candidate_metadata in self.cxg_schema.ontology(ontology_name).items(): - for ancestor, distance in candidate_metadata["ancestors"].items(): - if ancestor == term_id and distance == 1: - root.add_child(self.get_term_graph(candidate_descendant)) + for child_term_id in self.get_term_children(term_id): + root.add_child(self.get_term_graph(child_term_id)) return root def is_term_deprecated(self, term_id: str) -> bool: """ - Check if an ontology term is deprecated. + Check if an ontology term is deprecated. Raises ValueError if term ID is not valid member of a supported + ontology. Example >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser @@ -401,6 +449,7 @@ def is_term_deprecated(self, term_id: str) -> bool: def get_term_replacement(self, term_id: str) -> Union[str, None]: """ Fetch the replacement term for a deprecated ontology term, if a replacement exists. Return None otherwise. + Raises ValueError if term ID is not valid member of a supported ontology. Example >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser @@ -427,7 +476,8 @@ def get_term_metadata(self, term_id: str) -> Dict[str, Any]: Term Tracker maps to a str url where there is discussion around this term's curation (or deprecation). Consider maps to List[str] of alternate ontology terms to consider using instead of this term - All keys map to None if no metadata of that type is present. + All keys map to None if no metadata of that type is present. Raises ValueError if term ID is not valid member + of a supported ontology. :param term_id: str ontology term to fetch metadata for :return: Dict with keys 'Comments', 'Term Tracker', and 'Consider' containing associated metadata. @@ -442,7 +492,8 @@ def get_term_metadata(self, term_id: str) -> Dict[str, Any]: def get_term_label(self, term_id: str) -> str: """ - Fetch the human-readable label for a given ontology term. + Fetch the human-readable label for a given ontology term. Raises ValueError if term ID is not valid member of a + supported ontology. Example >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser @@ -461,7 +512,8 @@ def get_term_label(self, term_id: str) -> str: def map_term_labels(self, term_ids: Iterable[str]) -> Dict[str, str]: """ - Fetch the human-readable label for a given list of ontology terms. + Fetch the human-readable label for a given list of ontology terms. Raises ValueError if term ID is not valid + member of a supported ontology. Example >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser @@ -476,7 +528,8 @@ def map_term_labels(self, term_ids: Iterable[str]) -> Dict[str, str]: def get_term_description(self, term_id: str) -> Optional[str]: """ - Fetch the description for a given ontology term. + Fetch the description for a given ontology term. Raises ValueError if term ID is not valid member of a + supported ontology. Example >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser @@ -495,7 +548,8 @@ def get_term_description(self, term_id: str) -> Optional[str]: def map_term_descriptions(self, term_ids: List[str]) -> Dict[str, Optional[str]]: """ - Fetch the descriptions for a given list of ontology terms. + Fetch the descriptions for a given list of ontology terms. Raises ValueError if term ID is not valid member of + a supported ontology. Example >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser diff --git a/api/python/tests/test_ontology_parser.py b/api/python/tests/test_ontology_parser.py index d78a434f..4f8d83fd 100644 --- a/api/python/tests/test_ontology_parser.py +++ b/api/python/tests/test_ontology_parser.py @@ -298,6 +298,22 @@ def test_get_distance_between_terms(ontology_parser): assert ontology_parser.get_distance_between_terms(term_id_1="CL:0000001", term_id_2="CL:0000008") == -1 +@pytest.mark.parametrize( + "term_id,expected", + [("CL:0000005", ["CL:0000001", "CL:0000002"]), ("CL:0000002", ["CL:0000000"]), ("CL:0000000", []), ("unknown", [])], +) +def test_get_term_parents(ontology_parser, term_id, expected): + assert ontology_parser.get_term_parents(term_id) == expected + + +@pytest.mark.parametrize( + "term_id,expected", + [("CL:0000000", ["CL:0000001", "CL:0000002", "CL:0000003"]), ("CL:0000005", []), ("unknown", [])], +) +def test_get_term_children(ontology_parser, term_id, expected): + assert ontology_parser.get_term_children(term_id) == expected + + def test_get_term_graph(ontology_parser): graph = ontology_parser.get_term_graph("CL:0000000") assert graph.to_dict() == {