Skip to content

Commit

Permalink
feat: add get_term_children and get_term_parents (#186)
Browse files Browse the repository at this point in the history
  • Loading branch information
nayib-jose-gloria authored Apr 9, 2024
1 parent 9b2fe53 commit dfcd2c2
Show file tree
Hide file tree
Showing 2 changed files with 94 additions and 24 deletions.
102 changes: 78 additions & 24 deletions api/python/src/cellxgene_ontology_guide/ontology_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,7 @@ def __init__(self, schema_version: Optional[str] = None):
def _parse_ontology_name(self, term_id: str) -> str:
"""
Parse the ontology name from a given term ID. If the term ID does not conform to the expected term format or
is not
from an ontology supported by cellxgene-ontology-guide, raise a ValueError.
is not from an ontology supported by cellxgene-ontology-guide, raise a ValueError.
:param term_id: str ontology term to parse
:return: str name of ontology that term belongs to
Expand Down Expand Up @@ -67,7 +66,7 @@ def is_valid_term_id(self, term_id: str, ontology: Optional[str] = None) -> bool
def get_term_ancestors(self, term_id: str, include_self: bool = False) -> List[str]:
"""
Get the ancestor ontology terms for a given term. If include_self is True, the term itself will be included as
an ancestor.
an ancestor. Raises ValueError if the term ID is not a valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
Expand All @@ -88,7 +87,7 @@ def get_term_ancestors(self, term_id: str, include_self: bool = False) -> List[s
def map_term_ancestors(self, term_ids: Iterable[str], include_self: bool = False) -> Dict[str, List[str]]:
"""
Get the ancestor ontology terms for each term in a list. If include_self is True, the term itself will be
included as an ancestor.
included as an ancestor. Raises ValueError if the term ID is not a valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
Expand All @@ -110,7 +109,8 @@ def map_term_ancestors(self, term_ids: Iterable[str], include_self: bool = False
def get_term_ancestors_with_distances(self, term_id: str, include_self: bool = False) -> Dict[str, int]:
"""
Get the ancestor ontology terms for a given term, and their distance from the term_id. If include_self is True,
the term itself will be included as an ancestor.
the term itself will be included as an ancestor. Raises ValueError if the term ID is not a valid member of a
supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
Expand All @@ -135,7 +135,8 @@ def map_term_ancestors_with_distances(
) -> Dict[str, Dict[str, int]]:
"""
Get the ancestor ontology terms for each term in a list, and their distance from the term_id. If include_self is
True, the term itself will be included as an ancestor.
True, the term itself will be included as an ancestor. Raises ValueError if the term ID is not a valid member of a
supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
Expand All @@ -151,10 +152,32 @@ def map_term_ancestors_with_distances(
"""
return {term_id: self.get_term_ancestors_with_distances(term_id, include_self) for term_id in term_ids}

def get_term_parents(self, term_id: str) -> List[str]:
    """
    Get the direct parent ontology terms for a given term, i.e. the ancestors recorded at a distance of exactly 1.
    Terms listed in VALID_NON_ONTOLOGY_TERMS yield an empty list. Raises ValueError if the term ID is not a valid
    member of a supported ontology.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.get_term_parents("CL:0000101")
    ['CL:0000526']

    :param term_id: str ontology term to find parents for
    :return: List[str] of parent terms
    """
    # Non-ontology placeholder terms are accepted but have no position in any ontology graph.
    if term_id in VALID_NON_ONTOLOGY_TERMS:
        return []

    ontology_key = self._parse_ontology_name(term_id)
    distance_by_ancestor: Dict[str, int] = self.cxg_schema.ontology(ontology_key)[term_id]["ancestors"]

    # A parent is an ancestor exactly one edge away from the term.
    direct_parents: List[str] = []
    for ancestor_id, hops in distance_by_ancestor.items():
        if hops == 1:
            direct_parents.append(ancestor_id)
    return direct_parents

def get_distance_between_terms(self, term_id_1: str, term_id_2: str) -> int:
"""
Get the distance between two ontology terms. The distance is defined as the number of edges between the
two terms. Terms must be from the same ontology. Returns -1 if terms are disjoint.
Raises ValueError if term IDs are not valid members of a supported ontology.
:param term_id_1: str ontology term to find distance for
:param term_id_2: str ontology term to find distance for
Expand All @@ -172,6 +195,7 @@ def get_lowest_common_ancestors(self, term_id_1: str, term_id_2: str) -> List[st
"""
Get the lowest common ancestors between two ontology terms that is from the given ontology.
Terms must be from the same ontology. Ontologies are DAGs, so there may be multiple lowest common ancestors.
Raises ValueError if term IDs are not valid members of a supported ontology.
:param term_id_1: str ontology term to find LCA for
:param term_id_2: str ontology term to find LCA for
Expand All @@ -198,7 +222,8 @@ def get_lowest_common_ancestors(self, term_id_1: str, term_id_2: str) -> List[st
def get_high_level_terms(self, term_id: str, high_level_terms: List[str]) -> List[str]:
"""
Get the high-level ontology terms for a given term. High-level terms are defined as the ancestors of the term
that are part of the high-level ontology terms supported by cellxgene-ontology-guide.
that are part of the high-level ontology terms supported by cellxgene-ontology-guide. Raises ValueError if
term ID is not a valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
Expand All @@ -223,7 +248,7 @@ def map_high_level_terms(self, term_ids: List[str], high_level_terms: List[str])
{"CL:0000003": ["CL:0000000", ...], "CL:0000005": ["CL:0000000", ...]}
Where each term_id is mapped to a List[str] of high-level terms that it is a descendant of. Includes self
as a descendant.
as a descendant. Raises ValueError if term ID is not a valid member of a supported ontology.
:param term_ids: list of str ontology terms to map high level terms for
:param high_level_terms: list of str ontology terms to be mapped to descendant term_ids
Expand All @@ -235,7 +260,8 @@ def map_high_level_terms(self, term_ids: List[str], high_level_terms: List[str])
def get_highest_level_term(self, term_id: str, high_level_terms: List[str]) -> Union[str, None]:
"""
Get the highest level ontology term for a given term. The highest level term is defined as the ancestor of the
term that is part of the high-level ontology terms supported by cellxgene-ontology-guide.
term that is part of the high-level ontology terms supported by cellxgene-ontology-guide. Raises ValueError
if term ID is not a valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
Expand Down Expand Up @@ -263,7 +289,7 @@ def map_highest_level_term(self, term_ids: List[str], high_level_terms: List[str
Where each term_id is mapped to the highest level term that it is a descendant of, from the list provided.
Includes term itself as a descendant. Maps to None if term_id does not map to any high level terms among the
provided input.
provided input. Raises ValueError if term ID is not a valid member of a supported ontology.
:param term_ids: list of str ontology terms to map high level terms for
:param high_level_terms: list of str ontology terms that can be mapped to descendant term_ids
Expand All @@ -275,7 +301,7 @@ def map_highest_level_term(self, term_ids: List[str], high_level_terms: List[str
def get_term_descendants(self, term_id: str, include_self: bool = False) -> List[str]:
"""
Get the descendant ontology terms for a given term. If include_self is True, the term itself will be included as
a descendant.
a descendant. Raises ValueError if term ID is not a valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
Expand All @@ -300,7 +326,7 @@ def get_term_descendants(self, term_id: str, include_self: bool = False) -> List
def map_term_descendants(self, term_ids: Iterable[str], include_self: bool = False) -> Dict[str, List[str]]:
"""
Get the descendant ontology terms for each term in a list. If include_self is True, the term itself will be
included as a descendant.
included as a descendant. Raises ValueError if term ID is not a valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
Expand Down Expand Up @@ -335,10 +361,34 @@ def map_term_descendants(self, term_ids: Iterable[str], include_self: bool = Fal

return descendants_dict

def get_term_children(self, term_id: str) -> List[str]:
    """
    Get the direct children ontology terms for a given term, i.e. every term in the same ontology that records
    the given term as an ancestor at a distance of exactly 1. Terms listed in VALID_NON_ONTOLOGY_TERMS yield an
    empty list. Raises ValueError if term ID is not a valid member of a supported ontology.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.get_term_children("CL:0000526")
    ['CL:0000101']

    :param term_id: str ontology term to find children for
    :return: List[str] of children terms
    """
    if term_id in VALID_NON_ONTOLOGY_TERMS:
        return []
    ontology_name = self._parse_ontology_name(term_id)
    # "ancestors" maps ancestor term ID -> distance, so a keyed lookup replaces scanning every ancestor of every
    # candidate. A candidate that does not list term_id yields None, which never equals 1.
    return [
        candidate_child
        for candidate_child, candidate_metadata in self.cxg_schema.ontology(ontology_name).items()
        if candidate_metadata["ancestors"].get(term_id) == 1
    ]

def get_term_graph(self, term_id: str) -> OntologyNode:
"""
Get the DAG of OntologyNode relationships, with the input term as the root node. Only includes terms from the
same ontology as the root term ID.
same ontology as the root term ID. Raises ValueError if term ID is not a valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
Expand Down Expand Up @@ -370,18 +420,16 @@ def get_term_graph(self, term_id: str) -> OntologyNode:
:param term_id: str ontology term to build subtree for
:return: OntologyNode representation of graph with term_id as root.
"""
ontology_name = self._parse_ontology_name(term_id)
term_label = self.get_term_label(term_id)
root = OntologyNode(term_id, term_label)
for candidate_descendant, candidate_metadata in self.cxg_schema.ontology(ontology_name).items():
for ancestor, distance in candidate_metadata["ancestors"].items():
if ancestor == term_id and distance == 1:
root.add_child(self.get_term_graph(candidate_descendant))
for child_term_id in self.get_term_children(term_id):
root.add_child(self.get_term_graph(child_term_id))
return root

def is_term_deprecated(self, term_id: str) -> bool:
"""
Check if an ontology term is deprecated.
Check if an ontology term is deprecated. Raises ValueError if term ID is not a valid member of a supported
ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
Expand All @@ -401,6 +449,7 @@ def is_term_deprecated(self, term_id: str) -> bool:
def get_term_replacement(self, term_id: str) -> Union[str, None]:
"""
Fetch the replacement term for a deprecated ontology term, if a replacement exists. Return None otherwise.
Raises ValueError if term ID is not a valid member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
Expand All @@ -427,7 +476,8 @@ def get_term_metadata(self, term_id: str) -> Dict[str, Any]:
Term Tracker maps to a str url where there is discussion around this term's curation (or deprecation).
Consider maps to List[str] of alternate ontology terms to consider using instead of this term
All keys map to None if no metadata of that type is present.
All keys map to None if no metadata of that type is present. Raises ValueError if term ID is not a valid member
of a supported ontology.
:param term_id: str ontology term to fetch metadata for
:return: Dict with keys 'Comments', 'Term Tracker', and 'Consider' containing associated metadata.
Expand All @@ -442,7 +492,8 @@ def get_term_metadata(self, term_id: str) -> Dict[str, Any]:

def get_term_label(self, term_id: str) -> str:
"""
Fetch the human-readable label for a given ontology term.
Fetch the human-readable label for a given ontology term. Raises ValueError if term ID is not a valid member of a
supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
Expand All @@ -461,7 +512,8 @@ def get_term_label(self, term_id: str) -> str:

def map_term_labels(self, term_ids: Iterable[str]) -> Dict[str, str]:
"""
Fetch the human-readable label for a given list of ontology terms.
Fetch the human-readable label for a given list of ontology terms. Raises ValueError if term ID is not a valid
member of a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
Expand All @@ -476,7 +528,8 @@ def map_term_labels(self, term_ids: Iterable[str]) -> Dict[str, str]:

def get_term_description(self, term_id: str) -> Optional[str]:
"""
Fetch the description for a given ontology term.
Fetch the description for a given ontology term. Raises ValueError if term ID is not a valid member of a
supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
Expand All @@ -495,7 +548,8 @@ def get_term_description(self, term_id: str) -> Optional[str]:

def map_term_descriptions(self, term_ids: List[str]) -> Dict[str, Optional[str]]:
"""
Fetch the descriptions for a given list of ontology terms.
Fetch the descriptions for a given list of ontology terms. Raises ValueError if term ID is not a valid member of
a supported ontology.
Example
>>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
Expand Down
16 changes: 16 additions & 0 deletions api/python/tests/test_ontology_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,22 @@ def test_get_distance_between_terms(ontology_parser):
assert ontology_parser.get_distance_between_terms(term_id_1="CL:0000001", term_id_2="CL:0000008") == -1


@pytest.mark.parametrize(
    ("term_id", "expected"),
    [
        ("CL:0000005", ["CL:0000001", "CL:0000002"]),
        ("CL:0000002", ["CL:0000000"]),
        ("CL:0000000", []),
        ("unknown", []),
    ],
)
def test_get_term_parents(ontology_parser, term_id, expected):
    """Each term maps to its expected list of direct parents (empty for "CL:0000000" and "unknown")."""
    parents = ontology_parser.get_term_parents(term_id)
    assert parents == expected


@pytest.mark.parametrize(
    ("term_id", "expected"),
    [
        ("CL:0000000", ["CL:0000001", "CL:0000002", "CL:0000003"]),
        ("CL:0000005", []),
        ("unknown", []),
    ],
)
def test_get_term_children(ontology_parser, term_id, expected):
    """Each term maps to its expected list of direct children (empty for "CL:0000005" and "unknown")."""
    children = ontology_parser.get_term_children(term_id)
    assert children == expected


def test_get_term_graph(ontology_parser):
graph = ontology_parser.get_term_graph("CL:0000000")
assert graph.to_dict() == {
Expand Down

0 comments on commit dfcd2c2

Please sign in to comment.