Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

97 owl file including and loading in db #105

Merged
merged 4 commits into from
Nov 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions src/pyeed/adapter/ncbi_protein_mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,14 @@ def map_organism(self, seq_record: SeqRecord) -> Tuple[Any, Any]:
logger.info(
f"For {seq_record.id} {feature.qualifiers['db_xref']} taxonomy ID(s) were found, using the first one. Skipping organism assignment"
)
return (None, None)

taxonomy_id = feature.qualifiers["db_xref"][0]
# check whether one of the db_xref entries is a taxonomy id (i.e. starts with 'taxon:')
taxonomy_id = None
for db_xref in feature.qualifiers["db_xref"]:
logger.debug(f"Checking db_xref: {db_xref}")
if db_xref.startswith("taxon:"):
taxonomy_id = db_xref
break

if ":" in taxonomy_id:
taxonomy_id = int(taxonomy_id.split(":")[1])
Expand Down
119 changes: 119 additions & 0 deletions src/pyeed/analysis/ontology_loading.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
from Bio.Align import Alignment as Alignment
from pyeed.dbconnect import DatabaseConnector
from pyeed.main import Pyeed
from rdflib import Graph, RDF, RDFS, OWL, Namespace



class OntologyAdapter:
    """
    Adapter class to load ontology files into the database.

    Every ``owl:Class`` found in the file becomes an ``OntologyObject`` node
    (properties ``name``, ``label``, ``description``, ``synonyms``).
    ``rdfs:subClassOf`` triples become either ``SUBCLASS_OF`` relationships
    (when the object is itself a class) or ``CUSTOM_RELATIONSHIP``
    relationships (when the object is an ``owl:Restriction`` with
    ``onProperty`` / ``someValuesFrom``).
    """

    def import_ontology_file_in_db(self, file_path: str, db: DatabaseConnector):
        """
        Imports an ontology file into the database.

        :param file_path: The path to the ontology file.
        :param db: The database connector

        :return: None
        """

        # Load the OWL file
        g = Graph()
        g.parse(file_path)

        # Map every labelled resource IRI to its rdfs:label. Classes and
        # properties without a label are simply absent from this mapping.
        labels = {
            str(s): str(o) for s, _, o in g.triples((None, RDFS.label, None))
        }

        self._import_classes(g, db, labels)
        self._import_relationships(g, db, labels)

    def _import_classes(self, g, db, labels):
        """Create or update one ``OntologyObject`` node per ``owl:Class``."""
        iao_ns = Namespace("http://purl.obolibrary.org/obo/IAO_")
        oboinowl_ns = Namespace("http://www.geneontology.org/formats/oboInOwl#")

        for s, _, _ in g.triples((None, RDF.type, OWL.Class)):
            class_name = str(s)

            # Description, example in CARD:
            # <obo:IAO_0000115>eccC5 is a.....</obo:IAO_0000115>
            # If several are present, the last one encountered is kept.
            description = None
            for _, _, desc in g.triples((s, iao_ns["0000115"], None)):
                description = str(desc)

            # Synonyms, example in CARD:
            # <oboInOwl:hasExactSynonym>Mtub_eccC5_FLO</oboInOwl:hasExactSynonym>
            # All of them are collected; sorted because triple iteration
            # order is not guaranteed.
            synonyms = sorted(
                str(syn)
                for _, _, syn in g.triples((s, oboinowl_ns.hasExactSynonym, None))
            )

            # MERGE keeps the import repeatable; a single write sets all
            # properties. Setting a property to null leaves it unset.
            db.execute_write(
                """
                MERGE (c:OntologyObject {name: $name})
                SET c.description = $description,
                    c.label = $label,
                    c.synonyms = $synonyms
                """,
                parameters={
                    "name": class_name,
                    "description": description,
                    # A class without rdfs:label must not abort the import.
                    "label": labels.get(class_name),
                    "synonyms": synonyms,
                },
            )

    def _import_relationships(self, g, db, labels):
        """Create the relationships described by ``rdfs:subClassOf`` triples."""
        for s, _, o in g.triples((None, RDFS.subClassOf, None)):
            # Needed by both branches below, so resolve it up front.
            subclass = str(s)

            if (o, RDF.type, OWL.Class) in g:
                db.execute_write(
                    """
                    MATCH (sub:OntologyObject {name: $subclass}), (super:OntologyObject {name: $superclass})
                    MERGE (sub)-[:SUBCLASS_OF]->(super)
                    """,
                    parameters={"subclass": subclass, "superclass": str(o)},
                )

            # Handles the case where the superclass is a restriction, RO_ (in CARD)
            elif (o, RDF.type, OWL.Restriction) in g:
                on_property = None
                some_values_from = None

                # Extract onProperty
                for _, _, prop in g.triples((o, OWL.onProperty, None)):
                    on_property = str(prop)

                # Extract someValuesFrom
                for _, _, value in g.triples((o, OWL.someValuesFrom, None)):
                    some_values_from = str(value)

                if on_property and some_values_from:
                    # Link the subclass to the someValuesFrom target. The
                    # relationship carries the property IRI as its name and
                    # the property's label (if any) as its description. The
                    # relationship type matches the one declared on
                    # OntologyObject.custom_relationships.
                    db.execute_write(
                        """
                        MATCH (sub:OntologyObject {name: $subclass}), (super:OntologyObject {name: $some_values_from})
                        MERGE (sub)-[r:CUSTOM_RELATIONSHIP {name: $on_property}]->(super)
                        SET r.description = $description
                        """,
                        parameters={
                            "subclass": subclass,
                            "some_values_from": some_values_from,
                            "on_property": on_property,
                            "description": labels.get(on_property),
                        },
                    )





if __name__ == "__main__":
    # Manual import script: wipes the target database, re-creates the
    # constraints and loads one ontology file.
    import os
    import sys

    # Connection settings can be overridden through the environment; the
    # defaults are the values previously hard-coded here.
    uri = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
    username = os.environ.get("NEO4J_USER", "neo4j")
    password = os.environ.get("NEO4J_PASSWORD", "12345678")

    # The ontology file may be given as the first command-line argument.
    if len(sys.argv) > 1:
        file_path = sys.argv[1]
    else:
        file_path = "/home/nab/Niklas/TEM-lactamase/CARD_Data_Ontologies/aro.owl"

    eedb = Pyeed(uri, user=username, password=password)

    # WARNING: destructive - removes all existing data and constraints
    # before the import.
    eedb.db.wipe_database()
    eedb.db.remove_db_constraints(user=username, password=password)

    eedb.db.initialize_db_constraints(user=username, password=password)

    db = eedb.db
    ontology_adapter = OntologyAdapter()

    ontology_adapter.import_ontology_file_in_db(file_path, db)

2 changes: 2 additions & 0 deletions src/pyeed/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,8 @@ def fetch_from_primary_db(

accessions = self.db.execute_read(query)[0]["accessions"]
ids = [id for id in ids if id not in accessions]
# count how many sequences are already in the database
logger.info(f"Found {len(accessions)} sequences in the database.")

logger.info(f"Fetching {len(ids)} sequences from {db}.")
if db.lower() == "uniprot":
Expand Down
47 changes: 47 additions & 0 deletions src/pyeed/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -439,6 +439,7 @@ class Protein(StrictStructuredNode):
site = RelationshipTo("Site", "HAS_SITE", model=SiteRel)
region = RelationshipTo("Region", "HAS_REGION", model=RegionRel)
go_annotation = RelationshipTo("GOAnnotation", "ASSOCIATED_WITH")
ontology_object = RelationshipTo("OntologyObject", "ASSOCIATED_WITH")
mutation = RelationshipTo("Protein", "MUTATION", model=Mutation)
pairwise_aligned = RelationshipTo(
"Protein", "PAIRWISE_ALIGNED", model=PairwiseAlignmentResult
Expand Down Expand Up @@ -469,3 +470,49 @@ class DNA(StrictStructuredNode):
pairwise_aligned = RelationshipTo(
"DNA", "PAIRWISE_ALIGNED", model=PairwiseAlignmentResult
)


class CustomRealationship(StructuredRel):
    """Relationship model holding a name and an optional description.

    Referenced as the relationship model of
    ``OntologyObject.custom_relationships``.
    """

    name = StringProperty(required=True)
    description = StringProperty()

    @classmethod
    def validate_and_connect(
        cls,
        molecule1: StrictStructuredNode,
        molecule2: StrictStructuredNode,
        name: str,
        description: str,
    ):
        """Connect ``molecule1`` to ``molecule2`` via ``custom_relationships``.

        :param molecule1: Node whose ``custom_relationships`` is used to connect.
        :param molecule2: Node that is connected to.
        :param name: Value for the relationship's ``name`` property.
        :param description: Value for the relationship's ``description`` property.

        :return: An instance of this class built from ``name`` and ``description``.
        """
        # NOTE(review): the returned instance is constructed independently of
        # the object produced by ``connect`` - confirm this is what callers expect.
        properties = {"name": name, "description": description}
        molecule1.custom_relationships.connect(molecule2, properties)
        return cls(**properties)

    @property
    def label(self):
        """Alias for ``name``."""
        return self.name



class OntologyObject(StrictStructuredNode):
    """Database node for one entry of an ontology."""

    # Identifier of the entry; mandatory and uniquely indexed.
    name = StringProperty(unique_index=True, required=True)
    # Optional descriptive properties.
    description = StringProperty()
    label = StringProperty()
    synonyms = ArrayProperty(StringProperty())

    # Outgoing relationships to other ontology objects.
    subclasses = RelationshipTo("OntologyObject", "SUBCLASS_OF")
    custom_relationships = RelationshipTo(
        "OntologyObject",
        "CUSTOM_RELATIONSHIP",
        model=CustomRealationship,
    )