diff --git a/src/pyeed/adapter/ncbi_protein_mapper.py b/src/pyeed/adapter/ncbi_protein_mapper.py
index 0df33af..3522f47 100644
--- a/src/pyeed/adapter/ncbi_protein_mapper.py
+++ b/src/pyeed/adapter/ncbi_protein_mapper.py
@@ -40,9 +40,14 @@ def map_organism(self, seq_record: SeqRecord) -> Tuple[Any, Any]:
f"For {seq_record.id} {feature.qualifiers['db_xref']} taxonomy ID(s) were found, using the first one. Skipping organism assignment"
- return (None, None)
- taxonomy_id = feature.qualifiers["db_xref"][0]
+ # check whether one of the db_xref entries is a taxonomy ID (i.e. starts with 'taxon:')
+ taxonomy_id = None
+ for db_xref in feature.qualifiers["db_xref"]:
+ logger.debug(f"Checking db_xref: {db_xref}")
+ if db_xref.startswith("taxon:"):
+ taxonomy_id = db_xref
+ break
if ":" in taxonomy_id:
taxonomy_id = int(taxonomy_id.split(":")[1])
diff --git a/src/pyeed/analysis/ontology_loading.py b/src/pyeed/analysis/ontology_loading.py
new file mode 100644
index 0000000..43a7093
--- /dev/null
+++ b/src/pyeed/analysis/ontology_loading.py
@@ -0,0 +1,119 @@
+from Bio.Align import Alignment as Alignment
+from pyeed.dbconnect import DatabaseConnector
+from pyeed.main import Pyeed
+from rdflib import Graph, RDF, RDFS, OWL, Namespace
+class OntologyAdapter:
+    """
+    Adapter class to load ontology files into the database.
+    """
+
+    def import_ontology_file_in_db(self, file_path: str, db: DatabaseConnector) -> None:
+        """
+        Imports an ontology file into the database.
+
+        Every subject typed ``owl:Class`` becomes an ``OntologyObject`` node keyed by
+        its IRI (``name``). The optional properties ``label`` (rdfs:label),
+        ``description`` (IAO_0000115) and ``synonyms`` (oboInOwl hasExactSynonym) are
+        only written when the ontology provides them.
+
+        ``rdfs:subClassOf`` triples become ``SUBCLASS_OF`` edges when the object is an
+        ``owl:Class``, and ``CUSTOM_RELATIONSHIP`` edges when the object is an
+        ``owl:Restriction`` carrying both ``onProperty`` and ``someValuesFrom``
+        (the RO_ relations in CARD).
+
+        Nodes and edges are written with MERGE, so importing the same file twice
+        does not create duplicates.
+
+        :param file_path: The path to the ontology file.
+        :param db: The database connector
+        :return: None
+        """
+        # Load the OWL file
+        g = Graph()
+        g.parse(file_path)
+
+        # Namespaces of the annotation properties read below
+        iao_ns = Namespace("http://purl.obolibrary.org/obo/IAO_")
+        oboinowl_ns = Namespace("http://www.geneontology.org/formats/oboInOwl#")
+
+        # IRI -> rdfs:label. Always read with a membership test or .get(): not every
+        # class or property in an ontology carries a label.
+        labels = {str(s): str(o) for s, _, o in g.triples((None, RDFS.label, None))}
+
+        # Create one node per class in the OWL file
+        for s, _, _ in g.triples((None, RDF.type, OWL.Class)):
+            class_name = str(s)
+            properties = {}
+
+            if class_name in labels:
+                properties["label"] = labels[class_name]
+
+            # description, example in CARD: "eccC5 is a ..."
+            for _, _, desc in g.triples((s, iao_ns["0000115"], None)):
+                properties["description"] = str(desc)
+
+            # synonyms, example in CARD: "Mtub_eccC5_FLO"; collected as a list so
+            # that several synonyms do not overwrite each other
+            synonyms = [
+                str(syn)
+                for _, _, syn in g.triples((s, oboinowl_ns.hasExactSynonym, None))
+            ]
+            if synonyms:
+                properties["synonyms"] = synonyms
+
+            db.execute_write(
+                """
+                MERGE (c:OntologyObject {name: $name})
+                SET c += $properties
+                """,
+                parameters={"name": class_name, "properties": properties},
+            )
+
+        # Create relationships (subclasses, properties)
+        for s, _, o in g.triples((None, RDFS.subClassOf, None)):
+            # Bound before branching: both branches below need the subject IRI.
+            subclass = str(s)
+
+            if (o, RDF.type, OWL.Class) in g:
+                db.execute_write(
+                    """
+                    MATCH (sub:OntologyObject {name: $subclass}), (super:OntologyObject {name: $superclass})
+                    MERGE (sub)-[:SUBCLASS_OF]->(super)
+                    """,
+                    parameters={"subclass": subclass, "superclass": str(o)},
+                )
+
+            # handles the case where the superclass is a restriction, RO_ (in CARD)
+            elif (o, RDF.type, OWL.Restriction) in g:
+                on_property = None
+                some_values_from = None
+
+                # Extract onProperty
+                for _, _, prop in g.triples((o, OWL.onProperty, None)):
+                    on_property = str(prop)
+
+                # Extract someValuesFrom
+                for _, _, value in g.triples((o, OWL.someValuesFrom, None)):
+                    some_values_from = str(value)
+
+                if on_property and some_values_from:
+                    # Link the subclass to the someValuesFrom class. The edge is named
+                    # after the property IRI; its description is the property's label
+                    # when one exists (a null description is simply not stored).
+                    db.execute_write(
+                        """
+                        MATCH (sub:OntologyObject {name: $subclass}), (target:OntologyObject {name: $some_values_from})
+                        MERGE (sub)-[r:CUSTOM_RELATIONSHIP {name: $on_property}]->(target)
+                        SET r.description = $description
+                        """,
+                        parameters={
+                            "subclass": subclass,
+                            "some_values_from": some_values_from,
+                            "on_property": on_property,
+                            "description": labels.get(on_property),
+                        },
+                    )
+if __name__ == "__main__":
+    # Manual run against a local Neo4j instance.
+    # WARNING: this wipes the database and resets its constraints before importing.
+    credentials = {"user": "neo4j", "password": "12345678"}
+    ontology_file = "/home/nab/Niklas/TEM-lactamase/CARD_Data_Ontologies/aro.owl"
+
+    eedb = Pyeed("bolt://localhost:7687", **credentials)
+
+    # Start from an empty database with freshly created constraints
+    eedb.db.wipe_database()
+    eedb.db.remove_db_constraints(**credentials)
+    eedb.db.initialize_db_constraints(**credentials)
+
+    # Load the ontology into the now-empty database
+    OntologyAdapter().import_ontology_file_in_db(ontology_file, eedb.db)
\ No newline at end of file
diff --git a/src/pyeed/main.py b/src/pyeed/main.py
index bc14238..88dfc8f 100644
--- a/src/pyeed/main.py
+++ b/src/pyeed/main.py
@@ -83,6 +83,8 @@ def fetch_from_primary_db(
accessions = self.db.execute_read(query)[0]["accessions"]
ids = [id for id in ids if id not in accessions]
+ # count how many sequences are already in the database
+ logger.info(f"Found {len(accessions)} sequences in the database.")
logger.info(f"Fetching {len(ids)} sequences from {db}.")
if db.lower() == "uniprot":
diff --git a/src/pyeed/model.py b/src/pyeed/model.py
index 138de3c..235f6cb 100644
--- a/src/pyeed/model.py
+++ b/src/pyeed/model.py
@@ -439,6 +439,7 @@ class Protein(StrictStructuredNode):
site = RelationshipTo("Site", "HAS_SITE", model=SiteRel)
region = RelationshipTo("Region", "HAS_REGION", model=RegionRel)
go_annotation = RelationshipTo("GOAnnotation", "ASSOCIATED_WITH")
+ ontology_object = RelationshipTo("OntologyObject", "ASSOCIATED_WITH")
mutation = RelationshipTo("Protein", "MUTATION", model=Mutation)
pairwise_aligned = RelationshipTo(
"Protein", "PAIRWISE_ALIGNED", model=PairwiseAlignmentResult
@@ -469,3 +470,49 @@ class DNA(StrictStructuredNode):
pairwise_aligned = RelationshipTo(
"DNA", "PAIRWISE_ALIGNED", model=PairwiseAlignmentResult
+class CustomRealationship(StructuredRel):
+    """Relationship model for a named, optionally described link between two ontology objects."""
+
+    name = StringProperty(required=True)
+    description = StringProperty()
+
+    @classmethod
+    def validate_and_connect(
+        cls,
+        molecule1: StrictStructuredNode,
+        molecule2: StrictStructuredNode,
+        name: str,
+        description: str,
+    ):
+        """
+        Connect ``molecule1`` to ``molecule2`` and return a relationship object.
+
+        The edge is created through ``molecule1.custom_relationships`` with the
+        given ``name`` and ``description``. The returned value is a new instance
+        of this class built from the same two values, not the object returned by
+        ``connect``.
+        """
+        properties = {"name": name, "description": description}
+        molecule1.custom_relationships.connect(molecule2, properties)
+        return cls(**properties)
+
+    @property
+    def label(self):
+        """Return the relationship's ``name``."""
+        return self.name
+class OntologyObject(StrictStructuredNode):
+ """A node representing an ontology object in the database."""
+ # Required identifier, declared with a unique index.
+ name = StringProperty(required=True, unique_index=True)
+ # Optional free-text description.
+ description = StringProperty()
+ # Optional label; presumably the human-readable name from the ontology - confirm against the loader.
+ label = StringProperty()
+ # Optional list of strings holding alternative names.
+ synonyms = ArrayProperty(StringProperty())
+ # Relationships
+ # SUBCLASS_OF edges to other OntologyObject nodes.
+ # NOTE(review): if edges are written as (sub)-[:SUBCLASS_OF]->(super), this attribute
+ # would yield the superclasses of a node despite its name - confirm the intended direction.
+ subclasses = RelationshipTo("OntologyObject", "SUBCLASS_OF")
+ # CUSTOM_RELATIONSHIP edges to other OntologyObject nodes, with properties modelled by CustomRealationship.
+ custom_relationships = RelationshipTo("OntologyObject", "CUSTOM_RELATIONSHIP", model=CustomRealationship)