Skip to content

Commit

Permalink
Merged dev into main, resolved conflicts by accepting dev version
Browse files Browse the repository at this point in the history
  • Loading branch information
haeussma committed Jun 4, 2024
2 parents 2e57aa4 + aec3a21 commit 5e52f48
Show file tree
Hide file tree
Showing 30 changed files with 351 additions and 4,920 deletions.
Binary file added .DS_Store
Binary file not shown.
Binary file added docs/.DS_Store
Binary file not shown.
Binary file added docs/examples/.DS_Store
Binary file not shown.
Binary file removed docs/examples/image.png
Binary file not shown.
1,538 changes: 0 additions & 1,538 deletions docs/examples/logs/py4cytoscape.log.1

This file was deleted.

173 changes: 0 additions & 173 deletions docs/examples/logs/py4cytoscape.log.2

This file was deleted.

1,707 changes: 0 additions & 1,707 deletions docs/examples/logs/py4cytoscape.log.3

This file was deleted.

1,472 changes: 0 additions & 1,472 deletions docs/examples/logs/py4cytoscape.log.4

This file was deleted.

2 changes: 1 addition & 1 deletion pyeed/core/abstractannotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ class AbstractAnnotation(

_repo: Optional[str] = PrivateAttr(default="https://github.com/PyEED/pyeed")
_commit: Optional[str] = PrivateAttr(
default="63f43b11e0d359e1d0a1f541cea25dd484ad0072"
default="5ca1d8073b90b91effc0fe9e3aaa578caf05980f"
)

_raw_xml_data: Dict = PrivateAttr(default_factory=dict)
Expand Down
2 changes: 1 addition & 1 deletion pyeed/core/alignmentresult.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ class AlignmentResult(

_repo: Optional[str] = PrivateAttr(default="https://github.com/PyEED/pyeed")
_commit: Optional[str] = PrivateAttr(
default="63f43b11e0d359e1d0a1f541cea25dd484ad0072"
default="5ca1d8073b90b91effc0fe9e3aaa578caf05980f"
)

_raw_xml_data: Dict = PrivateAttr(default_factory=dict)
Expand Down
2 changes: 2 additions & 0 deletions pyeed/core/annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,5 @@ class Annotation(Enum):
FAMILY = "http://semanticscience.org/resource/SIO_001380"
MOTIVE = "http://semanticscience.org/resource/SIO_000131"
CODING_SEQ = "http://semanticscience.org/resource/SIO_001276"
ALPHAHELIX = "http://semanticscience.org/resource/SIO_010468"
BETASTRAND = "http://semanticscience.org/resource/SIO_010469"
2 changes: 1 addition & 1 deletion pyeed/core/blastdata.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ class BlastData(

_repo: Optional[str] = PrivateAttr(default="https://github.com/PyEED/pyeed")
_commit: Optional[str] = PrivateAttr(
default="63f43b11e0d359e1d0a1f541cea25dd484ad0072"
default="5ca1d8073b90b91effc0fe9e3aaa578caf05980f"
)

_raw_xml_data: Dict = PrivateAttr(default_factory=dict)
Expand Down
2 changes: 1 addition & 1 deletion pyeed/core/clustalomegaresult.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ class ClustalOmegaResult(

_repo: Optional[str] = PrivateAttr(default="https://github.com/PyEED/pyeed")
_commit: Optional[str] = PrivateAttr(
default="63f43b11e0d359e1d0a1f541cea25dd484ad0072"
default="5ca1d8073b90b91effc0fe9e3aaa578caf05980f"
)

_raw_xml_data: Dict = PrivateAttr(default_factory=dict)
Expand Down
2 changes: 1 addition & 1 deletion pyeed/core/dnarecord.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ class DNARecord(

_repo: Optional[str] = PrivateAttr(default="https://github.com/PyEED/pyeed")
_commit: Optional[str] = PrivateAttr(
default="63f43b11e0d359e1d0a1f541cea25dd484ad0072"
default="5ca1d8073b90b91effc0fe9e3aaa578caf05980f"
)

_raw_xml_data: Dict = PrivateAttr(default_factory=dict)
Expand Down
2 changes: 1 addition & 1 deletion pyeed/core/organism.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ class Organism(

_repo: Optional[str] = PrivateAttr(default="https://github.com/PyEED/pyeed")
_commit: Optional[str] = PrivateAttr(
default="63f43b11e0d359e1d0a1f541cea25dd484ad0072"
default="5ca1d8073b90b91effc0fe9e3aaa578caf05980f"
)

_raw_xml_data: Dict = PrivateAttr(default_factory=dict)
Expand Down
2 changes: 1 addition & 1 deletion pyeed/core/pairwisealignmentresult.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ class PairwiseAlignmentResult(

_repo: Optional[str] = PrivateAttr(default="https://github.com/PyEED/pyeed")
_commit: Optional[str] = PrivateAttr(
default="63f43b11e0d359e1d0a1f541cea25dd484ad0072"
default="5ca1d8073b90b91effc0fe9e3aaa578caf05980f"
)

_raw_xml_data: Dict = PrivateAttr(default_factory=dict)
Expand Down
4 changes: 1 addition & 3 deletions pyeed/core/proteinrecord.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ class ProteinRecord(

_repo: Optional[str] = PrivateAttr(default="https://github.com/PyEED/pyeed")
_commit: Optional[str] = PrivateAttr(
default="63f43b11e0d359e1d0a1f541cea25dd484ad0072"
default="5ca1d8073b90b91effc0fe9e3aaa578caf05980f"
)

_raw_xml_data: Dict = PrivateAttr(default_factory=dict)
Expand Down Expand Up @@ -145,7 +145,6 @@ def get_id(cls, protein_id: str) -> "ProteinRecord":

import nest_asyncio


nest_asyncio.apply()

if isinstance(protein_id, list) and all(isinstance(x, str) for x in protein_id):
Expand All @@ -166,7 +165,6 @@ def get_ids(cls, accession_ids: List[str]) -> List["ProteinRecord"]:

import nest_asyncio


nest_asyncio.apply()

return asyncio.run(
Expand Down
2 changes: 1 addition & 1 deletion pyeed/core/region.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ class Region(

_repo: Optional[str] = PrivateAttr(default="https://github.com/PyEED/pyeed")
_commit: Optional[str] = PrivateAttr(
default="63f43b11e0d359e1d0a1f541cea25dd484ad0072"
default="5ca1d8073b90b91effc0fe9e3aaa578caf05980f"
)

_raw_xml_data: Dict = PrivateAttr(default_factory=dict)
Expand Down
2 changes: 1 addition & 1 deletion pyeed/core/regionset.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ class RegionSet(

_repo: Optional[str] = PrivateAttr(default="https://github.com/PyEED/pyeed")
_commit: Optional[str] = PrivateAttr(
default="63f43b11e0d359e1d0a1f541cea25dd484ad0072"
default="5ca1d8073b90b91effc0fe9e3aaa578caf05980f"
)

_object_terms: Set[str] = PrivateAttr(
Expand Down
2 changes: 1 addition & 1 deletion pyeed/core/sequence.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ class Sequence(

_repo: Optional[str] = PrivateAttr(default="https://github.com/PyEED/pyeed")
_commit: Optional[str] = PrivateAttr(
default="63f43b11e0d359e1d0a1f541cea25dd484ad0072"
default="5ca1d8073b90b91effc0fe9e3aaa578caf05980f"
)

_raw_xml_data: Dict = PrivateAttr(default_factory=dict)
Expand Down
2 changes: 1 addition & 1 deletion pyeed/core/sequencerecord.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ class SequenceRecord(

_repo: Optional[str] = PrivateAttr(default="https://github.com/PyEED/pyeed")
_commit: Optional[str] = PrivateAttr(
default="63f43b11e0d359e1d0a1f541cea25dd484ad0072"
default="5ca1d8073b90b91effc0fe9e3aaa578caf05980f"
)

_raw_xml_data: Dict = PrivateAttr(default_factory=dict)
Expand Down
2 changes: 1 addition & 1 deletion pyeed/core/site.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ class Site(

_repo: Optional[str] = PrivateAttr(default="https://github.com/PyEED/pyeed")
_commit: Optional[str] = PrivateAttr(
default="63f43b11e0d359e1d0a1f541cea25dd484ad0072"
default="5ca1d8073b90b91effc0fe9e3aaa578caf05980f"
)

_raw_xml_data: Dict = PrivateAttr(default_factory=dict)
Expand Down
2 changes: 1 addition & 1 deletion pyeed/core/standardnumbering.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ class StandardNumbering(

_repo: Optional[str] = PrivateAttr(default="https://github.com/PyEED/pyeed")
_commit: Optional[str] = PrivateAttr(
default="63f43b11e0d359e1d0a1f541cea25dd484ad0072"
default="5ca1d8073b90b91effc0fe9e3aaa578caf05980f"
)

_raw_xml_data: Dict = PrivateAttr(default_factory=dict)
Expand Down
137 changes: 137 additions & 0 deletions pyeed/fetch/pdbmapper.py
Original file line number Diff line number Diff line change
@@ -1 +1,138 @@
from __future__ import annotations

import json
import logging
from typing import TYPE_CHECKING, List

from pyeed.core import Annotation, Organism

if TYPE_CHECKING:
from pyeed.core import ProteinRecord

# Module-level logger named after this module.
LOGGER = logging.getLogger(__name__)
# NOTE(review): attaching a handler and forcing DEBUG at import time affects
# every application importing this module; libraries normally leave handler
# and level configuration to the application -- confirm this is intended.
LOGGER.addHandler(logging.StreamHandler())
LOGGER.setLevel(logging.DEBUG)


class PDBMapper:
    """Convert a JSON-encoded PDB response into ``ProteinRecord`` objects.

    One record is produced per polymer entity that carries a reference
    sequence accession; entities without one are skipped.
    """

    # Ontology term attached to every helix/sheet region (secondary structure).
    _SECONDARY_STRUCTURE_TERM = "http://edamontology.org/topic_3542"

    def __init__(self):
        pass

    def map_pdb_data(self, pdb_data: str) -> List["ProteinRecord"]:
        """Map a JSON-encoded PDB response to protein records.

        Args:
            pdb_data: JSON document whose ``data.entry.polymer_entities``
                list holds the polymer entities to map.

        Returns:
            One ``ProteinRecord`` per polymer entity that has a
            ``database_accession``. An empty list is returned when ``data``,
            ``entry`` or ``polymer_entities`` is null or empty.

        Raises:
            json.JSONDecodeError: If ``pdb_data`` is not valid JSON.
            KeyError: If the ``data``, ``entry`` or ``polymer_entities`` key,
                or a mandatory per-entity key, is absent from the document.
        """
        payload = json.loads(pdb_data)

        # Null levels are treated as "nothing to map" instead of relying on
        # assert/TypeError, which behaves differently under ``python -O``.
        data = payload["data"]
        entry = data["entry"] if data else None
        entities = entry["polymer_entities"] if entry else None
        if not entities:
            return []

        records = []
        for entity in entities:
            identifiers = entity["rcsb_polymer_entity_container_identifiers"] or {}
            accession = self._first_value(
                identifiers.get("reference_sequence_identifiers"),
                "database_accession",
            )
            if not accession:
                # Without an accession there is no id for the record.
                continue
            records.append(self._build_record(entity, accession))

        return records

    @staticmethod
    def _first_value(items, key):
        """Return ``item[key]`` of the first item containing ``key``, else None."""
        for item in items or ():
            if item is not None and key in item:
                return item[key]
        return None

    def _build_record(self, entity, accession):
        """Create the record for one polymer entity and attach its features."""
        # Imported here because the module-level import of ProteinRecord is
        # guarded by TYPE_CHECKING (presumably to avoid a circular import).
        from pyeed.core import ProteinRecord

        tax_id = self._first_value(
            entity["rcsb_entity_source_organism"], "ncbi_taxonomy_id"
        )
        organism = None
        if tax_id:
            organism = Organism(id=str(tax_id), taxonomy_id=int(tax_id))

        record = ProteinRecord(
            id=accession,
            sequence=entity["entity_poly"]["pdbx_seq_one_letter_code"],
            organism=organism,
            structure_id=entity["rcsb_id"],
        )

        self._add_pfam_regions(record, entity)
        self._add_instance_features(record, entity)
        return record

    @staticmethod
    def _add_pfam_regions(record, entity):
        """Add one FAMILY-annotated region per position of each Pfam feature."""
        for feature in entity.get("rcsb_polymer_entity_feature") or ():
            if feature["type"] != "Pfam":
                continue
            for position in feature["feature_positions"]:
                region = record.add_to_regions(
                    id=feature["feature_id"],
                    name=feature["type"],
                    start=position["beg_seq_id"],
                    end=position["end_seq_id"],
                )
                region.add_object_term(Annotation.FAMILY.value)

    @classmethod
    def _add_instance_features(cls, record, entity):
        """Add sheet/helix regions and binding sites from the entity instances.

        Processing is best-effort: the first missing key stops the remaining
        instance features for this entity, but everything added so far is kept.
        """
        secondary_terms = {
            "sheet": Annotation.BETASTRAND.value,
            "helix": Annotation.ALPHAHELIX.value,
        }

        try:
            for instance in entity["polymer_entity_instances"] or ():
                for feature in instance.get("rcsb_polymer_instance_feature") or ():
                    name = feature["name"]

                    if name in secondary_terms:
                        for position in feature["feature_positions"]:
                            region = record.add_to_regions(
                                name=name,
                                start=position["beg_seq_id"],
                                end=position["end_seq_id"],
                            )
                            region.add_object_term(secondary_terms[name])
                            region.add_object_term(cls._SECONDARY_STRUCTURE_TERM)

                    elif name == "binding_site":
                        positions = [
                            position["beg_seq_id"]
                            for position in feature["feature_positions"]
                        ]
                        site = record.add_to_sites(name=name, positions=positions)
                        # NOTE(review): BINDING_SITE is assumed to be a member
                        # of Annotation -- confirm against pyeed.core.
                        site.add_object_term(Annotation.BINDING_SITE.value)

        except KeyError as missing:
            # Keep the record, but leave a trace instead of passing silently.
            LOGGER.debug(
                "Incomplete instance feature data for %s (missing key %s); "
                "remaining instance features skipped",
                entity.get("rcsb_id"),
                missing,
            )
Loading

0 comments on commit 5e52f48

Please sign in to comment.