Skip to content

Commit

Permalink
Refactor sequence alignment #63 (#70)
Browse files Browse the repository at this point in the history
* okay import fixed and fixed proteinRecord Test

* working on fixing the fetcher (still not working), but fixed a lot of wrong references

* okay fixed protein record and test

* API update

* ruff is happy now with the test

* API update

* all work with simple test

* API update

* fix circular import

* API update

* move `ProteinRecord` into functions

* move `ProteinRecord` into functions

* added test to detect circular imports

* API update

* enabled tests upon PR

* API update

* use `psycopg2-binary` to prevent missing bins

* API update

* API update

* API update

* tests for alignment

* API update

* fix circular import

* remove literals

* API update

---------

Co-authored-by: Niklas Abraham <[email protected]>
Co-authored-by: sdRDM Bot <[email protected]>
Co-authored-by: Jan Range <[email protected]>
Co-authored-by: Max Häußler <[email protected]>
Co-authored-by: max <[email protected]>
  • Loading branch information
6 people authored May 8, 2024
1 parent 8390fb4 commit fd8c319
Show file tree
Hide file tree
Showing 20 changed files with 92 additions and 72 deletions.
17 changes: 10 additions & 7 deletions pyeed/align/hmm.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
from __future__ import annotations

import os
from typing import List, Union
from typing import List, Union, TYPE_CHECKING

import pyhmmer
from pydantic import BaseModel, Field

# TODO import sequence record
from pyeed.core.abstractsequence import AbstractSequence
from pyeed.core.alignment import Alignment
from pyeed.core.sequence import Sequence
from pyeed.core.abstractannotation import AbstractAnnotation

if TYPE_CHECKING:
from pyeed.core.alignment import Alignment
from pyeed.core.sequence import Sequence

# TODO FIX ENTIRE THING

Expand Down Expand Up @@ -54,7 +57,7 @@ def build_model(self) -> pyhmmer.easel.MSA:

def search(
self,
sequence: Union[AbstractSequence, List[AbstractSequence]],
sequence: Union["AbstractSequence", List["AbstractSequence"]],
**pipeline_kwargs,
):

Expand Down Expand Up @@ -103,7 +106,7 @@ def from_file(cls, path: str):
return cls(model=model, name=model.name)

def _prepare_sequences(
self, sequences: Union[AbstractSequence, List[AbstractSequence]]
self, sequences: Union["AbstractSequence", List["AbstractSequence"]]
):
if not isinstance(sequences, list):
sequences = [sequences]
Expand Down
2 changes: 1 addition & 1 deletion pyeed/core/abstractannotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ class AbstractAnnotation(

_repo: Optional[str] = PrivateAttr(default="https://github.com/PyEED/pyeed")
_commit: Optional[str] = PrivateAttr(
default="6a0708b0501e7ce4e280bbea2af990b0d473371d"
default="6c9e70ca7a0928f3e16ace15c2fd2e16a2b6c0d7"
)

_raw_xml_data: Dict = PrivateAttr(default_factory=dict)
Expand Down
2 changes: 1 addition & 1 deletion pyeed/core/alignmentdata.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ class AlignmentData(

_repo: Optional[str] = PrivateAttr(default="https://github.com/PyEED/pyeed")
_commit: Optional[str] = PrivateAttr(
default="6a0708b0501e7ce4e280bbea2af990b0d473371d"
default="6c9e70ca7a0928f3e16ace15c2fd2e16a2b6c0d7"
)

_raw_xml_data: Dict = PrivateAttr(default_factory=dict)
Expand Down
2 changes: 1 addition & 1 deletion pyeed/core/blastdata.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ class BlastData(

_repo: Optional[str] = PrivateAttr(default="https://github.com/PyEED/pyeed")
_commit: Optional[str] = PrivateAttr(
default="6a0708b0501e7ce4e280bbea2af990b0d473371d"
default="6c9e70ca7a0928f3e16ace15c2fd2e16a2b6c0d7"
)

_raw_xml_data: Dict = PrivateAttr(default_factory=dict)
Expand Down
2 changes: 1 addition & 1 deletion pyeed/core/clustalomegadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ class ClustalOmegaData(

_repo: Optional[str] = PrivateAttr(default="https://github.com/PyEED/pyeed")
_commit: Optional[str] = PrivateAttr(
default="6a0708b0501e7ce4e280bbea2af990b0d473371d"
default="6c9e70ca7a0928f3e16ace15c2fd2e16a2b6c0d7"
)

_raw_xml_data: Dict = PrivateAttr(default_factory=dict)
Expand Down
2 changes: 1 addition & 1 deletion pyeed/core/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ class Cluster(

_repo: Optional[str] = PrivateAttr(default="https://github.com/PyEED/pyeed")
_commit: Optional[str] = PrivateAttr(
default="6a0708b0501e7ce4e280bbea2af990b0d473371d"
default="6c9e70ca7a0928f3e16ace15c2fd2e16a2b6c0d7"
)

_raw_xml_data: Dict = PrivateAttr(default_factory=dict)
Expand Down
2 changes: 1 addition & 1 deletion pyeed/core/dnarecord.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ class DNARecord(

_repo: Optional[str] = PrivateAttr(default="https://github.com/PyEED/pyeed")
_commit: Optional[str] = PrivateAttr(
default="6a0708b0501e7ce4e280bbea2af990b0d473371d"
default="6c9e70ca7a0928f3e16ace15c2fd2e16a2b6c0d7"
)

_raw_xml_data: Dict = PrivateAttr(default_factory=dict)
Expand Down
2 changes: 1 addition & 1 deletion pyeed/core/organism.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ class Organism(

_repo: Optional[str] = PrivateAttr(default="https://github.com/PyEED/pyeed")
_commit: Optional[str] = PrivateAttr(
default="6a0708b0501e7ce4e280bbea2af990b0d473371d"
default="6c9e70ca7a0928f3e16ace15c2fd2e16a2b6c0d7"
)

_raw_xml_data: Dict = PrivateAttr(default_factory=dict)
Expand Down
2 changes: 1 addition & 1 deletion pyeed/core/pairwisealignment.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ class PairwiseAlignment(

_repo: Optional[str] = PrivateAttr(default="https://github.com/PyEED/pyeed")
_commit: Optional[str] = PrivateAttr(
default="6a0708b0501e7ce4e280bbea2af990b0d473371d"
default="6c9e70ca7a0928f3e16ace15c2fd2e16a2b6c0d7"
)

_raw_xml_data: Dict = PrivateAttr(default_factory=dict)
Expand Down
27 changes: 4 additions & 23 deletions pyeed/core/proteinrecord.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,10 @@
from sdRDM.tools.utils import elem2dict

from pyeed.container.abstract_container import Blastp
from pyeed.core.dnarecord import DNARecord
from pyeed.core.region import Region
from pyeed.core.sequencerecord import SequenceRecord
from pyeed.core.site import Site
from pyeed.fetch.blast import Blast, BlastProgram, NCBIDataBase
from pyeed.fetch.blast import BlastProgram, NCBIDataBase
from pyeed.fetch.proteinfetcher import ProteinFetcher

from .dnarecord import DNARecord
from .region import Region
from .sequencerecord import SequenceRecord
from .site import Site
Expand Down Expand Up @@ -106,7 +103,7 @@ class ProteinRecord(

_repo: Optional[str] = PrivateAttr(default="https://github.com/PyEED/pyeed")
_commit: Optional[str] = PrivateAttr(
default="6a0708b0501e7ce4e280bbea2af990b0d473371d"
default="6c9e70ca7a0928f3e16ace15c2fd2e16a2b6c0d7"
)

_raw_xml_data: Dict = PrivateAttr(default_factory=dict)
Expand Down Expand Up @@ -283,6 +280,7 @@ def from_sequence(
AssertionError: If the specified database is not supported.
"""

from pyeed.fetch.blast import BlastProgram
from pyeed.fetch.proteinfetcher import ProteinFetcher

nest_asyncio.apply()
Expand Down Expand Up @@ -436,20 +434,3 @@ def from_ncbi(self):
def from_accessions(self):
raise DeprecationWarning("This method is deprecated. Use `get_ids` instead.")


if __name__ == "__main__":

mat_accessions = [
"MBP1912539.1",
"SEV92896.1",
"MBO8174569.1",
"WP_042680787.1",
"NPA47376.1",
"WP_167889085.1",
"WP_048165429.1",
"ACS90033.1",
]

mats = ProteinRecord.get_ids(mat_accessions)

print(mats)
2 changes: 1 addition & 1 deletion pyeed/core/region.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ class Region(

_repo: Optional[str] = PrivateAttr(default="https://github.com/PyEED/pyeed")
_commit: Optional[str] = PrivateAttr(
default="6a0708b0501e7ce4e280bbea2af990b0d473371d"
default="6c9e70ca7a0928f3e16ace15c2fd2e16a2b6c0d7"
)

_raw_xml_data: Dict = PrivateAttr(default_factory=dict)
Expand Down
2 changes: 1 addition & 1 deletion pyeed/core/sequence.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ class Sequence(

_repo: Optional[str] = PrivateAttr(default="https://github.com/PyEED/pyeed")
_commit: Optional[str] = PrivateAttr(
default="6a0708b0501e7ce4e280bbea2af990b0d473371d"
default="6c9e70ca7a0928f3e16ace15c2fd2e16a2b6c0d7"
)

_raw_xml_data: Dict = PrivateAttr(default_factory=dict)
Expand Down
2 changes: 1 addition & 1 deletion pyeed/core/sequencerecord.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ class SequenceRecord(

_repo: Optional[str] = PrivateAttr(default="https://github.com/PyEED/pyeed")
_commit: Optional[str] = PrivateAttr(
default="6a0708b0501e7ce4e280bbea2af990b0d473371d"
default="6c9e70ca7a0928f3e16ace15c2fd2e16a2b6c0d7"
)

_object_terms: Set[str] = PrivateAttr(default={"http://edamontology.org/data_0849"})
Expand Down
2 changes: 1 addition & 1 deletion pyeed/core/site.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ class Site(

_repo: Optional[str] = PrivateAttr(default="https://github.com/PyEED/pyeed")
_commit: Optional[str] = PrivateAttr(
default="6a0708b0501e7ce4e280bbea2af990b0d473371d"
default="6c9e70ca7a0928f3e16ace15c2fd2e16a2b6c0d7"
)

_raw_xml_data: Dict = PrivateAttr(default_factory=dict)
Expand Down
2 changes: 1 addition & 1 deletion pyeed/core/standardnumbering.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ class StandardNumbering(

_repo: Optional[str] = PrivateAttr(default="https://github.com/PyEED/pyeed")
_commit: Optional[str] = PrivateAttr(
default="6a0708b0501e7ce4e280bbea2af990b0d473371d"
default="6c9e70ca7a0928f3e16ace15c2fd2e16a2b6c0d7"
)

_raw_xml_data: Dict = PrivateAttr(default_factory=dict)
Expand Down
2 changes: 1 addition & 1 deletion pyeed/fetch/ncbiproteinmapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def map(self, responses: List[str]) -> List[ProteinRecord]:

protein_infos = []
for record in seq_records:
protein_info = ProteinRecord(id=record.id, sequence=str(record.seq))
protein_info = ProteinRecord(source_id=record.id, sequence=str(record.seq))

protein_info.organism = Organism(**self.map_organism(record))

Expand Down
4 changes: 4 additions & 0 deletions pyeed/fetch/requester.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,13 +135,17 @@ def make_batches(self):
"""

batches = []
print(self.ids)
print(self.batch_size)
for i in range(0, len(self.ids), self.batch_size):
batch = self.ids[i : i + self.batch_size]
print(batch)
if len(batch) > 1:
batch_string = ",".join(batch)
else:
batch_string = str(batch[0])
batches.append(batch_string)

print(batches)
self.ids = batches
return batches
32 changes: 32 additions & 0 deletions tests/unit/alignment_tests/test_clustalo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@

# Generated by CodiumAI
from pyeed.align.clustalo import ClustalOmega
from pytest_mock import mocker


import pytest

class TestClustalOmega:
    """Unit tests for the ``ClustalOmega`` alignment wrapper."""

    def test_align_multiple_sequences(self):
        """Aligning several sequences yields one alignment row per input."""
        # Arrange
        inputs = ["ATCG", "GCTA", "TTAA"]
        aligner = ClustalOmega()

        # Act
        result = aligner.align(inputs)

        # Assert
        # NOTE(review): `MultipleSeqAlignment` is not imported in the visible
        # part of this file -- confirm it is in scope before relying on this.
        assert isinstance(result, MultipleSeqAlignment)
        assert len(result) == len(inputs)

    def test_fail_to_get_image_from_docker_hub(self):
        """Running the container raises when the image lookup fails."""
        # Arrange
        aligner = ClustalOmega()
        # NOTE(review): `docker` is not imported in the visible part of this
        # file, and `side_effect` presumably requires `_client` to be a mock
        # -- verify how the client is patched.
        not_found = docker.errors.ImageNotFound
        aligner._client.images.get.side_effect = not_found

        # Act and Assert
        with pytest.raises(Exception):
            aligner.run_container(command="", data=[])
21 changes: 21 additions & 0 deletions tests/unit/alignment_tests/test_hmm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@

# Generated by CodiumAI
from pyeed.core.alignment import Alignment
from pyeed.align.hmm import HMM


import pytest

class TestHMM:
    """Unit tests for constructing an ``HMM``."""

    def test_initialized_with_name_and_alignment(self):
        """An HMM built from a name and an Alignment keeps both; model is None."""
        msa = Alignment()
        # Register the two input sequences in the same order as before.
        for seq_id, residues in (("seq1", "ATCG"), ("seq2", "GCTA")):
            msa.add_to_input_sequences(source_id=seq_id, sequence=residues)

        profile = HMM(name="test_hmm", alignment=msa)

        assert profile.name == "test_hmm"
        assert profile.alignment == msa
        assert profile.model is None
35 changes: 7 additions & 28 deletions tests/unit/network_tests/test_newtork_graph_build.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
import json

import pytest

from pyeed.core import ProteinRecord
from pyeed.network import SequenceNetwork



class TestNetworkGraphBuild:

def test_general_build_networkx(self):
Expand All @@ -24,33 +27,10 @@ def test_general_build_networkx(self):
sequences=mats,
weight="identity",
dimensions=2,
)

assert network.network is not None


def test_cytoscope(self):
mat_accessions = [
"MBP1912539.1",
"SEV92896.1",
"MBO8174569.1",
"WP_042680787.1",
]
mats = ProteinRecord.get_ids(mat_accessions)
# Create a network
network = SequenceNetwork(
sequences=mats,
weight="identity",
dimensions=2,
)
threshhold = 0.85
# now create a cytoscape graph
# careful if at this point cytoscope is not installed and running in background test will fail
network.create_cytoscape_graph(collection="tests", title="test_cytoscape", threshold=threshhold)
# check if the hidden edeges are above the threshold
assert [item[1] for item in list(network._get_edges_visibilities().items())].count(False) == 4
)

def test_graph_build(self):

def test_cytoscape_degree(self):
mat_accessions = [
"MBP1912539.1",
"SEV92896.1",
Expand Down Expand Up @@ -95,4 +75,3 @@ def test_cytoscape_nodes_size(self):
network.set_nodes_size(column_name="degree_with_threshold_{}".format(threshhold), min_size=20, max_size=100)
network.color_nodes(column_name='species')


0 comments on commit fd8c319

Please sign in to comment.