Skip to content

Commit

Permalink
Merge pull request #69 from compomics/fix/xtandem-modifications
Browse files Browse the repository at this point in the history
Fixes in `io.xtandem`
  • Loading branch information
RalfG authored Mar 4, 2024
2 parents 329b1e3 + ae6f049 commit c502c35
Show file tree
Hide file tree
Showing 5 changed files with 28 additions and 33 deletions.
21 changes: 10 additions & 11 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
{
"esbonio.sphinx.confDir": "${workspaceFolder}/docs/source",
"[python]": {
"editor.formatOnSave": true,
"editor.codeActionsOnSave": {"source.organizeImports": true},
"editor.rulers": [88]
"esbonio.sphinx.confDir": "${workspaceFolder}/docs/source",
"[python]": {
"editor.formatOnSave": true,
"editor.codeActionsOnSave": {
"source.organizeImports": "explicit"
},
"python.formatting.provider": "black",
"python.testing.pytestArgs": [
"tests"
],
"python.testing.unittestEnabled": false,
"python.testing.pytestEnabled": true
"editor.rulers": [99]
},
"python.testing.pytestArgs": ["tests"],
"python.testing.unittestEnabled": false,
"python.testing.pytestEnabled": true
}
28 changes: 11 additions & 17 deletions psm_utils/io/xtandem.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@

from psm_utils.exceptions import PSMUtilsException
from psm_utils.io._base_classes import ReaderBase
from psm_utils.peptidoform import Peptidoform
from psm_utils.peptidoform import Peptidoform, format_number_as_string
from psm_utils.psm import PSM

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -119,34 +119,28 @@ def __iter__(self):
psm = self._parse_entry(entry, run)
yield psm

def _parse_peptidoform(self, peptide_entry, charge: int) -> Peptidoform:
"""Parse X!Tandem XML peptide entry to :py:class:`~psm_utils.peptidoform.Peptidoform`."""
@staticmethod
def _parse_peptidoform(peptide_entry, charge):
if "aa" in peptide_entry:
# Parse modifications
seq_list = list(peptide_entry["seq"])
mod_dict = {}
unmodified_seq = seq_list.copy()

for mod_entry in peptide_entry["aa"]:
# Locations are encoded relative to position in protein
mod_loc = mod_entry["at"] - peptide_entry["start"]
mass_shift = float(mod_entry["modified"])

# Check if site matches amino acid
if not mod_entry["type"] == seq_list[mod_loc]:
if not mod_entry["type"] == unmodified_seq[mod_loc]:
raise XTandemModificationException(
f"Found unexpected residue `{seq_list[mod_loc]}` at "
f"modification location for `{mod_entry}`."
)

# Add modifications to dict
if mod_loc not in mod_dict:
mod_dict[mod_loc] = float(mod_entry["modified"])
else:
# "sum" multiple modifications per site, e.g.,
# cmm + ammonia-loss = pyro-cmm
mod_dict[mod_loc] += float(mod_entry["modified"])

# Add modification in ProForma format
for mod_loc, mass_shift in mod_dict.items():
seq_list[mod_loc] += f"[{mass_shift:+g}]"
# Add to sequence in ProForma format
seq_list[mod_loc] += f"[{format_number_as_string(mass_shift)}]"

proforma_seq = "".join(seq_list)

else:
Expand All @@ -170,7 +164,7 @@ def _parse_entry(self, entry, run: str) -> PSM:
precursor_mz=entry["mh"] - mass.nist_mass["H"][0][0],
retention_time=entry["rt"],
run=run,
protein_list=[entry["protein"][0]["label"]],
protein_list=[protein["label"] for protein in entry["protein"]],
source="X!Tandem",
provenance_data={
"xtandem_filename": str(self.filename),
Expand Down
6 changes: 4 additions & 2 deletions psm_utils/peptidoform.py
Original file line number Diff line number Diff line change
Expand Up @@ -407,7 +407,7 @@ def _rename_modification_list(mods):
for mod in mods:
try:
if isinstance(mod, proforma.MassModification):
mod_value = _format_number_as_string(mod.value)
mod_value = format_number_as_string(mod.value)
else:
mod_value = mod.value
if mod_value in mapping:
Expand Down Expand Up @@ -517,14 +517,16 @@ def apply_fixed_modifications(self):
self.properties["fixed_modifications"] = []


def _format_number_as_string(num):
def format_number_as_string(num):
    """
    Format number as string for ProForma mass modifications.

    Parameters
    ----------
    num
        Value to format; anything accepted by :py:func:`float` (e.g., ``int``, ``float``,
        or a numeric ``str``).

    Returns
    -------
    str
        Shortest positional decimal representation of ``num`` that round-trips to the same
        float, without trailing zeros or a trailing decimal point, and prefixed with ``+``
        for strictly positive values (negative values carry their own ``-``; zero gets no
        sign prefix).

    Raises
    ------
    ValueError
        If ``num`` cannot be converted to a float.

    """
    num = float(num)
    # Only strictly positive values get an explicit plus sign
    plus = "+" if num > 0 else ""
    # `str(float)` switches to scientific notation for very small or very large magnitudes
    # (e.g. `1e-10`), and stripping trailing zeros from that form corrupts the exponent
    # (`1e-10` -> `1e-1`). Positional formatting avoids both rounding and scientific
    # notation; `trim="-"` drops trailing zeros and a dangling decimal point.
    return plus + np.format_float_positional(num, trim="-")



class PeptidoformException(PSMUtilsException):
"""Error while handling :py:class:`Peptidoform`."""

Expand Down
2 changes: 1 addition & 1 deletion tests/test_io/test_xtandem.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def test__parse_peptidoform(self):
},
2,
),
"expected_out": "C[+39.9954]WASLWTAR/2",
"expected_out": "C[+57.022][-17.02655]WASLWTAR/2",
},
]

Expand Down
4 changes: 2 additions & 2 deletions tests/test_peptidoform.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from pyteomics import proforma

from psm_utils.peptidoform import Peptidoform, _format_number_as_string
from psm_utils.peptidoform import Peptidoform, format_number_as_string


class TestPeptidoform:
Expand Down Expand Up @@ -63,4 +63,4 @@ def test_format_number_as_string():
]

for test_case_in, expected_out in test_cases:
assert _format_number_as_string(test_case_in) == expected_out
assert format_number_as_string(test_case_in) == expected_out

0 comments on commit c502c35

Please sign in to comment.