Skip to content

Commit

Permalink
Merge pull request #88 from compomics/feature/tsv-raise-exception
Browse files (browse the repository at this point in the history)
Improve TSV reading error handling
  • Loading branch information
RalfG authored Aug 14, 2024
2 parents 8438dbc + dd9aac0 commit 0d5af95
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 4 deletions.
15 changes: 12 additions & 3 deletions psm_utils/io/tsv.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,10 +57,10 @@
from pydantic import ValidationError

from psm_utils.io._base_classes import ReaderBase, WriterBase
from psm_utils.io._utils import set_csv_field_size_limit
from psm_utils.io.exceptions import PSMUtilsIOException
from psm_utils.psm import PSM
from psm_utils.psm_list import PSMList
from psm_utils.io._utils import set_csv_field_size_limit

set_csv_field_size_limit()

Expand All @@ -74,12 +74,21 @@ def __iter__(self):
"""Iterate over file and return PSMs one-by-one."""
with open(self.filename, "rt") as open_file:
reader = csv.DictReader(open_file, delimiter="\t")
failed_rows = 0
for row in reader:
try:
yield PSM(**self._parse_entry(row))
except ValidationError:
except ValidationError as e:
failed_rows += 1
logger.warning(f"Could not parse PSM from row: `{row}`")
continue
if failed_rows >= 3:
raise PSMUtilsIOException(
"Could not parse PSM from three consecutive rows. Verify that the "
"file is formatted correctly as a psm_utils TSV file or that the "
"correct file type reader is used."
) from e
else:
failed_rows = 0

@staticmethod
def _parse_entry(entry: dict) -> dict:
Expand Down
4 changes: 4 additions & 0 deletions tests/test_data/test.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
spectrum_id peptidoform
peptide1 ACDEK/2
peptide2 AC[Carbamidomethyl]DEFGR/3
peptide3 [Acetyl]-AC[Carbamidomethyl]DEFGHIK/2
22 changes: 21 additions & 1 deletion tests/test_io/test_tsv.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
"""Tests for psm_utils.io.tsv."""

from psm_utils.io.tsv import TSVReader, TSVWriter # noqa: F401
import pytest

from psm_utils.io.exceptions import PSMUtilsIOException
from psm_utils.io.tsv import TSVReader
from psm_utils.peptidoform import Peptidoform

test_cases = [
(
Expand Down Expand Up @@ -30,3 +34,19 @@ class TestTSVReader:
def test__parse_entry(self):
    """Check ``TSVReader._parse_entry`` against every pair in ``test_cases``.

    Each item of ``test_cases`` is an ``(input, expected_output)`` pair; the
    parsed input must compare equal to the expected output.
    """
    for raw_entry, parsed_entry in test_cases:
        observed = TSVReader._parse_entry(raw_entry)
        assert observed == parsed_entry

def test_iter(self):
    """Check that iterating a ``TSVReader`` yields the expected first PSM.

    Only the first PSM of ``tests/test_data/test.tsv`` is inspected. It is
    fetched explicitly instead of with a ``for``/``break`` loop, so the test
    fails when the reader yields nothing rather than passing without running
    a single assertion.
    """
    reader = TSVReader("tests/test_data/test.tsv")

    # ``next`` with a default avoids a bare StopIteration and lets the
    # assertion below report an empty reader with a clear message.
    psm = next(iter(reader), None)
    assert psm is not None, "TSVReader did not yield any PSM"

    assert psm.peptidoform == Peptidoform("ACDEK/2")
    assert psm.spectrum_id == "peptide1"
    assert psm.provenance_data == {}
    assert psm.metadata == {}
    assert psm.rescoring_features == {}

def test_iter_raises(self):
    """Expect ``PSMUtilsIOException`` while iterating over ``peprec.tsv``.

    NOTE(review): presumably ``tests/test_data/peprec.tsv`` is not a
    psm_utils-formatted TSV file, which is what triggers the error; confirm
    against the test data.
    """
    peprec_path = "tests/test_data/peprec.tsv"
    with TSVReader(peprec_path) as reader, pytest.raises(PSMUtilsIOException):
        # Exhaust the reader; the yielded values themselves are irrelevant.
        for _ in reader:
            pass

0 comments on commit 0d5af95

Please sign in to comment.