Skip to content

Commit

Permalink
Very simple support for StPeter in protxml.DataFrame
Browse files Browse the repository at this point in the history
  • Loading branch information
levitsky committed Jan 9, 2025
1 parent 7dc2fdd commit bff06b2
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 1 deletion.
2 changes: 1 addition & 1 deletion pyteomics/proforma.py
Original file line number Diff line number Diff line change
Expand Up @@ -360,7 +360,7 @@ def resolve(self, name=None, id=None, **kwargs):
return self._cache[cache_key].copy()
value = self._resolve_impl(name, id, **kwargs)
self._cache[cache_key] = value
return value.copy()
return value.copy()

# Calling the object is shorthand for ``resolve``: ``name``, ``id`` and any extra keyword arguments are forwarded unchanged and its result is returned.
def __call__(self, name=None, id=None, **kwargs):
return self.resolve(name, id, **kwargs)
Expand Down
11 changes: 11 additions & 0 deletions pyteomics/protxml.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@
from . import xml, auxiliary as aux, _schema_defaults
import operator as op


class ProtXML(xml.MultiProcessingXML):
"""Parser class for protXML files."""
file_format = 'protXML'
Expand Down Expand Up @@ -127,6 +128,7 @@ def _get_info_smart(self, element, **kwargs):
info['unique_stripped_peptides'] = info['unique_stripped_peptides'].split('+')
return info


def read(source, read_schema=False, iterative=True, **kwargs):
"""Parse `source` and iterate through protein groups.
Expand Down Expand Up @@ -180,6 +182,7 @@ def _is_decoy_prefix(pg, prefix='DECOY_'):
"""
return all(p['protein_name'].startswith(prefix) for p in pg['protein'])


def _is_decoy_suffix(pg, suffix='_DECOY'):
"""Determine if a protein group should be considered decoy.
Expand All @@ -201,6 +204,7 @@ def _is_decoy_suffix(pg, suffix='_DECOY'):
"""
return all(p['protein_name'].endswith(suffix) for p in pg['protein'])


# Module-level alias: the default decoy test is the prefix-based check.
is_decoy = _is_decoy_prefix

# FDR helper constructed by ``aux._make_fdr`` from both decoy tests (prefix and suffix).
# NOTE(review): presumably the resulting function chooses between them via keyword arguments — confirm in ``auxiliary``.
fdr = aux._make_fdr(_is_decoy_prefix, _is_decoy_suffix)
Expand All @@ -209,6 +213,7 @@ def _is_decoy_suffix(pg, suffix='_DECOY'):
# NOTE: this module-level name shadows the builtin ``filter`` within this module.
# Constructed by ``aux._make_filter`` from the reader ``chain``, both decoy tests, ``_key`` and ``qvalues``.
filter = aux._make_filter(chain, _is_decoy_prefix, _is_decoy_suffix, _key, qvalues)
# Attach a companion callable as ``filter.chain``.
# NOTE(review): the meaning of the 'filter' and True arguments is defined in ``aux._make_chain`` — confirm there.
filter.chain = aux._make_chain(filter, 'filter', True)


def DataFrame(*args, **kwargs):
"""Read protXML output files into a :py:class:`pandas.DataFrame`.
Expand Down Expand Up @@ -241,6 +246,7 @@ def DataFrame(*args, **kwargs):
kwargs = kwargs.copy()
sep = kwargs.pop('sep', None)
pd_kwargs = kwargs.pop('pd_kwargs', {})

def gen_items():
with chain(*args, **kwargs) as f:
for item in f:
Expand All @@ -260,6 +266,11 @@ def gen_items():
out['indistinguishable_protein'] = [p['protein_name'] for p in out['indistinguishable_protein']]
else:
out['indistinguishable_protein'] = sep.join(p['protein_name'] for p in out['indistinguishable_protein'])
if 'analysis_result' in out:
for ar in out['analysis_result']:
if ar['analysis'] == 'stpeter':
out.update(ar['StPeterQuant'])

yield out
return pd.DataFrame(gen_items(), **pd_kwargs)

Expand Down
1 change: 1 addition & 0 deletions tests/test_protxml.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,5 +66,6 @@ def test_filter_df_suffix(self):
fdf = protxml.filter_df(df, decoy_suffix='_SUF', **kw)
self.assertEqual(fdf.shape, (1, 17))


if __name__ == '__main__':
unittest.main()

0 comments on commit bff06b2

Please sign in to comment.