From 720542127e533bf6b2a881aad6e56bd804cbf0f8 Mon Sep 17 00:00:00 2001 From: RalfG Date: Wed, 23 Oct 2024 16:24:19 +0200 Subject: [PATCH 1/3] Fix issue where an array of peptidoforms is coerced into a 2D array of amino acids and modifications if all peptidoforms have the same length --- psm_utils/psm_list.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/psm_utils/psm_list.py b/psm_utils/psm_list.py index ff71c32..5b49c09 100644 --- a/psm_utils/psm_list.py +++ b/psm_utils/psm_list.py @@ -97,13 +97,7 @@ def __getitem__(self, item) -> PSM | list[PSM]: # Return new PSMList from slice return PSMList(psm_list=self.psm_list[item]) elif isinstance(item, str): - # Return PSM property as array across full PSMList - try: - # Let NumPy coerce dtype (e.g., multidimensional arrays) - return np.array([psm[item] for psm in self.psm_list]) - except ValueError: - # If dtype is not consistent, force dtype to be object - return np.array([psm[item] for psm in self.psm_list], dtype=object) + return np.fromiter([psm[item] for psm in self.psm_list], dtype=object, count=len(self)) elif _is_iterable_of_bools(item): # Return new PSMList with items that were True return PSMList(psm_list=[self.psm_list[i] for i in np.flatnonzero(item)]) From bb5073f39461c882674b583e2dcddf4b55228a6b Mon Sep 17 00:00:00 2001 From: RalfG Date: Wed, 23 Oct 2024 16:26:49 +0200 Subject: [PATCH 2/3] Update tests --- tests/test_psm_list.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_psm_list.py b/tests/test_psm_list.py index cff86aa..01064df 100644 --- a/tests/test_psm_list.py +++ b/tests/test_psm_list.py @@ -1,7 +1,7 @@ import numpy as np import pytest -from psm_utils import Peptidoform, PSM, PSMList +from psm_utils import PSM, Peptidoform, PSMList sample_psm_list = [ PSM(peptidoform="ACDK", spectrum_id=1, score=140.2), @@ -42,7 +42,7 @@ def test___get_item__(self): # Multiple PSM properties as 2D array np.testing.assert_equal( psm_list[["spectrum_id", "score"]], - np.array([["1", 140.2], ["2", 132.9], ["3", 55.7]]), + np.array([["1", 140.2], ["2", 132.9], ["3", 55.7]], dtype=object), ) # Index by multiple indices From 0d47b1402c10b43fa22517b8f0ec458000185f1a Mon Sep 17 00:00:00 2001 From: RalfG Date: Wed, 23 Oct 2024 16:43:51 +0200 Subject: [PATCH 3/3] Add comment again --- psm_utils/psm_list.py | 1 + 1 file changed, 1 insertion(+) diff --git a/psm_utils/psm_list.py b/psm_utils/psm_list.py index 5b49c09..c0339eb 100644 --- a/psm_utils/psm_list.py +++ b/psm_utils/psm_list.py @@ -97,6 +97,7 @@ def __getitem__(self, item) -> PSM | list[PSM]: # Return new PSMList from slice return PSMList(psm_list=self.psm_list[item]) elif isinstance(item, str): + # Return PSM property as array across full PSMList return np.fromiter([psm[item] for psm in self.psm_list], dtype=object, count=len(self)) elif _is_iterable_of_bools(item): # Return new PSMList with items that were True