diff --git a/.gitignore b/.gitignore
index e2a06e8..bfcef11 100644
--- a/.gitignore
+++ b/.gitignore
@@ -161,3 +161,4 @@ cython_debug/
 *.csv
 *_df.csv
 *.tsv
+/tests/test_data/hMICAL1_coiPAnP-N2-200_3Murea-1Mthiourea-200mMtcep_14733.d/
diff --git a/pyproject.toml b/pyproject.toml
index ca5d136..05c39ea 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -3,7 +3,7 @@ name = "quantms-utils"
 description = "Python scripts and helpers for the quantMS workflow"
 readme = "README.md"
 license = "MIT"
-version = "0.0.15"
+version = "0.0.16"
 authors = [
     "Yasset Perez-Riverol <ypriverol@gmail.com>",
     "Dai Chengxin <chengxin2024@126.com>",
diff --git a/quantmsutils/__init__.py b/quantmsutils/__init__.py
index 6561790..d62d967 100644
--- a/quantmsutils/__init__.py
+++ b/quantmsutils/__init__.py
@@ -1 +1 @@
-__version__ = "0.0.15"
+__version__ = "0.0.16"
diff --git a/quantmsutils/mzml/mzml_statistics.py b/quantmsutils/mzml/mzml_statistics.py
index 1b3d706..9fd349d 100644
--- a/quantmsutils/mzml/mzml_statistics.py
+++ b/quantmsutils/mzml/mzml_statistics.py
@@ -1,238 +1,376 @@
 import re
 import sqlite3
 from pathlib import Path
+from typing import Optional, List
 
 import click
+import numpy as np
 import pandas as pd
-import pyarrow
-from pyopenms import MSExperiment, MzMLFile
+import pyarrow as pa
+import pyarrow.parquet as pq
+from pyopenms import MzMLFile
+
+
+class BatchWritingConsumer:
+    """
+    A class to consume mass spectrometry data and write to a parquet file in batches from mzML files using
+    pyopenms streaming.
+    """
+
+    def __init__(
+        self,
+        parquet_schema: pa.Schema,  # schema of the per-spectrum statistics rows
+        id_parquet_schema: pa.Schema,  # schema of the MS2 peak records (id_only mode)
+        output_path,  # destination of the statistics parquet file
+        batch_size: int = 10000,  # rows buffered before a flush
+        id_only: bool = False,  # also collect scan/mz/intensity for MS2 spectra
+    ) -> None:
+        self.parquet_schema = parquet_schema
+        self.id_parquet_schema = id_parquet_schema
+        self.output_path = output_path
+        self.batch_size = batch_size
+        self.id_only = id_only
+        self.batch_data = []  # statistics rows waiting to be written
+        self.psm_parts = []  # MS2 peak records waiting to be written
+        self.parquet_writer = None  # created on the first flush
+        self.id_parquet_writer = None  # created on the first flush with peak records
+        self.acquisition_datetime = None  # filled by setExperimentalSettings
+        self.scan_pattern = re.compile(r"(?:scan|spectrum)=(\d+)")  # keyword, not a char class
+
+    def setExperimentalSettings(self, settings) -> None:  # consumer hook (presumably called by pyopenms)
+        self.acquisition_datetime = settings.getDateTime().get()  # later stored on every row
+
+    def setExpectedSize(self, a, b) -> None:  # consumer hook; both arguments are ignored
+        pass
+
+    def consumeChromatogram(self, chromatogram) -> None:  # consumer hook; chromatograms are ignored
+        pass
+
+    def consumeSpectrum(self, spectrum):
+        """
+        Buffer a statistics row for an MS1/MS2 spectrum (other MS levels are ignored) and, in
+        id_only mode, the peaks of MS2 spectra; flush once batch_size rows are pending.
+        :param spectrum: spectrum data.
+        """
+
+        peaks = spectrum.get_peaks()
+        mz_array, intensity_array = peaks[0], peaks[1]
+        peak_per_ms = len(mz_array)
+        base_peak_intensity = float(np.max(intensity_array)) if peak_per_ms > 0 else None
+        total_intensity = float(np.sum(intensity_array)) if peak_per_ms > 0 else None
+        ms_level = spectrum.getMSLevel()
+        rt = spectrum.getRT()
+
+        if ms_level == 2:
+            precursors = spectrum.getPrecursors()
+            # A spectrum without precursor information keeps null charge / m/z instead of raising
+            charge_state = precursors[0].getCharge() if precursors else None
+            exp_mz = precursors[0].getMZ() if precursors else None
+
+            if self.id_only:
+                scan_id = self.scan_pattern.findall(spectrum.getNativeID())[0]
+                # One dict per record: RecordBatch.from_pylist expects a list of dicts
+                self.psm_parts.append(
+                    {
+                        "scan": scan_id,
+                        "ms_level": ms_level,
+                        "mz": mz_array,
+                        "intensity": intensity_array,
+                    }
+                )
+
+            row_data = {
+                "SpectrumID": spectrum.getNativeID(),
+                "MSLevel": float(ms_level),
+                "Charge": float(charge_state) if charge_state is not None else None,
+                "MS_peaks": float(peak_per_ms),
+                "Base_Peak_Intensity": (
+                    float(base_peak_intensity) if base_peak_intensity is not None else None
+                ),
+                "Summed_Peak_Intensities": (
+                    float(total_intensity) if total_intensity is not None else None
+                ),
+                "Retention_Time": float(rt),
+                "Exp_Mass_To_Charge": float(exp_mz) if exp_mz is not None else None,
+                "AcquisitionDateTime": str(self.acquisition_datetime),
+            }
+        elif ms_level == 1:
+            row_data = {
+                "SpectrumID": spectrum.getNativeID(),
+                "MSLevel": float(ms_level),
+                "Charge": None,
+                "MS_peaks": float(peak_per_ms),
+                "Base_Peak_Intensity": (
+                    float(base_peak_intensity) if base_peak_intensity is not None else None
+                ),
+                "Summed_Peak_Intensities": (
+                    float(total_intensity) if total_intensity is not None else None
+                ),
+                "Retention_Time": float(rt),
+                "Exp_Mass_To_Charge": None,
+                "AcquisitionDateTime": str(self.acquisition_datetime),
+            }
+        else:
+            return
+
+        self.batch_data.append(row_data)
+
+        # Write batch when it reaches specified size
+        if len(self.batch_data) >= self.batch_size:
+            self._write_batch()
+
+    def _write_batch(self):
+        """
+        Flush the buffered rows to the parquet output(s).
+
+        Does nothing while ``batch_data`` is empty. Otherwise the rows are written to the
+        statistics file and the buffer is reset; in ``id_only`` mode any pending peak records
+        are then written to the spectrum file. Writers are created on first use, and any
+        error is printed before being re-raised.
+        """
+        try:
+            pending_rows = self.batch_data
+            if not pending_rows:
+                return
+
+            stats_writer = self.parquet_writer
+            if stats_writer is None:
+                # First flush: open the statistics file
+                stats_writer = pq.ParquetWriter(
+                    where=self.output_path, schema=self.parquet_schema, compression="gzip"
+                )
+                self.parquet_writer = stats_writer
+
+            stats_writer.write_batch(
+                pa.RecordBatch.from_pylist(pending_rows, schema=self.parquet_schema)
+            )
+            self.batch_data = []
+
+            # Peak records are only collected in id_only mode
+            if not (self.id_only and self.psm_parts):
+                return
+
+            if self.id_parquet_writer is None:
+                # First flush with peak records: open the spectrum file
+                spectrum_path = f"{Path(self.output_path).stem}_spectrum_df.parquet"
+                self.id_parquet_writer = pq.ParquetWriter(
+                    where=spectrum_path,
+                    schema=self.id_parquet_schema,
+                    compression="gzip",
+                )
+
+            self.id_parquet_writer.write_batch(
+                pa.RecordBatch.from_pylist(self.psm_parts, schema=self.id_parquet_schema)
+            )
+            self.psm_parts = []
+
+        except Exception as e:
+            print(f"Error during batch writing: {e}")
+            raise
+
+    def finalize(self):
+        """
+        Flush any rows still buffered and close the parquet writers.
+        :return: None
+        """
+        try:
+            # One flush is enough: _write_batch writes pending peak records together with the
+            # statistics rows, and a peak record is only ever buffered alongside such a row.
+            if self.batch_data:
+                self._write_batch()
+        finally:
+            # Close whatever was opened, even when the final flush fails
+            if self.parquet_writer:
+                self.parquet_writer.close()
+
+            if self.id_parquet_writer:
+                self.id_parquet_writer.close()
+
+
+def column_exists(conn, table_name: str) -> set:
+    """
+    Return the set of column names that ``PRAGMA table_info`` reports for the given SQLite table.
+    """
+    table_info = pd.read_sql_query(f"PRAGMA table_info({table_name});", conn)
+    return set(table_info["name"].tolist())
 
 
 @click.command("mzmlstats")
-@click.option("--ms_path", type=click.Path(exists=True))
+@click.option("--ms_path", type=click.Path(exists=True), required=True)
+@click.option("--id_only", is_flag=True, help="Generate a csv with the spectrum id and the peaks")
 @click.option(
-    "--id_only", is_flag=True, help="Generate a csv with the spectrum id and the peaks"
+    "--batch_size", type=int, default=10000, help="Number of rows to write in each batch"
 )
 @click.pass_context
-def mzml_statistics(ctx, ms_path: str, id_only: bool = False) -> None:
+def mzml_statistics(ctx, ms_path: str, id_only: bool = False, batch_size: int = 10000) -> None:
     """
     The mzml_statistics function parses mass spectrometry data files, either in
     .mzML or Bruker .d formats, to extract and compile a set of statistics about the spectra contained within.
     It supports generating detailed or ID-only CSV files based on the spectra data.
 
     # Command line usage example
-    python script_name.py mzml_statistics --ms_path "path/to/file.mzML"
+    quantmsutilsc mzmlstats --ms_path "path/to/file.mzML"
 
     :param ctx: Click context
+
     :param ms_path: A string specifying the path to the mass spectrometry file.
     :param id_only: A boolean flag that, when set to True, generates a CSV file containing only the spectrum ID and
     peaks data for MS level 2 spectra.
+    :param batch_size: An integer specifying the number of rows to write in each batch.
 
     """
-    file_columns = [
-        "SpectrumID",
-        "MSLevel",
-        "Charge",
-        "MS_peaks",
-        "Base_Peak_Intensity",
-        "Summed_Peak_Intensities",
-        "Retention_Time",
-        "Exp_Mass_To_Charge",
-        "AcquisitionDateTime",
-    ]
-
-    def parse_mzml(file_name: str, file_columns: list, id_only: bool = False):
+    schema = pa.schema(  # per-spectrum statistics rows written for mzML input
+        [
+            pa.field("SpectrumID", pa.string(), nullable=True),
+            pa.field("MSLevel", pa.float64(), nullable=True),
+            pa.field("Charge", pa.float64(), nullable=True),
+            pa.field("MS_peaks", pa.float64(), nullable=True),
+            pa.field("Base_Peak_Intensity", pa.float64(), nullable=True),
+            pa.field("Summed_Peak_Intensities", pa.float64(), nullable=True),
+            pa.field("Retention_Time", pa.float64(), nullable=True),
+            pa.field("Exp_Mass_To_Charge", pa.float64(), nullable=True),
+            pa.field("AcquisitionDateTime", pa.string(), nullable=True),
+        ]
+    )
+
+    id_schema = pa.schema(  # MS2 peak records written in id_only mode
+        [
+            ("scan", pa.string()),
+            ("ms_level", pa.int32()),
+            ("mz", pa.list_(pa.float64())),
+            ("intensity", pa.list_(pa.float64())),
+        ]
+    )
+
+    def batch_write_mzml_streaming(
+        file_name: str,
+        parquet_schema: pa.Schema,
+        output_path: str,
+        id_parquet_schema: pa.Schema,
+        id_only: bool = False,
+        batch_size: int = 10000,
+    ) -> Optional[str]:
+        """
+        Stream the mzML file through a BatchWritingConsumer and return the parquet path.
+        """
+        consumer = BatchWritingConsumer(
+            parquet_schema=parquet_schema,
+            output_path=output_path,
+            batch_size=batch_size,
+            id_only=id_only,
+            id_parquet_schema=id_parquet_schema,
+        )
+        try:
+            MzMLFile().transform(file_name.encode(), consumer)
+            consumer.finalize()
+            return output_path
+        except Exception as e:
+            print(f"Error during streaming: {e}")
+            raise  # the caller ignores the return value, so failing silently would look like success
+
+    def batch_write_bruker_d(file_name: str, output_path: str, batch_size: int = 10000) -> str:
         """
-        Parse mzML file and return a pandas DataFrame with the information. If id_only is True, it will also save a csv.
-        @param file_name: The file name of the mzML file
-        @param file_columns: The columns of the DataFrame
-        @param id_only: If True, it will save a csv with the spectrum id, mz and intensity
-        @return: A pandas DataFrame with the information of the mzML file
+        Batch processing and writing of Bruker .d files.
         """
+        sql_filepath = f"{file_name}/analysis.tdf"
 
-        info = []
-        psm_part_info = []
-        exp = MSExperiment()
-        acquisition_datetime = exp.getDateTime().get()
-        MzMLFile().load(file_name, exp)
-        for spectrum in exp:
-            id_ = spectrum.getNativeID()
-            ms_level = spectrum.getMSLevel()
-            rt = spectrum.getRT() if spectrum.getRT() else None
-
-            peaks_tuple = spectrum.get_peaks()
-            peak_per_ms = len(peaks_tuple[0])
-
-            if not spectrum.metaValueExists("base peak intensity"):
-                bpc = max(peaks_tuple[1]) if len(peaks_tuple[1]) > 0 else None
-            else:
-                bpc = spectrum.getMetaValue("base peak intensity")
-
-            if not spectrum.metaValueExists("total ion current"):
-                tic = sum(peaks_tuple[1]) if len(peaks_tuple[1]) > 0 else None
-            else:
-                tic = spectrum.getMetaValue("total ion current")
-
-            if ms_level == 1:
-                info_list = [
-                    id_,
-                    ms_level,
-                    None,
-                    peak_per_ms,
-                    bpc,
-                    tic,
-                    rt,
-                    None,
-                    acquisition_datetime,
-                ]
-            elif ms_level == 2:
-                charge_state = spectrum.getPrecursors()[0].getCharge()
-                emz = (
-                    spectrum.getPrecursors()[0].getMZ()
-                    if spectrum.getPrecursors()[0].getMZ()
-                    else None
-                )
-                info_list = [
-                    id_,
-                    ms_level,
-                    charge_state,
-                    peak_per_ms,
-                    bpc,
-                    tic,
-                    rt,
-                    emz,
-                    acquisition_datetime,
-                ]
-                mz_array = peaks_tuple[0]
-                intensity_array = peaks_tuple[1]
-            else:
-                info_list = [
-                    id_,
-                    ms_level,
-                    None,
-                    None,
-                    None,
-                    None,
-                    rt,
-                    None,
-                    acquisition_datetime,
-                ]
+        with sqlite3.connect(sql_filepath) as conn:
+            # Retrieve acquisition datetime
+            acquisition_date_time = conn.execute(
+                "SELECT Value FROM GlobalMetadata WHERE key='AcquisitionDateTime'"
+            ).fetchone()[0]
 
-            if id_only and ms_level == 2:
-                psm_part_info.append(
-                    [
-                        re.findall(r"[scan|spectrum]=(\d+)", id_)[0],
-                        ms_level,
-                        mz_array,
-                        intensity_array,
-                    ]
-                )
-            info.append(info_list)
-
-        if id_only and len(psm_part_info) > 0:
-            pd.DataFrame(
-                psm_part_info, columns=["scan", "ms_level", "mz", "intensity"]
-            ).to_parquet(
-                f"{Path(ms_path).stem}_spectrum_df.parquet",
-                index=False,
-                compression="gzip",
+            # Check which optional columns exist
+            columns = column_exists(conn, "frames")
+
+            # Whitelist: output column -> SQL expression evaluated on the frames table
+            allowed_columns = {
+                "Id": "Id",
+                "MsMsType": "CASE WHEN MsMsType IN (8, 9) THEN 2 WHEN MsMsType = 0 THEN 1 ELSE NULL END",
+                "NumPeaks": "NumPeaks",
+                "MaxIntensity": "MaxIntensity",
+                "SummedIntensities": "SummedIntensities",
+                "Time": "Time",
+                "Charge": "Charge",
+                "MonoisotopicMz": "MonoisotopicMz",
+            }
+
+            # Alias every kept expression to its output name: an unaliased CASE is not returned
+            # as "MsMsType", so the column is missing from the chunk and written as nulls.
+            safe_columns = [
+                f"{sql_expr} AS {schema_col_name}"
+                for schema_col_name, sql_expr in allowed_columns.items()
+                if schema_col_name in columns or schema_col_name == "Id"
+            ]
+
+            # Build the query from the whitelisted expressions only
+            query = f"""SELECT {', '.join(safe_columns)} FROM frames"""
+
+            schema = pa.schema(
+                [
+                    pa.field("Id", pa.int32(), nullable=False),
+                    pa.field("MsMsType", pa.int32(), nullable=True),
+                    pa.field("NumPeaks", pa.int32(), nullable=True),
+                    pa.field("MaxIntensity", pa.float64(), nullable=True),
+                    pa.field("SummedIntensities", pa.float64(), nullable=True),
+                    pa.field("Time", pa.float64(), nullable=True),
+                    pa.field("Charge", pa.int32(), nullable=True),
+                    pa.field("MonoisotopicMz", pa.float64(), nullable=True),
+                    pa.field("AcquisitionDateTime", pa.string(), nullable=True),
+                ]
             )
 
-        return pd.DataFrame(info, columns=file_columns)
+            # Set up parquet writer
+            parquet_writer = pq.ParquetWriter(output_path, schema=schema, compression="gzip")
 
-    def parse_bruker_d(file_name: str, file_columns: list):
-        sql_filepath = f"{file_name}/analysis.tdf"
-        if not Path(sql_filepath).exists():
-            msg = f"File '{sql_filepath}' not found"
-            raise FileNotFoundError(msg)
-        conn = sqlite3.connect(sql_filepath)
-        c = conn.cursor()
-
-        datetime_cmd = (
-            "SELECT Value FROM GlobalMetadata WHERE key='AcquisitionDateTime'"
-        )
-        acquisition_date_time = c.execute(datetime_cmd).fetchall()[0][0]
+            try:
+                # Stream data in batches
+                for chunk in pd.read_sql_query(query, conn, chunksize=batch_size):
+                    chunk["AcquisitionDateTime"] = acquisition_date_time
+                    for col in schema.names:
+                        if col not in chunk.columns:
+                            chunk[col] = None
+                    batch_table = pa.Table.from_pandas(chunk, schema=schema)
+                    parquet_writer.write_table(batch_table)
 
-        df = pd.read_sql_query(
-            "SELECT Id, MsMsType, NumPeaks, MaxIntensity, SummedIntensities, Time FROM frames",
-            conn,
-        )
-        df["AcquisitionDateTime"] = acquisition_date_time
+            finally:
+                parquet_writer.close()
 
-        # {8:'DDA-PASEF', 9:'DIA-PASEF'}
-        if 8 in df["MsMsType"].values:
-            mslevel_map = {0: 1, 8: 2}
-        elif 9 in df["MsMsType"].values:
-            mslevel_map = {0: 1, 9: 2}
-        else:
-            msg = f"Unrecognized ms type '{df['MsMsType'].values}'"
-            raise ValueError(msg)
-        df["MsMsType"] = df["MsMsType"].map(mslevel_map)
+        return output_path
 
-        try:
-            # This line raises an sqlite error if the table does not exist
-            _ = conn.execute("SELECT * from Precursors LIMIT 1").fetchall()
-            precursor_df = pd.read_sql_query("SELECT * from Precursors", conn)
-        except sqlite3.OperationalError as e:
-            if "no such table: Precursors" in str(e):
-                print(
-                    f"No precursors recorded in {file_name}, This is normal for DIA data."
-                )
-                precursor_df = pd.DataFrame()
-            else:
-                raise
+    # Resolve file path
+    ms_path = _resolve_ms_path(ms_path)
+    output_path = f"{Path(ms_path).stem}_ms_info.parquet"
 
-        if len(df) == len(precursor_df):
-            df = pd.concat([df, precursor_df["Charge", "MonoisotopicMz"]], axis=1)
-            df["Charge"] = df["Charge"].fillna(0)
-        else:
-            df[["Charge", "Exp_Mass_To_Charge"]] = None, None
-
-        df = df[
-            [
-                "Id",
-                "MsMsType",
-                "Charge",
-                "NumPeaks",
-                "MaxIntensity",
-                "SummedIntensities",
-                "Time",
-                "Exp_Mass_To_Charge",
-                "AcquisitionDateTime",
-            ]
-        ]
-        df.columns = pd.Index(file_columns)
-
-        return df
-
-    if not (Path(ms_path).exists()):
-        print(f"Not found '{ms_path}', trying to find alias")
-        ms_path_path = Path(ms_path)
-        path_stem = str(ms_path_path.stem)
-        candidates = (
-            list(ms_path_path.parent.glob("*.d"))
-            + list(ms_path_path.parent.glob("*.mzml"))
-            + list(ms_path_path.parent.glob("*.mzML"))
+    # Choose processing method based on file type (suffix compared case-insensitively)
+    if Path(ms_path).suffix.lower() == ".d":
+        batch_write_bruker_d(file_name=ms_path, output_path=output_path, batch_size=batch_size)
+    elif Path(ms_path).suffix.lower() in [".mzml"]:
+        batch_write_mzml_streaming(
+            file_name=ms_path,
+            parquet_schema=schema,
+            id_parquet_schema=id_schema,
+            output_path=output_path,
+            id_only=id_only,
+            batch_size=batch_size,
         )
+    else:
+        raise RuntimeError(f"Unsupported file type: {ms_path}")
 
-        candidates = [c for c in candidates if path_stem in str(c)]
 
-        if len(candidates) == 1:
-            ms_path = str(candidates[0].resolve())
-        else:
-            raise FileNotFoundError()
+def _resolve_ms_path(ms_path: str) -> str:
+    """
+    Resolve mass spectrometry file path with improved candidate search.
+    """
+    path_obj = Path(ms_path)
+    if path_obj.exists():
+        return str(path_obj)
 
-    if Path(ms_path).suffix == ".d" and Path(ms_path).is_dir():
-        ms_df = parse_bruker_d(ms_path, file_columns)
-    elif Path(ms_path).suffix in [".mzML", ".mzml"]:
-        ms_df = parse_mzml(ms_path, file_columns, id_only)
-    else:
-        msg = f"Unrecognized or the mass spec file '{ms_path}' do not exist"
-        raise RuntimeError(msg)
-
-    ms_df.to_parquet(
-        f"{Path(ms_path).stem}_ms_info.parquet",
-        engine="pyarrow",
-        index=False,
-        compression="gzip",
-    )
+    # Suffixes are lower-cased before the lookup, so only lower-case entries are needed
+    valid_extensions = {".d", ".mzml"}
+    matches = path_obj.parent.glob(f"{path_obj.stem}*")
+    candidates = sorted(str(c.resolve()) for c in matches if c.suffix.lower() in valid_extensions)
+    if len(candidates) == 1:
+        return candidates[0]
+
+    raise FileNotFoundError(f"No unique file found for {ms_path}")
diff --git a/quantmsutils/psm/psm_conversion.py b/quantmsutils/psm/psm_conversion.py
index 232f3af..9b06705 100644
--- a/quantmsutils/psm/psm_conversion.py
+++ b/quantmsutils/psm/psm_conversion.py
@@ -141,6 +141,8 @@ def convert_psm(
 
             if hit.metaValueExists("MS:1001491"):
                 global_qvalue = hit.getMetaValue("MS:1001491")
+            elif hit.metaValueExists("q-value"):
+                global_qvalue = hit.getMetaValue("q-value")
 
             charge = hit.getCharge()
             peptidoform = hit.getSequence().toString()
diff --git a/quantmsutils/rescoring/ms2rescore.py b/quantmsutils/rescoring/ms2rescore.py
index 030c59c..4c46809 100644
--- a/quantmsutils/rescoring/ms2rescore.py
+++ b/quantmsutils/rescoring/ms2rescore.py
@@ -5,17 +5,130 @@
 import importlib.resources
 import json
 import logging
-from typing import List
 
 import click
 import pyopenms as oms
 from ms2rescore import package_data, rescore
 from psm_utils import PSMList
 from psm_utils.io.idxml import IdXMLReader, IdXMLWriter
+from typing import Iterable, List, Union
+from pathlib import Path
+from psm_utils.psm import PSM
 
 logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
 
 
+class IDXMLReaderPatch(IdXMLReader):
+    def __init__(self, filename: Union[Path, str], *args, **kwargs) -> None:
+        """
+        idXML reader that skips PSMs lacking the expected rescoring features.
+
+        Parameters
+        ----------
+        filename: str, pathlib.Path
+            Path to idXML file.
+
+        The metadata keys and rescoring feature names are taken from the first hit of the
+        first peptide identification; ``skip_invalid_psm`` counts the PSMs dropped by __iter__.
+        """
+        super().__init__(filename, *args, **kwargs)
+        self.protein_ids, self.peptide_ids = self._parse_idxml()  # NOTE(review): parent may already do this
+        self.user_params_metadata = self._get_userparams_metadata(self.peptide_ids[0].getHits()[0])
+        self.rescoring_features = self._get_rescoring_features(self.peptide_ids[0].getHits()[0])
+        self.skip_invalid_psm = 0
+
+    def __iter__(self) -> Iterable[PSM]:
+        """
+        Yield one PSM per peptide hit; hits for which _parse_psm returns None are skipped
+        and counted in ``skip_invalid_psm``. Example:
+
+        Input PSM 1: PeptideHit with the meta values
+            "MSGF:ScoreRatio" value="0.212121212121212"
+            "MSGF:Energy" value="130.0"
+            "MSGF:lnEValue" value="-3.603969939390662"
+            "MSGF:lnExplainedIonCurrentRatio" value="-0.881402756873971"
+            "MSGF:lnNTermIonCurrentRatio" value="-1.931878317286471"
+            "MSGF:lnCTermIonCurrentRatio" value="-1.311462733724937"
+            "MSGF:lnMS2IonCurrent" value="9.702930189540499"
+            "MSGF:MeanErrorTop7" value="259.986879999999985"
+            "MSGF:sqMeanErrorTop7" value="6.75931777721344e04"
+            "MSGF:StdevErrorTop7" value="143.678020000000004"
+        Input PSM 2: PeptideHit without the meta values above
+
+        Run:
+        reader = IDXMLReaderPatch(input_file)
+        psm_list = reader.read_file()
+
+        Result: psm_list contains only PSM 1
+
+        """
+        for peptide_id in self.peptide_ids:
+            for peptide_hit in peptide_id.getHits():
+                psm = self._parse_psm(self.protein_ids, peptide_id, peptide_hit)
+                if psm is not None:
+                    yield psm
+                else:
+                    self.skip_invalid_psm += 1
+
+    def _parse_psm(
+            self,
+            protein_ids: oms.ProteinIdentification,
+            peptide_id: oms.PeptideIdentification,
+            peptide_hit: oms.PeptideHit,
+    ) -> Union[PSM, None]:
+        """
+        Parse idXML :py:class:`~pyopenms.PeptideHit` to :py:class:`~psm_utils.psm.PSM`.
+
+        Uses additional information from :py:class:`~pyopenms.ProteinIdentification` and
+        :py:class:`~pyopenms.PeptideIdentification` to annotate the PSM. Returns ``None``
+        when the hit lacks any of the meta values listed in ``self.rescoring_features``.
+        """
+        peptidoform = self._parse_peptidoform(
+            peptide_hit.getSequence().toString(), peptide_hit.getCharge()
+        )
+        # This is needed to calculate a qvalue before rescoring the PSMList
+        peptide_id_metadata = {
+            "idxml:score_type": str(peptide_id.getScoreType()),
+            "idxml:higher_score_better": str(peptide_id.isHigherScoreBetter()),
+            "idxml:significance_threshold": str(peptide_id.getSignificanceThreshold()),
+        }
+        peptide_hit_metadata = {
+            key: peptide_hit.getMetaValue(key) for key in self.user_params_metadata
+        }
+
+        # Collect the search engine score features; a hit missing any of them is rejected
+        rescoring_features = {}
+        for key in self.rescoring_features:
+            feature = peptide_hit.metaValueExists(key)
+            if not feature:
+                return None
+            else:
+                rescoring_features[key] = float(peptide_hit.getMetaValue(key))
+
+        return PSM(
+            peptidoform=peptidoform,
+            spectrum_id=peptide_id.getMetaValue("spectrum_reference"),
+            run=self._get_run(protein_ids, peptide_id),
+            is_decoy=self._is_decoy(peptide_hit),
+            score=peptide_hit.getScore(),
+            precursor_mz=peptide_id.getMZ(),
+            retention_time=peptide_id.getRT(),
+            # NOTE: ion mobility will be supported by OpenMS in the future
+            protein_list=[
+                accession.decode() for accession in peptide_hit.extractProteinAccessionsSet()
+            ],
+            rank=peptide_hit.getRank() + 1,  # 0-based to 1-based
+            source="idXML",
+            # Storing proforma notation of peptidoform and UNIMOD peptide sequence for mapping back
+            # to original sequence in writer
+            provenance_data={str(peptidoform): peptide_hit.getSequence().toString()},
+            # Store metadata of PeptideIdentification and PeptideHit objects
+            metadata={**peptide_id_metadata, **peptide_hit_metadata},
+
+            rescoring_features=rescoring_features,
+        )
+
+
 def parse_cli_arguments_to_config(
     config_file: str = None,
     feature_generators: str = None,
@@ -119,9 +232,14 @@ def parse_cli_arguments_to_config(
 def rescore_idxml(input_file, output_file, config) -> None:
     """Rescore PSMs in an idXML file and keep other information unchanged."""
     # Read PSMs
-    reader = IdXMLReader(input_file)
+    reader = IDXMLReaderPatch(input_file)
     psm_list = reader.read_file()
 
+    if reader.skip_invalid_psm != 0:
+        logging.warning(
+            f"Removed {reader.skip_invalid_psm} PSMs without search engine features!"
+        )
+
     # Rescore
     rescore(config, psm_list)
 
diff --git a/recipe/meta.yaml b/recipe/meta.yaml
index 2b3fb7f..6b076ba 100644
--- a/recipe/meta.yaml
+++ b/recipe/meta.yaml
@@ -1,7 +1,7 @@
 # recipe/meta.yaml
 package:
   name: quantms-utils
-  version: "0.0.15"
+  version: "0.0.16"
 
 source:
   path: ../