Skip to content

Commit

Permalink
minor cleaning tool.
Browse files Browse the repository at this point in the history
  • Loading branch information
ypriverol committed Dec 3, 2024
1 parent 2980bb6 commit 8c4c822
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 8 deletions.
6 changes: 4 additions & 2 deletions quantmsutils/diann/diann2mztab.py
Original file line number Diff line number Diff line change
Expand Up @@ -549,6 +549,8 @@ def mztab_mtd(index_ref, dia_params, fasta, charge, missed_cleavages, diann_vers
:type charge: int
:param missed_cleavages: Missed cleavages set by DIA-NN
:type missed_cleavages: int
:param diann_version: Version of DIA-NN
:type diann_version: str
:return: MTD sub-table
:rtype: pandas.core.frame.DataFrame
"""
Expand Down Expand Up @@ -1084,15 +1086,15 @@ def __find_info(directory, n):
# TODO seconds returned from precursor.getRT()
target.loc[:, "RT"] = target.apply(lambda x: x["RT"] / 60, axis=1)

RT_matched = pd.merge_asof(group, target, on="RT", direction="nearest")
rt_matched = pd.merge_asof(group, target, on="RT", direction="nearest")
new_target = target
new_target.columns = [
"scan_RT",
"scan_opt_global_spectrum_reference",
"MS2.Scan",
"scan_exp_mass_to_charge",
]
scan_matched = pd.merge(RT_matched, new_target, on="MS2.Scan")
scan_matched = pd.merge(rt_matched, new_target, on="MS2.Scan")

# Cross-validate the spectrum ID between the scan-matched and RT-matched results.
# Keep the scan-matched result when the RT match and the DIA-NN scan match are inconsistent in the mzML.
Expand Down
4 changes: 2 additions & 2 deletions quantmsutils/mzml/mzml_statistics.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import re
import sqlite3
from pathlib import Path
from typing import Optional, List
from typing import Optional, List, Set

import click
import numpy as np
Expand Down Expand Up @@ -198,7 +198,7 @@ def finalize(self):
self.id_parquet_writer.close()


def column_exists(conn, table_name: str) -> List[str]:
def column_exists(conn, table_name: str) -> Set[str]:
"""
Fetch the existing columns in the specified SQLite table.
"""
Expand Down
11 changes: 7 additions & 4 deletions tests/test_commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ def test_extract_sample_from_expdesign_help():

assert result.exit_code == 0


def test_sage_feature_file():
runner = CliRunner()
result = runner.invoke(
Expand Down Expand Up @@ -110,6 +111,7 @@ def test_convert_psm_help():

assert result.exit_code == 0


# def test_batch_convert_parquet():
# files = ["RD139_Narrow_UPS1_0_1fmol_inj1.mzML",
# "RD139_Narrow_UPS1_0_1fmol_inj2.mzML",
Expand All @@ -135,6 +137,7 @@ def test_check_samplesheet_help():

assert result.exit_code == 0


# test the validation of an SDRF file
def test_check_samplesheet_sdrf():
runner = CliRunner()
Expand Down Expand Up @@ -187,15 +190,15 @@ def test_mzml_statistics():

table1 = pd.read_parquet("BSA1_F1_ms_info.parquet")
table2 = pd.read_parquet("tests/test_data/BSA1_F1_ms_info.parquet")
table2 = table2.set_index('scan')
table1 = table1.set_index('scan')
table2 = table2.set_index("scan")
table1 = table1.set_index("scan")

assert table1.compare(table2).empty

id_table = pd.read_parquet("BSA1_F1_spectrum_df.parquet")
id_table2 = pd.read_parquet("tests/test_data/BSA1_F1_spectrum_df.parquet")
id_table = id_table.set_index('scan')
id_table2 = id_table2.set_index('scan')
id_table = id_table.set_index("scan")
id_table2 = id_table2.set_index("scan")

assert id_table.shape == id_table2.shape

Expand Down

0 comments on commit 8c4c822

Please sign in to comment.