Skip to content

Commit

Permalink
Merge pull request #12 from bigbio/dev
Browse files Browse the repository at this point in the history
Major changes - including all sync from quantms
  • Loading branch information
ypriverol authored Aug 7, 2024
2 parents 332589e + cb1cfec commit 008d67c
Show file tree
Hide file tree
Showing 11 changed files with 135 additions and 57 deletions.
2 changes: 1 addition & 1 deletion environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ channels:
- conda-forge
dependencies:
- click
- sdrf-pipelines>=0.0.28
- sdrf-pipelines>=0.0.29
- pyopenms
- ms2rescore=3.0.2
- psm-utils=0.8.0
Expand Down
2 changes: 1 addition & 1 deletion quantmsutils/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.0.3"
__version__ = "0.0.3"
11 changes: 9 additions & 2 deletions quantmsutils/mzml/mzml_statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,11 @@ def parse_mzml(file_name: str, file_columns: list, id_only: bool = False):
if id_only and len(psm_part_info) > 0:
pd.DataFrame(
psm_part_info, columns=["scan", "ms_level", "mz", "intensity"]
).to_parquet(f"{Path(ms_path).stem}_spectrum_df.parquet", index=False, compression="gzip")
).to_parquet(
f"{Path(ms_path).stem}_spectrum_df.parquet",
index=False,
compression="gzip",
)

return pd.DataFrame(info, columns=file_columns)

Expand Down Expand Up @@ -227,5 +231,8 @@ def parse_bruker_d(file_name: str, file_columns: list):
raise RuntimeError(msg)

ms_df.to_parquet(
f"{Path(ms_path).stem}_ms_info.parquet", engine="pyarrow", index=False, compression="gzip"
f"{Path(ms_path).stem}_ms_info.parquet",
engine="pyarrow",
index=False,
compression="gzip",
)
9 changes: 7 additions & 2 deletions quantmsutils/psm/psm_conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,11 @@ def mods_position(peptide):
"psmconvert", short_help="Convert idXML to parquet file with PSMs information."
)
@click.option("--idxml", type=click.Path(exists=True))
@click.option("--spectra_file", type=click.Path(exists=True), help="Parquet file from mzml_statistics")
@click.option(
"--spectra_file",
type=click.Path(exists=True),
help="Parquet file from mzml_statistics",
)
@click.option("--export_decoy_psm", is_flag=True)
@click.pass_context
def convert_psm(ctx, idxml: str, spectra_file: str, export_decoy_psm: bool = False):
Expand Down Expand Up @@ -173,4 +177,5 @@ def convert_psm(ctx, idxml: str, spectra_file: str, export_decoy_psm: bool = Fal
)

pd.DataFrame(parquet_data, columns=_parquet_field).to_parquet(
f"{Path(idxml).stem}_psm.csv", index=False, engine="pyarrow", compression="gzip")
f"{Path(idxml).stem}_psm.csv", index=False, engine="pyarrow", compression="gzip"
)
4 changes: 3 additions & 1 deletion quantmsutils/quantmsutilsc.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@
CONTEXT_SETTINGS = dict(help_option_names=["-h", "--help"])


@click.version_option(version=__version__, package_name="quantmsutils", message="%(package)s %(version)s")
@click.version_option(
version=__version__, package_name="quantmsutils", message="%(package)s %(version)s"
)
@click.group(context_settings=CONTEXT_SETTINGS)
def cli():
pass
Expand Down
2 changes: 1 addition & 1 deletion quantmsutils/rescoring/ms2rescore.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ def filter_out_artifact_psms(
"--ms2pip_model_dir",
help="The path of MS²PIP model (default: `./`)",
type=str,
default="./"
default="./",
)
@click.option(
"-ms2tol",
Expand Down
122 changes: 88 additions & 34 deletions quantmsutils/sdrf/check_samplesheet.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,26 +30,50 @@ def print_error(error, context="Line", context_str=""):
sys.exit(1)


def check_sdrf(check_ms, sdrf, validate_ontologies):
df = SdrfDataFrame.parse(sdrf)
if validate_ontologies:
errors = df.validate(DEFAULT_TEMPLATE)
if check_ms:
errors = errors + df.validate(MASS_SPECTROMETRY)
for error in errors:
print(error)
if not errors:
print("Everying seems to be fine. Well done.")
else:
print("There were validation errors!")
else:
errors = False
print("No ontology term validation was performed.")
def check_sdrf(
    input_sdrf: str,
    skip_ms_validation: bool = False,
    skip_factor_validation: bool = False,
    skip_experimental_design_validation: bool = False,
    use_ols_cache_only: bool = False,
    skip_sdrf_validation: bool = False,
):
    """
    Validate an SDRF file, print every reported error and terminate the process.

    This function never returns: it always ends in ``sys.exit``. The exit status is
    0 when validation is skipped or no errors were collected, and 1 otherwise.
    @param input_sdrf: Path to the SDRF file to check
    @param skip_ms_validation: Disable the validation of mass spectrometry fields in SDRF (e.g. posttranslational modifications)
    @param skip_factor_validation: Disable the validation of factor values in SDRF
    @param skip_experimental_design_validation: Disable the validation of experimental design
    @param use_ols_cache_only: Use ols cache for validation of the terms and not OLS internet service
    @param skip_sdrf_validation: Disable the validation of SDRF
    """
    # Guard clause: when validation is disabled the file is not even parsed.
    if skip_sdrf_validation:
        print("No SDRF validation was performed.")
        sys.exit(0)

    sdrf_frame = SdrfDataFrame.parse(input_sdrf)

    # The default template is always validated; the remaining checks are opt-out.
    problems = sdrf_frame.validate(DEFAULT_TEMPLATE, use_ols_cache_only)

    # (skip flag, validator) pairs, kept in the order the checks must run.
    optional_checks = (
        (
            skip_ms_validation,
            lambda: sdrf_frame.validate(MASS_SPECTROMETRY, use_ols_cache_only),
        ),
        (skip_factor_validation, sdrf_frame.validate_factor_values),
        (
            skip_experimental_design_validation,
            sdrf_frame.validate_experimental_design,
        ),
    )
    for is_skipped, run_check in optional_checks:
        if is_skipped:
            continue
        # Concatenate into a new list rather than extending in place.
        problems = problems + run_check()

    for problem in problems:
        print(problem)

    # bool(...) maps "no errors" to exit status 0 and "some errors" to 1.
    sys.exit(bool(problems))


def check_expdesign(expdesign):
"""
Check the expdesign file for errors. If any errors are found, print them and exit with a non-zero status code.
@param expdesign: Path to the expdesign file to check
"""
data = pd.read_csv(expdesign, sep="\t", header=0, dtype=str)
data = data.dropna()
schema_file = ["Fraction_Group", "Fraction", "Spectra_Filepath", "Label", "Sample"]
Expand Down Expand Up @@ -111,34 +135,64 @@ def check_expdesign_logic(f_table, s_table):
sys.exit(1)


@click.command("checksamplesheet", short_help="Check samplesheet")
@click.option("--is_sdrf", "-s", help="In Sdrf format", is_flag=True, default=False)
@click.command(
"check_samplesheet",
short_help="Reformat nf-core/quantms sdrf file and check its contents.",
)
@click.option("--exp_design", help="SDRF/Expdesign file to be validated")
@click.option("--is_sdrf", help="SDRF file or Expdesign file", is_flag=True)
@click.option(
"--skip_sdrf_validation", help="Disable the validation of SDRF", is_flag=True
)
@click.option(
"--skip_ms_validation",
help="Disable the validation of mass spectrometry fields in SDRF (e.g. posttranslational modifications)",
is_flag=True,
)
@click.option(
"--check_ms",
"-m",
required=False,
"--skip_factor_validation",
help="Disable the validation of factor values in SDRF",
is_flag=True,
help="Check mass spectrometry fields in sample metadata.",
default=False,
)
@click.option(
"--validate_ontologies", help="Validate the ontologies", is_flag=True, default=False
"--skip_experimental_design_validation",
help="Disable the validation of experimental design",
is_flag=True,
)
@click.option(
"-in",
"--input_file",
type=click.Path(exists=True),
required=True,
help="Input SDRF or Expdesign file",
"--use_ols_cache_only",
help="Use ols cache for validation of the terms and not OLS internet service",
is_flag=True,
)
@click.pass_context
def check_samplesheet(
ctx, is_sdrf: bool, check_ms: bool, validate_ontologies: bool, input_file: str
) -> None:
exp_design: str,
is_sdrf: bool = False,
skip_sdrf_validation: bool = False,
skip_ms_validation: bool = False,
skip_factor_validation: bool = False,
skip_experimental_design_validation: bool = False,
use_ols_cache_only: bool = False,
):
"""
Check the samplesheet for errors.
Reformat nf-core/quantms sdrf file and check its contents.
@param exp_design: SDRF/Expdesign file to be validated
@param is_sdrf: SDRF file or Expdesign file
@param skip_sdrf_validation: Disable the validation of SDRF
@param skip_ms_validation: Disable the validation of mass spectrometry fields in SDRF (e.g. posttranslational modifications)
@param skip_factor_validation: Disable the validation of factor values in SDRF
@param skip_experimental_design_validation: Disable the validation of experimental design
@param use_ols_cache_only: Use ols cache for validation of the terms and not OLS internet service
"""
# TODO validate expdesign file
if is_sdrf:
check_sdrf(check_ms, input_file, validate_ontologies)
check_sdrf(
input_sdrf=exp_design,
skip_sdrf_validation=skip_sdrf_validation,
skip_ms_validation=skip_ms_validation,
skip_factor_validation=skip_factor_validation,
skip_experimental_design_validation=skip_experimental_design_validation,
use_ols_cache_only=use_ols_cache_only,
)
else:
check_expdesign(input_file)
check_expdesign(exp_design)
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
click
sdrf-pipelines==0.0.28
sdrf-pipelines==0.0.29
pyopenms
ms2rescore==3.0.2
psm-utils==0.8.0
Expand Down
17 changes: 14 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,17 @@
from setuptools import find_packages, setup
import os
import codecs

VERSION = "0.0.3"
def read(rel_path):
    """
    Return the text content of a file located relative to this script.

    The file is decoded explicitly as UTF-8 so the result does not depend on the
    locale of the machine running the build.
    @param rel_path: Path of the file, relative to the directory containing this file
    @return: The whole file content as a string
    """
    here = os.path.abspath(os.path.dirname(__file__))
    # Built-in open() with a fixed encoding replaces codecs.open(), which fell
    # back to the platform default encoding when none was given.
    with open(os.path.join(here, rel_path), "r", encoding="utf-8") as fp:
        return fp.read()
def get_version(rel_path):
    """
    Extract the package version from a Python source file.

    Only the first line that starts with ``__version__`` is considered; the
    version is the text between the first pair of quote characters on that line.
    @param rel_path: Path of the file to scan, relative to the directory containing this file
    @return: The version string
    @raise RuntimeError: If no line starts with ``__version__``
    """
    version_line = next(
        (
            candidate
            for candidate in read(rel_path).splitlines()
            if candidate.startswith("__version__")
        ),
        None,
    )
    if version_line is None:
        raise RuntimeError("Unable to find version string.")

    # Prefer double quotes when the line contains any, otherwise single quotes.
    quote = "'" if '"' not in version_line else '"'
    return version_line.split(quote)[1]

NAME = "quantms-utils"
LICENSE = "MIT License"
Expand Down Expand Up @@ -30,7 +41,7 @@

INSTALL_REQUIRES = [
"click",
"sdrf-pipelines==0.0.28",
"sdrf-pipelines==0.0.29",
"pyopenms",
"ms2rescore==3.0.2",
"psm-utils==0.8.0",
Expand All @@ -46,7 +57,7 @@

setup(
name=NAME,
version=VERSION,
version=get_version("quantmsutils/__init__.py"),
license=LICENSE,
description=DESCRIPTION,
long_description=LONG_DESCRIPTION,
Expand Down
7 changes: 3 additions & 4 deletions tests/test_commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def test_convert_psm_help():
# test for the check_samplesheet command in cli
def test_check_samplesheet_help():
runner = CliRunner()
result = runner.invoke(cli, ["checksamplesheet", "--help"])
result = runner.invoke(cli, ["check_samplesheet", "--help"])

assert result.exit_code == 0

Expand All @@ -81,10 +81,9 @@ def test_check_samplesheet_sdrf():
result = runner.invoke(
cli,
[
"checksamplesheet",
"check_samplesheet",
"--is_sdrf",
"--check_ms",
"--input_file",
"--exp_design",
"tests/test_data/PXD000001.sdrf.tsv",
],
)
Expand Down
Loading

0 comments on commit 008d67c

Please sign in to comment.