Skip to content

Commit

Permalink
Merge branch 'v4.0.0' of https://github.com/compomics/ms2pip_c into spectrum-output
Browse files Browse the repository at this point in the history
  • Loading branch information
RalfG committed May 7, 2024
2 parents 2228df5 + 7374689 commit 2ee2340
Show file tree
Hide file tree
Showing 20 changed files with 1,987 additions and 658 deletions.
38 changes: 24 additions & 14 deletions .github/workflows/build_and_publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,19 +12,21 @@ jobs:
- uses: actions/checkout@v4
with:
lfs: "true"

- name: Set up Python
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: "3.11"

- name: Install dependencies
run: |
python -m pip install --upgrade build flake8
- name: Check for syntax errors
run: |
flake8 ./ms2pip --count --select=E9,F63,F7,F82 --show-source --statistics
python -m pip install --upgrade pip
pip install build
- name: Build sdist
run: python -m build --sdist --outdir dist
- uses: actions/upload-artifact@v3

- uses: actions/upload-artifact@v4
with:
name: dist
path: dist/*.tar.gz
Expand All @@ -34,33 +36,41 @@ jobs:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-latest, windows-latest, macos-latest]
os: [ubuntu-latest, windows-latest, macos-13, macos-14] # macos-13 for intel, macos-14 for apple silicon
steps:
- uses: actions/checkout@v4
with:
lfs: "true"
- uses: actions/setup-python@v4

- uses: actions/setup-python@v5
name: Set up Python
with:
python-version: "3.11"

- name: Install cibuildwheel
run: python -m pip install "cibuildwheel>=2"

- name: Build wheels
run: python -m cibuildwheel --output-dir dist
- uses: actions/upload-artifact@v3

- uses: actions/upload-artifact@v4
with:
name: dist
path: dist/ms2pip-*.whl

publish-to-pypi:
needs: [build-sdist, build-wheels]
runs-on: ubuntu-latest
environment:
name: pypi
url: https://pypi.org/p/ms2pip
permissions:
id-token: write
steps:
- uses: actions/download-artifact@v2
- uses: actions/download-artifact@v4
with:
name: dist
path: dist
- uses: pypa/gh-action-pypi-publish@release/v1
with:
user: ${{ secrets.PYPI_USERNAME }}
password: ${{ secrets.PYPI_PASSWORD }}

- name: Publish to PyPI
uses: pypa/gh-action-pypi-publish@release/v1
21 changes: 12 additions & 9 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,39 +5,42 @@ on:
branches:
- releases
pull_request:
workflow_dispatch:

jobs:
build:
runs-on: ubuntu-latest
strategy:
max-parallel: 4
matrix:
python-version: ["3.8", "3.9", "3.10", "3.11"]
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]

steps:
- uses: actions/checkout@v4
with:
lfs: "true"

- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}

- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install flake8
- name: Lint with flake8
run: |
# stop the build if there are Python syntax errors or undefined names
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
pip install ruff
- name: Check with Ruff
run: ruff check --output-format=github ./ms2pip

- name: Build and install ms2pip
run: |
pip install .[test]
# - name: Test with pytest
# run: |
# pytest

- name: Test installation
run: |
ms2pip --help
8 changes: 8 additions & 0 deletions docs/source/installation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -53,3 +53,11 @@ Clone this repository and use pip to install an editable version:
.. code-block:: bash

    pip install --editable .

Optionally, add the ``[dev,docs]`` extras to install the development and
documentation dependencies:

.. code-block:: bash

    pip install --editable .[dev,docs]
366 changes: 336 additions & 30 deletions docs/source/prediction-models.rst

Large diffs are not rendered by default.

5 changes: 5 additions & 0 deletions docs/source/usage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,11 @@ This mode was first developed in collaboration with the ProGenTomics group for t

To train a new prediction model, see :ref:`Training new MS²PIP models`.

``annotate-spectra``
---------------------

[todo]


Input
=====
Expand Down
53 changes: 31 additions & 22 deletions fasta2speclib/fasta2speclib.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
- Terminal modifications can have site specificity (e.g. N-term K or N-term P).
"""

from __future__ import annotations

__author__ = "Ralf Gabriels"
Expand All @@ -40,10 +41,10 @@
from functools import cmp_to_key, partial
from itertools import chain, product
from pathlib import Path
from typing import Any, Dict, List, Optional, Union
from typing import Dict, List, Optional, Union

import pandas as pd
from pydantic import BaseModel, validator
from pydantic import BaseModel, field_validator, model_validator
from pyteomics.fasta import FASTA, Protein, decoy_db
from pyteomics.parser import icleave
from rich.logging import RichHandler
Expand All @@ -56,6 +57,8 @@

logger = logging.getLogger(__name__)

raise NotImplementedError("This module is not yet implemented for MS²PIP v4.")


class Peptide(BaseModel):
"""Peptide representation within the fasta2speclib search space."""
Expand All @@ -81,20 +84,18 @@ class ModificationConfig(BaseModel):
protein_c_term: Optional[bool] = False
fixed: Optional[bool] = False

@validator("protein_c_term", always=True) # Validate on last target in model
def modification_must_have_target(cls, v, values):
@model_validator(mode="after")
def modification_must_have_target(self):
target_fields = [
"amino_acid",
"peptide_n_term",
"protein_n_term",
"peptide_c_term",
"protein_c_term",
]
if not any(t in values and values[t] for t in target_fields):
raise ValueError(
"Modifications must have at least one target (amino acid or N/C-term)."
)
return v
if not any(getattr(self, t) for t in target_fields):
raise ValueError("Modifications must have a target (amino acid or N/C-term).")
return self


DEFAULT_MODIFICATIONS = [
Expand Down Expand Up @@ -137,9 +138,10 @@ class Configuration(BaseModel):
batch_size: int = 10000
num_cpu: Optional[int] = None

@validator("output_filetype")
@field_validator("output_filetype")
@classmethod
def _validate_output_filetypes(cls, v):
allowed_types = ["msp", "mgf", "bibliospec", "spectronaut", "dlib"]# , "hdf"]
allowed_types = ["msp", "mgf", "bibliospec", "spectronaut", "dlib"] # , "hdf"]
v = [filetype.lower() for filetype in v]
for filetype in v:
if filetype not in allowed_types:
Expand All @@ -149,7 +151,8 @@ def _validate_output_filetypes(cls, v):
)
return v

@validator("modifications")
@field_validator("modifications")
@classmethod
def _validate_modifications(cls, v):
if all(isinstance(m, ModificationConfig) for m in v):
return v
Expand All @@ -160,15 +163,17 @@ def _validate_modifications(cls, v):
"Modifications should be a list of dicts or ModificationConfig objects."
)

@validator("ms2pip_model")
@field_validator("ms2pip_model")
@classmethod
def _validate_ms2pip_model(cls, v):
if v not in MODELS.keys():
raise ValueError(
f"MS²PIP model `{v}` not recognized. Should be one of " f"`{MODELS.keys()}`."
)
return v

@validator("num_cpu")
@field_validator("num_cpu")
@classmethod
def _validate_num_cpu(cls, v):
available_cpus = multiprocessing.cpu_count()
if not v or not 0 < v < available_cpus:
Expand Down Expand Up @@ -210,7 +215,7 @@ def __init__(
if isinstance(config, dict):
config["fasta_filename"] = fasta_filename
config["output_filename"] = output_filename
config = Configuration.parse_obj(config)
config = Configuration.model_validate(config)
elif isinstance(config, Configuration):
config.fasta_filename = fasta_filename
config.output_filename = output_filename
Expand Down Expand Up @@ -321,7 +326,7 @@ def peptides_to_batches(peptides: List[Peptide], batch_size: int) -> List[List[P
"""Divide peptides into batches for batch-based processing."""
return [peptides[i : i + batch_size] for i in range(0, len(peptides), batch_size)]

def process_batch(self, batch_id, batch_peptides):
def process_batch(self, batch_id: int, batch_peptides: List[Peptide]):
"""Predict and write library for a batch of peptides."""
# Generate MS²PIP input
logger.info("Generating MS²PIP input...")
Expand Down Expand Up @@ -382,7 +387,7 @@ def _get_rt_predictor(config: Configuration) -> RetentionTime:
logger.debug("Initializing DeepLC predictor")
if not config.deeplc:
config.deeplc = {"calibration_file": None}
if not "n_jobs" in config.deeplc:
if "n_jobs" not in config.deeplc:
config.deeplc["n_jobs"] = config.num_cpu
rt_predictor = RetentionTime(config=config.dict())
else:
Expand All @@ -397,11 +402,15 @@ def _prepare_ms2pip_params(config: Configuration) -> dict:
"model": config.ms2pip_model,
"frag_error": 0.02,
"ptm": [
"{},{},opt,N-term".format(mod.name, mod.mass_shift)
if mod.peptide_n_term or mod.protein_n_term
else "{},{},opt,C-term".format(mod.name, mod.mass_shift)
if mod.peptide_c_term or mod.protein_c_term
else "{},{},opt,{}".format(mod.name, mod.mass_shift, mod.amino_acid)
(
"{},{},opt,N-term".format(mod.name, mod.mass_shift)
if mod.peptide_n_term or mod.protein_n_term
else (
"{},{},opt,C-term".format(mod.name, mod.mass_shift)
if mod.peptide_c_term or mod.protein_c_term
else "{},{},opt,{}".format(mod.name, mod.mass_shift, mod.amino_acid)
)
)
for mod in config.modifications
],
"sptm": [],
Expand Down
Loading

0 comments on commit 2ee2340

Please sign in to comment.