Merge pull request #30 from MannLabs/develop

Develop
MannLabs · Oct 8, 2021 · ef3774c · ef3774c
2 parents 56bd565 + 03027b1
commit ef3774c
Show file tree

Hide file tree

Showing 26 changed files with 242 additions and 140 deletions.
diff --git a/README.md b/README.md
@@ -81,7 +81,7 @@ pip install -e .
 
 ## Test data
 
-AlphaMap has direct data import options for MaxQuant and Spectronaut.
+AlphaMap has direct data import options for AlphaPept, DIA-NN, MaxQuant, Spectronaut and FragPipe.
 
 ### AlphaPept
 AlphaMap takes the *results.csv* file from AlphaPept as input format. An example is available for [download here](https://github.com/MannLabs/alphamap/releases/download/v0.0.210730-alpha/test_alphapept_input.csv).

diff --git a/alphamap/__init__.py b/alphamap/__init__.py
@@ -1 +1 @@
-__version__ = "0.1.4"
+__version__ = "0.1.5"
diff --git a/alphamap/_nbdev.py b/alphamap/_nbdev.py
@@ -13,6 +13,7 @@
          "convert_fragpipe_mq_mod": "Importing.ipynb",
          "import_fragpipe_data": "Importing.ipynb",
          "import_data": "Importing.ipynb",
+         "extract_uniprot_id": "Preprocessing.ipynb",
          "expand_protein_ids": "Preprocessing.ipynb",
          "pep_position_helper": "Preprocessing.ipynb",
          "get_peptide_position": "Preprocessing.ipynb",

diff --git a/alphamap/data/alphamap_tutorial.docx b/alphamap/data/alphamap_tutorial.docx
diff --git a/alphamap/data/alphamap_tutorial.pdf b/alphamap/data/alphamap_tutorial.pdf
diff --git a/alphamap/gui.py b/alphamap/gui.py
@@ -527,8 +527,20 @@ def download_pdf_report():
     margin=10
 )
 
+custom_enzyme_field = pn.widgets.TextInput(
+    name='Type a regular expression:',
+    disabled=True,
+    align='end',
+    width=160,
+    placeholder='([KR](?=[^P]))'
+    # margin=(0,150,10,-30)
+)
+
 proteases_options_tab = pn.Card(
-    proteases_options,
+    pn.Row(
+        proteases_options,
+        custom_enzyme_field
+    ),
     pn.Row(
         proteases_select_all,
         proteases_clear_all,
@@ -1238,6 +1250,25 @@ def change_proteases_selection(select, clear):
         proteases_options.value = []
 
 
+@pn.depends(
+    custom_enzyme_field.param.value,
+    watch=True
+)
+def update_protease_dict(_):
+    protease_dict['custom_enzyme'] = custom_enzyme_field.value
+
+
+@pn.depends(
+    proteases_options.param.value,
+    watch=True
+)
+def de_activate_custom_enzyme_field(_):
+    if 'custom_enzyme' in proteases_options.value:
+        custom_enzyme_field.disabled = False
+    else:
+        custom_enzyme_field.disabled = True
+
+
 @pn.depends(
     search_by.param.value,
     watch=True

diff --git a/alphamap/preprocessing.py b/alphamap/preprocessing.py
@@ -1,26 +1,35 @@
 # AUTOGENERATED! DO NOT EDIT! File to edit: nbs/Preprocessing.ipynb (unless otherwise specified).
 
-__all__ = ['expand_protein_ids', 'pep_position_helper', 'get_peptide_position', 'get_ptm_sites', 'get_modifications',
-           'format_input_data']
+__all__ = ['extract_uniprot_id', 'expand_protein_ids', 'pep_position_helper', 'get_peptide_position', 'get_ptm_sites',
+           'get_modifications', 'format_input_data']
 
 # Cell
 import pandas as pd
+def extract_uniprot_id(protein_id:str):
+    """
+    Extract the Uniprot unique entry id from the unusual formatted protein_id.
+    """
+    if 'sp' in protein_id:
+        return protein_id.split('|')[1]
+    elif '__' in protein_id:
+        return protein_id.split('__')[-1]
+    return protein_id
+
 def expand_protein_ids(df: pd.DataFrame):
     """
     Function to split protein groups in 'all_protein_ids' by ';' into separate rows.
     The resulting dataframe has a new column 'unique_protein_id'.
-
     Args:
         df (pd.DataFrame): Experimental data that was imported by the 'import_data' function.
     Returns:
         pd.DataFrame: Exploded dataframe with a new column 'unique_protein_id'.
-
     """
     df = df.copy(deep=True)
     df.all_protein_ids = df.all_protein_ids.str.split(';')
     df["all_protein_ids_all"] = df.all_protein_ids.apply(lambda x: ';'.join(sorted(x)))
     res = df.explode('all_protein_ids').reset_index(drop=True)
     res.columns = ['unique_protein_id','modified_sequence','naked_sequence','all_protein_ids']
+    res.unique_protein_id = res.unique_protein_id.apply(lambda x: extract_uniprot_id(x))
     return res
 
 # Cell

diff --git a/alphamap/proteolytic_cleavage.py b/alphamap/proteolytic_cleavage.py
@@ -42,6 +42,7 @@
 protease_dict["trypsin_exception"] = "((?<=[CD])K(?=D))|((?<=C)K(?=[HY]))|((?<=C)R(?=K))|((?<=R)R(?=[HR]))"
 protease_dict["non-specific"] = "()"
 protease_dict["trypsin"] = "([KR](?=[^P]))"
+protease_dict["custom_enzyme"] = "[]"
 
 # Cell
 import re
@@ -56,7 +57,10 @@ def get_cleavage_sites(sequence: str, protease: str):
         list: List of cleavage site indices for the selected protease.
 
     """
-    pattern = re.compile(protease_dict[protease])
+    try:
+        pattern = re.compile(protease_dict[protease])
+    except:
+        return []
     pattern_idx = pattern.finditer(sequence)
     pattern_idx = [m.start(0) for m in pattern_idx]
     return pattern_idx
diff --git a/alphamap/sequenceplot.py b/alphamap/sequenceplot.py
@@ -383,7 +383,7 @@ def plot_single_peptide_traces(df_plot,protein,fasta):
                  'F':'Phenylalanine',
                  'P':'Proline',
                  'S':'Serine',
-                 'T':'Treonine',
+                 'T':'Threonine',
                  'W':'Tryptophan',
                  'Y':'Tyrosine',
                  'V':'Valine',

diff --git a/docs/Preprocessing.html b/docs/Preprocessing.html
@@ -8,14 +8,14 @@
 
 
 
-nb_path: "nbs\Preprocessing.ipynb"
+nb_path: "nbs/Preprocessing.ipynb"
 ---
 <!--
 
 #################################################
 ### THIS FILE WAS AUTOGENERATED! DO NOT EDIT! ###
 #################################################
-# file to edit: nbs\Preprocessing.ipynb
+# file to edit: nbs/Preprocessing.ipynb
 # command to build the docs after a change: nbdev_build_docs
 
 -->
@@ -70,11 +70,36 @@ <h2 id="Split-protein-group-into-unique-protein-accessions">Split protein group
 
 
 <div class="output_markdown rendered_html output_subarea ">
-<h4 id="expand_protein_ids" class="doc_header"><code>expand_protein_ids</code><a href="https://github.com/mannlabs/alphamap/tree/master/alphamap/preprocessing.py#L8" class="source_link" style="float:right">[source]</a></h4><blockquote><p><code>expand_protein_ids</code>(<strong><code>df</code></strong>:<code>DataFrame</code>)</p>
+<h4 id="extract_uniprot_id" class="doc_header"><code>extract_uniprot_id</code><a href="https://github.com/mannlabs/alphamap/tree/master/alphamap/preprocessing.py#L8" class="source_link" style="float:right">[source]</a></h4><blockquote><p><code>extract_uniprot_id</code>(<strong><code>protein_id</code></strong>:<code>str</code>)</p>
+</blockquote>
+<p>Extract the Uniprot unique entry id from the unusual formatted protein_id.</p>
+
+</div>
+
+</div>
+
+</div>
+</div>
+
+</div>
+    {% endraw %}
+
+    {% raw %}
+
+<div class="cell border-box-sizing code_cell rendered">
+
+<div class="output_wrapper">
+<div class="output">
+
+<div class="output_area">
+
+
+<div class="output_markdown rendered_html output_subarea ">
+<h4 id="expand_protein_ids" class="doc_header"><code>expand_protein_ids</code><a href="https://github.com/mannlabs/alphamap/tree/master/alphamap/preprocessing.py#L18" class="source_link" style="float:right">[source]</a></h4><blockquote><p><code>expand_protein_ids</code>(<strong><code>df</code></strong>:<code>DataFrame</code>)</p>
 </blockquote>
 <p>Function to split protein groups in 'all_protein_ids' by ';' into separate rows.
-The resulting dataframe has a new column 'unique_protein_id'.</p>
-<p>Args:
+The resulting dataframe has a new column 'unique_protein_id'.
+Args:
     df (pd.DataFrame): Experimental data that was imported by the 'import_data' function.
 Returns:
     pd.DataFrame: Exploded dataframe with a new column 'unique_protein_id'.</p>
@@ -120,7 +145,7 @@ <h2 id="Annotate-peptides-with-start-and-end-position">Annotate peptides with st
 
 
 <div class="output_markdown rendered_html output_subarea ">
-<h4 id="pep_position_helper" class="doc_header"><code>pep_position_helper</code><a href="https://github.com/mannlabs/alphamap/tree/master/alphamap/preprocessing.py#L31" class="source_link" style="float:right">[source]</a></h4><blockquote><p><code>pep_position_helper</code>(<strong><code>seq</code></strong>:<code>str</code>, <strong><code>prot</code></strong>:<code>str</code>, <strong><code>fasta</code></strong>:<code>pyteomics.fasta</code>, <strong><code>verbose</code></strong>:<code>bool</code>=<em><code>True</code></em>)</p>
+<h4 id="pep_position_helper" class="doc_header"><code>pep_position_helper</code><a href="https://github.com/mannlabs/alphamap/tree/master/alphamap/preprocessing.py#L40" class="source_link" style="float:right">[source]</a></h4><blockquote><p><code>pep_position_helper</code>(<strong><code>seq</code></strong>:<code>str</code>, <strong><code>prot</code></strong>:<code>str</code>, <strong><code>fasta</code></strong>:<code>pyteomics.fasta</code>, <strong><code>verbose</code></strong>:<code>bool</code>=<em><code>True</code></em>)</p>
 </blockquote>
 <p>Helper function for 'get_peptide_position'.</p>
 <p>Args:
@@ -159,7 +184,7 @@ <h4 id="pep_position_helper" class="doc_header"><code>pep_position_helper</code>
 
 
 <div class="output_markdown rendered_html output_subarea ">
-<h4 id="get_peptide_position" class="doc_header"><code>get_peptide_position</code><a href="https://github.com/mannlabs/alphamap/tree/master/alphamap/preprocessing.py#L64" class="source_link" style="float:right">[source]</a></h4><blockquote><p><code>get_peptide_position</code>(<strong><code>df</code></strong>:<code>DataFrame</code>, <strong><code>fasta</code></strong>:<code>pyteomics.fasta</code>, <strong><code>verbose</code></strong>:<code>bool</code>=<em><code>True</code></em>)</p>
+<h4 id="get_peptide_position" class="doc_header"><code>get_peptide_position</code><a href="https://github.com/mannlabs/alphamap/tree/master/alphamap/preprocessing.py#L73" class="source_link" style="float:right">[source]</a></h4><blockquote><p><code>get_peptide_position</code>(<strong><code>df</code></strong>:<code>DataFrame</code>, <strong><code>fasta</code></strong>:<code>pyteomics.fasta</code>, <strong><code>verbose</code></strong>:<code>bool</code>=<em><code>True</code></em>)</p>
 </blockquote>
 <p>Function to get start and end position of each peptide in the given protein.</p>
 <p>Args:
@@ -210,7 +235,7 @@ <h2 id="Annotate-each-peptide-with-PTM-site-indices-and-modification-types">Anno
 
 
 <div class="output_markdown rendered_html output_subarea ">
-<h4 id="get_ptm_sites" class="doc_header"><code>get_ptm_sites</code><a href="https://github.com/mannlabs/alphamap/tree/master/alphamap/preprocessing.py#L94" class="source_link" style="float:right">[source]</a></h4><blockquote><p><code>get_ptm_sites</code>(<strong><code>peptide</code></strong>:<code>str</code>, <strong><code>modification_reg</code></strong>:<code>str</code>)</p>
+<h4 id="get_ptm_sites" class="doc_header"><code>get_ptm_sites</code><a href="https://github.com/mannlabs/alphamap/tree/master/alphamap/preprocessing.py#L103" class="source_link" style="float:right">[source]</a></h4><blockquote><p><code>get_ptm_sites</code>(<strong><code>peptide</code></strong>:<code>str</code>, <strong><code>modification_reg</code></strong>:<code>str</code>)</p>
 </blockquote>
 <p>Function to get sequence positions of all PTMs of a peptide in the given protein.</p>
 <p>Args:
@@ -247,7 +272,7 @@ <h4 id="get_ptm_sites" class="doc_header"><code>get_ptm_sites</code><a href="htt
 
 
 <div class="output_markdown rendered_html output_subarea ">
-<h4 id="get_modifications" class="doc_header"><code>get_modifications</code><a href="https://github.com/mannlabs/alphamap/tree/master/alphamap/preprocessing.py#L124" class="source_link" style="float:right">[source]</a></h4><blockquote><p><code>get_modifications</code>(<strong><code>df</code></strong>:<code>DataFrame</code>, <strong><code>mod_reg</code></strong>:<code>str</code>)</p>
+<h4 id="get_modifications" class="doc_header"><code>get_modifications</code><a href="https://github.com/mannlabs/alphamap/tree/master/alphamap/preprocessing.py#L133" class="source_link" style="float:right">[source]</a></h4><blockquote><p><code>get_modifications</code>(<strong><code>df</code></strong>:<code>DataFrame</code>, <strong><code>mod_reg</code></strong>:<code>str</code>)</p>
 </blockquote>
 <p>Function to get sequence positions and modification types of all PTMs of a peptide in the given protein.</p>
 <p>Args:
@@ -302,7 +327,7 @@ <h2 id="Preprocessing-wrapper">Preprocessing wrapper<a class="anchor-link" href=
 
 
 <div class="output_markdown rendered_html output_subarea ">
-<h4 id="format_input_data" class="doc_header"><code>format_input_data</code><a href="https://github.com/mannlabs/alphamap/tree/master/alphamap/preprocessing.py#L143" class="source_link" style="float:right">[source]</a></h4><blockquote><p><code>format_input_data</code>(<strong><code>df</code></strong>:<code>DataFrame</code>, <strong><code>fasta</code></strong>:<code>pyteomics.fasta</code>, <strong><code>modification_exp</code></strong>:<code>str</code>, <strong><code>verbose</code></strong>:<code>bool</code>=<em><code>True</code></em>)</p>
+<h4 id="format_input_data" class="doc_header"><code>format_input_data</code><a href="https://github.com/mannlabs/alphamap/tree/master/alphamap/preprocessing.py#L152" class="source_link" style="float:right">[source]</a></h4><blockquote><p><code>format_input_data</code>(<strong><code>df</code></strong>:<code>DataFrame</code>, <strong><code>fasta</code></strong>:<code>pyteomics.fasta</code>, <strong><code>modification_exp</code></strong>:<code>str</code>, <strong><code>verbose</code></strong>:<code>bool</code>=<em><code>True</code></em>)</p>
 </blockquote>
 <p>Function to format input data and to annotate sequence start and end positions plus PTM sites.</p>
 <p>Args:

diff --git a/docs/Uniprot_integration.html b/docs/Uniprot_integration.html
@@ -8,14 +8,14 @@
 
 
 
-nb_path: "nbs\Uniprot_integration.ipynb"
+nb_path: "nbs/Uniprot_integration.ipynb"
 ---
 <!--
 
 #################################################
 ### THIS FILE WAS AUTOGENERATED! DO NOT EDIT! ###
 #################################################
-# file to edit: nbs\Uniprot_integration.ipynb
+# file to edit: nbs/Uniprot_integration.ipynb
 # command to build the docs after a change: nbdev_build_docs
 
 -->

diff --git a/docs/index.html b/docs/index.html
@@ -109,7 +109,7 @@ <h3 id="Developer">Developer<a class="anchor-link" href="#Developer"> </a></h3><
 </div>
 <div class="cell border-box-sizing text_cell rendered"><div class="inner_cell">
 <div class="text_cell_render border-box-sizing rendered_html">
-<h2 id="Test-data">Test data<a class="anchor-link" href="#Test-data"> </a></h2><p>AlphaMap has direct data import options for MaxQuant and Spectronaut.</p>
+<h2 id="Test-data">Test data<a class="anchor-link" href="#Test-data"> </a></h2><p>AlphaMap has direct data import options for AlphaPept, DIA-NN, MaxQuant, Spectronaut and FragPipe.</p>
 <h3 id="AlphaPept">AlphaPept<a class="anchor-link" href="#AlphaPept"> </a></h3><p>AlphaMap takes the <em>results.csv</em> file from AlphaPept as input format. An example is available for <a href="https://github.com/MannLabs/alphamap/releases/download/v0.0.210730-alpha/test_alphapept_input.csv">download here</a>.</p>
 <h3 id="DIA-NN">DIA-NN<a class="anchor-link" href="#DIA-NN"> </a></h3><p>AlphaMap takes the peptide-level output .tsv file from DIA-NN as input format. An example is available for <a href="https://github.com/MannLabs/alphamap/releases/download/v0.0.210730-alpha/test_diann_input.tsv">download here</a>.</p>
 <h3 id="MaxQuant">MaxQuant<a class="anchor-link" href="#MaxQuant"> </a></h3><p>AlphaMap takes the <em>evidence.txt</em> file from MaxQuant as input format. A reduced example file is available for <a href="https://github.com/MannLabs/alphamap/releases/download/v0.0.210622-alpha/test_maxquant_input.txt">download here</a>.</p>

diff --git a/docs/organisms_data.html b/docs/organisms_data.html
@@ -8,14 +8,14 @@
 
 
 
-nb_path: "nbs\organisms_data.ipynb"
+nb_path: "nbs/organisms_data.ipynb"
 ---
 <!--
 
 #################################################
 ### THIS FILE WAS AUTOGENERATED! DO NOT EDIT! ###
 #################################################
-# file to edit: nbs\organisms_data.ipynb
+# file to edit: nbs/organisms_data.ipynb
 # command to build the docs after a change: nbdev_build_docs
 
 -->

diff --git a/docs/proteolytic_cleavage.html b/docs/proteolytic_cleavage.html
@@ -8,14 +8,14 @@
 
 
 
-nb_path: "nbs\proteolytic_cleavage.ipynb"
+nb_path: "nbs/proteolytic_cleavage.ipynb"
 ---
 <!--
 
 #################################################
 ### THIS FILE WAS AUTOGENERATED! DO NOT EDIT! ###
 #################################################
-# file to edit: nbs\proteolytic_cleavage.ipynb
+# file to edit: nbs/proteolytic_cleavage.ipynb
 # command to build the docs after a change: nbdev_build_docs
 
 -->
@@ -100,7 +100,7 @@ <h2 id="Get-proteolytic-cleavage-sites-for-a-protein-sequence">Get proteolytic c
 
 
 <div class="output_markdown rendered_html output_subarea ">
-<h4 id="get_cleavage_sites" class="doc_header"><code>get_cleavage_sites</code><a href="https://github.com/mannlabs/alphamap/tree/master/alphamap/proteolytic_cleavage.py#L48" class="source_link" style="float:right">[source]</a></h4><blockquote><p><code>get_cleavage_sites</code>(<strong><code>sequence</code></strong>:<code>str</code>, <strong><code>protease</code></strong>:<code>str</code>)</p>
+<h4 id="get_cleavage_sites" class="doc_header"><code>get_cleavage_sites</code><a href="https://github.com/mannlabs/alphamap/tree/master/alphamap/proteolytic_cleavage.py#L49" class="source_link" style="float:right">[source]</a></h4><blockquote><p><code>get_cleavage_sites</code>(<strong><code>sequence</code></strong>:<code>str</code>, <strong><code>protease</code></strong>:<code>str</code>)</p>
 </blockquote>
 <p>Function to get the position of proteolytic cleavage sites in a sequence.</p>
 <p>Args:

diff --git a/misc/bumpversion.cfg b/misc/bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.1.4
+current_version = 0.1.5
 commit = False
 tag = False
 parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+)(?P<build>\d+))?

diff --git a/misc/one_click_macos/Info.plist b/misc/one_click_macos/Info.plist
@@ -9,9 +9,9 @@
 	<key>CFBundleIconFile</key>
 	<string>alpha_logo.icns</string>
 	<key>CFBundleIdentifier</key>
-	<string>alphamap.0.1.4</string>
+	<string>alphamap.0.1.5</string>
 	<key>CFBundleShortVersionString</key>
-	<string>0.1.4</string>
+	<string>0.1.5</string>
 	<key>CFBundleInfoDictionaryVersion</key>
 	<string>6.0</string>
 	<key>CFBundleName</key>