Skip to content

Commit

Permalink
making code ready for generalization
Browse files Browse the repository at this point in the history
  • Loading branch information
RobbinBouwmeester committed Nov 24, 2023
1 parent 877a471 commit b950b54
Showing 1 changed file with 36 additions and 21 deletions.
57 changes: 36 additions & 21 deletions proteobench/modules/dda_quant/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,41 @@
from proteobench.modules.interfaces import ParseInputsInterface


def count_chars(input_string):
    """Return how many characters of ``input_string`` are uppercase letters."""
    total = 0
    for symbol in input_string:
        # A character is counted only when it is both alphabetic and uppercase.
        if symbol.isalpha() and symbol.isupper():
            total += 1
    return total
def count_upper_chars(input_string):
    """Return the number of characters in ``input_string`` that are uppercase."""
    uppercase = [symbol for symbol in input_string if symbol.isupper()]
    return len(uppercase)


def count_chars(input_string: str, isalpha: bool = True, isupper: bool = True):
    """Count the characters of ``input_string`` that pass the enabled filters.

    Args:
        input_string: Text whose characters are counted.
        isalpha: When true, only characters for which ``str.isalpha()`` holds
            are counted.
        isupper: When true, only characters for which ``str.isupper()`` holds
            are counted.

    Returns:
        The number of characters satisfying every enabled filter. With both
        filters disabled every character is counted (previously this case
        fell through all branches and returned ``None``).
    """

    def _keep(char: str) -> bool:
        # A disabled filter never rejects a character.
        return (not isalpha or char.isalpha()) and (not isupper or char.isupper())

    return sum(1 for char in input_string if _keep(char))


def get_stripped_seq(input_string: str, isalpha: bool = True, isupper: bool = True):
    """Return ``input_string`` reduced to the characters passing the enabled filters.

    Args:
        input_string: Text to filter.
        isalpha: When true, drop characters for which ``str.isalpha()`` is false.
        isupper: When true, drop characters for which ``str.isupper()`` is false.

    Returns:
        A new string containing, in their original order, the characters that
        satisfy every enabled filter. With both filters disabled the characters
        are returned unfiltered (previously this case fell through all branches
        and returned ``None``).
    """

    def _keep(char: str) -> bool:
        # A disabled filter never rejects a character.
        return (not isalpha or char.isalpha()) and (not isupper or char.isupper())

    return "".join(char for char in input_string if _keep(char))


def match_seq(input_string: str, pattern=re.compile(r"([a-z]+)")):
    """Locate every match of ``pattern`` and report its text and position.

    Args:
        input_string: Text to scan.
        pattern: Compiled regular expression with one capturing group; the
            default captures runs of lowercase ASCII letters.

    Returns:
        A ``(mods, positions)`` pair of generators. ``mods`` yields the text
        captured by group 1 of each match; ``positions`` yields, for each
        match, the number of uppercase characters preceding the start of
        group 1 in ``input_string``.
    """
    hits = []
    for found in pattern.finditer(input_string):
        hits.append((found.group(1), found.start(1), found.end(1)))

    mods = (text for text, _start, _end in hits)
    # Position is expressed as the count of uppercase characters before the match.
    positions = (
        count_upper_chars(input_string[:start]) for _text, start, _end in hits
    )
    return mods, positions


def match_brackets(input_string):
pattern = r"\[([^]]+)\]"
def match_brackets(input_string: str, pattern=r"\[([^]]+)\]"):
matches = [
(match.group(1), match.start(1), match.end(1))
for match in re.finditer(pattern, input_string)
Expand All @@ -26,7 +55,7 @@ def match_brackets(input_string):

def get_proforma_sage(
input_string,
modification_dict={
modification_dict: dict = {
"+57.0215": "Carbamidomethyl",
"+15.9949": "Oxidation",
"-17.026548": "Gln->pyro-Glu",
Expand All @@ -41,7 +70,7 @@ def get_proforma_sage(
try:
new_modifications.append(modification_dict[m])
except KeyError:
new_modifications.append("")
new_modifications.append(m)
modifications = new_modifications

pos_mod_dict = dict(zip(positions, modifications))
Expand Down Expand Up @@ -76,7 +105,7 @@ def get_proforma_msfragger(
try:
new_modifications.append(modification_dict[m])
except KeyError:
new_modifications.append("")
new_modifications.append(m)
modifications = new_modifications

pos_mod_dict = dict(zip(positions, modifications))
Expand Down Expand Up @@ -117,20 +146,6 @@ def get_proforma_alphapept(
return new_seq


def count_upper_chars(input_string):
    """Count the uppercase characters contained in ``input_string``."""
    tally = 0
    for letter in input_string:
        if letter.isupper():
            tally += 1
    return tally


def match_seq(input_string, pattern=re.compile(r"([a-z]+)")):
    """Find all matches of ``pattern`` and describe where they occur.

    Args:
        input_string: Text to scan.
        pattern: Compiled regular expression with one capturing group; by
            default it captures runs of lowercase ASCII letters.

    Returns:
        Two generators ``(mods, positions)``: the group-1 text of each match,
        and the number of uppercase characters found before the start of each
        match's group 1.
    """
    # Collect the matches up front so both generators walk the same sequence.
    found = list(pattern.finditer(input_string))
    mods = (hit.group(1) for hit in found)
    positions = (
        count_upper_chars(input_string[0 : hit.start(1)]) for hit in found
    )
    return mods, positions


class ParseInputs(ParseInputsInterface):
def convert_to_standard_format(
self, df: pd.DataFrame, parse_settings: ParseSettings
Expand Down

0 comments on commit b950b54

Please sign in to comment.