Skip to content

Commit

Permalink
Rename fixing profile script
Browse files Browse the repository at this point in the history
  • Loading branch information
Cengoni committed Apr 2, 2024
1 parent 30fc13e commit 95623ad
Showing 1 changed file with 28 additions and 9 deletions.
37 changes: 28 additions & 9 deletions metaphlan/utils/fix_relab_mpa4.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#!/usr/bin/env python
__author__ = 'Aitor Blanco Miguez ([email protected])'
__author__ = ('Aitor Blanco Miguez ([email protected]), '
'Claudia Mengoni ([email protected])')
__version__ = '4.1.1'
__date__ = '11 Mar 2024'

Expand All @@ -11,6 +12,8 @@
from util_fun import info, error, warning
import argparse as ap

# Absolute path of the folder that contains this script; used as the base
# directory for locating the data file below.
script_install_folder = os.path.dirname(os.path.abspath(__file__))
# Path of the Oct22 taxonomy fixes table ('oct22_fix_tax.tsv'), expected to sit
# in the same folder as this script. main() passes it to read_oct22_fixes().
OCT22_FIXES=os.path.join(script_install_folder,'oct22_fix_tax.tsv')

def read_params():
""" Reads and parses the command line arguments of the script
Expand All @@ -19,11 +22,23 @@ def read_params():
namespace: The populated namespace with the command line arguments
"""
p = ap.ArgumentParser(formatter_class=ap.RawTextHelpFormatter, add_help=False)
requiredNamed = p.add_argument_group('requiered arguments')
requiredNamed = p.add_argument_group('required arguments')
requiredNamed.add_argument('--input', type=str, default=None, help="The path to the input profile")
requiredNamed.add_argument('--output', type=str, default=None, help="The path to the output profile")
return p.parse_args()

def read_oct22_fixes(file):
    """Reads the tab separated file with old and new taxonomies of Oct22

    The first line is treated as a header and skipped. Every other non-blank
    line must hold exactly three tab separated fields: the old taxonomy, the
    new taxonomy and the new taxonomy id.

    Args:
        file (str): the path to the file with the Oct22 fixes

    Returns:
        dict: maps each old taxonomy to a (new taxonomy, new taxonomy id) tuple

    Raises:
        ValueError: if a non-blank line does not have exactly three fields
    """
    oct_fixes = {}
    with open(file) as inf:
        # Discard the header line; the default keeps an empty file from raising
        next(inf, None)
        for line in inf:
            # A blank line (e.g. a trailing empty line at the end of the file)
            # carries no mapping and would break the three-way unpacking below
            if not line.strip():
                continue
            old_tax, new_tax, new_tax_id = line.split('\t')
            # Only the last field carries the line terminator
            oct_fixes[old_tax] = (new_tax, new_tax_id.strip())
    return oct_fixes


def check_params(args):
"""Checks the mandatory command line arguments of the script
Expand All @@ -38,9 +53,6 @@ def check_params(args):
args.input), exit=True)
if not args.output:
error('--output must be specified', exit=True)
elif not os.path.exists(args.output):
error('The file {} does not exist'.format(
args.output), exit=True)

def fix_relab_mpa4(input, output):
taxa_levs = [{},{},{},{},{},{},{},{}]
Expand All @@ -59,15 +71,20 @@ def fix_relab_mpa4(input, output):
line = line.replace('p__Bacillota', 'p__Firmicutes')
elif 'f__Saccharomycetales_unclassified' in line:
line = line.replace('f__Saccharomycetales_unclassified','f__Debaryomycetaceae')
line = line.strip().split('\t')

elif release == 'mpa_vOct22_CHOCOPhlAnSGB_202212':
pass
line = line.strip().split('\t')
taxa_levs[-1][line[0]] = [line[1], float(line[2]), line[3] if len(line)==4 else '']
line = line.strip().split('\t')
if line[0] in oct_fixes:
line[0],line[1] = oct_fixes[line[0]]

taxa_levs[-1][line[0]] = [line[1], round(float(line[2]),5), line[3] if len(line)==4 else '']

for i in range(1,8):
j = i+1
for ss in taxa_levs[-i]:
gg = ss.replace('|{}'.format(ss.split('|')[-1]), '')
gg_n = taxa_levs[-i][ss][0].replace('|{}'.format(taxa_levs[-i][ss][0].split('|')[-1]), '')
gg_n = '|'.join(taxa_levs[-i][ss][0].split('|')[:-1])
if gg not in taxa_levs[-j]:
taxa_levs[-j][gg] = [gg_n, taxa_levs[-i][ss][1], '']
else:
Expand All @@ -77,10 +94,12 @@ def fix_relab_mpa4(input, output):
wf.write(tax + '\t' + '\t'.join([str(x) for x in level[tax]]) + '\n')

def main():
    """Runs the profile fixing script from the command line

    Parses and validates the command line arguments, loads the Oct22 taxonomy
    fixes, rewrites the input profile into the output profile and reports the
    elapsed time.
    """
    # fix_relab_mpa4 looks up `oct_fixes` as a module-level name, so the table
    # has to be published as a global before that function is called
    global oct_fixes
    t0 = time.time()
    cli_args = read_params()
    info("Start fixing profile")
    check_params(cli_args)
    oct_fixes = read_oct22_fixes(OCT22_FIXES)
    fix_relab_mpa4(cli_args.input, cli_args.output)
    elapsed_seconds = round(time.time() - t0, 2)
    info("Finish fixing profile ({} seconds)".format(elapsed_seconds))
Expand Down

0 comments on commit 95623ad

Please sign in to comment.