-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathread2aln.py
executable file
·64 lines (61 loc) · 3.49 KB
/
read2aln.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
#!/usr/bin/env python
#-*- coding: utf-8 -*-
import os, argparse, time, multiprocessing
from joblib import Parallel, delayed
from Bio import SeqIO
def demo_mode(args):
os.system("bin/lastdb tmpdb examples/example_seqs.fa")
os.system("bin/lastal tmpdb examples/example_seqs.fa > examples/example_aln.maf")
os.system("bin/maf-convert sam examples/example_aln.maf > examples/example_aln.sam")
os.system("python scripts/delete_double.py examples/example_aln.sam examples/output.sam")
os.system("python scripts/parse_evalue.py examples/output.sam examples/aln.sam 10e-10 65")
os.system("find examples/ ! \( -name \"aln.sam\" -o -name \"example_seqs.fa\" \) -type f -exec rm -f {} + | rm tmp*")
return
def run_mode(args):
start_time = time.time()
if (os.path.splitext(args.reads)[1] == ".fq" or os.path.splitext(args.reads)[1] == ".fastq"):
if args.v:
print("Converting fastq to fasta...")
os.system("sed -n '1~4s/^@/>/p;2~4p' " + args.reads + " > " + os.path.splitext(args.reads)[0] + ".fa")
args.reads = os.path.splitext(args.reads)[0] + ".fa"
if args.v:
print("Deleting bad reads")
os.system("bin/dustmasker -in " + args.reads + " -out tmp_dustmasker -outfmt acclist")
os.system("python scripts/delete_fail.py " + args.reads + " " + args.length)
if args.v:
print("Creating index...")
os.system("bin/lastdb tmpdb " + args.reads)
if args.v:
print("Aligning reads against reads...")
if args.P:
os.system("bin/lastal -P " + args.P + " tmpdb " + args.reads + " > " + os.path.splitext(args.reads)[0] + ".maf")
else:
os.system("bin/lastal tmpdb " + args.reads + " > " + os.path.splitext(args.reads)[0] + ".maf")
os.system("bin/maf-convert sam "+ os.path.splitext(args.reads)[0] + ".maf > " + os.path.splitext(args.reads)[0] + ".sam")
if args.v:
print("Deleting bad alignments...")
os.system("python scripts/delete_double.py " + os.path.splitext(args.reads)[0] + ".sam output.sam")
os.system("python scripts/parse_evalue.py output.sam " + os.path.splitext(args.reads)[0] + ".sam " + args.identity + " " + args.length)
if args.v:
print("Cleaning folder...")
if args.P:
os.system("rm tmp* output.sam")
else:
os.system("rm tmp* output.sam " + os.path.splitext(args.reads)[0] + ".maf")
if args.v:
print("Done.\n\tTotal time in sec : " + str(time.time() - start_time))
return
parser = argparse.ArgumentParser(description = "Find the similarities between long reads.")
subparser = parser.add_subparsers(help = "Choose how to run the program")
parser_run = subparser.add_parser("run", help = "Run the program for a set of long reads")
parser_run.add_argument("reads", type = str, help = "Reads in fasta/fastq format")
parser_run.add_argument("--length", help = "Minimum length of alignments [65]", default = "65", action = 'store')
parser_run.add_argument("--identity", help = "Minimum required percent identity [90]", action = 'store', default = '90')
parser_run.add_argument("-v", help = "Be verbose", action = 'store_true')
parser_run.add_argument("--train", help = "Train score parameters", action = 'store_true')
parser_run.add_argument("-P", help = "Allow parallelization [2]", action = 'store', default = 2)
parser_run.set_defaults(func = run_mode, evalue = "10e-10", size = "65")
parser_demo = subparser.add_parser("demo", help = "Run the program with the example file")
parser_demo.set_defaults(func = demo_mode)
args = parser.parse_args()
args.func(args)