-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathAddIndelsToFasta.py
executable file
·67 lines (45 loc) · 1.89 KB
/
AddIndelsToFasta.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# -*- coding: utf-8 -*-
"""
Created on Wed Oct 21 15:17:27 2015
@author: Gillian
"""
import argparse
parser = argparse.ArgumentParser()
parser.add_argument("-i", "--infile", required=True, help = "input file")
parser.add_argument("-o", "--outDir", required=True, help = "output directory")
parser.add_argument("-s", "--stem", required = True, help = "stem name" )
parser.add_argument("-n", "--MaxInDel", required=True, help = "# of indels on each side to test")
args=parser.parse_args()
if args.outDir[-1]!="/":
args.outDir+="/"
# indels are inserted into the FarJunction.fa file
# insertions are SEQUENCEANNNNNSEQUENCEB where 2N is the number of indels given in the argument parser
# deletions are SEQA-AAAAA[AAABBB]BBBSEQB where N*AAA and N*BBB are removed from the junction interface.
# x,..,5, 4, 3, 2, 1 deletions on each side, then 2, 4, 6, 8, 10,.., 2X N's inserted into junction
counter=0
f1 = open(args.infile, mode = "rU")
for i in range(1,int(args.MaxInDel)+1):
f1.seek(0)
fout = open(args.outDir + args.stem + "_FJ_Indels_" +str(i)+".fa", mode ="w")
print "writing indels"+str(i)+".fa"
for line_raw in f1:
counter+=1
if line_raw[0] == ">" :
JunctionName=line_raw.strip()
JunctionSeq=""
else:
JunctionSeq += line_raw.strip()
if len(JunctionSeq)>290:
LeftExon = JunctionSeq[0:150]
RightExon = JunctionSeq[150:300]
# print JunctionName
#print JunctionSeq
fout.write(JunctionName + "|DEL"+str(i)+"\n")
fout.write(LeftExon[0:-i]+RightExon[i:]+"\n")
InsertN = "N"*(i*2)
fout.write(JunctionName + "|INS"+str(i)+"\n")
fout.write(LeftExon+InsertN+RightExon+"\n")
if counter==5000:
fout.flush()
fout.close()
f1.close()