-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathIdendityFusedtranscripts.py
57 lines (40 loc) · 1.91 KB
/
IdendityFusedtranscripts.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
#!/usr/bin/env python2
import optparse
from collections import defaultdict
################################# Command line options
desc='Converte a Transdecoder longest_orfs.GFF3 file with its BLAST hit file into a BED12 file of putative multiple CDS regions in a mRNA annotation\n Cite: Cicconardi et al 2023 Tribe-wide genomics reveals the evolutionary dynamics of genome size, content, and selection across the adaptive radiation of Heliconiini butterflies. In prep.'
parser = optparse.OptionParser(description=desc, version='%prog version 0.1 - 25-09-2016 - Author: FCicconardi')
parser.add_option('-b', '--Blast-outfmt6', dest='blast', help='BLAST hits (outfmt6) of the longest_orfs file. Mandatory opt.', action='store', metavar='FILE')
parser.add_option('-g', '--GFF3-longest_orfs', dest='gff', help='longest_orfs.gff3 file. Mandatory opt.', action='store', metavar='FILE')
(opts, args) = parser.parse_args()
mandatories = ['blast','gff']
for m in mandatories:
if not opts.__dict__[m]:
print "\nWARNING! One or more options not specified\n"
parser.print_help()
exit(-1)
############################## Reading files and parametersfrom sys import argv
blastDCT=defaultdict(list)
tbl=open(opts.blast, 'r').readlines()
for row in tbl:
qID=row.split()[0]
rID=row.split()[1]
score=float(row.split()[11])
Eval=float(row.split()[10])
blastDCT[qID].append((rID,score,Eval))
gff=open(opts.gff, 'r').readlines()
for line in gff:
if len(line) > 1:
feat=line.split()[2]
if feat == 'CDS':
info=line.strip().split()[-1].split(';')
for el in info:
if 'Parent' in el:
cds=el.split('=')[1]
if cds in blastDCT:
transc=line.strip().split()[0]
start=int(line.strip().split()[3])
end=int(line.strip().split()[4])
hit=blastDCT[cds][0][0]
Eval=blastDCT[cds][0][2]
print '%s\t%s\t%s\t%s\t%s\t+' % (transc,start,end,hit,Eval)