-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfiltrarZonas.py
82 lines (66 loc) · 1.57 KB
/
filtrarZonas.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
#!/usr/bin/python
"""
Remove regions of no interest (PE/PPE, phage and repeat) from .depth files.

Written specifically for the H37Rv_annotation2sytems.ptt annotation file;
check the column layout before using it with any other format.
"""
import sys
# Default annotation file; an optional third command-line argument overrides it.
compfile = "/home/laura/Documentos/Anotacion/H37Rv_annotation2sytems.ptt"

# The annotation file starts with this many header lines that hold no records.
HEADER_LINES = 3
# A record marks a region to drop when its 11th whitespace-separated field
# contains any of these substrings.
EXCLUDED_KEYWORDS = ("PE/PPE", "phage", "repeat")


def _abrir(path, mode, mensaje):
    """Open *path* in *mode*, or stop the program with *mensaje* % path."""
    try:
        return open(path, mode)
    except IOError:
        # Stop right here: carrying on without the handle would only fail
        # later with a NameError that hides the real cause.
        sys.exit(mensaje % path)


def leer_intervalos(zonasfile, keywords=EXCLUDED_KEYWORDS,
                    header_lines=HEADER_LINES):
    """Collect the (start, end) intervals of the regions to be removed.

    zonasfile    -- iterable of annotation lines (an open file or a list).
    keywords     -- substrings looked for in the 11th field of each record.
    header_lines -- number of leading lines skipped without being parsed.

    Returns a list of (start, end) integer tuples taken from the 2nd and 3rd
    fields of every matching record. Raises ValueError if those fields of a
    matching record are not integers.
    """
    intervalos = []
    for numero, line in enumerate(zonasfile):
        if numero < header_lines:
            continue
        words = line.split()
        # Blank or truncated lines have no 11th field to inspect, so they
        # cannot mark a region; skip them instead of raising IndexError.
        if len(words) < 11:
            continue
        # Records whose 4th field is "I" are never used for filtering
        # (presumably intergenic entries -- confirm against the annotation).
        if words[3] == "I":
            continue
        if any(keyword in words[10] for keyword in keywords):
            intervalos.append((int(words[1]), int(words[2])))
    return intervalos


def filtrar_lineas(lineas, intervalos):
    """Yield the depth lines whose position lies outside every interval.

    lineas     -- iterable of depth lines; the 2nd whitespace-separated field
                  of each line is read as the integer position.
    intervalos -- list of (start, end) tuples; both bounds are inclusive.

    Lines are yielded with trailing whitespace removed. Lines with fewer than
    two fields (e.g. a blank line at the end of the file) are skipped. Raises
    ValueError if a position field is not an integer.
    """
    for line in lineas:
        line = line.rstrip()
        words = line.split()
        if len(words) < 2:
            continue
        posicion = int(words[1])
        # any() stops at the first interval that contains the position.
        if not any(start <= posicion <= end for start, end in intervalos):
            yield line


def main(argv=None):
    """Filter a .depth file and report how many positions were kept.

    argv[1] -- .depth file to filter.
    argv[2] -- output file.
    argv[3] -- optional annotation file; defaults to ``compfile``.

    Returns the number of lines written. Exits with an error message when the
    arguments are missing or a file cannot be opened.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 3:
        sys.exit("Usage: %s <infile.depth> <outfile> [annotation.ptt]" % argv[0])
    infile = argv[1]
    outfile = argv[2]
    zonas_path = argv[3] if len(argv) > 3 else compfile

    with _abrir(zonas_path, "r", "%s does not exist!!") as zonasfile:
        intervalos = leer_intervalos(zonasfile)

    count2 = 0
    with _abrir(infile, "r", "%s does not exist!!") as inputfile, \
            _abrir(outfile, "w", "File %s cannot be created!!") as output:
        for line in filtrar_lineas(inputfile, intervalos):
            output.write(line + "\n")
            count2 += 1

    # Single-argument print() behaves the same under Python 2 and Python 3.
    print("Longitud genoma filtrado: " + str(count2))
    return count2


if __name__ == "__main__":
    main()