-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathCompareMafAndCheckMafCoverage.py
104 lines (96 loc) · 5.66 KB
/
CompareMafAndCheckMafCoverage.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
#!python3
import sys
def readMaf(benchMark_file, test_file):
benchMarkalignedPosition = dict()
with open(benchMark_file) as f:
for line in f:
elements = line.split()
if len(elements) == 7:
(s, refchr, refstart, reflength, refstrand, refchrLength, refali) = elements
refchr = refchr.replace("col.", "")
if refchr not in benchMarkalignedPosition:
benchMarkalignedPosition[refchr] = dict()
refstart = int(refstart)
line2 = f.readline()
elements2 = line2.split()
(s, querychr, querystart, querylength, querystrand, queryChrLength, queryali) = elements2
querychr = querychr.replace("query.", "")
if querychr != refchr:
print("this script does not apply for you comparsion, please do not use it")
print(querychr)
print(refchr)
querystart = int(querystart)
refPosition = 0
queryPosition = 0
if querystrand[0] == '+':
for i in range(len(refali)):
if refali[i] != '-':
if queryali[i] != '-' and refali[i] != queryali[i]:
benchMarkalignedPosition[refchr][refstart+refPosition] = querystart + queryPosition
elif queryali[i] == '-':
benchMarkalignedPosition[refchr][refstart+refPosition] = -1 # gap alignment, give it a value of -1
if refali[i] != '-':
refPosition = refPosition + 1
if queryali[i] != '-':
queryPosition = queryPosition + 1
else:
print("this script does not apply for you comparsion, please do not use it")
totalProducedLength = 0
with open(test_file) as f:
for line in f:
elements = line.split()
if len(elements) == 7:
(s, refchr, refstart, reflength, refstrand, refchrLength, refali) = elements
refchr = refchr.replace("col.", "")
if refchr in benchMarkalignedPosition:
refstart = int(refstart)
line2 = f.readline()
elements2 = line2.split()
(s, querychr, querystart, querylength, querystrand, queryChrLength, queryali) = elements2
if len(refali) == len(queryali):
querychr = querychr.replace("query.", "")
if querychr != refchr: ## interchrosome relocations are all wrong
for i in range(len(refali)):
if refali[i] != '-':
totalProducedLength = totalProducedLength + 1
else:
querystart = int(querystart)
refPosition = 0
queryPosition = 0
if querystrand[0] == '+':
for i in range(len(refali)):
if refali[i] != '-':
if queryali[i] != '-' and (refstart+refPosition) in benchMarkalignedPosition[refchr] and benchMarkalignedPosition[refchr][refstart+refPosition] == (querystart + queryPosition):
benchMarkalignedPosition[refchr][refstart+refPosition] = -5 # if it is same with benchmark, give it a value of -5
elif queryali[i] == '-' and (refstart+refPosition) in benchMarkalignedPosition[refchr] and benchMarkalignedPosition[refchr][refstart+refPosition] == -1:
benchMarkalignedPosition[refchr][refstart+refPosition] = -5
if refali[i] != queryali[i]:
totalProducedLength = totalProducedLength + 1
if refali[i] != '-':
refPosition = refPosition + 1
if queryali[i] != '-':
queryPosition = queryPosition + 1
else: # here we code assume that there is no negative alignment in the benchmark alignment. All inversions are wrong If this is not true, should changed it
for i in range(len(refali)):
if refali[i] != '-':
totalProducedLength = totalProducedLength + 1
return benchMarkalignedPosition, totalProducedLength
benchMarkalignedPosition, totalProducedLength = readMaf(sys.argv[1], sys.argv[2])
totalRefLength = 0
totalCorrected = 0
for chr in benchMarkalignedPosition:
for position in benchMarkalignedPosition[chr]:
totalRefLength = totalRefLength + 1
if benchMarkalignedPosition[chr][position] == -5: # if it was same with benchmark, the value was set as -5
totalCorrected = totalCorrected + 1
output = open(sys.argv[2] + ".aliEvaluatioin", 'w')
output.write ("totalRefLength:" + str(totalRefLength) + "\n")
output.write ("totalProducedLength:" + str(totalProducedLength) + "\n")
output.write ("totalCorrected:" + str(totalCorrected) + "\n")
recall = totalCorrected/totalRefLength
output.write ("recall:" + str(recall) + "\n")
precision = totalCorrected/totalProducedLength
output.write ("precision:" + str(precision) + "\n")
fscore = 2 * precision * recall/(precision + recall)
output.write ("fscore:" + str(fscore) + "\n")
output.close()