-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathPGGSV.026.CombineOutput.py
executable file
·89 lines (79 loc) · 2.01 KB
/
PGGSV.026.CombineOutput.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
#coding: utf-8
#2020-7-23
# This script merges the DEL, DUP, INS and INV results from the metasv_manta output.
import sys
import gzip
# Chromosome names accepted in the DEL/DUP table: "1".."22" are prefixed with
# "chr"; "23" and "24" are mapped to chrX and chrY respectively.
_AUTOSOMES = frozenset(str(number) for number in range(1, 23))
_SEX_CHROMOSOMES = {"23": "chrX", "24": "chrY"}


def _prefixed_chromosome(name):
    """Return the "chr"-style name for a numeric chromosome label, else None."""
    if name in _SEX_CHROMOSOMES:
        return _SEX_CHROMOSOMES[name]
    if name in _AUTOSOMES:
        return "chr" + name
    return None


def _load_table(path, records_by_chrom, chrom_order, rename=None):
    """Read a whitespace-delimited SV table into ``records_by_chrom``.

    Each kept line is stored as ``(int(position), tab_joined_line)`` under its
    chromosome (first field); the position is the second field. Chromosomes
    are appended to ``chrom_order`` the first time they are seen.

    Lines containing ``#`` anywhere are skipped, as are blank lines.

    If ``rename`` is given it is applied to the chromosome field. When it
    returns None (unexpected chromosome) the input path is printed and the
    rest of the file is NOT read, which is the script's historical behaviour.
    """
    with open(path) as handle:
        for raw_line in handle:
            stripped = raw_line.strip()
            if not stripped or "#" in stripped:
                continue
            fields = stripped.split()
            chrom, pos = fields[0], fields[1]
            if rename is not None:
                chrom = rename(chrom)
                if chrom is None:
                    # Unexpected chromosome label: report which file and stop.
                    print(path)
                    break
                fields[0] = chrom
            if chrom not in records_by_chrom:
                chrom_order.append(chrom)
                records_by_chrom[chrom] = []
            # Fields are re-joined with tabs, normalising the delimiter.
            records_by_chrom[chrom].append((int(pos), "\t".join(fields) + "\n"))


def _append_insertions(path, records_by_chrom, out_handle):
    """Read the gzipped INS file, copying header lines and collecting records.

    Lines containing ``#`` are written straight to ``out_handle``. A data line
    is kept (verbatim) when its third field contains "INS" and its fifth field
    does not -- presumably the VCF ID and ALT columns; verify against the
    producer of this file.

    NOTE(review): records on a chromosome that has no DEL/DUP/INV entry are
    dropped, exactly as in the original script -- confirm this is intended.
    """
    # "rt" is required on Python 3: the default gzip mode yields bytes, which
    # cannot be compared with str or written to a text-mode output file.
    with gzip.open(path, "rt") as handle:
        for raw_line in handle:
            stripped = raw_line.strip()
            if "#" in stripped:
                out_handle.write(raw_line)
                continue
            if not stripped:
                continue
            fields = stripped.split()
            chrom, pos = fields[0], fields[1]
            type_field, alt_field = fields[2], fields[4]
            if "INS" in type_field and "INS" not in alt_field:
                if chrom in records_by_chrom:
                    records_by_chrom[chrom].append((int(pos), raw_line))


def _write_sorted(records_by_chrom, chrom_order, out_handle):
    """Write records chromosome by chromosome, sorted by position.

    Chromosomes keep first-seen order; the sort is stable, so records sharing
    a position keep their insertion order.
    """
    for chrom in chrom_order:
        ordered = sorted(records_by_chrom[chrom], key=lambda record: record[0])
        out_handle.writelines(text for _, text in ordered)


def main(argv=None):
    """Merge DEL/DUP, INV and INS results into one position-sorted file.

    Arguments (from ``argv`` or ``sys.argv[1:]``), in order:
        1. DEL/DUP table (plain text, numeric chromosome labels)
        2. INV table (plain text, chromosome labels used as-is)
        3. INS file (gzip-compressed text)
        4. output path

    Extra arguments are ignored; fewer than four exits with a usage message.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) < 4:
        sys.exit("usage: <DEL_DUP> <INV> <INS.gz> <output>")
    del_dup_path, inv_path, ins_path, out_path = args[:4]

    records_by_chrom = {}
    chrom_order = []
    # The context manager guarantees the output is flushed and closed even if
    # one of the inputs fails to parse.
    with open(out_path, "w") as out_handle:
        _load_table(del_dup_path, records_by_chrom, chrom_order,
                    rename=_prefixed_chromosome)
        _load_table(inv_path, records_by_chrom, chrom_order)
        _append_insertions(ins_path, records_by_chrom, out_handle)
        _write_sorted(records_by_chrom, chrom_order, out_handle)


if __name__ == "__main__":
    main()