-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcount_dust.py
executable file
·57 lines (50 loc) · 1.58 KB
/
count_dust.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
#!/usr/bin/env python3
import sys
# Read the output of dustmasker_static and report the proportion of
# non-dust (uppercase) per sequence.
upper = 0
lower = 0
count = 0
# These aren't contigs but we pretend they are...
contig = None
# This just prints out the proportion, which is the more sensible
# type of output.
"""
def emit():
if count:
total = upper + lower
print('0.000' if total == 0 else "{:.3f}".format(upper / total))
"""
# But for blobtools we want something a little different.
res = []
def emit():
if count:
total = upper + lower
frac = 0.0 if total == 0 else (upper / total)
# With vanilla blobtools you need to cram the values into the log scale
# print("{}\t1\t{:.3f}".format(contig, 10**(5*(frac-0.5))))
# With the patched version you can avoid this.
res.append("{}\t1\t{:.3f}".format(contig, frac))
def print_result():
"""Prints out the lines in res. Previously I printed them as I read the file but now
I need the total number to go into the header.
"""
assert count == len(res)
print("## count_dust v0.1")
print("## Total Reads = {}".format(count))
print("## Mapped Reads = {}".format(count))
print("## Unmapped Reads = 0")
print("# contig_id\tread_cov\tbase_cov")
for l in res:
print(l)
for l in sys.stdin:
if l.startswith('>'):
emit()
count += 1
contig = l.split('|')[1].strip()
upper = lower = 0
else:
upper += sum( 1 for n in l if n in 'ATCG' )
lower += sum( 1 for n in l if n in 'atcg' )
emit()
print_result()