-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathpairs_from_nsorted_bam.py
executable file
·125 lines (95 loc) · 3.28 KB
/
pairs_from_nsorted_bam.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
#!/usr/bin/env python
"""Extract only paired reads from name sorted BAM files"""
__author__ = "Andreas Wilm"
__email__ = "[email protected]"
__copyright__ = "2013 Genome Institute of Singapore"
__license__ = "WTFPL"
#--- standard library imports
#
import argparse
import logging
import os
import sys
#--- third-party imports
#
import pysam
#--- project specific imports
#
#--- globals
#
LOG = logging.getLogger("")
logging.basicConfig(level=logging.WARN,
format='%(levelname)s [%(asctime)s]: %(message)s')
def cmdline_parser():
"""
creates an argparse instance
"""
# http://docs.python.org/library/optparse.html
parser = argparse.ArgumentParser(
description=__doc__)
parser.add_argument("--verbose",
dest="verbose",
action="store_true",
help="Enable verbose output")
parser.add_argument("--debug",
dest="debug",
action="store_true",
help=argparse.SUPPRESS) #"debugging")
parser.add_argument("-i", "--in",
dest="bam_in",
required=True,
help="Input BAM file (must be name sorted)."
" '-' for stdin")
parser.add_argument("-o", "--out",
dest="bam_out",
required=True,
help="Output BAM file. '-' for stdout")
return parser
def qname_base(qname):
"""Return read name base, i.e. read name with pair information
"""
if qname[-2] in [".", "/", "#"]:
return qname[:-2]
else:
return qname
def main():
"""main function
"""
n_reads_in = 0
n_reads_out = 0
parser = cmdline_parser()
args = parser.parse_args()
if args.verbose:
LOG.setLevel(logging.INFO)
if args.debug:
LOG.setLevel(logging.DEBUG)
if not os.path.exists(args.bam_in) and args.bam_in != "-":
LOG.error("File '%s' does not exist.\n" % (args.bam_in))
sys.exit(1)
if os.path.exists(args.bam_out) and args.bam_out != "-":
LOG.error("Cowardly refusing to overwrite file '%s'\n" % (args.bam_out))
sys.exit(1)
sam_in = pysam.Samfile(args.bam_in, "rb" )
sam_out = pysam.Samfile(args.bam_out, "wb", template=sam_in)
last_read = None
for read in sam_in:
n_reads_in += 1
if last_read == None:
#print "First %s" % read.qname
last_read = read
elif qname_base(last_read.qname) == qname_base(read.qname):
#print "Writing pair %s %s" % (last_read.qname, read.qname);
sam_out.write(last_read)
sam_out.write(read)
n_reads_out += 2
last_read = None
else:
#print "Dropping %s" % last_read.qname
#print "First %s" % read.qname
last_read = read
LOG.info("%d reads in. %d reads out." % (n_reads_in, n_reads_out))
sam_in.close()
sam_out.close()
if __name__ == "__main__":
main()
LOG.info("Successful program exit")