-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathfasta_to_fastq.py
executable file
·152 lines (115 loc) · 3.79 KB
/
fasta_to_fastq.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
#!/usr/bin/env python
"""
Convert fasta format to fake fastq format (every base gets the same
placeholder quality character, since fasta carries no quality scores)
"""
#--- standard library imports
#
import sys
import os
import gzip
import logging
# optparse deprecated from Python 2.7 on
from optparse import OptionParser
#--- third-party imports
#
from Bio import SeqIO
#--- project specific imports
#
# /
# Module metadata. Most fields are intentionally left empty in this file.
__author__ = "Andreas Wilm"
__version__ = "0.1"
# NOTE(review): this value looks like an e-mail obfuscation placeholder
# rather than a real address - confirm the intended contact address.
__email__ = "[email protected]"
__copyright__ = ""
__license__ = ""
__credits__ = [""]
__status__ = ""
# http://docs.python.org/library/logging.html
# An empty logger name selects the root logger. It starts at WARN level;
# main() raises the verbosity to INFO/DEBUG for --verbose/--debug.
LOG = logging.getLogger("")
logging.basicConfig(level=logging.WARN,
format='%(levelname)s [%(asctime)s]: %(message)s')
def cmdline_parser():
    """
    Build the command line parser for this script.

    The parser knows the following options (all values are stored as
    strings, flags are None unless given):

    -v/--verbose  flag, stored in ``verbose``
    --debug       flag, stored in ``debug``
    -i/--input    fasta input file, stored in ``ffasta``
    -p/--pair     mate pair number, "1" (default) or "2", stored in ``pairno``
    -o/--output   fastq output file, stored in ``ffastq``

    Returns the configured OptionParser instance; nothing is parsed here.
    """
    # http://docs.python.org/library/optparse.html
    usage_text = "\n".join([
        "%prog: convert fasta output to fake fastq format",
        "usage: %prog [options]",
    ])

    # (option strings, keyword settings) in the order they should show up
    # in the generated help output.
    option_specs = (
        (("-v", "--verbose"),
         dict(action="store_true", dest="verbose", help="be verbose")),
        # No short form for --debug; optparse ignores empty option strings.
        (("", "--debug"),
         dict(action="store_true", dest="debug", help="debugging")),
        (("-i", "--input"),
         dict(dest="ffasta", help="fasta input file")),
        (("-p", "--pair"),
         dict(dest="pairno", default="1", choices=["1", "2"],
              help="mate pair number")),
        (("-o", "--output"),
         dict(dest="ffastq", help="fastq output file")),
    )

    parser = OptionParser(usage=usage_text)
    for flags, settings in option_specs:
        parser.add_option(*flags, **settings)
    return parser
def _open_maybe_gzip(path, mode):
    """
    Open `path` for text I/O, using gzip if the name ends in ".gz".

    `mode` is 'r' or 'w'. For gzip files text mode is requested
    explicitly ('rt'/'wt'), because gzip.open() otherwise hands out a
    binary handle, while the caller parses and writes text.
    """
    if path.endswith(".gz"):
        return gzip.open(path, mode + "t")
    return open(path, mode)


def main():
    """
    The main function

    Parses the command line, validates the input/output file arguments
    and writes one fastq record per fasta record. As fasta has no quality
    information, every base gets the same placeholder quality character.

    Exits via parser.error() on command line problems and with status 1
    if the input file is missing or the output file already exists.
    """
    parser = cmdline_parser()
    (opts, args) = parser.parse_args()

    # parser.error() prints the usage plus the message and terminates the
    # program itself, so no explicit sys.exit() is needed after it.
    if args:
        parser.error("Unrecognized arguments found: %s." % (
            ' '.join(args)))

    if opts.verbose:
        LOG.setLevel(logging.INFO)
    if opts.debug:
        LOG.setLevel(logging.DEBUG)

    if not opts.ffasta:
        parser.error("fasta input file argument missing.")
    if not os.path.exists(opts.ffasta):
        LOG.critical("file '%s' does not exist.\n", opts.ffasta)
        sys.exit(1)

    if not opts.ffastq:
        parser.error("fastq output file argument missing.")
    if os.path.exists(opts.ffastq):
        LOG.critical(
            "Refusing to overwrite existing output file '%s'.\n",
            opts.ffastq)
        sys.exit(1)

    default_qual = 'h'  # have no scores. use highest score seen in example.

    # Fake coordinates; the fasta input carries none of this information.
    lane = 1
    tile = 1
    xpos = 1
    ypos = 1
    coordinates = ':'.join(str(x) for x in (lane, tile, xpos, ypos))

    # index is the number for a multiplexed sample (0 for no indexing)
    # index is the barcode in qiime format
    index = 0

    # The with-statement closes both handles even if parsing or writing
    # fails half way through.
    with _open_maybe_gzip(opts.ffasta, 'r') as fhandle_fa, \
            _open_maybe_gzip(opts.ffastq, 'w') as fhandle_fq:
        for seqrec in SeqIO.parse(fhandle_fa, "fasta"):
            machine = seqrec.id.split()[0]
            fastqid = "%s:%s#%s/%s" % (
                machine, coordinates, index, opts.pairno)
            seq = str(seqrec.seq).upper()
            fhandle_fq.write("@%s\n%s\n+%s\n%s\n" % (
                fastqid, seq, fastqid, default_qual * len(seq)))
# Script entry point: only run the conversion when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    main()
    LOG.info("Successful program exit")