-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathalign_ngs_codons_tests.py
executable file
·353 lines (311 loc) · 63.8 KB
/
align_ngs_codons_tests.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
import unittest
from align_ngs_codons import pairwise_align_dna
from align_ngs_codons import gap_padding
from align_ngs_codons import translate_dna
from align_ngs_codons import get_cons_regions
from align_ngs_codons import get_var_regions
class MyTestCase(unittest.TestCase):
reference_dna = "ATGACAGTGATGGGGATACAGAAGAATTACCAACAGTGGTGGATATGGGGAATCTTAGGCTTTTGGATGCTAATGATTTGTAATGGGAATGACACGTG" \
"GGTCACAGTATATTATGGGGTACCTGTGTGGAGAGAAGCAAAAACTACTCTATTCTGTGCATCAGATGCTAAAGCATATGAGAAAGAAGTGCATAATG" \
"TTTGGGCTACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAATGGTTTTGGAAAATGTAACAGAAAATTTTAACATGTGGAAAAATGACATG" \
"GTAGATCAGATGCATGAAGATATAATCAGTTTATGGGATCAAAGCCTCAAGCCATGTGTAAAGTTGACCCCGCTCTGTGTCACTTTAAACTGTAGCAA" \
"TGCAAATAGCAATACAAATGTCACAGATATTGATAATAGCACAATTGGAGAAATAAAGAACTGCACTTTCAATGTAACTACAGAATTAAGAGATAAGG" \
"AAAAGAAAGAAAATGCACTCTTTTATAAACTTGATATAGTACCACTTAATGGAAATAACAACAGCAGCTTTAGTATGTATAGATTAATAAACTGTAAT" \
"ACCTCAGTCGTAACACAAGCCTGTCCAAAGGTCTCTTTTGACCCAATTCCTATACATTATTGTGCTCCAGCTGGTTTTGCGATTCTAAAGTGTAATAA" \
"TAAGACATTCAATGGCACAGGACCATGCAATAATGTCAGCACAGTACAATGTACACATGGGATTAGGCCAGTGGTCTCAACTCAACTATTGTTAAATG" \
"GTAGCCTAGCAGAAAAAGAGATAATAATTAGATCTGAAAATCTGACAAACAATATCAAAACAATAATAGTACATCTCAATGAATCTGTAGAGATTAAT" \
"TGTATAAGACCCAATAATAATACAAGGAAAAGTATGAGAATAGGACCAGGACAAACATTCTATGCAACAGGAGAAATAATAGGAGATATAAGGCAAGC" \
"ACATTGTAACATTAGCAAAGATAAATGGGACAAAACTTTACACAGGGTAAGTGAAAAATTAAGAGAACACTTCCCTAATAAGACAATAACATTTAACT" \
"CATCCTCAGGAGGAGACCTAGAAATTACAACACATAGCTTTAATTGTGGAGGAGAATTTTTCTATTGCAATACATCAGGCCTGTTTAATAGTACATTT" \
"AATACTACATTTTATGAACCTTCAAATTTAACCATCACACTCCAATGCAGAATAAAACAAATTATAAACATGTGGCAGGAGGTGGGACGAGCAATGTA" \
"TGCCCCTCCCATTGCAGGAAACATAACATGTGAATCAAAGATCACAGGGCTAATATTGACACGTGATGGAGGAAGTGACAATGGGACAGAGACATTCA" \
"GACCTGGAGGAGGAGATATGAGGGACAACTGGAGAAGTGAATTATATAAATATAAAGTGGTAGAAATTAAGCCATTGGGAATAGCACCCACTAATGGA" \
"AGAAGGAGAGTGGTGCAGAGAGAGAAAAGAGCAGTGGGAATAGGAGCTGTATTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCGGCATC" \
"AATAACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGGGCTATAGAGGCGCAACAGCATATGTTGC" \
"AACTCACGGTCTGGGGCATTAAGCAGCTCCAGGCAAGAGTCTTGGCTCTAGAAAGATACCTAAGGGATCAACAGCTCCTAGGGATGTGGGGCTGCTCT" \
"GGAAAACTCATCTGCACCACTAATGTGGCTTGGAACTCTAGTTGGAGTAATAAAAGTCAAGATGAGATTTGGAAGAACATGACCTGGATGCAGTGGGA" \
"TAGAGAAATTAGTGACTATACAAACACAATATACAGGTTGCTTGAAGAGTCGCAAAACCAGCAAGAAATAAATGAAAAAGATTTACTAGCATTGGACA" \
"GTTGGAACAATCTGTGGACTTGGTTTGACATAACAAACTGGCTGTGGTATATAAAAATATTCATAATGATAGTAGGAGGCTTGATAGGGTTAAGAATA" \
"ATTTTTGCTGTGCTTTCTCTAGTGAATAGAGTTAGGCAGGGATACTCACCTTTGTCGTTGCAGACCTTTACCCCAAACCCGAGGGGACCCGACAGGCT" \
"CGAAAGAATCGAAGAAGAAGGTGGAGAGCAAGACAGAGACAGATCCGTGCGATTAGTGAGCGGATTCTTAGCACTTGCCTGGGACGACCTGCGGAGCC" \
"TGTGCCTCTTCAGCTATCACCTATTGAGAGACTTCATATTGGTTGCAGCGAGAGCAGTGGGACTTCTGGGACGCAGCAGTCTCAGGGGACTACAGACA" \
"GGGTGGGAAGCCCTTAAGTATCTGGGAAGCCTTGTGCAATATTGGGGTCTAGAGCTAAAAAAGAGTGCTATTAGTCTGCTTGATACCTTAGCAATAGC" \
"AGTAGCTGAAGGAACAGATAGGATTATAGAATTCATACAAAGAATTTGTAGAGCTATCCTCCATATACCTAGAAGAATAAGACAGGGCTTTGAAGCAG" \
"CTTTGCTA"
# test the prelim align function
# @unittest.skip('Skip this test')
def test_start_1_pairwise_align_DNA(self):
    """Check pairwise_align_dna on the read "CAP255_2000_C1C3_good_start_1".

    The expected aligned read contains no gap characters, while the
    expected aligned reference carries a three-character "---" gap.
    """
    # Use the shared class-level fixture rather than a per-test copy of the
    # reference sequence.
    # NOTE(review): reference_dna is expected to be the same sequence that was
    # previously inlined in this test - confirm before merging.
    reference = self.reference_dna
    test_in = {"CAP255_2000_C1C3_good_start_1": "TTAAACTGTAGCAATGCAAGTAGCAATGCAAATGTCACAAAGGTTGATAATAGTAGCACAATTGGAGAAATAAAGAATTGCACTTTCAATGTAACTACAGAATTAAGAGATAAGGAAAAGAAAGAAAATGCACTCTTTTATAAACTTGATATAGTACCACTTAATGGAAATAACAACAGCAGCTTTAGTATGTATAGATTAATAAACTGTAATACCTCAGTCGTAACACAAGCCTGTCCAAAGGTCT"}
    ref_out = {"CAP255_2000_C1C3_good_start_1": "TTAAACTGTAGCAATGCAAATAGCAATACAAATGTCACAGATATTGATAA---TAGCACAATTGGAGAAATAAAGAACTGCACTTTCAATGTAACTACAGAATTAAGAGATAAGGAAAAGAAAGAAAATGCACTCTTTTATAAACTTGATATAGTACCACTTAATGGAAATAACAACAGCAGCTTTAGTATGTATAGATTAATAAACTGTAATACCTCAGTCGTAACACAAGCCTGTCCAAAGGTCT"}
    test_out = {"CAP255_2000_C1C3_good_start_1": "TTAAACTGTAGCAATGCAAGTAGCAATGCAAATGTCACAAAGGTTGATAATAGTAGCACAATTGGAGAAATAAAGAATTGCACTTTCAATGTAACTACAGAATTAAGAGATAAGGAAAAGAAAGAAAATGCACTCTTTTATAAACTTGATATAGTACCACTTAATGGAAATAACAACAGCAGCTTTAGTATGTATAGATTAATAAACTGTAATACCTCAGTCGTAACACAAGCCTGTCCAAAGGTCT"}
    for name, seq in test_in.items():
        # The third return value is not checked by this test.
        seq_align, ref_align, _ = pairwise_align_dna(seq, reference)
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(seq_align, test_out[name])
        self.assertEqual(ref_align, ref_out[name])
# @unittest.skip('Skip this test')
def test_start_2_pairwise_align_DNA(self):
    """Check pairwise_align_dna on the read "CAP255_2000_C1C3_good_start_2".

    The expected aligned read contains no gap characters, while the
    expected aligned reference carries a three-character "---" gap.
    """
    # Use the shared class-level fixture rather than a per-test copy of the
    # reference sequence.
    # NOTE(review): reference_dna is expected to be the same sequence that was
    # previously inlined in this test - confirm before merging.
    reference = self.reference_dna
    test_in = {"CAP255_2000_C1C3_good_start_2": "TTTAAACTGTAGCAATGCAAGTAGCAATGCAAATGTCACAAAGGTTGATAATAGTAGCACAATTGGAGAAATAAAGAATTGCACTTTCAATGTAACTACAGAATTAAGAGATAAGGAAAAGAAAGAAAATGCACTCTTTTATAAACTTGATATAGTACCACTTAATGGAAATAACAACAGCAGCTTTAGTATGTATAGATTAATAAACTGTAATACCTCAGTCGTAACACAAGCCTGTCCAAAGGTCT"}
    ref_out = {"CAP255_2000_C1C3_good_start_2": "TTTAAACTGTAGCAATGCAAATAGCAATACAAATGTCACAGATATTGATAA---TAGCACAATTGGAGAAATAAAGAACTGCACTTTCAATGTAACTACAGAATTAAGAGATAAGGAAAAGAAAGAAAATGCACTCTTTTATAAACTTGATATAGTACCACTTAATGGAAATAACAACAGCAGCTTTAGTATGTATAGATTAATAAACTGTAATACCTCAGTCGTAACACAAGCCTGTCCAAAGGTCT"}
    test_out = {"CAP255_2000_C1C3_good_start_2": "TTTAAACTGTAGCAATGCAAGTAGCAATGCAAATGTCACAAAGGTTGATAATAGTAGCACAATTGGAGAAATAAAGAATTGCACTTTCAATGTAACTACAGAATTAAGAGATAAGGAAAAGAAAGAAAATGCACTCTTTTATAAACTTGATATAGTACCACTTAATGGAAATAACAACAGCAGCTTTAGTATGTATAGATTAATAAACTGTAATACCTCAGTCGTAACACAAGCCTGTCCAAAGGTCT"}
    for name, seq in test_in.items():
        # The third return value is not checked by this test.
        seq_align, ref_align, _ = pairwise_align_dna(seq, reference)
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(seq_align, test_out[name])
        self.assertEqual(ref_align, ref_out[name])
# @unittest.skip('Skip this test')
def test_start_3_pairwise_align_DNA(self):
    """Check pairwise_align_dna on the read "CAP255_2000_C1C3_good_start_3".

    The expected aligned read contains no gap characters, while the
    expected aligned reference carries a three-character "---" gap.
    """
    # Use the shared class-level fixture rather than a per-test copy of the
    # reference sequence.
    # NOTE(review): reference_dna is expected to be the same sequence that was
    # previously inlined in this test - confirm before merging.
    reference = self.reference_dna
    test_in = {"CAP255_2000_C1C3_good_start_3": "CTTTAAACTGTAGCAATGCAAGTAGCAATGCAAATGTCACAAAGGTTGATAATAGTAGCACAATTGGAGAAATAAAGAATTGCACTTTCAATGTAACTACAGAATTAAGAGATAAGGAAAAGAAAGAAAATGCACTCTTTTATAAACTTGATATAGTACCACTTAATGGAAATAACAACAGCAGCTTTAGTATGTATAGATTAATAAACTGTAATACCTCAGTCGTAACACAAGCCTGTCCAAAGGTCT"}
    ref_out = {"CAP255_2000_C1C3_good_start_3": "CTTTAAACTGTAGCAATGCAAATAGCAATACAAATGTCACAGATATTGATAA---TAGCACAATTGGAGAAATAAAGAACTGCACTTTCAATGTAACTACAGAATTAAGAGATAAGGAAAAGAAAGAAAATGCACTCTTTTATAAACTTGATATAGTACCACTTAATGGAAATAACAACAGCAGCTTTAGTATGTATAGATTAATAAACTGTAATACCTCAGTCGTAACACAAGCCTGTCCAAAGGTCT"}
    test_out = {"CAP255_2000_C1C3_good_start_3": "CTTTAAACTGTAGCAATGCAAGTAGCAATGCAAATGTCACAAAGGTTGATAATAGTAGCACAATTGGAGAAATAAAGAATTGCACTTTCAATGTAACTACAGAATTAAGAGATAAGGAAAAGAAAGAAAATGCACTCTTTTATAAACTTGATATAGTACCACTTAATGGAAATAACAACAGCAGCTTTAGTATGTATAGATTAATAAACTGTAATACCTCAGTCGTAACACAAGCCTGTCCAAAGGTCT"}
    for name, seq in test_in.items():
        # The third return value is not checked by this test.
        seq_align, ref_align, _ = pairwise_align_dna(seq, reference)
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(seq_align, test_out[name])
        self.assertEqual(ref_align, ref_out[name])
# @unittest.skip('Skip this test')
def test_del_1_pairwise_align_DNA(self):
    """Check pairwise_align_dna on the read "CAP255_2000_C1C3_del_1".

    The expected aligned read carries a single "-" gap character, while the
    expected aligned reference contains no gaps.
    """
    # Use the shared class-level fixture rather than a per-test copy of the
    # reference sequence.
    # NOTE(review): reference_dna is expected to be the same sequence that was
    # previously inlined in this test - confirm before merging.
    reference = self.reference_dna
    test_in = {"CAP255_2000_C1C3_del_1": "TTAAACTGTAGCAATGCAAATAGCAATACAAATGTCACAGATATTGATAATAGCACAATTGAGAAATAAAGAATTGCACTTTCAATGTAACTACAGAATTAAGAGATAAGGAAAAGAAAGAAAATGCACTCTTTTATAAACTTGATATAGTACCACTTAATGGAAATAACAACAGCAGCTTCAGTATGTATAGATTAATAAACTGTAATACCTCAGTCGTAACACAAGCCTGTCCAAAGGTCTCTTT"}
    ref_out = {"CAP255_2000_C1C3_del_1": "TTAAACTGTAGCAATGCAAATAGCAATACAAATGTCACAGATATTGATAATAGCACAATTGGAGAAATAAAGAACTGCACTTTCAATGTAACTACAGAATTAAGAGATAAGGAAAAGAAAGAAAATGCACTCTTTTATAAACTTGATATAGTACCACTTAATGGAAATAACAACAGCAGCTTTAGTATGTATAGATTAATAAACTGTAATACCTCAGTCGTAACACAAGCCTGTCCAAAGGTCTCTTT"}
    test_out = {"CAP255_2000_C1C3_del_1": "TTAAACTGTAGCAATGCAAATAGCAATACAAATGTCACAGATATTGATAATAGCACAATT-GAGAAATAAAGAATTGCACTTTCAATGTAACTACAGAATTAAGAGATAAGGAAAAGAAAGAAAATGCACTCTTTTATAAACTTGATATAGTACCACTTAATGGAAATAACAACAGCAGCTTCAGTATGTATAGATTAATAAACTGTAATACCTCAGTCGTAACACAAGCCTGTCCAAAGGTCTCTTT"}
    for name, seq in test_in.items():
        # The third return value is not checked by this test.
        seq_align, ref_align, _ = pairwise_align_dna(seq, reference)
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(seq_align, test_out[name])
        self.assertEqual(ref_align, ref_out[name])
# @unittest.skip('Skip this test')
def test_del_2_pairwise_align_DNA(self):
    """Check pairwise_align_dna on the read "CAP255_2000_C1C3_del_2".

    The expected aligned read carries a single "-" gap character, while the
    expected aligned reference contains no gaps.
    """
    # Use the shared class-level fixture rather than a per-test copy of the
    # reference sequence.
    # NOTE(review): reference_dna is expected to be the same sequence that was
    # previously inlined in this test - confirm before merging.
    reference = self.reference_dna
    test_in = {"CAP255_2000_C1C3_del_2": "TTAAACTGTAGCAATGCAAATAGCAATACAAATGTCACAGATATTGATAATAGCAAATTGGAGAAATAAAGAATTGCACTTTCAATGTAACTACAGAATTAAGAGATAAGGAAAAGAAAGAAAATGCACTCTTTTATAAACTTGATATAGTACCACTTAATGGAAATAACAACAGCAGCTTCAGTATGTATAGATTAATAAACTGTAATACCTCAGTCGTAACACAAGCCTGTCCAAAGGTCTCTTT"}
    ref_out = {"CAP255_2000_C1C3_del_2": "TTAAACTGTAGCAATGCAAATAGCAATACAAATGTCACAGATATTGATAATAGCACAATTGGAGAAATAAAGAACTGCACTTTCAATGTAACTACAGAATTAAGAGATAAGGAAAAGAAAGAAAATGCACTCTTTTATAAACTTGATATAGTACCACTTAATGGAAATAACAACAGCAGCTTTAGTATGTATAGATTAATAAACTGTAATACCTCAGTCGTAACACAAGCCTGTCCAAAGGTCTCTTT"}
    test_out = {"CAP255_2000_C1C3_del_2": "TTAAACTGTAGCAATGCAAATAGCAATACAAATGTCACAGATATTGATAATAGCA-AATTGGAGAAATAAAGAATTGCACTTTCAATGTAACTACAGAATTAAGAGATAAGGAAAAGAAAGAAAATGCACTCTTTTATAAACTTGATATAGTACCACTTAATGGAAATAACAACAGCAGCTTCAGTATGTATAGATTAATAAACTGTAATACCTCAGTCGTAACACAAGCCTGTCCAAAGGTCTCTTT"}
    for name, seq in test_in.items():
        # The third return value is not checked by this test.
        seq_align, ref_align, _ = pairwise_align_dna(seq, reference)
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(seq_align, test_out[name])
        self.assertEqual(ref_align, ref_out[name])
# @unittest.skip('Skip this test')
def test_del_3_pairwise_align_DNA(self):
    """Check pairwise_align_dna on the read "CAP255_2000_C1C3_del_3".

    The expected aligned read carries a single "-" gap character, while the
    expected aligned reference contains no gaps.
    """
    # Use the shared class-level fixture rather than a per-test copy of the
    # reference sequence.
    # NOTE(review): reference_dna is expected to be the same sequence that was
    # previously inlined in this test - confirm before merging.
    reference = self.reference_dna
    test_in = {"CAP255_2000_C1C3_del_3": "TTAAACTGTAGCAATGCAAATAGCAATACAAATGTCACAGATATTGATAATAGCACAATTGGAGAAATAAAGAATTGACTTTCAATGTAACTACAGAATTAAGAGATAAGGAAAAGAAAGAAAATGCACTCTTTTATAAACTTGATATAGTACCACTTAATGGAAATAACAACAGCAGCTTTAGTATGTATAGATTAATAAACTGTAATACCTCAGTCGTAACACAAGCCTGTCCAAAGGTCTCTTTT"}
    ref_out = {"CAP255_2000_C1C3_del_3": "TTAAACTGTAGCAATGCAAATAGCAATACAAATGTCACAGATATTGATAATAGCACAATTGGAGAAATAAAGAACTGCACTTTCAATGTAACTACAGAATTAAGAGATAAGGAAAAGAAAGAAAATGCACTCTTTTATAAACTTGATATAGTACCACTTAATGGAAATAACAACAGCAGCTTTAGTATGTATAGATTAATAAACTGTAATACCTCAGTCGTAACACAAGCCTGTCCAAAGGTCTCTTTT"}
    test_out = {"CAP255_2000_C1C3_del_3": "TTAAACTGTAGCAATGCAAATAGCAATACAAATGTCACAGATATTGATAATAGCACAATTGGAGAAATAAAGAATTG-ACTTTCAATGTAACTACAGAATTAAGAGATAAGGAAAAGAAAGAAAATGCACTCTTTTATAAACTTGATATAGTACCACTTAATGGAAATAACAACAGCAGCTTTAGTATGTATAGATTAATAAACTGTAATACCTCAGTCGTAACACAAGCCTGTCCAAAGGTCTCTTTT"}
    for name, seq in test_in.items():
        # The third return value is not checked by this test.
        seq_align, ref_align, _ = pairwise_align_dna(seq, reference)
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(seq_align, test_out[name])
        self.assertEqual(ref_align, ref_out[name])
# @unittest.skip('Skip this test')
def test_del_2_or_3_pairwise_align_DNA(self):
    """Check pairwise_align_dna on the read "CAP255_2000_C1C3_del_2_or_3".

    The expected aligned read carries a single "-" gap character, while the
    expected aligned reference contains no gaps.
    """
    # Use the shared class-level fixture rather than a per-test copy of the
    # reference sequence.
    # NOTE(review): reference_dna is expected to be the same sequence that was
    # previously inlined in this test - confirm before merging.
    reference = self.reference_dna
    test_in = {"CAP255_2000_C1C3_del_2_or_3": "TTAAACTGTAGCAATGCAAATAGCAATACAAATGTCACAGATATTGATAATAGCACAATGGAGAAATAAAGAATTGCACTTTCAATGTAACTACAGAATTAAGAGATAAGGAAAAGAAAGAAAATGCACTCTTTTATAAACTTGATATAGTACCACTTAATGGAAATAACAACAGCAGCTTCAGTATGTATAGATTAATAAACTGTAATACCTCAGTCGTAACACAAGCCTGTCCAAAGGTCTCTTT"}
    ref_out = {"CAP255_2000_C1C3_del_2_or_3": "TTAAACTGTAGCAATGCAAATAGCAATACAAATGTCACAGATATTGATAATAGCACAATTGGAGAAATAAAGAACTGCACTTTCAATGTAACTACAGAATTAAGAGATAAGGAAAAGAAAGAAAATGCACTCTTTTATAAACTTGATATAGTACCACTTAATGGAAATAACAACAGCAGCTTTAGTATGTATAGATTAATAAACTGTAATACCTCAGTCGTAACACAAGCCTGTCCAAAGGTCTCTTT"}
    test_out = {"CAP255_2000_C1C3_del_2_or_3": "TTAAACTGTAGCAATGCAAATAGCAATACAAATGTCACAGATATTGATAATAGCACAA-TGGAGAAATAAAGAATTGCACTTTCAATGTAACTACAGAATTAAGAGATAAGGAAAAGAAAGAAAATGCACTCTTTTATAAACTTGATATAGTACCACTTAATGGAAATAACAACAGCAGCTTCAGTATGTATAGATTAATAAACTGTAATACCTCAGTCGTAACACAAGCCTGTCCAAAGGTCTCTTT"}
    for name, seq in test_in.items():
        # The third return value is not checked by this test.
        seq_align, ref_align, _ = pairwise_align_dna(seq, reference)
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(seq_align, test_out[name])
        self.assertEqual(ref_align, ref_out[name])
# @unittest.skip('Skip this test')
def test_ins_1_pairwise_align_DNA(self):
    """Check pairwise_align_dna on the ``ins_1`` read.

    Both returned alignments are compared with fixed expectations; the
    expected reference alignment (``ref_out``) contains ``-`` gap runs.
    """
    # Adjacent literals are joined by the parser, so the value has no newline.
    reference = (
        "ATGACAGTGATGGGGATACAGAAGAATTACCAACAGTGGTGGATATGGGGAATCTTAGGCTTTTGGATGCTAATGATTTGTAATGGGAATGACACGTGGGTCACAGTATATTATGGGGTACCTGTGTGGAGAGAAGCAAAAACTACTCTATTCTGTGCATCAGATGCTAAAGCATATGAGAAAGAAGTGCATAATGTTTGGGCTACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAATGGTTTTGGAAAATGTAACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGATCAGATGCATGAAGATATAATCAGTTTATGGGATCAAAGCCTCAAGCCATGTGTAAAGTTGACCCCGCTCTGTGTCACTTTAAACTGTAGCAATGCAAATAGCAATACAAATGTCACAGATATTGATAATAGCACAATTGGAGAAATAAAGAACTGCACTTTCAATGTAACTACAGAATTAAGAGATAAGGAAAAGAAAGAAAATGCACTCTTTTATAAACTTGATATAGTACCACTTAATGGAAATAACAACAGCAGCTTTAGTATGTATAGATTAATAAACTGTAATACCTCAGTCGTAACACAAGCCTGTCCAAAGGTCTCTTTTGACCCAATTCCTATACATTATTGTGCTCCAGCTGGTTTTGCGATTCTAAAGTGTAATAATAAGACATTCAATGGCACAGGACCATGCAATAATGTCAGCACAGTACAATGTACACATGGGATTAGGCCAGTGGTCTCAACTCAACTATTGTTAAATGGTAGCCTAGCAGAAAAAGAGATAATAATTAGATCTGAAAATCTGACAAACAATATCAAAACAATAATAGTACATCTCAATGAATCTGTAGAGATTAATTGTATAAGACCCAATAATAATACAAGGAAAAGTATGAGAATAGGACCAGGACAAACATTCTATGCAACAGGAGAAATAATAGGAGATATAAGGCAAGCACATTGTAACATTAGCAAAGATAAATGGGACAAAACTTTACACAGGGTAAGTGAAAAATTAAGAGAACACTTCCCTAATAAGACAATAACATTTAACTCATCCTCAGGAGGAGACCTAGAAATTACAACACATAGCTTTAATTGTGGAGGAGAATTTTTCTATTGCAATACATCAGGCCTGTTTAATAGTACATTTAATACTACATTTTATGAACCTTCAAATTTAACCATCACACTCCAATGCAGAATAAAACAAATTATAAACATGTGGCAGGAGGTGGGACGAGCAATGTATGCCCCTCCCATTGCAGGAAACATAACATGTGAATCAAAGATCACAGGGCTAATATTGACACGTGATGGAGGAAGTGACAATGGGACAGAGACATTCAGACCTGGAGGAGGAGATATGAGGGACAACTGGAGAAGTGAATTATATAAATATAAAGTGGTAGAAATTAAGCCATTGGGAATAGCACCCACTAATGGAAGAAGGAGAGTGGTGCAGAGAGAGAAAAGAGCAGTGGGAATAGGAGCTGTATTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCGGCATCAATAACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGGGCTATAGAGGCGCAACAGCATATGTTGCAACTCACGGTCTGGGGCATTAAGCAGCTCCAGGCAAGAGTCTTGGCTCTAGAAAGATACCTAAGGGATCAACAGCTCCTAGGGATGTGGGGCTGCTCTGGAAAACTCATCTGCACCACTAATGTGGCTTGGAACTCTAGTTGGAGTAATAAAAGTCAAGATGAGATTTGGAAGAACATGACCTGGATGCAGTGGGATAGAGAAATTAGTGACTATACAAACACAATATACAGGTTGCTTGAAGAGTCGCAAAACCAGCAAGAAATAAATGAAAAAGATTTACTAGCATTGGACAGTTGGAACAATCTGTGGACTTGGTTTG"
        "ACATAACAAACTGGCTGTGGTATATAAAAATATTCATAATGATAGTAGGAGGCTTGATAGGGTTAAGAATAATTTTTGCTGTGCTTTCTCTAGTGAATAGAGTTAGGCAGGGATACTCACCTTTGTCGTTGCAGACCTTTACCCCAAACCCGAGGGGACCCGACAGGCTCGAAAGAATCGAAGAAGAAGGTGGAGAGCAAGACAGAGACAGATCCGTGCGATTAGTGAGCGGATTCTTAGCACTTGCCTGGGACGACCTGCGGAGCCTGTGCCTCTTCAGCTATCACCTATTGAGAGACTTCATATTGGTTGCAGCGAGAGCAGTGGGACTTCTGGGACGCAGCAGTCTCAGGGGACTACAGACAGGGTGGGAAGCCCTTAAGTATCTGGGAAGCCTTGTGCAATATTGGGGTCTAGAGCTAAAAAAGAGTGCTATTAGTCTGCTTGATACCTTAGCAATAGCAGTAGCTGAAGGAACAGATAGGATTATAGAATTCATACAAAGAATTTGTAGAGCTATCCTCCATATACCTAGAAGAATAAGACAGGGCTTTGAAGCAGCTTTGCTA"
    )
    test_in = {"CAP255_2000_C1C3_ins_1": "TTAAACTGTAGCAATGCAAGTAGCAATGCAAATGTCACAAAGGTTGATAATAGTAGCACAATTGGAGAAATAAAGAATTGCACTTTCAATGTAACTACAGAATTAAGAGATAAGGAAAAAGAAAGAAAATGCACTCTTTTATAAACTTGATATAGTACCACTTAATGGAAATAACAACAGCAGCTTTAGTATGTATAGATTAATAAACTGTAATACCTCAGTCGTAACACAAGCCTGTCCAAAGGTCT"}
    ref_out = {"CAP255_2000_C1C3_ins_1": "TTAAACTGTAGCAATGCAAATAGCAATACAAATGTCACAGATATTGATAA---TAGCACAATTGGAGAAATAAAGAACTGCACTTTCAATGTAACTACAGAATTAAGAGATAAGG-AAAAGAAAGAAAATGCACTCTTTTATAAACTTGATATAGTACCACTTAATGGAAATAACAACAGCAGCTTTAGTATGTATAGATTAATAAACTGTAATACCTCAGTCGTAACACAAGCCTGTCCAAAGGTCT"}
    test_out = {"CAP255_2000_C1C3_ins_1": "TTAAACTGTAGCAATGCAAGTAGCAATGCAAATGTCACAAAGGTTGATAATAGTAGCACAATTGGAGAAATAAAGAATTGCACTTTCAATGTAACTACAGAATTAAGAGATAAGGAAAAAGAAAGAAAATGCACTCTTTTATAAACTTGATATAGTACCACTTAATGGAAATAACAACAGCAGCTTTAGTATGTATAGATTAATAAACTGTAATACCTCAGTCGTAACACAAGCCTGTCCAAAGGTCT"}
    for name, seq in test_in.items():
        # The third return value is not checked by this test.
        seq_align, ref_align, _frame = pairwise_align_dna(seq, reference)
        self.assertEqual(seq_align, test_out[name])
        self.assertEqual(ref_align, ref_out[name])
# @unittest.skip('Skip this test')
def test_ins_2_pairwise_align_DNA(self):
    """Check pairwise_align_dna on the ``ins_2`` read.

    Both returned alignments are compared with fixed expectations; the
    expected reference alignment (``ref_out``) contains ``-`` gap runs.
    """
    # Adjacent literals are joined by the parser, so the value has no newline.
    reference = (
        "ATGACAGTGATGGGGATACAGAAGAATTACCAACAGTGGTGGATATGGGGAATCTTAGGCTTTTGGATGCTAATGATTTGTAATGGGAATGACACGTGGGTCACAGTATATTATGGGGTACCTGTGTGGAGAGAAGCAAAAACTACTCTATTCTGTGCATCAGATGCTAAAGCATATGAGAAAGAAGTGCATAATGTTTGGGCTACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAATGGTTTTGGAAAATGTAACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGATCAGATGCATGAAGATATAATCAGTTTATGGGATCAAAGCCTCAAGCCATGTGTAAAGTTGACCCCGCTCTGTGTCACTTTAAACTGTAGCAATGCAAATAGCAATACAAATGTCACAGATATTGATAATAGCACAATTGGAGAAATAAAGAACTGCACTTTCAATGTAACTACAGAATTAAGAGATAAGGAAAAGAAAGAAAATGCACTCTTTTATAAACTTGATATAGTACCACTTAATGGAAATAACAACAGCAGCTTTAGTATGTATAGATTAATAAACTGTAATACCTCAGTCGTAACACAAGCCTGTCCAAAGGTCTCTTTTGACCCAATTCCTATACATTATTGTGCTCCAGCTGGTTTTGCGATTCTAAAGTGTAATAATAAGACATTCAATGGCACAGGACCATGCAATAATGTCAGCACAGTACAATGTACACATGGGATTAGGCCAGTGGTCTCAACTCAACTATTGTTAAATGGTAGCCTAGCAGAAAAAGAGATAATAATTAGATCTGAAAATCTGACAAACAATATCAAAACAATAATAGTACATCTCAATGAATCTGTAGAGATTAATTGTATAAGACCCAATAATAATACAAGGAAAAGTATGAGAATAGGACCAGGACAAACATTCTATGCAACAGGAGAAATAATAGGAGATATAAGGCAAGCACATTGTAACATTAGCAAAGATAAATGGGACAAAACTTTACACAGGGTAAGTGAAAAATTAAGAGAACACTTCCCTAATAAGACAATAACATTTAACTCATCCTCAGGAGGAGACCTAGAAATTACAACACATAGCTTTAATTGTGGAGGAGAATTTTTCTATTGCAATACATCAGGCCTGTTTAATAGTACATTTAATACTACATTTTATGAACCTTCAAATTTAACCATCACACTCCAATGCAGAATAAAACAAATTATAAACATGTGGCAGGAGGTGGGACGAGCAATGTATGCCCCTCCCATTGCAGGAAACATAACATGTGAATCAAAGATCACAGGGCTAATATTGACACGTGATGGAGGAAGTGACAATGGGACAGAGACATTCAGACCTGGAGGAGGAGATATGAGGGACAACTGGAGAAGTGAATTATATAAATATAAAGTGGTAGAAATTAAGCCATTGGGAATAGCACCCACTAATGGAAGAAGGAGAGTGGTGCAGAGAGAGAAAAGAGCAGTGGGAATAGGAGCTGTATTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCGGCATCAATAACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGGGCTATAGAGGCGCAACAGCATATGTTGCAACTCACGGTCTGGGGCATTAAGCAGCTCCAGGCAAGAGTCTTGGCTCTAGAAAGATACCTAAGGGATCAACAGCTCCTAGGGATGTGGGGCTGCTCTGGAAAACTCATCTGCACCACTAATGTGGCTTGGAACTCTAGTTGGAGTAATAAAAGTCAAGATGAGATTTGGAAGAACATGACCTGGATGCAGTGGGATAGAGAAATTAGTGACTATACAAACACAATATACAGGTTGCTTGAAGAGTCGCAAAACCAGCAAGAAATAAATGAAAAAGATTTACTAGCATTGGACAGTTGGAACAATCTGTGGACTTGGTTTG"
        "ACATAACAAACTGGCTGTGGTATATAAAAATATTCATAATGATAGTAGGAGGCTTGATAGGGTTAAGAATAATTTTTGCTGTGCTTTCTCTAGTGAATAGAGTTAGGCAGGGATACTCACCTTTGTCGTTGCAGACCTTTACCCCAAACCCGAGGGGACCCGACAGGCTCGAAAGAATCGAAGAAGAAGGTGGAGAGCAAGACAGAGACAGATCCGTGCGATTAGTGAGCGGATTCTTAGCACTTGCCTGGGACGACCTGCGGAGCCTGTGCCTCTTCAGCTATCACCTATTGAGAGACTTCATATTGGTTGCAGCGAGAGCAGTGGGACTTCTGGGACGCAGCAGTCTCAGGGGACTACAGACAGGGTGGGAAGCCCTTAAGTATCTGGGAAGCCTTGTGCAATATTGGGGTCTAGAGCTAAAAAAGAGTGCTATTAGTCTGCTTGATACCTTAGCAATAGCAGTAGCTGAAGGAACAGATAGGATTATAGAATTCATACAAAGAATTTGTAGAGCTATCCTCCATATACCTAGAAGAATAAGACAGGGCTTTGAAGCAGCTTTGCTA"
    )
    test_in = {"CAP255_2000_C1C3_ins_2": "TTAAACTGTAGCAATGCAAGTAGCAATGCAAATGTCACAAAGGTTGATAATAGTAGCACAATTGGAGAAATAAAGAATTGCACTTTCAATGTAACTACAGAATTAAGAGATAAGGGAAAAGAAAGAAAATGCACTCTTTTATAAACTTGATATAGTACCACTTAATGGAAATAACAACAGCAGCTTTAGTATGTATAGATTAATAAACTGTAATACCTCAGTCGTAACACAAGCCTGTCCAAAGGTCT"}
    ref_out = {"CAP255_2000_C1C3_ins_2": "TTAAACTGTAGCAATGCAAATAGCAATACAAATGTCACAGATATTGATAA---TAGCACAATTGGAGAAATAAAGAACTGCACTTTCAATGTAACTACAGAATTAAGAGATAA-GGAAAAGAAAGAAAATGCACTCTTTTATAAACTTGATATAGTACCACTTAATGGAAATAACAACAGCAGCTTTAGTATGTATAGATTAATAAACTGTAATACCTCAGTCGTAACACAAGCCTGTCCAAAGGTCT"}
    test_out = {"CAP255_2000_C1C3_ins_2": "TTAAACTGTAGCAATGCAAGTAGCAATGCAAATGTCACAAAGGTTGATAATAGTAGCACAATTGGAGAAATAAAGAATTGCACTTTCAATGTAACTACAGAATTAAGAGATAAGGGAAAAGAAAGAAAATGCACTCTTTTATAAACTTGATATAGTACCACTTAATGGAAATAACAACAGCAGCTTTAGTATGTATAGATTAATAAACTGTAATACCTCAGTCGTAACACAAGCCTGTCCAAAGGTCT"}
    for name, seq in test_in.items():
        # The third return value is not checked by this test.
        seq_align, ref_align, _frame = pairwise_align_dna(seq, reference)
        self.assertEqual(seq_align, test_out[name])
        self.assertEqual(ref_align, ref_out[name])
# @unittest.skip('Skip this test')
def test_ins_3_pairwise_align_DNA(self):
    """Check pairwise_align_dna on the ``ins_3`` read.

    Both returned alignments are compared with fixed expectations; the
    expected reference alignment (``ref_out``) contains ``-`` gap runs.
    """
    # Adjacent literals are joined by the parser, so the value has no newline.
    reference = (
        "ATGACAGTGATGGGGATACAGAAGAATTACCAACAGTGGTGGATATGGGGAATCTTAGGCTTTTGGATGCTAATGATTTGTAATGGGAATGACACGTGGGTCACAGTATATTATGGGGTACCTGTGTGGAGAGAAGCAAAAACTACTCTATTCTGTGCATCAGATGCTAAAGCATATGAGAAAGAAGTGCATAATGTTTGGGCTACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAATGGTTTTGGAAAATGTAACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGATCAGATGCATGAAGATATAATCAGTTTATGGGATCAAAGCCTCAAGCCATGTGTAAAGTTGACCCCGCTCTGTGTCACTTTAAACTGTAGCAATGCAAATAGCAATACAAATGTCACAGATATTGATAATAGCACAATTGGAGAAATAAAGAACTGCACTTTCAATGTAACTACAGAATTAAGAGATAAGGAAAAGAAAGAAAATGCACTCTTTTATAAACTTGATATAGTACCACTTAATGGAAATAACAACAGCAGCTTTAGTATGTATAGATTAATAAACTGTAATACCTCAGTCGTAACACAAGCCTGTCCAAAGGTCTCTTTTGACCCAATTCCTATACATTATTGTGCTCCAGCTGGTTTTGCGATTCTAAAGTGTAATAATAAGACATTCAATGGCACAGGACCATGCAATAATGTCAGCACAGTACAATGTACACATGGGATTAGGCCAGTGGTCTCAACTCAACTATTGTTAAATGGTAGCCTAGCAGAAAAAGAGATAATAATTAGATCTGAAAATCTGACAAACAATATCAAAACAATAATAGTACATCTCAATGAATCTGTAGAGATTAATTGTATAAGACCCAATAATAATACAAGGAAAAGTATGAGAATAGGACCAGGACAAACATTCTATGCAACAGGAGAAATAATAGGAGATATAAGGCAAGCACATTGTAACATTAGCAAAGATAAATGGGACAAAACTTTACACAGGGTAAGTGAAAAATTAAGAGAACACTTCCCTAATAAGACAATAACATTTAACTCATCCTCAGGAGGAGACCTAGAAATTACAACACATAGCTTTAATTGTGGAGGAGAATTTTTCTATTGCAATACATCAGGCCTGTTTAATAGTACATTTAATACTACATTTTATGAACCTTCAAATTTAACCATCACACTCCAATGCAGAATAAAACAAATTATAAACATGTGGCAGGAGGTGGGACGAGCAATGTATGCCCCTCCCATTGCAGGAAACATAACATGTGAATCAAAGATCACAGGGCTAATATTGACACGTGATGGAGGAAGTGACAATGGGACAGAGACATTCAGACCTGGAGGAGGAGATATGAGGGACAACTGGAGAAGTGAATTATATAAATATAAAGTGGTAGAAATTAAGCCATTGGGAATAGCACCCACTAATGGAAGAAGGAGAGTGGTGCAGAGAGAGAAAAGAGCAGTGGGAATAGGAGCTGTATTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCGGCATCAATAACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGGGCTATAGAGGCGCAACAGCATATGTTGCAACTCACGGTCTGGGGCATTAAGCAGCTCCAGGCAAGAGTCTTGGCTCTAGAAAGATACCTAAGGGATCAACAGCTCCTAGGGATGTGGGGCTGCTCTGGAAAACTCATCTGCACCACTAATGTGGCTTGGAACTCTAGTTGGAGTAATAAAAGTCAAGATGAGATTTGGAAGAACATGACCTGGATGCAGTGGGATAGAGAAATTAGTGACTATACAAACACAATATACAGGTTGCTTGAAGAGTCGCAAAACCAGCAAGAAATAAATGAAAAAGATTTACTAGCATTGGACAGTTGGAACAATCTGTGGACTTGGTTTG"
        "ACATAACAAACTGGCTGTGGTATATAAAAATATTCATAATGATAGTAGGAGGCTTGATAGGGTTAAGAATAATTTTTGCTGTGCTTTCTCTAGTGAATAGAGTTAGGCAGGGATACTCACCTTTGTCGTTGCAGACCTTTACCCCAAACCCGAGGGGACCCGACAGGCTCGAAAGAATCGAAGAAGAAGGTGGAGAGCAAGACAGAGACAGATCCGTGCGATTAGTGAGCGGATTCTTAGCACTTGCCTGGGACGACCTGCGGAGCCTGTGCCTCTTCAGCTATCACCTATTGAGAGACTTCATATTGGTTGCAGCGAGAGCAGTGGGACTTCTGGGACGCAGCAGTCTCAGGGGACTACAGACAGGGTGGGAAGCCCTTAAGTATCTGGGAAGCCTTGTGCAATATTGGGGTCTAGAGCTAAAAAAGAGTGCTATTAGTCTGCTTGATACCTTAGCAATAGCAGTAGCTGAAGGAACAGATAGGATTATAGAATTCATACAAAGAATTTGTAGAGCTATCCTCCATATACCTAGAAGAATAAGACAGGGCTTTGAAGCAGCTTTGCTA"
    )
    test_in = {"CAP255_2000_C1C3_ins_3": "TTAAACTGTAGCAATGCAAGTAGCAATGCAAATGTCACAAAGGTTGATAATAGTAGCACAATTGGAGAAATAAAGAATTGCACTTTCAATGTAACTACAGAATTAAGAGATAAGGAAAAGAAAGAAAATGCACTCTTTTTATAAACTTGATATAGTACCACTTAATGGAAATAACAACAGCAGCTTTAGTATGTATAGATTAATAAACTGTAATACCTCAGTCGTAACACAAGCCTGTCCAAAGGTCT"}
    ref_out = {"CAP255_2000_C1C3_ins_3": "TTAAACTGTAGCAATGCAAATAGCAATACAAATGTCACAGATATTGATAA---TAGCACAATTGGAGAAATAAAGAACTGCACTTTCAATGTAACTACAGAATTAAGAGATAAGGAAAAGAAAGAAAATGCACTC-TTTTATAAACTTGATATAGTACCACTTAATGGAAATAACAACAGCAGCTTTAGTATGTATAGATTAATAAACTGTAATACCTCAGTCGTAACACAAGCCTGTCCAAAGGTCT"}
    test_out = {"CAP255_2000_C1C3_ins_3": "TTAAACTGTAGCAATGCAAGTAGCAATGCAAATGTCACAAAGGTTGATAATAGTAGCACAATTGGAGAAATAAAGAATTGCACTTTCAATGTAACTACAGAATTAAGAGATAAGGAAAAGAAAGAAAATGCACTCTTTTTATAAACTTGATATAGTACCACTTAATGGAAATAACAACAGCAGCTTTAGTATGTATAGATTAATAAACTGTAATACCTCAGTCGTAACACAAGCCTGTCCAAAGGTCT"}
    for name, seq in test_in.items():
        # The third return value is not checked by this test.
        seq_align, ref_align, _frame = pairwise_align_dna(seq, reference)
        self.assertEqual(seq_align, test_out[name])
        self.assertEqual(ref_align, ref_out[name])
# @unittest.skip('Skip this test')
def test_del_large_pairwise_align_DNA(self):
    """Check pairwise_align_dna on a read with a long deletion.

    The expected read alignment (``test_out``) contains one long run of
    ``-`` characters; the expected reference alignment has no gaps.
    """
    # Adjacent literals are joined by the parser, so the value has no newline.
    reference = (
        "ATGACAGTGATGGGGATACAGAAGAATTACCAACAGTGGTGGATATGGGGAATCTTAGGCTTTTGGATGCTAATGATTTGTAATGGGAATGACACGTGGGTCACAGTATATTATGGGGTACCTGTGTGGAGAGAAGCAAAAACTACTCTATTCTGTGCATCAGATGCTAAAGCATATGAGAAAGAAGTGCATAATGTTTGGGCTACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAATGGTTTTGGAAAATGTAACAGAAAATTTTAACATGTGGAAAAATGACATGGTAGATCAGATGCATGAAGATATAATCAGTTTATGGGATCAAAGCCTCAAGCCATGTGTAAAGTTGACCCCGCTCTGTGTCACTTTAAACTGTAGCAATGCAAATAGCAATACAAATGTCACAGATATTGATAATAGCACAATTGGAGAAATAAAGAACTGCACTTTCAATGTAACTACAGAATTAAGAGATAAGGAAAAGAAAGAAAATGCACTCTTTTATAAACTTGATATAGTACCACTTAATGGAAATAACAACAGCAGCTTTAGTATGTATAGATTAATAAACTGTAATACCTCAGTCGTAACACAAGCCTGTCCAAAGGTCTCTTTTGACCCAATTCCTATACATTATTGTGCTCCAGCTGGTTTTGCGATTCTAAAGTGTAATAATAAGACATTCAATGGCACAGGACCATGCAATAATGTCAGCACAGTACAATGTACACATGGGATTAGGCCAGTGGTCTCAACTCAACTATTGTTAAATGGTAGCCTAGCAGAAAAAGAGATAATAATTAGATCTGAAAATCTGACAAACAATATCAAAACAATAATAGTACATCTCAATGAATCTGTAGAGATTAATTGTATAAGACCCAATAATAATACAAGGAAAAGTATGAGAATAGGACCAGGACAAACATTCTATGCAACAGGAGAAATAATAGGAGATATAAGGCAAGCACATTGTAACATTAGCAAAGATAAATGGGACAAAACTTTACACAGGGTAAGTGAAAAATTAAGAGAACACTTCCCTAATAAGACAATAACATTTAACTCATCCTCAGGAGGAGACCTAGAAATTACAACACATAGCTTTAATTGTGGAGGAGAATTTTTCTATTGCAATACATCAGGCCTGTTTAATAGTACATTTAATACTACATTTTATGAACCTTCAAATTTAACCATCACACTCCAATGCAGAATAAAACAAATTATAAACATGTGGCAGGAGGTGGGACGAGCAATGTATGCCCCTCCCATTGCAGGAAACATAACATGTGAATCAAAGATCACAGGGCTAATATTGACACGTGATGGAGGAAGTGACAATGGGACAGAGACATTCAGACCTGGAGGAGGAGATATGAGGGACAACTGGAGAAGTGAATTATATAAATATAAAGTGGTAGAAATTAAGCCATTGGGAATAGCACCCACTAATGGAAGAAGGAGAGTGGTGCAGAGAGAGAAAAGAGCAGTGGGAATAGGAGCTGTATTCCTTGGGTTCTTGGGAGCAGCAGGAAGCACTATGGGCGCGGCATCAATAACGCTGACGGTACAGGCCAGACAATTATTGTCTGGTATAGTGCAACAGCAAAACAATTTGCTGAGGGCTATAGAGGCGCAACAGCATATGTTGCAACTCACGGTCTGGGGCATTAAGCAGCTCCAGGCAAGAGTCTTGGCTCTAGAAAGATACCTAAGGGATCAACAGCTCCTAGGGATGTGGGGCTGCTCTGGAAAACTCATCTGCACCACTAATGTGGCTTGGAACTCTAGTTGGAGTAATAAAAGTCAAGATGAGATTTGGAAGAACATGACCTGGATGCAGTGGGATAGAGAAATTAGTGACTATACAAACACAATATACAGGTTGCTTGAAGAGTCGCAAAACCAGCAAGAAATAAATGAAAAAGATTTACTAGCATTGGACAGTTGGAACAATCTGTGGACTTGGTTTG"
        "ACATAACAAACTGGCTGTGGTATATAAAAATATTCATAATGATAGTAGGAGGCTTGATAGGGTTAAGAATAATTTTTGCTGTGCTTTCTCTAGTGAATAGAGTTAGGCAGGGATACTCACCTTTGTCGTTGCAGACCTTTACCCCAAACCCGAGGGGACCCGACAGGCTCGAAAGAATCGAAGAAGAAGGTGGAGAGCAAGACAGAGACAGATCCGTGCGATTAGTGAGCGGATTCTTAGCACTTGCCTGGGACGACCTGCGGAGCCTGTGCCTCTTCAGCTATCACCTATTGAGAGACTTCATATTGGTTGCAGCGAGAGCAGTGGGACTTCTGGGACGCAGCAGTCTCAGGGGACTACAGACAGGGTGGGAAGCCCTTAAGTATCTGGGAAGCCTTGTGCAATATTGGGGTCTAGAGCTAAAAAAGAGTGCTATTAGTCTGCTTGATACCTTAGCAATAGCAGTAGCTGAAGGAACAGATAGGATTATAGAATTCATACAAAGAATTTGTAGAGCTATCCTCCATATACCTAGAAGAATAAGACAGGGCTTTGAAGCAGCTTTGCTA"
    )
    test_in = {"CAP255_2000_C1C3_good_del": "TTAAACTGTAGCAATGCAAATAGCAATACAAATGTCACAGATATTGATAATAGCACAATTGGAGAAATAAAGAATTGCACTTTAAGAGATAAGGAAAAGAAAGAAAATGCACTCTTTTATAAACTTGATATAGTACCACTTAATGGAAATAACAACAGCAGCTTCAGTATGTATAGATTAATAAACTGTAATACCTCAGTCGTAACACAAGCCTGTCCAAAGGTCTCTTT"}
    ref_out = {"CAP255_2000_C1C3_good_del": "TTAAACTGTAGCAATGCAAATAGCAATACAAATGTCACAGATATTGATAATAGCACAATTGGAGAAATAAAGAACTGCACTTTCAATGTAACTACAGAATTAAGAGATAAGGAAAAGAAAGAAAATGCACTCTTTTATAAACTTGATATAGTACCACTTAATGGAAATAACAACAGCAGCTTTAGTATGTATAGATTAATAAACTGTAATACCTCAGTCGTAACACAAGCCTGTCCAAAGGTCTCTTT"}
    test_out = {"CAP255_2000_C1C3_good_del": "TTAAACTGTAGCAATGCAAATAGCAATACAAATGTCACAGATATTGATAATAGCACAATTGGAGAAATAAAGAATTGCACT------------------TTAAGAGATAAGGAAAAGAAAGAAAATGCACTCTTTTATAAACTTGATATAGTACCACTTAATGGAAATAACAACAGCAGCTTCAGTATGTATAGATTAATAAACTGTAATACCTCAGTCGTAACACAAGCCTGTCCAAAGGTCTCTTT"}
    for name, seq in test_in.items():
        # The third return value is not checked by this test.
        seq_align, ref_align, _frame = pairwise_align_dna(seq, reference)
        self.assertEqual(seq_align, test_out[name])
        self.assertEqual(ref_align, ref_out[name])
# test the gap padding function
# @unittest.skip('Skip this test')
def test_start_1_gap_padding(self):
    """Check gap_padding on the ``good_start_1`` fixture with frame 0."""
    frame = 0
    test_in = {"CAP255_2000_C1C3_good_start_1": "TTAAACTGTAGCAATGCAAGTAGCAATGCAAATGTCACAAAGGTTGATAATAGTAGCACAATTGGAGAAATAAAGAATTGCACTTTCAATGTAACTACAGAATTAAGAGATAAGGAAAAGAAAGAAAATGCACTCTTTTATAAACTTGATATAGTACCACTTAATGGAAATAACAACAGCAGCTTTAGTATGTATAGATTAATAAACTGTAATACCTCAGTCGTAACACAAGCCTGTCCAAAGGTCT"}
    ref_in = {"ref_start_1": "TTAAACTGTAGCAATGCAAATAGCAATACAAATGTCACAGATATTGATAA---TAGCACAATTGGAGAAATAAAGAACTGCACTTTCAATGTAACTACAGAATTAAGAGATAAGGAAAAGAAAGAAAATGCACTCTTTTATAAACTTGATATAGTACCACTTAATGGAAATAACAACAGCAGCTTTAGTATGTATAGATTAATAAACTGTAATACCTCAGTCGTAACACAAGCCTGTCCAAAGGTCT"}
    test_out = {"CAP255_2000_C1C3_good_start_1": "TTAAACTGTAGCAATGCAAGTAGCAATGCAAATGTCACAAAGGTTGATAATAGTAGCACAATTGGAGAAATAAAGAATTGCACTTTCAATGTAACTACAGAATTAAGAGATAAGGAAAAGAAAGAAAATGCACTCTTTTATAAACTTGATATAGTACCACTTAATGGAAATAACAACAGCAGCTTTAGTATGTATAGATTAATAAACTGTAATACCTCAGTCGTAACACAAGCCTGTCCAAAGGTCT"}
    for name, seq in test_in.items():
        # The fourth argument is the read with every "-" removed.
        padded_sequence = gap_padding(seq, ref_in["ref_start_1"], frame, seq.replace("-", ""))
        self.assertEqual(padded_sequence, test_out[name])
# @unittest.skip('Skip this test')
def test_start_2_gap_padding(self):
    """Check gap_padding with frame 2; the expected output starts with ``--``."""
    frame = 2
    test_in = {"CAP255_2000_C1C3_good_start_2": "TTTAAACTGTAGCAATGCAAGTAGCAATGCAAATGTCACAAAGGTTGATAATAGTAGCACAATTGGAGAAATAAAGAATTGCACTTTCAATGTAACTACAGAATTAAGAGATAAGGAAAAGAAAGAAAATGCACTCTTTTATAAACTTGATATAGTACCACTTAATGGAAATAACAACAGCAGCTTTAGTATGTATAGATTAATAAACTGTAATACCTCAGTCGTAACACAAGCCTGTCCAAAGGTCT"}
    ref_in = {"ref_start_2": "TTTAAACTGTAGCAATGCAAATAGCAATACAAATGTCACAGATATTGATAA---TAGCACAATTGGAGAAATAAAGAACTGCACTTTCAATGTAACTACAGAATTAAGAGATAAGGAAAAGAAAGAAAATGCACTCTTTTATAAACTTGATATAGTACCACTTAATGGAAATAACAACAGCAGCTTTAGTATGTATAGATTAATAAACTGTAATACCTCAGTCGTAACACAAGCCTGTCCAAAGGTCT"}
    test_out = {"CAP255_2000_C1C3_good_start_2": "--TTTAAACTGTAGCAATGCAAGTAGCAATGCAAATGTCACAAAGGTTGATAATAGTAGCACAATTGGAGAAATAAAGAATTGCACTTTCAATGTAACTACAGAATTAAGAGATAAGGAAAAGAAAGAAAATGCACTCTTTTATAAACTTGATATAGTACCACTTAATGGAAATAACAACAGCAGCTTTAGTATGTATAGATTAATAAACTGTAATACCTCAGTCGTAACACAAGCCTGTCCAAAGGTCT"}
    for name, seq in test_in.items():
        # The fourth argument is the read with every "-" removed.
        padded_sequence = gap_padding(seq, ref_in["ref_start_2"], frame, seq.replace("-", ""))
        self.assertEqual(padded_sequence, test_out[name])
# @unittest.skip('Skip this test')
def test_start_3_gap_padding(self):
    """Check gap_padding with frame 1; the expected output starts with ``-``."""
    frame = 1
    test_in = {"CAP255_2000_C1C3_good_start_3": "CTTTAAACTGTAGCAATGCAAGTAGCAATGCAAATGTCACAAAGGTTGATAATAGTAGCACAATTGGAGAAATAAAGAATTGCACTTTCAATGTAACTACAGAATTAAGAGATAAGGAAAAGAAAGAAAATGCACTCTTTTATAAACTTGATATAGTACCACTTAATGGAAATAACAACAGCAGCTTTAGTATGTATAGATTAATAAACTGTAATACCTCAGTCGTAACACAAGCCTGTCCAAAGGTCT"}
    ref_in = {"ref_start_3": "CTTTAAACTGTAGCAATGCAAATAGCAATACAAATGTCACAGATATTGATAA---TAGCACAATTGGAGAAATAAAGAACTGCACTTTCAATGTAACTACAGAATTAAGAGATAAGGAAAAGAAAGAAAATGCACTCTTTTATAAACTTGATATAGTACCACTTAATGGAAATAACAACAGCAGCTTTAGTATGTATAGATTAATAAACTGTAATACCTCAGTCGTAACACAAGCCTGTCCAAAGGTCT"}
    test_out = {"CAP255_2000_C1C3_good_start_3": "-CTTTAAACTGTAGCAATGCAAGTAGCAATGCAAATGTCACAAAGGTTGATAATAGTAGCACAATTGGAGAAATAAAGAATTGCACTTTCAATGTAACTACAGAATTAAGAGATAAGGAAAAGAAAGAAAATGCACTCTTTTATAAACTTGATATAGTACCACTTAATGGAAATAACAACAGCAGCTTTAGTATGTATAGATTAATAAACTGTAATACCTCAGTCGTAACACAAGCCTGTCCAAAGGTCT"}
    for name, seq in test_in.items():
        # The fourth argument is the read with every "-" removed.
        padded_sequence = gap_padding(seq, ref_in["ref_start_3"], frame, seq.replace("-", ""))
        self.assertEqual(padded_sequence, test_out[name])
# @unittest.skip('Skip this test')
def test_del_1_gap_padding(self):
    """Check gap_padding on a read that already contains a single ``-`` (frame 0)."""
    frame = 0
    test_in = {"CAP255_2000_C1C3_del": "TTAAACTGTAGCAATGCAAATAGCAATACAAATGTCACAGATATTGATAATAGCACAATT-GAGAAATAAAGAATTGCACTTTCAATGTAACTACAGAATTAAGAGATAAGGAAAAGAAAGAAAATGCACTCTTTTATAAACTTGATATAGTACCACTTAATGGAAATAACAACAGCAGCTTCAGTATGTATAGATTAATAAACTGTAATACCTCAGTCGTAACACAAGCCTGTCCAAAGGTCTCTTT"}
    ref_in = {"ref_del_1": "TTAAACTGTAGCAATGCAAATAGCAATACAAATGTCACAGATATTGATAATAGCACAATTGGAGAAATAAAGAACTGCACTTTCAATGTAACTACAGAATTAAGAGATAAGGAAAAGAAAGAAAATGCACTCTTTTATAAACTTGATATAGTACCACTTAATGGAAATAACAACAGCAGCTTTAGTATGTATAGATTAATAAACTGTAATACCTCAGTCGTAACACAAGCCTGTCCAAAGGTCTCTTT"}
    test_out = {"CAP255_2000_C1C3_del": "TTAAACTGTAGCAATGCAAATAGCAATACAAATGTCACAGATATTGATAATAGCACAATT-GAGAAATAAAGAATTGCACTTTCAATGTAACTACAGAATTAAGAGATAAGGAAAAGAAAGAAAATGCACTCTTTTATAAACTTGATATAGTACCACTTAATGGAAATAACAACAGCAGCTTCAGTATGTATAGATTAATAAACTGTAATACCTCAGTCGTAACACAAGCCTGTCCAAAGGTCTCTTT"}
    for name, seq in test_in.items():
        # The fourth argument is the read with every "-" removed.
        padded_sequence = gap_padding(seq, ref_in["ref_del_1"], frame, seq.replace("-", ""))
        self.assertEqual(padded_sequence, test_out[name])
# @unittest.skip('Skip this test')
def test_del_2_gap_padding(self):
    """Check gap_padding on a second read containing a single ``-`` (frame 0)."""
    frame = 0
    test_in = {"CAP255_2000_C1C3_del": "TTAAACTGTAGCAATGCAAATAGCAATACAAATGTCACAGATATTGATAATAGCA-AATTGGAGAAATAAAGAATTGCACTTTCAATGTAACTACAGAATTAAGAGATAAGGAAAAGAAAGAAAATGCACTCTTTTATAAACTTGATATAGTACCACTTAATGGAAATAACAACAGCAGCTTCAGTATGTATAGATTAATAAACTGTAATACCTCAGTCGTAACACAAGCCTGTCCAAAGGTCTCTTT"}
    ref_in = {"ref_del_2": "TTAAACTGTAGCAATGCAAATAGCAATACAAATGTCACAGATATTGATAATAGCACAATTGGAGAAATAAAGAACTGCACTTTCAATGTAACTACAGAATTAAGAGATAAGGAAAAGAAAGAAAATGCACTCTTTTATAAACTTGATATAGTACCACTTAATGGAAATAACAACAGCAGCTTTAGTATGTATAGATTAATAAACTGTAATACCTCAGTCGTAACACAAGCCTGTCCAAAGGTCTCTTT"}
    test_out = {"CAP255_2000_C1C3_del": "TTAAACTGTAGCAATGCAAATAGCAATACAAATGTCACAGATATTGATAATAGCA-AATTGGAGAAATAAAGAATTGCACTTTCAATGTAACTACAGAATTAAGAGATAAGGAAAAGAAAGAAAATGCACTCTTTTATAAACTTGATATAGTACCACTTAATGGAAATAACAACAGCAGCTTCAGTATGTATAGATTAATAAACTGTAATACCTCAGTCGTAACACAAGCCTGTCCAAAGGTCTCTTT"}
    for name, seq in test_in.items():
        # The fourth argument is the read with every "-" removed.
        padded_sequence = gap_padding(seq, ref_in["ref_del_2"], frame, seq.replace("-", ""))
        self.assertEqual(padded_sequence, test_out[name])
# @unittest.skip('Skip this test')
def test_del_3_gap_padding(self):
    """Check gap_padding on a third read containing a single ``-`` (frame 0)."""
    frame = 0
    test_in = {"CAP255_2000_C1C3_del": "TTAAACTGTAGCAATGCAAATAGCAATACAAATGTCACAGATATTGATAATAGCACAATTGGAGAAATAAAGAATTG-ACTTTCAATGTAACTACAGAATTAAGAGATAAGGAAAAGAAAGAAAATGCACTCTTTTATAAACTTGATATAGTACCACTTAATGGAAATAACAACAGCAGCTTTAGTATGTATAGATTAATAAACTGTAATACCTCAGTCGTAACACAAGCCTGTCCAAAGGTCTCTTTT"}
    ref_in = {"ref_del_3": "TTAAACTGTAGCAATGCAAATAGCAATACAAATGTCACAGATATTGATAATAGCACAATTGGAGAAATAAAGAACTGCACTTTCAATGTAACTACAGAATTAAGAGATAAGGAAAAGAAAGAAAATGCACTCTTTTATAAACTTGATATAGTACCACTTAATGGAAATAACAACAGCAGCTTTAGTATGTATAGATTAATAAACTGTAATACCTCAGTCGTAACACAAGCCTGTCCAAAGGTCTCTTTT"}
    test_out = {"CAP255_2000_C1C3_del": "TTAAACTGTAGCAATGCAAATAGCAATACAAATGTCACAGATATTGATAATAGCACAATTGGAGAAATAAAGAATTG-ACTTTCAATGTAACTACAGAATTAAGAGATAAGGAAAAGAAAGAAAATGCACTCTTTTATAAACTTGATATAGTACCACTTAATGGAAATAACAACAGCAGCTTTAGTATGTATAGATTAATAAACTGTAATACCTCAGTCGTAACACAAGCCTGTCCAAAGGTCTCTTTT"}
    for name, seq in test_in.items():
        # The fourth argument is the read with every "-" removed.
        padded_sequence = gap_padding(seq, ref_in["ref_del_3"], frame, seq.replace("-", ""))
        self.assertEqual(padded_sequence, test_out[name])
# @unittest.skip('Skip this test')
def test_del_2_or_3_gap_padding(self):
    """Check gap_padding on the ``del_2_or_3`` read containing a single ``-`` (frame 0)."""
    frame = 0
    test_in = {"CAP255_2000_C1C3_del": "TTAAACTGTAGCAATGCAAATAGCAATACAAATGTCACAGATATTGATAATAGCACAA-TGGAGAAATAAAGAATTGCACTTTCAATGTAACTACAGAATTAAGAGATAAGGAAAAGAAAGAAAATGCACTCTTTTATAAACTTGATATAGTACCACTTAATGGAAATAACAACAGCAGCTTCAGTATGTATAGATTAATAAACTGTAATACCTCAGTCGTAACACAAGCCTGTCCAAAGGTCTCTTT"}
    ref_in = {"ref_del_2_or_3": "TTAAACTGTAGCAATGCAAATAGCAATACAAATGTCACAGATATTGATAATAGCACAATTGGAGAAATAAAGAACTGCACTTTCAATGTAACTACAGAATTAAGAGATAAGGAAAAGAAAGAAAATGCACTCTTTTATAAACTTGATATAGTACCACTTAATGGAAATAACAACAGCAGCTTTAGTATGTATAGATTAATAAACTGTAATACCTCAGTCGTAACACAAGCCTGTCCAAAGGTCTCTTT"}
    test_out = {"CAP255_2000_C1C3_del": "TTAAACTGTAGCAATGCAAATAGCAATACAAATGTCACAGATATTGATAATAGCACAA-TGGAGAAATAAAGAATTGCACTTTCAATGTAACTACAGAATTAAGAGATAAGGAAAAGAAAGAAAATGCACTCTTTTATAAACTTGATATAGTACCACTTAATGGAAATAACAACAGCAGCTTCAGTATGTATAGATTAATAAACTGTAATACCTCAGTCGTAACACAAGCCTGTCCAAAGGTCTCTTT"}
    for name, seq in test_in.items():
        # The fourth argument is the read with every "-" removed.
        padded_sequence = gap_padding(seq, ref_in["ref_del_2_or_3"], frame, seq.replace("-", ""))
        self.assertEqual(padded_sequence, test_out[name])
# @unittest.skip('Skip this test')
def test_ins_1_gap_padding(self):
    """Check gap_padding on the ``ins_1`` read (frame 0).

    The aligned reference carries ``-`` runs and the expected output
    contains a ``--`` run.
    """
    frame = 0
    test_in = {"CAP255_2000_C1C3_ins_1": "TTAAACTGTAGCAATGCAAGTAGCAATGCAAATGTCACAAAGGTTGATAATAGTAGCACAATTGGAGAAATAAAGAATTGCACTTTCAATGTAACTACAGAATTAAGAGATAAGGAAAAAGAAAGAAAATGCACTCTTTTATAAACTTGATATAGTACCACTTAATGGAAATAACAACAGCAGCTTTAGTATGTATAGATTAATAAACTGTAATACCTCAGTCGTAACACAAGCCTGTCCAAAGGTCT"}
    ref_in = {"ref_ins_1": "TTAAACTGTAGCAATGCAAATAGCAATACAAATGTCACAGATATTGATAA---TAGCACAATTGGAGAAATAAAGAACTGCACTTTCAATGTAACTACAGAATTAAGAGATAAGG-AAAAGAAAGAAAATGCACTCTTTTATAAACTTGATATAGTACCACTTAATGGAAATAACAACAGCAGCTTTAGTATGTATAGATTAATAAACTGTAATACCTCAGTCGTAACACAAGCCTGTCCAAAGGTCT"}
    test_out = {"CAP255_2000_C1C3_ins_1": "TTAAACTGTAGCAATGCAAGTAGCAATGCAAATGTCACAAAGGTTGATAATAGTAGCACAATTGGAGAAATAAAGAATTGCACTTTCAATGTAACTACAGAATTAAGAGATAAGGAAA--AAGAAAGAAAATGCACTCTTTTATAAACTTGATATAGTACCACTTAATGGAAATAACAACAGCAGCTTTAGTATGTATAGATTAATAAACTGTAATACCTCAGTCGTAACACAAGCCTGTCCAAAGGTCT"}
    for name, seq in test_in.items():
        # The fourth argument is the read with every "-" removed.
        padded_sequence = gap_padding(seq, ref_in["ref_ins_1"], frame, seq.replace("-", ""))
        self.assertEqual(padded_sequence, test_out[name])
# @unittest.skip('Skip this test')
def test_ins_2_gap_padding(self):
    """Check gap_padding on the ``ins_2`` read (frame 0).

    The aligned reference carries ``-`` runs and the expected output
    contains a ``--`` run.
    """
    frame = 0
    test_in = {"CAP255_2000_C1C3_ins_2": "TTAAACTGTAGCAATGCAAGTAGCAATGCAAATGTCACAAAGGTTGATAATAGTAGCACAATTGGAGAAATAAAGAATTGCACTTTCAATGTAACTACAGAATTAAGAGATAAGGGAAAAGAAAGAAAATGCACTCTTTTATAAACTTGATATAGTACCACTTAATGGAAATAACAACAGCAGCTTTAGTATGTATAGATTAATAAACTGTAATACCTCAGTCGTAACACAAGCCTGTCCAAAGGTCT"}
    ref_in = {"ref_ins_2": "TTAAACTGTAGCAATGCAAATAGCAATACAAATGTCACAGATATTGATAA---TAGCACAATTGGAGAAATAAAGAACTGCACTTTCAATGTAACTACAGAATTAAGAGATAA-GGAAAAGAAAGAAAATGCACTCTTTTATAAACTTGATATAGTACCACTTAATGGAAATAACAACAGCAGCTTTAGTATGTATAGATTAATAAACTGTAATACCTCAGTCGTAACACAAGCCTGTCCAAAGGTCT"}
    test_out = {"CAP255_2000_C1C3_ins_2": "TTAAACTGTAGCAATGCAAGTAGCAATGCAAATGTCACAAAGGTTGATAATAGTAGCACAATTGGAGAAATAAAGAATTGCACTTTCAATGTAACTACAGAATTAAGAGATAAGG--GAAAAGAAAGAAAATGCACTCTTTTATAAACTTGATATAGTACCACTTAATGGAAATAACAACAGCAGCTTTAGTATGTATAGATTAATAAACTGTAATACCTCAGTCGTAACACAAGCCTGTCCAAAGGTCT"}
    for name, seq in test_in.items():
        # The fourth argument is the read with every "-" removed.
        padded_sequence = gap_padding(seq, ref_in["ref_ins_2"], frame, seq.replace("-", ""))
        self.assertEqual(padded_sequence, test_out[name])
# @unittest.skip('Skip this test')
def test_ins_3_gap_padding(self):
    """Check gap_padding on the ``ins_3`` read (frame 0).

    The aligned reference carries ``-`` runs and the expected output
    contains a ``--`` run.
    """
    frame = 0
    test_in = {"CAP255_2000_C1C3_ins_3": "TTAAACTGTAGCAATGCAAGTAGCAATGCAAATGTCACAAAGGTTGATAATAGTAGCACAATTGGAGAAATAAAGAATTGCACTTTCAATGTAACTACAGAATTAAGAGATAAGGAAAAGAAAGAAAATGCACTCTTTTTATAAACTTGATATAGTACCACTTAATGGAAATAACAACAGCAGCTTTAGTATGTATAGATTAATAAACTGTAATACCTCAGTCGTAACACAAGCCTGTCCAAAGGTCT"}
    ref_in = {"ref_ins_3": "TTAAACTGTAGCAATGCAAATAGCAATACAAATGTCACAGATATTGATAA---TAGCACAATTGGAGAAATAAAGAACTGCACTTTCAATGTAACTACAGAATTAAGAGATAAGGAAAAGAAAGAAAATGCACTC-TTTTATAAACTTGATATAGTACCACTTAATGGAAATAACAACAGCAGCTTTAGTATGTATAGATTAATAAACTGTAATACCTCAGTCGTAACACAAGCCTGTCCAAAGGTCT"}
    test_out = {"CAP255_2000_C1C3_ins_3": "TTAAACTGTAGCAATGCAAGTAGCAATGCAAATGTCACAAAGGTTGATAATAGTAGCACAATTGGAGAAATAAAGAATTGCACTTTCAATGTAACTACAGAATTAAGAGATAAGGAAAAGAAAGAAAATGCACTCT--TTTTATAAACTTGATATAGTACCACTTAATGGAAATAACAACAGCAGCTTTAGTATGTATAGATTAATAAACTGTAATACCTCAGTCGTAACACAAGCCTGTCCAAAGGTCT"}
    for name, seq in test_in.items():
        # The fourth argument is the read with every "-" removed.
        padded_sequence = gap_padding(seq, ref_in["ref_ins_3"], frame, seq.replace("-", ""))
        self.assertEqual(padded_sequence, test_out[name])
# @unittest.skip('Skip this test')
def test_del_large_gap_padding(self):
    """Check gap_padding on the ungapped long-deletion read (frame 0).

    NOTE(review): the read passed in here has no ``-`` characters although
    the reference is longer; confirm this is the intended fixture.
    """
    frame = 0
    test_in = {"CAP255_2000_C1C3_good_del": "TTAAACTGTAGCAATGCAAATAGCAATACAAATGTCACAGATATTGATAATAGCACAATTGGAGAAATAAAGAATTGCACTTTAAGAGATAAGGAAAAGAAAGAAAATGCACTCTTTTATAAACTTGATATAGTACCACTTAATGGAAATAACAACAGCAGCTTCAGTATGTATAGATTAATAAACTGTAATACCTCAGTCGTAACACAAGCCTGTCCAAAGGTCTCTTT"}
    ref_in = {"ref_large_del": "TTAAACTGTAGCAATGCAAATAGCAATACAAATGTCACAGATATTGATAATAGCACAATTGGAGAAATAAAGAACTGCACTTTCAATGTAACTACAGAATTAAGAGATAAGGAAAAGAAAGAAAATGCACTCTTTTATAAACTTGATATAGTACCACTTAATGGAAATAACAACAGCAGCTTTAGTATGTATAGATTAATAAACTGTAATACCTCAGTCGTAACACAAGCCTGTCCAAAGGTCTCTTT"}
    test_out = {"CAP255_2000_C1C3_good_del": "TTAAACTGTAGCAATGCAAATAGCAATACAAATGTCACAGATATTGATAATAGCACAATTGGAGAAATAAAGAATTGCACTTTAAGAGATAAGGAAAAGAAAGAAAATGCACTCTTTTATAAACTTGATATAGTACCACTTAATGGAAATAACAACAGCAGCTTCAGTATGTATAGATTAATAAACTGTAATACCTCAGTCGTAACACAAGCCTGTCCAAAGGTCTCTTT"}
    for name, seq in test_in.items():
        # The fourth argument is the read with every "-" removed.
        padded_sequence = gap_padding(seq, ref_in["ref_large_del"], frame, seq.replace("-", ""))
        self.assertEqual(padded_sequence, test_out[name])
# test the translate function
# @unittest.skip('Skip this test')
def test_start_1_translate_dna(self):
    """Check translate_dna on an ungapped read; the expected protein ends in ``X``."""
    test_in = {"CAP255_2000_C1C3_good_start_1": "TTAAACTGTAGCAATGCAAGTAGCAATGCAAATGTCACAAAGGTTGATAATAGTAGCACAATTGGAGAAATAAAGAATTGCACTTTCAATGTAACTACAGAATTAAGAGATAAGGAAAAGAAAGAAAATGCACTCTTTTATAAACTTGATATAGTACCACTTAATGGAAATAACAACAGCAGCTTTAGTATGTATAGATTAATAAACTGTAATACCTCAGTCGTAACACAAGCCTGTCCAAAGGTCT"}
    test_out = {"CAP255_2000_C1C3_good_start_1": "LNCSNASSNANVTKVDNSSTIGEIKNCTFNVTTELRDKEKKENALFYKLDIVPLNGNNNSSFSMYRLINCNTSVVTQACPKVX"}
    for name, seq in test_in.items():
        prot_seq = translate_dna(seq)
        self.assertEqual(prot_seq, test_out[name])
# @unittest.skip('Skip this test')
def test_start_2_translate_dna(self):
    """Check translate_dna on a read with a leading ``--``; the expected protein starts with ``X``."""
    test_in = {"CAP255_2000_C1C3_good_start_2": "--TTTAAACTGTAGCAATGCAAGTAGCAATGCAAATGTCACAAAGGTTGATAATAGTAGCACAATTGGAGAAATAAAGAATTGCACTTTCAATGTAACTACAGAATTAAGAGATAAGGAAAAGAAAGAAAATGCACTCTTTTATAAACTTGATATAGTACCACTTAATGGAAATAACAACAGCAGCTTTAGTATGTATAGATTAATAAACTGTAATACCTCAGTCGTAACACAAGCCTGTCCAAAGGTCT"}
    test_out = {"CAP255_2000_C1C3_good_start_2": "XLNCSNASSNANVTKVDNSSTIGEIKNCTFNVTTELRDKEKKENALFYKLDIVPLNGNNNSSFSMYRLINCNTSVVTQACPKVX"}
    for name, seq in test_in.items():
        prot_seq = translate_dna(seq)
        self.assertEqual(prot_seq, test_out[name])
# @unittest.skip('Skip this test')
def test_del_1_translate_dna(self):
    """Check translate_dna on a read with one internal ``-``; the expected protein has an internal ``X``."""
    test_in = {"CAP255_2000_C1C3_del": "TTAAACTGTAGCAATGCAAATAGCAATACAAATGTCACAGATATTGATAATAGCACAATT-GAGAAATAAAGAATTGCACTTTCAATGTAACTACAGAATTAAGAGATAAGGAAAAGAAAGAAAATGCACTCTTTTATAAACTTGATATAGTACCACTTAATGGAAATAACAACAGCAGCTTCAGTATGTATAGATTAATAAACTGTAATACCTCAGTCGTAACACAAGCCTGTCCAAAGGTCTCTTT"}
    test_out = {"CAP255_2000_C1C3_del": "LNCSNANSNTNVTDIDNSTIXEIKNCTFNVTTELRDKEKKENALFYKLDIVPLNGNNNSSFSMYRLINCNTSVVTQACPKVSX"}
    for name, seq in test_in.items():
        prot_seq = translate_dna(seq)
        self.assertEqual(prot_seq, test_out[name])
# @unittest.skip('Skip this test')
def test_ins_1_translate_dna(self):
    """Check translate_dna on a read with an internal ``--``; the expected protein has an internal ``X``."""
    test_in = {"CAP255_2000_C1C3_ins_1": "TTAAACTGTAGCAATGCAAGTAGCAATGCAAATGTCACAAAGGTTGATAATAGTAGCACAATTGGAGAAATAAAGAATTGCACTTTCAATGTAACTACAGAATTAAGAGATAAGGAAA--AAGAAAGAAAATGCACTCTTTTATAAACTTGATATAGTACCACTTAATGGAAATAACAACAGCAGCTTTAGTATGTATAGATTAATAAACTGTAATACCTCAGTCGTAACACAAGCCTGTCCAAAGGTCT"}
    test_out = {"CAP255_2000_C1C3_ins_1": "LNCSNASSNANVTKVDNSSTIGEIKNCTFNVTTELRDKEXKKENALFYKLDIVPLNGNNNSSFSMYRLINCNTSVVTQACPKVX"}
    for name, seq in test_in.items():
        prot_seq = translate_dna(seq)
        self.assertEqual(prot_seq, test_out[name])
# @unittest.skip('Skip this test')
def test_del_large_translate_dna(self):
    """Check translate_dna on the ungapped long-deletion read; the expected protein ends in ``X``."""
    test_in = {"CAP255_2000_C1C3_good_del": "TTAAACTGTAGCAATGCAAATAGCAATACAAATGTCACAGATATTGATAATAGCACAATTGGAGAAATAAAGAATTGCACTTTAAGAGATAAGGAAAAGAAAGAAAATGCACTCTTTTATAAACTTGATATAGTACCACTTAATGGAAATAACAACAGCAGCTTCAGTATGTATAGATTAATAAACTGTAATACCTCAGTCGTAACACAAGCCTGTCCAAAGGTCTCTTT"}
    test_out = {"CAP255_2000_C1C3_good_del": "LNCSNANSNTNVTDIDNSTIGEIKNCTLRDKEKKENALFYKLDIVPLNGNNNSSFSMYRLINCNTSVVTQACPKVSX"}
    for name, seq in test_in.items():
        prot_seq = translate_dna(seq)
        self.assertEqual(prot_seq, test_out[name])
# test the find conserved and variable regions function
# @unittest.skip('Skip this test')
def test_find_cons_regions(self):
    """Compare every region returned by get_cons_regions with the C1-C4 fixtures.

    NOTE(review): the call below passes ``test_out`` (the expected dict) and
    never uses ``seq`` from ``test_in``. If get_cons_regions merely echoes
    its input, the assertion is tautological -- confirm against the
    function's signature before relying on this test.
    """
    test_in = {
        "full_env": "MTVMGIQKNYQQWWIWGILGFWMLMICNGNDTWVTVYYGVPVWREAKTTLFCASDAKAYEKEVHNVWATHACVPTDPNPQEMVLENVTENFNMWKNDMVDQMHEDIISLWDQSLKPCVKLTPLCVTLNCSNANSNTNVTDIDNSTIGEIKNCTFNVTTELRDKEKKENALFYKLDIVPLNGNNNSSFSMYRLINCNTSVVTQACPKVSFDPIPIHYCAPAGFAILKCNNKTFNGTGPCNNVSTVQCTHGIRPVVSTQLLLNGSLAEKEIIIRSENLTNNIKTIIVHLNESVEINCIRPNNNTRKSMRIGPGQTFYATGEIIGDIRQAHCNISKDKWDKTLHRVSEKLREHFPNKTITFNSSSGGDLEITTHSFNCGGEFFYCNTSGLFNSTFNTTFYEPSNLTITLQCRIKQIINMWQEVGRAMYAPPIAGNITCESKITGLILTRDGGSDNGTETFRPGGGDMRDNWRSELYKYKVVEIKPLGIAPTNGRRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASITLTVQARQLLSGIVQQQNNLLRAIEAQQHMLQLTVWGIKQLQARVLALERYLRDQQLLGMWGCSGKLICTTNVAWNSSWSNKSQDEIWKNMTWMQWDREISDYTNTIYRLLEESQNQQEINEKDLLALDSWNNLWTWFDITNWLWYIKIFIMIVGGLIGLRIIFAVLSLVNRVRQGYSPLSLQTFTPNPRGPDRLERIEEEGGEQDRDRSVRLVSGFLALAWDDLRSLCLFSYHLLRDFILVAARAVGLLGRSSLRGLQTGWEALKYLGSLVQYWGLELKKSAISLLDTLAIAVAEGTDRIIEFIQRICRAILHIPRRIRQGFEAALL"}
    test_out = {'C1': 'MRVMGIQRNCPQWWIWGILGFWMLMICSVVGKLWVTVYYGVPVWREAKTTLFCASDAKAYDTEVHNVWATHACVPTDPNPQEIVLGNVTENFNMWKNDMVDQMHEDIISLWDESLKPCVKLTPLCVTLNCVNANRTINVTTTG',
                'C2': 'EIKNCSFNVTTELRDKERKEYALFYRLDIVPLNNESSSDDTEYRLINCNTSAIKQACPKVSFDPIPIHYCAPAGFAILKCNNNTFNGTGPCTNVSTVQCTHGIKPVMSTQLLLNGSLAEGEIIIRSENLTDNAKTIIVQLDKSVEITCTRPNNNTRKSVRIGPGQTFYATGDIIGNIRQAHCNISSNEWNNTLEKVKKRLKEHFPDKNITFAPSSGGDLEIATHSFNCRGEFF',
                'C3': 'SNSTITLQCKIKQIIRMWQKVGQAMYAPPIAGNITCKSNITGLLLTRDGGR',
                'C4': 'IFRPGGGNMRDNWRSELYKYKVVEIKPLGIAPTNAKRRVVEREKRAVGIGAVFLGFLGAAGSTMGAASITLTVQARQLLSGIVQQQSNLLRAIEAQQHMLQLTVWGIKQLQTRVLALERFLRDQQLLGLWGCSGKLICTTAVPWNSSWSNKSQDDIWHNMTWMQWDREINNYTGTIYRLLEESQSQQERNEKDLLALDSWNSLWNWFDITKWLWYIKLFIMIVGGLIGLRIIFAVLSIVKRVRQGYSPLSFQTLTPNPREPDRPGRIEEEGGEQDRDRSIRLVSGFLALIWDDLRSLCLFSYHQLRNFILVTARAVELLGRNSLRGLQRGWEALKYLGNLVLYWGLEIKKSAISLLDTIAIAVAEGTDRIIELIQRICRAIRNLPRRIRQGFEASLL'
                }
    # Loop kept as in the original so the call count is unchanged; the
    # loop variables themselves are not read.
    for name, seq in test_in.items():
        cons_d = get_cons_regions(test_out, 'C', 'C4')
        for k, v in cons_d.items():
            # Echo the region key so a failing assertion can be attributed.
            print(k)
            self.assertEqual(v, test_out[k])
# test the find conserved and variable regions function
@unittest.skip('Skip this test')
def test_find_var_regions(self):
    """Compare every region returned by get_var_regions with the V1-V3 fixtures.

    Currently skipped unconditionally by the ``unittest.skip`` decorator.
    """
    test_in = {"full_env": "MTVMGIQKNYQQWWIWGILGFWMLMICNGNDTWVTVYYGVPVWREAKTTLFCASDAKAYEKEVHNVWATHACVPTDPNPQEMVLENVTENFNMWKNDMVDQMHEDIISLWDQSLKPCVKLTPLCVTLNCSNANSNTNVTDIDNSTIGEIKNCTFNVTTELRDKEKKENALFYKLDIVPLNGNNNSSFSMYRLINCNTSVVTQACPKVSFDPIPIHYCAPAGFAILKCNNKTFNGTGPCNNVSTVQCTHGIRPVVSTQLLLNGSLAEKEIIIRSENLTNNIKTIIVHLNESVEINCIRPNNNTRKSMRIGPGQTFYATGEIIGDIRQAHCNISKDKWDKTLHRVSEKLREHFPNKTITFNSSSGGDLEITTHSFNCGGEFFYCNTSGLFNSTFNTTFYEPSNLTITLQCRIKQIINMWQEVGRAMYAPPIAGNITCESKITGLILTRDGGSDNGTETFRPGGGDMRDNWRSELYKYKVVEIKPLGIAPTNGRRRVVQREKRAVGIGAVFLGFLGAAGSTMGAASITLTVQARQLLSGIVQQQNNLLRAIEAQQHMLQLTVWGIKQLQARVLALERYLRDQQLLGMWGCSGKLICTTNVAWNSSWSNKSQDEIWKNMTWMQWDREISDYTNTIYRLLEESQNQQEINEKDLLALDSWNNLWTWFDITNWLWYIKIFIMIVGGLIGLRIIFAVLSLVNRVRQGYSPLSLQTFTPNPRGPDRLERIEEEGGEQDRDRSVRLVSGFLALAWDDLRSLCLFSYHLLRDFILVAARAVGLLGRSSLRGLQTGWEALKYLGSLVQYWGLELKKSAISLLDTLAIAVAEGTDRIIEFIQRICRAILHIPRRIRQGFEAALL"}
    test_out = {'V1': 'SNTNVTDIDNSTI',
                'V2': 'TFNTTFYE',
                'V3': 'SDNGTE'
                }
    for name, seq in test_in.items():
        var_d = get_var_regions(seq)
        for k, v in var_d.items():
            # Echo the region key so a failing assertion can be attributed.
            print(k)
            self.assertEqual(v, test_out[k])
# Script entry point: when this file is executed directly, hand control to
# unittest's command-line runner.
if __name__ == '__main__':
    unittest.main()