-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfastautils.go
82 lines (76 loc) · 2.18 KB
/
fastautils.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
package fastautils
import (
"bytes"
"fmt"
"github.com/evolbioinf/fasta"
"os"
)
// Clean strips every byte that is not a canonical nucleotide from a
// Sequence, keeping only A, C, G, T and their lowercase forms. The
// relative order of the kept bytes is preserved.
//
// The input sequence is updated in place: the kept bytes are compacted
// into the slice returned by s.Data(), and *s is then replaced by a new
// Sequence built from the original header and the shortened data.
func Clean(s *fasta.Sequence) {
	data := s.Data()
	// kept shares data's backing array; since at most one byte is written
	// per byte read, the writes never overtake the read position.
	kept := data[:0]
	for _, b := range data {
		switch b {
		case 'A', 'C', 'G', 'T', 'a', 'c', 'g', 't':
			kept = append(kept, b)
		}
	}
	*s = *fasta.NewSequence(s.Header(), kept)
}
// DataToUpper converts the data of a Sequence to uppercase. The header is
// left unchanged.
//
// The input sequence is updated in place: *s is replaced by a new Sequence
// built from the original header and the result of bytes.ToUpper, which
// returns a copy of the data rather than modifying it.
func DataToUpper(s *fasta.Sequence) {
	upper := bytes.ToUpper(s.Data())
	*s = *fasta.NewSequence(s.Header(), upper)
}
// ReadAll scans f with a fasta.Scanner and returns every Sequence it
// yields, in scan order. If the scanner yields nothing, the returned
// slice is nil.
//
// ReadAll closes f before returning, so the caller must not use the file
// afterwards.
func ReadAll(f *os.File) []*fasta.Sequence {
	var seqs []*fasta.Sequence
	scanner := fasta.NewScanner(f)
	for scanner.ScanSequence() {
		seqs = append(seqs, scanner.Sequence())
	}
	// The error returned by Close is discarded; the signature offers no
	// way to report it.
	f.Close()
	return seqs
}
// Concatenate joins a slice of Sequences into a single Sequence entry.
//
// The headers are glued together into one header and the pieces of data
// into one data slice, both in input order. If sentinel is not zero, it is
// inserted between consecutive headers and between consecutive pieces of
// data; a zero sentinel means "no separator".
//
// Return values:
//   - empty input: a nil Sequence and a non-nil error;
//   - one element: that very element (same pointer, nothing is copied);
//   - two or more elements: a new Sequence whose header and data live in
//     freshly allocated buffers, so building it cannot write into, and the
//     result does not share memory with, the data of any input sequence.
func Concatenate(seqSlice []*fasta.Sequence,
	sentinel byte) (*fasta.Sequence, error) {
	switch len(seqSlice) {
	case 0:
		// The message text, including its trailing newline, is kept
		// unchanged for callers that print it verbatim.
		return nil, fmt.Errorf("fastautils.Concatenate: " +
			"the input slice is empty\n")
	case 1:
		return seqSlice[0], nil
	}

	// First pass: fetch each piece of data once and add up the final
	// sizes, so that both buffers are allocated exactly once.
	parts := make([][]byte, len(seqSlice))
	hLen, dLen := 0, 0
	for i, seq := range seqSlice {
		parts[i] = seq.Data()
		hLen += len(seq.Header())
		dLen += len(parts[i])
	}
	if sentinel != 0 {
		// One separator between each pair of neighbours.
		hLen += len(seqSlice) - 1
		dLen += len(seqSlice) - 1
	}

	// Second pass: copy into the new buffers. Appending to fresh slices
	// (rather than to the first sequence's data) keeps the inputs intact.
	h := make([]byte, 0, hLen)
	d := make([]byte, 0, dLen)
	for i, seq := range seqSlice {
		if i > 0 && sentinel != 0 {
			h = append(h, sentinel)
			d = append(d, sentinel)
		}
		h = append(h, seq.Header()...)
		d = append(d, parts[i]...)
	}
	return fasta.NewSequence(string(h), d), nil
}
// AddReverseComplement appends the reverse complement of a sequence to the
// sequence itself, under the same header. The two strands are separated by
// a hash (#), so the resulting data is: forward strand, '#', reverse
// complement.
//
// The input sequence is updated in place: *s is replaced by a new Sequence
// built from the original header and the combined data. The combined data
// is assembled in a freshly allocated buffer.
func AddReverseComplement(s *fasta.Sequence) {
	fwd := s.Data()

	// Reverse-complement a private copy. The temporary Sequence would
	// otherwise be built on the very slice held in fwd, and if
	// ReverseComplement rewrites its data in place (NOTE(review): confirm
	// against the fasta package), the forward strand would be lost before
	// it is written to the result.
	scratch := make([]byte, len(fwd))
	copy(scratch, fwd)
	rev := fasta.NewSequence("reverse", scratch)
	rev.ReverseComplement()
	rc := rev.Data()

	// Build forward + '#' + reverse in a new buffer instead of appending
	// to fwd, so no bytes are written into fwd's spare capacity.
	both := make([]byte, 0, len(fwd)+1+len(rc))
	both = append(both, fwd...)
	both = append(both, '#')
	both = append(both, rc...)
	*s = *fasta.NewSequence(s.Header(), both)
}