-
Notifications
You must be signed in to change notification settings - Fork 4
/
fasta.go
71 lines (65 loc) · 1.3 KB
/
fasta.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
package cablastp
import (
"compress/gzip"
"io"
"os"
"strings"
"github.com/TuftsBCB/io/fasta"
)
// ReadOriginalSeq is the value sent over `chan ReadOriginalSeq` each time
// ReadOriginalSeqs attempts to read a sequence from a FASTA file. It carries
// either a successfully read sequence or the error that stopped reading.
type ReadOriginalSeq struct {
// Seq is the sequence that was read. ReadOriginalSeqs sets it to nil when
// it reports an error.
Seq *OriginalSeq
// Err is the read error, if any. ReadOriginalSeqs sets it to nil when a
// sequence was read successfully.
Err error
}
// ReadOriginalSeqs reads a FASTA formatted file and returns a channel that
// each new sequence is sent to.
//
// If fileName ends in ".gz", the file is transparently decompressed with gzip
// before being parsed. Every residue that matches one of the bytes in ignore
// is replaced with 'X' before the sequence is sent.
//
// An error is returned directly only when the file cannot be opened or its
// gzip header cannot be read. Errors encountered while parsing are delivered
// on the channel as a ReadOriginalSeq with a nil Seq and a non-nil Err, after
// which the channel is closed. The channel is also closed once the end of the
// file is reached.
//
// Sequences are produced by a background goroutine that blocks once the
// channel's buffer is full, so callers should receive until the channel is
// closed; only then are the underlying file handles released.
func ReadOriginalSeqs(
	fileName string,
	ignore []byte,
) (chan ReadOriginalSeq, error) {
	file, err := os.Open(fileName)
	if err != nil {
		return nil, err
	}

	// src is what the FASTA parser reads from: the raw file, or a gzip
	// stream layered on top of it.
	var src io.Reader = file
	var gz *gzip.Reader
	if strings.HasSuffix(fileName, ".gz") {
		gz, err = gzip.NewReader(file)
		if err != nil {
			// Nobody else will ever see this handle, so release it here.
			file.Close()
			return nil, err
		}
		src = gz
	}

	reader := fasta.NewReader(src)
	seqChan := make(chan ReadOriginalSeq, 200)
	go func() {
		// Deferred calls run in reverse order: the gzip reader (if any) is
		// closed first, then the file, and finally the channel, so receivers
		// observe the close only after all resources have been released.
		// Close errors are ignored because the file is opened read-only and
		// any read failure has already been reported on the channel.
		defer close(seqChan)
		defer file.Close()
		if gz != nil {
			defer gz.Close()
		}

		// id is the zero-based position of the sequence within the file.
		for id := 0; ; id++ {
			sequence, err := reader.Read()
			if err == io.EOF {
				return
			}
			if err != nil {
				seqChan <- ReadOriginalSeq{
					Seq: nil,
					Err: err,
				}
				return
			}

			// Mask out every residue the caller asked to ignore.
			for j, residue := range sequence.Residues {
				for _, toignore := range ignore {
					if toignore == byte(residue) {
						sequence.Residues[j] = 'X'
						break
					}
				}
			}
			seqChan <- ReadOriginalSeq{
				Seq: NewFastaOriginalSeq(id, sequence),
				Err: nil,
			}
		}
	}()
	return seqChan, nil
}