-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathalphabets.go
90 lines (77 loc) · 2.11 KB
/
alphabets.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
package seq
import (
"encoding/json"
)
// Alphabet corresponds to an ordered set of residues that captures all
// possible residues of a particular set of sequences. For example, this is
// used in frequency profiles and HMMs to specify which amino acids are
// represented in the probabilistic model.
//
// In most cases, the ordering of the alphabet is significant. For example,
// the indices of an alphabet may be in correspondence with the indices of
// a column in a frequency profile.
//
// An Alphabet is a plain slice of Residue values: a residue's position in
// the slice is its index in the alphabet, and assigning an Alphabet copies
// only the slice header, so both values refer to the same residues.
type Alphabet []Residue
// NewAlphabet builds an Alphabet holding the given residues in the order
// they were passed. No copy is made: when called as NewAlphabet(s...), the
// returned alphabet shares its backing array with s.
func NewAlphabet(residues ...Residue) Alphabet {
	// []Residue is assignable to Alphabet because both have the same
	// underlying type, so no explicit conversion is required.
	return residues
}
// Len reports the number of residues in the alphabet.
func (a Alphabet) Len() int { return len(a) }
// Index builds a 256-entry lookup table that maps a residue's ASCII value to
// its position in the alphabet, giving constant-time lookups. It relies on
// every residue in the alphabet being an ASCII character.
//
// Entries for values that are not in the alphabet are left at 0, which is
// also the position of the first residue, so the table by itself cannot
// tell the two apart. If a residue occurs more than once, the table records
// its last position.
func (a Alphabet) Index() [256]int {
	table := [256]int{}
	for pos := 0; pos < len(a); pos++ {
		table[a[pos]] = pos
	}
	return table
}
// Equals reports whether a1 and a2 hold the same residues in the same order.
// Only length and contents are compared, so a nil alphabet equals an empty
// one.
func (a1 Alphabet) Equals(a2 Alphabet) bool {
	// Start from the length comparison; the loop only runs (and only indexes
	// a2) while the alphabets still look identical.
	same := len(a1) == len(a2)
	for i := 0; same && i < len(a1); i++ {
		same = a1[i] == a2[i]
	}
	return same
}
// String returns the alphabet as text: one byte per residue, in alphabet
// order.
func (a Alphabet) String() string {
	buf := make([]byte, 0, len(a))
	for _, r := range a {
		buf = append(buf, byte(r))
	}
	return string(buf)
}
// MarshalJSON encodes the alphabet as a JSON string containing its residues
// in order, i.e. the text returned by String.
//
// The receiver is a value rather than a pointer so that encoding/json uses
// this method for plain Alphabet values as well as *Alphabet. With a pointer
// receiver, a non-addressable Alphabet (passed directly to json.Marshal, or
// held in a struct that is marshalled by value) bypasses the method and is
// encoded with the default slice rules, which UnmarshalJSON cannot read back.
func (a Alphabet) MarshalJSON() ([]byte, error) {
	return json.Marshal(a.String())
}
// UnmarshalJSON decodes a JSON string into the alphabet, turning each byte
// of the decoded string into one residue, in order. If bs does not decode as
// a JSON string, the error from encoding/json is returned and the alphabet
// is left unchanged.
func (a *Alphabet) UnmarshalJSON(bs []byte) error {
	var encoded string
	err := json.Unmarshal(bs, &encoded)
	if err != nil {
		return err
	}

	// Build the replacement first and assign it in a single step.
	residues := make(Alphabet, 0, len(encoded))
	for _, b := range []byte(encoded) {
		residues = append(residues, Residue(b))
	}
	*a = residues
	return nil
}
// AlphaBlosum62 is the default alphabet that corresponds to the BLOSUM62
// matrix included in this package. It holds 24 residues, the last of which
// is '-'.
var AlphaBlosum62 = NewAlphabet(
	'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
	'I', 'K', 'L', 'M', 'N', 'P', 'Q', 'R',
	'S', 'T', 'V', 'W', 'X', 'Y', 'Z', '-',
)
// AlphaDNA is the default alphabet for DNA sequences.
var AlphaDNA = NewAlphabet('A', 'C', 'G', 'T', 'N', '-')
// AlphaRNA is the default alphabet for RNA sequences.
var AlphaRNA = NewAlphabet('A', 'C', 'G', 'U', 'N', '-')