-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathencoder.py
67 lines (54 loc) · 1.84 KB
/
encoder.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# def encode(ByteArray):
# DNAString = ''
# for byte in ByteArray:
# nucleobaseGroup = ''
# #int.from_bytes(b'y\xcc\xa6\xbb', byteorder='big')
#
# nucleobaseGroup = nucleobaseGroup + \
# bits_to_nucleobase(
# (int.from_bytes(byte, byteorder='big') & 192) >> 6)
# nucleobaseGroup = nucleobaseGroup + \
# bits_to_nucleobase(
# (int.from_bytes(byte, byteorder='big') & 48) >> 4)
# nucleobaseGroup = nucleobaseGroup + \
# bits_to_nucleobase(
# (int.from_bytes(byte, byteorder='big') & 12) >> 2)
# nucleobaseGroup = nucleobaseGroup + \
# bits_to_nucleobase(int.from_bytes(byte, byteorder='big') & 3)
# DNAString = DNAString+nucleobaseGroup
#
# return DNAString
def encode(nums):
    """Encode a sequence of byte values as a string of nucleobases.

    Each value is split into four 2-bit groups, most significant pair
    first, and every group is mapped to one letter: 0 -> 'A', 1 -> 'C',
    2 -> 'G', 3 -> 'T'.  Only the low 8 bits of each value are used.

    Args:
        nums: Iterable of integer values (for example a ``bytes`` object
            or a list of ints).

    Returns:
        A string containing four nucleobase letters per input value, or
        the empty string when ``nums`` is empty.
    """
    # Same 2-bit -> letter table as bits_to_nucleobase(), indexed by value.
    nucleobases = 'ACGT'
    letters = []
    for num in nums:
        # Shift first, then mask.  The previous form `num & 192 >> 6`
        # parsed as `num & (192 >> 6)` because `>>` binds tighter than
        # `&`, so every position encoded only the two lowest bits.
        for shift in (6, 4, 2, 0):
            letters.append(nucleobases[int((num >> shift) & 3)])
    return ''.join(letters)
def byte_to_binString(byte):
    """Return the two-character binary string for a 2-bit value.

    Maps 0 -> '00', 1 -> '01', 2 -> '10' and 3 -> '11'.

    Raises:
        KeyError: If ``byte`` is not one of 0, 1, 2 or 3.
    """
    # Position in the tuple is the key, so index n maps to n in binary.
    two_bit_strings = dict(enumerate(('00', '01', '10', '11')))
    return two_bit_strings[byte]
def bits_to_nucleobase(int):
    """Return the nucleobase letter for a 2-bit value.

    Maps 0 -> 'A', 1 -> 'C', 2 -> 'G' and 3 -> 'T'.

    Raises:
        KeyError: If the value is not one of 0, 1, 2 or 3.
    """
    # NOTE: the parameter name shadows the builtin `int`; it is kept so
    # callers passing it by keyword keep working.
    letter_for_value = dict(enumerate('ACGT'))
    return letter_for_value[int]
def nucleobase_to_int(nucleobase):
    """Return the 2-bit value for a nucleobase letter.

    Maps 'A' -> 0, 'C' -> 1, 'G' -> 2 and 'T' -> 3.

    Raises:
        KeyError: If ``nucleobase`` is not one of 'A', 'C', 'G' or 'T'.
    """
    # The position of each letter in 'ACGT' is its numeric value.
    value_for_letter = {letter: value for value, letter in enumerate('ACGT')}
    return value_for_letter[nucleobase]
def decode(DNAString):
    """Decode a string of nucleobases into a list of single-byte objects.

    The input is consumed four letters at a time.  Each group is read as
    a base-4 number, most significant digit first, using the values
    returned by nucleobase_to_int().

    Args:
        DNAString: Sequence of nucleobase letters.

    Returns:
        A list with one length-1 ``bytes`` object per group of four
        letters; an empty list for empty input.

    Raises:
        IndexError: If the last group has fewer than four letters.
        KeyError: If a letter is not accepted by nucleobase_to_int().
    """
    bases = list(DNAString)
    decoded = []
    start = 0
    while start < len(bases):
        quad = bases[start:start + 4]
        value = 0
        # Horner's scheme: equivalent to summing digit * 4**position with
        # the first letter as the most significant digit.
        for position in range(4):
            value = value * 4 + nucleobase_to_int(quad[position])
        decoded.append(bytes((value,)))
        start += 4
    return decoded