Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Properly handling Ns in make_prg #60

Merged
merged 6 commits into from
Jul 17, 2023
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -122,3 +122,4 @@ tests/data/prg_builder/write_prg/sample.bin
tests/data/prg_builder/write_prg/sample.prg.fa
tests/integration_tests/data/output/
tests/integration_tests/data/output_update/
debugging
39 changes: 32 additions & 7 deletions make_prg/utils/io_utils.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,51 @@
import gzip
import os
import tempfile
from io import StringIO
from pathlib import Path
from typing import Dict, Union
from zipfile import ZipFile

from Bio import AlignIO
from Bio.Seq import Seq

from make_prg import MSA
from make_prg.subcommands.output_type import OutputType
from make_prg.utils.seq_utils import get_majority_consensus_from_MSA


def load_alignment_file(msa_file: Union[str, Path], alignment_format: str) -> MSA:
msa_file = str(msa_file)
if msa_file.endswith(".gz"):
handle = gzip.open(msa_file, "rt")
alignment = AlignIO.read(handle, alignment_format)
handle.close()
else:
def load_alignment_file(
    msa_file: Union[str, Path, StringIO], alignment_format: str
) -> MSA:
    """
    Reads a multiple sequence alignment and normalises its sequences.

    msa_file is either an in-memory StringIO handle, which is parsed as is, or
    a path (str or Path). Paths ending in ".gz" are opened through gzip in text
    mode; any other path is opened as a regular text file.

    After parsing, every sequence is upper-cased, and each 'N' is replaced by
    the base found at the same column of the majority consensus computed by
    get_majority_consensus_from_MSA. The modified alignment is returned.
    """
    if isinstance(msa_file, StringIO):
        alignment = AlignIO.read(msa_file, alignment_format)
    else:
        path = str(msa_file)
        if path.endswith(".gz"):
            source = gzip.open(path, "rt")
        else:
            source = open(path, "r")
        with source as handle:
            alignment = AlignIO.read(handle, alignment_format)

    # The consensus helper documents that it needs upper-cased bases, so the
    # case normalisation has to be finished before the consensus is computed.
    for record in alignment:
        record.seq = record.seq.upper()

    consensus = get_majority_consensus_from_MSA(alignment)

    # Resolve every 'N' to the consensus base of its column.
    for record in alignment:
        resolved_bases = (
            consensus[column] if base == "N" else base
            for column, base in enumerate(str(record.seq))
        )
        record.seq = Seq("".join(resolved_bases))

    return alignment


Expand Down
46 changes: 46 additions & 0 deletions make_prg/utils/seq_utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
import copy
import hashlib
import itertools
import random
from collections import Counter
from typing import Generator, List, Tuple

import numpy as np
Expand Down Expand Up @@ -234,3 +237,46 @@ def get_consensus_from_MSA(alignment: MSA) -> str:
consensus_string_as_list.append(column.pop())
consensus_string = "".join(consensus_string_as_list)
return consensus_string


def get_majority_consensus_from_MSA(alignment: MSA) -> str:
    """
    Produces a consensus string just based on the major base for each column.

    For every column, gaps ("-") are ignored and the most frequent remaining
    residue is chosen. If that residue is "N", the second most frequent residue
    is used instead. Columns that hold nothing but gaps and/or "N"s receive a
    pseudo-random base drawn from "ACGT". The generator is seeded with the
    SHA-256 digest of the concatenated alignment sequences, so the same
    alignment always produces the same consensus.

    Note: alignment needs to be upper-cased bases.
    """
    # Use a private generator seeded from the alignment content: the result is
    # reproducible, and the process-wide `random` state is left untouched.
    all_seqs = "".join(get_alignment_seqs(alignment))
    random_seed_for_this_alignment = hashlib.sha256(all_seqs.encode()).digest()
    rng = random.Random(random_seed_for_this_alignment)

    # Convert every record to a plain string once, instead of indexing the
    # sequence objects again for each cell of the alignment.
    rows = [str(record.seq) for record in alignment]

    # Collect the bases in a list and join at the end (avoids repeated string
    # concatenation inside the loop).
    consensus_bases = []

    for i in range(alignment.get_alignment_length()):
        # Count the residues at this position, ignoring gaps
        pos_counts = Counter(row[i] for row in rows if row[i] != "-")
        ranked = pos_counts.most_common(2)

        # "N" stands for "no usable residue yet"; it is also the value used
        # when the column holds nothing but gaps.
        max_residue = "N"
        if ranked:
            max_residue = ranked[0][0]
            # If the major residue is 'N', fall back to the runner-up, if any
            if max_residue == "N" and len(ranked) > 1:
                max_residue = ranked[1][0]

        # Still unresolved (only gaps and/or 'N's in this column): randomise it
        if max_residue == "N":
            max_residue = rng.choice("ACGT")

        consensus_bases.append(max_residue)

    return "".join(consensus_bases)
1,578 changes: 1,578 additions & 0 deletions tests/integration_tests/data/amira_MSAs/alsB.fasta

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions tests/integration_tests/data/amira_MSAs/glpG.fasta
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
>SRR1314424_trimmed;1044_22_3
atgttgatgattacctcttttgctaacccccgcgtggcgcaggcgtttgttgattacatggcgacgcagggtgttatcctcacgattcaacaacataaccaaagcgatgtctggctggcggatgagtcccaggccgagcgcgtacgggcggagctggcgcnnnnnnnnnnctggcaggcaggccataccggcagtggcctgcattatcgccgttatcctttctttgccgccttgcgtgaacgcgcaggtccggtaacctgggtgatgatgatcgcctgcgtggtggtgtttattgccatgcaaattctcggcgatcaggaagtgatgttatggctggcctggccattcgatccagcactgaaatttgagttctggcgttacttcacccacgcgttaatgcacttctcgctgatgcatatcctctttaacctgctctggtggtggtatctcggcggtgcggtggaaaaacgcctcggtagcggtaagctaattgtcattacgcttatcagcgccctgttaagcggctatgtgcagcaaaaattcagcgggccgtggtttggcgggctttctggcgtggtgtatgcgctgatgggctacgtctggctacgtggcgaacgcgatccgcaaagtggcatttacctgcaacgtgggttaattatctttgcgctgatctggattgtcgccggatggtttgatttgtttgggatgtcgat---ggcgaacg--gagcacacatcgccgggttagccgtgggtttagcgatggcttttgttgattcgctcaatgcgcgaaaacgaaaataa
2,834 changes: 2,834 additions & 0 deletions tests/integration_tests/data/amira_MSAs/group_18516.fasta

Large diffs are not rendered by default.

Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
>alsB
5 ATGAATAAATATCT 7 G 8 A 7 AAATATTTCAGC 9 GGC 10 AGC 10 GGA 10 GAC 9 ACACTCGTGGGCTTAATGTTGT 11 C 12 T 11 AACCAGCGCTTTTGCTGCCGCCGAATATGCTGTCGTATTGAAAA 13 CC 14 CA 14 TA 13 CTCTCCAACCCATTTTGGGTAGATATGAAAAAAGGCATTGAAGAT 15 G 16 A 15 AAGCAAAAA 17 C 18 A 17 ACTGGGCGTCAGCGTTGATATTTTTGCCTC 19 T 20 G 19 CCTTCAGAA 21 G 22 C 21 GCGATTTTCAATCTCAA 6 ATGTTGTCAACCAGCGCTTTTGCTGCCGCCGAATATGC 23 C 24 T 23 GTCGTATTGAAAAC 25 A 26 C 25 CTCTCCAA 27 T 28 C 27 CCATTTTGGGTAGATATGAAAAAAGGCATTGAAGATGAAGCAAAAAC 29 GCTA 30 ACTG 29 GGCGTCAGCGTTGATATTTTTGCCTCTCCTTCAGAAGGCGATTTTCAATCTCAA 6 31 ATGAAT 32 ATGAAC 31 AAATATCTGAAATATTTCAG 33 CGGCACAC 34 TGGCACAC 34 CGGCACAT 33 TCGTGGGCTTAATGTTGTCAACCA 35 G 36 A 35 CGCTTTTGCT 37 G 38 A 37 CCGCCGAATATGC 39 C 40 A 39 GTCGTATTGAAAACACTCTCCAA 41 CCCATTT 42 TCCATTT 42 TCCATTC 41 TGGGTAGATATGAAAAAAGGCATTGAAGATGAAGCAAAAA 43 CGCTGGGCG 44 CGCTAGGCG 44 TGCTGGGCG 44 CACTGGGCG 44 CGCTGGGCA 43 TCAGCGTTGATATTTTTGCCTCTCCTTCAGAAGGCGATTTTCAATCTCAA 6 45 ATGAAAAAAG 46 TTGGGTAGATATGAAAAAG 46 TTGGGTAGATATGAAAAAA 45 GCATTGAAGATGAAGCAAAAAC 47 A 48 G 47 CTGGGCGTCAGCGTTGATATTTTTGCCTCTCCTTCAGAAGGCGATTTTCAATCTCAA 6 ATGAATAAATATCTGAAATATTTCAG 49 C 50 T 49 GGCACACTCGTGGGCTTAATGTTGTCAACCAGCGCTTTTGCTGCCGCCGAATATGC 51 C 52 T 51 GTCGTATTGAAAACACTCTCCAACCCATTTTGGGTAGATATGAAAAAAGGCATTGAAGATGAAGCAAAAATGCTGGGCGTCAGCGTTGATATTTTTGCCTCTCCTTCAGAAGG 53 TGAT 54 CGAT 54 TGAG 53 TTTCAATCTCAA 6 ATGAATAAATATCTGAAATATTTCAG 55 C 56 T 55 GGCACACTCGTGGGCTTAATGTTGTCAACCAGCGCTTTTGCT 57 G 58 A 57 CCGCCGAATATGC 59 TG 60 CA 59 TCGTATTGAAAAC 61 A 62 G 62 C 61 CTCTCCAACCCATTTTGG 63 G 64 A 63 TAGATATGAAAAAAGGCATTGAAGATGAAGCAAAAACGCTGGG 65 C 66 T 65 GTCAGCGTTGATATTTTTGCCTCTCCTTCAGAAGGCGATTTTCAATCTCAA 6 5 TTGCAGTTATTTGAAGATCTCAGTAATAAAAATTACAAAGG 67 T 68 C 67 ATCGCCTTCGC 69 T 70 G 70 A 69 CCATTATCCTCAGT 71 73 G 74 A 73 AATCTGG 75 T 76 C 75 72 GAACCTGGT 72 GAATTTGGT 72 GAACCTAGT 71 CATGCCTGT 77 CGCC 78 TGCC 78 TGCA 77 CGCGCATGGAAAAAAGG 79 C 80 T 79 ATTTATCTGGT 81 TAATCTCG 82 CAATCTCG 82 TAATCTCA 81 ATGAAAAAA 83 
TC 84 GC 84 TT 83 GACATGGATAATCT 85 GAAAAAAGCTGGCGGC 86 GAAAAAAGCAGGCGGT 86 GAAAAAAGCTGGCGGT 86 GAAAAAAGCTAGCGGC 86 AAAAAAAGCTGGCGGC 86 GAAAAAAGCAGGCGGC 86 GAAAGCTGGCGGC 85 AATGTGGAAG 87 CTTTTGTCACC 88 GTTTTGTCACC 88 CTTTTGCCACC 88 GTTTTGTCACT 87 ACCGATAA 89 91 CGTTGCT 92 TGTTGCT 91 90 CGTTGCC 90 TGTTGCC 89 GTCGGGGCGAAAGG 93 CGCGTCGTTC 94 CGCGTCATTC 94 TGCGTCATTC 94 CGCGTCATTG 94 CGCATCATTC 93 ATTATTG 95 ACAAATTGGGCGCTGAAGGTGGTGAAGTCGCAATCATTGAG 96 ACAAGCTGGGTGCCGAAGGGGGTGAAGTCGCAATCATTGAG 96 ACAAGCTGGGTGCCGAAGGGGGTGAAGTTGCAATTATTGAG 96 ATAAATTGGGCGCTGAAGGTGGTGAAGTCGCAATCATTGAG 96 ACAAGCTGGGTGCCGAAGGGGGTGAAGTCGCAATCATCGAG 96 ACAAGCTGGGTGCCGAAGGGGGTGAAGTTGCAATTATTGAA 96 ACAAATTGGGTGCTGAAGGTGGTGAAGTCGCAATCATTGAG 96 ACAAATTGGGCGCTGAAGGGGGTGAAGTCGCAATCATTGAG 96 ACAAATTGGGCGCCGAAGGGGGTGAAGTCGCAATCATTGAG 96 ACAAGCTGGGTGCCGAAAGGGGTGAAGTCGCAATCATTGAG 96 ACAAGCTGGGTGCCGAAGGGGATGAAGTCGCAATCATCGAG 96 GCAAGCTGGGTGCCGAAGGGGGTGAAGTTGCAATTATTGAG 96 ATAAATTGGGTGCCGAAGGGGGTGAAGTCGCAATCATTGAG 96 ACAAATTGGGCGCTAAAGGTGGTGAAGTCGCAATCATTGAG 96 ACAAGCTGGGTGCCGAAGGGGGTAAAGTCGCAATCATTGAG 95 GGTAAAGCCGG 97 T 98 C 97 AACGCCTCCGG 99 101 TGAAGCG 102 TGAAGCA 101 100 CGAAGCA 99 CGTCGTAATGG 103 TGCCACCGAAG 104 TGCCACCGAAA 104 TGCCACCAAAG 104 CGCCACCGAAG 104 TACCACCGAAG 103 CCTTCAAAAAAGC 105 AAGCCAGATCAAGCTTGTCGCCAGCCAGCCTGCCGACTGGGACCGC 106 AAGCCAGATCAAGCTTGTCGCCAGCCAGCCTGCCGACTGGGACCGT 106 AAGCCAGATCAAGCTTGTCGCCAGCCCGCCCGCTGACTGGGACCGC 106 AAGCCAGATCAAGCTTGTCGCCAGCCAGCCCGCTGACTGGGACCGC 106 AAGCCAGATCAAGCTTGTCGCCAGCCAGCCCGCCGACTGGGACCGC 106 AAGCCAGATCAAGCTTGTCGCCAGTCAGCCTGCCGACTGGGACCGT 106 AAGCCAGATCAAGCTTGTCACCAGCCAGCCTGCCGACTGGGACCGT 106 AAGCCAGCTCAAGCTTGTCGCCAGCCAGCCTGCCGACTGGGACCGT 106 AAGCCAGATCAAGCTTGTCGCCAGCCATCCTGCCGACTGGGACCGT 106 AAGCAGATCAAGCTTGTCGCCAGCCAGCCTGCCGACTGGGACCGC 106 AAGCCAAATCAAGCTTGTCGCCAGCCAGCCTGCCGACTGGGACCGC 106 AAGCCAGATCAAGCTTGTCGCCAGCCAGCCTGCCGACTGGGATCGC 106 AAACCAGATCAAGCTTGTCGCCAGCCAGCCTGCCGACTGGGACCGC 106 AAGCCAGATCAAACTTGTCGCCAGCCAGCCCGCTGACTGGGACCGC 106 
AAGTCAGATCAAGCTTGTCGCCAGCCAGCCTGCCGACTGGGACCGC 106 AAGCCAGATCAAGCTTGTCGCCAGCCAGCCCGCCGACGGGGACCGC 105 ATTAAAG 107 CACTGGATGTCGCC 108 CACTAGATGTCGCC 108 TACTGGATGTCGCC 108 CACTGGATGTCGCA 108 CACTGGATGTTGCC 107 ACTAACGT 109 GT 110 AT 110 G 109 TGCAACGTAATCCGA 111 ATATTAAAGCG 112 ATATTAACGCG 112 ATATTAAAGCA 112 TTATTAAAGCG 111 ATCTATTGCGCGAATGA 113 C 114 T 113 ACGATGG 115 C 116 G 116 A 115 AATGGGTGTTGCTCAGGC 117 AGTCGCAAACGCCGGAA 118 TGTCGCAAACGCCAGAA 118 TGTCGCAAACGCCGGAA 118 AGTCGCAAACGCTGGAA 118 TGTCGCAAACGTCGGAA 118 AGTCACAAACGCCGGAA 118 AGTCGCAAACGCCGGAAA 118 TGTCGCAAACGCTGGAA 118 AGCCGCAAACGCCGGAA 117 AAACGGG 119 AAAAGTGCTGGTCGT 121 CGGTACAGAT 122 CGGTACTGAC 122 TGGTACAGAT 121 GGCATTCCGG 123 AAG 124 AAA 124 GAG 123 CCCGCAAAAT 125 G 126 A 125 GTGGAAGCCGGACAAATGAC 127 C 128 T 127 GCGACGGTTGCCCA 129 GAAC 130 GAGC 130 GAAG 129 CCGGCGG 131 ATATCGGCGCAACGGGTCTGAAGC 132 ATATCGGTGCAACGGGTCTGAAGC 132 ATATCGGCGCAACGGGTCTGAAAC 132 ATATCGGCGCTACGGGTCTAAAGC 132 ATATCGGCACAACGGGTCTGAAGC 132 ATATCGGCGCAACGAGTCTGAAGC 132 ATATCGGCGCAACGGGTTTGAAGT 132 GTATCGGCGCAACGGGTCTGAAGC 131 TGATGGTTGACGC 133 TGAG 134 TGAA 134 GGAG 133 AAATCCGGCAAGGTTATCCCG 135 C 136 T 135 TGGATAAAGCAC 137 C 138 G 137 GGAATTTAAACTGGTCGATTCAATCCTG 139 G 140 A 139 TCACTCAATAA 120 141 CAAAGTGT 142 AAAAGTGC 141 TGGTCGTCGGTACAGATGG 143 C 144 A 143 ATTCCGGAAGCCCGCAAAATGGTGGAAG 145 CCGGACA 146 CCGGACT 145 AATGACCGCGACGGTTGCCCAGAACCCGGCGGATATCGG 147 T 148 C 147 GCAACGGGTCTGAAGCTGATGGTTGACGCTGAAAAATCCGGCAAGGTTATCCCGC 149 T 150 A 150 C 149 GGATAAAGCACCGGAATTTAAACTGGTCGATTCAATCCTGGTCACTCAATAA 120 CAAAATATTGGT 151 C 152 A 151 GTCGGTACAGATGGAATTCCGGAAGCCCGCAAAATGGTGGAAGCCGGACAAATGACCGCGACGGTTGCCCA 153 GAACC 154 GAACT 153 CGGCAGATATTGGCGCAACGGG 155 T 156 G 155 CTGAAGCTGATGGTTGACGCTGAGAAATCCG 157 G 158 A 157 CAAGGTTAT 159 T 160 C 159 CCGCTGGATAAAGCACC 161 G 162 T 161 GAATTTAAACTGGTCGATTCAATCCTGGTCACTCAATAA 120 163 AAAAGTGC 164 CAAAATAT 163 TGGTCGTCGGTACAGATGG 165 C 166 A 165 ATTCCGGAAGCCCGCAAAATGGTGGAAG 167 C 168 CC 167 
CGGACAAATGACCGCGACGGTTGCCCA 169 G 170 GA 169 AACCCGGC 171 GGATATC 172 AGATATT 171 GGCGCAACGGGTCTGAAGCTGATGGTTG 173 A 174 ACGCTGAGTAA 173 120 175 C 176 A 176 G 175 AAAGTGTTGGTCGTCGGTAC 177 TGAC 178 AGAT 177 GGCATTCCGGAAG 179 C 180 T 179 CCGCAAAATGGTGGAA 181 AC 182 GC 181 CGGACAAATGACCGCGACGGTTGC 183 CCAGAACCCGG 184 CCAGAACCCGA 184 TCAGAACCCGG 183 CGGATATCGGTGCAACGGGTCTGAAGCTGATGGTTGACGCTGAGAAATCCGG 185 C 186 T 185 AAGGTTATCCCGCTGGATAAAGCACCGGAATTTAAACTGGTCGATTCAA 187 T 188 C 187 CCTGGTCACTCAATAA 120 CAAAGTGTTGGTCGTCGGGGTAATGACTCCAACTTATTGA 120 CAAAATATTGGTCGTCGGTACAGATGGAATTCCGGAAGCCCGCAAAATGGTGGAAGCCGGACAAATGACCGCGACGGTTGCGCAGAACCCGGCAGATATTGGCGCAACGG 189 G 190 A 189 TCTGAAGCTGATGGTTAACGCTGAGAAATCCGGCAAGGTTATCCCGCTGGATAAAGCACCGGAATT 191 T 192 C 191 AAACTGGTCGATTCAATCCTGGTCACTCAATAA 120 193 CAAAGTG 194 CAAAGTA 193 TTGGTCGTCGGTACAGATGGAATTCCGGAAGCCCGCAAAATGGTGGAAGCCGGACAAATGACCGCGACGGTTGCCCAGAACCCGGCGGATA 195 TCGGT 196 TCGGC 196 CCGGT 195 GCAACGGGTCTGAA 197 G 198 A 197 CTGATGGTTGACGCTGAGAAATCCGGCAAGGTTATCCCGCTGGATAAAGCACCGGAATTTAAACTGGTCGATTCAATCCTGGTCACTCAATAA 120 199 CAAAATAT 200 AAAAGTGC 199 TGGTCGTCGGTACAGATGG 201 A 202 C 201 ATTCCGGAA 203 G 204 GC 203 CCGCAAAATGGTGGAAGCCGGACAAATGA 119
>glpG
ATGTTGATGATTACCTCTTTTGCTAACCCCCGCGTGGCGCAGGCGTTTGTTGATTACATGGCGACGCAGGGTGTTATCCTCACGATTCAACAACATAACCAAAGCGATGTCTGGCTGGCGGATGAGTCCCAGGCCGAGCGCGTACGGGCGGAGCTGGCGCCGATGTGGCACTGGCAGGCAGGCCATACCGGCAGTGGCCTGCATTATCGCCGTTATCCTTTCTTTGCCGCCTTGCGTGAACGCGCAGGTCCGGTAACCTGGGTGATGATGATCGCCTGCGTGGTGGTGTTTATTGCCATGCAAATTCTCGGCGATCAGGAAGTGATGTTATGGCTGGCCTGGCCATTCGATCCAGCACTGAAATTTGAGTTCTGGCGTTACTTCACCCACGCGTTAATGCACTTCTCGCTGATGCATATCCTCTTTAACCTGCTCTGGTGGTGGTATCTCGGCGGTGCGGTGGAAAAACGCCTCGGTAGCGGTAAGCTAATTGTCATTACGCTTATCAGCGCCCTGTTAAGCGGCTATGTGCAGCAAAAATTCAGCGGGCCGTGGTTTGGCGGGCTTTCTGGCGTGGTGTATGCGCTGATGGGCTACGTCTGGCTACGTGGCGAACGCGATCCGCAAAGTGGCATTTACCTGCAACGTGGGTTAATTATCTTTGCGCTGATCTGGATTGTCGCCGGATGGTTTGATTTGTTTGGGATGTCGATGGCGAACGGAGCACACATCGCCGGGTTAGCCGTGGGTTTAGCGATGGCTTTTGTTGATTCGCTCAATGCGCGAAAACGAAAATAA
>group_18516
5 ATGTTG 6 5 ATGATTACCTCTTTTGCTAA 7 9 CCCCCGC 10 TCCCCGC 9 8 CCCCCGA 8 TCCCCGA 7 GTGGCGCAGGC 11 G 12 A 11 TTTGTTGATTA 13 CATGGCGACGCAGGGTGTTATCCTCACGATTCAACAACATA 14 CATGGCGACACAGGGTGTTATCCTCACGATTCAACAACATA 14 TATGGCGACGCAGGGTGTTATCCTCACGATTCAACAACATA 14 CATGGCGACGCAGGGGGTTATCCTCACGATTCAACAACATA 14 CATGGCGACGCAGGGTGTTATCCTTACGATTCAACAACATA 14 CATGGCGACGCAGGGTGTTATCCTCACGATTCAACAAAATA 14 CATGGCGACGCAGGGGGTTATCCTTACGATTCAACAACATA 14 CATGGCAACGCAGGGTGTTATCCTCACGATTCAACAACATA 14 CATGGCGACGCAGGGTGTTATCCTTACGATTCAACAATATA 14 CATGGCGACGCAGGGTGTTATCCTTACGATTCAACAGCATA 14 CATGGCGACGCAGGGTGTTATTCTCACGATTCAACAACATA 14 CATGGCGACGCAGGGTGTTATCCTTACGATTCAACAAAATA 14 TATGGCGACGCAGGGTGTTATCCTCACGATGCAACAACATA 14 CATGGCGACGCAGGGTGTGATCCTCACGATTCAACAACATA 14 CATGGCGACGCAGGGTGTTATCCTCGCGATTCAACAACATA 14 CATGGCGACGCAGGGTGTTATCCTCACGATTCAACAACATC 14 CATGGCGACGCAGGGTGTTATCCTCACGATTCAAAAACATA 13 ACCAAAGCGA 15 T 16 C 15 GTCTGGCT 17 GGCGGATGAGTCCCAGGCCGAGCGCGTACGGGCG 18 GGCGGATGAGTCCCAGGCCGAGCGCGTGCGGGCG 18 GGCGGATGAGTCCCAGGCCGAACGCGTGCGGGCG 18 AGCGGATGAGTCCCAGGCCGAACGCGTGCGGGCG 18 GGCGGATGAGTCACAGGCCGAGCGCGTGCGGGCG 18 GGCGGATGAGTCCCAGGCCGAGCACGTGCGGGCG 18 GGCGGATGAGTCCCAGGCCGAGCGCGTGCGGGTG 18 GGCAGATGAGTCCCAGGCCGAGCGCGTACGGGCG 18 GGCGGATGAGTCCCAGGCCGAGCGCGTACGGACG 18 GGCGGATGAGCCCCAGGCCGAGCGCGTACGGGCG 18 GGCGGATGAGTCCCAGGTCGAGCGCGTACGGGCG 18 GGCGGATGAGTCCCAGGCCGAGCTCGTGCGGGCG 18 GGCGGATGAATCCCAGGCCGAACGCGTGCGGGCG 18 GGCGGATGAATCCCTGGCCGAGCGCGTGCGGGCG 18 GGCGGATGAGTCCCAGGCCGAGCGCGTGCGGGCA 18 GGCGGATGAGCCCCAGGCCGAGCGCGTGCGGGCG 18 GGCGGATGAGTCCCAGGCCGAACGCGTGCGGGTG 17 GAGCTGG 19 CG 20 CT 20 TG 19 CGTTTTCTCG 21 AAAACCCGGCAGATCCGCGTTATCTGGCGGCCAGCTGGCAGGCAGGCCATACCGGCAGTGGC 22 AAAACCCGGCAGATCCGCGTTATCTGGCGGCCAGCTGGCAGGCGGGTCATACCGGCAGTGGC 22 AAAACCCGGCAGATCCACGTTATCTCGCCGCCAGTTGGCAGTCCGGTCATACCGACAGTGGC 22 AAAACCCGGCAGATTCGCGTTATCTGGCGGCCAGCTGGCAGGCGGGCCATACCGGCAGTGGC 22 AAAACCCGGCAGATCCGCGTTATCTGGCGGCCAGCTGGCAGGCGGGCCATACCGGCAGTGGT 22 
AAAACCCGGCAGATCCGCGTTATCTGGCGGCGAGCTGGCAGGCAGGCCATACCGGCAGTGGC 22 AAAACCCGGCAGATCCGCGTTATCTGGCGGCCAGCTGGCAGGCGGGCCATACCGGCAGTGGC 22 AAAACCCGGCAGATCCGCGTTATCTCGCCGCCAGTTGGCAGTCCGGTCATACCGACAGTGGC 22 GAAACCCGGCAGATCCGCGTTATCTGGCGGCCAGTTGGCAGTCCGGTCATACCGACAGTGGC 22 AAAACCCGGCAGATCCGCGTTATCTGGCGGCCAGCTGGCAGGCGGGCCATACCGGCAGCGGC 22 AAAACCCGGCAGATCCGCGTTATCTGGCGGCCAGCTGGCAGGCAGGCCATACCGGCAGTGGT 22 AAAACCCGGCAGATCCGCGTTATCTGGCGGCCAGCTGGCAGGCGGGCCGTACCGGCAGTGGC 22 AAAACCCGGCAAATCCGCGTTATCTGGCGGCGAGCTGGCAGGCAGGCCATACCGGCAGTGGC 22 AAAACCCGGCAGATCCGCGTTATCTGGCGGCGAGCTGGCAGGCAGGCCATATCGGCAGTGGC 22 AAAACCCGGCAGATCCGCGTTATCTGGCGGCCAGCTGGCAGGCGGGCCATACCGGCAATGGT 22 AAAACCCGGCAGATCCGCTTTATCTGGCGGCCAGCTGGCAGGCGGGCCATACCGGCAGTGGT 22 AAAACCCGGCAGATCCGCGTTATCTGGCGGCCAGTTGGCAGTCCGGTCATACCGACAGTGGC 22 AAAACCCGGCAGATCCGCGTTATCTGGCGGCGAGCTGGCAGGTAGGCCATACCGGCAGTGGC 22 AAAACCCGGCAGATCCGCGTTATCTGGCGGCGAGCTGGCAGGCGGGCCATACCGGCAGTGGC 22 AAAACCCGGCAGATCCGCGTTATCTGGCGGCCAGCTGGCTGGCAGGCCATACCGGCAGTGGC 22 AAAACCCGGCAGATCCGCGTTATCTGGCGGCAAGCTGGCAGGCAGGCCATACCGGCAGTGGC 22 AAAACCCGGCAGATCCACGTTATCTCGCCGCCAGCTGGCAGTCCGGTCATACCGACAGTGGC 22 AAAACACGGCAGATCCGCGTTATCTGGCGGCCAGCTGGCAGGCGGGTCATACCGGCAGTGGC 22 AAAACCCGGCAGATCCGCGTTATCTGGCGGCCAGCTGGCAGGCGGGCCATACCGGAAGTGGT 22 AAAACCCGGCAGATCCGCGTTATCTGGCAGCCAGCTGGCAGGCAGGCCATACCGGCAGTGGC 22 AAAACCCGGCAGATCCGCGTTATCTGGTGGCCAGCTGGCAGGCGGGCCGTACCGGCAGTGGC 22 AAAACCCGGCAGATCCGCGTTATCTGGCGGCGAGCTGGCAGGCGGGCCATACCGGCAGTGGT 22 AAAACCCGGCAGATCCTCGTTATCTGGCGGCCAGCTGGCAGGCGGGCCATACCGGCAGCGGC 22 AAAACCCGGCAGATCCGCGTTATCTGGCGGCCAGCTGGCAGTCGGGCCATACCGGCAGCGGC 22 AAAATCCGGCAGATCCGCGTTATCTGGCGGCGAGCTGGCAGGCAGGCCATACCGGCAGTGGC 21 CTGCATTATCGCCGTTATC 23 CTTTCTTTGCC 24 CTTTCTTTGCT 24 CCTTCTTTGCT 24 CTTTTTTTGCC 24 CCTTCTTGGCT 24 ATTTCTTTGCT 23 GCCTTGCGTGAAC 25 GCGCAGGTCCGGTAACCTGGGTGGTGATGATCGCCTGCG 26 GCGCAGGTCCGGTAACCTGGGTGATGATGATCGCCTGCG 26 GCGCAGGTCCGGTAACCTGGGTGATGATGATTGCCTGCG 26 GCGCAGGTCCGGTAACCTGGGTGATGATGATCGTCTGCG 26 GCGCAGGTCCGGTAACCTGGGTGATGATGATAGCCTGCG 26 
GCGCAGGCCCGGTGACCTGGGTGATGATGATCGCCTGCG 26 GCGCAGGTCCGGTGACCTGGGTGATGATGATCGCCTGCG 26 GCGCAGGCCCGGTAACCTGGGTGATGATGATCGCCTGCG 26 GCGCAGGTCCGGTAACCTGGGTGATGATTATCGCCTGCG 26 GCGCAGGTTCGGTAACCTGGGTGGTGATGATCGCCTGCG 26 GCGCAGGTCCGGTAACTTGGGTGATGATGATCGCCTGCG 26 GCGCAGGTCCGGTCACCTGGGTGATGATGATCGCCTGCG 26 GCGCAGGTCCGGTAACCTGGGTGATGATGATCGCCTGCA 26 GCGCAGGTCCGGTAACCTGGGTGGTGATGATCGCATGCG 26 GCGCAGGTCCGGTAACCTGGGTGATGATGATCGCCTGTG 26 GCGCAGGTCCGGTAACCTGGATGATGATGATCGCCTGCG 26 GTGCAGGTCCGGTGACCTGGGTGATGATGATTGCCTGCG 26 GCGCAGGTCCGGTAACCAGGGTGGTGATGATCGCCTGCG 26 GCGCAGGCCCGGTAACCTGGGTGATGATGCTCGCCTGCG 26 GCGCAGGTCCGGTAACCTGGGGGGTGATGATCGCCTGCG 26 GCGCAGGCCCGGTAACCTGGGTGATGATGATTGCCTGCG 26 GCGCAGGTCCGGTAACCTGGTTGATGATGATCGCCTGCG 26 ACGCAGGTCCGGTAACCTGGGTGGTGATGATCGCCTGCG 25 TGGTGGTGTTTAT 27 CGCCATGC 28 TGCCATGC 28 TACCATGC 28 CGCAATGC 28 TGCCATAC 28 CTCCATGC 27 AAATTCTCGG 29 CGATCAGGAAGTGATGT 30 CGATCAAGAAGTGATGT 30 TGATCAGGAAGTGATGT 30 CGATCAGGAAGTGAGGT 30 CGATCAGGAAGTCATGT 30 CGATCAGGAAGTGATGC 30 CGAGCAGGAAGTGATGT 30 AGATCAGGAAGTGATGT 29 TATGGCTGG 31 CCTGGCCATTCGATCCAACACTGAAA 32 CCTGGCCATTCGATCCGACGCTGAAA 32 CCTGGCCATTCGATCCGACGCTGAAG 32 CCTGGCCATTCGATCCGACACTGAAA 32 CCTGGCCATTCGACCCAACACTGAAA 32 CTTGGCCATTCGATCCGACGCTGAAG 32 CCTGGCCATTCGATCCAGCACTGAAA 32 CCTGGGCCTTCGATCCAACACTGAAA 32 CTTGGCCATTCGATCCAACACTGAAA 32 CTTGGCCATTCGATCCAGCACTGAAA 32 TCTGGCCATTCGATCCAACACTGAAA 32 CCAGGCCATTCGATCCAACACTGAAA 32 CCTGGTCATTCGATCCAACACTGAAA 31 TTTGAGT 33 T 34 C 33 CTGGCGTTACTTCAC 35 37 CCACGCG 38 CCATGCG 37 36 TCATGCA 35 TTAATGCA 39 CTTCTCGCTG 40 CTTCTCACTA 40 CTTCTTGCTG 40 TTTCTCGCTG 39 ATGCATA 41 TCCTC 42 TTCTC 42 TCCTT 42 TCCTA 42 GCCTC 41 TTTAACCTGCT 43 CTGGTGGTGG 45 T 46 A 45 44 CTGGTGGT 44 TTGGTGGTGGT 43 ATCTCGGC 47 49 GGTGCGG 50 GGAGCGG 49 48 GGCGCGG 48 GGTGCGA 48 AGCGCGG 47 TGGAAAA 51 ACGCCTC 52 ACGTCTC 52 ACGCCTT 52 GCGCCTC 51 GGTAGCGG 53 T 54 C 53 AAGCTAATTGT 55 TATTACTCTCATTAGCGC 56 CATTACTCTCATTAGCGC 56 CATTACGCTTATCAGCGC 56 CATTACTCTGATTAGCGC 56 CATTACGCTTATTAGTGC 56 
TATTACTCTCATTAGTGC 56 CATCACTCTCATTAGCGC 56 TATTACTCTCATTAGCGG 56 TATTACTCTGATTAGCGC 55 CCTGTTAAG 57 CGGCTATGTGCAGCAAAAATTCAGCGGG 58 CGGCTATGTGCAGCAAAAGTTCAGCGGG 58 TGGCTATGTGCAGCAAAAATTCAGCGGG 58 CGGCTATGTGCAGCAAAAATTCAGCGGA 58 CGGATATGTGCAGCAAAAATTCAGCGGG 58 CGGCTATATGCAGCAAAAATTCAGCGGG 58 CGGCTATGTGCAACAAAAATTCAGCGGG 58 CGGCTATGTGCAGCAAAAATTTAGCGGG 58 CGGCTATGTGCAGCAAAAATTCAGTGGG 57 CCGTGGTT 59 T 60 C 59 GGCGGGCTTTCTGGC 61 GTGGTGTATGCGC 62 GTGGTGTATGCAC 62 GTGGTGTATGCGT 62 GTGGTGTATACGC 62 ATGGTGTATGCGC 62 GTGATGTATGCGC 61 TGATGGG 63 CTACGTCTGGCTACGTGGTGAACG 64 CTACGTCTGGCTACGTGGCGAACG 64 TTACGTCTGGCTACGCGGCGAACG 64 CTACGTCTGGCTACGTGGTGAGCG 64 CTACGTCTGGCTACGCGGCGAACG 64 CTATGTCTGGCTACGTGGCGAACG 64 CTACGTCTGGTTACGTGGCGAACG 64 TTACGTCTGGCTACGTGGCGAACG 64 GTACGTCTGGCTACGCGGCGAACG 64 CTACGTTTGGCTACGTGGCGAACG 64 CTACGTCTGGCTACGTGGTGAATG 64 CTACGTCTGGCTACGTGGCGAACT 64 CTACGTCTGGCTGCGTGGCGAACG 64 CTACGTCTGGCTACGTGGAGAACG 63 CGATCCGCAAAGTG 65 G 66 A 65 CATTTACCTGCAACG 67 TGGGTTAATT 68 AGGGTTAATT 68 TGGGATAATT 68 TGGGTTAATC 67 ATCTTTGC 69 GC 70 GT 70 AC 70 AT 69 TGATCTG 71 GATTG 72 GATTA 72 TATTG 71 TCGCCGGATGGTTTGATTTGTTTGG 73 GATGTCG 74 GATGTCA 74 TATGTCG 73 ATGGCGAACGGAGCACA 75 CATCGCCGGGTTAGCCGTGGGTTTA 76 TATCGCCGGGTTAGCCGTGGGTTTA 76 CATCGCCGGGTTAGCAGTGGGTTTA 76 CATCGCCGGGTTAGCCGTGGGGTTA 76 CATCGCCGGATTAGCCGTGGGTTTA 76 CATCGCCGGGTTAGCCGTGGGTTTC 76 CATCACCGGGTTAGCCGTGGGTTTA 76 CATAGCCGGGTTAGCCGTGGGTTTA 75 GCGATGGC 77 TTTTGTTGATTCGCTCAAT 78 TTTTGTTGATTCGCTTAAT 78 TTTTGTTGTTTCGCTCAAT 78 TTTTTTTGATTCGCTCAAT 78 TTTTGTTGATTCGCTCAAG 78 ATTTGTTGATTCGCTCAAT 77 GCGCGAAAACGAAAATAA
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading