Skip to content

Commit

Permalink
improving test coverage (bebop#65)
Browse files Browse the repository at this point in the history
* fixed typos in comments.

* made regression test for command line hash function flags.

* added forgotten unsaved line in last push.

* added strings and fasta support and tests to commands.

* added fasta to batch conversion.

* changed json test file in command tests.

* cover SantaLucia case where primer is reverse complement of itself.

* created a shorter gff file for testing.

* replaced bsub test file with smaller puc19 test file.

* made new optimization methods and covered more code in transformations.go
  • Loading branch information
TimothyStiles authored Nov 2, 2020
1 parent 9f0f049 commit 4c95242
Show file tree
Hide file tree
Showing 8 changed files with 154 additions and 25 deletions.
106 changes: 106 additions & 0 deletions data/ecoli-mg1655-short.gff
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
##gff-version 3
##sequence-region U00096.3 1 6370
U00096.3 feature gene 190 255 . + . db_xref=EcoGene:EG11277;gene=thrL;gene_synonym=ECK0001,JW4367;locus_tag=b0001
U00096.3 feature CDS 190 255 . + 0 codon_start=1;db_xref=GI:1786182,ASAP:ABE-0000006,UniProtKB/Swiss-Prot:P0AD86,EcoGene:EG11277;function=leader%3B Amino acid biosynthesis: Threonine,1.5.1.8 metabolism%3B building block biosynthesis%3B amino acids%3B threonine;gene=thrL;gene_synonym=ECK0001,JW4367;locus_tag=b0001;note=GO_process: GO:0009088 - threonine biosynthetic process;product=thr operon leader peptide;protein_id=AAC73112.1;transl_table=11;translation=MKRISTTITTTITITTGNGAG
U00096.3 feature gene 337 2799 . + . db_xref=EcoGene:EG10998;gene=thrA;gene_synonym=ECK0002,Hs,JW0001,thrA1,thrA2,thrD;locus_tag=b0002
U00096.3 feature CDS 337 2799 . + 0 EC_number=1.1.1.3,2.7.2.4;codon_start=1;db_xref=GI:1786183,ASAP:ABE-0000008,UniProtKB/Swiss-Prot:P00561,EcoGene:EG10998;experiment=N-terminus verified by Edman degradation: PMID 354697%2C4562989;function=enzyme%3B Amino acid biosynthesis: Threonine,1.5.1.8 metabolism%3B building block biosynthesis%3B amino acids%3B threonine,1.5.1.21 metabolism%3B building block biosynthesis%3B amino acids%3B homoserine,7.1 location of gene products%3B cytoplasm;gene=thrA;gene_synonym=ECK0002,Hs,JW0001,thrA1,thrA2,thrD;locus_tag=b0002;note=bifunctional: aspartokinase I %28N-terminal%29%3B homoserine dehydrogenase I %28C-terminal%29%3B GO_component: GO:0005737 - cytoplasm%3B GO_process: GO:0009088 - threonine biosynthetic process%3B GO_process: GO:0009086 - methionine biosynthetic process%3B GO_process: GO:0009090 - homoserine biosynthetic process;product=fused aspartokinase I and homoserine dehydrogenase I;protein_id=AAC73113.1;transl_table=11;translation=MRVLKFGGTSVANAERFLRVADILESNARQGQVATVLSAPAKITNHLVAMIEKTISGQDALPNISDAERIFAELLTGLAAAQPGFPLAQLKTFVDQEFAQIKHVLHGISLLGQCPDSINAALICRGEKMSIAIMAGVLEARGHNVTVIDPVEKLLAVGHYLESTVDIAESTRRIAASRIPADHMVLMAGFTAGNEKGELVVLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGASRDEDELPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFAAMSRARISVVLITQSSSEYSISFCVPQSDCVRAERAMQEEFYLELKEGLLEPLAVTERLAIISVVGDGMRTLRGISAKFFAALARANINIVAIAQGSSERSISVVVNNDDATTGVRVTHQMLFNTDQVIEVFVIGVGGVGGALLEQLKRQQSWLKNKHIDLRVCGVANSKALLTNVHGLNLENWQEELAQAKEPFNLGRLIRLVKEYHLLNPVIVDCTSSQAVADQYADFLREGFHVVTPNKKANTSSMDYYHQLRYAAEKSRRKFLYDTNVGAGLPVIENLQNLLNAGDELMKFSGILSGSLSYIFGKLDEGMSFSEATTLAREMGYTEPDPRDDLSGMDVARKLLILARETGRELELADIEIEPVLPAEFNAEGDVAAFMANLSQLDDLFAARVAKARDEGKVLRYVGNIDEDGVCRVKIAEVDGNDPLFKVKNGENALAFYSHYYQPLPLVLRGYGAGNDVTAAGVFADLLRTLSWKLGV
U00096.3 feature gene 2801 3733 . + . db_xref=EcoGene:EG10999;gene=thrB;gene_synonym=ECK0003,JW0002;locus_tag=b0003
U00096.3 feature CDS 2801 3733 . + 0 EC_number=2.7.1.39;codon_start=1;db_xref=GI:1786184,ASAP:ABE-0000010,UniProtKB/Swiss-Prot:P00547,EcoGene:EG10999;function=enzyme%3B Amino acid biosynthesis: Threonine,1.5.1.8 metabolism%3B building block biosynthesis%3B amino acids%3B threonine,7.1 location of gene products%3B cytoplasm;gene=thrB;gene_synonym=ECK0003,JW0002;locus_tag=b0003;note=GO_component: GO:0005737 - cytoplasm%3B GO_process: GO:0009088 - threonine biosynthetic process;product=homoserine kinase;protein_id=AAC73114.1;transl_table=11;translation=MVKVYAPASSANMSVGFDVLGAAVTPVDGALLGDVVTVEAAETFSLNNLGRFADKLPSEPRENIVYQCWERFCQELGKQIPVAMTLEKNMPIGSGLGSSACSVVAALMAMNEHCGKPLNDTRLLALMGELEGRISGSIHYDNVAPCFLGGMQLMIEENDIISQQVPGFDEWLWVLAYPGIKVSTAEARAILPAQYRRQDCIAHGRHLAGFIHACYSRQPELAAKLMKDVIAEPYRERLLPGFRQARQAVAEIGAVASGISGSGPTLFALCDKPETAQRVADWLGKNYLQNQEGFVHICRLDTAGARVLEN
U00096.3 feature gene 3734 5020 . + . db_xref=EcoGene:EG11000;gene=thrC;gene_synonym=ECK0004,JW0003;locus_tag=b0004
U00096.3 feature CDS 3734 5020 . + 0 EC_number=4.2.3.1;codon_start=1;db_xref=GI:1786185,ASAP:ABE-0000012,UniProtKB/Swiss-Prot:P00934,EcoGene:EG11000;experiment=N-terminus verified by Edman degradation: PMID 9298646%2C9600841%2C9740056;function=enzyme%3B Amino acid biosynthesis: Threonine,1.5.1.8 metabolism%3B building block biosynthesis%3B amino acids%3B threonine,7.1 location of gene products%3B cytoplasm;gene=thrC;gene_synonym=ECK0004,JW0003;locus_tag=b0004;note=GO_component: GO:0005737 - cytoplasm%3B GO_process: GO:0009088 - threonine biosynthetic process;product=threonine synthase;protein_id=AAC73115.1;transl_table=11;translation=MKLYNLKDHNEQVSFAQAVTQGLGKNQGLFFPHDLPEFSLTEIDEMLKLDFVTRSAKILSAFIGDEIPQEILEERVRAAFAFPAPVANVESDVGCLELFHGPTLAFKDFGGRFMAQMLTHIAGDKPVTILTATSGDTGAAVAHAFYGLPNVKVVILYPRGKISPLQEKLFCTLGGNIETVAIDGDFDACQALVKQAFDDEELKVALGLNSANSINISRLLAQICYYFEAVAQLPQETRNQLVVSVPSGNFGDLTAGLLAKSLGLPVKRFIAATNVNDTVPRFLHDGQWSPKATQATLSNAMDVSQPNNWPRVEELFRRKIWQLKELGYAAVDDETTQQTMRELKELGYTSEPHAAVAYRALRDQLNPGEYGLFLGTAHPAKFKESVEAILGETLDLPKELAERADLPLLSHNLPADFAALRKLMMNHQ
U00096.3 feature gene 5234 5530 . + . db_xref=EcoGene:EG14384;gene=yaaX;gene_synonym=ECK0005,JW0004;locus_tag=b0005
U00096.3 feature CDS 5234 5530 . + 0 codon_start=1;db_xref=GI:1786186,ASAP:ABE-0000015,UniProtKB/Swiss-Prot:P75616,EcoGene:EG14384;gene=yaaX;gene_synonym=ECK0005,JW0004;locus_tag=b0005;product=predicted protein;protein_id=AAC73116.1;transl_table=11;translation=MKKMQSIVLALSLVLVAPMAAQAAEITLVPSVKLQIGDRDNRGYYWDGGHWRDHGWWKQHYEWRGNRWHLHGPPPPPRHHKKAPHDHHGGHGPGKHHR
###
##FASTA
>U00096.3
AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGC
TTCTGAACTGGTTACCTGCCGTGAGTAAATTAAAATTTTATTGACTTAGGTCACTAAATACTTTAACCAA
TATAGGCATAGCGCACAGACAGATAAAAATTACAGAGTACACAACATCCATGAAACGCATTAGCACCACC
ATTACCACCACCATCACCATTACCACAGGTAACGGTGCGGGCTGACGCGTACAGGAAACACAGAAAAAAG
CCCGCACCTGACAGTGCGGGCTTTTTTTTTCGACCAAAGGTAACGAGGTAACAACCATGCGAGTGTTGAA
GTTCGGCGGTACATCAGTGGCAAATGCAGAACGTTTTCTGCGTGTTGCCGATATTCTGGAAAGCAATGCC
AGGCAGGGGCAGGTGGCCACCGTCCTCTCTGCCCCCGCCAAAATCACCAACCACCTGGTGGCGATGATTG
AAAAAACCATTAGCGGCCAGGATGCTTTACCCAATATCAGCGATGCCGAACGTATTTTTGCCGAACTTTT
GACGGGACTCGCCGCCGCCCAGCCGGGGTTCCCGCTGGCGCAATTGAAAACTTTCGTCGATCAGGAATTT
GCCCAAATAAAACATGTCCTGCATGGCATTAGTTTGTTGGGGCAGTGCCCGGATAGCATCAACGCTGCGC
TGATTTGCCGTGGCGAGAAAATGTCGATCGCCATTATGGCCGGCGTATTAGAAGCGCGCGGTCACAACGT
TACTGTTATCGATCCGGTCGAAAAACTGCTGGCAGTGGGGCATTACCTCGAATCTACCGTCGATATTGCT
GAGTCCACCCGCCGTATTGCGGCAAGCCGCATTCCGGCTGATCACATGGTGCTGATGGCAGGTTTCACCG
CCGGTAATGAAAAAGGCGAACTGGTGGTGCTTGGACGCAACGGTTCCGACTACTCTGCTGCGGTGCTGGC
TGCCTGTTTACGCGCCGATTGTTGCGAGATTTGGACGGACGTTGACGGGGTCTATACCTGCGACCCGCGT
CAGGTGCCCGATGCGAGGTTGTTGAAGTCGATGTCCTACCAGGAAGCGATGGAGCTTTCCTACTTCGGCG
CTAAAGTTCTTCACCCCCGCACCATTACCCCCATCGCCCAGTTCCAGATCCCTTGCCTGATTAAAAATAC
CGGAAATCCTCAAGCACCAGGTACGCTCATTGGTGCCAGCCGTGATGAAGACGAATTACCGGTCAAGGGC
ATTTCCAATCTGAATAACATGGCAATGTTCAGCGTTTCTGGTCCGGGGATGAAAGGGATGGTCGGCATGG
CGGCGCGCGTCTTTGCAGCGATGTCACGCGCCCGTATTTCCGTGGTGCTGATTACGCAATCATCTTCCGA
ATACAGCATCAGTTTCTGCGTTCCACAAAGCGACTGTGTGCGAGCTGAACGGGCAATGCAGGAAGAGTTC
TACCTGGAACTGAAAGAAGGCTTACTGGAGCCGCTGGCAGTGACGGAACGGCTGGCCATTATCTCGGTGG
TAGGTGATGGTATGCGCACCTTGCGTGGGATCTCGGCGAAATTCTTTGCCGCACTGGCCCGCGCCAATAT
CAACATTGTCGCCATTGCTCAGGGATCTTCTGAACGCTCAATCTCTGTCGTGGTAAATAACGATGATGCG
ACCACTGGCGTGCGCGTTACTCATCAGATGCTGTTCAATACCGATCAGGTTATCGAAGTGTTTGTGATTG
GCGTCGGTGGCGTTGGCGGTGCGCTGCTGGAGCAACTGAAGCGTCAGCAAAGCTGGCTGAAGAATAAACA
TATCGACTTACGTGTCTGCGGTGTTGCCAACTCGAAGGCTCTGCTCACCAATGTACATGGCCTTAATCTG
GAAAACTGGCAGGAAGAACTGGCGCAAGCCAAAGAGCCGTTTAATCTCGGGCGCTTAATTCGCCTCGTGA
AAGAATATCATCTGCTGAACCCGGTCATTGTTGACTGCACTTCCAGCCAGGCAGTGGCGGATCAATATGC
CGACTTCCTGCGCGAAGGTTTCCACGTTGTCACGCCGAACAAAAAGGCCAACACCTCGTCGATGGATTAC
TACCATCAGTTGCGTTATGCGGCGGAAAAATCGCGGCGTAAATTCCTCTATGACACCAACGTTGGGGCTG
GATTACCGGTTATTGAGAACCTGCAAAATCTGCTCAATGCAGGTGATGAATTGATGAAGTTCTCCGGCAT
TCTTTCTGGTTCGCTTTCTTATATCTTCGGCAAGTTAGACGAAGGCATGAGTTTCTCCGAGGCGACCACG
CTGGCGCGGGAAATGGGTTATACCGAACCGGACCCGCGAGATGATCTTTCTGGTATGGATGTGGCGCGTA
AACTATTGATTCTCGCTCGTGAAACGGGACGTGAACTGGAGCTGGCGGATATTGAAATTGAACCTGTGCT
GCCCGCAGAGTTTAACGCCGAGGGTGATGTTGCCGCTTTTATGGCGAATCTGTCACAACTCGACGATCTC
TTTGCCGCGCGCGTGGCGAAGGCCCGTGATGAAGGAAAAGTTTTGCGCTATGTTGGCAATATTGATGAAG
ATGGCGTCTGCCGCGTGAAGATTGCCGAAGTGGATGGTAATGATCCGCTGTTCAAAGTGAAAAATGGCGA
AAACGCCCTGGCCTTCTATAGCCACTATTATCAGCCGCTGCCGTTGGTACTGCGCGGATATGGTGCGGGC
AATGACGTTACAGCTGCCGGTGTCTTTGCTGATCTGCTACGTACCCTCTCATGGAAGTTAGGAGTCTGAC
ATGGTTAAAGTTTATGCCCCGGCTTCCAGTGCCAATATGAGCGTCGGGTTTGATGTGCTCGGGGCGGCGG
TGACACCTGTTGATGGTGCATTGCTCGGAGATGTAGTCACGGTTGAGGCGGCAGAGACATTCAGTCTCAA
CAACCTCGGACGCTTTGCCGATAAGCTGCCGTCAGAACCACGGGAAAATATCGTTTATCAGTGCTGGGAG
CGTTTTTGCCAGGAACTGGGTAAGCAAATTCCAGTGGCGATGACCCTGGAAAAGAATATGCCGATCGGTT
CGGGCTTAGGCTCCAGTGCCTGTTCGGTGGTCGCGGCGCTGATGGCGATGAATGAACACTGCGGCAAGCC
GCTTAATGACACTCGTTTGCTGGCTTTGATGGGCGAGCTGGAAGGCCGTATCTCCGGCAGCATTCATTAC
GACAACGTGGCACCGTGTTTTCTCGGTGGTATGCAGTTGATGATCGAAGAAAACGACATCATCAGCCAGC
AAGTGCCAGGGTTTGATGAGTGGCTGTGGGTGCTGGCGTATCCGGGGATTAAAGTCTCGACGGCAGAAGC
CAGGGCTATTTTACCGGCGCAGTATCGCCGCCAGGATTGCATTGCGCACGGGCGACATCTGGCAGGCTTC
ATTCACGCCTGCTATTCCCGTCAGCCTGAGCTTGCCGCGAAGCTGATGAAAGATGTTATCGCTGAACCCT
ACCGTGAACGGTTACTGCCAGGCTTCCGGCAGGCGCGGCAGGCGGTCGCGGAAATCGGCGCGGTAGCGAG
CGGTATCTCCGGCTCCGGCCCGACCTTGTTCGCTCTGTGTGACAAGCCGGAAACCGCCCAGCGCGTTGCC
GACTGGTTGGGTAAGAACTACCTGCAAAATCAGGAAGGTTTTGTTCATATTTGCCGGCTGGATACGGCGG
GCGCACGAGTACTGGAAAACTAAATGAAACTCTACAATCTGAAAGATCACAACGAGCAGGTCAGCTTTGC
GCAAGCCGTAACCCAGGGGTTGGGCAAAAATCAGGGGCTGTTTTTTCCGCACGACCTGCCGGAATTCAGC
CTGACTGAAATTGATGAGATGCTGAAGCTGGATTTTGTCACCCGCAGTGCGAAGATCCTCTCGGCGTTTA
TTGGTGATGAAATCCCACAGGAAATCCTGGAAGAGCGCGTGCGCGCGGCGTTTGCCTTCCCGGCTCCGGT
CGCCAATGTTGAAAGCGATGTCGGTTGTCTGGAATTGTTCCACGGGCCAACGCTGGCATTTAAAGATTTC
GGCGGTCGCTTTATGGCACAAATGCTGACCCATATTGCGGGTGATAAGCCAGTGACCATTCTGACCGCGA
CCTCCGGTGATACCGGAGCGGCAGTGGCTCATGCTTTCTACGGTTTACCGAATGTGAAAGTGGTTATCCT
CTATCCACGAGGCAAAATCAGTCCACTGCAAGAAAAACTGTTCTGTACATTGGGCGGCAATATCGAAACT
GTTGCCATCGACGGCGATTTCGATGCCTGTCAGGCGCTGGTGAAGCAGGCGTTTGATGATGAAGAACTGA
AAGTGGCGCTAGGGTTAAACTCGGCTAACTCGATTAACATCAGCCGTTTGCTGGCGCAGATTTGCTACTA
CTTTGAAGCTGTTGCGCAGCTGCCGCAGGAGACGCGCAACCAGCTGGTTGTCTCGGTGCCAAGCGGAAAC
TTCGGCGATTTGACGGCGGGTCTGCTGGCGAAGTCACTCGGTCTGCCGGTGAAACGTTTTATTGCTGCGA
CCAACGTGAACGATACCGTGCCACGTTTCCTGCACGACGGTCAGTGGTCACCCAAAGCGACTCAGGCGAC
GTTATCCAACGCGATGGACGTGAGTCAGCCGAACAACTGGCCGCGTGTGGAAGAGTTGTTCCGCCGCAAA
ATCTGGCAACTGAAAGAGCTGGGTTATGCAGCCGTGGATGATGAAACCACGCAACAGACAATGCGTGAGT
TAAAAGAACTGGGCTACACTTCGGAGCCGCACGCTGCCGTAGCTTATCGTGCGCTGCGTGATCAGTTGAA
TCCAGGCGAATATGGCTTGTTCCTCGGCACCGCGCATCCGGCGAAATTTAAAGAGAGCGTGGAAGCGATT
CTCGGTGAAACGTTGGATCTGCCAAAAGAGCTGGCAGAACGTGCTGATTTACCCTTGCTTTCACATAATC
TGCCCGCCGATTTTGCTGCGTTGCGTAAATTGATGATGAATCATCAGTAAAATCTATTCATTATCTCAAT
CAGGCCGGGTTTGCTTTTATGCAGCCCGGCTTTTTTATGAAGAAATTATGGAGAAAAATGACAGGGAAAA
AGGAGAAATTCTCAATAAATGCGGTAACTTAGAGATTAGGATTGCGGAGAATAACAACCGCCGTTCTCAT
CGAGTAATCTCCGGATATCGACCCATAACGGGCAATGATAAAAGGAGTAACCTGTGAAAAAGATGCAATC
TATCGTACTCGCACTTTCCCTGGTTCTGGTCGCTCCCATGGCAGCACAGGCTGCGGAAATTACGTTAGTC
CCGTCAGTAAAATTACAGATAGGCGATCGTGATAATCGTGGCTATTACTGGGATGGAGGTCACTGGCGCG
ACCACGGCTGGTGGAAACAACATTATGAATGGCGAGGCAATCGCTGGCACCTACACGGACCGCCGCCACC
GCCGCGCCACCATAAGAAAGCTCCTCATGATCATCACGGCGGTCATGGTCCAGGCAAACATCACCGCTAA
ATGACAAATGCCGGGTAACAATCCGGCATTCAGCGCCTGATGCGACGCTGGCGCGTCTTATCAGGCCTAC
GTTAATTCTGCAATATATTGAATCTGCATGCTTTTGTAGGCAGGATAAGGCGTTCACGCCGCATCCGGCA
TTGACTGCAAACTTAACGCTGCTCGTAGCGTTTAAACACCAGTTCGCCATTGCTGGAGGAATCTTCATCA
AAGAAGTAACCTTCGCTATTAAAACCAGTCAGTTGCTCTGGTTTGGTCAGCCGATTTTCAATAATGAAAC
GACTCATCAGACCGCGTGCTTTCTTAGCGTAGAAGCTGATGATCTTAAATTTGCCGTTCTTCTCATCGAG
GAACACCGGCTTGATAATCTCGGCATTCAATTTCTTCGGCTTCACCGATTTAAAATACTCATCTGACGCC
AGATTAATCACCACATTATCGCCTTGTGCTGCGAGCGCCTCGTTCAGCTTGTTGGTGATGATATCTCCCC
AGAATTGATACAGATCTTTCCCTCGGGCATTCTCAAGACGGATCCCCATTTCCAGACGATAAGGCTGCAT
TAAATCGAGCGGGCGGAGTACGCCATACAAGCCGGAAAGCATTCGCAAATGCTGTTGGGCAAAATCGAAA
TCGTCTTCGCTGAAGGTTTCGGCCTGCAAGCCGGTGTAGACATCACCTTTAAACGCCAGAATCGCCTGGC
GGGCATTCGCCGGCGTGAAATCTGGCTGCCAGTCATGAAAGCGAGCGGCGTTGATACCCGCCAGTTTGTC
GCTGATGCGCATCAGCGTGCTAATCTGCGGAGGCGTCAGTTTCCGCGCCTCATGGATCAACTGCTGGGAA
4 changes: 2 additions & 2 deletions io.go
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,7 @@ func BuildGff(sequence Sequence) []byte {
if feature.Name != "" {
featureName = feature.Name
} else {
featureName = sequence.Meta.Name
featureName = sequence.Meta.Locus.Name
}

var featureSource string
Expand Down Expand Up @@ -311,7 +311,7 @@ func BuildGff(sequence Sequence) []byte {

for letterIndex, letter := range sequence.Sequence {
letterIndex++
if letterIndex%70 == 0 && letterIndex != 0 {
if letterIndex%70 == 0 && letterIndex != 0 && letterIndex != sequence.Meta.RegionEnd {
gffBuffer.WriteRune(letter)
gffBuffer.WriteString("\n")
} else {
Expand Down
18 changes: 9 additions & 9 deletions io_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,13 @@ Gff related tests and benchmarks begin here.

func ExampleReadGff() {

sequence := ReadGff("data/ecoli-mg1655.gff")
sequence := ReadGff("data/ecoli-mg1655-short.gff")
fmt.Println(sequence.Meta.Name)
// Output: U00096.3
}

func ExampleParseGff() {
file, _ := ioutil.ReadFile("data/ecoli-mg1655.gff")
file, _ := ioutil.ReadFile("data/ecoli-mg1655-short.gff")
sequence := ParseGff(file)

fmt.Println(sequence.Meta.Name)
Expand All @@ -46,7 +46,7 @@ func ExampleParseGff() {

func ExampleBuildGff() {

sequence := ReadGff("data/ecoli-mg1655.gff")
sequence := ReadGff("data/ecoli-mg1655-short.gff")
gffBytes := BuildGff(sequence)
reparsedSequence := ParseGff(gffBytes)

Expand All @@ -56,7 +56,7 @@ func ExampleBuildGff() {
}

func ExampleWriteGff() {
sequence := ReadGff("data/ecoli-mg1655.gff")
sequence := ReadGff("data/ecoli-mg1655-short.gff")
WriteGff(sequence, "data/test.gff")
testSequence := ReadGff("data/test.gff")

Expand All @@ -69,7 +69,7 @@ func ExampleWriteGff() {
// TODO should delete output files.

func TestGffIO(t *testing.T) {
testInputPath := "data/ecoli-mg1655.gff"
testInputPath := "data/ecoli-mg1655-short.gff"
testOutputPath := "data/test.gff"

testSequence := ReadGff(testInputPath)
Expand Down Expand Up @@ -104,7 +104,7 @@ func TestGffIO(t *testing.T) {

func BenchmarkReadGff(b *testing.B) {
for i := 0; i < b.N; i++ {
ReadGff("data/ecoli-mg1655.gff")
ReadGff("data/ecoli-mg1655-short.gff")
}
}

Expand Down Expand Up @@ -239,7 +239,7 @@ func TestSnapgeneGenbankRegression(t *testing.T) {
}

func TestGenbankNewlineParsingRegression(t *testing.T) {
gbk := ReadGbk("data/bsub.gbk")
gbk := ReadGbk("data/puc19.gbk")

for _, feature := range gbk.Features {
if feature.SequenceLocation.Start == 410 && feature.SequenceLocation.End == 1750 && feature.Type == "CDS" {
Expand Down Expand Up @@ -302,7 +302,7 @@ func ExampleWriteJSON() {
}

func TestJSONIO(t *testing.T) {
testSequence := ReadGbk("data/bsub.gbk")
testSequence := ReadGbk("data/puc19.gbk")
WriteJSON(testSequence, "data/test.json")
readTestSequence := ReadJSON("data/test.json")

Expand All @@ -313,7 +313,7 @@ func TestJSONIO(t *testing.T) {
t.Errorf(" mismatch (-want +got):\n%s", diff)
}

gffTestSequence := ReadGff("data/ecoli-mg1655.gff")
gffTestSequence := ReadGff("data/ecoli-mg1655-short.gff")
WriteJSON(gffTestSequence, "data/testGff.json")
gffReadTestSequence := ReadJSON("data/testGff.json")

Expand Down
2 changes: 1 addition & 1 deletion poly/commands.go
Original file line number Diff line number Diff line change
Expand Up @@ -304,7 +304,7 @@ func optimizeCommand(c *cli.Context) error {
// if a file exists to weigh the table. Weigh it.
if fileExists(c.String("wt")) {
targetOrganism := fileParser(c, c.String("wt"))
codonTable.CreateWeights(targetOrganism.Sequence)
codonTable.OptimizeTable(targetOrganism.Sequence)
}

if isPipe(c) {
Expand Down
2 changes: 1 addition & 1 deletion poly/commands_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ TODO:
write subtest to check for empty output before merge
******************************************************************************/

var testFilePaths = []string{"../data/puc19.gbk", "../data/ecoli-mg1655.gff", "../data/sample.json", "../data/base.fasta"}
var testFilePaths = []string{"../data/puc19.gbk", "../data/ecoli-mg1655-short.gff", "../data/sample.json", "../data/base.fasta"}

func TestConvertPipe(t *testing.T) {

Expand Down
17 changes: 17 additions & 0 deletions primers_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,23 @@ func TestSantaLucia(t *testing.T) {
}
}

// TestSantaLuciaReverseComplement exercises SantaLucia with a primer that is
// its own reverse complement, and checks the returned melting temperature
// against a previously recorded value.
func TestSantaLuciaReverseComplement(t *testing.T) {
	// Palindromic sequence: reverse-complementing it yields the same string.
	testSeq := "ACGTAGATCTACGT"

	// Guard the precondition of this test: the input must equal its own
	// reverse complement, otherwise the case under test is not being hit.
	testReverseComplement := ReverseComplement(testSeq)
	if testSeq != testReverseComplement {
		t.Errorf("Input is not its own reverse complement. Got %q instead of %q", testReverseComplement, testSeq)
	}

	testCPrimer := 0.1e-6
	testCNa := 350e-3
	testCMg := 0.0
	expectedTM := 47.428514

	// Accept the result when the relative deviation from the recorded value is below 2%.
	if calcTM, _, _ := SantaLucia(testSeq, testCPrimer, testCNa, testCMg); math.Abs(expectedTM-calcTM)/expectedTM >= 0.02 {
		t.Errorf("SantaLucia has changed on test. Got %f instead of %f", calcTM, expectedTM)
	}
}

func ExampleMeltingTemp() {
sequenceString := "GTAAAACGACGGCCAGT" // M13 fwd
expectedTM := 52.8
Expand Down
10 changes: 8 additions & 2 deletions transformations.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,9 +109,15 @@ func Optimize(aminoAcids string, codonTable CodonTable) string {
return codons.String()
}

// CreateWeights weights each codon in a codon table according to input string codon frequency.
// GetOptimizationTable is a Sequence method that extracts the sequence's coding
// regions with getCodingRegions and hands the resulting string to
// codonTable.OptimizeTable, returning the CodonTable that call produces.
func (sequence Sequence) GetOptimizationTable(codonTable CodonTable) CodonTable {
	return codonTable.OptimizeTable(getCodingRegions(sequence))
}

// OptimizeTable weights each codon in a codon table according to input string codon frequency.
// This function actually mutates the CodonTable struct itself.
func (codonTable CodonTable) CreateWeights(sequence string) CodonTable {
func (codonTable CodonTable) OptimizeTable(sequence string) CodonTable {

sequence = strings.ToUpper(sequence)
codonFrequencyMap := getCodonFrequency(sequence)
Expand Down
20 changes: 10 additions & 10 deletions transformations_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,29 +28,29 @@ func TestTranslation(t *testing.T) {
func ExampleOptimize() {

gfpTranslation := "MASKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTFSYGVQCFSRYPDHMKRHDFFKSAMPEGYVQERTISFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYITADKQKNGIKANFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK*"

sequence := ReadGbk("data/puc19.gbk")
codonTable := GetCodonTable(11)

sequence := ReadGbk("data/bsub.gbk")
rawSequence := sequence.Sequence
codonTable.CreateWeights(rawSequence)
optimizationTable := sequence.GetOptimizationTable(codonTable)

optimizedSequence := Optimize(gfpTranslation, codonTable)
optimizedSequenceTranslation := Translate(optimizedSequence, codonTable)
optimizedSequence := Optimize(gfpTranslation, optimizationTable)
optimizedSequenceTranslation := Translate(optimizedSequence, optimizationTable)

fmt.Println(optimizedSequenceTranslation == gfpTranslation)
// output: true
}

func TestOptimize(t *testing.T) {
gfpTranslation := "MASKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTFSYGVQCFSRYPDHMKRHDFFKSAMPEGYVQERTISFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYITADKQKNGIKANFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK*"

sequence := ReadGbk("data/puc19.gbk")
codonTable := GetCodonTable(11)

sequence := ReadGbk("data/bsub.gbk")
rawSequence := sequence.Sequence
codonTable.CreateWeights(rawSequence)
optimizationTable := sequence.GetOptimizationTable(codonTable)

optimizedSequence := Optimize(gfpTranslation, codonTable)
optimizedSequenceTranslation := Translate(optimizedSequence, codonTable)
optimizedSequence := Optimize(gfpTranslation, optimizationTable)
optimizedSequenceTranslation := Translate(optimizedSequence, optimizationTable)

if optimizedSequenceTranslation != gfpTranslation {
t.Errorf("TestOptimize has failed. Translate has returned %q, want %q", optimizedSequenceTranslation, gfpTranslation)
Expand Down

0 comments on commit 4c95242

Please sign in to comment.