From 52aa5097b14873453a0fb1fa53ca8d26bbcb93b6 Mon Sep 17 00:00:00 2001 From: ChocoParrot Date: Sun, 12 Sep 2021 21:28:59 +0800 Subject: [PATCH] [refactor] chimpanzee mitochondrion DNA test --- orffinder/tests/gene.fasta | 258 ++++++++++++++++++++++++++++++++++--- orffinder/tests/test.py | 25 +++- 2 files changed, 260 insertions(+), 23 deletions(-) diff --git a/orffinder/tests/gene.fasta b/orffinder/tests/gene.fasta index d5b85a6..99b3fd2 100644 --- a/orffinder/tests/gene.fasta +++ b/orffinder/tests/gene.fasta @@ -1,20 +1,238 @@ ->NM_001115114.1 Danio rerio glyceraldehyde-3-phosphate dehydrogenase (gapdh), mRNA -ACTCACACCAAGTGTCAGGACGAACAGAGGCTTCTCACAAACGAGGACACAACCAAATCAGGCATAATGG -TTAAAGTTGGTATTAACGGATTCGGTCGCATTGGCCGTCTGGTGACCCGTGCTGCTTTCTTGACCAAGAA -AGTGGAGATCGTGGCCATCAATGACCCATTCATTGACCTTGATTACATGGTTTACATGTTCCAGTACGAC -TCCACCCATGGAAAGTACAAGGGTGAGGTTAAGGCAGAAGGCGGCAAACTGGTCATTGATGGTCATGCAA -TCACAGTCTATAGCGAGAGGGACCCAGCCAACATTAAGTGGGGTGATGCAGGTGCTACTTATGTTGTGGA -GTCTACTGGTGTCTTCACTACTATTGAGAAGGCTTCTGCTCACATTAAGGGTGGTGCAAAGAGAGTCATC -ATCTCTGCCCCAAGTGCAGATGCCCCCATGTTTGTCATGGGTGTCAACCATGAGAAATATGACAACTCTC -TCACAGTTGTAAGCAATGCCTCCTGCACCACCAACTGCCTGGCTCCTTTGGCAAAGGTCATCAATGATAA -CTTTGTCATCGTTGAAGGTCTTATGAGCACTGTTCATGCCATCACAGCAACACAGAAGACCGTTGATGGG -CCCTCTGGGAAGCTGTGGAGGGATGGCCGTGGTGCCAGTCAGAACATCATCCCAGCCTCCACTGGGGCTG -CCAAGGCTGTAGGCAAAGTAATTCCTGAGCTCAATGGCAAGCTTACTGGTATGGCCTTCCGTGTCCCCAC -CCCCAATGTCTCTGTTGTGGATCTGACAGTCCGTCTTGAGAAACCTGCCAAGTATGATGAGATCAAGAAA -GTCGTCAAGGCTGCAGCTGATGGGCCCATGAAAGGAATTCTGGGATACACGGAGCACCAGGTTGTGTCCA -CTGACTTCAATGGGGATTGCCGTTCATCCATCTTTGACGCTGGTGCTGGTATTGCTCTCAACGATCACTT -TGTCAAGCTGGTCACATGGTATGACAATGAGTTCGGTTACAGCAACCGTGTATGTGACCTGATGGCACAC -ATGGCCTCCAAGGAGTAGATGTGACCCCTTTGCTGTTTCTTTTTTTTGATACGCGACCATTCTCCCATCT -GGTTGAATGTTTGCACCACGTGCCTGGAAGGAAATTACATGCTTAAATTGAAGACCAATATTATTTTTAT -ATACTCTGTTCTGTTTCGTGTGTGAGGTTAAAAATAAATGTTGACTTCAAAGGCTTTTCTGTCTGTTAAC -AACTTGCGATGGAATAAAAGTCCTCTGTTTGTGAGAAATGAAAAAAAAAAAAAAAAAAAAAAAAAAAAA +>NC_001643.1 Pan troglodytes mitochondrion, complete genome +GTTTATGTAGCTTACCCCCTCAAAGCAATACACTGAAAATGTTTCGACGGGTTTACATCACCCCATAAAC +AAACAGGTTTGGTCCTAGCCTTTCTATTAGCTCTTAGTAAGATTACACATGCAAGCATCCCCGCCCCGTG +AGTCACCCTCTAAATCGCCATGATCAAAAGGAACAAGTATCAAGCACGCAGCAATGCAGCTCAAAACGCT +TAGCCTAGCCACACCCCCACGGGAGACAGCAGTGATAAACCTTTAGCAATAAACGAAAGTTTAACTAAGC +CATACTAACCTCAGGGTTGGTCAATTTCGTGCTAGCCACCGCGGTCATACGATTAACCCAAGTCAATAGA +AACCGGCGTAAAGAGTGTTTTAGATCACCCCCCCATAAAGCTAAAATTCACCTGAGTTGTAAAAAACTCC +AGCTGATACAAAATAAACTACGAAAGTGGCTTTAACACATCTGAATACACAATAGCTAAGACCCAAACTG +GGATTAGATACCCCACTATGCTTAGCCCTAAACTTCAACAGTTAAATTAACAAAACTGCTCGCCAGAACA +CTACGAGCCACAGCTTAAAACTCAAAGGACCTGGCGGTGCTTCATATCCCTCTAGAGGAGCCTGTTCTGT +AATCGATAAACCCCGATCAACCTCACCGCCTCTTGCTCAGCCTATATACCGCCATCTTCAGCAAACCCTG +ATGAAGGTTACAAAGTAAGCACAAGTACCCACGTAAAGACGTTAGGTCAAGGTGTAGCCTATGAGGTGGC +AAGAAATGGGCTACATTTTCTACCCCAGAAAATTACGATAACCCTTATGAAACCTAAGGGTCAAAGGTGG +ATTTAGCAGTAAACTAAGAGTAGAGTGCTTAGTTGAACAGGGCCCTGAAGCGCGTACACACCGCCCGTCA +CCCTCCTCAAGTATACTTCAAAGGATACTTAACTTAAACCCCCTACGTATTTATATAGAGGAGATAAGTC +GTAACATGGTAAGTGTACTGGAAAGTGCACTTGGACGAACCAGAGTGTAGCTTAACATAAAGCACCCAAC +TTACACTTAGGAGATTTCAACTCAACTTGACCACTCTGAGCCAAACCTAGCCCCAAACCCCCTCCACCCT +ACTACCAAACAACCTTAACCAAACCATTTACCCAAATAAAGTATAGGCGATAGAAATTGTAAACCGGCGC +AATAGACATAGTACCGCAAGGGAAAGATGAAAAATTATACCCAAGCATAATACAGCAAGGACTAACCCCT +GTACCTTTTGCATAATGAATTAACTAGAAATAACTTTGCAAAGAGAACCAAAGCTAAGACCCCCGAAACC +AGACGAGCTACCTAAGAACAGCTAAAAGAGCACACCCGTCTATGTAGCAAAATAGTGGGAAGATTTATAG +GTAGAGGCGACAAACCTACCGAGCCTGGTGATAGCTGGTTGTCCAAGATAGAATCTTAGTTCAACTTTAA +ATTTACCTACAGAACCCTCTAAATCCCCTTGTAAACTTAACTGTTAGTCCAAAGAGGAACAGCTCTTTAG +ACACTAGGAAAAAACCTTGTAAAGAGAGTAAAAAATTTAACACCCATAGTAGGCCTAAAAGCAGCCACCA +ATTAAGAAAGCGTTCAAGCTCAACACCCACAACCTTAAAGATCCCAAACATACAACCGAACTCCTTACAC +CCAATTGGACCAATCTATTACCCCATAGAAGAACTAATGTTAGTATAAGTAACATGAAAACATTCTCCTC +CGCATAAGCCTACATCAGACCAAAATATTAAACTGACAATTAACAGCCTAATATCTACAATCAACCAACA +AGCCATTATTACCCCCGCTGTTAACCCAACACAGGCATGCCCACAAGGAAAGGTTAAAAAAAGTAAAAGG +AACTCGGCAAATCTTACCCCGCCTGTTTACCAAAAACATCACCTCTAGCATTACCAGTATTAGAGGCACC +GCCTGCCCGGTGACATATGTTTAACGGCCGCGGTACCCTAACCGTGCAAAGGTAGCATAATCACTTGTTC +CTTAAATAGGGACTTGTATGAATGGCTCCACGAGGGTTTAGCTGTCTCTTACTTTCAACCAGTGAAATTG +ACCTACCCGTGAAGAGGCGGGCATAACATAACAAGACGAGAAGACCCTATGGAGCTTTAATTCATTAATG +CAAACAATACTTAACAAACCTACAGGTCCTAAACTATTAAACCTGCATTAAAAATTTCGGTTGGGGCGAC +CTCGGAGCACAACCCAACCTCCGAGCAATACATGCTAAGACCTCACCAGTCAAAGCGAATTACTACATCC +AATTGATCCAATGACTTGACCAACGGAACAAGTTACCCTAGGGATAACAGCGCAATCCTATTCCAGAGTC +CATATCAACAATAGGGTTTACGACCTCGATGTTGGATCAGGACATCCCGATGGTGCAGCCGCTATTAAAG +GTTCGTTTGTTCAACGATTAAAGTCCTACGTGATCTGAGTTCAGACCGGAGTAATCCAGGTCGGTTTCTA +TCTGTTCTAAATTTCTCCCTGTACGAAAGGACAAGAGAAATGAGGCCTACTTCACAAAGCGCCTTCCCCA +ATAAATGATATTATCTCAATTTAGCGCCATGCCAACACCCACTCAAGAACAGAGTTTGTTAAGATGGCAG +AGCCCGGTAATTGCATAAAACTTAAAACTTTACAATCAGAGGTTCAATTCCTCTTCTTGACAACACACCC +ATGACCAACCTCCTACTCCTCATTGTACCCATCCTAATCGCAATAGCATTCCTAATGCTAACCGAACGAA +AAATTCTAGGCTACATACAACTACGCAAAGGTCCCAACATTGTAGGTCCTTACGGGCTATTACAGCCCTT +CGCTGACGCCATAAAACTCTTCACTAAAGAACCCTTAAAACCCTCCACTTCAACCATTACCCTCTACATC +ACCGCCCCAACCCTAGCCCTCACCATTGCCCTCTTACTATGAACCCCCCTCCCCATACCCAACCCCCTAG +TCAATCTTAACTTAGGCCTCCTATTTATTCTAGCCACCTCCAGCCTAGCCGTTTACTCAATCCTCTGATC +AGGGTGAGCATCAAACTCGAACTACGCCTTAATCGGTGCACTACGAGCAGTAGCCCAAACAATCTCATAC +GAAGTCACTCTAGCCATTATCCTACTGTCAACGCTACTAATAAGTGGCTCCTTCAATCTCTCTACCCTTG +TCACAACACAAGAGCACCTCTGACTAATCCTGCCAACATGACCCCTGGCCATAATATGATTTATCTCTAC +ACTAGCAGAGACCAACCGAACTCCCTTCGACCTTACTGAAGGAGAATCTGAACTAGTCTCAGGCTTTAAT +ATCGAGTATGCCGCAGGCCCCTTTGCCCTATTTTTCATAGCCGAATACATAAACATTATTATAATAAACA +CCCTCACTGCTACAATCTTCCTAGGAGCAACATACAATACTCACTCCCCTGAACTCTACACGACATATTT +TGTCACCAAAGCTCTACTTCTAACCTCCCTGTTCCTATGAATTCGAACAGCATATCCCCGATTTCGCTAC +GACCAGCTCATACACCTCCTATGAAAAAACTTCCTACCACTCACCCTAGCATCACTCATGTGATATATCT +CCATACCCACTACAATCTCCAGCATCCCCCCTCAAACCTAAGAAATATGTCTGATAAAAGAATTACTTTG +ATAGAGTAAATAATAGGAGTTCAAATCCCCTTATTTCTAGGACTATAAGAATCGAACTCATCCCTGAGAA +TCCAAAATTCTCCGTGCCACCTATCACACCCCATCCTAAAGTAAGGTCAGCTAAATAAGCTATCGGGCCC +ATACCCCGAAAATGTTGGTTACACCCTTCCCGTACTAATTAATCCCCTAGCCCAACCCATCATCTACTCT +ACCATCCTTACAGGCACGCTCATTACAGCGCTAAGCTCACACTGATTTTTCACCTGAGTAGGCCTAGAAA +TAAATATACTAGCTTTTATCCCAATCCTAACCAAAAAAATAAGCCCCCGCTCCACAGAAGCCGCCATCAA +ATACTTTCTCACACAAGCAACTGCGTCCATAATTCTCCTGATAGCTATCCTCTCCAACAGCATACTCTCC +GGACAATGAACCATAACCAATACTACCAATCAATACTCATCATTAATAATTATAATAGCAATGGCAATAA +AACTAGGAATAGCCCCCTTTCACTTTTGAGTTCCAGAAGTTACCCAAGGCACCCCCCTAATATCCGGCCT +ACTCCTCCTCACATGACAAAAATTAGCCCCTATTTCAATTATATACCAAATCTCCTCATCACTGAACGTA +AACCTTCTCCTCACCCTTTCAATCTTGTCCATTATAGCAGGCAGCTGAGGCGGACTAAACCAAACCCAAC +TACGCAAAATCCTAGCATACTCCTCAATCACCCACATAGGCTGAATAATAGCAGTCCTACCATATAACCC +TAACATAACCATTCTTAATTTAACCATTTACATCATCCTAACTACTACCGCATTTCTGCTACTCAACTTA +AACTCCAGCACCACAACCCTACTACTATCTCGCACCTGAAACAAGCTAACATGATTAACTCCCCTAATTC +CATCCACCCTCCTCTCCCTAGGAGGCCTACCCCCACTAACTGGCTTCTTACCCAAATGAGTTATCATCGA +AGAATTCACAAAAAATAATAGCCTCATCATCCCCACCATCATAGCCATCATCACTCTCCTTAACCTCTAT +TTCTACCTACGCCTAATCTACTCCACCTCAATTACACTACTTCCCATATCTAATAACGTAAAAATAAAAT +GACAATTCGAACATACAAAACCCACCCCCTTCCTCCCTACACTCATCACCCTTACCACACTGCTTCTACC +CATCTCCCCCTTCATACTAATAATCTTATAGAAATTTAGGTTAAGCACAGACCAAGAGCCTTCAAAGCCC +TCAGCAAGTTACAATACTTAATTTCTGCAACAACTAAGGACTGCAAAACCCCACTCTGCATCAACTGAAC +GCAAATCAGCCACTTTAATTAAGCTAAGCCCTTACTAGATTAATGGGACTTAAACCCACAAACATTTAGT +TAACAGCTAAACACCCTAATCAACTGGCTTCAATCTACTTCTCCCGCCGCAAGAAAAAAAGGCGGGAGAA +GCCCCGGCAGGTTTGAAGCTGCTTCTTCGAATTTGCAATTCAATATGAAAATCACCTCAGAGCTGGTAAA +AAGAGGCTTAACCCCTGTCTTTAGATTTACAGTCCAATGCTTCACTCAGCCATTTTACCCCACCCTACTG +ATGTTCACCGACCGCTGACTATTCTCTACAAACCACAAAGATATTGGAACACTATACCTACTATTCGGTG +CATGAGCTGGAGTCCTGGGCACAGCCCTAAGTCTCCTTATTCGGGCTGAACTAGGCCAACCAGGCAACCT +CCTAGGTAATGACCACATCTACAATGTCATCGTCACAGCCCATGCATTCGTAATAATCTTCTTCATAGTA +ATGCCTATTATAATCGGAGGCTTTGGCAACTGGCTAGTTCCCTTGATAATTGGTGCCCCCGACATGGCAT +TCCCCCGCATAAACAACATAAGCTTCTGGCTCCTGCCCCCTTCTCTCCTACTTCTACTTGCATCTGCCAT +AGTAGAAGCCGGCGCGGGAACAGGTTGAACAGTCTACCCTCCCTTAGCGGGAAACTACTCGCATCCTGGA +GCCTCCGTAGACCTAACCATCTTCTCCTTACATCTGGCAGGCATCTCCTCTATCCTAGGAGCCATTAACT +TCATCACAACAATTATTAATATAAAACCTCCTGCCATGACCCAATACCAAACACCCCTCTTCGTCTGATC +CGTCCTAATCACAGCAGTCTTACTTCTCCTATCCCTCCCAGTCCTAGCTGCTGGCATCACCATACTATTG +ACAGATCGTAACCTCAACACTACCTTCTTCGACCCAGCCGGGGGAGGAGACCCTATTCTATATCAACACT +TATTCTGATTTTTTGGCCACCCCGAAGTTTATATTCTTATCCTACCAGGCTTCGGAATAATTTCCCACAT +TGTAACTTATTACTCCGGAAAAAAAGAACCATTTGGATATATAGGCATGGTTTGAGCTATAATATCAATT +GGCTTCCTAGGGTTTATCGTGTGAGCACACCATATATTTACAGTAGGGATAGACGTAGACACCCGAGCCT +ATTTCACCTCCGCTACCATAATCATTGCTATTCCTACCGGCGTCAAAGTATTCAGCTGACTCGCTACACT +TCACGGAAGCAATATGAAATGATCTGCCGCAGTACTCTGAGCCCTAGGGTTTATCTTTCTCTTCACCGTA +GGTGGCCTAACCGGCATTGTACTAGCAAACTCATCATTAGACATCGTGCTACACGACACATACTACGTCG +TAGCCCACTTCCACTACGTTCTATCAATAGGAGCTGTATTCGCCATCATAGGAGGCTTCATTCACTGATT +CCCCCTATTCTCAGGCTATACCCTAGACCAAACCTATGCCAAAATCCAATTTGCCATCATGTTCATTGGC +GTAAACCTAACCTTCTTCCCACAGCACTTCCTTGGCCTATCTGGGATGCCCCGACGTTACTCGGACTACC +CCGATGCATACACCACATGAAATGTCCTATCATCCGTAGGCTCATTTATCTCCCTGACAGCAGTAATATT +AATAATTTTCATGATTTGAGAAGCCTTTGCTTCAAAACGAAAAGTCCTAATAGTAGAAGAGCCCTCCGCA +AACCTGGAATGACTATATGGATGCCCCCCACCCTACCACACATTCGAAGAACCCGTATACATAAAATCTA +GACAAAAAAGGAAGGAATCGAACCCCCTAAAGCTGGTTTCAAGCCAACCCCATGACCTCCATGACTTTTT +CAAAAAGATATTAGAAAAACTATTTCATAACTTTGTCAAAGTTAAATTACAGGTTAACCCCCGTATATCT +TAATGGCACATGCAGCGCAAGTAGGTCTACAAGATGCTACTTCCCCTATCATAGAAGAACTTATTATCTT +TCACGACCATGCCCTCATAATTATCTTTCTCATCTGCTTTCTAGTCCTATACGCCCTTTTCCTAACACTC +ACAACAAAACTAACTAATACTAGTATTTCAGACGCCCAGGAAATAGAAACCGTCTGAACTATCCTGCCCG +CCATCATCCTAGTCCTTATTGCCCTACCATCCCTGCGTATCCTTTACATAACAGACGAGGTCAACGACCC +CTCCTTTACTATTAAATCAATCGGCCATCAATGATATTGAACCTACGAATACACCGACTACGGCGGGCTA +ATCTTCAACTCCTACATACTCCCCCCATTATTTCTAGAACCAGGTGATCTACGACTCCTTGACGTTGATA +ACCGAGTGGTCCTCCCAGTTGAAGCCCCCGTTCGTATAATAATTACATCACAAGATGTTCTACACTCATG +AGCTGTTCCCACATTAGGCCTAAAAACAGACGCAATTCCCGGACGCCTAAACCAAACCACTTTCACCGCC +ACACGACCAGGAGTATACTACGGCCAATGCTCAGAAATCTGTGGAGCAAACCACAGTTTTATACCCATCG +TCCTAGAATTAATCCCTCTAAAAATCTTTGAAATAGGACCCGTATTCACTCTATAGCACCTTCTCTACCC +CTCTCCAGAGCTCACTGTAAAGCTAACCTAGCATTAACCTTTTAAGTTAAAGATTAAGAGGACCGACACC +TCTTTACAGTGAAATGCCCCAACTAAATACCGCCGTATGACCCACCATAATTACCCCCATACTCCTGACA +CTATTTCTCGTCACCCAACTAAAAATATTAAATTCAAATTACCATCTACCCCCCTCACCAAAACCCATAA +AAATAAAAAACTACAATAAACCCTGAGAACCAAAATGAACGAAAATCTATTCGCTTCATTCGCTGCCCCC +ACAATCCTAGGCTTACCCGCCGCAGTACTAATCATTCTATTCCCCCCTCTACTGGTCCCCACTTCTAAAC +ATCTCATCAACAACCGACTAATTACCACCCAACAATGACTAATTCAACTGACCTCAAAACAAATAATAAC +TATACACAGCACTAAAGGACGAACCTGATCTCTCATACTAGTATCCTTAATCATTTTTATTACCACAACC +AATCTTCTTGGGCTTCTACCCCACTCATTCACACCAACCACCCAACTATCTATAAACCTAGCCATGGCTA +TCCCCCTATGAGCAGGCGCAGTAGTCATAGGCTTTCGCTTTAAGACTAAAAATGCCCTAGCCCACTTCTT +ACCGCAAGGCACACCTACACCCCTTATCCCCATACTAGTTATCATCGAAACTATTAGCCTACTCATTCAA +CCAATAGCCTTAGCCGTACGTCTAACCGCTAACATTACTGCAGGCCACCTACTCATGCACCTAATTGGAA +GCGCCACACTAGCATTATCAACTATCAATCTACCCTATGCACTCATTATCTTCACAATTCTAATCCTACT +GACTATTCTAGAGATCGCCGTCGCCTTAATCCAAGCCTACGTTTTTACACTTCTAGTGAGCCTCTACCTG +CACGACAACACATAATGACCCACCAATCACATGCCTACCACATAGTAAAACCCAGCCCATGACCCCTAAC +AGGGGCCCTCTCGGCCCTCCTAATAACCTCCGGCCTGGCCATATGATTCCACTTCTACTCCACAACACTA +CTCACACTAGGCTTACTAACTAACACATTGACCATATATCAATGATGACGCGATGTTATACGAGAAGGCA +CATACCAAGGCCACCACACACCACCCGTCCAAAAAGGTCTCCGATATGGGATAATTCTTTTTATTACCTC +AGAAGTTTTTTTCTTTGCAGGATTTTTTTGAGCTTTCTACCACTCCAGCCTAGCCCCTACCCCCCAGCTA +GGAGGACACTGGCCCCCAACAGGTATTACCCCACTAAATCCCCTAGAAGTCCCACTCCTAAACACATCTG +TATTACTCGCATCAGGAGTATCAATTACTTGAGCCCATCACAGCTTAATAGAAAATAACCGAAACCAAAT +AATTCAAGCACTGCTTATTACGATTCTACTAGGTCTTTATTTTACCCTCCTACAAGCCTCAGAATATTTC +GAATCCCCTTTTACCATTTCCGATGGCATCTACGGCTCAACATTCTTTGTAGCCACAGGCTTCCACGGAC +TCCACGTCATTATTGGATCAACTTTCCTCACTATCTGCCTCATCCGCCAACTAATATTTCACTTCACATC +CAAACATCACTTCGGCTTTCAAGCCGCCGCCTGATACTGACACTTCGTAGATGTAGTCTGACTATTTCTA +TATGTCTCTATTTACTGATGAGGATCTTACTCTTTTAGTATAAGTAGTACCGTTAACTTCCAATTAACTA +GTTTTGACAACATTCAAAAAAGAGTAATAAACTTCGTCCTAATTTTAATAACCAATACCCTTCTAGCCCT +ACTACTGATAATTATCACATTCTGACTACCACAACTCAACAGCTACATAGAAAAATCTACCCCTTACGAA +TGTGGCTTCGACCCTATATCCCCCGCCCGCGTCCCCTTCTCCATAAAATTTTTCCTAGTAGCCATCACCT +TCCTATTATTTGACCTAGAAATTGCCCTCCTATTGCCCTTACCTTGAGCCCTACAAACGGCCAACCTACC +ACTAATAGTCACATCATCCCTCTTATTAATTACTATCCTAGCCCTAAGCCTCGCCTACGAATGATTACAA +AAAGGGTTAGACTGAACCGAATTGGTATATAGTTTAAATAAAACGAATGATTTCGACTCATTAAATTATG +ATAATCATATTTACCAAATGCCCCTTATTTATATAAATATTATACTAGCATTTACCATCTCACTTCTAGG +AATACTAGTATATCGCTCACACCTAATATCTTCCCTACTATGCCTAGAAGGAATAATACTATCACTGTTC +ATCATAGCCACCCTCATAACCCTCAATACTCACTCCCTCTTAGCCAATATTGTACCCATCACCATACTAG +TCTTTGCTGCCTGCGAAGCAGCAGTAGGTCTAGCACTACTAGTTTCAATCTCTAACACATATGGCTTAGA +CTACGTACATAACCTAAACCTACTCCAATGCTAAAACTAATCATCCCGACAATTATATTACTACCACTAA +CATGATTCTCTAAAAAACGTATAATTTGAATCAACACAACCACTCACAGCCTAATTATCAGCACCATTCC +CTTACTATTTTTTAACCAAATTAACAACAACCTATTCAGCTGTTCCCTGCCCTTCTCCTCCGACCCCTTA +ACAACTCCCCTCCTAATATTAACTGCTTGACTTCTACCCCTCACAATCATAGCAAGCCAGCGCCACCTAT +CCAACGAACCACTATCACGAAAAAAACTCTACCTCTCCATGCTAATTTCCCTCCAAATCTCCTTAATTAT +AACATTCTCGGCCACAGAGCTAATTATATTTTATATCTTCTTCGAAACCACACTTATCCCCACCCTGGCT +ATCATCACCCGATGGGGTAACCAACCAGAACGCCTGAACGCAGGTACATACTTCCTATTCTATACCCTAG +TAGGCTCCCTCCCCCTACTCATCGCACTAATCTATACCCACAACACCCTAGGCTCACTAAATATCCTATT +ACTCACTCTTACAACCCAAGAACTATCAAACACCTGAGCCAACAACTTAATATGACTAGCGTACACGATG +GCTTTCATGGTAAAAATACCCCTTTACGGACTCCACCTATGACTCCCTAAAGCCCATGTCGAAGCCCCTA +TTGCCGGGTCAATGGTACTTGCTGCAGTACTCTTAAAATTAGGTGGCTATGGCATAATACGCCTCACACT +CATCCTCAACCCCCTAACAAAACATATAGCCTATCCCTTCCTCATGTTGTCCTTATGAGGTATAATCATA +ACAAGCTCCATCTGCCTGCGACAAACAGACCTAAAATCGCTCATTGCATACCCTTCAGTCAGCCACATAG +CCCTCGTAGTAACAGCCATTCTCATCCAAACCCCCTGAAGCTTCACCGGCGCAATTATCCTCATAATCGC +CCACGGACTTACATCCTCATTATTATCCTGCCTAGCAAACTCAAATTATGAACGCACCCACAGTCGCATC +ATAATTCTCTCCCAAGGACTTCAAACTCTACTCCCACTAATAGCCTTTTGATGACTCCTGGCAAGCCTCG +CTAACCTCGCCCTACCCCCTACCATTAATCTCCTAGGGGAACTCTCCGTGCTAGTAACCTCATTCTCCTG +ATCAAATACCACTCTCCTACTCACAGGATTCAACATACTAATCACAGCCCTGTACTCCCTCTACATGTTT +ACCACAACACAATGAGGCTCACTCACCCACCACATTAATAGCATAAAGCCCTCATTCACACGAGAAAACA +CTCTCATATTTTTACACCTATCCCCCATCCTCCTTCTATCCCTCAATCCTGATATCATCACTGGATTCAC +CTCCTGTAAATATAGTTTAACCAAAACATCAGATTGTGAATCTGACAACAGAGGCTCACGACCCCTTATT +TACCGAGAAAGCTTATAAGAACTGCTAACTCGTATTCCCATGCCTAACAACATGGCTTTCTCAACTTTTA +AAGGATAACAGTTATCCATTGGTCTTAGGCCCCAAAAATTTTGGTGCAACTCCAAATAAAAGTAATAACC +ATGTATGCTACCATAACCACCTTAGCCCTAACTTCCTTAATTCCCCCCATCCTCGGCGCCCTCATTAACC +CTAACAAAAAAAACTCATACCCCCATTACGTGAAATCCATTATCGCATCCACCTTTATCATTAGCCTTTT +CCCCACAACAATATTCATATGCCTAGACCAAGAAACTATTATCTCGAACTGACACTGAGCAACAACCCAA +ACAACCCAACTCTCCCTGAGCTTTAAACTAGACTATTTCTCCATAACATTTATCCCCGTAGCACTGTTCG +TTACATGATCCATCATAGAATTCTCACTATGATATATAGACTCAGACCCCAACATCAACCAATTCTTCAA +ATACTTACTTATCTTCCTAATTACTATACTAATCCTAGTCACCGCTAACAACCTATTCCAACTCTTCATC +GGCTGAGAAGGCGTAGGAATTATATCCTTTCTACTCATTAGCTGATGGTACGCCCGAACAGATGCCAACA +CAGCAGCCATCCAAGCAATCCTATATAACCGTATCGGTGATATTGGTTTTGTCCTAGCCCTAGCATGATT +TCTCCTACACTCCAACTCATGAGATCCACAACAAATAATCCTCCTAAGTACTAATACAGACCTTACTCCA +CTACTAGGCTTCCTCCTAGCAGCAGCAGGCAAATCAGCTCAACTAGGCCTTCACCCCTGACTCCCCTCAG +CCATAGAAGGCCCTACCCCTGTTTCAGCCCTACTCCACTCAAGCACCATAGTCGTAGCAGGAATCTTCCT +ACTCATCCGCTTCTACCCCCTAGCAGAGAATAACCCACTAATCCAAACTCTCACGCTATGCCTAGGCGCT +ATCACCACCCTATTCGCAGCAGTCTGCGCCCTCACACAAAATGACATCAAAAAAATCGTGGCCTTCTCCA +CTTCAAGCCAACTAGGACTCATAATAGTTACAATCGGTATCAACCAACCACACCTAGCATTCCTTCACAT +CTGCACCCACGCTTTCTTCAAAGCCATACTATTCATATGCTCCGGATCCATTATTCACAACCTCAATAAT +GAGCAAGACATTCGAAAAATAGGAGGATTACTCAAAACCATACCCCTCACTTCAACCTCCCTCACCATTG +GGAGCCTAGCATTAGCAGGAATACCCTTCCTCACAGGTTTCTACTCCAAAGACCTCATCATCGAAACCGC +TAACATATCATACACAAACGCCTGAGCCCTATCTATTACTCTCATCGCCACCTCTCTGACAAGCGCCTAC +AGCACCCGAATAATCCTCCTCACCCTAACAGGTCAACCTCGCTTCCCAACCCTCACCAACATTAACGAAA +ACAACCCCACTCTGTTAAATCCCATTAAACGCCTAACCATTGGAAGCTTATTTGCAGGATTTCTCATTAC +CAACAACATTCTCCCCATATCTACTCCCCAAGTGACAATTCCCCTTTACTTAAAACTTACAGCCCTAGGC +GTTACTTCCCTAGGACTTCTAACAGCCCTAGACCTCAATTACCTAACCAGCAAGCTCAAAATAAAATCCC +CACTATATACATTTCACTTCTCTAATATACTCGGATTCTACCCTAACATTATACACCGCTCGATCCCCTA +TCTAGGCCTTCTTACAAGCCAAAACCTACCCCTACTTCTTCTAGACCTGACCTGACTAGAGAAACTATTA +CCTAAAACAATTTCACAGTACCAAATCTCCGCTTCCATTACCACCTCAACCCAAAAAGGCATGATCAAAC +TTTATTTCCTCTCTTTTTTCTTCCCTCTCATCTTAACCTTACTCCTAATCACATAACCTATTCCCCCGAG +CAATCTCAATCACAATGTATACACCAACAAACAATGTCCAACCAGTAACTACTACTAACCAACGCCCATA +ATCATATAAGGCCCCCGCACCAATAGGATCCTCCCGAATCAGCCCTGGCCCCTCCCCTTCATAAATTATT +CAACTTCCCACGCTATTAAAATTTACCACAACCACCATCCCATCATACCCTTTTACCCATAACACTAATC +CTACCTCCATCGCCAGTCCTACTAAAACACTAACCAAAACCTCAACCCCTGACCCCCATGCCTCAGGATA +CTCCTCAATAGCCATAGCCGTAGTATACCCAAAAACAACCATTATTCCCCCCAAATAAATTAAAAAAACC +ATTAAACCTATATAACCTCCCCCATAATTCAAAATGATGGCACACCCAACTACACCACTAACAATCAATA +CTAAACCCCCATAAATGGGAGAAGGCTTAGAAGAAAACCCCACAAACCCTATCACTAAACTCACACTCAA +TAAAAATAAAGCATATGTCATTATTCTCGCACGGACTACAACCACGACCAATGATATGAAAAACCATCGT +TGTATTTCAACTACAAGAACACCAATGACCCCGACACGCAAAATTAACCCACTAATAAAATTAATTAATC +ACTCATTTATCGACCTCCCCACCCCATCCAACATTTCCGCATGATGGAACTTCGGCTCACTTCTCGGCGC +CTGCCTAATCCTTCAAATTACCACAGGATTATTCCTAGCTATACACTACTCACCAGACGCCTCAACCGCC +TTCTCGTCGATCGCCCACATCACCCGAGACGTAAACTATGGTTGGATCATCCGCTACCTCCACGCTAACG +GCGCCTCAATATTTTTTATCTGCCTCTTCCTACACATCGGCCGAGGTCTATATTACGGCTCATTTCTCTA +CCTAGAAACCTGAAACATTGGCATTATCCTCTTGCTCACAACCATAGCAACAGCCTTTATGGGCTATGTC +CTCCCATGAGGCCAAATATCCTTCTGAGGAGCCACAGTAATTACAAACCTACTGTCCGCTATCCCATACA +TCGGAACAGACCTGGTCCAGTGAGTCTGAGGAGGCTACTCAGTAGACAGCCCTACCCTTACACGATTCTT +CACCTTCCACTTTATCTTACCCTTCATCATCACAGCCCTAACAACACTTCATCTCCTATTCTTACACGAA +ACAGGATCAAATAACCCCCTAGGAATCACCTCCCACTCCGACAAAATTACCTTCCACCCCTACTACACAA +TCAAAGATATCCTTGGCTTATTCCTTTTCCTCCTTATCCTAATGACATTAACACTATTCTCACCAGGCCT +CCTAGGCGATCCAGACAACTATACCCTAGCTAACCCCCTAAACACCCCACCCCACATTAAACCCGAGTGA +TACTTTCTATTTGCCTACACAATCCTCCGATCCATCCCCAACAAACTAGGAGGCGTCCTCGCCCTACTAC +TATCTATCCTAATCCTAACAGCAATCCCTGTCCTCCACACATCCAAACAACAAAGCATAATATTTCGCCC +ACTAAGCCAACTGCTTTACTGACTCCTAGCCACAGACCTCCTCATCCTAACCTGAATCGGAGGACAACCA +GTAAGCTACCCCTTCATCACCATCGGACAAATAGCATCCGTATTATACTTCACAACAATCCTAATCCTAA +TACCAATCGCCTCTCTAATCGAAAACAAAATACTTGAATGAACCTGCCCTTGTAGTATAAACTAATACAC +CGGTCTTGTAAACCGGAAACGAAAACTTTCTTCCAAGGACAAATCAGAGAAAAAGTAATTAACTTCACCA +TCAGCACCCAAAGCTAAGATTCTAATTTAAACTATTCTCTGTTCTTTCATGGGGAAGCAAATTTAGGTAC +CACCTAAGTACTGGCTCATTCATTACAACCGCTATGTATTTCGTACATTACTGCCAGCCACCATGAATAT +CGTACAGTACCATATCACCCAACTACCTATAGTACATAAAATCCACTCCCACATCAAAACCTTCACTCCA +TGCTTACAAGCACGCACAACAATCAACTCCCAACTGTCGAACATAAAACACAATTCCAACGACACCCCTC +CCCCACCCCGATACCAACAGACCTATCTCCCCTTGACAGAACATAGTACATACAACCATACACCGTACAT +AGCACATTACAGTCAAACCCCTCCTCGCCCCCACGGATGCTCCCCCTCAGATAGGAATCCCTTGGTCACC +ATCCTCCGTGAAATCAATATCCCGCACAAGAGTGACTCTCCTCGCTCCGGGCCCATAACATCTGGGGGTA +GCTAAAGTGAACTGTATCCGACATCTGGTTCCTACCTCAGGGCCATGAAGTTCAAAAGACTCCCACACGT +TCCCCTTAAATAAGACATCACGATGGATCACAGGTCTATCACCCTATTAACCAGTCACGGGAGCCTTCCA +TGCATTTGGTATTTTCGTCTGGGGGGTGTGCACGCGATAGCATTGCGAAACGCTGGCCCCGGAGCACCCT +ATGTCGCAGTATCTGTCTTTGATTCCTGCCCCATTGTATTATTTATCGCACCTACGTTCAATATTACGAC +CTAGCATACCTACTAAAGTGTGTTGATTAATTAATGCTTGCAGGACATAACAACAGCAGCAAAATGCTCA +CATAACTGCTTTCCACACCAACATCATAACAAAAAATTCCCACAAACCCCCCCTTCCCCCCGGCCACAGC +ACTCAAACAAATCTCTGCCAAACCCCAAAAACAAAGAACCCAGACGCCAGCCTAGCCAGACTTCAAATTT +CATCTTTAGGCGGTATGCACTTTTAACAGTCACCCCTCAATTAACATGCCCTCCCCCCTCAACTCCCATT +CTACTAGCCCCAGCAACGTAACCCCCTACTCACCCTACTCAACACATATACCGCTGCTAACCCCATACCC +TGAACCAACCAAACCCCAAAGACACCCCTACACA diff --git a/orffinder/tests/test.py b/orffinder/tests/test.py index e3567c4..5229dd8 100644 --- a/orffinder/tests/test.py +++ b/orffinder/tests/test.py @@ -3,7 +3,26 @@ import orffinder -sequence = list(SeqIO.parse("gene.fasta", "fasta"))[0] +sequence = list(SeqIO.parse("tests/gene.fasta", "fasta"))[0] -out = orffinder.getORFProteins(sequence, minimum_length=75, remove_nested=True) -print(out) +out = orffinder.getORFProteins(sequence, return_loci=True, minimum_length=75, remove_nested=False, trim_trailing=False) + +for orf in out: + + start = min(orf["start"], orf["end"]) + end = max(orf["start"], orf["end"]) + length = end - start + + print(orf["sense"]) + + if orf["sense"] == "+": + + print(sequence.seq[start - 1 : end - 1]) + + else: + print(sequence.seq[start - 1 : end - 1]) + + print(orf["protein"]) + print("\n") + +print(len(out))