diff --git a/emapper2gbk/genomes_to_gbk.py b/emapper2gbk/genomes_to_gbk.py index 58d2df9..78027e0 100644 --- a/emapper2gbk/genomes_to_gbk.py +++ b/emapper2gbk/genomes_to_gbk.py @@ -111,6 +111,9 @@ def gff_to_gbk(nucleic_fasta:str, protein_fasta:str, annot:Union[str, dict], elif gff_type == 'eggnog': cds_ids = set([cds.chrom + '_' + cds.id.split('_')[1] for cds in gff_database.features_of_type('CDS')]) + # If cds IDs are numeric add 'gene_' as a prefix + cds_ids = [f"gene_{cds_id}" if cds_id.isnumeric() else cds_id for cds_id in cds_ids] + cds_number = len(cds_ids) if cds_number == 0: @@ -128,12 +131,15 @@ def gff_to_gbk(nucleic_fasta:str, protein_fasta:str, annot:Union[str, dict], seq_protein_in_gff = 0 for record in SeqIO.parse(protein_fasta, "fasta"): - gene_protein_seqs[record.id] = record.seq + protein_id = record.id + if protein_id.isnumeric(): + protein_id = f"gene_{protein_id}" + gene_protein_seqs[protein_id] = record.seq if gff_type in ['default', 'cds_only', 'eggnog']: - if record.id in cds_ids: + if protein_id in cds_ids: seq_protein_in_gff += 1 elif gff_type == 'gmove': - if record.id.replace('prot', 'mRNA') in cds_ids: + if protein_id.replace('prot', 'mRNA') in cds_ids: seq_protein_in_gff += 1 if seq_protein_in_gff == 0: @@ -210,6 +216,11 @@ def gff_to_gbk(nucleic_fasta:str, protein_fasta:str, annot:Union[str, dict], # For each CDS in the GFF add a CDS in the genbank. for cds_object in gff_database.children(gene, featuretype="CDS", order_by='start'): cds_id = cds_object.id + # If id is numeric, change it + if cds_id.isnumeric(): + cds_id = f"gene_{cds_id}" + else: + cds_id = cds_id start_position = cds_object.start - 1 end_position = cds_object.end strand = strand_change(cds_object.strand) diff --git a/tests/numeric/betaox_genomes.fna b/tests/numeric/betaox_genomes.fna new file mode 100644 index 0000000..d706d08 --- /dev/null +++ b/tests/numeric/betaox_genomes.fna @@ -0,0 +1,179 @@ +>NC_000913_3 +ATGATGATTTTGAGTATTCTCGCTACGGTTGTCCTGCTCGGCGCGTTGTTCTATCACCGCGTGAGCTTAT +TTATCAGCAGTCTGATTTTGCTCGCCTGGACAGCCGCCCTCGGCGTTGCTGGTCTGTGGTCGGCGTGGGT +ACTGGTGCCTCTGGCCATTATCCTCGTGCCATTTAACTTTGCGCCTATGCGTAAGTCGATGATTTCCGCG +CCGGTATTTCGCGGTTTCCGTAAGGTGATGCCGCCGATGTCGCGCACTGAGAAAGAAGCGATTGATGCGG +GCACCACCTGGTGGGAGGGCGACTTGTTCCAGGGCAAGCCGGACTGGAAAAAGCTGCATAACTATCCGCA +GCCGCGCCTGACCGCCGAAGAGCAAGCGTTTCTCGACGGCCCGGTAGAAGAAGCCTGCCGGATGGCGAAT +GATTTCCAGATCACCCATGAGCTGGCGGATCTGCCGCCGGAGTTGTGGGCGTACCTTAAAGAGCATCGTT +TCTTCGCGATGATCATCAAAAAAGAGTACGGCGGGCTGGAGTTCTCGGCTTATGCCCAGTCTCGCGTGCT +GCAAAAACTCTCCGGCGTGAGCGGGATCCTGGCGATTACCGTCGGCGTGCCAAACTCATTAGGCCCGGGC +GAACTGTTGCAACATTACGGCACTGACGAGCAGAAAGATCACTATCTGCCGCGTCTGGCGCGTGGTCAGG +AGATCCCCTGCTTTGCACTGACCAGCCCGGAAGCGGGTTCCGATGCGGGCGCGATTCCGGACACCGGGAT +TGTCTGCATGGGCGAATGGCAGGGCCAGCAGGTGCTGGGGATGCGTCTGACCTGGAACAAACGCTACATT +ACGCTGGCACCGATTGCGACCGTGCTTGGGCTGGCGTTTAAACTCTCCGACCCGGAAAAATTACTCGGCG +GTGCAGAAGATTTAGGCATTACCTGTGCGCTGATCCCAACCACCACGCCGGGCGTGGAAATTGGTCGTCG +CCACTTCCCGCTGAACGTACCGTTCCAGAACGGACCGACGCGCGGTAAAGATGTCTTCGTGCCGATCGAT +TACATCATCGGCGGGCCGAAAATGGCCGGGCAAGGCTGGCGGATGCTGGTGGAGTGCCTCTCGGTAGGCC +GCGGCATCACCCTGCCTTCCAACTCAACCGGCGGCGTGAAATCGGTAGCGCTGGCAACCGGCGCGTATGC +TCACATTCGCCGTCAGTTCAAAATCTCTATTGGTAAGATGGAAGGGATTGAAGAGCCGCTGGCGCGTATT +GCCGGTAATGCCTACGTGATGGATGCTGCGGCATCGCTGATTACCTACGGCATTATGCTCGGCGAAAAAC +CTGCCGTGCTGTCGGCTATCGTTAAGTATCACTGTACCCACCGCGGGCAGCAGTCGATTATTGATGCGAT +GGATATTACCGGCGGTAAAGGCATTATGCTCGGGCAAAGCAACTTCCTGGCGCGTGCTTACCAGGGCGCA +CCGATTGCCATCACCGTTGAAGGGGCTAACATTCTGACCCGCAGCATGATGATCTTCGGACAAGGAGCGA +TTCGTTGCCATCCGTACGTGCTGGAAGAGATGGAAGCGGCGAAGAACAATGACGTCAACGCGTTCGATAA +ACTGTTGTTCAAACATATCGGTCACGTCGGTAGCAACAAAGTTCGCAGCTTCTGGCTGGGCCTGACGCGC +GGTTTAACCAGCAGCACGCCAACCGGCGATGCCACTAAACGCTACTATCAGCACCTGAACCGCCTGAGCG +CCAACCTCGCCCTGCTTTCTGATGTCTCGATGGCAGTGCTGGGCGGCAGCCTGAAACGTCGCGAGCGCAT +CTCGGCCCGTCTGGGGGATATTTTAAGCCAGCTCTACCTCGCCTCTGCCGTGCTGAAGCGTTATGACGAC +GAAGGCCGTAATGAAGCCGACCTGCCGCTGGTGCACTGGGGCGTACAAGATGCGCTGTATCAGGCTGAAC +AGGCGATGGATGATTTACTGCAAAACTTCCCGAACCGCGTGGTTGCCGGGCTGCTGAATGTGGTGATCTT +CCCGACCGGACGTCATTATCTGGCACCTTCTGACAAGCTGGATCATAAAGTGGCGAAGATTTTACAAGTG +CCGAACGCCACCCGTTCCCGCATTGGTCGCGGTCAGTACCTGACGCCGAGCGAGCATAATCCGGTTGGCT +TGCTGGAAGAGGCGCTGGTGGATGTGATTGCCGCCGACCCAATTCATCAGCGGATCTGTAAAGAGCTGGG +TAAAAACCTGCCGTTTACCCGTCTGGATGAACTGGCGCACAACGCGCTGGTGAAGGGGCTGATTGATAAA +GATGAAGCCGCTATTCTGGTGAAAGCTGAAGAAAGCCGTCTGCGCAGTATTAACGTTGATGACTTTGATC +CGGAAGAGCTGGCGACGAAGCCGGTAAAGTTGCCGGAGAAAGTGCGGAAAGTTGAAGCCGCGTAAATGCATCCCACAGGCCCGCATCTCGGGCCTGATGTTCTGTTTCGAGAGTCCAACATGAAAGTGACATTAA +CGTTTAACGAACAACGTCGTGCGGCGTATCGTCAGCAAGGGTTATGGGGCGATGCTTCGCTGGCCGATTA +CTGGCAGCAGACCGCTCGTGCGATGCCAGACAAAATTGCCGTGGTCGATAATCATGGTGCATCGTACACC +TATAGCGCGCTCGATCACGCCGCGAGCTGTCTGGCAAACTGGATGTTAGCGAAGGGTATTGAATCAGGCG +ATCGCATCGCATTTCAACTGCCTGGCTGGTGTGAATTTACCGTTATCTATCTTGCCTGCCTGAAAATCGG +TGCAGTTTCCGTGCCGCTGTTGCCTTCCTGGCGGGAAGCAGAACTGGTGTGGGTGCTCAATAAGTGTCAG +GCAAAAATGTTCTTTGCACCGACGTTGTTTAAACAAACGCGTCCGGTAGATTTAATCCTGCCGCTGCAAA +ATCAGCTTCCACAACTACAACAAATTGTCGGCGTGGACAAACTGGCTCCCGCCACCTCTTCCCTCTCATT +AAGTCAGATTATCGCCGACAATACCTCACTGACCACGGCGATAACGACCCACGGCGATGAATTAGCTGCG +GTGCTGTTTACCTCCGGAACCGAGGGTCTGCCAAAGGGCGTGATGCTAACGCATAACAATATTCTCGCCA +GTGAGCGGGCTTATTGCGCGCGACTGAATCTGACCTGGCAGGATGTCTTTATGATGCCTGCGCCACTTGG +TCACGCAACGGGCTTTCTGCATGGCGTAACGGCACCATTCTTAATTGGCGCTCGCAGCGTGTTGTTAGAT +ATTTTCACTCCTGATGCGTGTCTCGCGCTGCTTGAGCAGCAGCGTTGCACCTGTATGCTCGGCGCAACGC +CGTTTGTCTATGATCTTTTGAATGTACTAGAGAAACAACCCGCGGACCTTTCAGCGCTGCGTTTCTTTCT +TTGCGGCGGAACCACAATCCCCAAAAAAGTGGCGCGTGAATGCCAGCAGCGCGGCATTAAATTATTAAGT +GTTTATGGTTCCACAGAAAGTTCGCCGCATGCGGTGGTGAATCTCGATGATCCTTTGTCGCGCTTTATGC +ACACCGATGGTTACGCTGCCGCAGGTGTAGAGATTAAAGTGGTCGATGACGCACGCAAGACCTTACCGCC +AGGTTGCGAAGGTGAAGAAGCCTCGCGTGGCCCCAATGTGTTTATGGGGTATTTTGATGAACCTGAATTA +ACCGCCCGTGCCCTGGATGAAGAAGGCTGGTATTACAGCGGCGATCTCTGCCGTATGGATGAGGCTGGCT +ATATAAAAATTACCGGACGCAAAAAAGATATTATTGTCCGCGGCGGCGAAAATATTAGCAGCCGTGAAGT +GGAAGATATTTTATTGCAGCATCCTAAAATTCACGATGCCTGTGTGGTTGCAATGTCCGATGAACGTTTA +GGTGAACGATCATGCGCTTATGTCGTGCTGAAAGCGCCGCATCATTCATTATCGCTGGAAGAGGTAGTGG +CTTTTTTTAGCCGTAAACGGGTCGCAAAATATAAATATCCTGAACATATCGTGGTAATCGAAAAACTACC +GCGAACTACCTCAGGTAAAATACAAAAGTTTTTGTTAAGAAAAGATATTATGCGGCGTTTAACGCAGGAT +GTCTGTGAAGAGATTGAATAATTGAAGAAGGTTTGGCTTAACCGTTATCCCGCGGACGTTCCGACGGAGATCAACCCTGACCGTTATCAAT +CTCTGGTAGATATGTTTGAGCAGTCGGTCGCGCGCTACGCCGATCAACCTGCGTTTGTGAATATGGGGGA +GGTAATGACCTTCCGCAAGCTGGAAGAACGCAGTCGCGCGTTTGCCGCTTATTTGCAACAAGGGTTGGGG +CTGAAGAAAGGCGATCGCGTTGCGTTGATGATGCCTAATTTATTGCAATATCCGGTGGCGCTGTTTGGCA +TTTTGCGTGCCGGGATGATCGTCGTAAACGTTAACCCGTTGTATACCCCGCGTGAGCTTGAGCATCAGCT +TAACGATAGCGGCGCATCGGCGATTGTTATCGTGTCTAACTTTGCTCACACACTGGAAAAAGTGGTTGAT +AAAACCGCCGTTCAGCACGTAATTCTGACCCGTATGGGCGATCAGCTATCTACGGCAAAAGGCACGGTAG +TCAATTTCGTTGTTAAATACATCAAGCGTTTGGTGCCGAAATACCATCTGCCAGATGCCATTTCATTTCG +TAGCGCACTGCATAACGGCTACCGGATGCAGTACGTCAAACCCGAACTGGTGCCGGAAGATTTAGCTTTT +CTGCAATACACCGGCGGCACCACTGGTGTGGCGAAAGGCGCGATGCTGACTCACCGCAATATGCTGGCGA +ACCTGGAACAGGTTAACGCGACCTATGGTCCGCTGTTGCATCCGGGCAAAGAGCTGGTGGTGACGGCGCT +GCCGCTGTATCACATTTTTGCCCTGACCATTAACTGCCTGCTGTTTATCGAACTGGGTGGGCAGAACCTG +CTTATCACTAACCCGCGCGATATTCCAGGGTTGGTAAAAGAGTTAGCGAAATATCCGTTTACCGCTATCA +CGGGCGTTAACACCTTGTTCAATGCGTTGCTGAACAATAAAGAGTTCCAGCAGCTGGATTTCTCCAGTCT +GCATCTTTCCGCAGGCGGTGGGATGCCAGTGCAGCAAGTGGTGGCAGAGCGTTGGGTGAAACTGACCGGA +CAGTATCTGCTGGAAGGCTATGGCCTTACCGAGTGTGCGCCGCTGGTCAGCGTTAACCCATATGATATTG +ATTATCATAGTGGTAGCATCGGTTTGCCGGTGCCGTCGACGGAAGCCAAACTGGTGGATGATGATGATAA +TGAAGTACCACCAGGTCAACCGGGTGAGCTTTGTGTCAAAGGACCGCAGGTGATGCTGGGTTACTGGCAG +CGTCCCGATGCTACCGATGAAATCATCAAAAATGGCTGGTTACACACCGGCGACATCGCGGTAATGGATG +AAGAAGGATTCCTGCGCATTGTCGATCGTAAAAAAGACATGATTCTGGTTTCCGGTTTTAACGTCTATCC +CAACGAGATTGAAGATGTCGTCATGCAGCATCCTGGCGTACAGGAAGTCGCGGCTGTTGGCGTACCTTCC +GGCTCCAGTGGTGAAGCGGTGAAAATCTTCGTAGTGAAAAAAGATCCATCGCTTACCGAAGAGTCACTGG +TGACTTTTTGCCGCCGTCAGCTCACGGGATACAAAGTACCGAAGCTGGTGGAGTTTCGTGATGAGTTACC +GAAATCTAACGTCGGAAAAATTTTGCGACGAGAATTACGTGACGAAGCGCGCGGCAAAGTGGACAATAAA +GCCTGAATGGAAATGACATCAGCGTTTACCCTTAATGTTCGTCTGGACAACATTGCCGTTATCACCATCGACGTAC +CGGGTGAGAAAATGAATACCCTGAAGGCGGAGTTTGCCTCGCAGGTGCGCGCCATTATTAAGCAACTCCG +TGAAAACAAAGAGTTGCGAGGCGTGGTGTTTGTCTCCGCTAAACCGGACAACTTCATTGCTGGCGCAGAC +ATCAACATGATCGGCAACTGCAAAACGGCGCAAGAAGCGGAAGCTCTGGCGCGGCAGGGCCAACAGTTGA +TGGCGGAGATTCATGCTTTGCCCATTCAGGTTATCGCGGCTATTCATGGCGCTTGCCTGGGTGGTGGGCT +GGAGTTGGCGCTGGCGTGCCACGGTCGCGTTTGTACTGACGATCCTAAAACGGTGCTCGGTTTGCCTGAA +GTACAACTTGGATTGTTACCCGGTTCAGGCGGCACCCAGCGTTTACCGCGTCTGATAGGCGTCAGCACAG +CATTAGAGATGATCCTCACCGGAAAACAACTTCGGGCGAAACAGGCATTAAAGCTGGGGCTGGTGGATGA +CGTTGTTCCGCACTCCATTCTGCTGGAAGCCGCTGTTGAGCTGGCAAAGAAGGAGCGCCCATCTTCCCGC +CCTCTACCTGTACGCGAGCGTATTCTGGCGGGGCCGTTAGGTCGTGCGCTGCTGTTCAAAATGGTCGGCA +AGAAAACAGAACACAAAACTCAAGGCAATTATCCGGCGACAGAACGCATCCTGGAGGTTGTTGAAACGGG +ATTAGCGCAGGGCACCAGCAGCGGTTATGACGCCGAAGCTCGGGCGTTTGGCGAACTGGCGATGACGCCA +CAATCGCAGGCGCTGCGTAGTATCTTTTTTGCCAGTACGGACGTGAAGAAAGATCCCGGCAGTGATGCGC +CGCCTGCGCCATTAAACAGCGTGGGGATTTTAGGTGGTGGCTTGATGGGCGGCGGTATTGCTTATGTCAC +TGCTTGTAAAGCGGGGATTCCGGTCAGAATTAAAGATATCAACCCGCAGGGCATAAATCATGCGCTGAAG +TACAGTTGGGATCAGCTGGAGGGCAAAGTTCGCCGTCGTCATCTCAAAGCCAGCGAACGTGACAAACAGC +TGGCATTAATCTCCGGAACGACGGACTATCGCGGCTTTGCCCATCGCGATCTGATTATTGAAGCGGTGTT +TGAAAATCTCGAATTGAAACAACAGATGGTGGCGGAAGTTGAGCAAAATTGCGCCGCTCATACCATCTTT +GCTTCGAATACGTCATCTTTACCGATTGGTGATATCGCCGCTCACGCCACGCGACCTGAGCAAGTTATCG +GCCTGCATTTCTTCAGTCCGGTGGAAAAAATGCCGCTGGTGGAGATTATTCCTCATGCGGGGACATCGGC +GCAAACCATCGCTACCACAGTAAAACTGGCGAAAAAACAGGGTAAAACGCCAATTGTCGTGCGTGACAAA +GCCGGTTTTTACGTCAATCGCATCTTAGCGCCTTACATTAATGAAGCTATCCGCATGTTGACCCAAGGTG +AACGGGTAGAGCACATTGATGCCGCGCTAGTGAAATTTGGTTTTCCGGTAGGCCCAATCCAACTTTTGGA +TGAGGTAGGAATCGACACCGGGACTAAAATTATTCCTGTACTGGAAGCCGCTTATGGAGAACGTTTTAGC +GCGCCTGCAAATGTTGTTTCTTCAATTTTGAACGACGATCGCAAAGGCAGAAAAAATGGCCGGGGTTTCT +ATCTTTATGGTCAGAAAGGGCGTAAAAGCAAAAAACAGGTCGATCCCGCCATTTACCCGCTGATTGGCAC +ACAAGGGCAGGGGCGAATCTCCGCACCGCAGGTTGCTGAACGGTGTGTGATGTTGATGCTGAATGAAGCA +GTACGTTGTGTTGATGAGCAGGTTATCCGTAGCGTGCGTGACGGGGATATTGGCGCGGTATTTGGCATTG +GTTTTCCGCCATTTCTCGGTGGACCGTTCCGCTATATCGATTCTCTCGGCGCGGGCGAAGTGGTTGCAAT +AATGCAACGACTTGCCACGCAGTATGGTTCCCGTTTTACCCCTTGCGAGCGTTTGGTCGAGATGGGCGCG +CGTGGGGAAAGTTTTTGGAAAACAACTGCAACTGACCTGCAATAAATGGGTCAGGTTTTACCGCTGGTTACCCGCCAGGGCGATCGTATCGCCATTGTTAGCGGTTTACGTACGC +CTTTTGCCCGTCAGGCGACGGCTTTTCATGGCATTCCCGCGGTTGATTTAGGGAAGATGGTGGTAGGCGA +ACTGCTGGCACGCAGCGAGATCCCCGCCGAAGTGATTGAACAACTGGTCTTTGGTCAGGTCGTACAAATG +CCTGAAGCCCCCAACATTGCGCGTGAAATTGTTCTCGGTACGGGAATGAATGTACATACCGATGCTTACA +GCGTCAGCCGCGCTTGCGCTACCAGTTTCCAGGCAGTTGCAAACGTCGCAGAAAGCCTGATGGCGGGAAC +TATTCGAGCGGGGATTGCCGGTGGGGCAGATTCCTCTTCGGTATTGCCAATTGGCGTCAGTAAAAAACTG +GCGCGCGTGCTGGTTGATGTCAACAAAGCTCGTACCATGAGCCAGCGACTGAAACTCTTCTCTCGCCTGC +GTTTGCGCGACTTAATGCCCGTACCACCTGCGGTAGCAGAATATTCTACCGGCTTGCGGATGGGCGACAC +CGCAGAGCAAATGGCGAAAACCTACGGCATCACCCGAGAACAGCAAGATGCATTAGCGCACCGTTCGCAT +CAGCGTGCCGCTCAGGCATGGTCAGACGGAAAACTCAAAGAAGAGGTGATGACTGCCTTTATCCCTCCTT +ATAAACAACCGCTTGTCGAAGACAACAATATTCGCGGTAATTCCTCGCTTGCCGATTACGCAAAGCTGCG +CCCGGCGTTTGATCGCAAACACGGAACGGTAACGGCGGCAAACAGTACGCCGCTGACCGATGGCGCGGCA +GCGGTGATCCTGATGACTGAATCCCGGGCGAAAGAATTAGGGCTGGTGCCGCTGGGGTATCTGCGCAGCT +ACGCATTTACTGCGATTGATGTCTGGCAGGACATGTTGCTCGGTCCAGCCTGGTCAACACCGCTGGCGCT +GGAGCGTGCCGGTTTGACGATGAGCGATCTGACATTGATCGATATGCACGAAGCCTTTGCAGCTCAGACG +CTGGCGAATATTCAGTTGCTGGGTAGTGAACGTTTTGCTCGTGAAGCACTGGGGCGTGCACATGCCACTG +GCGAAGTGGACGATAGCAAATTTAACGTGCTTGGCGGTTCGATTGCTTACGGGCATCCCTTCGCGGCGAC +CGGCGCGCGGATGATTACCCAGACATTGCATGAACTTCGCCGTCGCGGCGGTGGATTTGGTTTAGTTACC +GCCTGTGCTGCCGGTGGGCTTGGCGCGGCAATGGTTCTGGAGGCGGAATAAATGGAACAGGTTGTCATTGTCGATGCAATTCGCACCCCGATGGGCCGTTCGAAGGGCGGTGCTTTTCGTA +ACGTGCGTGCAGAAGATCTCTCCGCTCATTTAATGCGTAGCCTGCTGGCGCGTAACCCGGCGCTGGAAGC +GGCGGCCCTCGACGATATTTACTGGGGTTGTGTGCAGCAGACGCTGGAGCAGGGTTTTAATATCGCCCGT +AACGCGGCGCTGCTGGCAGAAGTACCACACTCTGTCCCGGCGGTTACCGTTAATCGCTTGTGTGGTTCAT +CCATGCAGGCACTGCATGACGCAGCACGAATGATCATGACTGGCGATGCGCAGGCATGTCTGGTTGGCGG +CGTGGAGCATATGGGCCATGTGCCGATGAGTCACGGCGTCGATTTTCACCCCGGCCTGAGCCGCAATGTC +GCCAAAGCGGCGGGCATGATGGGCTTAACGGCAGAAATGCTGGCGCGTATGCACGGTATCAGCCGTGAAA +TGCAGGATGCCTTTGCCGCGCGGTCACACGCCCGCGCCTGGGCCGCCACGCAGTCGGCCGCATTTAAAAA +TGAAATCATCCCGACCGGTGGTCACGATGCCGACGGCGTCCTGAAGCAGTTTAATTACGACGAAGTGATT +CGCCCGGAAACCACCGTGGAAGCCCTCGCCACGCTGCGTCCGGCGTTTGATCCAGTAAACGGTATGGTAA +CGGCGGGCACATCTTCTGCACTTTCCGATGGCGCAGCTGCCATGCTGGTGATGAGTGAAAGCCGCGCCCA +TGAATTAGGTCTTAAGCCGCGCGCTCGTGTGCGTTCGATGGCGGTCGTTGGTTGTGACCCATCGATTATG +GGTTACGGCCCGGTTCCGGCCTCGAAACTGGCGCTGAAAAAAGCGGGGCTTTCTGCCAGCGATATCGGCG +TGTTTGAAATGAACGAAGCCTTTGCCGCGCAGATCCTGCCATGTATTAAAGATCTGGGACTAATTGAGCA +GATTGACGAGAAGATCAACCTCAACGGTGGCGCGATCGCGCTGGGTCATCCGCTGGGTTGTTCCGGTGCG +CGTATCAGCACCACGCTGCTGAATCTGATGGAACGCAAAGACGTTCAGTTTGGTCTGGCGACGATGTGTA +TCGGTCTGGGTCAGGGTATTGCGACGGTGTTTGAGCGGGTTTAAATGCTTTACAAAGGCGACACCCTGTACCTTGACTGGCTGGAAGATGGCATTGCCGAACTGGTATTTGATG +CCCCAGGTTCAGTTAATAAACTCGACACTGCGACCGTCGCCAGCCTCGGCGAGGCCATCGGCGTGCTGGA +ACAGCAATCAGATCTAAAAGGGCTGCTGCTGCGTTCGAACAAAGCAGCCTTTATCGTCGGTGCTGATATC +ACCGAATTTTTGTCCCTGTTCCTCGTTCCTGAAGAACAGTTAAGTCAGTGGCTGCACTTTGCCAATAGCG +TGTTTAATCGCCTGGAAGATCTGCCGGTGCCGACCATTGCTGCCGTCAATGGCTATGCGCTGGGCGGTGG +CTGCGAATGCGTGCTGGCGACCGATTATCGTCTGGCGACGCCGGATCTGCGCATCGGTCTGCCGGAAACC +AAACTGGGCATCATGCCTGGCTTTGGCGGTTCTGTACGTATGCCACGTATGCTGGGCGCTGACAGTGCGC +TGGAAATCATTGCCGCCGGTAAAGATGTCGGCGCGGATCAGGCGCTGAAAATCGGTCTGGTGGATGGCGT +AGTCAAAGCAGAAAAACTGGTTGAAGGCGCAAAGGCGGTTTTACGCCAGGCCATTAACGGCGACCTCGAC +TGGAAAGCAAAACGTCAGCCGAAGCTGGAACCACTAAAACTGAGCAAGATTGAAGCCACCATGAGCTTCA +CCATCGCTAAAGGGATGGTCGCACAAACAGCGGGGAAACATTATCCGGCCCCCATCACCGCAGTAAAAAC +CATTGAAGCTGCGGCCCGTTTTGGTCGTGAAGAAGCCTTAAACCTGGAAAACAAAAGTTTTGTCCCGCTG +GCGCATACCAACGAAGCCCGCGCACTGGTCGGCATTTTCCTTAACGATCAATATGTAAAAGGCAAAGCGA +AGAAACTCACCAAAGACGTTGAAACCCCGAAACAGGCCGCGGTGCTGGGTGCAGGCATTATGGGCGGCGG +CATCGCTTACCAGTCTGCGTGGAAAGGCGTGCCGGTTGTCATGAAAGATATCAACGACAAGTCGTTAACC +CTCGGCATGACCGAAGCCGCGAAACTGCTGAACAAGCAGCTTGAGCGCGGCAAGATCGATGGTCTGAAAC +TGGCTGGCGTGATCTCCACAATCCACCCAACGCTCGACTACGCCGGATTTGACCGCGTGGATATTGTGGT +AGAAGCGGTTGTTGAAAACCCGAAAGTGAAAAAAGCCGTACTGGCAGAAACCGAACAAAAAGTACGCCAG +GATACCGTGCTGGCGTCTAACACTTCAACCATTCCTATCAGCGAACTGGCCAACGCGCTGGAACGCCCGG +AAAACTTCTGCGGGATGCACTTCTTTAACCCGGTCCACCGAATGCCGTTGGTAGAAATTATTCGCGGCGA +GAAAAGCTCCGACGAAACCATCGCGAAAGTTGTCGCCTGGGCGAGCAAGATGGGCAAGACGCCGATTGTG +GTTAACGACTGCCCCGGCTTCTTTGTTAACCGCGTGCTGTTCCCGTATTTCGCCGGTTTCAGCCAGCTGC +TGCGCGACGGCGCGGATTTCCGCAAGATCGACAAAGTGATGGAAAAACAGTTTGGCTGGCCGATGGGCCC +GGCATATCTGCTGGACGTTGTGGGCATTGATACCGCGCATCACGCTCAGGCTGTCATGGCAGCAGGCTTC +CCGCAGCGGATGCAGAAAGATTACCGCGATGCCATCGACGCGCTGTTTGATGCCAACCGCTTTGGTCAGA +AGAACGGCCTCGGTTTCTGGCGTTATAAAGAAGACAGCAAAGGTAAGCCGAAGAAAGAAGAAGACGCCGC +CGTTGAAGACCTGCTGGCAGAAGTGAGCCAGCCGAAGCGCGATTTCAGCGAAGAAGAGATTATCGCCCGC +ATGATGATCCCGATGGTCAACGAAGTGGTGCGCTGTCTGGAGGAAGGCATTATCGCCACTCCGGCGGAAG +CGGATATGGCGCTGGTCTACGGCCTGGGCTTCCCTCCGTTCCACGGCGGCGCGTTCCGCTGGCTGGACAC +CCTCGGTAGCGCAAAATACCTCGATATGGCACAGCAATATCAGCACCTCGGCCCGCTGTATGAAGTGCCG +GAAGGTCTGCGTAATAAAGCGCGTCATAACGAACCGTACTATCCTCCGGTTGAGCCAGCCCGTCCGGTTG +GCGACCTGAAAACGGCTTAA diff --git a/tests/numeric/betaox_genomes.gbk b/tests/numeric/betaox_genomes.gbk new file mode 100644 index 0000000..3bfa59f --- /dev/null +++ b/tests/numeric/betaox_genomes.gbk @@ -0,0 +1,745 @@ +LOCUS NC_000913_3 12642 bp DNA BCT 21-SEP-2022 +DEFINITION Escherichia genome. +ACCESSION NC_000913_3 +VERSION NC_000913_3 +KEYWORDS Escherichia. +SOURCE . + ORGANISM Escherichia + Bacteria; Proteobacteria; Gammaproteobacteria; Enterobacterales; + Enterobacteriaceae. +FEATURES Location/Qualifiers + source 1..12642 + /scaffold="NC_000913_3" + /db_xref="taxon:561" + gene complement(1..2445) + /locus_tag="gene226" + CDS complement(1..2445) + /locus_tag="gene_226" + /gene="fadE" + /go_component="GO:0005575" + /go_component="GO:0005623" + /go_component="GO:0005886" + /go_component="GO:0016020" + /go_component="GO:0044464" + /go_component="GO:0071944" + /go_function="GO:0003674" + /go_function="GO:0003824" + /go_function="GO:0003995" + /go_function="GO:0016491" + /go_function="GO:0016627" + /go_process="GO:0006082" + /go_process="GO:0006629" + /go_process="GO:0006631" + /go_process="GO:0006635" + /go_process="GO:0008150" + /go_process="GO:0008152" + /go_process="GO:0009056" + /go_process="GO:0009062" + /go_process="GO:0009987" + /go_process="GO:0016042" + /go_process="GO:0016054" + /go_process="GO:0019395" + /go_process="GO:0019752" + /go_process="GO:0030258" + /go_process="GO:0032787" + /go_process="GO:0033539" + /go_process="GO:0034440" + /go_process="GO:0043436" + /go_process="GO:0044237" + /go_process="GO:0044238" + /go_process="GO:0044242" + /go_process="GO:0044248" + /go_process="GO:0044255" + /go_process="GO:0044281" + /go_process="GO:0044282" + /go_process="GO:0046395" + /go_process="GO:0055114" + /go_process="GO:0071704" + /go_process="GO:0072329" + /go_process="GO:1901575" + /dbxref="KEGG:R01175" + /dbxref="KEGG:R01279" + /dbxref="KEGG:R03777" + /dbxref="KEGG:R03857" + /dbxref="KEGG:R03990" + /dbxref="KEGG:R04751" + /dbxref="KEGG:R04754" + /translation="MMILSILATVVLLGALFYHRVSLFISSLILLAWTAALGVAGLWSA + WVLVPLAIILVPFNFAPMRKSMISAPVFRGFRKVMPPMSRTEKEAIDAGTTWWEGDLFQ + GKPDWKKLHNYPQPRLTAEEQAFLDGPVEEACRMANDFQITHELADLPPELWAYLKEHR + FFAMIIKKEYGGLEFSAYAQSRVLQKLSGVSGILAITVGVPNSLGPGELLQHYGTDEQK + DHYLPRLARGQEIPCFALTSPEAGSDAGAIPDTGIVCMGEWQGQQVLGMRLTWNKRYIT + LAPIATVLGLAFKLSDPEKLLGGAEDLGITCALIPTTTPGVEIGRRHFPLNVPFQNGPT + RGKDVFVPIDYIIGGPKMAGQGWRMLVECLSVGRGITLPSNSTGGVKSVALATGAYAHI + RRQFKISIGKMEGIEEPLARIAGNAYVMDAAASLITYGIMLGEKPAVLSAIVKYHCTHR + GQQSIIDAMDITGGKGIMLGQSNFLARAYQGAPIAITVEGANILTRSMMIFGQGAIRCH + PYVLEEMEAAKNNDVNAFDKLLFKHIGHVGSNKVRSFWLGLTRGLTSSTPTGDATKRYY + QHLNRLSANLALLSDVSMAVLGGSLKRRERISARLGDILSQLYLASAVLKRYDDEGRNE + ADLPLVHWGVQDALYQAEQAMDDLLQNFPNRVVAGLLNVVIFPTGRHYLAPSDKLDHKV + AKILQVPNATRSRIGRGQYLTPSEHNPVGLLEEALVDVIAADPIHQRICKELGKNLPFT + RLDELAHNALVKGLIDKDEAAILVKAEESRLRSINVDDFDPEELATKPVKLPEKVRKVE + AA" + gene 2446..4146 + /locus_tag="gene1781" + CDS 2446..4146 + /locus_tag="gene_1781" + /gene="fadK" + /go_component="GO:0005575" + /go_component="GO:0005623" + /go_component="GO:0005886" + /go_component="GO:0016020" + /go_component="GO:0044464" + /go_component="GO:0071944" + /go_function="GO:0003674" + /go_function="GO:0003824" + /go_function="GO:0015645" + /go_function="GO:0016405" + /go_function="GO:0016874" + /go_function="GO:0016877" + /go_function="GO:0016878" + /go_function="GO:0031956" + /dbxref="BIGG:iECIAI1_1343.ECIAI1_1755" + /translation="MHPTGPHLGPDVLFRESNMKVTLTFNEQRRAAYRQQGLWGDASLA + DYWQQTARAMPDKIAVVDNHGASYTYSALDHAASCLANWMLAKGIESGDRIAFQLPGWC + EFTVIYLACLKIGAVSVPLLPSWREAELVWVLNKCQAKMFFAPTLFKQTRPVDLILPLQ + NQLPQLQQIVGVDKLAPATSSLSLSQIIADNTSLTTAITTHGDELAAVLFTSGTEGLPK + GVMLTHNNILASERAYCARLNLTWQDVFMMPAPLGHATGFLHGVTAPFLIGARSVLLDI + FTPDACLALLEQQRCTCMLGATPFVYDLLNVLEKQPADLSALRFFLCGGTTIPKKVARE + CQQRGIKLLSVYGSTESSPHAVVNLDDPLSRFMHTDGYAAAGVEIKVVDDARKTLPPGC + EGEEASRGPNVFMGYFDEPELTARALDEEGWYYSGDLCRMDEAGYIKITGRKKDIIVRG + GENISSREVEDILLQHPKIHDACVVAMSDERLGERSCAYVVLKAPHHSLSLEEVVAFFS + RKRVAKYKYPEHIVVIEKLPRTTSGKIQKFLLRKDIMRRLTQDVCEEIE" + gene complement(4147..5832) + /locus_tag="gene1887" + CDS complement(4147..5832) + /locus_tag="gene_1887" + /gene="fadD" + /go_component="GO:0005575" + /go_component="GO:0005622" + /go_component="GO:0005623" + /go_component="GO:0005737" + /go_component="GO:0005829" + /go_component="GO:0005886" + /go_component="GO:0009898" + /go_component="GO:0016020" + /go_component="GO:0044424" + /go_component="GO:0044425" + /go_component="GO:0044444" + /go_component="GO:0044459" + /go_component="GO:0044464" + /go_component="GO:0071944" + /go_component="GO:0098552" + /go_component="GO:0098562" + /go_function="GO:0003674" + /go_function="GO:0003824" + /go_function="GO:0004467" + /go_function="GO:0005488" + /go_function="GO:0005504" + /go_function="GO:0008289" + /go_function="GO:0015645" + /go_function="GO:0016405" + /go_function="GO:0016874" + /go_function="GO:0016877" + /go_function="GO:0016878" + /go_function="GO:0031406" + /go_function="GO:0033293" + /go_function="GO:0036041" + /go_function="GO:0036094" + /go_function="GO:0043167" + /go_function="GO:0043168" + /go_function="GO:0043177" + /go_function="GO:0070538" + /go_process="GO:0001676" + /go_process="GO:0006082" + /go_process="GO:0006139" + /go_process="GO:0006163" + /go_process="GO:0006629" + /go_process="GO:0006631" + /go_process="GO:0006635" + /go_process="GO:0006637" + /go_process="GO:0006644" + /go_process="GO:0006725" + /go_process="GO:0006732" + /go_process="GO:0006753" + /go_process="GO:0006790" + /go_process="GO:0006793" + /go_process="GO:0006796" + /go_process="GO:0006807" + /go_process="GO:0008150" + /go_process="GO:0008152" + /go_process="GO:0008610" + /go_process="GO:0008654" + /go_process="GO:0009056" + /go_process="GO:0009058" + /go_process="GO:0009062" + /go_process="GO:0009117" + /go_process="GO:0009150" + /go_process="GO:0009259" + /go_process="GO:0009314" + /go_process="GO:0009411" + /go_process="GO:0009416" + /go_process="GO:0009628" + /go_process="GO:0009987" + /go_process="GO:0016042" + /go_process="GO:0016054" + /go_process="GO:0019395" + /go_process="GO:0019637" + /go_process="GO:0019693" + /go_process="GO:0019752" + /go_process="GO:0030258" + /go_process="GO:0032787" + /go_process="GO:0033865" + /go_process="GO:0033875" + /go_process="GO:0034032" + /go_process="GO:0034440" + /go_process="GO:0034641" + /go_process="GO:0035383" + /go_process="GO:0043436" + /go_process="GO:0043603" + /go_process="GO:0044237" + /go_process="GO:0044238" + /go_process="GO:0044242" + /go_process="GO:0044248" + /go_process="GO:0044249" + /go_process="GO:0044255" + /go_process="GO:0044281" + /go_process="GO:0044282" + /go_process="GO:0046395" + /go_process="GO:0046483" + /go_process="GO:0050896" + /go_process="GO:0051186" + /go_process="GO:0055086" + /go_process="GO:0055114" + /go_process="GO:0071704" + /go_process="GO:0072329" + /go_process="GO:0072521" + /go_process="GO:0090407" + /go_process="GO:1901135" + /go_process="GO:1901360" + /go_process="GO:1901564" + /go_process="GO:1901575" + /go_process="GO:1901576" + /EC_number="6.2.1.3" + /dbxref="KEGG:R01280" + /translation="LKKVWLNRYPADVPTEINPDRYQSLVDMFEQSVARYADQPAFVNM + GEVMTFRKLEERSRAFAAYLQQGLGLKKGDRVALMMPNLLQYPVALFGILRAGMIVVNV + NPLYTPRELEHQLNDSGASAIVIVSNFAHTLEKVVDKTAVQHVILTRMGDQLSTAKGTV + VNFVVKYIKRLVPKYHLPDAISFRSALHNGYRMQYVKPELVPEDLAFLQYTGGTTGVAK + GAMLTHRNMLANLEQVNATYGPLLHPGKELVVTALPLYHIFALTINCLLFIELGGQNLL + ITNPRDIPGLVKELAKYPFTAITGVNTLFNALLNNKEFQQLDFSSLHLSAGGGMPVQQV + VAERWVKLTGQYLLEGYGLTECAPLVSVNPYDIDYHSGSIGLPVPSTEAKLVDDDDNEV + PPGQPGELCVKGPQVMLGYWQRPDATDEIIKNGWLHTGDIAVMDEEGFLRIVDRKKDMI + LVSGFNVYPNEIEDVVMQHPGVQEVAAVGVPSGSSGEAVKIFVVKKDPSLTEESLVTFC + RRQLTGYKVPKLVEFRDELPKSNVGKILRRELRDEARGKVDNKA" + gene complement(5833..7977) + /locus_tag="gene2441" + CDS complement(5833..7977) + /locus_tag="gene_2441" + /gene="fadJ" + /go_function="GO:0003674" + /go_function="GO:0003824" + /go_function="GO:0003857" + /go_function="GO:0004165" + /go_function="GO:0004300" + /go_function="GO:0008691" + /go_function="GO:0008692" + /go_function="GO:0016491" + /go_function="GO:0016614" + /go_function="GO:0016616" + /go_function="GO:0016829" + /go_function="GO:0016835" + /go_function="GO:0016836" + /go_function="GO:0016853" + /go_function="GO:0016854" + /go_function="GO:0016856" + /go_function="GO:0016860" + /go_function="GO:0016863" + /go_process="GO:0006082" + /go_process="GO:0006629" + /go_process="GO:0006631" + /go_process="GO:0006635" + /go_process="GO:0006725" + /go_process="GO:0006805" + /go_process="GO:0008150" + /go_process="GO:0008152" + /go_process="GO:0009056" + /go_process="GO:0009062" + /go_process="GO:0009404" + /go_process="GO:0009407" + /go_process="GO:0009410" + /go_process="GO:0009636" + /go_process="GO:0009850" + /go_process="GO:0009852" + /go_process="GO:0009987" + /go_process="GO:0010124" + /go_process="GO:0010817" + /go_process="GO:0016042" + /go_process="GO:0016054" + /go_process="GO:0019395" + /go_process="GO:0019439" + /go_process="GO:0019748" + /go_process="GO:0019752" + /go_process="GO:0030258" + /go_process="GO:0032787" + /go_process="GO:0034440" + /go_process="GO:0042178" + /go_process="GO:0042221" + /go_process="GO:0042445" + /go_process="GO:0042447" + /go_process="GO:0042537" + /go_process="GO:0043436" + /go_process="GO:0044237" + /go_process="GO:0044238" + /go_process="GO:0044242" + /go_process="GO:0044248" + /go_process="GO:0044255" + /go_process="GO:0044281" + /go_process="GO:0044282" + /go_process="GO:0046395" + /go_process="GO:0050896" + /go_process="GO:0051716" + /go_process="GO:0055114" + /go_process="GO:0065007" + /go_process="GO:0065008" + /go_process="GO:0070887" + /go_process="GO:0071466" + /go_process="GO:0071704" + /go_process="GO:0072329" + /go_process="GO:0098754" + /go_process="GO:1901360" + /go_process="GO:1901361" + /go_process="GO:1901575" + /EC_number="1.1.1.157" + /EC_number="1.1.1.35" + /EC_number="4.2.1.17" + /EC_number="5.1.2.3" + /EC_number="5.3.3.8" + /dbxref="KEGG:R01975" + /dbxref="KEGG:R01976" + /dbxref="KEGG:R03026" + /dbxref="KEGG:R03045" + /dbxref="KEGG:R03276" + /dbxref="KEGG:R04137" + /dbxref="KEGG:R04170" + /dbxref="KEGG:R04203" + /dbxref="KEGG:R04204" + /dbxref="KEGG:R04224" + /dbxref="KEGG:R04737" + /dbxref="KEGG:R04738" + /dbxref="KEGG:R04739" + /dbxref="KEGG:R04740" + /dbxref="KEGG:R04741" + /dbxref="KEGG:R04744" + /dbxref="KEGG:R04745" + /dbxref="KEGG:R04746" + /dbxref="KEGG:R04748" + /dbxref="KEGG:R04749" + /dbxref="KEGG:R04756" + /dbxref="KEGG:R05066" + /dbxref="KEGG:R05305" + /dbxref="KEGG:R05576" + /dbxref="KEGG:R06411" + /dbxref="KEGG:R06412" + /dbxref="KEGG:R06941" + /dbxref="KEGG:R06942" + /dbxref="KEGG:R07935" + /dbxref="KEGG:R07951" + /dbxref="KEGG:R08093" + /dbxref="KEGG:R08094" + /translation="MEMTSAFTLNVRLDNIAVITIDVPGEKMNTLKAEFASQVRAIIKQ + LRENKELRGVVFVSAKPDNFIAGADINMIGNCKTAQEAEALARQGQQLMAEIHALPIQV + IAAIHGACLGGGLELALACHGRVCTDDPKTVLGLPEVQLGLLPGSGGTQRLPRLIGVST + ALEMILTGKQLRAKQALKLGLVDDVVPHSILLEAAVELAKKERPSSRPLPVRERILAGP + LGRALLFKMVGKKTEHKTQGNYPATERILEVVETGLAQGTSSGYDAEARAFGELAMTPQ + SQALRSIFFASTDVKKDPGSDAPPAPLNSVGILGGGLMGGGIAYVTACKAGIPVRIKDI + NPQGINHALKYSWDQLEGKVRRRHLKASERDKQLALISGTTDYRGFAHRDLIIEAVFEN + LELKQQMVAEVEQNCAAHTIFASNTSSLPIGDIAAHATRPEQVIGLHFFSPVEKMPLVE + IIPHAGTSAQTIATTVKLAKKQGKTPIVVRDKAGFYVNRILAPYINEAIRMLTQGERVE + HIDAALVKFGFPVGPIQLLDEVGIDTGTKIIPVLEAAYGERFSAPANVVSSILNDDRKG + RKNGRGFYLYGQKGRKSKKQVDPAIYPLIGTQGQGRISAPQVAERCVMLMLNEAVRCVD + EQVIRSVRDGDIGAVFGIGFPPFLGGPFRYIDSLGAGEVVAIMQRLATQYGSRFTPCER + LVEMGARGESFWKTTATDLQ" + gene complement(7978..9288) + /locus_tag="gene2442" + CDS complement(7978..9288) + /locus_tag="gene_2442" + /gene="fadI" + /go_component="GO:0005575" + /go_component="GO:0005622" + /go_component="GO:0005623" + /go_component="GO:0005737" + /go_component="GO:0005829" + /go_component="GO:0044424" + /go_component="GO:0044444" + /go_component="GO:0044464" + /go_function="GO:0003674" + /go_function="GO:0003824" + /go_function="GO:0003857" + /go_function="GO:0003988" + /go_function="GO:0004300" + /go_function="GO:0016408" + /go_function="GO:0016491" + /go_function="GO:0016614" + /go_function="GO:0016616" + /go_function="GO:0016740" + /go_function="GO:0016746" + /go_function="GO:0016747" + /go_function="GO:0016829" + /go_function="GO:0016835" + /go_function="GO:0016836" + /go_process="GO:0006082" + /go_process="GO:0006629" + /go_process="GO:0006631" + /go_process="GO:0006635" + /go_process="GO:0008150" + /go_process="GO:0008152" + /go_process="GO:0009056" + /go_process="GO:0009062" + /go_process="GO:0009987" + /go_process="GO:0016042" + /go_process="GO:0016054" + /go_process="GO:0019395" + /go_process="GO:0019752" + /go_process="GO:0030258" + /go_process="GO:0032787" + /go_process="GO:0033542" + /go_process="GO:0034440" + /go_process="GO:0043436" + /go_process="GO:0044237" + /go_process="GO:0044238" + /go_process="GO:0044242" + /go_process="GO:0044248" + /go_process="GO:0044255" + /go_process="GO:0044281" + /go_process="GO:0044282" + /go_process="GO:0046395" + /go_process="GO:0055114" + /go_process="GO:0071704" + /go_process="GO:0072329" + /go_process="GO:1901575" + /EC_number="2.3.1.16" + /dbxref="KEGG:R00829" + /dbxref="KEGG:R00927" + /dbxref="KEGG:R01177" + /dbxref="KEGG:R03778" + /dbxref="KEGG:R03858" + /dbxref="KEGG:R03991" + /dbxref="KEGG:R04546" + /dbxref="KEGG:R04742" + /dbxref="KEGG:R04747" + /dbxref="KEGG:R05506" + /dbxref="KEGG:R05586" + /dbxref="KEGG:R07891" + /dbxref="KEGG:R07895" + /dbxref="KEGG:R07899" + /dbxref="KEGG:R08091" + /dbxref="KEGG:R08095" + /translation="MGQVLPLVTRQGDRIAIVSGLRTPFARQATAFHGIPAVDLGKMVV + GELLARSEIPAEVIEQLVFGQVVQMPEAPNIAREIVLGTGMNVHTDAYSVSRACATSFQ + AVANVAESLMAGTIRAGIAGGADSSSVLPIGVSKKLARVLVDVNKARTMSQRLKLFSRL + RLRDLMPVPPAVAEYSTGLRMGDTAEQMAKTYGITREQQDALAHRSHQRAAQAWSDGKL + KEEVMTAFIPPYKQPLVEDNNIRGNSSLADYAKLRPAFDRKHGTVTAANSTPLTDGAAA + VILMTESRAKELGLVPLGYLRSYAFTAIDVWQDMLLGPAWSTPLALERAGLTMSDLTLI + DMHEAFAAQTLANIQLLGSERFAREALGRAHATGEVDDSKFNVLGGSIAYGHPFAATGA + RMITQTLHELRRRGGGFGLVTACAAGGLGAAMVLEAE" + gene complement(9289..10452) + /locus_tag="gene3987" + CDS complement(9289..10452) + /locus_tag="gene_3987" + /gene="fadA" + /go_component="GO:0005575" + /go_component="GO:0005622" + /go_component="GO:0005623" + /go_component="GO:0005737" + /go_component="GO:0044424" + /go_component="GO:0044464" + /go_function="GO:0003674" + /go_function="GO:0003824" + /go_function="GO:0003988" + /go_function="GO:0016408" + /go_function="GO:0016740" + /go_function="GO:0016746" + /go_function="GO:0016747" + /go_process="GO:0006082" + /go_process="GO:0006629" + /go_process="GO:0006631" + /go_process="GO:0006635" + /go_process="GO:0008150" + /go_process="GO:0008152" + /go_process="GO:0009056" + /go_process="GO:0009062" + /go_process="GO:0009987" + /go_process="GO:0016042" + /go_process="GO:0016054" + /go_process="GO:0019395" + /go_process="GO:0019752" + /go_process="GO:0030258" + /go_process="GO:0032787" + /go_process="GO:0034440" + /go_process="GO:0043436" + /go_process="GO:0044237" + /go_process="GO:0044238" + /go_process="GO:0044242" + /go_process="GO:0044248" + /go_process="GO:0044255" + /go_process="GO:0044281" + /go_process="GO:0044282" + /go_process="GO:0046395" + /go_process="GO:0055114" + /go_process="GO:0071704" + /go_process="GO:0072329" + /go_process="GO:1901575" + /EC_number="2.3.1.16" + /dbxref="KEGG:R00829" + /dbxref="KEGG:R00927" + /dbxref="KEGG:R01177" + /dbxref="KEGG:R03778" + /dbxref="KEGG:R03858" + /dbxref="KEGG:R03991" + /dbxref="KEGG:R04546" + /dbxref="KEGG:R04742" + /dbxref="KEGG:R04747" + /dbxref="KEGG:R05506" + /dbxref="KEGG:R05586" + /dbxref="KEGG:R07891" + /dbxref="KEGG:R07895" + /dbxref="KEGG:R07899" + /dbxref="KEGG:R08091" + /dbxref="KEGG:R08095" + /translation="MEQVVIVDAIRTPMGRSKGGAFRNVRAEDLSAHLMRSLLARNPAL + EAAALDDIYWGCVQQTLEQGFNIARNAALLAEVPHSVPAVTVNRLCGSSMQALHDAARM + IMTGDAQACLVGGVEHMGHVPMSHGVDFHPGLSRNVAKAAGMMGLTAEMLARMHGISRE + MQDAFAARSHARAWAATQSAAFKNEIIPTGGHDADGVLKQFNYDEVIRPETTVEALATL + RPAFDPVNGMVTAGTSSALSDGAAAMLVMSESRAHELGLKPRARVRSMAVVGCDPSIMG + YGPVPASKLALKKAGLSASDIGVFEMNEAFAAQILPCIKDLGLIEQIDEKINLNGGAIA + LGHPLGCSGARISTTLLNLMERKDVQFGLATMCIGLGQGIATVFERV" + gene complement(10453..12642) + /locus_tag="gene3988" + CDS complement(10453..12642) + /locus_tag="gene_3988" + /translation="MLYKGDTLYLDWLEDGIAELVFDAPGSVNKLDTATVASLGEAIGV + LEQQSDLKGLLLRSNKAAFIVGADITEFLSLFLVPEEQLSQWLHFANSVFNRLEDLPVP + TIAAVNGYALGGGCECVLATDYRLATPDLRIGLPETKLGIMPGFGGSVRMPRMLGADSA + LEIIAAGKDVGADQALKIGLVDGVVKAEKLVEGAKAVLRQAINGDLDWKAKRQPKLEPL + KLSKIEATMSFTIAKGMVAQTAGKHYPAPITAVKTIEAAARFGREEALNLENKSFVPLA + HTNEARALVGIFLNDQYVKGKAKKLTKDVETPKQAAVLGAGIMGGGIAYQSAWKGVPVV + MKDINDKSLTLGMTEAAKLLNKQLERGKIDGLKLAGVISTIHPTLDYAGFDRVDIVVEA + VVENPKVKKAVLAETEQKVRQDTVLASNTSTIPISELANALERPENFCGMHFFNPVHRM + PLVEIIRGEKSSDETIAKVVAWASKMGKTPIVVNDCPGFFVNRVLFPYFAGFSQLLRDG + ADFRKIDKVMEKQFGWPMGPAYLLDVVGIDTAHHAQAVMAAGFPQRMQKDYRDAIDALF + DANRFGQKNGLGFWRYKEDSKGKPKKEEDAAVEDLLAEVSQPKRDFSEEEIIARMMIPM + VNEVVRCLEEGIIATPAEADMALVYGLGFPPFHGGAFRWLDTLGSAKYLDMAQQYQHLG + PLYEVPEGLRNKARHNEPYYPPVEPARPVGDLKTA" +ORIGIN + 1 atgatgattt tgagtattct cgctacggtt gtcctgctcg gcgcgttgtt ctatcaccgc + 61 gtgagcttat ttatcagcag tctgattttg ctcgcctgga cagccgccct cggcgttgct + 121 ggtctgtggt cggcgtgggt actggtgcct ctggccatta tcctcgtgcc atttaacttt + 181 gcgcctatgc gtaagtcgat gatttccgcg ccggtatttc gcggtttccg taaggtgatg + 241 ccgccgatgt cgcgcactga gaaagaagcg attgatgcgg gcaccacctg gtgggagggc + 301 gacttgttcc agggcaagcc ggactggaaa aagctgcata actatccgca gccgcgcctg + 361 accgccgaag agcaagcgtt tctcgacggc ccggtagaag aagcctgccg gatggcgaat + 421 gatttccaga tcacccatga gctggcggat ctgccgccgg agttgtgggc gtaccttaaa + 481 gagcatcgtt tcttcgcgat gatcatcaaa aaagagtacg gcgggctgga gttctcggct + 541 tatgcccagt ctcgcgtgct gcaaaaactc tccggcgtga gcgggatcct ggcgattacc + 601 gtcggcgtgc caaactcatt aggcccgggc gaactgttgc aacattacgg cactgacgag + 661 cagaaagatc actatctgcc gcgtctggcg cgtggtcagg agatcccctg ctttgcactg + 721 accagcccgg aagcgggttc cgatgcgggc gcgattccgg acaccgggat tgtctgcatg + 781 ggcgaatggc agggccagca ggtgctgggg atgcgtctga cctggaacaa acgctacatt + 841 acgctggcac cgattgcgac cgtgcttggg ctggcgttta aactctccga cccggaaaaa + 901 ttactcggcg gtgcagaaga tttaggcatt acctgtgcgc tgatcccaac caccacgccg + 961 ggcgtggaaa ttggtcgtcg ccacttcccg ctgaacgtac cgttccagaa cggaccgacg + 1021 cgcggtaaag atgtcttcgt gccgatcgat tacatcatcg gcgggccgaa aatggccggg + 1081 caaggctggc ggatgctggt ggagtgcctc tcggtaggcc gcggcatcac cctgccttcc + 1141 aactcaaccg gcggcgtgaa atcggtagcg ctggcaaccg gcgcgtatgc tcacattcgc + 1201 cgtcagttca aaatctctat tggtaagatg gaagggattg aagagccgct ggcgcgtatt + 1261 gccggtaatg cctacgtgat ggatgctgcg gcatcgctga ttacctacgg cattatgctc + 1321 ggcgaaaaac ctgccgtgct gtcggctatc gttaagtatc actgtaccca ccgcgggcag + 1381 cagtcgatta ttgatgcgat ggatattacc ggcggtaaag gcattatgct cgggcaaagc + 1441 aacttcctgg cgcgtgctta ccagggcgca ccgattgcca tcaccgttga aggggctaac + 1501 attctgaccc gcagcatgat gatcttcgga caaggagcga ttcgttgcca tccgtacgtg + 1561 ctggaagaga tggaagcggc gaagaacaat gacgtcaacg cgttcgataa actgttgttc + 1621 aaacatatcg gtcacgtcgg tagcaacaaa gttcgcagct tctggctggg cctgacgcgc + 1681 ggtttaacca gcagcacgcc aaccggcgat gccactaaac gctactatca gcacctgaac + 1741 cgcctgagcg ccaacctcgc cctgctttct gatgtctcga tggcagtgct gggcggcagc + 1801 ctgaaacgtc gcgagcgcat ctcggcccgt ctgggggata ttttaagcca gctctacctc + 1861 gcctctgccg tgctgaagcg ttatgacgac gaaggccgta atgaagccga cctgccgctg + 1921 gtgcactggg gcgtacaaga tgcgctgtat caggctgaac aggcgatgga tgatttactg + 1981 caaaacttcc cgaaccgcgt ggttgccggg ctgctgaatg tggtgatctt cccgaccgga + 2041 cgtcattatc tggcaccttc tgacaagctg gatcataaag tggcgaagat tttacaagtg + 2101 ccgaacgcca cccgttcccg cattggtcgc ggtcagtacc tgacgccgag cgagcataat + 2161 ccggttggct tgctggaaga ggcgctggtg gatgtgattg ccgccgaccc aattcatcag + 2221 cggatctgta aagagctggg taaaaacctg ccgtttaccc gtctggatga actggcgcac + 2281 aacgcgctgg tgaaggggct gattgataaa gatgaagccg ctattctggt gaaagctgaa + 2341 gaaagccgtc tgcgcagtat taacgttgat gactttgatc cggaagagct ggcgacgaag + 2401 ccggtaaagt tgccggagaa agtgcggaaa gttgaagccg cgtaaatgca tcccacaggc + 2461 ccgcatctcg ggcctgatgt tctgtttcga gagtccaaca tgaaagtgac attaacgttt + 2521 aacgaacaac gtcgtgcggc gtatcgtcag caagggttat ggggcgatgc ttcgctggcc + 2581 gattactggc agcagaccgc tcgtgcgatg ccagacaaaa ttgccgtggt cgataatcat + 2641 ggtgcatcgt acacctatag cgcgctcgat cacgccgcga gctgtctggc aaactggatg + 2701 ttagcgaagg gtattgaatc aggcgatcgc atcgcatttc aactgcctgg ctggtgtgaa + 2761 tttaccgtta tctatcttgc ctgcctgaaa atcggtgcag tttccgtgcc gctgttgcct + 2821 tcctggcggg aagcagaact ggtgtgggtg ctcaataagt gtcaggcaaa aatgttcttt + 2881 gcaccgacgt tgtttaaaca aacgcgtccg gtagatttaa tcctgccgct gcaaaatcag + 2941 cttccacaac tacaacaaat tgtcggcgtg gacaaactgg ctcccgccac ctcttccctc + 3001 tcattaagtc agattatcgc cgacaatacc tcactgacca cggcgataac gacccacggc + 3061 gatgaattag ctgcggtgct gtttacctcc ggaaccgagg gtctgccaaa gggcgtgatg + 3121 ctaacgcata acaatattct cgccagtgag cgggcttatt gcgcgcgact gaatctgacc + 3181 tggcaggatg tctttatgat gcctgcgcca cttggtcacg caacgggctt tctgcatggc + 3241 gtaacggcac cattcttaat tggcgctcgc agcgtgttgt tagatatttt cactcctgat + 3301 gcgtgtctcg cgctgcttga gcagcagcgt tgcacctgta tgctcggcgc aacgccgttt + 3361 gtctatgatc ttttgaatgt actagagaaa caacccgcgg acctttcagc gctgcgtttc + 3421 tttctttgcg gcggaaccac aatccccaaa aaagtggcgc gtgaatgcca gcagcgcggc + 3481 attaaattat taagtgttta tggttccaca gaaagttcgc cgcatgcggt ggtgaatctc + 3541 gatgatcctt tgtcgcgctt tatgcacacc gatggttacg ctgccgcagg tgtagagatt + 3601 aaagtggtcg atgacgcacg caagacctta ccgccaggtt gcgaaggtga agaagcctcg + 3661 cgtggcccca atgtgtttat ggggtatttt gatgaacctg aattaaccgc ccgtgccctg + 3721 gatgaagaag gctggtatta cagcggcgat ctctgccgta tggatgaggc tggctatata + 3781 aaaattaccg gacgcaaaaa agatattatt gtccgcggcg gcgaaaatat tagcagccgt + 3841 gaagtggaag atattttatt gcagcatcct aaaattcacg atgcctgtgt ggttgcaatg + 3901 tccgatgaac gtttaggtga acgatcatgc gcttatgtcg tgctgaaagc gccgcatcat + 3961 tcattatcgc tggaagaggt agtggctttt tttagccgta aacgggtcgc aaaatataaa + 4021 tatcctgaac atatcgtggt aatcgaaaaa ctaccgcgaa ctacctcagg taaaatacaa + 4081 aagtttttgt taagaaaaga tattatgcgg cgtttaacgc aggatgtctg tgaagagatt + 4141 gaataattga agaaggtttg gcttaaccgt tatcccgcgg acgttccgac ggagatcaac + 4201 cctgaccgtt atcaatctct ggtagatatg tttgagcagt cggtcgcgcg ctacgccgat + 4261 caacctgcgt ttgtgaatat gggggaggta atgaccttcc gcaagctgga agaacgcagt + 4321 cgcgcgtttg ccgcttattt gcaacaaggg ttggggctga agaaaggcga tcgcgttgcg + 4381 ttgatgatgc ctaatttatt gcaatatccg gtggcgctgt ttggcatttt gcgtgccggg + 4441 atgatcgtcg taaacgttaa cccgttgtat accccgcgtg agcttgagca tcagcttaac + 4501 gatagcggcg catcggcgat tgttatcgtg tctaactttg ctcacacact ggaaaaagtg + 4561 gttgataaaa ccgccgttca gcacgtaatt ctgacccgta tgggcgatca gctatctacg + 4621 gcaaaaggca cggtagtcaa tttcgttgtt aaatacatca agcgtttggt gccgaaatac + 4681 catctgccag atgccatttc atttcgtagc gcactgcata acggctaccg gatgcagtac + 4741 gtcaaacccg aactggtgcc ggaagattta gcttttctgc aatacaccgg cggcaccact + 4801 ggtgtggcga aaggcgcgat gctgactcac cgcaatatgc tggcgaacct ggaacaggtt + 4861 aacgcgacct atggtccgct gttgcatccg ggcaaagagc tggtggtgac ggcgctgccg + 4921 ctgtatcaca tttttgccct gaccattaac tgcctgctgt ttatcgaact gggtgggcag + 4981 aacctgctta tcactaaccc gcgcgatatt ccagggttgg taaaagagtt agcgaaatat + 5041 ccgtttaccg ctatcacggg cgttaacacc ttgttcaatg cgttgctgaa caataaagag + 5101 ttccagcagc tggatttctc cagtctgcat ctttccgcag gcggtgggat gccagtgcag + 5161 caagtggtgg cagagcgttg ggtgaaactg accggacagt atctgctgga aggctatggc + 5221 cttaccgagt gtgcgccgct ggtcagcgtt aacccatatg atattgatta tcatagtggt + 5281 agcatcggtt tgccggtgcc gtcgacggaa gccaaactgg tggatgatga tgataatgaa + 5341 gtaccaccag gtcaaccggg tgagctttgt gtcaaaggac cgcaggtgat gctgggttac + 5401 tggcagcgtc ccgatgctac cgatgaaatc atcaaaaatg gctggttaca caccggcgac + 5461 atcgcggtaa tggatgaaga aggattcctg cgcattgtcg atcgtaaaaa agacatgatt + 5521 ctggtttccg gttttaacgt ctatcccaac gagattgaag atgtcgtcat gcagcatcct + 5581 ggcgtacagg aagtcgcggc tgttggcgta ccttccggct ccagtggtga agcggtgaaa + 5641 atcttcgtag tgaaaaaaga tccatcgctt accgaagagt cactggtgac tttttgccgc + 5701 cgtcagctca cgggatacaa agtaccgaag ctggtggagt ttcgtgatga gttaccgaaa + 5761 tctaacgtcg gaaaaatttt gcgacgagaa ttacgtgacg aagcgcgcgg caaagtggac + 5821 aataaagcct gaatggaaat gacatcagcg tttaccctta atgttcgtct ggacaacatt + 5881 gccgttatca ccatcgacgt accgggtgag aaaatgaata ccctgaaggc ggagtttgcc + 5941 tcgcaggtgc gcgccattat taagcaactc cgtgaaaaca aagagttgcg aggcgtggtg + 6001 tttgtctccg ctaaaccgga caacttcatt gctggcgcag acatcaacat gatcggcaac + 6061 tgcaaaacgg cgcaagaagc ggaagctctg gcgcggcagg gccaacagtt gatggcggag + 6121 attcatgctt tgcccattca ggttatcgcg gctattcatg gcgcttgcct gggtggtggg + 6181 ctggagttgg cgctggcgtg ccacggtcgc gtttgtactg acgatcctaa aacggtgctc + 6241 ggtttgcctg aagtacaact tggattgtta cccggttcag gcggcaccca gcgtttaccg + 6301 cgtctgatag gcgtcagcac agcattagag atgatcctca ccggaaaaca acttcgggcg + 6361 aaacaggcat taaagctggg gctggtggat gacgttgttc cgcactccat tctgctggaa + 6421 gccgctgttg agctggcaaa gaaggagcgc ccatcttccc gccctctacc tgtacgcgag + 6481 cgtattctgg cggggccgtt aggtcgtgcg ctgctgttca aaatggtcgg caagaaaaca + 6541 gaacacaaaa ctcaaggcaa ttatccggcg acagaacgca tcctggaggt tgttgaaacg + 6601 ggattagcgc agggcaccag cagcggttat gacgccgaag ctcgggcgtt tggcgaactg + 6661 gcgatgacgc cacaatcgca ggcgctgcgt agtatctttt ttgccagtac ggacgtgaag + 6721 aaagatcccg gcagtgatgc gccgcctgcg ccattaaaca gcgtggggat tttaggtggt + 6781 ggcttgatgg gcggcggtat tgcttatgtc actgcttgta aagcggggat tccggtcaga + 6841 attaaagata tcaacccgca gggcataaat catgcgctga agtacagttg ggatcagctg + 6901 gagggcaaag ttcgccgtcg tcatctcaaa gccagcgaac gtgacaaaca gctggcatta + 6961 atctccggaa cgacggacta tcgcggcttt gcccatcgcg atctgattat tgaagcggtg + 7021 tttgaaaatc tcgaattgaa acaacagatg gtggcggaag ttgagcaaaa ttgcgccgct + 7081 cataccatct ttgcttcgaa tacgtcatct ttaccgattg gtgatatcgc cgctcacgcc + 7141 acgcgacctg agcaagttat cggcctgcat ttcttcagtc cggtggaaaa aatgccgctg + 7201 gtggagatta ttcctcatgc ggggacatcg gcgcaaacca tcgctaccac agtaaaactg + 7261 gcgaaaaaac agggtaaaac gccaattgtc gtgcgtgaca aagccggttt ttacgtcaat + 7321 cgcatcttag cgccttacat taatgaagct atccgcatgt tgacccaagg tgaacgggta + 7381 gagcacattg atgccgcgct agtgaaattt ggttttccgg taggcccaat ccaacttttg + 7441 gatgaggtag gaatcgacac cgggactaaa attattcctg tactggaagc cgcttatgga + 7501 gaacgtttta gcgcgcctgc aaatgttgtt tcttcaattt tgaacgacga tcgcaaaggc + 7561 agaaaaaatg gccggggttt ctatctttat ggtcagaaag ggcgtaaaag caaaaaacag + 7621 gtcgatcccg ccatttaccc gctgattggc acacaagggc aggggcgaat ctccgcaccg + 7681 caggttgctg aacggtgtgt gatgttgatg ctgaatgaag cagtacgttg tgttgatgag + 7741 caggttatcc gtagcgtgcg tgacggggat attggcgcgg tatttggcat tggttttccg + 7801 ccatttctcg gtggaccgtt ccgctatatc gattctctcg gcgcgggcga agtggttgca + 7861 ataatgcaac gacttgccac gcagtatggt tcccgtttta ccccttgcga gcgtttggtc + 7921 gagatgggcg cgcgtgggga aagtttttgg aaaacaactg caactgacct gcaataaatg + 7981 ggtcaggttt taccgctggt tacccgccag ggcgatcgta tcgccattgt tagcggttta + 8041 cgtacgcctt ttgcccgtca ggcgacggct tttcatggca ttcccgcggt tgatttaggg + 8101 aagatggtgg taggcgaact gctggcacgc agcgagatcc ccgccgaagt gattgaacaa + 8161 ctggtctttg gtcaggtcgt acaaatgcct gaagccccca acattgcgcg tgaaattgtt + 8221 ctcggtacgg gaatgaatgt acataccgat gcttacagcg tcagccgcgc ttgcgctacc + 8281 agtttccagg cagttgcaaa cgtcgcagaa agcctgatgg cgggaactat tcgagcgggg + 8341 attgccggtg gggcagattc ctcttcggta ttgccaattg gcgtcagtaa aaaactggcg + 8401 cgcgtgctgg ttgatgtcaa caaagctcgt accatgagcc agcgactgaa actcttctct + 8461 cgcctgcgtt tgcgcgactt aatgcccgta ccacctgcgg tagcagaata ttctaccggc + 8521 ttgcggatgg gcgacaccgc agagcaaatg gcgaaaacct acggcatcac ccgagaacag + 8581 caagatgcat tagcgcaccg ttcgcatcag cgtgccgctc aggcatggtc agacggaaaa + 8641 ctcaaagaag aggtgatgac tgcctttatc cctccttata aacaaccgct tgtcgaagac + 8701 aacaatattc gcggtaattc ctcgcttgcc gattacgcaa agctgcgccc ggcgtttgat + 8761 cgcaaacacg gaacggtaac ggcggcaaac agtacgccgc tgaccgatgg cgcggcagcg + 8821 gtgatcctga tgactgaatc ccgggcgaaa gaattagggc tggtgccgct ggggtatctg + 8881 cgcagctacg catttactgc gattgatgtc tggcaggaca tgttgctcgg tccagcctgg + 8941 tcaacaccgc tggcgctgga gcgtgccggt ttgacgatga gcgatctgac attgatcgat + 9001 atgcacgaag cctttgcagc tcagacgctg gcgaatattc agttgctggg tagtgaacgt + 9061 tttgctcgtg aagcactggg gcgtgcacat gccactggcg aagtggacga tagcaaattt + 9121 aacgtgcttg gcggttcgat tgcttacggg catcccttcg cggcgaccgg cgcgcggatg + 9181 attacccaga cattgcatga acttcgccgt cgcggcggtg gatttggttt agttaccgcc + 9241 tgtgctgccg gtgggcttgg cgcggcaatg gttctggagg cggaataaat ggaacaggtt + 9301 gtcattgtcg atgcaattcg caccccgatg ggccgttcga agggcggtgc ttttcgtaac + 9361 gtgcgtgcag aagatctctc cgctcattta atgcgtagcc tgctggcgcg taacccggcg + 9421 ctggaagcgg cggccctcga cgatatttac tggggttgtg tgcagcagac gctggagcag + 9481 ggttttaata tcgcccgtaa cgcggcgctg ctggcagaag taccacactc tgtcccggcg + 9541 gttaccgtta atcgcttgtg tggttcatcc atgcaggcac tgcatgacgc agcacgaatg + 9601 atcatgactg gcgatgcgca ggcatgtctg gttggcggcg tggagcatat gggccatgtg + 9661 ccgatgagtc acggcgtcga ttttcacccc ggcctgagcc gcaatgtcgc caaagcggcg + 9721 ggcatgatgg gcttaacggc agaaatgctg gcgcgtatgc acggtatcag ccgtgaaatg + 9781 caggatgcct ttgccgcgcg gtcacacgcc cgcgcctggg ccgccacgca gtcggccgca + 9841 tttaaaaatg aaatcatccc gaccggtggt cacgatgccg acggcgtcct gaagcagttt + 9901 aattacgacg aagtgattcg cccggaaacc accgtggaag ccctcgccac gctgcgtccg + 9961 gcgtttgatc cagtaaacgg tatggtaacg gcgggcacat cttctgcact ttccgatggc + 10021 gcagctgcca tgctggtgat gagtgaaagc cgcgcccatg aattaggtct taagccgcgc + 10081 gctcgtgtgc gttcgatggc ggtcgttggt tgtgacccat cgattatggg ttacggcccg + 10141 gttccggcct cgaaactggc gctgaaaaaa gcggggcttt ctgccagcga tatcggcgtg + 10201 tttgaaatga acgaagcctt tgccgcgcag atcctgccat gtattaaaga tctgggacta + 10261 attgagcaga ttgacgagaa gatcaacctc aacggtggcg cgatcgcgct gggtcatccg + 10321 ctgggttgtt ccggtgcgcg tatcagcacc acgctgctga atctgatgga acgcaaagac + 10381 gttcagtttg gtctggcgac gatgtgtatc ggtctgggtc agggtattgc gacggtgttt + 10441 gagcgggttt aaatgcttta caaaggcgac accctgtacc ttgactggct ggaagatggc + 10501 attgccgaac tggtatttga tgccccaggt tcagttaata aactcgacac tgcgaccgtc + 10561 gccagcctcg gcgaggccat cggcgtgctg gaacagcaat cagatctaaa agggctgctg + 10621 ctgcgttcga acaaagcagc ctttatcgtc ggtgctgata tcaccgaatt tttgtccctg + 10681 ttcctcgttc ctgaagaaca gttaagtcag tggctgcact ttgccaatag cgtgtttaat + 10741 cgcctggaag atctgccggt gccgaccatt gctgccgtca atggctatgc gctgggcggt + 10801 ggctgcgaat gcgtgctggc gaccgattat cgtctggcga cgccggatct gcgcatcggt + 10861 ctgccggaaa ccaaactggg catcatgcct ggctttggcg gttctgtacg tatgccacgt + 10921 atgctgggcg ctgacagtgc gctggaaatc attgccgccg gtaaagatgt cggcgcggat + 10981 caggcgctga aaatcggtct ggtggatggc gtagtcaaag cagaaaaact ggttgaaggc + 11041 gcaaaggcgg ttttacgcca ggccattaac ggcgacctcg actggaaagc aaaacgtcag + 11101 ccgaagctgg aaccactaaa actgagcaag attgaagcca ccatgagctt caccatcgct + 11161 aaagggatgg tcgcacaaac agcggggaaa cattatccgg cccccatcac cgcagtaaaa + 11221 accattgaag ctgcggcccg ttttggtcgt gaagaagcct taaacctgga aaacaaaagt + 11281 tttgtcccgc tggcgcatac caacgaagcc cgcgcactgg tcggcatttt ccttaacgat + 11341 caatatgtaa aaggcaaagc gaagaaactc accaaagacg ttgaaacccc gaaacaggcc + 11401 gcggtgctgg gtgcaggcat tatgggcggc ggcatcgctt accagtctgc gtggaaaggc + 11461 gtgccggttg tcatgaaaga tatcaacgac aagtcgttaa ccctcggcat gaccgaagcc + 11521 gcgaaactgc tgaacaagca gcttgagcgc ggcaagatcg atggtctgaa actggctggc + 11581 gtgatctcca caatccaccc aacgctcgac tacgccggat ttgaccgcgt ggatattgtg + 11641 gtagaagcgg ttgttgaaaa cccgaaagtg aaaaaagccg tactggcaga aaccgaacaa + 11701 aaagtacgcc aggataccgt gctggcgtct aacacttcaa ccattcctat cagcgaactg + 11761 gccaacgcgc tggaacgccc ggaaaacttc tgcgggatgc acttctttaa cccggtccac + 11821 cgaatgccgt tggtagaaat tattcgcggc gagaaaagct ccgacgaaac catcgcgaaa + 11881 gttgtcgcct gggcgagcaa gatgggcaag acgccgattg tggttaacga ctgccccggc + 11941 ttctttgtta accgcgtgct gttcccgtat ttcgccggtt tcagccagct gctgcgcgac + 12001 ggcgcggatt tccgcaagat cgacaaagtg atggaaaaac agtttggctg gccgatgggc + 12061 ccggcatatc tgctggacgt tgtgggcatt gataccgcgc atcacgctca ggctgtcatg + 12121 gcagcaggct tcccgcagcg gatgcagaaa gattaccgcg atgccatcga cgcgctgttt + 12181 gatgccaacc gctttggtca gaagaacggc ctcggtttct ggcgttataa agaagacagc + 12241 aaaggtaagc cgaagaaaga agaagacgcc gccgttgaag acctgctggc agaagtgagc + 12301 cagccgaagc gcgatttcag cgaagaagag attatcgccc gcatgatgat cccgatggtc + 12361 aacgaagtgg tgcgctgtct ggaggaaggc attatcgcca ctccggcgga agcggatatg + 12421 gcgctggtct acggcctggg cttccctccg ttccacggcg gcgcgttccg ctggctggac + 12481 accctcggta gcgcaaaata cctcgatatg gcacagcaat atcagcacct cggcccgctg + 12541 tatgaagtgc cggaaggtct gcgtaataaa gcgcgtcata acgaaccgta ctatcctccg + 12601 gttgagccag cccgtccggt tggcgacctg aaaacggctt aa +// diff --git a/tests/numeric/betaox_genomes.gff b/tests/numeric/betaox_genomes.gff new file mode 100644 index 0000000..e8b86b8 --- /dev/null +++ b/tests/numeric/betaox_genomes.gff @@ -0,0 +1,22 @@ +##gff-version 3 +#!gff-spec-version 1.21 +#!processor NCBI annotwriter +#!genome-build ASM584v2 +#!genome-build-accession NCBI_Assembly:GCF_000005845.2 +##sequence-region NC_000913_3 1 12642 +##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=511145 +NC_000913_3 RefSeq region 1 12642 . + . ID=id0;Dbxref=taxon:511145;Is_circular=true;Name=ANONYMOUS;gbkey=Src;genome=chromosome;mol_type=genomic DNA;strain=K-12;substrain=MG1655 +NC_000913_3 RefSeq gene 1 2445 . - . ID=gene226;Dbxref=ASAP:ABE-0000743,ECOCYC:G6105,EcoGene:EG13145,GeneID:949007;Name=fadE;gbkey=Gene;gene=fadE;gene_biotype=protein_coding;gene_synonym=ECK0222,yafH;locus_tag=b0221 +NC_000913_3 RefSeq CDS 1 2445 . - 0 ID=226;Parent=gene226;Dbxref=UniProtKB/Swiss-Prot:Q47146,Genbank:NP_414756.2,ASAP:ABE-0000743,ECOCYC:G6105,EcoGene:EG13145,GeneID:949007;Name=NP_414756.2;gbkey=CDS;gene=fadE;orig_transcript_id=gnl|b0221|mrna.b0221;product=acyl-CoA dehydrogenase;protein_id=NP_414756.2;transl_table=11 +NC_000913_3 RefSeq gene 2446 4146 . + . ID=gene1781;Dbxref=ASAP:ABE-0005676,ECOCYC:EG12357,EcoGene:EG12357,GeneID:946213;Name=fadK;gbkey=Gene;gene=fadK;gene_biotype=protein_coding;gene_synonym=ECK1699,ydiD;locus_tag=b1701 +NC_000913_3 RefSeq CDS 2446 4146 . + 0 ID=1781;Parent=gene1781;Dbxref=UniProtKB/Swiss-Prot:P38135,Genbank:NP_416216.5,ASAP:ABE-0005676,ECOCYC:EG12357,EcoGene:EG12357,GeneID:946213;Name=NP_416216.5;gbkey=CDS;gene=fadK;orig_transcript_id=gnl|b1701|mrna.b1701;product=short chain acyl-CoA synthetase;protein_id=NP_416216.5;transl_table=11 +NC_000913_3 RefSeq gene 4147 5832 . - . ID=gene1887;Dbxref=ASAP:ABE-0006005,ECOCYC:EG11530,EcoGene:EG11530,GeneID:946327;Name=fadD;gbkey=Gene;gene=fadD;gene_biotype=protein_coding;gene_synonym=ECK1803,oldD;locus_tag=b1805 +NC_000913_3 RefSeq CDS 4147 5832 . - 0 ID=1887;Parent=gene1887;Dbxref=UniProtKB/Swiss-Prot:P69451,Genbank:NP_416319.1,ASAP:ABE-0006005,ECOCYC:EG11530,EcoGene:EG11530,GeneID:946327;Name=NP_416319.1;gbkey=CDS;gene=fadD;orig_transcript_id=gnl|b1805|mrna.b1805;product=fatty acyl-CoA synthetase;protein_id=NP_416319.1;transl_table=11 +NC_000913_3 RefSeq gene 5833 7977 . - . ID=gene2441;Dbxref=ASAP:ABE-0007723,ECOCYC:G7212,EcoGene:EG14127,GeneID:949097;Name=fadJ;gbkey=Gene;gene=fadJ;gene_biotype=protein_coding;gene_synonym=ECK2335,yfcX;locus_tag=b2341 +NC_000913_3 RefSeq CDS 5833 7977 . - 0 ID=2441;Parent=gene2441;Dbxref=UniProtKB/Swiss-Prot:P77399,Genbank:NP_416843.1,ASAP:ABE-0007723,ECOCYC:G7212,EcoGene:EG14127,GeneID:949097;Name=NP_416843.1;gbkey=CDS;gene=fadJ;orig_transcript_id=gnl|b2341|mrna.b2341;product=3-hydroxyacyl-CoA dehydrogenase FadJ;protein_id=NP_416843.1;transl_table=11 +NC_000913_3 RefSeq gene 7978 9288 . - . ID=gene2442;Dbxref=ASAP:ABE-0007725,ECOCYC:G7213,EcoGene:EG14128,GeneID:948823;Name=fadI;gbkey=Gene;gene=fadI;gene_biotype=protein_coding;gene_synonym=ECK2336,yfcY;locus_tag=b2342 +NC_000913_3 RefSeq CDS 7978 9288 . - 0 ID=2442;Parent=gene2442;Dbxref=UniProtKB/Swiss-Prot:P76503,Genbank:NP_416844.1,ASAP:ABE-0007725,ECOCYC:G7213,EcoGene:EG14128,GeneID:948823;Name=NP_416844.1;gbkey=CDS;gene=fadI;orig_transcript_id=gnl|b2342|mrna.b2342;product=3-ketoacyl-CoA thiolase FadI;protein_id=NP_416844.1;transl_table=11 +NC_000913_3 RefSeq gene 9289 10452 . - . ID=gene3987;Dbxref=ASAP:ABE-0012562,ECOCYC:EG10278,EcoGene:EG10278,GeneID:948324;Name=fadA;gbkey=Gene;gene=fadA;gene_biotype=protein_coding;gene_synonym=ECK3837,oldA;locus_tag=b3845 +NC_000913_3 RefSeq CDS 9289 10452 . - 0 ID=3987;Parent=gene3987;Dbxref=UniProtKB/Swiss-Prot:P21151,Genbank:YP_026272.1,ASAP:ABE-0012562,ECOCYC:EG10278,EcoGene:EG10278,GeneID:948324;Name=YP_026272.1;gbkey=CDS;gene=fadA;orig_transcript_id=gnl|b3845|mrna.b3845;product=3-ketoacyl-CoA thiolase;protein_id=YP_026272.1;transl_table=11 +NC_000913_3 RefSeq gene 10453 12642 . - . ID=gene3988;Dbxref=ASAP:ABE-0012564,ECOCYC:EG10279,EcoGene:EG10279,GeneID:948336;Name=fadB;gbkey=Gene;gene=fadB;gene_biotype=protein_coding;gene_synonym=ECK3838,oldB;locus_tag=b3846 +NC_000913_3 RefSeq CDS 10453 12642 . - 0 ID=3988;Parent=gene3988;Dbxref=UniProtKB/Swiss-Prot:P21177,Genbank:NP_418288.1,ASAP:ABE-0012564,ECOCYC:EG10279,EcoGene:EG10279,GeneID:948336;Name=NP_418288.1;gbkey=CDS;gene=fadB;orig_transcript_id=gnl|b3846|mrna.b3846;product=dodecenoyl-CoA delta-isomerase%2C enoyl-CoA hydratase%2C 3-hydroxybutyryl-CoA epimerase%2C 3-hydroxyacyl-CoA dehydrogenase;protein_id=NP_418288.1;transl_table=11 diff --git a/tests/test_emapper2gbk.py b/tests/test_emapper2gbk.py index 5c10c9c..2c15eba 100644 --- a/tests/test_emapper2gbk.py +++ b/tests/test_emapper2gbk.py @@ -19,6 +19,7 @@ GENOME_FNA_INPUT = 'betaox_genomes.fna' GENOME_FAA_INPUT = 'betaox_genomes.faa' GENOME_GFF_INPUT = 'betaox_genomes.gff' +NUMERIC_ANNOT_GFF = os.path.join('numeric', 'betaox_genomes.gff') ANNOT_INPUT = 'betaox_annotation.tsv' NUMERIC_ANNOT_INPUT = os.path.join('numeric', 'betaox_annotation.tsv') @@ -46,6 +47,7 @@ EXPECTED_GBK_WITH_GFF = 'betaox_from_gff.gbk' EXPECTED_GBK_NO_GFF_MERGED = 'betaox_no_gff_merged.gbk' EXPECTED_GBK_NUMERIC = os.path.join('numeric', 'betaox.gbk') +EXPECTED_GBK_GFF_NUMERIC = os.path.join('numeric', 'betaox_genomes.gbk') GMOVE_ANNOT = os.path.join('data_gmove', 'betaox_v2.emapper.annotations') GMOVE_FNA = os.path.join('data_gmove', 'betaox_genomes.fna') @@ -689,6 +691,25 @@ def test_gbk_genes_mode_numeric_test(): return +def test_gbk_genomes_numeric_test(): + """Test genomes mode with file as input. + """ + print("*** Test genomes mode eggnog with file as input ***") + gbk_test = 'test_gff.gbk' + + gbk_creation(nucleic_fasta=GENOME_FNA_INPUT, + protein_fasta=NUMERIC_FAA_INPUT, + annot=NUMERIC_ANNOT_INPUT, + gff=NUMERIC_ANNOT_GFF, + org=ORG_NAME_BACT, + output_path=gbk_test, + gobasic=GO_FILE) + + compare_two_gbks(EXPECTED_GBK_GFF_NUMERIC, gbk_test) + os.remove(gbk_test) + + return + if __name__ == "__main__": test_gbk_genes_mode_test() test_gbk_genes_mode_lineage_test() @@ -708,3 +729,4 @@ def test_gbk_genes_mode_numeric_test(): test_gbk_genomes_mode_gmove_test() test_gbk_genomes_mode_gmove_test_cli() test_gbk_genes_mode_numeric_test() + test_gbk_genomes_numeric_test()