From da6ccad6eefb864fd48d8376069a2adac5ba8993 Mon Sep 17 00:00:00 2001
From: ChocoParrot
Date: Mon, 13 Sep 2021 13:25:22 +0800
Subject: [PATCH] [factor] GTF command line tool

---
 setup.py                            |  3 ++
 src/cline_tools/__init__.py         |  1 +
 src/cline_tools/orffinder-to-gtf.py | 55 +++++++++++++++++++++++++++++
 3 files changed, 59 insertions(+)
 create mode 100644 src/cline_tools/__init__.py
 create mode 100644 src/cline_tools/orffinder-to-gtf.py

diff --git a/setup.py b/setup.py
index 2226273..7212f43 100644
--- a/setup.py
+++ b/setup.py
@@ -22,6 +22,9 @@
         "License :: OSI Approved :: MIT License",
         "Operating System :: OS Independent",
     ),
+    install_requires=[
+        "biopython==1.79"
+    ],
     package_dir={"": "src"},
     packages=setuptools.find_packages(where="src"),
     python_requires=">=3.6"
diff --git a/src/cline_tools/__init__.py b/src/cline_tools/__init__.py
new file mode 100644
index 0000000..291e01a
--- /dev/null
+++ b/src/cline_tools/__init__.py
@@ -0,0 +1 @@
+__name__ = "orffinder"
diff --git a/src/cline_tools/orffinder-to-gtf.py b/src/cline_tools/orffinder-to-gtf.py
new file mode 100644
index 0000000..bb8cd80
--- /dev/null
+++ b/src/cline_tools/orffinder-to-gtf.py
@@ -0,0 +1,55 @@
+"""Command line tool: write the ORFs found in a sequence file as GTF records."""
+
+import sys
+
+from Bio import SeqIO
+from orffinder import orffinder
+
+USAGE = "USAGE\n orffinder-to-gtf [-in input] [-infmt format] [-out output] [-orf_size int]\n [-remove_nested boolean] [-trim_trailing boolean] [-max_orfs_per_sequence int]\n [-attr_name string]\n\nDESCRIPTION\n ORFFinder Python v1.5\n\nUse '-help' to print detailed descriptions of command line arguments\n========================================================================"
+
+# Defaults; each "-name value" pair on the command line overrides one entry.
+classed_arguments = {"orf_size": "75", "max_orfs_per_sequence": "-1", "remove_nested": "False", "trim_trailing": "False", "infmt": "fasta", "attr_name": "ORF_"}
+arguments = sys.argv[1:]
+
+try:
+    # Pair every "-flag" with the token that follows it.
+    for i, argument in enumerate(arguments):
+        if argument.startswith("-"):
+            classed_arguments[argument[1:]] = arguments[i + 1]
+
+    sequences = SeqIO.parse(classed_arguments["in"], classed_arguments["infmt"])
+    orf_size = int(classed_arguments["orf_size"])
+    remove_nested = classed_arguments["remove_nested"] == "True"
+    trim_trailing = classed_arguments["trim_trailing"] == "True"
+    attr_name = classed_arguments["attr_name"]
+    max_orfs_per_sequence = int(classed_arguments["max_orfs_per_sequence"])
+except (KeyError, IndexError, ValueError):
+    # Missing -in, a flag without a value, or an invalid value (for example a
+    # non-integer size/limit): show the usage text. Any other failure keeps
+    # its traceback instead of being hidden behind the usage message.
+    print(USAGE)
+    sys.exit(1)
+
+output = []
+index = 0  # running ORF number across all sequences, used to build orf_id
+
+for sequence in sequences:
+    seqname = sequence.description
+    orfs = orffinder.getORFs(sequence, minimum_length=orf_size, trim_trailing=trim_trailing, remove_nested=remove_nested)
+
+    for local_index, orf in enumerate(orfs, start=1):
+        index += 1
+        output.append([seqname, "ORFFinder Python", "ORF", str(orf["start"]), str(orf["end"]), ".", orf["sense"], str(orf["frame"] - 1), "orf_id \"" + attr_name + str(index) + "\""])
+
+        # -1 means "no limit"; otherwise stop once this sequence hits the cap.
+        if max_orfs_per_sequence != -1 and local_index >= max_orfs_per_sequence:
+            break
+
+full_output = "\n".join("\t".join(x) for x in output)
+
+if "out" not in classed_arguments:
+    print(full_output)
+else:
+    # The context manager flushes and closes the file even if write() fails.
+    with open(classed_arguments["out"], "w") as handle:
+        handle.write(full_output)