-
Notifications
You must be signed in to change notification settings - Fork 0
/
FastqUpdate.py
executable file
·136 lines (114 loc) · 3.21 KB
/
FastqUpdate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
#!/usr/bin/env python3
# Script identity: interpolated into the usage text and printed by -v/--version.
name, version, updated = "FastqUpdate.py", "0.2a", "2024-06-02"
from Bio import SeqIO
import argparse
from os import path
from sys import exit,argv
#########################################################################
### Command line options
#########################################################################
# Help text: printed when the script is started without arguments and also
# passed to argparse as its usage message. Built line by line; the leading
# and trailing empty entries give the text its surrounding blank lines.
usage = "\n".join((
    "",
    f"NAME {name}",
    f"VERSION {version}",
    f"UPDATED {updated}",
    "SYNOPSIS Convert sequence file formats using biopython",
    "REQS python3, biopython",
    f"COMMAND {name} \\",
    "--input raw_R1.fastq \\",
    "--encode fastq-illumina",
    "-i (--input) Input sequence file",
    "-e (--encode) Input sequence encoding",
    "-n (--nencode) Output sequence encoding [default = fastq]",
    "-o (--output) Output file name [default = <InputFileName>_converted.<nencode>]",
    "-f (--formats) Show supported formats",
    "-v (--version) Show script version",
    "",
))
# Table printed by -f/--formats. The names are the format strings accepted by
# Bio.SeqIO, so they must be spelled exactly as the library expects them:
# a user copies them verbatim into --encode / --nencode.
formats = """
Supported Sequence Encoding Types
---------------------------------
r = read, w = write
r- abi
r- abi-trim
r- ace
r- cif-atom
r- cif-seqres
rw clustal
rw embl
rw fasta
rw fasta-2line
rw fastq-sanger/fastq
rw fastq-solexa
rw fastq-illumina
r- gck
rw genbank/gb
r- ig
rw imgt
rw nexus
r- pdb-seqres
r- pdb-atom
rw phd
rw phylip
rw pir
rw seqxml
rw sff
r- sff-trim
r- snapgene
rw stockholm
r- swiss
rw tab
rw qual
r- uniprot-xml
rw xdna
"""
def default_output_name(in_file, nencode):
    """Return the default output file name for a conversion.

    Args:
        in_file: Path of the input sequence file.
        nencode: Output format name; it is used as the file extension.

    Returns:
        ``<InputFileName>_converted.<nencode>``, where ``<InputFileName>`` is
        the base name of ``in_file`` without its directory and extension.
        The name carries no directory, so the file is created relative to
        the current working directory.
    """
    stem, _ext = path.splitext(path.basename(in_file))
    return f"{stem}_converted.{nencode}"


def main():
    """Parse the command line and convert the input file with Bio.SeqIO.

    Prints the usage text when started without arguments, the script
    version for -v/--version and the format table for -f/--formats; each of
    these exits with status 0. Otherwise the file given by --input is
    converted from the --encode format to the --nencode format and written
    to --output (or to the default name when --output is not given).
    """
    # Print custom message if argv is empty
    if len(argv) <= 1:
        print(usage)
        exit(0)

    ## Set up command line parser + command line flags
    parser = argparse.ArgumentParser(usage=usage)
    parser.add_argument("-i", "--input")
    parser.add_argument("-e", "--encode")
    parser.add_argument("-o", "--output")
    # The default lives on the option itself so that omitting -n really
    # yields "fastq" instead of None.
    parser.add_argument("-n", "--nencode", default="fastq")
    parser.add_argument("-f", "--formats", action="store_true")
    parser.add_argument("-v", "--version", action="store_true")
    args = parser.parse_args()

    #####################################################################
    ### Version
    #####################################################################
    if args.version:
        print("")
        print(f"Script: {name}")
        print(f"Version: {version}")
        print(f"Updated: {updated}\n")
        exit(0)

    #####################################################################
    ### Supported formats
    #####################################################################
    if args.formats:
        print(formats)
        exit(0)

    #####################################################################
    ### File conversion
    #####################################################################
    # -i and -e stay optional at the parser level so that -v / -f work on
    # their own; they are only mandatory once a conversion is requested.
    if not args.input or not args.encode:
        parser.error("--input and --encode are required to convert a file")

    ## If output file not given, create default from input file
    out_file = args.output or default_output_name(args.input, args.nencode)

    SeqIO.convert(args.input, args.encode, out_file, args.nencode)


if __name__ == "__main__":
    main()