forked from qclian/Pan_Ath
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path01genome_assembly
366 lines (285 loc) · 36.6 KB
/
01genome_assembly
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
##########################################################################################################################################################################################################################################################################################################################################
##########################################################################################################################################################################################################################################################################################################################################
#Genome assembly for Oxford Nanopore accessions, use Are-1 as an example
######
#Estimate genome size using Illumina short reads dataset
#Reads are aligned to chloro_mito_phiX.fasta, pairs with both mates unmapped are
#extracted, their 21-mers are counted with jellyfish and the k-mer histogram is
#passed to run_findGSE.
illumina_dir=../raw_data/Illumina_rawData
kmer_dir=../genome_assembly/AtAre1/00Kmer_est
aln_prefix="${kmer_dir}/Are-1_aln_pe"
unmapped_prefix="${illumina_dir}/Are-1_aln_pe_srt"
jf_db="${kmer_dir}/Are-1.Illumina.jf"

bwa mem ../ref_genome/chloro_mito_phiX.fasta \
  "${illumina_dir}/Illumina_Are-1_R1.fastq.gz" \
  "${illumina_dir}/Illumina_Are-1_R2.fastq.gz" \
  -t 20 > "${aln_prefix}.sam"
samtools view -@ 8 -bS "${aln_prefix}.sam" \
  | samtools sort -@ 20 - -o "${aln_prefix}_sorted.bam"
#SAM flag 77 / 141: read 1 / read 2 of pairs in which both mates are unmapped
samtools view -@ 20 -b -f 77 "${aln_prefix}_sorted.bam" > "${aln_prefix}_sorted.bothunmapped1.bam"
samtools view -@ 20 -b -f 141 "${aln_prefix}_sorted.bam" > "${aln_prefix}_sorted.bothunmapped2.bam"
for mate in 1 2; do
  bedtools bamtofastq -i "${aln_prefix}_sorted.bothunmapped${mate}.bam" -fq "${unmapped_prefix}_${mate}.fastq"
done
samtools flagstat -@ 20 "${aln_prefix}_sorted.bam" > "${aln_prefix}_sorted.bam.flagstat"
for mate in 1 2; do
  gzip -f "${unmapped_prefix}_${mate}.fastq"
done
#count canonical (-C) 21-mers of both mates together, reading from stdin
zcat "${unmapped_prefix}_1.fastq.gz" "${unmapped_prefix}_2.fastq.gz" \
  | jellyfish count /dev/fd/0 -C -o "${jf_db}" -m 21 -t 20 -s 5G
jellyfish histo -h 2000000 -o "${jf_db}.histo" "${jf_db}"
gzip -f "${jf_db}"
#the intermediate SAM is no longer needed once the sorted BAM exists
rm "${kmer_dir}"/*sam
Rscript ./run_findGSE "${jf_db}.histo" "${kmer_dir}/Are-1.Illumina.ndna.findGSE"
######
#Quality control of Nanopore long reads
#porechop writes the *.trim reads; filtlong then keeps reads passing the
#--min_length / --min_mean_q thresholds and the result is re-compressed.
ont_prefix=../raw_data/Nanopore_rawData/Nanopore_Are-1
mkdir ../genome_assembly/AtAre1/01Nano_QC
porechop -i "${ont_prefix}.fastq.gz" -o "${ont_prefix}.trim.fastq.gz"
filtlong --min_length 1000 --min_mean_q 70 "${ont_prefix}.trim.fastq.gz" \
  | gzip > "${ont_prefix}.trim.flt.fastq.gz"
######
#assembly using flye
ont_reads=../raw_data/Nanopore_rawData/Nanopore_Are-1.trim.flt.fastq.gz
flye_dir=../genome_assembly/AtAre1/02Flye_out00
racon_dir="${flye_dir}/racon_out"
/opt/share/software/packages/Flye-2.7/bin/flye -o "${flye_dir}" -t 24 -g 135m --nano-raw "${ont_reads}"
#polished flye assembly using racon with Nanopore long reads
#three rounds: each round maps the reads to the current draft and feeds the
#overlaps to racon; the output of one round is the draft of the next
mkdir "${racon_dir}"
draft="${flye_dir}/assembly.fasta"
for round in 1 2 3; do
  overlaps="${racon_dir}/round_${round}.paf"
  case "${round}" in
    #the last round carries a different file name than rounds 1 and 2
    3) polished="${racon_dir}/assembly.racon_round3.fasta" ;;
    *) polished="${racon_dir}/racon_round${round}.fasta" ;;
  esac
  minimap2 -t 24 "${draft}" "${ont_reads}" > "${overlaps}"
  racon -t 24 "${ont_reads}" "${overlaps}" "${draft}" > "${polished}"
  draft="${polished}"
done
#polished flye assembly using nextpolish with Illumina short reads
#Runs in a subshell: the purge steps that follow address their files relative
#to the original working directory, so the cd must not outlive this section.
(
  np_dir=../genome_assembly/AtAre1/02Flye_out00/racon_out/nextpolish_out
  #-p keeps a rerun from failing on the existing directory
  mkdir -p "${np_dir}"
  #never link or run anything if the working directory could not be entered
  cd "${np_dir}" || exit 1
  ln -sf /xxx/raw_data/Illumina_rawData/Illumina_Are-1_R1.fastq.gz .
  ln -sf /xxx/raw_data/Illumina_rawData/Illumina_Are-1_R2.fastq.gz .
  ln -sf ../assembly.racon_round3.fasta input.genome.fa
  #instantiate the run_nextpolish template for this accession's read files
  sed -e "s/reads_R1/Illumina_Are-1_R1/g" -e "s/reads_R2/Illumina_Are-1_R2/g" /xxx/run_scripts/run_nextpolish > run_nextpolish
  bash ./run_nextpolish
)
#purge flye assembly
#read-depth statistics (pbcstat/calcuts) plus a self-alignment of the split
#assembly drive purge_dups; the purged assembly is then re-mapped and its depth
#histogram plotted again
ont_reads=../raw_data/Nanopore_rawData/Nanopore_Are-1.trim.flt.fastq.gz
np_dir=../genome_assembly/AtAre1/02Flye_out00/racon_out/nextpolish_out
pd_dir="${np_dir}/purge_dups"
pd2_dir="${pd_dir}/purge_dups"
polished="${np_dir}/genome.nextpolish.fa"
reads_paf="${np_dir}/AtAre1.flye_ass_polished.nano.paf"
split_prefix="${np_dir}/Are-1.flye_ass_polished.split"
purged_prefix="${pd_dir}/Are-1.flye_ass_polished"

minimap2 -x map-ont -t 20 "${polished}" "${ont_reads}" -o "${reads_paf}"
mkdir "${pd_dir}"
pbcstat -O "${pd_dir}" "${reads_paf}"
calcuts "${pd_dir}/PB.stat" > "${pd_dir}/cutoffs.auto"
hist_plot.py -c "${pd_dir}/cutoffs.auto" "${pd_dir}/PB.stat" "${pd_dir}/PB.stat.auto.png"
split_fa "${polished}" > "${split_prefix}"
minimap2 -xasm5 -DP -t 20 "${split_prefix}" "${split_prefix}" -o "${split_prefix}.self.paf"
#NOTE(review): cutoffs_manu is read here but no visible step writes it for this
#accession - presumably created by hand with calcuts -l/-m/-u; confirm
purge_dups -2 -T "${pd_dir}/cutoffs_manu" -c "${pd_dir}/PB.base.cov" "${split_prefix}.self.paf" > "${pd_dir}/dups.bed"
get_seqs -e -p "${purged_prefix}" "${pd_dir}/dups.bed" "${polished}"
#depth histogram of the purged assembly
minimap2 -x map-ont -t 20 "${purged_prefix}.purged.fa" "${ont_reads}" -o "${purged_prefix}.purged.nano.paf"
mkdir "${pd2_dir}"
pbcstat -O "${pd2_dir}" "${purged_prefix}.purged.nano.paf"
calcuts "${pd2_dir}/PB.stat" > "${pd2_dir}/cutoffs.auto"
hist_plot.py -c "${pd2_dir}/cutoffs.auto" "${pd2_dir}/PB.stat" "${pd2_dir}/PB.stat.auto.png"
######
#assembly using SMARTdenovo
ont_reads=../raw_data/Nanopore_rawData/Nanopore_Are-1.trim.flt.fastq.gz
sd_dir=../genome_assembly/AtAre1/03SMARTdenovo_out
sd_prefix="${sd_dir}/Are-1.smartdenovo"
racon_dir="${sd_dir}/racon_out"
#the makefile is redirected into sd_dir, so the directory must exist first
mkdir -p "${sd_dir}"
smartdenovo.pl -c 1 -t 20 -J 1000 -k 16 -p "${sd_prefix}" "${ont_reads}" > "${sd_prefix}.mak"
#smartdenovo.pl only emits a makefile; running it is what performs the assembly
#and produces the *.dmo.cns consensus used below
make -f "${sd_prefix}.mak"
#polished smartdenovo assembly using racon with Nanopore long reads
ln "${sd_prefix}.dmo.cns" "${sd_prefix}.dmo.cns.fa"
mkdir -p "${racon_dir}"
#three rounds: each round maps the reads to the current draft and feeds the
#overlaps to racon; the output of one round is the draft of the next
draft="${sd_prefix}.dmo.cns.fa"
for round in 1 2 3; do
  overlaps="${racon_dir}/round_${round}.paf"
  case "${round}" in
    #the last round carries a different file name than rounds 1 and 2
    3) polished="${racon_dir}/Are-1.smartdenovo.racon_round3.fasta" ;;
    *) polished="${racon_dir}/racon_round${round}.fasta" ;;
  esac
  minimap2 -t 24 "${draft}" "${ont_reads}" > "${overlaps}"
  racon -t 24 "${ont_reads}" "${overlaps}" "${draft}" > "${polished}"
  draft="${polished}"
done
#polished smartdenovo assembly using nextpolish with Illumina short reads
#Runs in a subshell: the purge steps that follow address their files relative
#to the original working directory, so the cd must not outlive this section.
(
  np_dir=../genome_assembly/AtAre1/03SMARTdenovo_out/racon_out/nextpolish_out
  #-p keeps a rerun from failing on the existing directory
  mkdir -p "${np_dir}"
  #never link or run anything if the working directory could not be entered
  cd "${np_dir}" || exit 1
  ln -sf /xxx/raw_data/Illumina_rawData/Illumina_Are-1_R1.fastq.gz .
  ln -sf /xxx/raw_data/Illumina_rawData/Illumina_Are-1_R2.fastq.gz .
  ln -sf ../Are-1.smartdenovo.racon_round3.fasta input.genome.fa
  #instantiate the run_nextpolish template for this accession's read files
  sed -e "s/reads_R1/Illumina_Are-1_R1/g" -e "s/reads_R2/Illumina_Are-1_R2/g" /xxx/run_scripts/run_nextpolish > run_nextpolish
  bash ./run_nextpolish
)
#purge smartdenovo assembly
#read-depth statistics (pbcstat/calcuts) plus a self-alignment of the split
#assembly drive purge_dups; the purged assembly is then re-mapped and its depth
#histogram plotted again
ont_reads=../raw_data/Nanopore_rawData/Nanopore_Are-1.trim.flt.fastq.gz
np_dir=../genome_assembly/AtAre1/03SMARTdenovo_out/racon_out/nextpolish_out
pd_dir="${np_dir}/purge_dups"
pd2_dir="${pd_dir}/purge_dups"
polished="${np_dir}/genome.nextpolish.fa"
reads_paf="${np_dir}/AtAre1.smartdenovo_ass_polished.nano.paf"
split_prefix="${np_dir}/Are-1.smartdenovo_ass_polished.split"
purged_prefix="${pd_dir}/Are-1.smartdenovo_ass_polished"

#map the reads to the polished assembly: pbcstat below needs this PAF, and the
#flye branch of the workflow creates its counterpart the same way
minimap2 -x map-ont -t 20 "${polished}" "${ont_reads}" -o "${reads_paf}"
mkdir -p "${pd_dir}"
pbcstat -O "${pd_dir}" "${reads_paf}"
calcuts "${pd_dir}/PB.stat" > "${pd_dir}/cutoffs.auto"
hist_plot.py -c "${pd_dir}/cutoffs.auto" "${pd_dir}/PB.stat" "${pd_dir}/PB.stat.auto.png"
split_fa "${polished}" > "${split_prefix}"
minimap2 -xasm5 -DP -t 20 "${split_prefix}" "${split_prefix}" -o "${split_prefix}.self.paf"
#NOTE(review): cutoffs_manu is read here but no visible step writes it for this
#accession - presumably created by hand with calcuts -l/-m/-u; confirm
purge_dups -2 -T "${pd_dir}/cutoffs_manu" -c "${pd_dir}/PB.base.cov" "${split_prefix}.self.paf" > "${pd_dir}/dups.bed"
get_seqs -e -p "${purged_prefix}" "${pd_dir}/dups.bed" "${polished}"
#depth histogram of the purged assembly
minimap2 -x map-ont -t 20 "${purged_prefix}.purged.fa" "${ont_reads}" -o "${purged_prefix}.purged.nano.paf"
mkdir -p "${pd2_dir}"
pbcstat -O "${pd2_dir}" "${purged_prefix}.purged.nano.paf"
calcuts "${pd2_dir}/PB.stat" > "${pd2_dir}/cutoffs.auto"
hist_plot.py -c "${pd2_dir}/cutoffs.auto" "${pd2_dir}/PB.stat" "${pd2_dir}/PB.stat.auto.png"
######
#merge flye and smartdenovo assemblies using quickmerge
#nucmer aligns the flye assembly (query) to the smartdenovo assembly
#(reference); the filtered delta is then handed to quickmerge
asm_root=../genome_assembly/AtAre1
qm_dir="${asm_root}/05quickmerge_out"
sd_purged="${asm_root}/03SMARTdenovo_out/racon_out/nextpolish_out/purge_dups/Are-1.smartdenovo_ass_polished.purged.fa"
flye_purged="${asm_root}/02Flye_out00/racon_out/nextpolish_out/purge_dups/Are-1.flye_ass_polished.purged.fa"
#nucmer writes its output prefix inside qm_dir, so the directory must exist
mkdir -p "${qm_dir}"
nucmer -t 10 -p "${qm_dir}/Are-1.smartdenovo_flye.aln" "${sd_purged}" "${flye_purged}"
delta-filter -r -q -l 10000 -i 0.9 "${qm_dir}/Are-1.smartdenovo_flye.aln.delta" > "${qm_dir}/Are-1.smartdenovo_flye.aln.flt.delta"
#quickmerge is run from inside qm_dir; the subshell keeps that cd from leaking
#into later sections, which use paths relative to the original directory
(
  cd "${qm_dir}" || exit 1
  quickmerge -d Are-1.smartdenovo_flye.aln.flt.delta -q ../02Flye_out00/racon_out/nextpolish_out/purge_dups/Are-1.flye_ass_polished.purged.fa -r ../03SMARTdenovo_out/racon_out/nextpolish_out/purge_dups/Are-1.smartdenovo_ass_polished.purged.fa -hco 5.0 -c 1.5 -l 14172502 -ml 5000 -p Are-1.smartdenovo_flye.merge
)
#NOTE(review): the purge step that follows reads merged.fasta from
#04quickmerge_out00, not from 05quickmerge_out - confirm which directory holds
#the merge that was actually used
#purge merged assembly
#read-depth statistics (pbcstat/calcuts) plus a self-alignment of the split
#assembly drive purge_dups; the purged assembly is then re-mapped and its depth
#histogram plotted again
ont_reads=../raw_data/Nanopore_rawData/Nanopore_Are-1.trim.flt.fastq.gz
qm_dir=../genome_assembly/AtAre1/04quickmerge_out00
pd_dir="${qm_dir}/purge_dups"
pd2_dir="${pd_dir}/purge_dups"
merged="${qm_dir}/merged.fasta"
merge_prefix="${qm_dir}/Are-1.smartdenovo_flye_merge"
purged_prefix="${pd_dir}/Are-1.smartdenovo_flye_merge"

#map the reads to the merged assembly: pbcstat below needs this PAF and no
#visible step creates it (same mapping as for the purged assembly further down)
minimap2 -x map-ont -t 20 "${merged}" "${ont_reads}" -o "${merge_prefix}.nano.paf"
mkdir -p "${pd_dir}"
pbcstat -O "${pd_dir}" "${merge_prefix}.nano.paf"
calcuts "${pd_dir}/PB.stat" > "${pd_dir}/cutoffs.auto"
hist_plot.py -X 250 -c "${pd_dir}/cutoffs.auto" "${pd_dir}/PB.stat" "${pd_dir}/PB.stat.auto.png"
split_fa "${merged}" > "${merge_prefix}.split"
minimap2 -xasm5 -DP -t 20 "${merge_prefix}.split" "${merge_prefix}.split" -o "${merge_prefix}.split.self.paf"
#NOTE(review): cutoffs_manu is read here but no visible step writes it for this
#accession - presumably created by hand with calcuts -l/-m/-u; confirm
purge_dups -2 -T "${pd_dir}/cutoffs_manu" -c "${pd_dir}/PB.base.cov" "${merge_prefix}.split.self.paf" > "${pd_dir}/dups.bed"
get_seqs -e -p "${purged_prefix}" "${pd_dir}/dups.bed" "${merged}"
#depth histogram of the purged assembly
minimap2 -x map-ont -t 20 "${purged_prefix}.purged.fa" "${ont_reads}" -o "${purged_prefix}.purged.nano.paf"
mkdir -p "${pd2_dir}"
pbcstat -O "${pd2_dir}" "${purged_prefix}.purged.nano.paf"
calcuts "${pd2_dir}/PB.stat" > "${pd2_dir}/cutoffs.auto"
hist_plot.py -X 250 -c "${pd2_dir}/cutoffs.auto" "${pd2_dir}/PB.stat" "${pd2_dir}/PB.stat.auto.png"
######
#scaffolding
#Runs in a subshell with guarded cds: the next section starts again from the
#original working directory, and nothing may run if a directory is missing.
(
  cd ../genome_assembly/AtAre1/04quickmerge_out00 || exit 1
  ln -s ./purge_dups/Are-1.smartdenovo_flye_merge.purged.fa .
  ln -s /xxx/ref_genome/t2t-col.20210610.fasta .
  #presumably sets aside the ragoo_output of an earlier run against another
  #reference before ragoo.py writes a new ./ragoo_output - confirm
  mv ./ragoo_output ./ragoo_output_tair10
  ragoo.py -t 20 -R ../../../genome_assembly/AtAre1/01Canu_out/Nanopore_Are-1.trim.flt.correctedReads.fasta.gz -T corr -C ./Are-1.smartdenovo_flye_merge.purged.fa ./t2t-col.20210610.fasta > run_ragoo.t2t.logs 2>&1
  mv ./ragoo_output ./ragoo_output_t2t
  cd ./ragoo_output_t2t || exit 1
  #build the AGP from the orderings files and the index of the
  #misassembly-broken contigs
  ls ./orderings/* > orderings.fofn
  samtools faidx ./ctg_alignments/Are-1.smartdenovo_flye_merge.purged.misasm.break.fa
  make_agp.py orderings.fofn ./ctg_alignments/Are-1.smartdenovo_flye_merge.purged.misasm.break.fa.fai 100 > ragoo.agp
)
######
#gapclosing
#Three successive close_scaffold_gaps.sh passes, each in its own directory:
#corrected reads, then the smartdenovo assembly, then the flye assembly.
#Runs in a subshell with guarded cds so that the relative links and the rm
#below can never act on the wrong directory and the cwd does not leak.
(
  gc_dir=../genome_assembly/AtAre1/04quickmerge_out00/ragoo_output_t2t_corr_gapclosing
  #create the working directories the cds below rely on
  mkdir -p "${gc_dir}/gap_closing_reads" "${gc_dir}/gap_closing_smartdenovo" "${gc_dir}/gap_closing_smartdenovo_flye"
  cd "${gc_dir}" || exit 1
  cp /xxx/genome_assembly/AtAre1/01Canu_out/Nanopore_Are-1.trim.flt.correctedReads.fasta.gz .
  gunzip Nanopore_Are-1.trim.flt.correctedReads.fasta.gz
  ln -sf ../ragoo_output_t2t_corr/Are-1.ragoo.fasta .

  #pass 1: close gaps with the corrected reads
  cd gap_closing_reads || exit 1
  /xxx/MaSuRCA-4.1.0/bin/close_scaffold_gaps.sh -r ../Are-1.ragoo.fasta -q ../Nanopore_Are-1.trim.flt.correctedReads.fasta -t 8
  ln -sf Are-1.ragoo.fasta.split.joined.fa Are-1.ragoo.gapclosing_reads.fasta
  ragtag.py splitasm Are-1.ragoo.gapclosing_reads.fasta -o Are-1.ragoo.gapclosing_reads.ragtag.agp > Are-1.ragoo.gapclosing_reads.ragtag.split.fa
  #the uncompressed copy of the corrected reads is not used after this pass
  rm ../Nanopore_Are-1.trim.flt.correctedReads.fasta

  #pass 2: close remaining gaps with the purged smartdenovo assembly
  cd ../gap_closing_smartdenovo || exit 1
  /xxx/MaSuRCA-4.1.0/bin/close_scaffold_gaps.sh -r ../gap_closing_reads/Are-1.ragoo.gapclosing_reads.fasta -q ../../../03SMARTdenovo_out/racon_out/nextpolish_out/purge_dups/Are-1.smartdenovo_ass_polished.purged.fa -t 8 -d asm -m 2500 -o 10000
  ln -sf Are-1.ragoo.gapclosing_reads.fasta.split.joined.fa Are-1.ragoo.gapclosing_reads_smartdenovo.fasta
  ragtag.py splitasm Are-1.ragoo.gapclosing_reads_smartdenovo.fasta -o Are-1.ragoo.gapclosing_reads_smartdenovo.ragtag.agp > Are-1.ragoo.gapclosing_reads_smartdenovo.ragtag.split.fa

  #pass 3: close remaining gaps with the purged flye assembly
  cd ../gap_closing_smartdenovo_flye || exit 1
  /xxx/MaSuRCA-4.1.0/bin/close_scaffold_gaps.sh -r ../gap_closing_smartdenovo/Are-1.ragoo.gapclosing_reads_smartdenovo.fasta -q ../../../02Flye_out00/racon_out/nextpolish_out/purge_dups/Are-1.flye_ass_polished.purged.fa -t 8 -d asm -m 2500 -o 10000
  ln -sf Are-1.ragoo.gapclosing_reads_smartdenovo.fasta.split.joined.fa Are-1.ragoo.gapclosing_reads_smartdenovo_flye.fasta
  ragtag.py splitasm Are-1.ragoo.gapclosing_reads_smartdenovo_flye.fasta -o Are-1.ragoo.gapclosing_reads_smartdenovo_flye.ragtag.agp > Are-1.ragoo.gapclosing_reads_smartdenovo_flye.ragtag.split.fa

  #expose the result of the last pass under a stable name
  cd .. || exit 1
  ln -sf ./gap_closing_smartdenovo_flye/Are-1.ragoo.gapclosing_reads_smartdenovo_flye.fasta Are-1.ragoo.gapclosing.fasta
)
######
#polish using nextpolish
#The gap-closed scaffolds are split into contigs (ragtag splitasm), polished
#with nextpolish, and rebuilt into scaffolds from the AGP (ragtag agp2fa).
#Runs in a subshell so the cd does not leak into later sections.
(
  np_dir=../genome_assembly/AtAre1/04quickmerge_out00/ragoo_output_t2t_corr_gapclosing/nextpolish_out
  #-p keeps a rerun from failing on the existing directory
  mkdir -p "${np_dir}"
  cd "${np_dir}" || exit 1
  ln -sf /xxx/raw_data/Illumina_allData/Illumina_Are-1_R1.fastq.gz .
  ln -sf /xxx/raw_data/Illumina_allData/Illumina_Are-1_R2.fastq.gz .
  ragtag.py splitasm ../Are-1.ragoo.gapclosing.fasta -o ../Are-1.ragoo.gapclosing.ragtag.splitasm.agp > ../Are-1.ragoo.gapclosing.ragtag.splitasm.fasta
  ln -sf ../Are-1.ragoo.gapclosing.ragtag.splitasm.fasta input.genome.fa
  #instantiate the run_nextpolish template for this accession's read files
  sed -e "s/reads_R1/Illumina_Are-1_R1/g" -e "s/reads_R2/Illumina_Are-1_R2/g" /xxx/run_scripts/run_nextpolish > run_nextpolish
  #the freshly redirected file has no execute bit, so run it through bash
  bash ./run_nextpolish
  #drop the _np1212... suffix from the sequence names so they match the
  #names recorded in the splitasm AGP again
  sed 's/_np1212.*//g' ./genome.nextpolish.fa > ./genome.nextpolish.rename.fa
  ragtag.py agp2fa ../Are-1.ragoo.gapclosing.ragtag.splitasm.agp ./genome.nextpolish.rename.fa > ./Are-1.ragoo.gapclosing.polished.fasta
)
##########################################################################################################################################################################################################################################################################################################################################
##########################################################################################################################################################################################################################################################################################################################################
#Genome assembly for Pacbio HiFi accessions, use Db-1 as an example
######
#Estimate genome size using Illumina short reads dataset
#Reads are aligned to chloro_mito_phiX.fasta, pairs with both mates unmapped are
#extracted, their 21-mers are counted with jellyfish and the k-mer histogram is
#passed to run_findGSE.
illumina_dir=../raw_data/Illumina_allData
kmer_dir=../genome_assembly/Db-1/00Kmer_est
aln_prefix="${kmer_dir}/Db-1_aln_pe"
unmapped_prefix="${illumina_dir}/Db-1_aln_pe_srt"
jf_db="${kmer_dir}/Db-1.Illumina.jf"

bwa mem ../ref_genome/chloro_mito_phiX.fasta \
  "${illumina_dir}/Illumina_Db-1_R1.fastq.gz" \
  "${illumina_dir}/Illumina_Db-1_R2.fastq.gz" \
  -t 20 > "${aln_prefix}.sam"
samtools view -@ 8 -bS "${aln_prefix}.sam" \
  | samtools sort -@ 20 - -o "${aln_prefix}_sorted.bam"
#SAM flag 77 / 141: read 1 / read 2 of pairs in which both mates are unmapped
samtools view -@ 20 -b -f 77 "${aln_prefix}_sorted.bam" > "${aln_prefix}_sorted.bothunmapped1.bam"
samtools view -@ 20 -b -f 141 "${aln_prefix}_sorted.bam" > "${aln_prefix}_sorted.bothunmapped2.bam"
for mate in 1 2; do
  bedtools bamtofastq -i "${aln_prefix}_sorted.bothunmapped${mate}.bam" -fq "${unmapped_prefix}_${mate}.fastq"
done
samtools flagstat -@ 20 "${aln_prefix}_sorted.bam" > "${aln_prefix}_sorted.bam.flagstat"
for mate in 1 2; do
  gzip -f "${unmapped_prefix}_${mate}.fastq"
done
#count canonical (-C) 21-mers of both mates together, reading from stdin
zcat "${unmapped_prefix}_1.fastq.gz" "${unmapped_prefix}_2.fastq.gz" \
  | jellyfish count /dev/fd/0 -C -o "${jf_db}" -m 21 -t 20 -s 5G
jellyfish histo -h 2000000 -o "${jf_db}.histo" "${jf_db}"
gzip -f "${jf_db}"
#the intermediate SAM is no longer needed once the sorted BAM exists
rm "${kmer_dir}"/*sam
Rscript ./run_findGSE "${jf_db}.histo" "${kmer_dir}/Db-1.Illumina.ndna.findGSE"
######
#assembly using canu
hifi_reads=../raw_data/batch1/5129_A.hifi_reads.fastq.gz
canu_dir=../genome_assembly/Db-1/01Canu_out
pd_dir="${canu_dir}/purge_dups"
contigs="${canu_dir}/Db-1.canu_ass.contigs.fasta"
canu_prefix="${canu_dir}/Db-1.canu_ass"
#the shell opens the log redirect before canu starts, so the output directory
#has to exist already
mkdir -p "${canu_dir}"
canu -assemble -p Db-1.canu_ass -d "${canu_dir}" maxThreads=20 genomeSize=135m -pacbio-hifi "${hifi_reads}" > "${canu_dir}/run.canu.logs"
#purge canu assembly
minimap2 -x map-hifi -t 20 "${contigs}" "${hifi_reads}" -o "${canu_prefix}.hifi.paf"
mkdir -p "${pd_dir}"
pbcstat -O "${pd_dir}" "${canu_prefix}.hifi.paf"
#automatic cutoffs, plotted for inspection
calcuts "${pd_dir}/PB.stat" > "${pd_dir}/cutoffs.auto"
hist_plot.py -c "${pd_dir}/cutoffs.auto" "${pd_dir}/PB.stat" "${pd_dir}/PB.stat.auto.png"
#manually chosen cutoffs (-l/-m/-u); these are the ones purge_dups uses below
calcuts -l 9 -m 27 -u 108 "${pd_dir}/PB.stat" > "${pd_dir}/cutoffs_manu"
hist_plot.py -c "${pd_dir}/cutoffs_manu" "${pd_dir}/PB.stat" "${pd_dir}/PB.stat.manu.png"
split_fa "${contigs}" > "${canu_prefix}.split"
minimap2 -xasm5 -DP -t 20 "${canu_prefix}.split" "${canu_prefix}.split" -o "${canu_prefix}.split.self.paf"
purge_dups -2 -T "${pd_dir}/cutoffs_manu" -c "${pd_dir}/PB.base.cov" "${canu_prefix}.split.self.paf" > "${pd_dir}/dups.bed"
get_seqs -e -p "${pd_dir}/Db-1.canu_ass" "${pd_dir}/dups.bed" "${contigs}"
######
#assembly using flye
hifi_reads=../raw_data/batch1/5129_A.hifi_reads.fastq.gz
flye_dir=../genome_assembly/Db-1/02Flye_out00
pd_dir="${flye_dir}/purge_dups"
#the shell opens the log redirect before flye starts, so the output directory
#has to exist already
mkdir -p "${flye_dir}"
/opt/share/software/packages/Flye-2.7/bin/flye -o "${flye_dir}" -t 20 -g 135m --pacbio-hifi "${hifi_reads}" > "${flye_dir}/run.flye.logs"
#purge flye assembly
#only the read-depth histogram is produced here; no purge_dups call follows
minimap2 -x map-hifi -t 20 "${flye_dir}/assembly.fasta" "${hifi_reads}" -o "${flye_dir}/Db-1.flye_ass.hifi.paf"
mkdir -p "${pd_dir}"
pbcstat -O "${pd_dir}" "${flye_dir}/Db-1.flye_ass.hifi.paf"
calcuts "${pd_dir}/PB.stat" > "${pd_dir}/cutoffs.auto"
hist_plot.py -c "${pd_dir}/cutoffs.auto" "${pd_dir}/PB.stat" "${pd_dir}/PB.stat.auto.png"
######
#assembly using hifiasm
hifi_reads=../raw_data/batch1/5129_A.hifi_reads.fastq.gz
hifiasm_dir=../genome_assembly/Db-1/03Hifiasm_out00
asm_prefix="${hifiasm_dir}/Db-1.hifiasm_ass"
contigs="${asm_prefix}.bp.p_ctg.fa"
pd_dir="${hifiasm_dir}/purge_dups"
pd2_dir="${pd_dir}/purge_dups"
purged_prefix="${pd_dir}/Db-1.hifiasm_ass"

hifiasm -o "${asm_prefix}" -t 20 -l0 "${hifi_reads}"
#turn the S (segment) lines of the primary-contig GFA into line-wrapped FASTA
awk '/^S/{print ">"$2"\n"$3}' "${asm_prefix}.bp.p_ctg.gfa" | fold > "${contigs}"
#purge hifiasm assembly
minimap2 -x map-hifi -t 20 "${contigs}" "${hifi_reads}" -o "${asm_prefix}.hifi.paf"
mkdir "${pd_dir}"
pbcstat -O "${pd_dir}" "${asm_prefix}.hifi.paf"
#automatic cutoffs, plotted for inspection
calcuts "${pd_dir}/PB.stat" > "${pd_dir}/cutoffs.auto"
hist_plot.py -c "${pd_dir}/cutoffs.auto" "${pd_dir}/PB.stat" "${pd_dir}/PB.stat.auto.png"
#manually chosen cutoffs (-l/-m/-u); these are the ones purge_dups uses below
calcuts -l 7 -m 27 -u 108 "${pd_dir}/PB.stat" > "${pd_dir}/cutoffs_manu"
hist_plot.py -c "${pd_dir}/cutoffs_manu" "${pd_dir}/PB.stat" "${pd_dir}/PB.stat.manu.png"
split_fa "${contigs}" > "${asm_prefix}.split"
minimap2 -xasm5 -DP -t 20 "${asm_prefix}.split" "${asm_prefix}.split" -o "${asm_prefix}.split.self.paf"
purge_dups -2 -T "${pd_dir}/cutoffs_manu" -c "${pd_dir}/PB.base.cov" "${asm_prefix}.split.self.paf" > "${pd_dir}/dups.bed"
get_seqs -e -p "${purged_prefix}" "${pd_dir}/dups.bed" "${contigs}"
#depth histogram of the purged assembly
minimap2 -x map-hifi -t 4 "${purged_prefix}.purged.fa" "${hifi_reads}" -o "${purged_prefix}.purged.hifi.paf"
mkdir "${pd2_dir}"
pbcstat -O "${pd2_dir}" "${purged_prefix}.purged.hifi.paf"
calcuts "${pd2_dir}/PB.stat" > "${pd2_dir}/cutoffs.auto"
hist_plot.py -c "${pd2_dir}/cutoffs.auto" "${pd2_dir}/PB.stat" "${pd2_dir}/PB.stat.auto.png"
######
#merge canu, flye and hifiasm assemblies using quickmerge
#first round quickmerge: flye assembly (nucmer reference, quickmerge -r) merged with
#the purged hifiasm assembly (nucmer query, quickmerge -q)
# All ../genome_assembly/... paths are relative to the directory the pipeline is launched
# from, so this step must leave the working directory unchanged for the steps that follow.
merge_dir=../genome_assembly/Db-1/04quickmerge_out000
flye_asm=../genome_assembly/Db-1/02Flye_out00/assembly.fasta
hifiasm_purged=../genome_assembly/Db-1/03Hifiasm_out00/purge_dups/Db-1.hifiasm_ass.purged.fa

# Align the two assemblies and filter the alignments.
# NOTE(review): delta-filter -i is documented as a percentage (0-100); "-i 0.9" therefore
# looks like 0.9 %, not 90 % -- confirm the intended identity threshold. Value kept as-is.
nucmer -t 4 -p "${merge_dir}/Db-1.flye_hifiasm.aln" "${flye_asm}" "${hifiasm_purged}"
delta-filter -r -q -l 10000 -i 0.9 "${merge_dir}/Db-1.flye_hifiasm.aln.delta" \
  > "${merge_dir}/Db-1.flye_hifiasm.aln.flt.delta"

# quickmerge is run from inside the output directory and its generically named outputs
# are renamed there. A subshell keeps the directory change local: previously the shell
# stayed inside ${merge_dir}, the repeated relative `cd` failed, and the
# ../genome_assembly/... paths used below no longer resolved.
(
  cd "${merge_dir}" || exit 1
  # NOTE(review): -l 13382534 is presumably a length cutoff derived from one of the
  # input assemblies (e.g. its N50) -- confirm against the quickmerge documentation.
  quickmerge -d Db-1.flye_hifiasm.aln.flt.delta \
    -q ../03Hifiasm_out00/purge_dups/Db-1.hifiasm_ass.purged.fa \
    -r ../02Flye_out00/assembly.fasta \
    -hco 5.0 -c 1.5 -l 13382534 -ml 5000 -p Db-1.flye_hifiasm.merge
  mv merged.fasta Db-1.flye_hifiasm.merged.fasta
  mv anchor_summary.txt Db-1.flye_hifiasm.anchor_summary.txt
  mv summaryOut.txt Db-1.flye_hifiasm.summaryOut.txt
  mv aln_summary.tsv Db-1.flye_hifiasm.aln_summary.txt
)

# Read-depth statistics and histogram for the first-round merged assembly.
# NOTE(review): Db-1.flye_hifiasm_merge.hifi.paf is not created by any command in this
# section; a minimap2 read-mapping step producing it is presumably run separately.
# -p makes the step safe to re-run when the directory already exists.
mkdir -p "${merge_dir}/purge_dups"
pbcstat -O "${merge_dir}/purge_dups" "${merge_dir}/Db-1.flye_hifiasm_merge.hifi.paf"
calcuts "${merge_dir}/purge_dups/PB.stat" > "${merge_dir}/purge_dups/cutoffs.auto"
hist_plot.py -X 200 -c "${merge_dir}/purge_dups/cutoffs.auto" "${merge_dir}/purge_dups/PB.stat" "${merge_dir}/purge_dups/PB.stat.auto.png"
#second round quickmerge: purged canu assembly (nucmer reference, quickmerge -r) merged
#with the first-round flye+hifiasm merged assembly (nucmer query, quickmerge -q)
# All ../genome_assembly/... paths are relative to the directory the pipeline is launched
# from, so this step must leave the working directory unchanged for the steps that follow.
merge_dir=../genome_assembly/Db-1/04quickmerge_out000
canu_purged=../genome_assembly/Db-1/01Canu_out/purge_dups/Db-1.canu_ass.purged.fa

# Align the two assemblies and filter the alignments.
# NOTE(review): delta-filter -i is documented as a percentage (0-100); "-i 0.9" therefore
# looks like 0.9 %, not 90 % -- confirm the intended identity threshold. Value kept as-is.
nucmer -t 4 -p "${merge_dir}/Db-1.canu_flye_hifiasm.aln" "${canu_purged}" \
  "${merge_dir}/Db-1.flye_hifiasm.merged.fasta"
delta-filter -r -q -l 10000 -i 0.9 "${merge_dir}/Db-1.canu_flye_hifiasm.aln.delta" \
  > "${merge_dir}/Db-1.canu_flye_hifiasm.aln.flt.delta"

# quickmerge is run from inside the output directory and its generically named outputs
# are renamed there. A subshell keeps the directory change local: previously the shell
# stayed inside ${merge_dir}, the repeated relative `cd` failed, and the
# ../genome_assembly/... paths used below no longer resolved.
(
  cd "${merge_dir}" || exit 1
  # NOTE(review): -l 12446511 is presumably a length cutoff derived from one of the
  # input assemblies (e.g. its N50) -- confirm against the quickmerge documentation.
  quickmerge -d Db-1.canu_flye_hifiasm.aln.flt.delta \
    -q ./Db-1.flye_hifiasm.merged.fasta \
    -r ../01Canu_out/purge_dups/Db-1.canu_ass.purged.fa \
    -hco 5.0 -c 1.5 -l 12446511 -ml 5000 -p Db-1.canu_flye_hifiasm.merge
  mv merged.fasta Db-1.canu_flye_hifiasm.merged.fasta
  mv anchor_summary.txt Db-1.canu_flye_hifiasm.anchor_summary.txt
  mv summaryOut.txt Db-1.canu_flye_hifiasm.summaryOut.txt
  mv aln_summary.tsv Db-1.canu_flye_hifiasm.aln_summary.txt
)

# Read-depth statistics and histogram for the second-round merged assembly.
# NOTE(review): Db-1.canu_flye_hifiasm_merge.hifi.paf is not created by any command in
# this section; a minimap2 read-mapping step producing it is presumably run separately.
# -p makes the step safe to re-run when the directory already exists.
mkdir -p "${merge_dir}/purge_dups2"
pbcstat -O "${merge_dir}/purge_dups2" "${merge_dir}/Db-1.canu_flye_hifiasm_merge.hifi.paf"
calcuts "${merge_dir}/purge_dups2/PB.stat" > "${merge_dir}/purge_dups2/cutoffs.auto"
hist_plot.py -X 200 -c "${merge_dir}/purge_dups2/cutoffs.auto" "${merge_dir}/purge_dups2/PB.stat" "${merge_dir}/purge_dups2/PB.stat.auto.png"
######
#scaffolding
# Order the merged contigs against the t2t-col reference with RaGOO, then build an AGP
# file from RaGOO's orderings and the index of the contig FASTA it wrote.
# The whole step runs in a subshell with guarded `cd`s:
#   * if a directory is missing, nothing is linked, moved or written in the wrong place;
#   * the caller's working directory is preserved, so the launch-relative
#     ../genome_assembly/... paths used by the following sections still resolve.
(
  cd ../genome_assembly/Db-1/04quickmerge_out000 || exit 1

  # RaGOO is given the reference through a symlink in the working directory.
  ln -sf /xxx/ref_genome/t2t-col.20210610.fasta .
  ragoo.py -t 20 -R /xxx/raw_data/batch1/5129_A.hifi_reads.fastq.gz -T corr -C \
    ./Db-1.canu_flye_hifiasm.merged.fasta ./t2t-col.20210610.fasta

  # Keep the result under a reference-specific name.
  mv ./ragoo_output ./ragoo_output_t2t
  cd ./ragoo_output_t2t || exit 1

  # make_agp.py takes: list of ordering files, .fai index of the contigs, and 100
  # (presumably the gap size placed between adjacent contigs -- confirm).
  ls ./orderings/* > orderings.fofn
  samtools faidx ./ctg_alignments/Db-1.canu_flye_hifiasm.merged.misasm.break.fa
  make_agp.py orderings.fofn ./ctg_alignments/Db-1.canu_flye_hifiasm.merged.misasm.break.fa.fai 100 > ragoo.agp
)
######
#gapclosing
# Four successive rounds of MaSuRCA close_scaffold_gaps.sh on the RaGOO scaffolds. Each
# round takes the previous round's output as -r and a different sequence source as -q:
#   1. raw HiFi reads   2. purged hifiasm contigs   3. flye contigs   4. purged canu contigs
# After every round the joined FASTA is symlinked to a descriptive name and split with
# `ragtag.py splitasm` (AGP via -o, split contigs on stdout).
#
# The step runs in a subshell and every `cd` is guarded: a missing directory now stops
# the step instead of letting later cp/rm/ln commands act on the wrong directory, and
# the caller's working directory is left untouched.
# NOTE(review): the working directory, its gap_closing_* sub-directories and
# ../ragoo_output_t2t_corr/Db-1.ragoo.fasta are not created by any command visible in
# this section -- they are expected to exist beforehand.
(
  cd ../genome_assembly/Db-1/04quickmerge_out000/ragoo_output_t2t_corr_gapclosing || exit 1

  # Local uncompressed copy of the reads (removed again after round 1) and the scaffolds.
  cp /xxx/raw_data/batch1/5129_A.hifi_reads.fastq.gz .
  gunzip 5129_A.hifi_reads.fastq.gz
  ln -sf ../ragoo_output_t2t_corr/Db-1.ragoo.fasta .

  # Round 1: close gaps with the HiFi reads.
  cd gap_closing_reads || exit 1
  /xxx/MaSuRCA-4.1.0/bin/close_scaffold_gaps.sh -r ../Db-1.ragoo.fasta -q ../5129_A.hifi_reads.fastq -t 8
  ln -sf Db-1.ragoo.fasta.split.joined.fa Db-1.ragoo.gapclosing_reads.fasta
  ragtag.py splitasm Db-1.ragoo.gapclosing_reads.fasta -o Db-1.ragoo.gapclosing_reads.ragtag.agp > Db-1.ragoo.gapclosing_reads.ragtag.split.fa
  rm ../5129_A.hifi_reads.fastq

  # Round 2: close remaining gaps with the purged hifiasm assembly (-d asm).
  cd ../gap_closing_hifiasm || exit 1
  /xxx/MaSuRCA-4.1.0/bin/close_scaffold_gaps.sh -r ../gap_closing_reads/Db-1.ragoo.gapclosing_reads.fasta -q ../../../03Hifiasm_out00/purge_dups/Db-1.hifiasm_ass.purged.fa -t 8 -d asm -m 2500 -o 10000
  ln -sf Db-1.ragoo.gapclosing_reads.fasta.split.joined.fa Db-1.ragoo.gapclosing_reads_hifiasm.fasta
  ragtag.py splitasm Db-1.ragoo.gapclosing_reads_hifiasm.fasta -o Db-1.ragoo.gapclosing_reads_hifiasm.ragtag.agp > Db-1.ragoo.gapclosing_reads_hifiasm.ragtag.split.fa

  # Round 3: close remaining gaps with the flye assembly.
  cd ../gap_closing_hifiasm_flye || exit 1
  /xxx/MaSuRCA-4.1.0/bin/close_scaffold_gaps.sh -r ../gap_closing_hifiasm/Db-1.ragoo.gapclosing_reads_hifiasm.fasta -q ../../../02Flye_out00/assembly.fasta -t 8 -d asm -m 2500 -o 10000
  ln -sf Db-1.ragoo.gapclosing_reads_hifiasm.fasta.split.joined.fa Db-1.ragoo.gapclosing_reads_hifiasm_flye.fasta
  ragtag.py splitasm Db-1.ragoo.gapclosing_reads_hifiasm_flye.fasta -o Db-1.ragoo.gapclosing_reads_hifiasm_flye.ragtag.agp > Db-1.ragoo.gapclosing_reads_hifiasm_flye.ragtag.split.fa

  # Round 4: close remaining gaps with the purged canu assembly.
  cd ../gap_closing_hifiasm_flye_canu || exit 1
  /xxx/MaSuRCA-4.1.0/bin/close_scaffold_gaps.sh -r ../gap_closing_hifiasm_flye/Db-1.ragoo.gapclosing_reads_hifiasm_flye.fasta -q ../../../01Canu_out/purge_dups/Db-1.canu_ass.purged.fa -t 8 -d asm -m 2500 -o 10000
  ln -sf Db-1.ragoo.gapclosing_reads_hifiasm_flye.fasta.split.joined.fa Db-1.ragoo.gapclosing_reads_hifiasm_flye_canu.fasta
  ragtag.py splitasm ./Db-1.ragoo.gapclosing_reads_hifiasm_flye_canu.fasta -o ./Db-1.ragoo.gapclosing_reads_hifiasm_flye_canu.ragtag.agp > ./Db-1.ragoo.gapclosing_reads_hifiasm_flye_canu.ragtag.split.fa

  # Expose the final round's result under a stable name in the gap-closing directory.
  cd .. || exit 1
  ln -sf ./gap_closing_hifiasm_flye_canu/Db-1.ragoo.gapclosing_reads_hifiasm_flye_canu.fasta Db-1.ragoo.gapclosing.fasta
)
##########################################################################################################################################################################################################################################################################################################################################
##########################################################################################################################################################################################################################################################################################################################################