-
Notifications
You must be signed in to change notification settings - Fork 3
/
interleaven.sh
executable file
·112 lines (79 loc) · 3.78 KB
/
interleaven.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
#!/bin/bash
#
# Split interleaved paired-end FASTQ files into two per-mate FASTQ files.
#
# Usage: interleaven.sh <working_directory> <ref_dir>
#
#   working_directory  Directory scanned (non-recursively) for files named
#                      *.fastq, *.fq, *.fastq.gz or *.fq.gz.
#   ref_dir            Directory under which "interleaven_reads/" is created.
#
# For every input <sample>.<ext>, lines 1-4 of each eight-line group are
# written to <ref_dir>/interleaven_reads/<sample>.1.fastq and lines 5-8 to
# <ref_dir>/interleaven_reads/<sample>.2.fastq.
#
# Completion is recorded as a "<sample>.I" line in log_file.txt, stored next
# to this script. Records ending in I, IA, IAD, IADE, IADEB or IADEBC are
# treated as "interleave step already done".
# NOTE(review): the longer suffixes are presumably written by later pipeline
# steps that are not part of this file - confirm their exact format.
set -e
set -u
set -o pipefail
set -x

if (( $# < 2 )) || [[ -z "$1" || -z "$2" ]]; then
  printf 'Usage: %s <working_directory> <ref_dir>\n' "${0##*/}" >&2
  exit 2
fi

working_directory=$1
ref_dir=$2
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
readonly LOG_FILE="${SCRIPT_DIR}/log_file.txt"
readonly OUT_DIR="${ref_dir}/interleaven_reads"

# Print a message to stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

#######################################
# Succeed when the log holds an exact "<sample>.<stage>" record for the
# interleave stage or any of the longer stage suffixes.
# Matching is literal and whole-line, so "sample1" never matches "sample10".
# Globals:   LOG_FILE (read)
# Arguments: $1 - sample name
# Returns:   0 if recorded, non-zero otherwise (including a missing log)
#######################################
already_interleaved() {
  local sample=$1
  [[ -f "$LOG_FILE" ]] || return 1
  grep -qxF \
    -e "${sample}.I" -e "${sample}.IA" -e "${sample}.IAD" \
    -e "${sample}.IADE" -e "${sample}.IADEB" -e "${sample}.IADEBC" \
    -- "$LOG_FILE"
}

#######################################
# Set the sample's log record to "<sample>.I".
# An existing "<sample>.<suffix>" line (suffix without dots) is replaced in
# place and duplicates are dropped; otherwise the record is appended.
# Comparison is by literal string, so sample names may contain characters
# that are special in regular expressions.
# Globals:   LOG_FILE (read, written)
# Arguments: $1 - sample name
#######################################
record_interleaved() {
  local sample=$1
  local tmp
  [[ -f "$LOG_FILE" ]] || : > "$LOG_FILE"
  tmp=$(mktemp "${LOG_FILE}.XXXXXX") || die "cannot create temporary log file"
  if ! awk -v prefix="${sample}." -v record="${sample}.I" '
    index($0, prefix) == 1 && substr($0, length(prefix) + 1) ~ /^[^.]+$/ {
      if (!replaced) { print record; replaced = 1 }
      next
    }
    { print }
    END { if (!replaced) print record }
  ' "$LOG_FILE" > "$tmp"; then
    rm -f -- "$tmp"
    die "cannot update ${LOG_FILE}"
  fi
  # Copy the content back instead of renaming so the log keeps its
  # permissions and inode.
  cat -- "$tmp" > "$LOG_FILE"
  rm -f -- "$tmp"
}

#######################################
# Read interleaved FASTQ on stdin and write the two mates.
# Runs as one synchronous awk pass, so both outputs are complete when the
# function returns. Lines are routed by line number only; their content
# (including tabs) is never parsed.
# Arguments: $1 - output path for mate 1, $2 - output path for mate 2
# Outputs:   a warning on stderr when the line count is not a multiple of 8
#######################################
split_interleaved() {
  local mate1=$1 mate2=$2
  # Create both outputs up front so they exist even for empty input.
  : > "$mate1"
  : > "$mate2"
  awk -v mate1="$mate1" -v mate2="$mate2" '
    (NR - 1) % 8 < 4 { print >> mate1; next }
    { print >> mate2 }
    END {
      if (NR % 8 != 0) {
        printf "warning: %d input lines is not a multiple of 8\n", NR \
          > "/dev/stderr"
      }
    }
  '
}

#######################################
# Split one input file unless it has already been processed.
# If output files exist without a matching log record they are treated as
# leftovers of an interrupted run and are removed before re-splitting.
# Globals:   OUT_DIR (read)
# Arguments: $1 - input file path
#            $2 - sample name (file name without .fastq/.fq[.gz])
#            $3 - 1 if the input is gzip-compressed, 0 otherwise
# Outputs:   a notice on stdout when the sample is skipped
#######################################
process_sample() {
  local input=$1 sample=$2 compressed=$3
  local mate1="${OUT_DIR}/${sample}.1.fastq"
  local mate2="${OUT_DIR}/${sample}.2.fastq"
  local -a leftovers=("$mate1" "$mate2")
  local path
  local found=0

  if [[ "$compressed" == 1 ]]; then
    # Earlier versions decompressed to this intermediate file; still detect
    # and clean it up if an interrupted run left it behind.
    leftovers+=("${OUT_DIR}/whole_read_${sample}.fastq")
  fi

  for path in "${leftovers[@]}"; do
    if [[ -f "$path" ]]; then
      found=1
    fi
  done

  if [[ "$found" == 1 ]]; then
    if already_interleaved "$sample"; then
      echo "Interleaved split operation on this sample already performed"
      return 0
    fi
    rm -f -- "${leftovers[@]}"
  fi

  if [[ "$compressed" == 1 ]]; then
    # pipefail makes a zcat failure (e.g. truncated archive) abort the run
    # before the sample is logged as done.
    zcat -- "$input" | split_interleaved "$mate1" "$mate2"
  else
    split_interleaved "$mate1" "$mate2" < "$input"
  fi

  record_interleaved "$sample"
}

[[ -d "$working_directory" ]] || die "not a directory: ${working_directory}"
mkdir -p -- "$OUT_DIR"

# Iterate without changing directory so a relative ref_dir keeps pointing at
# the directory that was just created.
for f in "$working_directory"/*; do
  [[ -f "$f" ]] || continue
  name=${f##*/}
  case "$name" in
    *.fastq.gz) process_sample "$f" "${name%.fastq.gz}" 1 ;;
    *.fq.gz) process_sample "$f" "${name%.fq.gz}" 1 ;;
    *.fastq) process_sample "$f" "${name%.fastq}" 0 ;;
    *.fq) process_sample "$f" "${name%.fq}" 0 ;;
  esac
done