-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun_pipeline.sh
executable file
·99 lines (75 loc) · 2.55 KB
/
run_pipeline.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#!/bin/bash
set -euo pipefail
#----------------------------------------------#
# User parameters
#   $1                      input directory
#   $2                      output directory
#   irods_input_projectID   project name (environment variable)
#
# All three values are required. The `${var:-}` form is used so that a
# missing value reaches the explanatory message below instead of aborting
# with an "unbound variable" error under `set -u`.
if [[ -n "${1:-}" && -n "${2:-}" && -n "${irods_input_projectID:-}" ]]; then
  input_dir="${1}"
  output_dir="${2}"
  PROJECT_NAME="${irods_input_projectID}"
else
  echo "One of the parameters is missing, make sure there is an input directory, output directory and project name(param 1, 2 or irods_input_projectID)."
  exit 1
fi

# Both directories must already exist.
# NOTE(review): the message also mentions ${input_dir}/clean_fastq, but only
# the two directories below are tested - confirm whether the fastq directory
# should be checked as well.
if [[ ! -d "${input_dir}" || ! -d "${output_dir}" ]]; then
  echo "The input directory $input_dir, output directory $output_dir or fastq dir ${input_dir}/clean_fastq does not exist"
  exit 1
fi
input_fastq="${input_dir}/clean_fastq"

# Map the project name to the species passed to the pipeline; every project
# other than rvp_spn is treated as "other".
case "${PROJECT_NAME}" in
  rvp_spn)
    SPECIES="streptococcus_pneumoniae"
    ;;
  *)
    SPECIES="other"
    ;;
esac
#----------------------------------------------#
# Create/update necessary environments
PATH_MAMBA_YAML="envs/mamba.yaml"
PATH_MASTER_YAML="envs/population_master.yaml"

# Print the second space-separated field of the first line of file $1.
# Presumably that line is "name: <environment>" - verify against the yaml files.
first_line_second_field() {
  head -n 1 "$1" | cut -d ' ' -f 2
}

MAMBA_NAME=$(first_line_second_field "${PATH_MAMBA_YAML}")
MASTER_NAME=$(first_line_second_field "${PATH_MASTER_YAML}")

echo -e "\nUpdating necessary environments to run the pipeline..."

# Strict mode is switched off from here on: it sometimes breaks environment
# activation and the later tests on variables that may be unset.
set +e +u +o pipefail

# First the environment from the mamba yaml, then (from inside it) the
# environment from the master yaml, which is updated with mamba.
conda env update -f "${PATH_MAMBA_YAML}"
source activate "${MAMBA_NAME}"

mamba env update -f "${PATH_MASTER_YAML}"
source activate "${MASTER_NAME}"
#----------------------------------------------#
# Run the pipeline
echo -e "\nRun pipeline..."

# LSF queue: use the runsheet variable when it is set and non-empty,
# otherwise fall back to "bio".
QUEUE="${irods_runsheet_sys__runsheet__lsf_queue:-bio}"

set -euo pipefail

# Arguments shared by every run; Streptococcus pneumoniae additionally
# enables external clustering.
pipeline_args=(--queue "${QUEUE}" -i "${input_dir}" -o "${output_dir}" -s "${SPECIES}")
if [[ "${SPECIES}" == "streptococcus_pneumoniae" ]]; then
  pipeline_args+=(--external-clustering)
fi

# With `set -e` active, a failing command would terminate the script before
# its status could be stored. `|| result=$?` records the status and lets the
# script continue, so the following steps still run and the script can exit
# with the pipeline's real status.
result=0
python population.py "${pipeline_args[@]}" || result=$?
# Propagate metadata
#
# Appends one "<attribute>: '<value>'" line to metadata.yml for every
# non-empty variable among:
#   - all variables whose name starts with irods_input_sequencing
#   - the fixed irods_input_illumina__* variables listed below
# The attribute name is the variable name without its first 12 characters
# ("irods_input_"), with the first "__" replaced by "::".
#
# Globals read: OUTPUTDIR (used if set and non-empty), output_dir (fallback),
#               irods_input_* variables.
# Output:       appends to <dir>/metadata.yml.
propagate_metadata() {
  # The script stores the output directory in `output_dir`; an externally
  # provided OUTPUTDIR still takes precedence for backward compatibility.
  local metadata_file="${OUTPUTDIR:-${output_dir:-}}/metadata.yml"
  local key value attrname

  # "${!prefix@}" expands to the matching variable NAMES, so values that
  # contain whitespace or "=" cannot be mistaken for additional keys.
  for key in "${!irods_input_sequencing@}" \
    irods_input_illumina__Flowcell irods_input_illumina__Instrument \
    irods_input_illumina__Date irods_input_illumina__Run_number \
    irods_input_illumina__Run_Id; do
    value="${!key:-}"
    if [[ -n "${value}" ]]; then
      attrname="${key:12}"
      attrname="${attrname/__/::}"
      # NOTE(review): a value containing a single quote is written unescaped.
      printf "%s: '%s'\n" "${attrname}" "${value}" >> "${metadata_file}"
    fi
  done
}

# Metadata propagation is best-effort: strict mode is off while it runs.
set +euo pipefail
propagate_metadata
set -euo pipefail
# Finish with the pipeline status that was saved in `result` after the
# python call, so callers see the pipeline's outcome rather than that of the
# metadata step.
exit ${result}