-
Notifications
You must be signed in to change notification settings - Fork 2
/
mageck_loop.sh
executable file
·121 lines (76 loc) · 2.67 KB
/
mageck_loop.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
#!/bin/bash
# Description: Runs the MAGeCK count on raw fastq files in a loop where each fastq file requires a different
# sgRNA annotation file.
# The modules are as follows:
# 1. unzip the fastq files
# 2. Run cutadapt to trim the adapters
# 3. MAGeCK count - input: fastq files, output: count matrix.
#
#
#Author: Lucy Liu
#########################################################
# Arguments
#
# Change these arguments for each new project
#########################################################
# Directory that holds the compressed (.fastq.gz) input files:
gzfastqfile="/Volumes/bioinf_pipeline/Runs/NextSeq/180917_NB501056_0192_AH3NYHBGX9/ProjectFolders/Project_Iva-Nikolic"
# Directory the uncompressed fastq files go to. The script changes into this
# directory further down, so it acts as the 'working directory'.
# Keep the trailing slash: the value is used as a plain path prefix.
workdir="/Volumes/Users/Lucy/CRISPR/Iva20180921/"
# sgRNA annotation files; the i-th one is used with the i-th fastq file.
# The glob is anchored at workdir so it expands the same way no matter which
# directory the script is started from. The prefix is stripped again so that
# every entry stays relative to workdir, which is what the count loop expects.
sgRNAfiles=("${workdir}"Data/Annotations/sgRNAreformated/*)
sgRNAfiles=("${sgRNAfiles[@]#"${workdir}"}")
# Adapter-trimmed fastq files, resolved the same way (relative to workdir).
fastfiles=("${workdir}"*.trimmed_P7.fastq)
fastfiles=("${fastfiles[@]#"${workdir}"}")
# # Sample labels, comma separated
# samlabels=""
#######################################
# Unzip fastq files to chosen directory
#######################################
# cd ${gzfastqfile}
# for file in $(ls */*.gz);
# do
# echo ${file}
# bname=$(basename ${file} .fastq.gz)
# # get the base name of the file and also remove the '.fastq.gz' at the end
# echo $bname
# gzip -d -c ${file} > ${workdir}${bname}.fastq
# # -decompress = unzip, send output to location as specified, add fastq to end of file name
# done
###############################
# Cutadapt to trim adapters
###############################
# Use 'cutadapt' to trim adaptors
# Change to the working directory. Abort if that fails: every later step uses
# paths relative to it and would otherwise run in the wrong place.
if ! cd "${workdir}"; then
  echo "mageck_loop.sh: cannot change to working directory: ${workdir}" >&2
  exit 1
fi
# module load cutadapt
# Use only if on cluster
# for file in $(ls *1.fastq);
# do
# echo ${file}
# bname=$(basename ${file} .fastq)
# echo $bname
# cutadapt -g TGTGGAAAGGACGAAACACCG -o ${bname}.trimmed_P5.fastq ${file} > trimP5_${bname}_log.txt
# #trim 5' adaptor
# cutadapt -a GTTTTAGAGCTAGAAATAGCAAG -o ${bname}.trimmed_P7.fastq ${bname}.trimmed_P5.fastq > trimP7_${bname}_log.txt
# #trim 3' adaptor
# done
########################
# MAGeCK count
########################
# mkdir Count
# Run `mageck count` once per (annotation file, fastq file) pair, from inside
# Count/. The i-th entry of sgRNAfiles is paired with the i-th entry of
# fastfiles; both hold paths relative to the working directory, hence the
# '../' prefix here. The annotation file's base name (without .txt) is used
# both as the -n value and as the sample label.
if ! cd Count/; then
  echo "mageck_loop.sh: cannot change to Count/ (does it exist in the working directory?)" >&2
  exit 1
fi

# Only complete pairs can be processed, so walk the shorter of the two lists
# rather than assuming a fixed number of files.
npairs=${#sgRNAfiles[@]}
if (( ${#fastfiles[@]} != npairs )); then
  echo "mageck_loop.sh: warning: ${#sgRNAfiles[@]} annotation file(s) but ${#fastfiles[@]} fastq file(s); unpaired files are ignored" >&2
  if (( ${#fastfiles[@]} < npairs )); then
    npairs=${#fastfiles[@]}
  fi
fi

for (( i = 0; i < npairs; i++ )); do
  library="../${sgRNAfiles[i]}"
  reads="../${fastfiles[i]}"
  # An unmatched glob leaves the literal pattern in the array; catch that (and
  # any other missing input) before handing the paths to mageck.
  if [[ ! -f "${library}" || ! -f "${reads}" ]]; then
    echo "mageck_loop.sh: missing input for pair ${i}: '${library}' / '${reads}'" >&2
    exit 1
  fi
  bsgRNA=$(basename "${sgRNAfiles[i]}" .txt)
  echo "${bsgRNA}"
  mageck count -l "${library}" \
    -n "${bsgRNA}" \
    --pdf-report \
    --norm "median" \
    --fastq "${reads}" \
    --sample-label "${bsgRNA}"
done
# --control-sgrna ../Data/Annotations/negControl.txt \
#Note tr gets rid of the newline character at the end of each line of output
#and replaces with space