-
Notifications
You must be signed in to change notification settings - Fork 15
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #48 from databio/dev
Dev
- Loading branch information
Showing
31 changed files
with
2,168 additions
and
1,288 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,13 @@ | ||
microtest: | ||
python $$CODEBASE/ATACseq/pipelines/ATACseq.py -I $$MICROTEST/data/atacR1.fq.gz -I2 $$MICROTEST/data/atacR2.fq.gz -G hg19 -O $$HOME/scratch -S atac_test --single-or-paired paired -R | ||
python $$CODEBASE/pepatac/pipelines/pepatac.py -I $$MICROTEST/data/atacR1.fq.gz -I2 $$MICROTEST/data/atacR2.fq.gz -G hg19 -O $$HOME/scratch -S atac_test --single-or-paired paired -R | ||
test: | ||
python pipelines/ATACseq.py -P 3 -M 100 -O test_out -R -S liver -G hg19 -Q paired -C ATACseq.yaml --genome-size hs --prealignments rCRSd human_repeats -I examples/test_data/liver-CD31_test_R1.fastq.gz -I2 examples/test_data/liver-CD31_test_R2.fastq.gz | ||
python pipelines/pepatac.py -P 3 -M 100 -O test_out -R -S liver -G hg19 -Q paired -C pepatac.yaml --genome-size hs --prealignments rCRSd human_repeats -I examples/test_data/liver-CD31_test_R1.fastq.gz -I2 examples/test_data/liver-CD31_test_R2.fastq.gz | ||
changtest: | ||
python pipelines/ATACseq.py -P 3 -M 100 -O test_out -R -S liver -G hg19 -Q paired -C $HOME/code/ATACseq/examples/chang_project/ATACseq.yaml -gs mm -I examples/test_data/liver-CD31_test_R1.fastq.gz -I2 examples/test_data/liver-CD31_test_R2.fastq.gz | ||
python pipelines/pepatac.py -P 3 -M 100 -O test_out -R -S liver -G hg19 -Q paired -C $HOME/code/pepatac/examples/chang_project/pepatac.yaml -gs mm -I examples/test_data/liver-CD31_test_R1.fastq.gz -I2 examples/test_data/liver-CD31_test_R2.fastq.gz | ||
|
||
|
||
docker: | ||
docker build -t databio/pepatac -f containers/pepatac.Dockerfile . | ||
|
||
singularity: | ||
singularity build $${SIMAGES}pepatac docker://databio/pepatac |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,2 @@ | ||
ATAC: ATACseq.py | ||
ATAC-SEQ: ATACseq.py | ||
ATAC: pepatac.py | ||
ATAC-SEQ: pepatac.py |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,157 @@ | ||
# Pull base image | ||
FROM phusion/baseimage:0.10.1 | ||
|
||
# Who maintains this image | ||
LABEL maintainer Jason Smith "[email protected]" | ||
|
||
# Version info | ||
LABEL version 0.8.1 | ||
|
||
# Use baseimage-docker's init system. | ||
# NOTE(review): this Dockerfile also sets `CMD ["/bin/bash"]` further down; Docker honors only the last CMD, so this one appears to be overridden — confirm which default command is intended. | ||
CMD ["/sbin/my_init"] | ||
|
||
# Install dependencies | ||
RUN apt-get update && \ | ||
DEBIAN_FRONTEND=noninteractive apt-get install --assume-yes \ | ||
curl \ | ||
default-jre \ | ||
default-jdk \ | ||
git \ | ||
libcommons-math3-java \ | ||
libcurl4-gnutls-dev \ | ||
libjbzip2-java \ | ||
libpng-dev \ | ||
libssl-dev \ | ||
libtbb2 \ | ||
libtbb-dev \ | ||
openssl \ | ||
pigz \ | ||
python \ | ||
python-pip python-dev build-essential \ | ||
wget | ||
|
||
# Install MySQL server, client, and development headers. | ||
# `apt-get update` runs in the same layer as the install so that a stale, | ||
# cached package index from an earlier layer cannot break this step. | ||
RUN apt-get update && \ | ||
DEBIAN_FRONTEND=noninteractive apt-get install --assume-yes \ | ||
libmysqlclient-dev \ | ||
mysql-client \ | ||
mysql-server | ||
|
||
# Install python tools | ||
RUN pip install --upgrade pip | ||
RUN pip install virtualenv && \ | ||
pip install numpy && \ | ||
pip install MACS2 && \ | ||
pip install pararead && \ | ||
pip install piper | ||
|
||
# Install R | ||
RUN DEBIAN_FRONTEND=noninteractive apt-get --assume-yes install r-base r-base-dev && \ | ||
echo "r <- getOption('repos'); r['CRAN'] <- 'http://cran.us.r-project.org'; options(repos = r);" > ~/.Rprofile && \ | ||
Rscript -e "install.packages('devtools')" && \ | ||
Rscript -e "devtools::install_github('pepkit/pepr')" && \ | ||
Rscript -e "install.packages('gtable')" && \ | ||
Rscript -e "install.packages('argparser')" && \ | ||
Rscript -e "install.packages('ggplot2')" && \ | ||
Rscript -e "install.packages('gplots')" && \ | ||
Rscript -e "install.packages('grid')" && \ | ||
Rscript -e "install.packages('scales')" && \ | ||
Rscript -e "install.packages('data.table')" && \ | ||
Rscript -e "install.packages('stringr')" | ||
|
||
|
||
# Install bedtools | ||
RUN DEBIAN_FRONTEND=noninteractive apt-get install --assume-yes \ | ||
ant \ | ||
bedtools | ||
|
||
# Install fastqc | ||
WORKDIR /home/tools/ | ||
RUN wget http://www.bioinformatics.babraham.ac.uk/projects/fastqc/fastqc_v0.11.7.zip && \ | ||
unzip fastqc_v0.11.7.zip && \ | ||
cd /home/tools/FastQC && \ | ||
chmod 755 fastqc && \ | ||
ln -s /home/tools/FastQC/fastqc /usr/bin/ | ||
|
||
# Install htslib | ||
WORKDIR /home/src/ | ||
RUN wget https://github.com/samtools/htslib/releases/download/1.7/htslib-1.7.tar.bz2 && \ | ||
tar xf htslib-1.7.tar.bz2 && \ | ||
cd /home/src/htslib-1.7 && \ | ||
./configure --prefix /home/tools/ && \ | ||
make && \ | ||
make install | ||
|
||
# Install samtools | ||
WORKDIR /home/src/ | ||
RUN wget https://github.com/samtools/samtools/releases/download/1.7/samtools-1.7.tar.bz2 && \ | ||
tar xf samtools-1.7.tar.bz2 && \ | ||
cd /home/src/samtools-1.7 && \ | ||
./configure && \ | ||
make && \ | ||
make install && \ | ||
ln -s /home/src/samtools-1.7/samtools /usr/bin/ | ||
|
||
# Install bowtie2 | ||
WORKDIR /home/src/ | ||
RUN wget https://downloads.sourceforge.net/project/bowtie-bio/bowtie2/2.3.4.1/bowtie2-2.3.4.1-source.zip && \ | ||
unzip bowtie2-2.3.4.1-source.zip && \ | ||
cd /home/src/bowtie2-2.3.4.1 && \ | ||
make && \ | ||
make install && \ | ||
ln -s /home/src/bowtie2-2.3.4.1/bowtie2 /usr/bin/ | ||
|
||
# Install picard | ||
WORKDIR /home/tools/bin | ||
RUN wget https://github.com/broadinstitute/picard/releases/download/2.18.0/picard.jar && \ | ||
chmod +x picard.jar | ||
|
||
# Install UCSC tools | ||
WORKDIR /home/tools/ | ||
RUN wget http://hgdownload.soe.ucsc.edu/admin/exe/linux.x86_64/bedGraphToBigWig && \ | ||
wget http://hgdownload.soe.ucsc.edu/admin/exe/linux.x86_64/wigToBigWig && \ | ||
wget http://hgdownload.soe.ucsc.edu/admin/exe/linux.x86_64/bigWigCat && \ | ||
wget http://hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64/bedSort && \ | ||
chmod +x /home/tools/bedGraphToBigWig && \ | ||
chmod +x /home/tools/wigToBigWig && \ | ||
chmod +x /home/tools/bigWigCat && \ | ||
chmod +x /home/tools/bedSort && \ | ||
ln -s /home/tools/bedGraphToBigWig /usr/bin/ && \ | ||
ln -s /home/tools/wigToBigWig /usr/bin/ && \ | ||
ln -s /home/tools/bigWigCat /usr/bin/ && \ | ||
ln -s /home/tools/bedSort /usr/bin/ | ||
|
||
# Install Skewer | ||
# Cloned over HTTPS: GitHub no longer serves the unauthenticated git:// protocol, | ||
# so a git:// clone URL makes this build step fail. | ||
WORKDIR /home/src/ | ||
RUN git clone https://github.com/relipmoc/skewer.git && \ | ||
cd /home/src/skewer && \ | ||
make && \ | ||
make install | ||
|
||
# OPTIONAL REQUIREMENTS | ||
# Install F-seq | ||
WORKDIR /home/src/ | ||
RUN wget https://github.com/aboyle/F-seq/archive/master.zip && \ | ||
unzip master.zip && \ | ||
cd /home/src/F-seq-master && \ | ||
ant && \ | ||
cd dist~/ && \ | ||
tar xf fseq.tgz && \ | ||
ln -s /home/src/F-seq-master/dist~/fseq/bin/fseq /usr/bin/ | ||
|
||
# Install Trimmomatic | ||
WORKDIR /home/src/ | ||
RUN wget http://www.usadellab.org/cms/uploads/supplementary/Trimmomatic/Trimmomatic-0.36.zip && \ | ||
unzip Trimmomatic-0.36.zip && \ | ||
chmod +x Trimmomatic-0.36/trimmomatic-0.36.jar | ||
|
||
# Set environment variables | ||
ENV PATH=/home/tools/bin:/home/tools/:/home/tools/bin/kentUtils/:/home/src/F-seq-master/dist~/fseq/bin:/home/src/bowtie2-2.3.4.1:/home/src/skewer:/home/src/samtools-1.7:/home/src/Trimmomatic-0.36/:/home/src/htslib-1.7:$PATH \ | ||
TRIMMOMATIC=/home/src/Trimmomatic-0.36/trimmomatic-0.36.jar \ | ||
PICARD=/home/tools/bin/picard.jar \ | ||
R_LIBS_USER=/usr/local/lib/R/site-library/ | ||
|
||
# Define default command | ||
WORKDIR /home/ | ||
CMD ["/bin/bash"] | ||
|
||
# Clean up APT when done. | ||
RUN apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,90 +1,33 @@ | ||
# Example commands of using pepATAC through pypiper. | ||
# For the example commands of using pepATAC with looper, please see the xxx Users Guide. | ||
# Example commands of using PEPATAC through pypiper. | ||
# For the example commands of using PEPATAC with looper, please see the xxx Users Guide. | ||
|
||
INPUT=/path/to/sequencing_results/fastq_files | ||
|
||
# run pepATAC on a human paired-end reads dataset using 5 threads: | ||
python pipelines/ATACseq.py -P 5 -O output_folder -S output_sample_name -G hg38 -Q paired -C ATACseq.yaml -gs hs -I $INPUT/ATACseq_results_PE_R1.fastq.gz -I2 $INPUT/ATACseq_results_PE_R2.fastq.gz | ||
# run PEPATAC on a human paired-end reads dataset using 5 threads: | ||
python pipelines/pepatac.py -P 5 -O output_folder -S output_sample_name -G hg38 -Q paired -C pepatac.yaml -gs hs -I $INPUT/pepatac_results_PE_R1.fastq.gz -I2 $INPUT/pepatac_results_PE_R2.fastq.gz | ||
|
||
# run pepATAC on multiple datasets at the same time: <- this could be wrong as I don't see an explaination of how to use -I and -I2 with multiple samples | ||
python pipelines/ATACseq.py -P 5 -O output_folder -S output_sample_name -G hg38 -Q paired -C ATACseq.yaml -gs hs -I $INPUT/ATACseq_results1_PE_R1.fastq.gz $INPUT/ATACseq_results2_PE_R1.fastq.gz $INPUT/ATACseq_results3_PE_R1.fastq.gz -I2 $INPUT/ATACseq_results1_PE_R2.fastq.gz $INPUT/ATACseq_results2_PE_R2.fastq.gz $INPUT/ATACseq_results3_PE_R2.fastq.gz | ||
# run PEPATAC on multiple datasets at the same time: <- this could be wrong as I don't see an explanation of how to use -I and -I2 with multiple samples | ||
python pipelines/pepatac.py -P 5 -O output_folder -S output_sample_name -G hg38 -Q paired -C pepatac.yaml -gs hs -I $INPUT/pepatac_results1_PE_R1.fastq.gz $INPUT/pepatac_results2_PE_R1.fastq.gz $INPUT/pepatac_results3_PE_R1.fastq.gz -I2 $INPUT/pepatac_results1_PE_R2.fastq.gz $INPUT/pepatac_results2_PE_R2.fastq.gz $INPUT/pepatac_results3_PE_R2.fastq.gz | ||
|
||
# run multiple samples with a for loop: | ||
# (bash array elements are separated by whitespace, not commas) | ||
declare -a sample_name_arr=("sample1" "sample2" "sample3") | ||
for sample_name in "${sample_name_arr[@]}" | ||
do | ||
# build both read files from the sample name, so an "R1" elsewhere in $INPUT cannot corrupt the R2 path | ||
file1=$INPUT/${sample_name}_PE_R1.fastq.gz | ||
file2=$INPUT/${sample_name}_PE_R2.fastq.gz | ||
python pipelines/ATACseq.py -P 5 -O output_folder -S $sample_name -G hg38 -Q paired -C ATACseq.yaml -gs hs -I $file1 -I2 $file2 | ||
python pipelines/pepatac.py -P 5 -O output_folder -S $sample_name -G hg38 -Q paired -C pepatac.yaml -gs hs -I $file1 -I2 $file2 | ||
done | ||
|
||
# run pepATAC on a mouse single-end reads dataset using 8 threads: | ||
python pipelines/ATACseq.py -P 8 -O output_folder -S output_sample_name -G mm10 -Q single -C ATACseq.yaml -gs mm -I $INPUT/ATACseq_results_PE_R1.fastq.gz | ||
# run PEPATAC on a mouse single-end reads dataset using 8 threads: | ||
python pipelines/pepatac.py -P 8 -O output_folder -S output_sample_name -G mm10 -Q single -C pepatac.yaml -gs mm -I $INPUT/pepatac_results_PE_R1.fastq.gz | ||
|
||
# run pepATAC with different trimming tools then default trimmomatic, currectly supports skewer and pyadapt: | ||
python pipelines/ATACseq.py --skewer TRUE -P 5 -O output_folder -S output_sample_name -G hg38 -Q paired -C ATACseq.yaml -gs hs -I $INPUT/ATACseq_results_PE_R1.fastq.gz -I2 $INPUT/ATACseq_results_PE_R2.fastq.gz | ||
python pipelines/ATACseq.py --pyadapt TRUE -P 5 -O output_folder -S output_sample_name -G hg38 -Q paired -C ATACseq.yaml -gs hs -I $INPUT/ATACseq_results_PE_R1.fastq.gz -I2 $INPUT/ATACseq_results_PE_R2.fastq.gz | ||
|
||
# re-run pepATAC and over-write the previous output: | ||
python pipelines/ATACseq.py -N -P 5 -O output_folder -S output_sample_name -G hg38 -Q paired -C ATACseq.yaml -gs hs -I $INPUT/ATACseq_results_PE_R1.fastq.gz -I2 $INPUT/ATACseq_results_PE_R2.fastq.gz | ||
|
||
# continue to run pepATAC since a locked step (usually locked due to failure): | ||
python pipelines/ATACseq.py -R -P 5 -O output_folder -S output_sample_name -G hg38 -Q paired -C ATACseq.yaml -gs hs -I $INPUT/ATACseq_results_PE_R1.fastq.gz -I2 $INPUT/ATACseq_results_PE_R2.fastq.gz | ||
|
||
|
||
|
||
|
||
# check xxxx for full list of parameter usage | ||
|
||
# full list of parameters are listed below: | ||
python ATACseq.py | ||
usage: ATACseq.py [-h] [-N] [-I2 INPUT_FILES2 [INPUT_FILES2 ...]] | ||
[-M MEMORY_LIMIT] [-Q SINGLE_OR_PAIRED] [-S SAMPLE_NAME] | ||
[-P NUMBER_OF_CORES] [-D] [-I INPUT_FILES [INPUT_FILES ...]] | ||
[-F] [-R] [-C CONFIG_FILE] [-O PARENT_OUTPUT_FOLDER] | ||
[-G GENOME_ASSEMBLY] [-gs GENOME_SIZE] | ||
[--frip-ref-peaks FRIP_REF_PEAKS] [--pyadapt] [--skewer] | ||
[--prealignments PREALIGNMENTS [PREALIGNMENTS ...]] [-V] | ||
|
||
Pipeline | ||
optional arguments: | ||
-C CONFIG_FILE, --config CONFIG_FILE | ||
pipeline config file in YAML format; relative paths | ||
are considered relative to the pipeline script. | ||
defaults to ATACseq.yaml | ||
-D, --dirty Make all cleanups manual | ||
-F, --follow Run all follow commands, even if command is not run | ||
--frip-ref-peaks FRIP_REF_PEAKS | ||
Reference peak set for calculating FRIP | ||
-G GENOME_ASSEMBLY, --genome GENOME_ASSEMBLY | ||
identifier for genome assempbly (required) | ||
-gs GENOME_SIZE, --genome-size GENOME_SIZE | ||
genome size for MACS2 | ||
-h, --help show this help message and exit | ||
-I INPUT_FILES [INPUT_FILES ...], --input INPUT_FILES [INPUT_FILES ...] | ||
One or more primary input files (required) | ||
-I2 INPUT_FILES2 [INPUT_FILES2 ...], --input2 INPUT_FILES2 [INPUT_FILES2 ...] | ||
One or more secondary input files (if they exists); | ||
for example, second read in pair. | ||
-M MEMORY_LIMIT, --mem MEMORY_LIMIT | ||
Memory string for processes that accept memory limits | ||
(like java) | ||
-N, --new-start Fresh start mode, overwrite all | ||
-O PARENT_OUTPUT_FOLDER, --output-parent PARENT_OUTPUT_FOLDER | ||
parent output directory of the project (required). | ||
-P NUMBER_OF_CORES, --cores NUMBER_OF_CORES | ||
number of cores to use for parallel processes | ||
-Q SINGLE_OR_PAIRED, --single-or-paired SINGLE_OR_PAIRED | ||
single or paired end? default: single | ||
-R, --recover Recover mode, overwrite locks | ||
-S SAMPLE_NAME, --sample-name SAMPLE_NAME | ||
unique name for output subfolder and files (required) | ||
--pyadapt Use pyadapter_trim for trimming? [Default: False] | ||
--skewer Use skewer for trimming? [Default: False] | ||
--prealignments PREALIGNMENTS [PREALIGNMENTS ...] | ||
List of reference genomes to align to before primary | ||
alignment. | ||
-V, --version show program's version number and exit' | ||
# run PEPATAC with a trimming tool other than the default trimmomatic; currently supports skewer and pyadapt: | ||
python pipelines/pepatac.py --skewer TRUE -P 5 -O output_folder -S output_sample_name -G hg38 -Q paired -C pepatac.yaml -gs hs -I $INPUT/pepatac_results_PE_R1.fastq.gz -I2 $INPUT/pepatac_results_PE_R2.fastq.gz | ||
python pipelines/pepatac.py --pyadapt TRUE -P 5 -O output_folder -S output_sample_name -G hg38 -Q paired -C pepatac.yaml -gs hs -I $INPUT/pepatac_results_PE_R1.fastq.gz -I2 $INPUT/pepatac_results_PE_R2.fastq.gz | ||
|
||
# re-run PEPATAC and over-write the previous output: | ||
python pipelines/pepatac.py -N -P 5 -O output_folder -S output_sample_name -G hg38 -Q paired -C pepatac.yaml -gs hs -I $INPUT/pepatac_results_PE_R1.fastq.gz -I2 $INPUT/pepatac_results_PE_R2.fastq.gz | ||
|
||
# continue to run PEPATAC since a locked step (usually locked due to failure): | ||
python pipelines/pepatac.py -R -P 5 -O output_folder -S output_sample_name -G hg38 -Q paired -C pepatac.yaml -gs hs -I $INPUT/pepatac_results_PE_R1.fastq.gz -I2 $INPUT/pepatac_results_PE_R2.fastq.gz | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
2 changes: 1 addition & 1 deletion
2
examples/chang_project/ATACseq_chang.yaml → examples/chang_project/pepatac_chang.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.