Merge pull request #62 from databio/hotfix-0.8.1

Hotfix 0.8.1
databio · Sep 20, 2018 · ea97026
2 parents bfa4d8a + 8947b57
commit ea97026
Show file tree

Hide file tree

Showing 6 changed files with 49 additions and 21 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,6 +1,13 @@
 # Change log
 All notable changes to this project will be documented in this file.
 
+## [0.8.2] -- 2018-09-20
+
+### Changed
+- Fixed a bug where bamQC.py did not use the updated pararead logger setup
+- Fixed a bug where the container was missing required components (pandas, bedToBigBed)
+- Fixed a bug where the output parent directory argument (`--output-parent`) was not marked as required
+
 ## [0.8.1] -- 2018-09-19
 
 ### Changed

diff --git a/containers/pepatac.Dockerfile b/containers/pepatac.Dockerfile
@@ -5,7 +5,7 @@ FROM phusion/baseimage:0.10.1
 LABEL maintainer Jason Smith "[email protected]"
 
 # Version info
-LABEL version 0.8.2
+LABEL version 0.8.5
 
 # Use baseimage-docker's init system.
 CMD ["/sbin/my_init"]
@@ -40,6 +40,7 @@ RUN pip install --upgrade pip
 RUN pip install virtualenv && \
     pip install numpy && \
     pip install MACS2 && \
+    pip install pandas && \
     pip install pararead && \
     pip install piper
 
@@ -114,14 +115,17 @@ RUN wget http://hgdownload.soe.ucsc.edu/admin/exe/linux.x86_64/bedGraphToBigWig
     wget http://hgdownload.soe.ucsc.edu/admin/exe/linux.x86_64/wigToBigWig && \
     wget http://hgdownload.soe.ucsc.edu/admin/exe/linux.x86_64/bigWigCat && \
     wget http://hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64/bedSort && \
+    wget http://hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64/bedToBigBed && \
     chmod +x /home/tools/bedGraphToBigWig && \
     chmod +x /home/tools/wigToBigWig && \
     chmod +x /home/tools/bigWigCat && \
     chmod +x /home/tools/bedSort && \
+    chmod +x /home/tools/bedToBigBed && \
     ln -s /home/tools/bedGraphToBigWig /usr/bin/ && \
     ln -s /home/tools/wigToBigWig /usr/bin/ && \
     ln -s /home/tools/bigWigCat /usr/bin/ && \
-    ln -s /home/tools/bedSort /usr/bin/
+    ln -s /home/tools/bedSort /usr/bin/ && \
+    ln -s /home/tools/bedToBigBed /usr/bin/
 
 # Install Skewer
 WORKDIR /home/src/

diff --git a/pipelines/pepatac.py b/pipelines/pepatac.py
@@ -5,7 +5,7 @@
 
 __author__ = ["Jin Xu", "Nathan Sheffield", "Jason Smith"]
 __email__ = "[email protected]"
-__version__ = "0.8.0"
+__version__ = "0.8.2"
 
 
 from argparse import ArgumentParser
@@ -31,7 +31,8 @@ def parse_arguments():
     ###########################################################################
     parser = ArgumentParser(description='PEPATAC version ' + __version__)
     parser = pypiper.add_pypiper_args(parser, groups=
-        ['pypiper', 'looper', 'ngs'], required=["input", "genome", "sample-name"])
+        ['pypiper', 'looper', 'ngs'],
+        required=["input", "genome", "sample-name", "output-parent"])
 
     # Pipeline-specific arguments
     parser.add_argument("-gs", "--genome-size", default="hs", type=str,

diff --git a/tools/PEPATAC_annotation.R b/tools/PEPATAC_annotation.R
@@ -177,7 +177,7 @@ invisible(dev.off())
 knownGenomes <- c('hg19', 'hg38', 'mm9', 'mm10')
 fileType     <- summary(file(paste0(argv$anno)))$class
 if (fileType == "gzfile") {
-    annoFile <- fread(sprintf('zcat %s', shQuote(file.path(argv$anno))))
+    annoFile <- fread(cmd=(sprintf('zcat %s', shQuote(file.path(argv$anno)))))
     suppressWarnings(closeAllConnections())
 } else {
     annoFile <- fread(file.path(argv$anno))

diff --git a/tools/bamQC.py b/tools/bamQC.py
@@ -13,13 +13,15 @@
 import sys
 
 import pararead
-from pararead.processor import _LOGGER
+#from pararead.processor import _LOGGER
+from pararead import add_logging_options, ParaReadProcessor
+from pararead import logger_via_cli
 
 import pandas as _pd
 import numpy as np
 
 class bamQC(pararead.ParaReadProcessor):
-    def __init__(self, reads_filename, n_proc, out_filename):
+    def __init__(self, reads_filename, n_proc, out_filename, verbosity):
         """
         Derive from ParaReadProcessor to build the bamQC caller instance.
 
@@ -32,8 +34,9 @@ def __init__(self, reads_filename, n_proc, out_filename):
         out_filename : str
             Name of output bamQC file
         """
-        self.reads_filename = reads_filename
         super(bamQC, self).__init__(reads_filename, n_proc, out_filename)
+        self.reads_filename = reads_filename
+        self.verbosity = verbosity
 
     def register_files(self):
         """
@@ -216,6 +219,7 @@ def combine(self, good_chromosomes, strict=False):
                        header='\t'.join(header), fmt='%s', delimiter='\t',
                        comments='')
 
+
 # read options from command line
 def parse_args(cmdl):
     parser = ArgumentParser(description='--Produce bamQC File--')
@@ -226,18 +230,24 @@ def parse_args(cmdl):
                         help="Output file name.")
     parser.add_argument('-c', '--cores', dest='cores', default=20, type=int,
                         help="Number of processors to use. Default=20")
+
+    parser = add_logging_options(parser)
     return parser.parse_args(cmdl)
-
+
+
 # parallel processed computation of matrix for each chromosome
 if __name__ == "__main__":
+
     args = parse_args(sys.argv[1:])
+    _LOGGER = logger_via_cli(args)
 
     qc = bamQC(reads_filename=args.infile,
                out_filename=args.outfile,
-               n_proc=args.cores)
+               n_proc=args.cores,
+               verbosity=args.verbosity)
 
     qc.register_files()
     good_chromosomes = qc.run()
 
-    print("Reduce step (merge files)...")
+    _LOGGER.info("Reduce step (merge files)...")
     qc.combine(good_chromosomes)
diff --git a/usage.txt b/usage.txt
@@ -1,14 +1,15 @@
-usage: pepatac.py [-h] [-R] [-N] [-D] [-F] [-C CONFIG_FILE]
-                  [-O PARENT_OUTPUT_FOLDER] [-M MEMORY_LIMIT]
-                  [-P NUMBER_OF_CORES] -S SAMPLE_NAME -I INPUT_FILES
-                  [INPUT_FILES ...] [-I2 [INPUT_FILES2 [INPUT_FILES2 ...]]] -G
-                  GENOME_ASSEMBLY [-Q SINGLE_OR_PAIRED] [-gs GENOME_SIZE]
-                  [--frip-ref-peaks FRIP_REF_PEAKS]
+usage: pepatac.py [-h] [-R] [-N] [-D] [-F] [-C CONFIG_FILE] -O
+                  PARENT_OUTPUT_FOLDER [-M MEMORY_LIMIT] [-P NUMBER_OF_CORES]
+                  -S SAMPLE_NAME -I INPUT_FILES [INPUT_FILES ...]
+                  [-I2 [INPUT_FILES2 [INPUT_FILES2 ...]]] -G GENOME_ASSEMBLY
+                  [-Q SINGLE_OR_PAIRED] [-gs GENOME_SIZE]
+                  [--frip-ref-peaks FRIP_REF_PEAKS] [--TSS-name TSS_NAME]
+                  [--anno-name ANNO_NAME] [--keep] [--noFIFO]
                   [--peak-caller {fseq,macs2}]
                   [--trimmer {trimmomatic,pyadapt,skewer}]
                   [--prealignments PREALIGNMENTS [PREALIGNMENTS ...]] [-V]
 
-PEPATAC version 0.7.0
+PEPATAC version 0.8.2
 
 optional arguments:
   -h, --help            show this help message and exit
@@ -19,8 +20,6 @@ optional arguments:
   -C CONFIG_FILE, --config CONFIG_FILE
                         Pipeline configuration file (YAML). Relative paths are
                         with respect to the pipeline script.
-  -O PARENT_OUTPUT_FOLDER, --output-parent PARENT_OUTPUT_FOLDER
-                        Parent output directory of project
   -M MEMORY_LIMIT, --mem MEMORY_LIMIT
                         Memory limit (in Mb) for processes accepting such
   -P NUMBER_OF_CORES, --cores NUMBER_OF_CORES
@@ -32,7 +31,12 @@ optional arguments:
   -gs GENOME_SIZE, --genome-size GENOME_SIZE
                         genome size for MACS2
   --frip-ref-peaks FRIP_REF_PEAKS
-                        Reference peak set for calculating FRIP
+                        Reference peak set for calculating FRiP
+  --TSS-name TSS_NAME   Name of TSS annotation file
+  --anno-name ANNO_NAME
+                        Name of reference bed file for calculating FRiF
+  --keep                Keep prealignment BAM files
+  --noFIFO              Do NOT use named pipes during prealignments
   --peak-caller {fseq,macs2}
                         Name of peak caller
   --trimmer {trimmomatic,pyadapt,skewer}
@@ -43,6 +47,8 @@ optional arguments:
   -V, --version         show program's version number and exit
 
 required named arguments:
+  -O PARENT_OUTPUT_FOLDER, --output-parent PARENT_OUTPUT_FOLDER
+                        Parent output directory of project
   -S SAMPLE_NAME, --sample-name SAMPLE_NAME
                         Name for sample to run
   -I INPUT_FILES [INPUT_FILES ...], --input INPUT_FILES [INPUT_FILES ...]