From f8f37bf84138dbff8200135ef7f93c14bbb29a1d Mon Sep 17 00:00:00 2001 From: Devon Ryan Date: Fri, 4 Nov 2016 22:12:14 +0100 Subject: [PATCH 1/3] Start implementing #32, which needs a test. --- CHANGELOG.md | 4 ++++ MBias.c | 9 +++++++++ PileOMeth.h | 3 ++- README.md | 5 +---- common.c | 1 + extract.c | 11 ++++++++++- 6 files changed, 27 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 27df6ea..3c614fb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +Version 0.1.14: + + * Added the `--requireFlags`/`-r` option, which is equivalent to the -f option in samtools. The default is 0, which requires nothing. + Version 0.1.13: * Added the `--ignoreFlags`/`-F` option, which is equivalent to the -F option in samtools. The default is 0xF00, which ignores duplicates, QC fail, secondary, and supplemental alignments. diff --git a/MBias.c b/MBias.c index 8a3c51e..1610994 100644 --- a/MBias.c +++ b/MBias.c @@ -202,6 +202,10 @@ void mbias_usage() { " 0xF00 or 3840 in decimal. If you would like to change that,\n" " you can specify a new value here.\n" " ignored. Specifying this causes them to be included.\n" +" -R, --requireFlags Require each alignment to have all bits in this value\n" +" present, or else the alignment is ignored. This is equivalent\n" +" to the -f option in samtools. The default is 0, which\n" +" includes all alignments.\n" " --txt Output tab separated metrics to the screen. These can be\n" " imported into R or another program for manual plotting and\n" " analysis.\n" @@ -230,6 +234,7 @@ int mbias_main(int argc, char *argv[]) { config.bedName = NULL; config.bed = NULL; config.ignoreFlags = 0xF00; + config.requireFlags = 0; for(i=0; i<16; i++) config.bounds[i] = 0; static struct option lopts[] = { @@ -242,6 +247,7 @@ int mbias_main(int argc, char *argv[]) { {"txt", 0, NULL, 7}, {"noSVG", 0, NULL, 8}, {"ignoreFlags", 1, NULL, 'F'}, + {"requireFlags", 1, NULL, 'R'}, {"help", 0, NULL, 'h'}, {"version", 0, NULL, 'v'}, {0, 0, NULL, 0} @@ -291,6 +297,9 @@ int mbias_main(int argc, char *argv[]) { case 'F' : config.ignoreFlags = atoi(optarg); break; + case 'R' : + config.requireFlags = atoi(optarg); + break; case 'q' : config.minMapq = atoi(optarg); break; diff --git a/PileOMeth.h b/PileOMeth.h index 5406b32..788a890 100644 --- a/PileOMeth.h +++ b/PileOMeth.h @@ -47,6 +47,7 @@ typedef struct { @field keepDiscordant 0: Do not include discordantly aligned reads when calculating metrics @field keepSingleton 0: Do not include singletons when calculating metrics @field ignoreFlags Mask that's logically &ed with and ignored if > 0. Defaults to 0xF00. + @field requireFlags Mask that's logically &ed with and ignored if < mask. Defaults to 0, which means ignore. @field merge 1: Merge Cs in either a CpG or CHG context into single entries @field methylKit Output in a format compatible with methylKit @field output_fp Output file pointers (to CpG, CHG, and CHH, respectively) @@ -59,7 +60,7 @@ typedef struct { typedef struct { int keepCpG, keepCHG, keepCHH; int minMapq, minPhred, keepDupes, maxDepth, minDepth; - int keepDiscordant, keepSingleton, ignoreFlags; + int keepDiscordant, keepSingleton, ignoreFlags, requireFlags; int merge, methylKit; int fraction, counts, logit; FILE **output_fp; diff --git a/README.md b/README.md index 36b3c87..c31160c 100644 --- a/README.md +++ b/README.md @@ -132,7 +132,4 @@ Ignored alignments By default, any alignment marked as being secondary (bit 256), having failed QC (bit 512), being a PCR/optical duplicate (bit 1024), or being supplemental (bit 2048) is ignored. This is a reasonable default and should only be changed by expert users. For those needing to change this behaviour, please see the `-F` or `--ignoreFlags` options to both `PileOMeth mbias` and `PileOMeth extract`. -To do list -========== - - - [ ] Is the output format the most convenient (it's what Bison uses, so converters have already been written)? It makes more sense to output a predefined VCF format, which would allow processing multiple samples at once. This would require a spec., which should have pretty broad input. +Relatedly, if you would like to require that all included alignments have certain flags set, then the `-R` or `--requireFlags` options to both `PileOMeth mbias` and `PileOMeth extract` can be given. diff --git a/common.c b/common.c index 730a68b..7c46f54 100644 --- a/common.c +++ b/common.c @@ -146,6 +146,7 @@ int filter_func(void *data, bam1_t *b) { if(b->core.tid == -1 || b->core.flag & BAM_FUNMAP) continue; //Unmapped if(b->core.qual < ldata->config->minMapq) continue; //-q if(b->core.flag & ldata->config->ignoreFlags) continue; //By default: secondary alignments, QC failed, PCR duplicates, and supplemental alignments + if(ldata->config->requireFlags && b->core.flag & ldata->config->requireFlags != ldata->config->requireFlags) continue; if(!ldata->config->keepDupes && b->core.flag & BAM_FDUP) continue; p = bam_aux_get(b, "NH"); if(p != NULL) { diff --git a/extract.c b/extract.c index 01127c1..fc3c1d5 100644 --- a/extract.c +++ b/extract.c @@ -310,6 +310,10 @@ void extract_usage() { " 0xF00 or 3840 in decimal. If you would like to change that,\n" " you can specify a new value here.\n" " ignored. Specifying this causes them to be included.\n" +" -R, --requireFlags Require each alignment to have all bits in this value\n" +" present, or else the alignment is ignored. This is equivalent\n" +" to the -f option in samtools. The default is 0, which\n" +" includes all alignments.\n" " --noCpG Do not output CpG context methylation metrics\n" " --CHG Output CHG context methylation metrics\n" " --CHH Output CHH context methylation metrics\n" @@ -364,6 +368,7 @@ int extract_main(int argc, char *argv[]) { config.counts = 0; config.logit = 0; config.ignoreFlags = 0xF00; + config.requireFlags = 0; for(i=0; i<16; i++) config.bounds[i] = 0; static struct option lopts[] = { @@ -385,11 +390,12 @@ int extract_main(int argc, char *argv[]) { {"mergeContext", 0, NULL, 11}, {"methylKit", 0, NULL, 12}, {"ignoreFlags", 1, NULL, 'F'}, + {"requireFlags", 1, NULL, 'R'}, {"help", 0, NULL, 'h'}, {"version", 0, NULL, 'v'}, {0, 0, NULL, 0} }; - while((c = getopt_long(argc, argv, "hvq:p:r:l:o:D:f:c:m:d:F:", lopts,NULL)) >=0){ + while((c = getopt_long(argc, argv, "hvq:p:r:l:o:D:f:c:m:d:F:R:", lopts,NULL)) >=0){ switch(c) { case 'h' : extract_usage(); @@ -455,6 +461,9 @@ int extract_main(int argc, char *argv[]) { case 'F' : config.ignoreFlags = atoi(optarg); break; + case 'R': + config.requireFlags = atoi(optarg); + break; case 'q' : config.minMapq = atoi(optarg); break; From d7a82930ef6d64a5bb95e54a9ffc7490527acaa5 Mon Sep 17 00:00:00 2001 From: Devon Ryan Date: Fri, 4 Nov 2016 22:28:25 +0100 Subject: [PATCH 2/3] A few fixes and add a test for #32 --- Makefile | 2 +- common.c | 2 +- tests/test.py | 7 +++++++ 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 3e4edd7..a84460d 100644 --- a/Makefile +++ b/Makefile @@ -11,7 +11,7 @@ OPTS ?= -Wall -g -O3 all: lib PileOMeth OBJS = common.o bed.o svg.o pileup.o extract.o MBias.o mergeContext.o -VERSION = 0.1.13 +VERSION = 0.1.14 #If we're building from a git repo, then append the most recent tag ifneq "$(wildcard .git)" "" diff --git a/common.c b/common.c index 7c46f54..c495539 100644 --- a/common.c +++ b/common.c @@ -146,7 +146,7 @@ int filter_func(void *data, bam1_t *b) { if(b->core.tid == -1 || b->core.flag & BAM_FUNMAP) continue; //Unmapped if(b->core.qual < ldata->config->minMapq) continue; //-q if(b->core.flag & ldata->config->ignoreFlags) continue; //By default: secondary alignments, QC failed, PCR duplicates, and supplemental alignments - if(ldata->config->requireFlags && b->core.flag & ldata->config->requireFlags != ldata->config->requireFlags) continue; + if(ldata->config->requireFlags && (b->core.flag & ldata->config->requireFlags) != ldata->config->requireFlags) continue; if(!ldata->config->keepDupes && b->core.flag & BAM_FDUP) continue; p = bam_aux_get(b, "NH"); if(p != NULL) { diff --git a/tests/test.py b/tests/test.py index 62373ae..b906c20 100644 --- a/tests/test.py +++ b/tests/test.py @@ -60,3 +60,10 @@ def rm(f): lines = sum(1 for _ in open('cg_aln_CpG.bedGraph')) assert lines == 49 rm('cg_aln_CpG.bedGraph') + +# Check that --requireFlags is working +check_call('../PileOMeth extract --requireFlags 0xD00 cg100.fa cg_aln.bam -q 2', shell=True) +assert op.exists('cg_aln_CpG.bedGraph') +lines = sum(1 for _ in open('cg_aln_CpG.bedGraph')) +assert lines == 49 +rm('cg_aln_CpG.bedGraph') From 259584c2048a9e6fc277467ce14fd61a1f47776c Mon Sep 17 00:00:00 2001 From: dpryan79 Date: Mon, 16 Jan 2017 13:59:24 +0100 Subject: [PATCH 3/3] Missed one thing in the merger --- tests/test.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tests/test.py b/tests/test.py index 42f15c9..244b592 100644 --- a/tests/test.py +++ b/tests/test.py @@ -61,17 +61,16 @@ def rm(f): assert lines == 49 rm('cg_aln_CpG.bedGraph') -<<<<<<< HEAD # Check that --requireFlags is working -check_call('../PileOMeth extract --requireFlags 0xD00 cg100.fa cg_aln.bam -q 2', shell=True) +check_call('../MethylDackel extract --requireFlags 0xD00 cg100.fa cg_aln.bam -q 2', shell=True) assert op.exists('cg_aln_CpG.bedGraph') lines = sum(1 for _ in open('cg_aln_CpG.bedGraph')) assert lines == 49 -======= +rm('cg_aln_CpG.bedGraph') + # Check absolute trimming bounds check_call('../MethylDackel extract --nOT 50,50,40,40 cg100.fa cg_aln.bam -q 2', shell=True) assert op.exists('cg_aln_CpG.bedGraph') lines = sum(1 for _ in open('cg_aln_CpG.bedGraph')) assert lines == 12 ->>>>>>> master rm('cg_aln_CpG.bedGraph')