diff --git a/scripts/align_reference.sh b/scripts/align_reference.sh index b45556b..1612bb3 100644 --- a/scripts/align_reference.sh +++ b/scripts/align_reference.sh @@ -87,12 +87,12 @@ for i in "${LARRAY[@]}"; do if [ ! -f $DIR/01_mapped/$SAMPLENAME\_$LANE.mapped.sam ]; then bwa mem -M -t $NUM_CORES $REF_GENOME $FASTQ/$SAMPLENAME\_$LANE\_R1_001.fastq.gz \ $FASTQ/$SAMPLENAME\_$LANE\_R2_001.fastq.gz > $DIR/01_mapped/$SAMPLENAME\_$LANE.mapped.sam - touch check_tmp_align/.$SAMPLENAME\_$LANE_map_sam_check - elif [ ! -e check_tmp_align/.$SAMPLENAME\_$LANE_map_sam_check ]; then + touch check_tmp_align/.$SAMPLENAME\_$LANE_\map_sam_check + elif [ ! -e check_tmp_align/.$SAMPLENAME\_$LANE\_map_sam_check ]; then rm $DIR/01_mapped/$SAMPLENAME\_$LANE.mapped.sam bwa mem -M -t $NUM_CORES $REF_GENOME $FASTQ/$SAMPLENAME\_$LANE\_R1_001.fastq.gz \ $FASTQ/$SAMPLENAME\_$LANE\_R2_001.fastq.gz > $DIR/01_mapped/$SAMPLENAME\_$LANE.mapped.sam - touch check_tmp_align/.$SAMPLENAME\_$LANE_map_sam_check + touch check_tmp_align/.$SAMPLENAME\_$LANE\_map_sam_check fi # convert SAM to BAM and fix read pairing information and flags @@ -100,12 +100,12 @@ for i in "${LARRAY[@]}"; do if [ ! -f $DIR/01_mapped/$SAMPLENAME\_$LANE.mapped.bam ]; then samtools fixmate -O bam $DIR/01_mapped/$SAMPLENAME\_$LANE.mapped.sam \ $DIR/01_mapped/$SAMPLENAME\_$LANE.mapped.bam - touch check_tmp_align/.$SAMPLENAME\_$LANE_sam_bam - elif [ ! -e check_tmp_align/.$SAMPLENAME\_$LANE_sam_bam ]; then + touch check_tmp_align/.$SAMPLENAME\_$LANE\_sam_bam + elif [ ! -e check_tmp_align/.$SAMPLENAME\_$LANE\_sam_bam ]; then rm $DIR/01_mapped/$SAMPLENAME\_$LANE.mapped.bam samtools fixmate -O bam $DIR/01_mapped/$SAMPLENAME\_$LANE.mapped.sam \ $DIR/01_mapped/$SAMPLENAME\_$LANE.mapped.bam - touch check_tmp_align/.$SAMPLENAME\_$LANE_sam_bam + touch check_tmp_align/.$SAMPLENAME\_$LANE\_sam_bam fi @@ -114,13 +114,13 @@ for i in "${LARRAY[@]}"; do samtools sort -T $DIR/01_mapped/$SAMPLENAME\_$LANE.mapped -O bam -@ $NUM_CORES \ -o $DIR/01_mapped/$SAMPLENAME\_$LANE.mapped.sorted.bam \ $DIR/01_mapped/$SAMPLENAME\_$LANE.mapped.bam - touch check_tmp_align/.$SAMPLENAME\_$LANE_sort_bam - elif [ ! -e check_tmp_align/.$SAMPLENAME\_$LANE_sort_bam ]; then + touch check_tmp_align/.$SAMPLENAME\_$LANE\_sort_bam + elif [ ! -e check_tmp_align/.$SAMPLENAME\_$LANE\_sort_bam ]; then rm $DIR/01_mapped/$SAMPLENAME\_$LANE.mapped.sorted.bam samtools sort -T $DIR/01_mapped/$SAMPLENAME\_$LANE.mapped -O bam -@ $NUM_CORES \ -o $DIR/01_mapped/$SAMPLENAME\_$LANE.mapped.sorted.bam \ $DIR/01_mapped/$SAMPLENAME\_$LANE.mapped.bam - touch check_tmp_align/.$SAMPLENAME\_$LANE_sort_bam + touch check_tmp_align/.$SAMPLENAME\_$LANE\_sort_bam fi done diff --git a/scripts/call_variants.sh b/scripts/call_variants.sh index 2f2fd1c..59b59e5 100644 --- a/scripts/call_variants.sh +++ b/scripts/call_variants.sh @@ -75,12 +75,12 @@ fi # create bed file from bam file if [ ! -f $BAM_DIR/$SAMPLENAME.depth ];then - samtools depth -a $BAM > \ + samtools depth -a $BAM -q 20 -Q 30 > \ $BAM_DIR/$SAMPLENAME.depth touch check_tmp_variant/.$SAMPLENAME.bam_depth elif [ ! -e check_tmp_variant/.$SAMPLENAME.bam_depth ]; then rm $BAM_DIR/$SAMPLENAME.depth - samtools depth -a $BAM > \ + samtools depth -a $BAM -q 20 -Q 30 > \ $BAM_DIR/$SAMPLENAME.depth touch check_tmp_variant/.$SAMPLENAME.bam_depth fi @@ -132,7 +132,57 @@ elif [ ! -e check_tmp_variant/.$SAMPLENAME.vcf_bcftools_filt ]; then touch check_tmp_variant/.$SAMPLENAME.vcf_bcftools_filt fi +# mask bcftool filtered positions +# concat bcftool filtered-masked positions with depth-masked bed file +if [ ! -f $DIR/04_assembly/$SAMPLENAME.allmask.bed ]; then + bcftools filter --no-version -i "(INFO/AD[1])/(INFO/AD[0]+INFO/AD[1]) < $SNP_SUPPORT" \ + $DIR/03_variants/$SAMPLENAME.bcftools.vcf -o $DIR/03_variants/$SAMPLENAME.bcftools.lowsupport.vcf + awk '(/^[^#]/ && length($4) == length($5)) {printf "%s\t%d\t%d\n", $1, $2 - 1, $2}' \ + $DIR/03_variants/$SAMPLENAME.bcftools.lowsupport.vcf > $DIR/03_variants/$SAMPLENAME.lowsupport.bed + + bcftools filter --no-version -i "INFO/ADF[1] < $REQ_STRAND_DEPTH || INFO/ADR[1] < $REQ_STRAND_DEPTH" \ + $DIR/03_variants/$SAMPLENAME.bcftools.vcf -o $DIR/03_variants/$SAMPLENAME.bcftools.lowstranddepth.vcf + awk '(/^[^#]/ && length($4) == length($5)) {printf "%s\t%d\t%d\n", $1, $2 - 1, $2}' \ + $DIR/03_variants/$SAMPLENAME.bcftools.lowstranddepth.vcf > $DIR/03_variants/$SAMPLENAME.lowstranddepth.bed + + + cat $DIR/03_variants/$SAMPLENAME.lowsupport.bed $DIR/03_variants/$SAMPLENAME.lowstranddepth.bed > \ + $DIR/03_variants/$SAMPLENAME.vcffiltered.bed + + cat $DIR/03_variants/$SAMPLENAME.vcffiltered.bed $DIR/04_assembly/$SAMPLENAME.depthmask.bed | \ + uniq > $DIR/04_assembly/$SAMPLENAME.allmask.bed + + touch check_tmp_variant/.$SAMPLENAME.all_mask_bed + +elif [ ! -e check_tmp_variant/.$SAMPLENAME.all_mask_bed ]; then + + rm -f $DIR/03_variants/$SAMPLENAME.bcftools.lowsupport.vcf + rm -f $DIR/03_variants/$SAMPLENAME.lowsupport.bed + rm -f $DIR/03_variants/$SAMPLENAME.bcftools.lowstranddepth.vcf + rm -f $DIR/03_variants/$SAMPLENAME.lowstranddepth.bam_depth + rm -f $DIR/03_variants/$SAMPLENAME.vcffiltered.bed + rm -f $DIR/04_assembly/$SAMPLENAME.allmask.bed + + bcftools filter --no-version -i "(INFO/AD[1])/(INFO/AD[0]+INFO/AD[1]) < $SNP_SUPPORT" \ + $DIR/03_variants/$SAMPLENAME.bcftools.vcf -o $DIR/03_variants/$SAMPLENAME.bcftools.lowsupport.vcf + awk '(/^[^#]/ && length($4) == length($5)) {printf "%s\t%d\t%d\n", $1, $2 - 1, $2}' \ + $DIR/03_variants/$SAMPLENAME.bcftools.lowsupport.vcf > $DIR/03_variants/$SAMPLENAME.lowsupport.bed + + bcftools filter --no-version -i "INFO/ADF[1] < $REQ_STRAND_DEPTH || INFO/ADR[1] < $REQ_STRAND_DEPTH" \ + $DIR/03_variants/$SAMPLENAME.bcftools.vcf -o $DIR/03_variants/$SAMPLENAME.bcftools.lowstranddepth.vcf + awk '(/^[^#]/ && length($4) == length($5)) {printf "%s\t%d\t%d\n", $1, $2 - 1, $2}' \ + $DIR/03_variants/$SAMPLENAME.bcftools.lowstranddepth.vcf > $DIR/03_variants/$SAMPLENAME.lowstranddepth.bed + + + cat $DIR/03_variants/$SAMPLENAME.lowsupport.bed $DIR/03_variants/$SAMPLENAME.lowstranddepth.bed > \ + $DIR/03_variants/$SAMPLENAME.vcffiltered.bed + + cat $DIR/03_variants/$SAMPLENAME.vcffiltered.bed $DIR/04_assembly/$SAMPLENAME.depthmask.bed | \ + uniq > $DIR/04_assembly/$SAMPLENAME.allmask.bed + + touch check_tmp_variant/.$SAMPLENAME.all_mask_bed +fi # left align and normalize indels # then remove insertions to avoid issues with multi-alignment later on