Skip to content

Commit

Permalink
update pipeline parameter
Browse files Browse the repository at this point in the history
  • Loading branch information
Barry-Xiao committed Oct 28, 2023
1 parent 4d3673d commit fb2c31e
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 11 deletions.
18 changes: 9 additions & 9 deletions scripts/align_reference.sh
Original file line number Diff line number Diff line change
Expand Up @@ -87,25 +87,25 @@ for i in "${LARRAY[@]}"; do
if [ ! -f $DIR/01_mapped/$SAMPLENAME\_$LANE.mapped.sam ]; then
bwa mem -M -t $NUM_CORES $REF_GENOME $FASTQ/$SAMPLENAME\_$LANE\_R1_001.fastq.gz \
$FASTQ/$SAMPLENAME\_$LANE\_R2_001.fastq.gz > $DIR/01_mapped/$SAMPLENAME\_$LANE.mapped.sam
touch check_tmp_align/.$SAMPLENAME\_$LANE_map_sam_check
elif [ ! -e check_tmp_align/.$SAMPLENAME\_$LANE_map_sam_check ]; then
touch check_tmp_align/.$SAMPLENAME\_$LANE_\map_sam_check
elif [ ! -e check_tmp_align/.$SAMPLENAME\_$LANE\_map_sam_check ]; then
rm $DIR/01_mapped/$SAMPLENAME\_$LANE.mapped.sam
bwa mem -M -t $NUM_CORES $REF_GENOME $FASTQ/$SAMPLENAME\_$LANE\_R1_001.fastq.gz \
$FASTQ/$SAMPLENAME\_$LANE\_R2_001.fastq.gz > $DIR/01_mapped/$SAMPLENAME\_$LANE.mapped.sam
touch check_tmp_align/.$SAMPLENAME\_$LANE_map_sam_check
touch check_tmp_align/.$SAMPLENAME\_$LANE\_map_sam_check
fi

# convert SAM to BAM and fix read pairing information and flags
# this will take a minute
if [ ! -f $DIR/01_mapped/$SAMPLENAME\_$LANE.mapped.bam ]; then
samtools fixmate -O bam $DIR/01_mapped/$SAMPLENAME\_$LANE.mapped.sam \
$DIR/01_mapped/$SAMPLENAME\_$LANE.mapped.bam
touch check_tmp_align/.$SAMPLENAME\_$LANE_sam_bam
elif [ ! -e check_tmp_align/.$SAMPLENAME\_$LANE_sam_bam ]; then
touch check_tmp_align/.$SAMPLENAME\_$LANE\_sam_bam
elif [ ! -e check_tmp_align/.$SAMPLENAME\_$LANE\_sam_bam ]; then
rm $DIR/01_mapped/$SAMPLENAME\_$LANE.mapped.bam
samtools fixmate -O bam $DIR/01_mapped/$SAMPLENAME\_$LANE.mapped.sam \
$DIR/01_mapped/$SAMPLENAME\_$LANE.mapped.bam
touch check_tmp_align/.$SAMPLENAME\_$LANE_sam_bam
touch check_tmp_align/.$SAMPLENAME\_$LANE\_sam_bam
fi


Expand All @@ -114,13 +114,13 @@ for i in "${LARRAY[@]}"; do
samtools sort -T $DIR/01_mapped/$SAMPLENAME\_$LANE.mapped -O bam -@ $NUM_CORES \
-o $DIR/01_mapped/$SAMPLENAME\_$LANE.mapped.sorted.bam \
$DIR/01_mapped/$SAMPLENAME\_$LANE.mapped.bam
touch check_tmp_align/.$SAMPLENAME\_$LANE_sort_bam
elif [ ! -e check_tmp_align/.$SAMPLENAME\_$LANE_sort_bam ]; then
touch check_tmp_align/.$SAMPLENAME\_$LANE\_sort_bam
elif [ ! -e check_tmp_align/.$SAMPLENAME\_$LANE\_sort_bam ]; then
rm $DIR/01_mapped/$SAMPLENAME\_$LANE.mapped.sorted.bam
samtools sort -T $DIR/01_mapped/$SAMPLENAME\_$LANE.mapped -O bam -@ $NUM_CORES \
-o $DIR/01_mapped/$SAMPLENAME\_$LANE.mapped.sorted.bam \
$DIR/01_mapped/$SAMPLENAME\_$LANE.mapped.bam
touch check_tmp_align/.$SAMPLENAME\_$LANE_sort_bam
touch check_tmp_align/.$SAMPLENAME\_$LANE\_sort_bam
fi

done
Expand Down
54 changes: 52 additions & 2 deletions scripts/call_variants.sh
Original file line number Diff line number Diff line change
Expand Up @@ -75,12 +75,12 @@ fi

# create bed file from bam file
if [ ! -f $BAM_DIR/$SAMPLENAME.depth ];then
samtools depth -a $BAM > \
samtools depth -a $BAM -q 20 -Q 30 > \
$BAM_DIR/$SAMPLENAME.depth
touch check_tmp_variant/.$SAMPLENAME.bam_depth
elif [ ! -e check_tmp_variant/.$SAMPLENAME.bam_depth ]; then
rm $BAM_DIR/$SAMPLENAME.depth
samtools depth -a $BAM > \
samtools depth -a $BAM -q 20 -Q 30 > \
$BAM_DIR/$SAMPLENAME.depth
touch check_tmp_variant/.$SAMPLENAME.bam_depth
fi
Expand Down Expand Up @@ -132,7 +132,57 @@ elif [ ! -e check_tmp_variant/.$SAMPLENAME.vcf_bcftools_filt ]; then
touch check_tmp_variant/.$SAMPLENAME.vcf_bcftools_filt
fi

# mask bcftool filtered positions
# concat bcftool filtered-masked positions with depth-masked bed file

if [ ! -f $DIR/04_assembly/$SAMPLENAME.allmask.bed ]; then
bcftools filter --no-version -i "(INFO/AD[1])/(INFO/AD[0]+INFO/AD[1]) < $SNP_SUPPORT" \
$DIR/03_variants/$SAMPLENAME.bcftools.vcf -o $DIR/03_variants/$SAMPLENAME.bcftools.lowsupport.vcf
awk '(/^[^#]/ && length($4) == length($5)) {printf "%s\t%d\t%d\n", $1, $2 - 1, $2}' \
$DIR/03_variants/$SAMPLENAME.bcftools.lowsupport.vcf > $DIR/03_variants/$SAMPLENAME.lowsupport.bed

bcftools filter --no-version -i "INFO/ADF[1] < $REQ_STRAND_DEPTH || INFO/ADR[1] < $REQ_STRAND_DEPTH" \
$DIR/03_variants/$SAMPLENAME.bcftools.vcf -o $DIR/03_variants/$SAMPLENAME.bcftools.lowstranddepth.vcf
awk '(/^[^#]/ && length($4) == length($5)) {printf "%s\t%d\t%d\n", $1, $2 - 1, $2}' \
$DIR/03_variants/$SAMPLENAME.bcftools.lowstranddepth.vcf > $DIR/03_variants/$SAMPLENAME.lowstranddepth.bed


cat $DIR/03_variants/$SAMPLENAME.lowsupport.bed $DIR/03_variants/$SAMPLENAME.lowstranddepth.bed > \
$DIR/03_variants/$SAMPLENAME.vcffiltered.bed

cat $DIR/03_variants/$SAMPLENAME.vcffiltered.bed $DIR/04_assembly/$SAMPLENAME.depthmask.bed | \
uniq > $DIR/04_assembly/$SAMPLENAME.allmask.bed

touch check_tmp_variant/.$SAMPLENAME.all_mask_bed

elif [ ! -e check_tmp_variant/.$SAMPLENAME.all_mask_bed ]; then

rm -f $DIR/03_variants/$SAMPLENAME.bcftools.lowsupport.vcf
rm -f $DIR/03_variants/$SAMPLENAME.lowsupport.bed
rm -f $DIR/03_variants/$SAMPLENAME.bcftools.lowstranddepth.vcf
rm -f $DIR/03_variants/$SAMPLENAME.lowstranddepth.bam_depth
rm -f $DIR/03_variants/$SAMPLENAME.vcffiltered.bed
rm -f $DIR/04_assembly/$SAMPLENAME.allmask.bed

bcftools filter --no-version -i "(INFO/AD[1])/(INFO/AD[0]+INFO/AD[1]) < $SNP_SUPPORT" \
$DIR/03_variants/$SAMPLENAME.bcftools.vcf -o $DIR/03_variants/$SAMPLENAME.bcftools.lowsupport.vcf
awk '(/^[^#]/ && length($4) == length($5)) {printf "%s\t%d\t%d\n", $1, $2 - 1, $2}' \
$DIR/03_variants/$SAMPLENAME.bcftools.lowsupport.vcf > $DIR/03_variants/$SAMPLENAME.lowsupport.bed

bcftools filter --no-version -i "INFO/ADF[1] < $REQ_STRAND_DEPTH || INFO/ADR[1] < $REQ_STRAND_DEPTH" \
$DIR/03_variants/$SAMPLENAME.bcftools.vcf -o $DIR/03_variants/$SAMPLENAME.bcftools.lowstranddepth.vcf
awk '(/^[^#]/ && length($4) == length($5)) {printf "%s\t%d\t%d\n", $1, $2 - 1, $2}' \
$DIR/03_variants/$SAMPLENAME.bcftools.lowstranddepth.vcf > $DIR/03_variants/$SAMPLENAME.lowstranddepth.bed


cat $DIR/03_variants/$SAMPLENAME.lowsupport.bed $DIR/03_variants/$SAMPLENAME.lowstranddepth.bed > \
$DIR/03_variants/$SAMPLENAME.vcffiltered.bed

cat $DIR/03_variants/$SAMPLENAME.vcffiltered.bed $DIR/04_assembly/$SAMPLENAME.depthmask.bed | \
uniq > $DIR/04_assembly/$SAMPLENAME.allmask.bed

touch check_tmp_variant/.$SAMPLENAME.all_mask_bed
fi

# left align and normalize indels
# then remove insertions to avoid issues with multi-alignment later on
Expand Down

0 comments on commit fb2c31e

Please sign in to comment.