-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
243 changed files
with
29,633 additions
and
1,567 deletions.
There are no files selected for viewing
This file was deleted.
Oops, something went wrong.
Binary file not shown.
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,105 @@ | ||
#!/bin/bash | ||
#SBATCH --job-name="angsdevery_OM" | ||
#SBATCH -t 500:00:00 | ||
#SBATCH --mail-type=ALL | ||
#SBATCH --mem=180GB | ||
#SBATCH -p medmem | ||
#SBATCH -c 20 | ||
#SBATCH [email protected] | ||
#SBATCH --output=./Airradians_lcWGS/snakemake_pipeline/angsd/output/everyone/"%x_out.%j" | ||
#SBATCH --error=./Airradians_lcWGS/snakemake_pipeline/angsd/output/everyone/"%x_err.%j" | ||
|
||
# Set-up | ||
|
||
# output dir | ||
mkdir -p angsd/output | ||
|
||
## load module | ||
module load bio/angsd/0.940 | ||
module load bio/samtools/1.19 | ||
|
||
## dir shortcuts | ||
DATDIR=~/Airradians_lcWGS/snakemake_pipeline/angsd/data # Path to generation and life stage bam files | ||
OUTDIR=~/Airradians_lcWGS/snakemake_pipeline/angsd/output # Path to the base directory where output files will be written | ||
REFDIR=~/refs # Path to reference genome | ||
|
||
# angsd parameters | ||
DOMAJORMINOR=4 # 1 is for either seq data like bam files or genotpe liklihood data where the maj minor allele is inferred from likli$ | ||
# 4 is forcing the major allele according to the reference states defined in ref - the minor allele will be inferred based on the ge$ | ||
DOMAF=1 # 1 assumed known major and minor allele 2 assumed known major and unknown minor allele | ||
DOGENO=5 # 1 write major and minor allele 2 print the called genotype and count of minor 4 print the called genotype | ||
DOPOST=1 # 1 estimate the posterior genotype probability based on the allele frequency as a prior 2 estimat eth posterier genotpe pr$ | ||
GENOLIKE=1 # SAMtools | ||
DOGLF=2 # beagle genotype liklihood format | ||
DOCOUNTS=1 # provide freq of bases - output as pos.gz | ||
DODEPTH=1 # default 0 output distribution of seq depths sites aove maxDepth will be binnes out files in depthSample and depthGlobal | ||
MAXDEPTH=100 # default 100 | ||
SETMINDEPTHIND=5 # min depth, below which the individual is omitted from analysis | ||
SETMAXDEPTHIND=150 # max depth - above which the indivisual is omitted from analysis | ||
DUMPCOUNTS=2 # 1 is the total seq depth for site as .pos.gz, 2 is the seq deth per sample as .pos.gz and .counts.gz; look up allele $ | ||
MINQ=30 # Minimum quality filter | ||
MINMAPQ=30 # Minmm mapping quality | ||
MINMAF=0.05 # Minimum minor allele frequency filter | ||
# C -50 is flag to adjust for excessive mismatches as recomennded when BWA was used for alignment | ||
# -uniqueOnly; removed reads that have multiple best hits 0 default, 1 remove | ||
# -remove_bads = reads flagged as bad | ||
# -only_proper_pairs = read that did not have a mate pair | ||
|
||
# ANGSD for data within generation (F1 - F2 - F3, broodstock and juveniles separately) | ||
# for loop - run ansd for the total bam list within time/generation (i.e. all F1 Broodstock, F2_Juveniles, etc.) | ||
# objective to input a strata / metadta to identify treatments associated with ids downstream.. | ||
|
||
|
||
mkdir -p $OUTDIR/everyone # make director for ansd output files | ||
|
||
# nav to dir | ||
cd $DATDIR/Master_query_dups_removed # NAV TO THE REMOVED DUPLICATES BY QUERYNAME! | ||
#ls *.bam | sort | uniq > ./merged_bamlist.txt # create a list of all bam files in merged_bams | ||
|
||
# cal number of indiv | ||
nIND=$(wc -l $DATDIR/Master_query_dups_removed/everyone_omit_bamlist.txt | awk '{print $1}') # call and count lines of the bamlist with delimiter id | ||
minIND=$(echo "(${nIND} * 0.95)" | bc) # min individual is 90% of the total count | ||
minINDRd=$(printf "%.0f" ${minIND}) # round that | ||
|
||
# set min and max depth for minuum of 5X and max of 20X coerage per loci to acoid potential bias arising from seq errors and recommendations in O'Leary (etal 2018) | ||
MINDP=$(echo "(${minINDRd} * 5)" | bc) # min coverage of 5X accoutning for the minimum num of individuals for genotype calls as 90% and assuming per indiv per loci | ||
MAXDP=$(echo "(${minINDRd} * 20)" | bc) # max coverage of 20X accounting for the minimum num of individual for genotype calls as 90% and assuming per indiv per loci | ||
|
||
# call unique outbase | ||
OUTBASE='Everyone_OM_doMaf'$DOMAF'_minMaf'$MINMAF'_majorminor'$DOMAJORMINOR'_minind'$minINDRd'_minD5x'$MINDP'_maxD20x'$MAXDP'minDind'$SETMINDEPTHIND'_maxDind'$SETMAXDEPTHIND'_minq'$MINQ'_minmapQ' # Build base name of output files | ||
|
||
# run angsd | ||
angsd -b $DATDIR/Master_query_dups_removed/everyone_omit_bamlist.txt \ | ||
-ref $REFDIR/GCF_041381155.1_Ai_NY_genomic.fna \ | ||
-GL $GENOLIKE \ | ||
-doGlf $DOGLF \ | ||
-doMaf $DOMAF \ | ||
-doMajorMinor $DOMAJORMINOR \ | ||
-dogeno $DOGENO \ | ||
-doPOST $DOPOST \ | ||
-doCounts $DOCOUNTS \ | ||
-dumpCounts $DUMPCOUNTS \ | ||
-doDepth $DODEPTH \ | ||
-maxDepth $MAXDEPTH \ | ||
-setMinDepth $MINDP \ | ||
-setMaxDepth $MAXDP \ | ||
-setMaxDepthInd $SETMAXDEPTHIND \ | ||
-setMinDepthInd $SETMINDEPTHIND \ | ||
-doIBS 1 \ | ||
-makematrix 1 \ | ||
-doCov 1 \ | ||
-minInd $minINDRd \ | ||
-dobcf 1 \ | ||
--ignore-RG 0 \ | ||
-minQ $MINQ \ | ||
-minMapQ $MINMAPQ \ | ||
-minMaf $MINMAF \ | ||
-SNP_pval 1e-6 \ | ||
-P 2 \ | ||
-remove_bads 1 \ | ||
-only_proper_pairs 1 \ | ||
-uniqueOnly 1 \ | ||
-C 50 \ | ||
-out $OUTDIR/everyone/$OUTBASE \ | ||
>& $OUTDIR/everyone/$OUTBASE'.log'; | ||
#done |
Oops, something went wrong.