diff --git a/RAnalysis/Output/PopGen/AllJuv_pH75_interceptLDFILT.pdf b/RAnalysis/Output/PopGen/AllJuv_pH75_interceptLDFILT.pdf new file mode 100644 index 0000000..c6459e9 Binary files /dev/null and b/RAnalysis/Output/PopGen/AllJuv_pH75_interceptLDFILT.pdf differ diff --git a/RAnalysis/Output/PopGen/AllJuv_pH75_interceptRAW.pdf b/RAnalysis/Output/PopGen/AllJuv_pH75_interceptRAW.pdf new file mode 100644 index 0000000..34b5534 Binary files /dev/null and b/RAnalysis/Output/PopGen/AllJuv_pH75_interceptRAW.pdf differ diff --git a/RAnalysis/Output/PopGen/AllJuv_pH8_interceptLDFILT.pdf b/RAnalysis/Output/PopGen/AllJuv_pH8_interceptLDFILT.pdf new file mode 100644 index 0000000..8fcd34b Binary files /dev/null and b/RAnalysis/Output/PopGen/AllJuv_pH8_interceptLDFILT.pdf differ diff --git a/RAnalysis/Output/PopGen/AllJuv_pH8_interceptRAW.pdf b/RAnalysis/Output/PopGen/AllJuv_pH8_interceptRAW.pdf new file mode 100644 index 0000000..264b7a0 Binary files /dev/null and b/RAnalysis/Output/PopGen/AllJuv_pH8_interceptRAW.pdf differ diff --git a/RAnalysis/Scripts/Popgen/F0BroodstockF1Juveniles_workbook.Rmd b/RAnalysis/Scripts/Popgen/F0BroodstockF1Juveniles_workbook.Rmd index 4299571..2c7abe6 100644 --- a/RAnalysis/Scripts/Popgen/F0BroodstockF1Juveniles_workbook.Rmd +++ b/RAnalysis/Scripts/Popgen/F0BroodstockF1Juveniles_workbook.Rmd @@ -214,16 +214,16 @@ clump4me <- function(data, LD.thr, LD.K, LD.minMaf, outputfilename) { } return(outputfilename) -} - - - +}`` ``` ```{r run clump4me choose settings and run} +<<<<<<< HEAD library(ggpubr) +======= +>>>>>>> 29cee00d029b3dd89bede960bf88fe341c13c82c # INTERSECT DATA ::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: # * run the bam file - sliding window 50 -1000 at settings diff --git a/RAnalysis/Scripts/Popgen/F1F2F3_Juveniles_LowMod.Rmd b/RAnalysis/Scripts/Popgen/F1F2F3_Juveniles_LowMod.Rmd index 0506064..7f18bfc 100644 --- a/RAnalysis/Scripts/Popgen/F1F2F3_Juveniles_LowMod.Rmd +++ b/RAnalysis/Scripts/Popgen/F1F2F3_Juveniles_LowMod.Rmd @@ -9,15 +9,14 @@ output: html_document knitr::opts_chunk$set(echo = TRUE) # SET WORKING DIRECTORY # knitr::opts_knit$set(root.dir = "C:/Users/katherine.mcfarland/Documents/GitHub/EAD-ASEB-Airradians_multigen_OA/larvae") # Katie's -#knitr::opts_knit$set(root.dir = "C:/Users/samjg/Documents/Github_repositories/Airradians_multigen_OA/HPC_analysis") # Sam's -knitr::opts_knit$set(root.dir = "C:/Users/samuel.gurr/Documents/Github_repositories/EAD-ASEB-Airradians_multigen_OA/HPC_analysis") # Sam's +knitr::opts_knit$set(root.dir = "C:/Users/samjg/Documents/Github_repositories/Airradians_multigen_OA/HPC_analysis") # Sam's +#knitr::opts_knit$set(root.dir = "C:/Users/samuel.gurr/Documents/Github_repositories/EAD-ASEB-Airradians_multigen_OA/HPC_analysis") # Sam's ``` #### load packages ```{r load packages we need} - library(qqman) library(vcfR) library(hierfstat) @@ -56,6 +55,7 @@ library(rPlant) path = "output/lcWGS/angsd/Merge_Intercept/All_Juveniles/" +<<<<<<< HEAD pH8.vcf <- read.vcfR(paste0(path,"pH8/Juveniles_pH8_merged.vcf.gz")) # 78283 variants pH8.vcf_tidy <- vcfR2tidy(pH8.vcf) pH8.bed <- read.pcadapt(paste0(path,"pH8/Juveniles_merged_pH8.bed"), type = "bed") # 78283 variants @@ -70,6 +70,14 @@ F1F3.vcf <- read.vcfR(paste0(path2,"F1F3_LOWvMOD_merge.vcf.gz")) # 96703 va F1F3.vcf_tidy <- vcfR2tidy(F1F3.vcf) F1F3.bed <- read.pcadapt(paste0(path2,"F1F3_LOWvMOD.bed"), type = "bed") # 96703 variants +======= +pH8.vcf <- read.vcfR(paste0(path,"pH8/Juveniles_pH8_merged.vcf.gz")) # 78,283 variants +pH8.vcf_tidy <- vcfR2tidy(pH8.vcf) +pH8.bed <- read.pcadapt(paste0(path,"pH8/Juveniles_merged_pH8.bed"), type = "bed") # 78,283 variants + +pH75.vcf <- read.vcfR(paste0(path,"pH75/Juveniles_pH75_merged.vcf.gz")) # 54,315 variants +pH75.bed <- read.pcadapt(paste0(path,"pH75/Juveniles_merged_pH75.bed"), type = "bed") # 54,315 variants +>>>>>>> 29cee00d029b3dd89bede960bf88fe341c13c82c ``` @@ -136,8 +144,16 @@ when working with whole genome data we need to perform SNP thinning to acocutn f ```{r NO LD - screeplot diagnostic and plot PCA} +<<<<<<< HEAD library(ggpubr) getwd() +======= + +path = "../RAnalysis/Output/PopGen/" + + +library(ggpubr) +>>>>>>> 29cee00d029b3dd89bede960bf88fe341c13c82c ## pH8 pH8.diagnostic <- pcadapt(pH8.bed, K = 20) plot(pH8.diagnostic, option = "screeplot") # looks like 4 - elbow! @@ -149,6 +165,15 @@ ggarrange( (plot(pH8.diagnostic, option = "screeplot")), nrow=3) dev.off() +pdf(paste0(path,"AllJuv_pH8_interceptRAW.pdf"),width=8, height = 6) +ggarrange( + plot(pH8.diagnostic, option = "screeplot"), + plot(pH8.res, option = "screeplot"), + plot(pH8.res, option = "manhattan"), + plot(pH8.res, option = "scores", pop = pH8_strata$Gen), + nrow = 2, ncol = 2 +) +dev.off() ## pH75 pH75.diagnostic <- pcadapt(pH75.bed, K = 20) @@ -168,7 +193,15 @@ F1F3.diagnostic <- pcadapt(F1F3.bed, K = 20) plot(F1F3.diagnostic, option = "screeplot") # looks like 4 - elbow! F1F3.res <- pcadapt(F1F3.bed, K = 4) # assign PCA - +pdf(paste0(path,"AllJuv_pH75_interceptRAW.pdf"),width=8, height = 6) +ggarrange( + plot(pH75.diagnostic, option = "screeplot"), + plot(pH75.res, option = "screeplot"), + plot(pH75.res, option = "manhattan"), + plot(pH75.res, option = "scores", pop = pH75_strata$Gen), + nrow = 2, ncol = 2 +) +dev.off() ``` @@ -252,17 +285,41 @@ test <- as.data.frame(clump4me(data = pH8.bed, LD.K = 4, # number of PCAs LD.minMaf = 0.05, # minumum allele freq threshold (5% - =0.05, etc) outputfilename = pH8)) -plot(output_PC1~input_Window , data=test) # window of 150? +plot(output_PC1~input_Window, data=test) # window of 450? pH8.LDdiagnostics <- pcadapt(pH8.bed, K = 20, +<<<<<<< HEAD LD.clumping = list(size = 250, thr = 0.1), # based clump4me result +======= + LD.clumping = list(size = 450, thr = 0.1), # based clump4me result +>>>>>>> 29cee00d029b3dd89bede960bf88fe341c13c82c min.maf= 0.05) plot(pH8.LDdiagnostics, option = "screeplot") # Lets try and use K = 2 after SNP thinning pH8.LDres <- pcadapt(pH8.bed, K = 2, # based on screeplot above +<<<<<<< HEAD LD.clumping = list(size = 150, thr = 0.1), # based clump4me result min.maf= 0.05) +======= + LD.clumping = list(size = 450, thr = 0.1), # based clump4me result + min.maf= 0.05) +plot(pH8.LDres, option = "screeplot") # Lets try and use K = 2 after SNP thinning +plot(pH8.LDres, option = "manhattan") +plot(pH8.LDres, option = "scores", pop = pH8_strata$Gen) + + +pdf(paste0(path,"AllJuv_pH8_interceptLDFILT.pdf"),width=8, height = 6) +ggarrange( + plot(pH8.LDdiagnostics, option = "screeplot"), + plot(pH8.LDres, option = "screeplot"), + plot(pH8.LDres, option = "manhattan"), + plot(pH8.LDres, option = "scores", pop = pH8_strata$Gen), + nrow = 2, ncol = 2 +) +dev.off() + +>>>>>>> 29cee00d029b3dd89bede960bf88fe341c13c82c ## pH 75 @@ -276,6 +333,7 @@ pH75.LDdiagnostics <- pcadapt(pH75.bed, K = 20, LD.clumping = list(size = 100, thr = 0.1), # based clump4me result min.maf= 0.05) +<<<<<<< HEAD plot(pH75.LDdiagnostics, option = "screeplot") # Lets try and use K = 2 after SNP thinning pH75.LDres <- pcadapt(pH75.bed, K = 2, # based on screeplot above @@ -298,8 +356,23 @@ F1F3.LDdiagnostics <- pcadapt(F1F3.bed, plot(F1F3.LDdiagnostics, option = "screeplot") # Lets try and use K = 6 after SNP thinning F1F3.LDres <- pcadapt(F1F3.bed, K = 6, # based on screeplot above +======= +plot(pH75.LDdiagnostics, option = "screeplot") # Lets try and use K = 8 after SNP thinning +pH75.LDres <- pcadapt(pH75.bed, + K = 8, # based on screeplot above +>>>>>>> 29cee00d029b3dd89bede960bf88fe341c13c82c LD.clumping = list(size = 100, thr = 0.1), # based clump4me result min.maf= 0.05) + +pdf(paste0(path,"AllJuv_pH75_interceptLDFILT.pdf"),width=8, height = 6) +ggarrange( + plot(pH75.LDdiagnostics, option = "screeplot"), + plot(pH75.LDres, option = "screeplot"), + plot(pH75.LDres, option = "manhattan"), + plot(pH75.LDres, option = "scores", pop = pH75_strata$Gen), + nrow = 2, ncol = 2 +) +dev.off() ``` diff --git a/RAnalysis/Scripts/Popgen/F1_Broodstock_apprent.Rmd b/RAnalysis/Scripts/Popgen/F1_Broodstock_apprent.Rmd index be4c7ea..aec7733 100644 --- a/RAnalysis/Scripts/Popgen/F1_Broodstock_apprent.Rmd +++ b/RAnalysis/Scripts/Popgen/F1_Broodstock_apprent.Rmd @@ -20,7 +20,11 @@ library(dplyr) library(stringi) library(tibble) library(stringr) +<<<<<<< HEAD # library(BEDASSLE) +======= +library(BEDASSLE) +>>>>>>> 29cee00d029b3dd89bede960bf88fe341c13c82c # ?BEDASSLE ``` @@ -100,10 +104,15 @@ converter = function (InputFile) { ``` +<<<<<<< HEAD ```{r run the converter} +======= + +```{r run converter Juveniles Mod pCO2} +>>>>>>> 29cee00d029b3dd89bede960bf88fe341c13c82c converter(F1J_pH75_raw_geno) -View(OutputFile) +# View(OutputFile) # convert all genotypes to numeric @@ -202,7 +211,7 @@ F0_geno <- F0_transposed_geno # how many SNPs? numbSNPs_F0 <- (ncol(F0_geno) - 1) -paste0("Total number of SNPs = ", numbSNPs_F0) # "Total number of SNPs = 52801"``` +paste0("Total number of SNPs = ", numbSNPs_F0) # "Total number of SNPs = 37901"``` ``` @@ -272,9 +281,8 @@ F1J_pH75_geno <- F1J_pH75_transposed_geno # how many SNPs? numbSNPs_F1JpH75 <- (ncol(F1J_pH75_geno) - 1) -paste0("Total number of SNPs = ", numbSNPs_F1JpH75) # "Total number of SNPs = 52801" +paste0("Total number of SNPs = ", numbSNPs_F1JpH75) # "Total number of SNPs = 22766" -(unique(F1J_pH75_geno[2])) #plot the results @@ -324,8 +332,8 @@ paste0("Percent F1 Juv shared with parents = ", (nrow(Join_F0_F1JpH8)/nrow(F1J_p Join_F0_F1JpH75 <- inner_join(F0_format_geno, F1J_pH75_format_geno) # salitty check for the number of SNPs shared - must be TRUE! nrow(Join_F0_F1JpH75) == length(intersect(F0_format_geno$Gene.ID, F1J_pH75_format_geno$Gene.ID)) # == TRUE -paste0("Total number of shared SNPs with F1 Juv pH8 = ", nrow(Join_F0_F1JpH75)) # "Total number of SNPs = 24" -paste0("Percent F1 Juv shared with parents = ", (nrow(Join_F0_F1JpH75)/nrow(F1J_pH75_format_geno))*100 ) # 26.4 % shared! +paste0("Total number of shared SNPs with F1 Juv pH8 = ", nrow(Join_F0_F1JpH75)) # "Total number of SNPs = 8123" +paste0("Percent F1 Juv shared with parents = ", (nrow(Join_F0_F1JpH75)/nrow(F1J_pH75_format_geno))*100 ) # 35.7 % shared! # ::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: @@ -478,9 +486,44 @@ apparentOUT <- apparent(F1Juv_pH8_parentage_df[,c(1:ncol(F1Juv_pH8_parentage_df) stop("Please, check the format of the key column (Column 2) in your input file. Acceptable keys for each genotype are Mo, Fa, Off, Pa and All.") } } +View(F1Juv_pH75_parentage_df) + +apparentOUT <- apparent(F1Juv_pH75_parentage_df[1:2000], MaxIdent=0.10, alpha=0.05, nloci=100, self=TRUE, plot=TRUE, Dyad=FALSE) + +apparentOUT <- apparent( + (F1Juv_pH75_parentage_df[1:2000] %>% + dplyr::mutate(key = 'All') %>% + dplyr::filter(!genos %in% c('F1-J256-pH75', 'F1-J354-pH75'))), + MaxIdent=0.10, alpha=0.05, nloci=100, self=TRUE, plot=TRUE, Dyad=FALSE) + +apparentOUT$Triad_all +apparentOUT +apparentOUT$Triad_sig +apparentOUT$Triad_summary_pop +apparentOUT$Triad_summary_geno +unique(apparentOUT$Triad_all$Offspring) +unique(apparentOUT$Triad_sig$Offspring) + +apparent_editted <- apparentOUT$Triad_all %>% + dplyr::filter(!Cross.Type %in% 'self') %>% + dplyr::filter(!grepl("F0-B",Offspring), + grepl("F0-B",Parent1), + grepl("F0-B",Parent2)) + +x_final <- data.frame() +for (i in 1:nrow(loop_offspring)) { -apparentOUT <- apparent(F1Juv_pH75_parentage_df, MaxIdent=0.10, alpha=0.01, nloci=10, self=TRUE, plot=FALSE, Dyad=FALSE) + x <- apparent_editted %>% + filter(Offspring %in% loop_offspring[i,]) %>% + arrange((GD)) %>% + slice(1) + + x_final <- rbind(x, x_final) + +} + +View(x_final) ``` @@ -488,4 +531,8 @@ apparentOUT <- apparent(F1Juv_pH75_parentage_df, MaxIdent=0.10, alpha=0.01, nloc ```{r apparent practice data} InputFile <- read.table(file="RAnalysis/Scripts/Popgen/apparent_TestData.txt",sep="\t",h=F) + +apparentOUT <- apparent(InputFile, MaxIdent=0.10, alpha=0.05, nloci=100, self=TRUE, plot=TRUE, Dyad=FALSE) + +apparentOUT ``` \ No newline at end of file