Skip to content

Commit

Permalink
merge with main
Browse files Browse the repository at this point in the history
  • Loading branch information
Samuel Gurr committed May 28, 2024
2 parents 86bc185 + 29cee00 commit 257bc02
Show file tree
Hide file tree
Showing 7 changed files with 136 additions and 16 deletions.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
8 changes: 4 additions & 4 deletions RAnalysis/Scripts/Popgen/F0BroodstockF1Juveniles_workbook.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -214,16 +214,16 @@ clump4me <- function(data, LD.thr, LD.K, LD.minMaf, outputfilename) {
}
return(outputfilename)
}
}``
```

```{r run clump4me choose settings and run}
<<<<<<< HEAD
library(ggpubr)
=======
>>>>>>> 29cee00d029b3dd89bede960bf88fe341c13c82c
# INTERSECT DATA :::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
# * run the bam file - sliding window 50 -1000 at settings
Expand Down
83 changes: 78 additions & 5 deletions RAnalysis/Scripts/Popgen/F1F2F3_Juveniles_LowMod.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,14 @@ output: html_document
knitr::opts_chunk$set(echo = TRUE)
# SET WORKING DIRECTORY
# knitr::opts_knit$set(root.dir = "C:/Users/katherine.mcfarland/Documents/GitHub/EAD-ASEB-Airradians_multigen_OA/larvae") # Katie's
#knitr::opts_knit$set(root.dir = "C:/Users/samjg/Documents/Github_repositories/Airradians_multigen_OA/HPC_analysis") # Sam's
knitr::opts_knit$set(root.dir = "C:/Users/samuel.gurr/Documents/Github_repositories/EAD-ASEB-Airradians_multigen_OA/HPC_analysis") # Sam's
knitr::opts_knit$set(root.dir = "C:/Users/samjg/Documents/Github_repositories/Airradians_multigen_OA/HPC_analysis") # Sam's
#knitr::opts_knit$set(root.dir = "C:/Users/samuel.gurr/Documents/Github_repositories/EAD-ASEB-Airradians_multigen_OA/HPC_analysis") # Sam's
```

#### load packages

```{r load packages we need}
library(qqman)
library(vcfR)
library(hierfstat)
Expand Down Expand Up @@ -56,6 +55,7 @@ library(rPlant)
path = "output/lcWGS/angsd/Merge_Intercept/All_Juveniles/"
<<<<<<< HEAD
pH8.vcf <- read.vcfR(paste0(path,"pH8/Juveniles_pH8_merged.vcf.gz")) # 78283 variants
pH8.vcf_tidy <- vcfR2tidy(pH8.vcf)
pH8.bed <- read.pcadapt(paste0(path,"pH8/Juveniles_merged_pH8.bed"), type = "bed") # 78283 variants
Expand All @@ -70,6 +70,14 @@ F1F3.vcf <- read.vcfR(paste0(path2,"F1F3_LOWvMOD_merge.vcf.gz")) # 96703 va
F1F3.vcf_tidy <- vcfR2tidy(F1F3.vcf)
F1F3.bed <- read.pcadapt(paste0(path2,"F1F3_LOWvMOD.bed"), type = "bed") # 96703 variants
=======
pH8.vcf <- read.vcfR(paste0(path,"pH8/Juveniles_pH8_merged.vcf.gz")) # 78,283 variants
pH8.vcf_tidy <- vcfR2tidy(pH8.vcf)
pH8.bed <- read.pcadapt(paste0(path,"pH8/Juveniles_merged_pH8.bed"), type = "bed") # 78,283 variants
pH75.vcf <- read.vcfR(paste0(path,"pH75/Juveniles_pH75_merged.vcf.gz")) # 54,315 variants
pH75.bed <- read.pcadapt(paste0(path,"pH75/Juveniles_merged_pH75.bed"), type = "bed") # 54,315 variants
>>>>>>> 29cee00d029b3dd89bede960bf88fe341c13c82c
```

Expand Down Expand Up @@ -136,8 +144,16 @@ when working with whole genome data we need to perform SNP thinning to acocutn f


```{r NO LD - screeplot diagnostic and plot PCA}
<<<<<<< HEAD
library(ggpubr)
getwd()
=======
path = "../RAnalysis/Output/PopGen/"
library(ggpubr)
>>>>>>> 29cee00d029b3dd89bede960bf88fe341c13c82c
## pH8
pH8.diagnostic <- pcadapt(pH8.bed, K = 20)
plot(pH8.diagnostic, option = "screeplot") # looks like 4 - elbow!
Expand All @@ -149,6 +165,15 @@ ggarrange( (plot(pH8.diagnostic, option = "screeplot")),
nrow=3)
dev.off()
pdf(paste0(path,"AllJuv_pH8_interceptRAW.pdf"),width=8, height = 6)
ggarrange(
plot(pH8.diagnostic, option = "screeplot"),
plot(pH8.res, option = "screeplot"),
plot(pH8.res, option = "manhattan"),
plot(pH8.res, option = "scores", pop = pH8_strata$Gen),
nrow = 2, ncol = 2
)
dev.off()
## pH75
pH75.diagnostic <- pcadapt(pH75.bed, K = 20)
Expand All @@ -168,7 +193,15 @@ F1F3.diagnostic <- pcadapt(F1F3.bed, K = 20)
plot(F1F3.diagnostic, option = "screeplot") # looks like 4 - elbow!
F1F3.res <- pcadapt(F1F3.bed, K = 4) # assign PCA
pdf(paste0(path,"AllJuv_pH75_interceptRAW.pdf"),width=8, height = 6)
ggarrange(
plot(pH75.diagnostic, option = "screeplot"),
plot(pH75.res, option = "screeplot"),
plot(pH75.res, option = "manhattan"),
plot(pH75.res, option = "scores", pop = pH75_strata$Gen),
nrow = 2, ncol = 2
)
dev.off()
```


Expand Down Expand Up @@ -252,17 +285,41 @@ test <- as.data.frame(clump4me(data = pH8.bed,
LD.K = 4, # number of PCAs
LD.minMaf = 0.05, # minumum allele freq threshold (5% - =0.05, etc)
outputfilename = pH8))
plot(output_PC1~input_Window , data=test) # window of 150?
plot(output_PC1~input_Window, data=test) # window of 450?
pH8.LDdiagnostics <- pcadapt(pH8.bed,
K = 20,
<<<<<<< HEAD
LD.clumping = list(size = 250, thr = 0.1), # based clump4me result
=======
LD.clumping = list(size = 450, thr = 0.1), # based clump4me result
>>>>>>> 29cee00d029b3dd89bede960bf88fe341c13c82c
min.maf= 0.05)
plot(pH8.LDdiagnostics, option = "screeplot") # Lets try and use K = 2 after SNP thinning
pH8.LDres <- pcadapt(pH8.bed,
K = 2, # based on screeplot above
<<<<<<< HEAD
LD.clumping = list(size = 150, thr = 0.1), # based clump4me result
min.maf= 0.05)
=======
LD.clumping = list(size = 450, thr = 0.1), # based clump4me result
min.maf= 0.05)
plot(pH8.LDres, option = "screeplot") # Lets try and use K = 2 after SNP thinning
plot(pH8.LDres, option = "manhattan")
plot(pH8.LDres, option = "scores", pop = pH8_strata$Gen)
pdf(paste0(path,"AllJuv_pH8_interceptLDFILT.pdf"),width=8, height = 6)
ggarrange(
plot(pH8.LDdiagnostics, option = "screeplot"),
plot(pH8.LDres, option = "screeplot"),
plot(pH8.LDres, option = "manhattan"),
plot(pH8.LDres, option = "scores", pop = pH8_strata$Gen),
nrow = 2, ncol = 2
)
dev.off()
>>>>>>> 29cee00d029b3dd89bede960bf88fe341c13c82c
## pH 75
Expand All @@ -276,6 +333,7 @@ pH75.LDdiagnostics <- pcadapt(pH75.bed,
K = 20,
LD.clumping = list(size = 100, thr = 0.1), # based clump4me result
min.maf= 0.05)
<<<<<<< HEAD
plot(pH75.LDdiagnostics, option = "screeplot") # Lets try and use K = 2 after SNP thinning
pH75.LDres <- pcadapt(pH75.bed,
K = 2, # based on screeplot above
Expand All @@ -298,8 +356,23 @@ F1F3.LDdiagnostics <- pcadapt(F1F3.bed,
plot(F1F3.LDdiagnostics, option = "screeplot") # Lets try and use K = 6 after SNP thinning
F1F3.LDres <- pcadapt(F1F3.bed,
K = 6, # based on screeplot above
=======
plot(pH75.LDdiagnostics, option = "screeplot") # Lets try and use K = 8 after SNP thinning
pH75.LDres <- pcadapt(pH75.bed,
K = 8, # based on screeplot above
>>>>>>> 29cee00d029b3dd89bede960bf88fe341c13c82c
LD.clumping = list(size = 100, thr = 0.1), # based clump4me result
min.maf= 0.05)
pdf(paste0(path,"AllJuv_pH75_interceptLDFILT.pdf"),width=8, height = 6)
ggarrange(
plot(pH75.LDdiagnostics, option = "screeplot"),
plot(pH75.LDres, option = "screeplot"),
plot(pH75.LDres, option = "manhattan"),
plot(pH75.LDres, option = "scores", pop = pH75_strata$Gen),
nrow = 2, ncol = 2
)
dev.off()
```


Expand Down
61 changes: 54 additions & 7 deletions RAnalysis/Scripts/Popgen/F1_Broodstock_apprent.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,11 @@ library(dplyr)
library(stringi)
library(tibble)
library(stringr)
<<<<<<< HEAD
# library(BEDASSLE)
=======
library(BEDASSLE)
>>>>>>> 29cee00d029b3dd89bede960bf88fe341c13c82c
# ?BEDASSLE
```

Expand Down Expand Up @@ -100,10 +104,15 @@ converter = function (InputFile) {
```


<<<<<<< HEAD
```{r run the converter}
=======
```{r run converter Juveniles Mod pCO2}
>>>>>>> 29cee00d029b3dd89bede960bf88fe341c13c82c
converter(F1J_pH75_raw_geno)
View(OutputFile)
# View(OutputFile)
# convert all genotypes to numeric
Expand Down Expand Up @@ -202,7 +211,7 @@ F0_geno <- F0_transposed_geno
# how many SNPs?
numbSNPs_F0 <- (ncol(F0_geno) - 1)
paste0("Total number of SNPs = ", numbSNPs_F0) # "Total number of SNPs = 52801"```
paste0("Total number of SNPs = ", numbSNPs_F0) # "Total number of SNPs = 37901"```
```


Expand Down Expand Up @@ -272,9 +281,8 @@ F1J_pH75_geno <- F1J_pH75_transposed_geno
# how many SNPs?
numbSNPs_F1JpH75 <- (ncol(F1J_pH75_geno) - 1)
paste0("Total number of SNPs = ", numbSNPs_F1JpH75) # "Total number of SNPs = 52801"
paste0("Total number of SNPs = ", numbSNPs_F1JpH75) # "Total number of SNPs = 22766"
(unique(F1J_pH75_geno[2]))
#plot the results
Expand Down Expand Up @@ -324,8 +332,8 @@ paste0("Percent F1 Juv shared with parents = ", (nrow(Join_F0_F1JpH8)/nrow(F1J_p
Join_F0_F1JpH75 <- inner_join(F0_format_geno, F1J_pH75_format_geno)
# salitty check for the number of SNPs shared - must be TRUE!
nrow(Join_F0_F1JpH75) == length(intersect(F0_format_geno$Gene.ID, F1J_pH75_format_geno$Gene.ID)) # == TRUE
paste0("Total number of shared SNPs with F1 Juv pH8 = ", nrow(Join_F0_F1JpH75)) # "Total number of SNPs = 24"
paste0("Percent F1 Juv shared with parents = ", (nrow(Join_F0_F1JpH75)/nrow(F1J_pH75_format_geno))*100 ) # 26.4 % shared!
paste0("Total number of shared SNPs with F1 Juv pH8 = ", nrow(Join_F0_F1JpH75)) # "Total number of SNPs = 8123"
paste0("Percent F1 Juv shared with parents = ", (nrow(Join_F0_F1JpH75)/nrow(F1J_pH75_format_geno))*100 ) # 35.7 % shared!
# :::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
Expand Down Expand Up @@ -478,14 +486,53 @@ apparentOUT <- apparent(F1Juv_pH8_parentage_df[,c(1:ncol(F1Juv_pH8_parentage_df)
stop("Please, check the format of the key column (Column 2) in your input file. Acceptable keys for each genotype are Mo, Fa, Off, Pa and All.")
}
}
View(F1Juv_pH75_parentage_df)
apparentOUT <- apparent(F1Juv_pH75_parentage_df[1:2000], MaxIdent=0.10, alpha=0.05, nloci=100, self=TRUE, plot=TRUE, Dyad=FALSE)
apparentOUT <- apparent(
(F1Juv_pH75_parentage_df[1:2000] %>%
dplyr::mutate(key = 'All') %>%
dplyr::filter(!genos %in% c('F1-J256-pH75', 'F1-J354-pH75'))),
MaxIdent=0.10, alpha=0.05, nloci=100, self=TRUE, plot=TRUE, Dyad=FALSE)
apparentOUT$Triad_all
apparentOUT
apparentOUT$Triad_sig
apparentOUT$Triad_summary_pop
apparentOUT$Triad_summary_geno
unique(apparentOUT$Triad_all$Offspring)
unique(apparentOUT$Triad_sig$Offspring)
apparent_editted <- apparentOUT$Triad_all %>%
dplyr::filter(!Cross.Type %in% 'self') %>%
dplyr::filter(!grepl("F0-B",Offspring),
grepl("F0-B",Parent1),
grepl("F0-B",Parent2))
x_final <- data.frame()
for (i in 1:nrow(loop_offspring)) {
apparentOUT <- apparent(F1Juv_pH75_parentage_df, MaxIdent=0.10, alpha=0.01, nloci=10, self=TRUE, plot=FALSE, Dyad=FALSE)
x <- apparent_editted %>%
filter(Offspring %in% loop_offspring[i,]) %>%
arrange((GD)) %>%
slice(1)
x_final <- rbind(x, x_final)
}
View(x_final)
```



```{r apparent practice data}
InputFile <- read.table(file="RAnalysis/Scripts/Popgen/apparent_TestData.txt",sep="\t",h=F)
apparentOUT <- apparent(InputFile, MaxIdent=0.10, alpha=0.05, nloci=100, self=TRUE, plot=TRUE, Dyad=FALSE)
apparentOUT
```

0 comments on commit 257bc02

Please sign in to comment.