first commit

mdmkac1 · Nov 1, 2020 · 2987b55 · 2987b55
commit 2987b55
Show file tree

Hide file tree

Showing 1,426 changed files with 7,398,622 additions and 0 deletions.
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -0,0 +1,2 @@
+^BITFAM\.Rproj$
+^\.Rproj\.user$
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1 @@
+.Rproj.user
diff --git a/BITFAM.Rproj b/BITFAM.Rproj
@@ -0,0 +1,22 @@
+Version: 1.0
+
+RestoreWorkspace: No
+SaveWorkspace: No
+AlwaysSaveHistory: Default
+
+EnableCodeIndexing: Yes
+UseSpacesForTab: Yes
+NumSpacesForTab: 2
+Encoding: UTF-8
+
+RnwWeave: Sweave
+LaTeX: pdfLaTeX
+
+AutoAppendNewline: Yes
+StripTrailingWhitespace: Yes
+LineEndingConversion: Posix
+
+BuildType: Package
+PackageUseDevtools: Yes
+PackageInstallArgs: --no-multiarch --with-keep.source
+PackageRoxygenize: rd,collate,namespace
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -0,0 +1,16 @@
+Package: BITFAM
+Title: What the Package Does (One Line, Title Case)
+Version: 0.0.0.9000
+Authors@R: 
+    person(given = "First",
+           family = "Last",
+           role = c("aut", "cre"),
+           email = "first.last@example.com",
+           comment = c(ORCID = "YOUR-ORCID-ID"))
+Description: What the package does (one paragraph).
+License: `use_mit_license()`, `use_gpl3_license()` or friends to
+    pick a license
+Encoding: UTF-8
+LazyData: true
+Roxygen: list(markdown = TRUE)
+RoxygenNote: 7.1.0
diff --git a/NAMESPACE b/NAMESPACE
@@ -0,0 +1,2 @@
+# Generated by roxygen2: do not edit by hand
+
diff --git a/R/BITFAM.R b/R/BITFAM.R
@@ -0,0 +1,99 @@
+#' Fit the BITFAM masked factor model with variational Bayes
+#'
+#' Builds a binary gene-by-TF mask from per-TF target gene files and fits the
+#' Stan model defined below (X ~ normal(Z * W', 1 / sqrt(tau))) with rstan's
+#' mean-field variational algorithm.
+#'
+#' @param data Normalized expression matrix with genes as rows (gene names as
+#'   row names) and cells as columns, i.e. the layout returned by
+#'   `BITFAM_preprocess()`.
+#' @param species Either "mouse" or "human"; selects the directory the TF
+#'   target files are read from ("TF/mouse/" or "TF/human/", relative to the
+#'   working directory).
+#' @param interseted_TF Optional character vector of TFs that are kept in the
+#'   model regardless of how many of their targets are found in `data`. The
+#'   default `NA` adds none. A target file must exist for each of them.
+#' @param ncores Value stored in `options(mc.cores)` before fitting.
+#' @return A list with elements `Model` (the object returned by `rstan::vb`),
+#'   `TF_used` (TFs in model column order) and `Genes` (the genes that were
+#'   modelled, in the row order of the mask and of `W`).
+BITFAM <- function(data, species, interseted_TF = NA, ncores){
+  if(species == "mouse"){
+    TF_targets_dir <- "TF/mouse/"
+  }else if(species == "human"){
+    TF_targets_dir <- "TF/human/"
+  }else{
+    stop("The species must be either mouse or human.")
+  }
+
+  # The original body read an undefined `process_data` object and never used
+  # `data`; the expression matrix passed by the caller is used instead.
+  data <- as.matrix(data)
+  all_genes <- rownames(data)
+  if(is.null(all_genes)){
+    stop("`data` must have gene names as row names.")
+  }
+
+  # Genes of `data` that are listed (first column) in the target file of `tf`.
+  targets_in_data <- function(tf){
+    tmp_gene <- read.table(paste0(TF_targets_dir, tf), stringsAsFactors = FALSE)
+    all_genes[all_genes %in% tmp_gene$V1]
+  }
+
+  # NOTE(review): the original referenced `TF_used` before defining it. The
+  # candidate set is reconstructed here as the genes of `data` that have a
+  # target file in `TF_targets_dir` (target files are looked up by TF name
+  # below) -- confirm this matches the intended candidate list.
+  TF_used <- all_genes[all_genes %in% list.files(TF_targets_dir)]
+
+  # Keep candidates with more than 10 target genes present in the data.
+  n_targets <- vapply(TF_used, function(tf) length(targets_in_data(tf)), integer(1))
+  TF_used <- TF_used[n_targets > 10]
+
+  # `all()` keeps the condition scalar when several TFs are supplied.
+  if(!all(is.na(interseted_TF))){
+    TF_used <- unique(c(TF_used, interseted_TF[!is.na(interseted_TF)]))
+  }
+  if(length(TF_used) == 0){
+    stop("No transcription factor with a target file in '", TF_targets_dir,
+         "' has more than 10 target genes in the data.")
+  }
+
+  gene_list <- lapply(TF_used, targets_in_data)
+
+  # Cells in rows, genes in columns. Features whose name contains "gRNA" are
+  # dropped with a logical filter: `-grep()` would remove every column when
+  # nothing matches.
+  X <- t(data)
+  X <- X[, !grepl(pattern = "gRNA", x = colnames(X)), drop = FALSE]
+
+  # Mask[d, k] is 1 when gene d is a listed target of TF k, otherwise 0.
+  Mask_matrix <- matrix(0, nrow = ncol(X), ncol = length(TF_used))
+  for(k in seq_along(TF_used)){
+    Mask_matrix[, k] <- as.numeric(colnames(X) %in% gene_list[[k]])
+  }
+
+  N <- dim(X)[1]
+  D <- dim(X)[2]
+  K <- length(TF_used)
+  data_to_model <- list(N = N, D = D, K = K, X = X, Mask = Mask_matrix)
+
+
+  # rstan is attached, as in the original, so code that relies on it being on
+  # the search path after this call keeps working.
+  library(rstan)
+  rstan_options(auto_write = TRUE)
+  options(mc.cores = ncores)
+
+  set.seed(100)
+  pca_beta_prior <- "
+data {
+int<lower=0> N; // Number of samples
+int<lower=0> D; // The original dimension
+int<lower=0> K; // The latent dimension
+matrix[N, D] X; // The data matrix
+matrix[D, K] Mask; // The binary mask of prior knowledge indicate the target of TFs
+}
+
+parameters {
+matrix<lower=0, upper=1>[N, K] Z; // The latent matrix
+matrix[D, K] W; // The weight matrix
+real<lower=0> tau; // Noise term
+vector<lower=0>[K] alpha; // ARD prior
+}
+
+transformed parameters{
+matrix<lower=0>[D, K] t_alpha;
+real<lower=0> t_tau;
+for(wmd in 1:D){
+for(wmk in 1:K){
+t_alpha[wmd, wmk] = Mask[wmd, wmk] == 1 ? inv(sqrt(alpha[wmk])) : 0.01;
+}
+}
+t_tau = inv(sqrt(tau));
+}
+model {
+tau ~ gamma(1,1);
+to_vector(Z) ~ beta(0.5, 0.5);
+alpha ~ gamma(1e-3,1e-3);
+for(d in 1:D){
+for(k in 1:K){
+W[d,k] ~ normal(0, t_alpha[d, k]);
+}
+}
+to_vector(X) ~ normal(to_vector(Z*W'), t_tau);
+} "
+
+  m_beta_prior <- stan_model(model_code = pca_beta_prior)
+  stan.fit.vb.real.beta.prior <- vb(m_beta_prior, data = data_to_model, algorithm = "meanfield",
+                                  iter = 8000, output_samples = 300)
+  # `Genes` lists the modelled genes so it lines up with the rows of `W`.
+  BITFAM_list <- list(Model = stan.fit.vb.real.beta.prior,
+                      TF_used = TF_used,
+                      Genes = colnames(X))
+  return(BITFAM_list)
+}
+
+
+
+
diff --git a/R/BITFAM_extract.R b/R/BITFAM_extract.R
@@ -0,0 +1,4 @@
+#' Posterior-mean matrix of one parameter of a fitted BITFAM model
+#'
+#' @param BITFAM_list List returned by `BITFAM()`; its `Model` element is the
+#'   fitted object the draws are extracted from.
+#' @param result Name of the model parameter to summarise, "Z" by default.
+#'   The parameter's draws must form a 3-dimensional array, because the mean
+#'   is taken over the first dimension for every (2nd, 3rd) index pair.
+#' @return A matrix holding the mean of the extracted draws over their first
+#'   dimension.
+BITFAM_extract <- function(BITFAM_list, result = "Z"){
+  # Read the fit from the argument; the original looked up a variable named
+  # `stan.fit.vb.real.beta.prior` that is not defined in this function.
+  fitted_model <- BITFAM_list$Model
+  if(is.null(fitted_model)){
+    stop("`BITFAM_list` must contain a `Model` element, as returned by BITFAM().")
+  }
+  # Qualified call so another attached package's `extract` cannot mask it.
+  draws <- rstan::extract(fitted_model, result)[[1]]
+  result_matrix <- apply(draws, c(2,3), mean)
+  return(result_matrix)
+}
diff --git a/R/BITFAM_preprocess.R b/R/BITFAM_preprocess.R
@@ -0,0 +1,15 @@
+#' Normalize expression data and keep the most variable genes
+#'
+#' Creates a Seurat object (features seen in at least 3 cells, cells with at
+#' least 200 features), normalizes it with `NormalizeData()`, selects 5000
+#' variable features and returns their normalized values.
+#'
+#' @param data Either a directory readable by `Seurat::Read10X()` or a count
+#'   matrix accepted by `Seurat::CreateSeuratObject()`.
+#' @param data_normalized Switch tested by the original body: when `TRUE`,
+#'   `data` is read with `Read10X(data.dir = data)`; otherwise `data` is used
+#'   directly as the counts. Defaults to `TRUE` when `data` is a character
+#'   path. NOTE(review): the name suggests a different meaning; it is kept
+#'   because the original code tested a variable of this name -- confirm the
+#'   intended semantics.
+#' @return A matrix of normalized values with the variable features as rows
+#'   and cells as columns.
+BITFAM_preprocess <- function(data, data_normalized = is.character(data)){
+  # Inputs are parameters now; the original read `data` and `data_normalized`
+  # as free variables, which are undefined in a zero-argument function.
+  if(isTRUE(data_normalized)){
+    raw_data <- Seurat::Read10X(data.dir = data)
+  }else{
+    raw_data <- data
+  }
+  process_data <- Seurat::CreateSeuratObject(counts = raw_data, min.cells = 3, min.features = 200)
+  process_data <- Seurat::NormalizeData(object = process_data)
+  process_data <- Seurat::FindVariableFeatures(object = process_data, nfeatures = 5000)
+
+  variable_genes <- Seurat::VariableFeatures(process_data)
+  assay_values <- Seurat::GetAssayData(object = process_data)
+
+  # A separate name is used for the result so the input flag is not overwritten.
+  normalized_matrix <- as.matrix(assay_values[variable_genes, , drop = FALSE])
+  rownames(normalized_matrix) <- variable_genes
+  colnames(normalized_matrix) <- colnames(assay_values)
+  return(normalized_matrix)
+}