Skip to content

Commit

Permalink
first commit
Browse files Browse the repository at this point in the history
  • Loading branch information
gaoshang1102 committed Nov 1, 2020
0 parents commit 2987b55
Show file tree
Hide file tree
Showing 1,426 changed files with 7,398,622 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
^BITFAM\.Rproj$
^\.Rproj\.user$
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
.Rproj.user
22 changes: 22 additions & 0 deletions BITFAM.Rproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
Version: 1.0

RestoreWorkspace: No
SaveWorkspace: No
AlwaysSaveHistory: Default

EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 2
Encoding: UTF-8

RnwWeave: Sweave
LaTeX: pdfLaTeX

AutoAppendNewline: Yes
StripTrailingWhitespace: Yes
LineEndingConversion: Posix

BuildType: Package
PackageUseDevtools: Yes
PackageInstallArgs: --no-multiarch --with-keep.source
PackageRoxygenize: rd,collate,namespace
16 changes: 16 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
Package: BITFAM
Title: What the Package Does (One Line, Title Case)
Version: 0.0.0.9000
Authors@R:
person(given = "First",
family = "Last",
role = c("aut", "cre"),
email = "[email protected]",
comment = c(ORCID = "YOUR-ORCID-ID"))
Description: What the package does (one paragraph).
License: `use_mit_license()`, `use_gpl3_license()` or friends to
pick a license
Encoding: UTF-8
LazyData: true
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.1.0
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Generated by roxygen2: do not edit by hand

99 changes: 99 additions & 0 deletions R/BITFAM.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
#' Fit the BITFAM masked factor model with Stan variational inference
#'
#' Reads one target-gene file per transcription factor (TF), builds a binary
#' gene-by-TF mask from the targets found among the variable genes, assembles
#' the cells-by-genes expression matrix and fits the Stan model defined below
#' with `vb()` (mean-field, 8000 iterations, 300 output samples).
#'
#' NOTE(review): `process_data` and the initial value of `TF_used` are not
#' defined inside this function and the `data` argument is never read, so both
#' names are resolved from the enclosing environment. `process_data` is
#' presumably a Seurat object derived from `data`, and `TF_used` presumably the
#' file names found under the target directory -- confirm and derive them here.
#'
#' @param data Currently unused (see the note above).
#' @param species Either `"mouse"` or `"human"`; selects the directory the TF
#'   target files are read from. Any other value is an error.
#' @param interseted_TF Optional character vector of TF names that are kept
#'   even when they have 10 or fewer targets among the variable genes. The
#'   default `NA` adds none; `NA` entries are ignored.
#' @param ncores Value stored in `options(mc.cores = )` before fitting.
#' @return A list with elements `Model` (the object returned by `vb()`),
#'   `TF_used` (the TFs in the model, in column order) and `Genes` (the
#'   variable features of `process_data`).
BITFAM <- function(data, species, interseted_TF = NA, ncores) {
  if (species == "mouse") {
    TF_targets_dir <- "TF/mouse/"
  } else if (species == "human") {
    TF_targets_dir <- "TF/human/"
  } else {
    stop("The species must be either mouse or human.")
  }

  variable_genes <- VariableFeatures(process_data)

  # For each TF, read its target file (first column = gene names) and keep the
  # variable genes that appear in it. Returns one character vector per TF.
  targets_among_variable_genes <- function(tf_names) {
    lapply(tf_names, function(tf) {
      tmp_gene <- read.table(paste0(TF_targets_dir, tf),
                             stringsAsFactors = FALSE)
      variable_genes[variable_genes %in% tmp_gene$V1]
    })
  }

  # Keep only TFs with more than 10 targets among the variable genes.
  gene_list <- targets_among_variable_genes(TF_used)
  TF_used <- TF_used[lengths(gene_list) > 10]

  # Always retain explicitly requested TFs. The original scalar test
  # `if (is.na(interseted_TF))` fails for a vector of several TF names.
  requested_TF <- interseted_TF[!is.na(interseted_TF)]
  if (length(requested_TF) > 0) {
    TF_used <- unique(c(TF_used, requested_TF))
  }

  if (length(TF_used) == 0) {
    stop("No transcription factor is left to model.", call. = FALSE)
  }

  # Recompute the targets so gene_list lines up with the final TF_used.
  gene_list <- targets_among_variable_genes(TF_used)

  # Cells in rows, variable genes in columns.
  data_matrix_normalized <- t(as.matrix(
    GetAssayData(object = process_data)[variable_genes, , drop = FALSE]
  ))
  # Remove gRNA features with a logical mask: a negative `grep()` index would
  # select zero columns whenever no feature matches "gRNA".
  keep_gene <- !grepl(pattern = "gRNA", x = variable_genes)
  data_matrix_normalized <- data_matrix_normalized[, keep_gene, drop = FALSE]

  # Binary mask: 1 where the gene (row) is a known target of the TF (column).
  model_genes <- colnames(data_matrix_normalized)
  chipseq_weight <- matrix(1, nrow = length(model_genes),
                           ncol = length(TF_used))
  for (k in seq_along(TF_used)) {
    chipseq_weight[, k] <- as.numeric(model_genes %in% gene_list[[k]])
  }

  Mask_matrix <- chipseq_weight
  X <- data_matrix_normalized
  N <- dim(X)[1]
  D <- dim(X)[2]
  K <- length(TF_used)
  data_to_model <- list(N = N, D = D, K = K, X = X, Mask = Mask_matrix)

  library(rstan)
  rstan_options(auto_write = TRUE)
  options(mc.cores = ncores)

  set.seed(100)
  # Stan program: X ~ normal(Z * W', 1 / sqrt(tau)). Entries of W outside the
  # mask get a fixed prior sd of 0.01; masked-in entries share a per-TF
  # precision alpha. The program text is kept exactly as in the original.
  pca_beta_prior <- "
data {
int<lower=0> N; // Number of samples
int<lower=0> D; // The original dimension
int<lower=0> K; // The latent dimension
matrix[N, D] X; // The data matrix
matrix[D, K] Mask; // The binary mask of prior knowledge indicate the target of TFs
}
parameters {
matrix<lower=0, upper=1>[N, K] Z; // The latent matrix
matrix[D, K] W; // The weight matrix
real<lower=0> tau; // Noise term
vector<lower=0>[K] alpha; // ARD prior
}
transformed parameters{
matrix<lower=0>[D, K] t_alpha;
real<lower=0> t_tau;
for(wmd in 1:D){
for(wmk in 1:K){
t_alpha[wmd, wmk] = Mask[wmd, wmk] == 1 ? inv(sqrt(alpha[wmk])) : 0.01;
}
}
t_tau = inv(sqrt(tau));
}
model {
tau ~ gamma(1,1);
to_vector(Z) ~ beta(0.5, 0.5);
alpha ~ gamma(1e-3,1e-3);
for(d in 1:D){
for(k in 1:K){
W[d,k] ~ normal(0, t_alpha[d, k]);
}
}
to_vector(X) ~ normal(to_vector(Z*W'), t_tau);
} "

  m_beta_prior <- stan_model(model_code = pca_beta_prior)
  stan.fit.vb.real.beta.prior <- vb(m_beta_prior, data = data_to_model,
                                    algorithm = "meanfield",
                                    iter = 8000, output_samples = 300)

  # NOTE(review): Genes still lists every variable feature, including the
  # gRNA features removed from the model matrix above, so it may be longer
  # than the number of rows of W -- confirm whether it should be model_genes.
  BITFAM_list <- list(Model = stan.fit.vb.real.beta.prior,
                      TF_used = TF_used,
                      Genes = variable_genes)
  return(BITFAM_list)
}




4 changes: 4 additions & 0 deletions R/BITFAM_extract.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#' Posterior-mean matrix of one parameter of a fitted BITFAM model
#'
#' Extracts the draws of `result` from the fitted model stored in
#' `BITFAM_list` and averages them over the first dimension of the draws
#' array.
#'
#' @param BITFAM_list A list whose `Model` element is the fitted model, as in
#'   the list returned by `BITFAM()`.
#' @param result Name of the parameter to extract; defaults to `"Z"`.
#' @return A matrix holding the mean over the first dimension of the
#'   extracted three-dimensional draws array.
BITFAM_extract <- function(BITFAM_list, result = "Z") {
  # Read the fit from the argument. The previous code referenced
  # `stan.fit.vb.real.beta.prior`, a variable local to BITFAM() that does not
  # exist in this function's scope.
  # `extract` is expected to be rstan's extractor, available once rstan is
  # attached -- confirm.
  draws <- extract(BITFAM_list$Model, result)[[1]]
  result_matrix <- apply(draws, c(2, 3), mean)
  return(result_matrix)
}
15 changes: 15 additions & 0 deletions R/BITFAM_preprocess.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Build the normalized expression matrix restricted to the variable features.
#
# Creates a Seurat object from the raw counts (min.cells = 3,
# min.features = 200), normalizes it, selects 5000 variable features and
# returns the normalized values for those features as a dense matrix with
# features in rows and cells in columns.
#
# NOTE(review): this function declares no parameters, yet it reads
# `data_normalized` and `data` before anything assigns them locally, so both
# are looked up in the enclosing environment. They were presumably meant to be
# arguments -- confirm. The flag also looks inverted: a TRUE `data_normalized`
# makes the function read a 10X directory from `data` -- verify the intent.
BITFAM_preprocess <- function() {
  raw_data <- if (data_normalized) Read10X(data.dir = data) else data

  seurat_obj <- CreateSeuratObject(counts = raw_data, min.cells = 3,
                                   min.features = 200)
  seurat_obj <- NormalizeData(object = seurat_obj)
  seurat_obj <- FindVariableFeatures(object = seurat_obj, nfeatures = 5000)

  variable_genes <- VariableFeatures(seurat_obj)
  assay_values <- GetAssayData(object = seurat_obj)

  # Dense matrix of the variable features, labelled explicitly by gene and
  # by cell.
  expression_matrix <- as.matrix(assay_values[variable_genes, ])
  rownames(expression_matrix) <- variable_genes
  colnames(expression_matrix) <- colnames(assay_values)
  return(expression_matrix)
}
Loading

0 comments on commit 2987b55

Please sign in to comment.