forked from Dollyhua/ST_LLM
-
Notifications
You must be signed in to change notification settings - Fork 0
/
ST_pre_processing.R
76 lines (58 loc) · 3.04 KB
/
ST_pre_processing.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
####ST data pre_processing
rm(list = ls())
library(dplyr)
library(hdf5r)
library(Seurat)
library(rhdf5)
####writeh5 function:Write the seurat object to the HDF5 file
writeh5 <- function(h5path, count_data){
h5createFile(h5path)
h5createGroup(h5path,"matrix")
h5write(count_data@Dimnames[[2]] , h5path, "matrix/barcodes")
h5write(count_data@x, h5path, "matrix/data")
h5createGroup(h5path,"matrix/features")
h5write("genome", h5path, "matrix/features/_all_tag_keys")
Genes <- rep("Gene Expression", length(count_data@Dimnames[[1]]))
h5write(Genes,h5path, "matrix/features/feature_type")
Genome <- rep("GRCh38", length(count_data@Dimnames[[1]]))
h5write(Genome, h5path,"matrix/features/genome")
h5write(count_data@Dimnames[[1]],h5path, "matrix/features/id")
h5write(count_data@Dimnames[[1]],h5path, "matrix/features/name")
h5write(count_data@i, h5path, "matrix/indices")
h5write(count_data@p, h5path, "matrix/indptr")
h5write(count_data@Dim, h5path, "matrix/shape")
h5closeAll()
}
####ST_preprocess function
ST_preprocess <- function(ID, suffix){
path <- list.files(paste0("/fs/home/dongzhonghua/STARDUST/Data/", study_ID, "/suppl/"))[grep(file_suffix, list.files(paste0("/fs/home/dongzhonghua/STARDUST/Data/", study_ID, "/suppl/")))]
samples <- unlist(lapply(strsplit(path,file_suffix),function(x) x[1]))
print(samples)
for (sample in samples){
print(sample)
####read h5 file
h5_file <- Read10X_h5(filename = paste0("/fs/home/dongzhonghua/STARDUST/Data/", study_ID, "/suppl/", sample, file_suffix))
h5_object <- CreateSeuratObject(counts=h5_file, project=paste0(sample))
#####QC,Discard low quality Spots
h5_object[["percent.mt"]] <- PercentageFeatureSet(object = h5_object, pattern = "^MT-")
h5_object <- subset(h5_object, subset = nFeature_RNA > 200 & percent.mt < 10)
h5_object <- SCTransform(h5_object, assay = "RNA", verbose = FALSE)
h5_object <- FindVariableFeatures(h5_object, selection.method = "vst", nfeatures = 2000)
all.genes <- rownames(h5_object)
h5_object <- RunPCA(h5_object,features = VariableFeatures(object=h5_object))
h5_object <- FindNeighbors(h5_object, dims = 1:30)
h5_object <- FindClusters(h5_object, resolution=0.5)
h5_object <- RunUMAP(h5_object, dims = 1: 30)
#save.rds file
saveRDS(h5_object, file = paste0("/fs/home/dongzhonghua/STARDUST/Data/", study_ID, "/suppl/", sample, ".rds"))
#######Save the ST gene count file after QC filtering for STRIDE deconvolution
writeh5(h5path = paste0("/fs/home/dongzhonghua/STARDUST/Data/", study_ID, "/suppl/", sample, '_gene_count_QC.h5'), count_data = h5_object@assays$RNA@data)
}
####platforms:10x Genomics
####GSE158803 for example
#####study_ID <- "GSE158803"
####file_suffix <- "_gene_count.h5" ######The filename suffix is "_gene_count.h5";" _filtered_feature_bc_matrix.h5";" .h5" etc
####star working
#study_ID <- "GSE202740"
#file_suffix <- "_gene_count.h5"
#ST_preprocess(ID = study_ID, suffix = file_suffix)