Skip to content

Commit

Permalink
Change the default n.epochs from 200 to 500 in RunUMAP2 function and …
Browse files Browse the repository at this point in the history
…RunLargeVis function
  • Loading branch information
zhanghao-njmu committed Nov 28, 2023
1 parent f4b9432 commit 1215407
Show file tree
Hide file tree
Showing 6 changed files with 37 additions and 36 deletions.
23 changes: 12 additions & 11 deletions R/SCP-feature_annotation.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,10 @@
#' @param Ensembl_version Version of the Ensembl database to use. Default is 103.
#' @param mirror URL of the mirror to use for Ensembl database. Default is NULL.
#' @param gtf Path to the GTF file to be used for annotation. Default is NULL.
#' @param merge_gtf_by Column name to merge the GTF file by. Default is "gene_name".
#' @param columns Vector of column names to be used from the GTF file. Default is
#' "seqname", "feature", "start", "end", "strand", "gene_id", "gene_name", "gene_type".
#' @param gtf_field The feature types in the GTF file to use for annotation. By default, the file is searched for "gene", "transcript", "exon", and "CDS", in that order, and only the rows of the first feature type found are used.
#' @param gtf_columns Vector of column names to be used from the GTF file. Default is
#' c("seqname", "feature", "start", "end", "strand", "gene_id", "gene_name", "gene_type", "gene_biotype").
#' @param gtf_merge_by Column name to merge the GTF file by. Default is "gene_name".
#' @param assays Character vector of assay names to be annotated. Default is "RNA".
#' @param overwrite Logical value indicating whether to overwrite existing metadata. Default is FALSE.
#'
Expand All @@ -32,10 +33,11 @@
#' @export
AnnotateFeatures <- function(srt, species = "Homo_sapiens", IDtype = c("symbol", "ensembl_id", "entrez_id"),
db = NULL, db_update = FALSE, db_version = "latest", convert_species = TRUE, Ensembl_version = 103, mirror = NULL,
gtf = NULL, merge_gtf_by = "gene_name", columns = c(
gtf = NULL, gtf_field = c("gene", "transcript", "exon", "CDS"),
gtf_columns = c(
"seqname", "feature", "start", "end", "strand",
"gene_id", "gene_name", "gene_type"
),
"gene_id", "gene_name", "gene_type", "gene_biotype"
), gtf_merge_by = "gene_name",
assays = "RNA", overwrite = FALSE) {
IDtype <- match.arg(IDtype)
if (is.null(db) && is.null(gtf)) {
Expand Down Expand Up @@ -82,13 +84,12 @@ AnnotateFeatures <- function(srt, species = "Homo_sapiens", IDtype = c("symbol",
gtf_all <- suppressWarnings(fread(gtf, sep = "\t"))
gtf_all <- gtf_all[, 1:9]
colnames(gtf_all) <- c("seqname", "source", "feature", "start", "end", "score", "strand", "frame", "attribute")
for (type in c("gene", "transcript", "exon", "CDS")) {
for (type in gtf_field) {
if (type %in% gtf_all[["feature"]]) {
gtf_all <- gtf_all[gtf_all[["feature"]] == type, ]
break
}
}
columns1 <- intersect(colnames(gtf_all), columns)

gtf_attribute <- gtf_all[["attribute"]]
gtf_attribute <- gsub(pattern = "\"", replacement = "", x = gtf_attribute)
Expand All @@ -97,13 +98,13 @@ AnnotateFeatures <- function(srt, species = "Homo_sapiens", IDtype = c("symbol",
detail <- strsplit(x, " ")
out <- lapply(detail, function(x) x[2:length(x)])
names(out) <- sapply(detail, function(x) x[1])
out <- out[intersect(columns, names(out))]
out <- out[intersect(gtf_columns, names(out))]
return(out)
})
gene_attr_df <- rbindlist(gene_attr, fill = TRUE)
gtf_columns <- cbind(gtf_all[, intersect(colnames(gtf_all), columns), with = FALSE], gene_attr_df)
gtf_columns <- cbind(gtf_all[, intersect(colnames(gtf_all), gtf_columns), with = FALSE], gene_attr_df)
colnames(gtf_columns) <- make.unique(colnames(gtf_columns))
gtf_columns_collapse <- aggregate(gtf_columns, by = list(rowid = gtf_columns[[merge_gtf_by]]), FUN = function(x) {
gtf_columns_collapse <- aggregate(gtf_columns, by = list(rowid = gtf_columns[[gtf_merge_by]]), FUN = function(x) {
paste0(unique(x), collapse = ";")
})
rownames(gtf_columns_collapse) <- gtf_columns_collapse[["rowid"]]
Expand Down
7 changes: 4 additions & 3 deletions R/SCP-plot.R
Original file line number Diff line number Diff line change
Expand Up @@ -3825,6 +3825,7 @@ ExpressionStatPlot <- function(exp.data, meta.data, stat.by, group.by = NULL, sp
}
bg_map <- NULL
if (!is.null(bg.by)) {
meta.data[[bg.by]] <- factor(meta.data[[bg.by]], levels = intersect(levels(meta.data[[bg.by]]), meta.data[[bg.by]]))
for (g in group.by) {
df_table <- table(meta.data[[g]], meta.data[[bg.by]])
if (max(rowSums(df_table > 0), na.rm = TRUE) > 1) {
Expand Down Expand Up @@ -8684,7 +8685,7 @@ GroupHeatmap <- function(srt, features = NULL, group.by = NULL, split.by = NULL,
index <- which(features_ordered %in% features_label)
drop <- setdiff(features_label, features_ordered)
if (length(drop) > 0) {
warning(paste0(paste0(drop, collapse = ","), "was not found in the features"), immediate. = TRUE)
warning(paste0(paste0(drop, collapse = ","), " was not found in the features"), immediate. = TRUE)
}
}
if (length(index) > 0) {
Expand Down Expand Up @@ -9754,7 +9755,7 @@ FeatureHeatmap <- function(srt, features = NULL, cells = NULL, group.by = NULL,
index <- which(features_ordered %in% features_label)
drop <- setdiff(features_label, features_ordered)
if (length(drop) > 0) {
warning(paste0(paste0(drop, collapse = ","), "was not found in the features"), immediate. = TRUE)
warning(paste0(paste0(drop, collapse = ","), " was not found in the features"), immediate. = TRUE)
}
}
if (length(index) > 0) {
Expand Down Expand Up @@ -11743,7 +11744,7 @@ DynamicHeatmap <- function(srt, lineages, features = NULL, use_fitted = FALSE, b
index <- which(features_ordered %in% features_label)
drop <- setdiff(features_label, features_ordered)
if (length(drop) > 0) {
warning(paste0(paste0(drop, collapse = ","), "was not found in the features"), immediate. = TRUE)
warning(paste0(paste0(drop, collapse = ","), " was not found in the features"), immediate. = TRUE)
}
}
if (length(index) > 0) {
Expand Down
11 changes: 6 additions & 5 deletions R/Seurat-function.R
Original file line number Diff line number Diff line change
Expand Up @@ -553,7 +553,7 @@ RunDM.default <- function(object, assay = NULL, slot = "data",
#' @param n.neighbors An integer specifying the number of nearest neighbors to be used. Default is 30.
#' @param n.components An integer specifying the number of UMAP components. Default is 2.
#' @param metric A character string specifying the metric or a function to be used for distance calculations. When using a string, available metrics are: euclidean, manhattan. Other available generalized metrics are: cosine, pearson, pearson2. Note the triangle inequality may not be satisfied by some generalized metrics, hence knn search may not be optimal. When using metric.function as a function, the signature must be function(matrix, origin, target) and should compute a distance between the origin column and the target columns. Default is "cosine".
#' @param n.epochs An integer specifying the number of iterations performed during layout optimization for UMAP. Default is 200.
#' @param n.epochs An integer specifying the number of iterations performed during layout optimization for UMAP. Default is 500.
#' @param spread A numeric value specifying the spread parameter for UMAP, used during automatic estimation of a/b parameters. Default is 1.
#' @param min.dist A numeric value specifying the minimum distance between UMAP embeddings, determines how close points appear in the final layout. Default is 0.3.
#' @param set.op.mix.ratio Interpolate between (fuzzy) union and intersection as the set operation used to combine local fuzzy simplicial sets to obtain a global fuzzy simplicial set. Both fuzzy set operations use the product t-norm. The value of this parameter should be between 0.0 and 1.0; a value of 1.0 will use a pure fuzzy union, while 0.0 will use a pure fuzzy intersection.
Expand Down Expand Up @@ -589,7 +589,7 @@ RunUMAP2.Seurat <- function(object,
assay = NULL, slot = "data",
umap.method = "uwot", reduction.model = NULL, n_threads = NULL,
return.model = FALSE, n.neighbors = 30L, n.components = 2L,
metric = "cosine", n.epochs = 200L, spread = 1, min.dist = 0.3,
metric = "cosine", n.epochs = 500L, spread = 1, min.dist = 0.3,
set.op.mix.ratio = 1, local.connectivity = 1L, negative.sample.rate = 5L,
a = NULL, b = NULL, learning.rate = 1, repulsion.strength = 1,
reduction.name = "umap", reduction.key = "UMAP_",
Expand Down Expand Up @@ -669,7 +669,7 @@ RunUMAP2.Seurat <- function(object,
RunUMAP2.default <- function(object, assay = NULL,
umap.method = "uwot", reduction.model = NULL, n_threads = NULL,
return.model = FALSE, n.neighbors = 30L, n.components = 2L,
metric = "cosine", n.epochs = 200L, spread = 1, min.dist = 0.3,
metric = "cosine", n.epochs = 500L, spread = 1, min.dist = 0.3,
set.op.mix.ratio = 1, local.connectivity = 1L, negative.sample.rate = 5L,
a = NULL, b = NULL, learning.rate = 1, repulsion.strength = 1,
reduction.key = "UMAP_", verbose = TRUE, seed.use = 11L, ...) {
Expand Down Expand Up @@ -1538,6 +1538,7 @@ RunTriMap.default <- function(object, assay = NULL,
#' @param verbose A logical value indicating whether to print verbose output. Default is TRUE.
#' @param seed.use An integer specifying the random seed to be used. Default is 11.
#' @param ... Additional arguments to be passed to the \link[uwot]{lvish} function.
#' @param n_epochs Number of epochs to use during the optimization of the embedded coordinates. Default is 500.
#'
#' @examples
#' pancreas_sub <- Seurat::FindVariableFeatures(pancreas_sub)
Expand All @@ -1558,7 +1559,7 @@ RunLargeVis <- function(object, ...) {
RunLargeVis.Seurat <- function(object, reduction = "pca", dims = NULL, features = NULL,
assay = NULL, slot = "data",
perplexity = 50, n_neighbors = perplexity * 3, n_components = 2, metric = "euclidean",
n_epochs = -1, learning_rate = 1, scale = "maxabs", init = "lvrandom", init_sdev = NULL,
n_epochs = 500, learning_rate = 1, scale = "maxabs", init = "lvrandom", init_sdev = NULL,
repulsion_strength = 7, negative_sample_rate = 5, nn_method = NULL, n_trees = 50,
search_k = 2 * n_neighbors * n_trees, n_threads = NULL, n_sgd_threads = 0, grain_size = 1,
kernel = "gauss", pca = NULL, pca_center = TRUE, pcg_rand = TRUE, fast_sgd = FALSE,
Expand Down Expand Up @@ -1617,7 +1618,7 @@ RunLargeVis.Seurat <- function(object, reduction = "pca", dims = NULL, features
#' @export
RunLargeVis.default <- function(object, assay = NULL,
perplexity = 50, n_neighbors = perplexity * 3, n_components = 2, metric = "euclidean",
n_epochs = -1, learning_rate = 1, scale = "maxabs", init = "lvrandom", init_sdev = NULL,
n_epochs = 500, learning_rate = 1, scale = "maxabs", init = "lvrandom", init_sdev = NULL,
repulsion_strength = 7, negative_sample_rate = 5, nn_method = NULL, n_trees = 50,
search_k = 2 * n_neighbors * n_trees, n_threads = NULL, n_sgd_threads = 0, grain_size = 1,
kernel = "gauss", pca = NULL, pca_center = TRUE, pcg_rand = TRUE, fast_sgd = FALSE,
Expand Down
15 changes: 9 additions & 6 deletions man/AnnotateFeatures.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 3 additions & 8 deletions man/RunLargeVis.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions man/RunUMAP2.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 1215407

Please sign in to comment.