From a10579bf9adc9d84fc93e17b0da66ccf516cbf37 Mon Sep 17 00:00:00 2001 From: smartinezzz <167785995+smartinezzz@users.noreply.github.com> Date: Wed, 13 Nov 2024 10:23:02 -0500 Subject: [PATCH] Delete scSNViz/TS_snvs_comp_distr.r --- scSNViz/TS_snvs_comp_distr.r | 118 ----------------------------------- 1 file changed, 118 deletions(-) delete mode 100644 scSNViz/TS_snvs_comp_distr.r diff --git a/scSNViz/TS_snvs_comp_distr.r b/scSNViz/TS_snvs_comp_distr.r deleted file mode 100644 index 14c7082..0000000 --- a/scSNViz/TS_snvs_comp_distr.r +++ /dev/null @@ -1,118 +0,0 @@ -suppressWarnings(library('optparse')) - -script.desc <- -'Generates statistics to determine if SNV distribution has role in clustering. -User must supply the RDS of a Seurat object (-r) which should contain a single -Seurat experiment. A file also must be submitted with (-t) which is a file that -contains a path to all the scReadCounts files containing the SNV information.' - -parser <- OptionParser(description=script.desc) -parser <- add_option(parser, c('-r', '--rds-file'), - type='character', - help='RDS file containing Seurat object.') -parser <- add_option(parser, c('-t', '--snv-file'), - type='character', - help='scReadCounts output file containing SNVs.') -parser <- add_option(parser, c('-w', '--th-snv-cells'), - type='numeric', default=10, - help=paste0('Threshold for maximum percentage of cells ', - ' that contain an SNV for bad reads. ', - '(Default: 10)')) -parser <- add_option(parser, c('-z', '--th-snv-reads'), - type='numeric', default=1, - help=paste0('Threshold for number of minimum reads to ', - 'qualify as SNV. (Default: 1)')) -args <- parse_args(parser) - -error.msg <- NULL -if (is.null(args$`rds-file`)) - error.msg <- paste(error.msg, '- Seurat RDS object (-r) is required.', - sep='\n') -if (is.null(args$`snv-file`)) - error.msg <- paste(error.msg, '- SNV data (-t) is required.', sep='\n') -if (args$`th-snv-reads`<=0) - error.msg <- paste(error.msg, '- th-snv-reads needs to be greater than 0.', - sep='\n') -if (!is.null(error.msg)) { - print_help(parser) - stop(error.msg) -} - - -suppressWarnings(library('dplyr')) -library('Seurat') - -# filtering based on arguments passed -srat <- readRDS(args$`rds-file`) -snv <- read.table(args$`snv-file`, sep= '\t', header=T) -sample.name <- gsub('(.*/)*([A-Za-z0-9]+)_.*.rds', '\\2', args$`rds-file`) - -# handling missing VAFs -snv$VAF[snv$VAF == '-'] <- NA -snv$VAF <- suppressWarnings(as.numeric(snv$VAF)) -snv <- snv[!is.na(snv$VAF), ] - -# filtering based on `X.BadRead` and `SNVCount` -snv.temp <- snv -snv.temp['BadReadFlag'] = 0 -snv.temp[snv.temp[['X.BadRead']]>0, 'BadReadFlag'] = 1 -snv.read.filt <- aggregate(BadReadFlag~CHROM+POS+REF+ALT, data=snv.temp, - function (x) 100*sum(x)/length(x)) -snv.read.filt <- snv.read.filt[snv.read.filt$BadReadFlag<=args$`th-snv-cells`, - c('CHROM', 'POS', 'REF', 'ALT')] -snv <- merge(snv, snv.read.filt, by=c('CHROM', 'POS', 'REF', 'ALT')) - -# filter on `SNVCount` -snv$VAF[snv$SNVCount 1) { - kw_test_result <- kruskal.test(VAF ~ ClusterID, data = snv_data) - kw_test_statistics <- c(kw_test_statistics, kw_test_result$statistic) # H-statistic - kw_p_values <- c(kw_p_values, kw_test_result$p.value) - } else { - kw_test_statistics <- c(kw_test_statistics, NA) # NA if test is not applicable - kw_p_values <- c(kw_p_values, NA) - } -} - -# adjustment for multiple comparisons using Bonferroni correction -kw_p_adj <- p.adjust(kw_p_values, method = 'bonferroni') - -df.final <- data.frame( - SNV = names(snv_groups), - H_statistic = kw_test_statistics, - p_value = kw_p_values, - p_adj = kw_p_adj -) - -df.final <- na.omit(df.final) # filtering NA cases -df.final <- df.final[order(df.final$p_adj), ] # sorting by p_adj - -write.table(df.final, paste0(sample.name, '_snv_candidate_', - sprintf('%02d', args$`th-snv-cells`), 'n', sprintf('%02d', args$`th-snv-reads`), - 'n.txt'), sep = '\t', row.names = FALSE) - -significant_snvs <- df.final[df.final$p_adj < 0.1, ] -write.table(significant_snvs, paste0(sample.name, '_snv_candidate_', - sprintf('%02d', args$`th-snv-cells`), 'n', sprintf('%02d', args$`th-snv-reads`), - 'n_q01.txt'), sep = '\t', row.names = FALSE)