diff --git a/scripts/util/pancancer_util.R b/scripts/util/pancancer_util.R index 11de95d..0606c01 100644 --- a/scripts/util/pancancer_util.R +++ b/scripts/util/pancancer_util.R @@ -51,7 +51,9 @@ parse_summary <- function(summary_info) { # Output: # a list of summarized classifier attributes and performance + base_dir <- NULL if (is.character(summary_info)) { + base_dir <- dirname(summary_info) summary_info <- readr::read_lines(summary_info) } summary_list <- list() @@ -72,8 +74,12 @@ parse_summary <- function(summary_info) { next } if (line[1] == "Coefficients:") { + tmp_fn <- line[2] + if ( ! file.exists(tmp_fn)){ + tmp_fn <- file.path( base_dir, 'classifier_coefficients.tsv' ) # maybe should use basename of not found file + } summary_list[[sub(":", "", line[1])]] <- - suppressMessages(readr::read_tsv(line[2])) + suppressMessages(readr::read_tsv(tmp_fn)) } else if (line[1] == "") { disease_info <- line[2:length(line)] disease <- disease_info[1] diff --git a/scripts/util/tcga_util.py b/scripts/util/tcga_util.py index b40c8fa..f1a5d14 100644 --- a/scripts/util/tcga_util.py +++ b/scripts/util/tcga_util.py @@ -14,10 +14,18 @@ def get_args(): import argparse parser = argparse.ArgumentParser() + #parser.add_argument('-g', '--genes', + # help='Comma separated string of HUGO gene symbols') parser.add_argument('-g', '--genes', - help='Comma separated string of HUGO gene symbols') + help='string of the genes to extract or genelist file') + #parser.add_argument('-t', '--diseases', default='Auto', + # help='Comma sep string of TCGA disease acronyms. ' + # 'If no arguments are passed, filtering will ' + # 'default to options given in --filter_count and ' + # '--filter_prop.') parser.add_argument('-t', '--diseases', default='Auto', help='Comma sep string of TCGA disease acronyms. ' + 'diseases_list_file' 'If no arguments are passed, filtering will ' 'default to options given in --filter_count and ' '--filter_prop.') @@ -37,9 +45,13 @@ def get_args(): help='the alphas for parameter sweep') parser.add_argument('-l', '--l1_ratios', default='0,0.1,0.15,0.18,0.2,0.3', help='the l1 ratios for parameter sweep') - parser.add_argument('-b', '--alt_genes', default='None', - help='alternative genes to test performance') - parser.add_argument('-s', '--alt_diseases', default="Auto", + #parser.add_argument('-b', '--alt_genes', default='None', + # help='alternative genes to test performance') + parser.add_argument('-b', '--alt_genes', default=None, + help='string of the alt_genes to extract or alt_genelist file to test performance') + #parser.add_argument('-s', '--alt_diseases', default="Auto", + # help='The alternative diseases to test performance') + parser.add_argument('-s', '--alt_diseases', default=None, help='The alternative diseases to test performance') parser.add_argument('-i', '--alt_filter_count', default=15, type=int, help='Min number of mutations in disease to include') @@ -53,6 +65,8 @@ def get_args(): help='Keep intermediate ROC values for plotting') parser.add_argument('-x', '--x_matrix', default='raw', help='Filename of features to use in model') + parser.add_argument('--x_as_raw', action='store_true', + help='Treat x_matrix as "raw"') parser.add_argument('-e', '--shuffled', action='store_true', help='Shuffle the input gene exprs matrix alongside') parser.add_argument('--shuffled_before_training', action='store_true', @@ -61,10 +75,24 @@ def get_args(): help='Remove mutation data from y matrix') parser.add_argument('-z', '--drop_rasopathy', action='store_true', help='Decision to drop rasopathy genes from X matrix') + parser.add_argument( '--drop_x_genes', default=None, + help='Comma separated list of genes to be dropped from X matrix, x_genelist file ') parser.add_argument('-q', '--drop_expression', action='store_true', help='Decision to drop gene expression values from X') parser.add_argument('-j', '--drop_covariates', action='store_true', help='Decision to drop covariate information from X') + parser.add_argument( '--filename_mut', default=None, + help='Filename of sample/gene mutations to use in model') + parser.add_argument( '--filename_mut_burden', default=None, + help='Filename of sample mutation burden to use in model') + parser.add_argument( '--filename_sample', default=None, + help='Filename of patient/samples to use in model') + parser.add_argument( '--filename_copy_loss', default=None, + help='Filename of copy number loss') + parser.add_argument( '--filename_copy_gain', default=None, + help='Filename of copy number gain') + parser.add_argument( '--filename_cancer_gene_classification', default=None, + help='Filename of cancer gene classification table') args = parser.parse_args() return args