From 79cb307811c09f0b5b54fe5a19e05caa8f543bea Mon Sep 17 00:00:00 2001 From: vedhav Date: Thu, 29 Feb 2024 01:33:45 +0530 Subject: [PATCH 1/3] feat: impute empty values as `na` in the variable browser --- R/tm_variable_browser.R | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/R/tm_variable_browser.R b/R/tm_variable_browser.R index 03f4a2b03..8da52c056 100644 --- a/R/tm_variable_browser.R +++ b/R/tm_variable_browser.R @@ -544,6 +544,7 @@ srv_variable_browser <- function(id, #' @return text describing \code{NA} occurrence. #' @keywords internal var_missings_info <- function(x) { + x <- impute_blanks_as_na(x) return(sprintf("%s [%s%%]", sum(is.na(x)), round(mean(is.na(x) * 100), 2))) } @@ -837,7 +838,7 @@ var_summary_table <- function(x, numeric_as_factor, dt_rows, outlier_definition) x <- factor(x, levels = sort(unique(x))) } - level_counts <- table(x) + level_counts <- table(x, useNA = "always") max_levels_signif <- nchar(level_counts) if (!all(is.na(x))) { @@ -1085,7 +1086,7 @@ get_plotted_data <- function(input, plot_var, data) { df <- data()[[dataset_name]] var_description <- teal.data::col_labels(df)[[varname]] - list(data = df[[varname]], var_description = var_description) + list(data = impute_blanks_as_na(df[[varname]]), var_description = var_description) } #' Renders the left-hand side `tabset` panel of the module @@ -1335,3 +1336,17 @@ remove_outliers_from <- function(var, outlier_definition) { iqr <- q1_q3[2] - q1_q3[1] var[var >= q1_q3[1] - outlier_definition * iqr & var <= q1_q3[2] + outlier_definition * iqr] } + +#' Imputes blank strings as `NA` +#' +#' @param var (`vector`) a vector of any type and length +#' @returns (`vector`) `var` passed through `as.vector()`; for character input, elements that are empty or contain only spaces become `NA`, all other values are returned unchanged. 
+#' @keywords internal +impute_blanks_as_na <- function(var) { + var <- as.vector(var) + if (is.character(var)) { + is_blank <- grepl("^ *$", var) + var[is_blank] <- NA + } + var +} From 483545a31c3c689f5c867b85fbaeea01cea4a029 Mon Sep 17 00:00:00 2001 From: vedhav Date: Thu, 29 Feb 2024 01:46:19 +0530 Subject: [PATCH 2/3] feat: rename the column as and do the same for numeric columns as well --- R/tm_variable_browser.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/R/tm_variable_browser.R b/R/tm_variable_browser.R index 8da52c056..6f3b8534a 100644 --- a/R/tm_variable_browser.R +++ b/R/tm_variable_browser.R @@ -818,7 +818,7 @@ var_summary_table <- function(x, numeric_as_factor, dt_rows, outlier_definition) summary <- data.frame( - Statistic = c("min", "Q1", "median", "mean", "Q3", "max", "sd", "n"), + Statistic = c("min", "Q1", "median", "mean", "Q3", "max", "sd", "n", ""), Value = c( round(min(x, na.rm = TRUE), 2), qvals[1], @@ -827,7 +827,8 @@ var_summary_table <- function(x, numeric_as_factor, dt_rows, outlier_definition) qvals[3], round(max(x, na.rm = TRUE), 2), round(stats::sd(x, na.rm = TRUE), 2), - length(x[!is.na(x)]) + length(x[!is.na(x)]), + length(x[is.na(x)]) ) ) @@ -839,6 +840,7 @@ var_summary_table <- function(x, numeric_as_factor, dt_rows, outlier_definition) } level_counts <- table(x, useNA = "always") + names(level_counts)[is.na(names(level_counts))] <- "" max_levels_signif <- nchar(level_counts) if (!all(is.na(x))) { From 9ea024c24e8e9641b90d7bb3cf6ed86fe7eabb28 Mon Sep 17 00:00:00 2001 From: vedhav Date: Thu, 29 Feb 2024 02:02:04 +0530 Subject: [PATCH 3/3] feat: add `` label in the bar plot instead of `NA` --- R/tm_variable_browser.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/R/tm_variable_browser.R b/R/tm_variable_browser.R index 6f3b8534a..78f125ab1 100644 --- a/R/tm_variable_browser.R +++ b/R/tm_variable_browser.R @@ -945,9 +945,10 @@ plot_var_summary <- function(var, var <- 
stringr::str_wrap(var, width = wrap_character) } var <- if (isTRUE(remove_NA_hist)) as.vector(stats::na.omit(var)) else var + var[is.na(var)] <- "" ggplot(data.frame(var), aes(x = forcats::fct_infreq(as.factor(var)))) + - geom_bar(stat = "count", aes(fill = ifelse(is.na(var), "withcolor", "")), show.legend = FALSE) + - scale_fill_manual(values = c("gray50", "tan")) + geom_bar(stat = "count", aes(fill = ifelse(var == "", "missing", "all")), show.legend = FALSE) + + scale_fill_manual(values = c("missing" = "tan", "all" = "gray50")) } } else if (is.numeric(var)) { validate(need(any(!is.na(var)), "No data left to visualize."))