-
-
Notifications
You must be signed in to change notification settings - Fork 13
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Impute empty values as na
in the variable browser
#700
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -544,6 +544,7 @@ srv_variable_browser <- function(id, | |
#' @return text describing \code{NA} occurrence. | ||
#' @keywords internal | ||
var_missings_info <- function(x) { | ||
x <- impute_blanks_as_na(x) | ||
return(sprintf("%s [%s%%]", sum(is.na(x)), round(mean(is.na(x) * 100), 2))) | ||
} | ||
|
||
|
@@ -817,7 +818,7 @@ var_summary_table <- function(x, numeric_as_factor, dt_rows, outlier_definition) | |
|
||
summary <- | ||
data.frame( | ||
Statistic = c("min", "Q1", "median", "mean", "Q3", "max", "sd", "n"), | ||
Statistic = c("min", "Q1", "median", "mean", "Q3", "max", "sd", "n", "<Missing>"), | ||
Value = c( | ||
round(min(x, na.rm = TRUE), 2), | ||
qvals[1], | ||
|
@@ -826,7 +827,8 @@ var_summary_table <- function(x, numeric_as_factor, dt_rows, outlier_definition) | |
qvals[3], | ||
round(max(x, na.rm = TRUE), 2), | ||
round(stats::sd(x, na.rm = TRUE), 2), | ||
length(x[!is.na(x)]) | ||
length(x[!is.na(x)]), | ||
length(x[is.na(x)]) | ||
) | ||
) | ||
|
||
|
@@ -837,7 +839,8 @@ var_summary_table <- function(x, numeric_as_factor, dt_rows, outlier_definition) | |
x <- factor(x, levels = sort(unique(x))) | ||
} | ||
|
||
level_counts <- table(x) | ||
level_counts <- table(x, useNA = "always") | ||
names(level_counts)[is.na(names(level_counts))] <- "<Missing>" | ||
max_levels_signif <- nchar(level_counts) | ||
|
||
if (!all(is.na(x))) { | ||
|
@@ -942,9 +945,10 @@ plot_var_summary <- function(var, | |
var <- stringr::str_wrap(var, width = wrap_character) | ||
} | ||
var <- if (isTRUE(remove_NA_hist)) as.vector(stats::na.omit(var)) else var | ||
var[is.na(var)] <- "<Missing>" | ||
ggplot(data.frame(var), aes(x = forcats::fct_infreq(as.factor(var)))) + | ||
geom_bar(stat = "count", aes(fill = ifelse(is.na(var), "withcolor", "")), show.legend = FALSE) + | ||
scale_fill_manual(values = c("gray50", "tan")) | ||
geom_bar(stat = "count", aes(fill = ifelse(var == "<Missing>", "missing", "all")), show.legend = FALSE) + | ||
scale_fill_manual(values = c("missing" = "tan", "all" = "gray50")) | ||
} | ||
} else if (is.numeric(var)) { | ||
validate(need(any(!is.na(var)), "No data left to visualize.")) | ||
|
@@ -1085,7 +1089,7 @@ get_plotted_data <- function(input, plot_var, data) { | |
df <- data()[[dataset_name]] | ||
|
||
var_description <- teal.data::col_labels(df)[[varname]] | ||
list(data = df[[varname]], var_description = var_description) | ||
list(data = impute_blanks_as_na(df[[varname]]), var_description = var_description) | ||
} | ||
|
||
#' Renders the left-hand side `tabset` panel of the module | ||
|
@@ -1335,3 +1339,17 @@ remove_outliers_from <- function(var, outlier_definition) { | |
iqr <- q1_q3[2] - q1_q3[1] | ||
var[var >= q1_q3[1] - outlier_definition * iqr & var <= q1_q3[2] + outlier_definition * iqr] | ||
} | ||
|
||
#' Imputes empty strings as `NA` | ||
#' | ||
#' @param var (`vector`) a vector of any type and length | ||
#' @returns (`vector`) a vector with empty strings imputed as `NA`, if provided. | ||
#' @keywords internal | ||
impute_blanks_as_na <- function(var) { | ||
var <- as.vector(var) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Dropping There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, this was the main issue that was raised. If the factor has a level that is "" (or a character vector that has "") it will NOT be considered as NA, dropping the factor was the only way to consider them as NA. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Will this not influence the plotting, like change the order of categories on the axis or in the color key? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We explicitly change the NAs to r$> df_explicit_na(data.frame(col = c("A", "B", NA, "C")))
col
1 A
2 B
3 <Missing>
4 C Again, so much of a feature request here. Nothing is clear as of now. So, it's still a draft and we can talk about what we want. |
||
if (is.character(var)) { | ||
var <- gsub(" +", "", var) | ||
var[var == ""] <- NA | ||
} | ||
var | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
teal.modules.general
Imports
tern
. Why write a new function rather than use one from the dependency?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think there is such an imputation function in
tern
. However, It can be used when replacingNA
with<Missing>
👍🏽There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Honestly, I don't think we should be even imputing things. The true bug is that the data had empty strings which should be the app developer's responsibility to fix before injecting it into the teal app.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Look again
?tern::df_explicit_na
😉There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh great! I'll use it. Thanks!