Skip to content

Commit

Permalink
Merge branch 'breaking-improvments' of https://github.com/HopkinsIDD/…
Browse files Browse the repository at this point in the history
…flepiMoP into breaking-improvments
  • Loading branch information
saraloo committed Oct 2, 2023
2 parents fcd790b + 4d977cf commit 699f3a4
Show file tree
Hide file tree
Showing 3 changed files with 75 additions and 46 deletions.
4 changes: 2 additions & 2 deletions flepimop/R_packages/config.writer/R/yaml_utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -80,15 +80,15 @@
collapse_intervention<- function(dat){
#TODO: add number to repeated names
#TODO add a check that all end_dates are the same
mtr <- dat %>%
mtr <- dat %>% as_tibble() %>%
dplyr::filter(template=="MultiPeriodModifier") %>%
dplyr::mutate(end_date=paste0("end_date: ", end_date),
start_date=paste0("- start_date: ", start_date)) %>%
tidyr::unite(col="period", sep="\n ", start_date:end_date) %>%
dplyr::group_by(dplyr::across(-period)) %>%
dplyr::summarize(period = paste0(period, collapse="\n "))

if (!all(is.na(mtr$subpop_groups)) & !all(is.null(mtr$subpop_groups))) {
if (exists("mtr$spatial_groups") && (!all(is.na(mtr$spatial_groups)) & !all(is.null(mtr$spatial_groups)))) {

mtr <- mtr %>%
dplyr::group_by(dplyr::across(-subpop)) %>%
Expand Down
96 changes: 63 additions & 33 deletions flepimop/main_scripts/create_seeding.R
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ library(purrr)

option_list <- list(
optparse::make_option(c("-c", "--config"), action = "store", default = Sys.getenv("CONFIG_PATH"), type = "character", help = "path to the config file"),
optparse::make_option(c("-s", "--seed_variants"), action="store", default = Sys.getenv("SEED_VARIANTS"), type='logical',help="Whether to add variants/subtypes to outcomes in seeding."),
optparse::make_option(c("-k", "--keep_all_seeding"), action="store",default=TRUE,type='logical',help="Whether to filter away seeding prior to the start date of the simulation.")
)

Expand All @@ -67,6 +68,9 @@ if (is.null(config$subpop_setup$us_model)) {
is_US_run <- config$subpop_setup$us_model
seed_variants <- "variant_filename" %in% names(config$seeding)

if (!is.na(opt$seed_variants)){
seed_variants <- opt$seed_variants
}


## backwards compatibility with configs that don't have inference$gt_source
Expand Down Expand Up @@ -107,7 +111,7 @@ print(paste("Successfully loaded data from ", data_path, "for seeding."))

if (is_US_run) {
cases_deaths <- cases_deaths %>%
mutate(FIPS = stringr::str_pad(FIPS, width = 5, side = "right", pad = "0"))
mutate(subpop = stringr::str_pad(subpop, width = 5, side = "right", pad = "0"))
}

print(paste("Successfully pulled", gt_source, "data for seeding."))
Expand All @@ -126,39 +130,64 @@ if (seed_variants) {
colnames(variant_data)[colnames(variant_data) == "Update"] ="date"
colnames(cases_deaths)[colnames(cases_deaths) == "Update"] ="date"

if (!is.null(config$seeding$seeding_outcome)){
if (config$seeding$seeding_outcome=="incidH"){
if (any(grepl(paste(unique(variant_data$variant), collapse = "|"), colnames(cases_deaths)))){

if (!is.null(config$seeding$seeding_outcome)){
if (config$seeding$seeding_outcome=="incidH"){
cases_deaths <- cases_deaths %>%
dplyr::select(date, subpop, paste0("incidH_", names(config$seeding$seeding_compartments)))
colnames(cases_deaths) <- gsub("incidH_", "", colnames(cases_deaths))
cases_deaths <- cases_deaths %>%
dplyr::mutate(dplyr::across(tidyselect::any_of(unique(names(config$seeding$seeding_compartments))), ~ tidyr::replace_na(.x, 0)))
} else {
stop(paste(
"Currently only incidH is implemented for config$seeding$seeding_outcome."
))
}
} else {
cases_deaths <- cases_deaths %>%
dplyr::select(date, subpop, paste0("incidC_", names(config$seeding$seeding_compartments)))
colnames(cases_deaths) <- gsub("incidC_", "", colnames(cases_deaths))
cases_deaths <- cases_deaths %>%
dplyr::mutate(dplyr::across(tidyselect::any_of(unique(names(config$seeding$seeding_compartments))), ~ tidyr::replace_na(.x, 0)))
}

} else {

if (!is.null(config$seeding$seeding_outcome)){
if (config$seeding$seeding_outcome=="incidH"){
cases_deaths <- cases_deaths %>%
dplyr::select(date, subpop, incidH) %>%
dplyr::left_join(variant_data) %>%
dplyr::mutate(incidI = incidH * prop) %>%
dplyr::select(-prop, -incidH) %>%
tidyr::pivot_wider(names_from = variant, values_from = incidI) %>%
dplyr::mutate(dplyr::across(tidyselect::any_of(unique(variant_data$variant)), ~ tidyr::replace_na(.x, 0)))
} else {
stop(paste(
"Currently only incidH is implemented for config$seeding$seeding_outcome."
))
}
} else {
cases_deaths <- cases_deaths %>%
dplyr::select(date, FIPS, source, incidH) %>%
dplyr::select(date, subpop, incidC) %>%
dplyr::left_join(variant_data) %>%
dplyr::mutate(incidI = incidH * prop) %>%
dplyr::select(-prop, -incidH) %>%
dplyr::mutate(incidI = incidC * prop) %>%
dplyr::select(-prop, -incidC) %>%
tidyr::pivot_wider(names_from = variant, values_from = incidI) %>%
dplyr::mutate(dplyr::across(tidyselect::any_of(unique(variant_data$variant)), ~ tidyr::replace_na(.x, 0)))
} else {
stop(paste(
"Currently only incidH is implemented for config$seeding$seeding_outcome."
))
}
} else {
cases_deaths <- cases_deaths %>%
dplyr::select(date, FIPS, source, incidC) %>%
dplyr::left_join(variant_data) %>%
dplyr::mutate(incidI = incidC * prop) %>%
dplyr::select(-prop, -incidC) %>%
tidyr::pivot_wider(names_from = variant, values_from = incidI) %>%
dplyr::mutate(dplyr::across(tidyselect::any_of(unique(variant_data$variant)), ~ tidyr::replace_na(.x, 0)))
}
}

## Check some data attributes:
## This is a hack:
if ("subpop" %in% names(cases_deaths)) {
cases_deaths$FIPS <- cases_deaths$subpop
if ("FIPS" %in% names(cases_deaths)) {
cases_deaths$subpop <- cases_deaths$FIPS
warning("Changing FIPS name in seeding. This is a hack")
}
if ("date" %in% names(cases_deaths)) {
cases_deaths$Update <- cases_deaths$date
if ("Update" %in% names(cases_deaths)) {
cases_deaths$date <- cases_deaths$Update
warning("Changing Update name in seeding. This is a hack")
}
obs_subpop <- config$subpop_setup$subpop
Expand All @@ -177,7 +206,7 @@ check_required_names <- function(df, cols, msg) {
if ("compartments" %in% names(config)) {

if (all(names(config$seeding$seeding_compartments) %in% names(cases_deaths))) {
required_column_names <- c("FIPS", "Update", names(config$seeding$seeding_compartments))
required_column_names <- c("subpop", "date", names(config$seeding$seeding_compartments))
check_required_names(
cases_deaths,
required_column_names,
Expand All @@ -204,7 +233,7 @@ if ("compartments" %in% names(config)) {
) %>%
tidyr::separate(source_column, paste("source", names(config$compartments), sep = "_")) %>%
tidyr::separate(destination_column, paste("destination", names(config$compartments), sep = "_"))
required_column_names <- c("FIPS", "Update", "value", paste("source", names(config$compartments), sep = "_"), paste("destination", names(config$compartments), sep = "_"))
required_column_names <- c("subpop", "date", "value", paste("source", names(config$compartments), sep = "_"), paste("destination", names(config$compartments), sep = "_"))
incident_cases <- incident_cases[, required_column_names]

# if (!is.null(config$smh_round)) {
Expand Down Expand Up @@ -233,7 +262,7 @@ if ("compartments" %in% names(config)) {
stop("Please add a seeding_compartments section to the config")
}
} else {
required_column_names <- c("FIPS", "Update", "incidI")
required_column_names <- c("subpop", "date", "incidI")
check_required_names(
cases_deaths,
required_column_names,
Expand All @@ -246,7 +275,7 @@ if ("compartments" %in% names(config)) {
tidyr::pivot_longer(cols = "incidI", names_to = "source_infection_stage", values_to = "value")
incident_cases$destination_infection_stage <- "E"
incident_cases$source_infection_stage <- "S"
required_column_names <- c("FIPS", "Update", "value", "source_infection_stage", "destination_infection_stage")
required_column_names <- c("subpop", "date", "value", "source_infection_stage", "destination_infection_stage")

if ("parallel_structure" %in% names(config[["seir"]][["parameters"]])) {
parallel_compartments <- config[["seir"]][["parameters"]][["parallel_structure"]][["compartments"]]
Expand All @@ -266,22 +295,22 @@ all_times <- lubridate::ymd(config$start_date) +
seq_len(lubridate::ymd(config$end_date) - lubridate::ymd(config$start_date))

geodata <- flepicommon::load_geodata_file(
file.path(config$data_path, config$subpop_setup$geodata),
file.path(config$data_path, config$spatial_setup$geodata),
5,
"0",
TRUE
)

all_subpop <- geodata[[config$subpop_setup$subpop]]
all_subpop <- geodata[["subpop"]]



incident_cases <- incident_cases %>%
dplyr::filter(FIPS %in% all_subpop) %>%
dplyr::filter(subpop %in% all_subpop) %>%
dplyr::select(!!!required_column_names)
incident_cases <- incident_cases %>% filter(value>0)

incident_cases[["Update"]] <- as.Date(incident_cases$Update)
incident_cases[["date"]] <- as.Date(incident_cases$date)

if (is.null(config[["seeding"]][["seeding_inflation_ratio"]])) {
config[["seeding"]][["seeding_inflation_ratio"]] <- 10
Expand All @@ -290,16 +319,16 @@ if (is.null(config[["seeding"]][["seeding_delay"]])) {
config[["seeding"]][["seeding_delay"]] <- 5
}

grouping_columns <- required_column_names[!required_column_names %in% c("Update", "value")]
grouping_columns <- required_column_names[!required_column_names %in% c("date", "value")]
incident_cases <- incident_cases %>%
dplyr::group_by(!!!rlang::syms(grouping_columns)) %>%
dplyr::group_modify(function(.x, .y) {
.x %>%
dplyr::arrange(Update) %>%
dplyr::arrange(date) %>%
dplyr::filter(value > 0) %>%
.[seq_len(min(nrow(.x), 5)), ] %>%
dplyr::mutate(
Update = Update - lubridate::days(config[["seeding"]][["seeding_delay"]]),
date = date - lubridate::days(config[["seeding"]][["seeding_delay"]]),
value = config[["seeding"]][["seeding_inflation_ratio"]] * value + .05
) %>%
return
Expand Down Expand Up @@ -384,3 +413,4 @@ print(paste("Saved seeding to", config$seeding$lambda_file))
head(incident_cases)

## @endcond

21 changes: 10 additions & 11 deletions flepimop/main_scripts/inference_slot.R
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ if (is.null(config$inference$gt_source)){
}

gt_scale <- ifelse(state_level, "US state", "US county")
fips_codes_ <- geodata[[obs_subpop]]
subpops_ <- geodata[[obs_subpop]]

gt_start_date <- lubridate::ymd(config$start_date)
if (opt$ground_truth_start != "") {
Expand Down Expand Up @@ -218,21 +218,20 @@ if (config$inference$do_inference){

obs <- suppressMessages(
readr::read_csv(config$inference$gt_data_path,
col_types = readr::cols(FIPS = readr::col_character(),
date = readr::col_date(),
col_types = readr::cols(date = readr::col_date(),
source = readr::col_character(),
subpop = readr::col_character(),
.default = readr::col_double()), )) %>%
dplyr::filter(FIPS %in% fips_codes_, date >= gt_start_date, date <= gt_end_date) %>%
dplyr::right_join(tidyr::expand_grid(FIPS = unique(.$FIPS), date = unique(.$date))) %>%
dplyr::mutate_if(is.numeric, dplyr::coalesce, 0) %>%
dplyr::rename(!!obs_subpop := FIPS)
dplyr::filter(subpop %in% subpops_, date >= gt_start_date, date <= gt_end_date) %>%
dplyr::right_join(tidyr::expand_grid(subpop = unique(.$subpop), date = unique(.$date))) %>%
dplyr::mutate_if(is.numeric, dplyr::coalesce, 0)

geonames <- unique(obs[[obs_subpop]])
subpopnames <- unique(obs[[obs_subpop]])


## Compute statistics
data_stats <- lapply(
geonames,
subpopnames,
function(x) {
df <- obs[obs[[obs_subpop]] == x, ]
inference::getStats(
Expand All @@ -244,7 +243,7 @@ if (config$inference$do_inference){
end_date = gt_end_date
)
}) %>%
set_names(geonames)
set_names(subpopnames)


likelihood_calculation_fun <- function(sim_hosp){
Expand Down Expand Up @@ -277,7 +276,7 @@ if (config$inference$do_inference){

} else {

geonames <- obs_subpop
subpopnames <- obs_subpop

likelihood_calculation_fun <- function(sim_hosp){

Expand Down

0 comments on commit 699f3a4

Please sign in to comment.