diff --git a/CHANGELOG.md b/CHANGELOG.md index 0cbd0976ae..58e835df10 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,5 @@ # Change Log + All notable changes are kept in this file. All changes made should be added to the section called `Unreleased`. Once a new release is made this file will be updated to create a new `Unreleased` section for the next release. @@ -9,6 +10,8 @@ For more information about this file see also [Keep a Changelog](http://keepacha ### Added +- Refactor `convert_input` to Perform tasks via helper function. Subtask of [#3307](https://github.com/PecanProject/pecan/issues/3307) + ### Fixed - updated github action to build docker images diff --git a/base/db/R/add_database_entries.R b/base/db/R/add_database_entries.R new file mode 100644 index 0000000000..87814e04db --- /dev/null +++ b/base/db/R/add_database_entries.R @@ -0,0 +1,130 @@ +#' Return new arrangement of database while adding code to deal with ensembles +#' +#' @param result list of results from the download function +#' @param con database connection +#' @param start_date start date of the data +#' @param end_date end date of the data +#' @param write whether to write to the database +#' @param overwrite Logical: If a file already exists, create a fresh copy? 
+#' @param insert.new.file whether to insert a new file +#' @param input.args input arguments obtained from the convert_input function +#' @param machine machine information +#' @param mimetype data product specific file format +#' @param formatname format name of the data +#' @param allow.conflicting.dates whether to allow conflicting dates +#' @param ensemble ensemble id +#' @param ensemble_name ensemble name +#' @param existing.input existing input records +#' @param existing.dbfile existing dbfile records +#' @param input input records +#' @return list of input and dbfile ids +#' +#' @author Betsy Cowdery, Michael Dietze, Ankur Desai, Tony Gardella, Luke Dramko + +add_database_entries <- function( + result, con, start_date, + end_date, overwrite, + insert.new.file, input.args, + machine, mimetype, formatname, + allow.conflicting.dates, ensemble, + ensemble_name, existing.input, + existing.dbfile, input) { + # Setup newinput. This list will contain two variables: a vector of input IDs and a vector of DB IDs for each entry in result. + # This list will be returned. + newinput <- list(input.id = NULL, dbfile.id = NULL) # Blank vectors are null. + + for (i in 1:length(result)) { # Master for loop + id_not_added <- TRUE + + if (!is.null(existing.input) && nrow(existing.input[[i]]) > 0 && + (existing.input[[i]]$start_date != start_date || existing.input[[i]]$end_date != end_date)) { + # Updating record with new dates + db.query( + paste0( + "UPDATE inputs SET start_date='", start_date, "', end_date='", end_date, + "' WHERE id=", existing.input[[i]]$id + ), + con + ) + id_not_added <- FALSE + + # The overall structure of this loop has been set up so that exactly one input.id and one dbfile.id will be written to newinput every iteration. 
+ newinput$input.id <- c(newinput$input.id, existing.input[[i]]$id) + newinput$dbfile.id <- c(newinput$dbfile.id, existing.dbfile[[i]]$id) + } + + if (overwrite) { + # A bit hacky, but need to make sure that all fields are updated to expected values (i.e., what they'd be if convert_input was creating a new record) + if (!is.null(existing.input) && nrow(existing.input[[i]]) > 0) { + db.query( + paste0( + "UPDATE inputs SET name='", + basename(dirname(result[[i]]$file[1])), + "' WHERE id=", existing.input[[i]]$id + ), + con + ) + } + + if (!is.null(existing.dbfile) && nrow(existing.dbfile[[i]]) > 0) { + db.query(paste0( + "UPDATE dbfiles SET file_path='", dirname(result[[i]]$file[1]), + "', file_name='", result[[i]]$dbfile.name[1], + "' WHERE id=", existing.dbfile[[i]]$id + ), con) + } + } + + # If there is no ensemble then for each record there should be one parent + # But when you have ensembles, all of the members have one parent !! + parent.id <- if (is.numeric(ensemble)) { + ifelse(is.null(input[[i]]), NA, input[[1]]$id) + } else { + ifelse(is.null(input[[i]]), NA, input[[i]]$id) + } + + + if ("newsite" %in% names(input.args) && !is.null(input.args[["newsite"]])) { + site.id <- input.args$newsite + } + + if (insert.new.file && id_not_added) { + dbfile.id <- dbfile.insert( + in.path = dirname(result[[i]]$file[1]), + in.prefix = result[[i]]$dbfile.name[1], + "Input", + existing.input[[i]]$id, + con, + reuse = TRUE, + hostname = machine$hostname + ) + + newinput$input.id <- c(newinput$input.id, existing.input[[i]]$id) + newinput$dbfile.id <- c(newinput$dbfile.id, dbfile.id) + } else if (id_not_added) { + # This is to tell input.insert if we are writing ensembles + # Why does it need it? 
Because it checks for inputs with the same time period, site, and machine + # and if it returns something it does not insert anymore, but for ensembles, it needs to bypass this condition + ens.flag <- if (!is.null(ensemble) || is.null(ensemble_name)) TRUE else FALSE + + new_entry <- dbfile.input.insert( + in.path = dirname(result[[i]]$file[1]), + in.prefix = result[[i]]$dbfile.name[1], + siteid = site.id, + startdate = start_date, + enddate = end_date, + mimetype = mimetype, + formatname = formatname, + parentid = parent.id, + con = con, + hostname = machine$hostname, + allow.conflicting.dates = allow.conflicting.dates, + ens = ens.flag + ) + + newinput$input.id <- c(newinput$input.id, new_entry$input.id) + newinput$dbfile.id <- c(newinput$dbfile.id, new_entry$dbfile.id) + } + } # End for loop + return(newinput) +} diff --git a/base/db/R/check.missing.files.R b/base/db/R/check.missing.files.R new file mode 100644 index 0000000000..9c11923998 --- /dev/null +++ b/base/db/R/check.missing.files.R @@ -0,0 +1,46 @@ +#' Function to check if result has empty or missing files +#' +#' @param result A list of dataframes with file paths +#' @param existing.input Existing input records +#' @param existing.dbfile Existing dbfile records +#' @return A list of dataframes with file paths, a list of strings with the output file name, a list of existing input records, and a list of existing dbfile records +#' +#' @author Betsy Cowdery, Michael Dietze, Ankur Desai, Tony Gardella, Luke Dramko + +check_missing_files <- function(result, existing.input = NULL, existing.dbfile = NULL) { + result_sizes <- purrr::map_dfr( + result, + ~ dplyr::mutate( + ., + file_size = purrr::map_dbl(file, file.size), + missing = is.na(file_size), + empty = file_size == 0 + ) + ) + + if (any(result_sizes$missing) || any(result_sizes$empty)) { + PEcAn.logger::logger.severe( + "Requested Processing produced empty files or Nonexistent files:\n", + log_format_df(result_sizes[, c(1, 8, 9, 10)]), + "\n Table of 
results printed above.", + wrap = FALSE + ) + } + + + # Wrap in a list for consistent processing later + if (is.data.frame(existing.input)) { + existing.input <- list(existing.input) + } + + if (is.data.frame(existing.dbfile)) { + existing.dbfile <- list(existing.dbfile) + } + return(list(existing.input = existing.input, existing.dbfile = existing.dbfile)) +} + +log_format_df <- function(df) { + formatted_df <- rbind(colnames(df), format(df)) + formatted_text <- purrr::reduce(formatted_df, paste, sep = " ") + paste(formatted_text, collapse = "\n") +} \ No newline at end of file diff --git a/base/db/R/convert_input.R b/base/db/R/convert_input.R index 1ff74a1301..94f11df502 100644 --- a/base/db/R/convert_input.R +++ b/base/db/R/convert_input.R @@ -174,17 +174,15 @@ convert_input <- # Date/time processing for existing input existing.input[[i]]$start_date <- lubridate::force_tz(lubridate::as_datetime(existing.input[[i]]$start_date), "UTC") existing.input[[i]]$end_date <- lubridate::force_tz(lubridate::as_datetime(existing.input[[i]]$end_date), "UTC") - + ## Obtain machine information + machine.host.info <- get_machine_host(host, con = con) + machine.host <- machine.host.info$machine.host + machine <- machine.host.info$machine #Grab machine info of file that exists existing.machine <- db.query(paste0("SELECT * from machines where id = '", existing.dbfile[[i]]$machine_id, "'"), con) - #Grab machine info of host machine - machine.host <- ifelse(host$name == "localhost", PEcAn.remote::fqdn(), host$name) - machine <- db.query(paste0("SELECT * from machines where hostname = '", - machine.host, "'"), con) - # If the files aren't on the machine, we have to download them, so "overwrite" is meaningless. 
if (existing.machine$id == machine$id) { @@ -343,33 +341,16 @@ convert_input <- add = TRUE ) # Close on.exit } - - - - #Grab machine info of file that exists - existing.machine <- db.query(paste0("SELECT * from machines where id = '", - existing.dbfile$machine_id, "'"), con) - - #Grab machine info of host machine - machine.host <- ifelse(host$name == "localhost", PEcAn.remote::fqdn(), host$name) - machine <- db.query(paste0("SELECT * from machines where hostname = '", - machine.host, "'"), con) - - if (existing.machine$id != machine$id) { - - PEcAn.logger::logger.info("Valid Input record found that spans desired dates, but valid files do not exist on this machine.") - PEcAn.logger::logger.info("Downloading all years of Valid input to ensure consistency") - insert.new.file <- TRUE - start_date <- existing.input$start_date - end_date <- existing.input$end_date - - } else { - # There's an existing input that spans desired start/end dates with files on this machine - PEcAn.logger::logger.info("Skipping this input conversion because files are already available.") - return(list(input.id = existing.input$id, dbfile.id = existing.dbfile$id)) + + existing_files_result <- check_and_handle_existing_files(existing.dbfile, host, con, existing.input, start_date, end_date) + if (!is.null(existing_files_result$input.id)) { + return(existing_files_result) + } else { + insert.new.file <- existing_files_result$insert.new.file + start_date <- existing_files_result$start_date + end_date <- existing_files_result$end_date } - - + } else { # No existing record found. Should be good to go with regular conversion. 
} @@ -382,7 +363,7 @@ convert_input <- if (!is.null(ensemble) && ensemble) { return.all <-TRUE - }else{ + } else { return.all <- FALSE } existing.dbfile <- dbfile.input.check(siteid = site.id, @@ -469,25 +450,13 @@ convert_input <- } else if ((start_date >= existing.input$start_date) && (end_date <= existing.input$end_date)) { - #Grab machine info of file that exists - existing.machine <- db.query(paste0("SELECT * from machines where id = '", - existing.dbfile$machine_id, "'"), con) - - #Grab machine info of - machine.host <- ifelse(host$name == "localhost", PEcAn.remote::fqdn(), host$name) - machine <- db.query(paste0("SELECT * from machines where hostname = '", - machine.host, "'"), con) - - if(existing.machine$id != machine$id){ - PEcAn.logger::logger.info("Valid Input record found that spans desired dates, but valid files do not exist on this machine.") - PEcAn.logger::logger.info("Downloading all years of Valid input to ensure consistency") - insert.new.file <- TRUE - start_date <- existing.input$start_date - end_date <- existing.input$end_date + existing_files_result <- check_and_handle_existing_files(existing.dbfile, host, con, existing.input, start_date, end_date) + if (!is.null(existing_files_result$input.id)) { + return(existing_files_result) } else { - # There's an existing input that spans desired start/end dates with files on this machine - PEcAn.logger::logger.info("Skipping this input conversion because files are already available.") - return(list(input.id = existing.input$id, dbfile.id = existing.dbfile$id)) + insert.new.file <- existing_files_result$insert.new.file + start_date <- existing_files_result$start_date + end_date <- existing_files_result$end_date } } else { @@ -512,66 +481,21 @@ convert_input <- # we'll need to update its start/end dates . } } else { - # No existing record found. Should be good to go. + PEcAn.logger::logger.debug("No existing record found. 
Should be good to go.") } } #---------------------------------------------------------------------------------------------------------------# # Get machine information + machine.info <- get_machine_info(host, input.args = input.args, input.id = input.id, con = con) - machine.host <- ifelse(host$name == "localhost", PEcAn.remote::fqdn(), host$name) - machine <- db.query(paste0("SELECT * from machines where hostname = '", - machine.host, "'"), con) - - if (nrow(machine) == 0) { - PEcAn.logger::logger.error("machine not found", host$name) - return(NULL) - } - - if (missing(input.id) || is.na(input.id) || is.null(input.id)) { - input <- dbfile <- NULL - } else { - input <- db.query(paste("SELECT * from inputs where id =", input.id), con) - if (nrow(input) == 0) { - PEcAn.logger::logger.error("input not found", input.id) - return(NULL) - } - - if(!is.null(input.args$dbfile.id)){ - dbfile <- - db.query( - paste( - "SELECT * from dbfiles where id=",input.args$dbfile.id," and container_id =", - input.id, - " and container_type = 'Input' and machine_id =", - machine$id - ), - con - ) - }else{ - dbfile <- - db.query( - paste( - "SELECT * from dbfiles where container_id =", - input.id, - " and container_type = 'Input' and machine_id =", - machine$id - ), - con - ) - } - - - - if (nrow(dbfile) == 0) { - PEcAn.logger::logger.error("dbfile not found", input.id) - return(NULL) - } - if (nrow(dbfile) > 1) { - PEcAn.logger::logger.warn("multiple dbfile records, using last", dbfile) - dbfile <- dbfile[nrow(dbfile), ] - } + if (is.null(machine.info)) { + PEcAn.logger::logger.error("failed lookup of inputs or dbfiles") + return(NULL) } + machine <- machine.info$machine + input <- machine.info$input + dbfile <- machine.info$dbfile #--------------------------------------------------------------------------------------------------# # Perform Conversion 
#--------------------------------------------------------------------------------------------------# # Check if result has empty or missing files - result_sizes <- purrr::map_dfr( - result, - ~ dplyr::mutate( - ., - file_size = purrr::map_dbl(file, file.size), - missing = is.na(file_size), - empty = file_size == 0 - ) - ) - - if (any(result_sizes$missing) || any(result_sizes$empty)){ - log_format_df = function(df){ - rbind(colnames(df), format(df)) - purrr::reduce( paste, sep=" ") %>% - paste(collapse="\n") - } - - PEcAn.logger::logger.severe( - "Requested Processing produced empty files or Nonexistant files :\n", - log_format_df(result_sizes[,c(1,8,9,10)]), - "\n Table of results printed above.", - wrap = FALSE) - } - - # Insert into Database - outlist <- unlist(strsplit(outname, "_")) - - # Wrap in a list for consistant processing later - if (exists("existing.input") && is.data.frame(existing.input)) { - existing.input <- list(existing.input) - } - - if (exists("existing.dbfile") && is.data.frame(existing.dbfile)) { - existing.dbfile <- list(existing.dbfile) - } + checked.missing.files <- check_missing_files(result, existing.input, existing.dbfile) + + # Unwrap parameters after performing checks for missing files + existing.input <- checked.missing.files$existing.input + existing.dbfile <- checked.missing.files$existing.dbfile #---------------------------------------------------------------# # New arrangement of database adding code to deal with ensembles. - if (write) { - - # Setup newinput. This list will contain two variables: a vector of input IDs and a vector of DB IDs for each entry in result. - # This list will be returned. - newinput = list(input.id = NULL, dbfile.id = NULL) #Blank vectors are null. 
- for(i in 1:length(result)) { # Master for loop - id_not_added <- TRUE - - if (exists("existing.input") && nrow(existing.input[[i]]) > 0 && - (existing.input[[i]]$start_date != start_date || existing.input[[i]]$end_date != end_date)) { - - # Updating record with new dates - db.query(paste0("UPDATE inputs SET start_date='", start_date, "', end_date='", - end_date, "' WHERE id=", existing.input[[i]]$id), - con) - id_not_added = FALSE - - # The overall structure of this loop has been set up so that exactly one input.id and one dbfile.id will be written to newinput every interation. - newinput$input.id = c(newinput$input.id, existing.input[[i]]$id) - newinput$dbfile.id = c(newinput$dbfile.id, existing.dbfile[[i]]$id) - } - - if (overwrite) { - # A bit hacky, but need to make sure that all fields are updated to expected - # values (i.e., what they'd be if convert_input was creating a new record) - if (exists("existing.input") && nrow(existing.input[[i]]) > 0) { - db.query(paste0("UPDATE inputs SET name='", basename(dirname(result[[i]]$file[1])), - "' WHERE id=", existing.input[[i]]$id), con) - - } - - if (exists("existing.dbfile") && nrow(existing.dbfile[[i]]) > 0) { - db.query(paste0("UPDATE dbfiles SET file_path='", dirname(result[[i]]$file[1]), - "', ", "file_name='", result[[i]]$dbfile.name[1], - "' WHERE id=", existing.dbfile[[i]]$id), con) - - } - } - - # If there is no ensemble then for each record there should be one parent - #But when you have ensembles, all of the members have one parent !! 
- if (is.numeric(ensemble)){ - parent.id <- ifelse(is.null(input[i]), NA, input[1]$id) - }else{ - parent.id <- ifelse(is.null(input[i]), NA, input[i]$id) - } - - - - if ("newsite" %in% names(input.args) && !is.null(input.args[["newsite"]])) { - site.id <- input.args$newsite - } - - if (insert.new.file && id_not_added) { - dbfile.id <- dbfile.insert(in.path = dirname(result[[i]]$file[1]), - in.prefix = result[[i]]$dbfile.name[1], - 'Input', existing.input[[i]]$id, - con, reuse=TRUE, hostname = machine$hostname) - newinput$input.id <- c(newinput$input.id, existing.input[[i]]$id) - newinput$dbfile.id <- c(newinput$dbfile.id, dbfile.id) - } else if (id_not_added) { - - # This is to tell input.insert if we are wrting ensembles - # Why does it need it ? bc it checks for inputs with the same time period, site and machine - # and if it returns somethings it does not insert anymore, but for ensembles it needs to bypass this condition - if (!is.null(ensemble) | is.null(ensemble_name)){ - ens.flag <- TRUE - }else{ - ens.flag <- FALSE - } - - new_entry <- dbfile.input.insert(in.path = dirname(result[[i]]$file[1]), - in.prefix = result[[i]]$dbfile.name[1], - siteid = site.id, - startdate = start_date, - enddate = end_date, - mimetype, - formatname, - parentid = parent.id, - con = con, - hostname = machine$hostname, - allow.conflicting.dates = allow.conflicting.dates, - ens=ens.flag - ) - - - newinput$input.id <- c(newinput$input.id, new_entry$input.id) - newinput$dbfile.id <- c(newinput$dbfile.id, new_entry$dbfile.id) - } - - } #End for loop - - successful <- TRUE - return(newinput) - } else { - PEcAn.logger::logger.warn("Input was not added to the database") + if(write) { + add_entries_result <- add_database_entries(result, con, start_date, + end_date, overwrite, + insert.new.file, input.args, + machine, mimetype, formatname, + allow.conflicting.dates, ensemble, + ensemble_name, existing.input, + existing.dbfile, input) + } else { + PEcAn.logger::logger.warn("Input 
was not added to the database") + successful <- TRUE + return(NULL) + } successful <- TRUE - return(NULL) - } + return (add_entries_result) } # convert_input diff --git a/base/db/R/get_machine_info.R b/base/db/R/get_machine_info.R new file mode 100644 index 0000000000..2d35cfaca0 --- /dev/null +++ b/base/db/R/get_machine_info.R @@ -0,0 +1,104 @@ +#' Get machine information from db +#' @param host host information +#' @param input.args input args for existing records +#' @param input.id input id for existing records +#' @param con database connection +#' +#' @return list of machine, input, and dbfile records +#' @author Betsy Cowdery, Michael Dietze, Ankur Desai, Tony Gardella, Luke Dramko + +get_machine_info <- function(host, input.args, input.id = NULL, con = NULL) { + + machine.host.info <- get_machine_host(host, con = con) + machine.host <- machine.host.info$machine.host + machine <- machine.host.info$machine + + if (nrow(machine) == 0) { + PEcAn.logger::logger.error("machine not found", host$name) + return(NULL) + } + + if (is.null(input.id) || is.na(input.id)) { + input <- dbfile <- NULL + } else { + input <- db.query(paste("SELECT * from inputs where id =", input.id), con) + if (nrow(input) == 0) { + PEcAn.logger::logger.error("input not found", input.id) + return(NULL) + } + + if (!is.null(input.args$dbfile.id)) { + dbfile <- + db.query( + paste( + "SELECT * from dbfiles where id=", input.args$dbfile.id, " and container_id =", + input.id, + " and container_type = 'Input' and machine_id =", + machine$id + ), + con + ) + } else { + dbfile <- + db.query( + paste( + "SELECT * from dbfiles where container_id =", + input.id, + " and container_type = 'Input' and machine_id =", + machine$id + ), + con + ) + } + + + + if (nrow(dbfile) == 0) { + PEcAn.logger::logger.error("dbfile not found", input.id) + return(NULL) + } + if (nrow(dbfile) > 1) { + PEcAn.logger::logger.warn("multiple dbfile records, using last", dbfile) + dbfile <- dbfile[nrow(dbfile), ] + } + } + + 
return(list(machine = machine, input = input, dbfile = dbfile)) +} + +#' Helper Function to retrieve machine host and machine informations +#' @param host host information +#' @param con database connection +#' @return list of machine host and machine information +#' @author Abhinav Pandey +get_machine_host <- function(host, con) { + #Grab machine info of host machine + machine.host <- ifelse(host$name == "localhost", PEcAn.remote::fqdn(), host$name) + machine <- db.query(paste0( + "SELECT * from machines where hostname = '", + machine.host, "'" + ), con) + + return(list(machine.host = machine.host, machine = machine)) +} + +check_and_handle_existing_files <- function(existing.dbfile, host, con, existing.input, start_date, end_date) { + # Grab machine info of file that exists + existing.machine <- db.query(paste0("SELECT * from machines where id = '", + existing.dbfile$machine_id, "'"), con) + + # Grab machine info of host machine + machine.host.info <- get_machine_host(host, con = con) + machine.host <- machine.host.info$machine.host + machine <- machine.host.info$machine + + if (existing.machine$id != machine$id) { + PEcAn.logger::logger.info("Valid Input record found that spans desired dates, but valid files do not exist on this machine.") + PEcAn.logger::logger.info("Downloading all years of Valid input to ensure consistency") + return(list(insert.new.file = TRUE, start_date = existing.input$start_date, end_date = existing.input$end_date)) + } else { + # There's an existing input that spans desired start/end dates with files on this machine + PEcAn.logger::logger.info("Skipping this input conversion because files are already available.") + return(list(input.id = existing.input$id, dbfile.id = existing.dbfile$id)) + } +} \ No newline at end of file diff --git a/base/db/man/add_database_entries.Rd b/base/db/man/add_database_entries.Rd new file mode 100644 index 0000000000..ad1f8acbc0 --- /dev/null +++ b/base/db/man/add_database_entries.Rd @@ -0,0 +1,69 @@ +% Generated by roxygen2: do not 
edit by hand +% Please edit documentation in R/add_database_entries.R +\name{add_database_entries} +\alias{add_database_entries} +\title{Return new arrangement of database while adding code to deal with ensembles} +\usage{ +add_database_entries( + result, + con, + start_date, + end_date, + overwrite, + insert.new.file, + input.args, + machine, + mimetype, + formatname, + allow.conflicting.dates, + ensemble, + ensemble_name, + existing.input, + existing.dbfile, + input +) +} +\arguments{ +\item{result}{list of results from the download function} + +\item{con}{database connection} + +\item{start_date}{start date of the data} + +\item{end_date}{end date of the data} + +\item{overwrite}{Logical: If a file already exists, create a fresh copy?} + +\item{insert.new.file}{whether to insert a new file} + +\item{input.args}{input arguments obtained from the convert_input function} + +\item{machine}{machine information} + +\item{mimetype}{data product specific file format} + +\item{formatname}{format name of the data} + +\item{allow.conflicting.dates}{whether to allow conflicting dates} + +\item{ensemble}{ensemble id} + +\item{ensemble_name}{ensemble name} + +\item{existing.input}{existing input records} + +\item{existing.dbfile}{existing dbfile records} + +\item{input}{input records} + +\item{write}{whether to write to the database} +} +\value{ +list of input and dbfile ids +} +\description{ +Return new arrangement of database while adding code to deal with ensembles +} +\author{ +Betsy Cowdery, Michael Dietze, Ankur Desai, Tony Gardella, Luke Dramko +} diff --git a/base/db/man/check_missing_files.Rd b/base/db/man/check_missing_files.Rd new file mode 100644 index 0000000000..6f5f8c23ce --- /dev/null +++ b/base/db/man/check_missing_files.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/check.missing.files.R +\name{check_missing_files} +\alias{check_missing_files} +\title{Function to check if result has empty or missing files} 
+\usage{ +check_missing_files(result, existing.input = NULL, existing.dbfile = NULL) +} +\arguments{ +\item{result}{A list of dataframes with file paths} + +\item{existing.input}{Existing input records} + +\item{existing.dbfile}{Existing dbfile records} +} +\value{ +A list of dataframes with file paths, a list of strings with the output file name, a list of existing input records, and a list of existing dbfile records +} +\description{ +Function to check if result has empty or missing files +} +\author{ +Betsy Cowdery, Michael Dietze, Ankur Desai, Tony Gardella, Luke Dramko +} diff --git a/base/db/man/get_machine_host.Rd b/base/db/man/get_machine_host.Rd new file mode 100644 index 0000000000..4dbc2258ab --- /dev/null +++ b/base/db/man/get_machine_host.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get_machine_info.R +\name{get_machine_host} +\alias{get_machine_host} +\title{Helper Function to retrieve machine host and machine informations} +\usage{ +get_machine_host(host, con) +} +\arguments{ +\item{host}{host information} + +\item{con}{database connection} +} +\value{ +list of machine host and machine information +} +\description{ +Helper Function to retrieve machine host and machine informations +} +\author{ +Abhinav Pandey +} diff --git a/base/db/man/get_machine_info.Rd b/base/db/man/get_machine_info.Rd new file mode 100644 index 0000000000..68221a9c56 --- /dev/null +++ b/base/db/man/get_machine_info.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get_machine_info.R +\name{get_machine_info} +\alias{get_machine_info} +\title{Get machine information from db} +\usage{ +get_machine_info(host, input.args, input.id = NULL, con = NULL) +} +\arguments{ +\item{host}{host information} + +\item{input.args}{input args for existing records} + +\item{input.id}{input id for existing records} + +\item{con}{database connection} +} +\value{ +list of machine, input, and dbfile 
records +} +\description{ +Get machine information from db +} +\author{ +Betsy Cowdery, Michael Dietze, Ankur Desai, Tony Gardella, Luke Dramko +} diff --git a/base/db/tests/testthat/test.check.missing.files.R b/base/db/tests/testthat/test.check.missing.files.R new file mode 100644 index 0000000000..75a531283d --- /dev/null +++ b/base/db/tests/testthat/test.check.missing.files.R @@ -0,0 +1,16 @@ +test_that("`check_missing_files()` able to return correct missing files", { + # Mock `file.size` + mocked_size <- mockery::mock(100,200) + mockery::stub(check_missing_files, "file.size", mocked_size) + + res <- check_missing_files( + result = list(data.frame(file = c("A", "B"))), + existing.input = data.frame(), + existing.dbfile = data.frame() + ) + + + expect_equal(length(res), 2) + expect_true(is.list(res[[1]])) + expect_true(is.list(res[[2]])) +}) diff --git a/base/db/tests/testthat/test.convert_input.R b/base/db/tests/testthat/test.convert_input.R index c2e7f49c1e..474ed4eaae 100644 --- a/base/db/tests/testthat/test.convert_input.R +++ b/base/db/tests/testthat/test.convert_input.R @@ -1,10 +1,26 @@ test_that("`convert_input()` able to call the respective download function for a data item with the correct arguments", { mocked_res <- mockery::mock(list(c("A", "B"))) - mockery::stub(convert_input, 'dbfile.input.check', data.frame()) - mockery::stub(convert_input, 'db.query', data.frame(id = 1)) - mockery::stub(convert_input, 'PEcAn.remote::remote.execute.R', mocked_res) - mockery::stub(convert_input, 'purrr::map_dfr', data.frame(missing = c(FALSE), empty = c(FALSE))) + mockery::stub(convert_input, "dbfile.input.check", data.frame()) + mockery::stub(convert_input, "db.query", data.frame(id = 1)) + mockery::stub(convert_input, "get_machine_info", list( + machine = data.frame(id = 1), + input = data.frame(id = 1), + dbfile = data.frame(id = 1) + )) + mockery::stub(convert_input, "PEcAn.remote::remote.execute.R", mocked_res) + mockery::stub(convert_input, 
"check_missing_files", list( + result_sizes = data.frame( + file = c("A", "B"), + file_size = c(100, 200), + missing = c(FALSE, FALSE), + empty = c(FALSE, FALSE) + ), + outlist = "test", + existing.input = list(data.frame(file = character(0))), + existing.dbfile = list(data.frame(file = character(0))) + )) + mockery::stub(convert_input, "add_database_entries", list(input.id = 1, dbfile.id = 1)) convert_input( input.id = NA, @@ -14,18 +30,18 @@ test_that("`convert_input()` able to call the respective download function for a site.id = 1, start_date = "2011-01-01", end_date = "2011-12-31", - pkg = 'PEcAn.data.atmosphere', - fcn = 'download.AmerifluxLBL', + pkg = "PEcAn.data.atmosphere", + fcn = "download.AmerifluxLBL", con = NULL, host = data.frame(name = "localhost"), write = FALSE, lat.in = 40, lon.in = -88 ) - + args <- mockery::mock_args(mocked_res) expect_equal( - args[[1]]$script, + args[[1]]$script, "PEcAn.data.atmosphere::download.AmerifluxLBL(lat.in=40, lon.in=-88, overwrite=FALSE, outfolder='test/', start_date='2011-01-01', end_date='2011-12-31')" ) }) @@ -35,4 +51,4 @@ test_that("`.get.file.deletion.commands()` able to return correct file deletion expect_equal(res$move.to.tmp, "dir.create(c('./tmp'), recursive=TRUE, showWarnings=FALSE); file.rename(from=c('test'), to=c('./tmp/test'))") expect_equal(res$delete.tmp, "unlink(c('./tmp'), recursive=TRUE)") expect_equal(res$replace.from.tmp, "file.rename(from=c('./tmp/test'), to=c('test'));unlink(c('./tmp'), recursive=TRUE)") -}) \ No newline at end of file +}) diff --git a/modules/meta.analysis/R/run.meta.analysis.R b/modules/meta.analysis/R/run.meta.analysis.R index 360d3453a0..b1580e0779 100644 --- a/modules/meta.analysis/R/run.meta.analysis.R +++ b/modules/meta.analysis/R/run.meta.analysis.R @@ -208,22 +208,26 @@ runModule.run.meta.analysis <- function(settings) { PEcAn.logger::logger.info(paste0("Running meta-analysis on all PFTs listed by any Settings object in the list: ", paste(pft.names, collapse = ", 
"))) - iterations <- settings$meta.analysis$iter - random <- settings$meta.analysis$random.effects$on - use_ghs <- settings$meta.analysis$random.effects$use_ghs - threshold <- settings$meta.analysis$threshold - dbfiles <- settings$database$dbfiles - database <- settings$database$bety - run.meta.analysis(pfts, iterations, random, threshold, dbfiles, database, use_ghs) + run.meta.analysis( + pfts, + settings$meta.analysis$iter, + settings$meta.analysis$random.effects$on, + settings$meta.analysis$threshold, + settings$database$dbfiles, + settings$database$bety, + settings$meta.analysis$random.effects$use_ghs + ) } else if (PEcAn.settings::is.Settings(settings)) { - pfts <- settings$pfts - iterations <- settings$meta.analysis$iter - random <- settings$meta.analysis$random.effects$on - use_ghs <- settings$meta.analysis$random.effects$use_ghs - threshold <- settings$meta.analysis$threshold - dbfiles <- settings$database$dbfiles - database <- settings$database$bety - run.meta.analysis(pfts, iterations, random, threshold, dbfiles, database, use_ghs, update = settings$meta.analysis$update) + run.meta.analysis( + settings$pfts, + settings$meta.analysis$iter, + settings$meta.analysis$random.effects$on, + settings$meta.analysis$threshold, + settings$database$dbfiles, + settings$database$bety, + settings$meta.analysis$random.effects$use_ghs, + update = settings$meta.analysis$update + ) } else { stop("runModule.run.meta.analysis only works with Settings or MultiSettings") }