Skip to content

Commit

Permalink
Merge pull request #110 from RobLBaker/master
Browse files Browse the repository at this point in the history
minor updates and fixes
  • Loading branch information
RobLBaker authored Jun 27, 2024
2 parents 33ad267 + 5f7c48f commit d79c2a7
Show file tree
Hide file tree
Showing 65 changed files with 3,988 additions and 13,528 deletions.
2 changes: 1 addition & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ export(get_taxon_rank)
export(get_utm_zone)
export(long2UTM)
export(order_cols)
export(removeEmptyTables)
export(remove_empty_tables)
export(replace_blanks)
export(te_check)
export(utm_to_ll)
Expand Down
7 changes: 7 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
# QCkit v0.1.8 (not yet released)

2024-06-27
* bug fixes for `generate_ll_from_utm()`
* add function `remove_empty_tables()` (and associated unit tests)
* update documentation for `replace_blanks()` to indicate it can replace blanks with more than just NA

# QCkit v0.1.7
2024-05-08
* Updated the `replace_blanks()` function to accept any missing value code a user inputs (but it still defaults to NA).
Expand Down
32 changes: 23 additions & 9 deletions R/geography.R
Original file line number Diff line number Diff line change
Expand Up @@ -425,7 +425,11 @@ generate_ll_from_utm <- function(df,

# Separate df with just coordinates. We'll filter out any NA rows.
coord_df <- df %>%
dplyr::select(`_UTMJOINCOL`, {{EastingCol}}, {{NorthingCol}}, {{ZoneCol}}, {{DatumCol}})
dplyr::select(`_UTMJOINCOL`,
{{EastingCol}},
{{NorthingCol}},
{{ZoneCol}},
{{DatumCol}})

withr::with_envvar(c("PROJ_LIB" = ""), { # This is a fix for the proj library bug in R (see pinned post "sf::st_read() of geojson not getting CRS" in IMData General Discussion).
coord_df <- coord_df %>%
Expand All @@ -437,14 +441,19 @@ generate_ll_from_utm <- function(df,

na_row_count <- nrow(df) - nrow(coord_df)
if (na_row_count > 0) {
warning(paste(na_row_count, "rows are missing UTM coordinates, zone, and/or datum information."), call. = FALSE)
warning(paste(
na_row_count,
"rows are missing UTM coordinates, zone, and/or datum information."),
call. = FALSE)
}

## Set up CRS for lat/long data
latlong_CRS <- sp::CRS(glue::glue("+proj=longlat +datum={latlong_datum}")) # CRS for our new lat/long values
latlong_CRS <- sp::CRS(glue::glue("+proj=longlat +datum={latlong_datum}"))
# CRS for our new lat/long values

# Loop through each datum and zone in the data
zones_datums <- dplyr::select(coord_df, {{ZoneCol}}, {{DatumCol}}) %>% # Get vector of zones present in data
zones_datums <- dplyr::select(coord_df, {{ZoneCol}}, {{DatumCol}}) %>%
# Get vector of zones present in data
unique()

new_coords <- sapply(1:nrow(zones_datums), function(zone_datum_index) {
Expand All @@ -454,7 +463,8 @@ generate_ll_from_utm <- function(df,
zone_num <- current_zone
north_south <- ""
} else {
zone_num <- stringr::str_extract(current_zone, "\\d+") # sp::CRS wants zone number only, e.g. 11, not 11N
zone_num <- stringr::str_extract(current_zone, "\\d+")
# sp::CRS wants zone number only, e.g. 11, not 11N
zone_letter <- tolower(stringr::str_extract(current_zone, "[A-Za-z]"))
if (!is.na(zone_letter) && zone_letter == "s") {
north_south <- " +south"
Expand All @@ -469,17 +479,21 @@ generate_ll_from_utm <- function(df,
filtered_df <- coord_df %>%
dplyr::filter((!!rlang::ensym(ZoneCol) == current_zone & !!rlang::ensym(DatumCol) == current_datum))
sp_utm <- sp::SpatialPoints(filtered_df %>%
dplyr::select({{EastingCol}}, {{NorthingCol}}) %>%
dplyr::select({{EastingCol}},
{{NorthingCol}}) %>%
as.matrix(),
proj4string = utm_CRS) # Convert UTM columns into a SpatialPoints object
sp_geo <- sp::spTransform(sp_utm, latlong_CRS) %>% # Transform UTM to Lat/Long
proj4string = utm_CRS)
# Convert UTM columns into a SpatialPoints object
sp_geo <- sp::spTransform(sp_utm, latlong_CRS) %>%
# Transform UTM to Lat/Long
tibble::as_tibble()

# Add lat/long columns back into the original dataframe
latlong <- tibble::tibble(`_UTMJOINCOL` = filtered_df$`_UTMJOINCOL`,
decimalLatitude = sp_geo[[2]],
decimalLongitude = sp_geo[[1]],
LatLong_CRS = latlong_CRS@projargs) # Store the coordinate reference system PROJ string in the dataframe
LatLong_CRS = latlong_CRS@projargs)
# Store the coordinate reference system PROJ string in the dataframe


return(latlong)
Expand Down
4 changes: 2 additions & 2 deletions R/remove_empty_tables.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@
#' item_b = mtcars,
#' item_c = iris)
#'
#' tidy_list <- removeEmptyTables(test_list)
#' tidy_list <- remove_empty_tables(test_list)
#'
removeEmptyTables <- function(df_list) {
remove_empty_tables <- function(df_list) {
non_empty_list <- purrr::compact(df_list) # Remove empty dataframes
tables_removed <- setdiff(names(df_list), names(non_empty_list)) # Get names of removed dataframes

Expand Down
27 changes: 21 additions & 6 deletions R/replace_blanks.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
#' Replaces all blank cells with NA
#' Replaces all blank cells with a missing value code of your choice
#'
#' @details `replace_blanks()` is particularly useful for exporting data from a
#' database (such as access) and converting it to a data package with metadata.
#' @description `replace_blanks()` is particularly useful for exporting data
#' from a database (such as access) and converting it to a data package with
#' metadata.
#'
#' `replace_blanks()` will import all .csv files in the specified working
#' directory. The files are then written back out to the same directory,
Expand All @@ -10,9 +11,23 @@
#' If no missing value is specified, the function defaults to replacing all
#' blanks with "NA".
#'
#' One exception is if a .csv contains NO data (i.e. just column names and no
#' data in any of the cells). In this case, the blanks will not be replaced with
#' NA (as the function cannot determine how many NAs to include).
#' Please keep in mind that "missing" is a general term for all data
#' not present in the data file or data package. Although you may have a very
#' good reason for not providing data, and that data may not, from the data
#' package creator's perspective, be "missing" (maybe you never intended to
#' collect it), from a data package user's perspective any data that is not in
#' the data package is effectively "missing" from the data package. Therefore,
#' it is critical to document in metadata any data that are absent with an
#' appropriate "missingValueCode" and "missingValueDefinition". These terms are
#' defined by the metadata schema and are broadly used to apply to any data not
#' present.
#'
#' This function will replace all empty cells and all cells containing NA with
#' a "missingValueCode" of your choice (although it defaults to NA).
#'
#' @details One exception is if a .csv contains NO data (i.e. just column names
#' and no data in any of the cells). In this case, the blanks will not be
#' replaced with NA (as the function cannot determine how many NAs to include).
#'
#' @param directory String. Path to the file(s) to have blanks replaced with
#' NAs. Defaults to the working directory of the project (here::here())
Expand Down
125 changes: 75 additions & 50 deletions docs/404.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit d79c2a7

Please sign in to comment.