Refactor od_disaggregate III (#38)

* od_nrows() * Refactor with od$nrows column * Refactor, remove subzones arg * Tidy up, remove lines! * Add od_disag args to od_jitter for #39 (#40) * Add od_disag args to od_jitter for #39 * Use od_dissaggrate backend for #39 * Update news
itsleeds · Dec 5, 2021 · b0087b9 · b0087b9
1 parent b0005a3
commit b0087b9
Show file tree

Hide file tree

Showing 6 changed files with 159 additions and 116 deletions.
diff --git a/NEWS.md b/NEWS.md
@@ -1,6 +1,7 @@
 # od 0.3.2 (2021-12)
 
-* `od_disaggregate()` can now take route networks as inputs (experimental)
+* `od_disaggregate()` can now take route networks as inputs
+* `od_disaggregate()` is now used as a 'back-end' for `od_jitter()` where possible by default
 
 # od 0.3.1 (2021-07)
 

diff --git a/R/aggregate.R b/R/aggregate.R
@@ -10,8 +10,8 @@
 #'
 #' @inheritParams od_to_sf
 #' @param od An origin-destination data frame
-#' @param subpoints Points or linestrings within the zones defining the OD data start/end points
-#' @param subzones Sub-zones within the zones defining the OD data
+#' @param subpoints Points, lines or polygons within the zones.
+#'   These define the OD data start/end points.
 #' @param code_append The name of the column containing aggregate zone names
 #' @param population_column The column containing the total population (if it exists)
 #' @param max_per_od Maximum flow in the population_column to assign per OD pair.
@@ -45,13 +45,16 @@
 #' plot(disag[0])
 #'
 #' # with subpoints
-#' subpoints = sf::st_sample(zones, 1000)
+#' subpoints = sf::st_sample(zones, 100)
 #' od_disag_subpoints = od_disaggregate(od, zones, subpoints = subpoints)
+#' plot(subpoints)
+#' plot(od_disag_subpoints$geometry, add = TRUE)
 #'
 #' # with buildings data
 #' od_disag_buildings = od_disaggregate(od, zones, od_data_buildings)
 #' summary(od_disag_buildings)
-#' plot(od_disag_buildings)
+#' plot(od_data_buildings$geometry)
+#' plot(od_disag_buildings[3], add = TRUE)
 #' # mapview::mapview(od_disag_buildings)
 #'
 #' od = od_data_df[1:2, 1:4]
@@ -69,92 +72,43 @@
 #' plot(zones$geometry)
 #' plot(od_road_network$geometry, add = TRUE, col = "green")
 #' plot(od_disag_net$geometry, add = TRUE)
-#' mapview::mapview(zones) + od_disag_net
+#' mapview::mapview(zones) + od_disag_net + od_road_network
 #' }
 od_disaggregate = function(od,
                            z,
-                           subzones = NULL,
                            subpoints = NULL,
                            code_append = "_ag",
                            population_column = 3,
                            max_per_od = 5,
                            keep_ids = TRUE,
                            integer_outputs = FALSE
                            ) {
-
-  nrow_od_disag = ceiling(od[[population_column]] / max_per_od)
-  nr = sum(nrow_od_disag)
+  od$nrows = od_nrows(od, population_column, max_per_od)
   azn = paste0(names(z)[1], code_append)
-
   # is the input od data an sf object? tell the user and convert to df if so
   if(methods::is(object = od, class2 = "sf")) {
     message("Input object is sf, attempting to convert to a data frame")
     od = sf::st_drop_geometry(od)
   }
-
-  if (is.null(subpoints) && is.null(subzones)) {
-    message("Creating randomly sampled origin and destination points.")
-    # from jitter.R
-    # todo: consider splitting this out into new function
-    od_disag_indices = rep(x = seq(nrow(od)), nrow_od_disag)
-    od_disag_ids = od[od_disag_indices, c(1:2)]
-    id_zones = c(od_disag_ids[[1]], od_disag_ids[[2]])
-    points_per_zone = data.frame(table(id_zones))
-    z = z[z[[1]] %in% points_per_zone[[1]], ]
-    freq = points_per_zone$Freq
-    subpoints_df = data.frame(id = as.character(seq(nr * 2)), azn = sort(id_zones))
-    names(subpoints_df)[2] = azn
-    subpoints = sf::st_sf(subpoints_df, geometry = sf::st_sample(z, freq))
-  }
-
-  if (is.null(subpoints) && !is.null(subzones)) {
-    message("Converting subzones/lines to points")
-    geomtype = as.character(unique(sf::st_geometry_type(subzones)))
-    if (methods::is(subzones, "sf") && geomtype == "LINESTRING") {
-      subpoints = od_sample_vertices(subzones)
-      subzones = NULL
-    } else {
-      suppressWarnings({
-        subpoints = sf::st_centroid(subzones)
-      })
+  if (is.null(subpoints)) {
+    message("Creating origin and destination points at random locations")
+    subpoints = od_subpoints_sample(od, nrows = od$nrows, z = z, azn = azn)
+  } else {
+    # if subpoints have not been created they need to be sampled
+    geomtype = as.character(unique(sf::st_geometry_type(subpoints)))
+    if (geomtype == "LINESTRING") {
+      message("Converting lines on network to points")
+      subpoints = od_sample_vertices(subpoints)
     }
-  }
-
-  # if subpoints is an sf object with class linestring
-  geomtype = as.character(unique(sf::st_geometry_type(subpoints)))
-  if (methods::is(subpoints, "sf") && geomtype == "POINT") {
-    subpoints = od_sample_vertices(subpoints)
-  }
-
-  if (methods::is(subpoints, "sfc")) {
-    if(length(subpoints) > nr * 2) {
-      # from jitter.R
-      # todo: consider splitting this out into new function
-      od_disag_indices = rep(x = seq(nrow(od)), nrow_od_disag)
-      od_disag_ids = od[od_disag_indices, c(1:2)]
-      id_zones = c(od_disag_ids[[1]], od_disag_ids[[2]])
-      points_per_zone = data.frame(table(id_zones))
-      z = z[z[[1]] %in% points_per_zone[[1]], ]
-      freq = points_per_zone$Freq
-      subpoints_df = data.frame(id = as.character(seq(nr * 2)), azn = sort(id_zones))
-      subpoints_joined = sf::st_join(sf::st_sf(subpoints), z[1])
-      sel_list = lapply(1:nrow(points_per_zone), function(i) {
-        sample(
-          which(subpoints_joined[[1]] == points_per_zone[[1]][i]),
-          size = points_per_zone[[2]][i]
-          )
-      })
-      sel = unlist(sel_list)
-      names(subpoints_df)[2] = azn
-      subpoints = sf::st_sf(subpoints_df, geometry = subpoints[sel])
-    } else {
-      suppressMessages({
-        subpoints = sf::st_sf(
-          id = as.character(seq(nr * 2)),
-          geometry = subpoints
-        )
-      })
+    if (geomtype == "POLYGON" | geomtype == "MULTIPOLYGON") {
+      message("Converting polygons to points")
+      subpoints = sf::st_centroid(subpoints)
     }
+
+    points_per_zone = od_zonepoints(od, od$nrows)
+    subdf = od_subpoints_sample(od, od$nrows, z, azn, output_sf = FALSE)
+    subgeo = sf::st_geometry(subpoints)
+    subpoints = od_sample_points(subgeo, subdf, z, points_per_zone, azn = azn)
   }
 
   # detect and deal with non numeric inputs
@@ -166,26 +120,10 @@ od_disaggregate = function(od,
     od = od[c(names(od)[1:2], numeric_col_names)]
   }
 
-  o_in_zones = od[[1]] %in% z[[1]]
-  d_in_zones = od[[2]] %in% z[[1]]
-  if (!all(o_in_zones) || !all(d_in_zones)) {
-    stop("No matching zones associated with ",
-         which(!o_in_zones | !d_in_zones))
-  }
-
-  # If subpoints are not generated by the function:
-  if (!azn %in% names(subpoints)) {
-    suppressMessages({
-      subpoints = subpoints[z,]
-    })
-    if (nrow(subpoints) < nrow(subpoints)) {
-      message(nrow(subpoints) - nrow(subpoints),
-              " locations outside zones removed")
-    }
-    names(z)[1] = azn
-    suppressMessages({
-      subpoints = sf::st_join(subpoints, z[1])
-    })
+  o_in_z = od[[1]] %in% z[[1]]
+  d_in_z = od[[2]] %in% z[[1]]
+  if (!all(o_in_z) || !all(d_in_z)) {
+    stop("No matching zones associated with ", which(!o_in_z | !d_in_z))
   }
 
   # i = 1 # uncomment for debugging
@@ -226,6 +164,7 @@ od_disaggregate = function(od,
 
   # todo: could be sped-up
   od_new_sf = do.call(rbind, list_new)
+  od_new_sf$nrows = NULL
 
   # output od data with same number of columns but more rows
   od_new_sf
@@ -315,3 +254,50 @@ od_sample_vertices = function(x, fraction = 1) {
   }
   x_point
 }
+
+od_nrows = function(od, population_column, max_per_od) {
+  ceiling(od[[population_column]] / max_per_od)
+}
+
+od_zoneids = function(od) {
+  c(od[[1]], od[[2]])
+}
+
+od_disag_ids = function(od, nrows) {
+  od_disag_indices = rep(x = seq(nrow(od)), nrows)
+  od[od_disag_indices, c(1:2)] # was od_disag_ids, od with only ids
+}
+
+od_zonepoints = function(od, nrows) {
+  disag_ids = od_disag_ids(od, nrows)
+  id_zones = od_zoneids(disag_ids)
+  data.frame(table(id_zones))
+}
+
+od_subpoints_sample = function(od, nrows, z, azn, output_sf = TRUE) {
+  disag_ids = od_disag_ids(od, od$nrows)
+  id_zones = od_zoneids(disag_ids)
+  points_per_zone = od_zonepoints(od, od$nrows)
+  z = z[z[[1]] %in% points_per_zone[[1]], ]
+  freq = points_per_zone$Freq
+  subdf = data.frame(id = as.character(seq(sum(od$nrows) * 2)), azn = sort(id_zones))
+  names(subdf)[2] = azn
+  if (output_sf) {
+    subdf = sf::st_sf(subdf, geometry = sf::st_sample(z, freq))
+  }
+  subdf
+}
+
+od_sample_points = function(subpoints, subdf, z, per_zone, azn = "azn") {
+  subpoints_joined = sf::st_join(sf::st_sf(subpoints), z[1])
+  sel_list = lapply(1:nrow(per_zone), function(i) {
+    which_points = which(subpoints_joined[[1]] == per_zone[[1]][i])
+    if(length(which_points) == 0) {
+      return(NULL)
+    }
+    sample(which_points, size = per_zone[[2]][i])
+  })
+  sel = unlist(sel_list)
+  names(subdf)[2] = azn
+  sf::st_sf(subdf, geometry = subpoints[sel])
+}
diff --git a/R/jitter.R b/R/jitter.R
@@ -8,6 +8,8 @@
 #' @param zd Zones with ids matching the destination codes in input OD data
 #' @param subpoints_o Points within origin zones representing possible destinations
 #' @param subpoints_d Points within destination zones representing possible destinations
+#' @param disag Should the od_disaggregate function be used as a 'back end' where possible?
+#'   TRUE by default. See https://github.com/ITSLeeds/od/issues/39.
 #' @inheritParams od_disaggregate
 #'
 #' @return An `sf` data frame
@@ -21,8 +23,10 @@
 #' dlr = od_jitter(od, z) # desire_lines_random
 #' desire_lines = od_to_sf(od, z)
 #' plot(z$geometry)
-#' plot(dlr, add = TRUE, lwd = 3)
-#' plot(desire_lines, add = TRUE, lwd = 5)
+#' plot(dlr["all"], add = TRUE, lwd = 3)
+#' dlr$all
+#' desire_lines$all
+#' plot(desire_lines["all"], add = TRUE, lwd = 5)
 #'
 #' # Example showing use of subpoints
 #' subpoints_o = sf::st_sample(z, 200)
@@ -62,20 +66,36 @@
 #' # plot(od_sf[od$all > 200, 1])
 #' # plot(dlr3[od$all > 200, 1])
 #' # mapview::mapview(od_sf$geometry[od$all > 200])
-od_jitter = function(od,
-                     z,
-                     zd = NULL,
-                     subpoints_o = NULL,
-                     subpoints_d = NULL) {
+od_jitter = function(
+  od,
+  z,
+  zd = NULL,
+  subpoints = NULL,
+  code_append = "_ag",
+  population_column = 3,
+  max_per_od = 100000,
+  keep_ids = TRUE,
+  integer_outputs = FALSE,
+  # od_jitter-specific arguments (and zd)
+  subpoints_o = NULL,
+  subpoints_d = NULL,
+  disag = TRUE
+  ) {
+
   if (!methods::is(od, "sf")) {
     # the data structure to reproduce for matching OD pairs
     od = od::od_to_sf(od, z = z, zd = zd)
   }
+  disag = all(is.null(zd), is.null(subpoints_o), is.null(subpoints_d), disag)
+  if(disag) {
+    message("Using od_disaggregate") # todo remove once tested
+    return(od_disaggregate(od, z, subpoints, code_append, population_column,
+                           max_per_od, keep_ids, integer_outputs))
+  }
   odc_new = odc_original = od::od_coordinates(od)
   od = sf::st_drop_geometry(od)
   odc_df = data.frame(o = od[[1]], d = od[[2]], odc_original)
   z_geo = sf::st_geometry(z)
-  # browser()
   id_origins = od[[1]]
   points_per_zone = data.frame(table(id_origins))
   names(points_per_zone)[1] = names(z)[1]

diff --git a/man/od_disaggregate.Rd b/man/od_disaggregate.Rd