Skip to content

Commit

Permalink
Refactor od_disaggregate III (#38)
Browse files Browse the repository at this point in the history
* od_nrows()

* Refactor with od$nrows column

* Refactor, remove subzones arg

* Tidy up, remove lines!

* Add od_disag args to od_jitter for #39 (#40)

* Add od_disag args to od_jitter for #39

* Use od_dissaggrate backend for #39

* Update news
  • Loading branch information
Robinlovelace authored Dec 5, 2021
1 parent b0005a3 commit b0087b9
Show file tree
Hide file tree
Showing 6 changed files with 159 additions and 116 deletions.
3 changes: 2 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# od 0.3.2 (2021-12)

* `od_disaggregate()` can now take route networks as inputs (experimental)
* `od_disaggregate()` can now take route networks as inputs
* `od_disaggregate()` is now used as a 'back-end' for `od_jitter()` where possible by default

# od 0.3.1 (2021-07)

Expand Down
170 changes: 78 additions & 92 deletions R/aggregate.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@
#'
#' @inheritParams od_to_sf
#' @param od An origin-destination data frame
#' @param subpoints Points or linestrings within the zones defining the OD data start/end points
#' @param subzones Sub-zones within the zones defining the OD data
#' @param subpoints Points, lines or polygons within the zones.
#' These define the OD data start/end points.
#' @param code_append The name of the column containing aggregate zone names
#' @param population_column The column containing the total population (if it exists)
#' @param max_per_od Maximum flow in the population_column to assign per OD pair.
Expand Down Expand Up @@ -45,13 +45,16 @@
#' plot(disag[0])
#'
#' # with subpoints
#' subpoints = sf::st_sample(zones, 1000)
#' subpoints = sf::st_sample(zones, 100)
#' od_disag_subpoints = od_disaggregate(od, zones, subpoints = subpoints)
#' plot(subpoints)
#' plot(od_disag_subpoints$geometry, add = TRUE)
#'
#' # with buildings data
#' od_disag_buildings = od_disaggregate(od, zones, od_data_buildings)
#' summary(od_disag_buildings)
#' plot(od_disag_buildings)
#' plot(od_data_buildings$geometry)
#' plot(od_disag_buildings[3], add = TRUE)
#' # mapview::mapview(od_disag_buildings)
#'
#' od = od_data_df[1:2, 1:4]
Expand All @@ -69,92 +72,43 @@
#' plot(zones$geometry)
#' plot(od_road_network$geometry, add = TRUE, col = "green")
#' plot(od_disag_net$geometry, add = TRUE)
#' mapview::mapview(zones) + od_disag_net
#' mapview::mapview(zones) + od_disag_net + od_road_network
#' }
od_disaggregate = function(od,
z,
subzones = NULL,
subpoints = NULL,
code_append = "_ag",
population_column = 3,
max_per_od = 5,
keep_ids = TRUE,
integer_outputs = FALSE
) {

nrow_od_disag = ceiling(od[[population_column]] / max_per_od)
nr = sum(nrow_od_disag)
od$nrows = od_nrows(od, population_column, max_per_od)
azn = paste0(names(z)[1], code_append)

# is the input od data an sf object? tell the user and convert to df if so
if(methods::is(object = od, class2 = "sf")) {
message("Input object is sf, attempting to convert to a data frame")
od = sf::st_drop_geometry(od)
}

if (is.null(subpoints) && is.null(subzones)) {
message("Creating randomly sampled origin and destination points.")
# from jitter.R
# todo: consider splitting this out into new function
od_disag_indices = rep(x = seq(nrow(od)), nrow_od_disag)
od_disag_ids = od[od_disag_indices, c(1:2)]
id_zones = c(od_disag_ids[[1]], od_disag_ids[[2]])
points_per_zone = data.frame(table(id_zones))
z = z[z[[1]] %in% points_per_zone[[1]], ]
freq = points_per_zone$Freq
subpoints_df = data.frame(id = as.character(seq(nr * 2)), azn = sort(id_zones))
names(subpoints_df)[2] = azn
subpoints = sf::st_sf(subpoints_df, geometry = sf::st_sample(z, freq))
}

if (is.null(subpoints) && !is.null(subzones)) {
message("Converting subzones/lines to points")
geomtype = as.character(unique(sf::st_geometry_type(subzones)))
if (methods::is(subzones, "sf") && geomtype == "LINESTRING") {
subpoints = od_sample_vertices(subzones)
subzones = NULL
} else {
suppressWarnings({
subpoints = sf::st_centroid(subzones)
})
if (is.null(subpoints)) {
message("Creating origin and destination points at random locations")
subpoints = od_subpoints_sample(od, nrows = od$nrows, z = z, azn = azn)
} else {
# if subpoints have not been created they need to be sampled
geomtype = as.character(unique(sf::st_geometry_type(subpoints)))
if (geomtype == "LINESTRING") {
message("Converting lines on network to points")
subpoints = od_sample_vertices(subpoints)
}
}

# if subpoints is an sf object with class linestring
geomtype = as.character(unique(sf::st_geometry_type(subpoints)))
if (methods::is(subpoints, "sf") && geomtype == "POINT") {
subpoints = od_sample_vertices(subpoints)
}

if (methods::is(subpoints, "sfc")) {
if(length(subpoints) > nr * 2) {
# from jitter.R
# todo: consider splitting this out into new function
od_disag_indices = rep(x = seq(nrow(od)), nrow_od_disag)
od_disag_ids = od[od_disag_indices, c(1:2)]
id_zones = c(od_disag_ids[[1]], od_disag_ids[[2]])
points_per_zone = data.frame(table(id_zones))
z = z[z[[1]] %in% points_per_zone[[1]], ]
freq = points_per_zone$Freq
subpoints_df = data.frame(id = as.character(seq(nr * 2)), azn = sort(id_zones))
subpoints_joined = sf::st_join(sf::st_sf(subpoints), z[1])
sel_list = lapply(1:nrow(points_per_zone), function(i) {
sample(
which(subpoints_joined[[1]] == points_per_zone[[1]][i]),
size = points_per_zone[[2]][i]
)
})
sel = unlist(sel_list)
names(subpoints_df)[2] = azn
subpoints = sf::st_sf(subpoints_df, geometry = subpoints[sel])
} else {
suppressMessages({
subpoints = sf::st_sf(
id = as.character(seq(nr * 2)),
geometry = subpoints
)
})
if (geomtype == "POLYGON" | geomtype == "MULTIPOLYGON") {
message("Converting polygons to points")
subpoints = sf::st_centroid(subpoints)
}

points_per_zone = od_zonepoints(od, od$nrows)
subdf = od_subpoints_sample(od, od$nrows, z, azn, output_sf = FALSE)
subgeo = sf::st_geometry(subpoints)
subpoints = od_sample_points(subgeo, subdf, z, points_per_zone, azn = azn)
}

# detect and deal with non numeric inputs
Expand All @@ -166,26 +120,10 @@ od_disaggregate = function(od,
od = od[c(names(od)[1:2], numeric_col_names)]
}

o_in_zones = od[[1]] %in% z[[1]]
d_in_zones = od[[2]] %in% z[[1]]
if (!all(o_in_zones) || !all(d_in_zones)) {
stop("No matching zones associated with ",
which(!o_in_zones | !d_in_zones))
}

# If subpoints are not generated by the function:
if (!azn %in% names(subpoints)) {
suppressMessages({
subpoints = subpoints[z,]
})
if (nrow(subpoints) < nrow(subpoints)) {
message(nrow(subpoints) - nrow(subpoints),
" locations outside zones removed")
}
names(z)[1] = azn
suppressMessages({
subpoints = sf::st_join(subpoints, z[1])
})
o_in_z = od[[1]] %in% z[[1]]
d_in_z = od[[2]] %in% z[[1]]
if (!all(o_in_z) || !all(d_in_z)) {
stop("No matching zones associated with ", which(!o_in_z | !d_in_z))
}

# i = 1 # uncomment for debugging
Expand Down Expand Up @@ -226,6 +164,7 @@ od_disaggregate = function(od,

# todo: could be sped-up
od_new_sf = do.call(rbind, list_new)
od_new_sf$nrows = NULL

# output od data with same number of columns but more rows
od_new_sf
Expand Down Expand Up @@ -315,3 +254,50 @@ od_sample_vertices = function(x, fraction = 1) {
}
x_point
}

od_nrows = function(od, population_column, max_per_od) {
ceiling(od[[population_column]] / max_per_od)
}

od_zoneids = function(od) {
c(od[[1]], od[[2]])
}

od_disag_ids = function(od, nrows) {
od_disag_indices = rep(x = seq(nrow(od)), nrows)
od[od_disag_indices, c(1:2)] # was od_disag_ids, od with only ids
}

od_zonepoints = function(od, nrows) {
disag_ids = od_disag_ids(od, nrows)
id_zones = od_zoneids(disag_ids)
data.frame(table(id_zones))
}

od_subpoints_sample = function(od, nrows, z, azn, output_sf = TRUE) {
disag_ids = od_disag_ids(od, od$nrows)
id_zones = od_zoneids(disag_ids)
points_per_zone = od_zonepoints(od, od$nrows)
z = z[z[[1]] %in% points_per_zone[[1]], ]
freq = points_per_zone$Freq
subdf = data.frame(id = as.character(seq(sum(od$nrows) * 2)), azn = sort(id_zones))
names(subdf)[2] = azn
if (output_sf) {
subdf = sf::st_sf(subdf, geometry = sf::st_sample(z, freq))
}
subdf
}

od_sample_points = function(subpoints, subdf, z, per_zone, azn = "azn") {
subpoints_joined = sf::st_join(sf::st_sf(subpoints), z[1])
sel_list = lapply(1:nrow(per_zone), function(i) {
which_points = which(subpoints_joined[[1]] == per_zone[[1]][i])
if(length(which_points) == 0) {
return(NULL)
}
sample(which_points, size = per_zone[[2]][i])
})
sel = unlist(sel_list)
names(subdf)[2] = azn
sf::st_sf(subdf, geometry = subpoints[sel])
}
36 changes: 28 additions & 8 deletions R/jitter.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
#' @param zd Zones with ids matching the destination codes in input OD data
#' @param subpoints_o Points within origin zones representing possible destinations
#' @param subpoints_d Points within destination zones representing possible destinations
#' @param disag Should the od_disaggregate function be used as a 'back end' where possible?
#' TRUE by default. See https://github.com/ITSLeeds/od/issues/39.
#' @inheritParams od_disaggregate
#'
#' @return An `sf` data frame
Expand All @@ -21,8 +23,10 @@
#' dlr = od_jitter(od, z) # desire_lines_random
#' desire_lines = od_to_sf(od, z)
#' plot(z$geometry)
#' plot(dlr, add = TRUE, lwd = 3)
#' plot(desire_lines, add = TRUE, lwd = 5)
#' plot(dlr["all"], add = TRUE, lwd = 3)
#' dlr$all
#' desire_lines$all
#' plot(desire_lines["all"], add = TRUE, lwd = 5)
#'
#' # Example showing use of subpoints
#' subpoints_o = sf::st_sample(z, 200)
Expand Down Expand Up @@ -62,20 +66,36 @@
#' # plot(od_sf[od$all > 200, 1])
#' # plot(dlr3[od$all > 200, 1])
#' # mapview::mapview(od_sf$geometry[od$all > 200])
od_jitter = function(od,
z,
zd = NULL,
subpoints_o = NULL,
subpoints_d = NULL) {
od_jitter = function(
od,
z,
zd = NULL,
subpoints = NULL,
code_append = "_ag",
population_column = 3,
max_per_od = 100000,
keep_ids = TRUE,
integer_outputs = FALSE,
# od_jitter-specific arguments (and zd)
subpoints_o = NULL,
subpoints_d = NULL,
disag = TRUE
) {

if (!methods::is(od, "sf")) {
# the data structure to reproduce for matching OD pairs
od = od::od_to_sf(od, z = z, zd = zd)
}
disag = all(is.null(zd), is.null(subpoints_o), is.null(subpoints_d), disag)
if(disag) {
message("Using od_disaggregate") # todo remove once tested
return(od_disaggregate(od, z, subpoints, code_append, population_column,
max_per_od, keep_ids, integer_outputs))
}
odc_new = odc_original = od::od_coordinates(od)
od = sf::st_drop_geometry(od)
odc_df = data.frame(o = od[[1]], d = od[[2]], odc_original)
z_geo = sf::st_geometry(z)
# browser()
id_origins = od[[1]]
points_per_zone = data.frame(table(id_origins))
names(points_per_zone)[1] = names(z)[1]
Expand Down
16 changes: 8 additions & 8 deletions man/od_disaggregate.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit b0087b9

Please sign in to comment.