Skip to content

Commit

Permalink
Updates popgetter crate to enable compilation to WASM
Browse files Browse the repository at this point in the history
* Update deps and features for WASM target:

  * Adds dep on fork of polars with 'parquet' feature that compiles
    to wasm

  * Adds 'wasm' and 'not_wasm' features for target-specific polars
    features to be included

* Refactor get_metrics as async to enable WASM version (#76)

* Add wasm_bindgen_test for get_metrics
  • Loading branch information
sgreenbury committed Sep 10, 2024
1 parent b0070d1 commit 55ac9e0
Show file tree
Hide file tree
Showing 8 changed files with 724 additions and 817 deletions.
1,264 changes: 521 additions & 743 deletions Cargo.lock

Large diffs are not rendered by default.

20 changes: 15 additions & 5 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,13 @@
members = [
"popgetter",
"popgetter_cli",
"popgetter_py"
# "popgetter_py"
]

# # TODO: fix incompatibility of forked polars with pyo3-polars=0.14
# # members = ["popgetter", "popgetter_cli", "popgetter_py"]
# members = ["popgetter", "popgetter_cli"]

resolver = "2"

[workspace.dependencies]
Expand All @@ -14,16 +18,22 @@ clap = "4.5.0"
comfy-table = "7.1.1"
dirs = "5"
enum_dispatch = "0.3"
flatgeobuf = "~4.1.0"
# Requires version 4.2.1 for WASM (see: https://github.com/flatgeobuf/flatgeobuf/pull/366)
flatgeobuf = "4.2.1"
futures = "0.3.30"
geo = "0.28.0"
geojson = "0.24.1"
geozero = "0.12.0"
geozero = "0.13.0"
httpmock = "0.7.0-rc.1"
itertools = "0.13.0"
log = "0.4.21"
nonempty = "0.10.0"
polars = "0.42.0"
# polars = "0.42.0"
# TODO: Exploring polars versions that can be compiled to WASM (see PR: https://github.com/pola-rs/polars/pull/16731)
# The current dependency is a UATP fork in which the "parquet" feature is updated to enable
# compilation to WASM. This must be temporary: crates.io does not allow git dependencies, so
# the dependency needs to point to a published polars crate again before publishing.
polars = { git = "https://github.com/Urban-Analytics-Technology-Platform/polars.git", branch = "rs-0.42.0-parquet-wasm", default-features = false}
pretty_env_logger = "0.5.0"
pyo3 = "0.22.0"
pyo3-polars = "0.16.0"
Expand All @@ -36,7 +46,7 @@ strum = "0.26"
strum_macros = "0.26.4"
tempfile = "3.12"
thiserror = "1"
tokio = "1.38.0"
tokio = { version="1.38.0", default-features = false }
toml = "0.8.13"
wkb = "0.7.1"
wkt = "0.10.3"
43 changes: 38 additions & 5 deletions popgetter/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,24 +13,57 @@ futures = { workspace = true }
geo = { workspace = true }
geojson = { workspace = true, optional = true }
geozero = { workspace = true, features = ["with-csv", "with-geojson"] }
httpmock = { workspace = true }
itertools = { workspace = true }
log = { workspace = true }
nonempty = { workspace = true, features = ["serialize"] }
polars = { workspace = true, features = ["lazy", "is_in", "http", "streaming", "parquet", "polars-io", "regex", "strings", "rows"] }
polars = { workspace = true, default-features = false}
regex = { workspace = true }
reqwest = { workspace = true, features = ["json"] }
serde = { workspace = true, features = ["derive"] }
serde_json = { workspace = true }
thiserror = { workspace = true }
tokio = { workspace = true, features = ["full"] }
tokio = { workspace = true, default-features = false}
wkb = { workspace = true }
wkt = { workspace = true }

[dev-dependencies]
[target.'cfg(not(target_arch="wasm32"))'.dev-dependencies]
httpmock = { workspace = true }
tempfile = { workspace = true }

[target.'cfg(target_arch="wasm32")'.dev-dependencies]
wasm-bindgen-test = { version = "0.3" }

[features]
default = ["cache", "formatters"]
default = ["cache", "formatters", "not_wasm"]
cache = ["dep:dirs"]
formatters = ["dep:geojson"]
not_wasm = [
"tokio/full",
"polars/csv",
"polars/temporal",
"polars/lazy",
"polars/is_in",
"polars/dtype-time",
"polars/parquet",
"polars/regex",
"polars/strings",
"polars/rows",
"polars/http",
"polars/streaming",
]
wasm = [
"tokio/rt",
"tokio/macros",
"polars/csv",
"polars/temporal",
"polars/lazy",
"polars/is_in",
"polars/dtype-time",
"polars/parquet",
"polars/regex",
"polars/strings",
"polars/rows",
# The "http" and "streaming" features do not compile to WASM because they introduce the "mio" dependency
# "polars/http",
# "polars/streaming"
]
2 changes: 2 additions & 0 deletions popgetter/src/geo.rs
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,8 @@ impl FromStr for BBox {
}

#[cfg(test)]
// TODO: update for wasm32. "httpmock" does not build for wasm32, so these tests are only compiled for non-wasm32 targets
#[cfg(not(target_arch = "wasm32"))]
mod tests {
use super::*;
use ::geozero::{geojson::GeoJson, ColumnValue};
Expand Down
5 changes: 4 additions & 1 deletion popgetter/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
#[cfg(feature = "cache")]
use std::path::Path;

use anyhow::Result;
#[cfg(feature = "cache")]
use anyhow::{anyhow, Context};
use data_request_spec::DataRequestSpec;
use log::{debug, error};
use log::debug;
#[cfg(feature = "cache")]
use log::error;
use metadata::Metadata;
use polars::frame::DataFrame;
use search::{Params, SearchParams, SearchResults};
Expand Down
51 changes: 37 additions & 14 deletions popgetter/src/metadata.rs
Original file line number Diff line number Diff line change
@@ -1,17 +1,25 @@
use std::default::Default;
use std::fmt::Display;
#[cfg(feature = "cache")]
use std::path::Path;

use anyhow::{anyhow, Result};
#[cfg(not(target_arch = "wasm32"))]
use anyhow::anyhow;
use futures::future::join_all;
use log::debug;
use log::info;
#[cfg(not(target_arch = "wasm32"))]
use polars::prelude::ScanArgsParquet;
#[cfg(feature = "cache")]
use polars::prelude::{ParquetCompression, ParquetWriter};
#[cfg(target_arch = "wasm32")]
use polars::{io::SerReader, prelude::ParquetReader};
use polars::{
lazy::{
dsl::col,
frame::{IntoLazy, LazyFrame, ScanArgsParquet},
frame::{IntoLazy, LazyFrame},
},
prelude::{DataFrame, JoinArgs, JoinType, ParquetCompression, ParquetWriter, UnionArgs},
prelude::{DataFrame, JoinArgs, JoinType, UnionArgs},
};
use tokio::try_join;

Expand Down Expand Up @@ -193,7 +201,7 @@ impl CountryMetadataLoader {

/// Load the Metadata catalogue for this country with
/// the specified metadata paths
pub async fn load(self, config: &Config) -> Result<Metadata> {
pub async fn load(self, config: &Config) -> anyhow::Result<Metadata> {
let t = try_join!(
self.load_metadata(PATHS::METRIC_METADATA, config),
self.load_metadata(PATHS::GEOMETRY_METADATA, config),
Expand All @@ -211,16 +219,31 @@ impl CountryMetadataLoader {
}

/// Performs a load of a given metadata parquet file
async fn load_metadata(&self, path: &str, config: &Config) -> Result<DataFrame> {
async fn load_metadata(&self, path: &str, config: &Config) -> anyhow::Result<DataFrame> {
let full_path = format!("{}/{}/{path}", config.base_path, self.country);
let args = ScanArgsParquet::default();

info!("Attempting to load dataframe from {full_path}");
tokio::task::spawn_blocking(move || {
LazyFrame::scan_parquet(&full_path, args)?
.collect()
.map_err(|e| anyhow!("Failed to load '{full_path}': {e}"))
})
.await?
#[cfg(not(target_arch = "wasm32"))]
{
let args = ScanArgsParquet::default();
tokio::task::spawn_blocking(move || {
LazyFrame::scan_parquet(&full_path, args)?
.collect()
.map_err(|e| anyhow!("Failed to load '{full_path}': {e}"))
})
.await?
}
#[cfg(target_arch = "wasm32")]
{
let bytes = reqwest::Client::new()
.get(&full_path)
.send()
.await?
.bytes()
.await?;
let cursor = std::io::Cursor::new(bytes);
Ok(ParquetReader::new(cursor).finish()?)
}
}
}

Expand All @@ -238,11 +261,11 @@ async fn get_country_names(config: &Config) -> anyhow::Result<Vec<String>> {

/// Load the metadata for a list of countries and merge them into
/// a single `Metadata` catalogue.
pub async fn load_all(config: &Config) -> Result<Metadata> {
pub async fn load_all(config: &Config) -> anyhow::Result<Metadata> {
let country_names = get_country_names(config).await?;

info!("Detected country names: {:?}", country_names);
let metadata: Result<Vec<Metadata>> = join_all(
let metadata: anyhow::Result<Vec<Metadata>> = join_all(
country_names
.iter()
.map(|c| CountryMetadataLoader::new(c).load(config)),
Expand Down
Loading

0 comments on commit 55ac9e0

Please sign in to comment.