diff --git a/CMakeLists.txt b/CMakeLists.txt index 42eee91..326fa4f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -71,14 +71,19 @@ endif() # Build -add_library(spherely MODULE +set(CPP_SOURCES src/accessors-geog.cpp src/creation.cpp src/geography.cpp src/io.cpp src/predicates.cpp - src/spherely.cpp - ) + src/spherely.cpp) + +if(${s2geography_VERSION} VERSION_GREATER_EQUAL "0.2.0") + set(CPP_SOURCES ${CPP_SOURCES} src/geoarrow.cpp) +endif() + +add_library(spherely MODULE ${CPP_SOURCES}) target_compile_definitions( spherely diff --git a/ci/environment-dev.yml b/ci/environment-dev.yml index f00ca15..a22d803 100644 --- a/ci/environment-dev.yml +++ b/ci/environment-dev.yml @@ -13,3 +13,4 @@ dependencies: - ninja - pytest - pip + - geoarrow-pyarrow diff --git a/ci/environment.yml b/ci/environment.yml index 6dca01c..28c8655 100644 --- a/ci/environment.yml +++ b/ci/environment.yml @@ -14,3 +14,4 @@ dependencies: - ninja - pytest - pip + - geoarrow-pyarrow diff --git a/docs/api.rst b/docs/api.rst index d96de68..5851fce 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -71,3 +71,4 @@ Input/Output from_wkt to_wkt + from_geoarrow diff --git a/src/arrow_abi.h b/src/arrow_abi.h new file mode 100644 index 0000000..b8b62b8 --- /dev/null +++ b/src/arrow_abi.h @@ -0,0 +1,57 @@ +#pragma once + +#include <stdint.h> + +#ifdef __cplusplus +extern "C" { +#endif + +// Extra guard for versions of Arrow without the canonical guard +#ifndef ARROW_FLAG_DICTIONARY_ORDERED + +#ifndef ARROW_C_DATA_INTERFACE +#define ARROW_C_DATA_INTERFACE + +#define ARROW_FLAG_DICTIONARY_ORDERED 1 +#define ARROW_FLAG_NULLABLE 2 +#define ARROW_FLAG_MAP_KEYS_SORTED 4 + +struct ArrowSchema { + // Array type description + const char* format; + const char* name; + const char* metadata; + int64_t flags; + int64_t n_children; + struct ArrowSchema** children; + struct ArrowSchema* dictionary; + + // Release callback + void (*release)(struct ArrowSchema*); + // Opaque producer-specific data + void* private_data; +}; + +struct ArrowArray { + // Array data description + int64_t length; + int64_t null_count; + int64_t offset; + int64_t n_buffers; + int64_t n_children; + const void** buffers; + struct ArrowArray** children; + struct ArrowArray* dictionary; + + // Release callback + void (*release)(struct ArrowArray*); + // Opaque producer-specific data + void* private_data; +}; + +#endif // ARROW_C_DATA_INTERFACE +#endif // ARROW_FLAG_DICTIONARY_ORDERED + +#ifdef __cplusplus +} +#endif diff --git a/src/geoarrow.cpp b/src/geoarrow.cpp new file mode 100644 index 0000000..b434d21 --- /dev/null +++ b/src/geoarrow.cpp @@ -0,0 +1,127 @@ +#include <s2geography.h> + +#include "arrow_abi.h" +#include "constants.hpp" +#include "creation.hpp" +#include "geography.hpp" +#include "pybind11.hpp" + +namespace py = pybind11; +namespace s2geog = s2geography; +using namespace spherely; + +py::array_t<PyObjectGeography> from_geoarrow(py::object input, + bool oriented, + bool planar, + float tessellate_tolerance, + py::object geometry_encoding) { + if (!py::hasattr(input, "__arrow_c_array__")) { + throw std::invalid_argument( + "input should be an Arrow-compatible array object (i.e. has an '__arrow_c_array__' " + "method)"); + } + py::tuple capsules = input.attr("__arrow_c_array__")(); + py::capsule schema_capsule = capsules[0]; + py::capsule array_capsule = capsules[1]; + + const ArrowSchema* schema = static_cast<const ArrowSchema*>(schema_capsule); + const ArrowArray* array = static_cast<const ArrowArray*>(array_capsule); + + s2geog::geoarrow::Reader reader; + std::vector<std::unique_ptr<s2geog::Geography>> s2geog_vec; + + s2geog::geoarrow::ImportOptions options; + options.set_oriented(oriented); + if (planar) { + auto tol = S1Angle::Radians(tessellate_tolerance / EARTH_RADIUS_METERS); + options.set_tessellate_tolerance(tol); + } + if (geometry_encoding.is(py::none())) { + try { + reader.Init(schema, options); + } catch (const std::exception& ex) { + // re-raise RuntimeError as ValueError + throw py::value_error(ex.what()); + } + } else if (geometry_encoding.equal(py::str("WKT"))) { + reader.Init(s2geog::geoarrow::Reader::InputType::kWKT, options); + } else if (geometry_encoding.equal(py::str("WKB"))) { + reader.Init(s2geog::geoarrow::Reader::InputType::kWKB, options); + } else { + throw std::invalid_argument("'geometry_encoding' should be one of None, 'WKT' or 'WKB'"); + } + + try { + reader.ReadGeography(array, 0, array->length, &s2geog_vec); + } catch (const std::exception& ex) { + // re-raise RuntimeError as ValueError + throw py::value_error(ex.what()); + } + + // Convert resulting vector to array of python objects + auto result = py::array_t<PyObjectGeography>(array->length); + py::buffer_info rbuf = result.request(); + py::object* rptr = static_cast<py::object*>(rbuf.ptr); + + py::ssize_t i = 0; + for (auto& s2geog_ptr : s2geog_vec) { + rptr[i] = make_py_geography(std::move(s2geog_ptr)); + i++; + } + return result; +} + +void init_geoarrow(py::module& m) { + m.def("from_geoarrow", + &from_geoarrow, + py::arg("input"), + py::pos_only(), + py::kw_only(), + py::arg("oriented") = false, + py::arg("planar") = false, + py::arg("tessellate_tolerance") = 100.0, + py::arg("geometry_encoding") = py::none(), + R"pbdoc( + Create an array of geographies from an Arrow array object with a GeoArrow + extension type. + + See https://geoarrow.org/ for details on the GeoArrow specification. + + This functions accepts any Arrow array object implementing + the `Arrow PyCapsule Protocol`_ (i.e. having an ``__arrow_c_array__`` + method). + + .. _Arrow PyCapsule Protocol: https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html + + Parameters + ---------- + input : pyarrow.Array, Arrow array + Any array object implementing the Arrow PyCapsule Protocol + (i.e. has a ``__arrow_c_array__`` method). The type of the array + should be one of the geoarrow geometry types. + oriented : bool, default False + Set to True if polygon ring directions are known to be correct + (i.e., exterior rings are defined counter clockwise and interior + rings are defined clockwise). + By default (False), it will return the polygon with the smaller + area. + planar : bool, default False + If set to True, the edges of linestrings and polygons are assumed + to be linear on the plane. In that case, additional points will + be added to the line while creating the geography objects, to + ensure every point is within 100m of the original line. + By default (False), it is assumed that the edges are spherical + (i.e. represent the shortest path on the sphere between two points). + tessellate_tolerance : float, default 100.0 + The maximum distance in meters that a point must be moved to + satisfy the planar edge constraint. This is only used if `planar` + is set to True. + geometry_encoding : str, default None + By default, the encoding is inferred from the GeoArrow extension + type of the input array. + However, for parsing WKT and WKB it is also possible to pass an + Arrow array without geoarrow type but with a plain string or + binary type, if specifying this keyword with "WKT" or "WKB", + respectively. + )pbdoc"); +} diff --git a/src/spherely.cpp b/src/spherely.cpp index f8ac02e..16c14ef 100644 --- a/src/spherely.cpp +++ b/src/spherely.cpp @@ -10,6 +10,10 @@ void init_creation(py::module&); void init_predicates(py::module&); void init_accessors(py::module&); void init_io(py::module&); +#if defined(S2GEOGRAPHY_VERSION_MAJOR) && \ + (S2GEOGRAPHY_VERSION_MAJOR >= 1 || S2GEOGRAPHY_VERSION_MINOR >= 2) +void init_geoarrow(py::module&); +#endif PYBIND11_MODULE(spherely, m) { m.doc() = R"pbdoc( @@ -25,6 +29,10 @@ PYBIND11_MODULE(spherely, m) { init_predicates(m); init_accessors(m); init_io(m); +#if defined(S2GEOGRAPHY_VERSION_MAJOR) && \ + (S2GEOGRAPHY_VERSION_MAJOR >= 1 || S2GEOGRAPHY_VERSION_MINOR >= 2) + init_geoarrow(m); +#endif #ifdef VERSION_INFO m.attr("__version__") = MACRO_STRINGIFY(VERSION_INFO); diff --git a/src/spherely.pyi b/src/spherely.pyi index 883b1f3..eb818ce 100644 --- a/src/spherely.pyi +++ b/src/spherely.pyi @@ -5,7 +5,9 @@ from typing import ( Generic, Iterable, Literal, + Protocol, Sequence, + Tuple, TypeVar, overload, ) @@ -196,3 +198,18 @@ def from_wkt( planar: bool = False, tessellate_tolerance: float = 100.0, ) -> npt.NDArray[Any]: ... + +class ArrowArrayExportable(Protocol): + def __arrow_c_array__( + self, requested_schema: object | None = None + ) -> Tuple[object, object]: ... + +def from_geoarrow( + input: ArrowArrayExportable, + /, + *, + oriented: bool = False, + planar: bool = False, + tessellate_tolerance: float = 100.0, + geometry_encoding: str | None = None, +) -> npt.NDArray[Any]: ... diff --git a/tests/test_geoarrow.py b/tests/test_geoarrow.py new file mode 100644 index 0000000..cb0b28b --- /dev/null +++ b/tests/test_geoarrow.py @@ -0,0 +1,111 @@ +from packaging.version import Version + +import numpy as np + +import pytest + +import spherely + + +pytestmark = pytest.mark.skipif( + Version(spherely.__s2geography_version__) < Version("0.2.0"), + reason="Needs s2geography >= 0.2.0", +) + +pa = pytest.importorskip("pyarrow") +ga = pytest.importorskip("geoarrow.pyarrow") + + +def test_from_geoarrow_wkt(): + + arr = ga.as_wkt(["POINT (1 1)", "POINT(2 2)", "POINT(3 3)"]) + + result = spherely.from_geoarrow(arr) + expected = spherely.points([1, 2, 3], [1, 2, 3]) + # object equality does not yet work + # np.testing.assert_array_equal(result, expected) + assert spherely.equals(result, expected).all() + + # without extension type + arr = pa.array(["POINT (1 1)", "POINT(2 2)", "POINT(3 3)"]) + result = spherely.from_geoarrow(arr, geometry_encoding="WKT") + assert spherely.equals(result, expected).all() + + +def test_from_geoarrow_wkb(): + + arr = ga.as_wkt(["POINT (1 1)", "POINT(2 2)", "POINT(3 3)"]) + arr_wkb = ga.as_wkb(arr) + + result = spherely.from_geoarrow(arr_wkb) + expected = spherely.points([1, 2, 3], [1, 2, 3]) + assert spherely.equals(result, expected).all() + + # without extension type + arr_wkb = ga.as_wkb(["POINT (1 1)", "POINT(2 2)", "POINT(3 3)"]) + arr = arr_wkb.cast(pa.binary()) + result = spherely.from_geoarrow(arr, geometry_encoding="WKB") + assert spherely.equals(result, expected).all() + + +def test_from_geoarrow_native(): + + arr = ga.as_wkt(["POINT (1 1)", "POINT(2 2)", "POINT(3 3)"]) + arr_point = ga.as_geoarrow(arr) + + result = spherely.from_geoarrow(arr_point) + expected = spherely.points([1, 2, 3], [1, 2, 3]) + assert spherely.equals(result, expected).all() + + +polygon_with_bad_hole_wkt = ( + "POLYGON " + "((20 35, 10 30, 10 10, 30 5, 45 20, 20 35)," + "(30 20, 20 25, 20 15, 30 20))" +) + + +def test_from_geoarrow_oriented(): + # by default re-orients the inner ring + arr = ga.as_geoarrow([polygon_with_bad_hole_wkt]) + + result = spherely.from_geoarrow(arr) + assert ( + str(result[0]) + == "POLYGON ((20 35, 10 30, 10 10, 30 5, 45 20, 20 35), (20 15, 20 25, 30 20, 20 15))" + ) + + # if we force to not orient, we get an error + with pytest.raises(ValueError, match="Inconsistent loop orientations detected"): + spherely.from_geoarrow(arr, oriented=True) + + +def test_from_wkt_planar(): + arr = ga.as_geoarrow(["LINESTRING (-64 45, 0 45)"]) + result = spherely.from_geoarrow(arr) + assert spherely.distance(result, spherely.point(-30.1, 45)) > 10000 + + result = spherely.from_geoarrow(arr, planar=True) + assert spherely.distance(result, spherely.point(-30.1, 45)) < 100 + + result = spherely.from_geoarrow(arr, planar=True, tessellate_tolerance=10) + assert spherely.distance(result, spherely.point(-30.1, 45)) < 10 + + +def test_from_geoarrow_no_extension_type(): + arr = pa.array(["POINT (1 1)", "POINT(2 2)", "POINT(3 3)"]) + + with pytest.raises(ValueError, match="Expected extension type"): + spherely.from_geoarrow(arr) + + +def test_from_geoarrow_invalid_encoding(): + arr = pa.array(["POINT (1 1)", "POINT(2 2)", "POINT(3 3)"]) + + with pytest.raises(ValueError, match="'geometry_encoding' should be one"): + spherely.from_geoarrow(arr, geometry_encoding="point") + + +def test_from_geoarrow_no_arrow_object(): + with pytest.raises(ValueError, match="input should be an Arrow-compatible array"): + spherely.from_geoarrow(np.array(["POINT (1 1)"], dtype=object))