Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Full read/write support. #2

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,7 @@
test/.cpenv
.CondaPkg
*.arrow

# pixi environments
.pixi
*.egg-info
17 changes: 14 additions & 3 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,24 +1,35 @@
name = "GeoArrow"
uuid = "5bc3a8d9-1bfb-4624-ba94-a391279174d6"
authors = ["Maarten Pronk <[email protected]> and contributors"]
version = "0.1.0"
version = "0.2.0"

[deps]
Arrow = "69666777-d1a9-59fb-9406-91d4454c9d45"
DataAPI = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
Extents = "411431e0-e8b7-467b-b5e0-f676ba4f2910"
GeoFormatTypes = "68eda718-8dee-11e9-39e7-89f7f65f511f"
GeoInterface = "cf35fbd7-0cd7-5166-be24-54bfbe79505f"
JSON3 = "0f8b85d8-7281-11e9-16c2-39a750bddbf1"
Proj = "c94c279d-25a6-4763-9509-64d165bea63e"
StringViews = "354b36f9-a18e-4713-926e-db85100087ba"
Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
WellKnownGeometry = "0f680547-7be7-4555-8820-bb198eeb646b"

[compat]
Arrow = "2.4"
Arrow = "2.8"
DataAPI = "1"
DataFrames = "1.7.0"
Extents = "0.1"
GeoFormatTypes = "0.4"
GeoInterface = "1.2"
JSON3 = "1.14"
Proj = "1.8"
PythonCall = "0.9.23"
StringViews = "1.3"
Tables = "1.12"
WellKnownGeometry = "0.2"
julia = "1.6"
julia = "1.10"

[extras]
CondaPkg = "992eb4ea-22a4-4c89-a5bb-47a3300528ab"
Expand Down
5 changes: 5 additions & 0 deletions src/GeoArrow.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,11 @@ using GeoFormatTypes
using JSON3
using WellKnownGeometry
using Extents
using Tables
using StringViews
using Proj
using DataAPI
using DataFrames

include("type.jl")
include("arrow.jl")
Expand Down
59 changes: 43 additions & 16 deletions src/arrow.jl
Original file line number Diff line number Diff line change
Expand Up @@ -36,41 +36,68 @@ function ArrowTypes.JuliaType(::Val{BOX}, x, metadata)
end
ArrowTypes.ArrowKind(::Type{Geometry}) = ArrowTypes.ListKind()
ArrowTypes.ArrowKind(::Type{<:Geometry{PointTrait,D,T}}) where {D,T} = ArrowTypes.FixedSizeListKind{D,T}()
ArrowTypes.ArrowKind(::Type{<:GeoFormatTypes.WellKnownBinary}) = ArrowTypes.PrimitiveKind()
ArrowTypes.ArrowKind(::Type{<:GeoFormatTypes.WellKnownText}) = ArrowTypes.ListKind()
ArrowTypes.ArrowKind(::Type{Wrapper}) = ArrowTypes.ListKind()
ArrowTypes.ArrowKind(::Type{Wrapper{Seperated, T, G}}) where {T,G} = ArrowTypes.StructKind()
ArrowTypes.ArrowKind(::Type{Wrapper{Interleaved, T, N, G}}) where {T,N,G} = ArrowTypes.ListKind()
ArrowTypes.ArrowKind(::Type{Wrapper{Interleaved, PointTrait, N, G}}) where {N,G} = ArrowTypes.FixedSizeListKind{N,Float64}()
ArrowTypes.ArrowKind(::Type{Wrapper{WellKnownText, T, N, G}}) where {T,N,G} = ArrowTypes.ListKind()
ArrowTypes.ArrowKind(::Type{Wrapper{WellKnownBinary, T, N, G}}) where {T,N,G} = ArrowTypes.ListKind()

ArrowTypes.ArrowType(::Type{<:GeoFormatTypes.WellKnownBinary}) = Vector{UInt8}
ArrowTypes.ArrowType(::Type{<:GeoFormatTypes.WellKnownText}) = String
ArrowTypes.ArrowType(::Type{Geometry{X,D,T,G}}) where {X,D,T,G} = G
ArrowTypes.ArrowType(::Type{Wrapper{WellKnownText,T,N,G}}) where {T,N,G} = String
ArrowTypes.ArrowType(::Type{Wrapper{WellKnownBinary,T,N,G}}) where {T,N,G} = Vector{UInt8}
ArrowTypes.ArrowType(::Type{Wrapper{Interleaved,PointTrait,N,G}}) where {N,G} = NTuple{N,Float64}
ArrowTypes.ArrowType(::Type{Wrapper{Interleaved,LineStringTrait,N,G}}) where {N,G} = Vector{NTuple{N,Float64}}
ArrowTypes.ArrowType(::Type{Wrapper{Interleaved,MultiLineStringTrait,N,G}}) where {N,G} = Vector{Vector{NTuple{N,Float64}}}
ArrowTypes.ArrowType(::Type{Wrapper{Interleaved,MultiPointTrait,N,G}}) where {N,G} = Vector{NTuple{N,Float64}}
ArrowTypes.ArrowType(::Type{Wrapper{Interleaved,PolygonTrait,N,G}}) where {N,G} = Vector{Vector{NTuple{N,Float64}}}
ArrowTypes.ArrowType(::Type{Wrapper{Interleaved,MultiPolygonTrait,N,G}}) where {N,G} = Vector{Vector{Vector{NTuple{N,Float64}}}}
ArrowTypes.ArrowType(::Type{Wrapper{Seperated,PointTrait,N,G}}) where {N,G} = _named(NTuple{N,Float64})
ArrowTypes.ArrowType(::Type{Wrapper{Seperated,LineStringTrait,N,G}}) where {N,G} = Vector{_named(NTuple{N,Float64})}
ArrowTypes.ArrowType(::Type{Wrapper{Seperated,MultiLineStringTrait,N,G}}) where {N,G} = Vector{Vector{_named(NTuple{N,Float64})}}
ArrowTypes.ArrowType(::Type{Wrapper{Seperated,MultiPointTrait,N,G}}) where {N,G} = Vector{_named(NTuple{N,Float64})}
ArrowTypes.ArrowType(::Type{Wrapper{Seperated,PolygonTrait,N,G}}) where {N,G} = Vector{Vector{_named(NTuple{N,Float64})}}
ArrowTypes.ArrowType(::Type{Wrapper{Seperated,MultiPolygonTrait,N,G}}) where {N,G} = Vector{Vector{Vector{_named(NTuple{N,Float64})}}}

ArrowTypes.arrowname(::Type{Geometry{PointTrait}}) = POINT
ArrowTypes.arrowname(::Type{Geometry{LineStringTrait}}) = LINESTRING
ArrowTypes.arrowname(::Type{Geometry{PolygonTrait}}) = POLYGON
ArrowTypes.arrowname(::Type{Geometry{MultiPointTrait}}) = MULTIPOINT
ArrowTypes.arrowname(::Type{Geometry{MultiLineStringTrait}}) = MULTILINESTRING
ArrowTypes.arrowname(::Type{Geometry{MultiPolygonTrait}}) = MULTIPOLYGON
ArrowTypes.arrowname(::Type{<:GeoFormatTypes.WellKnownBinary}) = WKB
ArrowTypes.arrowname(::Type{<:GeoFormatTypes.WellKnownText}) = WKT
ArrowTypes.arrowname(::Type{Wrapper{WellKnownBinary,T,N,G}}) where {T,N,G} = WKB
ArrowTypes.arrowname(::Type{Wrapper{WellKnownText,T,N,G}}) where {T,N,G} = WKT
ArrowTypes.arrowname(::Type{Wrapper{E,PointTrait,N,G}}) where {E<:AbstractNativeEncoding,N,G} = POINT
ArrowTypes.arrowname(::Type{Wrapper{E,LineStringTrait,N,G}}) where {E<:AbstractNativeEncoding,N,G} = LINESTRING
ArrowTypes.arrowname(::Type{Wrapper{E,PolygonTrait,N,G}}) where {E<:AbstractNativeEncoding,N,G} = POLYGON
ArrowTypes.arrowname(::Type{Wrapper{E,MultiPointTrait,N,G}}) where {E<:AbstractNativeEncoding,N,G} = MULTIPOINT
ArrowTypes.arrowname(::Type{Wrapper{E,MultiLineStringTrait,N,G}}) where {E<:AbstractNativeEncoding,N,G} = MULTILINESTRING
ArrowTypes.arrowname(::Type{Wrapper{E,MultiPolygonTrait,N,G}}) where {E<:AbstractNativeEncoding,N,G} = MULTIPOLYGON
ArrowTypes.arrowname(::Type{Extents.Extent}) = BOX

ArrowTypes.toarrow(x::Geometry) = x.geom
ArrowTypes.toarrow(x::GeoFormatTypes.WellKnownText) = GeoFormatTypes.val(x)
ArrowTypes.toarrow(x::GeoFormatTypes.WellKnownBinary) = GeoFormatTypes.val(x)
ArrowTypes.toarrow(x::Extents.Extent{(:X, :Y)}) = (; xmin=ex.X[1], ymin=ex.Y[1], xmax=ex.X[2], ymax=ex.Y[2])
ArrowTypes.toarrow(x::Extents.Extent{(:X, :Y, :Z)}) = (; xmin=ex.X[1], ymin=ex.Y[1], zmin=ex.Z[1], xmax=ex.X[2], ymax=ex.Y[2], zmax=ex.Z[2])
ArrowTypes.toarrow(x::Extents.Extent{(:X, :Y, :Z, :M)}) = (; xmin=ex.X[1], ymin=ex.Y[1], zmin=ex.Z[1], mmin=ex.M[1], xmax=ex.X[2], ymax=ex.Y[2], zmax=ex.Z[2], mmax=ex.M[2])
# NOTE(review): `Wrapper` is declared with four type parameters `{E,T,N,G}`, so the
# `<:Geometry` bound below constrains the third slot (`N`, filled with
# `GeoInterface.ncoord(x)` by the `Wrapper(e, x)` constructor) rather than the wrapped
# geometry type `G`. This method therefore looks unreachable — confirm whether
# `Wrapper{E,T,N,<:Geometry}` was intended before relying on this pass-through.
ArrowTypes.toarrow(x::Wrapper{E,T,<:Geometry}) where {E,T} = x.geom
# Every other wrapper is serialized through `data`, which dispatches on the encoding.
ArrowTypes.toarrow(x::Wrapper) = data(x)
ArrowTypes.toarrow(ex::Extents.Extent{(:X, :Y)}) = (; xmin=ex.X[1], ymin=ex.Y[1], xmax=ex.X[2], ymax=ex.Y[2])
ArrowTypes.toarrow(ex::Extents.Extent{(:X, :Y, :Z)}) = (; xmin=ex.X[1], ymin=ex.Y[1], zmin=ex.Z[1], xmax=ex.X[2], ymax=ex.Y[2], zmax=ex.Z[2])
ArrowTypes.toarrow(ex::Extents.Extent{(:X, :Y, :Z, :M)}) = (; xmin=ex.X[1], ymin=ex.Y[1], zmin=ex.Z[1], mmin=ex.M[1], xmax=ex.X[2], ymax=ex.Y[2], zmax=ex.Z[2], mmax=ex.M[2])

ArrowTypes.fromarrow(::Type{GeoFormatTypes.WellKnownBinary}, x) = GeoFormatTypes.WellKnownBinary(GeoFormatTypes.Geom(), x)
ArrowTypes.fromarrow(::Type{GeoFormatTypes.WellKnownText}, x) = GeoFormatTypes.WellKnownText(GeoFormatTypes.Geom(), x)

ArrowTypes.fromarrow(::Type{GeoFormatTypes.WellKnownText}, x) = GeoFormatTypes.WellKnownText(GeoFormatTypes.Geom(), String(x)) # should be StringView
function ArrowTypes.fromarrow(::Type{Geometry{X}}, x) where {X}
nt = nested_eltype(x)
D = length(nt.types)
T = nt.types[1]
return Geometry{X,D,T}(x)
D = length(nonmissingtype(nt).types)
return Geometry{X,D,Float64}(x)
end
# Build a `Geometry{X}` from a coordinate given as a NamedTuple (the struct layout
# that `_named` describes for the `Seperated` encoding). The dimension is the number
# of fields; the coordinate type is fixed to Float64.
#
# This extends `ArrowTypes.fromarrow`, like every other `fromarrow` method in this
# file; an unqualified `fromarrow` would define a separate module-local function.
function ArrowTypes.fromarrow(::Type{Geometry{X}}, nt::NamedTuple) where {X}
    return Geometry{X,length(nt),Float64}(nt)
end
# Rebuild an `Extents.Extent` from its Arrow struct representation.
#
# `x` must expose `xmin`, `xmax`, `ymin`, `ymax`. When `zmin`/`zmax` (and additionally
# `mmin`/`mmax`) are present, as written by the 3D/4D `toarrow` methods, the Z and M
# bounds are restored as well, so an extent keeps its dimensions on a round trip.
# NOTE(review): assumes `x` supports `hasproperty` (e.g. a NamedTuple) — confirm
# against what Arrow passes for this extension type.
function ArrowTypes.fromarrow(::Type{Extents.Extent}, x)
    X = (x.xmin, x.xmax)
    Y = (x.ymin, x.ymax)
    hasproperty(x, :zmin) || return Extents.Extent(X=X, Y=Y)
    Z = (x.zmin, x.zmax)
    hasproperty(x, :mmin) || return Extents.Extent(X=X, Y=Y, Z=Z)
    M = (x.mmin, x.mmax)
    return Extents.Extent(X=X, Y=Y, Z=Z, M=M)
end

# Innermost element type of a (possibly nested) array value or array type.
# Values are forwarded by their type; array types are peeled one `eltype` level at a
# time until a non-array type is reached, which is returned unchanged.
nested_eltype(x) = nested_eltype(typeof(x))
function nested_eltype(::Type{T}) where {T}
    T <: AbstractArray || return T
    return nested_eltype(eltype(T))
end

# Map a 2-, 3- or 4-element homogeneous tuple type to the NamedTuple type with fields
# x, y[, z[, m]]. The field type is always Float64, whatever the tuple's element type.
_named(::Type{NTuple{2,T}}) where {T} = NamedTuple{(:x, :y),NTuple{2,Float64}}
_named(::Type{NTuple{3,T}}) where {T} = NamedTuple{(:x, :y, :z),NTuple{3,Float64}}
_named(::Type{NTuple{4,T}}) where {T} = NamedTuple{(:x, :y, :z, :m),NTuple{4,Float64}}
47 changes: 39 additions & 8 deletions src/io.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,27 @@
Write a geospatial table to a file. Like Arrow.write, but with geospatial metadata.
Any kwargs are passed to Arrow.write.
"""
function write(path, t; kwargs...)
projjson = ""
crs = Dict("crs" => projjson)
colmetadata =
Dict(:geometry => ["ARROW:extension:metadata" => JSON3.write(crs)])
Arrow.write(path, t; colmetadata, kwargs...)
function write(path, t; geocolumns=GeoInterface.geometrycolumns(t), crs=GeoInterface.crs(t), encoding::AbstractEncoding=Interleaved(), kwargs...)

if isnothing(crs)
dcrs = Dict{String,String}()
else
pcrs = convert(Proj.CRS, crs)
jcrs = convert(ProjJSON, pcrs)
dcrs = Dict("crs" => GeoFormatTypes.val(jcrs))
end
ct = Tables.columntable(t)
colmetadata = Dict{Symbol,Vector{Pair{String,String}}}()
for column in geocolumns
column in Tables.columnnames(t) || error("Geometry column $column not found in table")
data = Tables.getcolumn(t, column)
T = nonmissingtype(Tables.columntype(t, column))
GeoInterface.isgeometry(T) || error("Geometry in $column must support the GeoInterface")
ct = merge(ct, NamedTuple{(column,)}((Wrapper.(Ref(encoding), data),)))

colmetadata[column] = ["ARROW:extension:metadata" => JSON3.write(dcrs)]
end
Arrow.write(path, ct; colmetadata, kwargs...)
end

"""
Expand All @@ -19,7 +34,23 @@ Read a geospatial table from a file. Like Arrow.Table, but with geospatial metad
Any kwargs are passed to Arrow.Table.
"""
function read(path; kwargs...)
    # Read the Arrow table once and wrap it in a DataFrame without copying columns.
    # `kwargs` are forwarded to `Arrow.Table`.
    t = DataFrame(Arrow.Table(path; kwargs...), copycols=false)

    # set GeoInterface metadata
    geocolumns = Symbol[]
    for (column, metadata) in DataAPI.colmetadata(t)
        # Only columns tagged with a `geoarrow.*` extension name are geometry columns.
        haskey(metadata, "ARROW:extension:name") || continue
        startswith(metadata["ARROW:extension:name"], "geoarrow.") || continue
        push!(geocolumns, Symbol(column))

        # The extension metadata is a JSON document that may carry a `crs` entry.
        haskey(metadata, "ARROW:extension:metadata") || continue
        extmetadata = metadata["ARROW:extension:metadata"]
        isempty(extmetadata) && continue
        crs = get(JSON3.read(extmetadata), :crs, nothing)
        # The CRS is stored table-wide; with several geometry columns the one
        # processed last is the one that remains.
        isnothing(crs) || DataAPI.metadata!(t, "GEOINTERFACE:crs", crs)
    end
    isempty(geocolumns) ||
        DataAPI.metadata!(t, "GEOINTERFACE:geometrycolumns", Tuple(geocolumns))

    return t
end
35 changes: 35 additions & 0 deletions src/type.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# For reading
struct Geometry{X,D,T,G}
geom::G
end
Base.:(==)(x::Geometry{X,D,T,G}, y::Geometry{X,D,T,G}) where {X,D,T,G} = x.geom == y.geom
Base.show(io::IO, x::Geometry{X,D,T}) where {X,D,T} = print(io, "$X geometry in $(D)D with eltype $T")
Geometry{X,D,T}(x) where {X,D,T} = Geometry{X,D,T,typeof(x)}(x)
Geometry{PointTrait}(x::Vararg{T,D}) where {T,D} = Geometry{PointTrait,D,T}(reinterpret(NTuple{D,T}, x))
Expand All @@ -13,13 +15,46 @@ Base.getindex(x::Geometry{PointTrait,D,T}, i) where {D,T} = Base.getindex(x.geom
GeoInterface.isgeometry(::Type{<:Geometry}) = true
GeoInterface.ncoord(_, ::Geometry{X,D}) where {X,D} = D
GeoInterface.getcoord(::PointTrait, g::Geometry, i) = Base.getindex(g.geom, i)
GeoInterface.getcoord(::PointTrait, g::Geometry{X,D,T,<:GeoFormatTypes.MixedFormat}, i) where {X,D,T} = getcoord(PointTrait(), g.geom, i)
GeoInterface.geomtrait(::Geometry{X}) where {X} = X()
GeoInterface.ngeom(_, g::Geometry) = length(g.geom)
GeoInterface.ngeom(t, g::Geometry{X,D,T,<:GeoFormatTypes.MixedFormat}) where {X,D,T} = ngeom(t, g.geom)
GeoInterface.getgeom(_, g::Geometry, i) = Base.getindex(g, i)
GeoInterface.getgeom(t, g::Geometry{X,D,T,<:GeoFormatTypes.MixedFormat}, i) where {X,D,T} = getgeom(t, g.geom, i)

# Trait type of the direct children of a geometry with the given trait.
# Line strings, linear rings and multi-points are all made of points.
childtrait(::Union{LineStringTrait,LinearRingTrait,MultiPointTrait}) = PointTrait
childtrait(::PolygonTrait) = LinearRingTrait
childtrait(::MultiLineStringTrait) = LineStringTrait
childtrait(::MultiPolygonTrait) = PolygonTrait

# For writing
abstract type AbstractEncoding end
abstract type AbstractNativeEncoding <: AbstractEncoding end
struct Seperated <: AbstractNativeEncoding end
struct Interleaved <: AbstractNativeEncoding end
struct WellKnownBinary <: AbstractEncoding end
struct WellKnownText <: AbstractEncoding end

struct Wrapper{E,T,N,G}
geom::G
end
# Wrappers compare equal when all four type parameters match and the wrapped
# geometries are equal.
function Base.:(==)(a::Wrapper{E,T,N,G}, b::Wrapper{E,T,N,G}) where {E,T,N,G}
    return a.geom == b.geom
end
# Only the encoding `E` and the trait `T` are shown; the two remaining parameters
# are left free.
Base.show(io::IO, ::Wrapper{E,T}) where {E,T} = print(io, "$T geometry encoded as $E")
# Wrap `x` for writing with encoding `e`; the trait type and coordinate count are
# taken from GeoInterface.
function Wrapper(e::AbstractEncoding, x)
    trait = typeof(GeoInterface.geomtrait(x))
    ncoords = GeoInterface.ncoord(x)
    return Wrapper{typeof(e),trait,ncoords,typeof(x)}(x)
end
# Without an explicit encoding, `Interleaved` is used.
Wrapper(x) = Wrapper(Interleaved(), x)

data(x::Wrapper{E,T,N,G}) where {E,T,N,G} = _coordinates(E(), T(), N, x.geom)
evetion marked this conversation as resolved.
Show resolved Hide resolved
data(x::Wrapper{WellKnownBinary,T,G}) where {T,G} = getwkb(x.geom).val
data(x::Wrapper{WellKnownText,T,G}) where {T,G} = getwkt(x.geom).val

_coordinates(::Interleaved, t::AbstractPointTrait, N, geom) = NTuple{N,Float64}(getcoord(t, geom))
evetion marked this conversation as resolved.
Show resolved Hide resolved
_coordinates(::Seperated, t::AbstractPointTrait, N, geom) = nt(NTuple{N,Float64}(getcoord(t, geom)))
evetion marked this conversation as resolved.
Show resolved Hide resolved
# Coordinates of a non-point geometry: visit every child returned by
# `getgeom(t, geom)` and encode it with the same encoding `E` and coordinate count
# `N`, dispatching on the child's own trait.
function _coordinates(E::AbstractNativeEncoding, t::AbstractGeometryTrait, N, geom)
    return map(getgeom(t, geom)) do child
        _coordinates(E, GeoInterface.geomtrait(child), N, child)
    end
end

# Name the components of a Float64 coordinate tuple: x, y[, z[, m]].
nt(c::NTuple{2,Float64}) = (x=c[1], y=c[2])
nt(c::NTuple{3,Float64}) = (x=c[1], y=c[2], z=c[3])
nt(c::NTuple{4,Float64}) = (x=c[1], y=c[2], z=c[3], m=c[4])
62 changes: 55 additions & 7 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,13 @@ using Arrow
using GeoInterface
using Downloads
using Test
# ENV["JULIA_CONDAPKG_OFFLINE"] = true
using GeoFormatTypes
using DataFrames
using Extents

# ENV["JULIA_CONDAPKG_OFFLINE"] = true # for running locally
ENV["JULIA_CONDAPKG_ENV"] = joinpath(@__DIR__, ".cpenv")
using PythonCall
# ga = pyimport("geoarrow.pyarrow")
feather = pyimport("pyarrow.feather")

mkpath(joinpath(@__DIR__, "data/write"))
Expand Down Expand Up @@ -38,21 +41,66 @@ mkpath(joinpath(@__DIR__, "data/write"))
GeoArrow.write(io, t; compress=:zstd)
seekstart(io)
nt = GeoArrow.read(io, convert=true)
ngeom = t.geometry[1]
@test GeoInterface.isgeometry(geom)
ngeom = nt.geometry[1]
@test GeoInterface.testgeometry(ngeom)

@test ngeom == geom
@test GeoInterface.coordinates(ngeom) == GeoInterface.coordinates(geom)
end
end
end
@testset "Python" begin
    for arrowfn in filter(endswith("arrow"), readdir("data", join=true))
        @testset "$arrowfn" begin
            # Round-trip every reference file: read with GeoArrow, write it back out.
            t = GeoArrow.read(arrowfn)
            geom = t.geometry[1]

            fn = joinpath("data/write", basename(arrowfn))
            GeoArrow.write(fn, t)

            # Read with Python
            # gdf = geopandas.read_feather(fn)
            # print(gdf.geometry.type)
            pt = feather.read_table(fn)
            meta = pt.schema.field(-1).metadata
            @test length(meta.keys()) == 2
            @test any(occursin.("geoarrow", string.(meta.values())))

            # Read with Julia. Compare coordinates rather than the wrapper objects,
            # as the earlier round-trip testset does, and actually assert the result.
            tt = GeoArrow.read(fn)
            @test GeoInterface.coordinates(tt.geometry[1]) == GeoInterface.coordinates(geom)
        end
    end
end
@testset "Encodings" begin
g = GeoFormatTypes.WellKnownText(GeoFormatTypes.Geom(), "POINT (1 2)")

w = GeoArrow.Wrapper(GeoArrow.WellKnownText(), g)
@test ArrowTypes.ArrowKind(typeof(w)) == ArrowTypes.ListKind()
@test ArrowTypes.ArrowType(typeof(w)) == String
@test ArrowTypes.arrowname(typeof(w)) == Symbol("geoarrow.wkt")
@test ArrowTypes.toarrow(w) == "POINT (1.0 2.0)"

w = GeoArrow.Wrapper(GeoArrow.WellKnownBinary(), g)
@test ArrowTypes.ArrowKind(typeof(w)) == ArrowTypes.ListKind()
@test ArrowTypes.ArrowType(typeof(w)) == Vector{UInt8}
@test ArrowTypes.toarrow(w)[1:10] == UInt8[0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]

w = GeoArrow.Wrapper(g) # Encoding defaults to Interleaved
@test ArrowTypes.ArrowKind(typeof(w)) == ArrowTypes.FixedSizeListKind{2,Float64}()
@test ArrowTypes.ArrowType(typeof(w)) == NTuple{2,Float64}
@test ArrowTypes.arrowname(typeof(w)) == Symbol("geoarrow.point")
@test ArrowTypes.toarrow(w) == (1.0, 2.0)

w = GeoArrow.Wrapper(GeoArrow.Seperated(), g)
@test ArrowTypes.ArrowKind(typeof(w)) == ArrowTypes.StructKind()
@test ArrowTypes.ArrowType(typeof(w)) == @NamedTuple{x::Float64, y::Float64}
@test ArrowTypes.arrowname(typeof(w)) == Symbol("geoarrow.point")
@test ArrowTypes.toarrow(w) == (; x=1.0, y=2.0)
end
@testset "Simple" begin
df = DataFrame(a=1, geometry=[(1.,2.)])
GeoArrow.write("simple.arrow", df)
dfn = GeoArrow.read("simple.arrow")
@test GeoInterface.isgeometry(dfn.geometry[1])
end
end
Loading