Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a multilayer reading option for AOI #507

Open
wants to merge 2 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 44 additions & 1 deletion tests/utils/test_geoutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,14 @@
from typing import List

import geopandas as gpd
from geopandas.testing import assert_geodataframe_equal
import fiona
import numpy as np
import pytest
import rasterio
from _pytest.fixtures import SubRequest
from torchgeo.datasets.utils import extract_archive
from shapely.geometry import MultiPolygon
from shapely.geometry import mapping, MultiPolygon

from dataset.aoi import AOI
from utils.geoutils import create_new_raster_from_base, bounds_gdf, bounds_riodataset, overlap_poly1_rto_poly2, \
Expand Down Expand Up @@ -134,3 +136,44 @@ def test_gdf_mean_vertices_nb(self):
mean_vertices_per_label.append(mean_vertices)
mean_vertices_per_label_int = [round(mean_verts) for mean_verts in mean_vertices_per_label if mean_verts]
assert mean_vertices_per_label_int == [7, 7, 6, 36, 5, 5, 8, 5]

def test_check_gdf_load(self) -> None:
    """Test `check_gdf_load` with single-layer, multi-layer, missing and in-memory inputs.

    Covers four cases:
      1. a single-layer GeoPackage is loaded as-is;
      2. a two-layer GeoPackage has the features of all layers stacked;
      3. a non-existent path yields an empty GeoDataFrame;
      4. an already-loaded GeoDataFrame is returned unchanged.
    """
    gpkg_path = 'tests/data/massachusetts_buildings_kaggle/22978945_15.gpkg'
    label_gdf = gpd.read_file(gpkg_path)
    # TODO test the csv option

    # check normal use, one layer gpkg
    assert len(fiona.listlayers(gpkg_path)) == 1
    # assert_geodataframe_equal raises AssertionError on mismatch, so it
    # does not need to be wrapped in an `assert ... == None`.
    assert_geodataframe_equal(check_gdf_load(gpkg_path), label_gdf)

    # check use of gpkg multi layer
    multi_layer = 'tests/data/multi_layer.gpkg'
    schema = {'geometry': 'Polygon', 'properties': {'id': 'int'}}
    crs = str(label_gdf.crs)
    first_geom = label_gdf.geometry[0]
    try:
        # Write the same polygon into two distinct layers, with ids 0 and 1.
        for layer, feature_id in (('l1', 0), ('l2', 1)):
            with fiona.open(multi_layer, 'w', driver='GPKG', schema=schema,
                            crs=crs, layer=layer) as outlayer:
                outlayer.write({
                    'geometry': mapping(first_geom),
                    'properties': {'id': feature_id},
                })
        assert len(fiona.listlayers(multi_layer)) == 2
        multi_layer_df = gpd.GeoDataFrame(
            {'id': [0, 1], 'geometry': [first_geom, first_geom]},
            crs=crs,
        )
        assert_geodataframe_equal(check_gdf_load(multi_layer), multi_layer_df)
    finally:
        # Always clean up the temporary GeoPackage, even when an assertion
        # above fails, so a failing run does not leave a stale file behind.
        if os.path.exists(multi_layer):
            os.remove(multi_layer)

    # check if false gpkg given return empty gpd dataframe
    empty_gdf = check_gdf_load('fake.gpkg')
    assert empty_gdf.empty

    # check if given a geodataframe, it will just return the same geodataframe.
    assert_geodataframe_equal(check_gdf_load(label_gdf), label_gdf)
35 changes: 32 additions & 3 deletions utils/geoutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,11 @@

import ast
import pyproj
import fiona
from fiona._err import CPLE_OpenFailedError
from fiona.errors import DriverError
import geopandas as gpd
import pandas as pd
import numpy as np
import pystac
import rasterio
Expand Down Expand Up @@ -174,11 +176,24 @@ def check_rasterio_im_load(im):
raise ValueError("{} is not an accepted image format for rasterio.".format(im))


def check_gdf_load(gdf):
def check_gdf_load(gdf: Union[str, Path, gpd.GeoDataFrame]) -> gpd.GeoDataFrame:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

good job Charles! I like how you improved this part.

"""
Check if `gdf` is already loaded in, if not, load from geojson.
Copied from: https://github.com/CosmiQ/solaris/blob/main/solaris/utils/core.py#L52
"""

We added a way to read all the layers from a given GPKG, this
way if the GPKG contain more than one layer, it will stack all
geometry and other information find in all the layers.

Args:
gdf (Union[str, Path, gpd.GeoDataFrame]): Link or Geodataframe itself.

Raises:
ValueError: Error if the given `gdf` is not a format supported.

Returns:
gpd.GeoDataFrame: GeoDataFrame containing all information and geometry.
"""
if isinstance(gdf, (str, Path)):
if not is_url(gdf):
gdf = to_absolute_path(str(gdf))
Expand All @@ -190,7 +205,21 @@ def check_gdf_load(gdf):
gdf, GEOM_POSSIBLE_NAMES="geometry", KEEP_GEOM_COLUMNS="NO"
)
try:
return gpd.read_file(gdf)
layer_name = fiona.listlayers(gdf)
if len(layer_name) == 1:
return gpd.read_file(gdf)
else:
# multi_layers = gpd.GeoDataFrame()
for count, layername in enumerate(layer_name):
if count == 0:
multi_layers = gpd.read_file(gdf, layer=layername)
else:
geopkg = gpd.read_file(gdf, layer=layername)
multi_layers = pd.concat(
[multi_layers, geopkg],
ignore_index=True
)
return multi_layers
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In general, I understand why it makes sense to know if the label gpkg is a multi-layer or not. But do we have label gpkg in practice? I thought we always have a single-layer single-class gpkg for training? If this is not true, then I need to adjust my tiling script too.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We mostly use single-layer, single-class GPKGs, but we also want to support multi-class labels, and Pierre told me that those GPKGs will be multi-layer.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@CharlesAuthier let's discuss it all together with Mathieu next week, because it will potentially require many adjustments to the code.

except (DriverError, CPLE_OpenFailedError):
logging.warning(
f"GeoDataFrame couldn't be loaded: either {gdf} isn't a valid"
Expand Down