diff --git a/dev-environment.yml b/dev-environment.yml index 46b4da9f..eb4f3df5 100644 --- a/dev-environment.yml +++ b/dev-environment.yml @@ -19,6 +19,7 @@ dependencies: - pip # Optional dependencies + - laspy - scikit-image # Test dependencies @@ -49,4 +50,4 @@ dependencies: - typing-extensions - pip: - - -e ./ + - -e ./ diff --git a/geoutils/__init__.py b/geoutils/__init__.py index 1a587d88..52988bc1 100644 --- a/geoutils/__init__.py +++ b/geoutils/__init__.py @@ -2,10 +2,12 @@ GeoUtils is a Python package for the analysis of geospatial data. """ -from geoutils import examples, pointcloud, projtools, raster, vector # noqa +from geoutils import examples, projtools # noqa from geoutils._config import config # noqa -from geoutils.raster import Mask, Raster # noqa -from geoutils.vector import Vector # noqa + +from geoutils.raster import Mask, Raster # noqa isort:skip +from geoutils.vector import Vector # noqa isort:skip +from geoutils.pointcloud import PointCloud # noqa isort:skip try: from geoutils._version import __version__ as __version__ # noqa diff --git a/geoutils/interface/gridding.py b/geoutils/interface/gridding.py index 20e71821..6fb37cb0 100644 --- a/geoutils/interface/gridding.py +++ b/geoutils/interface/gridding.py @@ -3,8 +3,10 @@ import warnings from typing import Literal +import affine import geopandas as gpd import numpy as np +import rasterio as rio from scipy.interpolate import griddata from geoutils._typing import NDArrayNum @@ -16,7 +18,7 @@ def _grid_pointcloud( data_column_name: str = "b1", resampling: Literal["nearest", "linear", "cubic"] = "linear", dist_nodata_pixel: float = 1.0, -) -> NDArrayNum: +) -> tuple[NDArrayNum, affine.Affine]: """ Grid point cloud (possibly irregular coordinates) to raster (regular grid) using delaunay triangles interpolation. @@ -88,4 +90,7 @@ def _grid_pointcloud( # Flip Y axis of grid aligned_dem = np.flip(aligned_dem, axis=0) - return aligned_dem + # 3/ Derive output transform from input grid + transform_from_coords = rio.transform.from_origin(min(grid_coords[0]), max(grid_coords[1]), res_x, res_y) + + return aligned_dem, transform_from_coords diff --git a/geoutils/interface/raster_point.py b/geoutils/interface/raster_point.py index b9127f47..e7cc2d18 100644 --- a/geoutils/interface/raster_point.py +++ b/geoutils/interface/raster_point.py @@ -100,7 +100,7 @@ def _raster_to_pointcloud( as_array: bool, random_state: int | np.random.Generator | None, force_pixel_offset: Literal["center", "ul", "ur", "ll", "lr"], -) -> NDArrayNum | gu.Vector: +) -> NDArrayNum | gu.PointCloud: """ Convert a raster to a point cloud. See Raster.to_pointcloud() for details. """ @@ -227,15 +227,13 @@ def _raster_to_pointcloud( ) if not as_array: - points = gu.Vector( - gpd.GeoDataFrame( - pixel_data.T, - columns=all_column_names, - geometry=gpd.points_from_xy(x_coords_2, y_coords_2), - crs=source_raster.crs, - ) + pc = gpd.GeoDataFrame( + pixel_data.T, + columns=all_column_names, + geometry=gpd.points_from_xy(x_coords_2, y_coords_2), + crs=source_raster.crs, ) - return points + return gu.PointCloud(pc, data_column=data_column_name) else: # Merge the coordinates and pixel data an array of N x K # This has the downside of converting all the data to the same data type diff --git a/geoutils/pointcloud/__init__.py b/geoutils/pointcloud/__init__.py index 16393346..cf6e1541 100644 --- a/geoutils/pointcloud/__init__.py +++ b/geoutils/pointcloud/__init__.py @@ -1 +1 @@ -from geoutils.pointcloud.pointcloud import * # noqa +from geoutils.pointcloud.pointcloud import PointCloud # noqa diff --git a/geoutils/pointcloud/pointcloud.py b/geoutils/pointcloud/pointcloud.py index cf2f6f3c..1c69929f 100644 --- a/geoutils/pointcloud/pointcloud.py +++ b/geoutils/pointcloud/pointcloud.py @@ -1 +1,393 @@ -"""Module for future PointCloud class.""" +"""Module for PointCloud class.""" + +from __future__ import annotations + +import os.path +import pathlib +import warnings +from typing import Any, Iterable, Literal + +import geopandas as gpd +import numpy as np +import pandas as pd +from pyproj import CRS +from rasterio.coords import BoundingBox +from shapely.geometry.base import BaseGeometry + +import geoutils as gu +from geoutils._typing import ArrayLike, NDArrayNum, Number +from geoutils.interface.gridding import _grid_pointcloud + +# from geoutils.raster.sampling import subsample_array + +try: + import laspy + + has_laspy = True +except ImportError: + has_laspy = False + + +def _load_laspy_data(filename: str, columns: list[str]) -> gpd.GeoDataFrame: + """Load point cloud data from LAS/LAZ/COPC file.""" + + # Read file + las = laspy.read(filename) + + # Get data from requested columns + data = np.vstack([las[n] for n in columns]).T + + # Build geodataframe + gdf = gpd.GeoDataFrame( + geometry=gpd.points_from_xy(x=las.x, y=las.y, crs=las.header.parse_crs(prefer_wkt=False)), + data=data, + columns=columns, + ) + + return gdf + + +def _load_laspy_metadata( + filename: str, +) -> tuple[CRS, int, BoundingBox, pd.Index]: + """Load point cloud metadata from LAS/LAZ/COPC file.""" + + with laspy.open(filename) as f: + + crs = f.header.parse_crs(prefer_wkt=False) + nb_points = f.header.point_count + bounds = BoundingBox(left=f.header.x_min, right=f.header.x_max, bottom=f.header.y_min, top=f.header.y_max) + columns_names = pd.Index(list(f.header.point_format.dimension_names)) + + return crs, nb_points, bounds, columns_names + + +# def _write_laspy(filename: str, pc: gpd.GeoDataFrame): +# """Write a point cloud dataset as LAS/LAZ/COPC.""" +# +# with laspy.open(filename) as f: +# new_hdr = laspy.LasHeader(version="1.4", point_format=6) +# # You can set the scales and offsets to values that suits your data +# new_hdr.scales = np.array([1.0, 0.5, 0.1]) +# new_las = laspy.LasData(header = new_hdr, points=) +# +# return + + +class PointCloud(gu.Vector): # type: ignore[misc] + """ + The georeferenced point cloud. + + A point cloud is a vector of 2D point geometries associated to numeric values from a data column, and potentially + auxiliary data columns. + + Main attributes: + ds: :class:`geopandas.GeoDataFrame` + Geodataframe of the point cloud. + data_column: str + Name of point cloud data column. + crs: :class:`pyproj.crs.CRS` + Coordinate reference system of the point cloud. + bounds: :class:`rio.coords.BoundingBox` + Coordinate bounds of the point cloud. + + + All other attributes are derivatives of those attributes, or read from the file on disk. + See the API for more details. + """ + + def __init__( + self, + filename_or_dataset: str | pathlib.Path | gpd.GeoDataFrame | gpd.GeoSeries | BaseGeometry, + data_column: str, + ): + """ + Instantiate a point cloud from either a data column name and a vector (filename, GeoPandas dataframe or series, + or a Shapely geometry), or only with a point cloud file type. + + :param filename_or_dataset: Path to vector file, or GeoPandas dataframe or series, or Shapely geometry. + :param data_column: Name of main data column defining the point cloud. + """ + + self._ds: gpd.GeoDataFrame | None = None + self._name: str | None = None + self._crs: CRS | None = None + self._bounds: BoundingBox + self._data_column: str + self._data: NDArrayNum + self._nb_points: int + self._all_columns: pd.Index + + # If PointCloud is passed, simply point back to PointCloud + if isinstance(filename_or_dataset, PointCloud): + for key in filename_or_dataset.__dict__: + setattr(self, key, filename_or_dataset.__dict__[key]) + return + # For filename, rely on parent Vector class or LAS file reader + else: + if isinstance(filename_or_dataset, (str, pathlib.Path)) and os.path.splitext(filename_or_dataset)[-1] in [ + ".las", + ".laz", + ]: + # Load only metadata, and not the data + fn = filename_or_dataset if isinstance(filename_or_dataset, str) else filename_or_dataset.name + crs, nb_points, bounds, columns = _load_laspy_metadata(fn) + self._name = fn + self._crs = crs + self._nb_points = nb_points + self._all_columns = columns + self._bounds = bounds + self._ds = None + # Check on filename are done with Vector.__init__ + else: + super().__init__(filename_or_dataset) + # Verify that the vector can be cast as a point cloud + if not all(p == "Point" for p in self.ds.geom_type): + raise ValueError( + "This vector file contains non-point geometries, " "cannot be instantiated as a point cloud." + ) + + # Set data column following user input + self.set_data_column(new_data_column=data_column) + + # TODO: Could also move to Vector directly? + ############################################## + # OVERRIDDEN VECTOR METHODS TO SUPPORT LOADING + ############################################## + + @property + def ds(self) -> gpd.GeoDataFrame: + """Geodataframe of the point cloud.""" + # We need to override the Vector method to introduce the is_loaded dynamic for LAS files + if not self.is_loaded: + self.load() + return self._ds # type: ignore + + @ds.setter + def ds(self, new_ds: gpd.GeoDataFrame | gpd.GeoSeries) -> None: + """Set a new geodataframe for the point cloud.""" + # We need to override the setter Vector method because we have overridden the property method + # (even if the code below is the same) + if isinstance(new_ds, gpd.GeoDataFrame): + self._ds = new_ds + elif isinstance(new_ds, gpd.GeoSeries): + self._ds = gpd.GeoDataFrame(geometry=new_ds) + else: + raise ValueError("The dataset of a vector must be set with a GeoSeries or a GeoDataFrame.") + + @property + def crs(self) -> CRS: + """Coordinate reference system of the vector.""" + # Overriding method in Vector + if self.is_loaded: + return super().crs + else: + return self._crs + + @property + def bounds(self) -> BoundingBox: + # Overriding method in Vector + if self.is_loaded: + return super().bounds + else: + return self._bounds + + ##################################### + # NEW METHODS SPECIFIC TO POINT CLOUD + ##################################### + + @property + def data(self) -> NDArrayNum: + """ + Data of the point cloud. + + Points to the data column of the geodataframe, equivalent to calling self.ds[self.data_column]. + """ + # Triggers the loading mechanism through self.ds + return self.ds[self.data_column] + + @data.setter + def data(self, new_data: NDArrayNum) -> None: + """Set new data for the point cloud.""" + + self.ds[self.data_column] = new_data + + @property + def all_columns(self) -> pd.Index: + """Index of all columns of the point cloud, excluding the column of 2D point geometries.""" + # Overriding method in Vector + if self.is_loaded: + all_columns = super().columns + all_columns_nongeom = all_columns[all_columns != "geometry"] + return all_columns_nongeom + else: + return self._all_columns + + @property + def data_column(self) -> str: + """Name of data column of the point cloud.""" + return self._data_column + + @data_column.setter + def data_column(self, new_data_column: str) -> None: + self.set_data_column(new_data_column=new_data_column) + + def set_data_column(self, new_data_column: str) -> None: + """Set new column as point cloud data column.""" + + if new_data_column not in self.all_columns: + raise ValueError( + f"Data column {new_data_column} not found among columns, available columns " + f"are: {', '.join(self.all_columns)}." + ) + self._data_column = new_data_column + + @property + def is_loaded(self) -> bool: + """Whether the point cloud data is loaded""" + return self._ds is not None + + @property + def nb_points(self) -> int: + """Number of points in the point cloud.""" + # New method for point cloud + if self.is_loaded: + return len(self.ds) + else: + return self._nb_points + + def load(self, columns: Literal["all", "main"] | list[str] = "main") -> None: + """ + Load point cloud from disk (only supported for LAS files). + + :param columns: Columns to load. Defaults to main data column only. + """ + + if self.is_loaded: + raise ValueError("Data are already loaded.") + + if self.name is None: + raise AttributeError( + "Cannot load as filename is not set anymore. Did you manually update the filename attribute?" + ) + + if columns == "all": + columns_to_load = self.all_columns + elif columns == "main": + columns_to_load = [self.data_column] + else: + columns_to_load = columns + + ds = _load_laspy_data(filename=self.name, columns=columns_to_load) + self._ds = ds + + @classmethod + def from_array(cls, array: NDArrayNum, crs: CRS, data_column: str = "z") -> PointCloud: + """Create point cloud from a 3 x N or N x 3 array of X coordinate, Y coordinates and Z values.""" + + # Check shape + if array.shape[0] != 3 and array.shape[1] != 3: + raise ValueError("Array must be of shape 3xN or Nx3.") + + # Make the first axis the one with size 3 + if array.shape[0] != 3: + array_in = array.T + else: + array_in = array + + # Build geodataframe + gdf = gpd.GeoDataFrame( + geometry=gpd.points_from_xy(x=array_in[0, :], y=array_in[1, :], crs=crs), data={data_column: array_in[2, :]} + ) + + return cls(filename_or_dataset=gdf, data_column=data_column) + + @classmethod + def from_tuples( + cls, tuples_xyz: Iterable[tuple[Number, Number, Number]], crs: CRS, data_column: str = "z" + ) -> PointCloud: + """Create point cloud from an iterable of 3-tuples (X coordinate, Y coordinate, Z value).""" + + return cls.from_array(np.array(tuples_xyz), crs=crs, data_column=data_column) + + @classmethod + def from_xyz(cls, x: ArrayLike, y: ArrayLike, z: ArrayLike, crs: CRS, data_column: str = "z") -> PointCloud: + """Create point cloud from three 1D array-like coordinates for X/Y/Z.""" + + return cls.from_array(np.stack((x, y, z)), crs=crs, data_column=data_column) + + def to_array(self) -> NDArrayNum: + """Convert point cloud to a 3 x N array of X coordinates, Y coordinates and Z values.""" + + return np.stack((self.geometry.x.values, self.geometry.y.values, self.ds[self.data_column].values), axis=0) + + def to_tuples(self) -> Iterable[tuple[Number, Number, Number]]: + """Convert point cloud to a list of 3-tuples (X coordinate, Y coordinate, Z value).""" + + return list(zip(self.geometry.x.values, self.geometry.y.values, self.ds[self.data_column].values)) + + def to_xyz(self) -> tuple[NDArrayNum, NDArrayNum, NDArrayNum]: + """Convert point cloud to three 1D arrays of coordinates for X/Y/Z.""" + + return self.geometry.x.values, self.geometry.y.values, self.ds[self.data_column].values + + def pointcloud_equal(self, other: PointCloud, **kwargs: Any) -> bool: + """ + Check if two point clouds are equal. + + This means that: + - The two vectors (geodataframes) are equal. + - The data column is the same for both point clouds. + + Keyword arguments are passed to geopandas.assert_geodataframe_equal. + """ + + # Vector equality + vector_eq = super().vector_equal(other, **kwargs) + # Data column equality + data_column_eq = self.data_column == other.data_column + + return all([vector_eq, data_column_eq]) + + def grid( + self, + ref: gu.Raster | None, + grid_coords: tuple[NDArrayNum, NDArrayNum] | None, + resampling: Literal["nearest", "linear", "cubic"], + dist_nodata_pixel: float = 1.0, + ) -> gu.Raster: + """Grid point cloud into a raster.""" + + if isinstance(ref, gu.Raster): + if grid_coords is None: + warnings.warn( + "Both reference raster and grid coordinates were passed for gridding, " + "using only the reference raster." + ) + grid_coords = ref.coords(grid=False) + else: + grid_coords = grid_coords + + array, transform = _grid_pointcloud( + self.ds, + grid_coords=grid_coords, + data_column_name=self.data_column, + resampling=resampling, + dist_nodata_pixel=dist_nodata_pixel, + ) + + return gu.Raster.from_array(data=array, transform=transform, crs=self.crs, nodata=None) + + # def subsample(self, subsample: float | int, random_state: int | np.random.Generator | None = None) -> PointCloud: + # + # indices = subsample_array( + # array=self.data, subsample=subsample, return_indices=True, random_state=random_state + # ) + # + # return PointCloud(self.ds[indices]) + + # @classmethod + # def from_raster(cls, raster: gu.Raster) -> PointCloud: + # """Create a point cloud from a raster. Equivalent with Raster.to_pointcloud.""" + # + # pc = _raster_to_pointcloud(source_raster=raster) diff --git a/geoutils/raster/raster.py b/geoutils/raster/raster.py index aaedfbf0..8db1d716 100644 --- a/geoutils/raster/raster.py +++ b/geoutils/raster/raster.py @@ -28,6 +28,7 @@ from rasterio.enums import Resampling from rasterio.plot import show as rshow +import geoutils as gu from geoutils._config import config from geoutils._typing import ( ArrayLike, @@ -69,7 +70,6 @@ decode_sensor_metadata, parse_and_convert_metadata_from_filename, ) -from geoutils.vector.vector import Vector # If python38 or above, Literal is builtin. Otherwise, use typing_extensions try: @@ -590,7 +590,7 @@ def bounds(self) -> rio.coords.BoundingBox: return _bounds(transform=self.transform, shape=self.shape) @property - def footprint(self) -> Vector: + def footprint(self) -> gu.Vector: """Footprint of the raster.""" return self.get_footprint_projected(self.crs) @@ -638,7 +638,7 @@ def indexes(self) -> tuple[int, ...]: @property def name(self) -> str | None: - """Name of the file on disk, if it exists.""" + """Name of the raster file on disk, if it exists.""" return self._name def set_area_or_point( @@ -2212,7 +2212,7 @@ def __array_function__( @overload def crop( self: RasterType, - crop_geom: RasterType | Vector | list[float] | tuple[float, ...], + crop_geom: RasterType | gu.Vector | list[float] | tuple[float, ...], mode: Literal["match_pixel"] | Literal["match_extent"] = "match_pixel", *, inplace: Literal[False] = False, @@ -2221,7 +2221,7 @@ def crop( @overload def crop( self: RasterType, - crop_geom: RasterType | Vector | list[float] | tuple[float, ...], + crop_geom: RasterType | gu.Vector | list[float] | tuple[float, ...], mode: Literal["match_pixel"] | Literal["match_extent"] = "match_pixel", *, inplace: Literal[True], @@ -2230,7 +2230,7 @@ def crop( @overload def crop( self: RasterType, - crop_geom: RasterType | Vector | list[float] | tuple[float, ...], + crop_geom: RasterType | gu.Vector | list[float] | tuple[float, ...], mode: Literal["match_pixel"] | Literal["match_extent"] = "match_pixel", *, inplace: bool = False, @@ -2238,7 +2238,7 @@ def crop( def crop( self: RasterType, - crop_geom: RasterType | Vector | list[float] | tuple[float, ...], + crop_geom: RasterType | gu.Vector | list[float] | tuple[float, ...], mode: Literal["match_pixel"] | Literal["match_extent"] = "match_pixel", *, inplace: bool = False, @@ -2670,7 +2670,7 @@ def get_bounds_projected(self, out_crs: CRS, densify_points: int = 5000) -> rio. return new_bounds - def get_footprint_projected(self, out_crs: CRS, densify_points: int = 5000) -> Vector: + def get_footprint_projected(self, out_crs: CRS, densify_points: int = 5000) -> gu.Vector: """ Get raster footprint projected in a specified CRS. @@ -2682,7 +2682,7 @@ def get_footprint_projected(self, out_crs: CRS, densify_points: int = 5000) -> V Reduce if time computation is really critical (ms) or increase if extent is not accurate enough. """ - return Vector( + return gu.Vector( _get_footprint_projected( bounds=self.bounds, in_crs=self.crs, out_crs=out_crs, densify_points=densify_points ) @@ -3315,7 +3315,7 @@ def to_pointcloud( as_array: Literal[True], random_state: int | np.random.Generator | None = None, force_pixel_offset: Literal["center", "ul", "ur", "ll", "lr"] = "ul", - ) -> Vector: ... + ) -> gu.Vector: ... @overload def to_pointcloud( @@ -3330,7 +3330,7 @@ def to_pointcloud( as_array: bool = False, random_state: int | np.random.Generator | None = None, force_pixel_offset: Literal["center", "ul", "ur", "ll", "lr"] = "ul", - ) -> NDArrayNum | Vector: ... + ) -> NDArrayNum | gu.Vector: ... def to_pointcloud( self, @@ -3343,7 +3343,7 @@ def to_pointcloud( as_array: bool = False, random_state: int | np.random.Generator | None = None, force_pixel_offset: Literal["center", "ul", "ur", "ll", "lr"] = "ul", - ) -> NDArrayNum | Vector: + ) -> NDArrayNum | gu.PointCloud: """ Convert raster to point cloud. @@ -3447,7 +3447,7 @@ def polygonize( self, target_values: Number | tuple[Number, Number] | list[Number] | NDArrayNum | Literal["all"] = "all", data_column_name: str = "id", - ) -> Vector: + ) -> gu.Vector: """ Polygonize the raster into a vector. @@ -3456,14 +3456,14 @@ def polygonize( :param data_column_name: Data column name to be associated with target values in the output vector (defaults to "id"). - :returns: Vector containing the polygonized geometries associated to target values. + :returns: gu.Vector containing the polygonized geometries associated to target values. """ return _polygonize(source_raster=self, target_values=target_values, data_column_name=data_column_name) def proximity( self, - vector: Vector | None = None, + vector: gu.Vector | None = None, target_values: list[float] | None = None, geometry_type: str = "boundary", in_or_out: Literal["in"] | Literal["out"] | Literal["both"] = "both", @@ -3479,7 +3479,7 @@ def proximity( passing "geometry", or any lower dimensional geometry attribute such as "centroid", "envelope" or "convex_hull". See all geometry attributes in the Shapely documentation at https://shapely.readthedocs.io/. - :param vector: Vector for which to compute the proximity to geometry, + :param vector: gu.Vector for which to compute the proximity to geometry, if not provided computed on this raster target pixels. :param target_values: (Only with raster) List of target values to use for the proximity, defaults to all non-zero values. @@ -3777,7 +3777,7 @@ def reproject( @overload def crop( self: Mask, - crop_geom: Mask | Vector | list[float] | tuple[float, ...], + crop_geom: Mask | gu.Vector | list[float] | tuple[float, ...], mode: Literal["match_pixel"] | Literal["match_extent"] = "match_pixel", *, inplace: Literal[False] = False, @@ -3786,7 +3786,7 @@ def crop( @overload def crop( self: Mask, - crop_geom: Mask | Vector | list[float] | tuple[float, ...], + crop_geom: Mask | gu.Vector | list[float] | tuple[float, ...], mode: Literal["match_pixel"] | Literal["match_extent"] = "match_pixel", *, inplace: Literal[True], @@ -3795,7 +3795,7 @@ def crop( @overload def crop( self: Mask, - crop_geom: Mask | Vector | list[float] | tuple[float, ...], + crop_geom: Mask | gu.Vector | list[float] | tuple[float, ...], mode: Literal["match_pixel"] | Literal["match_extent"] = "match_pixel", *, inplace: bool = False, @@ -3803,7 +3803,7 @@ def crop( def crop( self: Mask, - crop_geom: Mask | Vector | list[float] | tuple[float, ...], + crop_geom: Mask | gu.Vector | list[float] | tuple[float, ...], mode: Literal["match_pixel"] | Literal["match_extent"] = "match_pixel", *, inplace: bool = False, @@ -3828,7 +3828,7 @@ def polygonize( self, target_values: Number | tuple[Number, Number] | list[Number] | NDArrayNum | Literal["all"] = 1, data_column_name: str = "id", - ) -> Vector: + ) -> gu.Vector: # If target values is passed but does not correspond to 0 or 1, raise a warning if not isinstance(target_values, (int, np.integer, float, np.floating)) or target_values not in [0, 1]: warnings.warn("In-value converted to 1 for polygonizing boolean mask.") @@ -3847,7 +3847,7 @@ def polygonize( def proximity( self, - vector: Vector | None = None, + vector: gu.Vector | None = None, target_values: list[float] | None = None, geometry_type: str = "boundary", in_or_out: Literal["in"] | Literal["out"] | Literal["both"] = "both", diff --git a/geoutils/vector/vector.py b/geoutils/vector/vector.py index 083224a9..d79917db 100644 --- a/geoutils/vector/vector.py +++ b/geoutils/vector/vector.py @@ -5,7 +5,6 @@ from __future__ import annotations import pathlib -import warnings from collections import abc from os import PathLike from typing import ( @@ -28,7 +27,7 @@ from geopandas.testing import assert_geodataframe_equal from mpl_toolkits.axes_grid1 import make_axes_locatable from pandas._typing import WriteBuffer -from rasterio.crs import CRS +from pyproj import CRS from shapely.geometry.base import BaseGeometry import geoutils as gu @@ -50,7 +49,7 @@ class Vector: """ - The georeferenced vector + The georeferenced vector. Main attributes: ds: :class:`geopandas.GeoDataFrame` @@ -64,37 +63,105 @@ class Vector: See the API for more details. """ - def __init__(self, filename_or_dataset: str | pathlib.Path | gpd.GeoDataFrame | gpd.GeoSeries | BaseGeometry): + def __init__( + self, filename_or_dataset: str | pathlib.Path | gpd.GeoDataFrame | gpd.GeoSeries | BaseGeometry | dict[str, Any] + ): """ Instantiate a vector from either a filename, a GeoPandas dataframe or series, or a Shapely geometry. :param filename_or_dataset: Path to file, or GeoPandas dataframe or series, or Shapely geometry. """ + self._name: str | None + self._ds: gpd.GeoDataFrame | None = None + + # If Vector is passed, simply point back to Vector + if isinstance(filename_or_dataset, Vector): + for key in filename_or_dataset.__dict__: + setattr(self, key, filename_or_dataset.__dict__[key]) + return # If filename is passed - if isinstance(filename_or_dataset, (str, pathlib.Path)): - with warnings.catch_warnings(): - # This warning shows up in numpy 1.21 (2021-07-09) - warnings.filterwarnings("ignore", ".*attribute.*array_interface.*Polygon.*") - ds = gpd.read_file(filename_or_dataset) - self._ds = ds - self._name: str | gpd.GeoDataFrame | None = filename_or_dataset + elif isinstance(filename_or_dataset, (str, pathlib.Path)): + ds = gpd.read_file(filename_or_dataset) # If GeoPandas or Shapely object is passed elif isinstance(filename_or_dataset, (gpd.GeoDataFrame, gpd.GeoSeries, BaseGeometry)): - self._name = None if isinstance(filename_or_dataset, gpd.GeoDataFrame): - self._ds = filename_or_dataset + ds = filename_or_dataset elif isinstance(filename_or_dataset, gpd.GeoSeries): - self._ds = gpd.GeoDataFrame(geometry=filename_or_dataset) + ds = gpd.GeoDataFrame(geometry=filename_or_dataset) else: - self._ds = gpd.GeoDataFrame({"geometry": [filename_or_dataset]}, crs=None) - # If Vector is passed, simply point back to Vector - elif isinstance(filename_or_dataset, Vector): - for key in filename_or_dataset.__dict__: - setattr(self, key, filename_or_dataset.__dict__[key]) - return + ds = gpd.GeoDataFrame({"geometry": [filename_or_dataset]}, crs=None) else: - raise TypeError("Filename argument should be a string, Path or geopandas.GeoDataFrame.") + raise TypeError("Filename argument should be a string, path or geodataframe.") + + # Set geodataframe + self.ds = ds + + # Write name attribute + if isinstance(filename_or_dataset, str): + self._name = filename_or_dataset + if isinstance(filename_or_dataset, pathlib.Path): + self._name = filename_or_dataset.name + + @property + def crs(self) -> CRS: + """Coordinate reference system of the vector.""" + return self.ds.crs + + @property + def ds(self) -> gpd.GeoDataFrame: + """Geodataframe of the vector.""" + return self._ds + + @ds.setter + def ds(self, new_ds: gpd.GeoDataFrame | gpd.GeoSeries) -> None: + """Set a new geodataframe.""" + + if isinstance(new_ds, gpd.GeoDataFrame): + self._ds = new_ds + elif isinstance(new_ds, gpd.GeoSeries): + self._ds = gpd.GeoDataFrame(geometry=new_ds) + else: + raise ValueError("The dataset of a vector must be set with a GeoSeries or a GeoDataFrame.") + + def vector_equal(self, other: gu.Vector, **kwargs: Any) -> bool: + """ + Check if two vectors are equal. + + Keyword arguments are passed to geopandas.assert_geodataframe_equal. + """ + + try: + assert_geodataframe_equal(self.ds, other.ds, **kwargs) + vector_eq = True + except AssertionError: + vector_eq = False + + return vector_eq + + @property + def name(self) -> str | None: + """Name on disk, if it exists.""" + return self._name + + @property + def geometry(self) -> gpd.GeoSeries: + return self.ds.geometry + + @property + def columns(self) -> pd.Index: + return self.ds.columns + + @property + def index(self) -> pd.Index: + return self.ds.index + + def copy(self: VectorType) -> VectorType: + """Return a copy of the vector.""" + # Utilise the copy method of GeoPandas + new_vector = self.__new__(type(self)) + new_vector.__init__(self.ds.copy()) # type: ignore + return new_vector # type: ignore def __repr__(self) -> str: """Convert vector to string representation.""" @@ -925,62 +992,6 @@ def to_csv( # End of GeoPandas functionalities # -------------------------------- - @property - def crs(self) -> rio.crs.CRS: - """Coordinate reference system of the vector.""" - return self.ds.crs - - @property - def ds(self) -> gpd.GeoDataFrame: - """Geodataframe of the vector.""" - return self._ds - - @ds.setter - def ds(self, new_ds: gpd.GeoDataFrame | gpd.GeoSeries) -> None: - """Set a new geodataframe.""" - - if isinstance(new_ds, gpd.GeoDataFrame): - self._ds = new_ds - elif isinstance(new_ds, gpd.GeoSeries): - self._ds = gpd.GeoDataFrame(geometry=new_ds) - else: - raise ValueError("The dataset of a vector must be set with a GeoSeries or a GeoDataFrame.") - - def vector_equal(self, other: gu.Vector, **kwargs: Any) -> bool: - """ - Check if two vectors are equal. - - Keyword arguments are passed to geopandas.assert_geodataframe_equal. - """ - - try: - assert_geodataframe_equal(self.ds, other.ds, **kwargs) - vector_eq = True - except AssertionError: - vector_eq = False - - return vector_eq - - @property - def name(self) -> str | None: - """Name on disk, if it exists.""" - return self._name - - @property - def geometry(self) -> gpd.GeoSeries: - return self.ds.geometry - - @property - def index(self) -> pd.Index: - return self.ds.index - - def copy(self: VectorType) -> VectorType: - """Return a copy of the vector.""" - # Utilise the copy method of GeoPandas - new_vector = self.__new__(type(self)) - new_vector.__init__(self.ds.copy()) # type: ignore - return new_vector # type: ignore - @overload def crop( self: VectorType, diff --git a/tests/test_interface/test_gridding.py b/tests/test_interface/test_gridding.py index 35371f25..71f460d3 100644 --- a/tests/test_interface/test_gridding.py +++ b/tests/test_interface/test_gridding.py @@ -1,4 +1,4 @@ -"""Test module for point cloud functionalities.""" +"""Test module for gridding functionalities.""" import geopandas as gpd import numpy as np @@ -30,11 +30,13 @@ def test_grid_pc(self) -> None: grid_coords = rst.coords(grid=False) # Grid the point cloud - gridded_pc = _grid_pointcloud(pc, grid_coords=grid_coords) + gridded_pc, output_transform = _grid_pointcloud(pc, grid_coords=grid_coords) # Compare back to raster, all should be very close (but not exact, some info is lost due to interpolations) valids = np.isfinite(gridded_pc) assert np.allclose(gridded_pc[valids], rst.data.data[valids], rtol=10e-5) + # And the transform exactly the same + assert output_transform == transform # 2/ Check the propagation of nodata values @@ -86,7 +88,7 @@ def test_grid_pc(self) -> None: assert all(~np.isfinite(gridded_pc[ifarchull, jfarchull])) # Check for a different distance value - gridded_pc = _grid_pointcloud(pc, grid_coords=grid_coords, dist_nodata_pixel=0.5) + gridded_pc, output_transform = _grid_pointcloud(pc, grid_coords=grid_coords, dist_nodata_pixel=0.5) ind_close = np.array(list_min_dist) <= 0.5 # We get the indexes for these coordinates diff --git a/tests/test_pointcloud/test_pointcloud.py b/tests/test_pointcloud/test_pointcloud.py index ec26e5d3..a57bc9d0 100644 --- a/tests/test_pointcloud/test_pointcloud.py +++ b/tests/test_pointcloud/test_pointcloud.py @@ -1 +1,238 @@ -"""Test for future PointCloud class.""" +"""Test for PointCloud class.""" + +from __future__ import annotations + +import geopandas as gpd +import numpy as np +import pytest +from geopandas.testing import assert_geodataframe_equal +from shapely import Polygon + +from geoutils import PointCloud + + +class TestPointCloud: + + # 1/ Synthetic point cloud with no auxiliary column + rng = np.random.default_rng(42) + arr_points = rng.integers(low=1, high=1000, size=(100, 3)) + rng.normal(0, 0.15, size=(100, 3)) + gdf1 = gpd.GeoDataFrame( + data={"b1": arr_points[:, 2]}, geometry=gpd.points_from_xy(x=arr_points[:, 0], y=arr_points[:, 1]), crs=4326 + ) + + # 2/ Synthetic point cloud with auxiliary column + arr_points2 = rng.integers(low=1, high=1000, size=(100, 4)) + rng.normal(0, 0.15, size=(100, 4)) + gdf2 = gpd.GeoDataFrame( + data=arr_points2[:, 2:], + columns=["b1", "b2"], + geometry=gpd.points_from_xy(x=arr_points2[:, 0], y=arr_points2[:, 1]), + crs=4326, + ) + # 3/ LAS file + fn_las = "/home/atom/ongoing/own/geoutils/test.laz" + + # 4/ Non-point vector (for error raising) + poly = Polygon([(5, 5), (6, 5), (6, 6), (5, 6)]) + gdf3 = gpd.GeoDataFrame({"geometry": [poly]}, crs="EPSG:4326") + + def test_init(self) -> None: + """Test instantiation of a point cloud.""" + + # 1/ For a single column point cloud + pc = PointCloud(self.gdf1, data_column="b1") + + # Assert that both the dataframe and data column name are equal + assert pc.data_column == "b1" + assert_geodataframe_equal(pc.ds, self.gdf1) + + # 2/ For a point cloud from LAS/LAZ file + pc = PointCloud(self.fn_las, data_column="Z") + + assert pc.data_column == "Z" + assert not pc.is_loaded + + def test_init__errors(self) -> None: + """Test errors raised by point cloud instantiation.""" + + # If the data column does not exist + with pytest.raises(ValueError, match="Data column column_that_does_not_exist not found*"): + PointCloud(self.gdf1, data_column="column_that_does_not_exist") + + # If vector is not only comprised of points + with pytest.raises(ValueError, match="This vector file contains non-point geometries*"): + PointCloud(self.gdf3, data_column="z") + + def test_load(self) -> None: + """ + Test loading of a point cloud (only possible with a LAS file). + + This test also serves to test the overridden methods "crs", "bounds", "nb_points", "all_columns" in relation + to loading. + """ + + # 1/ Check unloaded and loaded attributes are all the same + + pc = PointCloud(self.fn_las, data_column="Z") + + # Check point cloud is not loaded, and fetch before metadata + assert not pc.is_loaded + before_crs = pc.crs + before_bounds = pc.bounds + before_nb_points = pc.nb_points + before_columns = pc.all_columns + + # Load and fetch after metadata + pc.load(columns="all") + assert pc.is_loaded + + after_crs = pc.crs + after_bounds = pc.bounds + after_nb_points = pc.nb_points + after_columns = pc.all_columns + + # Check those are equal + assert before_crs == after_crs + assert before_bounds == after_bounds + assert before_nb_points == after_nb_points + assert all(before_columns == after_columns) + + # 2/ Check default column argument + pc = PointCloud(self.fn_las, data_column="Z") + pc.load() + + assert pc.all_columns == ["Z"] + + # 3/ Check implicit loading when calling a function requiring .ds + pc = PointCloud(self.fn_las, data_column="Z") + assert not pc.is_loaded + + pc.buffer(distance=0.1) + assert pc.is_loaded + + def test_load__errors(self) -> None: + """Test errors raised by loading.""" + + pc = PointCloud(self.fn_las, data_column="Z") + pc.load() + + # Error if already loaded + with pytest.raises(ValueError, match="Data are already loaded."): + pc.load() + + pc = PointCloud(self.fn_las, data_column="Z") + pc._name = None + with pytest.raises(AttributeError, match="Cannot load as filename is not set anymore.*"): + pc.load() + + def test_data_column(self) -> None: + """Test the setting and getting of the main data column.""" + + # Assert column is set properly at instantiation + pc = PointCloud(self.gdf1, data_column="b1") + assert pc.data_column == "b1" + + # And can be reset to another name if it exists + pc2 = PointCloud(self.gdf2, data_column="b1") + assert pc2.data_column == "b1" + # First syntax + pc2.data_column = "b2" + assert pc2.data_column == "b2" + # Equivalent syntax + pc2.set_data_column("b1") + assert pc2.data_column == "b1" + + def test_data_column__errors(self) -> None: + """Test errors raised during setting of data column.""" + + pc = PointCloud(self.gdf1, data_column="b1") + # If the data column does not exist + with pytest.raises(ValueError, match="Data column column_that_does_not_exist not found*"): + pc.data_column = "column_that_does_not_exist" + # Equivalent syntax + with pytest.raises(ValueError, match="Data column column_that_does_not_exist not found*"): + pc.set_data_column("column_that_does_not_exist") + + def test_pointcloud_equal(self) -> None: + """Test pointcloud equality.""" + + def test_from_array(self) -> None: + """Test building point cloud from array.""" + + # Build from array and compare + pc1 = PointCloud(self.gdf1, data_column="b1") + pc_from_arr = PointCloud.from_array(array=self.arr_points, crs=4326, data_column="b1") + assert pc_from_arr.pointcloud_equal(pc1) + + # Should be the same with transposed array + pc_from_arr = PointCloud.from_array(array=self.arr_points.T, crs=4326, data_column="b1") + assert pc_from_arr.pointcloud_equal(pc1) + + def test_from_array__errors(self) -> None: + """Test errors raised during creation with array.""" + + array = np.ones((4, 5)) + with pytest.raises(ValueError, match="Array must be of shape 3xN or Nx3."): + PointCloud.from_array(array=array, crs=4326) + + def test_from_tuples(self) -> None: + """Test building point cloud from list of tuples.""" + + pc1 = PointCloud(self.gdf1, data_column="b1") + tuples_xyz = list(zip(self.arr_points[:, 0], self.arr_points[:, 1], self.arr_points[:, 2])) + pc_from_tuples = PointCloud.from_tuples(tuples_xyz=tuples_xyz, crs=4326, data_column="b1") + assert pc_from_tuples.pointcloud_equal(pc1) + + def test_from_xyz(self) -> None: + """Test building point cloud from xyz array-like.""" + + # Build from array and compare + pc1 = PointCloud(self.gdf1, data_column="b1") + pc_from_xyz = PointCloud.from_xyz( + x=self.arr_points[:, 0], y=self.arr_points[:, 1], z=self.arr_points[:, 2], crs=4326, data_column="b1" + ) + assert pc_from_xyz.pointcloud_equal(pc1) + + # Test with lists + pc_from_xyz = PointCloud.from_xyz( + x=list(self.arr_points[:, 0]), + y=list(self.arr_points[:, 1]), + z=list(self.arr_points[:, 2]), + crs=4326, + data_column="b1", + ) + assert pc_from_xyz.pointcloud_equal(pc1) + + # Test with tuples + pc_from_xyz = PointCloud.from_xyz( + x=tuple(self.arr_points[:, 0]), + y=tuple(self.arr_points[:, 1]), + z=tuple(self.arr_points[:, 2]), + crs=4326, + data_column="b1", + ) + assert pc_from_xyz.pointcloud_equal(pc1) + + def test_to_array(self) -> None: + """Test exporting point cloud to array.""" + + # Convert point cloud and compare + pc1 = PointCloud(self.gdf1, data_column="b1") + arr_from_pc = pc1.to_array() + assert np.array_equal(arr_from_pc, self.arr_points.T) + + def test_to_tuples(self) -> None: + """Test exporting point cloud to tuples.""" + + # Convert point cloud and compare + pc1 = PointCloud(self.gdf1, data_column="b1") + tuples_xyz = list(zip(self.arr_points[:, 0], self.arr_points[:, 1], self.arr_points[:, 2])) + tuples_from_pc = pc1.to_tuples() + assert tuples_from_pc == tuples_xyz + + def test_to_xyz(self) -> None: + """Test exporting point cloud to xyz arrays.""" + + # Convert point cloud and compare + pc1 = PointCloud(self.gdf1, data_column="b1") + xyz_from_pc = pc1.to_xyz() + assert np.array_equal(np.stack(xyz_from_pc), self.arr_points.T)