forked from City-of-Helsinki/parkkihubi
-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request City-of-Helsinki#50 from suutari-ai/fix-invalid-ge…
…ometries Add a script to fix invalid geometries
- Loading branch information
Showing
5 changed files
with
365 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,112 @@ | ||
#!/usr/bin/env python | ||
""" | ||
Fix invalid geometries of a model in the database. | ||
Some of the (imported) geometries might be invalid. This causes | ||
problems, because it makes many GIS database queries fail . This | ||
command can be used to fix those geometries. | ||
Currently supports only MultiPolygon geometries. | ||
""" | ||
import re | ||
import sys | ||
|
||
from django.apps import apps as django_apps | ||
from django.contrib.gis.db.models.fields import MultiPolygonField | ||
from django.contrib.gis.db.models.functions import MakeValid | ||
from django.contrib.gis.geos import GeometryCollection, MultiPolygon, Polygon | ||
from django.core.management.base import BaseCommand | ||
|
||
|
||
class Command(BaseCommand): | ||
help = __doc__.strip().splitlines()[0] | ||
|
||
def add_arguments(self, parser): | ||
parser.add_argument( | ||
'model_name', type=str, | ||
help=("Name of the model to fix, e.g. \"ParkingArea\"")) | ||
|
||
parser.add_argument( | ||
'--dry-run', '-n', action='store_true', | ||
help=("Dry run, i.e. just show what would be fixed")) | ||
|
||
def handle(self, model_name, dry_run=False, *args, **options): | ||
verbosity = int(options['verbosity']) | ||
show_info = self.stdout.write if verbosity >= 1 else (lambda x: None) | ||
show_debug = self.stdout.write if verbosity >= 2 else (lambda x: None) | ||
|
||
model_full_name = ('parkings.' + model_name | ||
if '.' not in model_name else model_name) | ||
model = django_apps.get_model(model_full_name) | ||
geometry_fields = [ | ||
field for field in model._meta.get_fields() | ||
if isinstance(field, MultiPolygonField)] | ||
|
||
if len(geometry_fields) != 1: | ||
sys.exit("Model should have exactly one MultiPolygon field") | ||
|
||
field_name = geometry_fields[0].name | ||
|
||
objects_to_fix = ( | ||
model.objects.order_by('pk') | ||
.filter(**{field_name + '__isvalid': False}) | ||
.annotate(**{'valid_geom': MakeValid(field_name)})) | ||
|
||
if not objects_to_fix: | ||
show_info("All good. Nothing to fix.") | ||
return | ||
|
||
for obj in objects_to_fix: | ||
obj_info = '{field_name} of {model_name}:{obj.pk} / {obj}'.format( | ||
field_name=field_name, model_name=model_name, obj=obj) | ||
show_info("Doing {}".format(obj_info)) | ||
show_debug(" original geometry: {}".format( | ||
_geom_info(getattr(obj, field_name), verbosity))) | ||
show_debug(" after MakeValid: {}".format( | ||
_geom_info(obj.valid_geom, verbosity))) | ||
fixed_geom = _to_multipolygon(obj.valid_geom) | ||
show_debug(" after conversion: {}".format( | ||
_geom_info(fixed_geom, verbosity))) | ||
if dry_run: | ||
show_info(" Not saved, since doing dry-run.") | ||
else: | ||
setattr(obj, field_name, fixed_geom) | ||
obj.save() | ||
show_info(" Fixed.") | ||
|
||
|
||
def _polygon_to_multipolygon(polygon): | ||
assert isinstance(polygon, Polygon) | ||
return MultiPolygon(polygon, srid=polygon.srid) | ||
|
||
|
||
def _geometry_collection_to_multipolygon(geometry_collection): | ||
assert isinstance(geometry_collection, GeometryCollection) | ||
polygons = [] | ||
for geom in geometry_collection: | ||
if geom.dims < 2: | ||
# Just ignore everything that has dimension 0 or 1, | ||
# i.e. points, lines, or linestrings, multipoints, etc. | ||
continue | ||
polygons.extend(_to_multipolygon(geom)) | ||
return MultiPolygon(polygons, srid=geometry_collection.srid) | ||
|
||
|
||
def _to_multipolygon(geometry): | ||
converter = _geometry_converters[type(geometry)] | ||
multipolygon = converter(geometry) | ||
assert isinstance(multipolygon, MultiPolygon) | ||
assert geometry.srid == multipolygon.srid | ||
return multipolygon | ||
|
||
|
||
_geometry_converters = { | ||
MultiPolygon: lambda x: x, | ||
Polygon: _polygon_to_multipolygon, | ||
GeometryCollection: _geometry_collection_to_multipolygon, | ||
} | ||
|
||
|
||
def _geom_info(geometry, verbosity=2): | ||
text = str(geometry) | ||
return text if verbosity >= 3 else re.sub(r'\(\d[\d., ]*\)', '(...)', text) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,109 @@ | ||
- model: parkings.parkingarea | ||
pk: a0000000-d493-4670-a1de-f752fd694ed4 | ||
fields: | ||
origin_id: '2833' | ||
capacity_estimate: null | ||
geom: >- | ||
SRID=3879;MULTIPOLYGON (((25493758.106511 6670974.137834, 25493759.034831 | ||
6670969.377303, 25493768.770703 6670971.171852, 25493767.940726 | ||
6670975.950511, 25493758.106511 6670974.137834)), ((25493759.034831 | ||
6670969.377303, 25493759.919187 6670964.303618, 25493769.753403 | ||
6670966.116295, 25493768.819875 6670971.180916, 25493759.034831 | ||
6670969.377303))) | ||
created_at: '2017-09-05T13:52:19.677Z' | ||
modified_at: '2017-09-11T11:29:33.103Z' | ||
|
||
- model: parkings.parkingarea | ||
pk: b0000000-0a2f-4acf-a89d-0c89eb3a0d1b | ||
fields: | ||
origin_id: '3066' | ||
capacity_estimate: null | ||
geom: >- | ||
SRID=3879;MULTIPOLYGON (((25492123.165669 6672509.440342, 25492089.404534 | ||
6672502.727943, 25492089.806087 6672500.195914, 25492123.577051 | ||
6672506.510351, 25492123.165669 6672509.440342)), ((25492080.262998 | ||
6672501.796631, 25492070.454466 6672499.915259, 25492071.088292 | ||
6672496.869559, 25492080.785463 6672498.730063, 25492080.262998 | ||
6672501.796631)), ((25492062.81757 6672498.450243, 25492042.021899 | ||
6672494.461257, 25492042.194103 6672491.457184, 25492063.110677 | ||
6672495.629223, 25492062.81757 6672498.450243)), ((25492042.021899 | ||
6672494.461257, 25492031.1663 6672492.378956, 25492031.369354 | ||
6672489.202028, 25492042.194103 6672491.457184, 25492042.021899 | ||
6672494.461257)), ((25492031.1663 6672492.378956, 25492011.254863 | ||
6672488.559611, 25492011.636738 6672485.537399, 25492031.369354 | ||
6672489.202028, 25492031.1663 6672492.378956))) | ||
created_at: '2017-09-05T13:52:22.127Z' | ||
modified_at: '2017-09-11T11:29:34.877Z' | ||
|
||
- model: parkings.parkingarea | ||
pk: c0000000-7713-48d5-82f5-920dfe2a61d7 | ||
fields: | ||
origin_id: '3331' | ||
capacity_estimate: 7 | ||
geom: >- | ||
SRID=3879;MULTIPOLYGON (((25495652.656271 6671509.00403, 25495650.825957 | ||
6671509.961425, 25495640.210139 6671490.306675, 25495641.927818 | ||
6671489.461915, 25495652.656271 6671509.00403)), ((25495638.703651 | ||
6671487.543605, 25495633.337664 6671477.207614, 25495635.078776 | ||
6671476.263543, 25495640.541004 6671486.528133, 25495638.703651 | ||
6671487.543605)), ((25495633.337664 6671477.207614, 25495628.368539 | ||
6671468.280437, 25495630.26925 6671467.097773, 25495635.078776 | ||
6671476.263543, 25495633.337664 6671477.207614))) | ||
created_at: '2017-09-05T13:52:18.570Z' | ||
modified_at: '2017-09-11T11:29:32.203Z' | ||
|
||
- model: parkings.parkingarea | ||
pk: d0000000-a225-4542-a7d4-58196cd82d19 | ||
fields: | ||
origin_id: '3652' | ||
capacity_estimate: 46 | ||
geom: >- | ||
SRID=3879;MULTIPOLYGON (((25496176.30779 6672190.143211, 25496154.362008 | ||
6672175.553799, 25496129.131841 6672158.039108, 25496106.143323 | ||
6672141.056291, 25496109.06055 6672136.938323, 25496151.222316 | ||
6672166.852771, 25496179.509938 6672185.767273, 25496177.393447 | ||
6672188.930668, 25496176.30779 6672190.143211)), ((25496179.509938 | ||
6672185.767273, 25496151.222316 6672166.852771, 25496109.06055 | ||
6672136.938323, 25496111.873102 6672132.968113, 25496142.014432 | ||
6672153.688594, 25496180.817076 6672179.98879, 25496181.952203 | ||
6672182.116963, 25496179.509938 6672185.767273))) | ||
created_at: '2017-09-05T13:52:19.375Z' | ||
modified_at: '2017-09-11T11:29:32.850Z' | ||
|
||
- model: parkings.parkingarea | ||
pk: e0000000-c039-462b-9680-798e4fe9d7e4 | ||
fields: | ||
origin_id: '4396' | ||
capacity_estimate: 46 | ||
geom: >- | ||
SRID=3879;MULTIPOLYGON (((25496038.688839 6674542.081072, 25495956.240259 | ||
6674527.213296, 25495956.465529 6674521.975783, 25496039.702551 | ||
6674537.237781, 25496038.688839 6674542.081072)), ((25496039.702551 | ||
6674537.237781, 25495956.465529 6674521.975783, 25495957.253971 | ||
6674517.245127, 25496040.659946 6674532.450808, 25496039.702551 | ||
6674537.237781))) | ||
created_at: '2017-09-05T13:52:18.167Z' | ||
modified_at: '2017-09-11T11:29:31.869Z' | ||
|
||
- model: parkings.parkingarea | ||
pk: f0000000-28b8-4710-a203-8dd3c597b936 | ||
fields: | ||
origin_id: '4542' | ||
capacity_estimate: 28 | ||
geom: >- | ||
SRID=3879;MULTIPOLYGON (((25495474.469975 6673671.460571, 25495490.024166 | ||
6673675.541957, 25495489.604713 6673677.225644, 25495473.948494 | ||
6673673.170796, 25495474.469975 6673671.460571)), ((25495567.798874 | ||
6673695.949872, 25495567.210002 6673698.23201, 25495494.391687 | ||
6673678.746212, 25495494.972847 6673676.84048, 25495567.798874 | ||
6673695.949872)), ((25495567.798874 6673695.949872, 25495577.414136 | ||
6673698.995531, 25495576.783949 6673700.935242, 25495567.210002 | ||
6673698.23201, 25495567.798874 6673695.949872)), ((25495590.525597 | ||
6673703.14862, 25495612.655646 6673710.15837, 25495619.406624 | ||
6673712.823533, 25495618.852999 6673714.395086, 25495590.30011 | ||
6673704.99009, 25495590.525597 6673703.14862)), ((25495622.409608 | ||
6673714.009057, 25495654.256126 6673726.581481, 25495653.375526 | ||
6673728.812324, 25495621.781501 6673715.746702, 25495622.409608 | ||
6673714.009057))) | ||
created_at: '2017-09-05T13:52:22.569Z' | ||
modified_at: '2017-09-11T11:29:35.224Z' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,142 @@ | ||
import os | ||
|
||
import pytest | ||
from django.core import management | ||
|
||
from parkings.management.commands import fix_geometries | ||
|
||
from ..models import ParkingArea | ||
from .utils import call_mgmt_cmd_with_output | ||
|
||
directory = os.path.abspath(os.path.dirname(__file__)) | ||
|
||
parking_areas_yaml = os.path.join(directory, 'invalid_geom_parking_areas.yaml') | ||
|
||
|
||
def test_non_compatible_model(): | ||
with pytest.raises(SystemExit) as excinfo: | ||
call_the_command('Operator') | ||
assert str(excinfo.value) == ( | ||
'Model should have exactly one MultiPolygon field') | ||
|
||
|
||
@pytest.mark.django_db | ||
@pytest.mark.parametrize('verbosity', [0, 1, 2]) | ||
@pytest.mark.parametrize('dry_run', [False, True]) | ||
def test_fixing(verbosity, dry_run): | ||
# Load the test data and collect its timestamps and area sizes | ||
management.call_command('loaddata', parking_areas_yaml) | ||
timestamps_before = sorted( | ||
ParkingArea.objects.values_list('modified_at', flat=True)) | ||
areas_before = get_areas(ParkingArea.objects.all()) | ||
|
||
# Call the command | ||
(stdout, stderr) = call_the_command( | ||
'ParkingArea', verbosity=verbosity, dry_run=dry_run) | ||
|
||
# Check the output | ||
assert stdout == get_expected_output(verbosity, dry_run) | ||
assert stderr == '' | ||
|
||
# Check that it did changes as expected | ||
timestamps_after = sorted( | ||
ParkingArea.objects.values_list('modified_at', flat=True)) | ||
if dry_run: | ||
assert timestamps_after == timestamps_before | ||
else: | ||
assert timestamps_after != timestamps_before | ||
|
||
areas_after = get_areas(ParkingArea.objects.all()) | ||
assert areas_after == areas_before, "Areas sizes (m^2) shouldn't change" | ||
|
||
# Make sure that after the command is run, there is no more invalid | ||
# geometries | ||
if not dry_run: | ||
assert call_the_command('ParkingArea', dry_run=True)[0] == ( | ||
'All good. Nothing to fix.\n') | ||
|
||
|
||
def get_areas(objects): | ||
""" | ||
Get rounded area sizes (in m^2) of the geometries in given objects. | ||
The results are rounded to two digits (dm^2 if the unit is metre) | ||
and converted to string to allow easy comparison with equals. | ||
""" | ||
return {str(obj): '{:.2f}'.format(obj.geom.area) for obj in objects} | ||
|
||
|
||
def get_expected_output(verbosity, dry_run): | ||
lines = [ | ||
('Doing geom of ParkingArea:a0000000-d493-4670-a1de-f752fd694ed4 ' | ||
'/ Parking Area 2833', 1), | ||
(' original geometry: SRID=3879;MULTIPOLYGON (((...)), ((...)))', 2), | ||
(' after MakeValid: SRID=3879;MULTIPOLYGON (((...)), ((...)))', 2), | ||
(' after conversion: SRID=3879;MULTIPOLYGON (((...)), ((...)))', 2), | ||
(' Fixed.', 1), | ||
|
||
('Doing geom of ParkingArea:b0000000-0a2f-4acf-a89d-0c89eb3a0d1b ' | ||
'/ Parking Area 3066', 1), | ||
(' original geometry: SRID=3879;MULTIPOLYGON ' | ||
'(((...)), ((...)), ((...)), ((...)), ((...)))', 2), | ||
(' after MakeValid: SRID=3879;GEOMETRYCOLLECTION ' | ||
'(MULTIPOLYGON (((...)), ((...)), ((...))),' | ||
' MULTILINESTRING ((...), (...)))', 2), | ||
(' after conversion: SRID=3879;MULTIPOLYGON ' | ||
'(((...)), ((...)), ((...)))', 2), | ||
(' Fixed.', 1), | ||
|
||
('Doing geom of ParkingArea:c0000000-7713-48d5-82f5-920dfe2a61d7 ' | ||
'/ Parking Area 3331', 1), | ||
(' original geometry: SRID=3879;MULTIPOLYGON ' | ||
'(((...)), ((...)), ((...)))', 2), | ||
(' after MakeValid: SRID=3879;GEOMETRYCOLLECTION ' | ||
'(MULTIPOLYGON (((...)), ((...))), LINESTRING (...))', 2), | ||
(' after conversion: SRID=3879;MULTIPOLYGON (((...)), ((...)))', 2), | ||
(' Fixed.', 1), | ||
|
||
('Doing geom of ParkingArea:d0000000-a225-4542-a7d4-58196cd82d19 ' | ||
'/ Parking Area 3652', 1), | ||
(' original geometry: SRID=3879;MULTIPOLYGON (((...)), ((...)))', 2), | ||
(' after MakeValid: SRID=3879;GEOMETRYCOLLECTION ' | ||
'(POLYGON ((...)), MULTILINESTRING ((...), (...)))', 2), | ||
(' after conversion: SRID=3879;MULTIPOLYGON (((...)))', 2), | ||
(' Fixed.', 1), | ||
|
||
('Doing geom of ParkingArea:e0000000-c039-462b-9680-798e4fe9d7e4 ' | ||
'/ Parking Area 4396', 1), | ||
(' original geometry: SRID=3879;MULTIPOLYGON (((...)), ((...)))', 2), | ||
(' after MakeValid: SRID=3879;GEOMETRYCOLLECTION ' | ||
'(POLYGON ((...)), LINESTRING (...))', 2), | ||
(' after conversion: SRID=3879;MULTIPOLYGON (((...)))', 2), | ||
(' Fixed.', 1), | ||
|
||
('Doing geom of ParkingArea:f0000000-28b8-4710-a203-8dd3c597b936 ' | ||
'/ Parking Area 4542', 1), | ||
(' original geometry: SRID=3879;MULTIPOLYGON ' | ||
'(((...)), ((...)), ((...)), ((...)), ((...)))', 2), | ||
(' after MakeValid: SRID=3879;GEOMETRYCOLLECTION ' | ||
'(MULTIPOLYGON (((...)), ((...)), ((...)), ((...))), ' | ||
'LINESTRING (...))', 2), | ||
(' after conversion: SRID=3879;MULTIPOLYGON ' | ||
'(((...)), ((...)), ((...)), ((...)))', 2), | ||
(' Fixed.', 1), | ||
] | ||
text = ''.join(line + '\n' for (line, lvl) in lines if lvl <= verbosity) | ||
if dry_run: | ||
return text.replace('Fixed.', 'Not saved, since doing dry-run.') | ||
return text | ||
|
||
|
||
@pytest.mark.django_db | ||
def test_nothing_to_fix(parking_area): | ||
(stdout, stderr) = call_the_command('ParkingArea') | ||
assert stdout == 'All good. Nothing to fix.\n' | ||
assert stderr == '' | ||
|
||
|
||
def call_the_command(*args, **kwargs): | ||
(result, stdout, stderr) = call_mgmt_cmd_with_output( | ||
fix_geometries.Command, *args, **kwargs) | ||
assert result is None | ||
return (stdout, stderr) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters