From d11551f66ad24593a65c92b808843ce2100e23c3 Mon Sep 17 00:00:00 2001 From: Tuomas Suutari Date: Mon, 12 Feb 2018 12:37:08 +0200 Subject: [PATCH] Add a script to fix invalid geometries Some of the (imported) geometries might be invalid. This causes problems, because it makes many GIS database queries fail . This command can be used to fix those geometries. --- .../management/commands/fix_geometries.py | 112 ++++++++++++++ .../tests/invalid_geom_parking_areas.yaml | 109 ++++++++++++++ parkings/tests/test_fix_geometries.py | 142 ++++++++++++++++++ requirements-test.in | 1 + requirements-test.txt | 1 + 5 files changed, 365 insertions(+) create mode 100755 parkings/management/commands/fix_geometries.py create mode 100644 parkings/tests/invalid_geom_parking_areas.yaml create mode 100644 parkings/tests/test_fix_geometries.py diff --git a/parkings/management/commands/fix_geometries.py b/parkings/management/commands/fix_geometries.py new file mode 100755 index 00000000..20b79d89 --- /dev/null +++ b/parkings/management/commands/fix_geometries.py @@ -0,0 +1,112 @@ +#!/usr/bin/env python +""" +Fix invalid geometries of a model in the database. + +Some of the (imported) geometries might be invalid. This causes +problems, because it makes many GIS database queries fail . This +command can be used to fix those geometries. + +Currently supports only MultiPolygon geometries. +""" +import re +import sys + +from django.apps import apps as django_apps +from django.contrib.gis.db.models.fields import MultiPolygonField +from django.contrib.gis.db.models.functions import MakeValid +from django.contrib.gis.geos import GeometryCollection, MultiPolygon, Polygon +from django.core.management.base import BaseCommand + + +class Command(BaseCommand): + help = __doc__.strip().splitlines()[0] + + def add_arguments(self, parser): + parser.add_argument( + 'model_name', type=str, + help=("Name of the model to fix, e.g. \"ParkingArea\"")) + + parser.add_argument( + '--dry-run', '-n', action='store_true', + help=("Dry run, i.e. just show what would be fixed")) + + def handle(self, model_name, dry_run=False, *args, **options): + verbosity = int(options['verbosity']) + show_info = self.stdout.write if verbosity >= 1 else (lambda x: None) + show_debug = self.stdout.write if verbosity >= 2 else (lambda x: None) + + model_full_name = ('parkings.' + model_name + if '.' not in model_name else model_name) + model = django_apps.get_model(model_full_name) + geometry_fields = [ + field for field in model._meta.get_fields() + if isinstance(field, MultiPolygonField)] + + if len(geometry_fields) != 1: + sys.exit("Model should have exactly one MultiPolygon field") + + field_name = geometry_fields[0].name + + objects_to_fix = ( + model.objects.order_by('pk') + .filter(**{field_name + '__isvalid': False}) + .annotate(**{'valid_geom': MakeValid(field_name)})) + + if not objects_to_fix: + show_info("All good. Nothing to fix.") + return + + for obj in objects_to_fix: + obj_info = '{field_name} of {model_name}:{obj.pk} / {obj}'.format( + field_name=field_name, model_name=model_name, obj=obj) + show_info("Doing {}".format(obj_info)) + show_debug(" original geometry: {}".format( + _geom_info(getattr(obj, field_name), verbosity))) + show_debug(" after MakeValid: {}".format( + _geom_info(obj.valid_geom, verbosity))) + fixed_geom = _to_multipolygon(obj.valid_geom) + show_debug(" after conversion: {}".format( + _geom_info(fixed_geom, verbosity))) + if dry_run: + show_info(" Not saved, since doing dry-run.") + else: + setattr(obj, field_name, fixed_geom) + obj.save() + show_info(" Fixed.") + + +def _polygon_to_multipolygon(polygon): + assert isinstance(polygon, Polygon) + return MultiPolygon(polygon, srid=polygon.srid) + + +def _geometry_collection_to_multipolygon(geometry_collection): + assert isinstance(geometry_collection, GeometryCollection) + polygons = [] + for geom in geometry_collection: + if geom.dims < 2: + # Just ignore everything that has dimension 0 or 1, + # i.e. points, lines, or linestrings, multipoints, etc. + continue + polygons.extend(_to_multipolygon(geom)) + return MultiPolygon(polygons, srid=geometry_collection.srid) + + +def _to_multipolygon(geometry): + converter = _geometry_converters[type(geometry)] + multipolygon = converter(geometry) + assert isinstance(multipolygon, MultiPolygon) + assert geometry.srid == multipolygon.srid + return multipolygon + + +_geometry_converters = { + MultiPolygon: lambda x: x, + Polygon: _polygon_to_multipolygon, + GeometryCollection: _geometry_collection_to_multipolygon, +} + + +def _geom_info(geometry, verbosity=2): + text = str(geometry) + return text if verbosity >= 3 else re.sub(r'\(\d[\d., ]*\)', '(...)', text) diff --git a/parkings/tests/invalid_geom_parking_areas.yaml b/parkings/tests/invalid_geom_parking_areas.yaml new file mode 100644 index 00000000..3e302880 --- /dev/null +++ b/parkings/tests/invalid_geom_parking_areas.yaml @@ -0,0 +1,109 @@ +- model: parkings.parkingarea + pk: a0000000-d493-4670-a1de-f752fd694ed4 + fields: + origin_id: '2833' + capacity_estimate: null + geom: >- + SRID=3879;MULTIPOLYGON (((25493758.106511 6670974.137834, 25493759.034831 + 6670969.377303, 25493768.770703 6670971.171852, 25493767.940726 + 6670975.950511, 25493758.106511 6670974.137834)), ((25493759.034831 + 6670969.377303, 25493759.919187 6670964.303618, 25493769.753403 + 6670966.116295, 25493768.819875 6670971.180916, 25493759.034831 + 6670969.377303))) + created_at: '2017-09-05T13:52:19.677Z' + modified_at: '2017-09-11T11:29:33.103Z' + +- model: parkings.parkingarea + pk: b0000000-0a2f-4acf-a89d-0c89eb3a0d1b + fields: + origin_id: '3066' + capacity_estimate: null + geom: >- + SRID=3879;MULTIPOLYGON (((25492123.165669 6672509.440342, 25492089.404534 + 6672502.727943, 25492089.806087 6672500.195914, 25492123.577051 + 6672506.510351, 25492123.165669 6672509.440342)), ((25492080.262998 + 6672501.796631, 25492070.454466 6672499.915259, 25492071.088292 + 6672496.869559, 25492080.785463 6672498.730063, 25492080.262998 + 6672501.796631)), ((25492062.81757 6672498.450243, 25492042.021899 + 6672494.461257, 25492042.194103 6672491.457184, 25492063.110677 + 6672495.629223, 25492062.81757 6672498.450243)), ((25492042.021899 + 6672494.461257, 25492031.1663 6672492.378956, 25492031.369354 + 6672489.202028, 25492042.194103 6672491.457184, 25492042.021899 + 6672494.461257)), ((25492031.1663 6672492.378956, 25492011.254863 + 6672488.559611, 25492011.636738 6672485.537399, 25492031.369354 + 6672489.202028, 25492031.1663 6672492.378956))) + created_at: '2017-09-05T13:52:22.127Z' + modified_at: '2017-09-11T11:29:34.877Z' + +- model: parkings.parkingarea + pk: c0000000-7713-48d5-82f5-920dfe2a61d7 + fields: + origin_id: '3331' + capacity_estimate: 7 + geom: >- + SRID=3879;MULTIPOLYGON (((25495652.656271 6671509.00403, 25495650.825957 + 6671509.961425, 25495640.210139 6671490.306675, 25495641.927818 + 6671489.461915, 25495652.656271 6671509.00403)), ((25495638.703651 + 6671487.543605, 25495633.337664 6671477.207614, 25495635.078776 + 6671476.263543, 25495640.541004 6671486.528133, 25495638.703651 + 6671487.543605)), ((25495633.337664 6671477.207614, 25495628.368539 + 6671468.280437, 25495630.26925 6671467.097773, 25495635.078776 + 6671476.263543, 25495633.337664 6671477.207614))) + created_at: '2017-09-05T13:52:18.570Z' + modified_at: '2017-09-11T11:29:32.203Z' + +- model: parkings.parkingarea + pk: d0000000-a225-4542-a7d4-58196cd82d19 + fields: + origin_id: '3652' + capacity_estimate: 46 + geom: >- + SRID=3879;MULTIPOLYGON (((25496176.30779 6672190.143211, 25496154.362008 + 6672175.553799, 25496129.131841 6672158.039108, 25496106.143323 + 6672141.056291, 25496109.06055 6672136.938323, 25496151.222316 + 6672166.852771, 25496179.509938 6672185.767273, 25496177.393447 + 6672188.930668, 25496176.30779 6672190.143211)), ((25496179.509938 + 6672185.767273, 25496151.222316 6672166.852771, 25496109.06055 + 6672136.938323, 25496111.873102 6672132.968113, 25496142.014432 + 6672153.688594, 25496180.817076 6672179.98879, 25496181.952203 + 6672182.116963, 25496179.509938 6672185.767273))) + created_at: '2017-09-05T13:52:19.375Z' + modified_at: '2017-09-11T11:29:32.850Z' + +- model: parkings.parkingarea + pk: e0000000-c039-462b-9680-798e4fe9d7e4 + fields: + origin_id: '4396' + capacity_estimate: 46 + geom: >- + SRID=3879;MULTIPOLYGON (((25496038.688839 6674542.081072, 25495956.240259 + 6674527.213296, 25495956.465529 6674521.975783, 25496039.702551 + 6674537.237781, 25496038.688839 6674542.081072)), ((25496039.702551 + 6674537.237781, 25495956.465529 6674521.975783, 25495957.253971 + 6674517.245127, 25496040.659946 6674532.450808, 25496039.702551 + 6674537.237781))) + created_at: '2017-09-05T13:52:18.167Z' + modified_at: '2017-09-11T11:29:31.869Z' + +- model: parkings.parkingarea + pk: f0000000-28b8-4710-a203-8dd3c597b936 + fields: + origin_id: '4542' + capacity_estimate: 28 + geom: >- + SRID=3879;MULTIPOLYGON (((25495474.469975 6673671.460571, 25495490.024166 + 6673675.541957, 25495489.604713 6673677.225644, 25495473.948494 + 6673673.170796, 25495474.469975 6673671.460571)), ((25495567.798874 + 6673695.949872, 25495567.210002 6673698.23201, 25495494.391687 + 6673678.746212, 25495494.972847 6673676.84048, 25495567.798874 + 6673695.949872)), ((25495567.798874 6673695.949872, 25495577.414136 + 6673698.995531, 25495576.783949 6673700.935242, 25495567.210002 + 6673698.23201, 25495567.798874 6673695.949872)), ((25495590.525597 + 6673703.14862, 25495612.655646 6673710.15837, 25495619.406624 + 6673712.823533, 25495618.852999 6673714.395086, 25495590.30011 + 6673704.99009, 25495590.525597 6673703.14862)), ((25495622.409608 + 6673714.009057, 25495654.256126 6673726.581481, 25495653.375526 + 6673728.812324, 25495621.781501 6673715.746702, 25495622.409608 + 6673714.009057))) + created_at: '2017-09-05T13:52:22.569Z' + modified_at: '2017-09-11T11:29:35.224Z' diff --git a/parkings/tests/test_fix_geometries.py b/parkings/tests/test_fix_geometries.py new file mode 100644 index 00000000..db1056f6 --- /dev/null +++ b/parkings/tests/test_fix_geometries.py @@ -0,0 +1,142 @@ +import os + +import pytest +from django.core import management + +from parkings.management.commands import fix_geometries + +from ..models import ParkingArea +from .utils import call_mgmt_cmd_with_output + +directory = os.path.abspath(os.path.dirname(__file__)) + +parking_areas_yaml = os.path.join(directory, 'invalid_geom_parking_areas.yaml') + + +def test_non_compatible_model(): + with pytest.raises(SystemExit) as excinfo: + call_the_command('Operator') + assert str(excinfo.value) == ( + 'Model should have exactly one MultiPolygon field') + + +@pytest.mark.django_db +@pytest.mark.parametrize('verbosity', [0, 1, 2]) +@pytest.mark.parametrize('dry_run', [False, True]) +def test_fixing(verbosity, dry_run): + # Load the test data and collect its timestamps and area sizes + management.call_command('loaddata', parking_areas_yaml) + timestamps_before = sorted( + ParkingArea.objects.values_list('modified_at', flat=True)) + areas_before = get_areas(ParkingArea.objects.all()) + + # Call the command + (stdout, stderr) = call_the_command( + 'ParkingArea', verbosity=verbosity, dry_run=dry_run) + + # Check the output + assert stdout == get_expected_output(verbosity, dry_run) + assert stderr == '' + + # Check that it did changes as expected + timestamps_after = sorted( + ParkingArea.objects.values_list('modified_at', flat=True)) + if dry_run: + assert timestamps_after == timestamps_before + else: + assert timestamps_after != timestamps_before + + areas_after = get_areas(ParkingArea.objects.all()) + assert areas_after == areas_before, "Areas sizes (m^2) shouldn't change" + + # Make sure that after the command is run, there is no more invalid + # geometries + if not dry_run: + assert call_the_command('ParkingArea', dry_run=True)[0] == ( + 'All good. Nothing to fix.\n') + + +def get_areas(objects): + """ + Get rounded area sizes (in m^2) of the geometries in given objects. + + The results are rounded to two digits (dm^2 if the unit is metre) + and converted to string to allow easy comparison with equals. + """ + return {str(obj): '{:.2f}'.format(obj.geom.area) for obj in objects} + + +def get_expected_output(verbosity, dry_run): + lines = [ + ('Doing geom of ParkingArea:a0000000-d493-4670-a1de-f752fd694ed4 ' + '/ Parking Area 2833', 1), + (' original geometry: SRID=3879;MULTIPOLYGON (((...)), ((...)))', 2), + (' after MakeValid: SRID=3879;MULTIPOLYGON (((...)), ((...)))', 2), + (' after conversion: SRID=3879;MULTIPOLYGON (((...)), ((...)))', 2), + (' Fixed.', 1), + + ('Doing geom of ParkingArea:b0000000-0a2f-4acf-a89d-0c89eb3a0d1b ' + '/ Parking Area 3066', 1), + (' original geometry: SRID=3879;MULTIPOLYGON ' + '(((...)), ((...)), ((...)), ((...)), ((...)))', 2), + (' after MakeValid: SRID=3879;GEOMETRYCOLLECTION ' + '(MULTIPOLYGON (((...)), ((...)), ((...))),' + ' MULTILINESTRING ((...), (...)))', 2), + (' after conversion: SRID=3879;MULTIPOLYGON ' + '(((...)), ((...)), ((...)))', 2), + (' Fixed.', 1), + + ('Doing geom of ParkingArea:c0000000-7713-48d5-82f5-920dfe2a61d7 ' + '/ Parking Area 3331', 1), + (' original geometry: SRID=3879;MULTIPOLYGON ' + '(((...)), ((...)), ((...)))', 2), + (' after MakeValid: SRID=3879;GEOMETRYCOLLECTION ' + '(MULTIPOLYGON (((...)), ((...))), LINESTRING (...))', 2), + (' after conversion: SRID=3879;MULTIPOLYGON (((...)), ((...)))', 2), + (' Fixed.', 1), + + ('Doing geom of ParkingArea:d0000000-a225-4542-a7d4-58196cd82d19 ' + '/ Parking Area 3652', 1), + (' original geometry: SRID=3879;MULTIPOLYGON (((...)), ((...)))', 2), + (' after MakeValid: SRID=3879;GEOMETRYCOLLECTION ' + '(POLYGON ((...)), MULTILINESTRING ((...), (...)))', 2), + (' after conversion: SRID=3879;MULTIPOLYGON (((...)))', 2), + (' Fixed.', 1), + + ('Doing geom of ParkingArea:e0000000-c039-462b-9680-798e4fe9d7e4 ' + '/ Parking Area 4396', 1), + (' original geometry: SRID=3879;MULTIPOLYGON (((...)), ((...)))', 2), + (' after MakeValid: SRID=3879;GEOMETRYCOLLECTION ' + '(POLYGON ((...)), LINESTRING (...))', 2), + (' after conversion: SRID=3879;MULTIPOLYGON (((...)))', 2), + (' Fixed.', 1), + + ('Doing geom of ParkingArea:f0000000-28b8-4710-a203-8dd3c597b936 ' + '/ Parking Area 4542', 1), + (' original geometry: SRID=3879;MULTIPOLYGON ' + '(((...)), ((...)), ((...)), ((...)), ((...)))', 2), + (' after MakeValid: SRID=3879;GEOMETRYCOLLECTION ' + '(MULTIPOLYGON (((...)), ((...)), ((...)), ((...))), ' + 'LINESTRING (...))', 2), + (' after conversion: SRID=3879;MULTIPOLYGON ' + '(((...)), ((...)), ((...)), ((...)))', 2), + (' Fixed.', 1), + ] + text = ''.join(line + '\n' for (line, lvl) in lines if lvl <= verbosity) + if dry_run: + return text.replace('Fixed.', 'Not saved, since doing dry-run.') + return text + + +@pytest.mark.django_db +def test_nothing_to_fix(parking_area): + (stdout, stderr) = call_the_command('ParkingArea') + assert stdout == 'All good. Nothing to fix.\n' + assert stderr == '' + + +def call_the_command(*args, **kwargs): + (result, stdout, stderr) = call_mgmt_cmd_with_output( + fix_geometries.Command, *args, **kwargs) + assert result is None + return (stdout, stderr) diff --git a/requirements-test.in b/requirements-test.in index 83dfad44..b9a91ff1 100644 --- a/requirements-test.in +++ b/requirements-test.in @@ -3,6 +3,7 @@ pytest pytest-cov pytest-django pytest-factoryboy +pyyaml # Factory Boy 2.9.0 to 2.9.2 don't seem to work. See # https://github.com/pytest-dev/pytest-factoryboy/issues/47 diff --git a/requirements-test.txt b/requirements-test.txt index ecdb1dc0..7ea468d4 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -15,5 +15,6 @@ pytest-cov==2.5.1 pytest-django==3.1.2 pytest-factoryboy==1.3.1 python-dateutil==2.6.1 # via faker, freezegun +pyyaml==3.12 six==1.11.0 # via faker, freezegun, pytest, python-dateutil text-unidecode==1.1 # via faker