Skip to content

Commit

Permalink
Add a script to fix invalid geometries
Browse files Browse the repository at this point in the history
Some of the (imported) geometries might be invalid.  This causes
problems, because it makes many GIS database queries fail .  This
command can be used to fix those geometries.
  • Loading branch information
suutari-ai committed Feb 14, 2018
1 parent bb073e5 commit d11551f
Show file tree
Hide file tree
Showing 5 changed files with 365 additions and 0 deletions.
112 changes: 112 additions & 0 deletions parkings/management/commands/fix_geometries.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
#!/usr/bin/env python
"""
Fix invalid geometries of a model in the database.
Some of the (imported) geometries might be invalid. This causes
problems, because it makes many GIS database queries fail . This
command can be used to fix those geometries.
Currently supports only MultiPolygon geometries.
"""
import re
import sys

from django.apps import apps as django_apps
from django.contrib.gis.db.models.fields import MultiPolygonField
from django.contrib.gis.db.models.functions import MakeValid
from django.contrib.gis.geos import GeometryCollection, MultiPolygon, Polygon
from django.core.management.base import BaseCommand


class Command(BaseCommand):
help = __doc__.strip().splitlines()[0]

def add_arguments(self, parser):
parser.add_argument(
'model_name', type=str,
help=("Name of the model to fix, e.g. \"ParkingArea\""))

parser.add_argument(
'--dry-run', '-n', action='store_true',
help=("Dry run, i.e. just show what would be fixed"))

def handle(self, model_name, dry_run=False, *args, **options):
verbosity = int(options['verbosity'])
show_info = self.stdout.write if verbosity >= 1 else (lambda x: None)
show_debug = self.stdout.write if verbosity >= 2 else (lambda x: None)

model_full_name = ('parkings.' + model_name
if '.' not in model_name else model_name)
model = django_apps.get_model(model_full_name)
geometry_fields = [
field for field in model._meta.get_fields()
if isinstance(field, MultiPolygonField)]

if len(geometry_fields) != 1:
sys.exit("Model should have exactly one MultiPolygon field")

field_name = geometry_fields[0].name

objects_to_fix = (
model.objects.order_by('pk')
.filter(**{field_name + '__isvalid': False})
.annotate(**{'valid_geom': MakeValid(field_name)}))

if not objects_to_fix:
show_info("All good. Nothing to fix.")
return

for obj in objects_to_fix:
obj_info = '{field_name} of {model_name}:{obj.pk} / {obj}'.format(
field_name=field_name, model_name=model_name, obj=obj)
show_info("Doing {}".format(obj_info))
show_debug(" original geometry: {}".format(
_geom_info(getattr(obj, field_name), verbosity)))
show_debug(" after MakeValid: {}".format(
_geom_info(obj.valid_geom, verbosity)))
fixed_geom = _to_multipolygon(obj.valid_geom)
show_debug(" after conversion: {}".format(
_geom_info(fixed_geom, verbosity)))
if dry_run:
show_info(" Not saved, since doing dry-run.")
else:
setattr(obj, field_name, fixed_geom)
obj.save()
show_info(" Fixed.")


def _polygon_to_multipolygon(polygon):
assert isinstance(polygon, Polygon)
return MultiPolygon(polygon, srid=polygon.srid)


def _geometry_collection_to_multipolygon(geometry_collection):
assert isinstance(geometry_collection, GeometryCollection)
polygons = []
for geom in geometry_collection:
if geom.dims < 2:
# Just ignore everything that has dimension 0 or 1,
# i.e. points, lines, or linestrings, multipoints, etc.
continue
polygons.extend(_to_multipolygon(geom))
return MultiPolygon(polygons, srid=geometry_collection.srid)


def _to_multipolygon(geometry):
converter = _geometry_converters[type(geometry)]
multipolygon = converter(geometry)
assert isinstance(multipolygon, MultiPolygon)
assert geometry.srid == multipolygon.srid
return multipolygon


_geometry_converters = {
MultiPolygon: lambda x: x,
Polygon: _polygon_to_multipolygon,
GeometryCollection: _geometry_collection_to_multipolygon,
}


def _geom_info(geometry, verbosity=2):
text = str(geometry)
return text if verbosity >= 3 else re.sub(r'\(\d[\d., ]*\)', '(...)', text)
109 changes: 109 additions & 0 deletions parkings/tests/invalid_geom_parking_areas.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
- model: parkings.parkingarea
pk: a0000000-d493-4670-a1de-f752fd694ed4
fields:
origin_id: '2833'
capacity_estimate: null
geom: >-
SRID=3879;MULTIPOLYGON (((25493758.106511 6670974.137834, 25493759.034831
6670969.377303, 25493768.770703 6670971.171852, 25493767.940726
6670975.950511, 25493758.106511 6670974.137834)), ((25493759.034831
6670969.377303, 25493759.919187 6670964.303618, 25493769.753403
6670966.116295, 25493768.819875 6670971.180916, 25493759.034831
6670969.377303)))
created_at: '2017-09-05T13:52:19.677Z'
modified_at: '2017-09-11T11:29:33.103Z'

- model: parkings.parkingarea
pk: b0000000-0a2f-4acf-a89d-0c89eb3a0d1b
fields:
origin_id: '3066'
capacity_estimate: null
geom: >-
SRID=3879;MULTIPOLYGON (((25492123.165669 6672509.440342, 25492089.404534
6672502.727943, 25492089.806087 6672500.195914, 25492123.577051
6672506.510351, 25492123.165669 6672509.440342)), ((25492080.262998
6672501.796631, 25492070.454466 6672499.915259, 25492071.088292
6672496.869559, 25492080.785463 6672498.730063, 25492080.262998
6672501.796631)), ((25492062.81757 6672498.450243, 25492042.021899
6672494.461257, 25492042.194103 6672491.457184, 25492063.110677
6672495.629223, 25492062.81757 6672498.450243)), ((25492042.021899
6672494.461257, 25492031.1663 6672492.378956, 25492031.369354
6672489.202028, 25492042.194103 6672491.457184, 25492042.021899
6672494.461257)), ((25492031.1663 6672492.378956, 25492011.254863
6672488.559611, 25492011.636738 6672485.537399, 25492031.369354
6672489.202028, 25492031.1663 6672492.378956)))
created_at: '2017-09-05T13:52:22.127Z'
modified_at: '2017-09-11T11:29:34.877Z'

- model: parkings.parkingarea
pk: c0000000-7713-48d5-82f5-920dfe2a61d7
fields:
origin_id: '3331'
capacity_estimate: 7
geom: >-
SRID=3879;MULTIPOLYGON (((25495652.656271 6671509.00403, 25495650.825957
6671509.961425, 25495640.210139 6671490.306675, 25495641.927818
6671489.461915, 25495652.656271 6671509.00403)), ((25495638.703651
6671487.543605, 25495633.337664 6671477.207614, 25495635.078776
6671476.263543, 25495640.541004 6671486.528133, 25495638.703651
6671487.543605)), ((25495633.337664 6671477.207614, 25495628.368539
6671468.280437, 25495630.26925 6671467.097773, 25495635.078776
6671476.263543, 25495633.337664 6671477.207614)))
created_at: '2017-09-05T13:52:18.570Z'
modified_at: '2017-09-11T11:29:32.203Z'

- model: parkings.parkingarea
pk: d0000000-a225-4542-a7d4-58196cd82d19
fields:
origin_id: '3652'
capacity_estimate: 46
geom: >-
SRID=3879;MULTIPOLYGON (((25496176.30779 6672190.143211, 25496154.362008
6672175.553799, 25496129.131841 6672158.039108, 25496106.143323
6672141.056291, 25496109.06055 6672136.938323, 25496151.222316
6672166.852771, 25496179.509938 6672185.767273, 25496177.393447
6672188.930668, 25496176.30779 6672190.143211)), ((25496179.509938
6672185.767273, 25496151.222316 6672166.852771, 25496109.06055
6672136.938323, 25496111.873102 6672132.968113, 25496142.014432
6672153.688594, 25496180.817076 6672179.98879, 25496181.952203
6672182.116963, 25496179.509938 6672185.767273)))
created_at: '2017-09-05T13:52:19.375Z'
modified_at: '2017-09-11T11:29:32.850Z'

- model: parkings.parkingarea
pk: e0000000-c039-462b-9680-798e4fe9d7e4
fields:
origin_id: '4396'
capacity_estimate: 46
geom: >-
SRID=3879;MULTIPOLYGON (((25496038.688839 6674542.081072, 25495956.240259
6674527.213296, 25495956.465529 6674521.975783, 25496039.702551
6674537.237781, 25496038.688839 6674542.081072)), ((25496039.702551
6674537.237781, 25495956.465529 6674521.975783, 25495957.253971
6674517.245127, 25496040.659946 6674532.450808, 25496039.702551
6674537.237781)))
created_at: '2017-09-05T13:52:18.167Z'
modified_at: '2017-09-11T11:29:31.869Z'

- model: parkings.parkingarea
pk: f0000000-28b8-4710-a203-8dd3c597b936
fields:
origin_id: '4542'
capacity_estimate: 28
geom: >-
SRID=3879;MULTIPOLYGON (((25495474.469975 6673671.460571, 25495490.024166
6673675.541957, 25495489.604713 6673677.225644, 25495473.948494
6673673.170796, 25495474.469975 6673671.460571)), ((25495567.798874
6673695.949872, 25495567.210002 6673698.23201, 25495494.391687
6673678.746212, 25495494.972847 6673676.84048, 25495567.798874
6673695.949872)), ((25495567.798874 6673695.949872, 25495577.414136
6673698.995531, 25495576.783949 6673700.935242, 25495567.210002
6673698.23201, 25495567.798874 6673695.949872)), ((25495590.525597
6673703.14862, 25495612.655646 6673710.15837, 25495619.406624
6673712.823533, 25495618.852999 6673714.395086, 25495590.30011
6673704.99009, 25495590.525597 6673703.14862)), ((25495622.409608
6673714.009057, 25495654.256126 6673726.581481, 25495653.375526
6673728.812324, 25495621.781501 6673715.746702, 25495622.409608
6673714.009057)))
created_at: '2017-09-05T13:52:22.569Z'
modified_at: '2017-09-11T11:29:35.224Z'
142 changes: 142 additions & 0 deletions parkings/tests/test_fix_geometries.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
import os

import pytest
from django.core import management

from parkings.management.commands import fix_geometries

from ..models import ParkingArea
from .utils import call_mgmt_cmd_with_output

directory = os.path.abspath(os.path.dirname(__file__))

parking_areas_yaml = os.path.join(directory, 'invalid_geom_parking_areas.yaml')


def test_non_compatible_model():
with pytest.raises(SystemExit) as excinfo:
call_the_command('Operator')
assert str(excinfo.value) == (
'Model should have exactly one MultiPolygon field')


@pytest.mark.django_db
@pytest.mark.parametrize('verbosity', [0, 1, 2])
@pytest.mark.parametrize('dry_run', [False, True])
def test_fixing(verbosity, dry_run):
# Load the test data and collect its timestamps and area sizes
management.call_command('loaddata', parking_areas_yaml)
timestamps_before = sorted(
ParkingArea.objects.values_list('modified_at', flat=True))
areas_before = get_areas(ParkingArea.objects.all())

# Call the command
(stdout, stderr) = call_the_command(
'ParkingArea', verbosity=verbosity, dry_run=dry_run)

# Check the output
assert stdout == get_expected_output(verbosity, dry_run)
assert stderr == ''

# Check that it did changes as expected
timestamps_after = sorted(
ParkingArea.objects.values_list('modified_at', flat=True))
if dry_run:
assert timestamps_after == timestamps_before
else:
assert timestamps_after != timestamps_before

areas_after = get_areas(ParkingArea.objects.all())
assert areas_after == areas_before, "Areas sizes (m^2) shouldn't change"

# Make sure that after the command is run, there is no more invalid
# geometries
if not dry_run:
assert call_the_command('ParkingArea', dry_run=True)[0] == (
'All good. Nothing to fix.\n')


def get_areas(objects):
"""
Get rounded area sizes (in m^2) of the geometries in given objects.
The results are rounded to two digits (dm^2 if the unit is metre)
and converted to string to allow easy comparison with equals.
"""
return {str(obj): '{:.2f}'.format(obj.geom.area) for obj in objects}


def get_expected_output(verbosity, dry_run):
lines = [
('Doing geom of ParkingArea:a0000000-d493-4670-a1de-f752fd694ed4 '
'/ Parking Area 2833', 1),
(' original geometry: SRID=3879;MULTIPOLYGON (((...)), ((...)))', 2),
(' after MakeValid: SRID=3879;MULTIPOLYGON (((...)), ((...)))', 2),
(' after conversion: SRID=3879;MULTIPOLYGON (((...)), ((...)))', 2),
(' Fixed.', 1),

('Doing geom of ParkingArea:b0000000-0a2f-4acf-a89d-0c89eb3a0d1b '
'/ Parking Area 3066', 1),
(' original geometry: SRID=3879;MULTIPOLYGON '
'(((...)), ((...)), ((...)), ((...)), ((...)))', 2),
(' after MakeValid: SRID=3879;GEOMETRYCOLLECTION '
'(MULTIPOLYGON (((...)), ((...)), ((...))),'
' MULTILINESTRING ((...), (...)))', 2),
(' after conversion: SRID=3879;MULTIPOLYGON '
'(((...)), ((...)), ((...)))', 2),
(' Fixed.', 1),

('Doing geom of ParkingArea:c0000000-7713-48d5-82f5-920dfe2a61d7 '
'/ Parking Area 3331', 1),
(' original geometry: SRID=3879;MULTIPOLYGON '
'(((...)), ((...)), ((...)))', 2),
(' after MakeValid: SRID=3879;GEOMETRYCOLLECTION '
'(MULTIPOLYGON (((...)), ((...))), LINESTRING (...))', 2),
(' after conversion: SRID=3879;MULTIPOLYGON (((...)), ((...)))', 2),
(' Fixed.', 1),

('Doing geom of ParkingArea:d0000000-a225-4542-a7d4-58196cd82d19 '
'/ Parking Area 3652', 1),
(' original geometry: SRID=3879;MULTIPOLYGON (((...)), ((...)))', 2),
(' after MakeValid: SRID=3879;GEOMETRYCOLLECTION '
'(POLYGON ((...)), MULTILINESTRING ((...), (...)))', 2),
(' after conversion: SRID=3879;MULTIPOLYGON (((...)))', 2),
(' Fixed.', 1),

('Doing geom of ParkingArea:e0000000-c039-462b-9680-798e4fe9d7e4 '
'/ Parking Area 4396', 1),
(' original geometry: SRID=3879;MULTIPOLYGON (((...)), ((...)))', 2),
(' after MakeValid: SRID=3879;GEOMETRYCOLLECTION '
'(POLYGON ((...)), LINESTRING (...))', 2),
(' after conversion: SRID=3879;MULTIPOLYGON (((...)))', 2),
(' Fixed.', 1),

('Doing geom of ParkingArea:f0000000-28b8-4710-a203-8dd3c597b936 '
'/ Parking Area 4542', 1),
(' original geometry: SRID=3879;MULTIPOLYGON '
'(((...)), ((...)), ((...)), ((...)), ((...)))', 2),
(' after MakeValid: SRID=3879;GEOMETRYCOLLECTION '
'(MULTIPOLYGON (((...)), ((...)), ((...)), ((...))), '
'LINESTRING (...))', 2),
(' after conversion: SRID=3879;MULTIPOLYGON '
'(((...)), ((...)), ((...)), ((...)))', 2),
(' Fixed.', 1),
]
text = ''.join(line + '\n' for (line, lvl) in lines if lvl <= verbosity)
if dry_run:
return text.replace('Fixed.', 'Not saved, since doing dry-run.')
return text


@pytest.mark.django_db
def test_nothing_to_fix(parking_area):
(stdout, stderr) = call_the_command('ParkingArea')
assert stdout == 'All good. Nothing to fix.\n'
assert stderr == ''


def call_the_command(*args, **kwargs):
(result, stdout, stderr) = call_mgmt_cmd_with_output(
fix_geometries.Command, *args, **kwargs)
assert result is None
return (stdout, stderr)
1 change: 1 addition & 0 deletions requirements-test.in
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ pytest
pytest-cov
pytest-django
pytest-factoryboy
pyyaml

# Factory Boy 2.9.0 to 2.9.2 don't seem to work. See
# https://github.com/pytest-dev/pytest-factoryboy/issues/47
Expand Down
1 change: 1 addition & 0 deletions requirements-test.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,6 @@ pytest-cov==2.5.1
pytest-django==3.1.2
pytest-factoryboy==1.3.1
python-dateutil==2.6.1 # via faker, freezegun
pyyaml==3.12
six==1.11.0 # via faker, freezegun, pytest, python-dateutil
text-unidecode==1.1 # via faker

0 comments on commit d11551f

Please sign in to comment.