From 531869b12e05d916db36eb255c3801025fe0946f Mon Sep 17 00:00:00 2001
From: Tuomas Suutari
Date: Fri, 9 Feb 2018 11:33:22 +0200
Subject: [PATCH] Add command for filling regions of parkings

New parkings with a location will get the region field filled on their
`save` method, but the existing parkings will have to be processed
separately.  Add a management command for that task.

The command divides the parkings into time intervals so that each time
interval has about 20000 parkings.  Then each time interval is processed
separately in a transaction.  This should (a) limit the amount of
resources the command consumes and (b) make it easy to abort the
command, if it takes too many resources, and then continue later where
it left off.
---
 .../commands/fill_parking_regions.py          | 88 +++++++++++++++++++
 .../test_fill_parking_regions_command.py      | 82 +++++++++++++++++
 parkings/tests/utils.py                       | 43 +++++++++
 3 files changed, 213 insertions(+)
 create mode 100755 parkings/management/commands/fill_parking_regions.py
 create mode 100644 parkings/tests/test_fill_parking_regions_command.py

diff --git a/parkings/management/commands/fill_parking_regions.py b/parkings/management/commands/fill_parking_regions.py
new file mode 100755
index 00000000..465f816f
--- /dev/null
+++ b/parkings/management/commands/fill_parking_regions.py
@@ -0,0 +1,88 @@
+#!/usr/bin/env python
+"""
+Fill regions to Parking objects.
+"""
+import datetime
+
+from django.core.management.base import BaseCommand
+from django.db import transaction
+
+from ...models import Parking, Region
+
+
+class Command(BaseCommand):
+    # NOTE: This class intentionally has no docstring, because within the
+    # class body __doc__ would then refer to it instead of the module's.
+    help = __doc__.strip().splitlines()[0]
+
+    def add_arguments(self, parser):
+        """Add the optional positional block_size_target argument."""
+        parser.add_argument(
+            'block_size_target', type=int, nargs='?', default=20000,
+            help=(
+                "Block size target, "
+                "i.e. the number of parkings to process at time"))
+
+    def handle(self, block_size_target, *args, **options):
+        """
+        Fill regions of the parkings one time interval at a time.
+
+        The time range of the parkings to process is divided into
+        block_count equally long intervals, where block_count is the
+        parking count divided by block_size_target (at least 1).  Each
+        interval is then processed in a transaction of its own.
+        """
+        verbosity = int(options['verbosity'])
+        silent = (verbosity == 0)
+        show_info = (self._print_and_flush if not silent else self._null_print)
+
+        regions = Region.objects.all()
+        parkings = (
+            Parking.objects
+            .exclude(location=None)
+            .filter(region=None)
+            .order_by('created_at'))
+        count = parkings.count()
+
+        if not count:
+            show_info("Nothing to do")
+            return
+
+        block_count = int(max(count / block_size_target, 1))
+        start = parkings.first().created_at.replace(microsecond=0, second=0)
+        end = parkings.last().created_at
+        # The extra second makes sure the last block covers "end" too
+        block_seconds = int((end - start).total_seconds() / block_count) + 1
+        block_span = datetime.timedelta(seconds=block_seconds)
+
+        for block_num in range(block_count):
+            block_start = start + (block_num * block_span)
+            block_end = start + ((block_num + 1) * block_span)
+            block = parkings.filter(
+                created_at__gte=block_start,
+                created_at__lt=block_end)
+            block_size = block.count()
+
+            show_info(
+                "Processing block {:5d}/{:5d}, size {:6d}, {}--{}".format(
+                    block_num + 1, block_count, block_size,
+                    block_start, block_end), ending='')
+
+            with transaction.atomic():
+                for (n, region) in enumerate(regions):
+                    if n % 10 == 0:
+                        show_info('.', ending='')
+                    in_region = block.filter(location__intersects=region.geom)
+                    in_region.update(region=region)
+            show_info('', ending='\n')  # Print end of line
+
+    def _print_and_flush(self, *args, ending='\n'):
+        """Write to stdout and flush it to show the progress at once."""
+        self.stdout.write(*args, ending=ending)
+        # NOTE(review): on older Django versions flush() of the stdout
+        # wrapper might not reach the underlying stream -- verify.
+        self.stdout.flush()
+
+    def _null_print(self, *args, ending='\n'):
+        """Discard the output.  Used when verbosity is 0."""
+        pass
diff --git a/parkings/tests/test_fill_parking_regions_command.py b/parkings/tests/test_fill_parking_regions_command.py
new file mode 100644
index 00000000..8cb3c506
--- /dev/null
+++ b/parkings/tests/test_fill_parking_regions_command.py
@@ -0,0 +1,82 @@
+import re
+
+import pytest
+from dateutil.parser import parse as parse_date
+
+from parkings.management.commands import fill_parking_regions
+
+from .utils import (
+    call_mgmt_cmd_with_output, create_parkings_and_regions, intersects,
+    intersects_with_any)
+
+
+@pytest.mark.django_db
+def test_fill_parking_regions_mgmt_cmd():
+    """Test that the command fills the regions and reports progress."""
+    (parkings, regions) = create_parkings_and_regions()
+
+    # Clear the regions
+    for parking in parkings:
+        # First save without a location to prevent region being autofilled
+        old_location = parking.location
+        parking.location = None
+        parking.region = None
+        parking.save()
+        # Then fill the location back, but save only location
+        parking.location = old_location
+        parking.save(update_fields=['location'])
+
+    assert all(parking.region is None for parking in parkings)
+
+    # Call the command with output streams attached
+    target_block_size = 5
+    (stdout, stderr) = call_the_command(target_block_size)
+
+    # Check the results
+    for parking in parkings:
+        parking.refresh_from_db()
+        if parking.location and intersects_with_any(parking.location, regions):
+            assert parking.region is not None
+            assert intersects(parking.location, parking.region)
+        else:
+            assert parking.region is None
+
+    # Check the outputted lines
+    block_count = len(parkings) // target_block_size
+    for (n, line) in enumerate(stdout.splitlines(), 1):
+        match = re.match(
+            r'^Processing block +(\d+)/ *(\d+), size +(\d+), ([^.]*)(\.+)$',
+            line)
+        assert match, 'Invalid output line {}: {!r}'.format(n, line)
+        assert match.group(1) == str(n)
+        assert match.group(2) == str(block_count)
+        assert 0 <= int(match.group(3)) <= len(parkings)
+        (start_str, end_str) = match.group(4).split('--')
+        block_start = parse_date(start_str)
+        block_end = parse_date(end_str)
+        assert block_start <= block_end
+        assert len(match.group(5)) == len(regions) // 10
+    assert stderr == ''
+
+    # Check that the command doesn't do anything if all parkings with a
+    # location have region filled
+    for parking in parkings:
+        if not parking.region:
+            parking.location = None
+            parking.save()
+    (stdout, stderr) = call_the_command(target_block_size)
+    assert stdout == 'Nothing to do\n'
+    assert stderr == ''
+
+    # And finally check that it doesn't print anything when verbosity=0
+    (stdout, stderr) = call_the_command(target_block_size, verbosity=0)
+    assert stdout == ''
+    assert stderr == ''
+
+
+def call_the_command(*args, **kwargs):
+    """Call the command and return its (stdout, stderr) output."""
+    (result, stdout, stderr) = call_mgmt_cmd_with_output(
+        fill_parking_regions.Command, *args, **kwargs)
+    assert result is None
+    return (stdout, stderr)
diff --git a/parkings/tests/utils.py b/parkings/tests/utils.py
index f4fd558e..fefd154f 100644
--- a/parkings/tests/utils.py
+++ b/parkings/tests/utils.py
@@ -2,6 +2,8 @@
 
 from django.core import management
 
+from parkings.factories import ParkingFactory, RegionFactory
+
 
 def call_mgmt_cmd_with_output(command_cls, *args, **kwargs):
     assert issubclass(command_cls, management.BaseCommand)
@@ -11,3 +13,44 @@ def call_mgmt_cmd_with_output(command_cls, *args, **kwargs):
     assert isinstance(cmd, management.BaseCommand)
     result = management.call_command(cmd, *args, **kwargs)
     return (result, stdout.getvalue(), stderr.getvalue())
+
+
+def create_parkings_and_regions(parking_count=100, region_count=20):
+    """
+    Create parkings and regions so that some parkings are in regions.
+
+    Return a tuple (parkings, regions).
+    """
+    regions = RegionFactory.create_batch(region_count)
+    parkings = ParkingFactory.create_batch(parking_count)
+
+    centroids = [region.geom.centroid for region in regions]
+    touching_points = [p for p in centroids if intersects_with_any(p, regions)]
+
+    # Make sure that some of the parkings are inside the regions
+    for (point, parking) in zip(touching_points, parkings):
+        parking.location = point
+        parking.save()
+
+    for parking in parkings:  # pragma: no cover
+        if intersects_with_any(parking.location, regions):
+            assert parking.region
+            assert intersects(parking.location, parking.region)
+        else:
+            assert parking.region is None
+
+    return (parkings, regions)
+
+
+def intersects_with_any(point, regions):
+    """Return True if the point intersects with any of the regions."""
+    assert regions
+    p = point.transform(regions[0].geom.srid, clone=True)
+    assert all(x.geom.srid == p.srid for x in regions)
+    return any(p.intersects(x.geom) for x in regions)
+
+
+def intersects(point, region):
+    """Return True if the point intersects with the region."""
+    geom = region.geom
+    return point.transform(geom.srid, clone=True).intersects(geom)