Skip to content

Commit

Permalink
Add command for filling regions of parkings
Browse files Browse the repository at this point in the history
New parkings with a location will get the region field filled on their
`save` method, but the existing parkings will have to be processed
separately.  Add a management command for that task.

The command divides the parkings into time intervals so that each time
interval has about 20000 parkings.  Then each time interval is processed
separately in a transaction.  This should (a) limit the amount of
resources the command consumes and (b) make it easy to abort the
command, if it takes too many resources, and then continue later where
it left off.
  • Loading branch information
suutari-ai committed Feb 12, 2018
1 parent e5ddd9e commit 531869b
Show file tree
Hide file tree
Showing 3 changed files with 187 additions and 0 deletions.
71 changes: 71 additions & 0 deletions parkings/management/commands/fill_parking_regions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
#!/usr/bin/env python
"""
Fill regions to Parking objects.
"""
import datetime

from django.core.management.base import BaseCommand
from django.db import transaction

from ...models import Parking, Region


class Command(BaseCommand):
    # Management command that fills the ``region`` field of the parkings
    # which have a location but no region yet.
    #
    # NOTE: This class deliberately has no docstring.  A class docstring
    # would bind ``__doc__`` inside the class body, and the ``help``
    # attribute below would then be derived from it instead of from the
    # module docstring.
    help = __doc__.strip().splitlines()[0]

    def add_arguments(self, parser):
        """Register the optional positional ``block_size_target`` argument."""
        parser.add_argument(
            'block_size_target', type=int, nargs='?', default=20000,
            help=(
                "Block size target, "
                "i.e. the number of parkings to process at time"))

    def handle(self, block_size_target, *args, **options):
        """
        Fill the region of every parking that has a location but no region.

        The matching parkings are ordered by ``created_at`` and their
        time range is split into equally long intervals ("blocks"), the
        number of which is chosen so that a block holds roughly
        ``block_size_target`` parkings on average.  Each block is then
        updated inside its own transaction, one region at a time.

        :param block_size_target: desired number of parkings per block
        :param options: parsed command options; ``verbosity`` is read
        :raises CommandError: if ``block_size_target`` is less than one
        """
        # Imported here to keep this change self-contained with respect
        # to the import block at the top of the module.
        from django.core.management.base import CommandError

        if block_size_target < 1:
            # A zero target used to die with a bare ZeroDivisionError.
            raise CommandError("Block size target must be at least 1")

        silent = (int(options['verbosity']) == 0)
        show_info = self._null_print if silent else self._print_and_flush

        regions = Region.objects.all()
        parkings = (
            Parking.objects
            .exclude(location=None)
            .filter(region=None)
            .order_by('created_at'))
        count = parkings.count()

        if not count:
            show_info("Nothing to do")
            return

        block_count = max(count // block_size_target, 1)
        # Truncate the start down to a full minute so that the printed
        # block boundaries have no fractional seconds.
        start = parkings.first().created_at.replace(microsecond=0, second=0)
        end = parkings.last().created_at
        # The "+ 1" rounds the block length up, which guarantees that
        # the (exclusive) end of the last block is after the newest
        # parking.
        block_seconds = int((end - start).total_seconds() / block_count) + 1
        block_span = datetime.timedelta(seconds=block_seconds)

        for block_num in range(block_count):
            block_start = start + (block_num * block_span)
            block_end = block_start + block_span
            block = parkings.filter(
                created_at__gte=block_start,
                created_at__lt=block_end)
            block_size = block.count()

            show_info(
                "Processing block {:5d}/{:5d}, size {:6d}, {}--{}".format(
                    block_num + 1, block_count, block_size,
                    block_start, block_end), ending='')

            # One transaction per block: aborting the command loses at
            # most the block being processed, and a later run continues
            # with the parkings that still lack a region.
            with transaction.atomic():
                for (n, region) in enumerate(regions):
                    if n % 10 == 0:
                        # Progress indicator: a dot per ten regions
                        show_info('.', ending='')
                    in_region = block.filter(location__intersects=region.geom)
                    in_region.update(region=region)
            show_info('', ending='\n')  # Print end of line

    def _print_and_flush(self, *args, ending='\n'):
        """Write to the command's stdout and flush it right away."""
        self.stdout.write(*args, ending=ending)
        # Progress output is written without a line ending, so flush
        # explicitly to make it show up while the block is processed.
        self.stdout.flush()

    def _null_print(self, *args, ending='\n'):
        """Discard the output; used when verbosity is 0."""
        pass
80 changes: 80 additions & 0 deletions parkings/tests/test_fill_parking_regions_command.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
import re

import pytest
from dateutil.parser import parse as parse_date

from parkings.management.commands import fill_parking_regions

from .utils import (
call_mgmt_cmd_with_output, create_parkings_and_regions, intersects,
intersects_with_any)


@pytest.mark.django_db
def test_fill_parking_regions_mgmt_cmd():
    """
    Run the fill_parking_regions command and check regions and output.

    Covers three runs of the command: one that has parkings to process,
    one that has nothing to do, and one with verbosity 0.
    """
    (parkings, regions) = create_parkings_and_regions()

    # Clear the regions
    for parking in parkings:
        # First save without a location to prevent region being autofilled
        old_location = parking.location
        parking.location = None
        parking.region = None
        parking.save()
        # Then fill the location back, but save only location
        parking.location = old_location
        parking.save(update_fields=['location'])

    assert all(parking.region is None for parking in parkings)

    # Call the command with output streams attached
    target_block_size = 5
    (stdout, stderr) = call_the_command(target_block_size)

    # Check the results
    for parking in parkings:
        parking.refresh_from_db()
        if parking.location and intersects_with_any(parking.location, regions):
            assert parking.region is not None
            assert intersects(parking.location, parking.region)
        else:
            assert parking.region is None

    # Check the outputted lines
    block_count = len(parkings) // target_block_size
    # The command prints a dot for every region index divisible by ten,
    # i.e. ceil(len(regions) / 10) dots per block.
    expected_dot_count = (len(regions) + 9) // 10
    # Raw string: "\d" and "\." are not valid string escape sequences.
    line_pattern = re.compile(
        r'^Processing block +(\d+)/ *(\d+), size +(\d+), ([^.]*)(\.+)$')
    output_lines = stdout.splitlines()
    # Without this the per-line checks below would pass vacuously if
    # the command printed nothing at all.
    assert len(output_lines) == block_count
    for (n, line) in enumerate(output_lines, 1):
        match = line_pattern.match(line)
        assert match, 'Invalid output line {}: {!r}'.format(n, line)
        assert match.group(1) == str(n)
        assert match.group(2) == str(block_count)
        assert 0 <= int(match.group(3)) <= len(parkings)
        (start_str, end_str) = match.group(4).split('--')
        block_start = parse_date(start_str)
        block_end = parse_date(end_str)
        assert block_start <= block_end
        assert len(match.group(5)) == expected_dot_count
    assert stderr == ''

    # Check that the command doesn't do anything if all parkings with a
    # location have region filled
    for parking in parkings:
        if not parking.region:
            parking.location = None
            parking.save()
    (stdout, stderr) = call_the_command(target_block_size)
    assert stdout == 'Nothing to do\n'
    assert stderr == ''

    # And finally check that it doesn't print anything when verbosity=0
    (stdout, stderr) = call_the_command(target_block_size, verbosity=0)
    assert stdout == ''
    assert stderr == ''


def call_the_command(*args, **kwargs):
    """
    Run the fill_parking_regions command and return its output.

    The positional and keyword arguments are passed on to the command.
    Asserts that the command returns None.

    :return: tuple (stdout, stderr) of the captured output strings
    """
    outcome = call_mgmt_cmd_with_output(
        fill_parking_regions.Command, *args, **kwargs)
    return_value, captured_out, captured_err = outcome
    assert return_value is None
    return (captured_out, captured_err)
36 changes: 36 additions & 0 deletions parkings/tests/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

from django.core import management

from parkings.factories import ParkingFactory, RegionFactory


def call_mgmt_cmd_with_output(command_cls, *args, **kwargs):
assert issubclass(command_cls, management.BaseCommand)
Expand All @@ -11,3 +13,37 @@ def call_mgmt_cmd_with_output(command_cls, *args, **kwargs):
assert isinstance(cmd, management.BaseCommand)
result = management.call_command(cmd, *args, **kwargs)
return (result, stdout.getvalue(), stderr.getvalue())


def create_parkings_and_regions(parking_count=100, region_count=20):
    """
    Create a batch of regions and a batch of parkings with the factories.

    Some of the parkings are moved to region centroids so that they are
    known to lie inside a region.  Before returning, every parking is
    checked to have a region exactly when its location intersects one.

    :param parking_count: number of parkings to create
    :param region_count: number of regions to create
    :return: tuple (parkings, regions)
    """
    regions = RegionFactory.create_batch(region_count)
    parkings = ParkingFactory.create_batch(parking_count)

    # Collect the centroids that actually touch some region
    touching_points = []
    for region in regions:
        centroid = region.geom.centroid
        if intersects_with_any(centroid, regions):
            touching_points.append(centroid)

    # Move one parking onto each such point; zip stops at the shorter
    # of the two sequences
    for (parking, point) in zip(parkings, touching_points):
        parking.location = point
        parking.save()

    for parking in parkings:  # pragma: no cover
        if not intersects_with_any(parking.location, regions):
            assert parking.region is None
            continue
        assert parking.region
        assert intersects(parking.location, parking.region)

    return (parkings, regions)


def intersects_with_any(point, regions):
    """
    Check whether the point intersects the geometry of any region.

    The point is transformed (as a clone) to the SRID of the first
    region before comparing.  Asserts that ``regions`` is non-empty and
    that all region geometries share that SRID.

    :param point: object with ``transform`` method
    :param regions: non-empty indexable sequence of objects with ``geom``
    :return: True if at least one region geometry intersects the point
    """
    assert regions
    target_srid = regions[0].geom.srid
    projected = point.transform(target_srid, clone=True)
    geoms = [region.geom for region in regions]
    assert all(geom.srid == projected.srid for geom in geoms)
    for geom in geoms:
        if projected.intersects(geom):
            return True
    return False


def intersects(point, region):
    """
    Check whether the point intersects the geometry of the region.

    The point is transformed (as a clone) to the SRID of the region
    geometry before the comparison.

    :return: the result of the ``intersects`` call on the transformed point
    """
    region_geom = region.geom
    projected = point.transform(region_geom.srid, clone=True)
    return projected.intersects(region_geom)

0 comments on commit 531869b

Please sign in to comment.