Skip to content

Commit

Permalink
Merge pull request #268 from datamade/patch/hcg/refresh-bill-version
Browse files Browse the repository at this point in the history
Refresh bill *versions*, as well as bill documents
  • Loading branch information
hancush authored Jul 9, 2020
2 parents 64adab5 + 13b8994 commit 35016c7
Show file tree
Hide file tree
Showing 3 changed files with 78 additions and 34 deletions.
87 changes: 59 additions & 28 deletions councilmatic_core/management/commands/refresh_pic.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,37 +10,84 @@
from django.db.models import Q
import pytz

from opencivicdata.legislative.models import BillDocumentLink, EventDocumentLink, \
EventRelatedEntity
from opencivicdata.legislative.models import BillDocumentLink, BillVersionLink, \
EventDocumentLink, EventRelatedEntity


# Refuse to load this module unless both AWS settings used by the command
# below are defined on the Django settings object.
for configuration in ('AWS_KEY', 'AWS_SECRET'):
    if hasattr(settings, configuration):
        continue

    raise ImproperlyConfigured(
        'Please define {0} in settings_deployment.py'.format(configuration))


# Apply the logging configuration dictionary from the Django settings, then
# create the logger used by this module (named after the module itself).
logging.config.dictConfig(settings.LOGGING)
logger = logging.getLogger(__name__)


class Command(BaseCommand):
help = 'Refreshes the property image cache by deleting documents that need to be newly created'

def __init__(self, *args, **kwargs):
    '''
    Set up the command and precompute the values shared by the URL lookups:

    - `local_now`: the current naive datetime, localized to the timezone
      named by `settings.TIME_ZONE`.
    - `bills_on_upcoming_agendas`: a queryset of bill IDs for related
      entities that reference a bill and whose agenda item's event starts
      at or after `local_now`.
    '''
    super().__init__(*args, **kwargs)

    app_timezone = pytz.timezone(settings.TIME_ZONE)
    naive_now = datetime.datetime.now()
    self.local_now = app_timezone.localize(naive_now)

    upcoming_bill_entities = EventRelatedEntity.objects.filter(
        bill__isnull=False,
        agenda_item__event__start_date__gte=self.local_now,
    )
    self.bills_on_upcoming_agendas = upcoming_bill_entities.values_list('bill__id')

def handle(self, *args, **options):
    '''
    Delete the cached copies of changed and upcoming documents from the
    `councilmatic-document-cache` S3 bucket.

    The document URLs returned by `_get_urls` are converted to S3 keys by
    `_create_keys`, and those keys are deleted from the bucket in a single
    `delete_keys` call. The positional and keyword arguments supplied by
    the management command framework are accepted but not used.
    '''
    # boto is imported when the command runs rather than when the module
    # is imported.
    from boto.s3.connection import S3Connection

    # Connect and look up the bucket exactly once.
    s3_conn = S3Connection(settings.AWS_KEY, settings.AWS_SECRET)

    document_urls = self._get_urls()
    aws_keys = self._create_keys(document_urls)

    bucket = s3_conn.get_bucket('councilmatic-document-cache')
    bucket.delete_keys(aws_keys)

    # The count logged here is the number of keys submitted for deletion;
    # the result of `delete_keys` is not inspected.
    success_message = 'Removed {} document(s) from the councilmatic-document-cache'.format(len(aws_keys))
    logger.info(success_message)
def _get_bill_versions(self, window_start):
    '''
    Retrieve URLs of updated and upcoming versions, i.e., the bills
    themselves.

    A version link is selected when its bill's `updated_at` is at or after
    `window_start`, or when its bill is among
    `self.bills_on_upcoming_agendas`.

    Returns a flat values list of the matching links' URLs.
    '''
    recently_updated = Q(version__bill__updated_at__gte=window_start)
    upcoming = Q(version__bill__id__in=self.bills_on_upcoming_agendas)

    # This method only builds a query; deleting keys and logging are the
    # responsibility of `handle`, so no bucket or key names are used here.
    return BillVersionLink.objects.filter(
        recently_updated | upcoming
    ).values_list('url', flat=True)

def _get_bill_documents(self, window_start):
    '''
    Collect the URLs of bill document links (attachments to bills) that
    should be refreshed.

    A link qualifies when its bill has at least one version and, in
    addition, either the bill's `updated_at` is at or after `window_start`
    or the bill is among `self.bills_on_upcoming_agendas`.

    Returns a flat values list of the matching links' URLs.
    '''
    bill_has_versions = Q(document__bill__versions__isnull=False)
    changed_in_window = Q(document__bill__updated_at__gte=window_start)
    on_upcoming_agenda = Q(document__bill__id__in=self.bills_on_upcoming_agendas)

    criteria = bill_has_versions & (changed_in_window | on_upcoming_agenda)
    matching_links = BillDocumentLink.objects.filter(criteria)

    return matching_links.values_list('url', flat=True)

def _get_event_documents(self, window_start):
    '''
    Collect the URLs of event document links (i.e., agendas) that should
    be refreshed.

    A link qualifies when its event's `updated_at` is at or after
    `window_start`, or when the event's `start_date` is at or after
    `self.local_now`.

    Returns a flat values list of the matching links' URLs.
    '''
    changed_in_window = Q(document__event__updated_at__gte=window_start)
    not_yet_started = Q(document__event__start_date__gte=self.local_now)

    matching_links = EventDocumentLink.objects.filter(
        changed_in_window | not_yet_started)

    return matching_links.values_list('url', flat=True)

def _get_urls(self):
'''
Expand All @@ -54,29 +101,13 @@ def _get_urls(self):
that tell us to rescrape entities, toggling the updated timestamps in
our database.
'''
app_timezone = pytz.timezone(settings.TIME_ZONE)
one_hour_ago = app_timezone.localize(datetime.datetime.now()) - datetime.timedelta(hours=1)

has_versions = Q(document__bill__versions__isnull=False)

recently_updated = Q(document__bill__updated_at__gte=one_hour_ago)

bills_on_upcoming_agendas = EventRelatedEntity.objects.filter(
bill__isnull=False,
agenda_item__event__start_date__gt=one_hour_ago
).values_list('bill__id')

upcoming = Q(document__bill__id__in=bills_on_upcoming_agendas)

bill_docs = BillDocumentLink.objects.filter(
has_versions & (recently_updated | upcoming)
).values_list('url', flat=True)

event_docs = EventDocumentLink.objects.filter(
Q(document__event__updated_at__gte=one_hour_ago) | Q(document__event__start_date__gt=one_hour_ago)
).values_list('url', flat=True)
one_hour_ago = self.local_now - datetime.timedelta(hours=1)

return itertools.chain(bill_docs, event_docs)
return itertools.chain(
self._get_bill_versions(one_hour_ago),
self._get_bill_documents(one_hour_ago),
self._get_event_documents(one_hour_ago)
)

def _create_keys(self, document_urls):
return [urllib.parse.quote_plus(url) for url in document_urls]
10 changes: 8 additions & 2 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@
from councilmatic_core.models import Bill, Event
from opencivicdata.core.models import Jurisdiction, Division
from opencivicdata.legislative.models import BillDocumentLink, EventDocument, \
EventDocumentLink, LegislativeSession, BillVersion, BillDocument
EventDocumentLink, LegislativeSession, BillVersion, BillDocument, \
BillVersionLink


@pytest.fixture
Expand Down Expand Up @@ -90,7 +91,7 @@ def ocd_bill_document(metro_bill, transactional_db):
document = BillDocument.objects.create(**document_info)

document_link_info = {
'url': 'https://metro.legistar.com/ViewReport.ashx?M=R&N=TextL5&GID=557&ID=5016&GUID=LATEST&Title=Board+Report.pdf',
'url': 'http://metro.legistar1.com/metro/attachments/e041786b-a42a-4d03-bd3e-06d5b3113de2.pdf',
'document': document,
}

Expand All @@ -100,6 +101,11 @@ def ocd_bill_document(metro_bill, transactional_db):
note='test',
date='1992-02-16')

BillVersionLink.objects.create(
version=version,
url='https://metro.legistar.com/ViewReport.ashx?M=R&N=TextL5&GID=557&ID=5016&GUID=LATEST&Title=Board+Report.pdf'
)

metro_bill.versions.add(version)
metro_bill.save()

Expand Down
15 changes: 11 additions & 4 deletions tests/test_management_commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,25 @@
def test_refresh_pic(ocd_bill_document,
metro_event_document):
'''
Test that the `_get_urls` and `_create_keys` successfully finds changed bill and event documents
and converts their urls to a list of AWS keys.
Test that the `_get_urls` and `_create_keys` successfully finds changed
bill and event documents and converts their urls to a list of AWS keys.
'''
command = RefreshPic()
document_urls = list(command._get_urls())

# Test that each of the URLs we expect exist, and that no other URLs
# exist.
bill_version_link, = ocd_bill_document.bill.versions.get().links.all()
bill_doc_link, = ocd_bill_document.links.all()
event_doc_link, = metro_event_document.links.all()

assert (bill_doc_link.url in document_urls) == True
assert (event_doc_link.url in document_urls) == True
assert len(document_urls) == 3

assert bill_version_link.url in document_urls
assert bill_doc_link.url in document_urls
assert event_doc_link.url in document_urls

# Test that creating keys from URLs yields the correct number of keys.
aws_keys = command._create_keys(document_urls)

assert len(document_urls) == len(aws_keys)
Expand Down

0 comments on commit 35016c7

Please sign in to comment.