Rough in search method #67

Merged
merged 9 commits on Apr 18, 2018
45 changes: 45 additions & 0 deletions legistar/base.py
@@ -253,6 +253,42 @@ def toTime(self, text) :
        time = pytz.timezone(self.TIMEZONE).localize(time)
        return time

    def search(self, route, item_key, search_conditions):
        """
        Base method for searching the Legistar API.

        Arguments:

        route -- the path to search, e.g. /matters/, /events/, etc.
        item_key -- the unique id field of the items you are searching,
                    e.g. MatterId or EventId. This is necessary for
                    proper pagination.
        search_conditions -- a string expressing your search conditions
                             in the OData $filter format:
                             http://www.odata.org/documentation/odata-version-3-0/url-conventions/#url5.1.2

        It would be nice if we could provide a friendlier search API,
        something like https://github.com/tuomur/python-odata

        Example:
        # Search for bills introduced after Jan. 1, 2017
        search('/matters/', 'MatterId', "MatterIntroDate gt datetime'2017-01-01'")
        """

        search_url = self.BASE_URL + route

        params = {'$filter': search_conditions}

        try:
            yield from self.pages(search_url,
                                  params=params,
                                  item_key=item_key)
        except requests.HTTPError as e:
            if e.response.status_code == 400:
                raise ValueError(e.response.json()['Message'])
            raise


    def pages(self, url, params=None, item_key=None):
        if params is None:
            params = {}
@@ -263,10 +299,19 @@ def pages(self, url, params=None, item_key=None):
        while page_num == 0 or len(response.json()) == 1000 :
            params['$skip'] = page_num * 1000
            response = self.get(url, params=params)
            response.raise_for_status()

            for item in response.json() :
                if item[item_key] not in seen :
                    yield item
                    seen.append(item[item_key])

            page_num += 1

    def accept_response(self, response, **kwargs):
        '''
        This overrides a method that controls whether
        the scraper should retry on an error. We don't
        want to retry if the API returns a 400
        '''
        return response.status_code < 401
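
For reviewers, a minimal usage sketch of the new method. The scraper setup mirrors tests/conftest.py; the route, item_key, and OData filter below are illustrative, not part of this diff.

from legistar import base

scraper = base.LegistarAPIScraper(None, None)
scraper.BASE_URL = 'http://webapi.legistar.com/v1/chicago'
scraper.retry_attempts = 0
scraper.requests_per_minute = 0

# search() is a generator; pages() transparently walks the API 1000
# items at a time via $skip and de-duplicates on the item_key field.
matters = scraper.search('/matters/', 'MatterId',
                         "MatterIntroDate gt datetime'2017-01-01'")
for matter in matters:
    print(matter['MatterFile'])

# A malformed filter makes the API respond with a 400, which search()
# re-raises as ValueError when the generator is consumed.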
2 changes: 0 additions & 2 deletions requirements.txt
@@ -1,3 +1 @@
mock
pytest
icalendar
5 changes: 4 additions & 1 deletion setup.py
@@ -16,9 +16,12 @@
    platforms=['any'],
    dependency_links = ['git+ssh://[email protected]/opencivicdata/pupa.git'],
    install_requires=[
        'requests',
        'lxml',
        'pytz',
        'icalendar'
        'icalendar',
        'scrapelib',
        'pupa',
    ],
    classifiers=["Development Status :: 4 - Beta",
                 "Intended Audience :: Developers",
13 changes: 13 additions & 0 deletions tests/conftest.py
@@ -0,0 +1,13 @@
import pytest

from legistar import base

@pytest.fixture(scope="module")
def scraper():
    scraper = base.LegistarAPIScraper(None, None)
    scraper.BASE_URL = 'http://webapi.legistar.com/v1/chicago'
    scraper.retry_attempts = 0
    scraper.requests_per_minute = 0
    return scraper


18 changes: 18 additions & 0 deletions tests/test_search.py
@@ -0,0 +1,18 @@
import pytest

from legistar import base

class TestAPISearch(object):

    def test_search_raises(self, scraper):
        with pytest.raises(ValueError):
            results = scraper.search('/events/', 'EventId',
                                     "MatterFile eq 'O2010-5046'")
            list(results)

    def test_search(self, scraper):
        results = scraper.search('/matters/', 'MatterId',
                                 "MatterFile eq 'O2010-5046'")

        assert len(list(results)) == 1
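
Note that these tests hit the live Chicago endpoint, so they need network access. A quick way to run just this file (a sketch, assuming pytest is installed in the environment):

import pytest

# Run only the new search tests, with quiet output.
pytest.main(['-q', 'tests/test_search.py'])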