Rough in search method #67

Merged
merged 9 commits on Apr 18, 2018
45 changes: 45 additions & 0 deletions legistar/base.py
@@ -253,6 +253,42 @@ def toTime(self, text) :
        time = pytz.timezone(self.TIMEZONE).localize(time)
        return time

    def search(self, route, item_key, search_conditions):
        """
        Base method for searching the Legistar API.

        Arguments:

        route -- the path to search, e.g. /matters/, /events/, etc.
        item_key -- the unique id field of the items you are searching,
                    e.g. MatterId or EventId. This is necessary for
                    proper pagination.
        search_conditions -- a string expressing your search conditions
                             in the OData $filter format:
                             http://www.odata.org/documentation/odata-version-3-0/url-conventions/#url5.1.2

        It would be nice if we could provide a friendlier search API,
        something like https://github.com/tuomur/python-odata

        Example:
        # Search for bills introduced after Jan. 1, 2017
        search('/matters/', 'MatterId', "MatterIntroDate gt datetime'2017-01-01'")
        """

        search_url = self.BASE_URL + route

        params = {'$filter': search_conditions}

        try:
            yield from self.pages(search_url,
                                  params=params,
                                  item_key=item_key)
        except requests.HTTPError as e:
            if e.response.status_code == 400:
                raise ValueError(e.response.json()['Message'])
            raise


    def pages(self, url, params=None, item_key=None):
        if params is None:
            params = {}
@@ -263,10 +299,19 @@ def pages(self, url, params=None, item_key=None):
        while page_num == 0 or len(response.json()) == 1000 :
            params['$skip'] = page_num * 1000
            response = self.get(url, params=params)
            response.raise_for_status()

            for item in response.json() :
                if item[item_key] not in seen :
                    yield item
                    seen.append(item[item_key])

            page_num += 1

    def accept_response(self, response, **kwargs):
        '''
        This overrides a method that controls whether
        the scraper should retry on an error. We don't
        want to retry if the API returns a 400
        '''
        return response.status_code < 401
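
For reviewers, a minimal usage sketch of the new method. The scraper setup mirrors tests/conftest.py; the route, item_key, and OData filter below are illustrative, not part of this diff.

from legistar import base

scraper = base.LegistarAPIScraper(None, None)
scraper.BASE_URL = 'http://webapi.legistar.com/v1/chicago'
scraper.retry_attempts = 0
scraper.requests_per_minute = 0

# search() is a generator; pages() transparently walks the API 1000
# items at a time via $skip and de-duplicates on the item_key field.
matters = scraper.search('/matters/', 'MatterId',
                         "MatterIntroDate gt datetime'2017-01-01'")
for matter in matters:
    print(matter['MatterFile'])

# A malformed filter makes the API respond with a 400, which search()
# re-raises as ValueError when the generator is consumed.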
2 changes: 0 additions & 2 deletions requirements.txt
@@ -1,3 +1 @@
mock
pytest
icalendar
5 changes: 4 additions & 1 deletion setup.py
@@ -16,9 +16,12 @@
    platforms=['any'],
    dependency_links = ['git+ssh://[email protected]/opencivicdata/pupa.git'],
    install_requires=[
        'requests',
        'lxml',
        'pytz',
        'icalendar'
        'icalendar',
        'scrapelib',
        'pupa',
    ],
    classifiers=["Development Status :: 4 - Beta",
                 "Intended Audience :: Developers",
13 changes: 13 additions & 0 deletions tests/conftest.py
@@ -0,0 +1,13 @@
import pytest

from legistar import base

@pytest.fixture(scope="module")
def scraper():
    scraper = base.LegistarAPIScraper(None, None)
    scraper.BASE_URL = 'http://webapi.legistar.com/v1/chicago'
    scraper.retry_attempts = 0
    scraper.requests_per_minute = 0
    return scraper


18 changes: 18 additions & 0 deletions tests/test_search.py
@@ -0,0 +1,18 @@
import pytest

from legistar import base

class TestAPISearch(object):

    def test_search_raises(self, scraper):
        with pytest.raises(ValueError):
            results = scraper.search('/events/', 'EventId',
                                     "MatterFile eq 'O2010-5046'")
            list(results)

    def test_search(self, scraper):
        results = scraper.search('/matters/', 'MatterId',
                                 "MatterFile eq 'O2010-5046'")

        assert len(list(results)) == 1
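
Note that these tests hit the live Chicago endpoint, so they need network access. A quick way to run just this file (a sketch, assuming pytest is installed in the environment):

import pytest

# Run only the new search tests, with quiet output.
pytest.main(['-q', 'tests/test_search.py'])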