Skip to content

Commit

Permalink
- added documentation for complex query
Browse files Browse the repository at this point in the history
- renamed QueryOper to QueryItems
  • Loading branch information
gregorleban committed Apr 11, 2017
1 parent f8d6b59 commit 97142f7
Show file tree
Hide file tree
Showing 4 changed files with 65 additions and 27 deletions.
66 changes: 52 additions & 14 deletions eventregistry/Query.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import six


class QueryOper:
class QueryItems:
_AND = "$and"
_OR = "$or"
_Undef = None
Expand All @@ -13,11 +13,11 @@ def __init__(self, oper, items):

@staticmethod
def AND(items):
return QueryOper(QueryOper._AND, items)
return QueryItems(QueryItems._AND, items)

@staticmethod
def OR(items):
return QueryOper(QueryOper._OR, items)
return QueryItems(QueryItems._OR, items)

def getOper(self):
return self._oper
Expand All @@ -27,13 +27,6 @@ def getItems(self):



class QueryType:
Event = 1
Story = 2
Article = 3
Undef = None


class _QueryCore(object):
def __init__(self):
self._queryObj = {}
Expand All @@ -55,7 +48,7 @@ def _setValIfNotDefault(self, propName, value, defVal):

class BaseQuery(_QueryCore):
def __init__(self,
keywords = None,
keyword = None,
conceptUri = None,
sourceUri = None,
locationUri = None,
Expand All @@ -67,9 +60,23 @@ def __init__(self,
dateMentionEnd = None,
categoryIncludeSub = True,
minMaxArticlesInEvent = None):
"""
@param keyword: keyword(s) to query. Either None, string or QueryItems
@param conceptUri: concept(s) to query. Either None, string or QueryItems
@param sourceUri: source(s) to query. Either None, string or QueryItems
@param locationUri: location(s) to query. Either None, string or QueryItems
@param categoryUri: categories to query. Either None, string or QueryItems
@param lang: language(s) to query. Either None, string or QueryItems
@param dateStart: starting date. Either None, string or date or datetime
@param dateEnd: ending date. Either None, string or date or datetime
@param dateMentionStart: search by mentioned dates - use this as the starting date. Either None, string or date or datetime
@param dateMentionEnd: search by mentioned dates - use this as the ending date. Either None, string or date or datetime
@param categoryIncludeSub: should we include the subcategories of the searched categories?
@param minMaxArticlesInEvent: a tuple containing the minimum and maximum number of articles that should be in the resulting events. Parameter relevant only if querying events
"""
super(BaseQuery, self).__init__()

self._setQueryArrVal("keywords", keywords)
self._setQueryArrVal("keyword", keyword)
self._setQueryArrVal("conceptUri", conceptUri)
self._setQueryArrVal("sourceUri", sourceUri)
self._setQueryArrVal("locationUri", locationUri)
Expand Down Expand Up @@ -99,8 +106,8 @@ def _setQueryArrVal(self, propName, value):
# by default we have None - so don't do anything
if value is None:
return
# if we have an instance of QueryOper then apply it
if isinstance(value, QueryOper):
# if we have an instance of QueryItems then apply it
if isinstance(value, QueryItems):
self._queryObj[propName] = { value.getOper(): value.getItems() }

# if we have a string value, just use it
Expand All @@ -119,6 +126,10 @@ def __init__(self):

@staticmethod
def AND(queryArr):
"""
create a combined query with multiple items on which to perform an AND operation
@param queryArr: a list of items on which to perform an AND operation. Each item can be either a CombinedQuery or a BaseQuery instance.
"""
assert isinstance(queryArr, list), "provided argument as not a list"
assert len(queryArr) > 0, "queryArr had an empty list"
q = CombinedQuery()
Expand All @@ -131,6 +142,10 @@ def AND(queryArr):

@staticmethod
def OR(queryArr):
"""
create a combined query with multiple items on which to perform an OR operation
@param queryArr: a list of items on which to perform an OR operation. Each item can be either a CombinedQuery or a BaseQuery instance.
"""
assert isinstance(queryArr, list), "provided argument as not a list"
assert len(queryArr) > 0, "queryArr had an empty list"
q = CombinedQuery()
Expand All @@ -149,6 +164,24 @@ def __init__(self,
isDuplicateFilter = "keepAll",
hasDuplicateFilter = "keepAll",
eventFilter = "keepAll"):
"""
create an article query using a complex query
@param includeQuery: an instance of CombinedQuery or BaseQuery to use to find articles that match the conditions
@param excludeQuery: an instance of CombinedQuery or BaseQuery (or None) to find articles to exclude from the articles matched with the includeQuery
@param isDuplicateFilter: some articles can be duplicates of other articles. What should be done with them. Possible values are:
"skipDuplicates" (skip the resulting articles that are duplicates of other articles)
"keepOnlyDuplicates" (return only the duplicate articles)
"keepAll" (no filtering, default)
@param hasDuplicateFilter: some articles are later copied by others. What should be done with such articles. Possible values are:
"skipHasDuplicates" (skip the resulting articles that have been later copied by others)
"keepOnlyHasDuplicates" (return only the articles that have been later copied by others)
"keepAll" (no filtering, default)
@param eventFilter: some articles describe a known event and some don't. This filter allows you to filter the resulting articles based on this criterion.
Possible values are:
"skipArticlesWithoutEvent" (skip articles that are not describing any known event in ER)
"keepOnlyArticlesWithoutEvent" (return only the articles that are not describing any known event in ER)
"keepAll" (no filtering, default)
"""
super(ComplexArticleQuery, self).__init__()

assert isinstance(includeQuery, (CombinedQuery, BaseQuery)), "includeQuery parameter was not a CombinedQuery or BaseQuery instance"
Expand All @@ -167,6 +200,11 @@ class ComplexEventQuery(_QueryCore):
def __init__(self,
includeQuery,
excludeQuery = None):
"""
create an event query using a complex query
@param includeQuery: an instance of CombinedQuery or BaseQuery to use to find events that match the conditions
@param excludeQuery: an instance of CombinedQuery or BaseQuery (or None) to find events to exclude from the events matched with the includeQuery
"""
super(ComplexEventQuery, self).__init__()

assert isinstance(includeQuery, (CombinedQuery, BaseQuery)), "includeQuery parameter was not a CombinedQuery or BaseQuery instance"
Expand Down
12 changes: 6 additions & 6 deletions eventregistry/tests/TestQueryArticlesComplex.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,14 @@ def getQueryUriListForQueryArticles(self, q):

def testCompareSameResults1(self):
cq1 = ComplexArticleQuery(
includeQuery = BaseQuery(conceptUri = QueryOper.AND([self.er.getConceptUri("obama"), self.er.getConceptUri("trump")])),
excludeQuery = BaseQuery(lang = QueryOper.OR(["eng", "deu"])))
includeQuery = BaseQuery(conceptUri = QueryItems.AND([self.er.getConceptUri("obama"), self.er.getConceptUri("trump")])),
excludeQuery = BaseQuery(lang = QueryItems.OR(["eng", "deu"])))

cq2 = ComplexArticleQuery(
includeQuery = CombinedQuery.AND([
BaseQuery(conceptUri = self.er.getConceptUri("obama")),
BaseQuery(conceptUri = self.er.getConceptUri("trump"))]),
excludeQuery = BaseQuery(lang = QueryOper.OR(["eng", "deu"])))
excludeQuery = BaseQuery(lang = QueryItems.OR(["eng", "deu"])))

q = QueryArticles(conceptUri = [self.er.getConceptUri("obama"), self.er.getConceptUri("trump")], conceptOper = "AND", ignoreLang = ["eng", "deu"])

Expand All @@ -38,14 +38,14 @@ def testCompareSameResults1(self):

def testCompareSameResults2(self):
cq1 = ComplexArticleQuery(
includeQuery = BaseQuery(sourceUri = QueryOper.OR([self.er.getNewsSourceUri("bbc"), self.er.getNewsSourceUri("associated press")])),
excludeQuery = BaseQuery(conceptUri = QueryOper.OR([self.er.getConceptUri("obama")])))
includeQuery = BaseQuery(sourceUri = QueryItems.OR([self.er.getNewsSourceUri("bbc"), self.er.getNewsSourceUri("associated press")])),
excludeQuery = BaseQuery(conceptUri = QueryItems.OR([self.er.getConceptUri("obama")])))

cq2 = ComplexArticleQuery(
includeQuery = CombinedQuery.OR([
BaseQuery(sourceUri = self.er.getNewsSourceUri("bbc")),
BaseQuery(sourceUri = self.er.getNewsSourceUri("associated press"))]),
excludeQuery = BaseQuery(conceptUri = QueryOper.OR([self.er.getConceptUri("obama")])))
excludeQuery = BaseQuery(conceptUri = QueryItems.OR([self.er.getConceptUri("obama")])))

q = QueryArticles(sourceUri = [self.er.getNewsSourceUri("bbc"), self.er.getNewsSourceUri("associated press")], ignoreConceptUri = self.er.getConceptUri("obama"))

Expand Down
12 changes: 6 additions & 6 deletions eventregistry/tests/TestQueryEventsComplex.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,14 @@ def getQueryUriListForQueryEvents(self, q):

def testCompareSameResults1(self):
cq1 = ComplexEventQuery(
includeQuery = BaseQuery(conceptUri = QueryOper.AND([self.er.getConceptUri("obama"), self.er.getConceptUri("trump")])),
excludeQuery = BaseQuery(lang = QueryOper.OR(["eng", "deu"])))
includeQuery = BaseQuery(conceptUri = QueryItems.AND([self.er.getConceptUri("obama"), self.er.getConceptUri("trump")])),
excludeQuery = BaseQuery(lang = QueryItems.OR(["eng", "deu"])))

cq2 = ComplexEventQuery(
includeQuery = CombinedQuery.AND([
BaseQuery(conceptUri = self.er.getConceptUri("obama")),
BaseQuery(conceptUri = self.er.getConceptUri("trump"))]),
excludeQuery = BaseQuery(lang = QueryOper.OR(["eng", "deu"])))
excludeQuery = BaseQuery(lang = QueryItems.OR(["eng", "deu"])))

q = QueryEvents(conceptUri = [self.er.getConceptUri("obama"), self.er.getConceptUri("trump")], conceptOper = "AND", ignoreLang = ["eng", "deu"])

Expand All @@ -38,14 +38,14 @@ def testCompareSameResults1(self):

def testCompareSameResults2(self):
cq1 = ComplexEventQuery(
includeQuery = BaseQuery(sourceUri = QueryOper.OR([self.er.getNewsSourceUri("bbc"), self.er.getNewsSourceUri("associated press")])),
excludeQuery = BaseQuery(conceptUri = QueryOper.OR([self.er.getConceptUri("obama")])))
includeQuery = BaseQuery(sourceUri = QueryItems.OR([self.er.getNewsSourceUri("bbc"), self.er.getNewsSourceUri("associated press")])),
excludeQuery = BaseQuery(conceptUri = QueryItems.OR([self.er.getConceptUri("obama")])))

cq2 = ComplexEventQuery(
includeQuery = CombinedQuery.OR([
BaseQuery(sourceUri = self.er.getNewsSourceUri("bbc")),
BaseQuery(sourceUri = self.er.getNewsSourceUri("associated press"))]),
excludeQuery = BaseQuery(conceptUri = QueryOper.OR([self.er.getConceptUri("obama")])))
excludeQuery = BaseQuery(conceptUri = QueryItems.OR([self.er.getConceptUri("obama")])))

q = QueryEvents(sourceUri = [self.er.getNewsSourceUri("bbc"), self.er.getNewsSourceUri("associated press")], ignoreConceptUri = self.er.getConceptUri("obama"))

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ def readme():
return f.read()

setup(name='eventregistry',
version='6.4.0',
version='6.4.1',
description = "A package that can be used to query information in Event Registry (http://eventregistry.org/)",
classifiers=[
'Development Status :: 4 - Beta',
Expand Down

0 comments on commit 97142f7

Please sign in to comment.