diff --git a/eventregistry/Query.py b/eventregistry/Query.py index fa5d389..ad3bd4e 100644 --- a/eventregistry/Query.py +++ b/eventregistry/Query.py @@ -2,7 +2,7 @@ import six -class QueryOper: +class QueryItems: _AND = "$and" _OR = "$or" _Undef = None @@ -13,11 +13,11 @@ def __init__(self, oper, items): @staticmethod def AND(items): - return QueryOper(QueryOper._AND, items) + return QueryItems(QueryItems._AND, items) @staticmethod def OR(items): - return QueryOper(QueryOper._OR, items) + return QueryItems(QueryItems._OR, items) def getOper(self): return self._oper @@ -27,13 +27,6 @@ def getItems(self): -class QueryType: - Event = 1 - Story = 2 - Article = 3 - Undef = None - - class _QueryCore(object): def __init__(self): self._queryObj = {} @@ -55,7 +48,7 @@ def _setValIfNotDefault(self, propName, value, defVal): class BaseQuery(_QueryCore): def __init__(self, - keywords = None, + keyword = None, conceptUri = None, sourceUri = None, locationUri = None, @@ -67,9 +60,23 @@ def __init__(self, dateMentionEnd = None, categoryIncludeSub = True, minMaxArticlesInEvent = None): + """ + @param keyword: keyword(s) to query. Either None, string or QueryItems + @param conceptUri: concept(s) to query. Either None, string or QueryItems + @param sourceUri: source(s) to query. Either None, string or QueryItems + @param locationUri: location(s) to query. Either None, string or QueryItems + @param categoryUri: categories to query. Either None, string or QueryItems + @param lang: language(s) to query. Either None, string or QueryItems + @param dateStart: starting date. Either None, string or date or datetime + @param dateEnd: ending date. Either None, string or date or datetime + @param dateMentionStart: search by mentioned dates - use this as the starting date. Either None, string or date or datetime + @param dateMentionEnd: search by mentioned dates - use this as the ending date. 
Either None, string or date or datetime + @param categoryIncludeSub: should we include the subcategories of the searched categories? + @param minMaxArticlesInEvent: a tuple containing the minimum and maximum number of articles that should be in the resulting events. Parameter relevant only if querying events + """ super(BaseQuery, self).__init__() - self._setQueryArrVal("keywords", keywords) + self._setQueryArrVal("keyword", keyword) self._setQueryArrVal("conceptUri", conceptUri) self._setQueryArrVal("sourceUri", sourceUri) self._setQueryArrVal("locationUri", locationUri) @@ -99,8 +106,8 @@ def _setQueryArrVal(self, propName, value): # by default we have None - so don't do anything if value is None: return - # if we have an instance of QueryOper then apply it - if isinstance(value, QueryOper): + # if we have an instance of QueryItems then apply it + if isinstance(value, QueryItems): self._queryObj[propName] = { value.getOper(): value.getItems() } # if we have a string value, just use it @@ -119,6 +126,10 @@ def __init__(self): @staticmethod def AND(queryArr): + """ + create a combined query with multiple items on which to perform an AND operation + @param queryArr: a list of items on which to perform an AND operation. Items can be either CombinedQuery or BaseQuery instances. + """ assert isinstance(queryArr, list), "provided argument as not a list" assert len(queryArr) > 0, "queryArr had an empty list" q = CombinedQuery() @@ -131,6 +142,10 @@ def AND(queryArr): @staticmethod def OR(queryArr): + """ + create a combined query with multiple items on which to perform an OR operation + @param queryArr: a list of items on which to perform an OR operation. Items can be either CombinedQuery or BaseQuery instances. 
+ """ assert isinstance(queryArr, list), "provided argument as not a list" assert len(queryArr) > 0, "queryArr had an empty list" q = CombinedQuery() @@ -149,6 +164,24 @@ def __init__(self, isDuplicateFilter = "keepAll", hasDuplicateFilter = "keepAll", eventFilter = "keepAll"): + """ + create an article query using a complex query + @param includeQuery: an instance of CombinedQuery or BaseQuery to use to find articles that match the conditions + @param excludeQuery: an instance of CombinedQuery or BaseQuery (or None) to find articles to exclude from the articles matched with the includeQuery + @param isDuplicateFilter: some articles can be duplicates of other articles. What should be done with them. Possible values are: + "skipDuplicates" (skip the resulting articles that are duplicates of other articles) + "keepOnlyDuplicates" (return only the duplicate articles) + "keepAll" (no filtering, default) + @param hasDuplicateFilter: some articles are later copied by others. What should be done with such articles. Possible values are: + "skipHasDuplicates" (skip the resulting articles that have been later copied by others) + "keepOnlyHasDuplicates" (return only the articles that have been later copied by others) + "keepAll" (no filtering, default) + @param eventFilter: some articles describe a known event and some don't. This filter allows you to filter the resulting articles based on this criterion. 
+ Possible values are: + "skipArticlesWithoutEvent" (skip articles that are not describing any known event in ER) + "keepOnlyArticlesWithoutEvent" (return only the articles that are not describing any known event in ER) + "keepAll" (no filtering, default) + """ super(ComplexArticleQuery, self).__init__() assert isinstance(includeQuery, (CombinedQuery, BaseQuery)), "includeQuery parameter was not a CombinedQuery or BaseQuery instance" @@ -167,6 +200,11 @@ class ComplexEventQuery(_QueryCore): def __init__(self, includeQuery, excludeQuery = None): + """ + create an event query using a complex query + @param includeQuery: an instance of CombinedQuery or BaseQuery to use to find events that match the conditions + @param excludeQuery: an instance of CombinedQuery or BaseQuery (or None) to find events to exclude from the events matched with the includeQuery + """ super(ComplexEventQuery, self).__init__() assert isinstance(includeQuery, (CombinedQuery, BaseQuery)), "includeQuery parameter was not a CombinedQuery or BaseQuery instance" diff --git a/eventregistry/tests/TestQueryArticlesComplex.py b/eventregistry/tests/TestQueryArticlesComplex.py index b20fce1..79431da 100644 --- a/eventregistry/tests/TestQueryArticlesComplex.py +++ b/eventregistry/tests/TestQueryArticlesComplex.py @@ -17,14 +17,14 @@ def getQueryUriListForQueryArticles(self, q): def testCompareSameResults1(self): cq1 = ComplexArticleQuery( - includeQuery = BaseQuery(conceptUri = QueryOper.AND([self.er.getConceptUri("obama"), self.er.getConceptUri("trump")])), - excludeQuery = BaseQuery(lang = QueryOper.OR(["eng", "deu"]))) + includeQuery = BaseQuery(conceptUri = QueryItems.AND([self.er.getConceptUri("obama"), self.er.getConceptUri("trump")])), + excludeQuery = BaseQuery(lang = QueryItems.OR(["eng", "deu"]))) cq2 = ComplexArticleQuery( includeQuery = CombinedQuery.AND([ BaseQuery(conceptUri = self.er.getConceptUri("obama")), BaseQuery(conceptUri = self.er.getConceptUri("trump"))]), - excludeQuery = 
BaseQuery(lang = QueryOper.OR(["eng", "deu"]))) + excludeQuery = BaseQuery(lang = QueryItems.OR(["eng", "deu"]))) q = QueryArticles(conceptUri = [self.er.getConceptUri("obama"), self.er.getConceptUri("trump")], conceptOper = "AND", ignoreLang = ["eng", "deu"]) @@ -38,14 +38,14 @@ def testCompareSameResults1(self): def testCompareSameResults2(self): cq1 = ComplexArticleQuery( - includeQuery = BaseQuery(sourceUri = QueryOper.OR([self.er.getNewsSourceUri("bbc"), self.er.getNewsSourceUri("associated press")])), - excludeQuery = BaseQuery(conceptUri = QueryOper.OR([self.er.getConceptUri("obama")]))) + includeQuery = BaseQuery(sourceUri = QueryItems.OR([self.er.getNewsSourceUri("bbc"), self.er.getNewsSourceUri("associated press")])), + excludeQuery = BaseQuery(conceptUri = QueryItems.OR([self.er.getConceptUri("obama")]))) cq2 = ComplexArticleQuery( includeQuery = CombinedQuery.OR([ BaseQuery(sourceUri = self.er.getNewsSourceUri("bbc")), BaseQuery(sourceUri = self.er.getNewsSourceUri("associated press"))]), - excludeQuery = BaseQuery(conceptUri = QueryOper.OR([self.er.getConceptUri("obama")]))) + excludeQuery = BaseQuery(conceptUri = QueryItems.OR([self.er.getConceptUri("obama")]))) q = QueryArticles(sourceUri = [self.er.getNewsSourceUri("bbc"), self.er.getNewsSourceUri("associated press")], ignoreConceptUri = self.er.getConceptUri("obama")) diff --git a/eventregistry/tests/TestQueryEventsComplex.py b/eventregistry/tests/TestQueryEventsComplex.py index b1da0c5..c893e0d 100644 --- a/eventregistry/tests/TestQueryEventsComplex.py +++ b/eventregistry/tests/TestQueryEventsComplex.py @@ -17,14 +17,14 @@ def getQueryUriListForQueryEvents(self, q): def testCompareSameResults1(self): cq1 = ComplexEventQuery( - includeQuery = BaseQuery(conceptUri = QueryOper.AND([self.er.getConceptUri("obama"), self.er.getConceptUri("trump")])), - excludeQuery = BaseQuery(lang = QueryOper.OR(["eng", "deu"]))) + includeQuery = BaseQuery(conceptUri = QueryItems.AND([self.er.getConceptUri("obama"), 
self.er.getConceptUri("trump")])), + excludeQuery = BaseQuery(lang = QueryItems.OR(["eng", "deu"]))) cq2 = ComplexEventQuery( includeQuery = CombinedQuery.AND([ BaseQuery(conceptUri = self.er.getConceptUri("obama")), BaseQuery(conceptUri = self.er.getConceptUri("trump"))]), - excludeQuery = BaseQuery(lang = QueryOper.OR(["eng", "deu"]))) + excludeQuery = BaseQuery(lang = QueryItems.OR(["eng", "deu"]))) q = QueryEvents(conceptUri = [self.er.getConceptUri("obama"), self.er.getConceptUri("trump")], conceptOper = "AND", ignoreLang = ["eng", "deu"]) @@ -38,14 +38,14 @@ def testCompareSameResults1(self): def testCompareSameResults2(self): cq1 = ComplexEventQuery( - includeQuery = BaseQuery(sourceUri = QueryOper.OR([self.er.getNewsSourceUri("bbc"), self.er.getNewsSourceUri("associated press")])), - excludeQuery = BaseQuery(conceptUri = QueryOper.OR([self.er.getConceptUri("obama")]))) + includeQuery = BaseQuery(sourceUri = QueryItems.OR([self.er.getNewsSourceUri("bbc"), self.er.getNewsSourceUri("associated press")])), + excludeQuery = BaseQuery(conceptUri = QueryItems.OR([self.er.getConceptUri("obama")]))) cq2 = ComplexEventQuery( includeQuery = CombinedQuery.OR([ BaseQuery(sourceUri = self.er.getNewsSourceUri("bbc")), BaseQuery(sourceUri = self.er.getNewsSourceUri("associated press"))]), - excludeQuery = BaseQuery(conceptUri = QueryOper.OR([self.er.getConceptUri("obama")]))) + excludeQuery = BaseQuery(conceptUri = QueryItems.OR([self.er.getConceptUri("obama")]))) q = QueryEvents(sourceUri = [self.er.getNewsSourceUri("bbc"), self.er.getNewsSourceUri("associated press")], ignoreConceptUri = self.er.getConceptUri("obama")) diff --git a/setup.py b/setup.py index e71b46b..d67db18 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ def readme(): return f.read() setup(name='eventregistry', - version='6.4.0', + version='6.4.1', description = "A package that can be used to query information in Event Registry (http://eventregistry.org/)", classifiers=[ 'Development Status :: 4 - 
Beta',