diff --git a/CHANGELOG.md b/CHANGELOG.md index a7767f6..fbf32e2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,22 @@ # Change Log +## [v8.7]() (2019-10-16) + +**Added** + +- added `EventRegistry.getServiceStatus()` method that reports the status of the services +- the constructors of the `ComplexArticleQuery` and `ComplexEventQuery` classes now support additional filters like `minSentiment`, `maxSentiment`, `minFacebookShares`, `endSourceRankPercentile`, etc. + +**Updated** + +- `ReturnInfo` classes (`ArticleInfoFlags`, `ConceptInfoFlags`, ...) were updated. Some obsolete parameters were removed and we have added support for `**kwargs` to supply some very rarely used parameters directly. +- `TopicPage.getArticles` and `TopicPage.getEvents` methods now also support `**kwargs` so that you can provide other available parameters that are less frequently used. + +**Removed** + +- removed `EventRegistry.suggestCustomConcepts()` and `EventRegistry.getCustomConceptUri()` methods. They are no longer used since the correlation feature is no longer supported. 
+ + ## [v8.6]() (2019-02-22) **Added** diff --git a/eventregistry/Correlations.py b/eventregistry/Correlations.py deleted file mode 100644 index 1d07b56..0000000 --- a/eventregistry/Correlations.py +++ /dev/null @@ -1,220 +0,0 @@ -""" -provides classes needed to identify concepts or categories that trend the most with a concept, category or a custom time series -""" - -import json, six -from eventregistry.Base import * -from eventregistry.ReturnInfo import * -from eventregistry.Info import * -from eventregistry.QueryArticles import * -from eventregistry.Counts import * - - -class GetTopCorrelations(QueryParamsBase): - def __init__(self, - eventRegistry): # instance of EventRegistry class - QueryParamsBase.__init__(self) - self._er = eventRegistry - self._setVal("action", "findTopCorrelations") - - - def _getPath(self): - return "/json/correlate" - - # - # input data methods - # - def setCustomInputData(self, inputDataArr): - """ - specify the user defined array of input data - - @param inputDataArr: array of tuples (date, val) where date is a date object or string in YYYY-MM-DD format - and val is the value/counts for that date - """ - # clear any past test data values - self._clearVal("testData") - for (date, val) in inputDataArr: - assert isinstance(val, (int, float)), "Value is expected to be a number" - dateStr = None - if isinstance(val, datetime.datetime): - dateStr = val.date().isoformat() - elif isinstance(val, datetime.date): - dateStr = val.isoformat() - elif isinstance(val, six.string_types): - assert re.match("\d{4}-\d{2}-\d{2}", date) - dateStr = date - else: - assert False, "First argument in the tuple is not a valid date" - self._addArrayVal("testData", {"date": dateStr, "count": val}) - - - def loadInputDataWithQuery(self, queryArticles): - """ - use the queryArticles to find articles that match the criteria. 
For the articles that match - criteria in queryArticles compute the time-series (number of resulting articles for each date) - an use the time series as the input data - - @param queryArticles: an instance of QueryArticles class, containing the conditions that are use to - find the matching time-series. You don't need to specify any requested result. - """ - # clear any past test data values - self._clearVal("testData") - - assert isinstance(queryArticles, QueryArticles), "'queryArticles' excpected to be an instance of QueryArticles" - queryArticles.setRequestedResult(RequestArticlesTimeAggr()) - res = self._er.execQuery(queryArticles) - if "timeAggr" in res: - for obj in res["timeAggr"]: - self._addArrayVal("testData", json.dumps(obj)) - - - def loadInputDataWithCounts(self, getCounts): - """ - use GetCounts class to obtain daily counts information for concept/category of interest - @param getCounts: an instance of GetCounts class - """ - # clear any past test data values - self._clearVal("testData") - - assert isinstance(getCounts, GetCounts), "'getCounts' is expected to be an instance of GetCounts" - res = self._er.execQuery(getCounts) - assert len(list(res.keys())) <= 1, "The returned object had multiple keys. When creating the GetCounts instance use only one uri." 
- assert len(list(res.keys())) != 0, "Obtained an empty object" - assert "error" not in res, res.get("error") - key = list(res.keys())[0] - assert isinstance(res[key], list), "Expected a list" - for obj in res[key]: - self._addArrayVal("testData", json.dumps(obj)) - - - def hasValidInputData(self): - """do we have valid input data (needed before we can compute correlations)""" - return self._hasVal("testData") - - - # - # computing correlations - # - def getTopConceptCorrelations(self, - candidateConceptsQuery = None, - candidatesPerType = 1000, - conceptType = None, - exactCount = 10, - approxCount = 0, - returnInfo = ReturnInfo()): - """ - compute concepts that correlate the most with the input data. If candidateConceptsQuery is provided we first identify the - concepts that are potentially returned as top correlations. Candidates are obtained by making the query and analyzing the - concepts that appear in the resulting articles. The top concepts are used as candidates among which we return the top correlations. - If conceptType is provided then only concepts of the specified type can be provided as the result. - - @param candidateConceptsQuery: optional. An instance of QueryArticles that can be used to limit the space of concept candidates - @param candidatesPerType: If candidateConceptsQuery is provided, then this number of concepts for each valid type will be return as candidates - @param conceptType: optional. 
A string or an array containing the concept types that are valid candidates on which to compute top correlations - valid values are "person", "org", "loc" and/or "wiki" - @param exactCount: the number of returned concepts for which the exact value of the correlation is computed - @param approxCount: the number of returned concepts for which only an approximate value of the correlation is computed - @param returnInfo: specifies the details about the concepts that should be returned in the output result - """ - - self._clearVal("contextConceptIds") - - # generate all necessary parameters (but don't update the params of the self) - params = QueryParamsBase.copy(self) - - # compute the candidates - if candidateConceptsQuery != None: - assert isinstance(candidateConceptsQuery, QueryArticles), "'candidateConceptsQuery' is expected to be of type QueryArticles" - candidateConceptsQuery.setRequestedResult(RequestArticlesConceptAggr()) - candidateConceptsQuery._setVal("conceptAggrConceptCountPerType", candidatesPerType) - candidateConceptsQuery._setVal("conceptAggrConceptIdOnly", True) - ret = self._er.execQuery(candidateConceptsQuery) - if ret and "conceptAggr" in ret: - params._setVal("contextConceptIds", ",".join([str(x) for x in ret["conceptAggr"]])) - else: - print("Warning: Failed to compute a candidate set of concepts") - - if conceptType: - params._setVal("conceptType", conceptType) - params._setVal("exactCount", exactCount) - params._setVal("approxCount", approxCount) - params._setVal("sourceType", "news-concept") - - # - # compute the correlations - ret = self._er.jsonRequest(self._getPath(), params.queryParams) - - # - # extend the return information with the details about the concepts (label, ...) 
- if returnInfo != None: - conceptIds = [] - if ret and ret["news-concept"]["exactCorrelations"]: - conceptIds += [info["id"] for info in ret["news-concept"]["exactCorrelations"]] - if ret and ret["news-concept"]["approximateCorrelations"]: - conceptIds += [info["id"] for info in ret["news-concept"]["approximateCorrelations"]] - conceptInfos = {} - for i in range(0, len(conceptIds), 500): - ids = conceptIds[i:i+500] - q = GetConceptInfo(returnInfo = returnInfo) - q.queryById(ids) - info = self._er.execQuery(q) - conceptInfos.update(info) - if ret and ret["news-concept"]["exactCorrelations"]: - for item in ret["news-concept"]["exactCorrelations"]: - item["conceptInfo"] = conceptInfos.get(str(item["id"]), {}) - if ret and ret["news-concept"]["approximateCorrelations"]: - for item in ret["news-concept"]["approximateCorrelations"]: - item["conceptInfo"] = conceptInfos.get(str(item["id"]), {}) - - # return result - return ret - - - def getTopCategoryCorrelations(self, - exactCount = 10, - approxCount = 0, - returnInfo = ReturnInfo()): - """ - compute categories that correlate the most with the input data. - - @param exactCount: the number of returned categories for which the exact value of the correlation is computed - @param approxCount: the number of returned categories for which only an approximate value of the correlation is computed - @param returnInfo: specifies the details about the categories that should be returned in the output result - """ - - # generate all necessary parameters (but don't update the params of the self) - params = QueryParamsBase.copy(self) - # don't send unnecessary data - params._clearVal("contextConceptIds") - params._setVal("exactCount", exactCount) - params._setVal("approxCount", approxCount) - params._setVal("sourceType", "news-category") - - # - # compute the correlations - ret = self._er.jsonRequest(self._getPath(), params.queryParams) - - # - # extend the return information with the details about the categories (label, ...) 
- if returnInfo != None: - categoryIds = [] - if ret and ret["news-category"]["exactCorrelations"]: - categoryIds += [info["id"] for info in ret["news-category"]["exactCorrelations"]] - if ret and ret["news-category"]["approximateCorrelations"]: - categoryIds += [info["id"] for info in ret["news-category"]["approximateCorrelations"]] - categoryInfos = {} - for i in range(0, len(categoryIds), 500): - ids = categoryIds[i:i+500] - q = GetCategoryInfo(returnInfo = returnInfo) - q.queryById(ids) - info = self._er.execQuery(q) - categoryInfos.update(info) - if ret and ret["news-category"]["exactCorrelations"]: - for item in ret["news-category"]["exactCorrelations"]: - item["categoryInfo"] = categoryInfos.get(str(item["id"]), {}) - if ret and ret["news-category"]["approximateCorrelations"]: - for item in ret["news-category"]["approximateCorrelations"]: - item["categoryInfo"] = categoryInfos.get(str(item["id"]), {}) - - # return result - return ret diff --git a/eventregistry/Counts.py b/eventregistry/Counts.py index 9ed1a93..432269c 100644 --- a/eventregistry/Counts.py +++ b/eventregistry/Counts.py @@ -9,7 +9,7 @@ class CountsBase(QueryParamsBase): def _getPath(self): - return "/json/counters" + return "/api/v1/counters" diff --git a/eventregistry/DailyShares.py b/eventregistry/DailyShares.py index e542301..902112f 100644 --- a/eventregistry/DailyShares.py +++ b/eventregistry/DailyShares.py @@ -30,7 +30,7 @@ def __init__(self, def _getPath(self): - return "/json/article" + return "/api/v1/article" # get top shared events for today or any other day @@ -53,4 +53,4 @@ def __init__(self, def _getPath(self): - return "/json/event" + return "/api/v1/event" diff --git a/eventregistry/EventForText.py b/eventregistry/EventForText.py index b2e761f..e384e25 100644 --- a/eventregistry/EventForText.py +++ b/eventregistry/EventForText.py @@ -46,12 +46,12 @@ def compute(self, compute the list of most similar events for the given text """ params = { "lang": lang, "text": text, 
"topClustersCount": self._nrOfEventsToReturn } - res = self._er.jsonRequest("/json/getEventForText/enqueueRequest", params) + res = self._er.jsonRequest("/api/v1/getEventForText/enqueueRequest", params) requestId = res["requestId"] for i in range(10): time.sleep(1) # sleep for 1 second to wait for the clustering to perform computation - res = self._er.jsonRequest("/json/getEventForText/testRequest", { "requestId": requestId }) + res = self._er.jsonRequest("/api/v1/getEventForText/testRequest", { "requestId": requestId }) if isinstance(res, list) and len(res) > 0: return res return None diff --git a/eventregistry/EventRegistry.py b/eventregistry/EventRegistry.py index be9ab82..1dbe7ff 100644 --- a/eventregistry/EventRegistry.py +++ b/eventregistry/EventRegistry.py @@ -7,6 +7,7 @@ from eventregistry.Base import * from eventregistry.ReturnInfo import * + class EventRegistry(object): """ the core object that is used to access any data in Event Registry @@ -55,7 +56,7 @@ def __init__(self, # if there is a settings.json file in the directory then try using it to load the API key from it # and to read the host name from it (if custom host is not specified) - currPath = os.path.split(__file__)[0] + currPath = os.path.split(os.path.realpath(__file__))[0] settFName = settingsFName or os.path.join(currPath, "settings.json") if apiKey: print("using user provided API key for making requests") @@ -155,6 +156,11 @@ def getUsageInfo(self): return self.jsonRequest("/api/v1/usage", { "apiKey": self._apiKey }) + def getServiceStatus(self): + """return the status of various services used in Event Registry pipeline""" + return self.jsonRequest("/api/v1/getServiceStatus", {"apiKey": self._apiKey}) + + def getUrl(self, query): """ return the url that can be used to get the content that matches the query @@ -221,7 +227,7 @@ def execQuery(self, query, allowUseOfArchive = None): def jsonRequest(self, methodUrl, paramDict, customLogFName = None, allowUseOfArchive = None): """ make a request 
for json data. repeat it _repeatFailedRequestCount times, if they fail (indefinitely if _repeatFailedRequestCount = -1) - @param methodUrl: url on er (e.g. "/json/article") + @param methodUrl: url on er (e.g. "/api/v1/article") @param paramDict: optional object containing the parameters to include in the request (e.g. { "articleUri": "123412342" }). @param customLogFName: potentially a file name where the request information can be logged into @param allowUseOfArchive: potentially override the value set when constructing EventRegistry class. @@ -279,9 +285,6 @@ def jsonRequest(self, methodUrl, paramDict, customLogFName = None, allowUseOfArc # remember the available requests self._dailyAvailableRequests = tryParseInt(self.getLastHeader("x-ratelimit-limit", ""), val = -1) self._remainingAvailableRequests = tryParseInt(self.getLastHeader("x-ratelimit-remaining", ""), val = -1) - if self._verboseOutput: - timeSec = int(self.getLastHeader("x-response-time", "0")) / 1000. - self.printConsole("request took %.3f sec. 
Response size: %.2fKB" % (timeSec, len(respInfo.text) / 1024.0)) try: returnData = respInfo.json() break @@ -291,6 +294,8 @@ def jsonRequest(self, methodUrl, paramDict, customLogFName = None, allowUseOfArc except Exception as ex: self._lastException = ex print("Event Registry exception while executing the request:") + if self._verboseOutput: + print("endpoint: %s\nParams: %s" % (url, json.dumps(paramDict, indent=4))) self.printLastException() # in case of invalid input parameters, don't try to repeat the search if respInfo != None and respInfo.status_code == 530: @@ -320,8 +325,9 @@ def jsonRequestAnalytics(self, methodUrl, paramDict): while self._repeatFailedRequestCount < 0 or tryCount < self._repeatFailedRequestCount: tryCount += 1 try: + url = self._hostAnalytics + methodUrl # make the request - respInfo = self._reqSession.post(self._hostAnalytics + methodUrl, json = paramDict) + respInfo = self._reqSession.post(url, json = paramDict) # remember the returned headers self._headers = respInfo.headers # if we got some error codes print the error and repeat the request after a short time period @@ -332,6 +338,8 @@ def jsonRequestAnalytics(self, methodUrl, paramDict): except Exception as ex: self._lastException = ex print("Event Registry Analytics exception while executing the request:") + if self._verboseOutput: + print("endpoint: %s\nParams: %s" % (url, json.dumps(paramDict, indent=4))) self.printLastException() # in case of invalid input parameters, don't try to repeat the action if respInfo != None and respInfo.status_code == 530: @@ -362,7 +370,7 @@ def suggestConcepts(self, prefix, sources = ["concepts"], lang = "eng", conceptL params = { "prefix": prefix, "source": sources, "lang": lang, "conceptLang": conceptLang, "page": page, "count": count} params.update(returnInfo.getParams()) params.update(kwargs) - return self.jsonRequest("/json/suggestConceptsFast", params) + return self.jsonRequest("/api/v1/suggestConceptsFast", params) def suggestCategories(self, 
prefix, page = 1, count = 20, returnInfo = ReturnInfo(), **kwargs): @@ -377,7 +385,7 @@ def suggestCategories(self, prefix, page = 1, count = 20, returnInfo = ReturnInf params = { "prefix": prefix, "page": page, "count": count } params.update(returnInfo.getParams()) params.update(kwargs) - return self.jsonRequest("/json/suggestCategoriesFast", params) + return self.jsonRequest("/api/v1/suggestCategoriesFast", params) def suggestNewsSources(self, prefix, dataType = ["news", "pr", "blog"], page = 1, count = 20, **kwargs): @@ -391,7 +399,7 @@ def suggestNewsSources(self, prefix, dataType = ["news", "pr", "blog"], page = 1 assert page > 0, "page parameter should be above 0" params = {"prefix": prefix, "dataType": dataType, "page": page, "count": count} params.update(kwargs) - return self.jsonRequest("/json/suggestSourcesFast", params) + return self.jsonRequest("/api/v1/suggestSourcesFast", params) def suggestSourceGroups(self, prefix, page = 1, count = 20, **kwargs): @@ -404,7 +412,7 @@ def suggestSourceGroups(self, prefix, page = 1, count = 20, **kwargs): assert page > 0, "page parameter should be above 0" params = { "prefix": prefix, "page": page, "count": count } params.update(kwargs) - return self.jsonRequest("/json/suggestSourceGroups", params) + return self.jsonRequest("/api/v1/suggestSourceGroups", params) def suggestLocations(self, prefix, sources = ["place", "country"], lang = "eng", count = 20, countryUri = None, sortByDistanceTo = None, returnInfo = ReturnInfo(), **kwargs): @@ -426,7 +434,7 @@ def suggestLocations(self, prefix, sources = ["place", "country"], lang = "eng", assert len(sortByDistanceTo) == 2, "The sortByDistanceTo should contain two float numbers" params["closeToLat"] = sortByDistanceTo[0] params["closeToLon"] = sortByDistanceTo[1] - return self.jsonRequest("/json/suggestLocationsFast", params) + return self.jsonRequest("/api/v1/suggestLocationsFast", params) def suggestLocationsAtCoordinate(self, latitude, longitude, radiusKm, limitToCities = 
False, lang = "eng", count = 20, ignoreNonWiki = True, returnInfo = ReturnInfo(), **kwargs): @@ -446,7 +454,7 @@ def suggestLocationsAtCoordinate(self, latitude, longitude, radiusKm, limitToCit params = { "action": "getLocationsAtCoordinate", "lat": latitude, "lon": longitude, "radius": radiusKm, "limitToCities": limitToCities, "count": count, "lang": lang } params.update(returnInfo.getParams()) params.update(kwargs) - return self.jsonRequest("/json/suggestLocationsFast", params) + return self.jsonRequest("/api/v1/suggestLocationsFast", params) def suggestSourcesAtCoordinate(self, latitude, longitude, radiusKm, count = 20, **kwargs): @@ -461,7 +469,7 @@ def suggestSourcesAtCoordinate(self, latitude, longitude, radiusKm, count = 20, assert isinstance(longitude, (int, float)), "The 'longitude' should be a number" params = {"action": "getSourcesAtCoordinate", "lat": latitude, "lon": longitude, "radius": radiusKm, "count": count} params.update(kwargs) - return self.jsonRequest("/json/suggestSourcesFast", params) + return self.jsonRequest("/api/v1/suggestSourcesFast", params) def suggestSourcesAtPlace(self, conceptUri, dataType = "news", page = 1, count = 20, **kwargs): @@ -474,7 +482,7 @@ def suggestSourcesAtPlace(self, conceptUri, dataType = "news", page = 1, count = """ params = {"action": "getSourcesAtPlace", "conceptUri": conceptUri, "page": page, "count": count, "dataType": dataType} params.update(kwargs) - return self.jsonRequest("/json/suggestSourcesFast", params) + return self.jsonRequest("/api/v1/suggestSourcesFast", params) def suggestAuthors(self, prefix, page = 1, count = 20, **kwargs): @@ -487,7 +495,7 @@ def suggestAuthors(self, prefix, page = 1, count = 20, **kwargs): assert page > 0, "page parameter should be above 0" params = {"prefix": prefix, "page": page, "count": count} params.update(kwargs) - return self.jsonRequest("/json/suggestAuthorsFast", params) + return self.jsonRequest("/api/v1/suggestAuthorsFast", params) @@ -506,24 +514,7 @@ def 
suggestConceptClasses(self, prefix, lang = "eng", conceptLang = "eng", sourc params = { "prefix": prefix, "lang": lang, "conceptLang": conceptLang, "source": source, "page": page, "count": count } params.update(returnInfo.getParams()) params.update(kwargs) - return self.jsonRequest("/json/suggestConceptClasses", params) - - - def suggestCustomConcepts(self, prefix, lang = "eng", conceptLang = "eng", page = 1, count = 20, returnInfo = ReturnInfo(), **kwargs): - """ - return a list of custom concepts that contain the given prefix. Custom concepts are the things (indicators, stock prices, ...) for which we import daily trending values that can be obtained using GetCounts class - @param prefix: input text that should be contained in the concept name - @param lang: language in which the prefix is specified - @param conceptLang: languages in which the label(s) for the concepts are to be returned - @param page: page of the results (1, 2, ...) - @param count: number of returned suggestions - @param returnInfo: what details about categories should be included in the returned information - """ - assert page > 0, "page parameter should be above 0" - params = { "prefix": prefix, "lang": lang, "conceptLang": conceptLang, "page": page, "count": count } - params.update(returnInfo.getParams()) - params.update(kwargs) - return self.jsonRequest("/json/suggestCustomConcepts", params) + return self.jsonRequest("/api/v1/suggestConceptClasses", params) # @@ -617,26 +608,13 @@ def getConceptInfo(self, conceptUri, @param returnInfo: what details about the concept should be included in the returned information """ params = returnInfo.getParams() - params.update({"uri": conceptUri, "action": "getInfo" }) - return self.jsonRequest("/json/concept", params) - - - def getCustomConceptUri(self, label, lang = "eng"): - """ - return a custom concept uri that is the best match for the given custom concept label - note that for the custom concepts we don't have a sensible way of sorting the 
candidates that match the label - if multiple candidates match the label we cannot guarantee which one will be returned - @param label: label of the custom concept - """ - matches = self.suggestCustomConcepts(label, lang = lang) - if matches != None and isinstance(matches, list) and len(matches) > 0 and "uri" in matches[0]: - return matches[0]["uri"] - return None + params.update({"uri": conceptUri }) + return self.jsonRequest("/api/v1/concept/getInfo", params) def getAuthorUri(self, authorName): """ - return author uri that that is the best match for the given author name (and potentially source url) + return author uri that is the best match for the given author name (and potentially source url) if there are multiple matches for the given author name, they are sorted based on the number of articles they have written (from most to least frequent) @param authorName: partial or full name of the author, potentially also containing the source url (e.g. "george brown nytimes") """ @@ -666,18 +644,18 @@ def getArticleUris(self, articleUrls): @returns returns dict where key is article url and value is either None if no match found or a string with article URI. 
""" assert isinstance(articleUrls, (six.string_types, list)), "Expected a single article url or a list of urls" - return self.jsonRequest("/json/articleMapper", { "articleUrl": articleUrls }) + return self.jsonRequest("/api/v1/articleMapper", { "articleUrl": articleUrls }) def getSourceGroups(self): """return the list of URIs of all known source groups""" - ret = self.jsonRequest("/json/sourceGroup", { "action": "getSourceGroups" }) + ret = self.jsonRequest("/api/v1/sourceGroup/getSourceGroups", {}) return ret def getSourceGroup(self, sourceGroupUri): """return info about the source group""" - ret = self.jsonRequest("/json/sourceGroup", { "action": "getSourceGroupInfo", "uri": sourceGroupUri }) + ret = self.jsonRequest("/api/v1/sourceGroup/getSourceGroupInfo", { "uri": sourceGroupUri }) return ret diff --git a/eventregistry/Info.py b/eventregistry/Info.py index 0e491d2..1196ba3 100644 --- a/eventregistry/Info.py +++ b/eventregistry/Info.py @@ -29,7 +29,7 @@ def queryById(self, idOrIdList): def _getPath(self): - return "/json/source" + return "/api/v1/source" @@ -50,7 +50,7 @@ def __init__(self, def _getPath(self): - return "/json/concept" + return "/api/v1/concept" @@ -76,7 +76,7 @@ def queryByUri(self, uriOrUriList): def _getPath(self): - return "/json/category" + return "/api/v1/category" @@ -100,9 +100,9 @@ def __init__(self, sourceUri = None): def _getPath(self): - return "/json/source" + return "/api/v1/source" def queryByUri(self, uriOrUriList): """ get stats about one or more sources specified by their uris """ - self.queryParams["uri"] = uriOrUriList; + self.queryParams["uri"] = uriOrUriList diff --git a/eventregistry/Query.py b/eventregistry/Query.py index 1be97b5..416683b 100644 --- a/eventregistry/Query.py +++ b/eventregistry/Query.py @@ -163,7 +163,13 @@ def OR(queryArr, class ComplexArticleQuery(_QueryCore): def __init__(self, query, - dataType = "news", + dataType="news", + minSentiment=None, + maxSentiment=None, + minSocialScore=0, + 
minFacebookShares=0, + startSourceRankPercentile=0, + endSourceRankPercentile = 100, isDuplicateFilter = "keepAll", hasDuplicateFilter = "keepAll", eventFilter = "keepAll"): @@ -172,6 +178,12 @@ def __init__(self, @param query: an instance of CombinedQuery or BaseQuery to use to find articles that match the conditions @param dataType: data type to search for. Possible values are "news" (news content), "pr" (PR content) or "blogs". If you want to use multiple data types, put them in an array (e.g. ["news", "pr"]) + @param minSentiment: what should be the minimum sentiment on the articles in order to return them (None means that we don't filter by sentiment) + @param maxSentiment: what should be the maximum sentiment on the articles in order to return them (None means that we don't filter by sentiment) + @param minSocialScore: at least how many times should the articles be shared on social media in order to return them + @param minFacebookShares: at least how many times should the articles be shared on Facebook in order to return them + @param startSourceRankPercentile: starting percentile of the sources to consider in the results (default: 0). Value should be in range 0-90 and divisible by 10. + @param endSourceRankPercentile: ending percentile of the sources to consider in the results (default: 100). Value should be in range 10-100 and divisible by 10. @param isDuplicateFilter: some articles can be duplicates of other articles. What should be done with them. 
Possible values are: "skipDuplicates" (skip the resulting articles that are duplicates of other articles) "keepOnlyDuplicates" (return only the duplicate articles) @@ -193,19 +205,38 @@ def __init__(self, filter = {} if dataType != "news": filter["dataType"] = dataType + + if minSentiment != None: + filter["minSentiment"] = minSentiment + if maxSentiment != None: + filter["maxSentiment"] = maxSentiment + + if minSocialScore > 0: + filter["minSocialScore"] = minSocialScore + if minFacebookShares > 0: + filter["minFacebookShares"] = minFacebookShares + if startSourceRankPercentile != 0: + filter["startSourceRankPercentile"] = startSourceRankPercentile + if endSourceRankPercentile != 100: + filter["endSourceRankPercentile"] = endSourceRankPercentile + if isDuplicateFilter != "keepAll": filter["isDuplicate"] = isDuplicateFilter if hasDuplicateFilter != "keepAll": filter["hasDuplicate"] = hasDuplicateFilter if eventFilter != "keepAll": filter["hasEvent"] = eventFilter + if len(filter) > 0: self._queryObj["$filter"] = filter class ComplexEventQuery(_QueryCore): - def __init__(self, query): + def __init__(self, + query, + minSentiment=None, + maxSentiment=None): """ create an event query using a complex query @param query: an instance of CombinedQuery or BaseQuery to use to find events that match the conditions @@ -213,4 +244,12 @@ def __init__(self, query): super(ComplexEventQuery, self).__init__() assert isinstance(query, (CombinedQuery, BaseQuery)), "query parameter was not a CombinedQuery or BaseQuery instance" + filter = {} + if minSentiment != None: + filter["minSentiment"] = minSentiment + if maxSentiment != None: + filter["maxSentiment"] = maxSentiment + + if len(filter) > 0: + self._queryObj["$filter"] = filter self._queryObj["$query"] = query.getQuery() diff --git a/eventregistry/QueryArticle.py b/eventregistry/QueryArticle.py index b3997e1..41cf841 100644 --- a/eventregistry/QueryArticle.py +++ b/eventregistry/QueryArticle.py @@ -18,7 +18,7 @@ def 
__init__(self, def _getPath(self): - return "/json/article" + return "/api/v1/article" @staticmethod diff --git a/eventregistry/QueryArticles.py b/eventregistry/QueryArticles.py index b004bb4..80b6c79 100644 --- a/eventregistry/QueryArticles.py +++ b/eventregistry/QueryArticles.py @@ -69,13 +69,13 @@ def __init__(self, @param authorUri: find articles that were written by a specific author. If multiple authors should be considered use QueryItems.OR() to provide the list of authors. Author uri for a given author name can be obtained using EventRegistry.getAuthorUri(). - @param locationUri: find articles that describe something that occured at a particular location. + @param locationUri: find articles that describe something that occurred at a particular location. If value can be a string or a list of strings provided in QueryItems.OR(). Location uri can either be a city or a country. Location uri for a given name can be obtained using EventRegistry.getLocationUri(). @param lang: find articles that are written in the specified language. If more than one language is specified, resulting articles has to be written in *any* of the languages. @param dateStart: find articles that were written on or after dateStart. Date should be provided in YYYY-MM-DD format, datetime.time or datetime.datetime. - @param dateEnd: find articles that occured before or on dateEnd. Date should be provided in YYYY-MM-DD format, datetime.time or datetime.datetime. + @param dateEnd: find articles that occurred before or on dateEnd. Date should be provided in YYYY-MM-DD format, datetime.time or datetime.datetime. @param dateMentionStart: find articles that explicitly mention a date that is equal or greater than dateMentionStart. @param dateMentionEnd: find articles that explicitly mention a date that is lower or equal to dateMentionEnd. @param keywordsLoc: where should we look when searching using the keywords provided by "keywords" parameter. 
"body" (default), "title", or "body,title" @@ -87,7 +87,7 @@ def __init__(self, @param ignoreSourceLocationUri: ignore articles that have been written by sources located at *any* of the specified locations @param ignoreSourceGroupUri: ignore articles that have been written by sources in *any* of the specified source groups @param ignoreAuthorUri: ignore articles that were written by *any* of the specified authors - @param ignoreLocationUri: ignore articles that occured in any of the provided locations. A location can be a city or a place + @param ignoreLocationUri: ignore articles that occurred in any of the provided locations. A location can be a city or a place @param ignoreLang: ignore articles that are written in *any* of the provided languages @param ignoreKeywordsLoc: where should we look when data should be used when searching using the keywords provided by "ignoreKeywords" parameter. "body" (default), "title", or "body,title" @param isDuplicateFilter: some articles can be duplicates of other articles. What should be done with them. Possible values are: @@ -180,7 +180,7 @@ def __init__(self, def _getPath(self): - return "/json/article" + return "/api/v1/article" def setRequestedResult(self, requestArticles): @@ -287,7 +287,7 @@ def initWithComplexQuery(query): @param query: complex query as ComplexArticleQuery instance, string or a python dict """ q = QueryArticlesIter() - + # provided an instance of ComplexArticleQuery if isinstance(query, ComplexArticleQuery): q._setVal("query", json.dumps(query.getQuery())) diff --git a/eventregistry/QueryEvent.py b/eventregistry/QueryEvent.py index 372953d..1363a4e 100644 --- a/eventregistry/QueryEvent.py +++ b/eventregistry/QueryEvent.py @@ -23,7 +23,7 @@ def __init__(self, def _getPath(self): - return "/json/event" + return "/api/v1/event" def setRequestedResult(self, requestEvent): @@ -87,11 +87,11 @@ def __init__(self, eventUri, @param authorUri: find articles that were written by a specific author. 
If multiple authors should be considered use QueryItems.OR() to provide the list of authors. Author uri for a given author name can be obtained using EventRegistry.getAuthorUri(). - @param locationUri: find articles that describe something that occured at a particular location. + @param locationUri: find articles that describe something that occurred at a particular location. If value can be a string or a list of strings provided in QueryItems.OR(). Location uri can either be a city or a country. Location uri for a given name can be obtained using EventRegistry.getLocationUri(). @param dateStart: find articles that were written on or after dateStart. Date should be provided in YYYY-MM-DD format, datetime.time or datetime.datetime. - @param dateEnd: find articles that occured before or on dateEnd. Date should be provided in YYYY-MM-DD format, datetime.time or datetime.datetime. + @param dateEnd: find articles that occurred before or on dateEnd. Date should be provided in YYYY-MM-DD format, datetime.time or datetime.datetime. @param dateMentionStart: limit the event articles to those that explicitly mention a date that is equal or greater than dateMentionStart. @param dateMentionEnd: limit the event articles to those that explicitly mention a date that is lower or equal to dateMentionEnd. @@ -306,13 +306,13 @@ def __init__(self, @param authorUri: find articles that were written by a specific author. If multiple authors should be considered use QueryItems.OR() to provide the list of authors. Author uri for a given author name can be obtained using EventRegistry.getAuthorUri(). - @param locationUri: find articles that describe something that occured at a particular location. + @param locationUri: find articles that describe something that occurred at a particular location. If value can be a string or a list of strings provided in QueryItems.OR(). Location uri can either be a city or a country. 
Location uri for a given name can be obtained using EventRegistry.getLocationUri(). @param lang: find articles that are written in the specified language. If more than one language is specified, resulting articles has to be written in *any* of the languages. @param dateStart: find articles that were written on or after dateStart. Date should be provided in YYYY-MM-DD format, datetime.time or datetime.datetime. - @param dateEnd: find articles that occured before or on dateEnd. Date should be provided in YYYY-MM-DD format, datetime.time or datetime.datetime. + @param dateEnd: find articles that occurred before or on dateEnd. Date should be provided in YYYY-MM-DD format, datetime.time or datetime.datetime. @param dateMentionStart: limit the event articles to those that explicitly mention a date that is equal or greater than dateMentionStart. @param dateMentionEnd: limit the event articles to those that explicitly mention a date that is lower or equal to dateMentionEnd. @@ -461,56 +461,32 @@ class RequestEventSimilarEvents(RequestEvent): def __init__(self, conceptInfoList, count = 50, # number of similar events to return - maxDayDiff = sys.maxsize, # what is the maximum time difference between the similar events and this one + dateStart = None, # what can be the oldest date of the similar events + dateEnd = None, # what can be the newest date of the similar events addArticleTrendInfo = False, # add info how the articles in the similar events are distributed over time aggrHours = 6, # if similarEventsAddArticleTrendInfo == True then this is the aggregating window - includeSelf = False, # should the info about the event itself be included among the results returnInfo = ReturnInfo()): """ compute and return a list of similar events @param conceptInfoList: array of concepts and their importance, e.g. [{ "uri": "http://en.wikipedia.org/wiki/Barack_Obama", "wgt": 100 }, ...] 
@param count: number of similar events to return (at most 50) - @param maxDayDiff: find only those events that are at most maxDayDiff days apart from the tested event + @param dateStart: what can be the oldest date of the similar events + @param dateEnd: what can be the newest date of the similar events @param addArticleTrendInfo: for the returned events compute how they were trending (intensity of reporting) in different time periods @param aggrHours: time span that is used as a unit when computing the trending info - @param includeSel: include also the tested event in the results (True or False) @param returnInfo: what details should be included in the returned information """ assert count <= 50 assert isinstance(conceptInfoList, list) - self.resultType = "similarEvents" - self.similarEventsConcepts = json.dumps(conceptInfoList) - self.similarEventsCount = count - if maxDayDiff != sys.maxsize: - self.similarEventsMaxDayDiff = maxDayDiff + self.action = "getSimilarEvents" + self.concepts = json.dumps(conceptInfoList) + self.eventsCount = count + if dateStart != None: + self.dateStart = QueryParamsBase.encodeDate(dateStart) + if dateEnd != None: + self.dateEnd = QueryParamsBase.encodeDate(dateEnd) self.similarEventsAddArticleTrendInfo = addArticleTrendInfo self.similarEventsAggrHours = aggrHours - self.similarEventsIncludeSelf = includeSelf - self.__dict__.update(returnInfo.getParams("similarEvents")) - - - -class RequestEventSimilarStories(RequestEvent): - def __init__(self, - conceptInfoList, - count = 50, # number of similar stories to return - lang = ["eng"], # in which language should be the similar stories - maxDayDiff = sys.maxsize, # what is the maximum time difference between the similar stories and this one - returnInfo = ReturnInfo()): - """ - return a list of similar stories (clusters) - @param conceptInfoList: array of concepts and their importance, e.g. [{ "uri": "http://en.wikipedia.org/wiki/Barack_Obama", "wgt": 100 }, ...] 
- @param count: number of similar stories to return (at most 50) - @param lang: in what language(s) should be the returned stories - @param maxDayDiff: maximum difference in days between the returned stories and the tested event - @param returnInfo: what details should be included in the returned information - """ - assert count <= 50 - assert isinstance(conceptInfoList, list) - self.resultType = "similarStories" - self.similarStoriesConcepts = json.dumps(conceptInfoList) - self.similarStoriesCount = count - self.similarStoriesLang = lang - if maxDayDiff != sys.maxsize: - self.similarStoriesMaxDayDiff = maxDayDiff - self.__dict__.update(returnInfo.getParams("similarStories")) + # setting resultType since we have to, but it's actually ignored on the backend + self.resultType = "similarEvents" + self.__dict__.update(returnInfo.getParams("")) diff --git a/eventregistry/QueryEvents.py b/eventregistry/QueryEvents.py index 63511bc..37fe877 100644 --- a/eventregistry/QueryEvents.py +++ b/eventregistry/QueryEvents.py @@ -63,13 +63,13 @@ def __init__(self, @param authorUri: find events that contain one or more articles that have been written by a specific author. If multiple authors should be considered use QueryItems.OR() or QueryItems.AND() to provide the list of authors. Author uri for a given author name can be obtained using EventRegistry.getAuthorUri(). - @param locationUri: find events that occured at a particular location. + @param locationUri: find events that occurred at a particular location. If value can be a string or a list of strings provided in QueryItems.OR(). Location uri can either be a city or a country. Location uri for a given name can be obtained using EventRegistry.getLocationUri(). @param lang: find events for which we found articles in the specified language. If more than one language is specified, resulting events has to be reported in *any* of the languages. - @param dateStart: find events that occured on or after dateStart. 
Date should be provided in YYYY-MM-DD format, datetime.time or datetime.datetime. - @param dateEnd: find events that occured before or on dateEnd. Date should be provided in YYYY-MM-DD format, datetime.time or datetime.datetime. + @param dateStart: find events that occurred on or after dateStart. Date should be provided in YYYY-MM-DD format, datetime.time or datetime.datetime. + @param dateEnd: find events that occurred before or on dateEnd. Date should be provided in YYYY-MM-DD format, datetime.time or datetime.datetime. @param minSentiment: minimum value of the sentiment, that the returned events should have. Range [-1, 1]. Note: setting the value will remove all events that don't have a computed value for the sentiment (all events that are not reported in English language) @param maxSentiment: maximum value of the sentiment, that the returned events should have. Range [-1, 1]. Note: setting the value will remove all events that don't have @@ -85,7 +85,7 @@ def __init__(self, @param ignoreSourceLocationUri: ignore events that have articles which been written by sources located at *any* of the specified locations @param ignoreSourceGroupUri: ignore events that have articles which have been written by sources in *any* of the specified source groups @param ignoreAuthorUri: ignore articles that were written by *any* of the specified authors - @param ignoreLocationUri: ignore events that occured in any of the provided locations. A location can be a city or a place + @param ignoreLocationUri: ignore events that occurred in any of the provided locations. A location can be a city or a place @param ignoreLang: ignore events that are reported in any of the provided languages @param keywordsLoc: what data should be used when searching using the keywords provided by "keywords" parameter. "body" (default), "title", or "body,title" @param ignoreKeywordsLoc: what data should be used when searching using the keywords provided by "ignoreKeywords" parameter. 
"body" (default), "title", or "body,title" @@ -145,7 +145,7 @@ def __init__(self, def _getPath(self): - return "/json/event" + return "/api/v1/event" def setRequestedResult(self, requestEvents): diff --git a/eventregistry/QueryStory.py b/eventregistry/QueryStory.py index 1c0f166..c139a65 100644 --- a/eventregistry/QueryStory.py +++ b/eventregistry/QueryStory.py @@ -17,7 +17,7 @@ def __init__(self, storyUriOrList = None): def _getPath(self): - return "/json/story" + return "/api/v1/story" def queryByUri(self, uriOrUriList): @@ -118,17 +118,33 @@ def __init__(self, class RequestStorySimilarStories(RequestStory): """ - return a list of similar stories - """ + compute and return a list of similar stories + @param conceptInfoList: array of concepts and their importance, e.g. [{ "uri": "http://en.wikipedia.org/wiki/Barack_Obama", "wgt": 100 }, ...] + @param count: number of similar stories to return (at most 50) + @param dateStart: what can be the oldest date of the similar stories + @param dateEnd: what can be the newest date of the similar stories + @param lang: list of languages in which the similar stories should be returned. + If the list is empty (default), no language filter is sent with the request + @param returnInfo: what details should be included in the returned information + """ def __init__(self, - count = 50, # number of similar stories to return - source = "concept", # how to compute similarity.
Options: concept cca - maxDayDiff = sys.maxsize, # what is the maximum time difference between the similar stories and this one - returnInfo = ReturnInfo()): + conceptInfoList, + count=50, # number of similar stories to return + dateStart = None, # what can be the oldest date of the similar stories + dateEnd = None, # what can be the newest date of the similar stories + lang = [], + returnInfo = ReturnInfo()): assert count <= 50 + assert isinstance(conceptInfoList, list) + self.action = "getSimilarStories" + self.concepts = json.dumps(conceptInfoList) + self.storiesCount = count + if dateStart != None: + self.dateStart = QueryParamsBase.encodeDate(dateStart) + if dateEnd != None: + self.dateEnd = QueryParamsBase.encodeDate(dateEnd) + if len(lang) > 0: + self.lang = lang + # setting resultType since we have to, but it's actually ignored on the backend self.resultType = "similarStories" - self.similarStoriesCount = count - self.similarStoriesSource = source - if maxDayDiff != sys.maxsize: - self.similarStoriesMaxDayDiff = maxDayDiff self.__dict__.update(returnInfo.getParams("similarStories")) diff --git a/eventregistry/Recent.py b/eventregistry/Recent.py index aeda074..b0ec8f7 100644 --- a/eventregistry/Recent.py +++ b/eventregistry/Recent.py @@ -31,7 +31,7 @@ def __init__(self, def _getPath(self): - return "/json/minuteStreamEvents" + return "/api/v1/minuteStreamEvents" def getUpdates(self): @@ -76,7 +76,7 @@ def __init__(self, def _getPath(self): - return "/json/minuteStreamArticles" + return "/api/v1/minuteStreamArticles" def getUpdates(self): diff --git a/eventregistry/ReturnInfo.py b/eventregistry/ReturnInfo.py index d10fc26..55ec859 100644 --- a/eventregistry/ReturnInfo.py +++ b/eventregistry/ReturnInfo.py @@ -55,6 +55,14 @@ def _getVals(self, prefix = ""): return dict + def _addKwdArgs(self, kwdArgs): + for name, val in kwdArgs.items(): + if isinstance(val, bool): + self._setFlag(name, val, not val) + else: + self._setVal(name, val) + + class 
ArticleInfoFlags(ReturnInfoFlagsBase): """" What information about an article should be returned by the API call @@ -76,7 +84,6 @@ class ArticleInfoFlags(ReturnInfoFlagsBase): @param location: the geographic location that the event mentioned in the article is about @param dates: the dates when the articles was crawled and the date when it was published (based on the rss feed date) @param extractedDates: the list of dates found mentioned in the article - @param duplicateList: the list of articles that are a copy of this article @param originalArticle: if the article is a duplicate, this will provide information about the original article @param storyUri: uri of the story (cluster) to which the article belongs """ @@ -98,9 +105,9 @@ def __init__(self, location = False, dates = False, extractedDates = False, - duplicateList = False, originalArticle = False, - storyUri = False): + storyUri=False, + **kwdArgs): self._setVal("articleBodyLen", bodyLen, -1) self._setFlag("includeArticleBasicInfo", basicInfo, True) self._setFlag("includeArticleTitle", title, True) @@ -118,9 +125,9 @@ def __init__(self, self._setFlag("includeArticleLocation", location, False) self._setFlag("includeArticleDates", dates, False) self._setFlag("includeArticleExtractedDates", extractedDates, False) - self._setFlag("includeArticleDuplicateList", duplicateList, False) self._setFlag("includeArticleOriginalArticle", originalArticle, False) self._setFlag("includeArticleStoryUri", storyUri, False) + self._addKwdArgs(kwdArgs) @@ -153,7 +160,8 @@ def __init__(self, infoArticle = False, commonDates = False, socialScore = False, - imageCount = 0): + imageCount = 0, + **kwdArgs): self._setFlag("includeStoryBasicStats", basicStats, True) self._setFlag("includeStoryLocation", location, True) self._setFlag("includeStoryDate", date, False) @@ -166,6 +174,7 @@ def __init__(self, self._setFlag("includeStoryCommonDates", commonDates, False) self._setFlag("includeStorySocialScore", socialScore, False) 
self._setVal("storyImageCount", imageCount, 0) + self._addKwdArgs(kwdArgs) @@ -178,7 +187,7 @@ class EventInfoFlags(ReturnInfoFlagsBase): @param articleCounts: return the number of articles that are assigned to the event @param concepts: return information about the main concepts related to the event @param categories: return information about the categories related to the event - @param location: return the location where the event occured + @param location: return the location where the event occurred @param date: return information about the date of the event @param commonDates: return the dates that were commonly found in the articles about the event @param infoArticle: return for each language the article from which we have extracted the summary and title for event for that language @@ -198,7 +207,8 @@ def __init__(self, infoArticle = False, stories = False, socialScore = False, - imageCount = 0): + imageCount = 0, + **kwdArgs): self._setFlag("includeEventTitle", title, True) self._setFlag("includeEventSummary", summary, True) self._setFlag("includeEventArticleCounts", articleCounts, True) @@ -212,6 +222,7 @@ def __init__(self, self._setFlag("includeEventStories", stories, False) self._setFlag("includeEventSocialScore", socialScore, False) self._setVal("eventImageCount", imageCount, 0) + self._addKwdArgs(kwdArgs) @@ -224,9 +235,7 @@ class SourceInfoFlags(ReturnInfoFlagsBase): @param location: geographic location of the news source @param ranking: a set of rankings for the news source @param image: different images associated with the news source - @param articleCount: the number of articles from this news source that are stored in Event Registry @param socialMedia: different social media accounts used by the news source - @param sourceGroups: info about the names of the source groups to which the source belongs to """ def __init__(self, title = True, @@ -234,17 +243,15 @@ def __init__(self, location = False, ranking = False, image = False, - articleCount = 
False, socialMedia = False, - sourceGroups = False): + **kwdArgs): self._setFlag("includeSourceTitle", title, True) self._setFlag("includeSourceDescription", description, False) self._setFlag("includeSourceLocation", location, False) self._setFlag("includeSourceRanking", ranking, False) self._setFlag("includeSourceImage", image, False) - self._setFlag("includeSourceArticleCount", articleCount, False) self._setFlag("includeSourceSocialMedia", socialMedia, False) - self._setFlag("includeSourceSourceGroups", sourceGroups, False) + self._addKwdArgs(kwdArgs) @@ -252,24 +259,13 @@ class CategoryInfoFlags(ReturnInfoFlagsBase): """ What information about a category should be returned by the API call - @param parentUri: uri of the parent category - @param childrenUris: the list of category uris that are children of the category @param trendingScore: information about how the category is currently trending. The score is computed as Pearson residual by comparing the trending of the category in last 2 days compared to last 14 days - @param trendingHistory: information about the number of times articles were assigned to the category in last 30 days - @param trendingSource: source of information to be used when computing the trending score for a category. Relevant only if CategoryInfoFlags.trendingScore == True or CategoryInfoFlags.trendingHistory == True. 
Valid options: news, social - @type trendingSource: string | list """ def __init__(self, - parentUri = False, - childrenUris = False, trendingScore = False, - trendingHistory = False, - trendingSource = "news"): - self._setFlag("includeCategoryParentUri", parentUri, False) - self._setFlag("includeCategoryChildrenUris", childrenUris, False) + **kwdArgs): self._setFlag("includeCategoryTrendingScore", trendingScore, False) - self._setFlag("includeCategoryTrendingHistory", trendingHistory, False) - self._setVal("categoryTrendingSource", trendingSource, "news") + self._addKwdArgs(kwdArgs) @@ -283,39 +279,29 @@ class ConceptInfoFlags(ReturnInfoFlagsBase): @param synonyms: return concept synonyms (if any) @param image: provide an image associated with the concept @param description: description of the concept - @param conceptClassMembership: provide a list of concept classes where the concept is a member - @param conceptClassMembershipFull: provide a list of concept classes and their parents where the concept is a member @param trendingScore: information about how the concept is currently trending. The score is computed as Pearson residual by comparing the trending of the concept in last 2 days compared to last 14 days - @param trendingHistory: information about the number of times articles were assigned to the concept in last 30 days - @param trendingSource: source of information to be used when computing the trending score for a concept. Relevant only if ConceptInfoFlags.trendingScore == True or ConceptInfoFlags.trendingHistory == True. 
Valid options: news, social - @param totalCount: the total number of times the concept appeared in the news articles - @type conceptType: str | list - @type conceptLang: str | list - @type trendingSource: string | list + @type type: str | list + @type lang: str | list """ def __init__(self, - type = "concepts", - lang = "eng", - label = True, - synonyms = False, - image = False, - description = False, - conceptClassMembership = False, - conceptClassMembershipFull = False, - totalCount = False, - trendingSource = "news", - maxConceptsPerType = 20): + type = "concepts", + lang = "eng", + label = True, + synonyms = False, + image = False, + description = False, + trendingScore = False, + maxConceptsPerType = 20, + **kwdArgs): self._setVal("conceptType", type, "concepts") self._setVal("conceptLang", lang, "eng") self._setFlag("includeConceptLabel", label, True) self._setFlag("includeConceptSynonyms", synonyms, False) self._setFlag("includeConceptImage", image, False) self._setFlag("includeConceptDescription", description, False) - self._setFlag("includeConceptConceptClassMembership", conceptClassMembership, False) - self._setFlag("includeConceptConceptClassMembershipFull", conceptClassMembershipFull, False) - self._setFlag("includeConceptTotalCount", totalCount, False) - self._setVal("conceptTrendingSource", trendingSource, "news") + self._setFlag("includeConceptTrendingScore", trendingScore, False) self._setVal("maxConceptsPerType", maxConceptsPerType, 20) + self._addKwdArgs(kwdArgs) @@ -338,18 +324,19 @@ class LocationInfoFlags(ReturnInfoFlagsBase): @param placeCountry: return information about the country where the place is located """ def __init__(self, - label = True, - wikiUri = False, - geoNamesId = False, - population = False, - geoLocation = False, - - countryArea = False, - countryDetails = False, - countryContinent = False, - - placeFeatureCode = False, - placeCountry = True): + label = True, + wikiUri = False, + geoNamesId = False, + population = False, + 
geoLocation = False, + + countryArea = False, + countryDetails = False, + countryContinent = False, + + placeFeatureCode = False, + placeCountry = True, + **kwdArgs): self._setFlag("includeLocationLabel", label, True) self._setFlag("includeLocationWikiUri", wikiUri, False) self._setFlag("includeLocationGeoNamesId", geoNamesId, False) @@ -362,6 +349,7 @@ def __init__(self, self._setFlag("includeLocationPlaceFeatureCode", placeFeatureCode, False) self._setFlag("includeLocationPlaceCountry", placeCountry, True) + self._addKwdArgs(kwdArgs) @@ -374,9 +362,11 @@ class ConceptClassInfoFlags(ReturnInfoFlagsBase): """ def __init__(self, parentLabels = True, - concepts = False): + concepts = False, + **kwdArgs): self._setFlag("includeConceptClassParentLabels", parentLabels, True) self._setFlag("includeConceptClassConcepts", concepts, False) + self._addKwdArgs(kwdArgs) @@ -388,10 +378,12 @@ class ConceptFolderInfoFlags(ReturnInfoFlagsBase): @param owner: return information about the owner of the concept folder """ def __init__(self, - definition = False, - owner = False): + definition = False, + owner = False, + **kwdArgs): self._setFlag("includeConceptFolderDefinition", definition, False) self._setFlag("includeConceptFolderOwner", owner, False) + self._addKwdArgs(kwdArgs) diff --git a/eventregistry/TopicPage.py b/eventregistry/TopicPage.py index efab009..7dcf5d6 100644 --- a/eventregistry/TopicPage.py +++ b/eventregistry/TopicPage.py @@ -56,12 +56,13 @@ def loadTopicPageFromER(self, uri): params = { "action": "getTopicPageJson", "includeConceptDescription": True, + "includeConceptImage": True, "includeTopicPageDefinition": True, "includeTopicPageOwner": True, "uri": uri } self.topicPage = self._createEmptyTopicPage() - self.concept = self.eventRegistry.jsonRequest("/json/topicPage", params) + self.concept = self.eventRegistry.jsonRequest("/api/v1/topicPage", params) self.topicPage.update(self.concept.get("topicPage", {})) @@ -335,7 +336,9 @@ def getArticles(self, count=100, 
sortBy = "rel", sortByAsc = False, - returnInfo=ReturnInfo()): + dataType = "news", + returnInfo=ReturnInfo(), + **kwargs): """ return a list of articles that match the topic page @param page: which page of the results to return (default: 1) @@ -353,11 +356,12 @@ def getArticles(self, "articlesCount": count, "articlesSortBy": sortBy, "articlesSortByAsc": sortByAsc, - "page": page, + "articlesPage": page, "topicPage": json.dumps(self.topicPage) } params.update(returnInfo.getParams("articles")) - return self.eventRegistry.jsonRequest("/json/article", params) + params.update(kwargs) + return self.eventRegistry.jsonRequest("/api/v1/article", params) def getEvents(self, @@ -365,7 +369,8 @@ def getEvents(self, count=50, sortBy = "rel", sortByAsc = False, - returnInfo=ReturnInfo()): + returnInfo=ReturnInfo(), + **kwargs): """ return a list of events that match the topic page @param page: which page of the results to return (default: 1) @@ -381,8 +386,9 @@ def getEvents(self, "resultType": "events", "dataType": self.topicPage["dataType"], "eventsCount": count, - "page": page, + "eventsPage": page, "topicPage": json.dumps(self.topicPage) } params.update(returnInfo.getParams("events")) - return self.eventRegistry.jsonRequest("/json/event", params) \ No newline at end of file + params.update(kwargs) + return self.eventRegistry.jsonRequest("/api/v1/event", params) \ No newline at end of file diff --git a/eventregistry/Trends.py b/eventregistry/Trends.py index aa49937..d0a5dd9 100644 --- a/eventregistry/Trends.py +++ b/eventregistry/Trends.py @@ -10,7 +10,7 @@ class TrendsBase(QueryParamsBase): def _getPath(self): - return "/json/trends" + return "/api/v1/trends" class GetTrendingConcepts(TrendsBase): diff --git a/eventregistry/__init__.py b/eventregistry/__init__.py index e092a0d..ce55007 100644 --- a/eventregistry/__init__.py +++ b/eventregistry/__init__.py @@ -9,7 +9,6 @@ from eventregistry.QueryArticles import * from eventregistry.QueryArticle import * from 
eventregistry.QueryStory import * -from eventregistry.Correlations import * from eventregistry.Counts import * from eventregistry.DailyShares import * from eventregistry.Info import * diff --git a/eventregistry/_version.py b/eventregistry/_version.py index 9e46835..023606c 100644 --- a/eventregistry/_version.py +++ b/eventregistry/_version.py @@ -1 +1 @@ -__version__ = "8.6.1" +__version__ = "8.7" diff --git a/eventregistry/examples/CorrelationsExamples.py b/eventregistry/examples/CorrelationsExamples.py deleted file mode 100644 index f5cc915..0000000 --- a/eventregistry/examples/CorrelationsExamples.py +++ /dev/null @@ -1,40 +0,0 @@ -from eventregistry import * - -er = EventRegistry(logging = True) - -# -# first example. Concepts and categories that correlate the most with Obama -corr = GetTopCorrelations(er) - -counts = GetCounts(er.getConceptUri("Obama")) -corr.loadInputDataWithCounts(counts) - -candidateConceptsQuery = QueryArticles(conceptUri = er.getConceptUri("Obama")) - -conceptInfo = corr.getTopConceptCorrelations( - candidateConceptsQuery = candidateConceptsQuery, - conceptType = ["person", "org", "loc"], - exactCount = 10, - approxCount = 100) - -categoryInfo = corr.getTopCategoryCorrelations( - exactCount = 10, - approxCount = 100) - - - -# -# second example. 
Concepts and categories that correlate with keywords "iphone" -corr = GetTopCorrelations(er) - -query = QueryArticles(keywords = "iphone") -corr.loadInputDataWithQuery(query) - -conceptInfo = corr.getTopConceptCorrelations( - exactCount = 10, - approxCount = 100) - -categoryInfo = corr.getTopCategoryCorrelations( - exactCount = 10, - approxCount = 100) - diff --git a/eventregistry/examples/QueryArticlesExamples.py b/eventregistry/examples/QueryArticlesExamples.py index 8a75530..79f2af6 100644 --- a/eventregistry/examples/QueryArticlesExamples.py +++ b/eventregistry/examples/QueryArticlesExamples.py @@ -34,7 +34,8 @@ q = QueryArticles(keywords = QueryItems.OR(["Barack Obama", "Trump"])) q.setRequestedResult(RequestArticlesInfo(count = 30, returnInfo = ReturnInfo( - articleInfo = ArticleInfoFlags(duplicateList = True, concepts = True, categories = True, location = True, image = True)))) + articleInfo=ArticleInfoFlags(duplicateList=True, concepts=True, categories=True, location=True, image=True), + conceptInfo=ConceptInfoFlags(trendingScore=True)))) res = er.execQuery(q) diff --git a/eventregistry/examples/QueryEventsExamples.py b/eventregistry/examples/QueryEventsExamples.py index 055f3fd..f7a35e7 100644 --- a/eventregistry/examples/QueryEventsExamples.py +++ b/eventregistry/examples/QueryEventsExamples.py @@ -27,7 +27,7 @@ # find events that: # * are about Barack Obama # * that were covered also by New York Times -# * that occured in 2015 +# * that occurred in 2015 # * return events sorted by how much were articles in the event shared on social media (instead of relevance, which is default) q = QueryEvents( conceptUri = obamaUri, @@ -85,7 +85,7 @@ # OTHER AGGREGATES (INSTEAD OF OBTAINING EVENTS) # -# find events that occured in Germany between 2014-04-16 and 2014-04-28 +# find events that occurred in Germany between 2014-04-16 and 2014-04-28 # from the resulting events produce: q = QueryEvents( locationUri = er.getLocationUri("Germany"), @@ -95,7 +95,7 @@ 
q.setRequestedResult(RequestEventsConceptAggr()) res = er.execQuery(q) -# find where the events occured geographically +# find where the events occurred geographically q.setRequestedResult(RequestEventsLocAggr()) res = er.execQuery(q) @@ -123,7 +123,7 @@ # examples of complex queries that combine various OR and AND operators # -# events that are occured between 2017-02-05 and 2017-02-06 and are not about business +# events that occurred between 2017-02-05 and 2017-02-06 and are not about business businessUri = er.getCategoryUri("Business") q = QueryEvents.initWithComplexQuery(""" { diff --git a/eventregistry/tests/DataValidator.py b/eventregistry/tests/DataValidator.py index 5a85864..5222925 100644 --- a/eventregistry/tests/DataValidator.py +++ b/eventregistry/tests/DataValidator.py @@ -11,19 +11,19 @@ def __init__(self, *args, **kwargs): super(DataValidator, self).__init__(*args, **kwargs) # load settings from the current folder. use different instance than for regular ER requests - currPath = os.path.split(__file__)[0] + currPath = os.path.split(os.path.realpath(__file__))[0] settPath = os.path.join(currPath, "settings.json") self.er = EventRegistry(verboseOutput = True, settingsFName = settPath, allowUseOfArchive = False, minDelayBetweenRequests=0) - self.articleInfo = ArticleInfoFlags(bodyLen = -1, concepts = True, storyUri = True, duplicateList = True, originalArticle = True, categories = True, - links = True, videos = True, image = True, location = True, extractedDates = True, socialScore = True, sentiment = True) - self.sourceInfo = SourceInfoFlags(title = True, description = True, location = True, ranking = True, image = True, articleCount = True, socialMedia = True, sourceGroups = True) + self.articleInfo = ArticleInfoFlags(bodyLen = -1, concepts = True, storyUri = True, originalArticle = True, categories = True, + links = True, videos = True, image = True, location = True, extractedDates = True, socialScore = True, sentiment = True,
includeArticleDuplicateList = True) + self.sourceInfo = SourceInfoFlags(title = True, description = True, location = True, ranking = True, image = True, socialMedia = True) self.conceptInfo = ConceptInfoFlags(type=["entities"], lang = ["eng", "spa"], synonyms = True, image = True, description = True, - conceptClassMembership = True, maxConceptsPerType = 50) + includeConceptConceptClassMembership = True, maxConceptsPerType = 50) self.locationInfo = LocationInfoFlags(wikiUri = True, label = True, geoNamesId = True, geoLocation = True, population = True, countryArea = True, countryDetails = True, countryContinent = True, placeFeatureCode = True, placeCountry = True) - self.categoryInfo = CategoryInfoFlags(parentUri = True, childrenUris = True) + self.categoryInfo = CategoryInfoFlags(includeCategoryParentUri = True, includeCategoryChildrenUris = True) self.eventInfo = EventInfoFlags(commonDates = True, stories = True, socialScore = True, imageCount = 2) self.storyInfo = StoryInfoFlags(categories = True, date = True, concepts = True, title = True, summary = True, medoidArticle = True, commonDates = True, socialScore = True, imageCount = 2) @@ -48,7 +48,7 @@ def ensureValidArticle(self, article, testName): def ensureValidSource(self, source, testName): - for prop in ["uri", "title", "description", "image", "thumbImage", "favicon", "location", "ranking", "articleCount", "sourceGroups", "socialMedia"]: + for prop in ["uri", "title", "description", "image", "thumbImage", "favicon", "location", "ranking", "socialMedia"]: self.assertTrue(prop in source, "Property '%s' was expected in source for test %s" % (prop, testName)) diff --git a/eventregistry/tests/TestAnalytics.py b/eventregistry/tests/TestAnalytics.py index 2c4abee..03d2192 100644 --- a/eventregistry/tests/TestAnalytics.py +++ b/eventregistry/tests/TestAnalytics.py @@ -1,6 +1,6 @@ import unittest, time import eventregistry as ER -from .DataValidator import DataValidator +from eventregistry.tests.DataValidator import 
DataValidator class TestAnalytics(DataValidator): @@ -81,7 +81,7 @@ def testTrainTopic(self): analytics.trainTopicAddDocument(uri, "Emmanuel Macron’s climate commitment to “make this planet great again” has come under attack after his environment minister dramatically quit, saying the French president was not doing enough on climate and other environmental goals.") analytics.trainTopicAddDocument(uri, "Theresa May claimed that a no-deal Brexit “wouldn’t be the end of the world” as she sought to downplay a controversial warning made by Philip Hammond last week that it would cost £80bn in extra borrowing and inhibit long-term economic growth.") # finish training of the topic - ret = analytics.trainTopicFinishTraining(uri, ignoreConceptTypes="wiki") + ret = analytics.trainTopicGetTrainedTopic(uri, ignoreConceptTypes="wiki") assert ret and "topic" in ret topic = ret["topic"] assert "concepts" in topic and len(topic["concepts"]) > 0 diff --git a/eventregistry/tests/TestAutoSuggestions.py b/eventregistry/tests/TestAutoSuggestions.py index 077407e..a3d6ce8 100644 --- a/eventregistry/tests/TestAutoSuggestions.py +++ b/eventregistry/tests/TestAutoSuggestions.py @@ -1,6 +1,6 @@ import unittest from eventregistry import * -from .DataValidator import DataValidator +from eventregistry.tests.DataValidator import DataValidator class TestAutoSuggest(DataValidator): @@ -22,8 +22,8 @@ def testSource(self): self.assertTrue(self.er.getNewsSourceUri("dailypolitical.com") == "dailypolitical.com") # test blogs - self.assertTrue(self.er.getNewsSourceUri("slideshare.net") == "slideshare.net") - self.assertTrue(self.er.getNewsSourceUri("topix.com") == "topix.com") + # self.assertTrue(self.er.getNewsSourceUri("slideshare.net") == "slideshare.net") + # self.assertTrue(self.er.getNewsSourceUri("topix.com") == "topix.com") self.assertTrue(self.er.suggestLocations("Washington")[0].get("wikiUri") == "http://en.wikipedia.org/wiki/Washington_(state)") 
self.assertTrue(self.er.suggestLocations("London")[0].get("wikiUri") == "http://en.wikipedia.org/wiki/City_of_London") diff --git a/eventregistry/tests/TestInfo.py b/eventregistry/tests/TestERInfo.py similarity index 84% rename from eventregistry/tests/TestInfo.py rename to eventregistry/tests/TestERInfo.py index db51b2c..51bb566 100644 --- a/eventregistry/tests/TestInfo.py +++ b/eventregistry/tests/TestERInfo.py @@ -1,7 +1,7 @@ import unittest from eventregistry import * -from .DataValidator import DataValidator +from eventregistry.tests.DataValidator import DataValidator class TestInfo(DataValidator): def test_sourcesByUri(self): @@ -12,9 +12,7 @@ def test_sourcesByUri(self): description = True, location = True, ranking = True, - articleCount = True, - socialMedia = True, - sourceGroups = True))) + socialMedia = True))) res = self.er.execQuery(q) self.assertEqual(len(res), len(sourceUriList), "Expected different number of sources") for item in list(res.values()): @@ -22,8 +20,6 @@ def test_sourcesByUri(self): self.assertIsNotNone(item.get("title"), "Source title is missing") self.assertIsNotNone(item.get("description"), "Source description is missing") self.assertIsNotNone(item.get("ranking"), "Source ranking is missing") - self.assertIsNotNone(item.get("articleCount"), "Source articleCount is missing") - self.assertIsNotNone(item.get("sourceGroups"), "Source sourceGroups is missing") self.assertIsNotNone(item.get("socialMedia"), "Source socialMedia is missing") @@ -37,9 +33,8 @@ def test_conceptsByUri(self): synonyms = True, image = True, description = True, - conceptClassMembership = True, - conceptClassMembershipFull = True, - trendingSource = ["news", "social"]))) + includeConceptConceptClassMembership = True, + includeConceptConceptClassMembershipFull = True))) res = self.er.execQuery(q) self.assertEqual(len(res), len(uriList), "Expected 10 concepts") @@ -63,8 +58,8 @@ def test_categories(self): catUriList = [category.get("uri") for category in categories] q 
= GetCategoryInfo(catUriList, returnInfo = ReturnInfo( categoryInfo = CategoryInfoFlags( - parentUri = True, - childrenUris = True))) + includeCategoryParentUri = True, + includeCategoryChildrenUris = True))) res = self.er.execQuery(q) self.assertEqual(len(res), len(catUriList), "Expected 10 categories") for item in list(res.values()): diff --git a/eventregistry/tests/TestInvalidQueries.py b/eventregistry/tests/TestERInvalidQueries.py similarity index 98% rename from eventregistry/tests/TestInvalidQueries.py rename to eventregistry/tests/TestERInvalidQueries.py index eef331f..9407460 100644 --- a/eventregistry/tests/TestInvalidQueries.py +++ b/eventregistry/tests/TestERInvalidQueries.py @@ -1,6 +1,6 @@ import unittest from eventregistry import * -from .DataValidator import DataValidator +from eventregistry.tests.DataValidator import DataValidator class TestInvalidQueries(DataValidator): diff --git a/eventregistry/tests/TestQueryArticle.py b/eventregistry/tests/TestERQueryArticle.py similarity index 97% rename from eventregistry/tests/TestQueryArticle.py rename to eventregistry/tests/TestERQueryArticle.py index edccf10..5832436 100644 --- a/eventregistry/tests/TestQueryArticle.py +++ b/eventregistry/tests/TestERQueryArticle.py @@ -1,6 +1,6 @@ import unittest from eventregistry import * -from .DataValidator import DataValidator +from eventregistry.tests.DataValidator import DataValidator class TestQueryArticle(DataValidator): diff --git a/eventregistry/tests/TestQueryArticles.py b/eventregistry/tests/TestERQueryArticles.py similarity index 86% rename from eventregistry/tests/TestQueryArticles.py rename to eventregistry/tests/TestERQueryArticles.py index 4e5210d..4eac7ec 100644 --- a/eventregistry/tests/TestQueryArticles.py +++ b/eventregistry/tests/TestERQueryArticles.py @@ -1,11 +1,14 @@ import unittest, math from eventregistry import * -from .DataValidator import DataValidator +from eventregistry.tests.DataValidator import DataValidator + class 
TestQueryArticles(DataValidator): def createQuery(self): - q = QueryArticles(conceptUri = self.er.getConceptUri("Obama")) + conceptUri = self.er.getConceptUri("Obama") + self.assertTrue(conceptUri != None) + q = QueryArticles(conceptUri = conceptUri) return q @@ -26,12 +29,16 @@ def testArticleList(self): def testArticleUriWgtList(self): - iter = QueryArticlesIter(conceptUri=self.er.getConceptUri("germany")) + conceptUri = self.er.getConceptUri("germany") + self.assertTrue(conceptUri != None) + iter = QueryArticlesIter(conceptUri=conceptUri) expectedCount = iter.count(self.er) countPerPage = 20000 pages = int(math.ceil(expectedCount / float(countPerPage))) - q = QueryArticles(conceptUri=self.er.getConceptUri("germany")) + conceptUri = self.er.getConceptUri("germany") + self.assertTrue(conceptUri != None) + q = QueryArticles(conceptUri=conceptUri) items = [] for page in range(1, pages+1): q.setRequestedResult(RequestArticlesUriWgtList(page = page, count = countPerPage)) @@ -42,7 +49,7 @@ def testArticleUriWgtList(self): lastWgt = None for item in items: - wgt = item.split(":")[1] + wgt = int(item.split(":")[1]) if lastWgt == None: lastWgt = wgt else: assert lastWgt >= wgt @@ -245,7 +252,11 @@ def testArticleListWithLocationSearch(self): def testEventListWithCombinedSearch1(self): - q = QueryArticles(keywords="germany", lang = ["eng", "deu"], conceptUri = [self.er.getConceptUri("Merkel")], categoryUri = self.er.getCategoryUri("Business")) + merkelUri = self.er.getConceptUri("Merkel") + businessUri = self.er.getCategoryUri("business") + self.assertTrue(merkelUri != None) + self.assertTrue(businessUri != None) + q = QueryArticles(keywords="germany", lang = ["eng", "deu"], conceptUri = [merkelUri], categoryUri = businessUri) q.setRequestedResult(RequestArticlesInfo(count = 30, returnInfo = self.returnInfo)) res = self.er.execQuery(q) self.validateGeneralArticleList(res) @@ -253,8 +264,8 @@ def testEventListWithCombinedSearch1(self): q2 = QueryArticles( 
keywords="germany", lang = ["eng", "deu"], - conceptUri = self.er.getConceptUri("Merkel"), - categoryUri = self.er.getCategoryUri("Business")) + conceptUri = merkelUri, + categoryUri = businessUri) q2.setRequestedResult(RequestArticlesInfo(count = 30, returnInfo = self.returnInfo)) res2 = self.er.execQuery(q2) self.validateGeneralArticleList(res2) @@ -267,8 +278,10 @@ def testEventListWithCombinedSearch1(self): def testConceptTrends(self): q = self.createQuery() + obamaUri = self.er.getConceptUri("obama") + trumpUri = self.er.getConceptUri("trump") q.setRequestedResult(RequestArticlesConceptTrends( - conceptUris = [self.er.getConceptUri("Obama"), self.er.getConceptUri("Trump")], + conceptUris = [obamaUri, trumpUri], returnInfo = self.returnInfo)) res = self.er.execQuery(q) @@ -384,6 +397,8 @@ def testQueryArticlesIterator2(self): def testQuery1(self): obamaUri = self.er.getConceptUri("Obama") LAsourceUri = self.er.getNewsSourceUri("latimes") + self.assertTrue(obamaUri != None) + self.assertTrue(LAsourceUri != None) iter = QueryArticlesIter(keywords = "trump", conceptUri = obamaUri, sourceUri = LAsourceUri) for article in iter.execQuery(self.er, returnInfo = self.returnInfo, maxItems = 500): self.ensureArticleHasConcept(article, obamaUri) @@ -395,6 +410,9 @@ def testQuery2(self): obamaUri = self.er.getConceptUri("Obama") LAsourceUri = self.er.getNewsSourceUri("latimes") businessCatUri = self.er.getCategoryUri("business") + self.assertTrue(obamaUri != None) + self.assertTrue(LAsourceUri != None) + self.assertTrue(businessCatUri != None) iter = QueryArticlesIter(conceptUri = obamaUri, sourceUri = LAsourceUri, categoryUri = businessCatUri) for article in iter.execQuery(self.er, returnInfo = self.returnInfo, maxItems = 500): self.ensureArticleHasCategory(article, businessCatUri) @@ -413,6 +431,14 @@ def testQuery3(self): srcAawsatUri = self.er.getNewsSourceUri("aawsat") srcSvodkaUri = self.er.getNewsSourceUri("svodka") + self.assertTrue(obamaUri != None) + 
self.assertTrue(politicsUri != None) + self.assertTrue(chinaUri != None) + self.assertTrue(unitedStatesUri != None) + self.assertTrue(srcDailyCallerUri != None) + self.assertTrue(srcAawsatUri != None) + self.assertTrue(srcSvodkaUri != None) + catBusinessUri = self.er.getCategoryUri("business") catPoliticsUri = self.er.getCategoryUri("politics") iter = QueryArticlesIter(conceptUri = obamaUri, @@ -439,6 +465,49 @@ def testQuery3(self): self.ensureArticleHasNotCategory(article, catPoliticsUri) + # + # test if we download all content + # + def testGetAllArticlesCount(self): + returnInfo = ReturnInfo(articleInfo = ArticleInfoFlags(body = 0)) + unitedStatesUri = self.er.getConceptUri("united states") + self.assertTrue(unitedStatesUri != None) + iter = QueryArticlesIter(conceptUri=unitedStatesUri, lang="eng", dataType=["news", "blog"]) + + total = iter.count(self.er) + uniqueUris = set() + for article in iter.execQuery(self.er, returnInfo=returnInfo): + if article["uri"] in uniqueUris: + print("again seeing " + article["uri"]) + uniqueUris.add(article["uri"]) + self.assertTrue(total == len(uniqueUris)) + + + def testGetAllArticlesCount2(self): + returnInfo = ReturnInfo(articleInfo = ArticleInfoFlags(body = 0)) + twitterUri = self.er.getConceptUri("twitter") + self.assertTrue(twitterUri != None) + iter = QueryArticlesIter(conceptUri=twitterUri, lang="eng", dataType=["news", "blog"]) + + total = iter.count(self.er) + uniqueUris = set() + for article in iter.execQuery(self.er, returnInfo=returnInfo, sortBy="date"): + if article["uri"] in uniqueUris: + print("again seeing " + article["uri"]) + uniqueUris.add(article["uri"]) + self.assertTrue(total == len(uniqueUris)) + + total = iter.count(self.er) + uniqueUris = set() + for article in iter.execQuery(self.er, returnInfo=returnInfo, sortBy="rel"): + if article["uri"] in uniqueUris: + print("again seeing " + article["uri"]) + uniqueUris.add(article["uri"]) + self.assertTrue(total == len(uniqueUris)) + + + + if __name__ == 
"__main__": suite = unittest.TestLoader().loadTestsFromTestCase(TestQueryArticles) diff --git a/eventregistry/tests/TestQueryArticlesComplex.py b/eventregistry/tests/TestERQueryArticlesComplex.py similarity index 99% rename from eventregistry/tests/TestQueryArticlesComplex.py rename to eventregistry/tests/TestERQueryArticlesComplex.py index ad35127..6f9be99 100644 --- a/eventregistry/tests/TestQueryArticlesComplex.py +++ b/eventregistry/tests/TestERQueryArticlesComplex.py @@ -1,6 +1,6 @@ import unittest from eventregistry import * -from .DataValidator import DataValidator +from eventregistry.tests.DataValidator import DataValidator class TestQueryArticlesComplex(DataValidator): diff --git a/eventregistry/tests/TestQueryEvent.py b/eventregistry/tests/TestERQueryEvent.py similarity index 83% rename from eventregistry/tests/TestQueryEvent.py rename to eventregistry/tests/TestERQueryEvent.py index 3412908..6d1b475 100644 --- a/eventregistry/tests/TestQueryEvent.py +++ b/eventregistry/tests/TestERQueryEvent.py @@ -1,6 +1,6 @@ import unittest, sys from eventregistry import * -from .DataValidator import DataValidator +from eventregistry.tests.DataValidator import DataValidator class TestQueryEvent(DataValidator): @@ -138,28 +138,10 @@ def testSimilarEvents(self): addArticleTrendInfo = True, returnInfo = self.returnInfo)) res = self.er.execQuery(q) - for event in list(res.values()): - if "newEventUri" in event: - continue - self.assertIsNotNone(event.get("similarEvents"), "Expected to see 'similarEvents'") - for simEvent in event.get("similarEvents").get("results"): - self.ensureValidEvent(simEvent, "testSimilarEvents") - self.assertIsNotNone(event.get("similarEvents").get("trends"), "Expected to see a 'trends' property") - - - def testSimilarStories(self): - q = QueryEvent(self.getValidEvent()) - q.setRequestedResult(RequestEventSimilarStories( - [{ "uri": "http://en.wikipedia.org/wiki/Barack_Obama", "wgt": 100 }, { "uri": "http://en.wikipedia.org/wiki/Donald_Trump", 
"wgt": 80 }], - returnInfo = self.returnInfo)) - res = self.er.execQuery(q) - - for event in list(res.values()): - if "newEventUri" in event: + for simEvent in res.get("events", {}).get("results", []): + if "newEventUri" in simEvent: continue - self.assertIsNotNone(event.get("similarStories"), "Expected to see 'similarStories'") - for simStory in event.get("similarStories").get("results"): - self.ensureValidStory(simStory, "testSimilarStories") + self.ensureValidEvent(simEvent, "testSimilarEvents") def testEventArticlesIterator(self): diff --git a/eventregistry/tests/TestQueryEvents.py b/eventregistry/tests/TestERQueryEvents.py similarity index 99% rename from eventregistry/tests/TestQueryEvents.py rename to eventregistry/tests/TestERQueryEvents.py index 94524cd..67df8b9 100644 --- a/eventregistry/tests/TestQueryEvents.py +++ b/eventregistry/tests/TestERQueryEvents.py @@ -1,6 +1,6 @@ import unittest from eventregistry import * -from .DataValidator import DataValidator +from eventregistry.tests.DataValidator import DataValidator class TestQueryEvents(DataValidator): def validateGeneralEventList(self, res): diff --git a/eventregistry/tests/TestQueryEventsComplex.py b/eventregistry/tests/TestERQueryEventsComplex.py similarity index 99% rename from eventregistry/tests/TestQueryEventsComplex.py rename to eventregistry/tests/TestERQueryEventsComplex.py index 2bc12c9..5dfac13 100644 --- a/eventregistry/tests/TestQueryEventsComplex.py +++ b/eventregistry/tests/TestERQueryEventsComplex.py @@ -1,6 +1,6 @@ import unittest from eventregistry import * -from .DataValidator import DataValidator +from eventregistry.tests.DataValidator import DataValidator class TestQueryEventsComplex(DataValidator): diff --git a/eventregistry/tests/TestQueryPaging.py b/eventregistry/tests/TestERQueryPaging.py similarity index 88% rename from eventregistry/tests/TestQueryPaging.py rename to eventregistry/tests/TestERQueryPaging.py index 9782622..4d49b90 100644 --- 
a/eventregistry/tests/TestQueryPaging.py +++ b/eventregistry/tests/TestERQueryPaging.py @@ -1,7 +1,7 @@ from __future__ import print_function import unittest, math, random from eventregistry import * -from .DataValidator import DataValidator +from eventregistry.tests.DataValidator import DataValidator from eventregistryadmin import EventRegistryAdmin @@ -11,11 +11,12 @@ def testPagingUri1(self): """ test pages 1 and 2, download uriwgtlist and then test in reverse """ - q = QueryArticles(sourceUri="bbc.co.uk", dateStart="2018-04-22", dateEnd="2018-04-25") + q = QueryArticles(sourceUri="bbc.co.uk", dateStart="2017-02-04", dateEnd="2017-02-06") q.setRequestedResult(RequestArticlesUriWgtList(page=1, count=1000)) res = self.er.execQuery(q) arr = res.get("uriWgtList", {}).get("results", []) uriList = self.er.getUriFromUriWgt(arr) + self.assertTrue(len(uriList) > 0) q.setRequestedResult(RequestArticlesUriWgtList(page=2, count=1000)) res = self.er.execQuery(q) @@ -25,7 +26,7 @@ def testPagingUri1(self): erAdmin = EventRegistryAdmin(self.er._host) erAdmin.clearCache() - q = QueryArticles(sourceUri="bbc.co.uk", dateStart="2018-04-22", dateEnd="2018-04-25") + q = QueryArticles(sourceUri="bbc.co.uk", dateStart="2017-02-04", dateEnd="2017-02-06") q.setRequestedResult(RequestArticlesUriWgtList(page=2, count=1000)) res = self.er.execQuery(q) arr = res.get("uriWgtList", {}).get("results", []) @@ -47,18 +48,19 @@ def testPagingArt1(self): """ test pages 1 and 2, download items """ - q = QueryArticles(sourceUri="bbc.co.uk", dateStart="2018-04-22", dateEnd="2018-04-25") + q = QueryArticles(sourceUri="bbc.co.uk", dateStart="2017-02-04", dateEnd="2017-02-06") q.setRequestedResult(RequestArticlesInfo(page=1, count=100)) res = self.er.execQuery(q) arr = res.get("articles", {}).get("results", []) uriList = [art["uri"] for art in arr] + self.assertTrue(len(uriList) > 0) q.setRequestedResult(RequestArticlesInfo(page=2, count=100)) res = self.er.execQuery(q) arr = res.get("articles", 
{}).get("results", []) uriList.extend([art["uri"] for art in arr]) - q = QueryArticles(sourceUri="bbc.co.uk", dateStart="2018-04-22", dateEnd="2018-04-25") + q = QueryArticles(sourceUri="bbc.co.uk", dateStart="2017-02-04", dateEnd="2017-02-06") q.setRequestedResult(RequestArticlesInfo(page=2, count=100)) res = self.er.execQuery(q) arr = res.get("articles", {}).get("results", []) @@ -80,7 +82,7 @@ def testAllPagesArt1(self): """ download all pages of results through articles directly and using uriWgtList - in both cases should be the same """ - q = QueryArticles(sourceUri="bbc.co.uk", dateStart="2018-04-22", dateEnd="2018-04-25") + q = QueryArticles(sourceUri="bbc.co.uk", dateStart="2017-02-04", dateEnd="2017-02-06") page = 1 uriList = [] while True: @@ -95,7 +97,7 @@ def testAllPagesArt1(self): erAdmin = EventRegistryAdmin(self.er._host) erAdmin.clearCache() - q = QueryArticles(sourceUri="bbc.co.uk", dateStart="2018-04-22", dateEnd="2018-04-25") + q = QueryArticles(sourceUri="bbc.co.uk", dateStart="2017-02-04", dateEnd="2017-02-06") page = 1 uriList2 = [] while True: @@ -107,6 +109,7 @@ def testAllPagesArt1(self): if len(arr) == 0: break + self.assertTrue(len(uriList) > 0) uriList.sort() uriList2.sort() self.assertTrue(len(uriList) == len(uriList2)) @@ -118,9 +121,10 @@ def testDownloadingOfArticlePages(self): """ download article pages in random order of pages and in the normal order """ - iter = QueryArticlesIter(sourceUri="bbc.co.uk", dateStart="2018-04-10", dateEnd="2018-04-16") + iter = QueryArticlesIter(sourceUri="bbc.co.uk", dateStart="2017-02-04", dateEnd="2017-02-06") # number of matches count = iter.count(self.er) + self.assertTrue(count > 0) print("\nFound %d articles" % count) # try again with a randomized order of pages @@ -129,7 +133,7 @@ def testDownloadingOfArticlePages(self): pages = list(range(1, int(1 + math.ceil(count / 100)))) random.shuffle(pages) for page in pages: - q = QueryArticles(sourceUri="bbc.co.uk", dateStart="2018-04-10", 
dateEnd="2018-04-16") + q = QueryArticles(sourceUri="bbc.co.uk", dateStart="2017-02-04", dateEnd="2017-02-06") q.setRequestedResult(RequestArticlesInfo(page=page, count=100)) res = self.er.execQuery(q) c = res.get("articles", {}).get("totalResults", -1) @@ -149,7 +153,7 @@ def testDownloadingOfArticlePages(self): totArts = 0 pages = list(range(1, int(1 + math.ceil(count / 100)))) for page in pages: - q = QueryArticles(sourceUri="bbc.co.uk", dateStart="2018-04-10", dateEnd="2018-04-16") + q = QueryArticles(sourceUri="bbc.co.uk", dateStart="2017-02-04", dateEnd="2017-02-06") q.setRequestedResult(RequestArticlesInfo(page=page, count=100)) res = self.er.execQuery(q) c = res.get("articles", {}).get("totalResults", -1) @@ -164,9 +168,10 @@ def testDownloadingOfArticlePages(self): def testDownloadingOfArticleUris(self): - iter = QueryArticlesIter(conceptUri= self.er.getConceptUri("Trump"), dateStart = "2016-12-01", dateEnd = "2017-01-01") + iter = QueryArticlesIter(conceptUri= self.er.getConceptUri("Trump"), dateStart="2017-02-04", dateEnd="2017-02-06") # number of matches count = iter.count(self.er) + self.assertTrue(count > 0) print("\nFound %d articles by uris\nDownloading page:" % count, end="") # try again with a randomized order of pages @@ -177,7 +182,7 @@ def testDownloadingOfArticleUris(self): random.shuffle(pages) for page in pages: print("%d" % page, end=", ") - q = QueryArticles(conceptUri= self.er.getConceptUri("Trump"), dateStart = "2016-12-01", dateEnd = "2017-01-01") + q = QueryArticles(conceptUri= self.er.getConceptUri("Trump"), dateStart="2017-02-04", dateEnd="2017-02-06") q.setRequestedResult(RequestArticlesUriWgtList(page=page, count=10000)) res = self.er.execQuery(q) c = res.get("uriWgtList", {}).get("totalResults", -1) @@ -198,7 +203,7 @@ def testDownloadingOfArticleUris(self): pages = list(range(1, int(1 + math.ceil(count / 10000)))) for page in pages: print("%d" % page, end=", ") - q = QueryArticles(conceptUri= self.er.getConceptUri("Trump"), 
dateStart = "2016-12-01", dateEnd = "2017-01-01") + q = QueryArticles(conceptUri= self.er.getConceptUri("Trump"), dateStart="2017-02-04", dateEnd="2017-02-06") q.setRequestedResult(RequestArticlesUriWgtList(page=page, count=10000)) res = self.er.execQuery(q) c = res.get("uriWgtList", {}).get("totalResults", -1) @@ -213,9 +218,10 @@ def testDownloadingOfArticleUris(self): def testDownloadingOfArticles(self): - iter = QueryArticlesIter(conceptUri= self.er.getConceptUri("peace"), dateStart = "2018-04-18", dateEnd = "2018-04-22") + iter = QueryArticlesIter(conceptUri= self.er.getConceptUri("peace"), dateStart="2017-02-04", dateEnd="2017-02-06") # number of matches count = iter.count(self.er) + self.assertTrue(count > 0) print("\nFound %d articles\nDownloading page:" % count, end="") # try again with a randomized order of pages @@ -226,7 +232,7 @@ def testDownloadingOfArticles(self): random.shuffle(pages) for page in pages: print("%d" % page, end=", ") - q = QueryArticles(conceptUri= self.er.getConceptUri("peace"), dateStart = "2018-04-18", dateEnd = "2018-04-22") + q = QueryArticles(conceptUri= self.er.getConceptUri("peace"), dateStart="2017-02-04", dateEnd="2017-02-06") q.setRequestedResult(RequestArticlesInfo(page=page, count=100)) res = self.er.execQuery(q) c = res.get("articles", {}).get("totalResults", -1) @@ -247,7 +253,7 @@ def testDownloadingOfArticles(self): pages = list(range(1, int(1 + math.ceil(count / 100)))) for page in pages: print("%d" % page, end=", ") - q = QueryArticles(conceptUri= self.er.getConceptUri("peace"), dateStart = "2018-04-18", dateEnd = "2018-04-22") + q = QueryArticles(conceptUri= self.er.getConceptUri("peace"), dateStart="2017-02-04", dateEnd="2017-02-06") q.setRequestedResult(RequestArticlesInfo(page=page, count=100)) res = self.er.execQuery(q) c = res.get("articles", {}).get("totalResults", -1) @@ -263,9 +269,10 @@ def testDownloadingOfArticles(self): def testDownloadingOfEventUris(self): - iter = QueryEventsIter(conceptUri= 
self.er.getConceptUri("Trump"), dateStart = "2016-10-01", dateEnd = "2016-11-01") + iter = QueryEventsIter(conceptUri= self.er.getConceptUri("Trump"), dateStart="2017-02-04", dateEnd="2017-02-06") # number of matches count = iter.count(self.er) + self.assertTrue(count > 0) print("\nFound %d events by uris\nDownloading page:" % count, end="") # try again with a randomized order of pages @@ -275,7 +282,7 @@ def testDownloadingOfEventUris(self): random.shuffle(pages) for page in pages: print("%d" % page, end=", ") - q = QueryEvents(conceptUri= self.er.getConceptUri("Trump"), dateStart = "2016-10-01", dateEnd = "2016-11-01") + q = QueryEvents(conceptUri= self.er.getConceptUri("Trump"), dateStart="2017-02-04", dateEnd="2017-02-06") q.setRequestedResult(RequestEventsUriWgtList(page=page, count=1000)) res = self.er.execQuery(q) c = res.get("uriWgtList", {}).get("totalResults", -1) @@ -294,7 +301,7 @@ def testDownloadingOfEventUris(self): pages = list(range(1, int(1 + math.ceil(count / 1000)))) for page in pages: print("%d" % page, end=", ") - q = QueryEvents(conceptUri= self.er.getConceptUri("Trump"), dateStart = "2016-10-01", dateEnd = "2016-11-01") + q = QueryEvents(conceptUri= self.er.getConceptUri("Trump"), dateStart="2017-02-04", dateEnd="2017-02-06") q.setRequestedResult(RequestEventsUriWgtList(page=page, count=1000)) res = self.er.execQuery(q) c = res.get("uriWgtList", {}).get("totalResults", -1) @@ -308,9 +315,10 @@ def testDownloadingOfEventUris(self): def testDownloadingOfEvents(self): - iter = QueryEventsIter(conceptUri= self.er.getConceptUri("peace"), dateStart = "2018-03-25", dateEnd = "2018-04-05") + iter = QueryEventsIter(conceptUri= self.er.getConceptUri("peace"), dateStart="2017-02-04", dateEnd="2017-02-06") # number of matches count = iter.count(self.er) + self.assertTrue(count > 0) print("\nFound %d events\nDownloading page:" % count, end="") # try again with a randomized order of pages @@ -320,7 +328,7 @@ def testDownloadingOfEvents(self): 
random.shuffle(pages) for page in pages: print("%d" % page, end=", ") - q = QueryEvents(conceptUri= self.er.getConceptUri("peace"), dateStart = "2018-03-25", dateEnd = "2018-04-05") + q = QueryEvents(conceptUri= self.er.getConceptUri("peace"), dateStart="2017-02-04", dateEnd="2017-02-06") q.setRequestedResult(RequestEventsInfo(page=page, count=50)) res = self.er.execQuery(q) c = res.get("events", {}).get("totalResults", -1) @@ -335,7 +343,7 @@ def testDownloadingOfEvents(self): pages = list(range(1, int(1 + math.ceil(count / 50)))) for page in pages: print("%d" % page, end=", ") - q = QueryEvents(conceptUri= self.er.getConceptUri("peace"), dateStart = "2018-03-25", dateEnd = "2018-04-05") + q = QueryEvents(conceptUri= self.er.getConceptUri("peace"), dateStart="2017-02-04", dateEnd="2017-02-06") q.setRequestedResult(RequestEventsInfo(page=page, count=50)) res = self.er.execQuery(q) c = res.get("events", {}).get("totalResults", -1) diff --git a/eventregistry/tests/TestReturnInfo.py b/eventregistry/tests/TestERReturnInfo.py similarity index 99% rename from eventregistry/tests/TestReturnInfo.py rename to eventregistry/tests/TestERReturnInfo.py index efea023..a356d59 100644 --- a/eventregistry/tests/TestReturnInfo.py +++ b/eventregistry/tests/TestERReturnInfo.py @@ -3,7 +3,7 @@ """ import unittest from eventregistry import * -from .DataValidator import DataValidator +from eventregistry.tests.DataValidator import DataValidator class TestReturnInfo(DataValidator): diff --git a/eventregistry/tests/TestTopicPage.py b/eventregistry/tests/TestTopicPage.py new file mode 100644 index 0000000..2b9241d --- /dev/null +++ b/eventregistry/tests/TestTopicPage.py @@ -0,0 +1,45 @@ +import unittest, math +from eventregistry import * +from eventregistry.tests.DataValidator import DataValidator + + +class TestTopicPage(DataValidator): + + def createTopicPage(self): + q = TopicPage(self.er) + q.loadTopicPageFromER("5aa6837b-d23d-4a71-bc80-7aad676e1905") + return q + + + def 
testGetArticlesForTopicPage(self): + q = self.createTopicPage() + uriSet = set() + for page in range(1, 20): + res = q.getArticles(page=page, dataType=["news", "blog"], sortBy="rel") + rel = sys.maxsize + for art in res.get("articles", {}).get("results", []): + assert art.get("wgt") <= rel + rel = art.get("wgt") + assert art.get("uri") not in uriSet + uriSet.add(art.get("uri")) + + + def testGetEventsForTopicPage(self): + q = self.createTopicPage() + uriSet = set() + for page in range(1, 20): + res = q.getEvents(page=page, sortBy="rel") + rel = sys.maxsize + for event in res.get("events", {}).get("results", []): + assert event.get("wgt") <= rel + rel = event.get("wgt") + assert event.get("uri") not in uriSet + uriSet.add(event.get("uri")) + + + +if __name__ == "__main__": + suite = unittest.TestLoader().loadTestsFromTestCase(TestTopicPage) + # suite = unittest.TestSuite() + # suite.addTest(TestQueryArticles("testQuery2")) + unittest.TextTestRunner(verbosity=3).run(suite) diff --git a/eventregistry/tests/__init__.py b/eventregistry/tests/__init__.py index fc5ea23..5fd9d97 100644 --- a/eventregistry/tests/__init__.py +++ b/eventregistry/tests/__init__.py @@ -1,16 +1,16 @@ import unittest -from .DataValidator import * -from .TestAnalytics import * -from .TestAutoSuggestions import * -from .TestInfo import * -from .TestInvalidQueries import * -from .TestQueryArticle import * -from .TestQueryArticles import * -from .TestQueryArticlesComplex import * -from .TestQueryEvent import * -from .TestQueryEvents import * -from .TestQueryEventsComplex import * +# from .DataValidator import * +# from .TestAnalytics import * +# from .TestAutoSuggestions import * +# from .TestERInfo import * +# from .TestERInvalidQueries import * +# from .TestERQueryArticle import * +# from .TestERQueryArticles import * +# from .TestERQueryArticlesComplex import * +# from .TestERQueryEvent import * +# from .TestERQueryEvents import * +# from .TestERQueryEventsComplex import * def runTests(): - 
unittest.main(); + unittest.main() \ No newline at end of file