diff --git a/.gitignore b/.gitignore index 3996c93..d08202e 100644 --- a/.gitignore +++ b/.gitignore @@ -76,3 +76,6 @@ target/ settings.json *.orig + +# Sonar +.scannerwork/ diff --git a/DEVELOPER.md b/DEVELOPER.md index f605739..43d2168 100644 --- a/DEVELOPER.md +++ b/DEVELOPER.md @@ -1,5 +1,40 @@ ## Developer Information +#### Sonar Scanning +* Uncomment the `sonar.branch.name` line in `sonar-project.properties` and adjust the value to match your branch name. +* Install the `coverage` module into your virtual environment. + ``` + virtualenv -p python3 ~/venvs/python-binding-development + source ~/venvs/python-binding-development/bin/activate + pip install --upgrade pip + pip install coverage + ``` +* Generate the coverage data. + ``` + coverage run --source=rosette -m pytest + ``` +* Check the results locally. + ``` + coverage report + ``` +* Generate the XML coverage report. + ``` + coverage xml + ``` +* Push the results to Sonar. + ``` + sonar_host=https://sonar.basistech.net + sonar_token= # Generate a token at https://sonar.basistech.net/account/security/ + + docker run \ + --rm \ + -e SONAR_HOST_URL="${sonar_host}" \ + -e SONAR_LOGIN="${sonar_token}" \ + -v "$(pwd):/usr/src" \ + sonarsource/sonar-scanner-cli + + ``` + ### Testing To test changes you have made to the binding, you can use a pre-configured Docker environment. This environment will: - Compile the binding within the container. diff --git a/docs/source/conf.py b/docs/source/conf.py index 2e30ba6..0846005 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -47,7 +47,7 @@ # General information about the project. project = '' -copyright = '2019, Basis Technology' +copyright = '2022, Basis Technology' author = 'Basis Technology' # The version info for the project you're documenting, acts as replacement for @@ -55,9 +55,9 @@ # built documents. # # The short X.Y version. -version = '1.14.4' +version = '1.20.0' # The full version, including alpha/beta/rc tags. 
-release = '1.14.4' +release = '1.20.0' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/pytest.ini b/pytest.ini index fc6bcc6..b37e476 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,5 +1,4 @@ [pytest] -pep8ignore = E501 norecursedirs = .tox - target \ No newline at end of file + target diff --git a/rosette/__init__.py b/rosette/__init__.py index 544db30..bbeb61e 100644 --- a/rosette/__init__.py +++ b/rosette/__init__.py @@ -1,6 +1,6 @@ """ Python client for the Rosette API. -Copyright (c) 2014-2019 Basis Technology Corporation. +Copyright (c) 2014-2022 Basis Technology Corporation. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at @@ -12,4 +12,4 @@ limitations under the License. """ -__version__ = '1.14.4' +__version__ = '1.20.0' diff --git a/rosette/api.py b/rosette/api.py index 887cede..a85a49f 100644 --- a/rosette/api.py +++ b/rosette/api.py @@ -3,7 +3,7 @@ """ Python client for the Rosette API. -Copyright (c) 2014-2019 Basis Technology Corporation. +Copyright (c) 2014-2022 Basis Technology Corporation. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -28,12 +28,16 @@ import requests import platform -_BINDING_VERSION = '1.14.4' +_APPLICATION_JSON = 'application/json' +_BINDING_LANGUAGE = 'python' +_BINDING_VERSION = '1.20.0' +_CONCURRENCY_HEADER = 'x-rosetteapi-concurrency' +_CUSTOM_HEADER_PREFIX = 'X-RosetteAPI-' +_CUSTOM_HEADER_PATTERN = re.compile('^' + _CUSTOM_HEADER_PREFIX) _GZIP_BYTEARRAY = bytearray([0x1F, 0x8b, 0x08]) _ISPY3 = sys.version_info[0] == 3 - if _ISPY3: _GZIP_SIGNATURE = _GZIP_BYTEARRAY else: @@ -49,7 +53,6 @@ def __init__(self, js, code): self.status_code = code def json(self): - """ return json""" return self._json @@ -112,7 +115,7 @@ def serialize(self, options): values = {} for (key, val) in self.__params.items(): if val is None: - pass + continue else: values[key] = val @@ -242,7 +245,7 @@ def validate(self): if self[option] is None: raise RosetteException( "missingParameter", - "Required Name Translation parameter, " + option + ", not supplied", + "Required Name Translation parameter is missing: " + option, repr(option)) @@ -268,7 +271,7 @@ def validate(self): if self[option] is None: raise RosetteException( "missingParameter", - "Required Address Similarity parameter, " + option + ", not supplied", + "Required Address Similarity parameter is missing: " + option, repr(option)) @@ -301,7 +304,7 @@ def validate(self): if self[option] is None: raise RosetteException( "missingParameter", - "Required Name Similarity parameter, " + option + ", not supplied", + "Required Name Similarity parameter is missing: " + option, repr(option)) @@ -321,7 +324,7 @@ def validate(self): if self["names"] is None: # required raise RosetteException( "missingParameter", - "Required Name De-Duplication parameter, names, not supplied", + "Required Name De-Duplication parameter is missing: names", repr("names")) @@ -372,31 +375,37 @@ def __finish_result(self, response, ename): raise RosetteException(code, complaint_url + " : failed to communicate with Rosette", msg) - def info(self): - """Issues an "info" 
request to the L{EndpointCaller}'s specific endpoint. - @return: A dictionary telling server version and other - identifying data.""" - url = self.service_url + self.api.endpoints["INFO"] - headers = {'Accept': 'application/json', 'X-RosetteAPI-Binding': 'python', - 'X-RosetteAPI-Binding-Version': _BINDING_VERSION} + def __set_headers(self): + headers = {'Accept': _APPLICATION_JSON, + _CUSTOM_HEADER_PREFIX + 'Binding': _BINDING_LANGUAGE, + _CUSTOM_HEADER_PREFIX + 'Binding-Version': _BINDING_VERSION} custom_headers = self.api.get_custom_headers() - pattern = re.compile('^X-RosetteAPI-') if custom_headers is not None: for key in custom_headers.keys(): - if pattern.match(key) is not None: + if _CUSTOM_HEADER_PATTERN.match(key) is not None: headers[key] = custom_headers[key] else: raise RosetteException("badHeader", - "Custom header name must begin with \"X-RosetteAPI-\"", + "Custom header name must begin with \"" + _CUSTOM_HEADER_PREFIX + "\"", key) self.api.clear_custom_headers() if self.debug: - headers['X-RosetteAPI-Devel'] = 'true' - self.logger.info('info: ' + url) + headers[_CUSTOM_HEADER_PREFIX + 'Devel'] = 'true' + if self.user_key is not None: - headers["X-RosetteAPI-Key"] = self.user_key + headers[_CUSTOM_HEADER_PREFIX + "Key"] = self.user_key + + return headers + + def info(self): + """Issues an "info" request to the L{EndpointCaller}'s specific endpoint. 
+ @return: A dictionary telling server version and other + identifying data.""" + url = self.service_url + self.api.endpoints["INFO"] + headers = self.__set_headers() + self.logger.info('info: ' + url) response = self.api.get_http(url, headers=headers) return self.__finish_result(response, "info") @@ -407,26 +416,8 @@ def ping(self): signalled.""" url = self.service_url + self.api.endpoints['PING'] - headers = {'Accept': 'application/json', 'X-RosetteAPI-Binding': 'python', - 'X-RosetteAPI-Binding-Version': _BINDING_VERSION} - - custom_headers = self.api.get_custom_headers() - pattern = re.compile('^X-RosetteAPI-') - if custom_headers is not None: - for key in custom_headers.keys(): - if pattern.match(key) is not None: - headers[key] = custom_headers[key] - else: - raise RosetteException("badHeader", - "Custom header name must begin with \"X-RosetteAPI-\"", - key) - self.api.clear_custom_headers() - - if self.debug: - headers['X-RosetteAPI-Devel'] = 'true' + headers = self.__set_headers() self.logger.info('Ping: ' + url) - if self.user_key is not None: - headers["X-RosetteAPI-Key"] = self.user_key response = self.api.get_http(url, headers=headers) return self.__finish_result(response, "ping") @@ -454,9 +445,9 @@ def call(self, parameters): if not isinstance(parameters, _DocumentParamSetBase): if self.suburl != self.api.endpoints['NAME_SIMILARITY'] \ - and self.suburl != self.api.self.api.endpoints['NAME_TRANSLATION'] \ - and self.suburl != self.api.self.api.endpoints['NAME_DEDUPLICATION'] \ - and self.suburl != self.api.self.api.endpoints['ADDRESS_SIMILARITY']: + and self.suburl != self.api.self.api.endpoints['NAME_TRANSLATION'] \ + and self.suburl != self.api.self.api.endpoints['NAME_DEDUPLICATION'] \ + and self.suburl != self.api.self.api.endpoints['ADDRESS_SIMILARITY']: text = parameters parameters = DocumentParameters() parameters['content'] = text @@ -471,22 +462,7 @@ def call(self, parameters): params_to_serialize = parameters.serialize(self.api.options) 
headers = {} if self.user_key is not None: - custom_headers = self.api.get_custom_headers() - pattern = re.compile('^X-RosetteAPI-') - if custom_headers is not None: - for key in custom_headers.keys(): - if pattern.match(key) is not None: - headers[key] = custom_headers[key] - else: - raise RosetteException("badHeader", - "Custom header name must " - "begin with \"X-RosetteAPI-\"", - key) - self.api.clear_custom_headers() - - headers["X-RosetteAPI-Key"] = self.user_key - headers["X-RosetteAPI-Binding"] = "python" - headers["X-RosetteAPI-Binding-Version"] = _BINDING_VERSION + headers = self.__set_headers() if self.use_multipart: payload = None @@ -496,7 +472,7 @@ def call(self, parameters): params = dict( (key, value) for key, - value in params_to_serialize.items() if key == 'language') + value in params_to_serialize.items() if key == 'language') files = { 'content': ( os.path.basename( @@ -506,7 +482,7 @@ def call(self, parameters): 'request': ( 'request_options', json.dumps(params), - 'application/json')} + _APPLICATION_JSON)} request = requests.Request( 'POST', url, files=files, headers=headers, params=payload) prepared_request = self.api.session.prepare_request(request) @@ -519,11 +495,11 @@ def call(self, parameters): _my_loads(rdata, response_headers), status) else: if self.debug: - headers['X-RosetteAPI-Devel'] = True + headers[_CUSTOM_HEADER_PREFIX + 'Devel'] = True self.logger.info('operate: ' + url) - headers['Accept'] = "application/json" + headers['Accept'] = _APPLICATION_JSON headers['Accept-Encoding'] = "gzip" - headers['Content-Type'] = "application/json" + headers['Content-Type'] = _APPLICATION_JSON response = self.api.post_http(url, params_to_serialize, headers) return self.__finish_result(response, "operate") @@ -613,13 +589,21 @@ def get_user_agent_string(self): """ Return the User-Agent string """ return self.user_agent_string - def _set_pool_size(self): + def set_pool_size(self, new_pool_size): + """Sets the connection pool size. 
+ @parameter new_pool_size: pool size to set + """ + self.max_pool_size = new_pool_size adapter = requests.adapters.HTTPAdapter( - pool_maxsize=self.max_pool_size) + pool_maxsize=new_pool_size) if 'https:' in self.service_url: self.session.mount('https://', adapter) else: - self.session.mount('http://', adapter) + self.session.mount('http://', adapter) # NOSONAR + + def __adjust_concurrency(self, dict_headers): + if _CONCURRENCY_HEADER in dict_headers and dict_headers[_CONCURRENCY_HEADER] != self.max_pool_size: + self.set_pool_size(dict_headers[_CONCURRENCY_HEADER]) def _make_request(self, operation, url, data, headers): """ @@ -650,11 +634,8 @@ def _make_request(self, operation, url, data, headers): status = response.status_code rdata = response.content dict_headers = dict(response.headers) + self.__adjust_concurrency(dict_headers) response_headers = {"responseHeaders": dict_headers} - if 'x-rosetteapi-concurrency' in dict_headers: - if dict_headers['x-rosetteapi-concurrency'] != self.max_pool_size: - self.max_pool_size = dict_headers['x-rosetteapi-concurrency'] - self._set_pool_size() if status == 200: return rdata, status, response_headers @@ -670,9 +651,11 @@ def _make_request(self, operation, url, data, headers): if not message: message = rdata raise RosetteException(code, message, url) - - except: - raise + except json.JSONDecodeError as exception: + raise RosetteException( + exception, + "Problem decoding JSON", + rdata) except requests.exceptions.RequestException as exception: raise RosetteException( exception, @@ -964,12 +947,12 @@ def name_deduplication(self, parameters): return EndpointCaller(self, self.endpoints['NAME_DEDUPLICATION']).call(parameters) def text_embedding(self, parameters): - """ + """ deprecated Create an L{EndpointCaller} to identify text vectors found in the texts to which it is applied and call it. 
@type parameters: L{DocumentParameters} or L{str} @return: A python dictionary containing the results of text embedding.""" - return EndpointCaller(self, self.endpoints['TEXT_EMBEDDING']).call(parameters) + return self.semantic_vectors(parameters) def semantic_vectors(self, parameters): """ diff --git a/sonar-project.properties b/sonar-project.properties new file mode 100644 index 0000000..fb71b92 --- /dev/null +++ b/sonar-project.properties @@ -0,0 +1,4 @@ +sonar.projectKey=rosette-api-python-binding +sonar.sources=rosette +sonar.python.coverage.reportPaths=coverage.xml +#sonar.branch.name=RCB-596-pool-size diff --git a/tests/__init__.py b/tests/__init__.py index 9d1fe1d..4256e37 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- """ -Copyright (c) 2014-2019 Basis Technology Corporation. +Copyright (c) 2014-2022 Basis Technology Corporation. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/tests/test_rosette_api.py b/tests/test_rosette_api.py index ca14c90..0770bec 100644 --- a/tests/test_rosette_api.py +++ b/tests/test_rosette_api.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- """ -Copyright (c) 2014-2019 Basis Technology Corporation. +Copyright (c) 2014-2022 Basis Technology Corporation. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -23,12 +23,13 @@ import platform import httpretty import pytest -from rosette.api import(API, - DocumentParameters, - NameTranslationParameters, - NameSimilarityParameters, - NameDeduplicationParameters, - RosetteException) +from rosette.api import (AddressSimilarityParameters, + API, + DocumentParameters, + NameTranslationParameters, + NameSimilarityParameters, + NameDeduplicationParameters, + RosetteException) _ISPY3 = sys.version_info[0] == 3 @@ -118,7 +119,7 @@ def test_custom_header_props(api): assert value == api.get_custom_headers()[key] api.clear_custom_headers() - assert len(api.get_custom_headers()) is 0 + assert len(api.get_custom_headers()) == 0 # Test for invalid header name @@ -202,6 +203,8 @@ def test_the_max_pool_size(json_response, doc_params): result = api.language(doc_params) assert result["name"] == "Rosette" assert api.get_pool_size() == 5 + api.set_pool_size(11) + assert api.get_pool_size() == 11 httpretty.disable() httpretty.reset() @@ -460,10 +463,10 @@ def test_name_deduplicatation_parameters(api, json_response): params = NameDeduplicationParameters() with pytest.raises(RosetteException) as e_rosette: - result = api.name_deduplication(params) + api.name_deduplication(params) assert e_rosette.value.status == 'missingParameter' - assert e_rosette.value.message == 'Required Name De-Duplication parameter, names, not supplied' + assert e_rosette.value.message == 'Required Name De-Duplication parameter is missing: names' params["names"] = ["John Smith", "Johnathon Smith", "Fred Jones"] @@ -572,6 +575,43 @@ def test_for_no_content_or_contentUri(api, json_response, doc_params): httpretty.disable() httpretty.reset() + +def test_for_address_similarity_required_parameters(api, json_response): + """Test address similarity parameters""" + httpretty.enable() + httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/info", + body=json_response, status=200, content_type="application/json") + httpretty.register_uri(httpretty.POST, 
"https://api.rosette.com/rest/v1/address-similarity", + body=json_response, status=200, content_type="application/json") + + params = AddressSimilarityParameters() + + with pytest.raises(RosetteException) as e_rosette: + api.address_similarity(params) + + assert e_rosette.value.status == 'missingParameter' + assert e_rosette.value.message == 'Required Address Similarity parameter is missing: address1' + + params["address1"] = {"houseNumber": "1600", + "road": "Pennsylvania Ave NW", + "city": "Washington", + "state": "DC", + "postCode": "20500"} + + with pytest.raises(RosetteException) as e_rosette: + api.address_similarity(params) + + assert e_rosette.value.status == 'missingParameter' + assert e_rosette.value.message == 'Required Address Similarity parameter is missing: address2' + + params["address2"] = {"text": "160 Pennsilvana Avenue, Washington, D.C., 20500"} + + result = api.address_similarity(params) + assert result["name"] == "Rosette" + httpretty.disable() + httpretty.reset() + + # Test for required Name Similarity parameters @@ -588,20 +628,20 @@ def test_for_name_similarity_required_parameters(api, json_response): params = NameSimilarityParameters() with pytest.raises(RosetteException) as e_rosette: - result = api.name_similarity(params) + api.name_similarity(params) assert e_rosette.value.status == 'missingParameter' - assert e_rosette.value.message == 'Required Name Similarity parameter, name1, not supplied' + assert e_rosette.value.message == 'Required Name Similarity parameter is missing: name1' params["name1"] = { "text": matched_name_data1, "language": "eng", "entityType": "PERSON"} with pytest.raises(RosetteException) as e_rosette: - result = api.name_similarity(params) + api.name_similarity(params) assert e_rosette.value.status == 'missingParameter' - assert e_rosette.value.message == 'Required Name Similarity parameter, name2, not supplied' + assert e_rosette.value.message == 'Required Name Similarity parameter is missing: name2' params["name2"] 
= {"text": matched_name_data2, "entityType": "PERSON"} @@ -626,19 +666,18 @@ def test_for_name_translation_required_parameters(api, json_response): params["targetScript"] = "Latn" with pytest.raises(RosetteException) as e_rosette: - result = api.name_translation(params) + api.name_translation(params) assert e_rosette.value.status == 'missingParameter' - assert e_rosette.value.message == 'Required Name Translation parameter, name, not supplied' + assert e_rosette.value.message == 'Required Name Translation parameter is missing: name' params["name"] = "some data to translate" with pytest.raises(RosetteException) as e_rosette: - result = api.name_translation(params) + api.name_translation(params) assert e_rosette.value.status == 'missingParameter' - assert e_rosette.value.message == ('Required Name Translation parameter, ' - 'targetLanguage, not supplied') + assert e_rosette.value.message == 'Required Name Translation parameter is missing: targetLanguage' params["targetLanguage"] = "eng" @@ -720,3 +759,57 @@ def test_the_similar_terms_endpoint(api, json_response, doc_params): assert result["name"] == "Rosette" httpretty.disable() httpretty.reset() + + +def test_the_deprecated_endpoints(api, json_response, doc_params): + """There are three deprecated endpoints. 
Exercise them until they are deleted.""" + + # TEXT_EMBEDDING calls SEMANTIC_VECTORS + httpretty.enable() + httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/semantics/vector", + body=json_response, status=200, content_type="application/json") + + result = api.text_embedding(doc_params) + assert result["name"] == "Rosette" + httpretty.disable() + httpretty.reset() + + # MATCHED_NAME calls NAME_SIMILARITY + httpretty.enable() + httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/info", + body=json_response, status=200, content_type="application/json") + httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/name-similarity", + body=json_response, status=200, content_type="application/json") + + name_similarity_params = NameSimilarityParameters() + + name_similarity_params["name1"] = { + "text": "Michael Jackson", + "language": "eng", + "entityType": "PERSON"} + + name_similarity_params["name2"] = {"text": "迈克尔·杰克逊", "entityType": "PERSON"} + + result = api.matched_name(name_similarity_params) + assert result["name"] == "Rosette" + httpretty.disable() + httpretty.reset() + + # TRANSLATED_NAME calls NAME_TRANSLATION + httpretty.enable() + httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/info", + body=json_response, status=200, content_type="application/json") + httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/name-translation", + body=json_response, status=200, content_type="application/json") + + name_translation_params = NameTranslationParameters() + name_translation_params["entityType"] = "PERSON" + name_translation_params["targetScript"] = "Latn" + name_translation_params["name"] = "some data to translate" + name_translation_params["targetLanguage"] = "eng" + + result = api.translated_name(name_translation_params) + assert result["name"] == "Rosette" + + httpretty.disable() + httpretty.reset() diff --git a/tests/tox.ini b/tests/tox.ini index 9bd4a1b..bdf6e7d 
100644 --- a/tests/tox.ini +++ b/tests/tox.ini @@ -4,10 +4,9 @@ envlist = py2, py3 [testenv] commands = - pytest -s --pep8 + pytest -s deps = pytest - pytest-pep8 httpretty epydoc requests diff --git a/tox.ini b/tox.ini index 249a088..23301ee 100644 --- a/tox.ini +++ b/tox.ini @@ -10,11 +10,10 @@ envlist = py2, py3 [testenv] commands = {envpython} setup.py install - {envbindir}/py.test --pep8 + {envbindir}/py.test deps = pytest pep8 - pytest-pep8 httpretty epydoc requests