diff --git a/.github/workflows/superlinter.yml b/.github/workflows/superlinter.yml deleted file mode 100644 index fa674e9..0000000 --- a/.github/workflows/superlinter.yml +++ /dev/null @@ -1,18 +0,0 @@ ---- -name: Super-Linter - -on: push - -jobs: - super-lint: - name: Lint code base - runs-on: ubuntu-latest - steps: - - name: Checkout code - uses: actions/checkout@v3 - - - name: Run Super-Linter - uses: github/super-linter@v4 - env: - DEFAULT_BRANCH: master - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} \ No newline at end of file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..9e9130f --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,83 @@ +--- +default_language_version: + node: 13.8.0 + python: python3.10 + +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v2.5.0 + hooks: + - id: check-toml + - id: check-case-conflict + - id: check-merge-conflict + - id: end-of-file-fixer + - id: trailing-whitespace + - id: check-added-large-files + - id: check-symlinks + - id: check-yaml + args: [--unsafe] + - id: debug-statements + - id: detect-private-key + + - repo: https://github.com/adrienverge/yamllint + rev: v1.26.3 + hooks: + - id: yamllint + args: [-d, relaxed] + + - repo: https://github.com/jumanjihouse/pre-commit-hook-yamlfmt + rev: 0.1.1 + hooks: + - id: yamlfmt + args: [--mapping, '2', --sequence, '4', --offset, '2'] + - repo: https://github.com/asottile/reorder_python_imports + rev: v2.3.5 + hooks: + - id: reorder-python-imports + args: + - --application-directories=api + + - repo: https://github.com/psf/black + rev: 20.8b1 + hooks: + - id: black + additional_dependencies: [click==8.0.3] + + - repo: https://github.com/PyCQA/flake8 + rev: 3.9.2 + hooks: + - id: flake8 + additional_dependencies: &flake8-deps + - flake8-formatter-abspath==1.0.1 + - flake8-fixme==1.1.1 + - flake8-isort==4.0.0 + - flake8-unused-arguments==0.0.6 + - flake8-use-fstring==1.1 + + - repo: 
https://github.com/PyCQA/pylint + rev: v2.9.6 + hooks: + - id: pylint + exclude: migrations + args: + - --extension-pkg-whitelist=math + - --extension-pkg-whitelist=requests + - --extension-pkg-whitelist=pandas + + - repo: https://github.com/asottile/yesqa + rev: v1.2.3 + hooks: + - id: yesqa + additional_dependencies: *flake8-deps + + - repo: https://github.com/pycqa/bandit + rev: 1.7.0 + hooks: + - id: bandit + args: [-iii, -ll] + + - repo: https://github.com/pre-commit/mirrors-mypy + rev: v1.3.0 + hooks: + - id: mypy + additional_dependencies: [types-all] diff --git a/__init__.py b/__init__.py index a998cec..a998e25 100644 --- a/__init__.py +++ b/__init__.py @@ -1,5 +1,28 @@ # Check if users have all the dependencies required # If they installed from the requirements.txt file correctly, this should not raise an error +from __future__ import annotations + +from .apis.acsIndicators import ACSIndicatorsConnection +from .apis.aggregateProfiles import AggregateProfilesConnection +from .apis.automationIndex import AutomationIndexConnection +from .apis.base import EmsiBaseConnection +from .apis.canadaPostings import CanadaPostingsConnection +from .apis.companies import CompaniesConnection +from .apis.coreLmi import CoreLMIConnection +from .apis.emsiTitles import TitlesConnection +from .apis.geography import GeographyConnection +from .apis.globalPostings import GlobalPostingsConnection +from .apis.globalProfiles import GlobalProfilesConnection +from .apis.ipeds import IpedsConnection +from .apis.openSkills import SkillsClassificationConnection +from .apis.talentBenchmark import TalentBenchmarkConnection +from .apis.unitedKingdomPostings import UKPostingsConnection +from .apis.unitedKingdomProfiles import UKProfiles +from .apis.usCompensation import UsCompensationConnection +from .apis.usInputOutput import USInputOutputConncetion +from .apis.usOccEarnings import UsOccupationEarningsConnection +from .apis.usPostings import UnitedStatesPostingsConnection + hard_dependencies
= ("requests", "pandas") missing_dependencies = [] @@ -11,28 +34,32 @@ if missing_dependencies: raise ImportError( - "Unable to import required dependencies:\n" + "\n".join(missing_dependencies) + "Unable to import required dependencies:\n" + + "\n".join(missing_dependencies), ) del hard_dependencies, dependency, missing_dependencies # import all of the apis that we have connections defined for up to this point -from .apis.base import EmsiBaseConnection -from .apis.automationIndex import AutomationIndexConnection -from .apis.aggregateProfiles import AggregateProfilesConnection -from .apis.canadaPostings import CanadaPostingsConnection -from .apis.coreLmi import CoreLMIConnection -from .apis.usPostings import UnitedStatesPostingsConnection -from .apis.acsIndicators import ACSIndicatorsConnection -from .apis.openSkills import SkillsClassificationConnection -from .apis.emsiTitles import EmsiTitlesConnection -from .apis.usCompensation import UsCompensationConnection -from .apis.usOccEarnings import UsOccupationEarningsConnection -from .apis.usInputOutput import USInputOutputConncetion -from .apis.geography import GeographyConnection -from .apis.ipeds import IpedsConnection -from .apis.unitedKingdomPostings import UKPostingsConnection -from .apis.talentBenchmark import TalentBenchmarkConnection -from .apis.globalPostings import GlobalPostingsConnection -from .apis.globalProfiles import GlobalProfilesConnection -from .apis.companies import CompaniesConnection -from .apis.unitedKingdomProfiles import UKProfiles + +__all__ = [ + "ACSIndicatorsConnection", + "AggregateProfilesConnection", + "AutomationIndexConnection", + "EmsiBaseConnection", + "CanadaPostingsConnection", + "CompaniesConnection", + "CoreLMIConnection", + "TitlesConnection", + "GeographyConnection", + "GlobalPostingsConnection", + "GlobalProfilesConnection", + "IpedsConnection", + "SkillsClassificationConnection", + "TalentBenchmarkConnection", + "UKPostingsConnection", + "UKProfiles", + 
"UsCompensationConnection", + "USInputOutputConncetion", + "UsOccupationEarningsConnection", + "UnitedStatesPostingsConnection", +] diff --git a/apis/acsIndicators.py b/apis/acsIndicators.py index c3f5d80..38ab2ae 100644 --- a/apis/acsIndicators.py +++ b/apis/acsIndicators.py @@ -4,6 +4,7 @@ https://api.emsidata.com/apis/emsi-acs-indicators """ +from __future__ import annotations from .base import EmsiBaseConnection @@ -27,7 +28,7 @@ def __init__(self) -> None: self.name = "ACS" - def get_metrics(self, metric_name: str = None) -> dict: + def get_metrics(self, metric_name: str | None = None) -> dict: """ Summary @@ -40,7 +41,7 @@ def get_metrics(self, metric_name: str = None) -> dict: if metric_name is None: response = self.download_data("meta/metrics") else: - response = self.download_data("meta/metrics/{}".format(metric_name)) + response = self.download_data(f"meta/metrics/{metric_name}") return response.json()["data"] diff --git a/apis/aggregateProfiles.py b/apis/aggregateProfiles.py index 9fc9a28..903156b 100644 --- a/apis/aggregateProfiles.py +++ b/apis/aggregateProfiles.py @@ -1,6 +1,8 @@ """ The following is taken from Emsi's documentation, available here: https://api.emsidata.com/apis/aggregate-profile-data """ +from __future__ import annotations + from .base import ProfilesConnection diff --git a/apis/automationIndex.py b/apis/automationIndex.py index 894b018..591d0a7 100644 --- a/apis/automationIndex.py +++ b/apis/automationIndex.py @@ -4,6 +4,8 @@ https://api.emsidata.com/apis/automation-index """ +from __future__ import annotations + from .base import EmsiBaseConnection @@ -63,7 +65,7 @@ def get_metadata(self, nation="us"): Returns: TYPE: Description """ - api_endpoint = "/{}/meta".format(nation) + api_endpoint = f"/{nation}/meta" response = self.download_data(api_endpoint) return response.json()["data"] @@ -77,7 +79,7 @@ def get_index(self, nation="us"): Returns: TYPE: Description """ - api_endpoint = "/{}/data".format(nation) + api_endpoint = 
f"/{nation}/data" response = self.download_data(api_endpoint) return response.json()["data"] @@ -96,7 +98,9 @@ def filter_soc_index(self, soc_code, nation="us"): TYPE: Description """ if type(soc_code) != list and type(soc_code) != str: - raise ValueError("input `soc_code` must be one of type `list` or `str`") + raise ValueError( + "input `soc_code` must be one of type `list` or `str`", + ) if type(soc_code) == list: payload = soc_code @@ -112,6 +116,6 @@ def filter_soc_index(self, soc_code, nation="us"): try: output[soc] = index[soc] except ValueError: - raise ValueError("`soc_code` '{}' is invalid".format(soc)) + raise ValueError(f"`soc_code` '{soc}' is invalid") return output diff --git a/apis/base.py b/apis/base.py index 9b3d12c..01c312a 100644 --- a/apis/base.py +++ b/apis/base.py @@ -1,9 +1,12 @@ """Summary """ -from datetime import datetime, timedelta +from __future__ import annotations + +from datetime import datetime +from datetime import timedelta -import requests import pandas as pd +import requests try: from ..permissions import DEFAULT @@ -20,7 +23,7 @@ def is_expired(self): return datetime.now() > self.creation + timedelta(minutes=59) -class EmsiBaseConnection(object): +class EmsiBaseConnection: """docstring for EmsiBaseConnection Attributes: @@ -30,6 +33,10 @@ class EmsiBaseConnection(object): scope (str): the scope for requesting an auth token from the API """ + scope = "" + base_url = "" + name = "" + def __init__(self) -> None: """ Parses the username and password from the permissions @@ -61,14 +68,18 @@ def get_new_token(self) -> None: print(response.status_code) raise ValueError( - "Looks like you don't have access to this dataset with those credentials" + "Looks like you don't have access to this dataset with those credentials", ) # self.token = response.json()['access_token'] self.token = Token(response.json()["access_token"]) - def get_data(self, url: str, querystring: dict = None) -> requests.Response: + def get_data( + self, + url: str, + 
querystring: dict | None = None, + ) -> requests.Response: """ Makes a GET request to the API, given the URL and any querystring parameters. @@ -81,11 +92,16 @@ def get_data(self, url: str, querystring: dict = None) -> requests.Response: """ headers = { "content-type": "application/json", - "authorization": "Bearer {}".format(self.token.token), + "authorization": f"Bearer {self.token.token}", } # added timeout = None - some meta requests from Core LMI are taking a long time to fulfill - response = requests.get(url, headers=headers, params=querystring, timeout=None) + response = requests.get( + url, + headers=headers, + params=querystring, + timeout=None, + ) # if response.status_code == 401: # self.get_new_token() @@ -94,7 +110,10 @@ def get_data(self, url: str, querystring: dict = None) -> requests.Response: return response def post_data( - self, url: str, payload: dict, querystring: dict = None + self, + url: str, + payload: dict, + querystring: dict | None = None, ) -> requests.Response: """ Makes a POST request to the API, given the url and payload (querystring optional) @@ -109,17 +128,23 @@ def post_data( """ headers = { "content-type": "application/json", - "authorization": "Bearer {}".format(self.token.token), + "authorization": f"Bearer {self.token.token}", } # allows for users to pass in a string as the payload (yes, even though it is documented as a dict) if isinstance(payload, str): response = requests.post( - url, headers=headers, data=payload, params=querystring + url, + headers=headers, + data=payload, + params=querystring, ) else: response = requests.post( - url, headers=headers, json=payload, params=querystring + url, + headers=headers, + json=payload, + params=querystring, ) # if response.status_code == 401: @@ -129,7 +154,10 @@ def post_data( return response def download_data( - self, api_endpoint: str, payload: dict = None, querystring: dict = None + self, + api_endpoint: str, + payload: dict | None = None, + querystring: dict | None = None, ) 
-> requests.Response: """ Handles constructing the api_endpoint with the base url @@ -209,7 +237,11 @@ class JobPostingsConnection(EmsiBaseConnection): def __init__(self) -> None: super().__init__() - def post_totals(self, payload: dict, querystring: dict = None) -> dict: + def post_totals( + self, + payload: dict, + querystring: dict | None = None, + ) -> dict: """ Get summary metrics on all postings matching the filters. @@ -221,12 +253,18 @@ def post_totals(self, payload: dict, querystring: dict = None) -> dict: dict: the data response from the API """ response = self.download_data( - "totals", payload=payload, querystring=querystring + "totals", + payload=payload, + querystring=querystring, ) return response.json()["data"]["totals"] - def post_timeseries(self, payload: dict, querystring: dict = None) -> dict: + def post_timeseries( + self, + payload: dict, + querystring: dict | None = None, + ) -> dict: """ Get summary metrics just like the /totals endpoint but broken out by month or day depending on the format of the requested time-frame. When requesting a daily timeseries only up to 90 days may be requested at a time. 
@@ -241,7 +279,9 @@ def post_timeseries(self, payload: dict, querystring: dict = None) -> dict: dict: the data response from the API """ response = self.download_data( - "timeseries", payload=payload, querystring=querystring + "timeseries", + payload=payload, + querystring=querystring, ) return response.json()["data"] @@ -257,7 +297,10 @@ def get_rankings(self) -> list: return response.json()["data"] def post_rankings_timeseries( - self, facet: str, payload: dict, querystring: dict = None + self, + facet: str, + payload: dict, + querystring: dict | None = None, ) -> dict: """Summary @@ -270,7 +313,7 @@ def post_rankings_timeseries( dict: the data response from the API """ response = self.download_data( - "rankings/{}/timeseries".format(facet), + f"rankings/{facet}/timeseries", payload=payload, querystring=querystring, ) @@ -282,7 +325,10 @@ def post_rankings_timeseries( return response.json()["data"] def post_rankings( - self, facet: str, payload: dict, querystring: dict = None + self, + facet: str, + payload: dict, + querystring: dict | None = None, ) -> dict: """ Group and rank postings by {ranking_facet} with a monthly or daily timeseries for each ranked group. @@ -296,13 +342,19 @@ def post_rankings( dict: the data response from the API """ response = self.download_data( - "rankings/{}".format(facet), payload=payload, querystring=querystring + f"rankings/{facet}", + payload=payload, + querystring=querystring, ) return response.json() def post_nested_rankings( - self, facet: str, nested_facet: str, payload: dict, querystring: dict = None + self, + facet: str, + nested_facet: str, + payload: dict, + querystring: dict | None = None, ) -> dict: """ Get a nested ranking (e.g. top companies, then top skills per company). 
@@ -317,14 +369,18 @@ def post_nested_rankings( dict: the data response from the API """ response = self.download_data( - "rankings/{}/rankings/{}".format(facet, nested_facet), + f"rankings/{facet}/rankings/{nested_facet}", payload=payload, querystring=querystring, ) return response.json() - def post_postings(self, payload: dict, querystring: dict = None) -> dict: + def post_postings( + self, + payload: dict, + querystring: dict | None = None, + ) -> dict: """ Get data for individual postings that match your requested filters. Note that not all fields are present for all postings, and some may be null or "Unknown". @@ -338,12 +394,18 @@ def post_postings(self, payload: dict, querystring: dict = None) -> dict: dict: the data response from the API """ response = self.download_data( - "postings", payload=payload, querystring=querystring + "postings", + payload=payload, + querystring=querystring, ) return response.json()["data"] - def get_postings(self, posting_id: str, querystring: dict = None) -> dict: + def get_postings( + self, + posting_id: str, + querystring: dict | None = None, + ) -> dict: """ Get a single posting by its id. @@ -361,7 +423,10 @@ def get_postings(self, posting_id: str, querystring: dict = None) -> dict: return response.json()["data"] def post_distributions( - self, facet: str, payload: dict, querystring: dict = None + self, + facet: str, + payload: dict, + querystring: dict | None = None, ) -> dict: """ Get a data distribution by percentiles or fixed interval for a selected facet. @@ -375,12 +440,14 @@ def post_distributions( dict: the data response from the API """ response = self.download_data( - "distributions", payload=payload, querystring=querystring + "distributions", + payload=payload, + querystring=querystring, ) return response.json()["data"] - def get_distributions(self, querystring: dict = None) -> list: + def get_distributions(self, querystring: dict | None = None) -> list: """ Get a list of available distribution facets. 
@@ -392,13 +459,16 @@ def get_distributions(self, querystring: dict = None) -> list: """ response = self.download_data( f"postings/distributions", - querystring=querystring + querystring=querystring, ) return response.json()["data"] def get_taxonomies( - self, facet: str = None, q: str = None, querystring: dict = None + self, + facet: str | None = None, + q: str | None = None, + querystring: dict | None = None, ) -> dict: """ Get a list of current available taxonomy facets. @@ -415,18 +485,24 @@ def get_taxonomies( if facet is None: response = self.download_data("taxonomies") else: - api_endpoint = "taxonomies/{}".format(facet) + api_endpoint = f"taxonomies/{facet}" if querystring is None: querystring = {"q": q} else: querystring["q"] = q - response = self.download_data(api_endpoint, querystring=querystring) + response = self.download_data( + api_endpoint, + querystring=querystring, + ) return response.json()["data"] def post_taxonomies( - self, facet: str, payload: dict, querystring: dict = None + self, + facet: str, + payload: dict, + querystring: dict | None = None, ) -> dict: """ Look up taxonomy items by ID. 
@@ -440,7 +516,7 @@ def post_taxonomies( dict: the data response from the API """ response = self.download_data( - "taxonomies/{}/lookup".format(facet), + f"taxonomies/{facet}/lookup", payload=payload, querystring=querystring, ) @@ -448,7 +524,10 @@ def post_taxonomies( return response.json()["data"] def post_rankings_df( - self, facet: str, payload: dict, querystring: dict = None + self, + facet: str, + payload: dict, + querystring: dict | None = None, ) -> pd.DataFrame: """Summary @@ -460,7 +539,11 @@ def post_rankings_df( Returns: pd.DataFrame: A pandas dataframe of the rankings data """ - response = self.post_rankings(facet, payload=payload, querystring=querystring) + response = self.post_rankings( + facet, + payload=payload, + querystring=querystring, + ) df = pd.DataFrame(response["data"]["ranking"]["buckets"]) df.rename(columns={"name": facet}, inplace=True) @@ -468,7 +551,11 @@ def post_rankings_df( return df def post_nested_rankings_df( - self, facet: str, nested_facet: str, payload: dict, querystring: dict = None + self, + facet: str, + nested_facet: str, + payload: dict, + querystring: dict | None = None, ) -> pd.DataFrame: """Summary @@ -482,7 +569,10 @@ def post_nested_rankings_df( pd.DataFrame: A pandas dataframe of the nested rankings data """ response = self.post_nested_rankings( - facet, nested_facet, payload=payload, querystring=querystring + facet, + nested_facet, + payload=payload, + querystring=querystring, ) df = pd.DataFrame() @@ -514,7 +604,11 @@ class ProfilesConnection(EmsiBaseConnection): def __init__(self) -> None: super().__init__() - def post_totals(self, payload: dict, querystring: dict = None) -> dict: + def post_totals( + self, + payload: dict, + querystring: dict | None = None, + ) -> dict: """Get summary metrics on all profiles matching the filters. 
Args: @@ -524,12 +618,18 @@ def post_totals(self, payload: dict, querystring: dict = None) -> dict: dict: the data response from the API """ response = self.download_data( - "totals", payload=payload, querystring=querystring + "totals", + payload=payload, + querystring=querystring, ) return response.json()["data"]["totals"] - def post_recency(self, payload: dict, querystring: dict = None) -> dict: + def post_recency( + self, + payload: dict, + querystring: dict | None = None, + ) -> dict: """Group filtered profile metrics by year, based on profile recency (when they were last updated). Args: @@ -539,7 +639,9 @@ def post_recency(self, payload: dict, querystring: dict = None) -> dict: dict: the data response from the API """ response = self.download_data( - "recency", payload=payload, querystring=querystring + "recency", + payload=payload, + querystring=querystring, ) return response.json()["data"]["recency"] @@ -555,7 +657,10 @@ def get_rankings(self) -> dict: return response.json()["data"] def post_rankings( - self, facet: str, payload: dict, querystring: dict = None + self, + facet: str, + payload: dict, + querystring: dict | None = None, ) -> dict: """Rank profiles by a given facet @@ -567,13 +672,18 @@ def post_rankings( dict: the data response from the API """ response = self.download_data( - "rankings/{}".format(facet), payload=payload, querystring=querystring + f"rankings/{facet}", + payload=payload, + querystring=querystring, ) return response.json() def get_taxonomies( - self, facet: str = None, q: str = None, querystring: dict = None + self, + facet: str | None = None, + q: str | None = None, + querystring: dict | None = None, ) -> dict: """ Search taxonomies using either whole keywords (relevance search) or partial keywords (autocomplete), or list taxonomy items. 
@@ -590,18 +700,24 @@ def get_taxonomies( if facet is None: response = self.download_data("taxonomies") else: - api_endpoint = "taxonomies/{}".format(facet) + api_endpoint = f"taxonomies/{facet}" if querystring is None: querystring = {"q": q} else: querystring["q"] = q - response = self.download_data(api_endpoint, querystring=querystring) + response = self.download_data( + api_endpoint, + querystring=querystring, + ) return response.json()["data"] def post_taxonomies( - self, facet: str, payload: dict, querystring: dict = None + self, + facet: str, + payload: dict, + querystring: dict | None = None, ) -> dict: """Lookup taxonomy items by ID. @@ -614,7 +730,7 @@ def post_taxonomies( dict: the data response from the API """ response = self.download_data( - "taxonomies/{}/lookup".format(facet), + f"taxonomies/{facet}/lookup", payload=payload, querystring=querystring, ) @@ -622,7 +738,10 @@ def post_taxonomies( return response.json()["data"] def post_rankings_df( - self, facet: str, payload: dict, querystring: dict = None + self, + facet: str, + payload: dict, + querystring: dict | None = None, ) -> pd.DataFrame: """Summary @@ -634,7 +753,11 @@ def post_rankings_df( Returns: pd.DataFrame: Description """ - response = self.post_rankings(facet, payload=payload, querystring=querystring) + response = self.post_rankings( + facet, + payload=payload, + querystring=querystring, + ) df = pd.DataFrame(response["data"]["ranking"]["buckets"]) df.rename(columns={"name": facet}, inplace=True) diff --git a/apis/canadaPostings.py b/apis/canadaPostings.py index 46e5242..0d72efd 100644 --- a/apis/canadaPostings.py +++ b/apis/canadaPostings.py @@ -1,6 +1,8 @@ """ https://api.emsidata.com/apis/canada-job-postings """ +from __future__ import annotations + from .base import JobPostingsConnection diff --git a/apis/companies.py b/apis/companies.py index 56186cb..0ad3c00 100644 --- a/apis/companies.py +++ b/apis/companies.py @@ -2,6 +2,7 @@ This service takes text describing a job and 
normalizes it into a standardized job title from Emsi's job title taxonomy. https://api.emsidata.com/apis/titles """ +from __future__ import annotations from .base import EmsiBaseConnection @@ -65,7 +66,11 @@ def get_version_changes(self, version: str = "latest") -> dict: return response.json()["data"] def get_list_all_companies( - self, fields=["id", "name"], version: str = "latest", limit=None, after=None + self, + fields=["id", "name"], + version: str = "latest", + limit=None, + after=None, ) -> list: """ Returns a list of all titles in {version} sorted by title name @@ -88,13 +93,17 @@ def get_list_all_companies( querystring["after"] = after response = self.download_data( - f"versions/{version}/companies", querystring=querystring + f"versions/{version}/companies", + querystring=querystring, ) return response.json() def post_list_requested_companies( - self, companies: list, fields: list = ["id", "name"], version: str = "latest" + self, + companies: list, + fields: list = ["id", "name"], + version: str = "latest", ) -> list: """ Usage information. @@ -108,10 +117,17 @@ def post_list_requested_companies( version (str, optional): The companies classification version. """ payload = {"ids": companies, "fields": fields} - response = self.download_data(f"versions/{version}/companies", payload=payload) + response = self.download_data( + f"versions/{version}/companies", + payload=payload, + ) return response.json()["data"] - def get_company_by_id(self, company_id: str, version: str = "latest") -> str: + def get_company_by_id( + self, + company_id: str, + version: str = "latest", + ) -> str: """ Usage information. @@ -122,11 +138,16 @@ def get_company_by_id(self, company_id: str, version: str = "latest") -> str: company_id (str): Description version (str, optional): The titles classification version. 
""" - response = self.download_data(f"versions/{version}/companies/{company_id}") + response = self.download_data( + f"versions/{version}/companies/{company_id}", + ) return response.json()["data"] def post_normalize_company( - self, title: str, version: str = "latest", fields=["id", "name"] + self, + title: str, + version: str = "latest", + fields=["id", "name"], ) -> dict: """Normalize a raw job title string to the best matching Emsi title. @@ -142,7 +163,10 @@ def post_normalize_company( dict: dictionary of the top match from the API (id, title, and similarity) """ payload = {"term": title, "fields": fields} - response = self.download_data(f"versions/{version}/normalize", payload=payload) + response = self.download_data( + f"versions/{version}/normalize", + payload=payload, + ) return response.json()["data"] @@ -168,12 +192,18 @@ def post_inspect_company_normalization( dict: dictionary of the top match from the API (id, title, and similarity) """ payload = {"term": title, "limit": limit, "fields": fields} - response = self.download_data(f"versions/{version}/normalize/inspect", payload) + response = self.download_data( + f"versions/{version}/normalize/inspect", + payload, + ) return response.json()["data"] def post_normalize_companies_in_bulk( - self, titles: list, version: str = "latest", fields=["id", "name"] + self, + titles: list, + version: str = "latest", + fields=["id", "name"], ) -> list: """ Normalize multiple raw job title strings to a list of best matching Emsi titles. 
@@ -189,7 +219,8 @@ def post_normalize_companies_in_bulk( """ payload = {"terms": titles, "fields": fields} response = self.download_data( - f"versions/{version}/normalize/bulk", payload=payload + f"versions/{version}/normalize/bulk", + payload=payload, ) return response.json()["data"] diff --git a/apis/coreLmi.py b/apis/coreLmi.py index 1cacbe0..20c9fa9 100644 --- a/apis/coreLmi.py +++ b/apis/coreLmi.py @@ -46,11 +46,14 @@ # 3 Arkansas 1.297678e+11 # 4 California 3.013869e+12 """ +from __future__ import annotations -import requests -from datetime import datetime, timedelta -import pandas as pd import time +from datetime import datetime +from datetime import timedelta + +import pandas as pd +import requests from .base import EmsiBaseConnection @@ -104,7 +107,11 @@ def __init__(self) -> None: self.name = "Core_LMI" def download_data( - self, api_endpoint: str, payload: dict = None, smart_limit: bool = False + self, + api_endpoint: str, + payload: dict | None = None, + querystring: dict | None = None, + smart_limit: bool = False, ) -> requests.Response: """Needs more work for downloading the data from Agnitio, since it does not automatically handle the rate liimit from the API @@ -168,12 +175,15 @@ def get_meta_dataset(self, dataset: str, datarun: str) -> list: Returns: list: list of dataset versions available """ - response = self.download_data("meta/dataset/{}/{}".format(dataset, datarun)) + response = self.download_data(f"meta/dataset/{dataset}/{datarun}") return response.json() def get_meta_dataset_dimension( - self, dataset: str, dimension: str, datarun: str + self, + dataset: str, + dimension: str, + datarun: str, ) -> dict: """ Finally, you can view the hierarchy of a particular dimension of a dataset by adding dataset/// to the path: @@ -187,12 +197,17 @@ def get_meta_dataset_dimension( dict: hierarchichal representation of the dimension of data for the particular dataset """ response = self.download_data( - "meta/dataset/{}/{}/{}".format(dataset, datarun, 
dimension) + f"meta/dataset/{dataset}/{datarun}/{dimension}", ) return response.json() - def post_retrieve_data(self, dataset: str, payload: dict, datarun: str) -> dict: + def post_retrieve_data( + self, + dataset: str, + payload: dict, + datarun: str, + ) -> dict: """ Agnitio data queries are performed by assembling a JSON description of the query and POSTing it to the specific dataset you wish to query. @@ -204,12 +219,15 @@ def post_retrieve_data(self, dataset: str, payload: dict, datarun: str) -> dict: Returns: dict: full data returned from the API """ - response = self.download_data("{}/{}".format(dataset, datarun), payload) + response = self.download_data(f"{dataset}/{datarun}", payload) return response.json() def get_dimension_hierarchy_df( - self, dataset: str, dimension: str, datarun: str + self, + dataset: str, + dimension: str, + datarun: str, ) -> pd.DataFrame: """ Finally, you can view the hierarchy of a particular dimension of a dataset by adding dataset/// to the path: @@ -228,7 +246,10 @@ def get_dimension_hierarchy_df( return df def post_retrieve_df( - self, dataset: str, payload: dict, datarun: str + self, + dataset: str, + payload: dict, + datarun: str, ) -> pd.DataFrame: """ Agnitio data queries are performed by assembling a JSON description of the query and POSTing it to the specific dataset you wish to query. diff --git a/apis/emsiTitles.py b/apis/emsiTitles.py index 6d3ac9a..31f12b7 100644 --- a/apis/emsiTitles.py +++ b/apis/emsiTitles.py @@ -2,11 +2,15 @@ This service takes text describing a job and normalizes it into a standardized job title from Emsi's job title taxonomy. 
https://api.emsidata.com/apis/titles """ +from __future__ import annotations + +from typing import Any +from typing import Dict from .base import EmsiBaseConnection -class EmsiTitlesConnection(EmsiBaseConnection): +class TitlesConnection(EmsiBaseConnection): """ This API exposes the complete collection of Emsi titles which includes curated occupation and skill mappings for each title and normalization functionality to transform raw job titles to Emsi titles. @@ -66,11 +70,11 @@ def get_version_changes(self, version: str = "latest") -> dict: def get_list_all_titles( self, - q: str = None, - fields=["id", "name"], + q: str | None = None, + fields: list = ["id", "name"], version: str = "latest", - limit: int = None, - page: int = None, + limit: int | None = None, + page: int | None = None, ) -> list: """ Returns a list of all titles in {version} sorted by title name @@ -84,7 +88,7 @@ def get_list_all_titles( version (str, optional): The titles classification version. limit (None, optional): Limit the number of titles returned in the response. """ - querystring = {"fields": ",".join(fields)} + querystring: dict[str, Any] = {"fields": ",".join(fields)} if q is not None: querystring["q"] = q @@ -96,7 +100,8 @@ def get_list_all_titles( querystring["page"] = page response = self.download_data( - f"versions/{version}/titles", querystring=querystring + f"versions/{version}/titles", + querystring=querystring, ) return response.json()["data"] @@ -119,7 +124,10 @@ def post_list_requested_titles( version (str, optional): The titles classification version. 
""" payload = {"ids": titles, "fields": fields} - response = self.download_data(f"versions/{version}/titles", payload=payload) + response = self.download_data( + f"versions/{version}/titles", + payload=payload, + ) return response.json()["data"] def get_title_by_id(self, title_id: str, version: str = "latest") -> str: @@ -140,8 +148,8 @@ def post_normalize_title( self, title: str, version: str = "latest", - confidenceThreshold=0.5, - fields=["id", "name", "pluralName"], + confidenceThreshold: float = 0.5, + fields: list = ["id", "name", "pluralName"], ) -> dict: """Normalize a raw job title string to the best matching Emsi title. @@ -161,7 +169,10 @@ def post_normalize_title( "fields": fields, "confidenceThreshold": confidenceThreshold, } - response = self.download_data(f"versions/{version}/normalize", payload=payload) + response = self.download_data( + f"versions/{version}/normalize", + payload=payload, + ) return response.json()["data"] @@ -193,7 +204,10 @@ def post_inspect_title_normalization( "limit": limit, "fields": fields, } - response = self.download_data(f"versions/{version}/normalize/inspect", payload) + response = self.download_data( + f"versions/{version}/normalize/inspect", + payload, + ) return response.json()["data"] @@ -202,7 +216,7 @@ def post_normalize_titles_in_bulk( titles: list, version: str = "latest", confidenceThreshold: float = 0.5, - fields=["id", "name", "pluralName"], + fields: list = ["id", "name", "pluralName"], ) -> list: """ Normalize multiple raw job title strings to a list of best matching Emsi titles. 
@@ -222,7 +236,8 @@ def post_normalize_titles_in_bulk( "fields": fields, } response = self.download_data( - f"versions/{version}/normalize/bulk", payload=payload + f"versions/{version}/normalize/bulk", + payload=payload, ) return response.json()["data"] diff --git a/apis/geography.py b/apis/geography.py index 23d96cb..1b6887b 100644 --- a/apis/geography.py +++ b/apis/geography.py @@ -2,6 +2,7 @@ This service takes text describing a job and normalizes it into a standardized job title from Emsi's job title taxonomy. https://api.emsidata.com/apis/emsi-job-title-normalization """ +from __future__ import annotations from .base import EmsiBaseConnection @@ -32,47 +33,101 @@ def get_country_meta(self, country: str) -> list: # https://api.emsidata.com/apis/geography#get-country return self.download_data(country).json() - def post_withinproximity(self, country: str, version: str, level: str, payload: dict) -> dict: + def post_withinproximity( + self, + country: str, + version: str, + level: str, + payload: dict, + ) -> dict: # https://api.emsidata.com/apis/geography#post-country-version-level-withinproximity url = f"{country}/{version}/{level}/withinproximity" return self.download_data(url, payload).json() - def post_closest(self, country: str, version: str, level: str, payload: dict) -> dict: + def post_closest( + self, + country: str, + version: str, + level: str, + payload: dict, + ) -> dict: # https://api.emsidata.com/apis/geography#post-country-version-level-closest url = f"{country}/{version}/{level}/closest" return self.download_data(url, payload).json() - def post_contains(self, country: str, version: str, level: str, payload: dict) -> dict: + def post_contains( + self, + country: str, + version: str, + level: str, + payload: dict, + ) -> dict: # https://api.emsidata.com/apis/geography#post-country-version-level-contains url = f"{country}/{version}/{level}/contains" return self.download_data(url, payload).json() - def post_centroid(self, country: str, version: str, 
level: str, payload: dict) -> dict: + def post_centroid( + self, + country: str, + version: str, + level: str, + payload: dict, + ) -> dict: # https://api.emsidata.com/apis/geography#post-country-version-level-centroid url = f"{country}/{version}/{level}/centroid" - return self.download_data(url, payload = payload).json() - - def post_mbr(self, country: str, version: str, level: str, payload: dict) -> dict: + return self.download_data(url, payload=payload).json() + + def post_mbr( + self, + country: str, + version: str, + level: str, + payload: dict, + ) -> dict: # https://api.emsidata.com/apis/geography#post-country-version-level-mbr url = f"{country}/{version}/{level}/mbr" return self.download_data(url, payload).json() - def post_mbc(self, country: str, version: str, level: str, payload: dict) -> dict: + def post_mbc( + self, + country: str, + version: str, + level: str, + payload: dict, + ) -> dict: # https://api.emsidata.com/apis/geography#post-country-version-level-mbc url = f"{country}/{version}/{level}/mbc" return self.download_data(url, payload).json() - def post_geojson(self, country: str, version: str, level: str, payload: dict) -> dict: + def post_geojson( + self, + country: str, + version: str, + level: str, + payload: dict, + ) -> dict: # https://api.emsidata.com/apis/geography#post-country-version-level-geojson url = f"{country}/{version}/{level}/geojson" return self.download_data(url, payload).json() - def post_svg(self, country: str, version: str, level: str, payload: dict) -> dict: + def post_svg( + self, + country: str, + version: str, + level: str, + payload: dict, + ) -> dict: # https://api.emsidata.com/apis/geography#post-country-version-level-svg url = f"{country}/{version}/{level}/svg" return self.download_data(url, payload).json() - def post_traveltime(self, country: str, version: str, level: str, payload: dict) -> dict: + def post_traveltime( + self, + country: str, + version: str, + level: str, + payload: dict, + ) -> dict: # 
https://emsiapi-internal.surge.sh/apis/geography#post-country-version-level-traveltime url = f"{country}/{version}/{level}/traveltime" return self.download_data(url, payload).json() diff --git a/apis/globalPostings.py b/apis/globalPostings.py index 1b39b1b..d5f40ce 100644 --- a/apis/globalPostings.py +++ b/apis/globalPostings.py @@ -5,6 +5,8 @@ About the data Profiles are collected from various sources and processed/enriched to provide information such as standardized company name, occupation, skills, and geography. """ +from __future__ import annotations + from .base import JobPostingsConnection diff --git a/apis/globalProfiles.py b/apis/globalProfiles.py index 0c10d58..bd15a95 100644 --- a/apis/globalProfiles.py +++ b/apis/globalProfiles.py @@ -5,6 +5,8 @@ About the data Profiles are collected from various sources and processed/enriched to provide information such as standardized company name, occupation, skills, and geography. """ +from __future__ import annotations + from .base import ProfilesConnection diff --git a/apis/ipeds.py b/apis/ipeds.py index d4f1f70..23215e8 100644 --- a/apis/ipeds.py +++ b/apis/ipeds.py @@ -3,7 +3,10 @@ Information and search functionality for institutions are exposed via the institution family of endpoints. A SOC-CIP mapping is provided via the `soccip` endpoints. 
""" +from __future__ import annotations + import pandas as pd +import requests from .base import EmsiBaseConnection @@ -21,8 +24,7 @@ class IpedsConnection(EmsiBaseConnection): """ def __init__(self) -> None: - """Summary - """ + """Summary""" super().__init__() self.base_url = "https://ipeds.emsicloud.com/" self.scope = "emsiauth" @@ -54,7 +56,7 @@ def post_institutions(self, institutions: list) -> dict: payload = {"institutionIds": institutions} return self.download_data( "institutions", - payload = payload + payload=payload, ).json() def get_institutions_geo(self, geo_level: str, geo_code: str) -> dict: @@ -71,10 +73,14 @@ def get_institutions_geo(self, geo_level: str, geo_code: str) -> dict: Raises: ValueError: Raises this error if the geo level is not one of [`zip`, `fips`] """ - if geo_level not in ['zip', 'fips']: - raise ValueError(f"`geo_level` must be one of ['zip', 'fips'], found `{geo_level}`") + if geo_level not in ["zip", "fips"]: + raise ValueError( + f"`geo_level` must be one of ['zip', 'fips'], found `{geo_level}`", + ) - return self.download_data(f"institutions/{geo_level}/{geo_code}").json() + return self.download_data( + f"institutions/{geo_level}/{geo_code}", + ).json() def get_institutions_search(self, search: str) -> dict: """ @@ -88,7 +94,7 @@ def get_institutions_search(self, search: str) -> dict: """ return self.download_data(f"institutions/{search}").json() - def post_institutions_search(self, payload: dict) -> dict: + def post_institutions_search(self, payload: dict) -> requests.Response: """ Search institutions using multiple values. Valid search types are `zip`, `fips`, `city`, `id`, and `name`. 
@@ -100,7 +106,7 @@ def post_institutions_search(self, payload: dict) -> dict:
         """
         return self.download_data(
             f"institutions/search",
-            payload = payload
-        ).json()
+            payload=payload,
+        )
 
     def get_institutions_all(self, offset: int = 0, limit: int = 0) -> dict:
@@ -132,10 +138,10 @@ def post_cip_soc(self, cips: list) -> dict:
         payload = {"cipCodes": cips}
         return self.download_data(
             "soccip/cip2soc",
-            payload = payload
+            payload=payload,
         ).json()
 
-    def post_soc_cip(self, socs: list) -> dict:
+    def post_soc_cip(self, socs: list) -> requests.Response:
         """
         This endpoint maps from one or more SOC codes to the CIP codes of the programs which most likely train for them.
 
@@ -148,7 +154,7 @@ def post_soc_cip(self, socs: list) -> dict:
         payload = {"socCodes": socs}
         return self.download_data(
             "soccip/soc2cip",
-            payload = payload
+            payload=payload,
         )
 
     def post_institutions_df(self, institutions: list) -> pd.DataFrame:
@@ -166,7 +172,11 @@ def post_institutions_df(self, institutions: list) -> pd.DataFrame:
 
         return df
 
-    def get_institutions_geo_df(self, geo_level: str, geo_code: str) -> pd.DataFrame:
+    def get_institutions_geo_df(
+        self,
+        geo_level: str,
+        geo_code: str,
+    ) -> pd.DataFrame:
         """
         Return a list of institutions operating in the specified FIPS/ZIP code.
 
@@ -207,12 +217,16 @@ def post_institutions_search_df(self, payload: dict) -> pd.DataFrame:
         Returns:
             TYPE: Description
         """
-        data = self.get_institutions_search(payload)
+        data = self.post_institutions_search(payload).json()
         df = pd.DataFrame(data["rows"])
 
        return df
 
-    def get_institutions_all_df(self, offset: int = 0, limit: int = 0) -> pd.DataFrame:
+    def get_institutions_all_df(
+        self,
+        offset: int = 0,
+        limit: int = 0,
+    ) -> pd.DataFrame:
         """
         Lists all institutions with pagination.
 
@@ -244,10 +258,10 @@ def post_cip_soc_df(self, cips: list) -> pd.DataFrame: temp_df = pd.DataFrame( { "cip": [record["code"] for _ in record["corresponding"]], - "soc": [x for x in record["corresponding"]] - } + "soc": [x for x in record["corresponding"]], + }, ) - df = df.append(temp_df, ignore_index = True) + df = df.append(temp_df, ignore_index=True) return df @@ -262,15 +276,15 @@ def post_soc_cip_df(self, socs: list) -> pd.DataFrame: TYPE: Description """ - data = self.post_soc_cip(socs) + data = self.post_soc_cip(socs).json() df = pd.DataFrame() for record in data["mapping"]: temp_df = pd.DataFrame( { "soc": [record["code"] for _ in record["corresponding"]], - "cip": [x for x in record["corresponding"]] - } + "cip": [x for x in record["corresponding"]], + }, ) - df = df.append(temp_df, ignore_index = True) + df = df.append(temp_df, ignore_index=True) return df diff --git a/apis/jobTitleMapping.py b/apis/jobTitleMapping.py index e1a34ee..da3cbd6 100644 --- a/apis/jobTitleMapping.py +++ b/apis/jobTitleMapping.py @@ -2,6 +2,7 @@ This service takes text describing a job and normalizes it into a standardized job title from Emsi's job title taxonomy. 
https://api.emsidata.com/apis/emsi-job-title-normalization """ +from __future__ import annotations from .base import EmsiBaseConnection @@ -16,8 +17,7 @@ class JobTitleMappingConnection(EmsiBaseConnection): """ def __init__(self) -> None: - """Create the connection - """ + """Create the connection""" super().__init__() self.base_url = "https://emsiservices.com/jtm/" self.scope = "jtm" @@ -26,12 +26,21 @@ def __init__(self) -> None: self.name = "Job_Title_Mapping" - def post_titles(self, titles: list, querystring: dict = None) -> list: - """ - """ - return self.download_data("titles", payload = {"titles": titles}, querystring = querystring).json() - - def get_title(self, title: str, querystring: dict = None) -> list: - """ - """ - return self.download_data(f"titles/{title}", querystring = querystring).json() + def post_titles( + self, + titles: list, + querystring: dict | None = None, + ) -> list: + """ """ + return self.download_data( + "titles", + payload={"titles": titles}, + querystring=querystring, + ).json() + + def get_title(self, title: str, querystring: dict | None = None) -> list: + """ """ + return self.download_data( + f"titles/{title}", + querystring=querystring, + ).json() diff --git a/apis/openSkills.py b/apis/openSkills.py index d14bebb..527b556 100644 --- a/apis/openSkills.py +++ b/apis/openSkills.py @@ -1,6 +1,8 @@ """ Summary """ +from __future__ import annotations + from .base import EmsiBaseConnection @@ -16,8 +18,7 @@ class SkillsClassificationConnection(EmsiBaseConnection): """ def __init__(self) -> None: - """Summary - """ + """Summary""" super().__init__() self.base_url = "https://emsiservices.com/skills/" self.scope = "emsi_open" @@ -37,7 +38,7 @@ def get_versions(self) -> list: """ return self.download_data("versions").json() - def get_version_metadata(self, version = "latest") -> list: + def get_version_metadata(self, version="latest") -> list: """Summary Returns: @@ -48,7 +49,7 @@ def get_version_metadata(self, version = "latest") -> 
list: """ return self.download_data(f"versions/{version}").json() - def get_version_changes(self, version = "latest") -> dict: + def get_version_changes(self, version="latest") -> dict: """Summary Args: @@ -62,7 +63,13 @@ def get_version_changes(self, version = "latest") -> dict: return data - def get_list_all_skills(self, version: str = "latest", q: str = None, typeIds: str = None, fields: str = None) -> list: + def get_list_all_skills( + self, + version: str = "latest", + q: str | None = None, + typeIds: str | None = None, + fields: str | None = None, + ) -> dict: """Summary Args: @@ -78,7 +85,7 @@ def get_list_all_skills(self, version: str = "latest", q: str = None, typeIds: s base_querystring = { "q": q, "typeIds": typeIds, - "fields": fields + "fields": fields, } querystring = {} @@ -88,12 +95,21 @@ def get_list_all_skills(self, version: str = "latest", q: str = None, typeIds: s querystring[key] = value if len(querystring) > 0: - return self.download_data("versions/{}/skills".format(version), querystring = querystring).json() + return self.download_data( + f"versions/{version}/skills", + querystring=querystring, + ).json() else: - return self.download_data("versions/{}/skills".format(version)).json() - - def post_list_requested_skills(self, payload: dict, version: str = "latest", typeIds = None, fields = None) -> dict: + return self.download_data(f"versions/{version}/skills").json() + + def post_list_requested_skills( + self, + payload: dict, + version: str = "latest", + typeIds=None, + fields=None, + ) -> dict: """Summary Args: @@ -108,7 +124,7 @@ def post_list_requested_skills(self, payload: dict, version: str = "latest", typ base_querystring = { "typeIds": typeIds, - "fields": fields + "fields": fields, } querystring: dict = {} @@ -117,10 +133,17 @@ def post_list_requested_skills(self, payload: dict, version: str = "latest", typ querystring[key] = value if len(querystring) > 0: - return self.download_data("versions/{}/skills".format(version), payload = 
payload, querystring = querystring).json() + return self.download_data( + f"versions/{version}/skills", + payload=payload, + querystring=querystring, + ).json() else: - return self.download_data("versions/{}/skills".format(version), payload = payload).json() + return self.download_data( + f"versions/{version}/skills", + payload=payload, + ).json() def get_skill_by_id(self, skill_id: str, version: str = "latest") -> dict: """Summary @@ -132,9 +155,17 @@ def get_skill_by_id(self, skill_id: str, version: str = "latest") -> dict: Returns: dict: Description """ - return self.download_data("versions/{}/skills/{}".format(version, skill_id)).json() + return self.download_data( + f"versions/{version}/skills/{skill_id}", + ).json() - def post_find_related_skills(self, skill_ids: list, limit = 10, fields = ["id", "name", "type", "infoUrl"], version: str = "latest"): + def post_find_related_skills( + self, + skill_ids: list, + limit=10, + fields=["id", "name", "type", "infoUrl"], + version: str = "latest", + ): """Summary Args: @@ -149,11 +180,19 @@ def post_find_related_skills(self, skill_ids: list, limit = 10, fields = ["id", payload = { "ids": skill_ids, "limit": limit, - "fields": fields + "fields": fields, } - return self.download_data("versions/{}/related".format(version), payload = payload).json() + return self.download_data( + f"versions/{version}/related", + payload=payload, + ).json() - def post_extract(self, description: str, version: str = 'latest', confidenceThreshold: float = 0.5) -> dict: + def post_extract( + self, + description: str, + version: str = "latest", + confidenceThreshold: float = 0.5, + ) -> dict: """Summary Args: @@ -165,12 +204,17 @@ def post_extract(self, description: str, version: str = 'latest', confidenceThre dict: Description """ return self.download_data( - "versions/{}/extract".format(version), - payload = {"text": description}, - querystring = {"confidenceThreshold": confidenceThreshold} + f"versions/{version}/extract", + payload={"text": 
description}, + querystring={"confidenceThreshold": confidenceThreshold}, ).json() - def post_extract_with_source(self, description: str, version: str = 'latest', includeNormalizedText: bool = False) -> dict: + def post_extract_with_source( + self, + description: str, + version: str = "latest", + includeNormalizedText: bool = False, + ) -> dict: """Summary Args: @@ -185,9 +229,9 @@ def post_extract_with_source(self, description: str, version: str = 'latest', in confidenceThreshold (float, optional): Description """ return self.download_data( - "versions/{}/extract/trace".format(version), - payload = { + f"versions/{version}/extract/trace", + payload={ "text": description, - "includeNormalizedText": includeNormalizedText + "includeNormalizedText": includeNormalizedText, }, ).json() diff --git a/apis/talentBenchmark.py b/apis/talentBenchmark.py index 38aa74a..2e86c27 100644 --- a/apis/talentBenchmark.py +++ b/apis/talentBenchmark.py @@ -3,7 +3,9 @@ This is an interface for retrieving key indicators to help benchmark talent by location in the United States. About the data -The data in this API exposes key talent benchmarking metrics oriented around supply, demand, diversity, and compensation. These metrics are aggregated from various Emsi datasets including US job postings, US profiles, and US Diversity along with our Compensation model for any job title and city in the United States. +The data in this API exposes key talent benchmarking metrics oriented around supply, demand, diversity, and compensation. +These metrics are aggregated from various Emsi datasets including US job postings, US profiles, and US Diversity +along with our Compensation model for any job title and city in the United States. 
Docs: # create the connection @@ -615,102 +617,106 @@ ``` """ +from __future__ import annotations + from .base import EmsiBaseConnection class TalentBenchmarkConnection(EmsiBaseConnection): - """ - Use case - This is an interface for retrieving key indicators to help benchmark talent by location in the United States. - - About the data - The data in this API exposes key talent benchmarking metrics oriented around supply, demand, diversity, and compensation. These metrics are aggregated from various Emsi datasets including US job postings, US profiles, and US Diversity along with our Compensation model for any job title and city in the United States. - - Attributes: - base_url (str): what every url has to start with to query the API - scope (str): the scope for requesting the proper access token - token (str): the auth token received from the auth API - """ - - def __init__(self) -> None: - super().__init__() - self.base_url = "https://emsiservices.com/benchmark/" - self.scope = "benchmark" - - self.get_new_token() - - self.name = "Talent_Benchmark" - - def get_service_status(self) -> dict: - # same as get_status, but this is more in line with the Emsi documentation - return self.get_status() - - def __send_request(self, endpoint, city, title) -> dict: - """ - Private function to abstract the building of the payload and making the request - - Returns: - dict: the body of the response from the API as a json object - """ - payload = {"title": title, "city": city} - return self.download_data(endpoint, payload = payload).json() - - def get_service_metadata(self) -> dict: - """ - https://api.emsidata.com/apis/talent-benchmark#get-get-service-metadata - Get service metadata, including access information and attribution text. 
- - Returns: - dict: dictionary of attribution and access available to the given client_id - """ - return self.download_data("meta").json()['data'] - - def post_benchmark_summary(self, city: str, title: str) -> dict: """ - https://api.emsidata.com/apis/talent-benchmark#post-get-benchmark-summary - Get summary data on each metric that you have access to. - - Returns: - dict: search parameters and data response from the API - """ - return self.__send_request("", city, title) - - def post_supply_benchmark_data(self, city: str, title: str) -> dict: - """ - https://api.emsidata.com/apis/talent-benchmark#post-get-supply-benchmark-data - Emsi aggregates online social profiles from all over the web. The details in this endpoint provide aggregate totals for the top employers, top titles, and top skills associated with the profiles matching your search. - - Returns: - dict: search parameters and data response from the API - """ - return self.__send_request("supply", city, title) - - def post_demand_benchmark_data(self, city: str, title: str) -> dict: - """ - https://api.emsidata.com/apis/talent-benchmark#post-get-demand-benchmark-data - Emsi aggregates job posting details from all over the web. The details in this endpoint provide aggregate totals for the top employers, top titles, and top skills associated with the postings matching your search. - - Returns: - dict: search parameters and data response from the API - """ - return self.__send_request("demand", city, title) - - def post_compensation_benchmark_data(self, city: str, title: str) -> dict: - """ - https://api.emsidata.com/apis/talent-benchmark#post-get-compensation-benchmark-data - Emsi models compensation data using government data and advertised salary observations identified from job postings data. 
- - Returns: - dict: search parameters and data response from the API - """ - return self.__send_request("compensation", city, title) - - def post_diversity_benchmark_data(self, city: str, title: str) -> dict: + Use case + This is an interface for retrieving key indicators to help benchmark talent by location in the United States. + + About the data + The data in this API exposes key talent benchmarking metrics oriented around supply, demand, diversity, and compensation. + These metrics are aggregated from various Emsi datasets including US job postings, US profiles, + and US Diversity along with our Compensation model for any job title and city in the United States. + + Attributes: + base_url (str): what every url has to start with to query the API + scope (str): the scope for requesting the proper access token + token (str): the auth token received from the auth API """ - https://api.emsidata.com/apis/talent-benchmark#post-get-diversity-benchmark-data - Emsi models diversity data by applying a staffing pattern to government data related to industries. 
- Returns: - dict: search parameters and data response from the API - """ - return self.__send_request("diversity", city, title) + def __init__(self) -> None: + super().__init__() + self.base_url = "https://emsiservices.com/benchmark/" + self.scope = "benchmark" + + self.get_new_token() + + self.name = "Talent_Benchmark" + + def get_service_status(self) -> str: + # same as get_status, but this is more in line with the Emsi documentation + return self.get_status() + + def __send_request(self, endpoint, city, title) -> dict: + """ + Private function to abstract the building of the payload and making the request + + Returns: + dict: the body of the response from the API as a json object + """ + payload = {"title": title, "city": city} + return self.download_data(endpoint, payload=payload).json() + + def get_service_metadata(self) -> dict: + """ + https://api.emsidata.com/apis/talent-benchmark#get-get-service-metadata + Get service metadata, including access information and attribution text. + + Returns: + dict: dictionary of attribution and access available to the given client_id + """ + return self.download_data("meta").json()["data"] + + def post_benchmark_summary(self, city: str, title: str) -> dict: + """ + https://api.emsidata.com/apis/talent-benchmark#post-get-benchmark-summary + Get summary data on each metric that you have access to. + + Returns: + dict: search parameters and data response from the API + """ + return self.__send_request("", city, title) + + def post_supply_benchmark_data(self, city: str, title: str) -> dict: + """ + https://api.emsidata.com/apis/talent-benchmark#post-get-supply-benchmark-data + Emsi aggregates online social profiles from all over the web. The details in this endpoint provide aggregate totals for the top employers, top titles, and top skills associated with the profiles matching your search. 
+ + Returns: + dict: search parameters and data response from the API + """ + return self.__send_request("supply", city, title) + + def post_demand_benchmark_data(self, city: str, title: str) -> dict: + """ + https://api.emsidata.com/apis/talent-benchmark#post-get-demand-benchmark-data + Emsi aggregates job posting details from all over the web. The details in this endpoint provide aggregate totals for the top employers, top titles, and top skills associated with the postings matching your search. + + Returns: + dict: search parameters and data response from the API + """ + return self.__send_request("demand", city, title) + + def post_compensation_benchmark_data(self, city: str, title: str) -> dict: + """ + https://api.emsidata.com/apis/talent-benchmark#post-get-compensation-benchmark-data + Emsi models compensation data using government data and advertised salary observations identified from job postings data. + + Returns: + dict: search parameters and data response from the API + """ + return self.__send_request("compensation", city, title) + + def post_diversity_benchmark_data(self, city: str, title: str) -> dict: + """ + https://api.emsidata.com/apis/talent-benchmark#post-get-diversity-benchmark-data + Emsi models diversity data by applying a staffing pattern to government data related to industries. 
+ + Returns: + dict: search parameters and data response from the API + """ + return self.__send_request("diversity", city, title) diff --git a/apis/unitedKingdomPostings.py b/apis/unitedKingdomPostings.py index 338ce34..fe5d64e 100644 --- a/apis/unitedKingdomPostings.py +++ b/apis/unitedKingdomPostings.py @@ -1,5 +1,7 @@ """Summary """ +from __future__ import annotations + from .base import JobPostingsConnection @@ -13,8 +15,7 @@ class UKPostingsConnection(JobPostingsConnection): """ def __init__(self) -> None: - """Summary - """ + """Summary""" super().__init__() self.base_url = "https://emsiservices.com/uk-jpa/" self.scope = "postings:uk" diff --git a/apis/unitedKingdomProfiles.py b/apis/unitedKingdomProfiles.py index d1343be..80e8cda 100644 --- a/apis/unitedKingdomProfiles.py +++ b/apis/unitedKingdomProfiles.py @@ -1,20 +1,23 @@ """ Use case -This is an interface for retrieving aggregated Emsi Global Profile data that is filtered, sorted and ranked by various properties of the profiles. +This is an interface for retrieving aggregated Emsi Global Profile data that is +filtered, sorted and ranked by various properties of the profiles. About the data -Profiles are collected from various sources and processed/enriched to provide information such as standardized company name, occupation, skills, and geography. +Profiles are collected from various sources and processed/enriched to provide +information such as standardized company name, +occupation, skills, and geography. """ +from __future__ import annotations + from .base import ProfilesConnection class UKProfiles(ProfilesConnection): """ Use case - This is an interface for retrieving aggregated Emsi Global Profile data that is filtered, sorted and ranked by various properties of the profiles. - - About the data - Profiles are collected from various sources and processed/enriched to provide information such as standardized company name, occupation, skills, and geography. 
+ This is an interface for retrieving aggregated UK Profile data + that is filtered, sorted and ranked by various properties of the profiles. Attributes: base_url (str): what every url has to start with to query the API diff --git a/apis/usCompensation.py b/apis/usCompensation.py index 6097b13..5afba96 100644 --- a/apis/usCompensation.py +++ b/apis/usCompensation.py @@ -1,5 +1,7 @@ """Summary """ +from __future__ import annotations + from .base import EmsiBaseConnection @@ -17,8 +19,7 @@ class UsCompensationConnection(EmsiBaseConnection): """ def __init__(self) -> None: - """Summary - """ + """Summary""" super().__init__() self.base_url = "https://comp.emsicloud.com/" diff --git a/apis/usInputOutput.py b/apis/usInputOutput.py index 4d61024..ab6eb9d 100644 --- a/apis/usInputOutput.py +++ b/apis/usInputOutput.py @@ -1,22 +1,24 @@ """ -This documentation describes Emsi's Input-Output API for regions in the U.S., Canada, and the U.K. +This documentation describes Emsi's Input-Output API +for regions in the U.S., Canada, and the U.K. 
""" -from .base import EmsiBaseConnection +from __future__ import annotations import requests +from .base import EmsiBaseConnection + class USInputOutputConncetion(EmsiBaseConnection): """ Attributes: base_url (str): the base url for making requests to the API - scope (str): the scope for handling authentication to the Oauth 2.0 server + scope (str): the scope for authentication to the Oauth 2.0 server token (str): the Oauth 2.0 token """ def __init__(self) -> None: - """Summary - """ + """Summary""" super().__init__() self.base_url = "https://io.emsicloud.com/" self.scope = "us-io" # todo: add more scopes for other nations @@ -25,7 +27,12 @@ def __init__(self) -> None: self.name = "US_InputOutput" - def download_data(self, api_endpoint: str, payload: dict = None, querystring: dict = None) -> requests.Response: + def download_data( + self, + api_endpoint: str, + payload: dict | None = None, + querystring: dict | None = None, + ) -> requests.Response: """Summary Args: @@ -52,23 +59,31 @@ def download_data(self, api_endpoint: str, payload: dict = None, querystring: di return response - def get_dataruns(self, country = "us") -> list: - """All possible dataruns available for a country may be accessed by doing a GET request to https://io.emsicloud.com/v1//. + def get_dataruns(self, country="us") -> list: + """ + All possible dataruns available for a country may be accessed by + doing a GET request to https://io.emsicloud.com/v1//. Args: - country (str, optional): the country to request data for (currently only US supported). If none is provided, will default to the US + country (str): the country to request data for. 
default 'us' Returns: list: Description """ - return self.download_data("v1/{}".format(country)).json() + return self.download_data(f"v1/{country}").json() - def get_years(self, datarun: str = None, country: str = "us") -> list: - """All possible years available for a country and datarun may be accessed by doing a GET or POST request to https://io.emsicloud.com/v1///. + def get_years( + self, + datarun: str | None = None, + country: str | None = "us", + ) -> list: + """All possible years available for a country and datarun may be + accessed by doing a GET or POST request to + https://io.emsicloud.com/v1///. Args: - datarun (str, optional): the datarun to use. If none is provided, will use the latest datarun available - country (str, optional): the country to request data for (currently only US supported). If none is provided, will default to the US + datarun (str, optional): the datarun to use. + country (str, optional): the country to request data for Returns: list: years available @@ -78,12 +93,19 @@ def get_years(self, datarun: str = None, country: str = "us") -> list: return self.download_data(f"v1/{country}/{datarun}").json() - def get_codes(self, datarun: str = None, country: str = "us") -> list: - """A listing of all codes and their definitions can be obtained by doing a GET or POST request to https://io.emsicloud.com/v1///codes/. + def get_codes( + self, + datarun: str | None = None, + country: str | None = "us", + ) -> list: + """ + A listing of all codes and their definitions can be obtained by doing + a GET or POST request to + https://io.emsicloud.com/v1///codes/. Args: - datarun (str, optional): the datarun to use. If none is provided, will use the latest datarun available - country (str, optional): the country to request data for (currently only US supported). If none is provided, will default to the US + datarun (str, optional): the datarun to use. 
+ country (str, optional): the country to request data for Returns: list: list of the codes and definitions @@ -93,13 +115,19 @@ def get_codes(self, datarun: str = None, country: str = "us") -> list: return self.download_data(f"v1/{country}/{datarun}/codes").json() - def get_functions(self, datarun: str = None, year: str = None, country: str = "us"): - """A listing of each data function, their respective descriptions, and whether the function has a multi-regional counterpart can be obtained by doing a GET or POST request to https://io.emsicloud.com/v1////. + def get_functions( + self, + datarun: str | None = None, + year: str | None = None, + country: str = "us", + ): + """A listing of each data function, their respective descriptions, + and whether the function has a multi-regional counterpart Args: - datarun (str, optional): the datarun to use. If none is provided, will use the latest datarun available - year (str, optional): the year to use. If none is provided, will use the latest year available - country (str, optional): the country to request data for (currently only US supported). If none is provided, will default to the US + datarun (str, optional): the datarun to use. + year (str, optional): the year to use. + country (str, optional): the country to request data for Returns: list: a list of the services available @@ -112,8 +140,15 @@ def get_functions(self, datarun: str = None, year: str = None, country: str = "u return self.download_data(f"v1/{country}/{datarun}/{year}/").json() - def post_basics(self, payload: dict, datarun: str = None, year: str = None, country: str = "us") -> dict: - """A series of services that just return data and don't require any inputs. They can be requested individually as services by using https://io.emsicloud.com/v1////. Alternatively, when requesting the basics service using https://io.emsicloud.com/v1////, they can be bundled in the metrics array. 
+ def post_basics( + self, + payload: dict, + datarun: str | None = None, + year: str | None = None, + country: str = "us", + ) -> dict: + """ + A series of services that just return data and don't require any inputs - Multipliers: type-to-type multipliers - ConsultantMultipliers: sales-to-type multipliers @@ -124,18 +159,21 @@ def post_basics(self, payload: dict, datarun: str = None, year: str = None, coun - Ratios: sales to jobs/earnings/value added ratios - RPCs: Regional Purchasing Coefficients - RID: Residents' Income Data - - A: A matrix (technical coefficients) Warning: very large amount of data! - - B: B matrix (sales multipliers) Warning: very large amount of data! - - BJobs: Jobs B matrix (jobs-to-jobs multipliers) Warning: very large amount of data! - - Z: Z matrix (transactions) Warning: very large amount of data! + - A: A matrix (technical coefficients) + - B: B matrix (sales multipliers) + - BJobs: Jobs B matrix (jobs-to-jobs multipliers) + - Z: Z matrix (transactions) - The metrics object is an array of objects with the properties altName and name. The altName is an arbitrary string that will be returned from the API in conjunction with the output of its respective dataset. + The metrics object is an array of objects with the properties + altName and name. The altName is an arbitrary string that will be + returned from the API in conjunction with the output of its + respective dataset. Args: payload (dict): the json being passed to the server - datarun (str, optional): the datarun to use. If none is provided, will use the latest datarun available - year (str, optional): the year to use. If none is provided, will use the latest year available - country (str, optional): the country to request data for (currently only US supported). If none is provided, will default to the US + datarun (str, optional): the datarun to use. + year (str, optional): the year to use. 
+ country (str, optional): the country to request data for Returns: dict: the json portion of the response from the API @@ -147,20 +185,26 @@ def post_basics(self, payload: dict, datarun: str = None, year: str = None, coun year = max(self.get_years(datarun, country)) response = self.download_data( - url = f"v1/{country}/{datarun}/{year}/basics", - payload = payload + api_endpoint=f"v1/{country}/{datarun}/{year}/basics", + payload=payload, ) return response.json() - def post_scenario(self, payload: dict, datarun: str = None, year: str = None, country: str = "us") -> dict: + def post_scenario( + self, + payload: dict, + datarun: str | None = None, + year: str | None = None, + country: str = "us", + ) -> dict: """Run a scenario on the I-O API Args: payload (dict): the json being passed to the server - datarun (str, optional): the datarun to use. If none is provided, will use the latest datarun available - year (str, optional): the year to use. If none is provided, will use the latest year available - country (str, optional): the country to request data for (currently only US supported). If none is provided, will default to the US + datarun (str, optional): the datarun to use. + year (str, optional): the year to use. 
+ country (str, optional): the country to request data for Returns: dict: the json portion of the response from the API @@ -172,20 +216,27 @@ def post_scenario(self, payload: dict, datarun: str = None, year: str = None, co year = max(self.get_years(datarun, country)) return self.download_data( - url = f"v1/{country}/{datarun}/{year}/scenario", - payload = payload + api_endpoint=f"v1/{country}/{datarun}/{year}/scenario", + payload=payload, ).json() - def post_requirements(self, payload: dict, datarun: str = None, year: str = None, country: str = "us") -> dict: + def post_requirements( + self, + payload: dict, + datarun: str | None = None, + year: str | None = None, + country: str = "us", + ) -> dict: """ This service requires inputs of sectors for the service to process. - If no input is specified, the service acts like a basic service and returns the requirements for all sectors. + If no input is specified, the service acts like a basic service and + returns the requirements for all sectors. Args: payload (dict): the json being passed to the server - datarun (str, optional): the datarun to use. If none is provided, will use the latest datarun available - year (str, optional): the year to use. If none is provided, will use the latest year available - country (str, optional): the country to request data for (currently only US supported). If none is provided, will default to the US + datarun (str, optional): the datarun to use. + year (str, optional): the year to use. 
+ country (str, optional): the country to request data for Returns: dict: the json portion of the response from the API @@ -198,18 +249,25 @@ def post_requirements(self, payload: dict, datarun: str = None, year: str = None return self.download_data( f"v1/{country}/{datarun}/{year}/requirements", - payload = payload + payload=payload, ).json() - def post_spending(self, payload: dict, datarun: str = None, year: str = None, country: str = "us") -> dict: + def post_spending( + self, + payload: dict, + datarun: str | None = None, + year: str | None = None, + country: str = "us", + ) -> dict: """This service requires inputs of sectors for the service to process. - If no input is specified, the service acts like a basic service and returns the spending for all sectors. + If no input is specified, the service acts like a basic service and + returns the spending for all sectors. Args: payload (dict): the json being passed to the server - datarun (str, optional): the datarun to use. If none is provided, will use the latest datarun available - year (str, optional): the year to use. If none is provided, will use the latest year available - country (str, optional): the country to request data for (currently only US supported). If none is provided, will default to the US + datarun (str, optional): the datarun to use. + year (str, optional): the year to use. 
+ country (str, optional): the country to request data for Returns: dict: the json portion of the response from the API @@ -221,18 +279,25 @@ def post_spending(self, payload: dict, datarun: str = None, year: str = None, co year = max(self.get_years(datarun, country)) return self.download_data( - url = f"v1/{country}/{datarun}/{year}/spending", - payload = payload + api_endpoint=f"v1/{country}/{datarun}/{year}/spending", + payload=payload, ).json() - def post_econbase(self, payload: dict, datarun: str = None, year: str = None, country: str = "us") -> dict: - """This service requires inputs of all sectors that will be broken out into groups. + def post_econbase( + self, + payload: dict, + datarun: str | None = None, + year: str | None = None, + country: str = "us", + ) -> dict: + """ + Requires inputs of all sectors that will be broken into groups. Args: payload (dict): the json being passed to the server - datarun (str, optional): the datarun to use. If none is provided, will use the latest datarun available - year (str, optional): the year to use. If none is provided, will use the latest year available - country (str, optional): the country to request data for (currently only US supported). If none is provided, will default to the US + datarun (str, optional): the datarun to use. + year (str, optional): the year to use. 
+ country (str, optional): the country to request data for Returns: dict: the json portion of the response from the API @@ -244,6 +309,6 @@ def post_econbase(self, payload: dict, datarun: str = None, year: str = None, co year = max(self.get_years(datarun, country)) return self.download_data( - url = f"v1/{country}/{datarun}/{year}/econbase", - payload = payload + api_endpoint=f"v1/{country}/{datarun}/{year}/econbase", + payload=payload, ).json() diff --git a/apis/usOccEarnings.py b/apis/usOccEarnings.py index 92c4a35..6391a37 100644 --- a/apis/usOccEarnings.py +++ b/apis/usOccEarnings.py @@ -1,5 +1,7 @@ """Summary """ +from __future__ import annotations + from .base import EmsiBaseConnection @@ -17,8 +19,7 @@ class UsOccupationEarningsConnection(EmsiBaseConnection): """ def __init__(self) -> None: - """Summary - """ + """Summary""" super().__init__() self.base_url = "https://earnings.emsicloud.com/" @@ -45,7 +46,7 @@ def get_datarun_years(self, datarun: str) -> list: Returns: list: Description """ - return self.download_data("v1/us/{}/years".format(datarun)).json() + return self.download_data(f"v1/us/{datarun}/years").json() def post_percentile_wages(self, datarun: str, payload: dict) -> dict: """Summary @@ -57,7 +58,10 @@ def post_percentile_wages(self, datarun: str, payload: dict) -> dict: Returns: dict: Description """ - return self.download_data("v1/us/{}/percentile_wages".format(datarun), payload).json() + return self.download_data( + f"v1/us/{datarun}/percentile_wages", + payload, + ).json() def post_employment_at_wage(self, datarun: str, payload: dict) -> dict: """Summary @@ -69,9 +73,16 @@ def post_employment_at_wage(self, datarun: str, payload: dict) -> dict: Returns: dict: Description """ - return self.download_data("v1/us/{}/employment_at_wage".format(datarun), payload).json() - - def postemployment_at_wage_by_occ(self, datarun: str, payload: dict) -> dict: + return self.download_data( + f"v1/us/{datarun}/employment_at_wage", + payload, + ).json() + + 
def postemployment_at_wage_by_occ( + self, + datarun: str, + payload: dict, + ) -> dict: """Summary Args: @@ -81,4 +92,7 @@ def postemployment_at_wage_by_occ(self, datarun: str, payload: dict) -> dict: Returns: dict: Description """ - return self.download_data("v1/us/{}/employment_at_wage_by_occ".format(datarun), payload).json() + return self.download_data( + f"v1/us/{datarun}/employment_at_wage_by_occ", + payload, + ).json() diff --git a/apis/usPostings.py b/apis/usPostings.py index 6db8a44..dd701a8 100644 --- a/apis/usPostings.py +++ b/apis/usPostings.py @@ -1,5 +1,7 @@ """Summary """ +from __future__ import annotations + from .base import JobPostingsConnection @@ -13,8 +15,7 @@ class UnitedStatesPostingsConnection(JobPostingsConnection): """ def __init__(self) -> None: - """Summary - """ + """Summary""" super().__init__() self.base_url = "https://emsiservices.com/jpa/" self.scope = "postings:us" diff --git a/docs/emsi_titles.md b/docs/emsi_titles.md index 8e1f9a1..dcf4744 100644 --- a/docs/emsi_titles.md +++ b/docs/emsi_titles.md @@ -2,7 +2,7 @@ ```python import EmsiApiPy -conn = EmsiApiPy.EmsiTitlesConnection() +conn = EmsiApiPy.TitlesConnection() # make sure we have a good connection assert conn.is_healthy() @@ -26,21 +26,21 @@ print(conn.get_list_all_skills()) print(conn.get_title_by_id("ETEB3BB8E555C79368")) """ { - 'pluralName': '.NET Developers', + 'pluralName': '.NET Developers', 'mapping': { 'skills': [ { 'id': 'KS1200B62W5ZF38RJ7TD', 'name': '.NET Framework' } - ], + ], 'socs': [ { - 'id': '15-1256', + 'id': '15-1256', 'name': 'Software Developers and Software Quality Assurance Analysts and Testers' - }, + }, { - 'id': '15-1251', + 'id': '15-1251', 'name': 'Computer Programmers' } ] diff --git a/docs/raw_doc_pages/ACS.md b/docs/raw_doc_pages/ACS.md index 59f105d..e24fa47 100644 --- a/docs/raw_doc_pages/ACS.md +++ b/docs/raw_doc_pages/ACS.md @@ -824,4 +824,3 @@ Resource not found. 
- diff --git a/docs/raw_doc_pages/Automation_Index.md b/docs/raw_doc_pages/Automation_Index.md index bb160be..53e1ded 100644 --- a/docs/raw_doc_pages/Automation_Index.md +++ b/docs/raw_doc_pages/Automation_Index.md @@ -1,4 +1,4 @@ -# Occupation Automation Index +# Occupation Automation Index #### v1.1.2 ##### Information on past releases can be found in the [Changelog](/updates/automation-index-changelog). @@ -611,4 +611,3 @@ The facet you requested wasn't found. - diff --git a/docs/raw_doc_pages/Canada_Postings.md b/docs/raw_doc_pages/Canada_Postings.md index b872f44..0a78df1 100644 --- a/docs/raw_doc_pages/Canada_Postings.md +++ b/docs/raw_doc_pages/Canada_Postings.md @@ -1,4 +1,4 @@ -# Canada Job Postings +# Canada Job Postings #### v1.23.0 ##### Information on past releases can be found in the [Changelog](/updates/canada-job-postings-api-changelog). @@ -22262,4 +22262,3 @@ The facet you requested wasn't found. - diff --git a/docs/raw_doc_pages/Companies.md b/docs/raw_doc_pages/Companies.md index 9dd1985..ad97a8c 100644 --- a/docs/raw_doc_pages/Companies.md +++ b/docs/raw_doc_pages/Companies.md @@ -1,4 +1,4 @@ -# Companies API +# Companies API #### v1.0.0 ##### Information on past releases can be found in the [Changelog](/updates/companies-api-changelog). @@ -2168,4 +2168,3 @@ Unsupported Content Type. - diff --git a/docs/raw_doc_pages/Geography.md b/docs/raw_doc_pages/Geography.md index 57e7024..6b6c705 100644 --- a/docs/raw_doc_pages/Geography.md +++ b/docs/raw_doc_pages/Geography.md @@ -1,4 +1,4 @@ -# Geography API (GIS) +# Geography API (GIS) #### v1.13.2 ##### Information on past releases can be found in the [Changelog](/updates/geography-changelog). 
diff --git a/docs/raw_doc_pages/Global_Postings.md b/docs/raw_doc_pages/Global_Postings.md index 95e92e0..6fbb71a 100644 --- a/docs/raw_doc_pages/Global_Postings.md +++ b/docs/raw_doc_pages/Global_Postings.md @@ -1,4 +1,4 @@ -# Global Job Postings +# Global Job Postings #### v1.6.1 ##### Information on past releases can be found in the [Changelog](/updates/global-job-postings-api-changelog). @@ -6502,4 +6502,3 @@ The facet you requested wasn't found. - diff --git a/docs/raw_doc_pages/Global_Profiles.md b/docs/raw_doc_pages/Global_Profiles.md index effc0a7..b2629c6 100644 --- a/docs/raw_doc_pages/Global_Profiles.md +++ b/docs/raw_doc_pages/Global_Profiles.md @@ -1,4 +1,4 @@ -# Global Profiles +# Global Profiles #### v1.10.2 ##### Information on past releases can be found in the [Changelog](/updates/global-profiles-changelog). @@ -3857,4 +3857,3 @@ The facet you requested wasn't found. - diff --git a/docs/raw_doc_pages/IPEDS.md b/docs/raw_doc_pages/IPEDS.md index 0430d2e..020ef18 100644 --- a/docs/raw_doc_pages/IPEDS.md +++ b/docs/raw_doc_pages/IPEDS.md @@ -3,7 +3,7 @@ ## Introduction This API provides metadata about educational institutions reporting via IPEDS, and a translation layer between CIP and SOC codes. -Information and search functionality for institutions are exposed via the +Information and search functionality for institutions are exposed via the [institution](#institution-endpoints) family of endpoints. A SOC-CIP mapping is provided via the [soccip endpoints](#soc-cip-mapping-endpoints). @@ -28,7 +28,7 @@ This endpoint checks the health of the service. If the service is healthy, retu ## Institution Endpoints ### `POST` /institutions -Fetch information for one or more institutions. The institution IDs are +Fetch information for one or more institutions. The institution IDs are IPEDS Unit IDs. 
#### Code Examples diff --git a/docs/raw_doc_pages/Skills.md b/docs/raw_doc_pages/Skills.md index cdb766a..6e33152 100644 --- a/docs/raw_doc_pages/Skills.md +++ b/docs/raw_doc_pages/Skills.md @@ -1,4 +1,4 @@ -# Emsi Skills API +# Emsi Skills API #### v2.6.1 > Transitioning from v1 of the Skills API? See our [v2 upgrade notes](/updates/upgrade-to-skills-v2) for changes in the new version. @@ -3481,4 +3481,3 @@ Unsupported Content Type. - diff --git a/docs/raw_doc_pages/Talent_Benchmark.md b/docs/raw_doc_pages/Talent_Benchmark.md index c22b59c..394e8d8 100644 --- a/docs/raw_doc_pages/Talent_Benchmark.md +++ b/docs/raw_doc_pages/Talent_Benchmark.md @@ -1,4 +1,4 @@ -# Talent Benchmark +# Talent Benchmark #### v1.4.0 ##### Information on past releases can be found in the [Changelog](/updates/talent-benchmark-changelog). @@ -4494,4 +4494,3 @@ Could not normalize title 'invalid'. - diff --git a/docs/raw_doc_pages/Titles.md b/docs/raw_doc_pages/Titles.md index 13f2d7e..b18626c 100644 --- a/docs/raw_doc_pages/Titles.md +++ b/docs/raw_doc_pages/Titles.md @@ -1,4 +1,4 @@ -# Titles API +# Titles API #### v1.11.0 ##### Information on past releases can be found in the [Changelog](/updates/titles-api-changelog). @@ -3234,4 +3234,3 @@ Unsupported Content Type. - diff --git a/docs/raw_doc_pages/UK_Postings.md b/docs/raw_doc_pages/UK_Postings.md index dd2e26e..47eb363 100644 --- a/docs/raw_doc_pages/UK_Postings.md +++ b/docs/raw_doc_pages/UK_Postings.md @@ -1,4 +1,4 @@ -# UK Job Postings +# UK Job Postings #### v2.18.0 ##### Information on past releases can be found in the [Changelog](/updates/uk-job-postings-changelog). @@ -16783,4 +16783,3 @@ The facet you requested wasn't found. 
- diff --git a/docs/raw_doc_pages/US_Postings.md b/docs/raw_doc_pages/US_Postings.md index ec95ee4..3d5e104 100644 --- a/docs/raw_doc_pages/US_Postings.md +++ b/docs/raw_doc_pages/US_Postings.md @@ -1,4 +1,4 @@ -# Job Postings +# Job Postings #### v2.39.0 ##### Information on past releases can be found in the [Changelog](/updates/job-postings-api-changelog). @@ -25312,4 +25312,3 @@ The facet you requested wasn't found. - diff --git a/docs/raw_doc_pages/US_Profiles.md b/docs/raw_doc_pages/US_Profiles.md index 96c7736..cdc4bd2 100644 --- a/docs/raw_doc_pages/US_Profiles.md +++ b/docs/raw_doc_pages/US_Profiles.md @@ -1,4 +1,4 @@ -# Aggregate Profiles +# Aggregate Profiles #### v1.15.2 ##### Information on past releases can be found in the [Changelog](/updates/aggregate-profile-data-api-changelog). @@ -15629,4 +15629,3 @@ The facet you requested wasn't found. - diff --git a/docs/us_profiles.md b/docs/us_profiles.md index 59aa52e..7062ce4 100644 --- a/docs/us_profiles.md +++ b/docs/us_profiles.md @@ -29,10 +29,10 @@ print(conn.post_totals(payload)) print(conn.get_rankings()) """ [ - 'certifications', 'certifications_name', 'cip2', 'cip2_name', 'cip4', 'cip4_name', 'cip6', 'cip6_name', - 'city', 'city_name', 'company', 'company_name', 'county', 'edulevels', 'edulevels_name', 'fips', - 'hard_skills', 'hard_skills_name', 'msa', 'naics2', 'naics3', 'naics4', 'naics5', 'naics6', 'onet', - 'schools', 'schools_ipeds', 'schools_name', 'skills', 'skills_name', 'soc2', 'soc3', 'soc4', 'soc5', + 'certifications', 'certifications_name', 'cip2', 'cip2_name', 'cip4', 'cip4_name', 'cip6', 'cip6_name', + 'city', 'city_name', 'company', 'company_name', 'county', 'edulevels', 'edulevels_name', 'fips', + 'hard_skills', 'hard_skills_name', 'msa', 'naics2', 'naics3', 'naics4', 'naics5', 'naics6', 'onet', + 'schools', 'schools_ipeds', 'schools_name', 'skills', 'skills_name', 'soc2', 'soc3', 'soc4', 'soc5', 'soft_skills', 'soft_skills_name', 'state', 'title', 'title_name' ] """ @@ -61,11 
+61,11 @@ print(conn.post_rankings(facet, payload)) { 'data': { 'ranking': { - 'buckets': [...], - 'facet': 'hard_skills_name', - 'limit': 10, + 'buckets': [...], + 'facet': 'hard_skills_name', + 'limit': 10, 'rank_by': 'profiles' - }, + }, 'totals': { 'profiles': 37798 } diff --git a/generate_raw_docs.py b/generate_raw_docs.py index e90e8ee..b7bc509 100644 --- a/generate_raw_docs.py +++ b/generate_raw_docs.py @@ -1,52 +1,46 @@ """ Loops through the APIs and generates the raw documentation files for each API. """ +from __future__ import annotations -from EmsiApiPy import ( +from EmsiApiPy import ACSIndicatorsConnection +from EmsiApiPy import AggregateProfilesConnection +from EmsiApiPy import AutomationIndexConnection +from EmsiApiPy import CanadaPostingsConnection +from EmsiApiPy import CompaniesConnection +from EmsiApiPy import GeographyConnection +from EmsiApiPy import GlobalPostingsConnection +from EmsiApiPy import GlobalProfilesConnection +from EmsiApiPy import IpedsConnection +from EmsiApiPy import SkillsClassificationConnection +from EmsiApiPy import TalentBenchmarkConnection +from EmsiApiPy import TitlesConnection +from EmsiApiPy import UKPostingsConnection +from EmsiApiPy import UnitedStatesPostingsConnection +from EmsiApiPy import UsOccupationEarningsConnection + +for connection in [ AutomationIndexConnection, AggregateProfilesConnection, CanadaPostingsConnection, UnitedStatesPostingsConnection, ACSIndicatorsConnection, SkillsClassificationConnection, - EmsiTitlesConnection, + TitlesConnection, UsOccupationEarningsConnection, GeographyConnection, IpedsConnection, UKPostingsConnection, TalentBenchmarkConnection, - GlobalPostingsConnection, GlobalProfilesConnection, + GlobalPostingsConnection, CompaniesConnection, # these apis don't have a "docs" endpoint # UsCompensationConnection, # USInputOutputConncetion, # CoreLMIConnection, -) - -for connection in [ - AutomationIndexConnection, - AggregateProfilesConnection, - CanadaPostingsConnection, - 
UnitedStatesPostingsConnection, - ACSIndicatorsConnection, - SkillsClassificationConnection, - EmsiTitlesConnection, - UsOccupationEarningsConnection, - GeographyConnection, - IpedsConnection, - UKPostingsConnection, - TalentBenchmarkConnection, - GlobalProfilesConnection, - GlobalPostingsConnection, - CompaniesConnection, - # these apis don't have a "docs" endpoint - # UsCompensationConnection, - # USInputOutputConncetion, - # CoreLMIConnection, ]: conn = connection() - print(conn.name) doc_string = conn.get_docs() with open(f"docs/raw_doc_pages/{conn.name}.md", "w+") as out_file: diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..a8f43fe --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,2 @@ +[tool.black] +line-length = 79 diff --git a/samples/Job Title Normalization.ipynb b/samples/Job Title Normalization.ipynb index 33fb717..47a9e5d 100644 --- a/samples/Job Title Normalization.ipynb +++ b/samples/Job Title Normalization.ipynb @@ -17,8 +17,8 @@ "import json # for pretty printing\n", "\n", "# import the library and create the connection\n", - "from EmsiApiPy import EmsiTitlesConnection\n", - "conn = EmsiTitlesConnection()" + "from EmsiApiPy import TitlesConnection\n", + "conn = TitlesConnection()" ] }, { diff --git a/samples/Mapping Title to ONET.ipynb b/samples/Mapping Title to ONET.ipynb index 15a029c..2ae359d 100644 --- a/samples/Mapping Title to ONET.ipynb +++ b/samples/Mapping Title to ONET.ipynb @@ -16,12 +16,12 @@ "outputs": [], "source": [ "# import the libraries\n", - "from EmsiApiPy import EmsiTitlesConnection, UnitedStatesPostingsConnection\n", + "from EmsiApiPy import TitlesConnection, UnitedStatesPostingsConnection\n", "\n", "# for pretty printing\n", "import json\n", "\n", - "titles_conn = EmsiTitlesConnection()\n", + "titles_conn = TitlesConnection()\n", "jpa_conn = UnitedStatesPostingsConnection()" ] }, diff --git a/samples/Mapping Title to SOC.ipynb b/samples/Mapping Title to SOC.ipynb index b9b940c..0182133 100644 --- 
a/samples/Mapping Title to SOC.ipynb +++ b/samples/Mapping Title to SOC.ipynb @@ -16,12 +16,12 @@ "outputs": [], "source": [ "# import the libraries\n", - "from EmsiApiPy import EmsiTitlesConnection, UnitedStatesPostingsConnection\n", + "from EmsiApiPy import TitlesConnection, UnitedStatesPostingsConnection\n", "\n", "# for pretty printing\n", "import json\n", "\n", - "titles_conn = EmsiTitlesConnection()\n", + "titles_conn = TitlesConnection()\n", "jpa_conn = UnitedStatesPostingsConnection()" ] }, diff --git a/samples/coreLmiSample.py b/samples/coreLmiSample.py index d9de23f..cd493ba 100644 --- a/samples/coreLmiSample.py +++ b/samples/coreLmiSample.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import EmsiApiPy conn = EmsiApiPy.CoreLMIConnection() @@ -6,7 +8,11 @@ dimension = "Area" -df = conn.get_dimension_hierarchy_df(dataset = dataset, dimension = dimension) +df = conn.get_dimension_hierarchy_df( + dataset=dataset, + dimension=dimension, + datarun="2023.1", +) print(df.head()) @@ -20,23 +26,29 @@ """ # limit only to the states -df = df.loc[df['level_name'] == '2'] +df = df.loc[df["level_name"] == "2"] payload = { "metrics": [ { - "name": "Dollars.2019" - } + "name": "Dollars.2019", + }, ], "constraints": [ { "dimensionName": "Area", - "map": {row[1]['name']: [row[1]["child"]] for row in df.iterrows()} - } - ] + "map": { + row[1]["name"]: [row[1]["child"]] for row in df.iterrows() + }, + }, + ], } -data_df = conn.post_retrieve_df(dataset = dataset, payload = payload) +data_df = conn.post_retrieve_df( + dataset=dataset, + payload=payload, + datarun="2023.1", +) print(data_df.head()) """ diff --git a/samples/download_skills.py b/samples/download_skills.py index 969fec7..7fd76f5 100644 --- a/samples/download_skills.py +++ b/samples/download_skills.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import EmsiApiPy import pandas as pd @@ -5,19 +7,19 @@ conn = EmsiApiPy.SkillsClassificationConnection() # download all the skills -data = 
conn.get_list_all_skills()['data'] +data = conn.get_list_all_skills()["data"] # load into pandas df = pd.DataFrame( { - "id": [record['id'] for record in data], - "infoUrl": [record['infoUrl'] for record in data], - "name": [record['name'] for record in data], - "type": [record['type']['name'] for record in data] - } + "id": [record["id"] for record in data], + "infoUrl": [record["infoUrl"] for record in data], + "name": [record["name"] for record in data], + "type": [record["type"]["name"] for record in data], + }, ) # export writer = pd.ExcelWriter("skills_list.xlsx") -df.to_excel(writer, 'Skills', index = False) +df.to_excel(writer, "Skills", index=False) writer.save() diff --git a/samples/download_titles.py b/samples/download_titles.py index 2e83519..e30652b 100644 --- a/samples/download_titles.py +++ b/samples/download_titles.py @@ -1,8 +1,10 @@ +from __future__ import annotations + import EmsiApiPy import pandas as pd # create the connection -conn = EmsiApiPy.EmsiTitlesConnection() +conn = EmsiApiPy.TitlesConnection() # download the data data = conn.get_list_all_titles() @@ -12,5 +14,5 @@ # export writer = pd.ExcelWriter("title_list.xlsx") -df.to_excel(writer, 'Titles', index = False) +df.to_excel(writer, "Titles", index=False) writer.save() diff --git a/setup.py b/setup.py index bb1cea1..bcf8020 100644 --- a/setup.py +++ b/setup.py @@ -1,13 +1,15 @@ +from __future__ import annotations + import setuptools -with open("README.md", "r", encoding="utf-8") as fh: +with open("README.md", encoding="utf-8") as fh: long_description = fh.read() setuptools.setup( name="EmsiApiPy", version="0.0.1", author="Caleb Courtney", - author_email=None, + author_email="None", description="Package for connecting to the Emsi APIs", long_description=long_description, long_description_content_type="text/markdown", diff --git a/tests/run_tests.sh b/tests/run_tests.sh deleted file mode 100755 index d54dc90..0000000 --- a/tests/run_tests.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/bash - 
-pytest tests/test_acs.py -pytest tests/test_corelmi.py -pytest tests/test_us_profiles.py -pytest tests/test_emsi_title.py -pytest tests/test_open_skills.py diff --git a/tests/test_acs.py b/tests/test_acs.py deleted file mode 100644 index 8f54227..0000000 --- a/tests/test_acs.py +++ /dev/null @@ -1,53 +0,0 @@ -# thanks to Paolo Rovelli for this: https://stackoverflow.com/questions/11536764/how-to-fix-attempted-relative-import-in-non-package-even-with-init-py/27876800#27876800 -import sys -from os import path -sys.path.append(path.dirname(path.dirname(path.abspath(__file__)))) - - -from apis.acsIndicators import ACSIndicatorsConnection - -acs_conn = ACSIndicatorsConnection() - - -def test_get_status(): - response = acs_conn.get_status() - - assert response == "Service is healthy" - - -def test_get_meta(): - response = acs_conn.get_meta() - - for key in ['acs_version', 'area_levels', 'attribution', 'metrics', 'taxonomies']: - assert key in response['data'] - - -def test_get_metrics(): - response = acs_conn.get_metrics() - assert len(response) > 0 - - response = acs_conn.get_metrics("median_age") - assert response['name'] == 'median_age' - - -def test_get_level(): - response = acs_conn.get_level("nation", ["name", "median_age"]) - - assert len(response) == 1 - assert response[0]['name'] == 'United States' - - -def test_post_level(): - payload = { - "ids": [ - 0 - ], - "metrics": [ - "median_age", - "name" - ] - } - response = acs_conn.post_level("nation", payload) - - assert len(response) == 1 - assert response[0]['name'] == 'United States' diff --git a/tests/test_corelmi.py b/tests/test_corelmi.py deleted file mode 100644 index c6252d3..0000000 --- a/tests/test_corelmi.py +++ /dev/null @@ -1,123 +0,0 @@ -""" -Need to add testing to ensure that when the number of queries runs out (download_data) that the process in fact waits for queries to become available again. 
-""" - -# thanks to Paolo Rovelli for this: https://stackoverflow.com/questions/11536764/how-to-fix-attempted-relative-import-in-non-package-even-with-init-py/27876800#27876800 -import sys -from os import path -sys.path.append(path.dirname(path.dirname(path.abspath(__file__)))) - - -from apis.coreLmi import CoreLMIConnection - -lmi_conn = CoreLMIConnection() - - -def test_get_meta(): - response = lmi_conn.get_meta() - assert response != {} - - -def test_get_meta_dataset(): - response = lmi_conn.get_meta_dataset(dataset = "emsi.us.occupation", datarun = '2020.3') - assert response != {} - - -def test_get_meta_dataset_dimension(): - response = lmi_conn.get_meta_dataset_dimension("emsi.us.occupation", "Occupation", datarun = '2020.3') - for column in ['name', 'hierarchy']: - assert column in response - - -def test_post_retrieve_data(): - payload = { - "metrics": [ - { - "name": "Jobs.2019" - } - ], - "constraints": [ - { - "dimensionName": "Area", - "map": {"US": [0]} - } - ] - } - response = lmi_conn.post_retrieve_data("emsi.us.occupation", payload, datarun = '2020.3') - - for key in ['data', 'errors', 'timings', 'totalRows']: - assert key in response - - assert response['totalRows'] == 1 - - -def test_get_dimension_hierarchy_df(): - df = lmi_conn.get_dimension_hierarchy_df('emsi.us.occupation', 'Occupation', datarun = '2020.3') - - assert not df.empty - columns = ['parent', 'level_name', 'name', 'descr', 'typicalEducation', 'typicalExperience', 'typicalTraining', 'child'] - for column in columns: - assert column in df.columns - - -def test_post_retrieve_df(): - payload = { - "metrics": [ - { - "name": "Jobs.2019" - } - ], - "constraints": [ - { - "dimensionName": "Area", - "map": {"US": [0]} - } - ] - } - - df = lmi_conn.post_retrieve_df("emsi.us.occupation", payload, datarun = '2020.3') - - assert not df.empty - assert len(df) == 1 - for column in ['Area', 'Jobs.2019']: - column in df.columns - - -# def test_download_data(): -# limit = 300 -# while limit > 1: -# 
payload = { -# "metrics": [ -# { -# "name": "Jobs.2019" -# } -# ], -# "constraints": [ -# { -# "dimensionName": "Area", -# "map": { -# "US": [ -# 0 -# ] -# } -# } -# ] -# } - -# response = lmi_conn.download_data("emsi.us.industry", payload) - -# limit = int(response.headers['X-Rate-Limit-Remaining']) - -# if limit == 1: -# break - -# response = lmi_conn.download_data("emsi.us.industry", payload) -# print(response.text) - -# response = lmi_conn.download_data("emsi.us.industry", payload) -# print(response.text) - -# response = lmi_conn.download_data("emsi.us.industry", payload) -# print(response.text) - -# assert False diff --git a/tests/test_emsi_title.py b/tests/test_emsi_title.py deleted file mode 100644 index ad2554b..0000000 --- a/tests/test_emsi_title.py +++ /dev/null @@ -1,39 +0,0 @@ -# thanks to Paolo Rovelli for this: https://stackoverflow.com/questions/11536764/how-to-fix-attempted-relative-import-in-non-package-even-with-init-py/27876800#27876800 -import sys -from os import path -sys.path.append(path.dirname(path.dirname(path.abspath(__file__)))) - - -from apis.emsiTitles import EmsiTitlesConnection - -titles_conn = EmsiTitlesConnection() - - -def test_get_status(): - response = titles_conn.get_status() - - assert response.status_code == 200 - - -def test_get_help(): - response = titles_conn.get_help() - assert isinstance(response, str) - - -def test_get_titles(): - response = titles_conn.get_titles() - assert len(response) > 0 - - -def test_get_normalize(): - response = titles_conn.get_normalize("software engineer iii") - - for key in ['id', 'title', 'similarity']: - assert key in response - - -def test_post_normalize(): - response = titles_conn.post_normalize("software engineer iii") - - for key in ['id', 'title', 'similarity']: - assert key in response diff --git a/tests/test_open_skills.py b/tests/test_open_skills.py deleted file mode 100644 index be4229c..0000000 --- a/tests/test_open_skills.py +++ /dev/null @@ -1,76 +0,0 @@ -# thanks to Paolo Rovelli 
for this: https://stackoverflow.com/questions/11536764/how-to-fix-attempted-relative-import-in-non-package-even-with-init-py/27876800#27876800 -import sys -from os import path -sys.path.append(path.dirname(path.dirname(path.abspath(__file__)))) - - -from apis.openSkills import SkillsClassificationConnection - -skills_conn = SkillsClassificationConnection() - - -def test_get_status(): - response = skills_conn.get_status() - - assert response.json()['data']['message'] == 'Service is healthy' - - -def test_get_is_healthy(): - response = skills_conn.is_healthy() - - assert response is True - - -def test_get_versions(): - response = skills_conn.get_versions() - - assert len(response['data']) >= 38 - - -def test_get_version_metadata(): - response = skills_conn.get_version_metadata() - - assert float(response['data']['version']) >= 7.2 - - -def test_get_list_all_skills(): - response = skills_conn.get_list_all_skills() - - assert len(response['data']) > 29000 - - -def test_post_list_requested_skills(): - skills = "{ \"ids\": [ \"KS1200364C9C1LK3V5Q1\", \"KS1275N74XZ574T7N47D\", \"KS125QD6K0QLLKCTPJQ0\" ] }" - response = skills_conn.post_list_requested_skills(payload = skills) - - assert len(response['data']) == 3 - - -def test_get_skill_by_id(): - skill_id = 'KS125LS6N7WP4S6SFTCK' - response = skills_conn.get_skill_by_id(skill_id = skill_id) - - assert response['data']['name'] == 'Python (Programming Language)' - - -def test_post_find_related_skills(): - skills = ["KS1200364C9C1LK3V5Q1", "KS1275N74XZ574T7N47D", "KS125QD6K0QLLKCTPJQ0"] - response = skills_conn.post_find_related_skills(skill_ids = skills) - - assert len(response['data']) > 1 - - -def test_post_extract(): - job_description = "{ \"text\": \"... Great candidates also have\\n\\n Experience with a particular JS MV* framework (we happen to use React)\\n Experience working with databases\\n Experience with AWS\\n Familiarity with microservice architecture\\n Familiarity with modern CSS practices, e.g. 
LESS, SASS, CSS-in-JS ...\"}" - - response = skills_conn.post_extract(description = job_description) - for skill in ['JavaScript (Programming Language)', 'React.js', 'Amazon Web Services', 'Cascading Style Sheets (CSS)']: - assert skill in [d['name'] for d in [e['skill'] for e in response['data']]] - - -def test_post_extract_with_source(): - job_description = "{ \"text\": \"... Great candidates also have\\n\\n Experience with a particular JS MV* framework (we happen to use React)\\n Experience working with databases\\n Experience with AWS\\n Familiarity with microservice architecture\\n Familiarity with modern CSS practices, e.g. LESS, SASS, CSS-in-JS ...\"}" - - response = skills_conn.post_extract_with_source(description = job_description) - - assert len(response['data']) > 0 diff --git a/tests/test_us_profiles.py b/tests/test_us_profiles.py deleted file mode 100644 index a3e1fe3..0000000 --- a/tests/test_us_profiles.py +++ /dev/null @@ -1,161 +0,0 @@ -# thanks to Paolo Rovelli for this: https://stackoverflow.com/questions/11536764/how-to-fix-attempted-relative-import-in-non-package-even-with-init-py/27876800#27876800 -import sys -from os import path -sys.path.append(path.dirname(path.dirname(path.abspath(__file__)))) - - -from apis.aggregateProfiles import AggregateProfilesConnection - -us_profiles_conn = AggregateProfilesConnection() - - -def test_get_status(): - message = us_profiles_conn.get_status() - assert message == "Service is healthy", "API is down or message has changed" - - -def test_is_healthy(): - """ - """ - assert us_profiles_conn.is_healthy(), "connection is not healthy" - - -def test_metadata(): - """ - """ - response = us_profiles_conn.get_metadata() - - for key in ['attribution', 'earliest_year', 'facets', 'filters', 'latest_year', 'metrics', 'supportsAdvancedFilters', 'taxonomies', 'taxonomy_versions']: - assert key in response, "{} not in metadata".format(key) - - -def test_post_totals(): - """ - This will likely need to be updated on a 
regular basis. Should find a more consistent way to test this endpoint - """ - payload = { - "filter": { - "last_updated": { - "start": "2018", - "end": "2019" - }, - "state": [ - 16 - ], - "skills_name": { - "include": [ - "SQL (Programming Language)", - "C++ (Programming Language)" - ], - "exclude": [ - "Java (Programming Language)", - "C Sharp (Programming Language)" - ], - "include_op": "and", - "exclude_op": "or" - }, - "educations": { - "schools_name": [ - "University of Idaho" - ] - } - }, - "metrics": [ - "profiles" - ] - } - response = us_profiles_conn.post_totals(payload) - - assert response['profiles'] == 24 - - -def test_post_recency(): - payload = { - "filter": { - "last_updated": { - "start": "2001", - "end": "2019" - } - }, - "metrics": [ - "profiles", - "unique_schools", - "unique_companies" - ] - } - response = us_profiles_conn.post_recency(payload) - - for key in ['profiles', 'unique_schools', 'year', 'unique_companies']: - assert key in response, "{} not in recency".format(key) - - -def test_get_rankings(): - response = us_profiles_conn.get_rankings() - assert len(response) != 0, "No facets to rank by? Seems dubious" - - -def test_post_rankings(): - payload = { - "filter": { - "last_updated": { - "start": "2001", - "end": "2019" - }, - "state": [ - 16 - ] - }, - "rank": { - "by": "profiles", - "limit": 2 - } - } - - facet = 'fips' - - response = us_profiles_conn.post_rankings(facet, payload) - - assert len(response['data']['ranking']['buckets']) == 2, "buckets data should be length 2, found {}".format(response['ranking']['buckets']) - - -def test_get_taxonomies(): - response = us_profiles_conn.get_taxonomies() - assert len(response) != 0, "No taxonomies? Seems dubious" - - response = us_profiles_conn.get_taxonomies(facet = 'title', q = 'Data Scientist') - assert len(response) > 0, "No matches for Data Scientist? 
Seems dubious" - - -def test_post_taxonomies(): - payload = { - "ids": [ - "11-1011" - ] - } - response = us_profiles_conn.post_taxonomies('soc5', payload) - assert len(response) == 1, "No exact match for Chief Execs? Seems dubious" - - -def test_post_rankings_df(): - payload = { - "filter": { - "last_updated": { - "start": "2001", - "end": "2019" - }, - "state": [ - 16 - ] - }, - "rank": { - "by": "profiles", - "limit": 2 - } - } - - facet = 'fips' - - df = us_profiles_conn.post_rankings_df(facet, payload) - - assert not(df.empty), "No data received from the API. Seems dubious." - assert len(df) == 2, "DataFrame doesn't have exactly two rows, found {} instead".format(len(df))