diff --git a/.github/workflows/test_and_build.yml b/.github/workflows/test_and_build.yml
index 9eb0079e..5421bd7b 100644
--- a/.github/workflows/test_and_build.yml
+++ b/.github/workflows/test_and_build.yml
@@ -37,6 +37,9 @@ jobs:
         COGNITE_CLIENT_SECRET: ${{ secrets.COGNITE_PROJECT_CLIENT_SECRET }}
         COGNITE_TOKEN_SCOPES: ${{ secrets.COGNITE_PROJECT_SCOPES }}
         COGNITE_TOKEN_URL: ${{ secrets.COGNITE_PROJECT_TOKEN_URL }}
+        KEYVAULT_CLIENT_ID: ${{ secrets.KEYVAULT_CLIENT_ID }}
+        KEYVAULT_TENANT_ID: ${{ secrets.KEYVAULT_TENANT_ID }}
+        KEYVAULT_CLIENT_SECRET: ${{ secrets.KEYVAULT_CLIENT_SECRET }}
         COGNITE_PROJECT: extractor-tests
         COGNITE_BASE_URL: https://greenfield.cognitedata.com
       run: |
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 47502ae2..8de8b5f9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -12,6 +12,12 @@ Changes are grouped as follows
 - `Fixed` for any bug fixes.
 - `Security` in case of vulnerabilities.
 
+## [6.3.0]
+
+### Added
+
+ * Added support to retrieve secrets from Azure Keyvault.
+
 ## [6.2.2]
 
 ### Added
diff --git a/cognite/extractorutils/__init__.py b/cognite/extractorutils/__init__.py
index 04d02604..789b29e1 100644
--- a/cognite/extractorutils/__init__.py
+++ b/cognite/extractorutils/__init__.py
@@ -16,5 +16,5 @@
 Cognite extractor utils is a Python package that simplifies the development of new extractors.
 """
 
-__version__ = "6.2.2"
+__version__ = "6.3.0"
 from .base import Extractor
diff --git a/cognite/extractorutils/configtools/loaders.py b/cognite/extractorutils/configtools/loaders.py
index 2b031a4f..2ae835a4 100644
--- a/cognite/extractorutils/configtools/loaders.py
+++ b/cognite/extractorutils/configtools/loaders.py
@@ -24,6 +24,10 @@
 
 import dacite
 import yaml
+from azure.core.credentials import TokenCredential
+from azure.core.exceptions import HttpResponseError, ResourceNotFoundError, ServiceRequestError
+from azure.identity import ClientSecretCredential, DefaultAzureCredential
+from azure.keyvault.secrets import SecretClient
 from yaml.scanner import ScannerError
 
 from cognite.extractorutils.configtools._util import _to_snake_case
@@ -36,10 +40,117 @@
 CustomConfigClass = TypeVar("CustomConfigClass", bound=BaseConfig)
 
 
+class KeyVaultAuthenticationMethod(Enum):
+    """Authentication methods accepted in the `azure-keyvault` config section."""
+
+    DEFAULT = "default"
+    CLIENTSECRET = "client-secret"
+
+
+class KeyVaultLoader:
+    """
+    YAML constructor for the `!keyvault` tag: resolves a secret name to its value in an Azure Key Vault.
+
+    The vault client is created on first use and then reused for every following tag.
+
+    Args:
+        config: The `azure-keyvault` section of the config file, or None if there is no such section.
+    """
+
+    def __init__(self, config: Optional[dict]):
+        self.config = config
+
+        self.credentials: Optional[TokenCredential] = None
+        self.client: Optional[SecretClient] = None
+
+    def _init_client(self) -> None:
+        """
+        Validate the key vault config and create the credentials and secret client from it.
+
+        Raises:
+            InvalidConfigError: If the config is missing or empty, lacks a mandatory key, or names an
+                unknown authentication method.
+        """
+        from dotenv import find_dotenv, load_dotenv
+
+        if not self.config:
+            raise InvalidConfigError(
+                "Attempted to load values from Azure key vault with no key vault configured. "
+                "Include an `azure-keyvault` section in your config to use the !keyvault tag."
+            )
+
+        if "keyvault-name" not in self.config:
+            raise InvalidConfigError("Please add the keyvault-name")
+
+        if "authentication-method" not in self.config:
+            raise InvalidConfigError(
+                "Please enter the authentication method to access Azure KeyVault. "
+                "Possible values are: default or client-secret"
+            )
+
+        vault_url = f"https://{self.config['keyvault-name']}.vault.azure.net"
+
+        if self.config["authentication-method"] == KeyVaultAuthenticationMethod.DEFAULT.value:
+            _logger.info("Using Azure DefaultCredentials to access KeyVault")
+            self.credentials = DefaultAzureCredential()
+
+        elif self.config["authentication-method"] == KeyVaultAuthenticationMethod.CLIENTSECRET.value:
+            auth_parameters = ("client-id", "tenant-id", "secret")
+
+            _logger.info("Using Azure ClientSecret credentials to access KeyVault")
+
+            # The three values may be ${VAR} references, so load any .env file before expanding them
+            dotenv_path = find_dotenv(usecwd=True)
+            load_dotenv(dotenv_path=dotenv_path, override=True)
+
+            if not all(param in self.config for param in auth_parameters):
+                raise InvalidConfigError(
+                    "Missing client secret parameters. client-id, tenant-id and secret are mandatory"
+                )
+
+            self.credentials = ClientSecretCredential(
+                tenant_id=os.path.expandvars(self.config["tenant-id"]),
+                client_id=os.path.expandvars(self.config["client-id"]),
+                client_secret=os.path.expandvars(self.config["secret"]),
+            )
+        else:
+            raise InvalidConfigError(
+                "Invalid KeyVault authentication method. Possible values : default or client-secret"
+            )
+
+        self.client = SecretClient(vault_url=vault_url, credential=self.credentials)  # type: ignore
+
+    def __call__(self, _: yaml.SafeLoader, node: yaml.Node) -> str:
+        """
+        Fetch the secret named by the value of `node`.
+
+        Raises:
+            InvalidConfigError: If the key vault config is invalid or the secret can not be fetched.
+        """
+        if self.client is None:
+            # Authenticate lazily and only once, instead of once per !keyvault tag in the document
+            self._init_client()
+        try:
+            return self.client.get_secret(node.value).value  # type: ignore  # _init_client guarantees not None
+        except (ResourceNotFoundError, ServiceRequestError, HttpResponseError) as e:
+            raise InvalidConfigError(str(e)) from e
+
+
 class _EnvLoader(yaml.SafeLoader):
     pass
 
 
+class SafeLoaderIgnoreUnknown(yaml.SafeLoader):
+    """Safe loader that constructs None for every tag it has no constructor for."""
+
+    def ignore_unknown(self, node: yaml.Node) -> None:
+        return None
+
+
+# In PyYAML, the constructor registered under the tag None is the fallback for tags without a constructor
+SafeLoaderIgnoreUnknown.add_constructor(None, SafeLoaderIgnoreUnknown.ignore_unknown)  # type: ignore
+
+
 def _env_constructor(_: yaml.SafeLoader, node: yaml.Node) -> bool:
     bool_values = {
         "true": True,
@@ -61,6 +172,19 @@ def _load_yaml_dict(
 ) -> Dict[str, Any]:
     loader = _EnvLoader if expand_envvars else yaml.SafeLoader
 
+    # The document is parsed twice, so read streams up front rather than relying on them being seekable
+    if not isinstance(source, str):
+        source = source.read()
+
     try:
+        # First pass: ignore all custom tags and only pick up the `azure-keyvault` section, if there is one
+        initial_load = yaml.load(source, Loader=SafeLoaderIgnoreUnknown)  # noqa: S506
+        keyvault_config = initial_load.get("azure-keyvault") if isinstance(initial_load, dict) else None
+
+        # Register !keyvault on a per-call subclass: it then works with either base loader, and one document's
+        # key vault settings are never left behind on a shared loader class
+        loader = type("_ConfigLoader", (loader,), {})  # type: ignore
+        loader.add_constructor("!keyvault", KeyVaultLoader(keyvault_config))
+
         config_dict = yaml.load(source, Loader=loader)  # noqa: S506
     except ScannerError as e:
@@ -69,6 +193,10 @@ def _load_yaml_dict(
         cause = e.problem or e.context
         raise InvalidConfigError(f"Invalid YAML{formatted_location}: {cause or ''}") from e
 
+    # Drop the key vault section before the dict is processed further; non-dict documents are left untouched
+    if isinstance(config_dict, dict):
+        config_dict.pop("azure-keyvault", None)
+
     config_dict = dict_manipulator(config_dict)
     config_dict = _to_snake_case(config_dict, case_style)
 
diff --git a/pyproject.toml b/pyproject.toml
index 3c6b44da..56c4cc77 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "cognite-extractor-utils"
-version = "6.2.2"
+version = "6.3.0"
 description = "Utilities for easier development of extractors for CDF"
 authors = ["Mathias Lohne "]
 license = "Apache-2.0"
@@ -62,6 +62,8 @@ more-itertools = "^10.0.0"
 typing-extensions = ">=3.7.4, <5"
 python-dotenv = "^1.0.0"
 jq = [{version = "^1.3.0", platform = "darwin"}, {version = "^1.3.0", platform = "linux"}]
+azure-identity = "^1.14.0"
+azure-keyvault-secrets = "^4.7.0"
 
 [tool.poetry.extras]
 experimental = ["cognite-sdk-experimental"]
diff --git a/tests/tests_unit/dummyconfig_keyvault.yaml b/tests/tests_unit/dummyconfig_keyvault.yaml
new file mode 100644
index 00000000..7a14a61e
--- /dev/null
+++ b/tests/tests_unit/dummyconfig_keyvault.yaml
@@ -0,0 +1,25 @@
+version: "1"
+
+logger:
+    console:
+        level: INFO
+
+azure-keyvault:
+    authentication-method: client-secret
+    client-id: ${KEYVAULT_CLIENT_ID}
+    tenant-id: ${KEYVAULT_TENANT_ID}
+    secret: ${KEYVAULT_CLIENT_SECRET}
+    keyvault-name: extractor-keyvault
+
+cognite:
+    project: mathiaslohne-develop
+
+    idp-authentication:
+        client-id: !keyvault test-id
+        secret: !keyvault test-secret
+        token-url: https://get-a-token.com/token
+        scopes:
+            - https://api.cognitedata.com/.default
+
+source:
+    frequency: 0.1
diff --git a/tests/tests_unit/test_base.py b/tests/tests_unit/test_base.py
index e9702933..eae0675f 100644
--- a/tests/tests_unit/test_base.py
+++ b/tests/tests_unit/test_base.py
@@ -49,6 +49,14 @@ def test_load_config(self):
         e1._initial_load_config("tests/tests_unit/dummyconfig.yaml")
         self.assertIsInstance(e1.config, ConfigWithStates)
 
+    def test_load_config_keyvault(self):
+        e7 = Extractor(name="my_extractor7", description="description", config_class=ConfigWithoutStates)
+        e7._initial_load_config("tests/tests_unit/dummyconfig_keyvault.yaml")
+
+        # dummy Azure KeyVault secrets
+        self.assertEqual(e7.config.cognite.idp_authentication.client_id, "12345")
+        self.assertEqual(e7.config.cognite.idp_authentication.secret, "abcde")
+
     @patch("cognite.client.CogniteClient")
     def test_load_state_store(self, get_client_mock):
         e2 = Extractor(name="my_extractor2", description="description", config_class=ConfigWithStates)