Skip to content

Commit

Permalink
Azure Keyvault support (#268)
Browse files Browse the repository at this point in the history
* wip

* adding app registration support

* lint

* fixing env var retrieval, adding tests

* adding env vars to tests workflow

* improving config logic

* bumping version

* adding suggestions

* removing garbage from .toml

* pre commit

* changing keyvault

* bumping version

* lint again

---------

Co-authored-by: Mathias Lohne <[email protected]>
  • Loading branch information
rsjr and mathialo authored Jan 11, 2024
1 parent dc7cf3e commit d166116
Show file tree
Hide file tree
Showing 7 changed files with 149 additions and 6 deletions.
3 changes: 3 additions & 0 deletions .github/workflows/test_and_build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ jobs:
COGNITE_CLIENT_SECRET: ${{ secrets.COGNITE_PROJECT_CLIENT_SECRET }}
COGNITE_TOKEN_SCOPES: ${{ secrets.COGNITE_PROJECT_SCOPES }}
COGNITE_TOKEN_URL: ${{ secrets.COGNITE_PROJECT_TOKEN_URL }}
KEYVAULT_CLIENT_ID: ${{ secrets.KEYVAULT_CLIENT_ID }}
KEYVAULT_TENANT_ID: ${{ secrets.KEYVAULT_TENANT_ID }}
KEYVAULT_CLIENT_SECRET: ${{ secrets.KEYVAULT_CLIENT_SECRET }}
COGNITE_PROJECT: extractor-tests
COGNITE_BASE_URL: https://greenfield.cognitedata.com
run: |
Expand Down
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,12 @@ Changes are grouped as follows
- `Fixed` for any bug fixes.
- `Security` in case of vulnerabilities.

## [6.3.0]

### Added

* Added support for retrieving secrets from Azure Key Vault.

## [6.2.2]

### Added
Expand Down
2 changes: 1 addition & 1 deletion cognite/extractorutils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,5 @@
Cognite extractor utils is a Python package that simplifies the development of new extractors.
"""

__version__ = "6.2.2"
__version__ = "6.3.0"
from .base import Extractor
107 changes: 103 additions & 4 deletions cognite/extractorutils/configtools/loaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@

import dacite
import yaml
from azure.core.credentials import TokenCredential
from azure.core.exceptions import HttpResponseError, ResourceNotFoundError, ServiceRequestError
from azure.identity import ClientSecretCredential, DefaultAzureCredential
from azure.keyvault.secrets import SecretClient
from yaml.scanner import ScannerError

from cognite.extractorutils.configtools._util import _to_snake_case
Expand All @@ -36,10 +40,88 @@
CustomConfigClass = TypeVar("CustomConfigClass", bound=BaseConfig)


class KeyVaultAuthenticationMethod(Enum):
    """Accepted values for the ``authentication-method`` key of the ``azure-keyvault`` config section."""

    # Selects ``DefaultAzureCredential`` when the key vault client is created.
    DEFAULT = "default"

    # Selects ``ClientSecretCredential``, built from ``client-id``, ``tenant-id`` and ``secret``.
    CLIENTSECRET = "client-secret"


class KeyVaultLoader:
    """
    YAML constructor that resolves ``!keyvault <secret-name>`` tags against Azure Key Vault.

    An instance is registered as a PyYAML constructor and is invoked once per tagged node. The
    Azure client is created lazily on the first tag and then reused, so configs without any
    ``!keyvault`` tag never need an ``azure-keyvault`` section and never touch Azure.

    Args:
        config: The raw ``azure-keyvault`` section of the config file, or ``None`` if the section
            is absent. Expected keys are ``keyvault-name`` and ``authentication-method``
            (``default`` or ``client-secret``); for ``client-secret``, ``client-id``,
            ``tenant-id`` and ``secret`` are required as well.
    """

    def __init__(self, config: Optional[dict]):
        self.config = config

        self.credentials: Optional[TokenCredential] = None
        self.client: Optional[SecretClient] = None

    def _init_client(self) -> None:
        """
        Validate the key vault config and build ``self.credentials`` and ``self.client``.

        Raises:
            InvalidConfigError: If the key vault section is missing or empty, lacks a required
                key, or names an unknown authentication method.
        """
        from dotenv import find_dotenv, load_dotenv

        if not self.config:
            raise InvalidConfigError(
                "Attempted to load values from Azure key vault with no key vault configured. "
                "Include an `azure-keyvault` section in your config to use the !keyvault tag."
            )

        if "keyvault-name" not in self.config:
            raise InvalidConfigError("Please add the keyvault-name")

        if "authentication-method" not in self.config:
            raise InvalidConfigError(
                "Please enter the authentication method to access Azure KeyVault. "
                "Possible values are: default or client-secret"
            )

        vault_url = f"https://{self.config['keyvault-name']}.vault.azure.net"
        auth_method = self.config["authentication-method"]

        if auth_method == KeyVaultAuthenticationMethod.DEFAULT.value:
            _logger.info("Using Azure DefaultCredentials to access KeyVault")
            self.credentials = DefaultAzureCredential()

        elif auth_method == KeyVaultAuthenticationMethod.CLIENTSECRET.value:
            auth_parameters = ("client-id", "tenant-id", "secret")

            _logger.info("Using Azure ClientSecret credentials to access KeyVault")

            # Values may be written as ${VAR} references; make variables from a .env file
            # available before they are expanded below.
            dotenv_path = find_dotenv(usecwd=True)
            load_dotenv(dotenv_path=dotenv_path, override=True)

            # A key that is present but empty/null is as unusable as a missing one, and would
            # otherwise crash os.path.expandvars with a TypeError.
            if not all(self.config.get(param) for param in auth_parameters):
                raise InvalidConfigError(
                    "Missing client secret parameters. client-id, tenant-id and secret are mandatory"
                )

            self.credentials = ClientSecretCredential(
                tenant_id=os.path.expandvars(self.config["tenant-id"]),
                client_id=os.path.expandvars(self.config["client-id"]),
                client_secret=os.path.expandvars(self.config["secret"]),
            )

        else:
            raise InvalidConfigError(
                "Invalid KeyVault authentication method. Possible values : default or client-secret"
            )

        self.client = SecretClient(vault_url=vault_url, credential=self.credentials)  # type: ignore

    def __call__(self, _: yaml.SafeLoader, node: yaml.Node) -> str:
        """
        Return the value of the Key Vault secret named by ``node.value``.

        Args:
            _: The YAML loader invoking this constructor (unused).
            node: The node carrying the ``!keyvault`` tag; its value is the secret name.

        Raises:
            InvalidConfigError: If the key vault config is invalid, or if Azure reports that the
                secret cannot be found or the request fails.
        """
        # Build the client once and reuse it for every subsequent !keyvault tag.
        if self.client is None:
            self._init_client()
        try:
            return self.client.get_secret(node.value).value  # type: ignore  # _init_client guarantees not None
        except (ResourceNotFoundError, ServiceRequestError, HttpResponseError) as e:
            raise InvalidConfigError(str(e)) from e


class _EnvLoader(yaml.SafeLoader):
    """``yaml.SafeLoader`` subclass kept separate so custom tags can be registered on it alone."""


class SafeLoaderIgnoreUnknown(yaml.SafeLoader):
    """``yaml.SafeLoader`` subclass offering a constructor that turns any node into ``None``."""

    def ignore_unknown(self, node: yaml.Node) -> None:
        """Discard ``node`` and produce ``None`` as its constructed value."""


def _env_constructor(_: yaml.SafeLoader, node: yaml.Node) -> bool:
bool_values = {
"true": True,
Expand All @@ -49,10 +131,6 @@ def _env_constructor(_: yaml.SafeLoader, node: yaml.Node) -> bool:
return bool_values.get(expanded_value.lower(), expanded_value)


_EnvLoader.add_implicit_resolver("!env", re.compile(r"\$\{([^}^{]+)\}"), None)
_EnvLoader.add_constructor("!env", _env_constructor)


def _load_yaml_dict(
source: Union[TextIO, str],
case_style: str = "hyphen",
Expand All @@ -61,6 +139,24 @@ def _load_yaml_dict(
) -> Dict[str, Any]:
loader = _EnvLoader if expand_envvars else yaml.SafeLoader

class SafeLoaderIgnoreUnknown(yaml.SafeLoader):
def ignore_unknown(self, node: yaml.Node) -> None:
return None

# Ignoring types since the key can be None.

SafeLoaderIgnoreUnknown.add_constructor(None, SafeLoaderIgnoreUnknown.ignore_unknown) # type: ignore
initial_load = yaml.load(source, Loader=SafeLoaderIgnoreUnknown) # noqa: S506

if not isinstance(source, str):
source.seek(0)

keyvault_config = initial_load.get("azure-keyvault")

_EnvLoader.add_implicit_resolver("!env", re.compile(r"\$\{([^}^{]+)\}"), None)
_EnvLoader.add_constructor("!env", _env_constructor)
_EnvLoader.add_constructor("!keyvault", KeyVaultLoader(keyvault_config))

try:
config_dict = yaml.load(source, Loader=loader) # noqa: S506
except ScannerError as e:
Expand All @@ -69,6 +165,9 @@ def _load_yaml_dict(
cause = e.problem or e.context
raise InvalidConfigError(f"Invalid YAML{formatted_location}: {cause or ''}") from e

if "azure-keyvault" in config_dict:
config_dict.pop("azure-keyvault")

config_dict = dict_manipulator(config_dict)
config_dict = _to_snake_case(config_dict, case_style)

Expand Down
4 changes: 3 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "cognite-extractor-utils"
version = "6.2.2"
version = "6.3.0"
description = "Utilities for easier development of extractors for CDF"
authors = ["Mathias Lohne <[email protected]>"]
license = "Apache-2.0"
Expand Down Expand Up @@ -62,6 +62,8 @@ more-itertools = "^10.0.0"
typing-extensions = ">=3.7.4, <5"
python-dotenv = "^1.0.0"
jq = [{version = "^1.3.0", platform = "darwin"}, {version = "^1.3.0", platform = "linux"}]
azure-identity = "^1.14.0"
azure-keyvault-secrets = "^4.7.0"

[tool.poetry.extras]
experimental = ["cognite-sdk-experimental"]
Expand Down
25 changes: 25 additions & 0 deletions tests/tests_unit/dummyconfig_keyvault.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
version: "1"

logger:
console:
level: INFO

azure-keyvault:
authentication-method: client-secret
client-id: ${KEYVAULT_CLIENT_ID}
tenant-id: ${KEYVAULT_TENANT_ID}
secret: ${KEYVAULT_CLIENT_SECRET}
keyvault-name: extractor-keyvault

cognite:
project: mathiaslohne-develop

idp-authentication:
client-id: !keyvault test-id
secret: !keyvault test-secret
token-url: https://get-a-token.com/token
scopes:
- https://api.cognitedata.com/.default

source:
frequency: 0.1
8 changes: 8 additions & 0 deletions tests/tests_unit/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,14 @@ def test_load_config(self):
e1._initial_load_config("tests/tests_unit/dummyconfig.yaml")
self.assertIsInstance(e1.config, ConfigWithStates)

def test_load_config_keyvault(self):
    """Values tagged ``!keyvault`` in the dummy config are replaced by the stored secrets."""
    extractor = Extractor(name="my_extractor7", description="description", config_class=ConfigWithoutStates)
    extractor._initial_load_config("tests/tests_unit/dummyconfig_keyvault.yaml")

    # Expected values are the dummy secrets `test-id` and `test-secret`
    # NOTE(review): presumably these live in the `extractor-keyvault` vault named in the config - confirm
    idp_auth = extractor.config.cognite.idp_authentication
    self.assertEqual(idp_auth.client_id, "12345")
    self.assertEqual(idp_auth.secret, "abcde")

@patch("cognite.client.CogniteClient")
def test_load_state_store(self, get_client_mock):
e2 = Extractor(name="my_extractor2", description="description", config_class=ConfigWithStates)
Expand Down

0 comments on commit d166116

Please sign in to comment.