[FEATURE] DataBricksSecret for getting secrets from DataBricks scope (#133)


## Description
The `DataBricksSecret` class can be used to retrieve secrets from Databricks
secret scopes.
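
A minimal, hedged usage sketch (the scope name and alias below are hypothetical, and fetching real values requires a Databricks runtime where `dbutils` is available; `_get_secrets` is the internal hook introduced in this PR):

```python
from koheesio.integrations.spark.databricks.secrets import DataBricksSecret

# hypothetical scope name; `parent` is derived automatically as "kafka_secrets"
secret = DataBricksSecret(scope="kafka-secrets", alias={"pwd": "password"})

# fetches every key in the scope, renaming keys via the alias map
secrets = secret._get_secrets()  # e.g. {"password": "...", "user": "..."}
```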

## Related Issue
#66 

## Motivation and Context
Adds support for Databricks secret scopes.

## How Has This Been Tested?
Added mocked tests covering the new class.

## Screenshots (if appropriate):

## Types of changes
- [ ] Bug fix (non-breaking change which fixes an issue)
- [x] New feature (non-breaking change which adds functionality)
- [ ] Breaking change (fix or feature that would cause existing
functionality to change)

## Checklist:
- [x] My code follows the code style of this project.
- [ ] My change requires a change to the documentation.
- [ ] I have updated the documentation accordingly.
- [x] I have read the **CONTRIBUTING** document.
- [x] I have added tests to cover my changes.
- [ ] All new and existing tests passed.

---------

Co-authored-by: Danny Meijer <[email protected]>
3 people authored Nov 29, 2024
1 parent de56d00 commit a7d2997
Showing 10 changed files with 168 additions and 24 deletions.
17 changes: 0 additions & 17 deletions docs/tutorials/getting-started.md
@@ -19,23 +19,6 @@
```
</details>

<details>
<summary>poetry</summary>

If you're using Poetry, add the following entry to the `pyproject.toml` file:

```toml title="pyproject.toml"
[[tool.poetry.source]]
name = "nike"
url = "https://artifactory.nike.com/artifactory/api/pypi/python-virtual/simple"
secondary = true
```

```bash
poetry add koheesio
```
</details>

<details>
<summary>pip</summary>

2 changes: 1 addition & 1 deletion src/koheesio/__about__.py
@@ -12,7 +12,7 @@

LICENSE_INFO = "Licensed as Apache 2.0"
SOURCE = "https://github.com/Nike-Inc/koheesio"
__version__ = "0.9.0"
__version__ = "0.9.0rc7"
__logo__ = (
75,
(
2 changes: 1 addition & 1 deletion src/koheesio/integrations/snowflake/__init__.py
@@ -449,7 +449,7 @@ class GrantPrivilegesOnObject(SnowflakeRunQueryPython):
object="MY_TABLE",
type="TABLE",
warehouse="MY_WH",
user="gid.account@nike.com",
user="gid.account@abc.com",
password=Secret("super-secret-password"),
role="APPLICATION.SNOWFLAKE.ADMIN",
permissions=["SELECT", "INSERT"],
Empty file: src/koheesio/integrations/spark/databricks/__init__.py
79 changes: 79 additions & 0 deletions src/koheesio/integrations/spark/databricks/secrets.py
@@ -0,0 +1,79 @@
"""Module for retrieving secrets from DataBricks Scopes.
Secrets are stored as SecretContext and can be accessed accordingly.
See DataBricksSecret for more information.
"""

from typing import Dict, Optional
import re

from pyspark.sql import SparkSession

from koheesio.integrations.spark.databricks.utils import get_dbutils
from koheesio.models import Field, model_validator
from koheesio.secrets import Secret


class DataBricksSecret(Secret):
    """
    Retrieve secrets from a Databricks secret scope and wrap them in a Context class for easy access.

    All secrets are stored under the "secrets" root and a "parent" key. The parent is either derived
    from the secret scope name by replacing "/" and "-" with "_", or provided explicitly by the user.
    Secrets are wrapped in `pydantic.SecretStr`.

    Examples
    --------
    ```python
    context = {
        "secrets": {
            "parent": {
                "webhook": SecretStr("**********"),
                "description": SecretStr("**********"),
            }
        }
    }
    ```

    Values can be decoded like this:

    ```python
    context.secrets.parent.webhook.get_secret_value()
    ```

    or, if working with a dictionary is preferable:

    ```python
    for key, value in context.get_all().items():
        value.get_secret_value()
    ```
    """

    scope: str = Field(description="Secret scope name")
    alias: Optional[Dict[str, str]] = Field(default_factory=dict, description="Alias for secret keys")

    @model_validator(mode="before")
    def _set_parent_to_scope(cls, values):
        """
        Set the default value for the `parent` parameter on model initialization when it was not
        explicitly set by the user. In that case the scope name is used:
        'secret-scope' -> 'secret_scope'
        """
        regex = re.compile(r"[/-]")
        path = values.get("scope")

        if not values.get("parent"):
            values["parent"] = regex.sub("_", path)

        return values

    @property
    def _client(self):
        """Instantiated Databricks client."""
        return get_dbutils(SparkSession.getActiveSession())  # type: ignore

    def _get_secrets(self):
        """Dictionary of secrets, keyed by their (optionally aliased) names."""
        all_keys = (secret_meta.key for secret_meta in self._client.secrets.list(scope=self.scope))
        secret_data = {}

        for key in all_keys:
            # use the alias as the key name when one is defined for this key
            key_name = key if not (self.alias and self.alias.get(key)) else self.alias[key]  # pylint: disable=E1101
            secret_data[key_name] = self._client.secrets.get(scope=self.scope, key=key)

        return secret_data
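
For illustration, the `parent` derivation performed by `_set_parent_to_scope` can be reproduced standalone (runnable anywhere, no Databricks required):

```python
import re

# mirrors the validator above: "/" and "-" in the scope name become "_"
regex = re.compile(r"[/-]")

assert regex.sub("_", "secret-scope") == "secret_scope"
assert regex.sub("_", "team/project-secrets") == "team_project_secrets"
```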
16 changes: 16 additions & 0 deletions src/koheesio/integrations/spark/databricks/utils.py
@@ -0,0 +1,16 @@
from __future__ import annotations

from pyspark.sql import SparkSession

from koheesio.spark.utils import on_databricks


def get_dbutils(spark_session: SparkSession) -> DBUtils:  # type: ignore # noqa: F821
    """Return a DBUtils instance for the given SparkSession; raises RuntimeError off-Databricks."""
    if not on_databricks():
        raise RuntimeError("dbutils not available")

    from pyspark.dbutils import DBUtils  # pylint: disable=E0611,E0401 # type: ignore

    dbutils = DBUtils(spark_session)

    return dbutils
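
A hedged usage sketch for `get_dbutils` (this only works on a Databricks cluster, where `on_databricks()` returns `True`; the scope and key names are hypothetical):

```python
from pyspark.sql import SparkSession

from koheesio.integrations.spark.databricks.utils import get_dbutils

spark = SparkSession.getActiveSession()
dbutils = get_dbutils(spark)  # raises RuntimeError when not on Databricks

# the same secrets API that DataBricksSecret uses under the hood
value = dbutils.secrets.get(scope="my-scope", key="my-key")
```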
8 changes: 4 additions & 4 deletions src/koheesio/integrations/spark/snowflake.py
@@ -302,7 +302,7 @@ class Query(SnowflakeReader):
database="MY_DB",
schema_="MY_SCHEMA",
warehouse="MY_WH",
user="gid.account@nike.com",
user="gid.account@abc.com",
password=Secret("super-secret-password"),
role="APPLICATION.SNOWFLAKE.ADMIN",
query="SELECT * FROM MY_TABLE",
@@ -412,7 +412,7 @@ class CreateOrReplaceTableFromDataFrame(SnowflakeTransformation):
database="MY_DB",
schema="MY_SCHEMA",
warehouse="MY_WH",
user="gid.account@nike.com",
user="gid.account@abc.com",
password="super-secret-password",
role="APPLICATION.SNOWFLAKE.ADMIN",
table="MY_TABLE",
@@ -477,7 +477,7 @@ class GetTableSchema(SnowflakeStep):
database="MY_DB",
schema_="MY_SCHEMA",
warehouse="MY_WH",
user="gid.account@nike.com",
user="gid.account@abc.com",
password="super-secret-password",
role="APPLICATION.SNOWFLAKE.ADMIN",
table="MY_TABLE",
@@ -512,7 +512,7 @@ class AddColumn(SnowflakeStep):
database="MY_DB",
schema_="MY_SCHEMA",
warehouse="MY_WH",
user="gid.account@nike.com",
user="gid.account@abc.com",
password=Secret("super-secret-password"),
role="APPLICATION.SNOWFLAKE.ADMIN",
table="MY_TABLE",
18 changes: 18 additions & 0 deletions tests/spark/conftest.py
@@ -1,3 +1,4 @@
+from typing import Any
from collections import namedtuple
import datetime
from decimal import Decimal
@@ -347,3 +348,20 @@ def df_with_all_types(spark):
data=[[v[0] for v in data.values()]],
schema=StructType([StructField(name=v[1], dataType=v[2]) for v in data.values()]),
)


class ScopeSecrets:
    """Minimal stand-in for the `dbutils.secrets` API, backed by a plain dict; used in tests."""

    class SecretMeta:
        def __init__(self, key: str):
            self.key = key

    def __init__(self, secrets: dict):
        self.secrets = secrets

    def get(self, scope: str, key: str) -> Any:
        return self.secrets.get(key)

    def list(self, scope: str):
        keys = [ScopeSecrets.SecretMeta(key=key) for key in self.secrets.keys()]

        return keys
48 changes: 48 additions & 0 deletions tests/spark/integrations/databrikcs/test_secrets.py
@@ -0,0 +1,48 @@
from unittest.mock import patch

from conftest import ScopeSecrets

from koheesio.integrations.spark.databricks.secrets import DataBricksSecret


class TestDatabricksSecret:
    def test_set_parent_to_scope(self):
        # Test when parent is not provided
        secret = DataBricksSecret(scope="secret-scope")
        assert secret.parent == "secret_scope"

        # Test when parent is provided
        secret = DataBricksSecret(scope="secret-scope", parent="custom_parent")
        assert secret.parent == "custom_parent"

    @patch("koheesio.integrations.spark.databricks.secrets.DataBricksSecret._client")
    def test_get_secrets_no_alias(self, mock_databricks_client):
        with patch("koheesio.integrations.spark.databricks.utils.on_databricks", return_value=True):
            dd = {
                "key1": "value_of_key1",
                "key2": "value_of_key2",
            }
            databricks = DataBricksSecret(scope="dummy", parent="kafka")
            mock_databricks_client.secrets = ScopeSecrets(dd)
            secrets = databricks._get_secrets()

            assert secrets["key1"] == "value_of_key1"
            assert secrets["key2"] == "value_of_key2"

    @patch("koheesio.integrations.spark.databricks.secrets.DataBricksSecret._client")
    def test_get_secrets_alias(self, mock_databricks_client):
        with patch("koheesio.integrations.spark.databricks.utils.on_databricks", return_value=True):
            dd = {
                "key1": "value_of_key1",
                "key2": "value_of_key2",
            }
            alias = {
                "key1": "new_name_key1",
                "key2": "new_name_key2",
            }
            databricks = DataBricksSecret(scope="dummy", parent="kafka", alias=alias)
            mock_databricks_client.secrets = ScopeSecrets(dd)
            secrets = databricks._get_secrets()

            assert secrets["new_name_key1"] == "value_of_key1"
            assert secrets["new_name_key2"] == "value_of_key2"
2 changes: 1 addition & 1 deletion tests/spark/test_spark.py
@@ -26,7 +26,7 @@ def test_import_error_no_error(self):
with mock.patch.dict("sys.modules", {"pyspark": None}):
from koheesio.sso.okta import OktaAccessToken

-OktaAccessToken(url="https://nike.okta.com", client_id="client_id", client_secret=secret)
+OktaAccessToken(url="https://abc.okta.com", client_id="client_id", client_secret=secret)

def test_import_error_with_error(self):
with mock.patch.dict("sys.modules", {"pyspark.sql": None, "koheesio.steps.spark": None}):
