diff --git a/docs/tutorials/getting-started.md b/docs/tutorials/getting-started.md
index 8fd80ff..6fe8889 100644
--- a/docs/tutorials/getting-started.md
+++ b/docs/tutorials/getting-started.md
@@ -19,23 +19,6 @@
```
-
- poetry
-
- If you're using Poetry, add the following entry to the `pyproject.toml` file:
-
- ```toml title="pyproject.toml"
- [[tool.poetry.source]]
- name = "nike"
- url = "https://artifactory.nike.com/artifactory/api/pypi/python-virtual/simple"
- secondary = true
- ```
-
- ```bash
- poetry add koheesio
- ```
-
-
pip
diff --git a/src/koheesio/__about__.py b/src/koheesio/__about__.py
index 7a8c687..20f1a1b 100644
--- a/src/koheesio/__about__.py
+++ b/src/koheesio/__about__.py
@@ -12,7 +12,7 @@
LICENSE_INFO = "Licensed as Apache 2.0"
SOURCE = "https://github.com/Nike-Inc/koheesio"
-__version__ = "0.9.0"
+__version__ = "0.9.0rc7"
__logo__ = (
75,
(
diff --git a/src/koheesio/integrations/snowflake/__init__.py b/src/koheesio/integrations/snowflake/__init__.py
index 9e1603d..dcabbcb 100644
--- a/src/koheesio/integrations/snowflake/__init__.py
+++ b/src/koheesio/integrations/snowflake/__init__.py
@@ -449,7 +449,7 @@ class GrantPrivilegesOnObject(SnowflakeRunQueryPython):
object="MY_TABLE",
type="TABLE",
warehouse="MY_WH",
- user="gid.account@nike.com",
+ user="gid.account@abc.com",
password=Secret("super-secret-password"),
role="APPLICATION.SNOWFLAKE.ADMIN",
permissions=["SELECT", "INSERT"],
diff --git a/src/koheesio/integrations/spark/databricks/__init__.py b/src/koheesio/integrations/spark/databricks/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/koheesio/integrations/spark/databricks/secrets.py b/src/koheesio/integrations/spark/databricks/secrets.py
new file mode 100644
index 0000000..1f04d54
--- /dev/null
+++ b/src/koheesio/integrations/spark/databricks/secrets.py
@@ -0,0 +1,79 @@
+"""Module for retrieving secrets from DataBricks Scopes.
+
+Secrets are stored as SecretContext and can be accessed accordingly.
+
+See DataBricksSecret for more information.
+"""
+
+from typing import Dict, Optional
+import re
+
+from pyspark.sql import SparkSession
+
+from koheesio.integrations.spark.databricks.utils import get_dbutils
+from koheesio.models import Field, model_validator
+from koheesio.secrets import Secret
+
+
+class DataBricksSecret(Secret):
+ """
+    Retrieve secrets from a Databricks secret scope and wrap them in a Context class for easy access.
+    All secrets are stored under the "secrets" root and a "parent" node. The parent is either derived
+    from the secret scope by replacing "/" and "-" with "_", or provided explicitly by the user.
+    Secret values are wrapped in pydantic's SecretStr.
+
+ Examples
+    --------
+
+ ```python
+    context = {
+        "secrets": {
+            "parent": {"webhook": SecretStr("**********"), "description": SecretStr("**********")}
+        }
+    }
+ ```
+
+ Values can be decoded like this:
+ ```python
+ context.secrets.parent.webhook.get_secret_value()
+ ```
+    or, if working with a dictionary is preferable:
+ ```python
+ for key, value in context.get_all().items():
+ value.get_secret_value()
+ ```
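+
+    A minimal instantiation sketch (the scope name and alias mapping are illustrative):
+    ```python
+    secret = DataBricksSecret(scope="my-secret-scope", alias={"webhook-key": "webhook"})
+    ```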
+ """
+
+    scope: str = Field(description="Databricks secret scope name")
+ alias: Optional[Dict[str, str]] = Field(default_factory=dict, description="Alias for secret keys")
+
+ @model_validator(mode="before")
+ def _set_parent_to_scope(cls, values):
+ """
+        Set the default value for the `parent` parameter on model initialization when it was
+        not explicitly set by the user. In that case the scope name is used, with "/" and "-"
+        replaced by "_":
+
+        'secret-scope' -> 'secret_scope'
+ """
+ regex = re.compile(r"[/-]")
+ path = values.get("scope")
+
+ if not values.get("parent"):
+ values["parent"] = regex.sub("_", path)
+
+ return values
+
+ @property
+ def _client(self):
+ """
+        Instantiated Databricks dbutils client, bound to the active SparkSession.
+ """
+
+ return get_dbutils(SparkSession.getActiveSession()) # type: ignore
+
+ def _get_secrets(self):
+ """Dictionary of secrets."""
+ all_keys = (secret_meta.key for secret_meta in self._client.secrets.list(scope=self.scope))
+ secret_data = {}
+
+ for key in all_keys:
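+            # use the alias for this key when one is configured; otherwise keep the original key name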
+ key_name = key if not (self.alias and self.alias.get(key)) else self.alias[key] # pylint: disable=E1101
+ secret_data[key_name] = self._client.secrets.get(scope=self.scope, key=key)
+
+ return secret_data
diff --git a/src/koheesio/integrations/spark/databricks/utils.py b/src/koheesio/integrations/spark/databricks/utils.py
new file mode 100644
index 0000000..9a2a884
--- /dev/null
+++ b/src/koheesio/integrations/spark/databricks/utils.py
@@ -0,0 +1,16 @@
+from __future__ import annotations
+
+from pyspark.sql import SparkSession
+
+from koheesio.spark.utils import on_databricks
+
+
+def get_dbutils(spark_session: SparkSession) -> DBUtils: # type: ignore # noqa: F821
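+    """Return a DBUtils instance bound to the given SparkSession.
+
+    Raises a RuntimeError when not running on Databricks, since `pyspark.dbutils` is only
+    available in the Databricks runtime.
+    """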
+ if not on_databricks():
+        raise RuntimeError("dbutils is not available: not running on Databricks")
+
+ from pyspark.dbutils import DBUtils # pylint: disable=E0611,E0401 # type: ignore
+
+ dbutils = DBUtils(spark_session)
+
+ return dbutils
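+
+
+# Hedged usage sketch (assumes a Databricks cluster; the scope and key names are illustrative):
+#
+#   spark = SparkSession.getActiveSession()
+#   dbutils = get_dbutils(spark)
+#   value = dbutils.secrets.get(scope="my-scope", key="my-key")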
diff --git a/src/koheesio/integrations/spark/snowflake.py b/src/koheesio/integrations/spark/snowflake.py
index a6e6941..ba292af 100644
--- a/src/koheesio/integrations/spark/snowflake.py
+++ b/src/koheesio/integrations/spark/snowflake.py
@@ -302,7 +302,7 @@ class Query(SnowflakeReader):
database="MY_DB",
schema_="MY_SCHEMA",
warehouse="MY_WH",
- user="gid.account@nike.com",
+ user="gid.account@abc.com",
password=Secret("super-secret-password"),
role="APPLICATION.SNOWFLAKE.ADMIN",
query="SELECT * FROM MY_TABLE",
@@ -412,7 +412,7 @@ class CreateOrReplaceTableFromDataFrame(SnowflakeTransformation):
database="MY_DB",
schema="MY_SCHEMA",
warehouse="MY_WH",
- user="gid.account@nike.com",
+ user="gid.account@abc.com",
password="super-secret-password",
role="APPLICATION.SNOWFLAKE.ADMIN",
table="MY_TABLE",
@@ -477,7 +477,7 @@ class GetTableSchema(SnowflakeStep):
database="MY_DB",
schema_="MY_SCHEMA",
warehouse="MY_WH",
- user="gid.account@nike.com",
+ user="gid.account@abc.com",
password="super-secret-password",
role="APPLICATION.SNOWFLAKE.ADMIN",
table="MY_TABLE",
@@ -512,7 +512,7 @@ class AddColumn(SnowflakeStep):
database="MY_DB",
schema_="MY_SCHEMA",
warehouse="MY_WH",
- user="gid.account@nike.com",
+ user="gid.account@abc.com",
password=Secret("super-secret-password"),
role="APPLICATION.SNOWFLAKE.ADMIN",
table="MY_TABLE",
diff --git a/tests/spark/conftest.py b/tests/spark/conftest.py
index f918ae4..9809b9b 100644
--- a/tests/spark/conftest.py
+++ b/tests/spark/conftest.py
@@ -1,3 +1,4 @@
+from typing import Any
from collections import namedtuple
import datetime
from decimal import Decimal
@@ -347,3 +348,20 @@ def df_with_all_types(spark):
data=[[v[0] for v in data.values()]],
schema=StructType([StructField(name=v[1], dataType=v[2]) for v in data.values()]),
)
+
+
+class ScopeSecrets:
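+    """Minimal test stand-in for `dbutils.secrets`: `list` returns objects exposing a `key`
+    attribute, and `get` returns the stored value for a key (the scope argument is ignored)."""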
+ class SecretMeta:
+ def __init__(self, key: str):
+ self.key = key
+
+ def __init__(self, secrets: dict):
+ self.secrets = secrets
+
+ def get(self, scope: str, key: str) -> Any:
+ return self.secrets.get(key)
+
+ def list(self, scope: str):
+ keys = [ScopeSecrets.SecretMeta(key=key) for key in self.secrets.keys()]
+
+ return keys
diff --git a/tests/spark/integrations/databricks/test_secrets.py b/tests/spark/integrations/databricks/test_secrets.py
new file mode 100644
index 0000000..11af117
--- /dev/null
+++ b/tests/spark/integrations/databricks/test_secrets.py
@@ -0,0 +1,48 @@
+from unittest.mock import patch
+
+from conftest import ScopeSecrets
+
+from koheesio.integrations.spark.databricks.secrets import DataBricksSecret
+
+
+class TestDatabricksSecret:
+ def test_set_parent_to_scope(self):
+ # Test when parent is not provided
+ secret = DataBricksSecret(scope="secret-scope")
+ assert secret.parent == "secret_scope"
+
+ # Test when parent is provided
+ secret = DataBricksSecret(scope="secret-scope", parent="custom_parent")
+ assert secret.parent == "custom_parent"
+
+ @patch("koheesio.integrations.spark.databricks.secrets.DataBricksSecret._client")
+ def test_get_secrets_no_alias(self, mock_databricks_client):
+ with patch("koheesio.integrations.spark.databricks.utils.on_databricks", return_value=True):
+ dd = {
+ "key1": "value_of_key1",
+ "key2": "value_of_key2",
+ }
+ databricks = DataBricksSecret(scope="dummy", parent="kafka")
+ mock_databricks_client.secrets = ScopeSecrets(dd)
+ secrets = databricks._get_secrets()
+
+ assert secrets["key1"] == "value_of_key1"
+ assert secrets["key2"] == "value_of_key2"
+
+ @patch("koheesio.integrations.spark.databricks.secrets.DataBricksSecret._client")
+ def test_get_secrets_alias(self, mock_databricks_client):
+ with patch("koheesio.integrations.spark.databricks.utils.on_databricks", return_value=True):
+ dd = {
+ "key1": "value_of_key1",
+ "key2": "value_of_key2",
+ }
+ alias = {
+ "key1": "new_name_key1",
+ "key2": "new_name_key2",
+ }
+ databricks = DataBricksSecret(scope="dummy", parent="kafka", alias=alias)
+ mock_databricks_client.secrets = ScopeSecrets(dd)
+ secrets = databricks._get_secrets()
+
+ assert secrets["new_name_key1"] == "value_of_key1"
+ assert secrets["new_name_key2"] == "value_of_key2"
diff --git a/tests/spark/test_spark.py b/tests/spark/test_spark.py
index e19b3e0..003060b 100644
--- a/tests/spark/test_spark.py
+++ b/tests/spark/test_spark.py
@@ -26,7 +26,7 @@ def test_import_error_no_error(self):
with mock.patch.dict("sys.modules", {"pyspark": None}):
from koheesio.sso.okta import OktaAccessToken
- OktaAccessToken(url="https://nike.okta.com", client_id="client_id", client_secret=secret)
+ OktaAccessToken(url="https://abc.okta.com", client_id="client_id", client_secret=secret)
def test_import_error_with_error(self):
with mock.patch.dict("sys.modules", {"pyspark.sql": None, "koheesio.steps.spark": None}):