-
Notifications
You must be signed in to change notification settings - Fork 5
/
azdbx_notebook_provisioner.py
57 lines (47 loc) · 2.79 KB
/
azdbx_notebook_provisioner.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# This is a sample solution for how to provision existing notebooks
# (like from an existing git repo) into user sandbox folders in an
# Azure Databricks workspace
# This script expects that the following environment vars are set:
#
# AZURE_SUBSCRIPTION_ID: with your Azure Subscription Id
# AZURE_RESOURCE_GROUP: with your Azure Resource Group
import os
import json
from base64 import b64encode
from azdbx_api_client import DatabricksAPIClient
# Get the Azure Databricks template parameters to get the deployed workspace's name
adb_template_params_path = os.path.join(
    os.path.dirname(__file__), 'arm_template_params', 'azure_databricks_npip_template_params.json')
with open(adb_template_params_path, 'r') as adb_template_params_file:
    adb_template_parameters = json.load(adb_template_params_file)
# Form the full resource id of the Azure Databricks workspace.
# Subscription/resource-group fall back to placeholder defaults when the
# environment variables are unset (useful for local dry runs).
adb_workspace_resource_id = (
    "/subscriptions/"
    + os.environ.get('AZURE_SUBSCRIPTION_ID', '11111111-1111-1111-1111-111111111111')
    + "/resourceGroups/"
    + os.environ.get('AZURE_RESOURCE_GROUP', 'my-adb-e2-rg')
    + "/providers/Microsoft.Databricks/workspaces/"
    + adb_template_parameters['workspaceName']
)
print("The workspace resource id is {}".format(adb_workspace_resource_id))
# Create the Databricks API client
databricks_api_client = DatabricksAPIClient(adb_workspace_resource_id)
print("The workspace URL is {}".format(databricks_api_client.get_url_prefix()))
# Import the notebooks to user sandbox folders in the Azure Databricks workspace.
# Each entry maps a local .dbc file (under ./notebooks) to its target
# workspace path. NOTE(review): the original script imported
# Read_Data_From_ADLS_Gen2 twice with identical arguments — the redundant
# duplicate call has been removed.
notebooks_to_import = [
    ('Create_Mount_Point_on_ADLS_Gen2.dbc',
     '/Users/[email protected]/Create_Mount_Point_on_ADLS_Gen2'),
    ('Read_Data_From_ADLS_Gen2.dbc',
     '/Users/[email protected]/Read_Data_From_ADLS_Gen2'),
    ('test_spark_configs.dbc',
     '/Users/[email protected]/test_spark_configs'),
]
for nb_filename, workspace_path in notebooks_to_import:
    nb_path = os.path.join(os.path.dirname(__file__), 'notebooks', nb_filename)
    # The workspace import API expects the archive contents base64-encoded.
    with open(nb_path, 'rb') as nb_file:
        content = b64encode(nb_file.read()).decode()
    databricks_api_client.import_notebook(workspace_path, 'PYTHON', 'DBC', content)