-
Notifications
You must be signed in to change notification settings - Fork 1.3k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #7 from sfc-gh-chmarshall/main
Updates for Summit '24 Hands on Lab
- Loading branch information
Showing
18 changed files
with
238 additions
and
321 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
# Can override the default connection name with an environment variable as follows | ||
#export SNOWFLAKE_DEFAULT_CONNECTION_NAME="default" | ||
|
||
# Only for Snow CLI, can override connection details as follows | ||
#export SNOWFLAKE_CONNECTIONS_DEFAULT_PASSWORD="" | ||
|
||
default_connection_name = "default" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
# Can override the default connection name with an environment variable as follows | ||
#export SNOWFLAKE_DEFAULT_CONNECTION_NAME="default" | ||
|
||
[default] | ||
account = "myaccount" | ||
user = "myuser" | ||
password = "mypassword" | ||
role = "HOL_ROLE" | ||
warehouse = "HOL_WH" | ||
database = "HOL_DB" | ||
schema = "HOL_SCHEMA" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
{ | ||
"configurations": [ | ||
{ | ||
"name": "Python Debugger: Python File", | ||
"type": "debugpy", | ||
"request": "launch", | ||
"program": "${file}" | ||
} | ||
] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
#------------------------------------------------------------------------------ | ||
# Hands-On Lab: Data Engineering with Snowpark | ||
# Script: 02_load_raw.py | ||
# Author: Jeremiah Hansen, Caleb Baechtold | ||
# Last Updated: 1/9/2023 | ||
#------------------------------------------------------------------------------ | ||
|
||
import time | ||
from snowflake.snowpark import Session | ||
|
||
|
||
# Raw table names, grouped by the source system they come from.
POS_TABLES = [
    'country',
    'franchise',
    'location',
    'menu',
    'truck',
    'order_header',
    'order_detail',
]
CUSTOMER_TABLES = ['customer_loyalty']

# Maps each stage sub-directory to the schema its tables are loaded into and
# to the list of table names found under that sub-directory.
TABLE_DICT = {
    "pos": {
        "schema": "RAW_POS",
        "tables": POS_TABLES,
    },
    "customer": {
        "schema": "RAW_CUSTOMER",
        "tables": CUSTOMER_TABLES,
    },
}
|
||
# SNOWFLAKE ADVANTAGE: Schema detection | ||
# SNOWFLAKE ADVANTAGE: Data ingestion with COPY | ||
# SNOWFLAKE ADVANTAGE: Snowflake Tables (not file-based) | ||
|
||
def load_raw_table(session, tname=None, s3dir=None, year=None, schema=None):
    """Copy one staged Parquet dataset into the table named ``tname``.

    Args:
        session: Snowpark session used for every call below.
        tname: Table name; also the last directory component of the stage path.
        s3dir: Directory under ``@external.frostbyte_raw_stage`` that holds ``tname``.
        year: Optional year; when given, only the ``year=<year>`` sub-directory
            is read and a progress line is printed.
        schema: Schema passed to ``session.use_schema`` before loading.
    """
    session.use_schema(schema)

    # Base stage path; narrowed to a single year partition when requested.
    stage_path = "@external.frostbyte_raw_stage/{}/{}".format(s3dir, tname)
    if year is not None:
        print('\tLoading year {}'.format(year))
        stage_path = "{}/year={}".format(stage_path, year)

    # we can infer schema using the parquet read option
    reader = session.read.option("compression", "snappy")
    staged_df = reader.parquet(stage_path)
    staged_df.copy_into_table(f"{tname}")
|
||
# SNOWFLAKE ADVANTAGE: Warehouse elasticity (dynamic scaling) | ||
|
||
def load_all_raw_tables(session):
    """Load every table listed in ``TABLE_DICT``, resizing the warehouse around the load.

    The ``HOL_WH`` warehouse is set to XLARGE before loading and set back to
    XSMALL afterwards. The resize back down runs in a ``finally`` clause so a
    failed load does not leave the warehouse at the larger size.

    Args:
        session: Snowpark session used to run the ALTER WAREHOUSE statements
            and passed on to ``load_raw_table``.

    Raises:
        Whatever ``load_raw_table`` or the ALTER WAREHOUSE statements raise;
        errors are propagated unchanged after the scale-down attempt.
    """
    _ = session.sql("ALTER WAREHOUSE HOL_WH SET WAREHOUSE_SIZE = XLARGE WAIT_FOR_COMPLETION = TRUE").collect()

    try:
        for s3dir, data in TABLE_DICT.items():
            tnames = data['tables']
            schema = data['schema']
            for tname in tnames:
                print("Loading {}".format(tname))
                # Only load the first 3 years of data for the order tables at this point
                # We will load the 2022 data later in the lab
                if tname in ['order_header', 'order_detail']:
                    for year in ['2019', '2020', '2021']:
                        load_raw_table(session, tname=tname, s3dir=s3dir, year=year, schema=schema)
                else:
                    load_raw_table(session, tname=tname, s3dir=s3dir, schema=schema)
    finally:
        # Always scale back down, even when a load raised, so the warehouse
        # is not left running at XLARGE.
        _ = session.sql("ALTER WAREHOUSE HOL_WH SET WAREHOUSE_SIZE = XSMALL").collect()
|
||
def validate_raw_tables(session):
    """Print the column names of every raw table so the inferred schema can be checked.

    Args:
        session: Snowpark session used to look up each table.
    """
    # POS tables are printed first, then customer tables.
    schema_groups = (
        ('RAW_POS', POS_TABLES),
        ('RAW_CUSTOMER', CUSTOMER_TABLES),
    )
    for schema_name, table_names in schema_groups:
        for table_name in table_names:
            qualified_name = '{}.{}'.format(schema_name, table_name)
            column_names = session.table(qualified_name).columns
            print('{}: \n\t{}\n'.format(table_name, column_names))
|
||
|
||
# Entry point for running this script directly (local debugging).
if __name__ == "__main__":
    # Obtain a Snowpark session via the builder; the ``with`` block closes it on exit.
    with Session.builder.getOrCreate() as local_session:
        load_all_raw_tables(local_session)
        validate_raw_tables(local_session)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,5 +9,5 @@ dependencies: | |
- openssl=1.1.1 | ||
- pip: | ||
# Snowflake | ||
- snowflake-cli-labs | ||
- snowflake | ||
- snowflake-cli-labs==0.2.9 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1 @@ | ||
snowflake-snowpark-python[pandas] | ||
snowflake | ||
snowflake-cli-labs==0.2.9 | ||
snowflake-snowpark-python |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
definition_version: 1 | ||
snowpark: | ||
project_name: "hol" | ||
stage_name: "hol_schema.deployment" | ||
src: "app/" | ||
|
||
procedures: | ||
- name: "load_daily_city_metrics_sp" | ||
database: "hol_db" | ||
schema: "hol_schema" | ||
handler: "06_load_daily_city_metrics.main" | ||
runtime: "3.10" | ||
signature: "" | ||
returns: string | ||
- name: "load_raw_data_sp" | ||
database: "hol_db" | ||
schema: "hol_schema" | ||
handler: "05_raw_data.load_all_raw_tables" | ||
runtime: "3.10" | ||
signature: "" | ||
returns: string |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
|
||
USE ROLE SECURITYADMIN; | ||
SET MY_USER = CURRENT_USER(); | ||
CREATE ROLE IF NOT EXISTS GIT_ADMIN; | ||
GRANT ROLE GIT_ADMIN to ROLE SYSADMIN; | ||
GRANT ROLE GIT_ADMIN TO USER IDENTIFIER($MY_USER); | ||
|
||
|
||
USE ROLE SYSADMIN; | ||
CREATE OR REPLACE DATABASE GIT_REPO; | ||
USE SCHEMA PUBLIC; | ||
GRANT OWNERSHIP ON DATABASE GIT_REPO TO ROLE GIT_ADMIN; | ||
USE DATABASE GIT_REPO; | ||
GRANT OWNERSHIP ON SCHEMA PUBLIC TO ROLE GIT_ADMIN; | ||
|
||
|
||
USE ROLE GIT_ADMIN; | ||
USE DATABASE GIT_REPO; | ||
USE SCHEMA PUBLIC; | ||
CREATE OR REPLACE SECRET GIT_SECRET | ||
TYPE = PASSWORD | ||
USERNAME = '<your_git_user>' | ||
PASSWORD = '<your_personal_access_token>'; | ||
|
||
--Create an API integration for interacting with the repository API | ||
USE ROLE ACCOUNTADMIN; | ||
GRANT CREATE INTEGRATION ON ACCOUNT TO ROLE GIT_ADMIN; | ||
USE ROLE GIT_ADMIN; | ||
|
||
CREATE OR REPLACE API INTEGRATION GIT_API_INTEGRATION | ||
API_PROVIDER = GIT_HTTPS_API | ||
API_ALLOWED_PREFIXES = ('https://github.com/<your_git_user>') | ||
ALLOWED_AUTHENTICATION_SECRETS = (GIT_SECRET) | ||
ENABLED = TRUE; | ||
|
||
CREATE OR REPLACE GIT REPOSITORY DE_QUICKSTART | ||
API_INTEGRATION = GIT_API_INTEGRATION | ||
GIT_CREDENTIALS = GIT_SECRET | ||
ORIGIN = '<your git repo URL ending in .git>'; | ||
|
||
SHOW GIT BRANCHES IN DE_QUICKSTART; | ||
ls @DE_QUICKSTART/branches/main; | ||
|
||
USE ROLE ACCOUNTADMIN; | ||
SET MY_USER = CURRENT_USER(); | ||
EXECUTE IMMEDIATE | ||
FROM @GIT_REPO.PUBLIC.DE_QUICKSTART/branches/main/steps/03_setup_snowflake.sql | ||
USING (MY_USER=>$MY_USER); | ||
|
Oops, something went wrong.