-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[ETL-676] Create data loading procedure for each parquet datatype (#132)
* create procedure to copy data into table from stage * Add external stage for parquet dev * return resultset rather than table * Revert "return resultset rather than table" This reverts commit c265b5f. * create a procedure for each parquet datatype
- Loading branch information
1 parent
355af2b
commit 6a610b1
Showing
7 changed files
with
142 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
35 changes: 35 additions & 0 deletions
35
snowflake/objects/database/recover/schema/parquet/procedure/copy_into_table_from_stage.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
/* | ||
A stored procedure which copies Parquet data from a named stage into a table. | ||
Because of limitations in how we can pass variables to stage names, | ||
this procedure is specific to a stage location. That is, we cannot | ||
use Snowflake scripting variables within the stage name, so we instead | ||
use Jinja variables, which has the side effect of "fixing" the procedure | ||
to use a specific stage location. | ||
Jinja templating variables: | ||
datatype - The datatype which our stage location refers to. | ||
stage_name - The name of the stage where our data exists. | ||
stage_path - The location within the stage where our data exists. | ||
file_format - The name of the file format object used during copy. | ||
*/ | ||
CREATE OR REPLACE PROCEDURE copy_into_table_from_{{ datatype }}_parquet_stage( | ||
target_table VARCHAR | ||
) | ||
RETURNS TABLE () | ||
LANGUAGE SQL | ||
as | ||
$$ | ||
DECLARE | ||
res RESULTSET DEFAULT ( | ||
COPY INTO IDENTIFIER(:target_table) | ||
FROM @{{ stage_name }}/{{ stage_path }} | ||
FILE_FORMAT = ( | ||
FORMAT_NAME = '{{ file_format }}' | ||
) | ||
MATCH_BY_COLUMN_NAME = CASE_SENSITIVE | ||
); | ||
BEGIN | ||
RETURN TABLE(res); | ||
END; | ||
$$; |
61 changes: 61 additions & 0 deletions
61
snowflake/objects/database/recover/schema/parquet/procedure/deploy.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
/* | ||
Deploy all PROCEDURE objects | ||
Jinja templating variables: | ||
stage_name - The name of the stage where our data exists. | ||
file_format - The name of the file format object used by the | ||
`copy_into_table_from_stage.sql` procedure. | ||
*/ | ||
|
||
WITH create_procedure_for_each_parquet_table AS PROCEDURE () | ||
RETURNS VARCHAR | ||
LANGUAGE SQL | ||
AS | ||
$$ | ||
DECLARE | ||
parquet_datatypes ARRAY := [ | ||
'enrolledparticipants_customfields_symptoms', | ||
'enrolledparticipants_customfields_treatments', | ||
'enrolledparticipants', | ||
'fitbitactivitylogs', | ||
'fitbitdailydata', | ||
'fitbitdevices', | ||
'fitbitecg', | ||
'fitbitecg_waveformsamples', | ||
'fitbitintradaycombined', | ||
'fitbitrestingheartrates', | ||
'fitbitsleeplogs', | ||
'fitbitsleeplogs_sleeplogdetails', | ||
'googlefitsamples', | ||
'healthkitv2activitysummaries', | ||
'healthkitv2electrocardiogram', | ||
'healthkitv2electrocardiogram_subsamples', | ||
'healthkitv2heartbeat', | ||
'healthkitv2heartbeat_subsamples', | ||
'healthkitv2samples', | ||
'healthkitv2statistics', | ||
'healthkitv2workouts_events', | ||
'healthkitv2workouts', | ||
'symptomlog', | ||
'symptomlog_value_symptoms', | ||
'symptomlog_value_treatments' | ||
]; | ||
datatype VARCHAR; | ||
dataset_name VARCHAR; | ||
BEGIN | ||
FOR i in 0 to array_size(:parquet_datatypes)-1 DO | ||
datatype := GET(:parquet_datatypes, :i); | ||
dataset_name := CONCAT('dataset_', :datatype); | ||
-- Create a stored procedure which uses this data type's stage location | ||
EXECUTE IMMEDIATE | ||
FROM './copy_into_table_from_stage.sql' | ||
USING ( | ||
datatype => :datatype, | ||
stage_name => '{{ stage_name }}', | ||
stage_path => :dataset_name, | ||
file_format => '{{ file_format }}' | ||
); | ||
END FOR; | ||
END; | ||
$$ | ||
CALL create_procedure_for_each_parquet_table(); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
6 changes: 6 additions & 0 deletions
6
snowflake/objects/database/recover/schema/parquet/stage/parquet_dev_s3.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
/* | ||
Create an external stage over the dev Parquet data in S3 | ||
*/ | ||
CREATE OR REPLACE STAGE {{ parquet_stage_name }} | ||
URL = 's3://recover-dev-processed-data/{{ git_branch }}/parquet/' | ||
STORAGE_INTEGRATION = recover_dev_s3; |
2 changes: 1 addition & 1 deletion
2
...cover/schema/parquet/stage/parquet_s3.sql → .../schema/parquet/stage/parquet_prod_s3.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters