From 3a9f587d53ecb2a9eb24db9c9b831caeee1a62d2 Mon Sep 17 00:00:00 2001 From: David-Adejumo Date: Fri, 13 Jan 2023 18:38:19 +0000 Subject: [PATCH 01/10] my dbt installation set up --- greenery/.gitignore | 4 ++ greenery/README.md | 15 ++++++++ greenery/analyses/.gitkeep | 0 greenery/dbt_project.yml | 38 +++++++++++++++++++ greenery/macros/.gitkeep | 0 .../models/example/my_first_dbt_model.sql | 27 +++++++++++++ .../models/example/my_second_dbt_model.sql | 6 +++ greenery/models/example/schema.yml | 21 ++++++++++ greenery/seeds/.gitkeep | 0 greenery/snapshots/.gitkeep | 0 greenery/tests/.gitkeep | 0 11 files changed, 111 insertions(+) create mode 100644 greenery/.gitignore create mode 100644 greenery/README.md create mode 100644 greenery/analyses/.gitkeep create mode 100644 greenery/dbt_project.yml create mode 100644 greenery/macros/.gitkeep create mode 100644 greenery/models/example/my_first_dbt_model.sql create mode 100644 greenery/models/example/my_second_dbt_model.sql create mode 100644 greenery/models/example/schema.yml create mode 100644 greenery/seeds/.gitkeep create mode 100644 greenery/snapshots/.gitkeep create mode 100644 greenery/tests/.gitkeep diff --git a/greenery/.gitignore b/greenery/.gitignore new file mode 100644 index 000000000..49f147cb9 --- /dev/null +++ b/greenery/.gitignore @@ -0,0 +1,4 @@ + +target/ +dbt_packages/ +logs/ diff --git a/greenery/README.md b/greenery/README.md new file mode 100644 index 000000000..7874ac842 --- /dev/null +++ b/greenery/README.md @@ -0,0 +1,15 @@ +Welcome to your new dbt project! 
+ +### Using the starter project + +Try running the following commands: +- dbt run +- dbt test + + +### Resources: +- Learn more about dbt [in the docs](https://docs.getdbt.com/docs/introduction) +- Check out [Discourse](https://discourse.getdbt.com/) for commonly asked questions and answers +- Join the [chat](https://community.getdbt.com/) on Slack for live discussions and support +- Find [dbt events](https://events.getdbt.com) near you +- Check out [the blog](https://blog.getdbt.com/) for the latest news on dbt's development and best practices diff --git a/greenery/analyses/.gitkeep b/greenery/analyses/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/greenery/dbt_project.yml b/greenery/dbt_project.yml new file mode 100644 index 000000000..e5031fe92 --- /dev/null +++ b/greenery/dbt_project.yml @@ -0,0 +1,38 @@ + +# Name your project! Project names should contain only lowercase characters +# and underscores. A good package name should reflect your organization's +# name or the intended use of these models +name: 'greenery' +version: '1.0.0' +config-version: 2 + +# This setting configures which "profile" dbt uses for this project. +profile: 'greenery' + +# These configurations specify where dbt should look for different types of files. +# The `model-paths` config, for example, states that models in this project can be +# found in the "models/" directory. You probably won't need to change these! +model-paths: ["models"] +analysis-paths: ["analyses"] +test-paths: ["tests"] +seed-paths: ["seeds"] +macro-paths: ["macros"] +snapshot-paths: ["snapshots"] + +target-path: "target" # directory which will store compiled SQL files +clean-targets: # directories to be removed by `dbt clean` + - "target" + - "dbt_packages" + + +# Configuring models +# Full documentation: https://docs.getdbt.com/docs/configuring-models + +# In this example config, we tell dbt to build all models in the example/ directory +# as tables. 
These settings can be overridden in the individual model files +# using the `{{ config(...) }}` macro. +models: + greenery: + # Config indicated by + and applies to all files under models/example/ + example: + +materialized: view diff --git a/greenery/macros/.gitkeep b/greenery/macros/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/greenery/models/example/my_first_dbt_model.sql b/greenery/models/example/my_first_dbt_model.sql new file mode 100644 index 000000000..f31a12d94 --- /dev/null +++ b/greenery/models/example/my_first_dbt_model.sql @@ -0,0 +1,27 @@ + +/* + Welcome to your first dbt model! + Did you know that you can also configure models directly within SQL files? + This will override configurations stated in dbt_project.yml + + Try changing "table" to "view" below +*/ + +{{ config(materialized='table') }} + +with source_data as ( + + select 1 as id + union all + select null as id + +) + +select * +from source_data + +/* + Uncomment the line below to remove records with null `id` values +*/ + +-- where id is not null diff --git a/greenery/models/example/my_second_dbt_model.sql b/greenery/models/example/my_second_dbt_model.sql new file mode 100644 index 000000000..c91f8793a --- /dev/null +++ b/greenery/models/example/my_second_dbt_model.sql @@ -0,0 +1,6 @@ + +-- Use the `ref` function to select from other models + +select * +from {{ ref('my_first_dbt_model') }} +where id = 1 diff --git a/greenery/models/example/schema.yml b/greenery/models/example/schema.yml new file mode 100644 index 000000000..2a5308171 --- /dev/null +++ b/greenery/models/example/schema.yml @@ -0,0 +1,21 @@ + +version: 2 + +models: + - name: my_first_dbt_model + description: "A starter dbt model" + columns: + - name: id + description: "The primary key for this table" + tests: + - unique + - not_null + + - name: my_second_dbt_model + description: "A starter dbt model" + columns: + - name: id + description: "The primary key for this table" + tests: + - unique + - 
not_null diff --git a/greenery/seeds/.gitkeep b/greenery/seeds/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/greenery/snapshots/.gitkeep b/greenery/snapshots/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/greenery/tests/.gitkeep b/greenery/tests/.gitkeep new file mode 100644 index 000000000..e69de29bb From 6a74996985b7b34692f8aad3d078b54e05981b71 Mon Sep 17 00:00:00 2001 From: David-Adejumo Date: Sat, 14 Jan 2023 19:30:21 +0000 Subject: [PATCH 02/10] Starting my week 1 project demo --- projects/week 1/README.md | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 projects/week 1/README.md diff --git a/projects/week 1/README.md b/projects/week 1/README.md new file mode 100644 index 000000000..df4c7974c --- /dev/null +++ b/projects/week 1/README.md @@ -0,0 +1,2 @@ +# Week 1 project +The answer can be found below From bd4f4c351c27116ea99916eaff295734d602c9d2 Mon Sep 17 00:00:00 2001 From: David-Adejumo Date: Sun, 15 Jan 2023 18:08:07 +0000 Subject: [PATCH 03/10] Analytics engineering with dbt week a project --- .../staging/postgres/postgres_source.yml | 13 +++++++++++ .../postgres/stg_postgres_order_items.sql | 0 .../staging/postgres/stg_postgres_orders.sql | 22 +++++++++++++++++++ 3 files changed, 35 insertions(+) create mode 100644 greenery/models/staging/postgres/postgres_source.yml create mode 100644 greenery/models/staging/postgres/stg_postgres_order_items.sql create mode 100644 greenery/models/staging/postgres/stg_postgres_orders.sql diff --git a/greenery/models/staging/postgres/postgres_source.yml b/greenery/models/staging/postgres/postgres_source.yml new file mode 100644 index 000000000..a8e69324c --- /dev/null +++ b/greenery/models/staging/postgres/postgres_source.yml @@ -0,0 +1,13 @@ +version: 2 + +sources: + - name: postgres + database: raw + schema: public + tables: + - name: orders + - name: order_items + - name: addresses + + + diff --git a/greenery/models/staging/postgres/stg_postgres_order_items.sql 
b/greenery/models/staging/postgres/stg_postgres_order_items.sql new file mode 100644 index 000000000..e69de29bb diff --git a/greenery/models/staging/postgres/stg_postgres_orders.sql b/greenery/models/staging/postgres/stg_postgres_orders.sql new file mode 100644 index 000000000..96cf1dcdf --- /dev/null +++ b/greenery/models/staging/postgres/stg_postgres_orders.sql @@ -0,0 +1,22 @@ +with src_orders as ( + select * from {{ source('postgres', 'orders') }} +) +, renamed_recast as ( + select + ORDER_ID as order_guid + ,USER_ID as user_guid + ,PROMO_ID as promo_desc + ,ADDRESS_ID as address_guid + ,CREATED_AT::timestampntz as created_at_utc + ,ORDER_COST + ,SHIPPING_COST + ,ORDER_TOTAL + ,TRACKING_ID as tracking_guid + ,SHIPPING_SERVICE + ,ESTIMATED_DELIVERY_AT::timestampntz as estimated_delivery_at_utc + ,DELIVERED_AT::timestampntz as delivery_at_utc + ,STATUS + from src_orders +) + +select * from renamed_recast \ No newline at end of file From 85285e8bc25c095a9a65c5655747d82d56d078af Mon Sep 17 00:00:00 2001 From: David-Adejumo Date: Mon, 16 Jan 2023 12:10:52 +0000 Subject: [PATCH 04/10] week 1 project submission --- .../models/staging/postgres/postgres_model.yml | 11 +++++++++++ .../staging/postgres/postgres_source.yml | 4 ++++ .../postgres/stg_postgres_addresses.sql | 15 +++++++++++++++ .../staging/postgres/stg_postgres_events.sql | 18 ++++++++++++++++++ .../postgres/stg_postgres_order_items.sql | 13 +++++++++++++ .../staging/postgres/stg_postgres_products.sql | 14 ++++++++++++++ .../staging/postgres/stg_postgres_promos.sql | 14 ++++++++++++++ .../staging/postgres/stg_postgres_users.sql | 18 ++++++++++++++++++ 8 files changed, 107 insertions(+) create mode 100644 greenery/models/staging/postgres/postgres_model.yml create mode 100644 greenery/models/staging/postgres/stg_postgres_addresses.sql create mode 100644 greenery/models/staging/postgres/stg_postgres_events.sql create mode 100644 greenery/models/staging/postgres/stg_postgres_products.sql create mode 100644 
greenery/models/staging/postgres/stg_postgres_promos.sql create mode 100644 greenery/models/staging/postgres/stg_postgres_users.sql diff --git a/greenery/models/staging/postgres/postgres_model.yml b/greenery/models/staging/postgres/postgres_model.yml new file mode 100644 index 000000000..ad5dbb32d --- /dev/null +++ b/greenery/models/staging/postgres/postgres_model.yml @@ -0,0 +1,11 @@ +version: 2 + +models: + - name: stg_addresses + - name: stg_events + - name: stg_order_items + - name: stg_orders + - name: stg_products + - name: stg_promos + - name: stg_users + \ No newline at end of file diff --git a/greenery/models/staging/postgres/postgres_source.yml b/greenery/models/staging/postgres/postgres_source.yml index a8e69324c..6042fb0f9 100644 --- a/greenery/models/staging/postgres/postgres_source.yml +++ b/greenery/models/staging/postgres/postgres_source.yml @@ -8,6 +8,10 @@ sources: - name: orders - name: order_items - name: addresses + - name: products + - name: events + - name: promos + - name: users diff --git a/greenery/models/staging/postgres/stg_postgres_addresses.sql b/greenery/models/staging/postgres/stg_postgres_addresses.sql new file mode 100644 index 000000000..ceef7a541 --- /dev/null +++ b/greenery/models/staging/postgres/stg_postgres_addresses.sql @@ -0,0 +1,15 @@ +with src_address as ( + select * from {{ source('postgres', 'addresses') }} +), + renamed_recast as ( + select + address_id as address_id, + address as address_line_1, + zipcode as address_zipcode, + state as address_state, + country as address_country + + from src_address +) + +select * from renamed_recast \ No newline at end of file diff --git a/greenery/models/staging/postgres/stg_postgres_events.sql b/greenery/models/staging/postgres/stg_postgres_events.sql new file mode 100644 index 000000000..eac58745e --- /dev/null +++ b/greenery/models/staging/postgres/stg_postgres_events.sql @@ -0,0 +1,18 @@ +with src_events as ( + select * from {{ source('postgres', 'events') }} +), + 
renamed_recast as ( + select + event_id, + session_id, + user_id, + page_url as event_page_url, + created_at::timestamp_ntz as event_created_at_utc, + event_type, + order_id, + product_id + + from src_events +) + +select * from renamed_recast \ No newline at end of file diff --git a/greenery/models/staging/postgres/stg_postgres_order_items.sql b/greenery/models/staging/postgres/stg_postgres_order_items.sql index e69de29bb..1b206e598 100644 --- a/greenery/models/staging/postgres/stg_postgres_order_items.sql +++ b/greenery/models/staging/postgres/stg_postgres_order_items.sql @@ -0,0 +1,13 @@ +with src_order_items as ( + select * from {{ source('postgres', 'order_items') }} +), + renamed_recast as ( + select + order_id as order_id, + product_id as product_id, + quantity as order_item_quantity + + from src_order_items +) + +select * from renamed_recast \ No newline at end of file diff --git a/greenery/models/staging/postgres/stg_postgres_products.sql b/greenery/models/staging/postgres/stg_postgres_products.sql new file mode 100644 index 000000000..ce0c433d4 --- /dev/null +++ b/greenery/models/staging/postgres/stg_postgres_products.sql @@ -0,0 +1,14 @@ +with src_products as ( + select * from {{ source('postgres', 'products') }} +), + renamed_recast as ( + select + product_id, + name as product_name, + price as product_price, + inventory as product_inventory + + from src_products +) + +select * from renamed_recast \ No newline at end of file diff --git a/greenery/models/staging/postgres/stg_postgres_promos.sql b/greenery/models/staging/postgres/stg_postgres_promos.sql new file mode 100644 index 000000000..8266bca63 --- /dev/null +++ b/greenery/models/staging/postgres/stg_postgres_promos.sql @@ -0,0 +1,14 @@ +with src_promos as ( + select * from {{ source('postgres', 'promos') }} +), + renamed_recast as ( + select + md5(promo_id) as promo_id, + promo_id as promo_name, + discount as promo_discount, + status as promo_status + + from src_promos +) + +select * from 
renamed_recast \ No newline at end of file diff --git a/greenery/models/staging/postgres/stg_postgres_users.sql b/greenery/models/staging/postgres/stg_postgres_users.sql new file mode 100644 index 000000000..86c53199f --- /dev/null +++ b/greenery/models/staging/postgres/stg_postgres_users.sql @@ -0,0 +1,18 @@ +with src_users as ( + select * from {{ source('postgres', 'users') }} +), + renamed_recast as ( + select + user_id, + first_name as user_first_name, + last_name as user_last_name, + email as user_email, + phone_number as user_phone_number, + created_at::timestamp_ntz as user_created_at_utc, + updated_at::timestamp_ntz as user_updated_at_utc, + address_id + + from src_users +) + +select * from renamed_recast \ No newline at end of file From ede865f15707382330c835e00657c9c5d6062b96 Mon Sep 17 00:00:00 2001 From: David-Adejumo Date: Mon, 16 Jan 2023 12:13:35 +0000 Subject: [PATCH 05/10] week 1 project submission --- .../models/example/my_first_dbt_model.sql | 27 ------------------- .../models/example/my_second_dbt_model.sql | 6 ----- greenery/models/example/schema.yml | 21 --------------- 3 files changed, 54 deletions(-) delete mode 100644 greenery/models/example/my_first_dbt_model.sql delete mode 100644 greenery/models/example/my_second_dbt_model.sql delete mode 100644 greenery/models/example/schema.yml diff --git a/greenery/models/example/my_first_dbt_model.sql b/greenery/models/example/my_first_dbt_model.sql deleted file mode 100644 index f31a12d94..000000000 --- a/greenery/models/example/my_first_dbt_model.sql +++ /dev/null @@ -1,27 +0,0 @@ - -/* - Welcome to your first dbt model! - Did you know that you can also configure models directly within SQL files? 
- This will override configurations stated in dbt_project.yml - - Try changing "table" to "view" below -*/ - -{{ config(materialized='table') }} - -with source_data as ( - - select 1 as id - union all - select null as id - -) - -select * -from source_data - -/* - Uncomment the line below to remove records with null `id` values -*/ - --- where id is not null diff --git a/greenery/models/example/my_second_dbt_model.sql b/greenery/models/example/my_second_dbt_model.sql deleted file mode 100644 index c91f8793a..000000000 --- a/greenery/models/example/my_second_dbt_model.sql +++ /dev/null @@ -1,6 +0,0 @@ - --- Use the `ref` function to select from other models - -select * -from {{ ref('my_first_dbt_model') }} -where id = 1 diff --git a/greenery/models/example/schema.yml b/greenery/models/example/schema.yml deleted file mode 100644 index 2a5308171..000000000 --- a/greenery/models/example/schema.yml +++ /dev/null @@ -1,21 +0,0 @@ - -version: 2 - -models: - - name: my_first_dbt_model - description: "A starter dbt model" - columns: - - name: id - description: "The primary key for this table" - tests: - - unique - - not_null - - - name: my_second_dbt_model - description: "A starter dbt model" - columns: - - name: id - description: "The primary key for this table" - tests: - - unique - - not_null From 2739f436b2d473a5afbdcf8888f8d4ac466e2009 Mon Sep 17 00:00:00 2001 From: David-Adejumo Date: Mon, 16 Jan 2023 13:34:04 +0000 Subject: [PATCH 06/10] the revised readme solutions --- greenery/solution_to_week1 _project.md | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 greenery/solution_to_week1 _project.md diff --git a/greenery/solution_to_week1 _project.md b/greenery/solution_to_week1 _project.md new file mode 100644 index 000000000..e69de29bb From 2f35a4d94c635e43643ee3d6d0c0929a6a20addc Mon Sep 17 00:00:00 2001 From: David-Adejumo Date: Mon, 16 Jan 2023 14:26:53 +0000 Subject: [PATCH 07/10] project submission --- .../staging/postgres/postgres_model.yml | 
14 ++-- .../postgres/stg_postgres_addresses.sql | 7 +- .../staging/postgres/stg_postgres_events.sql | 6 +- .../postgres/stg_postgres_order_items.sql | 3 +- .../staging/postgres/stg_postgres_orders.sql | 30 ++++----- .../postgres/stg_postgres_products.sql | 4 +- .../staging/postgres/stg_postgres_promos.sql | 13 ++-- .../staging/postgres/stg_postgres_users.sql | 7 +- greenery/solution_to_week1 _project.md | 64 +++++++++++++++++++ 9 files changed, 105 insertions(+), 43 deletions(-) diff --git a/greenery/models/staging/postgres/postgres_model.yml b/greenery/models/staging/postgres/postgres_model.yml index ad5dbb32d..609393c35 100644 --- a/greenery/models/staging/postgres/postgres_model.yml +++ b/greenery/models/staging/postgres/postgres_model.yml @@ -1,11 +1,11 @@ version: 2 models: - - name: stg_addresses - - name: stg_events - - name: stg_order_items - - name: stg_orders - - name: stg_products - - name: stg_promos - - name: stg_users + - name: stg_postgres_addresses + - name: stg_postgres_events + - name: stg_postgres_order_items + - name: stg_postgres_orders + - name: stg_postgres_products + - name: stg_postgres_promos + - name: stg_postgres_users \ No newline at end of file diff --git a/greenery/models/staging/postgres/stg_postgres_addresses.sql b/greenery/models/staging/postgres/stg_postgres_addresses.sql index ceef7a541..3bb6a77a7 100644 --- a/greenery/models/staging/postgres/stg_postgres_addresses.sql +++ b/greenery/models/staging/postgres/stg_postgres_addresses.sql @@ -1,15 +1,14 @@ with src_address as ( select * from {{ source('postgres', 'addresses') }} ), - renamed_recast as ( - select +renamed_recast as ( + select address_id as address_id, address as address_line_1, zipcode as address_zipcode, state as address_state, country as address_country - - from src_address + from src_address ) select * from renamed_recast \ No newline at end of file diff --git a/greenery/models/staging/postgres/stg_postgres_events.sql 
b/greenery/models/staging/postgres/stg_postgres_events.sql index eac58745e..0dce40ac7 100644 --- a/greenery/models/staging/postgres/stg_postgres_events.sql +++ b/greenery/models/staging/postgres/stg_postgres_events.sql @@ -1,8 +1,8 @@ with src_events as ( select * from {{ source('postgres', 'events') }} ), - renamed_recast as ( - select +renamed_recast as ( + select event_id, session_id, user_id, @@ -11,8 +11,8 @@ with src_events as ( event_type, order_id, product_id + from src_events - from src_events ) select * from renamed_recast \ No newline at end of file diff --git a/greenery/models/staging/postgres/stg_postgres_order_items.sql b/greenery/models/staging/postgres/stg_postgres_order_items.sql index 1b206e598..47bfca80c 100644 --- a/greenery/models/staging/postgres/stg_postgres_order_items.sql +++ b/greenery/models/staging/postgres/stg_postgres_order_items.sql @@ -5,7 +5,8 @@ with src_order_items as ( select order_id as order_id, product_id as product_id, - quantity as order_item_quantity + quantity as order_item_quantity, + md5(concat(order_id, product_id)) as order_items_id from src_order_items ) diff --git a/greenery/models/staging/postgres/stg_postgres_orders.sql b/greenery/models/staging/postgres/stg_postgres_orders.sql index 96cf1dcdf..b9a6e5fe6 100644 --- a/greenery/models/staging/postgres/stg_postgres_orders.sql +++ b/greenery/models/staging/postgres/stg_postgres_orders.sql @@ -1,21 +1,21 @@ with src_orders as ( select * from {{ source('postgres', 'orders') }} -) -, renamed_recast as ( +), +renamed_recast as ( select - ORDER_ID as order_guid - ,USER_ID as user_guid - ,PROMO_ID as promo_desc - ,ADDRESS_ID as address_guid - ,CREATED_AT::timestampntz as created_at_utc - ,ORDER_COST - ,SHIPPING_COST - ,ORDER_TOTAL - ,TRACKING_ID as tracking_guid - ,SHIPPING_SERVICE - ,ESTIMATED_DELIVERY_AT::timestampntz as estimated_delivery_at_utc - ,DELIVERED_AT::timestampntz as delivery_at_utc - ,STATUS + ORDER_ID, + USER_ID, + md5(PROMO_ID) as promo_id, + ADDRESS_ID, + 
CREATED_AT::timestamp_ntz as created_at_utc, + ORDER_COST, + SHIPPING_COST, + ORDER_TOTAL, + TRACKING_ID, + SHIPPING_SERVICE, + ESTIMATED_DELIVERY_AT::timestamp_ntz as estimated_delivery_at_utc, + DELIVERED_AT::timestamp_ntz as delivery_at_utc, + STATUS from src_orders ) diff --git a/greenery/models/staging/postgres/stg_postgres_products.sql b/greenery/models/staging/postgres/stg_postgres_products.sql index ce0c433d4..e6a44182f 100644 --- a/greenery/models/staging/postgres/stg_postgres_products.sql +++ b/greenery/models/staging/postgres/stg_postgres_products.sql @@ -1,8 +1,8 @@ with src_products as ( select * from {{ source('postgres', 'products') }} ), - renamed_recast as ( - select +renamed_recast as ( + select product_id, name as product_name, price as product_price, diff --git a/greenery/models/staging/postgres/stg_postgres_promos.sql b/greenery/models/staging/postgres/stg_postgres_promos.sql index 8266bca63..ed2677904 100644 --- a/greenery/models/staging/postgres/stg_postgres_promos.sql +++ b/greenery/models/staging/postgres/stg_postgres_promos.sql @@ -1,14 +1,13 @@ with src_promos as ( select * from {{ source('postgres', 'promos') }} ), - renamed_recast as ( +renamed_recast as ( select - md5(promo_id) as promo_id, - promo_id as promo_name, - discount as promo_discount, - status as promo_status - - from src_promos + md5(promo_id) as promo_id, + promo_id as promo_name, + discount as promo_discount, + status as promo_status + from src_promos ) select * from renamed_recast \ No newline at end of file diff --git a/greenery/models/staging/postgres/stg_postgres_users.sql b/greenery/models/staging/postgres/stg_postgres_users.sql index 86c53199f..b0f8c4a1b 100644 --- a/greenery/models/staging/postgres/stg_postgres_users.sql +++ b/greenery/models/staging/postgres/stg_postgres_users.sql @@ -1,8 +1,8 @@ with src_users as ( select * from {{ source('postgres', 'users') }} ), - renamed_recast as ( - select +renamed_recast as ( + select user_id, first_name as 
user_first_name, last_name as user_last_name, @@ -11,8 +11,7 @@ with src_users as ( created_at::timestamp_ntz as user_created_at_utc, updated_at::timestamp_ntz as user_updated_at_utc, address_id - - from src_users + from src_users ) select * from renamed_recast \ No newline at end of file diff --git a/greenery/solution_to_week1 _project.md b/greenery/solution_to_week1 _project.md index e69de29bb..039fca9db 100644 --- a/greenery/solution_to_week1 _project.md +++ b/greenery/solution_to_week1 _project.md @@ -0,0 +1,64 @@ +Solutions to week 1 project +1. How many users do we have? +select count(distinct user_id) from stg_postgres_users + +Solution: 130 + +2. On average, how many orders do we receive per hour? +with hourly_total as ( + select + trunc(created_at_utc,'hour'), + count(*) as order_count + from stg_postgres_orders + group by 1) + select avg(order_count) + from hourly_total + +Solution: 7.520833 + +3. On average, how long does an order take from being placed to being delivered? +with delivered as +( + select + order_id, + datediff('days', created_at_utc, delivery_at_utc ) as delivery_day + from stg_postgres_orders +) + +select round(avg(delivery_day),1) as avg_delivery_day from delivered + +Solution: 3.9 + +4. How many users have only made one purchase? Two purchases? Three+ purchases? +with user_orders as ( + select + user_id, + count(*) order_count + from stg_postgres_orders + group by 1 +) + +select + case order_count + when 1 then '1 Purchase' + when 2 then '2 Purchases' + else '3+ Purchases' + end as order_bin, + count(user_id) +from user_orders +group by 1 + +solution: 1 Purchase, 25 2 Purchases, 28 3+ Purchases, 71 + +4. On average, how many unique sessions do we have per hour? 
+with events_per_hour as ( + select + date_trunc('hour', event_created_at_utc) as event_hour, + count(distinct session_id) as unique_session_count + from stg_postgres_events + group by 1 +) + +select avg(unique_session_count) as sessions_per_hour from events_per_hour + +solution: 16.327586 \ No newline at end of file From 68005613b62da122c3dc54fc20707133dd04f4e8 Mon Sep 17 00:00:00 2001 From: David-Adejumo Date: Mon, 16 Jan 2023 14:31:59 +0000 Subject: [PATCH 08/10] Solution to week 1 project submission --- greenery/solution_to_week1 _project.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/greenery/solution_to_week1 _project.md b/greenery/solution_to_week1 _project.md index 039fca9db..cd8988cd1 100644 --- a/greenery/solution_to_week1 _project.md +++ b/greenery/solution_to_week1 _project.md @@ -1,10 +1,14 @@ Solutions to week 1 project 1. How many users do we have? + + select count(distinct user_id) from stg_postgres_users Solution: 130 2. On average, how many orders do we receive per hour? + + with hourly_total as ( select trunc(created_at_utc,'hour'), @@ -17,6 +21,8 @@ with hourly_total as ( Solution: 7.520833 3. On average, how long does an order take from being placed to being delivered? + + with delivered as ( select @@ -30,6 +36,8 @@ select round(avg(delivery_day),1) as avg_delivery_day from delivered Solution: 3.9 4. How many users have only made one purchase? Two purchases? Three+ purchases? + + with user_orders as ( select user_id, @@ -51,6 +59,8 @@ group by 1 solution: 1 Purchase, 25 2 Purchases, 28 3+ Purchases, 71 4. On average, how many unique sessions do we have per hour? 
+ + with events_per_hour as ( select date_trunc('hour', event_created_at_utc) as event_hour,
From fda52488a6439dbf6e192e2782be57ec87594236 Mon Sep 17 00:00:00 2001
From: David-Adejumo
Date: Sat, 1 Apr 2023 14:52:27 +0000
Subject: [PATCH 09/10] Analytics engineering assignment

---
Review notes (text below the three-dash line is not part of the commit message):
* config(): key lower-cased to `materialized`, the spelling dbt documents; the
  upper-case spelling is presumably not recognised.
* int_session_events_agg: reads from the `events` CTE (it was declared but
  unused), additionally exposes user_id, and groups by column name.
* fct_users_sessions: users are joined on user_id; the previous condition
  compared event_id with user_id.
* stg_postgres_orders.yml: `not null` -> `not_null`, "unqiue" typo fixed,
  column names aligned with stg_postgres_orders.sql, and the
  database/schema/tables keys (already present in postgres_source.yml) removed.
* The directory name "Intermediate " carries a trailing space; left unchanged,
  presumably unintended.
* File bodies were re-indented (the original whitespace was lost), hunk sizes
  and the diffstat were recomputed; blob ids on the `index` lines were NOT
  regenerated.

 .../Intermediate /int_session_events_agg.sql  | 29 +++++++++++++++++
 greenery/models/marts/fct_users_sessions.sql  | 29 +++++++++++++++++
 .../staging/postgres/stg_postgres_orders.yml  | 23 ++++++++++++++
 3 files changed, 81 insertions(+)
 create mode 100644 greenery/models/Intermediate /int_session_events_agg.sql
 create mode 100644 greenery/models/marts/fct_users_sessions.sql
 create mode 100644 greenery/models/staging/postgres/stg_postgres_orders.yml

diff --git a/greenery/models/Intermediate /int_session_events_agg.sql b/greenery/models/Intermediate /int_session_events_agg.sql
new file mode 100644
index 000000000..d231d62c5
--- /dev/null
+++ b/greenery/models/Intermediate /int_session_events_agg.sql
@@ -0,0 +1,29 @@
+{{
+  config(
+    materialized = 'table'
+  )
+}}
+
+-- Counts each event type and captures the first/last event timestamp.
+-- NOTE(review): event_id is a grouping key, so this presumably yields one row
+-- per event rather than per session; drop event_id for a true session rollup
+-- once downstream models stop selecting it -- TODO confirm.
+with events as (
+    select * from {{ ref('stg_postgres_events') }}
+),
+final as (
+    select
+        event_id,
+        session_id,
+        user_id,
+        sum(case when event_type = 'add_to_cart' then 1 else 0 end) as add_to_carts,
+        sum(case when event_type = 'checkout' then 1 else 0 end) as checkouts,
+        sum(case when event_type = 'package_shipped' then 1 else 0 end) as package_shippeds,
+        sum(case when event_type = 'page_view' then 1 else 0 end) as page_views,
+        min(event_created_at_utc) as first_session_event_at_utc,
+        max(event_created_at_utc) as last_session_event_at_utc
+    from events
+    group by event_id, session_id, user_id
+)
+
+select * from final
diff --git a/greenery/models/marts/fct_users_sessions.sql b/greenery/models/marts/fct_users_sessions.sql
new file mode 100644
index 000000000..eee907ff7
--- /dev/null
+++ b/greenery/models/marts/fct_users_sessions.sql
@@ -0,0 +1,29 @@
+{{
+  config(
+    materialized = 'table'
+  )
+}}
+
+-- Event counters from int_session_events_agg enriched with user attributes.
+with session_events_agg as (
+    select * from {{ ref('int_session_events_agg') }}
+),
+users as (
+    select * from {{ ref('stg_postgres_users') }}
+)
+
+select
+    session_events_agg.session_id,
+    session_events_agg.event_id,
+    users.user_first_name,
+    users.user_last_name,
+    users.user_email,
+    session_events_agg.page_views,
+    session_events_agg.add_to_carts,
+    session_events_agg.checkouts,
+    session_events_agg.package_shippeds
+from session_events_agg
+-- Join on user_id: event_id and user_id are different identifiers, so the
+-- previous event_id = user_id condition would not attach the right user.
+left join users
+    on session_events_agg.user_id = users.user_id
diff --git a/greenery/models/staging/postgres/stg_postgres_orders.yml b/greenery/models/staging/postgres/stg_postgres_orders.yml
new file mode 100644
index 000000000..34bd52a0e
--- /dev/null
+++ b/greenery/models/staging/postgres/stg_postgres_orders.yml
@@ -0,0 +1,23 @@
+version: 2
+
+# NOTE(review): dbt documents the top-level key as `models:`; `model:` is
+# presumably ignored, so nothing below is applied yet. It is left unchanged
+# because stg_postgres_orders is already listed in postgres_model.yml and, as
+# far as I know, dbt rejects a model described in two files -- rename the key
+# and drop that other entry in one change (confirm).
+model:
+  - name: stg_postgres_orders
+    description: all orders, all the time!
+    columns:
+      - name: order_id
+        description: unique identifier for an order
+        tests:
+          - not_null
+      - name: user_id
+        description: user identifier related to who placed the order
+      - name: order_cost
+        description: cost of the order less shipping
+      - name: shipping_cost
+        description: shipping cost of the order
+      - name: order_total
+        description: total order cost
From fda6efdd0da02e0c22274b79e4b995fa83b1716c Mon Sep 17 00:00:00 2001
From: David-Adejumo
Date: Wed, 28 Jun 2023 13:32:22 +0000
Subject: [PATCH 10/10] New Product requirements for the business

---
Review notes (text below the three-dash line is not part of the commit message):
* config(): key lower-cased to `materialized`, the spelling dbt documents.
* int_session_events_macro_agg: selects user_id / session_id, the column names
  stg_postgres_events actually exposes (it has no event_user_guid or
  event_session_guid). The model relies on the dbt_utils package; no
  packages.yml appears in this series -- confirm it is installed.
* fct_user_sessions: min()/max() now aggregate event_created_at_utc (an alias
  inside the function call is not valid SQL, and the staging model does not
  expose created_at); users are read through ref(); users are joined on
  user_id and session_length on session_id.
* Depends on patch 09 exposing user_id from int_session_events_agg.
* File bodies were re-indented, hunk sizes and the diffstat were recomputed;
  blob ids on the `index` lines were NOT regenerated.

 .../int_session_events_macro_agg.sql          | 23 ++++++++++
 .../marts/product/fct_user_sessions.sql       | 41 +++++++++++++++++++
 2 files changed, 64 insertions(+)
 create mode 100644 greenery/models/Intermediate /int_session_events_macro_agg.sql
 create mode 100644 greenery/models/marts/product/fct_user_sessions.sql

diff --git a/greenery/models/Intermediate /int_session_events_macro_agg.sql b/greenery/models/Intermediate /int_session_events_macro_agg.sql
new file mode 100644
index 000000000..45c7d9ae3
--- /dev/null
+++ b/greenery/models/Intermediate /int_session_events_macro_agg.sql
@@ -0,0 +1,23 @@
+{{
+  config(
+    materialized = 'table'
+  )
+}}
+
+{%-
+  set event_types = dbt_utils.get_column_values(
+    table = ref('stg_postgres_events'),
+    column = 'event_type',
+    order_by = 'event_type asc'
+  )
+-%}
+
+-- One counter column per distinct event_type, generated by the loop below.
+select
+    user_id,
+    session_id
+    {%- for event_type in event_types %},
+    sum(case when event_type = '{{ event_type }}' then 1 else 0 end) as {{ event_type }}s
+    {%- endfor %}
+from {{ ref('stg_postgres_events') }}
+group by user_id, session_id
diff --git a/greenery/models/marts/product/fct_user_sessions.sql b/greenery/models/marts/product/fct_user_sessions.sql
new file mode 100644
index 000000000..a151f2648
--- /dev/null
+++ b/greenery/models/marts/product/fct_user_sessions.sql
@@ -0,0 +1,41 @@
+{{
+  config(
+    materialized = 'table'
+  )
+}}
+
+-- session_length: first and last event timestamp per session_id.
+with session_length as (
+    select
+        session_id,
+        min(event_created_at_utc) as first_event,
+        max(event_created_at_utc) as last_event
+    from {{ ref('stg_postgres_events') }}
+    group by session_id
+),
+session_events_agg as (
+    select * from {{ ref('int_session_events_agg') }}
+),
+users as (
+    -- ref() so dbt resolves the relation and records the dependency.
+    select * from {{ ref('stg_postgres_users') }}
+)
+
+select
+    session_events_agg.session_id,
+    session_events_agg.event_id,
+    users.user_first_name,
+    users.user_last_name,
+    users.user_email,
+    session_events_agg.page_views,
+    session_events_agg.add_to_carts,
+    session_events_agg.checkouts,
+    session_events_agg.package_shippeds,
+    session_length.first_event as first_session_event,
+    session_length.last_event as last_session_event,
+    datediff('minute', session_length.first_event, session_length.last_event) as session_length_minutes
+from session_events_agg
+left join users
+    on session_events_agg.user_id = users.user_id
+left join session_length
+    on session_events_agg.session_id = session_length.session_id