diff --git a/.gitignore b/.gitignore index 49f147cb98..1ad9d634bd 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,5 @@ target/ dbt_packages/ logs/ + +venv \ No newline at end of file diff --git a/.user.yml b/.user.yml new file mode 100644 index 0000000000..00ce2feeec --- /dev/null +++ b/.user.yml @@ -0,0 +1 @@ +id: d2acdfe4-0365-48cc-a665-d08289354915 diff --git a/README.md b/README.md index 24eb3b1e66..cf59e7adba 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,61 @@ -# Indicium Academy +# Desafio Final LightHouse + +# Adventure Works dbt + +## Estrutura do Projeto + +- models/: Contém todos os modelos dbt. +- staging/: Modelos de staging que preparam dados brutos para uso posterior. +- marts/: Modelos de marts que agregam e transformam dados para análise. +- tests/: Contém testes para garantir a qualidade dos dados. +- macros/: Contém macros reutilizáveis para o projeto. + +## Configurações de ambiente + +### Ativando o virtual environment + +O virtual environment deve ser ativado a cada novo dia através do comando em bash no Windows: + +- `source venv/Scripts/activate` + +Para desativar o ambiente virtual, basta rodar: + +- `deactivate` + +### Comandos do dbt + +Para confirmar se o **profiles.yml** e o **dbt_project.yml** estão configurados e rodando corretamente, além de verificar novamente as dependências e as conexões necessárias: + +- `dbt debug` + +Para instalar os pacotes descritos no arquivo *packages.yml*, pode-se rodar: + +- `dbt deps` + +Para fazer testes nas sources: + +- `dbt test --select "source:*"` + +Para refazer os modelos e atualizar os metadados: + +- `dbt run --full-refresh` + +### Comandos de git + +Para remover da área de stage um arquivo adicionado com `git add` que não deve entrar no commit: + +- `git reset caminho/do/arquivo` + + + + +# Readme do repositório clonado + +## Indicium Academy Repositório para ser utilizado no desafio para a obtenção da certificação de Analytics Engineer by Indicium. 
Faça o fork deste repositório e o utilize durante o desafio para fazer a insgestão das tabelas do SAP do Adventure Works. -## Instruções +### Instruções Todas as tabelas do banco fonte do SAP da Adventure Works serão carregadas como seeds pelo dbt. Os arquivos .csv com os dados já estão na pasta de seeds. @@ -12,12 +65,12 @@ Para fazer o carregamento de todas as tabelas usem o comando: Para carregar uma tabela especifíca utilizem o comando - `dbt seed -s nome_do_csv` -### Problemas comuns +#### Problemas comuns Em caso a linha de comando do dbt fique com o status de estar sempre carregando, ou, o job do comando `dbt seed` fique rodando indefinitivamente mesmo após as 64 tabelas forem carregadas você precisará reiniciar o terminal. Para isso, clique nos três pontos no canto inferior direito ou no lado direito da linha de comando e escolha a opção `Restart IDE`. -## Recursos: +### Recursos: - Learn more about dbt [in the docs](https://docs.getdbt.com/docs/introduction) - Check out [Discourse](https://discourse.getdbt.com/) for commonly asked questions and answers - Join the [dbt community](http://community.getbdt.com/) to learn from other analytics engineers diff --git a/dbt_project.yml b/dbt_project.yml index 2e3cef948b..bdd66ef41b 100644 --- a/dbt_project.yml +++ b/dbt_project.yml @@ -2,12 +2,12 @@ # Name your project! Project names should contain only lowercase characters # and underscores. A good package name should reflect your organization's # name or the intended use of these models -name: 'my_new_project' # <-- Name of the project. +name: 'adventureworks_dbt' # <-- Name of the project. version: '1.0.0' config-version: 2 # This setting configures which "profile" dbt uses for this project. -profile: 'default' +profile: 'adventureworks' # These configurations specify where dbt should look for different types of files. 
# The `source-paths` config, for example, states that models in this project can be @@ -32,14 +32,19 @@ clean-targets: # directories to be removed by `dbt clean` # as tables. These settings can be overridden in the individual model files # using the `{{ config(...) }}` macro. models: - my_new_project: # <-- Name of the project. If you renamed your project you have to change this as well - + adventureworks_dbt: # <-- Name of the project. If you renamed your project you have to change this as well +persist_docs: + relation: true + columns: true # Applies to all files under models/example/ - example: - materialized: view + staging: + +materialized: view + marts: + +schema: prod + +materialized: table seeds: - my_new_project: # <-- Name of the project. If you renamed your project you have to change this as well + adventureworks_dbt: # <-- Name of the project. If you renamed your project you have to change this as well sap_adventure_works: +schema: sap_adw \ No newline at end of file diff --git a/models/example.sql b/models/example.sql deleted file mode 100644 index 9a3709f06d..0000000000 --- a/models/example.sql +++ /dev/null @@ -1 +0,0 @@ -select 1 as example \ No newline at end of file diff --git a/models/marts/agg_sales.sql b/models/marts/agg_sales.sql new file mode 100644 index 0000000000..54245b02bd --- /dev/null +++ b/models/marts/agg_sales.sql @@ -0,0 +1,57 @@ +/* Sales fact rows enriched with region and sales person attributes (row-level left joins; no GROUP BY is applied) */ +with + fct_order_details as ( + select + * + from {{ ref('fct_order_details') }} + ) + + , dim_region as ( + select + * + from {{ ref('dim_region') }} + ) + + , dim_sales_person as ( + select + * + from {{ ref('dim_sales_person') }} + ) + + , final_agg as ( + select + {{ dbt_utils.generate_surrogate_key([ + "dim_region.territory_id" + , "dim_region.stateprovince_id" + , "dim_region.country_region_name" + , "fct_order_details.salesorder_id" + ]) }} as aggsales_sk + , dim_region.region_sk as region_fk + , 
dim_sales_person.salesperson_sk as salesperson_fk + , fct_order_details.orderdetail_sk as orderdetail_fk + , dim_sales_person.businessentity_id + , dim_region.geographical_region + , dim_region.territory_name + , dim_region.country_region_name + , dim_region.state_province_name + , dim_region.territory_sales_ytd + , dim_region.territory_sales_last_year + , dim_region.territory_cost_ytd + , dim_region.territory_cost_last_year + , dim_sales_person.person_name + , fct_order_details.payment_method + , fct_order_details.subtotal + , fct_order_details.total_due + , fct_order_details.orderqty + , fct_order_details.unitprice + , fct_order_details.unitprice_discount + , fct_order_details.order_date + from fct_order_details + left join dim_region + on fct_order_details.region_fk = dim_region.region_sk + left join dim_sales_person + on fct_order_details.salesperson_fk = dim_sales_person.salesperson_sk + ) + +select * +from final_agg \ No newline at end of file diff --git a/models/marts/agg_sales.yml b/models/marts/agg_sales.yml new file mode 100644 index 0000000000..31c23b2682 --- /dev/null +++ b/models/marts/agg_sales.yml @@ -0,0 +1,89 @@ +version: 2 + +models: + - name: agg_sales + description: Aggregates sales information by salesperson and region + columns: + - name: aggsales_sk + description: The Primary Key of table. + tests: + - not_null + - unique + + - name: region_fk + description: Foreign Key for the region. + tests: + - not_null + - relationships: + to: ref('dim_region') + field: region_sk + + - name: salesperson_fk + description: Foreign Key for the sales person. + tests: + - relationships: + to: ref('dim_sales_person') + field: salesperson_sk + + - name: orderdetail_fk + description: Foreign Key for the order details. + tests: + - relationships: + to: ref('fct_order_details') + field: orderdetail_sk + + - name: businessentity_id + description: Natural Key for the sales person. + + - name: geographical_region + description: Name of geographical region. 
+ + - name: territory_name + description: Name of the territory. + + - name: country_region_name + description: Name of the country or region. + + - name: state_province_name + description: Name of the state or province. + + - name: territory_sales_ytd + description: Year-to-date sales total. + + - name: territory_sales_last_year + description: Last year sales total. + + - name: territory_cost_ytd + description: Year-to-date cost total. + + - name: territory_cost_last_year + description: Last year cost total. + + - name: person_name + description: Person's name. + + - name: payment_method + description: Payment method. + + - name: subtotal + description: Total value of items before tax and freight. + + - name: total_due + description: Total amount due for the order. + + - name: orderqty + description: Quantity of the product sold. + + - name: unitprice + description: Unit price of the product. + + - name: unitprice_discount + description: Discount applied per unit. + + - name: order_date + description: Date for the sales order. 
+ tests: + - relationships: + to: ref('dim_dates') + field: date_day + diff --git a/models/marts/agg_sales_by_store.sql b/models/marts/agg_sales_by_store.sql new file mode 100644 index 0000000000..154c30f933 --- /dev/null +++ b/models/marts/agg_sales_by_store.sql @@ -0,0 +1,59 @@ +/* Store sales: fact rows left-joined to the region, customer and product dimensions, keeping only rows that have a store_id (no GROUP BY is applied) */ +with + fct_order_details as ( + select + * + from {{ ref('fct_order_details') }} + ) + + , dim_region as ( + select + * + from {{ ref('dim_region') }} + ) + + , dim_customers as ( + select + * + from {{ ref('dim_customers') }} + ) + + , dim_products as ( + select + * + from {{ ref('dim_products') }} + ) + + , final_agg as ( + select + fct_order_details.salesorder_id + , dim_customers.store_id + , dim_products.product_id + , dim_region.territory_id + , dim_region.stateprovince_id + , dim_customers.store_name + , dim_region.geographical_region + , dim_region.country_region_name + , dim_region.territory_name + , dim_region.state_province_name + , dim_region.isonlystateprovinceflag + , fct_order_details.total_due + , fct_order_details.orderqty + , fct_order_details.unitprice + , dim_products.product_name + , dim_products.subcategory_name + , dim_products.category_name + , dim_products.total_quantity + , fct_order_details.order_date + from fct_order_details + left join dim_region + on fct_order_details.region_fk = dim_region.region_sk + left join dim_customers + on fct_order_details.customers_fk = dim_customers.customers_sk + left join dim_products + on fct_order_details.products_fk = dim_products.products_sk + ) + +select * +from final_agg +where store_id is not null \ No newline at end of file diff --git a/models/marts/agg_sales_by_store.yml b/models/marts/agg_sales_by_store.yml new file mode 100644 index 0000000000..7eed213fe7 --- /dev/null +++ b/models/marts/agg_sales_by_store.yml @@ -0,0 +1,66 @@ +version: 2 + +models: + - name: agg_sales_by_store + description: Aggregates sales information by store customers 
+ columns: + - name: salesorder_id + description: Natural Key for the sales order. + + - name: store_id + description: Key for the store associated with the customer. + + - name: product_id + description: Natural Key for the product. + + - name: territory_id + description: Natural Key for the territory. + + - name: stateprovince_id + description: Natural Key for the state province. + + - name: store_name + description: Store's name. + + - name: geographical_region + description: Name of geographical region. + + - name: country_region_name + description: Name of the country or region. + + - name: territory_name + description: Name of the territory. + + - name: state_province_name + description: Name of the state or province. + + - name: isonlystateprovinceflag + description: Indicates if the state/province is unique for country. + + - name: total_due + description: Total amount due for the order. + + - name: orderqty + description: Quantity of the product sold. + + - name: unitprice + description: Unit price of the product. + + - name: product_name + description: Product name. + + - name: subcategory_name + description: Name of the product subcategory. + + - name: category_name + description: Name of the product category. + + - name: total_quantity + description: Quantity of the product in stock. + + - name: order_date + description: Date for the sales order. 
+ tests: + - relationships: + to: ref('dim_dates') + field: date_day diff --git a/models/marts/dim_customers.sql b/models/marts/dim_customers.sql new file mode 100644 index 0000000000..74041b8b0c --- /dev/null +++ b/models/marts/dim_customers.sql @@ -0,0 +1,78 @@ +with + person as ( + select + businessentity_id + , person_name + from {{ ref('stg_person') }} + ) + + , customer as ( + select + customer_id + , person_id + , store_id + , territory_id + , customer_category + from {{ ref('stg_customer') }} + ) + + , store as ( + select + businessentity_id + , store_name + from {{ ref('stg_store') }} + ) + + , emailaddress as ( + select + businessentity_id + , email_address + from {{ ref('stg_emailaddress') }} + ) + + , salesterritory as ( + select + territory_id + , territory_name + , geographical_region + from {{ ref('stg_salesterritory') }} + ) + + , deduping_salesterritory as ( + select + territory_id + , territory_name + , geographical_region + , row_number ( ) over (partition by territory_id order by territory_id) as rownumber + from salesterritory + ) + + , final_customers as ( + select + {{ dbt_utils.generate_surrogate_key([ + "customer.customer_id" + ]) }} as customers_sk + , customer.customer_id + , customer.person_id + , customer.store_id + , customer.territory_id + , customer.customer_category + , deduping_salesterritory.territory_name + , deduping_salesterritory.geographical_region + , person.person_name + , store.store_name + , emailaddress.email_address + from customer + left join person + on customer.person_id = person.businessentity_id + left join store + on customer.store_id = store.businessentity_id + left join emailaddress + on person.businessentity_id = emailaddress.businessentity_id + left join deduping_salesterritory + on customer.territory_id = deduping_salesterritory.territory_id + and deduping_salesterritory.rownumber = 1 + ) + +select * +from final_customers diff --git a/models/marts/dim_customers.yml b/models/marts/dim_customers.yml new file 
mode 100644 index 0000000000..c7489a88e0 --- /dev/null +++ b/models/marts/dim_customers.yml @@ -0,0 +1,41 @@ +version: 2 + +models: + - name: dim_customers + description: Contains information about customers. + columns: + - name: customers_sk + description: The Primary Key of the customer. + tests: + - not_null + - unique + + - name: customer_id + description: The Natural Key of the customer. + + - name: person_id + description: Foreign Key for the person associated with the customer. + + - name: store_id + description: Foreign Key for the store associated with the customer. + + - name: territory_id + description: Foreign Key for the region associated with the customer. + + - name: customer_category + description: Category of the customer. + + - name: territory_name + description: Name of the territory. + + - name: geographical_region + description: Name of geographical region. + + - name: person_name + description: Person's name. + + - name: store_name + description: Store's name. + + - name: email_address + description: Email address. 
diff --git a/models/marts/dim_dates.sql b/models/marts/dim_dates.sql new file mode 100644 index 0000000000..07658c8003 --- /dev/null +++ b/models/marts/dim_dates.sql @@ -0,0 +1,28 @@ +/* Date dimension built with dbt_date.get_date_dimension for the range 1990-01-01 to 2090-12-31 */ +with + raw_generated_data as ( + {{ dbt_date.get_date_dimension("1990-01-01", "2090-12-31") }} + ) + + , selecting as ( + select + date_day + , day_of_week + , day_of_week_name + , day_of_week_name_short + , day_of_month + , day_of_year + , month_of_year + , month_name + , month_name_short + , quarter_of_year + , quarter_start_date + , quarter_end_date + , year_number + , year_start_date + , year_end_date + from raw_generated_data + ) + +select * +from selecting \ No newline at end of file diff --git a/models/marts/dim_dates.yml b/models/marts/dim_dates.yml new file mode 100644 index 0000000000..e7a7b0487d --- /dev/null +++ b/models/marts/dim_dates.yml @@ -0,0 +1,53 @@ +version: 2 + +models: + - name: dim_dates + description: Date dimension table. + columns: + - name: date_day + description: The specific calendar date (YYYY-MM-DD format). + tests: + - not_null + - unique + + - name: day_of_week + description: The numeric representation of the day of the week. + + - name: day_of_week_name + description: Full name of the day of the week. + + - name: day_of_week_name_short + description: Abbreviated name of the day of the week. + + - name: day_of_month + description: The day of the month. + + - name: day_of_year + description: The day of the year. + + - name: month_of_year + description: The numeric representation of the month. + + - name: month_name + description: Full name of the month. + + - name: month_name_short + description: Abbreviated name of the month. + + - name: quarter_of_year + description: The quarter of the year. + + - name: quarter_start_date + description: The start date of the quarter for the given date. + + - name: quarter_end_date + description: The end date of the quarter for the given date. 
+ + - name: year_number + description: The year associated with the date. + + - name: year_start_date + description: The start date of the year for the given date. + + - name: year_end_date + description: The end date of the year for the given date. diff --git a/models/marts/dim_products.sql b/models/marts/dim_products.sql new file mode 100644 index 0000000000..8aade57aab --- /dev/null +++ b/models/marts/dim_products.sql @@ -0,0 +1,56 @@ +with + product as ( + select + product_id + , product_name + , product_subcategory_id + from {{ ref('stg_product') }} + ) + + , subcategory as ( + select + product_subcategory_id + , product_category_id + , subcategory_name + from {{ ref('stg_productsubcategory') }} + ) + + , category as ( + select + product_category_id + ,category_name + from {{ ref('stg_productcategory') }} + ) + + , inventory as ( + select + product_id + , sum(quantity) as total_quantity + from {{ ref('stg_productinventory') }} + group by product_id + ) + + + , final_products as ( + select + {{ dbt_utils.generate_surrogate_key([ + "product.product_id" + , "subcategory.product_category_id" + , "subcategory.product_subcategory_id" + ]) }} as products_sk + , product.product_id + , product.product_name + , subcategory.subcategory_name + , category.category_name + , inventory.total_quantity + from product + left join inventory + on product.product_id = inventory.product_id + left join subcategory + on product.product_subcategory_id = subcategory.product_subcategory_id + left join category + on subcategory.product_category_id = category.product_category_id + ) + +select * +from final_products diff --git a/models/marts/dim_products.yml b/models/marts/dim_products.yml new file mode 100644 index 0000000000..c1ab67d3fa --- /dev/null +++ b/models/marts/dim_products.yml @@ -0,0 +1,26 @@ +version: 2 + +models: + - name: dim_products + description: Contains information about products. + columns: + - name: products_sk + description: The Primary Key of the products. 
+ tests: + - not_null + - unique + + - name: product_id + description: Natural Key for the product. + + - name: product_name + description: Product name. + + - name: subcategory_name + description: Name of the product subcategory. + + - name: category_name + description: Name of the product category. + + - name: total_quantity + description: Quantity of the product in stock. diff --git a/models/marts/dim_region.sql b/models/marts/dim_region.sql new file mode 100644 index 0000000000..6dcf9d3a05 --- /dev/null +++ b/models/marts/dim_region.sql @@ -0,0 +1,59 @@ +with + salesterritory as ( + select + territory_id + , country_region_code + , territory_name + , geographical_region + , territory_sales_ytd + , territory_sales_last_year + , territory_cost_ytd + , territory_cost_last_year + from {{ ref('stg_salesterritory') }} + ) + + , stateprovince as ( + select + stateprovince_id + , country_region_code + , territory_id + , state_province_name + , isonlystateprovinceflag + from {{ ref('stg_stateprovince') }} + ) + + , countryregion as ( + select + country_region_code + , country_region_name + from {{ ref('stg_countryregion') }} + ) + + , final_region as ( + select + {{ dbt_utils.generate_surrogate_key([ + "salesterritory.territory_id" + , "stateprovince.stateprovince_id" + , "countryregion.country_region_name" + ]) }} as region_sk + , salesterritory.territory_id + , stateprovince.stateprovince_id + , stateprovince.isonlystateprovinceflag + , salesterritory.geographical_region + , salesterritory.territory_name + , countryregion.country_region_name + , stateprovince.state_province_name + , salesterritory.territory_sales_ytd + , salesterritory.territory_sales_last_year + , salesterritory.territory_cost_ytd + , salesterritory.territory_cost_last_year + from salesterritory + left join stateprovince + on salesterritory.territory_id = stateprovince.territory_id + left join countryregion + on salesterritory.country_region_code = countryregion.country_region_code + + ) + +select * 
+from final_region \ No newline at end of file diff --git a/models/marts/dim_region.yml b/models/marts/dim_region.yml new file mode 100644 index 0000000000..c3b0d99ac2 --- /dev/null +++ b/models/marts/dim_region.yml @@ -0,0 +1,44 @@ +version: 2 + +models: + - name: dim_region + description: Contains information about regions. + columns: + - name: region_sk + description: The Primary Key of the region. + tests: + - not_null + - unique + + - name: territory_id + description: Natural Key for the territory. + + - name: stateprovince_id + description: Natural Key for the state province. + + - name: isonlystateprovinceflag + description: Indicates if the state/province is unique for country. + + - name: geographical_region + description: Name of geographical region. + + - name: territory_name + description: Name of the territory. + + - name: country_region_name + description: Name of the country or region. + + - name: state_province_name + description: Name of the state or province. + + - name: territory_sales_ytd + description: Year-to-date sales total. + + - name: territory_sales_last_year + description: Last year sales total. + + - name: territory_cost_ytd + description: Year-to-date cost total. + + - name: territory_cost_last_year + description: Last year cost total. 
\ No newline at end of file diff --git a/models/marts/dim_sales_person.sql b/models/marts/dim_sales_person.sql new file mode 100644 index 0000000000..517d0cc816 --- /dev/null +++ b/models/marts/dim_sales_person.sql @@ -0,0 +1,30 @@ +with + person as ( + select + businessentity_id + , person_name + from {{ ref('stg_person') }} + ) + + , salesperson as ( + select + businessentity_id + , territory_id + from {{ ref('stg_salesperson') }} + ) + + , final_salesperson as ( + select + {{ dbt_utils.generate_surrogate_key([ + "salesperson.businessentity_id" + ]) }} as salesperson_sk + , salesperson.businessentity_id + , salesperson.territory_id + , person.person_name + from salesperson + left join person + on salesperson.businessentity_id = person.businessentity_id + ) + +select * +from final_salesperson \ No newline at end of file diff --git a/models/marts/dim_sales_person.yml b/models/marts/dim_sales_person.yml new file mode 100644 index 0000000000..c522082e0b --- /dev/null +++ b/models/marts/dim_sales_person.yml @@ -0,0 +1,20 @@ +version: 2 + +models: + - name: dim_sales_person + description: Contains information about sales person. + columns: + - name: salesperson_sk + description: The Primary Key of the sales person. + tests: + - not_null + - unique + + - name: person_name + description: Person's name. + + - name: businessentity_id + description: Natural Key for the sales person. + + - name: territory_id + description: Foreign Key for the region associated with the salesperson. 
\ No newline at end of file diff --git a/models/marts/dim_sales_reason.sql b/models/marts/dim_sales_reason.sql new file mode 100644 index 0000000000..b57cbb90fd --- /dev/null +++ b/models/marts/dim_sales_reason.sql @@ -0,0 +1,31 @@ +with + salesreason as ( + select + sales_reason_id + , sales_reason_category + from {{ ref('stg_salesreason') }} + ) + + , salesorderheader_salesreason as ( + select + salesorder_id + , sales_reason_id + from {{ ref('stg_salesorderheadersalesreason') }} + ) + + , final_salesreason as ( + select + {{ dbt_utils.generate_surrogate_key([ + "salesorderheader_salesreason.sales_reason_id" + , "salesorderheader_salesreason.salesorder_id" + ]) }} as salesreason_sk + , salesorderheader_salesreason.sales_reason_id + , salesorderheader_salesreason.salesorder_id + , salesreason.sales_reason_category + from salesorderheader_salesreason + left join salesreason + on salesorderheader_salesreason.sales_reason_id = salesreason.sales_reason_id + ) + +select * +from final_salesreason \ No newline at end of file diff --git a/models/marts/dim_sales_reason.yml b/models/marts/dim_sales_reason.yml new file mode 100644 index 0000000000..3cab5b3949 --- /dev/null +++ b/models/marts/dim_sales_reason.yml @@ -0,0 +1,20 @@ +version: 2 + +models: + - name: dim_sales_reason + description: Contains information about sales reason. + columns: + - name: salesreason_sk + description: The Primary Key of the sales reason. + tests: + - not_null + - unique + + - name: sales_reason_id + description: Natural Key for the sales reason. + + - name: salesorder_id + description: Foreign Key for the sales order. + + - name: sales_reason_category + description: Reason for the sale. 
\ No newline at end of file diff --git a/models/marts/dim_ship_method.sql b/models/marts/dim_ship_method.sql new file mode 100644 index 0000000000..2bf295f744 --- /dev/null +++ b/models/marts/dim_ship_method.sql @@ -0,0 +1,13 @@ +with + shipmethod as ( + select + {{ dbt_utils.generate_surrogate_key([ + "shipmethod_id" + ]) }} as shipmethod_sk + , shipmethod_id + , ship_method_name + from {{ ref('stg_shipmethod') }} + ) + +select * +from shipmethod \ No newline at end of file diff --git a/models/marts/dim_ship_method.yml b/models/marts/dim_ship_method.yml new file mode 100644 index 0000000000..f1d5bc96e6 --- /dev/null +++ b/models/marts/dim_ship_method.yml @@ -0,0 +1,17 @@ +version: 2 + +models: + - name: dim_ship_method + description: Contains information about ship method. + columns: + - name: shipmethod_sk + description: The Primary Key of the ship method. + tests: + - not_null + - unique + + - name: shipmethod_id + description: Natural Key for the ship method. + + - name: ship_method_name + description: Name of the shipping method. 
\ No newline at end of file diff --git a/models/marts/dim_special_offer.sql b/models/marts/dim_special_offer.sql new file mode 100644 index 0000000000..960a3cc0de --- /dev/null +++ b/models/marts/dim_special_offer.sql @@ -0,0 +1,34 @@ +with + specialoffer as ( + select + specialoffer_id + + , offer_type + , offer_category + from {{ ref('stg_specialoffer') }} + ) + + , specialofferproduct as ( + select + specialoffer_id + , product_id + from {{ ref('stg_specialofferproduct') }} + ) + + , final_special_offer as ( + select + {{ dbt_utils.generate_surrogate_key([ + "specialoffer.specialoffer_id" + , "specialofferproduct.product_id" + ]) }} as specialoffer_sk + , specialoffer.specialoffer_id + , specialoffer.offer_type + , specialoffer.offer_category + , specialofferproduct.product_id + from specialoffer + left join specialofferproduct + on specialoffer.specialoffer_id = specialofferproduct.specialoffer_id + ) + +select * +from final_special_offer \ No newline at end of file diff --git a/models/marts/dim_special_offer.yml b/models/marts/dim_special_offer.yml new file mode 100644 index 0000000000..bb493f901f --- /dev/null +++ b/models/marts/dim_special_offer.yml @@ -0,0 +1,23 @@ +version: 2 + +models: + - name: dim_special_offer + description: Contains information about special offers. + columns: + - name: specialoffer_sk + description: The Primary Key of the special offer. + tests: + - not_null + - unique + + - name: specialoffer_id + description: Natural Key for the promotional offer. + + - name: offer_type + description: Category of the promotion. + + - name: offer_category + description: If the promotion is from reseller or customer. + + - name: product_id + description: Identifier for the associated product. 
diff --git a/models/marts/fct_order_details.sql b/models/marts/fct_order_details.sql new file mode 100644 index 0000000000..63770b5f1b --- /dev/null +++ b/models/marts/fct_order_details.sql @@ -0,0 +1,155 @@ +/* Sales fact keyed by salesorder_id: each order header is joined to one detail line and to the surrogate keys of its dimensions. */ +with + customers as ( + select + * + from {{ ref('dim_customers') }} + ) + + , products as ( + select * + from {{ ref('dim_products') }} + ) + + , region as ( + select * + from {{ ref('dim_region') }} + ) + + , deduping_region as ( + /* One dim_region row per territory; ordering by stateprovince_id then region_sk makes the row numbered 1 reproducible across runs. */ + select + region_sk + , territory_id + , territory_name + , row_number ( ) over (partition by territory_id order by stateprovince_id, region_sk) as rownumber + from region + ) + + , sales_person as ( + select * + from {{ ref('dim_sales_person') }} + ) + + , sales_reason as ( + select * + from {{ ref('dim_sales_reason') }} + ) + + , deduping_sales_reason as ( + /* One sales reason per order; ordering by sales_reason_id makes the row numbered 1 reproducible across runs. */ + select + salesreason_sk + , salesorder_id + , sales_reason_category + , row_number ( ) over (partition by salesorder_id order by sales_reason_id) as rownumber + from sales_reason + ) + + , special_offer as ( + select + * + from {{ ref('dim_special_offer') }} + ) + + , deduping_special_offer as ( + /* One dim_special_offer row per offer; ordering by product_id then specialoffer_sk makes the row numbered 1 reproducible across runs. */ + select + specialoffer_sk + , specialoffer_id + , row_number ( ) over (partition by specialoffer_id order by product_id, specialoffer_sk) as rownumber + from special_offer + ) + + , shipmethod as ( + select + * + from {{ ref('dim_ship_method')}} + ) + + , orders_header as ( + select + salesorder_id + , sales_person_id + , customer_id + , territory_id + , shipmethod_id + , payment_method + , order_date + , subtotal + , total_due + from {{ ref('stg_salesorderheader') }} + ) + + , orders_detail as ( + select + salesorderdetail_id + , salesorder_id + , specialoffer_id + , product_id + , orderqty + , unitprice + , unitprice_discount + from {{ ref('stg_salesorderdetail')}} + ) + + , deduping_orders_detail as ( + /* NOTE(review): detail lines are numbered per order by salesorderdetail_id so a rownumber = 1 filter picks the lowest id; orderqty and unitprice then describe a single line item - confirm this grain is intended. */ + select + salesorderdetail_id + , salesorder_id + , specialoffer_id + , product_id + , orderqty + , unitprice + , unitprice_discount + , row_number ( ) over (partition by salesorder_id order by salesorderdetail_id)
as rownumber + from orders_detail + ) + + , order_detail_final as ( + select + {{ dbt_utils.generate_surrogate_key([ + "orders_header.salesorder_id" + ]) }} as orderdetail_sk + , customers.customers_sk as customers_fk + , deduping_region.region_sk as region_fk + , shipmethod.shipmethod_sk as shipmethod_fk + , deduping_special_offer.specialoffer_sk as specialoffer_fk + , products.products_sk as products_fk + , sales_person.salesperson_sk as salesperson_fk + , deduping_sales_reason.salesreason_sk as salesreason_fk + , deduping_orders_detail.salesorder_id + , deduping_orders_detail.product_id + , orders_header.payment_method + , orders_header.subtotal + , orders_header.total_due + , deduping_orders_detail.orderqty + , deduping_orders_detail.unitprice + , deduping_orders_detail.unitprice_discount + , orders_header.order_date + from orders_header + left join deduping_orders_detail + on orders_header.salesorder_id = deduping_orders_detail.salesorder_id + and deduping_orders_detail.rownumber = 1 + left join customers + on orders_header.customer_id = customers.customer_id + left join shipmethod + on orders_header.shipmethod_id = shipmethod.shipmethod_id + left join products + on deduping_orders_detail.product_id = products.product_id + left join sales_person + on sales_person.businessentity_id = orders_header.sales_person_id + left join deduping_special_offer + on deduping_orders_detail.specialoffer_id = deduping_special_offer.specialoffer_id + and deduping_special_offer.rownumber = 1 + left join deduping_sales_reason + on orders_header.salesorder_id = deduping_sales_reason.salesorder_id + and deduping_sales_reason.rownumber = 1 + left join deduping_region + on orders_header.territory_id = deduping_region.territory_id + and deduping_region.rownumber = 1 + ) + +select * +from order_detail_final \ No newline at end of file diff --git a/models/marts/fct_order_details.yml b/models/marts/fct_order_details.yml new file mode 100644 index 0000000000..d914e061e6 --- /dev/null +++ 
b/models/marts/fct_order_details.yml @@ -0,0 +1,96 @@ +version: 2 + +models: + - name: fct_order_details + description: Stores information about sales order item details. + columns: + - name: orderdetail_sk + description: The Primary Key of the order details. + tests: + - not_null + - unique + + - name: customers_fk + description: Foreign Key for the customer. + tests: + - not_null + - relationships: + to: ref('dim_customers') + field: customers_sk + + - name: region_fk + description: Foreign Key for the region. + tests: + - not_null + - relationships: + to: ref('dim_region') + field: region_sk + + - name: shipmethod_fk + description: Foreign Key for the ship method. + tests: + - not_null + - relationships: + to: ref('dim_ship_method') + field: shipmethod_sk + + - name: specialoffer_fk + description: Foreign Key for the special offer. + tests: + - not_null + - relationships: + to: ref('dim_special_offer') + field: specialoffer_sk + + - name: products_fk + description: Foreign Key for the product. + tests: + - not_null + - relationships: + to: ref('dim_products') + field: products_sk + + - name: salesperson_fk + description: Foreign Key for the sales person. + tests: + - relationships: + to: ref('dim_sales_person') + field: salesperson_sk + + - name: salesreason_fk + description: Foreign Key for the sales reason. + tests: + - relationships: + to: ref('dim_sales_reason') + field: salesreason_sk + + - name: salesorder_id + description: Natural Key for the sales order. + + - name: product_id + description: Natural Key for the product. + + - name: order_date + description: Date for the sales order. + tests: + - relationships: + to: ref('dim_dates') + field: date_day + + - name: payment_method + description: Payment method. + + - name: subtotal + description: Total value of items before tax and freight. + + - name: total_due + description: Total amount due for the order. + + - name: orderqty + description: Quantity of the product sold. 
+ + - name: unitprice + description: Unit price of the product. + + - name: unitprice_discount + description: Discount applied per unit. \ No newline at end of file diff --git a/models/staging/sources.yml b/models/staging/sources.yml new file mode 100644 index 0000000000..d914eb5f17 --- /dev/null +++ b/models/staging/sources.yml @@ -0,0 +1,265 @@ +version: 2 + +sources: + + - name: sap_adw + description: This is a replica of the Postgres database used by our client. + # Documenting only the data we will use in the data products + tables: + - name: address + description: Stores address information. + columns: + - name: ADDRESSID + description: Unique identifier for the address. + tests: + - not_null + - unique + - name: CITY + description: City name. + - name: STATEPROVINCEID + description: Identifier for the state or province. + + - name: countryregion_new + description: Stores country and region details. + columns: + - name: COUNTRYREGIONCODE + description: Code for the country or region. + tests: + - not_null + - unique + - name: NAME + description: Name of the country or region. + + - name: customer + description: Contains information about customers. + columns: + - name: CUSTOMERID + description: Unique identifier for the customer. + tests: + - not_null + - unique + - name: PERSONID + description: Foreign Key for the people associated with the customer. + - name: STOREID + description: Foreign Key for the store associated with the customer. + - name: TERRITORYID + description: Foreign Key for the region associated with the customer. + + - name: emailaddress + description: Lists email addresses for customers, both active and inactive customers. + columns: + - name: EMAILADDRESSID + description: Unique identifier for email address. + tests: + - not_null + - unique + - name: BUSINESSENTITYID + description: Foreign key for the associated person. + - name: EMAILADDRESS + description: Email address. + + - name: person + description: Contains information about people. 
+ columns: + - name: BUSINESSENTITYID + description: Unique identifier for the person. + tests: + - not_null + - unique + - name: FIRSTNAME + description: Person's first name. + - name: LASTNAME + description: Person's last name. + - name: EMAILPROMOTION + description: Email marketing preferences. + + - name: product + description: Contains information about products. + columns: + - name: PRODUCTID + description: Unique identifier for the product. + tests: + - not_null + - unique + - name: NAME + description: Product name. + - name: SAFETYSTOCKLEVEL + description: Minimum stock level for safety. + - name: PRODUCTSUBCATEGORYID + description: Identifier for the product subcategory. + + - name: productcategory + description: Contains information about product categories. + columns: + - name: PRODUCTCATEGORYID + description: Unique identifier for the product category. + tests: + - not_null + - unique + - name: NAME + description: Name of the product category. + + - name: productinventory + description: Stores information about product inventory. + columns: + - name: PRODUCTID + description: Unique identifier for the product. + tests: + - not_null + - name: QUANTITY + description: Quantity of the product in stock. + + - name: productsubcategory + description: Stores information about product subcategories. + columns: + - name: PRODUCTSUBCATEGORYID + description: Unique identifier for the product subcategory. + tests: + - not_null + - unique + - name: PRODUCTCATEGORYID + description: Identifier for the associated product category. + - name: NAME + description: Name of the product subcategory. + + - name: salesorderdetail + description: Stores information about sales order item details. + columns: + - name: SALESORDERDETAILID + description: Unique identifier for sales order detail. + - name: SALESORDERID + description: Foreign Key for the sales order header. + - name: PRODUCTID + description: Product sold in the order. 
+ - name: ORDERQTY + description: Quantity of the product sold. + - name: UNITPRICE + description: Unit price of the product. + - name: UNITPRICEDISCOUNT + description: Discount applied per unit. + - name: SPECIALOFFERID + description: Identifier for the special offer applied. + + - name: salesorderheader + description: Stores information about sales orders, including customer and shipping details. + columns: + - name: SALESORDERID + description: Unique identifier for the sales order. + tests: + - not_null + - unique + - name: SALESPERSONID + description: Foreign key for the associated sales person. + - name: ORDERDATE + description: Date when the order was placed. + - name: SUBTOTAL + description: Total value of items before tax and freight. + - name: TAXAMT + description: Tax amount for the order. + - name: FREIGHT + description: Shipping cost for the order. + - name: TOTALDUE + description: Total amount due for the order. + - name: CUSTOMERID + description: Identifier for the associated customer. + - name: TERRITORYID + description: Region for the order. + - name: SHIPMETHODID + description: Identifier for the shipping method used for the order. + - name: CREDITCARDID + description: Identifier for the payment method (credit card). + + - name: salesorderheadersalesreason + description: Links sales orders with reasons for the sale. + columns: + - name: SALESORDERID + description: Identifier for the sales order (foreign key). + - name: SALESREASONID + description: Identifier for the reason. + + - name: salesperson + description: Stores information about salespeople. + columns: + - name: BUSINESSENTITYID + description: Unique identifier for the salesperson. + - name: TERRITORYID + description: Region associated with the salesperson. + + - name: salesreason + description: Stores reasons for sales. + columns: + - name: SALESREASONID + description: Unique identifier for the sales reason. + tests: + - not_null + - unique + - name: NAME + description: Reason for the sale. 
+ + - name: salesterritory_new + description: This table defines sales territories. + columns: + - name: TERRITORYID + description: Unique identifier for the territory. + - name: NAME + description: Name of the territory. + - name: GEOGRAPHICALREGION + description: Name of geographical region. + - name: COUNTRYREGIONCODE + description: Country or region associated with the territory. + - name: SALESYTD + description: Year-to-date sales total. + - name: SALESLASTYEAR + description: Last year sales total. + - name: COSTYTD + description: Year-to-date cost total. + - name: COSTLASTYEAR + description: Last year cost total. + + - name: shipmethod + description: Stores information about shipping methods available for orders. + columns: + - name: SHIPMETHODID + description: Identifier to the shipping method used for the order. + - name: NAME + description: Shipping method used for the order. + + - name: specialoffer + description: Stores information about promotional offers. + columns: + - name: SPECIALOFFERID + description: Unique identifier for the promotional offer. + - name: TYPE + description: category of the promotion. + - name: CATEGORY + description: If the promotion is from reseller or customer. + + - name: specialofferproduct + description: This table associates products with promotional offers. + columns: + - name: SPECIALOFFERID + description: Identifier for the promotional offer. + - name: PRODUCTID + description: Identifier for the associated product. + + - name: stateprovince + description: Stores information about states or provinces in different countries. + columns: + - name: STATEPROVINCEID + description: Unique identifier for the state or province. + - name: COUNTRYREGIONCODE + description: Associated country or region. + - name: TERRITORYID + description: Region identifier for the state or province. + - name: NAME + description: Name of the state or province. + - name: ISONLYSTATEPROVINCEFLAG + description: If is only state province or not. 
+ + - name: store + description: Stores information about stores acting as customers. + columns: + - name: BUSINESSENTITYID + description: Unique identifier for the store. + - name: NAME + description: Name of the store. \ No newline at end of file diff --git a/models/staging/stg_address.sql b/models/staging/stg_address.sql new file mode 100644 index 0000000000..e64a0cf6a0 --- /dev/null +++ b/models/staging/stg_address.sql @@ -0,0 +1,15 @@ +with + source as ( + select + /* Selecting only the data we will use in the data products */ + /* Primary Key */ + "ADDRESSID" as address_id + /* Foreign Key */ + , "STATEPROVINCEID" as stateprovince_id + + , "CITY" as city_name + from {{ source('sap_adw','address') }} + ) + +select * +from source \ No newline at end of file diff --git a/models/staging/stg_countryregion.sql b/models/staging/stg_countryregion.sql new file mode 100644 index 0000000000..d961109270 --- /dev/null +++ b/models/staging/stg_countryregion.sql @@ -0,0 +1,12 @@ +with + source as ( + select + /* Selecting only the data we will use in the data products */ + /* Primary Key */ + "COUNTRYREGIONCODE" as country_region_code + , "NAME" as country_region_name + from {{ source('sap_adw','countryregion_new') }} + ) + +select * +from source \ No newline at end of file diff --git a/models/staging/stg_customer.sql b/models/staging/stg_customer.sql new file mode 100644 index 0000000000..de5918a3ad --- /dev/null +++ b/models/staging/stg_customer.sql @@ -0,0 +1,21 @@ +with + source as ( + select + /* Selecting only the data we will use in the data products */ + /* Primary Key */ + "CUSTOMERID" as customer_id + /* Foreign Key */ + , "PERSONID" as person_id + , "STOREID" as store_id + , "TERRITORYID" as territory_id + , case /* person_id and store_id below are the aliases defined above in this same select; this relies on the warehouse resolving aliases within one select list */ + when person_id is not null and store_id is not null then 'person and store' + when person_id is not null then 'person' + when store_id is not null then 'store' + else 'other' + end as customer_category + from {{ source('sap_adw','customer') }} + ) + + 
select * + from source \ No newline at end of file diff --git a/models/staging/stg_emailaddress.sql b/models/staging/stg_emailaddress.sql new file mode 100644 index 0000000000..af8b15f5c7 --- /dev/null +++ b/models/staging/stg_emailaddress.sql @@ -0,0 +1,12 @@ +with + source as ( + select + /* Selecting only the data we will use in the data products */ + "BUSINESSENTITYID" as businessentity_id + + , "EMAILADDRESS" as email_address + from {{ source('sap_adw','emailaddress') }} + ) + +select * +from source \ No newline at end of file diff --git a/models/staging/stg_person.sql b/models/staging/stg_person.sql new file mode 100644 index 0000000000..8993f87a1c --- /dev/null +++ b/models/staging/stg_person.sql @@ -0,0 +1,13 @@ +with + source as ( + select + /* Selecting only the data we will use in the data products */ + /* Primary Key */ + "BUSINESSENTITYID" as businessentity_id + + , CONCAT("FIRSTNAME", ' ', "LASTNAME") as person_name + from {{ source('sap_adw','person') }} + ) + +select * +from source \ No newline at end of file diff --git a/models/staging/stg_product.sql b/models/staging/stg_product.sql new file mode 100644 index 0000000000..ea2af39205 --- /dev/null +++ b/models/staging/stg_product.sql @@ -0,0 +1,14 @@ +with + source as ( + select + /* Selecting only the data we will use in the data products */ + /* Primary Key */ + "PRODUCTID" as product_id + /* Foreign Key */ + , "PRODUCTSUBCATEGORYID" as product_subcategory_id + + , "NAME" as product_name + from {{ source('sap_adw','product') }} + ) + +select * from source \ No newline at end of file diff --git a/models/staging/stg_productcategory.sql b/models/staging/stg_productcategory.sql new file mode 100644 index 0000000000..b4ceb5f3c2 --- /dev/null +++ b/models/staging/stg_productcategory.sql @@ -0,0 +1,12 @@ +with + source as ( + select + /* Selecting only the data we will use in the data products */ + /* Primary Key */ + "PRODUCTCATEGORYID" as product_category_id + + ,"NAME" as category_name + from {{ 
source('sap_adw','productcategory') }} + ) + +select * from source \ No newline at end of file diff --git a/models/staging/stg_productinventory.sql b/models/staging/stg_productinventory.sql new file mode 100644 index 0000000000..526fecd06c --- /dev/null +++ b/models/staging/stg_productinventory.sql @@ -0,0 +1,13 @@ +with + source as ( + select + /* Selecting only the data we will use in the data products */ + /* Primary Key - NOTE(review): sources.yml only tests PRODUCTID as not_null for this table, not unique; confirm it is a key on its own */ + "PRODUCTID" as product_id + + , "QUANTITY" as quantity + from {{ source('sap_adw','productinventory') }} + ) + +select * +from source \ No newline at end of file diff --git a/models/staging/stg_productsubcategory.sql b/models/staging/stg_productsubcategory.sql new file mode 100644 index 0000000000..cd8abf7e71 --- /dev/null +++ b/models/staging/stg_productsubcategory.sql @@ -0,0 +1,14 @@ +with + source as ( + select + /* Selecting only the data we will use in the data products */ + /* Primary Key */ + "PRODUCTSUBCATEGORYID" as product_subcategory_id + /* Foreign Key */ + , "PRODUCTCATEGORYID" as product_category_id + + , "NAME" as subcategory_name + from {{ source('sap_adw','productsubcategory') }} + ) + +select * from source \ No newline at end of file diff --git a/models/staging/stg_salesorderdetail.sql b/models/staging/stg_salesorderdetail.sql new file mode 100644 index 0000000000..82720370be --- /dev/null +++ b/models/staging/stg_salesorderdetail.sql @@ -0,0 +1,19 @@ +with + source as ( + select + /* Selecting only the data we will use in the data products */ + /* Primary Key */ + "SALESORDERDETAILID" as salesorderdetail_id + /* Foreign Key */ + , "SALESORDERID" as salesorder_id + , "SPECIALOFFERID" as specialoffer_id + , "PRODUCTID" as product_id + + , "ORDERQTY" as orderqty + , "UNITPRICE" as unitprice + , "UNITPRICEDISCOUNT" as unitprice_discount + from {{ source('sap_adw','salesorderdetail') }} + ) + +select * +from source \ No newline at end of file diff --git a/models/staging/stg_salesorderheader.sql 
b/models/staging/stg_salesorderheader.sql new file mode 100644 index 0000000000..44f877599d --- /dev/null +++ b/models/staging/stg_salesorderheader.sql @@ -0,0 +1,28 @@ +with + source as ( + select + /* Selecting only the data we will use in the data products */ + /* Primary Key */ + "SALESORDERID" as salesorder_id + /* Foreign Key */ + , "SALESPERSONID" as sales_person_id + , "CUSTOMERID" as customer_id + , "TERRITORYID" as territory_id + , "SHIPMETHODID" as shipmethod_id + , "CREDITCARDID" as creditcard_id + + , case /* creditcard_id is the alias of "CREDITCARDID" defined just above; any order without a card id is labelled 'Other Payment Methods' */ + when creditcard_id is not null then 'Card' + else 'Other Payment Methods' + end as payment_method + + , cast ("ORDERDATE" as timestamp) as order_date_time /* ORDERDATE is exposed three ways: full timestamp, calendar date, time of day */ + , cast(date_trunc('day', cast("ORDERDATE" as timestamp)) as date) as order_date + , cast(to_char(cast("ORDERDATE" as timestamp), 'HH24:MI:SS') as time) as order_time /* formatted as HH24:MI:SS before the cast, so only whole seconds are kept */ + , "SUBTOTAL" as subtotal + , "TOTALDUE" as total_due + from {{ source('sap_adw','salesorderheader') }} + ) + +select * +from source \ No newline at end of file diff --git a/models/staging/stg_salesorderheadersalesreason.sql b/models/staging/stg_salesorderheadersalesreason.sql new file mode 100644 index 0000000000..3564caa457 --- /dev/null +++ b/models/staging/stg_salesorderheadersalesreason.sql @@ -0,0 +1,11 @@ +with + source as ( + select + /* Selecting only the data we will use in the data products */ + "SALESORDERID" as salesorder_id + , "SALESREASONID" as sales_reason_id + from {{ source('sap_adw','salesorderheadersalesreason') }} + ) + +select * +from source \ No newline at end of file diff --git a/models/staging/stg_salesperson.sql b/models/staging/stg_salesperson.sql new file mode 100644 index 0000000000..b47173ecd8 --- /dev/null +++ b/models/staging/stg_salesperson.sql @@ -0,0 +1,14 @@ +with + source as ( + select + /* Selecting only the data we will use in the data products */ + /* Primary Key */ + "BUSINESSENTITYID" as businessentity_id + + /* Foreign Key */ + , "TERRITORYID" as territory_id + from {{ 
source('sap_adw','salesperson') }} + ) + +select * +from source \ No newline at end of file diff --git a/models/staging/stg_salesreason.sql b/models/staging/stg_salesreason.sql new file mode 100644 index 0000000000..d2706a93d6 --- /dev/null +++ b/models/staging/stg_salesreason.sql @@ -0,0 +1,13 @@ +with + source as ( + select + /* Selecting only the data we will use in the data products */ + /* Primary Key */ + "SALESREASONID" as sales_reason_id + + , "NAME" as sales_reason_category + from {{ source('sap_adw','salesreason') }} + ) + +select * +from source \ No newline at end of file diff --git a/models/staging/stg_salesterritory.sql b/models/staging/stg_salesterritory.sql new file mode 100644 index 0000000000..56ad1163e3 --- /dev/null +++ b/models/staging/stg_salesterritory.sql @@ -0,0 +1,20 @@ +with + source as ( + select + /* Selecting only the data we will use in the data products */ + /* Primary Key */ + "TERRITORYID" as territory_id + /* Foreign Key */ + , "COUNTRYREGIONCODE" as country_region_code + + , "NAME" as territory_name + , "GEOGRAPHICALREGION" as geographical_region + , "SALESYTD" as territory_sales_ytd + , "SALESLASTYEAR" as territory_sales_last_year + , "COSTYTD" as territory_cost_ytd + , "COSTLASTYEAR" as territory_cost_last_year + from {{ source('sap_adw','salesterritory_new') }} + ) + +select * +from source \ No newline at end of file diff --git a/models/staging/stg_shipmethod.sql b/models/staging/stg_shipmethod.sql new file mode 100644 index 0000000000..9a6700a237 --- /dev/null +++ b/models/staging/stg_shipmethod.sql @@ -0,0 +1,13 @@ +with + source as ( + select + /* Selecting only the data we will use in the data products */ + /* Primary Key */ + "SHIPMETHODID" as shipmethod_id + + , "NAME" as ship_method_name + from {{ source('sap_adw','shipmethod') }} + ) + +select * +from source \ No newline at end of file diff --git a/models/staging/stg_specialoffer.sql b/models/staging/stg_specialoffer.sql new file mode 100644 index 0000000000..3fb28c1b63 
--- /dev/null +++ b/models/staging/stg_specialoffer.sql @@ -0,0 +1,14 @@ +with + source as ( + select + /* Selecting only the data we will use in the data products */ + /* Primary Key */ + "SPECIALOFFERID" as specialoffer_id + + , "TYPE" as offer_type + , "CATEGORY" as offer_category + from {{ source('sap_adw','specialoffer') }} + ) + +select * +from source \ No newline at end of file diff --git a/models/staging/stg_specialofferproduct.sql b/models/staging/stg_specialofferproduct.sql new file mode 100644 index 0000000000..dd356b54eb --- /dev/null +++ b/models/staging/stg_specialofferproduct.sql @@ -0,0 +1,11 @@ +with + source as ( + select + /* Selecting only the data we will use in the data products */ + "SPECIALOFFERID" as specialoffer_id + , "PRODUCTID" as product_id + from {{ source('sap_adw','specialofferproduct') }} + ) + +select * +from source \ No newline at end of file diff --git a/models/staging/stg_stateprovince.sql b/models/staging/stg_stateprovince.sql new file mode 100644 index 0000000000..59c227803b --- /dev/null +++ b/models/staging/stg_stateprovince.sql @@ -0,0 +1,17 @@ +with + source as ( + select + /* Selecting only the data we will use in the data products */ + /* Primary Key */ + "STATEPROVINCEID" as stateprovince_id + /* Foreign Key */ + , "COUNTRYREGIONCODE" as country_region_code + , "TERRITORYID" as territory_id + + , "NAME" as state_province_name + , "ISONLYSTATEPROVINCEFLAG" as isonlystateprovinceflag + from {{ source('sap_adw','stateprovince') }} + ) + +select * +from source \ No newline at end of file diff --git a/models/staging/stg_store.sql b/models/staging/stg_store.sql new file mode 100644 index 0000000000..a324b8c3d4 --- /dev/null +++ b/models/staging/stg_store.sql @@ -0,0 +1,13 @@ +with + source as ( + select + /* Selecting only the data we will use in the data products */ + /* Primary Key */ + "BUSINESSENTITYID" as businessentity_id + + , "NAME" as store_name + from {{ source('sap_adw','store') }} + ) + +select * +from source \ 
No newline at end of file diff --git a/packages.yml b/packages.yml new file mode 100644 index 0000000000..09f716436e --- /dev/null +++ b/packages.yml @@ -0,0 +1,6 @@ +packages: + - package: dbt-labs/dbt_utils + version: 1.1.0 + + - package: calogica/dbt_date + version: [">=0.10.0", "<0.11.0"] \ No newline at end of file diff --git a/profiles.yml b/profiles.yml new file mode 100644 index 0000000000..10cce8b0c1 --- /dev/null +++ b/profiles.yml @@ -0,0 +1,14 @@ +adventureworks: + target: dev + outputs: + dev: + type: snowflake + account: WWTEWMZ.EZ21293 + host: WWTEWMZ-EZ21293.snowflakecomputing.com + user: DESAFIODIANA + password: "{{ env_var('DBT_ENV_SECRET_SNOWFLAKE_PASSWORD') }}" # secret is read from the environment; rotate the value that was previously committed + role: ACCOUNTADMIN + database: ADVENTURE_WORKS + warehouse: COMPUTE_WH + schema: DEV + threads: 4 \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000..d37c166b6b --- /dev/null +++ b/requirements.txt @@ -0,0 +1,71 @@ +agate==1.9.1 +annotated-types==0.7.0 +asn1crypto==1.5.1 +attrs==24.3.0 +babel==2.16.0 +backports.tarfile==1.2.0 +certifi==2024.12.14 +cffi==1.17.1 +charset-normalizer==3.4.1 +click==8.1.8 +colorama==0.4.6 +cryptography==44.0.0 +daff==1.3.46 +dbt-adapters==1.7.0 +dbt-common==1.11.0 +dbt-core==1.8.7 +dbt-extractor==0.5.1 +dbt-semantic-interfaces==0.5.1 +dbt-snowflake==1.8.4 +deepdiff==7.0.1 +filelock==3.16.1 +idna==3.10 +importlib-metadata==6.11.0 +importlib_resources==6.4.5 +isodate==0.6.1 +jaraco.classes==3.4.0 +jaraco.context==6.0.1 +jaraco.functools==4.1.0 +Jinja2==3.1.5 +jsonschema==4.23.0 +jsonschema-specifications==2023.12.1 +keyring==25.5.0 +leather==0.4.0 +Logbook==1.5.3 +MarkupSafe==2.1.5 +mashumaro==3.14 +minimal-snowplow-tracker==0.0.2 +more-itertools==10.5.0 +msgpack==1.1.0 +networkx==3.1 +ordered-set==4.1.0 +packaging==24.2 +parsedatetime==2.6 +pathspec==0.12.1 +pkgutil_resolve_name==1.3.10 +platformdirs==4.3.6 +protobuf==4.25.5 +pycparser==2.22 +pydantic==2.10.4 +pydantic_core==2.27.2 +PyJWT==2.9.0 +pyOpenSSL==24.3.0 
+python-dateutil==2.9.0.post0 +python-slugify==8.0.4 +pytimeparse==1.1.8 +pytz==2024.2 +pywin32-ctypes==0.2.3 +PyYAML==6.0.2 +referencing==0.35.1 +requests==2.32.3 +rpds-py==0.20.1 +six==1.17.0 +snowflake-connector-python==3.12.4 +sortedcontainers==2.4.0 +sqlparse==0.5.3 +text-unidecode==1.3 +tomlkit==0.13.2 +typing_extensions==4.12.2 +tzdata==2024.2 +urllib3==1.26.20 +zipp==3.20.2 diff --git a/seeds/sap_adventure_works/corrections/countryregion_new.csv b/seeds/sap_adventure_works/corrections/countryregion_new.csv new file mode 100644 index 0000000000..d8eecc237b --- /dev/null +++ b/seeds/sap_adventure_works/corrections/countryregion_new.csv @@ -0,0 +1,239 @@ +,countryregioncode,name,modifieddate +1,AD,Andorra,2008-04-30 00:00:00.000 +2,AE,United Arab Emirates,2008-04-30 00:00:00.000 +3,AF,Afghanistan,2008-04-30 00:00:00.000 +4,AG,Antigua and Barbuda,2008-04-30 00:00:00.000 +5,AI,Anguilla,2008-04-30 00:00:00.000 +6,AL,Albania,2008-04-30 00:00:00.000 +7,AM,Armenia,2008-04-30 00:00:00.000 +8,AN,Netherlands Antilles,2008-04-30 00:00:00.000 +9,AO,Angola,2008-04-30 00:00:00.000 +10,AQ,Antarctica,2008-04-30 00:00:00.000 +11,AR,Argentina,2008-04-30 00:00:00.000 +12,AS,American Samoa,2008-04-30 00:00:00.000 +13,AT,Austria,2008-04-30 00:00:00.000 +14,AU,Australia,2008-04-30 00:00:00.000 +15,AW,Aruba,2008-04-30 00:00:00.000 +16,AZ,Azerbaijan,2008-04-30 00:00:00.000 +17,BA,Bosnia and Herzegovina,2008-04-30 00:00:00.000 +18,BB,Barbados,2008-04-30 00:00:00.000 +19,BD,Bangladesh,2008-04-30 00:00:00.000 +20,BE,Belgium,2008-04-30 00:00:00.000 +21,BF,Burkina Faso,2008-04-30 00:00:00.000 +22,BG,Bulgaria,2008-04-30 00:00:00.000 +23,BH,Bahrain,2008-04-30 00:00:00.000 +24,BI,Burundi,2008-04-30 00:00:00.000 +25,BJ,Benin,2008-04-30 00:00:00.000 +26,BM,Bermuda,2008-04-30 00:00:00.000 +27,BN,Brunei,2008-04-30 00:00:00.000 +28,BO,Bolivia,2008-04-30 00:00:00.000 +29,BR,Brazil,2008-04-30 00:00:00.000 +30,BS,"Bahamas, The",2008-04-30 00:00:00.000 +31,BT,Bhutan,2008-04-30 00:00:00.000 
+32,BV,Bouvet Island,2008-04-30 00:00:00.000 +33,BW,Botswana,2008-04-30 00:00:00.000 +34,BY,Belarus,2008-04-30 00:00:00.000 +35,BZ,Belize,2008-04-30 00:00:00.000 +36,CA,Canada,2008-04-30 00:00:00.000 +37,CC,Cocos (Keeling) Islands,2008-04-30 00:00:00.000 +38,CD,Congo (DRC),2008-04-30 00:00:00.000 +39,CF,Central African Republic,2008-04-30 00:00:00.000 +40,CG,Congo,2008-04-30 00:00:00.000 +41,CH,Switzerland,2008-04-30 00:00:00.000 +42,CI,Côte d'Ivoire,2008-04-30 00:00:00.000 +43,CK,Cook Islands,2008-04-30 00:00:00.000 +44,CL,Chile,2008-04-30 00:00:00.000 +45,CM,Cameroon,2008-04-30 00:00:00.000 +46,CN,China,2008-04-30 00:00:00.000 +47,CO,Colombia,2008-04-30 00:00:00.000 +48,CR,Costa Rica,2008-04-30 00:00:00.000 +49,CS,Serbia and Montenegro,2008-04-30 00:00:00.000 +50,CU,Cuba,2008-04-30 00:00:00.000 +51,CV,Cape Verde,2008-04-30 00:00:00.000 +52,CX,Christmas Island,2008-04-30 00:00:00.000 +53,CY,Cyprus,2008-04-30 00:00:00.000 +54,CZ,Czech Republic,2008-04-30 00:00:00.000 +55,DE,Germany,2008-04-30 00:00:00.000 +56,DJ,Djibouti,2008-04-30 00:00:00.000 +57,DK,Denmark,2008-04-30 00:00:00.000 +58,DM,Dominica,2008-04-30 00:00:00.000 +59,DO,Dominican Republic,2008-04-30 00:00:00.000 +60,DZ,Algeria,2008-04-30 00:00:00.000 +61,EC,Ecuador,2008-04-30 00:00:00.000 +62,EE,Estonia,2008-04-30 00:00:00.000 +63,EG,Egypt,2008-04-30 00:00:00.000 +64,ER,Eritrea,2008-04-30 00:00:00.000 +65,ES,Spain,2008-04-30 00:00:00.000 +66,ET,Ethiopia,2008-04-30 00:00:00.000 +67,FI,Finland,2008-04-30 00:00:00.000 +68,FJ,Fiji Islands,2008-04-30 00:00:00.000 +69,FK,Falkland Islands (Islas Malvinas),2008-04-30 00:00:00.000 +70,FM,Micronesia,2008-04-30 00:00:00.000 +71,FO,Faroe Islands,2008-04-30 00:00:00.000 +72,FR,France,2008-04-30 00:00:00.000 +73,GA,Gabon,2008-04-30 00:00:00.000 +74,GB,United Kingdom,2008-04-30 00:00:00.000 +75,GD,Grenada,2008-04-30 00:00:00.000 +76,GE,Georgia,2008-04-30 00:00:00.000 +77,GF,French Guiana,2008-04-30 00:00:00.000 +78,GH,Ghana,2008-04-30 00:00:00.000 
+79,GI,Gibraltar,2008-04-30 00:00:00.000 +80,GL,Greenland,2008-04-30 00:00:00.000 +81,GM,"Gambia, The",2008-04-30 00:00:00.000 +82,GN,Guinea,2008-04-30 00:00:00.000 +83,GP,Guadeloupe,2008-04-30 00:00:00.000 +84,GQ,Equatorial Guinea,2008-04-30 00:00:00.000 +85,GR,Greece,2008-04-30 00:00:00.000 +86,GS,South Georgia and the South Sandwich Islands,2008-04-30 00:00:00.000 +87,GT,Guatemala,2008-04-30 00:00:00.000 +88,GU,Guam,2008-04-30 00:00:00.000 +89,GW,Guinea-Bissau,2008-04-30 00:00:00.000 +90,GY,Guyana,2008-04-30 00:00:00.000 +91,HK,Hong Kong SAR,2008-04-30 00:00:00.000 +92,HM,Heard Island and McDonald Islands,2008-04-30 00:00:00.000 +93,HN,Honduras,2008-04-30 00:00:00.000 +94,HR,Croatia,2008-04-30 00:00:00.000 +95,HT,Haiti,2008-04-30 00:00:00.000 +96,HU,Hungary,2008-04-30 00:00:00.000 +97,ID,Indonesia,2008-04-30 00:00:00.000 +98,IE,Ireland,2008-04-30 00:00:00.000 +99,IL,Israel,2008-04-30 00:00:00.000 +100,IN,India,2008-04-30 00:00:00.000 +101,IO,British Indian Ocean Territory,2008-04-30 00:00:00.000 +102,IQ,Iraq,2008-04-30 00:00:00.000 +103,IR,Iran,2008-04-30 00:00:00.000 +104,IS,Iceland,2008-04-30 00:00:00.000 +105,IT,Italy,2008-04-30 00:00:00.000 +106,JM,Jamaica,2008-04-30 00:00:00.000 +107,JO,Jordan,2008-04-30 00:00:00.000 +108,JP,Japan,2008-04-30 00:00:00.000 +109,KE,Kenya,2008-04-30 00:00:00.000 +110,KG,Kyrgyzstan,2008-04-30 00:00:00.000 +111,KH,Cambodia,2008-04-30 00:00:00.000 +112,KI,Kiribati,2008-04-30 00:00:00.000 +113,KM,Comoros,2008-04-30 00:00:00.000 +114,KN,Saint Kitts and Nevis,2008-04-30 00:00:00.000 +115,KP,North Korea,2008-04-30 00:00:00.000 +116,KR,Korea,2008-04-30 00:00:00.000 +117,KW,Kuwait,2008-04-30 00:00:00.000 +118,KY,Cayman Islands,2008-04-30 00:00:00.000 +119,KZ,Kazakhstan,2008-04-30 00:00:00.000 +120,LA,Laos,2008-04-30 00:00:00.000 +121,LB,Lebanon,2008-04-30 00:00:00.000 +122,LC,Saint Lucia,2008-04-30 00:00:00.000 +123,LI,Liechtenstein,2008-04-30 00:00:00.000 +124,LK,Sri Lanka,2008-04-30 00:00:00.000 +125,LR,Liberia,2008-04-30 00:00:00.000 
+126,LS,Lesotho,2008-04-30 00:00:00.000 +127,LT,Lithuania,2008-04-30 00:00:00.000 +128,LU,Luxembourg,2008-04-30 00:00:00.000 +129,LV,Latvia,2008-04-30 00:00:00.000 +130,LY,Libya,2008-04-30 00:00:00.000 +131,MA,Morocco,2008-04-30 00:00:00.000 +132,MC,Monaco,2008-04-30 00:00:00.000 +133,MD,Moldova,2008-04-30 00:00:00.000 +134,MG,Madagascar,2008-04-30 00:00:00.000 +135,MH,Marshall Islands,2008-04-30 00:00:00.000 +136,MK,"Macedonia, Former Yugoslav Republic of",2008-04-30 00:00:00.000 +137,ML,Mali,2008-04-30 00:00:00.000 +138,MM,Myanmar,2008-04-30 00:00:00.000 +139,MN,Mongolia,2008-04-30 00:00:00.000 +140,MO,Macao SAR,2008-04-30 00:00:00.000 +141,MP,Northern Mariana Islands,2008-04-30 00:00:00.000 +142,MQ,Martinique,2008-04-30 00:00:00.000 +143,MR,Mauritania,2008-04-30 00:00:00.000 +144,MS,Montserrat,2008-04-30 00:00:00.000 +145,MT,Malta,2008-04-30 00:00:00.000 +146,MU,Mauritius,2008-04-30 00:00:00.000 +147,MV,Maldives,2008-04-30 00:00:00.000 +148,MW,Malawi,2008-04-30 00:00:00.000 +149,MX,Mexico,2008-04-30 00:00:00.000 +150,MY,Malaysia,2008-04-30 00:00:00.000 +151,MZ,Mozambique,2008-04-30 00:00:00.000 +152,NA,Namibia,2008-04-30 00:00:00.000 +153,NC,New Caledonia,2008-04-30 00:00:00.000 +154,NE,Niger,2008-04-30 00:00:00.000 +155,NF,Norfolk Island,2008-04-30 00:00:00.000 +156,NG,Nigeria,2008-04-30 00:00:00.000 +157,NI,Nicaragua,2008-04-30 00:00:00.000 +158,NL,Netherlands,2008-04-30 00:00:00.000 +159,NO,Norway,2008-04-30 00:00:00.000 +160,NP,Nepal,2008-04-30 00:00:00.000 +161,NR,Nauru,2008-04-30 00:00:00.000 +162,NU,Niue,2008-04-30 00:00:00.000 +163,NZ,New Zealand,2008-04-30 00:00:00.000 +164,OM,Oman,2008-04-30 00:00:00.000 +165,PA,Panama,2008-04-30 00:00:00.000 +166,PE,Peru,2008-04-30 00:00:00.000 +167,PF,French Polynesia,2008-04-30 00:00:00.000 +168,PG,Papua New Guinea,2008-04-30 00:00:00.000 +169,PH,Philippines,2008-04-30 00:00:00.000 +170,PK,Pakistan,2008-04-30 00:00:00.000 +171,PL,Poland,2008-04-30 00:00:00.000 +172,PM,Saint Pierre and Miquelon,2008-04-30 
00:00:00.000 +173,PN,Pitcairn Islands,2008-04-30 00:00:00.000 +174,PR,Puerto Rico,2008-04-30 00:00:00.000 +175,PS,Palestinian Authority,2008-04-30 00:00:00.000 +176,PT,Portugal,2008-04-30 00:00:00.000 +177,PW,Palau,2008-04-30 00:00:00.000 +178,PY,Paraguay,2008-04-30 00:00:00.000 +179,QA,Qatar,2008-04-30 00:00:00.000 +180,RE,Réunion,2008-04-30 00:00:00.000 +181,RO,Romania,2008-04-30 00:00:00.000 +182,RU,Russia,2008-04-30 00:00:00.000 +183,RW,Rwanda,2008-04-30 00:00:00.000 +184,SA,Saudi Arabia,2008-04-30 00:00:00.000 +185,SB,Solomon Islands,2008-04-30 00:00:00.000 +186,SC,Seychelles,2008-04-30 00:00:00.000 +187,SD,Sudan,2008-04-30 00:00:00.000 +188,SE,Sweden,2008-04-30 00:00:00.000 +189,SG,Singapore,2008-04-30 00:00:00.000 +190,SH,Saint Helena,2008-04-30 00:00:00.000 +191,SI,Slovenia,2008-04-30 00:00:00.000 +192,SJ,Svalbard and Jan Mayen,2008-04-30 00:00:00.000 +193,SK,Slovakia,2008-04-30 00:00:00.000 +194,SL,Sierra Leone,2008-04-30 00:00:00.000 +195,SM,San Marino,2008-04-30 00:00:00.000 +196,SN,Senegal,2008-04-30 00:00:00.000 +197,SO,Somalia,2008-04-30 00:00:00.000 +198,SR,Suriname,2008-04-30 00:00:00.000 +199,ST,São Tomé and Príncipe,2008-04-30 00:00:00.000 +200,SV,El Salvador,2008-04-30 00:00:00.000 +201,SY,Syria,2008-04-30 00:00:00.000 +202,SZ,Swaziland,2008-04-30 00:00:00.000 +203,TC,Turks and Caicos Islands,2008-04-30 00:00:00.000 +204,TD,Chad,2008-04-30 00:00:00.000 +205,TF,French Southern and Antarctic Lands,2008-04-30 00:00:00.000 +206,TG,Togo,2008-04-30 00:00:00.000 +207,TH,Thailand,2008-04-30 00:00:00.000 +208,TJ,Tajikistan,2008-04-30 00:00:00.000 +209,TK,Tokelau,2008-04-30 00:00:00.000 +210,TL,Timor-Leste,2008-04-30 00:00:00.000 +211,TM,Turkmenistan,2008-04-30 00:00:00.000 +212,TN,Tunisia,2008-04-30 00:00:00.000 +213,TO,Tonga,2008-04-30 00:00:00.000 +214,TR,Turkey,2008-04-30 00:00:00.000 +215,TT,Trinidad and Tobago,2008-04-30 00:00:00.000 +216,TV,Tuvalu,2008-04-30 00:00:00.000 +217,TW,Taiwan,2008-04-30 00:00:00.000 +218,TZ,Tanzania,2008-04-30 00:00:00.000 
+219,UA,Ukraine,2008-04-30 00:00:00.000 +220,UG,Uganda,2008-04-30 00:00:00.000 +221,UM,U.S. Minor Outlying Islands,2008-04-30 00:00:00.000 +222,US,United States,2008-04-30 00:00:00.000 +223,UY,Uruguay,2008-04-30 00:00:00.000 +224,UZ,Uzbekistan,2008-04-30 00:00:00.000 +225,VA,Vatican City,2008-04-30 00:00:00.000 +226,VC,Saint Vincent and the Grenadine,2008-04-30 00:00:00.000 +227,VE,Venezuela,2008-04-30 00:00:00.000 +228,VG,"Virgin Islands, British",2008-04-30 00:00:00.000 +229,VI,"Virgin Islands, U.S.",2008-04-30 00:00:00.000 +230,VN,Vietnam,2008-04-30 00:00:00.000 +231,VU,Vanuatu,2008-04-30 00:00:00.000 +232,WF,Wallis and Futuna,2008-04-30 00:00:00.000 +233,WS,Samoa,2008-04-30 00:00:00.000 +234,YE,Yemen,2008-04-30 00:00:00.000 +235,YT,Mayotte,2008-04-30 00:00:00.000 +236,ZA,South Africa,2008-04-30 00:00:00.000 +237,ZM,Zambia,2008-04-30 00:00:00.000 +238,ZW,Zimbabwe,2008-04-30 00:00:00.000 \ No newline at end of file diff --git a/seeds/sap_adventure_works/corrections/salesterritory_new.csv b/seeds/sap_adventure_works/corrections/salesterritory_new.csv new file mode 100644 index 0000000000..ab3ce8c45c --- /dev/null +++ b/seeds/sap_adventure_works/corrections/salesterritory_new.csv @@ -0,0 +1,11 @@ +"territoryid","name","countryregioncode","geographicalregion","salesytd","saleslastyear","costytd","costlastyear","rowguid","modifieddate" +1,Northwest,US,North America,7887186.7882,3298694.4938,0,0,"43689a10-e30b-497f-b0de-11de20267ff7",2008-04-30 00:00:00.000 +2,Northeast,US,North America,2402176.8476,3607148.9371,0,0,"00fb7309-96cc-49e2-8363-0a1ba72486f2",2008-04-30 00:00:00.000 +3,Central,US,North America,3072175.118,3205014.0767,0,0,df6e7fd8-1a8d-468c-b103-ed8addb452c1,2008-04-30 00:00:00.000 +4,Southwest,US,North America,10510853.8739,5366575.7098,0,0,dc3e9ea0-7950-4431-9428-99dbcbc33865,2008-04-30 00:00:00.000 +5,Southeast,US,North America,2538667.2515,3925071.4318,0,0,"6dc4165a-5e4c-42d2-809d-4344e0ac75e7",2008-04-30 00:00:00.000 +6,Canada,CA,North 
America,6771829.1376,5693988.86,0,0,"06b4af8a-1639-476e-9266-110461d66b00",2008-04-30 00:00:00.000 +7,France,FR,Europe,4772398.3078,2396539.7601,0,0,bf806804-9b4c-4b07-9d19-706f2e689552,2008-04-30 00:00:00.000 +8,Germany,DE,Europe,3805202.3478,1307949.7917,0,0,"6d2450db-8159-414f-a917-e73ee91c38a9",2008-04-30 00:00:00.000 +9,Australia,AU,Pacific,5977814.9154,2278548.9776,0,0,"602e612e-dfe9-41d9-b894-27e489747885",2008-04-30 00:00:00.000 +10,United Kingdom,GB,Europe,5012905.3656,1635823.3967,0,0,"05fc7e1f-2dea-414e-9ecd-09d150516fb5",2008-04-30 00:00:00.000 diff --git a/tests/test_sum_qty.sql b/tests/test_sum_qty.sql new file mode 100644 index 0000000000..e31e4c51ca --- /dev/null +++ b/tests/test_sum_qty.sql @@ -0,0 +1,18 @@
+-- Singular dbt test: returns a row (and therefore fails) when the total order
+-- quantity for 2011-2013 in fct_order_details differs from the expected 24961.
+with
+    validation as (
+        select
+            -- sum() over zero rows is NULL, and `NULL != 24961` is never true,
+            -- so without coalesce an empty or fully filtered table would pass.
+            coalesce(sum(orderqty), 0) as sum_val
+        from {{ ref('fct_order_details') }}
+        -- Half-open range: selects the same rows as BETWEEN for pure dates,
+        -- and also keeps 2013-12-31 rows that carry a time component.
+        where order_date >= '2011-01-01'
+            and order_date < '2014-01-01'
+    )
+
+select sum_val
+from validation
+where sum_val != 24961
\ No newline at end of file