Skip to content

Commit

Permalink
Fix/country names (#246)
Browse files Browse the repository at this point in the history
  • Loading branch information
coryamanda authored Dec 24, 2024
2 parents 2730e2e + c0a9f89 commit 950b679
Show file tree
Hide file tree
Showing 9 changed files with 379 additions and 16 deletions.
7 changes: 5 additions & 2 deletions dbt/data/_seed_files.yml
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,9 @@ seeds:
- name: seed_course_names

- name: seed_country_iso_metadata
description: country-level categorizations and metadata provided by the global team, used primarily for creating regional groupings and segmentations

- name: seed_cs_state_grad_requirement
description: states that have passed CS grad requirements, current as of October 2024

Expand All @@ -122,8 +125,8 @@ seeds:
description: districts enrolled in the district program, as of October 2024

- name: seed_districts_target
description: |
This data is exported from Hubspot on a monthly basis to compile a list of target districts. Data last exported: December 2024.
description: This data is exported from Hubspot on a monthly basis to compile a list of target districts. Data last exported December 2024.

- name: seed_hoc_internal_tutorials
description: reference list of Code.org HOC tutorials (differentiated from 3rd party) provided by Bethany on 2024-10-29

Expand Down
251 changes: 251 additions & 0 deletions dbt/data/seed_country_iso_metadata.csv

Large diffs are not rendered by default.

41 changes: 41 additions & 0 deletions dbt/macros/country_normalization.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
{#
    country_normalization(raw_country_name)

    Renders a SQL `case` expression that rewrites known variant spellings of a
    country name to one lowercase spelling.

    Args:
        raw_country_name: SQL expression (typically a column name) that is
            interpolated verbatim into the generated SQL.

    Value of the rendered expression:
        - the mapped name, when the lowercased input equals a listed variant;
        - NULL, when the input is the empty string;
        - the lowercased input otherwise (a NULL input matches no branch and
          therefore stays NULL).

    NOTE(review): the target spellings presumably mirror the `country` column
    of seed_country_iso_metadata -- confirm against the seed.
    NOTE(review): the bare alias 'congo' is mapped to the Democratic Republic;
    confirm that this is what the source data means by it.
    NOTE(review): the two 'saint-barthélemy' aliases look identical here; they
    may differ only in Unicode normalization form -- verify before removing one.
#}
{% macro country_normalization(raw_country_name) %}
{% set lowered = 'lower(' ~ raw_country_name ~ ')' %}
case
    when {{ lowered }} = 'åland'
        then 'åland islands'
    when {{ lowered }} = 'brunei'
        then 'brunei darussalam'
    when {{ lowered }} = 'cape verde'
        then 'cabo verde'
    when {{ lowered }} = 'cocos [keeling] islands'
        then 'cocos (keeling) islands'
    when {{ lowered }} in (
        'dr congo',
        'congo',
        'congo, the democratic republic of the'
    )
        then 'congo, democratic republic of'
    when {{ lowered }} in (
        'republic of the congo',
        'congo republic'
    )
        then 'congo, republic of'
    when {{ lowered }} in (
        'ivory coast',
        'cote d''ivoire'
    )
        then 'côte d''ivoire'
    when {{ lowered }} = 'czech republic'
        then 'czechia'
    when {{ lowered }} = 'swaziland'
        then 'eswatini'
    when {{ lowered }} = 'iran, islamic republic of'
        then 'iran'
    when {{ lowered }} = 'hashemite kingdom of jordan'
        then 'jordan'
    when {{ lowered }} = 'lao people''s democratic republic'
        then 'laos'
    when {{ lowered }} = 'republic of lithuania'
        then 'lithuania'
    when {{ lowered }} = 'macau'
        then 'macao'
    when {{ lowered }} = 'federated states of micronesia'
        then 'micronesia, federated states of'
    when {{ lowered }} in (
        'republic of moldova',
        'moldova'
    )
        then 'moldova, republic of'
    when {{ lowered }} = 'principality of monaco'
        then 'monaco'
    when {{ lowered }} = 'myanmar [burma]'
        then 'myanmar'
    when {{ lowered }} = 'the netherlands'
        then 'netherlands'
    when {{ lowered }} = 'korea, democratic people''s republic of'
        then 'north korea'
    when {{ lowered }} = 'macedonia'
        then 'north macedonia'
    when {{ lowered }} = 'palestinian territory'
        then 'palestine'
    when {{ lowered }} = 'russian federation'
        then 'russia'
    when {{ lowered }} in (
        'saint-barthélemy',
        'saint-barthélemy'
    )
        then 'saint barthélemy'
    when {{ lowered }} = 'saint helena'
        then 'saint helena, ascension, and tristan da cunha'
    when {{ lowered }} = 'st kitts and nevis'
        then 'saint kitts and nevis'
    when {{ lowered }} = 'st vincent and grenadines'
        then 'saint vincent and the grenadines'
    when {{ lowered }} = 'sao tome and principe'
        then 'são tomé and príncipe'
    when {{ lowered }} = 'slovak republic'
        then 'slovakia'
    when {{ lowered }} in (
        'republic of korea',
        'korea, republic of'
    )
        then 'south korea'
    when {{ lowered }} = 'syrian arab republic'
        then 'syria'
    when {{ lowered }} = 'tanzania, united republic of'
        then 'tanzania'
    when {{ lowered }} in (
        'democratic republic of timor-leste',
        'east timor'
    )
        then 'timor-leste'
    when {{ lowered }} = 'turkey'
        then 'türkiye'
    when {{ lowered }} = 'u.s. minor outlying islands'
        then 'united states minor outlying islands'
    when {{ raw_country_name }} = ''
        then null
    else {{ lowered }}
end
{% endmacro %}
18 changes: 13 additions & 5 deletions dbt/models/marts/hoc/dim_hoc_starts.sql
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,11 @@ internal_tutorials as (
from {{ ref('seed_hoc_internal_tutorials') }}
),

country_metadata as (
select *
from {{ref('dim_country_reference')}}
),

final as (
select
hoc_activity.hoc_start_id
Expand All @@ -34,18 +39,21 @@ final as (
then 1
else 0
end as is_flagged_for_quality
, hoc_activity.city as city
, hoc_activity.country as country
, city
, hoc_activity.country as country
, country_metadata.iso2 as country_code
, hoc_activity.state
, hoc_activity.state_code
--, hoc_activity.country_code
from hoc_activity
from hoc_activity
join school_years as sy
on hoc_activity.started_at
between sy.started_at
and sy.ended_at
left join internal_tutorials as it
on hoc_activity.tutorial = it.tutorial_codes )
on hoc_activity.tutorial = it.tutorial_codes
left join country_metadata
on hoc_activity.country = country_metadata.country
)

select *
from final
47 changes: 47 additions & 0 deletions dbt/models/marts/misc/_misc_models.yml
Original file line number Diff line number Diff line change
Expand Up @@ -155,5 +155,52 @@ models:
description: 1 if the ambassador indicated they took csa, 0 otherwise
- name: took_csd
description: 1 if the ambassador indicated they took csd, 0 otherwise
config:
tags: ['released']

- name: dim_country_reference
description: |
this model provides country-level metadata provided by the global team, primarily used for creating standard regional groupings
columns:
- name: iso2
description: 2-letter ISO2 country code
data_tests:
- not_null
- unique
- name: country
description: Country name as defined by ISO2
data_tests:
- not_null
- unique
- name: region
description: High-level region as defined by the Global team
data_tests:
- not_null
- name: subregion
description: Subregion as defined by the Global team
data_tests:
- not_null
- name: iso_region
description: Region as defined in the ISO country classification
data_tests:
- not_null
- name: iso_subregion
description: Subregion as defined in the ISO country classification
data_tests:
- not_null
- name: worldbank_code
description: 3-letter country code used by Worldbank. NULL for countries / regions not used in the Worldbank data. XXA and XXB substituted for Taiwan and Vatican city
data_tests:
- unique
- name: population
description: Country population in 2022. Data source is https://data.worldbank.org/indicator/sp.pop.totl. Data for Taiwan and Vatican city supplied manually. In case of unavailable data, population is set as 1.
data_tests:
- not_null
- name: income_group
description: Country income group categorization according to Worldbank (as of December 2024)
- name: primary_language
description: Defined by the Global team from the languages available in the Code.org platform as of December 2024. In case of multilingual countries, one of the languages is selected as primary for reporting simplicity. In case the country's language is not used on the Code.org platform, the primary foreign language is indicated in brackets (e.g. "other (French)" means the country has a different primary language, but French is the most widely spoken foreign language)
data_tests:
- not_null
config:
tags: ['released']
6 changes: 6 additions & 0 deletions dbt/models/marts/misc/dim_country_reference.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
/*
    Model: dim_country_reference
    Author: Cory
    Date: 2024-11-30
    Description: ISO codes and regions for use by the Global team.
                 Every column of the seed_country_iso_metadata seed is
                 passed through unchanged.
*/

with country_iso_metadata as (
    select *
    from {{ ref('seed_country_iso_metadata') }}
)

select *
from country_iso_metadata
8 changes: 8 additions & 0 deletions dbt/models/staging/dashboard/_dashboard__models.yml
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,14 @@ models:
- name: school_id
data_tests:
- not_null
- name: stg_dashboard__user_geos
description: |
Staging model for `user_geos` source data
columns:
- name: user_id
description: unique id for each user
data_tests:
- not_null

- name: stg_dashboard__users
description: |
Expand Down
4 changes: 2 additions & 2 deletions dbt/models/staging/dashboard/stg_dashboard__user_geos.sql
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ final as (
lower(city) as city,
lower(state) as state_name,
postal_code,
lower(country) as country,
{{ country_normalization('country')}} as country,
is_international,
us_intl,

Expand All @@ -35,4 +35,4 @@ final as (
)

select *
from final
from final
13 changes: 6 additions & 7 deletions dbt/models/staging/pegasus_pii/stg_pegasus_pii__hoc_activity.sql
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,16 @@ hoc_starts as (
company,
tutorial,
coalesce(started_at, pixel_started_at, pixel_finished_at) as started_at,
country_code,
state_code,
city,
country,
state
from {{ ref("base_pegasus_pii__hoc_activity") }}
lower(city) as city,
{{ country_normalization('country') }} as country,
lower(state) as state
from
{{ ref('base_pegasus_pii__hoc_activity') }}
{% if is_incremental() %}

where coalesce(started_at, pixel_started_at, pixel_finished_at) > (select max(started_at) from {{ this }} )

{% endif %}
{% endif %}
)

select *
Expand Down

0 comments on commit 950b679

Please sign in to comment.