From d0b59458faf368b51ad8b6d44bce75ab895e6ee8 Mon Sep 17 00:00:00 2001 From: Rory Sawyer Date: Tue, 17 Oct 2023 10:36:41 -0400 Subject: [PATCH] feat: add block name with location to models This adds `display_name_with_location` to models that include block_name. Additionally, data types are added to model documentation. --- models/base/dim_course_blocks.sql | 3 ++- models/base/schema.yml | 21 +++++++++++++++ models/base/sources.yml | 4 ++- models/completion/fact_completions.sql | 1 + models/completion/schema.yml | 15 ++++++++++- models/enrollment/schema.yml | 7 +++++ models/forum/schema.yml | 24 +++++++++++++++++ models/grading/fact_grades.sql | 1 + models/grading/schema.yml | 13 ++++++++++ .../problems/fact_learner_problem_summary.sql | 4 +++ models/problems/fact_problem_responses.sql | 1 + models/problems/int_problem_hints.sql | 1 + models/problems/int_problem_results.sql | 1 + models/problems/schema.yml | 26 +++++++++++++++++++ models/video/fact_transcript_usage.sql | 1 + models/video/fact_video_plays.sql | 1 + models/video/schema.yml | 20 ++++++++++++++ packages.yml | 2 +- 18 files changed, 142 insertions(+), 4 deletions(-) create mode 100644 models/base/schema.yml create mode 100644 models/forum/schema.yml diff --git a/models/base/dim_course_blocks.sql b/models/base/dim_course_blocks.sql index e690ec93..7c764ec0 100644 --- a/models/base/dim_course_blocks.sql +++ b/models/base/dim_course_blocks.sql @@ -4,7 +4,8 @@ select courses.course_name as course_name, courses.course_run as course_run, blocks.location as block_id, - blocks.block_name as block_name + blocks.block_name as block_name, + blocks.display_name_with_location as display_name_with_location from {{ source('event_sink', 'course_block_names') }} blocks join {{ source('event_sink', 'course_names')}} courses diff --git a/models/base/schema.yml b/models/base/schema.yml new file mode 100644 index 00000000..04af7eec --- /dev/null +++ b/models/base/schema.yml @@ -0,0 +1,21 @@ +version: 2 + +models: + - name: dim_course_blocks + description: "A denormalized table of course block information" + columns: + - name: org + data_type: String + - name: course_key + data_type: String + - name: course_name + data_type: String + - name: course_run + data_type: String + - name: block_id + data_type: String + - name: block_name + data_type: String + - name: display_name_with_location + data_type: String + description: "The block's display name with section, subsection, and unit prepended to the name. This provides additional context when looking at block names and can help data consumers understand which block they are analyzing" diff --git a/models/base/sources.yml b/models/base/sources.yml index b9844439..72315d0e 100644 --- a/models/base/sources.yml +++ b/models/base/sources.yml @@ -85,7 +85,7 @@ sources: - name: org - name: verb_id - name: scaled_score - + - name: completions_events identifier: "{{ env_var('ASPECTS_COMPLETION_EVENTS_TABLE', 'completion_events') }}" columns: @@ -117,6 +117,8 @@ sources: - name: location - name: block_name - name: course_key + - name: graded + - name: display_name_with_location - name: course_names columns: diff --git a/models/completion/fact_completions.sql b/models/completion/fact_completions.sql index 033bc207..a0a45a29 100644 --- a/models/completion/fact_completions.sql +++ b/models/completion/fact_completions.sql @@ -23,6 +23,7 @@ select courses.course_run as course_run, completions.entity_id as entity_id, if(blocks.block_name != '', blocks.block_name, courses.course_name) as entity_name, + if(blocks.block_name != '', blocks.display_name_with_location, null) as entity_name_with_location, completions.actor_id as actor_id, cast(completions.scaled_progress as Float) as scaled_progress, case diff --git a/models/completion/schema.yml b/models/completion/schema.yml index 0afbc8e9..46421f7f 100644 --- a/models/completion/schema.yml +++ b/models/completion/schema.yml @@ -2,19 +2,32 @@ version: 2 models: - name: fact_completions + database: "{{ env_var('DBT_PROFILE_TARGET_DATABASE', 'reporting') }}" description: "One record per completion events for component" columns: - name: emission_time description: "Timestamp, to the second, of when this event was emitted" + data_type: DateTime64(3) - name: org + data_type: String - name: course_key + data_type: String - name: course_name + data_type: String - name: course_run + data_type: String - name: entity_id description: "The block ID or course key for the graded entity" + data_type: String - name: entity_name + data_type: String + - name: entity_name_with_location + data_type: Nullable(String) - name: actor_id - - name: progress_percent + data_type: String + - name: scaled_progress description: "A ratio between 0 and 1, inclusive, of the learner's progress" + data_type: Float32 - name: completion_bucket description: "A displayable value of progress sorted into 10% buckets. Useful for grouping progress together to show high-level learner performance" + data_type: String diff --git a/models/enrollment/schema.yml b/models/enrollment/schema.yml index 1caa99e3..a320516e 100644 --- a/models/enrollment/schema.yml +++ b/models/enrollment/schema.yml @@ -6,13 +6,20 @@ models: columns: - name: emission_time description: "Timestamp, to the second, of when this event was emitted" + data_type: DateTime - name: org + data_type: String - name: course_name + data_type: String - name: course_run + data_type: String - name: actor_id + data_type: String - name: enrollment_mode + data_type: LowCardinality(String) - name: enrollment_status description: "Whether a learner is actively enrolled in a course" tests: - accepted_values: values: ["registered", "unregistered"] + data_type: String diff --git a/models/forum/schema.yml b/models/forum/schema.yml new file mode 100644 index 00000000..336c177e --- /dev/null +++ b/models/forum/schema.yml @@ -0,0 +1,24 @@ +version: 2 + +models: + - name: fact_forum_interactions + description: "One record per forum interaction" + columns: + - name: event_id + data_type: UUID + - name: emission_time + data_type: DateTime64(3) + - name: org + data_type: String + - name: course_key + data_type: String + - name: course_name + data_type: String + - name: course_run + data_type: String + - name: object_id + data_type: String + - name: actor_id + data_type: String + - name: verb_id + data_type: LowCardinality(String) diff --git a/models/grading/fact_grades.sql b/models/grading/fact_grades.sql index fa48f4cb..0e31bc9e 100644 --- a/models/grading/fact_grades.sql +++ b/models/grading/fact_grades.sql @@ -27,6 +27,7 @@ select courses.course_run as course_run, grades.entity_id as entity_id, if(blocks.block_name != '', blocks.block_name, courses.course_name) as entity_name, + if(blocks.block_name != '', blocks.display_name_with_location, null) as entity_name_with_location, grades.grade_type as grade_type, grades.actor_id as actor_id, cast(grades.scaled_score as Float) as scaled_score, diff --git a/models/grading/schema.yml b/models/grading/schema.yml index 966903e2..068174b0 100644 --- a/models/grading/schema.yml +++ b/models/grading/schema.yml @@ -6,20 +6,33 @@ models: columns: - name: emission_time description: "Timestamp, to the second, of when this event was emitted" + data_type: DateTime64(3) - name: org + data_type: String - name: course_key + data_type: String - name: course_name + data_type: String - name: course_run + data_type: String - name: entity_id description: "The block ID or course key for the graded entity" + data_type: String - name: entity_name + data_type: String + - name: entity_name_with_location + data_type: Nullable(String) - name: grade_type description: "The type of object graded" tests: - accepted_values: values: ["course", "subsection", "problem"] + data_type: Nullable(String) - name: actor_id + data_type: String - name: scaled_score description: "A ratio between 0 and 1, inclusive, of the learner's grade" + data_type: Float32 - name: grade_bucket description: "A displayable value of grades sorted into 10% buckets. Useful for grouping grades together to show high-level learner performance" + data_type: String diff --git a/models/problems/fact_learner_problem_summary.sql b/models/problems/fact_learner_problem_summary.sql index 54ec384a..6944636a 100644 --- a/models/problems/fact_learner_problem_summary.sql +++ b/models/problems/fact_learner_problem_summary.sql @@ -8,6 +8,7 @@ with results_with_hints as ( course_run, problem_id, problem_name, + problem_name_with_location, actor_id, success, attempts, @@ -22,6 +23,7 @@ with results_with_hints as ( course_run, problem_id, problem_name, + problem_name_with_location, actor_id, null as success, null as attempts, @@ -47,6 +49,7 @@ select course_run, problem_id, problem_name, + problem_name_with_location, actor_id, coalesce(any(success), false) as success, coalesce(any(attempts), 0) as attempts, @@ -61,4 +64,5 @@ group by course_run, problem_id, problem_name, + problem_name_with_location, actor_id diff --git a/models/problems/fact_problem_responses.sql b/models/problems/fact_problem_responses.sql index be450f5b..89d347a0 100644 --- a/models/problems/fact_problem_responses.sql +++ b/models/problems/fact_problem_responses.sql @@ -22,6 +22,7 @@ select blocks.course_run as course_run, responses.problem_id as problem_id, blocks.block_name as problem_name, + blocks.display_name_with_location as problem_name_with_location, responses.actor_id as actor_id, responses.responses as responses, responses.success as success, diff --git a/models/problems/int_problem_hints.sql b/models/problems/int_problem_hints.sql index 501b99ee..a193a227 100644 --- a/models/problems/int_problem_hints.sql +++ b/models/problems/int_problem_hints.sql @@ -24,6 +24,7 @@ select blocks.course_run as course_run, hints.problem_id as problem_id, blocks.block_name as problem_name, + blocks.display_name_with_location as problem_name_with_location, hints.actor_id as actor_id, hints.help_type as help_type from diff --git a/models/problems/int_problem_results.sql b/models/problems/int_problem_results.sql index c5839ad5..1e0d3b91 100644 --- a/models/problems/int_problem_results.sql +++ b/models/problems/int_problem_results.sql @@ -71,6 +71,7 @@ select course_run, problem_id, problem_name, + problem_name_with_location, actor_id, responses, success, diff --git a/models/problems/schema.yml b/models/problems/schema.yml index 9cc292a3..e4fa7fce 100644 --- a/models/problems/schema.yml +++ b/models/problems/schema.yml @@ -5,39 +5,65 @@ models: description: "One record per learner per problem in a course" columns: - name: org + data_type: String - name: course_key + data_type: String - name: course_name + data_type: String - name: course_run + data_type: String - name: problem_id + data_type: String - name: problem_name + data_type: String + - name: problem_name_with_location + data_type: String - name: actor_id + data_type: String - name: success description: "The result of the last submission" tests: - not_null + data_type: Bool - name: attempts description: "The number of attempts made" tests: - not_null + data_type: Int16 - name: num_hints_displayed description: "The number of times a learner asked for a hint" + data_type: UInt64 - name: num_answers_displayed description: "The number of times a learner requested the answers for the problem" + data_type: UInt64 - name: fact_problem_responses description: "One record for each submitted response to a problem" columns: - name: emission_time + data_type: DateTime - name: org + data_type: String - name: course_key + data_type: String - name: course_name + data_type: String - name: course_run + data_type: String - name: problem_id + data_type: String - name: problem_name + data_type: String + - name: problem_name_with_location + data_type: String - name: actor_id + data_type: String - name: responses description: "The responses for this submission. If a problem has multiple parts, values for all parts will be in this field" + data_type: String - name: success description: "Boolean indicating whether the responses were correct" + data_type: Bool - name: attempts description: "Number indicating which attempt this was" + data_type: Int16 diff --git a/models/video/fact_transcript_usage.sql b/models/video/fact_transcript_usage.sql index aa3257a1..7ee162e2 100644 --- a/models/video/fact_transcript_usage.sql +++ b/models/video/fact_transcript_usage.sql @@ -20,6 +20,7 @@ select blocks.course_run as course_run, transcripts.video_id as video_id, blocks.block_name as video_name, + blocks.display_name_with_location as video_name_with_location, transcripts.actor_id as actor_id from transcripts diff --git a/models/video/fact_video_plays.sql b/models/video/fact_video_plays.sql index 33accd71..35d7d430 100644 --- a/models/video/fact_video_plays.sql +++ b/models/video/fact_video_plays.sql @@ -21,6 +21,7 @@ select blocks.course_run as course_run, plays.video_id as video_id, blocks.block_name as video_name, + blocks.display_name_with_location as video_name_with_location, plays.actor_id as actor_id from plays diff --git a/models/video/schema.yml b/models/video/schema.yml index 37ca13b0..0a8bd250 100644 --- a/models/video/schema.yml +++ b/models/video/schema.yml @@ -5,22 +5,42 @@ models: description: "One record for each time a learner played a video" columns: - name: emission_time + data_type: DateTime - name: org + data_type: String - name: course_key + data_type: String - name: course_name + data_type: String - name: course_run + data_type: String - name: video_id + data_type: String - name: video_name + data_type: String + - name: video_name_with_location + data_type: String - name: actor_id + data_type: String - name: fact_transcript_usage description: "One record for each time a transcript or closed caption was enabled" columns: - name: emission_time + data_type: DateTime - name: org + data_type: String - name: course_key + data_type: String - name: course_name + data_type: String - name: course_run + data_type: String - name: video_id + data_type: String - name: video_name + data_type: String + - name: video_name_with_location + data_type: String - name: actor_id + data_type: String diff --git a/packages.yml b/packages.yml index 67a82b7a..e76d00a6 100644 --- a/packages.yml +++ b/packages.yml @@ -1,5 +1,5 @@ packages: - package: dbt-labs/codegen - version: 0.9.0 + version: 0.11.0 - package: dbt-labs/dbt_utils version: 1.1.0