Skip to content

Commit

Permalink
Merge pull request openedx#132 from saraburns1/mvtest
Browse files Browse the repository at this point in the history
feat: Use parameterized view to fix memory issue
  • Loading branch information
saraburns1 authored Nov 14, 2024
2 parents 6e53552 + f474f40 commit b88bde7
Show file tree
Hide file tree
Showing 4 changed files with 168 additions and 176 deletions.
1 change: 1 addition & 0 deletions models/users/sources.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ sources:
- name: id
- name: user_id
- name: name
- name: username
- name: email
- name: meta
- name: courseware
Expand Down
18 changes: 0 additions & 18 deletions models/video/unit_tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -105,21 +105,3 @@ unit_tests:
format: sql
rows: |
select * from fact_video_engagement_expected
- name: test_watched_video_duration
model: watched_video_duration
config:
tags: 'ci'
given:
- input: ref('video_playback_events')
format: sql
rows: |
select * from video_playback_events
- input: ref('dim_course_blocks')
format: sql
rows: |
select * from dim_course_blocks
expect:
format: sql
rows: |
select * from watched_video_duration_expected
123 changes: 66 additions & 57 deletions models/video/watched_video_duration.sql
Original file line number Diff line number Diff line change
@@ -1,72 +1,75 @@
{{
config(
materialized="materialized_view",
schema=env_var("ASPECTS_XAPI_DATABASE", "xapi"),
engine=get_engine("ReplacingMergeTree()"),
order_by="(org,course_key,actor_id)",
post_hook="OPTIMIZE TABLE {{ this }} {{ on_cluster() }} FINAL",
)
}}

with
starts as (
data as (
select
event_id,
org,
course_key,
actor_id,
emission_time,
cast(video_position as Int32) as start_position,
splitByString('/xblock/', object_id)[-1] as video_id,
video_duration
from {{ ref("video_playback_events") }}
where verb_id in ('https://w3id.org/xapi/video/verbs/played')
),
ends as (
select
org,
course_key,
actor_id,
emission_time,
cast(video_position as Int32) as end_position,
splitByString('/xblock/', object_id)[-1] as video_id
video_position,
object_id,
video_duration,
if(
verb_id = 'https://w3id.org/xapi/video/verbs/played', 'start', 'end'
) as verb
from {{ ref("video_playback_events") }}
where
verb_id in (
'https://w3id.org/xapi/video/verbs/played',
'http://adlnet.gov/expapi/verbs/completed',
'https://w3id.org/xapi/video/verbs/paused',
'http://adlnet.gov/expapi/verbs/terminated',
'https://w3id.org/xapi/video/verbs/seeked'
)
and org in cast({org_filter:String}, 'Array(String)')
and course_key in (
select course_key
from {{ ref("course_names") }}
where course_name in cast({course_name_filter:String}, 'Array(String)')
)
),
matches as (
select
*,
first_value(event_id) filter (where verb = 'start') over (
partition by org, course_key, actor_id, object_id
order by emission_time
rows between unbounded preceding and 1 preceding
) as matching_event_id,
first_value(event_id) filter (where verb = 'end') over (
partition by org, course_key, actor_id, object_id
order by emission_time
rows between 1 following and unbounded following
) as matching_event_id2
from data
),
ends as (
select *
from matches
where verb = 'end' and notEmpty(matching_event_id) and empty(matching_event_id2)
),
range_multi as (
starts as (
select *
from matches
where
verb = 'start' and notEmpty(matching_event_id2) and empty(matching_event_id)
),
range as (
select
starts.event_id as event_id,
starts.org as org,
starts.course_key as course_key,
starts.actor_id as actor_id,
starts.video_id as video_id,
starts.video_duration as video_duration,
starts.start_position as start_position,
ends.end_position as end_position,
starts.event_id,
starts.org,
starts.course_key,
starts.actor_id,
starts.object_id,
starts.video_duration,
starts.video_position as start_position,
ends.video_position as end_position,
starts.emission_time as start_emission_time,
ends.emission_time as end_emission_time,
row_number() over (
partition by org, course_key, actor_id, video_id, start_position
order by ends.emission_time
) as rownum
ends.emission_time as end_emission_time
from starts
left join
ends
on starts.org = ends.org
and starts.course_key = ends.course_key
and starts.video_id = ends.video_id
and starts.actor_id = ends.actor_id
where
starts.emission_time < ends.emission_time
and starts.start_position < ends.end_position
inner join ends on starts.event_id = ends.matching_event_id
where ends.video_position > starts.video_position
),
range as (select * from range_multi where rownum = 1),
rewatched as (
select a.event_id as event_id1, b.event_id as event_id2, actor_id
from range a
Expand All @@ -75,7 +78,7 @@ with
on a.org = b.org
and a.course_key = b.course_key
and a.actor_id = b.actor_id
and a.video_id = b.video_id
and a.object_id = b.object_id
where
(
(
Expand All @@ -89,34 +92,40 @@ with
)
and b.start_emission_time > a.start_emission_time
),
rewatched_combined as (
select event_id1 as event_id
from rewatched
union all
select event_id2 as event_id
from rewatched
),
course_data as (
select org, course_key, count(distinct block_id) video_count
from {{ ref("dim_course_blocks") }}
where block_type = 'video'
group by org, course_key
)
select
course_data.org as org,
course_data.course_key as course_key,
concat(course_data.org, range.org) as org,
concat(course_data.course_key, range.course_key) as course_key,
range.actor_id as actor_id,
video_duration,
cast(video_count as Int32) as video_count,
sum(
case
when r1.actor_id = '' and r2.actor_id = ''
when empty(rewatched_combined.event_id)
then end_position - start_position
else 0
end
) as watched_time,
sum(
case
when r1.actor_id <> '' or r2.actor_id <> ''
when notEmpty(rewatched_combined.event_id)
then end_position - start_position
else 0
end
) as rewatched_time
from course_data
left join range on range.course_key = course_data.course_key
left join rewatched r1 on range.event_id = r1.event_id1
left join rewatched r2 on range.event_id = r2.event_id2
full join range on range.course_key = course_data.course_key
left join rewatched_combined on range.event_id = rewatched_combined.event_id
group by org, course_key, actor_id, video_count, video_duration
Loading

0 comments on commit b88bde7

Please sign in to comment.