Skip to content

Commit

Permalink
feat: optimize watched video query
Browse files Browse the repository at this point in the history
  • Loading branch information
saraburns1 committed Oct 25, 2024
1 parent 9e6acd3 commit 6cd03c4
Show file tree
Hide file tree
Showing 6 changed files with 278 additions and 245 deletions.
2 changes: 1 addition & 1 deletion models/users/user_pii.sql
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ with
from {{ source("event_sink", "user_profile") }}
group by user_id
)
select ex.user_id as user_id, ex.external_user_id, ex.username, up.name, up.email
select ex.user_id as user_id, ex.external_user_id as external_user_id, ex.username as username, up.name as name, up.email as email
from {{ source("event_sink", "external_id") }} ex
left outer join most_recent_user_profile mrup on mrup.user_id = ex.user_id
left outer join
Expand Down
30 changes: 30 additions & 0 deletions models/video/schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -262,3 +262,33 @@ models:
- name: rewatched_time
data_type: Int32
description: "Seconds of the video rewatched"

- name: watched_video_slices
description: ""
columns:
- name: event_id
data_type: UUID
description: ""
- name: org
data_type: string
description: "The organization that the course belongs to"
- name: course_key
data_type: string
description: "The course key for the course"
- name: actor_id
data_type: string
description: "The xAPI actor identifier"
- name: video_id
data_type: String
description: ""
- name: video_duration
data_type: Int32
description: "Total duration of the video"
- name: start_position
data_type: Int32
description: ""
- name: end_position
data_type: Int32
description: ""
- name: start_emission_time
data_type: DateTime
4 changes: 2 additions & 2 deletions models/video/unit_tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -111,10 +111,10 @@ unit_tests:
config:
tags: 'ci'
given:
- input: ref('video_playback_events')
- input: ref('watched_video_slices')
format: sql
rows: |
select * from video_playback_events
select * from watched_video_slices
- input: ref('dim_course_blocks')
format: sql
rows: |
Expand Down
116 changes: 26 additions & 90 deletions models/video/watched_video_duration.sql
Original file line number Diff line number Diff line change
Expand Up @@ -8,86 +8,35 @@
)
}}

with
starts as (
select
event_id,
org,
course_key,
actor_id,
emission_time,
cast(video_position as Int32) as start_position,
splitByString('/xblock/', object_id)[-1] as video_id,
video_duration
from {{ ref("video_playback_events") }}
where verb_id in ('https://w3id.org/xapi/video/verbs/played')
with
potential_rewatched as (
select org, course_key, actor_id, video_id
from {{ ref('watched_video_slices') }}
group by org, course_key, actor_id, video_id
having count(1) > 1
),
ends as (
select
org,
course_key,
actor_id,
emission_time,
cast(video_position as Int32) as end_position,
splitByString('/xblock/', object_id)[-1] as video_id
from {{ ref("video_playback_events") }}
where
verb_id in (
'http://adlnet.gov/expapi/verbs/completed',
'https://w3id.org/xapi/video/verbs/paused',
'http://adlnet.gov/expapi/verbs/terminated',
'https://w3id.org/xapi/video/verbs/seeked'
)
rewatched_data as (
select org, course_key, actor_id, video_id, start_emission_time, event_id, start_position, end_position
from {{ ref('watched_video_slices') }}
where (org, course_key, actor_id, video_id) in (
select org, course_key, actor_id, video_id
from potential_rewatched
)
),
range_multi as (
select
starts.event_id as event_id,
starts.org as org,
starts.course_key as course_key,
starts.actor_id as actor_id,
starts.video_id as video_id,
starts.video_duration as video_duration,
starts.start_position as start_position,
ends.end_position as end_position,
starts.emission_time as start_emission_time,
ends.emission_time as end_emission_time,
row_number() over (
partition by org, course_key, actor_id, video_id, start_position
order by ends.emission_time
) as rownum
from starts
left join
ends
on starts.org = ends.org
and starts.course_key = ends.course_key
and starts.video_id = ends.video_id
and starts.actor_id = ends.actor_id
where
starts.emission_time < ends.emission_time
and starts.start_position < ends.end_position
),
range as (select * from range_multi where rownum = 1),
rewatched as (
select a.event_id as event_id1, b.event_id as event_id2, actor_id
from range a
inner join
range b
select distinct a.event_id as event_id1, b.event_id as event_id2
from rewatched_data a
inner join rewatched_data b
on a.org = b.org
and a.course_key = b.course_key
and a.actor_id = b.actor_id
and a.video_id = b.video_id
where
(
(
b.start_position > a.start_position
and b.start_position < a.end_position
)
or (
b.end_position > a.start_position
and b.end_position < a.end_position
)
)
and b.start_emission_time > a.start_emission_time
where b.start_emission_time > a.start_emission_time
and (
(b.start_position > a.start_position and b.start_position < a.end_position)
or
(b.end_position > a.start_position and b.end_position < a.end_position)
)
),
course_data as (
select org, course_key, count(distinct block_id) video_count
Expand All @@ -101,22 +50,9 @@ select
range.actor_id as actor_id,
video_duration,
cast(video_count as Int32) as video_count,
sum(
case
when r1.actor_id = '' and r2.actor_id = ''
then end_position - start_position
else 0
end
) as watched_time,
sum(
case
when r1.actor_id <> '' or r2.actor_id <> ''
then end_position - start_position
else 0
end
) as rewatched_time
sum(case when empty(r1.event_id) then end_position - start_position else 0 end) as watched_time,
sum(case when notEmpty(r1.event_id) then end_position - start_position else 0 end) as rewatched_time
from course_data
left join range on range.course_key = course_data.course_key
left join rewatched r1 on range.event_id = r1.event_id1
left join rewatched r2 on range.event_id = r2.event_id2
left join {{ ref('watched_video_slices') }} range on range.course_key = course_data.course_key
left join (select event_id1 as event_id from rewatched union all select event_id2 as event_id from rewatched) r1 on range.event_id = r1.event_id
group by org, course_key, actor_id, video_count, video_duration
67 changes: 67 additions & 0 deletions models/video/watched_video_slices.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
{{
config(
materialized="materialized_view",
schema=env_var("ASPECTS_XAPI_DATABASE", "xapi"),
engine=get_engine("ReplacingMergeTree()"),
order_by="(org,course_key,video_id,actor_id,start_position,end_position,start_emission_time)",
primary_key="(org,course_key,video_id,actor_id)",
)
}}

with
starts as (
select
event_id,
org,
course_key,
actor_id,
emission_time,
cast(video_position as Int32) as start_position,
splitByString('/xblock/', object_id)[-1] as video_id,
video_duration
from {{ ref("video_playback_events") }}
where verb_id in ('https://w3id.org/xapi/video/verbs/played')
),
ends as (
select
org,
course_key,
actor_id,
emission_time,
cast(video_position as Int32) as end_position,
splitByString('/xblock/', object_id)[-1] as video_id
from {{ ref("video_playback_events") }}
where
verb_id in (
'http://adlnet.gov/expapi/verbs/completed',
'https://w3id.org/xapi/video/verbs/paused',
'http://adlnet.gov/expapi/verbs/terminated',
'https://w3id.org/xapi/video/verbs/seeked'
)
),
range_multi as (
select
starts.event_id as event_id,
starts.org as org,
starts.course_key as course_key,
starts.actor_id as actor_id,
starts.video_id as video_id,
starts.video_duration as video_duration,
starts.start_position as start_position,
ends.end_position as end_position,
starts.emission_time as start_emission_time,
row_number() over (
partition by org, course_key, actor_id, video_id, start_position
order by ends.emission_time
) as rownum
from starts
inner join ends
on starts.org = ends.org
and starts.course_key = ends.course_key
and starts.video_id = ends.video_id
and starts.actor_id = ends.actor_id
where
starts.emission_time < ends.emission_time
and starts.start_position < ends.end_position
)
select * except (rownum) from range_multi where rownum = 1
Loading

0 comments on commit 6cd03c4

Please sign in to comment.