-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathqueries.sql
183 lines (178 loc) · 4.78 KB
/
queries.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
-- defaults:
-- '{"disk":10, "memory":2, "cpu": 1, "gpu": 0, "time":1}'
CREATE OR REPLACE FUNCTION id2num(val text) RETURNS int AS $$
DECLARE
id int = -1;
CHARS_LEN decimal = 62;
cc char;
i int;
BEGIN
SELECT INTO id
mod(sum((CASE
WHEN code >= 97 THEN code-96
WHEN code >= 65 THEN code-38
ELSE code+5
END)*power(62,idx)::decimal
)-1,power(10,15)::decimal)::int
FROM (
SELECT
ascii(char)::decimal as code,
(length(val)-num)::decimal as idx
FROM
unnest(regexp_split_to_array(val, '')) WITH ORDINALITY AS t(char, num)
ORDER BY num DESC
) q;
RETURN id;
END;
$$ LANGUAGE plpgsql;
-- running time 13569 seconds (3.7 hours)
SELECT
id2num(dataset_id) as dataset_id,
-- dataset_id,
task_index,
task_name,
job_index,
input_size,
output_size,
rss,
run_time,
initial_rss_request,
final_rss_request,
evictions,
mean_ttf,
failed_GB_hours,
input_duration,
output_duration,
time_finished,
task_id
FROM
(
SELECT
task_id,
requirements,
task_rel_id
FROM
task
-- INNER JOIN search USING (task_id)
-- WHERE
-- dataset_id='\x4f317239396335376c754d586b4b' AND name='\x6465746563746f72'
-- LIMIT
-- 10
) task INNER JOIN LATERAL (
SELECT
encode(dataset_id, 'escape') as dataset_id,
task_index,
encode(name, 'escape') as task_name,
requirements
FROM
task_rel
WHERE
task_rel_id=task.task_rel_id AND
(json(task_rel.requirements) #>> '{memory}') NOT LIKE '$eval%' -- skip parameterized tasks
) task_rel ON true INNER JOIN LATERAL (
SELECT
job_id,
encode(task_status, 'escape') as task_status,
CASE
WHEN length(task_rel.requirements) > 0 AND (json(task_rel.requirements) #> '{memory}' IS NOT NULL) THEN
(json(task_rel.requirements) #>> '{memory}')::float
ELSE
2.0
END as initial_rss_request,
CASE
WHEN length(task.requirements) > 0 AND (json(task.requirements) #> '{memory}' IS NOT NULL) THEN
(json(task.requirements) #>> '{memory}')::float
WHEN length(task_rel.requirements) > 0 AND (json(task_rel.requirements) #> '{memory}' IS NOT NULL) THEN
(json(task_rel.requirements) #>> '{memory}')::float
ELSE
2.0
END as final_rss_request
FROM
search
WHERE
task_id=task.task_id AND
encode(task_status, 'escape')='complete'
) search ON true INNER JOIN LATERAL (
SELECT
job_index
FROM
job
WHERE
job_id=search.job_id
) job ON true LEFT JOIN LATERAL ( -- left join here, because some tasks don't have inputs
SELECT
sum((record #>> '{size}')::bigint)/pow(2.0,30.0) as input_size,
sum((record #>> '{duration}')::float)/3600.0 as input_duration
FROM
(
SELECT
task_stat_id,
json_array_elements(json(stat) #> '{task_stats,download}') as record
FROM
task_stat
WHERE
task_id=task.task_id
) unnested
WHERE
(record #>> '{error}')::bool is false
GROUP BY
task_stat_id
LIMIT 1
) input_size ON true INNER JOIN LATERAL ( -- all sensible tasks have outputs
SELECT
sum((record #>> '{size}')::bigint)/pow(2.0,30.0) as output_size,
sum((record #>> '{duration}')::float)/3600.0 as output_duration
FROM
(
SELECT
task_stat_id,
json_array_elements(json(stat) #> '{task_stats,upload}') as record
FROM
task_stat
WHERE
task_id=task.task_id
) unnested
WHERE
(record #>> '{error}')::bool is false
GROUP BY
task_stat_id
LIMIT 1
) output_size ON true INNER JOIN LATERAL (
SELECT
max((json(stat) #>> '{resources,memory}')::float) as rss,
max((json(stat) #>> '{time}')::timestamp) as time_finished
FROM
task_stat
WHERE
task_id=task.task_id AND
(json(stat) #>> '{error}') IS NULL -- skip failed task reports
GROUP BY
task_id
) rss ON true LEFT JOIN LATERAL (
SELECT
count(task_id) as evictions,
avg((json(stat) #>> '{resources,time}')::float) as mean_ttf,
sum(
(json(stat) #>> '{resources,time}')::float
*(json(stat) #>> '{resources,memory}')::float) as failed_GB_hours
FROM
task_stat
WHERE
task_id=task.task_id AND
(json(stat) #>> '{error_summary}') LIKE 'Resource overusage for memory%'
GROUP BY
task_id
) evictions ON true LEFT JOIN LATERAL (
SELECT
sum((json(stat) #>> '{resources,time}')::float) as run_time
FROM
task_stat
WHERE
task_id=task.task_id AND
(json(stat) #>> '{error}')::bool IS NOT TRUE
GROUP BY
task_id
) run_time ON true
-- WHERE evictions > 0
WHERE output_size > 0
;