forked from NorbertKrupa/vertica-kit
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathvertica_kit_diagnostics.sql
434 lines (396 loc) · 17.7 KB
/
vertica_kit_diagnostics.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
-- Vertica Diagnostic Information Queries
-- July 2014
--
-- Last Modified: July 20, 2014
-- http://www.vertica.tips
-- http://www.jadito.us
-- Twitter: justadayito
--
-- Written by Norbert Krupa
--
-- For more scripts and sample code, check out
-- http://jadito.us/vertica-kit
-- http://vertica.tips
--
-- Vertica Kit is free: you can redistribute it and/or modify it under
-- the terms of the GNU General Public License as published by the Free
-- Software Foundation, either version 3 of the License, or (at your
-- option) any later version.
--
-- Vertica Kit is distributed in the hope that it will be useful, but
-- WITHOUT ANY WARRANTY; without even the implied warranty of
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-- General Public License for more details.
--
-- You should have received a copy of the GNU General Public License
-- along with Vertica Kit. If not, see <http://www.gnu.org/licenses/>.
--
-- Note: These queries should be run from within vsql.
--*************************************************************************
-- Configuration Information
--*************************************************************************
-- Your product version
SELECT version();
-- Your license and compliance status
-- https://my.vertica.com/docs/6.1.x/HTML/index.htm#15460.htm
-- https://my.vertica.com/docs/6.1.x/HTML/index.htm#15563.htm
SELECT DISPLAY_LICENSE();
SELECT GET_COMPLIANCE_STATUS();
-- Is the Data Collector enabled (for monitoring)
-- https://my.vertica.com/docs/6.1.x/HTML/index.htm#16138.htm
SELECT GET_CONFIG_PARAMETER('EnableDataCollector');
-- Configuration parameters that have been modified
-- https://my.vertica.com/docs/6.1.x/HTML/index.htm#12772.htm
SELECT /*+label(diag_changed_config_param)*/
*
FROM v_monitor.configuration_parameters
WHERE current_value <> default_value;
-- Shows change history of configuration parameters
-- https://my.vertica.com/docs/6.1.x/HTML/index.htm#17542.htm
SELECT /*+label(diag_config_param_history)*/
*
FROM v_monitor.configuration_changes
ORDER BY event_timestamp DESC;
-- Shows current fault tolerance of the system by returning K-safety
-- level and number of node failures before automatic shut down
-- https://my.vertica.com/docs/6.1.x/HTML/index.htm#12275.htm
SELECT /*+label(diag_fault_tolerance)*/
designed_fault_tolerance,
current_fault_tolerance
FROM v_monitor.system;
--*************************************************************************
-- Resource Information
--*************************************************************************
-- Shows disk space utilization by host (see below for alternate methods)
-- http://wp.me/p3Qalh-fs
SELECT /*+label(diag_disk_space_utilization)*/
host_name,
( disk_space_free_mb / 1024 ) AS disk_space_free_gb,
( disk_space_used_mb / 1024 ) AS disk_space_used_gb,
( disk_space_total_mb / 1024 ) AS disk_space_total_gb
FROM v_monitor.host_resources;
-- Shows processor information by host
SELECT /*+label(diag_cpu_info)*/
host_name,
processor_count,
processor_core_count,
processor_description
FROM v_monitor.host_resources;
-- Shows memory information by host
SELECT /*+label(diag_memory_info)*/
host_name,
total_memory_bytes / ( 1024^3 ) AS total_memory_gb,
total_memory_free_bytes / ( 1024^3 ) AS total_memory_free_gb,
total_swap_memory_bytes / ( 1024^3 ) AS total_swap_memory_gb,
total_swap_memory_free_bytes / ( 1024^3 ) AS total_swap_memory_free_gb
FROM v_monitor.host_resources;
-- Shows compressed and raw estimate data space utilization by schema
-- http://wp.me/p3Qalh-jA
SELECT /*+label(diag_schema_space_utilization)*/
pj.anchor_table_schema,
pj.used_compressed_gb,
pj.used_compressed_gb * la.ratio AS raw_estimate_gb
FROM (SELECT ps.anchor_table_schema,
SUM(used_bytes) / ( 1024^3 ) AS used_compressed_gb
FROM v_catalog.projections p
JOIN v_monitor.projection_storage ps
ON ps.projection_id = p.projection_id
WHERE p.is_super_projection = 't'
GROUP BY ps.anchor_table_schema) pj
CROSS JOIN (SELECT (SELECT database_size_bytes
FROM v_catalog.license_audits
ORDER BY audit_start_timestamp DESC
LIMIT 1) / (SELECT SUM(used_bytes)
FROM v_monitor.projection_storage) AS ratio) la
ORDER BY pj.used_compressed_gb DESC;
-- Shows nodes that have less than the recommended disk space (40%) available for use
-- http://goo.gl/I4kCmk
SELECT /*+label(diag_critical_space_usage)*/
node_name,
storage_path,
disk_space_free_percent
FROM v_monitor.disk_storage
WHERE disk_space_used_mb / ( disk_space_used_mb + disk_space_free_mb ) <= 0.4
AND storage_usage = 'DATA,TEMP';
--*************************************************************************
-- Diagnostic Information
--*************************************************************************
-- Distribution of query request times (see below for identifying slow queries)
-- http://wp.me/p3Qalh-ir
SELECT /*+label(diag_query_time_distribution)*/
SUM(CASE
WHEN request_duration_ms <= 1000 THEN 1
ELSE 0
END) AS less_than_1,
SUM(CASE
WHEN request_duration_ms BETWEEN 1001 AND 2000 THEN 1
ELSE 0
END) AS between_1_and_2,
SUM(CASE
WHEN request_duration_ms BETWEEN 2001 AND 3000 THEN 1
ELSE 0
END) AS between_2_and_3,
SUM(CASE
WHEN request_duration_ms BETWEEN 3001 AND 4000 THEN 1
ELSE 0
END) AS between_3_and_4,
SUM(CASE
WHEN request_duration_ms BETWEEN 4001 AND 5000 THEN 1
ELSE 0
END) AS between_4_and_5,
SUM(CASE
WHEN request_duration_ms > 5000 THEN 1
ELSE 0
END) AS greater_than_5
FROM v_monitor.query_requests;
-- Shows possible issues with planning of execution of a query; specifically
-- looking for event types such as GROUP_BY_SPILLED and JOIN_SPILLED
-- https://my.vertica.com/docs/6.1.x/HTML/index.htm#20263.htm
SELECT /*+label(diag_query_events)*/
event_timestamp,
session_id,
transaction_id,
event_description,
event_type
FROM v_monitor.query_events
ORDER BY event_timestamp DESC;
-- Shows overview of event types
-- https://my.vertica.com/docs/6.1.x/HTML/index.htm#17580.htm
SELECT /*+label(diag_query_event_types)*/
event_type,
COUNT(*)
FROM query_events
GROUP BY event_type
ORDER BY COUNT(*) DESC;
-- Shows queries that spilled to disk during execution; the query
-- should be optimized for a merge join or group by pipelined
-- https://my.vertica.com/docs/6.1.x/HTML/index.htm#12525.htm
SELECT /*+label(diag_query_event_types)*/
DISTINCT qr.start_timestamp,
qe.event_type,
REGEXP_REPLACE(qr.request, '[\r\t\f\n]', ' ') AS request
FROM v_monitor.query_events qe
JOIN v_monitor.query_requests qr
ON qr.transaction_id = qe.transaction_id
AND qr.statement_id = qe.statement_id
WHERE qe.event_type IN ( 'GROUP_BY_SPILLED', 'JOIN_SPILLED' )
AND qr.request_type = 'QUERY'
ORDER BY qr.start_timestamp;
-- Shows query events in which rows were resegmented during execution
-- https://my.vertica.com/docs/6.1.x/HTML/index.htm#12174.htm
-- https://my.vertica.com/docs/6.1.x/HTML/index.htm#10248.htm
SELECT /*+label(diag_query_events_resegment)*/
DISTINCT qr.start_timestamp,
REGEXP_REPLACE(qr.request, '[\r\t\f\n]', ' ') AS request
FROM v_monitor.query_events qe
JOIN v_monitor.query_requests qr
ON qr.transaction_id = qe.transaction_id
AND qr.statement_id = qe.statement_id
WHERE qe.event_type = 'RESEGMENTED_MANY_ROWS'
AND qr.request_type = 'QUERY'
ORDER BY qr.start_timestamp;
-- Shows any load events that had rejected rows
-- See also: https://my.vertica.com/docs/6.1.x/HTML/index.htm#12261.htm
SELECT /*+label(diag_rejected_load_rows)*/
le.time,
le.node_name,
le.event_description,
le.event_type,
le.rows_accepted,
le.rows_rejected,
le.session_id,
le.user_name,
ss.client_hostname,
ss.session_type,
ss.is_internal
FROM v_internal.dc_load_events le
LEFT JOIN v_internal.dc_session_starts ss
ON ss.session_id = le.session_id
WHERE le.rows_rejected <> 0
ORDER BY le.time DESC;
-- Shows any query requests with errors (truncated request text)
-- http://wp.me/p3Qalh-iV
SELECT /*+label(diag_query_errors)*/
qr.node_name,
qr.user_name,
qr.session_id,
qr.start_timestamp,
qr.request_type,
LEFT(REGEXP_REPLACE(qr.request, '[\r\t\f\n]', ' '), 100) AS request,
qr.request_duration_ms,
qr.error_count,
em.error_level,
em.error_code,
em.message
FROM v_monitor.query_requests qr
JOIN v_monitor.error_messages em
ON em.node_name = qr.node_name
AND em.session_id = qr.session_id
AND em.request_id = qr.request_id
AND em.transaction_id = qr.transaction_id
ORDER BY qr.start_timestamp DESC;
-- Shows the last run and interval for each service on each ndoe
-- https://my.vertica.com/docs/6.1.x/HTML/index.htm#17588.htm
SELECT /*+label(diag_system_services)*/
*
FROM v_monitor.system_services
ORDER BY node_name,
last_run_start;
-- Shows Workload Analyzer tuning rules
-- http://www.vertica.com/2014/05/06/inside-the-secret-world-of-the-workload-analyzer/
SELECT /*+label(diag_wla_tuning_params)*/
*
FROM v_internal.vs_tuning_rule_parameters;
-- Shows tables without primary keys
-- http://vertica.tips/2014/03/29/hash-join-operator/
SELECT /*+label(diag_tables_without_pk)*/
DISTINCT t.table_schema,
t.table_name
FROM v_catalog.tables t
LEFT JOIN v_catalog.constraint_columns cc
ON cc.table_id = t.table_id
WHERE cc.constraint_type <> 'p'
AND t.is_system_table = 'f'
ORDER BY t.table_schema,
t.table_name;
-- Shows percentage of database that has been deleted
-- https://my.vertica.com/docs/6.1.x/HTML/index.htm#12704_1.htm
SELECT /*+label(diag_database_deleted_data)*/
(SELECT SUM(used_bytes)
FROM v_monitor.delete_vectors) / (SELECT SUM(ros_used_bytes)
FROM v_monitor.projection_storage) * 100 AS percent;
-- Shows denied resource requests (useful for identifying resource space and pool issues)
-- https://my.vertica.com/docs/6.1.x/HTML/index.htm#15239_1.htm
SELECT /*+label(diag_denied_resource_req)*/
reason,
COUNT(*)
FROM v_monitor.resource_rejection_details
GROUP BY reason;
-- Shows mergeout activity, durations, and volumes processed
-- https://my.vertica.com/docs/6.1.x/HTML/index.htm#12278.htm
SELECT /*+label(diag_mergeout_activity)*/
DATEDIFF(mi, ms.operation_start_timestamp, CASE WHEN me.operation_status = 'Running' THEN clock_timestamp() ELSE me.operation_start_timestamp END) AS min_to_complete,
CAST(CASE WHEN DATEDIFF(ss, ms.operation_start_timestamp, CASE WHEN me.operation_status = 'Running' THEN NULL::TIMESTAMP ELSE me.operation_start_timestamp END ) > 0 THEN CAST(ms.total_ros_used_bytes / ( 1024.0^2 ) AS DECIMAL(14,2))/DATEDIFF(ss,ms.operation_start_timestamp,CASE WHEN me.operation_status = 'Running' THEN clock_timestamp() ELSE me.operation_start_timestamp END ) ELSE 0 END AS DECIMAL(14,2)) AS mb_sec,
me.operation_status AS mergeout_status,
ms.operation_start_timestamp AS mergeout_start,
me.operation_start_timestamp AS mergeout_end,
ms.node_name,
ms.table_name,
ms.projection_name,
ms.total_ros_used_bytes AS ros_bytes,
CAST(ms.total_ros_used_bytes / ( 1024.0^2 ) AS DECIMAL(14,2)) AS ros_mb,
CAST(ms.total_ros_used_bytes / ( 1024.0^3 ) AS DECIMAL(14,2)) AS ros_gb,
ms.earliest_container_start_epoch AS start_epoch,
ms.latest_container_end_epoch AS end_epoch,
ms.ros_count
FROM (SELECT *
FROM v_monitor.tuple_mover_operations
WHERE operation_name = 'Mergeout'
AND operation_status = 'Start' ) AS ms
LEFT JOIN (SELECT *
FROM v_monitor.tuple_mover_operations
WHERE operation_name = 'Mergeout'
AND operation_status IN ( 'Complete', 'Running' ) ) AS me
ON ms.earliest_container_start_epoch = me.earliest_container_start_epoch
AND ms.latest_container_end_epoch = me.latest_container_end_epoch
AND ms.ros_count = me.ros_count
AND ms.total_ros_used_bytes = me.total_ros_used_bytes
AND ms.session_id = me.session_id
AND ms.node_name = me.node_name
AND ms.table_name = me.table_name
AND ms.table_schema = me.table_schema
AND ms.projection_name = me.projection_name
WHERE ms.operation_start_timestamp BETWEEN clock_timestamp() - INTERVAL '1 DAY' AND clock_timestamp()
-- AND ms.table_schema = 'foo'
-- AND ms.table_name = 'bar'
ORDER BY ms.projection_name,
me.operation_status DESC;
--*************************************************************************
-- Projection Specific
--*************************************************************************
-- Shows projections that haven't been refreshed in the past 3 months
-- or do not have a corresponding refresh
-- https://my.vertica.com/docs/6.1.x/HTML/index.htm#13905.htm
SELECT /*+label(diag_stale_projections)*/
p.projection_schema,
p.projection_name,
DATEDIFF(day, pr.refresh_start, SYSDATE()) AS days_last_refresh
FROM v_catalog.projections p
LEFT JOIN v_monitor.projection_refreshes pr
ON pr.projection_id = p.projection_id
WHERE DATEDIFF(month, pr.refresh_start, SYSDATE()) >= 3
OR pr.projection_id IS NULL
ORDER BY days_last_refresh DESC;
-- Shows projection columns that haven't been refreshed in the past month
-- https://my.vertica.com/docs/6.1.x/HTML/index.htm#15576.htm
SELECT /*+label(diag_stale_projection_columns)*/
projection_id,
projection_name,
projection_column_name,
statistics_type,
DATEDIFF(day, statistics_updated_timestamp, SYSDATE()) AS days_last_refresh
FROM v_catalog.projection_columns
WHERE DATEDIFF(month, statistics_updated_timestamp, SYSDATE()) >= 1
ORDER BY days_last_refresh DESC;
-- Shows possible data skew in segmented projections; note: Workload
-- Analyzer should be run to obtain the most recent recommendations
-- https://my.vertica.com/docs/6.1.x/HTML/index.htm#17426.htm
SELECT /*+label(diag_workload_resegment)*/
tuning_description,
tuning_cost
FROM v_monitor.tuning_recommendations
WHERE tuning_description LIKE 're-segment%'
ORDER BY tuning_description;
-- Shows projections which do not have full statistics; with tuning command;
-- only looking for statistics_type of NONE or ROWCOUNT. NONE means no statistics;
-- ROWCOUNT means created automatically from existing catalog metadata
-- https://my.vertica.com/docs/6.1.x/HTML/index.htm#15574.htm
SELECT /*+label(diag_unrefreshed_columns)*/
pc.projection_name,
pc.table_name,
pc.table_column_name,
pc.statistics_type,
E'SELECT /*+label(update_statistics)*/ ANALYZE_STATISTICS(\''
|| pc.table_name || '.' || pc.table_column_name || E'\');' AS tuning_command
FROM v_catalog.projections p
JOIN v_catalog.projection_columns pc
ON pc.projection_id = p.projection_id
WHERE p.has_statistics = 'f'
AND pc.statistics_type IN ( 'NONE', 'ROWCOUNT' );
-- Shows projection last used timestamp to identify unused projections
-- https://my.vertica.com/docs/6.1.x/HTML/index.htm#17579.htm
SELECT /*+label(diag_unused_projectiosn)*/
projection_name,
MIN(query_start_timestamp) AS last_used_timestamp
FROM v_monitor.projection_usage
GROUP BY projection_name,
query_start_timestamp
ORDER BY query_start_timestamp
LIMIT 30;
--*************************************************************************
-- Query Profiling Information
--*************************************************************************
-- Examining actual time spent in query
-- https://my.vertica.com/docs/6.1.x/HTML/index.htm#10300.htm
-- Profiling configuration: if any configs are enabled, and you're not
-- performing profiling, it may be using memory
-- To disable: https://my.vertica.com/docs/6.1.x/HTML/index.htm#14373.htm
SELECT SHOW_PROFILING_CONFIG();
-- Clear previous profiling data
-- https://my.vertica.com/docs/6.1.x/HTML/index.htm#10305.htm
SELECT CLEAR_PROFILING('query');
-- Session and global profiling should be enabled as they will be capped by data
-- collector policies; execution engine profiling should be used sparsely and briefly
-- https://my.vertica.com/docs/6.1.x/HTML/index.htm#13914.htm
SELECT SET_CONFIG_PARAMETER('GlobalQueryProfiling', 1);
SELECT SET_CONFIG_PARAMETER('GlobalSessionProfiling', 1);
-- Examine query_profiles; shows 50 longest running queries
-- https://my.vertica.com/docs/6.1.x/HTML/index.htm#10304.htm
SELECT /*+label(diag_long_running_queries)*/
LEFT(REGEXP_REPLACE(query, '[\r\t\f\n]', ' '), 100) AS query,
COUNT(*) AS instances,
AVG(query_duration_us) AS avg_query_duration_us
FROM v_monitor.query_profiles
GROUP BY query
ORDER BY avg_query_duration_us DESC
LIMIT 50;