-
Notifications
You must be signed in to change notification settings - Fork 2
/
init.lua
129 lines (115 loc) · 4.06 KB
/
init.lua
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
-- this file is loaded on the first start of the plugin
filepath = require("filepath")
time = require("time")
inspect = require("inspect")
json = require("json")
ioutil = require("ioutil")
crypto = require("crypto")
goos = require("goos")
log = require("log")
humanize = require("humanize")
strings = require("strings")
-- Shared logger for plugin code; every line is prefixed with date and time.
plugin_log = log.new()
plugin_log:set_flags({ date = true, time = true })

-- Directory part of this chunk's source path, as reported by debug.getinfo.
root = filepath.dir(debug.getinfo(1).source)

-- Return the value of environment variable `name`, or `fallback` when unset.
local function env_or(name, fallback)
  return os.getenv(name) or fallback
end

-- Locations of the host's /dev, /sys and /proc trees; each one can be
-- overridden through the environment variable of the same name.
HOST_DEV_DIR = env_or('HOST_DEV_DIR', '/dev')
HOST_SYS_DIR = env_or('HOST_SYS_DIR', '/sys')
HOST_PROC_DIR = env_or('HOST_PROC_DIR', '/proc')
-- Read a file that lives inside the plugin directory (plugin:dir()).
--   filename: path relative to the plugin directory
-- Returns the file contents as returned by ioutil.read_file.
-- Raises the error reported by ioutil.read_file when reading fails.
function read_file_in_plugin_dir(filename)
  local full_path = filepath.join(plugin:dir(), filename)
  local content, read_err = ioutil.read_file(full_path)
  if read_err then
    error(read_err)
  end
  return content
end
-- Return true if the database is hosted on RDS.
-- Detection: `show rds.extensions` is executed on the global `target`; the
-- result is true when that call completes without raising and false when it
-- raises. Only raised errors are detected (pcall), so this relies on
-- target:query raising on failure.
function is_rds()
  local function probe()
    target:query("show rds.extensions")
  end
  -- pcall's first result is already a boolean.
  local succeeded = pcall(probe)
  return succeeded
end
-- Return the database server's current unix timestamp (integer seconds),
-- minus its remainder modulo `ts`, i.e. aligned down to a multiple of `ts`.
--   conn: connection to query; defaults to the global `target`
--   ts:   alignment step in seconds; defaults to 1 (no alignment)
function get_unix_ts(conn, ts)
  local connection = conn or target
  local step = ts or 1
  local result = connection:query("select extract(epoch from now())::int - (extract(epoch from now())::int % $1)", step)
  local first_row = result.rows[1]
  return first_row[1]
end
-- Number of empty JSON metrics dropped so far by storage_insert_metric.
local count_empty_metrics = 0

-- Insert a metric into storage, filling in metric.host with plugin:host()
-- when the caller did not set it.
-- A metric that carries only a `json` payload (no `int`, no `float`) is
-- decoded first; if the decoded value is an empty table the metric is dropped
-- instead of stored, and every 10th drop is reported through plugin_log.
-- Raises the error returned by json.decode when the payload cannot be decoded.
function storage_insert_metric(metric)
  if not metric.host then
    metric.host = plugin:host()
  end

  local json_only = metric.int == nil and metric.float == nil and metric.json ~= nil
  if json_only then
    local decoded, decode_err = json.decode(metric.json)
    if decode_err then
      error(decode_err)
    end
    if next(decoded) == nil then
      count_empty_metrics = count_empty_metrics + 1
      -- Log only every 10th occurrence to keep the log quiet.
      if count_empty_metrics % 10 == 0 then
        plugin_log:printf("[ERROR] plugin '%s' on host '%s': empty metric (%d times)\n", plugin:name(), plugin:host(), count_empty_metrics)
      end
      return
    end
  end

  storage:insert_metric(metric)
end
-- Return the PostgreSQL server version of the global `target` as a number
-- (for example 12.5), caching it in the global `pg_server_version`.
-- `show server_version` may report strings that tonumber() rejects, such as
-- "9.6.24" or "14.2 (Debian 14.2-1.pgdg110+1)". In that case the leading
-- "major[.minor]" prefix is parsed instead, so callers still get a number.
-- Returns nil (and caches nothing) if no numeric prefix can be found.
function get_pg_server_version()
  if pg_server_version then
    return pg_server_version
  end

  local result = target:query("show server_version")
  local raw = result.rows[1][1]

  -- Plain values like "12.5" parse directly, exactly as before.
  local parsed = tonumber(raw)
  if parsed == nil then
    -- Fall back to the leading digits, optionally followed by ".digits".
    local prefix = string.match(tostring(raw), "^%s*(%d+%.?%d*)")
    if prefix then
      parsed = tonumber(prefix)
    end
  end

  pg_server_version = parsed
  return pg_server_version
end
-- Return the result of pg_catalog.pg_is_in_recovery() on the global `target`
-- (first column of the first row of the query result).
function get_pg_is_in_recovery()
  local result = target:query("select pg_catalog.pg_is_in_recovery()")
  local first_row = result.rows[1]
  return first_row[1]
end
-- Return true if the extension `extname` is installed in the database
-- reachable through `conn`, i.e. pg_catalog.pg_extension holds exactly one
-- row with that name.
--   conn:    connection to run the lookup on
--   extname: extension name, passed as query parameter $1
function extension_present(conn, extname)
  local sql = "select count(extname) from pg_catalog.pg_extension where extname = $1"
  local match_count = conn:query(sql, extname).rows[1][1]
  return match_count == 1
end
-- Set a prometheus gauge "pg_gatherer_<name>" to `value`.
--   name:   gauge name (namespace "pg", subsystem "gatherer")
--   value:  anything tonumber() accepts; the call is a silent no-op when the
--           value is not numeric
--   labels: optional table of label name -> label value; `host` is filled in
--           with plugin:host() when absent (the table passed in is updated)
-- Raises the error returned by prometheus_gauge when registration fails.
function gauge_set(name, value, labels)
  local numeric = tonumber(value)
  if numeric == nil then
    return
  end

  labels = labels or {}
  if labels.host == nil then
    labels.host = plugin:host()
  end

  -- Collect the label names. `labels` is keyed by name, so it must be walked
  -- with pairs(); ipairs() would visit nothing and register the gauge without
  -- any labels.
  local label_keys = {}
  for key in pairs(labels) do
    label_keys[#label_keys + 1] = key
  end
  -- pairs() order is unspecified; sort so that every call registers the
  -- gauge with the same label order.
  table.sort(label_keys, function(a, b)
    return tostring(a) < tostring(b)
  end)

  local gauge, err = prometheus_gauge({
    namespace = "pg",
    subsystem = "gatherer",
    name = name,
    labels = label_keys
  })
  if err then
    error(err)
  end
  gauge:set(numeric, labels)
end
-- Call `f` repeatedly, at most once per `every` seconds; never returns.
-- The start time of the latest run is kept in the cache under key
-- "last_run" (the function runs in plugin context).
--   f:     function to call, without arguments
--   every: minimum number of seconds between two starts of `f`
-- After each run the elapsed time is logged: as [ERROR] when it exceeded
-- `every` (followed by a 1 s pause), as [INFO] when it exceeded 1 s.
-- While the next run is not due yet, the loop sleeps round(every / 10) + 1
-- seconds between checks. Note that this delay is deterministic.
function run_every(f, every)
  while true do
    local previous_start = cache:get("last_run") or 0
    local is_due = time.unix() >= previous_start + every
    if not is_due then
      -- Not due yet: pause before checking again.
      local pause = tonumber(string.format("%.0f", every / 10) +1)
      time.sleep(pause)
    else
      local started = time.unix()
      cache:set("last_run", started)
      f()
      local elapsed = time.unix() - started
      if elapsed > every then
        plugin_log:printf("[ERROR] plugin '%s' on host '%s' execution timeout: %.2f s\n", plugin:name(), plugin:host(), elapsed)
        time.sleep(1)
      elseif elapsed > 1 then
        plugin_log:printf("[INFO] plugin '%s' on host '%s' execution time: %.2f s\n", plugin:name(), plugin:host(), elapsed)
      end
    end
  end
end
-- Pause for a random 0.01-1.00 s when this file is loaded, to decrease CPU
-- spikes (math.random(100) yields an integer in 1..100).
local startup_jitter = math.random(100) / 100
time.sleep(startup_jitter)