From: Petr Špaček Date: Fri, 23 Oct 2020 13:31:28 +0000 (+0200) Subject: http: adapt Prometheus to changes in map() X-Git-Tag: v5.2.0~6^2~6 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=682a566ac30b0794fd12d4bd90da3cfa397cb41d;p=thirdparty%2Fknot-resolver.git http: adapt Prometheus to changes in map() We don't have leader instance anymore, so any instance can provide Prometheus statistics. For performance reasons an instance does not store past statistics when no client is connected to HTTP socket. As a consequence chart in web interface is empty when client loads the page and then gradually chart fills up. --- diff --git a/modules/http/http.lua.in b/modules/http/http.lua.in index 08101b924..521ddb285 100644 --- a/modules/http/http.lua.in +++ b/modules/http/http.lua.in @@ -347,7 +347,6 @@ end -- @function Init module function M.init() - worker.coroutine(prometheus.init) net.register_endpoint_kind('doh', cb_socket) net.register_endpoint_kind('webmgmt', cb_socket) end @@ -357,7 +356,6 @@ function M.deinit() for fd, _ in pairs(M.servers) do remove_socket(fd) end - prometheus.deinit() tls_cert.ephemeral_state_destroy(M.ephem_state) net.register_endpoint_kind('doh') net.register_endpoint_kind('webmgmt') diff --git a/modules/http/prometheus.lua b/modules/http/prometheus.lua index c55c9ae3c..68b491da5 100644 --- a/modules/http/prometheus.lua +++ b/modules/http/prometheus.lua @@ -5,8 +5,6 @@ local M = { finalize = function (_ --[[metrics]]) end, } -local snapshots, snapshots_count = {}, 120 - -- Gauge metrics local gauges = { ['worker.concurrent'] = true, @@ -32,89 +30,68 @@ local function getstats() return t end -local function snapshot_end() - snapshots_count = false -end - --- Function to sort frequency list -local function snapshot_start() - local prev = getstats() - while snapshots_count do - local is_empty = true - -- Get current snapshot - local cur, stats_dt = getstats(), {} - for k,v in pairs(cur) do - if gauges[k] then - stats_dt[k] = v - else - stats_dt[k] = v - (prev[k] or 0) - end - is_empty = is_empty and stats_dt[k] == 0 +-- @returns current stats + difference against previous data set passed in @param prev +local function snapshot_start(prev) + assert(type(prev) == 'table', 'table with previous values expected') + local is_empty = true + -- Get current snapshot + local cur, stats_dt = getstats(), {} + for k,v in pairs(cur) do + if gauges[k] then + stats_dt[k] = v + else + stats_dt[k] = v - (prev[k] or 0) end - prev = cur - -- Calculate upstreams and geotag them if possible - local upstreams - if http.geoip then - upstreams = stats.upstreams() - for k,v in pairs(upstreams) do - local gi - if string.find(k, '.', 1, true) then - gi = http.geoip:search_ipv4(k) - else - gi = http.geoip:search_ipv6(k) - end - if gi then - upstreams[k] = {data=v, location=gi.location, country=gi.country and gi.country.iso_code} - end + is_empty = is_empty and stats_dt[k] == 0 + end + -- Calculate upstreams and geotag them if possible + local upstreams + if http.geoip then + upstreams = stats.upstreams() + for k,v in pairs(upstreams) do + local gi + if string.find(k, '.', 1, true) then + gi = http.geoip:search_ipv4(k) + else + gi = http.geoip:search_ipv6(k) end - end - -- Aggregate per-worker metrics - local wdata = {} - for _, info in pairs(map 'worker.info()') do - if type(info) == 'table' then - wdata[tostring(info.pid)] = { - rss = info.rss, - usertime = info.usertime, - systime = info.systime, - pagefaults = info.pagefaults, - queries = info.queries - } + if gi then + upstreams[k] = {data=v, location=gi.location, country=gi.country and gi.country.iso_code} end end - -- Publish stats updates periodically - if not is_empty then - local update = {time=os.time(), stats=stats_dt, upstreams=upstreams, workers=wdata} - table.insert(snapshots, update) - if #snapshots > snapshots_count then - table.remove(snapshots, 1) - end + end + -- Aggregate per-worker metrics + local wdata = {} + for _, info in pairs(map 'worker.info()') do + if type(info) == 'table' then + wdata[tostring(info.pid)] = { + rss = info.rss, + usertime = info.usertime, + systime = info.systime, + pagefaults = info.pagefaults, + queries = info.queries + } end - worker.sleep(1) end + -- Publish stats updates periodically + if not is_empty then + local update = {time=os.time(), stats=stats_dt, upstreams=upstreams, workers=wdata} + return cur, update + end + return cur, nil end -- Function to sort frequency list local function stream_stats(_, ws) - -- Initially, stream history - local ok, last = true, nil - local batch = {} - for i, s in ipairs(snapshots) do - table.insert(batch, s) - if #batch == 20 or i + 1 == #snapshots then - ok = ws:send(tojson(batch)) - batch = {} - end - end + local ok = true -- Publish stats updates periodically + local prev = getstats() while ok do - -- Get last snapshot - local id = #snapshots - 1 - if id > 0 and snapshots[id].time ~= last then - local push = tojson(snapshots[id]) - last = snapshots[id].time - ok = ws:send(push) - end worker.sleep(1) + local update + prev, update = snapshot_start(prev) + local push = tojson(update) + ok = ws:send(push) end end @@ -160,8 +137,6 @@ local function serve_prometheus() end -- Export module interface -M.init = snapshot_start -M.deinit = snapshot_end M.endpoints = { ['/stats'] = {'application/json', getstats, stream_stats}, ['/frequent'] = {'application/json', function () return stats.frequent() end},