git.ipfire.org Git - thirdparty/knot-resolver.git/commitdiff
modules/stats: add answer.stale
author: Frantisek Tobias <frantisek.tobias@nic.cz>
Mon, 19 Aug 2024 15:16:38 +0000 (17:16 +0200)
committer: Vladimír Čunát <vladimir.cunat@nic.cz>
Mon, 19 Aug 2024 15:16:38 +0000 (17:16 +0200)
NEWS
daemon/lua/kres-gen-33.lua
lib/resolve.h
manager/knot_resolver_manager/statistics.py
modules/serve_stale/serve_stale.lua
modules/stats/README.rst
modules/stats/stats.c

diff --git a/NEWS b/NEWS
index a9540326384758fea834ee54f4f0e212f0a130b8..11c4bc453e3ccfdc0f0365129daefb24949935a2 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -12,6 +12,7 @@ Improvements
 
 - answer NOTIMPL for meta-types and non-IN RR classes (!1589)
 - views: improve interaction with old-style policies (!1576)
+- stats: add stale answer counter 'answer.stale' (!1591)
 
 Bugfixes
 --------
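diff --git a/daemon/lua/kres-gen-33.lua b/daemon/lua/kres-gen-33.lua
index 6be16bc4ae70d6e2f57df16a80b3ab29ef0ece79..40a03453bd79bca67ff7786dc9efdcccdf3e2df4 100644 (file)
--- a/daemon/lua/kres-gen-33.lua
+++ b/daemon/lua/kres-gen-33.lua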
@@ -248,6 +248,7 @@ struct kr_request {
        ranked_rr_array_t add_selected;
        _Bool answ_validated;
        _Bool auth_validated;
+       _Bool stale_accounted;
        uint8_t rank;
        struct kr_rplan rplan;
        trace_log_f trace_log;
diff --git a/lib/resolve.h b/lib/resolve.h
index 443fef29c19d70babf64159f3eb1bfa31c4c9045..cbc20877e85a21d9076a133658df33876fc3f223 100644 (file)
--- a/lib/resolve.h
+++ b/lib/resolve.h
@@ -260,6 +260,7 @@ struct kr_request {
        ranked_rr_array_t add_selected;
        bool answ_validated; /**< internal to validator; beware of caching, etc. */
        bool auth_validated; /**< see answ_validated ^^ ; TODO */
+       bool stale_accounted;
 
        /** Overall rank for the request.
         *
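diff --git a/manager/knot_resolver_manager/statistics.py b/manager/knot_resolver_manager/statistics.py
index 4a0eb783a3043455d6b30fa4b8feb8510660ad28..ae9d98119942c2506298f79b2801c4c9f061281f 100644 (file)
--- a/manager/knot_resolver_manager/statistics.py
+++ b/manager/knot_resolver_manager/statistics.py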
@@ -119,6 +119,12 @@ if _prometheus_support:
             label=("instance_id", sid),
             value=metrics["answer"]["cached"],
         )
+        yield _counter(
+            "resolver_answer_stale",
+            "number of queries that utilized stale data",
+            label=("instance_id", sid),
+            value=metrics["answer"]["stale"],
+        )
         yield _counter(
             "resolver_answer_rcode_noerror",
             "number of NOERROR answers",
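diff --git a/modules/serve_stale/serve_stale.lua b/modules/serve_stale/serve_stale.lua
index faf07fbe05a06f7c5bafee9940339de52a27125b..c1528e804d11ca071e979237b449c4dea7b7dd29 100644 (file)
--- a/modules/serve_stale/serve_stale.lua
+++ b/modules/serve_stale/serve_stale.lua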
@@ -8,9 +8,10 @@ local ffi = require('ffi')
 M.timeout = 3*sec
 
 M.callback = ffi.cast("kr_stale_cb",
-       function (ttl) --, name, type, qry)
+       function (ttl, _, _, qry)
                --log_debug(ffi.C.SRVSTALE, '   => called back with TTL: ' .. tostring(ttl))
                if ttl + 3600 * 24 > 0 then -- at most one day stale
+                       qry.request.stale_accounted = true
                        return 1
                else
                        return -1
@@ -27,7 +28,9 @@ M.layer = {
                local now = ffi.C.kr_now()
                local deadline = qry.creation_time_mono + M.timeout
                if now > deadline or qry.flags.NO_NS_FOUND then
-                       log_debug(ffi.C.LOG_GRP_SRVSTALE, '   => no reachable NS, using stale data')
+                       log_qry(qry, ffi.C.LOG_GRP_SRVSTALE,
+                               '   => no reachable NS, using stale data "%s"',
+                               kres.dname2str(qry:name()))
                        qry.stale_cb = M.callback
                        -- TODO: probably start the same request that doesn't stale-serve,
                        -- but first we need some detection of non-interactive / internal requests.
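diff --git a/modules/stats/README.rst b/modules/stats/README.rst
index 1def925c418b4d2c8f26b6630c583beec925a19e..e9258274f2585ecf619b060551402561a73059ba 100644 (file)
--- a/modules/stats/README.rst
+++ b/modules/stats/README.rst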
@@ -55,6 +55,8 @@ Built-in counters keep track of number of queries and answers matching specific
 +-----------------+----------------------------------+
 | answer.cached   | queries answered from cache      |
 +-----------------+----------------------------------+
+| answer.stale    | queries that utilized stale data |
++-----------------+----------------------------------+
 
 +-----------------+----------------------------------+
 | **Answers categorized by RCODE**                   |
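diff --git a/modules/stats/stats.c b/modules/stats/stats.c
index deed9c949a2ea71a5d2a5b1fe37be129a82987b0..596847d7c20e84bd2a16956732fdfdeacbecb77b 100644 (file)
--- a/modules/stats/stats.c
+++ b/modules/stats/stats.c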
  #define UPSTREAMS_COUNT  512 /* Size of recent upstreams */
 #endif
 
-/** @cond internal Fixed-size map of predefined metrics. */
+/** @cond internal Fixed-size map of predefined metrics.
+ *
+ * When changing the list, don't forget _parse_resolver_metrics()
+ * in ../../manager/knot_resolver_manager/statistics.py
+ */
 #define CONST_METRICS(X) \
        X(answer,total) X(answer,noerror) X(answer,nodata) X(answer,nxdomain) X(answer,servfail) \
        X(answer,cached) X(answer,1ms) X(answer,10ms) X(answer,50ms) X(answer,100ms) \
        X(answer,250ms) X(answer,500ms) X(answer,1000ms) X(answer,1500ms) X(answer,slow) \
        X(answer,sum_ms) \
+       X(answer,stale) \
        X(answer,aa) X(answer,tc) X(answer,rd) X(answer,ra) X(answer, ad) X(answer,cd) \
        X(answer,edns0) X(answer,do) \
        X(query,edns) X(query,dnssec) \
@@ -303,6 +308,7 @@ static int collect(kr_layer_t *ctx)
                DEPRECATED
                use new names metric_answer_edns0 and metric_answer_do
        */
+       stat_const_add(data, metric_answer_stale, param->stale_accounted);
        stat_const_add(data, metric_query_edns, knot_pkt_has_edns(param->answer));
        stat_const_add(data, metric_query_dnssec, knot_pkt_has_dnssec(param->answer));