git.ipfire.org Git - thirdparty/unbound.git/commitdiff
Mesh reply counters (#1374)
author Robert Edmonds <edmonds@users.noreply.github.com>
Thu, 13 Nov 2025 08:33:05 +0000 (03:33 -0500)
committer GitHub <noreply@github.com>
Thu, 13 Nov 2025 08:33:05 +0000 (09:33 +0100)
* Statistics counter for number of queries dropped by limit on reply addresses

Request list entries can be associated with multiple pending "reply
addresses". Basically each request list entry keeps its own list of
clients that should receive the response once the recursion is finished.
This requires keeping allocations around for each client, and there is
a global limit on the number of *additional* reply addresses that can
be allocated. (Each new request list entry seems to get its own initial
reply address which is not counted against the limit.)

This commit adds a statistics counter "num_queries_replyaddr_limit" that
counts the number of incoming client queries that have been dropped due
to the restriction on allocating additional reply addresses. This allows
distinguishing these drops from other kinds of drops.

* Statistics counter for number of mesh reply entries

Request list entries can be associated with multiple pending "reply
addresses". Since there is a limit on the number of additional reply
addresses that can be allocated which can cause incoming queries to be
dropped if exceeded, it would be nice to be able to track this number.

This commit basically exports the mesh_area's internal counter
`num_reply_addrs` as "threadX.requestlist.current.replies" /
"total.requestlist.current.replies".

daemon/remote.c
daemon/stats.c
doc/unbound-control.8.in
doc/unbound-control.rst
libunbound/unbound.h
services/mesh.c
services/mesh.h
smallapp/unbound-control.c

index 0d55619c29faff60ff4f36a7abc0ee890082fb25..862a43cfda354bafe0dc8cebb393aadb1d24b83a 100644 (file)
@@ -801,6 +801,8 @@ print_stats(RES* ssl, const char* nm, struct ub_stats_info* s)
                (unsigned long)s->svr.num_queries_cookie_invalid)) return 0;
        if(!ssl_printf(ssl, "%s.num.queries_discard_timeout"SQ"%lu\n", nm,
                (unsigned long)s->svr.num_queries_discard_timeout)) return 0;
+       if(!ssl_printf(ssl, "%s.num.queries_replyaddr_limit"SQ"%lu\n", nm,
+               (unsigned long)s->svr.num_queries_replyaddr_limit)) return 0;
        if(!ssl_printf(ssl, "%s.num.queries_wait_limit"SQ"%lu\n", nm,
                (unsigned long)s->svr.num_queries_wait_limit)) return 0;
        if(!ssl_printf(ssl, "%s.num.cachehits"SQ"%lu\n", nm,
@@ -845,6 +847,8 @@ print_stats(RES* ssl, const char* nm, struct ub_stats_info* s)
                (unsigned long)s->mesh_num_states)) return 0;
        if(!ssl_printf(ssl, "%s.requestlist.current.user"SQ"%lu\n", nm,
                (unsigned long)s->mesh_num_reply_states)) return 0;
+       if(!ssl_printf(ssl, "%s.requestlist.current.replies"SQ"%lu\n", nm,
+               (unsigned long)s->mesh_num_reply_addrs)) return 0;
 #ifndef S_SPLINT_S
        sumwait.tv_sec = s->mesh_replies_sum_wait_sec;
        sumwait.tv_usec = s->mesh_replies_sum_wait_usec;
index 41c4656aaec5d25c1a06b8cd73229625643ca4cf..43a9f7092034c9f70c71b388fab1f5307eb4ca39 100644 (file)
@@ -262,6 +262,7 @@ server_stats_compile(struct worker* worker, struct ub_stats_info* s, int reset)
        s->svr = worker->stats;
        s->mesh_num_states = (long long)worker->env.mesh->all.count;
        s->mesh_num_reply_states = (long long)worker->env.mesh->num_reply_states;
+       s->mesh_num_reply_addrs = (long long)worker->env.mesh->num_reply_addrs;
        s->mesh_jostled = (long long)worker->env.mesh->stats_jostled;
        s->mesh_dropped = (long long)worker->env.mesh->stats_dropped;
        s->mesh_replies_sent = (long long)worker->env.mesh->replies_sent;
@@ -284,6 +285,8 @@ server_stats_compile(struct worker* worker, struct ub_stats_info* s, int reset)
                NUM_BUCKETS_HIST);
        s->svr.num_queries_discard_timeout +=
                (long long)worker->env.mesh->num_queries_discard_timeout;
+       s->svr.num_queries_replyaddr_limit +=
+               (long long)worker->env.mesh->num_queries_replyaddr_limit;
        s->svr.num_queries_wait_limit +=
                (long long)worker->env.mesh->num_queries_wait_limit;
        s->svr.num_dns_error_reports +=
@@ -448,6 +451,8 @@ void server_stats_add(struct ub_stats_info* total, struct ub_stats_info* a)
        total->svr.num_queries_cookie_invalid += a->svr.num_queries_cookie_invalid;
        total->svr.num_queries_discard_timeout +=
                a->svr.num_queries_discard_timeout;
+       total->svr.num_queries_replyaddr_limit +=
+               a->svr.num_queries_replyaddr_limit;
        total->svr.num_queries_wait_limit += a->svr.num_queries_wait_limit;
        total->svr.num_dns_error_reports += a->svr.num_dns_error_reports;
        total->svr.num_queries_missed_cache += a->svr.num_queries_missed_cache;
@@ -519,6 +524,7 @@ void server_stats_add(struct ub_stats_info* total, struct ub_stats_info* a)
 
        total->mesh_num_states += a->mesh_num_states;
        total->mesh_num_reply_states += a->mesh_num_reply_states;
+       total->mesh_num_reply_addrs += a->mesh_num_reply_addrs;
        total->mesh_jostled += a->mesh_jostled;
        total->mesh_dropped += a->mesh_dropped;
        total->mesh_replies_sent += a->mesh_replies_sent;
index 782a98e50ff5fdec01d0ac82992c5f7de9d76448..433b373544d6cdf5d4b2bb07b732557752934232 100644 (file)
@@ -880,6 +880,11 @@ number of queries removed due to discard\-timeout by thread
 .UNINDENT
 .INDENT 0.0
 .TP
+.B threadX.num.queries_replyaddr_limit 
+number of queries removed due to replyaddr limits by thread
+.UNINDENT
+.INDENT 0.0
+.TP
 .B threadX.num.queries_wait_limit 
 number of queries removed due to wait\-limit by thread
 .UNINDENT
@@ -994,6 +999,13 @@ Current size of the request list, only the requests from client queries.
 .UNINDENT
 .INDENT 0.0
 .TP
+.B threadX.requestlist.current.replies 
+Current count of the number of reply entries waiting on request list
+entries. Because a request list entry can send results to multiple reply
+addresses, this number may be larger than the size of the request list.
+.UNINDENT
+.INDENT 0.0
+.TP
 .B threadX.recursion.time.avg 
 Average time it took to answer queries that needed recursive processing.
 Note that queries that were answered from the cache are not in this average.
@@ -1048,6 +1060,11 @@ summed over threads.
 .UNINDENT
 .INDENT 0.0
 .TP
+.B total.num.queries_replyaddr_limit 
+summed over threads.
+.UNINDENT
+.INDENT 0.0
+.TP
 .B total.num.queries_wait_limit 
 summed over threads.
 .UNINDENT
@@ -1138,6 +1155,16 @@ summed over threads.
 .UNINDENT
 .INDENT 0.0
 .TP
+.B total.requestlist.current.user 
+summed over threads.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B total.requestlist.current.replies 
+summed over threads.
+.UNINDENT
+.INDENT 0.0
+.TP
 .B total.recursion.time.median 
 averaged over threads.
 .UNINDENT
index 71ff6ee37b6cb854b4bb34a70cf21d16c8691027..630f2e160cd3badfa23366394a1a41705465bd86 100644 (file)
@@ -815,6 +815,10 @@ number of statistic counters:
     number of queries removed due to discard-timeout by thread
 
 
+@@UAHL@unbound-control.stats@threadX.num.queries_replyaddr_limit@@
+    number of queries removed due to replyaddr limits by thread
+
+
 @@UAHL@unbound-control.stats@threadX.num.queries_wait_limit@@
     number of queries removed due to wait-limit by thread
 
@@ -910,6 +914,12 @@ number of statistic counters:
     Current size of the request list, only the requests from client queries.
 
 
+@@UAHL@unbound-control.stats@threadX.requestlist.current.replies@@
+    Current count of the number of reply entries waiting on request list
+    entries. Because a request list entry can send results to multiple reply
+    addresses, this number may be larger than the size of the request list.
+
+
 @@UAHL@unbound-control.stats@threadX.recursion.time.avg@@
     Average time it took to answer queries that needed recursive processing.
     Note that queries that were answered from the cache are not in this average.
@@ -955,6 +965,10 @@ number of statistic counters:
     summed over threads.
 
 
+@@UAHL@unbound-control.stats@total.num.queries_replyaddr_limit@@
+    summed over threads.
+
+
 @@UAHL@unbound-control.stats@total.num.queries_wait_limit@@
     summed over threads.
 
@@ -1027,6 +1041,14 @@ number of statistic counters:
     summed over threads.
 
 
+@@UAHL@unbound-control.stats@total.requestlist.current.user@@
+    summed over threads.
+
+
+@@UAHL@unbound-control.stats@total.requestlist.current.replies@@
+    summed over threads.
+
+
 @@UAHL@unbound-control.stats@total.recursion.time.median@@
     averaged over threads.
 
index c274f80ab897be11aeffb6930c557afd3fd4a344..5a31f98e5bc6875997f5da728d77ce6b9540b578 100644 (file)
@@ -853,6 +853,8 @@ struct ub_server_stats {
        long long qquic;
        /** number of queries removed due to discard-timeout */
        long long num_queries_discard_timeout;
+       /** number of queries removed due to replyaddr limit */
+       long long num_queries_replyaddr_limit;
        /** number of queries removed due to wait-limit */
        long long num_queries_wait_limit;
        /** number of dns error reports generated */
@@ -872,6 +874,8 @@ struct ub_stats_info {
        long long mesh_num_states;
        /** mesh stats: current number of reply (user) states */
        long long mesh_num_reply_states;
+       /** mesh stats: current number of reply entries */
+       long long mesh_num_reply_addrs;
        /** mesh stats: number of reply states overwritten with a new one */
        long long mesh_jostled;
        /** mesh stats: number of incoming queries dropped */
index ca622e9c9232a8124534edb26252122cc533fe25..4a947766d8a33d88fcac0481508b1f7877679181 100644 (file)
@@ -231,6 +231,7 @@ mesh_create(struct module_stack* stack, struct module_env* env)
        mesh->ans_expired = 0;
        mesh->ans_cachedb = 0;
        mesh->num_queries_discard_timeout = 0;
+       mesh->num_queries_replyaddr_limit = 0;
        mesh->num_queries_wait_limit = 0;
        mesh->num_dns_error_reports = 0;
        mesh->max_reply_states = env->cfg->num_queries_per_thread;
@@ -474,7 +475,7 @@ void mesh_new_client(struct mesh_area* mesh, struct query_info* qinfo,
                        verbose(VERB_ALGO, "Too many requests queued. "
                                "dropping incoming query.");
                        comm_point_drop_reply(rep);
-                       mesh->stats_dropped++;
+                       mesh->num_queries_replyaddr_limit++;
                        return;
                }
        }
@@ -2295,6 +2296,7 @@ mesh_stats_clear(struct mesh_area* mesh)
        memset(&mesh->rpz_action[0], 0, sizeof(size_t)*UB_STATS_RPZ_ACTION_NUM);
        mesh->ans_nodata = 0;
        mesh->num_queries_discard_timeout = 0;
+       mesh->num_queries_replyaddr_limit = 0;
        mesh->num_queries_wait_limit = 0;
        mesh->num_dns_error_reports = 0;
 }
index 53a05b443e7d8a4b081042282769ce3258a974cf..d2fac9d3c9185abd716be7d655569341dac146f6 100644 (file)
@@ -141,6 +141,8 @@ struct mesh_area {
        size_t rpz_action[UB_STATS_RPZ_ACTION_NUM];
        /** stats, number of queries removed due to discard-timeout */
        size_t num_queries_discard_timeout;
+       /** stats, number of queries removed due to replyaddr limit */
+       size_t num_queries_replyaddr_limit;
        /** stats, number of queries removed due to wait-limit */
        size_t num_queries_wait_limit;
        /** stats, number of dns error reports generated */
index 696750c19fa15ee687418b7f070588360c0a3d2d..bb1d5237edcf84452733bb10c26389c8140b8c11 100644 (file)
@@ -236,6 +236,8 @@ static void pr_stats(const char* nm, struct ub_stats_info* s)
                s->svr.num_queries_cookie_invalid);
        PR_UL_NM("num.queries_discard_timeout",
                s->svr.num_queries_discard_timeout);
+       PR_UL_NM("num.queries_replyaddr_limit",
+               s->svr.num_queries_replyaddr_limit);
        PR_UL_NM("num.queries_wait_limit", s->svr.num_queries_wait_limit);
        PR_UL_NM("num.cachehits",
                s->svr.num_queries - s->svr.num_queries_missed_cache);
@@ -263,6 +265,7 @@ static void pr_stats(const char* nm, struct ub_stats_info* s)
        PR_UL_NM("requestlist.exceeded", s->mesh_dropped);
        PR_UL_NM("requestlist.current.all", s->mesh_num_states);
        PR_UL_NM("requestlist.current.user", s->mesh_num_reply_states);
+       PR_UL_NM("requestlist.current.replies", s->mesh_num_reply_addrs);
 #ifndef S_SPLINT_S
        sumwait.tv_sec = s->mesh_replies_sum_wait_sec;
        sumwait.tv_usec = s->mesh_replies_sum_wait_usec;