Mesh reply counters (#1374)

author Robert Edmonds <edmonds@users.noreply.github.com>

Thu, 13 Nov 2025 08:33:05 +0000 (03:33 -0500)

committer GitHub <noreply@github.com>

Thu, 13 Nov 2025 08:33:05 +0000 (09:33 +0100)
author Robert Edmonds <edmonds@users.noreply.github.com>
Thu, 13 Nov 2025 08:33:05 +0000 (03:33 -0500)
committer GitHub <noreply@github.com>
Thu, 13 Nov 2025 08:33:05 +0000 (09:33 +0100)
diff --git a/daemon/remote.c b/daemon/remote.c

index 0d55619c29faff60ff4f36a7abc0ee890082fb25..862a43cfda354bafe0dc8cebb393aadb1d24b83a 100644 (file)
--- a/daemon/remote.c
+++ b/daemon/remote.c
@@ -801,6 +801,8 @@ print_stats(RES* ssl, const char* nm, struct ub_stats_info* s)
                 (unsigned long)s->svr.num_queries_cookie_invalid)) return 0;
         if(!ssl_printf(ssl, "%s.num.queries_discard_timeout"SQ"%lu\n", nm,
                 (unsigned long)s->svr.num_queries_discard_timeout)) return 0;
+       if(!ssl_printf(ssl, "%s.num.queries_replyaddr_limit"SQ"%lu\n", nm,
+               (unsigned long)s->svr.num_queries_replyaddr_limit)) return 0;
         if(!ssl_printf(ssl, "%s.num.queries_wait_limit"SQ"%lu\n", nm,
                 (unsigned long)s->svr.num_queries_wait_limit)) return 0;
         if(!ssl_printf(ssl, "%s.num.cachehits"SQ"%lu\n", nm,
@@ -845,6 +847,8 @@ print_stats(RES* ssl, const char* nm, struct ub_stats_info* s)
                 (unsigned long)s->mesh_num_states)) return 0;
         if(!ssl_printf(ssl, "%s.requestlist.current.user"SQ"%lu\n", nm,
                 (unsigned long)s->mesh_num_reply_states)) return 0;
+       if(!ssl_printf(ssl, "%s.requestlist.current.replies"SQ"%lu\n", nm,
+               (unsigned long)s->mesh_num_reply_addrs)) return 0;
  #ifndef S_SPLINT_S
         sumwait.tv_sec = s->mesh_replies_sum_wait_sec;
         sumwait.tv_usec = s->mesh_replies_sum_wait_usec;
diff --git a/daemon/stats.c b/daemon/stats.c

index 41c4656aaec5d25c1a06b8cd73229625643ca4cf..43a9f7092034c9f70c71b388fab1f5307eb4ca39 100644 (file)
--- a/daemon/stats.c
+++ b/daemon/stats.c
@@ -262,6 +262,7 @@ server_stats_compile(struct worker* worker, struct ub_stats_info* s, int reset)
         s->svr = worker->stats;
         s->mesh_num_states = (long long)worker->env.mesh->all.count;
         s->mesh_num_reply_states = (long long)worker->env.mesh->num_reply_states;
+       s->mesh_num_reply_addrs = (long long)worker->env.mesh->num_reply_addrs;
         s->mesh_jostled = (long long)worker->env.mesh->stats_jostled;
         s->mesh_dropped = (long long)worker->env.mesh->stats_dropped;
         s->mesh_replies_sent = (long long)worker->env.mesh->replies_sent;
@@ -284,6 +285,8 @@ server_stats_compile(struct worker* worker, struct ub_stats_info* s, int reset)
                 NUM_BUCKETS_HIST);
         s->svr.num_queries_discard_timeout +=
                 (long long)worker->env.mesh->num_queries_discard_timeout;
+       s->svr.num_queries_replyaddr_limit +=
+               (long long)worker->env.mesh->num_queries_replyaddr_limit;
         s->svr.num_queries_wait_limit +=
                 (long long)worker->env.mesh->num_queries_wait_limit;
         s->svr.num_dns_error_reports +=
@@ -448,6 +451,8 @@ void server_stats_add(struct ub_stats_info* total, struct ub_stats_info* a)
         total->svr.num_queries_cookie_invalid += a->svr.num_queries_cookie_invalid;
         total->svr.num_queries_discard_timeout +=
                 a->svr.num_queries_discard_timeout;
+       total->svr.num_queries_replyaddr_limit +=
+               a->svr.num_queries_replyaddr_limit;
         total->svr.num_queries_wait_limit += a->svr.num_queries_wait_limit;
         total->svr.num_dns_error_reports += a->svr.num_dns_error_reports;
         total->svr.num_queries_missed_cache += a->svr.num_queries_missed_cache;
@@ -519,6 +524,7 @@ void server_stats_add(struct ub_stats_info* total, struct ub_stats_info* a)
  
         total->mesh_num_states += a->mesh_num_states;
         total->mesh_num_reply_states += a->mesh_num_reply_states;
+       total->mesh_num_reply_addrs += a->mesh_num_reply_addrs;
         total->mesh_jostled += a->mesh_jostled;
         total->mesh_dropped += a->mesh_dropped;
         total->mesh_replies_sent += a->mesh_replies_sent;
diff --git a/doc/unbound-control.8.in b/doc/unbound-control.8.in

index 782a98e50ff5fdec01d0ac82992c5f7de9d76448..433b373544d6cdf5d4b2bb07b732557752934232 100644 (file)
--- a/doc/unbound-control.8.in
+++ b/doc/unbound-control.8.in
@@ -880,6 +880,11 @@ number of queries removed due to discard\-timeout by thread
  .UNINDENT
  .INDENT 0.0
  .TP
+.B threadX.num.queries_replyaddr_limit 
+number of queries removed due to replyaddr limits by thread
+.UNINDENT
+.INDENT 0.0
+.TP
  .B threadX.num.queries_wait_limit 
  number of queries removed due to wait\-limit by thread
  .UNINDENT
@@ -994,6 +999,13 @@ Current size of the request list, only the requests from client queries.
  .UNINDENT
  .INDENT 0.0
  .TP
+.B threadX.requestlist.current.replies 
+Current number of reply entries waiting on request list entries.
+Because a request list entry can send results to multiple reply
+addresses, this number may be larger than the size of the request list.
+.UNINDENT
+.INDENT 0.0
+.TP
  .B threadX.recursion.time.avg 
  Average time it took to answer queries that needed recursive processing.
  Note that queries that were answered from the cache are not in this average.
@@ -1048,6 +1060,11 @@ summed over threads.
  .UNINDENT
  .INDENT 0.0
  .TP
+.B total.num.queries_replyaddr_limit 
+summed over threads.
+.UNINDENT
+.INDENT 0.0
+.TP
  .B total.num.queries_wait_limit 
  summed over threads.
  .UNINDENT
@@ -1138,6 +1155,16 @@ summed over threads.
  .UNINDENT
  .INDENT 0.0
  .TP
+.B total.requestlist.current.user 
+summed over threads.
+.UNINDENT
+.INDENT 0.0
+.TP
+.B total.requestlist.current.replies 
+summed over threads.
+.UNINDENT
+.INDENT 0.0
+.TP
  .B total.recursion.time.median 
  averaged over threads.
  .UNINDENT
diff --git a/doc/unbound-control.rst b/doc/unbound-control.rst

index 71ff6ee37b6cb854b4bb34a70cf21d16c8691027..630f2e160cd3badfa23366394a1a41705465bd86 100644 (file)
--- a/doc/unbound-control.rst
+++ b/doc/unbound-control.rst
@@ -815,6 +815,10 @@ number of statistic counters:
      number of queries removed due to discard-timeout by thread
  
  
+@@UAHL@unbound-control.stats@threadX.num.queries_replyaddr_limit@@
+    number of queries removed due to replyaddr limits by thread
+
+
  @@UAHL@unbound-control.stats@threadX.num.queries_wait_limit@@
      number of queries removed due to wait-limit by thread
  
@@ -910,6 +914,12 @@ number of statistic counters:
      Current size of the request list, only the requests from client queries.
  
  
+@@UAHL@unbound-control.stats@threadX.requestlist.current.replies@@
+    Current number of reply entries waiting on request list entries.
+    Because a request list entry can send results to multiple reply
+    addresses, this number may be larger than the size of the request list.
+
+
  @@UAHL@unbound-control.stats@threadX.recursion.time.avg@@
      Average time it took to answer queries that needed recursive processing.
      Note that queries that were answered from the cache are not in this average.
@@ -955,6 +965,10 @@ number of statistic counters:
      summed over threads.
  
  
+@@UAHL@unbound-control.stats@total.num.queries_replyaddr_limit@@
+    summed over threads.
+
+
  @@UAHL@unbound-control.stats@total.num.queries_wait_limit@@
      summed over threads.
  
@@ -1027,6 +1041,14 @@ number of statistic counters:
      summed over threads.
  
  
+@@UAHL@unbound-control.stats@total.requestlist.current.user@@
+    summed over threads.
+
+
+@@UAHL@unbound-control.stats@total.requestlist.current.replies@@
+    summed over threads.
+
+
  @@UAHL@unbound-control.stats@total.recursion.time.median@@
      averaged over threads.
  
diff --git a/libunbound/unbound.h b/libunbound/unbound.h

index c274f80ab897be11aeffb6930c557afd3fd4a344..5a31f98e5bc6875997f5da728d77ce6b9540b578 100644 (file)
--- a/libunbound/unbound.h
+++ b/libunbound/unbound.h
@@ -853,6 +853,8 @@ struct ub_server_stats {
         long long qquic;
         /** number of queries removed due to discard-timeout */
         long long num_queries_discard_timeout;
+       /** number of queries removed due to replyaddr limit */
+       long long num_queries_replyaddr_limit;
         /** number of queries removed due to wait-limit */
         long long num_queries_wait_limit;
         /** number of dns error reports generated */
@@ -872,6 +874,8 @@ struct ub_stats_info {
         long long mesh_num_states;
         /** mesh stats: current number of reply (user) states */
         long long mesh_num_reply_states;
+       /** mesh stats: current number of reply entries */
+       long long mesh_num_reply_addrs;
         /** mesh stats: number of reply states overwritten with a new one */
         long long mesh_jostled;
         /** mesh stats: number of incoming queries dropped */
diff --git a/services/mesh.c b/services/mesh.c

index ca622e9c9232a8124534edb26252122cc533fe25..4a947766d8a33d88fcac0481508b1f7877679181 100644 (file)
--- a/services/mesh.c
+++ b/services/mesh.c
@@ -231,6 +231,7 @@ mesh_create(struct module_stack* stack, struct module_env* env)
         mesh->ans_expired = 0;
         mesh->ans_cachedb = 0;
         mesh->num_queries_discard_timeout = 0;
+       mesh->num_queries_replyaddr_limit = 0;
         mesh->num_queries_wait_limit = 0;
         mesh->num_dns_error_reports = 0;
         mesh->max_reply_states = env->cfg->num_queries_per_thread;
@@ -474,7 +475,7 @@ void mesh_new_client(struct mesh_area* mesh, struct query_info* qinfo,
                         verbose(VERB_ALGO, "Too many requests queued. "
                                 "dropping incoming query.");
                         comm_point_drop_reply(rep);
-                       mesh->stats_dropped++;
+                       mesh->num_queries_replyaddr_limit++;
                         return;
                 }
         }
@@ -2295,6 +2296,7 @@ mesh_stats_clear(struct mesh_area* mesh)
         memset(&mesh->rpz_action[0], 0, sizeof(size_t)*UB_STATS_RPZ_ACTION_NUM);
         mesh->ans_nodata = 0;
         mesh->num_queries_discard_timeout = 0;
+       mesh->num_queries_replyaddr_limit = 0;
         mesh->num_queries_wait_limit = 0;
         mesh->num_dns_error_reports = 0;
  }
diff --git a/services/mesh.h b/services/mesh.h

index 53a05b443e7d8a4b081042282769ce3258a974cf..d2fac9d3c9185abd716be7d655569341dac146f6 100644 (file)
--- a/services/mesh.h
+++ b/services/mesh.h
@@ -141,6 +141,8 @@ struct mesh_area {
         size_t rpz_action[UB_STATS_RPZ_ACTION_NUM];
         /** stats, number of queries removed due to discard-timeout */
         size_t num_queries_discard_timeout;
+       /** stats, number of queries removed due to replyaddr limit */
+       size_t num_queries_replyaddr_limit;
         /** stats, number of queries removed due to wait-limit */
         size_t num_queries_wait_limit;
         /** stats, number of dns error reports generated */
diff --git a/smallapp/unbound-control.c b/smallapp/unbound-control.c

index 696750c19fa15ee687418b7f070588360c0a3d2d..bb1d5237edcf84452733bb10c26389c8140b8c11 100644 (file)
--- a/smallapp/unbound-control.c
+++ b/smallapp/unbound-control.c
@@ -236,6 +236,8 @@ static void pr_stats(const char* nm, struct ub_stats_info* s)
                 s->svr.num_queries_cookie_invalid);
         PR_UL_NM("num.queries_discard_timeout",
                 s->svr.num_queries_discard_timeout);
+       PR_UL_NM("num.queries_replyaddr_limit",
+               s->svr.num_queries_replyaddr_limit);
         PR_UL_NM("num.queries_wait_limit", s->svr.num_queries_wait_limit);
         PR_UL_NM("num.cachehits",
                 s->svr.num_queries - s->svr.num_queries_missed_cache);
@@ -263,6 +265,7 @@ static void pr_stats(const char* nm, struct ub_stats_info* s)
         PR_UL_NM("requestlist.exceeded", s->mesh_dropped);
         PR_UL_NM("requestlist.current.all", s->mesh_num_states);
         PR_UL_NM("requestlist.current.user", s->mesh_num_reply_states);
+       PR_UL_NM("requestlist.current.replies", s->mesh_num_reply_addrs);
  #ifndef S_SPLINT_S
         sumwait.tv_sec = s->mesh_replies_sum_wait_sec;
         sumwait.tv_usec = s->mesh_replies_sum_wait_usec;
author	Robert Edmonds <edmonds@users.noreply.github.com>
	Thu, 13 Nov 2025 08:33:05 +0000 (03:33 -0500)
committer	GitHub <noreply@github.com>
	Thu, 13 Nov 2025 08:33:05 +0000 (09:33 +0100)
daemon/remote.c		patch \| blob \| blame \| history
daemon/stats.c		patch \| blob \| blame \| history
doc/unbound-control.8.in		patch \| blob \| blame \| history
doc/unbound-control.rst		patch \| blob \| blame \| history
libunbound/unbound.h		patch \| blob \| blame \| history
services/mesh.c		patch \| blob \| blame \| history
services/mesh.h		patch \| blob \| blame \| history
smallapp/unbound-control.c		patch \| blob \| blame \| history