]> git.ipfire.org Git - thirdparty/unbound.git/commitdiff
stats: add query max wait time metric 882/head
authorVadim Fedorenko <vadfed@meta.com>
Fri, 21 Apr 2023 16:23:21 +0000 (09:23 -0700)
committerVadim Fedorenko <vadfed@meta.com>
Wed, 26 Apr 2023 10:27:25 +0000 (03:27 -0700)
Add new statistic value to know the size of the queue in microseconds.

Signed-off-by: Vadim Fedorenko <vadfed@meta.com>
daemon/remote.c
daemon/stats.c
daemon/worker.c
libunbound/unbound.h
smallapp/unbound-control.c

index d8a3a6cafc1e02651436a3229770f87c525d62bc..03daa935ee1809cf7f34cb972ab0a42bb64feda5 100644 (file)
@@ -681,6 +681,8 @@ print_stats(RES* ssl, const char* nm, struct ub_stats_info* s)
                (unsigned long)s->svr.num_queries_prefetch)) return 0;
        if(!ssl_printf(ssl, "%s.num.queries_timed_out"SQ"%lu\n", nm,
                (unsigned long)s->svr.num_queries_timed_out)) return 0;
+       if(!ssl_printf(ssl, "%s.query.queue_time_us.max"SQ"%lu\n", nm,
+               (unsigned long)s->svr.max_query_time_us)) return 0;
        if(!ssl_printf(ssl, "%s.num.expired"SQ"%lu\n", nm,
                (unsigned long)s->svr.ans_expired)) return 0;
        if(!ssl_printf(ssl, "%s.num.recursivereplies"SQ"%lu\n", nm,
index 38f4597b3a9a81b5a590f5524e636124e95efbf3..cef7de8272109631f9c0c1bbbab92efa6a060f77 100644 (file)
@@ -433,6 +433,8 @@ void server_stats_add(struct ub_stats_info* total, struct ub_stats_info* a)
        total->svr.num_queries_missed_cache += a->svr.num_queries_missed_cache;
        total->svr.num_queries_prefetch += a->svr.num_queries_prefetch;
        total->svr.num_queries_timed_out += a->svr.num_queries_timed_out;
+       if (total->svr.max_query_time_us < a->svr.max_query_time_us)
+               total->svr.max_query_time_us = a->svr.max_query_time_us;
        total->svr.sum_query_list_size += a->svr.sum_query_list_size;
        total->svr.ans_expired += a->svr.ans_expired;
 #ifdef USE_DNSCRYPT
index 63b8678e31e7f570fe87cfb013d04c63a4af358c..5c373b79b09161ad2519df1e2d32df4eae51089c 100644 (file)
@@ -1281,6 +1281,7 @@ worker_handle_request(struct comm_point* c, void* arg, int error,
        int is_expired_answer = 0;
        int is_secure_answer = 0;
        int rpz_passthru = 0;
+       long long wait_queue_time = 0;
        /* We might have to chase a CNAME chain internally, in which case
         * we'll have up to two replies and combine them to build a complete
         * answer.  These variables control this case. */
@@ -1289,6 +1290,7 @@ worker_handle_request(struct comm_point* c, void* arg, int error,
        struct query_info* lookup_qinfo = &qinfo;
        struct query_info qinfo_tmp; /* placeholder for lookup_qinfo */
        struct respip_client_info* cinfo = NULL, cinfo_tmp;
+       struct timeval wait_time;
        memset(&qinfo, 0, sizeof(qinfo));
 
        if((error != NETEVENT_NOERROR && error != NETEVENT_DONE)|| !repinfo) {
@@ -1297,9 +1299,13 @@ worker_handle_request(struct comm_point* c, void* arg, int error,
                return 0;
        }
 
-       if (worker->env.cfg->sock_queue_timeout && timeval_isset(c->recv_tv)) {
-         c->recv_tv.tv_sec += worker->env.cfg->sock_queue_timeout;
-               if (timeval_smaller(c->recv_tv, worker->env.now_tv)) {
+       if (worker->env.cfg->sock_queue_timeout && timeval_isset(&c->recv_tv)) {
+               timeval_subtract(&wait_time, worker->env.now_tv, &c->recv_tv);
+               wait_queue_time = wait_time.tv_sec * 1000000 +  wait_time.tv_usec;
+               if (worker->stats.max_query_time_us < wait_queue_time)
+                       worker->stats.max_query_time_us = wait_queue_time;
+               c->recv_tv.tv_sec += worker->env.cfg->sock_queue_timeout;
+               if (timeval_smaller(&c->recv_tv, worker->env.now_tv)) {
                /* count and drop queries that were sitting in the socket queue too long */
                        worker->stats.num_queries_timed_out++;
                        return 0;
index f83a38f8ffd28c3a026b3197c8612de36da50e9b..8a97b16feaee1a06c1c6ba9eb905f0669aae84e2 100644 (file)
@@ -701,7 +701,8 @@ struct ub_server_stats {
        long long num_queries_prefetch;
        /** number of queries which are too late to process */
        long long num_queries_timed_out;
-
+       /** the longest wait time in the queue */
+       long long max_query_time_us;
        /**
         * Sum of the querylistsize of the worker for
         * every query that missed cache. To calculate average.
index d5f124b49bc003cf15724b659908a0a3824202d8..bbc09659fb049dc7b6cf5afba33cef338e3bb608 100644 (file)
@@ -209,6 +209,7 @@ static void pr_stats(const char* nm, struct ub_stats_info* s)
        PR_UL_NM("num.cachemiss", s->svr.num_queries_missed_cache);
        PR_UL_NM("num.prefetch", s->svr.num_queries_prefetch);
        PR_UL_NM("num.queries_timed_out", s->svr.num_queries_timed_out);
+       PR_UL_NM("query.queue_time_us.max", s->svr.max_query_time_us);
        PR_UL_NM("num.expired", s->svr.ans_expired);
        PR_UL_NM("num.recursivereplies", s->mesh_replies_sent);
 #ifdef USE_DNSCRYPT