]> git.ipfire.org Git - thirdparty/knot-resolver.git/commitdiff
daemon/quic: Prioritize existing unused conns over new ones when conn limits are...
authorFrantisek Tobias <frantisek.tobias@nic.cz>
Mon, 10 Nov 2025 09:36:20 +0000 (10:36 +0100)
committerFrantisek Tobias <frantisek.tobias@nic.cz>
Wed, 7 Jan 2026 13:39:14 +0000 (14:39 +0100)
This change reduces the number of failed requests when the worker is experiencing heavy traffic.

daemon/quic_conn.c
daemon/quic_conn.h
daemon/quic_demux.c

index 2099ba1d51d2597209018467d7c12c4c02c105ac..1de244f1f1829b90ca97cd8f8268f57c9a446916 100644 (file)
@@ -3,7 +3,11 @@
  */
 
 #include "quic_conn.h"
+#include "engine.h"
+#include "lib/generic/trie.h"
 #include "lib/log.h"
+#include "lib/resolve.h"
+#include "network.h"
 #include "quic_stream.h"
 #include "quic_common.h"
 #include "libdnssec/random.h"
@@ -122,7 +126,7 @@ static int kr_recv_stream_data_cb(ngtcp2_conn *ngconn, uint32_t flags,
        }
 
        if (datalen <= 1)
-               return NGTCP2_PROTOCOL_VIOLATION;
+               return kr_error(EINVAL);
 
        if (offset == 0) {
                memcpy(wire_buf_free_space(&stream->pers_inbuf), data, datalen);
@@ -136,7 +140,7 @@ static int kr_recv_stream_data_cb(ngtcp2_conn *ngconn, uint32_t flags,
        }
 
        /* we can ignore ret return value, it can only be ENOMEM, at which point
-        * there is nothing we can do anyway and the connection will timeout cleanly*/
+        * there is nothing we can do anyway and the connection will timeout cleanly */
        (void)ngtcp2_conn_extend_max_stream_offset(ngconn, stream_id, datalen);
        ngtcp2_conn_extend_max_offset(ngconn, datalen);
 
@@ -212,6 +216,7 @@ static int stream_close_cb(ngtcp2_conn *ngconn, uint32_t flags,
        rem_node(&stream->list_node);
        session2_close(stream->h.session);
        --conn->streams_count;
+       ++conn->finished_streams;
 
        return NGTCP2_NO_ERROR;
 }
@@ -424,12 +429,11 @@ int kr_tls_server_session(struct pl_quic_conn_sess_data *conn)
                                        kr_log_error(TLS, "Failed to renew expiring ephemeral X.509 cert, using existing one\n");
                                }
                        }
-               } else {
-                       /* non-ephemeral cert: warn once when certificate expires */
-                       if (now >= the_network->tls_credentials->valid_until) {
-                               kr_log_error(TLS, "X.509 certificate has expired!\n");
-                               the_network->tls_credentials->valid_until = GNUTLS_X509_NO_WELL_DEFINED_EXPIRATION;
-                       }
+               /* non-ephemeral cert: warn once when certificate expires */
+               } else if (now >= the_network->tls_credentials->valid_until) {
+                       kr_log_error(TLS, "X.509 certificate has expired!\n");
+                       the_network->tls_credentials->valid_until =
+                               GNUTLS_X509_NO_WELL_DEFINED_EXPIRATION;
                }
        }
 
@@ -473,15 +477,17 @@ int kr_tls_server_session(struct pl_quic_conn_sess_data *conn)
                                          conn->tls_session);
        }
 
-       const gnutls_datum_t alpn_datum = { (void *)"doq", '\x03' };
+       const gnutls_datum_t alpn_datum = {
+               .data = (void *)"doq",
+               .size = 3
+       };
        gnutls_alpn_set_protocols(conn->tls_session, &alpn_datum, 1,
                        GNUTLS_ALPN_MANDATORY);
        if (ret != GNUTLS_E_SUCCESS) {
                kr_log_error(TLS, "gnutls_alpn_set_protocols(): %s (%d)\n", gnutls_strerror_name(ret), ret);
-               return ret;
        }
 
-       return kr_ok();
+       return ret;
 }
 
 static ngtcp2_conn *get_conn(ngtcp2_crypto_conn_ref *conn_ref)
@@ -901,6 +907,19 @@ static int pl_quic_conn_sess_deinit(struct session2 *session, void *sess_data)
        struct pl_quic_conn_sess_data *conn = sess_data;
        while (session2_tasklist_del_first(session, false) != NULL);
 
+       kr_log_debug(DOQ, "Closing connection, %s useful, served %zu streams\n",
+                       conn->finished_streams ? "was" : "wasn't",
+                       conn->finished_streams);
+
+       struct pl_quic_stream_sess_data *s_node;
+       WALK_LIST_FIRST(s_node, conn->streams) {
+               struct pl_quic_stream_sess_data *s =
+                       container_of(s_node, struct pl_quic_stream_sess_data, list_node);
+               rem_node(&s->list_node);
+               session2_close(s->h.session);
+               --conn->streams_count;
+       }
+
        kr_require(conn->streams_count == 0);
        if (conn->priority) {
                gnutls_priority_deinit(conn->priority);
@@ -923,6 +942,7 @@ static int pl_quic_conn_sess_deinit(struct session2 *session, void *sess_data)
        ngtcp2_conn_del(conn->conn);
        conn->conn = NULL;
 
+       session2_timer_stop(session);
        return kr_ok();
 }
 
@@ -939,17 +959,7 @@ static enum protolayer_event_cb_result pl_quic_conn_event_unwrap(
        if (event == PROTOLAYER_EVENT_DISCONNECT ||
                        event == PROTOLAYER_EVENT_CLOSE ||
                        event == PROTOLAYER_EVENT_FORCE_CLOSE) {
-
-               struct pl_quic_stream_sess_data *s_node;
-               WALK_LIST_FIRST(s_node, conn->streams) {
-                       struct pl_quic_stream_sess_data *s =
-                               container_of(s_node, struct pl_quic_stream_sess_data, list_node);
-                       rem_node(&s->list_node);
-                       session2_close(s->h.session);
-                       --conn->streams_count;
-               }
                session2_dec_refs(session);
-               session2_timer_stop(session);
                return PROTOLAYER_EVENT_CONSUME;
        }
 
index 5c0e301828f34392b81ca9cb35b526a57ad90233..69d7d68ec00fff2afe1074c410594e2a1b088e82 100644 (file)
@@ -106,6 +106,7 @@ struct pl_quic_conn_sess_data {
        list_t streams;
        // number of allocated streams structures
        int16_t streams_count;
+       uint64_t finished_streams;
        quic_conn_state_t state;
        size_t cid_pointers;
 };
index 6ba238826f18258632b3c8176846d75dfa8829de..5f5cbc2b1b5376330cf332bcb0697c3bbd25ae8b 100644 (file)
@@ -123,19 +123,81 @@ int kr_quic_table_add(struct pl_quic_conn_sess_data *conn_sess,
        return kr_ok();
 }
 
+/**
+ * Unlink and free a single CID entry of the connection table.
+ *
+ * @param pcid   address of the link that points at the entry to remove;
+ *               on success it is redirected to the entry's successor
+ * @param table  table owning the entry; its `pointers` counter is decremented
+ * @return kr_ok() on success, kr_error(EINVAL) when there is nothing to remove
+ */
+int kr_quic_table_rem2(kr_quic_cid_t **pcid, kr_quic_table_t *table)
+{
+       /* An empty slot must not be dereferenced below. */
+       if (pcid == NULL || *pcid == NULL || table == NULL) {
+               return kr_error(EINVAL);
+       }
+
+       kr_quic_cid_t *cid = *pcid;
+       *pcid = cid->next;
+       free(cid);
+       table->pointers--;
+
+       return kr_ok();
+}
+
+/**
+ * Remove a connection from the table.
+ *
+ * When the ngtcp2 connection exists, every source CID it reports is looked
+ * up and its table entry dropped; otherwise only the entry for conn->dcid
+ * is dropped. Afterwards the connection is deleted from the expiry heap
+ * and table->usage is decremented.
+ *
+ * @param conn   connection to remove; NULL is a no-op
+ * @param table  table to remove it from; NULL is a no-op
+ */
+void kr_quic_table_rem(struct pl_quic_conn_sess_data *conn,
+               kr_quic_table_t *table)
+{
+       if (conn == NULL || table == NULL) {
+               return;
+       }
+
+       if (conn->conn) {
+               size_t num_scid = ngtcp2_conn_get_scid(conn->conn, NULL);
+               ngtcp2_cid *scids = calloc(num_scid, sizeof(*scids));
+
+               /* Without the buffer the SCIDs cannot be enumerated; skip the
+                * per-CID cleanup instead of indexing a NULL array.
+                * NOTE(review): in that case the CID entries stay in the table. */
+               if (scids != NULL) {
+                       ngtcp2_conn_get_scid(conn->conn, scids);
+
+                       for (size_t i = 0; i < num_scid; i++) {
+                               kr_quic_cid_t **pcid = kr_quic_table_lookup2(&scids[i], table);
+                               /* empty slot: this CID is not in the table */
+                               if (pcid == NULL || *pcid == NULL) {
+                                       continue;
+                               }
+                               kr_quic_table_rem2(pcid, table);
+                       }
+
+                       free(scids);
+               }
+
+               conn->cid_pointers--;
+       } else {
+               kr_quic_cid_t **pcid = kr_quic_table_lookup2(&conn->dcid, table);
+               /* Same test as in the loop above: the slot itself has to hold
+                * an entry, kr_quic_table_rem2() dereferences it. */
+               if (pcid != NULL && *pcid != NULL) {
+                       kr_quic_table_rem2(pcid, table);
+               }
+       }
+
+       int pos = heap_find(table->expiry_heap, (heap_val_t *)conn);
+       heap_delete(table->expiry_heap, pos);
+       table->usage--;
+}
+
+/**
+ * Tear down a connection table.
+ *
+ * Removes connections from the head of the expiry heap until it is empty,
+ * asserts that the usage and pointer counters reached zero and then
+ * releases the priority cache, the heap and the table itself.
+ *
+ * @param table  table to free; NULL is a no-op
+ */
+void kr_quic_table_free(kr_quic_table_t *table)
+{
+       if (table == NULL) {
+               return;
+       }
+
+       for (;;) {
+               if (EMPTY_HEAP(table->expiry_heap)) {
+                       break;
+               }
+
+               /* the heap stores pointers to connections; take the head one */
+               struct pl_quic_conn_sess_data **head =
+                       (struct pl_quic_conn_sess_data **)HHEAD(table->expiry_heap);
+               kr_quic_table_rem(*head, table);
+       }
+
+       /* every connection and CID entry should be gone by now */
+       kr_assert(table->usage == 0);
+       kr_assert(table->pointers == 0);
+
+       gnutls_priority_deinit(table->priority);
+
+       heap_deinit(table->expiry_heap);
+       free(table->expiry_heap);
+
+       free(table);
+}
+
 /**
  * Send the DOQ_EXCESSIVE_LOAD special message on a connection.
  *
  * Thin wrapper around send_special(); its return value is explicitly
  * discarded, so the caller is not told whether sending worked.
  *
  * @param conn   connection handed to send_special()
  * @param ctx    iteration context handed to send_special()
  * @param table  not used by this function
  */
 static void send_excessive_load(struct pl_quic_conn_sess_data *conn,
                struct protolayer_iter_ctx *ctx, kr_quic_table_t *table)
 {
        (void)send_special(conn, ctx, DOQ_EXCESSIVE_LOAD);
 }
 
-/* unused for now, compare performance with per conn uv_timer_t spawns */
 void kr_quic_table_sweep(struct kr_quic_table *table,
                struct protolayer_iter_ctx *ctx)
 {
        uint64_t now = 0;
-       size_t removed = 0;
-
        while (!EMPTY_HEAP(table->expiry_heap)) {
                struct pl_quic_conn_sess_data *c =
                        *(struct pl_quic_conn_sess_data **)
@@ -143,20 +205,24 @@ void kr_quic_table_sweep(struct kr_quic_table *table,
 
                if ((c->state & QUIC_STATE_BLOCKED)) {
                        break;
-               } else if (table->usage > table->max_conns) {
+               /* When we reach the limit of open conns we look up the most idle
+                * one, but only close it if it has received at least one query.
+                * This prevents closing brand new connections, which would have
+                * a crippling effect on the number of answered queries when
+                * conn limits are reached. */
+               } else if (table->usage >= table->max_conns &&
+                               // c->streams_count <= 0 &&
+                               c->finished_streams > 0) {
                        send_excessive_load(c, ctx, table);
-                       kr_quic_table_rem(c, table);
-                       session2_event(c->h.session,
+                       session2_event(c->h.session->transport.parent,
                                        PROTOLAYER_EVENT_DISCONNECT,
-                                       NULL);
-                       ++removed;
+                                       c);
                } else if (c->state >= QUIC_STATE_CLOSING) {
                        send_special(c, ctx, QUIC_SEND_CONN_CLOSE);
-                       kr_quic_table_rem(c, table);
-                       session2_event(c->h.session,
+                       // kr_quic_table_rem(c, table);
+                       session2_event(c->h.session->transport.parent,
                                        PROTOLAYER_EVENT_DISCONNECT,
                                        NULL);
-                       ++removed;
 
 
                } else if (kr_quic_conn_timeout(c, &now)) {
@@ -169,7 +235,6 @@ void kr_quic_table_sweep(struct kr_quic_table *table,
                                session2_event(c->h.session,
                                                PROTOLAYER_EVENT_DISCONNECT,
                                                NULL);
-                               ++removed;
                        } else {
                                // quic_conn_mark_used(c, table);
                        }
@@ -180,10 +245,6 @@ void kr_quic_table_sweep(struct kr_quic_table *table,
                        break;
                }
        }
-
-       if (removed > 0) {
-               kr_log_debug(DOQ, "Closing %zu idle quic connections\n", removed);
-       }
 }
 
 static bool init_unique_cid(ngtcp2_cid *cid, size_t len, kr_quic_table_t *table)
@@ -245,15 +306,12 @@ static enum protolayer_iter_cb_result pl_quic_demux_unwrap(void *sess_data,
 
        qconn = kr_quic_table_lookup(&dcid, demux->conn_table);
        if (!qconn) {
-               /* Clear idle connections */
-               // kr_quic_table_sweep(demux->conn_table, ctx);
-
                if (demux->conn_table->usage >= demux->conn_table->max_conns) {
-                       kr_log_warning(DOQ,
-                               "Refusing to open new connection, reached limit of active conns\n");
-                       /* we might want to inform the client
-                        * that limits have been reached */
-                       return protolayer_break(ctx, kr_ok());
+                       kr_quic_table_sweep(demux->conn_table, ctx);
+                       if (demux->conn_table->usage >= demux->conn_table->max_conns) {
+                               /* no luck */
+                               return protolayer_break(ctx, kr_ok());
+                       }
                }
 
                ngtcp2_pkt_hd header = { 0 };
@@ -270,11 +328,12 @@ static enum protolayer_iter_cb_result pl_quic_demux_unwrap(void *sess_data,
                        return protolayer_break(ctx, kr_ok());
                }
 
-               if (header.tokenlen == 0 /*&& quic_require_retry(table)*/) {
-                       kr_log_error(DOQ, "received empty header.token\n");
+               /* additional RTT seems quite expensive for all new connections */
+               // if (header.tokenlen == 0 /*&& quic_require_retry(table)*/) {
+                       // kr_log_error(DOQ, "received empty header.token\n");
                        // ret = -QUIC_SEND_RETRY;
                        // goto finish;
-               }
+               // }
 
                if (header.tokenlen > 0) {
                        if (header.token[0] == NGTCP2_CRYPTO_TOKEN_MAGIC_RETRY) {
@@ -451,66 +510,6 @@ static int pl_quic_demux_sess_init(struct session2 *session, void *sess_data, vo
        return kr_ok();
 }
 
-int kr_quic_table_rem2(kr_quic_cid_t **pcid, kr_quic_table_t *table)
-{
-       kr_quic_cid_t *cid = *pcid;
-       *pcid = cid->next;
-       free(cid);
-       table->pointers--;
-
-       return kr_ok();
-}
-
-void kr_quic_table_rem(struct pl_quic_conn_sess_data *conn,
-               kr_quic_table_t *table)
-{
-       if (conn == NULL || table == NULL) {
-               return;
-       }
-
-       if (conn->conn) {
-               size_t num_scid = ngtcp2_conn_get_scid(conn->conn, NULL);
-               ngtcp2_cid *scids = calloc(num_scid, sizeof(*scids));
-               ngtcp2_conn_get_scid(conn->conn, scids);
-
-               for (size_t i = 0; i < num_scid; i++) {
-                       kr_quic_cid_t **pcid = kr_quic_table_lookup2(&scids[i], table);
-                       if (*pcid == NULL) {
-                               continue;
-                       }
-                       kr_quic_table_rem2(pcid, table);
-               }
-
-               conn->cid_pointers--;
-               table->usage--;
-               free(scids);
-       }
-
-       int pos = heap_find(table->expiry_heap, (heap_val_t *)conn);
-       heap_delete(table->expiry_heap, pos);
-}
-
-void kr_quic_table_free(kr_quic_table_t *table)
-{
-       if (!table)
-               return;
-
-       while (!EMPTY_HEAP(table->expiry_heap)) {
-               struct pl_quic_conn_sess_data *c =
-                       *(struct pl_quic_conn_sess_data **)HHEAD(table->expiry_heap);
-
-               kr_quic_table_rem(c, table);
-       }
-
-       kr_assert(table->usage == 0);
-       kr_assert(table->pointers == 0);
-
-       gnutls_priority_deinit(table->priority);
-       heap_deinit(table->expiry_heap);
-       free(table->expiry_heap);
-       free(table);
-}
-
 static int pl_quic_demux_sess_deinit(struct session2 *session, void *data)
 {
        struct pl_quic_demux_sess_data *quic = data;