git.ipfire.org Git - thirdparty/unbound.git/commitdiff
- Fix that on malloc failure during accept of TCP, the
author W.C.A. Wijngaards <wouter@nlnetlabs.nl>
Wed, 17 Jun 2026 13:37:04 +0000 (15:37 +0200)
committer W.C.A. Wijngaards <wouter@nlnetlabs.nl>
Wed, 17 Jun 2026 13:37:04 +0000 (15:37 +0200)
  socket is not left to cause a read event loop. It uses
  slow-accept to delay accepting new connections, if
  that fails it drops the new connections. When the tcp
  connection usage is full, it waits for 50msec, to allow
  existing queries to be resolved. Thanks to Qifan Zhang,
  Palo Alto Networks, for the report.

doc/Changelog
util/netevent.c
util/netevent.h

index 93c86dc411c908148ddf47f07d968341af07d2e3..dd05d8928ce4738ade8414ba32e99a558bc7c5d3 100644 (file)
        - Fix that malloc failure for ngtcp2_conn_server_new
          cleans up reference that older ngtcp2 versions can leave.
          Thanks to Qifan Zhang, Palo Alto Networks, for the report.
+       - Fix that on malloc failure during accept of TCP, the
+         socket is not left to cause a read event loop. It uses
+         slow-accept to delay accepting new connections, if
+         that fails it drops the new connections. When the tcp
+         connection usage is full, it waits for 50msec, to allow
+         existing queries to be resolved. Thanks to Qifan Zhang,
+         Palo Alto Networks, for the report.
 
 16 June 2026: Wouter
        - Fix to disallow $INCLUDE for secondary zones. Start up
index 53ee7ce39430222469ab79c26b8435517542d2e7..37b29bc243828d33eec2bcce485edad761987cd2 100644 (file)
@@ -2985,6 +2985,62 @@ void comm_base_handle_slow_accept(int ATTR_UNUSED(fd),
        }
 }
 
+/** Out of resources in the accept path: pause all listening for the given
+ * time (NETEVENT_SLOW_ACCEPT_TIME by default); the timer event that is set
+ * up here calls comm_base_handle_slow_accept when it expires.
+ * Does nothing if the base has no stop_accept callback, or if slow accept
+ * is already enabled. If the timer event cannot be allocated, one pending
+ * connection is accepted and closed, so it is drained from the queue.
+ * @param c: the comm point that is a listening socket.
+ * @param msec: time to wait in msec. If 0 or less, the slow accept time.
+ */
+static void
+comm_point_slow_accept(struct comm_point* c, int msec)
+{
+       struct comm_base* b = c->ev->base;
+       struct timeval tv;
+       struct ub_event* slowev;
+       if(!b->stop_accept)
+               return;
+       if(b->eb->slow_accept_enabled)
+               return;
+       /* Allocate the event */
+       slowev = ub_event_new(b->eb->base, -1, UB_EV_TIMEOUT,
+               comm_base_handle_slow_accept, b);
+       if(!slowev) {
+               /* The slow accept was not enabled yet, to handle
+                * the allocation failure, instead drain the incoming
+                * connection. */
+               int new_fd = accept(c->fd, NULL, NULL);
+               if(new_fd != -1) {
+                       verbose(VERB_ALGO, "slow accept: event_new failed, "
+                               "drop connection");
+                       sock_close(new_fd);
+               }
+               return;
+       }
+       /* pick the wait time first, so the log prints the time in use */
+       if(msec <= 0)
+               msec = NETEVENT_SLOW_ACCEPT_TIME;
+       ub_comm_base_now(b);
+       if(b->eb->last_slow_log+SLOW_LOG_TIME <= b->eb->secs) {
+               b->eb->last_slow_log = b->eb->secs;
+               verbose(VERB_OPS, "out of resources on accept, "
+                       "slow down accept for %d msec", msec);
+       }
+       b->eb->slow_accept_enabled = 1;
+       fptr_ok(fptr_whitelist_stop_accept(b->stop_accept));
+       (*b->stop_accept)(b->cb_arg);
+       /* set timeout, no mallocs */
+       tv.tv_sec = msec/1000;
+       tv.tv_usec = (msec%1000)*1000;
+       b->eb->slow_accept = slowev;
+       if(ub_event_add(b->eb->slow_accept, &tv) != 0) {
+               /* we do not want to log here,
+                * error: "event_add failed." */
+       }
+}
+
 int comm_point_perform_accept(struct comm_point* c,
        struct sockaddr_storage* addr, socklen_t* addrlen)
 {
@@ -3018,6 +3074,14 @@ int comm_point_perform_accept(struct comm_point* c,
                        if(c->ev->base->stop_accept) {
                                struct comm_base* b = c->ev->base;
                                struct timeval tv;
+                               struct ub_event* slowev = ub_event_new(
+                                       b->eb->base, -1, UB_EV_TIMEOUT,
+                                       comm_base_handle_slow_accept, b);
+                               if(!slowev) {
+                                       verbose(VERB_ALGO, "slow accept: "
+                                               "event_new failed");
+                                       return -1;
+                               }
                                verbose(VERB_ALGO, "out of file descriptors: "
                                        "slow accept");
                                ub_comm_base_now(b);
@@ -3037,15 +3101,8 @@ int comm_point_perform_accept(struct comm_point* c,
                                /* set timeout, no mallocs */
                                tv.tv_sec = NETEVENT_SLOW_ACCEPT_TIME/1000;
                                tv.tv_usec = (NETEVENT_SLOW_ACCEPT_TIME%1000)*1000;
-                               b->eb->slow_accept = ub_event_new(b->eb->base,
-                                       -1, UB_EV_TIMEOUT,
-                                       comm_base_handle_slow_accept, b);
-                               if(b->eb->slow_accept == NULL) {
-                                       /* we do not want to log here, because
-                                        * that would spam the logfiles.
-                                        * error: "event_base_set failed." */
-                               }
-                               else if(ub_event_add(b->eb->slow_accept, &tv)
+                               b->eb->slow_accept = slowev;
+                               if(ub_event_add(b->eb->slow_accept, &tv)
                                        != 0) {
                                        /* we do not want to log here,
                                         * error: "event_add failed." */
@@ -3226,6 +3283,13 @@ comm_point_tcp_accept_callback(int fd, short event, void* arg)
        /* find free tcp handler. */
        if(!c->tcp_free) {
                log_warn("accepted too many tcp, connections full");
+               /* Wait for a short moment (say 50msec) so that other
+                * TCP connections can complete. Or timeout, at the busy
+                * timeout of about 200msec. That stops this routine from
+                * spinning endlessly, and gives time to complete the other
+                * requests. But it is not as slow as the 2000msec wait
+                * time for when the kernel is out of buffers. */
+               comm_point_slow_accept(c, NETEVENT_SLOW_ACCEPT_QUEUE_TIME);
                return;
        }
        /* accept incoming connection. */
@@ -3247,6 +3311,7 @@ comm_point_tcp_accept_callback(int fd, short event, void* arg)
                if(!c_hdl->h2_session ||
                        !http2_session_server_create(c_hdl->h2_session)) {
                        log_warn("failed to create nghttp2");
+                       comm_point_slow_accept(c, 0);
                        return;
                }
                if(!c_hdl->h2_session ||
@@ -3254,6 +3319,7 @@ comm_point_tcp_accept_callback(int fd, short event, void* arg)
                        log_warn("failed to submit http2 settings");
                        if(c_hdl->h2_session)
                                http2_session_server_delete(c_hdl->h2_session);
+                       comm_point_slow_accept(c, 0);
                        return;
                }
                if(!c->ssl) {
@@ -3270,11 +3336,12 @@ comm_point_tcp_accept_callback(int fd, short event, void* arg)
                        comm_point_tcp_handle_callback, c_hdl);
        }
        if(!c_hdl->ev->ev) {
-               log_warn("could not ub_event_new, dropped tcp");
+               log_warn("could not ub_event_new, for new tcp");
 #ifdef HAVE_NGHTTP2
                if(c_hdl->type == comm_http && c_hdl->h2_session)
                        http2_session_server_delete(c_hdl->h2_session);
 #endif
+               comm_point_slow_accept(c, 0);
                return;
        }
        log_assert(fd != -1);
index c5114bbbef27da7a86251457821c10aed6d70907..e073b45f2c9d3e14bce6938e63f2011f41d01700 100644 (file)
@@ -111,6 +111,8 @@ typedef int comm_point_callback_type(struct comm_point*, void*, int,
 
 /** timeout to slow accept calls when not possible, in msec. */
 #define NETEVENT_SLOW_ACCEPT_TIME 2000
+/** timeout to slow accept calls when tcp queue is full, in msec. */
+#define NETEVENT_SLOW_ACCEPT_QUEUE_TIME 50
 /** timeout to slow down log print, so it does not spam the logs, in sec */
 #define SLOW_LOG_TIME 10
 /** for doq, the maximum dcid length, in ngtcp2 it is 20. */