]> git.ipfire.org Git - thirdparty/bind9.git/commitdiff
Throttle reading from TCP if the sends are not getting through
authorOndřej Surý <ondrej@isc.org>
Thu, 18 Jan 2024 16:24:22 +0000 (17:24 +0100)
committerNicki Křížek <nicki@isc.org>
Mon, 10 Jun 2024 16:43:44 +0000 (18:43 +0200)
When TCP client would not read the DNS message sent to them, the TCP
sends inside named would accumulate and cause degradation of the
service.  Throttle the reading from the TCP socket when we accumulate
enough DNS data to be sent.  Currently this is limited in a way that a
single largest possible DNS message can fit into the buffer.

(cherry picked from commit 26006f7b44474819fac2a76dc6cd6f69f0d76828)

lib/isc/netmgr/netmgr-int.h
lib/isc/netmgr/netmgr.c
lib/isc/netmgr/tcp.c
lib/isc/netmgr/tcpdns.c

index f95a51173ecd54d5c832ad11122ad3300d8bb2c3..6d2bd9ad9f2e5daf32ff8c7bff930e3d8fc74dab 100644 (file)
 #endif
 
 /*
- * The TCP receive buffer can fit one maximum sized DNS message plus its size,
- * the receive buffer here affects TCP, DoT and DoH.
+ * The TCP send and receive buffers can fit one maximum sized DNS message plus
+ * its size, the receive buffer here affects TCP, DoT and DoH.
  */
+#define ISC_NETMGR_TCP_SENDBUF_SIZE (sizeof(uint16_t) + UINT16_MAX)
 #define ISC_NETMGR_TCP_RECVBUF_SIZE (sizeof(uint16_t) + UINT16_MAX)
 
 /* Pick the larger buffer */
index 820099553d5eae655b2771afda193c688818f341..7b09639440a34781199c4d915a1d03b2c6edc9da 100644 (file)
@@ -2086,6 +2086,7 @@ isc__nmsocket_writetimeout_cb(void *data, isc_result_t eresult) {
 
        sock = req->sock;
 
+       isc__nm_start_reading(sock);
        isc__nmsocket_reset(sock);
 }
 
index 16b53cc57975800537a0ba63112f9c1063389494..016a9e9059a4a942bb86aeed7bd33cf61cb4b571 100644 (file)
@@ -905,6 +905,31 @@ isc__nm_tcp_read_cb(uv_stream_t *stream, ssize_t nread, const uv_buf_t *buf) {
 
        /* The readcb could have paused the reading */
        if (atomic_load(&sock->reading)) {
+               if (!sock->client) {
+                       /*
+                        * Stop reading if we have accumulated enough bytes in
+                        * the send queue; this means that the TCP client is not
+                        * reading back the data we sending to it, and there's
+                        * no reason to continue processing more incoming DNS
+                        * messages, if the client is not reading back the
+                        * responses.
+                        */
+                       size_t write_queue_size =
+                               uv_stream_get_write_queue_size(
+                                       &sock->uv_handle.stream);
+
+                       if (write_queue_size >= ISC_NETMGR_TCP_SENDBUF_SIZE) {
+                               isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL,
+                                             ISC_LOGMODULE_NETMGR,
+                                             ISC_LOG_DEBUG(3),
+                                             "throttling TCP connection, "
+                                             "the other side is "
+                                             "not reading the data (%zu)",
+                                             write_queue_size);
+                               isc__nm_stop_reading(sock);
+                       }
+               }
+
                /* The timer will be updated */
                isc__nmsocket_timer_restart(sock);
        }
@@ -1095,6 +1120,33 @@ isc__nm_tcp_send(isc_nmhandle_t *handle, const isc_region_t *region,
        return;
 }
 
+static void
+tcp_maybe_restart_reading(isc_nmsocket_t *sock) {
+       if (!sock->client && sock->reading &&
+           !uv_is_active(&sock->uv_handle.handle))
+       {
+               /*
+                * Restart reading if we have less data in the send queue than
+                * the send buffer size, this means that the TCP client has
+                * started reading some data again.  Starting reading when we go
+                * under the limit instead of waiting for all data has been
+                * flushed allows faster recovery (in case there was a
+                * congestion and now there isn't).
+                */
+               size_t write_queue_size =
+                       uv_stream_get_write_queue_size(&sock->uv_handle.stream);
+               if (write_queue_size < ISC_NETMGR_TCP_SENDBUF_SIZE) {
+                       isc_log_write(
+                               isc_lctx, ISC_LOGCATEGORY_GENERAL,
+                               ISC_LOGMODULE_NETMGR, ISC_LOG_DEBUG(3),
+                               "resuming TCP connection, the other side  "
+                               "is reading the data again (%zu)",
+                               write_queue_size);
+                       isc__nm_start_reading(sock);
+               }
+       }
+}
+
 static void
 tcp_send_cb(uv_write_t *req, int status) {
        isc__nm_uvreq_t *uvreq = (isc__nm_uvreq_t *)req->data;
@@ -1112,10 +1164,16 @@ tcp_send_cb(uv_write_t *req, int status) {
                isc__nm_incstats(sock, STATID_SENDFAIL);
                isc__nm_failed_send_cb(sock, uvreq,
                                       isc__nm_uverr2result(status));
+
+               if (!sock->client && sock->reading) {
+                       isc__nm_start_reading(sock);
+                       isc__nmsocket_reset(sock);
+               }
                return;
        }
 
        isc__nm_sendcb(sock, uvreq, ISC_R_SUCCESS, false);
+       tcp_maybe_restart_reading(sock);
 }
 
 /*
index dd3e9ec3fed64321cf27c89cb8d2abac6ac57079..00ecb0f3d2b883b4ed4e5dd0e5c9c91949144615 100644 (file)
@@ -913,6 +913,27 @@ isc__nm_tcpdns_read_cb(uv_stream_t *stream, ssize_t nread,
        result = isc__nm_process_sock_buffer(sock);
        if (result != ISC_R_SUCCESS) {
                isc__nm_failed_read_cb(sock, result, true);
+       } else if (!sock->client) {
+               /*
+                * Stop reading if we have accumulated enough bytes in
+                * the send queue; this means that the TCP client is not
+                * reading back the data we sending to it, and there's
+                * no reason to continue processing more incoming DNS
+                * messages, if the client is not reading back the
+                * responses.
+                */
+               size_t write_queue_size =
+                       uv_stream_get_write_queue_size(&sock->uv_handle.stream);
+
+               if (write_queue_size >= ISC_NETMGR_TCP_SENDBUF_SIZE) {
+                       isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL,
+                                     ISC_LOGMODULE_NETMGR, ISC_LOG_DEBUG(3),
+                                     "throttling TCP connection, "
+                                     "the other side is "
+                                     "not reading the data (%zu)",
+                                     write_queue_size);
+                       isc__nm_stop_reading(sock);
+               }
        }
 free:
        if (nread < 0) {
@@ -1135,6 +1156,33 @@ isc__nm_tcpdns_send(isc_nmhandle_t *handle, isc_region_t *region,
        return;
 }
 
+static void
+tcpdns_maybe_restart_reading(isc_nmsocket_t *sock) {
+       if (!sock->client && sock->reading &&
+           !uv_is_active(&sock->uv_handle.handle))
+       {
+               /*
+                * Restart reading if we have less data in the send queue than
+                * the send buffer size, this means that the TCP client has
+                * started reading some data again.  Starting reading when we go
+                * under the limit instead of waiting for all data has been
+                * flushed allows faster recovery (in case there was a
+                * congestion and now there isn't).
+                */
+               size_t write_queue_size =
+                       uv_stream_get_write_queue_size(&sock->uv_handle.stream);
+               if (write_queue_size < ISC_NETMGR_TCP_SENDBUF_SIZE) {
+                       isc_log_write(
+                               isc_lctx, ISC_LOGCATEGORY_GENERAL,
+                               ISC_LOGMODULE_NETMGR, ISC_LOG_DEBUG(3),
+                               "resuming TCP connection, the other side  "
+                               "is reading the data again (%zu)",
+                               write_queue_size);
+                       isc__nm_start_reading(sock);
+               }
+       }
+}
+
 static void
 tcpdns_send_cb(uv_write_t *req, int status) {
        isc__nm_uvreq_t *uvreq = (isc__nm_uvreq_t *)req->data;
@@ -1152,10 +1200,16 @@ tcpdns_send_cb(uv_write_t *req, int status) {
                isc__nm_incstats(sock, STATID_SENDFAIL);
                isc__nm_failed_send_cb(sock, uvreq,
                                       isc__nm_uverr2result(status));
+
+               if (!sock->client && sock->reading) {
+                       isc__nm_start_reading(sock);
+                       isc__nmsocket_reset(sock);
+               }
                return;
        }
 
        isc__nm_sendcb(sock, uvreq, ISC_R_SUCCESS, false);
+       tcpdns_maybe_restart_reading(sock);
 }
 
 /*