PKG_CHECK_MODULES([LIBUV], [libuv >= 1.0.0], [],
[AC_MSG_ERROR([libuv not found])])
+# libuv recvmmsg support
+AC_CHECK_DECLS([UV_UDP_RECVMMSG, UV_UDP_MMSG_FREE, UV_UDP_MMSG_CHUNK], [], [], [[#include <uv.h>]])
+
# [pairwise: --enable-doh --with-libnghttp2=auto, --enable-doh --with-libnghttp2=yes, --disable-doh]
AC_ARG_ENABLE([doh],
[AS_HELP_STRING([--disable-doh], [enable DNS over HTTPS, requires libnghttp2 (default=yes)])],
/* Must be different from ISC_NETMGR_TID_UNKNOWN */
#define ISC_NETMGR_NON_INTERLOCKED -2
-#define ISC_NETMGR_TLSBUF_SIZE 65536
+/*
+ * Receive buffers
+ */
+#if HAVE_DECL_UV_UDP_MMSG_CHUNK
+/*
+ * The value 20 here is UV__MMSG_MAXWIDTH taken from the current libuv source,
+ * libuv will not receive more that 20 datagrams in a single recvmmsg call.
+ */
+#define ISC_NETMGR_UDP_RECVBUF_SIZE (20 * UINT16_MAX)
+#else
+/*
+ * A single DNS message size
+ */
+#define ISC_NETMGR_UDP_RECVBUF_SIZE UINT16_MAX
+#endif
/*
- * New versions of libuv support recvmmsg on unices.
- * Since recvbuf is only allocated per worker allocating a bigger one is not
- * that wasteful.
- * 20 here is UV__MMSG_MAXWIDTH taken from the current libuv source, nothing
- * will break if the original value changes.
+ * The TCP receive buffer can fit one maximum sized DNS message plus its size,
+ * the receive buffer here affects TCP, DoT and DoH.
*/
-#define ISC_NETMGR_RECVBUF_SIZE (20 * 65536)
+#define ISC_NETMGR_TCP_RECVBUF_SIZE (sizeof(uint16_t) + UINT16_MAX)
+/* Pick the larger buffer */
+#define ISC_NETMGR_RECVBUF_SIZE \
+ (ISC_NETMGR_UDP_RECVBUF_SIZE >= ISC_NETMGR_TCP_RECVBUF_SIZE \
+ ? ISC_NETMGR_UDP_RECVBUF_SIZE \
+ : ISC_NETMGR_TCP_RECVBUF_SIZE)
+
+/*
+ * Send buffer
+ */
#define ISC_NETMGR_SENDBUF_SIZE (sizeof(uint16_t) + UINT16_MAX)
+/*
+ * Make sure our RECVBUF size is large enough
+ */
+
+STATIC_ASSERT(ISC_NETMGR_UDP_RECVBUF_SIZE <= ISC_NETMGR_RECVBUF_SIZE,
+ "UDP receive buffer size must be smaller or equal than worker "
+ "receive buffer size");
+
+STATIC_ASSERT(ISC_NETMGR_TCP_RECVBUF_SIZE <= ISC_NETMGR_RECVBUF_SIZE,
+ "TCP receive buffer size must be smaller or equal than worker "
+ "receive buffer size");
+
/*%
* Regular TCP buffer size.
*/
* most in TCPDNS or TLSDNS connections, so there's no risk of overrun
* when using a buffer this size.
*/
-#define NM_BIG_BUF (65535 + 2) * 2
+#define NM_BIG_BUF ISC_NETMGR_TCP_RECVBUF_SIZE * 2
#if defined(SO_REUSEPORT_LB) || (defined(SO_REUSEPORT) && defined(__linux__))
#define HAVE_SO_REUSEPORT_LB 1
isc__networker_t *worker = NULL;
REQUIRE(VALID_NMSOCK(sock));
- if (buf->base == NULL) {
- /* Empty buffer: might happen in case of error. */
- return;
- }
- worker = &sock->mgr->workers[sock->tid];
- REQUIRE(worker->recvbuf_inuse);
- if (sock->type == isc_nm_udpsocket && buf->base > worker->recvbuf &&
- buf->base <= worker->recvbuf + ISC_NETMGR_RECVBUF_SIZE)
- {
- /* Can happen in case of out-of-order recvmmsg in libuv1.36 */
- return;
- }
+ worker = &sock->mgr->workers[sock->tid];
REQUIRE(buf->base == worker->recvbuf);
+
worker->recvbuf_inuse = false;
}
}
/*%<
- * Allocator for read operations. Limited to size 2^16.
+ * Allocator callback for read operations.
*
* Note this doesn't actually allocate anything, it just assigns the
* worker's receive buffer to a socket, and marks it as "in use".
REQUIRE(VALID_NMSOCK(sock));
REQUIRE(isc__nm_in_netthread());
+ /*
+ * The size provided by libuv is only suggested size, and it always
+ * defaults to 64 * 1024 in the current versions of libuv (see
+ * src/unix/udp.c and src/unix/stream.c).
+ */
+ UNUSED(size);
+
+ worker = &sock->mgr->workers[sock->tid];
+ INSIST(!worker->recvbuf_inuse);
+ INSIST(worker->recvbuf != NULL);
switch (sock->type) {
case isc_nm_udpsocket:
- REQUIRE(size <= ISC_NETMGR_RECVBUF_SIZE);
- size = ISC_NETMGR_RECVBUF_SIZE;
+ buf->len = ISC_NETMGR_UDP_RECVBUF_SIZE;
break;
case isc_nm_tcpsocket:
case isc_nm_tcpdnssocket:
- break;
case isc_nm_tlsdnssocket:
- /*
- * We need to limit the individual chunks to be read, so the
- * BIO_write() will always succeed and the consumed before the
- * next readcb is called.
- */
- if (size >= ISC_NETMGR_TLSBUF_SIZE) {
- size = ISC_NETMGR_TLSBUF_SIZE;
- }
+ buf->len = ISC_NETMGR_TCP_RECVBUF_SIZE;
break;
default:
INSIST(0);
ISC_UNREACHABLE();
}
- worker = &sock->mgr->workers[sock->tid];
- INSIST(!worker->recvbuf_inuse || sock->type == isc_nm_udpsocket);
-
+ REQUIRE(buf->len <= ISC_NETMGR_RECVBUF_SIZE);
buf->base = worker->recvbuf;
- buf->len = size;
+
worker->recvbuf_inuse = true;
}
}
free:
+ if (nread < 0) {
+ /*
+ * The buffer may be a null buffer on error.
+ */
+ if (buf->base == NULL && buf->len == 0) {
+ return;
+ }
+ }
+
isc__nm_free_uvbuf(sock, buf);
}
isc__nm_process_sock_buffer(sock);
free:
+ if (nread < 0) {
+ /*
+ * The buffer may be a null buffer on error.
+ */
+ if (buf->base == NULL && buf->len == 0) {
+ return;
+ }
+ }
+
isc__nm_free_uvbuf(sock, buf);
}
/*
*
*/
- r = BIO_new_bio_pair(&sock->tls.ssl_wbio, ISC_NETMGR_TLSBUF_SIZE,
- &sock->tls.app_rbio, ISC_NETMGR_TLSBUF_SIZE);
+ r = BIO_new_bio_pair(&sock->tls.ssl_wbio, ISC_NETMGR_TCP_RECVBUF_SIZE,
+ &sock->tls.app_rbio, ISC_NETMGR_TCP_RECVBUF_SIZE);
RUNTIME_CHECK(r == 1);
- r = BIO_new_bio_pair(&sock->tls.ssl_rbio, ISC_NETMGR_TLSBUF_SIZE,
- &sock->tls.app_wbio, ISC_NETMGR_TLSBUF_SIZE);
+ r = BIO_new_bio_pair(&sock->tls.ssl_rbio, ISC_NETMGR_TCP_RECVBUF_SIZE,
+ &sock->tls.app_wbio, ISC_NETMGR_TCP_RECVBUF_SIZE);
RUNTIME_CHECK(r == 1);
#if HAVE_SSL_SET0_RBIO && HAVE_SSL_SET0_WBIO
(void)SSL_peek(sock->tls.tls, &(char){ '\0' }, 0);
int pending = SSL_pending(sock->tls.tls);
- if (pending > ISC_NETMGR_TLSBUF_SIZE) {
- pending = ISC_NETMGR_TLSBUF_SIZE;
+ if (pending > (int)ISC_NETMGR_TCP_RECVBUF_SIZE) {
+ pending = (int)ISC_NETMGR_TCP_RECVBUF_SIZE;
}
if ((sock->buf_len + pending) > sock->buf_size) {
break;
}
- if (pending > ISC_NETMGR_TLSBUF_SIZE) {
- pending = ISC_NETMGR_TLSBUF_SIZE;
+ if (pending > (int)ISC_NETMGR_TCP_RECVBUF_SIZE) {
+ pending = (int)ISC_NETMGR_TCP_RECVBUF_SIZE;
}
sock->tls.senddata.base = isc_mem_get(sock->mgr->mctx, pending);
}
free:
async_tlsdns_cycle(sock);
+
+ if (nread < 0) {
+ /*
+ * The buffer may be a null buffer on error.
+ */
+ if (buf->base == NULL && buf->len == 0) {
+ return;
+ }
+ }
+
isc__nm_free_uvbuf(sock, buf);
}
csock->tls.tls = isc_tls_create(ssock->tls.ctx);
RUNTIME_CHECK(csock->tls.tls != NULL);
- r = BIO_new_bio_pair(&csock->tls.ssl_wbio, ISC_NETMGR_TLSBUF_SIZE,
- &csock->tls.app_rbio, ISC_NETMGR_TLSBUF_SIZE);
+ r = BIO_new_bio_pair(&csock->tls.ssl_wbio, ISC_NETMGR_TCP_RECVBUF_SIZE,
+ &csock->tls.app_rbio, ISC_NETMGR_TCP_RECVBUF_SIZE);
RUNTIME_CHECK(r == 1);
- r = BIO_new_bio_pair(&csock->tls.ssl_rbio, ISC_NETMGR_TLSBUF_SIZE,
- &csock->tls.app_wbio, ISC_NETMGR_TLSBUF_SIZE);
+ r = BIO_new_bio_pair(&csock->tls.ssl_rbio, ISC_NETMGR_TCP_RECVBUF_SIZE,
+ &csock->tls.app_wbio, ISC_NETMGR_TCP_RECVBUF_SIZE);
RUNTIME_CHECK(r == 1);
#if HAVE_SSL_SET0_RBIO && HAVE_SSL_SET0_WBIO
REQUIRE(sock->parent != NULL);
REQUIRE(sock->tid == isc_nm_tid());
-#ifdef UV_UDP_RECVMMSG
+#if HAVE_DECL_UV_UDP_RECVMMSG
uv_init_flags |= UV_UDP_RECVMMSG;
#endif
r = uv_udp_init_ex(&worker->loop, &sock->uv_handle.udp, uv_init_flags);
isc_nmsocket_t *sock = uv_handle_get_data((uv_handle_t *)handle);
isc__nm_uvreq_t *req = NULL;
uint32_t maxudp;
- bool free_buf;
isc_result_t result;
isc_sockaddr_t sockaddr, *sa = NULL;
REQUIRE(sock->tid == isc_nm_tid());
REQUIRE(atomic_load(&sock->reading));
-#ifdef UV_UDP_MMSG_FREE
- free_buf = ((flags & UV_UDP_MMSG_FREE) == UV_UDP_MMSG_FREE);
-#elif UV_UDP_MMSG_CHUNK
- free_buf = ((flags & UV_UDP_MMSG_CHUNK) == 0);
+ /*
+ * When using recvmmsg(2), if no errors occur, there will be a final
+ * callback with nrecv set to 0, addr set to NULL and the buffer
+ * pointing at the initially allocated data with the UV_UDP_MMSG_CHUNK
+ * flag cleared and the UV_UDP_MMSG_FREE flag set.
+ */
+#if HAVE_DECL_UV_UDP_MMSG_FREE
+ if ((flags & UV_UDP_MMSG_FREE) == UV_UDP_MMSG_FREE) {
+ INSIST(nrecv == 0);
+ INSIST(addr == NULL);
+ goto free;
+ }
#else
- free_buf = true;
UNUSED(flags);
#endif
- /*
- * Four possible reasons to return now without processing:
- */
-
/*
* - If we're simulating a firewall blocking UDP packets
* bigger than 'maxudp' bytes for testing purposes.
sock->processing = false;
free:
- if (free_buf) {
- isc__nm_free_uvbuf(sock, buf);
+#if HAVE_DECL_UV_UDP_MMSG_CHUNK
+ /*
+ * When using recvmmsg(2), chunks will have the UV_UDP_MMSG_CHUNK flag
+ * set, those must not be freed.
+ */
+ if ((flags & UV_UDP_MMSG_CHUNK) == UV_UDP_MMSG_CHUNK) {
+ return;
}
+#endif
+
+ /*
+ * When using recvmmsg(2), if a UDP socket error occurs, nrecv will be <
+ * 0. In either scenario, the callee can now safely free the provided
+ * buffer.
+ */
+ if (nrecv < 0) {
+ /*
+ * The buffer may be a null buffer on error.
+ */
+ if (buf->base == NULL && buf->len == 0) {
+ return;
+ }
+ }
+
+ isc__nm_free_uvbuf(sock, buf);
}
/*