From: Ondřej Surý Date: Fri, 1 Apr 2022 12:43:14 +0000 (+0200) Subject: Add option to configure load balance sockets X-Git-Tag: v9.19.0~20^2~1 X-Git-Url: http://git.ipfire.org/gitweb/?a=commitdiff_plain;h=85c6e797aa84dfeecb096e7ca3eafb85a5a45f3f;p=thirdparty%2Fbind9.git Add option to configure load balance sockets Previously, the option to enable kernel load balancing of the sockets was always enabled when supported by the operating system (SO_REUSEPORT on Linux and SO_REUSEPORT_LB on FreeBSD). It was reported that in scenarios where the networking threads are also responsible for processing long-running tasks (like RPZ processing, CATZ processing or large zone transfers), this could lead to intermittent brownouts for some clients, because the thread assigned by the operating system might be busy. In such scenarios, the overall performance would be better served by threads competing over the sockets because the idle threads can pick up the incoming traffic. Add a new configuration option (`load-balance-sockets`) to allow enabling or disabling the load balancing of the sockets. 
--- diff --git a/bin/named/config.c b/bin/named/config.c index 7c6ad5dc8b8..b165b421038 100644 --- a/bin/named/config.c +++ b/bin/named/config.c @@ -59,17 +59,25 @@ options {\n\ edns-udp-size 1232;\n\ files unlimited;\n" #if defined(HAVE_GEOIP2) - " geoip-directory \"" MAXMINDDB_PREFIX "/share/" - "GeoIP\";" - "\n" + "\ + geoip-directory \"" MAXMINDDB_PREFIX "/share/GeoIP\";\n" #elif defined(HAVE_GEOIP2) - " geoip-directory \".\";\n" + "\ + geoip-directory \".\";\n" #endif /* if defined(HAVE_GEOIP2) */ "\ heartbeat-interval 60;\n\ interface-interval 60;\n\ listen-on {any;};\n\ - listen-on-v6 {any;};\n\ + listen-on-v6 {any;};\n" +#if HAVE_SO_REUSEPORT_LB + "\ + load-balance-sockets yes;\n" +#else + "\ + load-balance-sockets no;\n" +#endif + "\ # lock-file \"" NAMED_LOCALSTATEDIR "/run/named/named.lock\";\n\ match-mapped-addresses no;\n\ max-ixfr-ratio 100%;\n\ @@ -84,10 +92,11 @@ options {\n\ port 53;\n\ tls-port 853;\n" #if HAVE_LIBNGHTTP2 - "http-port 80;\n" - "https-port 443;\n" - "http-listener-clients 300;\n" - "http-streams-per-connection 100;\n" + "\ + http-port 80;\n\ + https-port 443;\n\ + http-listener-clients 300;\n\ + http-streams-per-connection 100;\n" #endif "\ prefetch 2 9;\n\ diff --git a/bin/named/named.conf.rst b/bin/named/named.conf.rst index 2f2de71e83e..6974439ec0b 100644 --- a/bin/named/named.conf.rst +++ b/bin/named/named.conf.rst @@ -300,6 +300,7 @@ OPTIONS string ] { address_match_element; ... 
}; lmdb-mapsize sizeval; + load-balance-sockets boolean; lock-file ( quoted_string | none ); managed-keys-directory quoted_string; masterfile-format ( raw | text ); diff --git a/bin/named/server.c b/bin/named/server.c index fa327e8da01..db46a0b305b 100644 --- a/bin/named/server.c +++ b/bin/named/server.c @@ -8353,6 +8353,7 @@ load_configuration(const char *filename, named_server_t *server, uint32_t softquota = 0; uint32_t max; uint64_t initial, idle, keepalive, advertised; + bool loadbalancesockets; dns_aclenv_t *env = ns_interfacemgr_getaclenv(named_g_server->interfacemgr); @@ -8851,6 +8852,28 @@ load_configuration(const char *filename, named_server_t *server, } ns_interfacemgr_setbacklog(server->interfacemgr, backlog); + obj = NULL; + result = named_config_get(maps, "load-balance-sockets", &obj); + INSIST(result == ISC_R_SUCCESS); + loadbalancesockets = cfg_obj_asboolean(obj); +#if HAVE_SO_REUSEPORT_LB + if (first_time) { + isc_nm_setloadbalancesockets(named_g_netmgr, + cfg_obj_asboolean(obj)); + } else if (loadbalancesockets != + isc_nm_getloadbalancesockets(named_g_netmgr)) { + cfg_obj_log(obj, named_g_lctx, ISC_LOG_WARNING, + "changing load-balance-sockets value requires " + "server restart"); + } +#else + if (loadbalancesockets) { + cfg_obj_log( + obj, named_g_lctx, ISC_LOG_WARNING, + "load-balance-sockets has no effect on this system"); + } +#endif + /* * Configure the interface manager according to the "listen-on" * statement. diff --git a/doc/arm/reference.rst b/doc/arm/reference.rst index 4092a33abdc..1ac16f28823 100644 --- a/doc/arm/reference.rst +++ b/doc/arm/reference.rst @@ -1726,6 +1726,29 @@ Boolean Options If ``yes``, respond to root key sentinel probes as described in draft-ietf-dnsop-kskroll-sentinel-08. The default is ``yes``. +``load-balance-sockets`` + + This option enables kernel load-balancing of sockets on systems which support + it, including Linux and FreeBSD. 
This instructs the kernel to distribute + incoming socket connections among the networking threads based on a hashing + scheme. For more information, see the receive network flow classification + options (``rx-flow-hash``) section in the ``ethtool`` manual page. The + default is ``yes``. + + Enabling ``load-balance-sockets`` significantly increases general throughput + when incoming traffic is distributed uniformly onto the threads by the + operating system. However, in cases where a worker thread is busy with a + long-lasting operation, such as processing a Response Policy Zone (RPZ) or + Catalog Zone update or an unusually large zone transfer, incoming traffic + that hashes onto that thread may be delayed. On servers where these events + occur frequently, it may be preferable to disable socket load-balancing so + that other threads can pick up the traffic that would have been sent to the + busy thread. + + Note: this option can only be set when ``named`` first starts. + Changes will not take effect during reconfiguration; the server + must be restarted. + ``message-compression`` If ``yes``, DNS name compression is used in responses to regular queries (not including AXFR or IXFR, which always use compression). diff --git a/doc/man/named.conf.5in b/doc/man/named.conf.5in index 406cdfa5c22..a1b97c79600 100644 --- a/doc/man/named.conf.5in +++ b/doc/man/named.conf.5in @@ -350,6 +350,7 @@ options { string ] { address_match_element; ... }; lmdb\-mapsize sizeval; + load\-balance\-sockets boolean; lock\-file ( quoted_string | none ); managed\-keys\-directory quoted_string; masterfile\-format ( raw | text ); diff --git a/doc/misc/options b/doc/misc/options index eb860679022..ea1365633dc 100644 --- a/doc/misc/options +++ b/doc/misc/options @@ -217,6 +217,7 @@ options { ] { ; ... 
}; // may occur multiple times lmdb-mapsize ; + load-balance-sockets ; lock-file ( | none ); managed-keys-directory ; masterfile-format ( raw | text ); diff --git a/doc/misc/options.active b/doc/misc/options.active index 2d832b57d29..4aa8022ffa4 100644 --- a/doc/misc/options.active +++ b/doc/misc/options.active @@ -215,6 +215,7 @@ options { ] { ; ... }; // may occur multiple times lmdb-mapsize ; + load-balance-sockets ; lock-file ( | none ); managed-keys-directory ; masterfile-format ( raw | text ); diff --git a/doc/misc/options.grammar.rst b/doc/misc/options.grammar.rst index 1ce0353b320..858409aa25f 100644 --- a/doc/misc/options.grammar.rst +++ b/doc/misc/options.grammar.rst @@ -152,6 +152,7 @@ ] { ; ... }; lmdb-mapsize ; + load-balance-sockets ; lock-file ( | none ); managed-keys-directory ; masterfile-format ( raw | text ); diff --git a/lib/isc/include/isc/netmgr.h b/lib/isc/include/isc/netmgr.h index e339084e967..3f0e080b0f6 100644 --- a/lib/isc/include/isc/netmgr.h +++ b/lib/isc/include/isc/netmgr.h @@ -395,6 +395,17 @@ isc_nm_setnetbuffers(isc_nm_t *mgr, int32_t recv_tcp, int32_t send_tcp, * \li 'mgr' is a valid netmgr. */ +bool +isc_nm_getloadbalancesockets(isc_nm_t *mgr); +void +isc_nm_setloadbalancesockets(isc_nm_t *mgr, bool enabled); +/*%< + * Get and set value of load balancing of the sockets. + * + * Requires: + * \li 'mgr' is a valid netmgr. 
+ */ + void isc_nm_gettimeouts(isc_nm_t *mgr, uint32_t *initial, uint32_t *idle, uint32_t *keepalive, uint32_t *advertised); diff --git a/lib/isc/netmgr/netmgr-int.h b/lib/isc/netmgr/netmgr-int.h index 1e90f000510..ccc1bc3fddd 100644 --- a/lib/isc/netmgr/netmgr-int.h +++ b/lib/isc/netmgr/netmgr-int.h @@ -711,6 +711,8 @@ struct isc_nm { atomic_uint_fast32_t workers_paused; atomic_uint_fast32_t maxudp; + bool load_balance_sockets; + atomic_bool paused; /* diff --git a/lib/isc/netmgr/netmgr.c b/lib/isc/netmgr/netmgr.c index 5f6c04a6804..07cf5864229 100644 --- a/lib/isc/netmgr/netmgr.c +++ b/lib/isc/netmgr/netmgr.c @@ -233,6 +233,11 @@ isc__netmgr_create(isc_mem_t *mctx, uint32_t workers, isc_nm_t **netmgrp) { atomic_init(&mgr->send_tcp_buffer_size, 0); atomic_init(&mgr->recv_udp_buffer_size, 0); atomic_init(&mgr->send_udp_buffer_size, 0); +#if HAVE_SO_REUSEPORT_LB + mgr->load_balance_sockets = true; +#else + mgr->load_balance_sockets = false; +#endif #ifdef NETMGR_TRACE ISC_LIST_INIT(mgr->active_sockets); @@ -573,6 +578,17 @@ isc_nm_setnetbuffers(isc_nm_t *mgr, int32_t recv_tcp, int32_t send_tcp, atomic_store(&mgr->send_udp_buffer_size, send_udp); } +void +isc_nm_setloadbalancesockets(isc_nm_t *mgr, bool enabled) { + REQUIRE(VALID_NM(mgr)); + +#if HAVE_SO_REUSEPORT_LB + mgr->load_balance_sockets = enabled; +#else + UNUSED(enabled); +#endif +} + void isc_nm_gettimeouts(isc_nm_t *mgr, uint32_t *initial, uint32_t *idle, uint32_t *keepalive, uint32_t *advertised) { diff --git a/lib/isc/netmgr/tcp.c b/lib/isc/netmgr/tcp.c index 1f4d3bf0885..680e983b44d 100644 --- a/lib/isc/netmgr/tcp.c +++ b/lib/isc/netmgr/tcp.c @@ -360,7 +360,7 @@ isc_nm_tcpconnect(isc_nm_t *mgr, isc_sockaddr_t *local, isc_sockaddr_t *peer, } static uv_os_sock_t -isc__nm_tcp_lb_socket(sa_family_t sa_family) { +isc__nm_tcp_lb_socket(isc_nm_t *mgr, sa_family_t sa_family) { isc_result_t result; uv_os_sock_t sock; @@ -375,10 +375,10 @@ isc__nm_tcp_lb_socket(sa_family_t sa_family) { result = 
isc__nm_socket_reuse(sock); RUNTIME_CHECK(result == ISC_R_SUCCESS); -#if HAVE_SO_REUSEPORT_LB - result = isc__nm_socket_reuse_lb(sock); - RUNTIME_CHECK(result == ISC_R_SUCCESS); -#endif + if (mgr->load_balance_sockets) { + result = isc__nm_socket_reuse_lb(sock); + RUNTIME_CHECK(result == ISC_R_SUCCESS); + } return (sock); } @@ -402,12 +402,13 @@ start_tcp_child(isc_nm_t *mgr, isc_sockaddr_t *iface, isc_nmsocket_t *sock, csock->pquota = sock->pquota; isc_quota_cb_init(&csock->quotacb, quota_accept_cb, csock); -#if HAVE_SO_REUSEPORT_LB - UNUSED(fd); - csock->fd = isc__nm_tcp_lb_socket(iface->type.sa.sa_family); -#else - csock->fd = dup(fd); -#endif + if (mgr->load_balance_sockets) { + UNUSED(fd); + csock->fd = isc__nm_tcp_lb_socket(mgr, + iface->type.sa.sa_family); + } else { + csock->fd = dup(fd); + } REQUIRE(csock->fd >= 0); ievent = isc__nm_get_netievent_tcplisten(mgr, csock); @@ -453,9 +454,9 @@ isc_nm_listentcp(isc_nm_t *mgr, isc_sockaddr_t *iface, sock->tid = 0; sock->fd = -1; -#if !HAVE_SO_REUSEPORT_LB - fd = isc__nm_tcp_lb_socket(iface->type.sa.sa_family); -#endif + if (!mgr->load_balance_sockets) { + fd = isc__nm_tcp_lb_socket(mgr, iface->type.sa.sa_family); + } isc_barrier_init(&sock->startlistening, sock->nchildren); @@ -470,9 +471,9 @@ isc_nm_listentcp(isc_nm_t *mgr, isc_sockaddr_t *iface, start_tcp_child(mgr, iface, sock, fd, isc_nm_tid()); } -#if !HAVE_SO_REUSEPORT_LB - isc__nm_closesocket(fd); -#endif + if (!mgr->load_balance_sockets) { + isc__nm_closesocket(fd); + } LOCK(&sock->lock); while (atomic_load(&sock->rchildren) != sock->nchildren) { @@ -504,6 +505,7 @@ isc__nm_async_tcplisten(isc__networker_t *worker, isc__netievent_t *ev0) { int flags = 0; isc_nmsocket_t *sock = NULL; isc_result_t result; + isc_nm_t *mgr; REQUIRE(VALID_NMSOCK(ievent->sock)); REQUIRE(ievent->sock->tid == isc_nm_tid()); @@ -511,6 +513,7 @@ isc__nm_async_tcplisten(isc__networker_t *worker, isc__netievent_t *ev0) { sock = ievent->sock; sa_family = sock->iface.type.sa.sa_family; 
+ mgr = sock->mgr; REQUIRE(sock->type == isc_nm_tcpsocket); REQUIRE(sock->parent != NULL); @@ -544,28 +547,30 @@ isc__nm_async_tcplisten(isc__networker_t *worker, isc__netievent_t *ev0) { flags = UV_TCP_IPV6ONLY; } -#if HAVE_SO_REUSEPORT_LB - r = isc_uv_tcp_freebind(&sock->uv_handle.tcp, &sock->iface.type.sa, - flags); - if (r < 0) { - isc__nm_incstats(sock, STATID_BINDFAIL); - goto done; - } -#else - if (sock->parent->fd == -1) { + if (mgr->load_balance_sockets) { r = isc_uv_tcp_freebind(&sock->uv_handle.tcp, &sock->iface.type.sa, flags); if (r < 0) { isc__nm_incstats(sock, STATID_BINDFAIL); goto done; } - sock->parent->uv_handle.tcp.flags = sock->uv_handle.tcp.flags; - sock->parent->fd = sock->fd; } else { - /* The socket is already bound, just copy the flags */ - sock->uv_handle.tcp.flags = sock->parent->uv_handle.tcp.flags; + if (sock->parent->fd == -1) { + r = isc_uv_tcp_freebind(&sock->uv_handle.tcp, + &sock->iface.type.sa, flags); + if (r < 0) { + isc__nm_incstats(sock, STATID_BINDFAIL); + goto done; + } + sock->parent->uv_handle.tcp.flags = + sock->uv_handle.tcp.flags; + sock->parent->fd = sock->fd; + } else { + /* The socket is already bound, just copy the flags */ + sock->uv_handle.tcp.flags = + sock->parent->uv_handle.tcp.flags; + } } -#endif isc__nm_set_network_buffers(sock->mgr, &sock->uv_handle.handle); diff --git a/lib/isc/netmgr/tcpdns.c b/lib/isc/netmgr/tcpdns.c index cab6fc34f69..b2dbeebda53 100644 --- a/lib/isc/netmgr/tcpdns.c +++ b/lib/isc/netmgr/tcpdns.c @@ -323,7 +323,7 @@ isc_nm_tcpdnsconnect(isc_nm_t *mgr, isc_sockaddr_t *local, isc_sockaddr_t *peer, } static uv_os_sock_t -isc__nm_tcpdns_lb_socket(sa_family_t sa_family) { +isc__nm_tcpdns_lb_socket(isc_nm_t *mgr, sa_family_t sa_family) { isc_result_t result; uv_os_sock_t sock; @@ -338,10 +338,10 @@ isc__nm_tcpdns_lb_socket(sa_family_t sa_family) { result = isc__nm_socket_reuse(sock); RUNTIME_CHECK(result == ISC_R_SUCCESS); -#if HAVE_SO_REUSEPORT_LB - result = isc__nm_socket_reuse_lb(sock); - 
RUNTIME_CHECK(result == ISC_R_SUCCESS); -#endif + if (mgr->load_balance_sockets) { + result = isc__nm_socket_reuse_lb(sock); + RUNTIME_CHECK(result == ISC_R_SUCCESS); + } return (sock); } @@ -375,12 +375,13 @@ start_tcpdns_child(isc_nm_t *mgr, isc_sockaddr_t *iface, isc_nmsocket_t *sock, csock->pquota = sock->pquota; isc_quota_cb_init(&csock->quotacb, quota_accept_cb, csock); -#if HAVE_SO_REUSEPORT_LB - UNUSED(fd); - csock->fd = isc__nm_tcpdns_lb_socket(iface->type.sa.sa_family); -#else - csock->fd = dup(fd); -#endif + if (mgr->load_balance_sockets) { + UNUSED(fd); + csock->fd = isc__nm_tcpdns_lb_socket(mgr, + iface->type.sa.sa_family); + } else { + csock->fd = dup(fd); + } REQUIRE(csock->fd >= 0); ievent = isc__nm_get_netievent_tcpdnslisten(mgr, csock); @@ -419,9 +420,9 @@ isc_nm_listentcpdns(isc_nm_t *mgr, isc_sockaddr_t *iface, sock->tid = 0; sock->fd = -1; -#if !HAVE_SO_REUSEPORT_LB - fd = isc__nm_tcpdns_lb_socket(iface->type.sa.sa_family); -#endif + if (!mgr->load_balance_sockets) { + fd = isc__nm_tcpdns_lb_socket(mgr, iface->type.sa.sa_family); + } isc_barrier_init(&sock->startlistening, sock->nchildren); @@ -436,9 +437,9 @@ isc_nm_listentcpdns(isc_nm_t *mgr, isc_sockaddr_t *iface, start_tcpdns_child(mgr, iface, sock, fd, isc_nm_tid()); } -#if !HAVE_SO_REUSEPORT_LB - isc__nm_closesocket(fd); -#endif + if (!mgr->load_balance_sockets) { + isc__nm_closesocket(fd); + } LOCK(&sock->lock); while (atomic_load(&sock->rchildren) != sock->nchildren) { @@ -471,6 +472,7 @@ isc__nm_async_tcpdnslisten(isc__networker_t *worker, isc__netievent_t *ev0) { int flags = 0; isc_nmsocket_t *sock = NULL; isc_result_t result = ISC_R_UNSET; + isc_nm_t *mgr = NULL; REQUIRE(VALID_NMSOCK(ievent->sock)); REQUIRE(ievent->sock->tid == isc_nm_tid()); @@ -478,6 +480,7 @@ isc__nm_async_tcpdnslisten(isc__networker_t *worker, isc__netievent_t *ev0) { sock = ievent->sock; sa_family = sock->iface.type.sa.sa_family; + mgr = sock->mgr; REQUIRE(sock->type == isc_nm_tcpdnssocket); REQUIRE(sock->parent 
!= NULL); @@ -510,28 +513,30 @@ isc__nm_async_tcpdnslisten(isc__networker_t *worker, isc__netievent_t *ev0) { flags = UV_TCP_IPV6ONLY; } -#if HAVE_SO_REUSEPORT_LB - r = isc_uv_tcp_freebind(&sock->uv_handle.tcp, &sock->iface.type.sa, - flags); - if (r < 0) { - isc__nm_incstats(sock, STATID_BINDFAIL); - goto done; - } -#else - if (sock->parent->fd == -1) { + if (mgr->load_balance_sockets) { r = isc_uv_tcp_freebind(&sock->uv_handle.tcp, &sock->iface.type.sa, flags); if (r < 0) { isc__nm_incstats(sock, STATID_BINDFAIL); goto done; } - sock->parent->uv_handle.tcp.flags = sock->uv_handle.tcp.flags; - sock->parent->fd = sock->fd; } else { - /* The socket is already bound, just copy the flags */ - sock->uv_handle.tcp.flags = sock->parent->uv_handle.tcp.flags; + if (sock->parent->fd == -1) { + r = isc_uv_tcp_freebind(&sock->uv_handle.tcp, + &sock->iface.type.sa, flags); + if (r < 0) { + isc__nm_incstats(sock, STATID_BINDFAIL); + goto done; + } + sock->parent->uv_handle.tcp.flags = + sock->uv_handle.tcp.flags; + sock->parent->fd = sock->fd; + } else { + /* The socket is already bound, just copy the flags */ + sock->uv_handle.tcp.flags = + sock->parent->uv_handle.tcp.flags; + } } -#endif isc__nm_set_network_buffers(sock->mgr, &sock->uv_handle.handle); diff --git a/lib/isc/netmgr/tlsdns.c b/lib/isc/netmgr/tlsdns.c index e5ed8dd9bf7..0b67472795a 100644 --- a/lib/isc/netmgr/tlsdns.c +++ b/lib/isc/netmgr/tlsdns.c @@ -401,7 +401,7 @@ failure: } static uv_os_sock_t -isc__nm_tlsdns_lb_socket(sa_family_t sa_family) { +isc__nm_tlsdns_lb_socket(isc_nm_t *mgr, sa_family_t sa_family) { isc_result_t result; uv_os_sock_t sock; @@ -416,10 +416,10 @@ isc__nm_tlsdns_lb_socket(sa_family_t sa_family) { result = isc__nm_socket_reuse(sock); RUNTIME_CHECK(result == ISC_R_SUCCESS); -#if HAVE_SO_REUSEPORT_LB - result = isc__nm_socket_reuse_lb(sock); - RUNTIME_CHECK(result == ISC_R_SUCCESS); -#endif + if (mgr->load_balance_sockets) { + result = isc__nm_socket_reuse_lb(sock); + RUNTIME_CHECK(result == 
ISC_R_SUCCESS); + } return (sock); } @@ -447,12 +447,13 @@ start_tlsdns_child(isc_nm_t *mgr, isc_sockaddr_t *iface, isc_nmsocket_t *sock, csock->pquota = sock->pquota; isc_quota_cb_init(&csock->quotacb, quota_accept_cb, csock); -#if HAVE_SO_REUSEPORT_LB - UNUSED(fd); - csock->fd = isc__nm_tlsdns_lb_socket(iface->type.sa.sa_family); -#else - csock->fd = dup(fd); -#endif + if (mgr->load_balance_sockets) { + UNUSED(fd); + csock->fd = isc__nm_tlsdns_lb_socket(mgr, + iface->type.sa.sa_family); + } else { + csock->fd = dup(fd); + } REQUIRE(csock->fd >= 0); ievent = isc__nm_get_netievent_tlsdnslisten(mgr, csock); @@ -503,9 +504,9 @@ isc_nm_listentlsdns(isc_nm_t *mgr, isc_sockaddr_t *iface, sock->tid = 0; sock->fd = -1; -#if !HAVE_SO_REUSEPORT_LB - fd = isc__nm_tlsdns_lb_socket(iface->type.sa.sa_family); -#endif + if (!mgr->load_balance_sockets) { + fd = isc__nm_tlsdns_lb_socket(mgr, iface->type.sa.sa_family); + } isc_barrier_init(&sock->startlistening, sock->nchildren); @@ -520,9 +521,9 @@ isc_nm_listentlsdns(isc_nm_t *mgr, isc_sockaddr_t *iface, start_tlsdns_child(mgr, iface, sock, fd, isc_nm_tid()); } -#if !HAVE_SO_REUSEPORT_LB - isc__nm_closesocket(fd); -#endif + if (!mgr->load_balance_sockets) { + isc__nm_closesocket(fd); + } LOCK(&sock->lock); while (atomic_load(&sock->rchildren) != sock->nchildren) { @@ -555,6 +556,7 @@ isc__nm_async_tlsdnslisten(isc__networker_t *worker, isc__netievent_t *ev0) { int flags = 0; isc_nmsocket_t *sock = NULL; isc_result_t result = ISC_R_UNSET; + isc_nm_t *mgr; REQUIRE(VALID_NMSOCK(ievent->sock)); REQUIRE(ievent->sock->tid == isc_nm_tid()); @@ -562,6 +564,7 @@ isc__nm_async_tlsdnslisten(isc__networker_t *worker, isc__netievent_t *ev0) { sock = ievent->sock; sa_family = sock->iface.type.sa.sa_family; + mgr = sock->mgr; REQUIRE(sock->type == isc_nm_tlsdnssocket); REQUIRE(sock->parent != NULL); @@ -594,28 +597,30 @@ isc__nm_async_tlsdnslisten(isc__networker_t *worker, isc__netievent_t *ev0) { flags = UV_TCP_IPV6ONLY; } -#if 
HAVE_SO_REUSEPORT_LB - r = isc_uv_tcp_freebind(&sock->uv_handle.tcp, &sock->iface.type.sa, - flags); - if (r < 0) { - isc__nm_incstats(sock, STATID_BINDFAIL); - goto done; - } -#else - if (sock->parent->fd == -1) { + if (mgr->load_balance_sockets) { r = isc_uv_tcp_freebind(&sock->uv_handle.tcp, &sock->iface.type.sa, flags); if (r < 0) { isc__nm_incstats(sock, STATID_BINDFAIL); goto done; } - sock->parent->uv_handle.tcp.flags = sock->uv_handle.tcp.flags; - sock->parent->fd = sock->fd; } else { - /* The socket is already bound, just copy the flags */ - sock->uv_handle.tcp.flags = sock->parent->uv_handle.tcp.flags; + if (sock->parent->fd == -1) { + r = isc_uv_tcp_freebind(&sock->uv_handle.tcp, + &sock->iface.type.sa, flags); + if (r < 0) { + isc__nm_incstats(sock, STATID_BINDFAIL); + goto done; + } + sock->parent->uv_handle.tcp.flags = + sock->uv_handle.tcp.flags; + sock->parent->fd = sock->fd; + } else { + /* The socket is already bound, just copy the flags */ + sock->uv_handle.tcp.flags = + sock->parent->uv_handle.tcp.flags; + } } -#endif isc__nm_set_network_buffers(sock->mgr, &sock->uv_handle.handle); diff --git a/lib/isc/netmgr/udp.c b/lib/isc/netmgr/udp.c index 7b556aa3e3c..587366282dd 100644 --- a/lib/isc/netmgr/udp.c +++ b/lib/isc/netmgr/udp.c @@ -85,7 +85,7 @@ static void stop_udp_child(isc_nmsocket_t *sock); static uv_os_sock_t -isc__nm_udp_lb_socket(sa_family_t sa_family) { +isc__nm_udp_lb_socket(isc_nm_t *mgr, sa_family_t sa_family) { isc_result_t result; uv_os_sock_t sock; @@ -99,10 +99,10 @@ isc__nm_udp_lb_socket(sa_family_t sa_family) { result = isc__nm_socket_reuse(sock); RUNTIME_CHECK(result == ISC_R_SUCCESS); -#if HAVE_SO_REUSEPORT_LB - result = isc__nm_socket_reuse_lb(sock); - RUNTIME_CHECK(result == ISC_R_SUCCESS); -#endif + if (mgr->load_balance_sockets) { + result = isc__nm_socket_reuse_lb(sock); + RUNTIME_CHECK(result == ISC_R_SUCCESS); + } return (sock); } @@ -123,12 +123,13 @@ start_udp_child(isc_nm_t *mgr, isc_sockaddr_t *iface, isc_nmsocket_t 
*sock, csock->recv_cbarg = sock->recv_cbarg; csock->tid = tid; -#if HAVE_SO_REUSEPORT_LB - UNUSED(fd); - csock->fd = isc__nm_udp_lb_socket(iface->type.sa.sa_family); -#else - csock->fd = dup(fd); -#endif + if (mgr->load_balance_sockets) { + UNUSED(fd); + csock->fd = isc__nm_udp_lb_socket(mgr, + iface->type.sa.sa_family); + } else { + csock->fd = dup(fd); + } REQUIRE(csock->fd >= 0); ievent = isc__nm_get_netievent_udplisten(mgr, csock); @@ -173,9 +174,9 @@ isc_nm_listenudp(isc_nm_t *mgr, isc_sockaddr_t *iface, isc_nm_recv_cb_t cb, sock->tid = 0; sock->fd = -1; -#if !HAVE_SO_REUSEPORT_LB - fd = isc__nm_udp_lb_socket(iface->type.sa.sa_family); -#endif + if (!mgr->load_balance_sockets) { + fd = isc__nm_udp_lb_socket(mgr, iface->type.sa.sa_family); + } isc_barrier_init(&sock->startlistening, sock->nchildren); @@ -190,9 +191,9 @@ isc_nm_listenudp(isc_nm_t *mgr, isc_sockaddr_t *iface, isc_nm_recv_cb_t cb, start_udp_child(mgr, iface, sock, fd, isc_nm_tid()); } -#if !HAVE_SO_REUSEPORT_LB - isc__nm_closesocket(fd); -#endif + if (!mgr->load_balance_sockets) { + isc__nm_closesocket(fd); + } LOCK(&sock->lock); while (atomic_load(&sock->rchildren) != sock->nchildren) { @@ -416,6 +417,7 @@ isc__nm_async_udplisten(isc__networker_t *worker, isc__netievent_t *ev0) { int uv_init_flags = 0; sa_family_t sa_family; isc_result_t result = ISC_R_UNSET; + isc_nm_t *mgr = NULL; REQUIRE(VALID_NMSOCK(ievent->sock)); REQUIRE(ievent->sock->tid == isc_nm_tid()); @@ -423,6 +425,7 @@ isc__nm_async_udplisten(isc__networker_t *worker, isc__netievent_t *ev0) { sock = ievent->sock; sa_family = sock->iface.type.sa.sa_family; + mgr = sock->mgr; REQUIRE(sock->type == isc_nm_udpsocket); REQUIRE(sock->parent != NULL); @@ -457,16 +460,7 @@ isc__nm_async_udplisten(isc__networker_t *worker, isc__netievent_t *ev0) { uv_bind_flags |= UV_UDP_IPV6ONLY; } -#if HAVE_SO_REUSEPORT_LB - r = isc_uv_udp_freebind(&sock->uv_handle.udp, - &sock->parent->iface.type.sa, uv_bind_flags); - if (r < 0) { - isc__nm_incstats(sock, 
STATID_BINDFAIL); - goto done; - } -#else - if (sock->parent->fd == -1) { - /* This thread is first, bind the socket */ + if (mgr->load_balance_sockets) { r = isc_uv_udp_freebind(&sock->uv_handle.udp, &sock->parent->iface.type.sa, uv_bind_flags); @@ -474,13 +468,25 @@ isc__nm_async_udplisten(isc__networker_t *worker, isc__netievent_t *ev0) { isc__nm_incstats(sock, STATID_BINDFAIL); goto done; } - sock->parent->uv_handle.udp.flags = sock->uv_handle.udp.flags; - sock->parent->fd = sock->fd; } else { - /* The socket is already bound, just copy the flags */ - sock->uv_handle.udp.flags = sock->parent->uv_handle.udp.flags; + if (sock->parent->fd == -1) { + /* This thread is first, bind the socket */ + r = isc_uv_udp_freebind(&sock->uv_handle.udp, + &sock->parent->iface.type.sa, + uv_bind_flags); + if (r < 0) { + isc__nm_incstats(sock, STATID_BINDFAIL); + goto done; + } + sock->parent->uv_handle.udp.flags = + sock->uv_handle.udp.flags; + sock->parent->fd = sock->fd; + } else { + /* The socket is already bound, just copy the flags */ + sock->uv_handle.udp.flags = + sock->parent->uv_handle.udp.flags; + } } -#endif isc__nm_set_network_buffers(sock->mgr, &sock->uv_handle.handle); diff --git a/lib/isccfg/namedconf.c b/lib/isccfg/namedconf.c index 14499fbdea2..1e7298c0da4 100644 --- a/lib/isccfg/namedconf.c +++ b/lib/isccfg/namedconf.c @@ -1279,6 +1279,7 @@ static cfg_clausedef_t options_clauses[] = { CFG_CLAUSEFLAG_OBSOLETE }, { "listen-on", &cfg_type_listenon, CFG_CLAUSEFLAG_MULTI }, { "listen-on-v6", &cfg_type_listenon, CFG_CLAUSEFLAG_MULTI }, + { "load-balance-sockets", &cfg_type_boolean, 0 }, { "lock-file", &cfg_type_qstringornone, 0 }, { "managed-keys-directory", &cfg_type_qstring, 0 }, { "match-mapped-addresses", &cfg_type_boolean, 0 },