dns_kasplist_t tmpkasplist, kasplist;
dns_keystorelist_t tmpkeystorelist, keystorelist;
dns_viewlist_t viewlist;
- in_port_t listen_port, udpport_low, udpport_high;
+ in_port_t listen_port, port_low, port_high;
int i, backlog;
isc_interval_t interval;
isc_logconfig_t *logc = NULL;
isc_portset_create(isc_g_mctx, &v4portset);
isc_portset_create(isc_g_mctx, &v6portset);
- result = isc_net_getudpportrange(AF_INET, &udpport_low, &udpport_high);
- if (result != ISC_R_SUCCESS) {
- isc_log_write(NAMED_LOGCATEGORY_GENERAL, NAMED_LOGMODULE_SERVER,
- ISC_LOG_ERROR,
- "get the default UDP/IPv4 port range: %s",
- isc_result_totext(result));
- goto cleanup_portsets;
- }
-
- isc_portset_addrange(v4portset, udpport_low, udpport_high);
+ isc_net_getudpportrange(AF_INET, &port_low, &port_high);
+ isc_netmgr_portrange(AF_INET, port_low, port_high);
+ isc_portset_addrange(v4portset, port_low, port_high);
if (!ns_server_getoption(server->sctx, NS_SERVER_DISABLE4)) {
isc_log_write(NAMED_LOGCATEGORY_GENERAL, NAMED_LOGMODULE_SERVER,
ISC_LOG_INFO,
"using default UDP/IPv4 port range: "
"[%d, %d]",
- udpport_low, udpport_high);
+ port_low, port_high);
}
- result = isc_net_getudpportrange(AF_INET6, &udpport_low, &udpport_high);
- if (result != ISC_R_SUCCESS) {
- isc_log_write(NAMED_LOGCATEGORY_GENERAL, NAMED_LOGMODULE_SERVER,
- ISC_LOG_ERROR,
- "get the default UDP/IPv6 port range: %s",
- isc_result_totext(result));
- goto cleanup_portsets;
- }
- isc_portset_addrange(v6portset, udpport_low, udpport_high);
+ isc_net_getudpportrange(AF_INET6, &port_low, &port_high);
+ isc_netmgr_portrange(AF_INET6, port_low, port_high);
+ isc_portset_addrange(v6portset, port_low, port_high);
if (!ns_server_getoption(server->sctx, NS_SERVER_DISABLE6)) {
isc_log_write(NAMED_LOGCATEGORY_GENERAL, NAMED_LOGMODULE_SERVER,
ISC_LOG_INFO,
"using default UDP/IPv6 port range: "
"[%d, %d]",
- udpport_low, udpport_high);
+ port_low, port_high);
}
dns_dispatchmgr_setavailports(named_g_dispatchmgr, v4portset,
void
isc_net_enableipv6(void);
-isc_result_t
+void
isc_net_getudpportrange(int af, in_port_t *low, in_port_t *high);
/*%<
* Returns system's default range of ephemeral UDP ports, if defined.
/*%<
* Return the local address of 'sock'.
*/
+
+void
+isc_netmgr_portrange(sa_family_t af, in_port_t low, in_port_t high);
+/*%<
+ * Set the ephemeral port range <low, high> for 'af' family.
+ */
/*%<
* Return umask of the current process as initialized at the program start
*/
+
+void
+isc_os_kernel(char **name, int *major, int *minor, int *patch);
+/*%<
+ * Store the running kernel's name in '*name' and its version numbers in
+ * '*major', '*minor' and '*patch'; any of the pointers may be NULL.
+ * Version numbers that could not be determined are reported as -1.
+ */
void
isc_managers_create(uint32_t workers) {
+ in_port_t port_low, port_high;
+
isc_loopmgr_create(isc_g_mctx, workers);
isc_netmgr_create(isc_g_mctx);
isc_rwlock_setworkers(workers);
+
+ isc_net_getudpportrange(AF_INET, &port_low, &port_high);
+ isc_netmgr_portrange(AF_INET, port_low, port_high);
+
+ isc_net_getudpportrange(AF_INET6, &port_low, &port_high);
+ isc_netmgr_portrange(AF_INET6, port_low, port_high);
}
void
#endif /* HAVE_SYSCTLBYNAME */
#endif /* USE_SYSCTL_PORTRANGE */
-isc_result_t
+void
isc_net_getudpportrange(int af, in_port_t *low, in_port_t *high) {
int result = ISC_R_FAILURE;
#if !defined(USE_SYSCTL_PORTRANGE) && defined(__linux)
*low = ISC_NET_PORTRANGELOW;
*high = ISC_NET_PORTRANGEHIGH;
}
-
- return ISC_R_SUCCESS; /* we currently never fail in this function */
}
void
atomic_int_fast32_t send_udp_buffer_size;
atomic_int_fast32_t recv_tcp_buffer_size;
atomic_int_fast32_t send_tcp_buffer_size;
+
+ _Atomic(in_port_t) port_low4;
+ _Atomic(in_port_t) port_high4;
+ _Atomic(in_port_t) port_low6;
+ _Atomic(in_port_t) port_high6;
+
} isc__netmgr_t;
extern isc__netmgr_t *isc__netmgr;
*/
isc_result_t
-isc__nm_tcp_bind_no_port(uv_tcp_t *handle);
+isc__nm_socket_max_port_range(uv_os_sock_t fd ISC_ATTR_UNUSED,
+ sa_family_t sa_family ISC_ATTR_UNUSED);
/*%<
- * Set IP_BIND_ADDRESS_NO_PORT on the socket (Linux only).
+ * Set IP_BIND_ADDRESS_NO_PORT and IP_LOCAL_PORT_RANGE on the socket
+ * (Linux only).
*/
void
void
isc_netmgr_create(isc_mem_t *mctx) {
isc__netmgr_t *netmgr = NULL;
+ in_port_t port_low, port_high;
#ifdef MAXIMAL_UV_VERSION
if (uv_version() > MAXIMAL_UV_VERSION) {
atomic_init(&netmgr->send_tcp_buffer_size, 0);
atomic_init(&netmgr->recv_udp_buffer_size, 0);
atomic_init(&netmgr->send_udp_buffer_size, 0);
+ atomic_init(&netmgr->port_low4, 0);
+ atomic_init(&netmgr->port_high4, 65535);
+ atomic_init(&netmgr->port_low6, 0);
+ atomic_init(&netmgr->port_high6, 65535);
+
#if HAVE_SO_REUSEPORT_LB
netmgr->load_balance_sockets = true;
#else
}
isc__netmgr = netmgr;
+
+ /*
+ * Set the initial port range for IP_LOCAL_PORT_RANGE.
+ */
+ isc_net_getudpportrange(AF_INET, &port_low, &port_high);
+ isc_netmgr_portrange(AF_INET, port_low, port_high);
+
+ isc_net_getudpportrange(AF_INET6, &port_low, &port_high);
+ isc_netmgr_portrange(AF_INET6, port_low, port_high);
}
/*
return &isc__netmgr->workers[tid];
}
+/*
+ * Record <low, high> as the ephemeral port range for address family 'af'
+ * in the global network manager.  Only AF_INET and AF_INET6 are handled;
+ * any other family reaches INSIST(0).
+ *
+ * The two bounds are written with independent relaxed stores, so they are
+ * not updated as a single atomic pair.
+ */
+void
+isc_netmgr_portrange(sa_family_t af, in_port_t low, in_port_t high) {
+	REQUIRE(VALID_NM(isc__netmgr));
+
+	if (af == AF_INET) {
+		atomic_store_relaxed(&isc__netmgr->port_low4, low);
+		atomic_store_relaxed(&isc__netmgr->port_high4, high);
+	} else if (af == AF_INET6) {
+		atomic_store_relaxed(&isc__netmgr->port_low6, low);
+		atomic_store_relaxed(&isc__netmgr->port_high6, high);
+	} else {
+		INSIST(0);
+	}
+}
+
#if ISC_NETMGR_TRACE
/*
* Dump all active sockets in netmgr. We output to stderr
* information regarding copyright ownership.
*/
+#include <netinet/in.h>
+
#include <isc/errno.h>
+#include <isc/result.h>
#include <isc/uv.h>
#include "netmgr-int.h"
return ISC_R_SUCCESS;
}
+/*
+ * See
+ * https://blog.cloudflare.com/linux-transport-protocol-port-selection-performance/#kernel
+ * for rationale.
+ */
+#define PORT_RANGE 1000
+
isc_result_t
-isc__nm_tcp_bind_no_port(uv_tcp_t *handle ISC_ATTR_UNUSED) {
+isc__nm_socket_max_port_range(uv_os_sock_t fd ISC_ATTR_UNUSED,
+ sa_family_t af ISC_ATTR_UNUSED) {
#ifdef IP_BIND_ADDRESS_NO_PORT
- uv_os_sock_t fd = -1;
-
- int r = uv_fileno((const uv_handle_t *)handle, (uv_os_fd_t *)&fd);
- if (r < 0) {
+ if (setsockopt_on(fd, IPPROTO_IP, IP_BIND_ADDRESS_NO_PORT) == -1) {
return ISC_R_FAILURE;
}
+#endif
- if (setsockopt_on(fd, IPPROTO_IP, IP_BIND_ADDRESS_NO_PORT) == -1) {
+#if defined(IP_LOCAL_PORT_RANGE) && defined(__linux__)
+ /*
+ * The option takes an uint32_t value with the high 16 bits
+ * set to the upper range bound, and the low 16 bits set to
+ * the lower range bound. Range bounds are inclusive. The
+ * 16-bit values should be in host byte order.
+ */
+ uint32_t port_range;
+ int major, minor;
+ isc_os_kernel(NULL, &major, &minor, NULL);
+
+ in_port_t port_low, port_high;
+ switch (af) {
+ case AF_INET:
+ port_low = isc__netmgr->port_low4;
+ port_high = isc__netmgr->port_high4;
+ break;
+ case AF_INET6:
+ port_low = isc__netmgr->port_low6;
+ port_high = isc__netmgr->port_high6;
+ break;
+ default:
+ INSIST(0);
+ }
+
+ /*
+ * Linux 6.8 implemented the following patch:
+ *
+ * If IP_LOCAL_PORT_RANGE is set on a socket before accept(),
+ * port selection no longer favors even ports.
+ *
+ * This means that connect() can find a suitable source port
+ * faster, and applications can use a different split between
+ * connect() and bind() users.
+ */
+ if (major < 6 || (major == 6 && minor < 8)) {
+ /*
+ * On Linux < 6.8, use IP_LOCAL_PORT_RANGE to
+ * partition ephemeral port range randomly to help
+ * with the port selection.
+ */
+ if (port_high - port_low <= PORT_RANGE) {
+ return ISC_R_RANGE;
+ }
+
+ /*
+ * port_low <= N < port_high - PORT_RANGE
+ */
+ port_high -= PORT_RANGE;
+ port_low += isc_random_uniform(port_high - port_low);
+ port_high = port_low + PORT_RANGE;
+ }
+ INSIST(port_low > 0);
+ INSIST(port_low < port_high);
+
+ port_range = (uint32_t)port_low | ((uint32_t)port_high << 16);
+ if (setsockopt(fd, IPPROTO_IP, IP_LOCAL_PORT_RANGE, &port_range,
+ sizeof(port_range)) == -1)
+ {
return ISC_R_FAILURE;
}
#endif
*/
#include <libgen.h>
+#include <string.h>
#include <unistd.h>
#include <isc/async.h>
}
isc__nm_incstats(sock, STATID_OPEN);
- isc__nm_tcp_bind_no_port(&sock->uv_handle.tcp);
-
if (req->local.length != 0) {
r = uv_tcp_bind(&sock->uv_handle.tcp, &req->local.type.sa, 0);
if (r != 0) {
(void)isc__nm_socket_min_mtu(sock->fd, sa_family);
(void)isc__nm_socket_tcp_maxseg(sock->fd, NM_MAXSEG);
+ result = isc__nm_socket_max_port_range(sock->fd, sa_family);
+ if (result != ISC_R_SUCCESS) {
+ isc__nmsocket_log(sock, ISC_LOG_DEBUG(99),
+ "setting up IP_BIND_ADDRESS_NO_PORT or "
+ "IP_LOCAL_PORT_RANGE failed: %s\n",
+ result == ISC_R_RANGE
+ ? isc_result_totext(result)
+ : strerror(errno));
+ }
sock->active = true;
* information regarding copyright ownership.
*/
+#include <ctype.h>
#include <inttypes.h>
#include <sys/stat.h>
+#include <sys/utsname.h>
#include <isc/os.h>
+#include <isc/string.h>
#include <isc/types.h>
#include <isc/util.h>
#include <isc/uv.h>
static unsigned int isc__os_ncpus = 0;
static unsigned long isc__os_cacheline = ISC_OS_CACHELINE_SIZE;
static mode_t isc__os_umask = 0;
+static int kernel_major = -1, kernel_minor = -1, kernel_patch = -1;
+static char kernel_name[64];
/*
* The affinity support for non-Linux is in the review in the upstream
(void)umask(isc__os_umask);
}
+/*
+ * Cache the kernel name and version reported by uname() in the file-level
+ * kernel_name / kernel_major / kernel_minor / kernel_patch variables.
+ * If uname() fails nothing is written, so the variables keep their
+ * initial values.
+ */
+static void
+kernel_initialize(void) {
+	struct utsname uts;
+
+	if (uname(&uts) != -1) {
+		/*
+		 * The sscanf() result is deliberately ignored: components
+		 * missing from the release string are simply not assigned.
+		 */
+		(void)sscanf(uts.release, "%d.%d.%d", &kernel_major,
+			     &kernel_minor, &kernel_patch);
+		(void)strlcpy(kernel_name, uts.sysname, sizeof(kernel_name));
+	}
+}
+
unsigned int
isc_os_ncpus(void) {
return isc__os_ncpus;
return isc__os_umask;
}
+/*
+ * Report the kernel name and version cached by kernel_initialize().
+ *
+ * Each output pointer is passed through SET_IF_NOT_NULL, so callers may
+ * pass NULL for values they do not need.  '*name' is pointed at the
+ * file-level kernel_name buffer rather than a copy.  Version components
+ * that were never parsed keep their initial value of -1.
+ */
+void
+isc_os_kernel(char **name, int *major, int *minor, int *patch) {
+	SET_IF_NOT_NULL(name, kernel_name);
+	SET_IF_NOT_NULL(major, kernel_major);
+	SET_IF_NOT_NULL(minor, kernel_minor);
+	SET_IF_NOT_NULL(patch, kernel_patch);
+}
+
void
isc__os_initialize(void) {
umask_initialize();
ncpus_initialize();
+ kernel_initialize();
#if defined(_SC_LEVEL1_DCACHE_LINESIZE)
long s = sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
if (s > 0 && (unsigned long)s > isc__os_cacheline) {