dns_view_t *view_next = NULL;
dns_viewlist_t tmpviewlist;
dns_viewlist_t viewlist, builtin_viewlist;
- in_port_t listen_port, udpport_low, udpport_high;
+ in_port_t listen_port, port_low, port_high;
int i, backlog;
isc_interval_t interval;
isc_logconfig_t *logc = NULL;
if (usev4ports != NULL) {
portset_fromconf(v4portset, usev4ports, true);
} else {
- result = isc_net_getudpportrange(AF_INET, &udpport_low,
- &udpport_high);
- if (result != ISC_R_SUCCESS) {
- isc_log_write(named_g_lctx, NAMED_LOGCATEGORY_GENERAL,
- NAMED_LOGMODULE_SERVER, ISC_LOG_ERROR,
- "get the default UDP/IPv4 port range: %s",
- isc_result_totext(result));
- goto cleanup_v6portset;
- }
-
- if (udpport_low == udpport_high) {
- isc_portset_add(v4portset, udpport_low);
+ isc_net_getudpportrange(AF_INET, &port_low, &port_high);
+ if (port_low == port_high) {
+ isc_portset_add(v4portset, port_low);
} else {
- isc_portset_addrange(v4portset, udpport_low,
- udpport_high);
+ isc_portset_addrange(v4portset, port_low, port_high);
}
if (!ns_server_getoption(server->sctx, NS_SERVER_DISABLE4)) {
isc_log_write(named_g_lctx, NAMED_LOGCATEGORY_GENERAL,
NAMED_LOGMODULE_SERVER, ISC_LOG_INFO,
"using default UDP/IPv4 port range: "
"[%d, %d]",
- udpport_low, udpport_high);
+ port_low, port_high);
}
}
(void)named_config_get(maps, "avoid-v4-udp-ports", &avoidv4ports);
if (usev6ports != NULL) {
portset_fromconf(v6portset, usev6ports, true);
} else {
- result = isc_net_getudpportrange(AF_INET6, &udpport_low,
- &udpport_high);
- if (result != ISC_R_SUCCESS) {
- isc_log_write(named_g_lctx, NAMED_LOGCATEGORY_GENERAL,
- NAMED_LOGMODULE_SERVER, ISC_LOG_ERROR,
- "get the default UDP/IPv6 port range: %s",
- isc_result_totext(result));
- goto cleanup_v6portset;
- }
- if (udpport_low == udpport_high) {
- isc_portset_add(v6portset, udpport_low);
+ isc_net_getudpportrange(AF_INET6, &port_low, &port_high);
+ if (port_low == port_high) {
+ isc_portset_add(v6portset, port_low);
} else {
- isc_portset_addrange(v6portset, udpport_low,
- udpport_high);
+ isc_portset_addrange(v6portset, port_low, port_high);
}
if (!ns_server_getoption(server->sctx, NS_SERVER_DISABLE6)) {
isc_log_write(named_g_lctx, NAMED_LOGCATEGORY_GENERAL,
NAMED_LOGMODULE_SERVER, ISC_LOG_INFO,
"using default UDP/IPv6 port range: "
"[%d, %d]",
- udpport_low, udpport_high);
+ port_low, port_high);
}
}
(void)named_config_get(maps, "avoid-v6-udp-ports", &avoidv6ports);
/*%<
* Return the local address of 'sock'.
*/
+
+void
+isc_netmgr_portrange(isc_nm_t *netmgr, sa_family_t af, in_port_t low,
+ in_port_t high);
+/*%<
+ * Set the ephemeral port range <low, high> for 'af' family.
+ */
* Return umask of the current process as initialized at the program start
*/
+void
+isc_os_kernel(char **name, int *major, int *minor, int *patch);
+/*%<
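+ * Store the running kernel's name in 'name' and its version numbers in
+ * 'major', 'minor' and 'patch'.  Any argument may be NULL if the caller
+ * does not need that value.  A version number that is not available is
+ * set to -1.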
+ */
+
ISC_LANG_ENDDECLS
atomic_int_fast32_t send_udp_buffer_size;
atomic_int_fast32_t recv_tcp_buffer_size;
atomic_int_fast32_t send_tcp_buffer_size;
+
+ _Atomic(in_port_t) port_low4;
+ _Atomic(in_port_t) port_high4;
+ _Atomic(in_port_t) port_low6;
+ _Atomic(in_port_t) port_high6;
};
/*%
*/
isc_result_t
-isc__nm_tcp_bind_no_port(uv_tcp_t *handle);
+isc__nm_socket_max_port_range(uv_os_sock_t fd ISC_ATTR_UNUSED,
+ sa_family_t sa_family ISC_ATTR_UNUSED,
+ in_port_t port_low, in_port_t port_high);
/*%<
- * Set IP_BIND_ADDRESS_NO_PORT on the socket (Linux only).
+ * Set IP_BIND_ADDRESS_NO_PORT and IP_LOCAL_PORT_RANGE on the socket
+ * (Linux only).
*/
void
void
isc_netmgr_create(isc_mem_t *mctx, isc_loopmgr_t *loopmgr, isc_nm_t **netmgrp) {
isc_nm_t *netmgr = NULL;
+ in_port_t port_low, port_high;
#ifdef MAXIMAL_UV_VERSION
if (uv_version() > MAXIMAL_UV_VERSION) {
atomic_init(&netmgr->send_tcp_buffer_size, 0);
atomic_init(&netmgr->recv_udp_buffer_size, 0);
atomic_init(&netmgr->send_udp_buffer_size, 0);
+ atomic_init(&netmgr->port_low4, 0);
+ atomic_init(&netmgr->port_high4, 65535);
+ atomic_init(&netmgr->port_low6, 0);
+ atomic_init(&netmgr->port_high6, 65535);
+
#if HAVE_SO_REUSEPORT_LB
netmgr->load_balance_sockets = true;
#else
}
*netmgrp = netmgr;
+
+ /*
+ * Set the initial port range for IP_LOCAL_PORT_RANGE.
+ */
+ isc_net_getudpportrange(AF_INET, &port_low, &port_high);
+ isc_netmgr_portrange(netmgr, AF_INET, port_low, port_high);
+
+ isc_net_getudpportrange(AF_INET6, &port_low, &port_high);
+ isc_netmgr_portrange(netmgr, AF_INET6, port_low, port_high);
}
/*
.complete_header = *header_data };
}
+/*
+ * Record the port range <low, high> to be used for address family 'af'
+ * in 'netmgr'.  IPv4 and IPv6 ranges are kept separately; each bound is
+ * stored on its own with a relaxed atomic store.  Any family other than
+ * AF_INET or AF_INET6 is a programming error.
+ */
+void
+isc_netmgr_portrange(isc_nm_t *netmgr, sa_family_t af, in_port_t low,
+		     in_port_t high) {
+	REQUIRE(VALID_NM(netmgr));
+
+	if (af == AF_INET) {
+		atomic_store_relaxed(&netmgr->port_low4, low);
+		atomic_store_relaxed(&netmgr->port_high4, high);
+	} else if (af == AF_INET6) {
+		atomic_store_relaxed(&netmgr->port_low6, low);
+		atomic_store_relaxed(&netmgr->port_high6, high);
+	} else {
+		UNREACHABLE();
+	}
+}
+
#if ISC_NETMGR_TRACE
/*
* Dump all active sockets in netmgr. We output to stderr
* information regarding copyright ownership.
*/
+#include <netinet/in.h>
+
#include <isc/errno.h>
+#include <isc/result.h>
#include <isc/uv.h>
#include "netmgr-int.h"
return ISC_R_SUCCESS;
}
+/*
+ * See
+ * https://blog.cloudflare.com/linux-transport-protocol-port-selection-performance/#kernel
+ * for rationale.
+ */
+#define PORT_RANGE 1000
+
isc_result_t
-isc__nm_tcp_bind_no_port(uv_tcp_t *handle ISC_ATTR_UNUSED) {
+isc__nm_socket_max_port_range(uv_os_sock_t fd ISC_ATTR_UNUSED,
+ sa_family_t sa_family ISC_ATTR_UNUSED,
+ in_port_t port_low ISC_ATTR_UNUSED,
+ in_port_t port_high ISC_ATTR_UNUSED) {
#ifdef IP_BIND_ADDRESS_NO_PORT
- uv_os_sock_t fd = -1;
-
- int r = uv_fileno((const uv_handle_t *)handle, (uv_os_fd_t *)&fd);
- if (r < 0) {
+ if (setsockopt_on(fd, IPPROTO_IP, IP_BIND_ADDRESS_NO_PORT) == -1) {
return ISC_R_FAILURE;
}
+#endif
- if (setsockopt_on(fd, IPPROTO_IP, IP_BIND_ADDRESS_NO_PORT) == -1) {
+#if defined(IP_LOCAL_PORT_RANGE) && defined(__linux__)
+ /*
+ * The option takes an uint32_t value with the high 16 bits
+ * set to the upper range bound, and the low 16 bits set to
+ * the lower range bound. Range bounds are inclusive. The
+ * 16-bit values should be in host byte order.
+ */
+ uint32_t port_range;
+ int major, minor;
+ isc_os_kernel(NULL, &major, &minor, NULL);
+
+ /*
+ * Linux 6.8 implemented the following change:
+ *
+ * If IP_LOCAL_PORT_RANGE is set on a socket before accept(),
+ * port selection no longer favors even ports.
+ *
+ * This means that connect() can find a suitable source port
+ * faster, and applications can use a different split between
+ * connect() and bind() users.
+ */
+ if (major < 6 || (major == 6 && minor < 8)) {
+ /*
+ * On Linux < 6.8, use IP_LOCAL_PORT_RANGE to
+ * partition ephemeral port range randomly to help
+ * with the port selection.
+ */
+ if (port_high - port_low <= PORT_RANGE) {
+ return ISC_R_RANGE;
+ }
+
+ /*
+ * port_low <= N < port_high - PORT_RANGE
+ */
+ port_high -= PORT_RANGE;
+ port_low += isc_random_uniform(port_high - port_low);
+ port_high = port_low + PORT_RANGE;
+ }
+ INSIST(port_low > 0);
+ INSIST(port_low < port_high);
+
+ port_range = (uint32_t)port_low | ((uint32_t)port_high << 16);
+ if (setsockopt(fd, IPPROTO_IP, IP_LOCAL_PORT_RANGE, &port_range,
+ sizeof(port_range)) == -1)
+ {
return ISC_R_FAILURE;
}
#endif
*/
#include <libgen.h>
+#include <string.h>
#include <unistd.h>
#include <isc/async.h>
}
isc__nm_incstats(sock, STATID_OPEN);
- isc__nm_tcp_bind_no_port(&sock->uv_handle.tcp);
-
if (req->local.length != 0) {
r = uv_tcp_bind(&sock->uv_handle.tcp, &req->local.type.sa, 0);
if (r != 0) {
sa_family_t sa_family;
isc__networker_t *worker = NULL;
uv_os_sock_t fd = -1;
+ in_port_t port_low, port_high;
REQUIRE(VALID_NM(mgr));
REQUIRE(local != NULL);
(void)isc__nm_socket_min_mtu(sock->fd, sa_family);
(void)isc__nm_socket_tcp_maxseg(sock->fd, NM_MAXSEG);
+ port_low = (sa_family == AF_INET) ? mgr->port_low4 : mgr->port_low6;
+ port_high = (sa_family == AF_INET) ? mgr->port_high4 : mgr->port_high6;
+ result = isc__nm_socket_max_port_range(sock->fd, sa_family, port_low,
+ port_high);
+ if (result != ISC_R_SUCCESS) {
+ isc__nmsocket_log(sock, ISC_LOG_DEBUG(99),
+ "setting up IP_BIND_ADDRESS_NO_PORT or "
+ "IP_LOCAL_PORT_RANGE failed: %s\n",
+ result == ISC_R_RANGE
+ ? isc_result_totext(result)
+ : strerror(errno));
+ }
sock->active = true;
* information regarding copyright ownership.
*/
+#include <ctype.h>
#include <inttypes.h>
#include <sys/stat.h>
+#include <sys/utsname.h>
#include <isc/os.h>
+#include <isc/string.h>
#include <isc/types.h>
#include <isc/util.h>
static unsigned int isc__os_ncpus = 0;
static unsigned long isc__os_cacheline = ISC_OS_CACHELINE_SIZE;
static mode_t isc__os_umask = 0;
+static int kernel_major = -1, kernel_minor = -1, kernel_patch = -1;
+static char kernel_name[64];
#ifdef HAVE_SYSCONF
(void)umask(isc__os_umask);
}
+/*
+ * Query uname() and cache the kernel name and the numeric
+ * "major.minor.patch" prefix of the release string in the file-scope
+ * kernel_* variables.  If uname() fails nothing is touched; version
+ * numbers that sscanf() cannot parse keep whatever value they had.
+ */
+static void
+kernel_initialize(void) {
+	struct utsname uts;
+
+	if (uname(&uts) != -1) {
+		(void)strlcpy(kernel_name, uts.sysname, sizeof(kernel_name));
+		(void)sscanf(uts.release, "%d.%d.%d", &kernel_major,
+			     &kernel_minor, &kernel_patch);
+	}
+}
+
unsigned int
isc_os_ncpus(void) {
return isc__os_ncpus;
return isc__os_umask;
}
+/*
+ * Hand out the cached kernel identification.
+ *
+ * Each output argument is only written when it is non-NULL, so callers
+ * may pass NULL for values they do not need.  'name' receives a pointer
+ * to the file-scope kernel_name buffer (not a copy); 'major', 'minor'
+ * and 'patch' receive the cached version numbers, which are -1 unless
+ * kernel_initialize() managed to parse them.
+ */
+void
+isc_os_kernel(char **name, int *major, int *minor, int *patch) {
+	/* Terminate every macro call so this does not depend on its expansion. */
+	SET_IF_NOT_NULL(name, kernel_name);
+	SET_IF_NOT_NULL(major, kernel_major);
+	SET_IF_NOT_NULL(minor, kernel_minor);
+	SET_IF_NOT_NULL(patch, kernel_patch);
+}
+
void
isc__os_initialize(void) {
umask_initialize();
ncpus_initialize();
+ kernel_initialize();
#if defined(HAVE_SYSCONF) && defined(_SC_LEVEL1_DCACHE_LINESIZE)
long s = sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
if (s > 0 && (unsigned long)s > isc__os_cacheline) {