extern unsigned int dns_adb_entrywindow;
extern unsigned int dns_adb_cachemin;
extern size_t dns_dispatch_tcppipelining;
+extern unsigned int dns_dispatch_tcp_idle_timeout;
static bool want_stats = false;
static char absolute_conffile[PATH_MAX];
"least 1");
}
dns_dispatch_tcppipelining = pipelining;
+ } else if (!strncmp(option, "tcpidletimeout=", 15)) {
+ dns_dispatch_tcp_idle_timeout = atoi(option + 15);
} else {
fprintf(stderr, "unknown -T flag '%s'\n", option);
}
*/
size_t dns_dispatch_tcppipelining = 256;
+/*
+ * Idle timeout (in milliseconds) for a reused outgoing TCP connection. While a
+ * dispatch has no outstanding responses we keep a read pending so a
+ * peer-initiated close is noticed promptly; this bounds how long such an idle
+ * connection is kept open for reuse. Can be overridden via
+ * 'named -T tcpidletimeout=N'.
+ */
+unsigned int dns_dispatch_tcp_idle_timeout = 5000;
+
typedef ISC_LIST(dns_dispentry_t) dns_displist_t;
struct dns_dispatchmgr {
tcp_recv_add(resps, resp, result);
}
disp->state = DNS_DISPATCHSTATE_CANCELED;
+
+ /* Close now so the socket doesn't linger in CLOSE-WAIT. */
+ if (disp->handle != NULL) {
+ isc_nmhandle_close(disp->handle);
+ }
}
static void
}
}
+static void
+tcp_startrecv_idle(dns_dispatch_t *disp) {
+ REQUIRE(dns_dispatch_tcp_idle_timeout > 0);
+
+ /*
+ * No outstanding responses, but the connection is still up and
+ * reusable. Keep a read pending (bounded by the idle timeout)
+ * so that a peer-initiated close is detected promptly;
+ * otherwise the socket would linger in CLOSE-WAIT and be handed
+ * out again as a dead reused connection.
+ */
+ isc_nmhandle_cleartimeout(disp->handle);
+ isc_nmhandle_settimeout(disp->handle, dns_dispatch_tcp_idle_timeout);
+ if (!disp->reading) {
+ dispatch_log(disp, ISC_LOG_DEBUG(90), "keeping idle read on %p",
+ disp->handle);
+ tcp_startrecv(disp, NULL);
+ }
+}
+
/*
* General flow:
*
* restart.
*/
static void
-tcp_recv(isc_nmhandle_t *handle, isc_result_t result, isc_region_t *region,
+tcp_recv(isc_nmhandle_t *handle, isc_result_t eresult, isc_region_t *region,
void *arg) {
dns_dispatch_t *disp = (dns_dispatch_t *)arg;
dns_dispentry_t *resp = NULL;
dns_displist_t resps = ISC_LIST_INITIALIZER;
isc_time_t now;
int timeout = 0;
+ isc_result_t result = eresult;
REQUIRE(VALID_DISPATCH(disp));
dispatch_log(disp, ISC_LOG_DEBUG(90),
"TCP read:%s:requests %" PRIuFAST32,
- isc_result_totext(result), disp->requests);
+ isc_result_totext(eresult), disp->requests);
peer = isc_nmhandle_peeraddr(handle);
/*
* Phase 1: Process timeout and success.
*/
- switch (result) {
+ switch (eresult) {
case ISC_R_TIMEDOUT:
/*
* Time out the oldest response in the active queue.
*/
if (result == ISC_R_NOTFOUND) {
- if (disp->timedout > 0) {
+ if (eresult == ISC_R_TIMEDOUT) {
+ /*
+ * An idle keep-alive read timed out with no outstanding
+ * responses.
+ */
+ result = ISC_R_CANCELED;
+ } else if (disp->timedout > 0) {
/* There was active query that timed-out before */
disp->timedout--;
} else {
case ISC_R_TIMEDOUT:
case ISC_R_NOTFOUND:
break;
-
case ISC_R_SHUTTINGDOWN:
case ISC_R_CANCELED:
case ISC_R_EOF:
case ISC_R_CONNECTIONRESET:
- isc_sockaddr_format(&peer, buf, sizeof(buf));
- dispatch_log(disp, ISC_LOG_DEBUG(90),
- "shutting down TCP: %s: %s", buf,
- isc_result_totext(result));
+ if (isc_log_wouldlog(ISC_LOG_DEBUG(90))) {
+ isc_sockaddr_format(&peer, buf, sizeof(buf));
+ dispatch_log(disp, ISC_LOG_DEBUG(90),
+ "shutting down TCP: %s: %s", buf,
+ isc_result_totext(result));
+ }
tcp_recv_shutdown(disp, &resps, result);
break;
default:
*/
tcp_recv_processall(&resps, region);
+ /*
+ * Phase 7: Restart reading on idle connections.
+ */
+ if (ISC_LIST_EMPTY(disp->active) &&
+ disp->state == DNS_DISPATCHSTATE_CONNECTED &&
+ dns_dispatch_tcp_idle_timeout > 0)
+ {
+ tcp_startrecv_idle(disp);
+ }
+
dns_dispatch_detach(&disp); /* DISPATCH002 */
}
INSIST(!ISC_LINK_LINKED(resp, alink));
- if (ISC_LIST_EMPTY(disp->active)) {
+ if (ISC_LIST_EMPTY(disp->active) &&
+ disp->state == DNS_DISPATCHSTATE_CONNECTED)
+ {
INSIST(disp->handle != NULL);
-#if DISPATCH_TCP_KEEPALIVE
/*
- * This is an experimental code that keeps the TCP
- * connection open for 1 second before it is finally
- * closed. By keeping the TCP connection open, it can
- * be reused by dns_request that uses
- * dns_dispatch_gettcp() to join existing TCP
- * connections.
- *
- * It is disabled for now, because it changes the
- * behaviour, but I am keeping the code here for future
- * reference when we improve the dns_dispatch to reuse
- * the TCP connections also in the resolver.
- *
- * The TCP connection reuse should be seamless and not
- * require any extra handling on the client side though.
+ * This was the last outstanding response on a still
+ * connected dispatch. Keep the connection open with a
+ * read pending (bounded by the idle timeout) so that it
+ * can be reused by a subsequent query and so that a
+ * peer-initiated close is noticed promptly instead of
+ * leaving the socket stuck in CLOSE-WAIT. If the
+ * dispatch is already shutting down, leave it alone so
+ * it can be torn down.
*/
- isc_nmhandle_cleartimeout(disp->handle);
- isc_nmhandle_settimeout(disp->handle, 1000);
-
- if (!disp->reading) {
- dispentry_log(resp, ISC_LOG_DEBUG(90),
- "final 1 second timeout on %p",
- disp->handle);
- tcp_startrecv(disp, NULL);
- }
-#else
- if (disp->reading) {
+ if (dns_dispatch_tcp_idle_timeout > 0) {
+ tcp_startrecv_idle(disp);
+ } else if (disp->reading) {
dispentry_log(resp, ISC_LOG_DEBUG(90),
"canceling read on %p",
disp->handle);
isc_nm_cancelread(disp->handle);
}
-#endif
}
break;
REQUIRE(VALID_DISPATCH(disp));
REQUIRE(disp->socktype == isc_socktype_tcp);
+ if (disp->reading) {
+ return;
+ }
+
dns_dispatch_ref(disp); /* DISPATCH002 */
if (resp != NULL) {
dispentry_log(resp, ISC_LOG_DEBUG(90), "reading from %p",
"already connected; attaching");
resp->reading = true;
+ /*
+ * Replace any idle timeout with this query's timeout. The
+ * connection may have been kept reading while idle (with the
+ * idle timeout applied), so update the handle timeout even when
+ * a read is already pending.
+ */
+ isc_nmhandle_cleartimeout(disp->handle);
+ if (resp->timeout != 0) {
+ isc_nmhandle_settimeout(disp->handle, resp->timeout);
+ }
+
if (!disp->reading) {
/* Restart the reading */
- isc_nmhandle_cleartimeout(disp->handle);
- if (resp->timeout != 0) {
- isc_nmhandle_settimeout(disp->handle,
- resp->timeout);
- }
tcp_startrecv(disp, resp);
}
#include <isc/lib.h>
#include <isc/managers.h>
#include <isc/refcount.h>
+#include <isc/time.h>
+#include <isc/timer.h>
#include <isc/tls.h>
#include <isc/util.h>
#include <isc/uv.h>
dns_dispatch_connect(test->dispentry);
}
+/*
+ * Regression test for a reused outgoing TCP connection that the peer closes
+ * while it is idle. The connection must be kept under read while idle so the
+ * close is noticed and the dead dispatch is dropped from the reuse pool;
+ * otherwise the next query would be handed the dead connection and fail.
+ */
+static test_dispatch_t *reuse_test1 = NULL;
+static isc_timer_t *reuse_timer = NULL;
+
+static void
+server_senddone_close(isc_nmhandle_t *handle, isc_result_t eresult, void *arg) {
+ isc_nmhandle_t *sendhandle = arg;
+
+ UNUSED(eresult);
+
+ /*
+ * Drop the server side of the connection right after answering, to
+ * mimic a forwarder (e.g. a DoT resolver) that closes idle
+ * connections.
+ */
+ isc_nmhandle_close(handle);
+ isc_nmhandle_detach(&sendhandle);
+}
+
+static void
+nameserver_close(isc_nmhandle_t *handle, isc_result_t eresult,
+ isc_region_t *region, void *arg ISC_ATTR_UNUSED) {
+ isc_region_t response;
+ static unsigned char buf[16];
+ isc_nmhandle_t *sendhandle = NULL;
+
+ if (eresult != ISC_R_SUCCESS) {
+ return;
+ }
+
+ memmove(buf, region->base, 12);
+ memset(buf + 12, 0, 4);
+ buf[2] |= 0x80; /* qr=1 */
+
+ response.base = buf;
+ response.length = sizeof(buf);
+
+ isc_nmhandle_attach(handle, &sendhandle);
+ isc_nm_send(sendhandle, &response, server_senddone_close, sendhandle);
+}
+
+static void
+reuse_query2(void *arg ISC_ATTR_UNUSED) {
+ isc_result_t result;
+ test_dispatch_t *test2 = isc_mem_get(isc_g_mctx, sizeof(*test2));
+
+ *test2 = (test_dispatch_t){
+ .dispatchmgr = dns_dispatchmgr_ref(reuse_test1->dispatchmgr),
+ };
+
+ isc_timer_destroy(&reuse_timer);
+
+ testdata.region.base = testdata.message;
+ testdata.region.length = sizeof(testdata.message);
+
+ /*
+ * Create the dispatch for the second query. This inspects the reuse
+ * pool: the dispatch from the first query must no longer be offered
+ * (it was either removed or marked canceled when the peer closed it),
+ * so this gets a fresh connection.
+ */
+ result = dns_dispatch_createtcp(
+ test2->dispatchmgr, &tcp_connect_addr, &tcp_server_addr, NULL,
+ DNS_DISPATCHTYPE_RESOLVER, 0, &test2->dispatch);
+ assert_int_equal(result, ISC_R_SUCCESS);
+
+ result = dns_dispatch_add(test2->dispatch, isc_loop_main(), 0,
+ T_CLIENT_CONNECT, T_CLIENT_INIT,
+ &tcp_server_addr, NULL, NULL, connected,
+ client_senddone, response_shutdown, test2,
+ &test2->id, &test2->dispentry);
+ assert_int_equal(result, ISC_R_SUCCESS);
+
+ testdata.message[0] = (test2->id >> 8) & 0xff;
+ testdata.message[1] = test2->id & 0xff;
+
+ dns_dispatch_connect(test2->dispentry);
+
+ /*
+ * Now release the lingering reference from the first query. Keeping
+ * it until after the createtcp above ensures the dead dispatch is
+ * still around to be (wrongly) reused if the fix were absent.
+ */
+ dns_dispatch_detach(&reuse_test1->dispatch);
+ dns_dispatchmgr_detach(&reuse_test1->dispatchmgr);
+ isc_mem_put(isc_g_mctx, reuse_test1, sizeof(*reuse_test1));
+}
+
+static void
+response_reuse(isc_result_t eresult, isc_region_t *region ISC_ATTR_UNUSED,
+ void *arg) {
+ test_dispatch_t *test = arg;
+ isc_interval_t interval;
+
+ assert_int_equal(eresult, ISC_R_SUCCESS);
+
+ /*
+ * Finish the first query but keep a reference on its dispatch, as an
+ * overlapping query would, so the connection lingers in the reuse pool
+ * after going idle.
+ */
+ dns_dispatch_done(&test->dispentry);
+ reuse_test1 = test;
+
+ /*
+ * Defer the second query so the server's close of the idle connection
+ * is processed first.
+ */
+ isc_interval_set(&interval, 1, 0);
+ isc_timer_create(isc_loop_main(), reuse_query2, NULL, &reuse_timer);
+ isc_timer_start(reuse_timer, isc_timertype_once, &interval);
+}
+
+ISC_LOOP_TEST_IMPL(dispatch_tcp_reuse_after_close) {
+ isc_result_t result;
+ test_dispatch_t *test = isc_mem_get(isc_g_mctx, sizeof(*test));
+ *test = (test_dispatch_t){ 0 };
+
+ /* Server: answer one query per connection, then close it. */
+ result = isc_nm_listenstreamdns(
+ ISC_NM_LISTEN_ONE, &tcp_server_addr, nameserver_close, NULL,
+ accept_cb, NULL, 0, NULL, NULL, ISC_NM_PROXY_NONE, &sock);
+ assert_int_equal(result, ISC_R_SUCCESS);
+
+ isc_loop_teardown(isc_loop_main(), stop_listening, sock);
+
+ /* Client */
+ testdata.region.base = testdata.message;
+ testdata.region.length = sizeof(testdata.message);
+
+ result = dns_dispatchmgr_create(isc_g_mctx, &test->dispatchmgr);
+ assert_int_equal(result, ISC_R_SUCCESS);
+
+ result = dns_dispatch_createtcp(
+ test->dispatchmgr, &tcp_connect_addr, &tcp_server_addr, NULL,
+ DNS_DISPATCHTYPE_RESOLVER, 0, &test->dispatch);
+ assert_int_equal(result, ISC_R_SUCCESS);
+
+ result = dns_dispatch_add(test->dispatch, isc_loop_main(), 0,
+ T_CLIENT_CONNECT, T_CLIENT_INIT,
+ &tcp_server_addr, NULL, NULL, connected,
+ client_senddone, response_reuse, test,
+ &test->id, &test->dispentry);
+ assert_int_equal(result, ISC_R_SUCCESS);
+
+ testdata.message[0] = (test->id >> 8) & 0xff;
+ testdata.message[1] = test->id & 0xff;
+
+ dns_dispatch_connect(test->dispentry);
+}
+
ISC_TEST_LIST_START
ISC_TEST_ENTRY_CUSTOM(dispatch_sharedtcp, setup_test, teardown_test)
+ISC_TEST_ENTRY_CUSTOM(dispatch_tcp_reuse_after_close, setup_test, teardown_test)
ISC_TEST_ENTRY_CUSTOM(dispatch_timeout_udp_response, setup_test, teardown_test)
ISC_TEST_ENTRY_CUSTOM(dispatchset_create, setup_test, teardown_test)
ISC_TEST_ENTRY_CUSTOM(dispatchset_get, setup_test, teardown_test)