]> git.ipfire.org Git - thirdparty/knot-resolver.git/commitdiff
Revert "daemon/worker: add task timeouts for upstream TCP connections"
authorVladimír Čunát <vladimir.cunat@nic.cz>
Mon, 14 Mar 2022 06:33:05 +0000 (07:33 +0100)
committerVladimír Čunát <vladimir.cunat@nic.cz>
Mon, 14 Mar 2022 06:40:37 +0000 (07:40 +0100)
This reverts commit 0c9ea1332e1c4475043eab571f60915b90985999 (!1226).

CI rp:fwd-tls6.udp-asan now repeatedly shows use-after-free.
That could be a serious issue, and this commit's feature
seems less important than the risk.  Let's revert until the issue
gets deeper investigation.

NEWS
daemon/worker.c
lib/selection.c
lib/selection.h

diff --git a/NEWS b/NEWS
index 7686faa788b3c695346ccad346da3031979b8f68..9b55d46cfa66075e1d956ec3ae11f29c31d17acf 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -18,7 +18,6 @@ Incompatible changes
 
 Bugfixes
 --------
-- fix task resolution hang when a connected TCP session yields no answers (!1226)
 - doh2: fix CORS by adding `access-control-allow-origin: *` (!1246)
 - net: fix listen by interface - add interface suffix to link-local IPv6 (!1253)
 - daemon/tls: fix resumption for outgoing TLS (e.g. TLS_FORWARD) (!1261)
index f161ea95a4b20329c313ef22e63db3256dbda7a8..167737e3138e3dfd0fdcd2b262803ef22fa9146b 100644 (file)
@@ -87,14 +87,12 @@ struct qr_task
        uint16_t timeouts;
        uint16_t iter_count;
        uint32_t refs;
-       bool finished      : 1;
-       bool leading       : 1;
-       bool timeout_valid : 1; /**< true: `->timeout` is not yet closed. */
+       bool finished : 1;
+       bool leading  : 1;
        uint64_t creation_time;
        uint64_t send_time;
        uint64_t recv_time;
        struct kr_transport *transport;
-       uv_timer_t timeout;
 };
 
 
@@ -138,16 +136,6 @@ static void subreq_finalize(struct qr_task *task, const struct sockaddr *packet_
 struct worker_ctx the_worker_value; /**< Static allocation is suitable for the singleton. */
 struct worker_ctx *the_worker = NULL;
 
-
-static struct kr_query *task_get_last_pending_query(struct qr_task *task)
-{
-       if (!task || task->ctx->req.rplan.pending.len == 0) {
-               return NULL;
-       }
-
-       return array_tail(task->ctx->req.rplan.pending);
-}
-
 /*! @internal Create a UDP/TCP handle for an outgoing AF_INET* connection.
  *  socktype is SOCK_* */
 static uv_handle_t *ioreq_spawn(struct worker_ctx *worker,
@@ -568,13 +556,6 @@ static struct qr_task *qr_task_create(struct request_ctx *ctx)
        qr_task_ref(task);
        task->creation_time = kr_now();
        ctx->worker->stats.concurrent += 1;
-
-       /* Reference for timer callbacks - unref'd in 'qr_task_timeout_onclose()' */
-       qr_task_ref(task);
-       uv_timer_init(ctx->worker->loop, &task->timeout);
-       task->timeout.data = task;
-       task->timeout_valid = true;
-
        return task;
 }
 
@@ -622,23 +603,6 @@ static int qr_task_register(struct qr_task *task, struct session *session)
        return 0;
 }
 
-static void qr_task_timeout_onclose(uv_handle_t *timer_handle)
-{
-       /* Remove the reference made in qr_task_create for timeout callbacks */
-       struct qr_task* task = timer_handle->data;
-       qr_task_unref(task);
-}
-
-/** Closes the timeout in this task, if it has not been closed yet. */
-static void qr_task_close_timeout(struct qr_task *task)
-{
-       if (task->timeout_valid) {
-               uv_timer_stop(&task->timeout);
-               uv_close((uv_handle_t*) &task->timeout, qr_task_timeout_onclose);
-               task->timeout_valid = false;
-       }
-}
-
 static void qr_task_complete(struct qr_task *task)
 {
        struct request_ctx *ctx = task->ctx;
@@ -648,9 +612,6 @@ static void qr_task_complete(struct qr_task *task)
        kr_require(task->waiting.len == 0);
        kr_require(task->leading == false);
 
-       /* Close task-specific timeout */
-       qr_task_close_timeout(task);
-
        struct session *s = ctx->source.session;
        if (s) {
                kr_require(!session_flags(s)->outgoing && session_waitinglist_is_empty(s));
@@ -665,44 +626,6 @@ static void qr_task_complete(struct qr_task *task)
        }
 }
 
-static void qr_task_timeout(uv_timer_t* timer)
-{
-       /* A task may time out when no data is received on an otherwise valid
-        * TCP connection. */
-
-       struct qr_task *task = timer->data;
-
-       /* Find connected TCP session for this task. If none exists,
-        * no timeout logic is currently defined. There are also session-level
-        * timeouts in the resolver that handle the other cases. */
-       const struct sockaddr *addr = &task->transport->address.ip;
-       struct session *session = worker_find_tcp_connected(task->ctx->worker, addr);
-       if (session && session_flags(session)->outgoing) {
-               /* Check if the pointer with our msgid points to our task. */
-               uint16_t msg_id = worker_task_pkt_get_msgid(task);
-               struct qr_task *other = session_tasklist_find_msgid(session, msg_id);
-               if (other != task)
-                       return;
-
-               qr_task_ref(task);
-
-               struct kr_query *qry = task_get_last_pending_query(task);
-               VERBOSE_MSG(qry, "=> Query resolution task timed out\n");
-               session_tasklist_finalize_expired(session);
-               session_tasklist_del(session, task);
-
-               if (qry)
-                       qry->server_selection.error(qry, task->transport,
-                                       KR_SELECTION_DATA_TIMEOUT);
-
-               task->timeouts += 1;
-               task->ctx->worker->stats.timeout += 1;
-               qr_task_step(task, NULL, NULL);
-
-               qr_task_unref(task);
-       }
-}
-
 /* This is called when we send subrequest / answer */
 int qr_task_on_send(struct qr_task *task, const uv_handle_t *handle, int status)
 {
@@ -752,16 +675,9 @@ int qr_task_on_send(struct qr_task *task, const uv_handle_t *handle, int status)
                        return status;
                }
 
-               if (session_flags(s)->closing)
+               if (session_flags(s)->outgoing || session_flags(s)->closing)
                        return status;
 
-               if (session_flags(s)->outgoing) {
-                       if (task->transport)
-                               uv_timer_start(&task->timeout, &qr_task_timeout,
-                                               task->transport->timeout, 0);
-                       return status;
-               }
-
                struct worker_ctx *worker = task->ctx->worker;
                if (session_flags(s)->throttled &&
                    session_tasklist_get_len(s) < worker->tcp_pipeline_max/2) {
@@ -934,6 +850,15 @@ static int qr_task_send(struct qr_task *task, struct session *session,
        return ret;
 }
 
+static struct kr_query *task_get_last_pending_query(struct qr_task *task)
+{
+       if (!task || task->ctx->req.rplan.pending.len == 0) {
+               return NULL;
+       }
+
+       return array_tail(task->ctx->req.rplan.pending);
+}
+
 static int session_tls_hs_cb(struct session *session, int status)
 {
        if (kr_fails_assert(session_flags(session)->outgoing))
@@ -1431,8 +1356,6 @@ static int qr_task_finalize(struct qr_task *task, int state)
        struct session *source_session = ctx->source.session;
        kr_resolve_finish(&ctx->req, state);
 
-       qr_task_close_timeout(task);
-
        task->finished = true;
        if (source_session == NULL) {
                (void) qr_task_on_send(task, NULL, kr_error(EIO));
index c6eb93eff58b482a95770efd5cf35cb45c2d928f..c8f99f6eb90a22dc8e9261d72f451df8f9e3b5e3 100644 (file)
@@ -37,7 +37,6 @@ static const char *kr_selection_error_str(enum kr_selection_error err) {
        #define X(ENAME) case KR_SELECTION_ ## ENAME: return #ENAME
                X(OK);
                X(QUERY_TIMEOUT);
-               X(DATA_TIMEOUT);
                X(TLS_HANDSHAKE_FAILED);
                X(TCP_CONNECT_FAILED);
                X(TCP_CONNECT_TIMEOUT);
@@ -654,7 +653,6 @@ void error(struct kr_query *qry, struct address_state *addr_state,
                /* Connection and handshake failures have properties similar
                 * to UDP timeouts, so we handle them (almost) the same way. */
                /* fall-through */
-       case KR_SELECTION_DATA_TIMEOUT:
        case KR_SELECTION_TLS_HANDSHAKE_FAILED:
        case KR_SELECTION_QUERY_TIMEOUT:
                qry->server_selection.local_state->timeouts++;
index e35eb750662d2990b2ed90206aad08e80cba6070..81e1118458f696bb4db8062716752d48afb12874 100644 (file)
@@ -28,7 +28,6 @@ enum kr_selection_error {
 
        // Network errors
        KR_SELECTION_QUERY_TIMEOUT,
-       KR_SELECTION_DATA_TIMEOUT,
        KR_SELECTION_TLS_HANDSHAKE_FAILED,
        KR_SELECTION_TCP_CONNECT_FAILED,
        KR_SELECTION_TCP_CONNECT_TIMEOUT,
@@ -73,7 +72,7 @@ struct kr_transport {
        union kr_sockaddr address;
        size_t address_len;
        enum kr_transport_protocol protocol;
-       unsigned timeout; /**< Timeout in ms; needed for: UDP, TCP, TLS. */
+       unsigned timeout; /**< Timeout in ms to be set for UDP transmission. */
        /** Timeout was capped to a maximum value based on the other candidates
         * when choosing this transport. The timeout therefore can be much lower
         * than what we expect it to be. We basically probe the server for a sudden