git.ipfire.org Git - thirdparty/knot-resolver.git/commitdiff
daemon, lib/nsrep: tuning of upstreams timeouting algorithm
author: Grigorii Demidov <grigorii.demidov@nic.cz>
Tue, 4 Dec 2018 14:02:51 +0000 (15:02 +0100)
committer: Grigorii Demidov <grigorii.demidov@nic.cz>
Fri, 7 Dec 2018 10:23:42 +0000 (11:23 +0100)
daemon/worker.c
lib/nsrep.h

index e8e70a76a2ff3c4d0c62fd55256d4e95429c9884..04e9599d02f13b429fb5479187cd05b8606f1bdf 100644 (file)
@@ -675,6 +675,15 @@ static int qr_task_send(struct qr_task *task, struct session *session,
        return ret;
 }
 
+static struct kr_query *task_get_last_pending_query(struct qr_task *task)
+{
+       if (!task || task->ctx->req.rplan.pending.len == 0) {
+               return NULL;
+       }
+
+       return array_tail(task->ctx->req.rplan.pending);
+}
+
 static int session_tls_hs_cb(struct session *session, int status)
 {
        assert(session_flags(session)->outgoing);
@@ -686,9 +695,11 @@ static int session_tls_hs_cb(struct session *session, int status)
        int ret = kr_ok();
 
        if (status) {
-               kr_nsrep_update_rtt(NULL, peer, KR_NS_DEAD,
+               struct qr_task *task = session_waitinglist_get(session);
+               unsigned score = task->ctx->req.options.FORWARD ? KR_NS_FWD_DEAD : KR_NS_DEAD;
+               kr_nsrep_update_rtt(NULL, peer, score,
                                    worker->engine->resolver.cache_rtt,
-                                   KR_NS_RESET);
+                                   KR_NS_UPDATE_NORESET);
                return ret;
        }
 
@@ -756,16 +767,6 @@ static int session_tls_hs_cb(struct session *session, int status)
        return kr_ok();
 }
 
-
-static struct kr_query *task_get_last_pending_query(struct qr_task *task)
-{
-       if (!task || task->ctx->req.rplan.pending.len == 0) {
-               return NULL;
-       }
-
-       return array_tail(task->ctx->req.rplan.pending);
-}
-
 static int send_waiting(struct session *session)
 {
        int ret = 0;
@@ -810,15 +811,17 @@ static void on_connect(uv_connect_t *req, int status)
        }
 
        if (status != 0) {
-               if (kr_verbose_status) {
+               if (VERBOSE_STATUS) {
                        const char *peer_str = kr_straddr(peer);
                        kr_log_verbose( "[wrkr]=> connect to '%s' failed (%s), flagged as 'bad'\n",
                                        peer_str ? peer_str : "", uv_strerror(status));
                }
                worker_del_tcp_waiting(worker, peer);
-               kr_nsrep_update_rtt(NULL, peer, KR_NS_DEAD,
+               struct qr_task *task = session_waitinglist_get(session);
+               unsigned score = task->ctx->req.options.FORWARD ? KR_NS_FWD_DEAD : KR_NS_DEAD;
+               kr_nsrep_update_rtt(NULL, peer, score,
                                    worker->engine->resolver.cache_rtt,
-                                   KR_NS_RESET);
+                                   KR_NS_UPDATE_NORESET);
                assert(session_tasklist_is_empty(session));
                session_waitinglist_retry(session, false);
                session_close(session);
@@ -838,7 +841,7 @@ static void on_connect(uv_connect_t *req, int status)
                }
        }
 
-       if (kr_verbose_status) {
+       if (VERBOSE_STATUS) {
                const char *peer_str = kr_straddr(peer);
                kr_log_verbose( "[wrkr]=> connected to '%s'\n", peer_str ? peer_str : "");
        }
@@ -891,9 +894,10 @@ static void on_tcp_connect_timeout(uv_timer_t *timer)
                VERBOSE_MSG(qry, "=> connection to '%s' failed\n", peer_str);
        }
 
-       kr_nsrep_update_rtt(NULL, peer, KR_NS_DEAD,
+       unsigned score = qry->flags.FORWARD ? KR_NS_FWD_DEAD : KR_NS_DEAD;
+       kr_nsrep_update_rtt(NULL, peer, score,
                            worker->engine->resolver.cache_rtt,
-                           KR_NS_RESET);
+                           KR_NS_UPDATE_NORESET);
 
        worker->stats.timeout += session_waitinglist_get_len(session);
        session_waitinglist_retry(session, true);
@@ -923,9 +927,10 @@ static void on_udp_timeout(uv_timer_t *timer)
                                char *addr_str = kr_straddr(choice);
                                VERBOSE_MSG(qry, "=> server: '%s' flagged as 'bad'\n", addr_str ? addr_str : "");
                        }
-                       kr_nsrep_update_rtt(&qry->ns, choice, KR_NS_DEAD,
+                       unsigned score = qry->flags.FORWARD ? KR_NS_FWD_DEAD : KR_NS_DEAD;
+                       kr_nsrep_update_rtt(&qry->ns, choice, score,
                                            worker->engine->resolver.cache_rtt,
-                                           KR_NS_RESET);
+                                           KR_NS_UPDATE_NORESET);
                }
        }
        task->timeouts += 1;
@@ -1286,9 +1291,10 @@ static int tcp_task_make_connection(struct qr_task *task, const struct sockaddr
                worker_del_tcp_waiting(ctx->worker, addr);
                free(conn);
                session_close(session);
-               kr_nsrep_update_rtt(NULL, addr, KR_NS_DEAD,
+               unsigned score = qry->flags.FORWARD ? KR_NS_FWD_DEAD : KR_NS_DEAD;
+               kr_nsrep_update_rtt(NULL, peer, score,
                                    worker->engine->resolver.cache_rtt,
-                                   KR_NS_RESET);
+                                   KR_NS_UPDATE_NORESET);
                WITH_VERBOSE (qry) {
                        const char *peer_str = kr_straddr(peer);
                        kr_log_verbose( "[wrkr]=> connect to '%s' failed (%s), flagged as 'bad'\n",
index 8db0ce062b398820674a85a551acbd07e98cbe52..12e9554f34231743cff050b57a327f582ebbf04d 100644 (file)
@@ -31,10 +31,7 @@ struct kr_query;
   * @note RTT is measured in milliseconds.
   */
 enum kr_ns_score {
-       KR_NS_MAX_SCORE     = 20 * KR_CONN_RTT_MAX, /* rtt "invalid value" */
-       KR_NS_DEAD          = KR_NS_MAX_SCORE - 1,  /* NS didn't answer via UDP transport,
-                                                    * TCP connection failed or
-                                                    * TLS handshake failed */
+       KR_NS_MAX_SCORE     = 20 * KR_CONN_RTT_MAX, /* max possible value */
        KR_NS_FWD_TIMEOUT   = 10000, /* timeout for upstream recursor  */
        KR_NS_TIMEOUT       = (95 * KR_CONN_RTT_MAX) / 100, /* timeout for upstream auth */
        KR_NS_LONG          = (3 * KR_NS_TIMEOUT) / 4,
@@ -43,6 +40,12 @@ enum kr_ns_score {
        KR_NS_GLUED         = 10
 };
 
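+/**
+ *  Scores reported for an NS that timed out or failed to connect: 4/3 of the matching timeout (integer division) plus 1. See kr_nsrep_update_rtt()
+ */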
+#define KR_NS_DEAD (((KR_NS_TIMEOUT * 4) + 3) / 3)
+#define KR_NS_FWD_DEAD (((KR_NS_FWD_TIMEOUT * 4) + 3) / 3)
+
 /** Once an NS has been marked as "timeouted", it won't participate in NS elections
  * for at least KR_NS_TIMEOUT_RETRY_INTERVAL milliseconds (now: one minute). */
 #define KR_NS_TIMEOUT_RETRY_INTERVAL 60000