]> git.ipfire.org Git - thirdparty/knot-resolver.git/commitdiff
iterate: rework error handling from iterate.c
authorŠtěpán Balážik <stepan.balazik@nic.cz>
Thu, 14 Jan 2021 14:39:31 +0000 (15:39 +0100)
committerŠtěpán Balážik <stepan.balazik@nic.cz>
Mon, 25 Jan 2021 14:42:55 +0000 (15:42 +0100)
Previously there where resolve_badmsg and resolve_error functions used
to apply workarounds. This is now moved to selection.c and iterate.c
just provides feedback using the server selection API. Errors are now
handled centrally in selection.c:error.

daemon/lua/kres-gen.lua
lib/layer/iterate.c
lib/resolve.c
lib/rplan.h
lib/selection.c
lib/selection.h
lib/selection_iter.c
tests/integration/deckard

index 36f21a2e1cff2a350910f7b323a974582a3da28f..e39f3a27012a6ca5f39418a363622c518ea896bc 100644 (file)
@@ -92,7 +92,7 @@ struct kr_qflags {
        _Bool AWAIT_IPV4 : 1;
        _Bool AWAIT_IPV6 : 1;
        _Bool AWAIT_CUT : 1;
-       _Bool SAFEMODE : 1;
+       _Bool NO_EDNS : 1;
        _Bool CACHED : 1;
        _Bool NO_CACHE : 1;
        _Bool EXPIRING : 1;
index aedaa411afce1643c461a1c84c39c683dee6f2ab..3eb233655bf81960973c2f7c9c5a1ff9d723ef09 100644 (file)
@@ -873,14 +873,6 @@ static int process_stub(knot_pkt_t *pkt, struct kr_request *req)
        return finalize_answer(pkt, req);
 }
 
-
-/** Error handling, RFC1034 5.3.3, 4d.
- * NOTE: returing this does not prevent further queries (by itself). */
-static int resolve_error(knot_pkt_t *pkt, struct kr_request *req)
-{
-       return KR_STATE_FAIL;
-}
-
 /* State-less single resolution iteration step, not needed. */
 static int reset(kr_layer_t *ctx)  { return KR_STATE_PRODUCE; }
 
@@ -992,23 +984,6 @@ static bool satisfied_by_additional(const struct kr_query *qry)
        return false;
 }
 
-static int resolve_badmsg(knot_pkt_t *pkt, struct kr_request *req, struct kr_query *query)
-{
-
-#ifndef STRICT_MODE
-       /* Work around broken auths/load balancers */
-       if (query->flags.SAFEMODE) {
-               return resolve_error(pkt, req);
-       } else {
-               query->flags.SAFEMODE = true;
-               query->flags.NO_MINIMIZE = true;
-               return KR_STATE_DONE;
-       }
-#else
-               return resolve_error(pkt, req);
-#endif
-}
-
 /** Resolve input query or continue resolution with followups.
  *
  *  This roughly corresponds to RFC1034, 5.3.3 4a-d.
@@ -1040,15 +1015,13 @@ static int resolve(kr_layer_t *ctx, knot_pkt_t *pkt)
 #ifdef STRICT_MODE
        if (pkt->parsed < pkt->size) {
                VERBOSE_MSG("<= pkt contains excessive data\n");
-               return resolve_badmsg(pkt, req, query);
+               return KR_STATE_FAIL;
        } else
 #endif
-       /* LATER: Query minimization, 0x20 randomization, EDNS… should really be
-        * set and managed by selection.c and SAFEMODE should be split and
-        * removed altogether because it's doing many things at once. */
        if (pkt->parsed <= KNOT_WIRE_HEADER_SIZE) {
                VERBOSE_MSG("<= malformed response (parsed %d)\n", (int)pkt->parsed);
-               return resolve_badmsg(pkt, req, query);
+               query->server_selection.error(query, req->upstream.transport, KR_SELECTION_MALFORMED);
+               return KR_STATE_FAIL;
        } else if (!is_paired_to_query(pkt, query)) {
                WITH_VERBOSE(query) {
                        const char *ns_str =
@@ -1056,10 +1029,8 @@ static int resolve(kr_layer_t *ctx, knot_pkt_t *pkt)
                        VERBOSE_MSG("<= ignoring mismatching response from %s\n",
                                        ns_str ? ns_str : "(kr_straddr failed)");
                }
-               /* Force TCP, to work around authoritatives messing up question
-                * without yielding to spoofed responses. */
-               query->flags.TCP = true;
-               return resolve_badmsg(pkt, req, query);
+               query->server_selection.error(query, req->upstream.transport, KR_SELECTION_MISMATCHED);
+               return KR_STATE_FAIL;
        } else if (knot_wire_get_tc(pkt->wire)) {
                VERBOSE_MSG("<= truncated response, failover to TCP\n");
                if (query) {
@@ -1067,7 +1038,7 @@ static int resolve(kr_layer_t *ctx, knot_pkt_t *pkt)
                        if (req->upstream.transport->protocol != KR_TRANSPORT_UDP) {
                                VERBOSE_MSG("<= TC=1 with TCP, bailing out\n");
                                query->server_selection.error(query, req->upstream.transport, KR_SELECTION_TRUNCATED);
-                               return resolve_error(pkt, req);
+                               return KR_STATE_FAIL;
                        }
                        query->server_selection.error(query, req->upstream.transport, KR_SELECTION_TRUNCATED);
                }
@@ -1084,7 +1055,7 @@ static int resolve(kr_layer_t *ctx, knot_pkt_t *pkt)
 
        // We can't return directly from the switch because we have to give feedback to server selection first
        int ret = 0;
-       int selection_error = -1;
+       int selection_error = KR_SELECTION_OK;
 
        /* Check response code. */
        switch(knot_wire_get_rcode(pkt->wire)) {
@@ -1092,8 +1063,9 @@ static int resolve(kr_layer_t *ctx, knot_pkt_t *pkt)
        case KNOT_RCODE_NXDOMAIN:
                break; /* OK */
        case KNOT_RCODE_YXDOMAIN: /* Basically a successful answer; name just doesn't fit. */
-               if (!kr_request_ensure_answer(req))
-                       return req->state;
+               if (!kr_request_ensure_answer(req)) {
+                       ret = req->state;
+               }
                knot_wire_set_rcode(req->answer->wire, KNOT_RCODE_YXDOMAIN);
                break;
        case KNOT_RCODE_REFUSED:
@@ -1101,41 +1073,37 @@ static int resolve(kr_layer_t *ctx, knot_pkt_t *pkt)
                         /* just pass answer through if in stub mode */
                        break;
                }
+               ret = KR_STATE_FAIL;
                selection_error = KR_SELECTION_REFUSED;
-               VERBOSE_MSG("<= rcode: %s\n", rcode ? rcode->name : "??");
-               ret = resolve_badmsg(pkt, req, query);
                break;
        case KNOT_RCODE_SERVFAIL:
                if (query->flags.STUB) {
                         /* just pass answer through if in stub mode */
                        break;
                }
+               ret = KR_STATE_FAIL;
                selection_error = KR_SELECTION_SERVFAIL;
-               VERBOSE_MSG("<= rcode: %s\n", rcode ? rcode->name : "??");
-               ret = resolve_badmsg(pkt, req, query);
                break;
        case KNOT_RCODE_FORMERR:
+               ret = KR_STATE_FAIL;
                selection_error = KR_SELECTION_FORMERROR;
-               VERBOSE_MSG("<= rcode: %s\n", rcode ? rcode->name : "??");
-               ret = resolve_badmsg(pkt, req, query);
                break;
        case KNOT_RCODE_NOTIMPL:
+               ret = KR_STATE_FAIL;
                selection_error = KR_SELECTION_NOTIMPL;
-               VERBOSE_MSG("<= rcode: %s\n", rcode ? rcode->name : "??");
-               ret = resolve_badmsg(pkt, req, query);
                break;
        default:
+               ret = KR_STATE_FAIL;
                selection_error = KR_SELECTION_OTHER_RCODE;
-               VERBOSE_MSG("<= rcode: %s\n", rcode ? rcode->name : "??");
-               ret = resolve_error(pkt, req);
                break;
        }
 
-       if (query->server_selection.initialized && selection_error != -1) {
+       if (query->server_selection.initialized) {
                query->server_selection.error(query, req->upstream.transport, selection_error);
        }
 
        if (ret) {
+               VERBOSE_MSG("<= rcode: %s\n", rcode ? rcode->name : "??");
                return ret;
        }
 
index 50ea4ac4960e928f32c709349cd3b152f27fa5f2..0d4d89c528587b3cf87f45623e8572cbb4c72193 100644 (file)
@@ -600,7 +600,7 @@ static void answer_finalize(struct kr_request *request)
 static int query_finalize(struct kr_request *request, struct kr_query *qry, knot_pkt_t *pkt)
 {
        knot_pkt_begin(pkt, KNOT_ADDITIONAL);
-       if (qry->flags.SAFEMODE)
+       if (qry->flags.NO_EDNS)
                return kr_ok();
        /* Remove any EDNS records from any previous iteration. */
        int ret = edns_erase_and_reserve(pkt);
@@ -1400,11 +1400,8 @@ int kr_resolve_produce(struct kr_request *request, struct kr_transport **transpo
                return KR_STATE_PRODUCE;
        }
 
-       qry->flags.SAFEMODE = qry->flags.SAFEMODE || (*transport)->safe_mode;
-
-       /* Randomize query case (if not in safe mode or turned off) */
-       qry->secret = (qry->flags.SAFEMODE || qry->flags.NO_0X20)
-                       ? 0 : kr_rand_bytes(sizeof(qry->secret));
+       /* Randomize query case (if not in not turned off) */
+       qry->secret = qry->flags.NO_0X20 ? 0 : kr_rand_bytes(sizeof(qry->secret));
        knot_dname_t *qname_raw = knot_pkt_qname(packet);
        randomized_qname_case(qname_raw, qry->secret);
 
@@ -1516,7 +1513,7 @@ int kr_resolve_checkout(struct kr_request *request, const struct sockaddr *src,
        }
 
        /* Write down OPT unless in safemode */
-       if (!(qry->flags.SAFEMODE)) {
+       if (!(qry->flags.NO_EDNS)) {
                ret = edns_put(packet, true);
                if (ret != 0) {
                        return kr_error(EINVAL);
index c3d28a263d52717790d481cfe98355372db901ab..fbe91c83034a0cba751cee0508f59cbef4143649 100644 (file)
@@ -24,7 +24,7 @@ struct kr_qflags {
        bool AWAIT_IPV4 : 1;     /**< Query is waiting for A address. */
        bool AWAIT_IPV6 : 1;     /**< Query is waiting for AAAA address. */
        bool AWAIT_CUT : 1;      /**< Query is waiting for zone cut lookup */
-       bool SAFEMODE : 1;       /**< Don't use fancy stuff (EDNS, 0x20, ...) */
+       bool NO_EDNS : 1;        /**< Don't use EDNS. */
        bool CACHED : 1;         /**< Query response is cached. */
        bool NO_CACHE : 1;       /**< No cache for lookup; exception: finding NSs and subqueries. */
        bool EXPIRING : 1;       /**< Query response is cached, but expiring. */
index 2c155e4a2978cc15a8d6fb9d9bec5269e629adef..33161db9694c1328459907c105293922a7f0fee5 100644 (file)
 #define EPSILON_NOMIN 1
 #define EPSILON_DENOM 20
 
-/**
- * If one of the errors set to true is encountered,
- * there is no point in asking this server again.
- */
-static const bool UNRECOVERABLE_ERRORS[] = {
-       [KR_SELECTION_QUERY_TIMEOUT] = false,
-       [KR_SELECTION_TLS_HANDSHAKE_FAILED] = false,
-       [KR_SELECTION_TCP_CONNECT_FAILED] = false,
-       [KR_SELECTION_TCP_CONNECT_TIMEOUT] = false,
-       [KR_SELECTION_REFUSED] = true,
-       [KR_SELECTION_SERVFAIL] = true,
-       [KR_SELECTION_FORMERROR] = false,
-       [KR_SELECTION_NOTIMPL] = true,
-       [KR_SELECTION_OTHER_RCODE] = true,
-       [KR_SELECTION_TRUNCATED] = false,
-       [KR_SELECTION_DNSSEC_ERROR] = true,
-       [KR_SELECTION_LAME_DELEGATION] = true,
-       [KR_SELECTION_BAD_CNAME] = true,
-};
-
-
 /* Simple cache interface follows */
 
 static knot_db_val_t cache_key(const uint8_t *ip, size_t len)
@@ -69,7 +48,7 @@ static knot_db_val_t cache_key(const uint8_t *ip, size_t len)
        return key;
 }
 
-/* First value of timeout will be calculated as SRTT+4*DEFAULT_TIMEOUT
+/* First value of timeout will be calculated as SRTT+4*VARIANCE
  * by calc_timeout(), so it'll be equal to DEFAULT_TIMEOUT. */
 static const struct rtt_state default_rtt_state = { .srtt = 0,
                                                    .variance = DEFAULT_TIMEOUT / 4,
@@ -387,7 +366,6 @@ struct kr_transport *select_transport(struct choice choices[], int choices_len,
                .ns_name = chosen->address_state->ns_name,
                .protocol = protocol,
                .timeout = timeout,
-               .safe_mode = chosen->address_state->errors[KR_SELECTION_FORMERROR],
        };
 
        int port = chosen->port;
@@ -497,42 +475,78 @@ void error(struct kr_query *qry, struct address_state *addr_state,
           const struct kr_transport *transport,
           enum kr_selection_error sel_error)
 {
-       assert(sel_error >= KR_SELECTION_OK && sel_error < KR_SELECTION_NUMBER_OF_ERRORS);
        if (!transport || !addr_state) {
                /* Answers from cache have NULL transport, ignore them. */
                return;
        }
 
-       if (sel_error == KR_SELECTION_QUERY_TIMEOUT) {
+       switch (sel_error) {
+       case KR_SELECTION_OK:
+               return;
+       case KR_SELECTION_QUERY_TIMEOUT:
                qry->server_selection.local_state->timeouts++;
-               // Make sure the query was chosen by this query
+               /* Make sure the query was chosen by this query. */
                if (!transport->deduplicated) {
                        cache_timeout(transport, addr_state,
                                      &qry->request->ctx->cache);
                }
-       }
-
-       if (sel_error == KR_SELECTION_TRUNCATED &&
-           transport->protocol == KR_TRANSPORT_UDP) {
-               /* Don't punish the server that told us to switch to TCP. */
-               qry->server_selection.local_state->truncated = true;
-       } else {
-               if (sel_error == KR_SELECTION_TRUNCATED) {
-                       /* TRUNCATED over TCP/TLS, upstream is broken. */
-                       addr_state->unrecoverable_errors++;
+               break;
+       case KR_SELECTION_FORMERROR:
+               if (qry->flags.NO_EDNS) {
+                       addr_state->broken = true;
+               } else {
+                       qry->flags.NO_EDNS = true;
                }
-
-               if (UNRECOVERABLE_ERRORS[sel_error]) {
-                       addr_state->unrecoverable_errors++;
+               break;
+       case KR_SELECTION_MISMATCHED:
+               if (qry->flags.NO_0X20 && qry->flags.TCP) {
+                       addr_state->broken = true;
+               } else {
+                       qry->flags.TCP = true;
+                       qry->flags.NO_0X20 = true;
                }
-
-               if (sel_error == KR_SELECTION_FORMERROR && transport->safe_mode) {
-                       addr_state->unrecoverable_errors++;
+               break;
+       case KR_SELECTION_TRUNCATED:
+               if (transport->protocol == KR_TRANSPORT_UDP) {
+                       qry->server_selection.local_state->truncated = true;
+                       /* TC=1 over UDP is not an error, so we compensate. */
+                       addr_state->error_count--;
+               } else {
+                       addr_state->broken = true;
                }
-
-               addr_state->errors[sel_error]++;
-               addr_state->error_count++;
+               break;
+       case KR_SELECTION_REFUSED:
+       case KR_SELECTION_SERVFAIL:
+               if (qry->flags.NO_MINIMIZE && qry->flags.NO_0X20 && qry->flags.TCP) {
+                       addr_state->broken = true;
+               } else if (qry->flags.NO_MINIMIZE) {
+                       qry->flags.NO_0X20 = true;
+                       qry->flags.TCP = true;
+               } else {
+                       qry->flags.NO_MINIMIZE = true;
+               }
+               break;
+       case KR_SELECTION_NOTIMPL:
+       case KR_SELECTION_OTHER_RCODE:
+       case KR_SELECTION_DNSSEC_ERROR:
+       case KR_SELECTION_LAME_DELEGATION:
+       case KR_SELECTION_BAD_CNAME:
+       case KR_SELECTION_MALFORMED:
+               /* These errors are fatal, no point in trying this server again. */
+               addr_state->broken = true;
+               break;
+       case KR_SELECTION_TLS_HANDSHAKE_FAILED:
+       case KR_SELECTION_TCP_CONNECT_FAILED:
+       case KR_SELECTION_TCP_CONNECT_TIMEOUT:
+               /* These might get resolved by retrying. */
+               break;
+       default:
+               assert(0);
+               break;
        }
+
+       addr_state->error_count++;
+       addr_state->errors[sel_error]++;
        
        WITH_VERBOSE(qry)
        {
index 18cefccc26ddfce61a6119d83fdb003c7b1c9ff3..c4268b0f3d9a31e7e5e7a1b9702133885b8ad487 100644 (file)
@@ -37,10 +37,13 @@ enum kr_selection_error {
        KR_SELECTION_OTHER_RCODE,
 
        // DNS errors
+       KR_SELECTION_MALFORMED,
+       /** Name or type mismatch. */
+       KR_SELECTION_MISMATCHED,
        KR_SELECTION_TRUNCATED,
        KR_SELECTION_DNSSEC_ERROR,
        KR_SELECTION_LAME_DELEGATION,
-       /** Too long chain, or cycle. */
+       /** Too long chain, or cycle. */
        KR_SELECTION_BAD_CNAME,
 
        /** Leave this last, as it is used as array size. */
@@ -69,7 +72,6 @@ struct kr_transport {
        /** True iff transport was set in worker.c:subreq_finalize,
         * that means it may be different from the one originally chosen one.*/
        bool deduplicated;
-       bool safe_mode; /**< Turn on SAFEMODE for this transport */
 };
 
 struct local_state {
@@ -156,7 +158,7 @@ struct address_state {
        */
        int choice_array_index;
        int error_count;
-       int unrecoverable_errors;
+       bool broken;
        int errors[KR_SELECTION_NUMBER_OF_ERRORS];
 };
 
index 394609f149df1fce01e07dfefe554ca48d3bcc0a..e9870c13d56c2a0d3b9c06c7ecbbf72c2103be56 100644 (file)
@@ -140,7 +140,7 @@ static int get_valid_addresses(struct iter_local_state *local_state,
                uint8_t *address = (uint8_t *)trie_it_key(it, &address_len);
                struct address_state *address_state = *trie_it_val(it);
                if (address_state->generation == local_state->generation &&
-                   !address_state->unrecoverable_errors) {
+                   !address_state->broken) {
                        choices[count] = (struct choice){
                                .address_len = address_len,
                                .address_state = address_state,
@@ -311,10 +311,9 @@ void iter_choose_transport(struct kr_query *qry, struct kr_transport **transport
                        break;
                default:
                        VERBOSE_MSG(qry, "=> id: '%05u' choosing: '%s'@'%s'"
-                                   " with timeout %u ms zone cut: '%s'%s\n",
+                                   " with timeout %u ms zone cut: '%s'\n",
                                    qry->id, ns_name, ns_str ? ns_str : "",
-                                   (*transport)->timeout, zonecut_str,
-                                   (*transport)->safe_mode ? " SAFEMODE" : "");
+                                   (*transport)->timeout, zonecut_str);
                        break;
                }
        } else {
index c146dffd99ec244cfd678f4f3c6e829062739a9d..8e4d318beebc2d50e95f749a36c39f44a0ac9719 160000 (submodule)
@@ -1 +1 @@
-Subproject commit c146dffd99ec244cfd678f4f3c6e829062739a9d
+Subproject commit 8e4d318beebc2d50e95f749a36c39f44a0ac9719