From: Štěpán Balážik Date: Thu, 14 Jan 2021 14:39:31 +0000 (+0100) Subject: iterate: rework error handling from iterate.c X-Git-Tag: v5.3.0~15^2~14 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=21c1ec3034d27a36b2bf48ad5dfe92b82ef48179;p=thirdparty%2Fknot-resolver.git iterate: rework error handling from iterate.c Previously there where resolve_badmsg and resolve_error functions used to apply workarounds. This is now moved to selection.c and iterate.c just provides feedback using the server selection API. Errors are now handled centrally in selection.c:error. --- diff --git a/daemon/lua/kres-gen.lua b/daemon/lua/kres-gen.lua index 36f21a2e1..e39f3a270 100644 --- a/daemon/lua/kres-gen.lua +++ b/daemon/lua/kres-gen.lua @@ -92,7 +92,7 @@ struct kr_qflags { _Bool AWAIT_IPV4 : 1; _Bool AWAIT_IPV6 : 1; _Bool AWAIT_CUT : 1; - _Bool SAFEMODE : 1; + _Bool NO_EDNS : 1; _Bool CACHED : 1; _Bool NO_CACHE : 1; _Bool EXPIRING : 1; diff --git a/lib/layer/iterate.c b/lib/layer/iterate.c index aedaa411a..3eb233655 100644 --- a/lib/layer/iterate.c +++ b/lib/layer/iterate.c @@ -873,14 +873,6 @@ static int process_stub(knot_pkt_t *pkt, struct kr_request *req) return finalize_answer(pkt, req); } - -/** Error handling, RFC1034 5.3.3, 4d. - * NOTE: returing this does not prevent further queries (by itself). */ -static int resolve_error(knot_pkt_t *pkt, struct kr_request *req) -{ - return KR_STATE_FAIL; -} - /* State-less single resolution iteration step, not needed. */ static int reset(kr_layer_t *ctx) { return KR_STATE_PRODUCE; } @@ -992,23 +984,6 @@ static bool satisfied_by_additional(const struct kr_query *qry) return false; } -static int resolve_badmsg(knot_pkt_t *pkt, struct kr_request *req, struct kr_query *query) -{ - -#ifndef STRICT_MODE - /* Work around broken auths/load balancers */ - if (query->flags.SAFEMODE) { - return resolve_error(pkt, req); - } else { - query->flags.SAFEMODE = true; - query->flags.NO_MINIMIZE = true; - return KR_STATE_DONE; - } -#else - return resolve_error(pkt, req); -#endif -} - /** Resolve input query or continue resolution with followups. * * This roughly corresponds to RFC1034, 5.3.3 4a-d. @@ -1040,15 +1015,13 @@ static int resolve(kr_layer_t *ctx, knot_pkt_t *pkt) #ifdef STRICT_MODE if (pkt->parsed < pkt->size) { VERBOSE_MSG("<= pkt contains excessive data\n"); - return resolve_badmsg(pkt, req, query); + return KR_STATE_FAIL; } else #endif - /* LATER: Query minimization, 0x20 randomization, EDNS… should really be - * set and managed by selection.c and SAFEMODE should be split and - * removed altogether because it's doing many things at once. */ if (pkt->parsed <= KNOT_WIRE_HEADER_SIZE) { VERBOSE_MSG("<= malformed response (parsed %d)\n", (int)pkt->parsed); - return resolve_badmsg(pkt, req, query); + query->server_selection.error(query, req->upstream.transport, KR_SELECTION_MALFORMED); + return KR_STATE_FAIL; } else if (!is_paired_to_query(pkt, query)) { WITH_VERBOSE(query) { const char *ns_str = @@ -1056,10 +1029,8 @@ static int resolve(kr_layer_t *ctx, knot_pkt_t *pkt) VERBOSE_MSG("<= ignoring mismatching response from %s\n", ns_str ? ns_str : "(kr_straddr failed)"); } - /* Force TCP, to work around authoritatives messing up question - * without yielding to spoofed responses. */ - query->flags.TCP = true; - return resolve_badmsg(pkt, req, query); + query->server_selection.error(query, req->upstream.transport, KR_SELECTION_MISMATCHED); + return KR_STATE_FAIL; } else if (knot_wire_get_tc(pkt->wire)) { VERBOSE_MSG("<= truncated response, failover to TCP\n"); if (query) { @@ -1067,7 +1038,7 @@ static int resolve(kr_layer_t *ctx, knot_pkt_t *pkt) if (req->upstream.transport->protocol != KR_TRANSPORT_UDP) { VERBOSE_MSG("<= TC=1 with TCP, bailing out\n"); query->server_selection.error(query, req->upstream.transport, KR_SELECTION_TRUNCATED); - return resolve_error(pkt, req); + return KR_STATE_FAIL; } query->server_selection.error(query, req->upstream.transport, KR_SELECTION_TRUNCATED); } @@ -1084,7 +1055,7 @@ static int resolve(kr_layer_t *ctx, knot_pkt_t *pkt) // We can't return directly from the switch because we have to give feedback to server selection first int ret = 0; - int selection_error = -1; + int selection_error = KR_SELECTION_OK; /* Check response code. */ switch(knot_wire_get_rcode(pkt->wire)) { @@ -1092,8 +1063,9 @@ static int resolve(kr_layer_t *ctx, knot_pkt_t *pkt) case KNOT_RCODE_NXDOMAIN: break; /* OK */ case KNOT_RCODE_YXDOMAIN: /* Basically a successful answer; name just doesn't fit. */ - if (!kr_request_ensure_answer(req)) - return req->state; + if (!kr_request_ensure_answer(req)) { + ret = req->state; + } knot_wire_set_rcode(req->answer->wire, KNOT_RCODE_YXDOMAIN); break; case KNOT_RCODE_REFUSED: @@ -1101,41 +1073,37 @@ static int resolve(kr_layer_t *ctx, knot_pkt_t *pkt) /* just pass answer through if in stub mode */ break; } + ret = KR_STATE_FAIL; selection_error = KR_SELECTION_REFUSED; - VERBOSE_MSG("<= rcode: %s\n", rcode ? rcode->name : "??"); - ret = resolve_badmsg(pkt, req, query); break; case KNOT_RCODE_SERVFAIL: if (query->flags.STUB) { /* just pass answer through if in stub mode */ break; } + ret = KR_STATE_FAIL; selection_error = KR_SELECTION_SERVFAIL; - VERBOSE_MSG("<= rcode: %s\n", rcode ? rcode->name : "??"); - ret = resolve_badmsg(pkt, req, query); break; case KNOT_RCODE_FORMERR: + ret = KR_STATE_FAIL; selection_error = KR_SELECTION_FORMERROR; - VERBOSE_MSG("<= rcode: %s\n", rcode ? rcode->name : "??"); - ret = resolve_badmsg(pkt, req, query); break; case KNOT_RCODE_NOTIMPL: + ret = KR_STATE_FAIL; selection_error = KR_SELECTION_NOTIMPL; - VERBOSE_MSG("<= rcode: %s\n", rcode ? rcode->name : "??"); - ret = resolve_badmsg(pkt, req, query); break; default: + ret = KR_STATE_FAIL; selection_error = KR_SELECTION_OTHER_RCODE; - VERBOSE_MSG("<= rcode: %s\n", rcode ? rcode->name : "??"); - ret = resolve_error(pkt, req); break; } - if (query->server_selection.initialized && selection_error != -1) { + if (query->server_selection.initialized) { query->server_selection.error(query, req->upstream.transport, selection_error); } if (ret) { + VERBOSE_MSG("<= rcode: %s\n", rcode ? rcode->name : "??"); return ret; } diff --git a/lib/resolve.c b/lib/resolve.c index 50ea4ac49..0d4d89c52 100644 --- a/lib/resolve.c +++ b/lib/resolve.c @@ -600,7 +600,7 @@ static void answer_finalize(struct kr_request *request) static int query_finalize(struct kr_request *request, struct kr_query *qry, knot_pkt_t *pkt) { knot_pkt_begin(pkt, KNOT_ADDITIONAL); - if (qry->flags.SAFEMODE) + if (qry->flags.NO_EDNS) return kr_ok(); /* Remove any EDNS records from any previous iteration. */ int ret = edns_erase_and_reserve(pkt); @@ -1400,11 +1400,8 @@ int kr_resolve_produce(struct kr_request *request, struct kr_transport **transpo return KR_STATE_PRODUCE; } - qry->flags.SAFEMODE = qry->flags.SAFEMODE || (*transport)->safe_mode; - - /* Randomize query case (if not in safe mode or turned off) */ - qry->secret = (qry->flags.SAFEMODE || qry->flags.NO_0X20) - ? 0 : kr_rand_bytes(sizeof(qry->secret)); + /* Randomize query case (if not in not turned off) */ + qry->secret = qry->flags.NO_0X20 ? 0 : kr_rand_bytes(sizeof(qry->secret)); knot_dname_t *qname_raw = knot_pkt_qname(packet); randomized_qname_case(qname_raw, qry->secret); @@ -1516,7 +1513,7 @@ int kr_resolve_checkout(struct kr_request *request, const struct sockaddr *src, } /* Write down OPT unless in safemode */ - if (!(qry->flags.SAFEMODE)) { + if (!(qry->flags.NO_EDNS)) { ret = edns_put(packet, true); if (ret != 0) { return kr_error(EINVAL); diff --git a/lib/rplan.h b/lib/rplan.h index c3d28a263..fbe91c830 100644 --- a/lib/rplan.h +++ b/lib/rplan.h @@ -24,7 +24,7 @@ struct kr_qflags { bool AWAIT_IPV4 : 1; /**< Query is waiting for A address. */ bool AWAIT_IPV6 : 1; /**< Query is waiting for AAAA address. */ bool AWAIT_CUT : 1; /**< Query is waiting for zone cut lookup */ - bool SAFEMODE : 1; /**< Don't use fancy stuff (EDNS, 0x20, ...) */ + bool NO_EDNS : 1; /**< Don't use EDNS. */ bool CACHED : 1; /**< Query response is cached. */ bool NO_CACHE : 1; /**< No cache for lookup; exception: finding NSs and subqueries. */ bool EXPIRING : 1; /**< Query response is cached, but expiring. */ diff --git a/lib/selection.c b/lib/selection.c index 2c155e4a2..33161db96 100644 --- a/lib/selection.c +++ b/lib/selection.c @@ -31,27 +31,6 @@ #define EPSILON_NOMIN 1 #define EPSILON_DENOM 20 -/** - * If one of the errors set to true is encountered, - * there is no point in asking this server again. - */ -static const bool UNRECOVERABLE_ERRORS[] = { - [KR_SELECTION_QUERY_TIMEOUT] = false, - [KR_SELECTION_TLS_HANDSHAKE_FAILED] = false, - [KR_SELECTION_TCP_CONNECT_FAILED] = false, - [KR_SELECTION_TCP_CONNECT_TIMEOUT] = false, - [KR_SELECTION_REFUSED] = true, - [KR_SELECTION_SERVFAIL] = true, - [KR_SELECTION_FORMERROR] = false, - [KR_SELECTION_NOTIMPL] = true, - [KR_SELECTION_OTHER_RCODE] = true, - [KR_SELECTION_TRUNCATED] = false, - [KR_SELECTION_DNSSEC_ERROR] = true, - [KR_SELECTION_LAME_DELEGATION] = true, - [KR_SELECTION_BAD_CNAME] = true, -}; - - /* Simple cache interface follows */ static knot_db_val_t cache_key(const uint8_t *ip, size_t len) @@ -69,7 +48,7 @@ static knot_db_val_t cache_key(const uint8_t *ip, size_t len) return key; } -/* First value of timeout will be calculated as SRTT+4*DEFAULT_TIMEOUT +/* First value of timeout will be calculated as SRTT+4*VARIANCE * by calc_timeout(), so it'll be equal to DEFAULT_TIMEOUT. */ static const struct rtt_state default_rtt_state = { .srtt = 0, .variance = DEFAULT_TIMEOUT / 4, @@ -387,7 +366,6 @@ struct kr_transport *select_transport(struct choice choices[], int choices_len, .ns_name = chosen->address_state->ns_name, .protocol = protocol, .timeout = timeout, - .safe_mode = chosen->address_state->errors[KR_SELECTION_FORMERROR], }; int port = chosen->port; @@ -497,42 +475,78 @@ void error(struct kr_query *qry, struct address_state *addr_state, const struct kr_transport *transport, enum kr_selection_error sel_error) { - assert(sel_error >= KR_SELECTION_OK && sel_error < KR_SELECTION_NUMBER_OF_ERRORS); if (!transport || !addr_state) { /* Answers from cache have NULL transport, ignore them. */ return; } - if (sel_error == KR_SELECTION_QUERY_TIMEOUT) { + switch (sel_error) { + case KR_SELECTION_OK: + return; + case KR_SELECTION_QUERY_TIMEOUT: qry->server_selection.local_state->timeouts++; - // Make sure the query was chosen by this query + /* Make sure the query was chosen by this query. */ if (!transport->deduplicated) { cache_timeout(transport, addr_state, &qry->request->ctx->cache); } - } - - if (sel_error == KR_SELECTION_TRUNCATED && - transport->protocol == KR_TRANSPORT_UDP) { - /* Don't punish the server that told us to switch to TCP. */ - qry->server_selection.local_state->truncated = true; - } else { - if (sel_error == KR_SELECTION_TRUNCATED) { - /* TRUNCATED over TCP/TLS, upstream is broken. */ - addr_state->unrecoverable_errors++; + break; + case KR_SELECTION_FORMERROR: + if (qry->flags.NO_EDNS) { + addr_state->broken = true; + } else { + qry->flags.NO_EDNS = true; } - - if (UNRECOVERABLE_ERRORS[sel_error]) { - addr_state->unrecoverable_errors++; + break; + case KR_SELECTION_MISMATCHED: + if (qry->flags.NO_0X20 && qry->flags.TCP) { + addr_state->broken = true; + } else { + qry->flags.TCP = true; + qry->flags.NO_0X20 = true; } - - if (sel_error == KR_SELECTION_FORMERROR && transport->safe_mode) { - addr_state->unrecoverable_errors++; + break; + case KR_SELECTION_TRUNCATED: + if (transport->protocol == KR_TRANSPORT_UDP) { + qry->server_selection.local_state->truncated = true; + /* TC=1 over UDP is not an error, so we compensate. */ + addr_state->error_count--; + } else { + addr_state->broken = true; } - - addr_state->errors[sel_error]++; - addr_state->error_count++; + break; + case KR_SELECTION_REFUSED: + case KR_SELECTION_SERVFAIL: + if (qry->flags.NO_MINIMIZE && qry->flags.NO_0X20 && qry->flags.TCP) { + addr_state->broken = true; + } else if (qry->flags.NO_MINIMIZE) { + qry->flags.NO_0X20 = true; + qry->flags.TCP = true; + } else { + qry->flags.NO_MINIMIZE = true; + } + break; + case KR_SELECTION_NOTIMPL: + case KR_SELECTION_OTHER_RCODE: + case KR_SELECTION_DNSSEC_ERROR: + case KR_SELECTION_LAME_DELEGATION: + case KR_SELECTION_BAD_CNAME: + case KR_SELECTION_MALFORMED: + /* These errors are fatal, no point in trying this server again. */ + addr_state->broken = true; + break; + case KR_SELECTION_TLS_HANDSHAKE_FAILED: + case KR_SELECTION_TCP_CONNECT_FAILED: + case KR_SELECTION_TCP_CONNECT_TIMEOUT: + /* These might get resolved by retrying. */ + break; + default: + assert(0); + break; } + + addr_state->error_count++; + addr_state->errors[sel_error]++; WITH_VERBOSE(qry) { diff --git a/lib/selection.h b/lib/selection.h index 18cefccc2..c4268b0f3 100644 --- a/lib/selection.h +++ b/lib/selection.h @@ -37,10 +37,13 @@ enum kr_selection_error { KR_SELECTION_OTHER_RCODE, // DNS errors + KR_SELECTION_MALFORMED, + /** Name or type mismatch. */ + KR_SELECTION_MISMATCHED, KR_SELECTION_TRUNCATED, KR_SELECTION_DNSSEC_ERROR, KR_SELECTION_LAME_DELEGATION, - /** Too long chain, or cycle. */ + /** Too long chain, or a cycle. */ KR_SELECTION_BAD_CNAME, /** Leave this last, as it is used as array size. */ @@ -69,7 +72,6 @@ struct kr_transport { /** True iff transport was set in worker.c:subreq_finalize, * that means it may be different from the one originally chosen one.*/ bool deduplicated; - bool safe_mode; /**< Turn on SAFEMODE for this transport */ }; struct local_state { @@ -156,7 +158,7 @@ struct address_state { */ int choice_array_index; int error_count; - int unrecoverable_errors; + bool broken; int errors[KR_SELECTION_NUMBER_OF_ERRORS]; }; diff --git a/lib/selection_iter.c b/lib/selection_iter.c index 394609f14..e9870c13d 100644 --- a/lib/selection_iter.c +++ b/lib/selection_iter.c @@ -140,7 +140,7 @@ static int get_valid_addresses(struct iter_local_state *local_state, uint8_t *address = (uint8_t *)trie_it_key(it, &address_len); struct address_state *address_state = *trie_it_val(it); if (address_state->generation == local_state->generation && - !address_state->unrecoverable_errors) { + !address_state->broken) { choices[count] = (struct choice){ .address_len = address_len, .address_state = address_state, @@ -311,10 +311,9 @@ void iter_choose_transport(struct kr_query *qry, struct kr_transport **transport break; default: VERBOSE_MSG(qry, "=> id: '%05u' choosing: '%s'@'%s'" - " with timeout %u ms zone cut: '%s'%s\n", + " with timeout %u ms zone cut: '%s'\n", qry->id, ns_name, ns_str ? ns_str : "", - (*transport)->timeout, zonecut_str, - (*transport)->safe_mode ? " SAFEMODE" : ""); + (*transport)->timeout, zonecut_str); break; } } else { diff --git a/tests/integration/deckard b/tests/integration/deckard index c146dffd9..8e4d318be 160000 --- a/tests/integration/deckard +++ b/tests/integration/deckard @@ -1 +1 @@ -Subproject commit c146dffd99ec244cfd678f4f3c6e829062739a9d +Subproject commit 8e4d318beebc2d50e95f749a36c39f44a0ac9719