#include "contrib/ucw/lib.h"
#include "daemon/engine.h"
-#include "lib/nsrep.h"
+#include "lib/selection.h"
typedef kr_nsrep_lru_t lru_bench_t;
/* Clear reputation tables */
struct kr_context *ctx = &the_worker->engine->resolver;
- lru_reset(ctx->cache_rtt);
- lru_reset(ctx->cache_rep);
lru_reset(ctx->cache_cookie);
lua_pushboolean(L, true);
return 1;
#include "kresconfig.h"
#include "daemon/engine.h"
#include "daemon/ffimodule.h"
-#include "daemon/worker.h"
-#include "lib/nsrep.h"
+#include "lib/selection.h"
#include "lib/cache/api.h"
#include "lib/defines.h"
#include "lib/cache/cdb_lmdb.h"
engine->resolver.tls_padding = -1;
/* Empty init; filled via ./lua/postconfig.lua */
kr_zonecut_init(&engine->resolver.root_hints, (const uint8_t *)"", engine->pool);
- /* Open NS rtt + reputation cache */
- lru_create(&engine->resolver.cache_rtt, LRU_RTT_SIZE, NULL, NULL);
- lru_create(&engine->resolver.cache_rep, LRU_REP_SIZE, NULL, NULL);
lru_create(&engine->resolver.cache_cookie, LRU_COOKIES_SIZE, NULL, NULL);
/* Load basic modules */
kr_cache_close(&engine->resolver.cache);
/* The LRUs are currently malloc-ated and need to be freed. */
- lru_free(engine->resolver.cache_rtt);
- lru_free(engine->resolver.cache_rep);
lru_free(engine->resolver.cache_cookie);
network_deinit(&engine->net);
extern const knot_dump_style_t KNOT_DUMP_STYLE_DEFAULT;
struct kr_cdb_api {};
struct lru {};
+typedef enum {KNOT_ANSWER, KNOT_AUTHORITY, KNOT_ADDITIONAL} knot_section_t;
+typedef struct {
+ uint16_t pos;
+ uint16_t flags;
+ uint16_t compress_ptr[16];
+} knot_rrinfo_t;
+typedef unsigned char knot_dname_t;
typedef struct knot_mm {
void *ctx, *alloc, *free;
typedef void (*trace_log_f) (const struct kr_request *, const char *);
typedef void (*trace_callback_f)(struct kr_request *);
typedef uint8_t * (*alloc_wire_f)(struct kr_request *req, uint16_t *maxlen);
-typedef enum {KNOT_ANSWER, KNOT_AUTHORITY, KNOT_ADDITIONAL} knot_section_t;
-typedef struct {
- uint16_t pos;
- uint16_t flags;
- uint16_t compress_ptr[16];
-} knot_rrinfo_t;
-typedef unsigned char knot_dname_t;
+typedef bool (*addr_info_f)(struct sockaddr*);
typedef struct {
knot_dname_t *_owner;
uint32_t _ttl;
size_t len;
size_t cap;
} ranked_rr_array_t;
+typedef struct {
+ union inaddr *at;
+ size_t len;
+ size_t cap;
+} inaddr_array_t;
struct kr_zonecut {
knot_dname_t *name;
knot_rrset_t *key;
} qsource;
struct {
unsigned int rtt;
- const struct sockaddr *addr;
+ const struct kr_transport *transport;
} upstream;
struct kr_qflags options;
int state;
int vars_ref;
knot_mm_t pool;
unsigned int uid;
+ struct {
+ addr_info_f is_tls_capable;
+ addr_info_f is_tcp_connected;
+ addr_info_f is_tcp_waiting;
+ inaddr_array_t forwarding_targets;
+ } selection_context;
unsigned int count_no_nsaddr;
unsigned int count_fail_row;
alloc_wire_f alloc_wire_cb;
void *lib;
void *data;
};
+struct kr_server_selection {
+ _Bool initialized;
+ void (*choose_transport)(struct kr_query *, struct kr_transport **);
+ void (*update_rtt)(struct kr_query *, const struct kr_transport *, unsigned int);
+ void (*error)(struct kr_query *, const struct kr_transport *, enum kr_selection_error);
+ struct local_state *local_state;
+};
kr_layer_t kr_layer_t_static;
typedef int32_t (*kr_stale_cb)(int32_t ttl, const knot_dname_t *owner, uint16_t type,
const struct kr_query *qry);
void kr_rrset_init(knot_rrset_t *rrset, knot_dname_t *owner,
uint16_t type, uint16_t rclass, uint32_t ttl);
-struct kr_nsrep {
- unsigned int score;
- unsigned int reputation;
- const knot_dname_t *name;
- struct kr_context *ctx;
- /* beware: hidden stub, to avoid hardcoding sockaddr lengths */
-};
struct kr_query {
struct kr_query *parent;
knot_dname_t *sname;
struct kr_query *cname_parent;
struct kr_request *request;
kr_stale_cb stale_cb;
- struct kr_nsrep ns;
+ struct kr_server_selection server_selection;
};
struct kr_context {
struct kr_qflags options;
map_t negative_anchors;
struct kr_zonecut root_hints;
struct kr_cache cache;
+ unsigned int cache_rtt_tout_retry_interval;
char _stub[];
};
+struct kr_transport {
+ knot_dname_t *ns_name;
+ /* beware: hidden stub, to avoid hardcoding sockaddr lengths */
+};
const char *knot_strerror(int);
knot_dname_t *knot_dname_copy(const knot_dname_t *, knot_mm_t *);
knot_dname_t *knot_dname_from_str(uint8_t *, const char *, size_t);
int kr_rplan_pop(struct kr_rplan *, struct kr_query *);
struct kr_query *kr_rplan_resolved(struct kr_rplan *);
struct kr_query *kr_rplan_last(struct kr_rplan *);
-int kr_nsrep_set(struct kr_query *, size_t, const struct sockaddr *);
+int kr_forward_add_target(struct kr_request *, const struct sockaddr *);
void kr_log_req(const struct kr_request * const, uint32_t, const unsigned int, const char *, const char *, ...);
void kr_log_q(const struct kr_query * const, const char *, const char *, ...);
int kr_make_query(struct kr_query *, knot_pkt_t *);
struct lru {};
"
+${CDEFS} ${LIBKRES} types <<-EOF
+ knot_section_t
+ knot_rrinfo_t
+ knot_dname_t
+ #knot_rdata_t
+ #knot_rdataset_t
+EOF
+
# The generator doesn't work well with typedefs of functions.
printf "
typedef struct knot_mm {
typedef void (*trace_log_f) (const struct kr_request *, const char *);
typedef void (*trace_callback_f)(struct kr_request *);
typedef uint8_t * (*alloc_wire_f)(struct kr_request *req, uint16_t *maxlen);
+typedef bool (*addr_info_f)(struct sockaddr*);
"
-${CDEFS} ${LIBKRES} types <<-EOF
- knot_section_t
- knot_rrinfo_t
- knot_dname_t
- #knot_rdata_t
- #knot_rdataset_t
-EOF
-
genResType() {
echo "$1" | ${CDEFS} ${LIBKRES} types
}
struct kr_qflags
ranked_rr_array_entry_t
ranked_rr_array_t
+ inaddr_array_t
struct kr_zonecut
kr_qarray_t
struct kr_rplan
# lib/module.h
struct kr_prop
struct kr_module
+ struct kr_server_selection
EOF
# a static variable; the line might not be simple to generate
## Some definitions would need too many deps, so shorten them.
-genResType "struct kr_nsrep" | sed '/union/,$ d'
-printf "\t/* beware: hidden stub, to avoid hardcoding sockaddr lengths */\n};\n"
-
genResType "struct kr_query"
-genResType "struct kr_context" | sed '/kr_nsrep_rtt_lru_t/,$ d'
+genResType "struct kr_context" | sed '/module_array_t/,$ d'
printf "\tchar _stub[];\n};\n"
+
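+# The kr_transport cdef is cut at its union (address) member so that sockaddr sizes are not hardcoded.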
+echo "struct kr_transport" | ${CDEFS} ${KRESD} types | sed '/union /,$ d'
+printf "\t/* beware: hidden stub, to avoid hardcoding sockaddr lengths */\n};\n"
+
## libknot API
${CDEFS} libknot functions <<-EOF
# Utils
kr_rplan_pop
kr_rplan_resolved
kr_rplan_last
-# Nameservers
- kr_nsrep_set
+# Forwarding
+ kr_forward_add_target
# Utils
kr_log_req
kr_log_q
echo "struct qr_task" | ${CDEFS} ${KRESD} types | sed '/pktbuf/,$ d'
printf "\t/* beware: hidden stub, to avoid qr_tasklist_t */\n};\n"
+
${CDEFS} ${KRESD} functions <<-EOF
worker_resolve_exec
worker_resolve_mk_pkt
qr_tasklist_t waiting;
struct session *pending[MAX_PENDING];
uint16_t pending_count;
- uint16_t addrlist_count;
- uint16_t addrlist_turn;
uint16_t timeouts;
uint16_t iter_count;
- struct sockaddr *addrlist;
uint32_t refs;
bool finished : 1;
bool leading : 1;
uint64_t creation_time;
+ uint64_t send_time;
+ uint64_t recv_time;
+ struct kr_transport *transport;
};
const struct sockaddr *addr, knot_pkt_t *pkt);
static int qr_task_finalize(struct qr_task *task, int state);
static void qr_task_complete(struct qr_task *task);
-static struct session* worker_find_tcp_connected(struct worker_ctx *worker,
+struct session* worker_find_tcp_connected(struct worker_ctx *worker,
const struct sockaddr *addr);
static int worker_add_tcp_waiting(struct worker_ctx *worker,
const struct sockaddr *addr,
struct session *session);
-static struct session* worker_find_tcp_waiting(struct worker_ctx *worker,
+struct session* worker_find_tcp_waiting(struct worker_ctx *worker,
const struct sockaddr *addr);
static void on_tcp_connect_timeout(uv_timer_t *timer);
-static void on_retransmit(uv_timer_t *req);
+static void on_udp_timeout(uv_timer_t *timer);
static void subreq_finalize(struct qr_task *task, const struct sockaddr *packet_source, knot_pkt_t *pkt);
kr_log_verbose("[xdp] freed unsent buffer, ret = %d\n", ret);
}
#endif
+/* Helper functions for transport selection */
+static inline bool is_tls_capable(struct sockaddr *address) {
+ tls_client_param_t *tls_entry = tls_client_param_get(the_worker->engine->net.tls_client_params, address);
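+	/* A non-NULL entry means DoT (TLS) parameters are configured for this address. */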
+ return tls_entry;
+}
+
+static inline bool is_tcp_connected(struct sockaddr *address) {
+ return worker_find_tcp_connected(the_worker, address);
+}
+
+static inline bool is_tcp_waiting(struct sockaddr *address) {
+ return worker_find_tcp_waiting(the_worker, address);
+}
/** Create and initialize a request_ctx (on a fresh mempool).
*
req->qsource.dst_addr = &ctx->source.dst_addr.ip;
}
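+	/* Hand the worker's transport-state callbacks to server selection. */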
+ req->selection_context.is_tls_capable = is_tls_capable;
+ req->selection_context.is_tcp_connected = is_tcp_connected;
+ req->selection_context.is_tcp_waiting = is_tcp_waiting;
+ array_init(req->selection_context.forwarding_targets);
+ array_reserve_mm(req->selection_context.forwarding_targets, 1, kr_memreserve, &req->pool);
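+	/* Targets are filled in later via kr_forward_add_target(), which is exported to the Lua layer. */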
+
worker->stats.rconcurrent += 1;
return ctx;
/* This is called when we send subrequest / answer */
int qr_task_on_send(struct qr_task *task, const uv_handle_t *handle, int status)
{
-
if (task->finished) {
assert(task->leading == false);
qr_task_complete(task);
assert(s);
if (handle->type == UV_UDP && session_flags(s)->outgoing) {
- /* Start the timeout timer for UDP here, since this is the closest
- * to the wire we can get. */
- struct kr_request *req = &task->ctx->req;
- /* Check current query NSLIST */
- struct kr_query *qry = array_tail(req->rplan.pending);
+ // This should ensure that we are only dealing with our question to upstream
+ assert(!knot_wire_get_qr(task->pktbuf->wire));
+ // start the timer
+ struct kr_query *qry = array_tail(task->ctx->req.rplan.pending);
assert(qry != NULL);
- /* Retransmit at default interval, or more frequently if the mean
- * RTT of the server is better. If the server is glued, use default rate. */
- size_t timeout = qry->ns.score;
- if (timeout > KR_NS_GLUED) {
- /* We don't have information about variance in RTT, expect +10ms */
- timeout = MIN(qry->ns.score + 10, KR_CONN_RETRY);
- } else {
- timeout = KR_CONN_RETRY;
- }
- int ret = session_timer_start(s, on_retransmit, timeout, 0);
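+	/* The timeout value is chosen by server selection and carried in the transport. */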
+ size_t timeout = task->transport->timeout;
+ int ret = session_timer_start(s, on_udp_timeout, timeout, 0);
/* Start next step with timeout, fatal if can't start a timer. */
if (ret != 0) {
- subreq_finalize(task, &qry->ns.addr->ip, task->pktbuf);
+ subreq_finalize(task, &task->transport->address.ip, task->pktbuf);
qr_task_finalize(task, KR_STATE_FAIL);
}
}
qr_task_ref(task);
struct worker_ctx *worker = ctx->worker;
+ /* Note time for upstream RTT */
+ task->send_time = kr_now();
+ task->recv_time = 0; // task structure is being reused so we have to zero this out here
/* Send using given protocol */
assert(!session_flags(session)->closing);
if (session_flags(session)->has_http) {
if (status) {
struct qr_task *task = session_waitinglist_get(session);
if (task) {
- struct kr_qflags *options = &task->ctx->req.options;
- unsigned score = options->FORWARD || options->STUB ? KR_NS_FWD_DEAD : KR_NS_DEAD;
- kr_nsrep_update_rtt(NULL, peer, score,
- the_worker->engine->resolver.cache_rtt,
- KR_NS_UPDATE_NORESET);
+ // TLS handshake failed, report it to server selection
+ struct kr_query *qry = array_tail(task->ctx->req.rplan.pending);
+ qry->server_selection.error(qry, task->transport, KR_SELECTION_TLS_HANDSHAKE_FAILED);
}
#ifndef NDEBUG
else {
struct qr_task *task = session_waitinglist_get(session);
if (task && status != UV_ETIMEDOUT) {
/* Penalize upstream.
- * In case of UV_ETIMEDOUT upstream has been
- * already penalized in on_tcp_connect_timeout() */
- struct kr_qflags *options = &task->ctx->req.options;
- unsigned score = options->FORWARD || options->STUB ? KR_NS_FWD_DEAD : KR_NS_DEAD;
- kr_nsrep_update_rtt(NULL, peer, score,
- worker->engine->resolver.cache_rtt,
- KR_NS_UPDATE_NORESET);
+ * In case of UV_ETIMEDOUT upstream has been
+ * already penalized in on_tcp_connect_timeout() */
+ struct kr_query *qry = array_tail(task->ctx->req.rplan.pending);
+ qry->server_selection.error(qry, task->transport, KR_SELECTION_TCP_CONNECT_FAILED);
}
assert(session_tasklist_is_empty(session));
session_waitinglist_retry(session, false);
peer_str ? peer_str : "");
}
- unsigned score = qry->flags.FORWARD || qry->flags.STUB ? KR_NS_FWD_DEAD : KR_NS_DEAD;
- kr_nsrep_update_rtt(NULL, peer, score,
- worker->engine->resolver.cache_rtt,
- KR_NS_UPDATE_NORESET);
+ qry->server_selection.error(qry, task->transport, KR_SELECTION_TCP_CONNECT_TIMEOUT);
worker->stats.timeout += session_waitinglist_get_len(session);
session_waitinglist_retry(session, true);
uv_timer_stop(timer);
- /* Penalize all tried nameservers with a timeout. */
struct qr_task *task = session_tasklist_get_first(session);
struct worker_ctx *worker = task->ctx->worker;
+
if (task->leading && task->pending_count > 0) {
struct kr_query *qry = array_tail(task->ctx->req.rplan.pending);
- struct sockaddr_in6 *addrlist = (struct sockaddr_in6 *)task->addrlist;
- for (uint16_t i = 0; i < MIN(task->pending_count, task->addrlist_count); ++i) {
- struct sockaddr *choice = (struct sockaddr *)(&addrlist[i]);
- WITH_VERBOSE(qry) {
- char *addr_str = kr_straddr(choice);
- VERBOSE_MSG(qry, "=> server: '%s' flagged as 'bad'\n", addr_str ? addr_str : "");
- }
- unsigned score = qry->flags.FORWARD || qry->flags.STUB ? KR_NS_FWD_DEAD : KR_NS_DEAD;
- kr_nsrep_update_rtt(&qry->ns, choice, score,
- worker->engine->resolver.cache_rtt,
- KR_NS_UPDATE_NORESET);
- }
+ qry->server_selection.error(qry, task->transport, KR_SELECTION_QUERY_TIMEOUT);
}
+
task->timeouts += 1;
worker->stats.timeout += 1;
qr_task_step(task, NULL, NULL);
}
-static uv_handle_t *retransmit(struct qr_task *task)
+static uv_handle_t *transmit(struct qr_task *task)
{
uv_handle_t *ret = NULL;
- if (task && task->addrlist && task->addrlist_count > 0) {
- struct sockaddr_in6 *choice = &((struct sockaddr_in6 *)task->addrlist)[task->addrlist_turn];
+
+ if (task) {
+ struct kr_transport* transport = task->transport;
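+		/* Server selection already picked a single transport (and address); the old addrlist round-robin is gone. */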
+
+ struct sockaddr_in6 *choice = (struct sockaddr_in6 *)&transport->address;
+
if (!choice) {
return ret;
}
}
/* Checkout answer before sending it */
struct request_ctx *ctx = task->ctx;
- if (kr_resolve_checkout(&ctx->req, NULL, (struct sockaddr *)choice, SOCK_DGRAM, task->pktbuf) != 0) {
+ if (kr_resolve_checkout(&ctx->req, NULL, transport, task->pktbuf) != 0) {
return ret;
}
ret = ioreq_spawn(ctx->worker, SOCK_DGRAM, choice->sin6_family, false, false);
} else {
task->pending[task->pending_count] = session;
task->pending_count += 1;
- task->addrlist_turn = (task->addrlist_turn + 1) %
- task->addrlist_count; /* Round robin */
session_start_read(session); /* Start reading answer */
}
}
return ret;
}
-static void on_retransmit(uv_timer_t *req)
-{
- struct session *session = req->data;
- assert(session_tasklist_get_len(session) == 1);
-
- uv_timer_stop(req);
- struct qr_task *task = session_tasklist_get_first(session);
- if (retransmit(task) == NULL) {
- /* Not possible to spawn request, start timeout timer with remaining deadline. */
- struct kr_qflags *options = &task->ctx->req.options;
- uint64_t timeout = options->FORWARD || options->STUB ? KR_NS_FWD_TIMEOUT / 2 :
- KR_CONN_RTT_MAX - task->pending_count * KR_CONN_RETRY;
- uv_timer_start(req, on_udp_timeout, timeout, 0);
- } else {
- uv_timer_start(req, on_retransmit, KR_CONN_RETRY, 0);
- }
-}
static void subreq_finalize(struct qr_task *task, const struct sockaddr *packet_source, knot_pkt_t *pkt)
{
struct kr_query *qry = array_tail(follower->ctx->req.rplan.pending);
qry->id = leader_qry->id;
qry->secret = leader_qry->secret;
+
+ // Note that this transport may not be present in `leader_qry`'s server selection
+ follower->transport = task->transport;
+	if (follower->transport) {
+ follower->transport->deduplicated = true;
+ }
leader_qry->secret = 0; /* Next will be already decoded */
}
qr_task_step(follower, packet_source, pkt);
return kr_ok(); /* Will be notified when outgoing query finishes. */
}
/* Start transmitting */
- uv_handle_t *handle = retransmit(task);
+ uv_handle_t *handle = transmit(task);
if (handle == NULL) {
subreq_finalize(task, packet_source, packet);
return qr_task_finalize(task, KR_STATE_FAIL);
worker_del_tcp_waiting(worker, addr);
free(conn);
session_close(session);
- unsigned score = qry->flags.FORWARD || qry->flags.STUB ? KR_NS_FWD_DEAD : KR_NS_DEAD;
- kr_nsrep_update_rtt(NULL, peer, score,
- worker->engine->resolver.cache_rtt,
- KR_NS_UPDATE_NORESET);
- WITH_VERBOSE (qry) {
- const char *peer_str = kr_straddr(peer);
- kr_log_verbose( "[wrkr]=> connect to '%s' failed (%s), flagged as 'bad'\n",
- peer_str ? peer_str : "", uv_strerror(ret));
- }
+ qry->server_selection.error(qry, task->transport, KR_SELECTION_TCP_CONNECT_FAILED);
return kr_error(EAGAIN);
}
assert(task->pending_count == 0);
/* target */
- const struct sockaddr *addr = task->addrlist;
+ const struct sockaddr *addr = &task->transport->address.ip;
if (addr->sa_family == AF_UNSPEC) {
/* Target isn't defined. Finalize task with SERVFAIL.
* Although task->pending_count is zero, there are can be followers,
}
/* Checkout task before connecting */
struct request_ctx *ctx = task->ctx;
- if (kr_resolve_checkout(&ctx->req, NULL, (struct sockaddr *)addr,
- SOCK_STREAM, task->pktbuf) != 0) {
+ if (kr_resolve_checkout(&ctx->req, NULL, task->transport, task->pktbuf) != 0) {
subreq_finalize(task, packet_source, packet);
return qr_task_finalize(task, KR_STATE_FAIL);
}
assert(ctx);
struct kr_request *req = &ctx->req;
struct worker_ctx *worker = ctx->worker;
- int sock_type = -1;
- task->addrlist = NULL;
- task->addrlist_count = 0;
- task->addrlist_turn = 0;
if (worker->too_many_open) {
/* */
} else {
if (packet && kr_rplan_empty(rplan)) {
/* new query; TODO - make this detection more obvious */
- kr_resolve_consume(req, packet_source, packet);
+ kr_resolve_consume(req, &task->transport, packet);
}
return qr_task_finalize(task, KR_STATE_FAIL);
}
}
- int state = kr_resolve_consume(req, packet_source, packet);
+ // Report network RTT back to server selection
+ if (task->send_time && task->recv_time) {
+ struct kr_query *qry = array_tail(req->rplan.pending);
+ qry->server_selection.update_rtt(qry, task->transport, task->recv_time - task->send_time);
+ }
+
+ int state = kr_resolve_consume(req, &task->transport, packet);
+
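+	/* The old transport has been consumed; kr_resolve_produce() below picks a new one when needed. */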
+ task->transport = NULL;
while (state == KR_STATE_PRODUCE) {
- state = kr_resolve_produce(req, &task->addrlist,
- &sock_type, task->pktbuf);
+ state = kr_resolve_produce(req, &task->transport, task->pktbuf);
if (unlikely(++task->iter_count > KR_ITER_LIMIT ||
task->timeouts >= KR_TIMEOUT_LIMIT)) {
#ifndef NOVERBOSELOG
struct kr_rplan *rplan = &req->rplan;
- struct kr_query *last = kr_rplan_last(rplan);
+ struct kr_query *last = kr_rplan_last(rplan);
if (task->iter_count > KR_ITER_LIMIT) {
VERBOSE_MSG(last, "canceling query due to exceeded iteration count limit of %d\n", KR_ITER_LIMIT);
}
/* We're done, no more iterations needed */
if (state & (KR_STATE_DONE|KR_STATE_FAIL)) {
return qr_task_finalize(task, state);
- } else if (!task->addrlist || sock_type < 0) {
+ } else if (!task->transport || !task->transport->protocol) {
return qr_task_step(task, NULL, NULL);
}
- /* Count available address choices */
- struct sockaddr_in6 *choice = (struct sockaddr_in6 *)task->addrlist;
- for (size_t i = 0; i < KR_NSREP_MAXADDR && choice->sin6_family != AF_UNSPEC; ++i) {
- task->addrlist_count += 1;
- choice += 1;
- }
-
- /* Upgrade to TLS if the upstream address is configured as DoT capable. */
- if (task->addrlist_count > 0 && kr_inaddr_port(task->addrlist) == KR_DNS_PORT) {
- /* TODO if there are multiple addresses (task->addrlist_count > 1)
- * check all of them. */
- struct network *net = &worker->engine->net;
- /* task->addrlist has to contain TLS port before tls_client_param_get() call */
- kr_inaddr_set_port(task->addrlist, KR_DNS_TLS_PORT);
- tls_client_param_t *tls_entry =
- tls_client_param_get(net->tls_client_params, task->addrlist);
- if (tls_entry) {
- packet_source = NULL;
- sock_type = SOCK_STREAM;
- /* TODO in this case in tcp_task_make_connection() will be performed
- * redundant map_get() call. */
- } else {
- /* The function is fairly cheap, so we just change there and back. */
- kr_inaddr_set_port(task->addrlist, KR_DNS_PORT);
- }
- }
-
- int ret = 0;
- if (sock_type == SOCK_DGRAM) {
- /* Start fast retransmit with UDP. */
- ret = udp_task_step(task, packet_source, packet);
- } else {
- /* TCP. Connect to upstream or send the query if connection already exists. */
- assert (sock_type == SOCK_STREAM);
- ret = tcp_task_step(task, packet_source, packet);
+ switch (task->transport->protocol)
+ {
+ case KR_TRANSPORT_UDP:
+		return udp_task_step(task, packet_source, packet);
+ case KR_TRANSPORT_TCP: // fall through
+ case KR_TRANSPORT_TLS:
+ return tcp_task_step(task, packet_source, packet);
+ default:
+		assert(0);
+		return kr_error(EINVAL); /* not reachable with the protocols above */
}
- return ret;
}
static int parse_packet(knot_pkt_t *query)
}
assert(!session_flags(session)->closing);
addr = peer;
+	/* Note receive time for RTT calculation */
+ task->recv_time = kr_now();
}
assert(uv_is_closing(session_get_handle(session)) == false);
/* Packet was successfully parsed.
* Task was created (found). */
session_touch(session);
+
/* Consume input and produce next message */
return qr_task_step(task, addr, pkt);
}
return map_del_tcp_session(&worker->tcp_connected, addr);
}
-static struct session* worker_find_tcp_connected(struct worker_ctx *worker,
+struct session* worker_find_tcp_connected(struct worker_ctx *worker,
const struct sockaddr* addr)
{
return map_find_tcp_session(&worker->tcp_connected, addr);
return map_del_tcp_session(&worker->tcp_waiting, addr);
}
-static struct session* worker_find_tcp_waiting(struct worker_ctx *worker,
+struct session* worker_find_tcp_waiting(struct worker_ctx *worker,
const struct sockaddr* addr)
{
return map_find_tcp_session(&worker->tcp_waiting, addr);
return kr_ok();
}
-knot_pkt_t * worker_resolve_mk_pkt(const char *qname_str, uint16_t qtype, uint16_t qclass,
+knot_pkt_t *worker_resolve_mk_pkt_dname(knot_dname_t *qname, uint16_t qtype, uint16_t qclass,
const struct kr_qflags *options)
{
- uint8_t qname[KNOT_DNAME_MAXLEN];
- if (!knot_dname_from_str(qname, qname_str, sizeof(qname)))
- return NULL;
knot_pkt_t *pkt = knot_pkt_new(NULL, KNOT_EDNS_MAX_UDP_PAYLOAD, NULL);
if (!pkt)
return NULL;
return pkt;
}
+knot_pkt_t *worker_resolve_mk_pkt(const char *qname_str, uint16_t qtype, uint16_t qclass,
+ const struct kr_qflags *options)
+{
+ uint8_t qname[KNOT_DNAME_MAXLEN];
+ if (!knot_dname_from_str(qname, qname_str, sizeof(qname)))
+ return NULL;
+ return worker_resolve_mk_pkt_dname(qname, qtype, qclass, options);
+}
+
struct qr_task *worker_resolve_start(knot_pkt_t *query, struct kr_qflags options)
{
struct worker_ctx *worker = the_worker;
*/
int worker_end_tcp(struct session *session);
+KR_EXPORT knot_pkt_t *worker_resolve_mk_pkt_dname(knot_dname_t *qname, uint16_t qtype, uint16_t qclass,
+ const struct kr_qflags *options);
+
/**
* Create a packet suitable for worker_resolve_start(). All in malloc() memory.
*/
const struct sockaddr *addr);
int worker_del_tcp_waiting(struct worker_ctx *worker,
const struct sockaddr* addr);
+struct session* worker_find_tcp_waiting(struct worker_ctx *worker,
+ const struct sockaddr* addr);
+struct session* worker_find_tcp_connected(struct worker_ctx *worker,
+ const struct sockaddr* addr);
knot_pkt_t *worker_task_get_pktbuf(const struct qr_task *task);
struct request_ctx *worker_task_get_request(struct qr_task *task);
/** @cond internal */
/** Number of requests within the timeout window. */
-#define MAX_PENDING KR_NSREP_MAXADDR
+#define MAX_PENDING 4
/** Maximum response time from TCP upstream, milliseconds */
#define MAX_TCP_INACTIVITY (KR_RESOLVE_TIME_LIMIT + KR_CONN_RTT_MAX)
*/
#include <inttypes.h> /* PRIu64 */
+#include <limits.h>
#include <stdlib.h>
#include <uv.h>
#include <ucw/mempool.h>
Nameservers
-----------
-.. doxygenfile:: nsrep.h
+.. doxygenfile:: selection.h
:project: libkres
.. doxygenfile:: zonecut.h
:project: libkres
#define KR_ITER_LIMIT 100 /* Built-in iterator limit */
#define KR_RESOLVE_TIME_LIMIT 10000 /* Upper limit for resolution time of single query, ms */
#define KR_CNAME_CHAIN_LIMIT 13 /* Built-in maximum CNAME chain length */
-#define KR_TIMEOUT_LIMIT 4 /* Maximum number of retries after timeout. */
+#define KR_TIMEOUT_LIMIT 10 /* Maximum number of retries after timeout. */
#define KR_QUERY_NSRETRY_LIMIT 4 /* Maximum number of retries per query. */
#define KR_COUNT_NO_NSADDR_LIMIT 5
#define KR_CONSUME_FAIL_ROW_LIMIT 3 /* Maximum number of KR_STATE_FAIL in a row. */
#include "lib/resolve.h"
#include "lib/rplan.h"
#include "lib/defines.h"
-#include "lib/nsrep.h"
+#include "lib/selection.h"
#include "lib/module.h"
#include "lib/dnssec/ta.h"
if ((rr->type == KNOT_RRTYPE_A) &&
(req->ctx->options.NO_IPV4)) {
+ QVERBOSE_MSG(qry, "<= skipping IPv4 glue due to network settings\n");
continue;
}
if ((rr->type == KNOT_RRTYPE_AAAA) &&
(req->ctx->options.NO_IPV6)) {
+ QVERBOSE_MSG(qry, "<= skipping IPv6 glue due to network settings\n");
continue;
}
(void) update_nsaddr(rr, req->current_query, glue_cnt);
&& knot_dname_in_bailiwick(qry->sname, rr->owner) >= 0;
if (!ok) {
VERBOSE_MSG("<= authority: ns outside bailiwick\n");
+ qry->server_selection.error(qry, req->upstream.transport, KR_SELECTION_LAME_DELEGATION);
#ifdef STRICT_MODE
return KR_STATE_FAIL;
#else
{
const knot_dname_t *cname = NULL;
int state = unroll_cname(pkt, req, true, &cname);
+ struct kr_query *query = req->current_query;
if (state != kr_ok()) {
+ query->server_selection.error(query, req->upstream.transport, KR_SELECTION_BAD_CNAME);
return KR_STATE_FAIL;
}
- struct kr_query *query = req->current_query;
if (!(query->flags.CACHED)) {
/* If not cached (i.e. got from upstream)
* make sure that this is not an authoritative answer
if (!is_authoritative(pkt, query)) {
if (!(query->flags.FORWARD) &&
pkt_class & (PKT_NXDOMAIN|PKT_NODATA)) {
+ query->server_selection.error(query, req->upstream.transport, KR_SELECTION_LAME_DELEGATION);
VERBOSE_MSG("<= lame response: non-auth sent negative response\n");
return KR_STATE_FAIL;
}
/* Process answer type */
int state = unroll_cname(pkt, req, false, &cname);
if (state != kr_ok()) {
+ query->server_selection.error(query, req->upstream.transport, KR_SELECTION_BAD_CNAME);
return state;
}
/* Make sure that this is an authoritative answer (even with AA=0) for other layers */
q->stype == query->stype &&
knot_dname_is_equal(q->sname, cname)) {
VERBOSE_MSG("<= cname chain loop\n");
+ query->server_selection.error(query, req->upstream.transport, KR_SELECTION_BAD_CNAME);
return KR_STATE_FAIL;
}
}
if (query->flags.FORWARD) {
next->forward_flags.CNAME = true;
- if (query->parent == NULL) {
- state = kr_nsrep_copy_set(&next->ns, &query->ns);
- if (state != kr_ok()) {
- return KR_STATE_FAIL;
- }
- }
}
next->cname_parent = query;
/* Want DNSSEC if and only if it's posible to secure
/* Work around broken auths/load balancers */
if (query->flags.SAFEMODE) {
return resolve_error(pkt, req);
- } else if (query->flags.NO_MINIMIZE) {
- query->flags.SAFEMODE = true;
- return KR_STATE_DONE;
} else {
+ query->flags.SAFEMODE = true;
query->flags.NO_MINIMIZE = true;
return KR_STATE_DONE;
}
return resolve_badmsg(pkt, req, query);
} else
#endif
+ /* LATER: Query minimization, 0x20 randomization, EDNS… should really be
+ * set and managed by selection.c and SAFEMODE should be split and
+ * removed altogether because it's doing many things at once. */
if (pkt->parsed <= KNOT_WIRE_HEADER_SIZE) {
VERBOSE_MSG("<= malformed response (parsed %d)\n", (int)pkt->parsed);
return resolve_badmsg(pkt, req, query);
} else if (!is_paired_to_query(pkt, query)) {
WITH_VERBOSE(query) {
const char *ns_str =
- req->upstream.addr ? kr_straddr(req->upstream.addr) : "(internal)";
+ req->upstream.transport ? kr_straddr(&req->upstream.transport->address.ip) : "(internal)";
VERBOSE_MSG("<= ignoring mismatching response from %s\n",
ns_str ? ns_str : "(kr_straddr failed)");
}
VERBOSE_MSG("<= truncated response, failover to TCP\n");
if (query) {
/* Fail if already on TCP. */
- if (query->flags.TCP) {
+ if (req->upstream.transport->protocol != KR_TRANSPORT_UDP) {
VERBOSE_MSG("<= TC=1 with TCP, bailing out\n");
+ query->server_selection.error(query, req->upstream.transport, KR_SELECTION_TRUNCATED);
return resolve_error(pkt, req);
}
- query->flags.TCP = true;
+ query->server_selection.error(query, req->upstream.transport, KR_SELECTION_TRUNCATED);
}
return KR_STATE_CONSUME;
}
const knot_lookup_t *rcode = knot_lookup_by_id(knot_rcode_names, knot_wire_get_rcode(pkt->wire));
#endif
+ // We can't return directly from the switch because we have to give feedback to server selection first
+ int ret = 0;
+ int selection_error = -1;
+
/* Check response code. */
switch(knot_wire_get_rcode(pkt->wire)) {
case KNOT_RCODE_NOERROR:
knot_wire_set_rcode(req->answer->wire, KNOT_RCODE_YXDOMAIN);
break;
case KNOT_RCODE_REFUSED:
+ if (query->flags.STUB) {
+ /* just pass answer through if in stub mode */
+ break;
+ }
+ selection_error = KR_SELECTION_REFUSED;
+ VERBOSE_MSG("<= rcode: %s\n", rcode ? rcode->name : "??");
+ ret = resolve_badmsg(pkt, req, query);
+ break;
case KNOT_RCODE_SERVFAIL:
if (query->flags.STUB) {
/* just pass answer through if in stub mode */
break;
}
- /* fall through */
+ selection_error = KR_SELECTION_SERVFAIL;
+ VERBOSE_MSG("<= rcode: %s\n", rcode ? rcode->name : "??");
+ ret = resolve_badmsg(pkt, req, query);
+ break;
case KNOT_RCODE_FORMERR:
+ selection_error = KR_SELECTION_FORMERROR;
+ VERBOSE_MSG("<= rcode: %s\n", rcode ? rcode->name : "??");
+ ret = resolve_badmsg(pkt, req, query);
+ break;
case KNOT_RCODE_NOTIMPL:
+ selection_error = KR_SELECTION_NOTIMPL;
VERBOSE_MSG("<= rcode: %s\n", rcode ? rcode->name : "??");
- return resolve_badmsg(pkt, req, query);
+ ret = resolve_badmsg(pkt, req, query);
+ break;
default:
+ selection_error = KR_SELECTION_OTHER_RCODE;
VERBOSE_MSG("<= rcode: %s\n", rcode ? rcode->name : "??");
- return resolve_error(pkt, req);
+ ret = resolve_error(pkt, req);
+ break;
+ }
+
+ if (query->server_selection.initialized) {
+ if (selection_error != -1) {
+ query->server_selection.error(query, req->upstream.transport, selection_error);
+ }
+ }
+
+ if (ret) {
+ return ret;
}
int state;
(void)0;
ranked_rr_array_t *selected[] = kr_request_selected(req);
for (knot_section_t i = KNOT_ANSWER; i <= KNOT_ADDITIONAL; ++i) {
- int ret = kr_ranked_rrarray_finalize(selected[i], query->uid, &req->pool);
+ ret = kr_ranked_rrarray_finalize(selected[i], query->uid, &req->pool);
if (unlikely(ret)) {
return KR_STATE_FAIL;
}
#include "lib/utils.h"
#include "lib/defines.h"
#include "lib/module.h"
+#include "lib/selection.h"
#define VERBOSE_MSG(qry, ...) QRVERBOSE(qry, "vldr", __VA_ARGS__)
return NULL;
}
}
- return new_ds;
+ return new_ds;
}
static void mark_insecure_parents(const struct kr_query *qry)
return ctx->state;
}
+static int validate_wrapper(kr_layer_t *ctx, knot_pkt_t *pkt) {
+ // Wrapper for now.
+ int ret = validate(ctx, pkt);
+ struct kr_request *req = ctx->req;
+ struct kr_query *qry = req->current_query;
+ if (ret & KR_STATE_FAIL && qry->flags.DNSSEC_BOGUS)
+ qry->server_selection.error(qry, req->upstream.transport, KR_SELECTION_DNSSEC_ERROR);
+ return ret;
+}
+
+
/** Module implementation. */
int validate_init(struct kr_module *self)
{
static const kr_layer_api_t layer = {
- .consume = &validate,
+ .consume = &validate_wrapper,
.answer_finalize = &hide_bogus,
};
self->layer = &layer;
'layer/iterate.c',
'layer/validate.c',
'module.c',
- 'nsrep.c',
'resolve.c',
'rplan.c',
+ 'selection.c',
+ 'selection_forward.c',
+ 'selection_iter.c',
'utils.c',
'zonecut.c',
])
'layer.h',
'layer/iterate.h',
'module.h',
- 'nsrep.h',
'resolve.h',
'rplan.h',
+ 'selection.h',
+ 'selection_forward.h',
+ 'selection_iter.h',
'utils.h',
'zonecut.h',
])
+++ /dev/null
-/* Copyright (C) 2014-2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
- * SPDX-License-Identifier: GPL-3.0-or-later
- */
-
-#include <assert.h>
-#include <sys/socket.h>
-#include <netinet/in.h>
-#include <netdb.h>
-
-#include <arpa/inet.h>
-
-#include "lib/nsrep.h"
-#include "lib/rplan.h"
-#include "lib/resolve.h"
-#include "lib/defines.h"
-#include "lib/generic/pack.h"
-#include "contrib/ucw/lib.h"
-
-/** Some built-in unfairness ... */
-#ifndef FAVOUR_IPV6
-#define FAVOUR_IPV6 20 /* 20ms bonus for v6 */
-#endif
-
-/** @internal Macro to set address structure. */
-#define ADDR_SET(sa, family, addr, len, port) do {\
- memcpy(&sa ## _addr, (addr), (len)); \
- sa ## _family = (family); \
- sa ## _port = htons(port); \
-} while (0)
-
-/** Update nameserver representation with current name/address pair. */
-static void update_nsrep(struct kr_nsrep *ns, size_t pos, uint8_t *addr, size_t addr_len, int port)
-{
- if (addr == NULL) {
- ns->addr[pos].ip.sa_family = AF_UNSPEC;
- return;
- }
-
- /* Rotate previous addresses to the right. */
- memmove(ns->addr + pos + 1, ns->addr + pos, (KR_NSREP_MAXADDR - pos - 1) * sizeof(ns->addr[0]));
-
- switch(addr_len) {
- case sizeof(struct in_addr):
- ADDR_SET(ns->addr[pos].ip4.sin, AF_INET, addr, addr_len, port); break;
- case sizeof(struct in6_addr):
- ADDR_SET(ns->addr[pos].ip6.sin6, AF_INET6, addr, addr_len, port); break;
- default: assert(0); break;
- }
-}
-
-static void update_nsrep_set(struct kr_nsrep *ns, const knot_dname_t *name, uint8_t *addr[], unsigned score)
-{
- /* NSLIST is not empty, empty NS cannot be a leader. */
- if (!addr[0] && ns->addr[0].ip.sa_family != AF_UNSPEC) {
- return;
- }
- /* Set new NS leader */
- ns->name = name;
- ns->score = score;
- for (size_t i = 0; i < KR_NSREP_MAXADDR; ++i) {
- if (addr[i]) {
- void *addr_val = pack_obj_val(addr[i]);
- size_t len = pack_obj_len(addr[i]);
- update_nsrep(ns, i, addr_val, len, KR_DNS_PORT);
- } else {
- break;
- }
- }
-}
-
-#undef ADDR_SET
-
-/**
- * \param addr_set pack with one IP address per element */
-static unsigned eval_addr_set(const pack_t *addr_set, struct kr_context *ctx,
- struct kr_qflags opts, unsigned score, uint8_t *addr[])
-{
- kr_nsrep_rtt_lru_t *rtt_cache = ctx->cache_rtt;
- kr_nsrep_rtt_lru_entry_t *rtt_cache_entry_ptr[KR_NSREP_MAXADDR] = { NULL, };
- assert (KR_NSREP_MAXADDR >= 2);
- unsigned rtt_cache_entry_score[KR_NSREP_MAXADDR] = { score, KR_NS_MAX_SCORE + 1, };
- uint64_t now = kr_now();
-
- /* Name server is better candidate if it has address record. */
- for (uint8_t *it = pack_head(*addr_set); it != pack_tail(*addr_set);
- it = pack_obj_next(it)) {
- void *val = pack_obj_val(it);
- size_t len = pack_obj_len(it);
- unsigned favour = 0;
- bool is_valid = false;
- /* Check if the address isn't disabled. */
- if (len == sizeof(struct in6_addr)) {
- is_valid = !(opts.NO_IPV6);
- favour = FAVOUR_IPV6;
- } else if (len == sizeof(struct in_addr)) {
- is_valid = !(opts.NO_IPV4);
- } else {
- assert(!EINVAL);
- is_valid = false;
- }
-
- if (!is_valid) {
- continue;
- }
-
- /* Get score for the current address. */
- kr_nsrep_rtt_lru_entry_t *cached = rtt_cache ?
- lru_get_try(rtt_cache, val, len) :
- NULL;
- unsigned cur_addr_score = KR_NS_GLUED;
- if (cached) {
- cur_addr_score = cached->score;
- if (cached->score >= KR_NS_TIMEOUT) {
- /* If NS once was marked as "timeouted",
- * it won't participate in NS elections
- * at least ctx->cache_rtt_tout_retry_interval milliseconds. */
- uint64_t elapsed = now - cached->tout_timestamp;
- elapsed = elapsed > UINT_MAX ? UINT_MAX : elapsed;
- if (elapsed > ctx->cache_rtt_tout_retry_interval) {
- /* Select this NS for probing in this particular query,
- * but don't change the cached score.
- * For other queries this NS will remain "timeouted". */
- cur_addr_score = KR_NS_LONG - 1;
- }
- }
- }
-
- /* We can't always use favour. If these conditions held:
- *
- * rtt_cache_entry_score[i] < KR_NS_TIMEOUT
- * rtt_cache_entry_score[i] + favour > KR_NS_TIMEOUT
- * cur_addr_score < rtt_cache_entry_score[i] + favour
- *
- * we would prefer "certainly dead" cur_addr_score
- * instead of "almost dead but alive" rtt_cache_entry_score[i]
- */
- const unsigned cur_favour = cur_addr_score < KR_NS_TIMEOUT ? favour : 0;
- for (size_t i = 0; i < KR_NSREP_MAXADDR; ++i) {
- if (cur_addr_score >= rtt_cache_entry_score[i] + cur_favour)
- continue;
-
- /* Shake down previous contenders */
- for (size_t j = KR_NSREP_MAXADDR - 1; j > i; --j) {
- addr[j] = addr[j - 1];
- rtt_cache_entry_ptr[j] = rtt_cache_entry_ptr[j - 1];
- rtt_cache_entry_score[j] = rtt_cache_entry_score[j - 1];
- }
- addr[i] = it;
- rtt_cache_entry_score[i] = cur_addr_score;
- rtt_cache_entry_ptr[i] = cached;
- break;
- }
- }
-
- /* At this point, rtt_cache_entry_ptr contains up to KR_NSREP_MAXADDR
- * pointers to the rtt cache entries with the best scores for the given addr_set.
- * Check if there are timeouted NS. */
-
- for (size_t i = 0; i < KR_NSREP_MAXADDR; ++i) {
- if (rtt_cache_entry_ptr[i] == NULL)
- continue;
- if (rtt_cache_entry_ptr[i]->score < KR_NS_TIMEOUT)
- continue;
-
- uint64_t elapsed = now - rtt_cache_entry_ptr[i]->tout_timestamp;
- elapsed = elapsed > UINT_MAX ? UINT_MAX : elapsed;
- if (elapsed <= ctx->cache_rtt_tout_retry_interval)
- continue;
-
- /* rtt_cache_entry_ptr[i] points to "timeouted" rtt cache entry.
- * The period of the ban on participation in elections has expired. */
-
- if (VERBOSE_STATUS) {
- void *val = pack_obj_val(addr[i]);
- size_t len = pack_obj_len(addr[i]);
- char sa_str[INET6_ADDRSTRLEN];
- int af = (len == sizeof(struct in6_addr)) ? AF_INET6 : AF_INET;
- inet_ntop(af, val, sa_str, sizeof(sa_str));
- kr_log_verbose("[ ][nsre] probing timeouted NS: %s, score %i\n",
- sa_str, rtt_cache_entry_ptr[i]->score);
- }
-
- rtt_cache_entry_ptr[i]->tout_timestamp = now;
- }
-
- return rtt_cache_entry_score[0];
-}
-
-static int eval_nsrep(const knot_dname_t *owner, const pack_t *addr_set, struct kr_query *qry)
-{
- struct kr_nsrep *ns = &qry->ns;
- struct kr_context *ctx = ns->ctx;
- unsigned score = KR_NS_MAX_SCORE;
- unsigned reputation = 0;
- uint8_t *addr_choice[KR_NSREP_MAXADDR] = { NULL, };
-
- /* Fetch NS reputation */
- if (ctx->cache_rep) {
- unsigned *cached = lru_get_try(ctx->cache_rep, (const char *)owner,
- knot_dname_size(owner));
- if (cached) {
- reputation = *cached;
- }
- }
-
- /* Favour nameservers with unknown addresses to probe them,
- * otherwise discover the current best address for the NS. */
- if (addr_set->len == 0) {
- score = KR_NS_UNKNOWN;
- /* If the server doesn't have IPv6, give it disadvantage. */
- if (reputation & KR_NS_NOIP6) {
- score += FAVOUR_IPV6;
- /* If the server is unknown but has rep record, treat it as timeouted */
- if (reputation & KR_NS_NOIP4) {
- score = KR_NS_UNKNOWN;
- /* Try to start with clean slate */
- if (!(qry->flags.NO_IPV6)) {
- reputation &= ~KR_NS_NOIP6;
- }
- if (!(qry->flags.NO_IPV4)) {
- reputation &= ~KR_NS_NOIP4;
- }
- }
- }
- } else {
- score = eval_addr_set(addr_set, ctx, qry->flags, score, addr_choice);
- }
-
- /* Probabilistic bee foraging strategy (naive).
- * The fastest NS is preferred by workers until it is depleted (timeouts or degrades),
- * at the same time long distance scouts probe other sources (low probability).
- * Servers on TIMEOUT will not have probed at all.
- * Servers with score above KR_NS_LONG will have periodically removed from
- * reputation cache, so that kresd can reprobe them. */
- if (score >= KR_NS_TIMEOUT) {
- return kr_ok();
- } else if (score <= ns->score &&
- (score < KR_NS_LONG || qry->flags.NO_THROTTLE)) {
- update_nsrep_set(ns, owner, addr_choice, score);
- ns->reputation = reputation;
- } else if (kr_rand_coin(1, 10) &&
- !kr_rand_coin(score, KR_NS_MAX_SCORE)) {
- /* With 10% chance probe server with a probability
- * given by its RTT / MAX_RTT. */
- update_nsrep_set(ns, owner, addr_choice, score);
- ns->reputation = reputation;
- return 1; /* Stop evaluation */
- } else if (ns->score > KR_NS_MAX_SCORE) {
- /* Check if any server was already selected.
- * If no, pick current server and continue evaluation. */
- update_nsrep_set(ns, owner, addr_choice, score);
- ns->reputation = reputation;
- }
-
- return kr_ok();
-}
-
-int kr_nsrep_set(struct kr_query *qry, size_t index, const struct sockaddr *sock)
-{
- if (!qry) {
- return kr_error(EINVAL);
- }
- if (index >= KR_NSREP_MAXADDR) {
- return kr_error(ENOSPC);
- }
-
- if (!sock) {
- qry->ns.name = (const uint8_t *)"";
- qry->ns.addr[index].ip.sa_family = AF_UNSPEC;
- return kr_ok();
- }
-
- switch (sock->sa_family) {
- case AF_INET:
- if (qry->flags.NO_IPV4) {
- return kr_error(ENOENT);
- }
- qry->ns.addr[index].ip4 = *(const struct sockaddr_in *)sock;
- break;
- case AF_INET6:
- if (qry->flags.NO_IPV6) {
- return kr_error(ENOENT);
- }
- qry->ns.addr[index].ip6 = *(const struct sockaddr_in6 *)sock;
- break;
- default:
- qry->ns.addr[index].ip.sa_family = AF_UNSPEC;
- return kr_error(EINVAL);
- }
-
- qry->ns.name = (const uint8_t *)"";
- /* Reset score on first entry */
- if (index == 0) {
- qry->ns.score = KR_NS_UNKNOWN;
- qry->ns.reputation = 0;
- }
-
- /* Retrieve RTT from cache */
- struct kr_context *ctx = qry->ns.ctx;
- kr_nsrep_rtt_lru_entry_t *rtt_cache_entry = ctx
- ? lru_get_try(ctx->cache_rtt, kr_inaddr(sock), kr_family_len(sock->sa_family))
- : NULL;
- if (rtt_cache_entry) {
- qry->ns.score = MIN(qry->ns.score, rtt_cache_entry->score);
- }
-
- return kr_ok();
-}
-
-#define ELECT_INIT(ns, ctx_) do { \
- (ns)->ctx = (ctx_); \
- (ns)->addr[0].ip.sa_family = AF_UNSPEC; \
- (ns)->reputation = 0; \
- (ns)->score = KR_NS_MAX_SCORE + 1; \
-} while (0)
-
-int kr_nsrep_elect(struct kr_query *qry, struct kr_context *ctx)
-{
- if (!qry || !ctx) {
- //assert(!EINVAL);
- return kr_error(EINVAL);
- }
-
- // First we dump the nsset into a temporary array
- const int nsset_len = trie_weight(qry->zone_cut.nsset);
- struct {
- const knot_dname_t *name;
- const pack_t *addrs;
- } nsset[nsset_len];
-
- trie_it_t *it;
- int i = 0;
- for (it = trie_it_begin(qry->zone_cut.nsset); !trie_it_finished(it);
- trie_it_next(it), ++i) {
- /* we trust it's a correct dname */
- nsset[i].name = (const knot_dname_t *)trie_it_key(it, NULL);
- nsset[i].addrs = (const pack_t *)*trie_it_val(it);
- }
- trie_it_free(it);
- assert(i == nsset_len);
-
- // Now we sort it randomly, by select-sort.
- for (i = 0; i < nsset_len - 1; ++i) {
- // The winner for position i will be uniformly chosen from indices >= i
- const int j = i + kr_rand_bytes(1) % (nsset_len - i);
- // Now we swap the winner with index i
- if (i == j) continue;
- __typeof__((nsset[i])) tmp = nsset[i];
- nsset[i] = nsset[j];
- nsset[j] = tmp;
- }
-
- // Finally we run the original algorithm, in this randomized order.
- struct kr_nsrep *ns = &qry->ns;
- ELECT_INIT(ns, ctx);
- int ret = kr_ok();
- for (i = 0; i < nsset_len; ++i) {
- ret = eval_nsrep(nsset[i].name, nsset[i].addrs, qry);
- if (ret) break;
- }
-
- if (qry->ns.score <= KR_NS_MAX_SCORE && qry->ns.score >= KR_NS_LONG) {
- /* This is a low-reliability probe,
- * go with TCP to get ICMP reachability check. */
- qry->flags.TCP = true;
- }
- return ret;
-}
-
-int kr_nsrep_elect_addr(struct kr_query *qry, struct kr_context *ctx)
-{
- if (!qry || !ctx) {
- //assert(!EINVAL);
- return kr_error(EINVAL);
- }
-
- /* Get address list for this NS */
- struct kr_nsrep *ns = &qry->ns;
- ELECT_INIT(ns, ctx);
- pack_t *addr_set = kr_zonecut_find(&qry->zone_cut, ns->name);
- if (!addr_set) {
- return kr_error(ENOENT);
- }
- /* Evaluate addr list */
- uint8_t *addr_choice[KR_NSREP_MAXADDR] = { NULL, };
- unsigned score = eval_addr_set(addr_set, ctx, qry->flags, ns->score, addr_choice);
- update_nsrep_set(ns, ns->name, addr_choice, score);
- return kr_ok();
-}
-
-#undef ELECT_INIT
-
-int kr_nsrep_update_rtt(struct kr_nsrep *ns, const struct sockaddr *addr,
- unsigned score, kr_nsrep_rtt_lru_t *cache, int umode)
-{
- if (!cache || umode > KR_NS_MAX || umode < 0) {
- return kr_error(EINVAL);
- }
-
- /* Get `addr`, and later its raw string. */
- if (addr) {
- /* Caller provided specific address, OK. */
- } else if (ns != NULL) {
- addr = &ns->addr[0].ip;
- } else {
- assert(false && "kr_nsrep_update_rtt: don't know what address to update");
- return kr_error(EINVAL);
- }
- const char *addr_in = kr_inaddr(addr);
- size_t addr_len = kr_inaddr_len(addr);
- if (!addr_in || addr_len <= 0) {
- assert(false && "kr_nsrep_update_rtt: incorrect address");
- return kr_error(EINVAL);
- }
-
- bool is_new_entry = false;
- kr_nsrep_rtt_lru_entry_t *cur = lru_get_new(cache, addr_in, addr_len,
- (&is_new_entry));
- if (!cur) {
- return kr_ok();
- }
- if (score <= KR_NS_GLUED) {
- score = KR_NS_GLUED + 1;
- }
- /* If there's nothing to update, we reset it unless KR_NS_UPDATE_NORESET
- * mode was requested. New items are zeroed by LRU automatically. */
- if (is_new_entry && umode != KR_NS_UPDATE_NORESET) {
- umode = KR_NS_RESET;
- }
- unsigned new_score = 0;
- /* Update score, by default smooth over last two measurements. */
- switch (umode) {
- case KR_NS_UPDATE:
- case KR_NS_UPDATE_NORESET:
- new_score = (cur->score + score) / 2; break;
- case KR_NS_RESET: new_score = score; break;
- case KR_NS_ADD: new_score = MIN(KR_NS_MAX_SCORE - 1, cur->score + score); break;
- case KR_NS_MAX: new_score = MAX(cur->score, score); break;
- default: return kr_error(EINVAL);
- }
- /* Score limits */
- if (new_score > KR_NS_MAX_SCORE) {
- new_score = KR_NS_MAX_SCORE;
- }
- if (new_score >= KR_NS_TIMEOUT && cur->score < KR_NS_TIMEOUT) {
- /* Set the timestamp only when NS became "timeouted" */
- cur->tout_timestamp = kr_now();
- }
- cur->score = new_score;
- return kr_ok();
-}
-
-int kr_nsrep_update_rep(struct kr_nsrep *ns, unsigned reputation, kr_nsrep_lru_t *cache)
-{
- if (!ns || !cache ) {
- return kr_error(EINVAL);
- }
-
- /* Store in the struct */
- ns->reputation = reputation;
- /* Store reputation in the LRU cache */
- unsigned *cur = lru_get_new(cache, (const char *)ns->name,
- knot_dname_size(ns->name), NULL);
- if (cur) {
- *cur = reputation;
- }
- return kr_ok();
-}
-
-int kr_nsrep_copy_set(struct kr_nsrep *dst, const struct kr_nsrep *src)
-{
- if (!dst || !src ) {
- return kr_error(EINVAL);
- }
-
- memcpy(dst, src, sizeof(struct kr_nsrep));
- dst->name = (const uint8_t *)"";
- dst->score = KR_NS_UNKNOWN;
- dst->reputation = 0;
-
- return kr_ok();
-}
-
-int kr_nsrep_sort(struct kr_nsrep *ns, struct kr_context *ctx)
-{
- if (!ns || !ctx) {
- assert(false);
- return kr_error(EINVAL);
- }
-
- kr_nsrep_rtt_lru_t *rtt_cache = ctx->cache_rtt;
-
- ns->reputation = 0;
- ns->score = KR_NS_MAX_SCORE + 1;
-
- if (ns->addr[0].ip.sa_family == AF_UNSPEC) {
- return kr_error(EINVAL);
- }
-
- /* Compute the scores. Unfortunately there's no space for scores
- * along the addresses. */
- unsigned scores[KR_NSREP_MAXADDR];
- int i;
- bool timeouted_address_is_already_selected = false;
- for (i = 0; i < KR_NSREP_MAXADDR; ++i) {
- const struct sockaddr *sa = &ns->addr[i].ip;
- if (sa->sa_family == AF_UNSPEC) {
- break;
- }
- kr_nsrep_rtt_lru_entry_t *rtt_cache_entry = lru_get_try(rtt_cache,
- kr_inaddr(sa),
- kr_family_len(sa->sa_family));
- if (!rtt_cache_entry) {
- scores[i] = 1; /* prefer unknown to probe RTT */
- } else if (rtt_cache_entry->score < KR_NS_FWD_TIMEOUT) {
- /* some probability to bump bad ones up for re-probe */
- scores[i] = rtt_cache_entry->score;
- /* The lower the rtt, the more likely it will be selected. */
- if (!kr_rand_coin(rtt_cache_entry->score, KR_NS_FWD_TIMEOUT)) {
- scores[i] = 1;
- }
- } else {
- uint64_t now = kr_now();
- uint64_t elapsed = now - rtt_cache_entry->tout_timestamp;
- scores[i] = KR_NS_MAX_SCORE + 1;
- elapsed = elapsed > UINT_MAX ? UINT_MAX : elapsed;
- if (elapsed > ctx->cache_rtt_tout_retry_interval &&
- !timeouted_address_is_already_selected) {
- scores[i] = 1;
- rtt_cache_entry->tout_timestamp = now;
- timeouted_address_is_already_selected = true;
- }
- }
-
- /* Give advantage to IPv6. */
- if (scores[i] <= KR_NS_MAX_SCORE && sa->sa_family == AF_INET) {
- scores[i] += FAVOUR_IPV6;
- }
-
- if (VERBOSE_STATUS) {
- kr_log_verbose("[ ][nsre] score %d for %s;\t cached RTT: %d\n",
- scores[i], kr_straddr(sa),
- rtt_cache_entry ? rtt_cache_entry->score : -1);
- }
- }
-
- /* Select-sort the addresses. */
- const int count = i;
- for (i = 0; i < count - 1; ++i) {
- /* find min from i onwards */
- int min_i = i;
- for (int j = i + 1; j < count; ++j) {
- if (scores[j] < scores[min_i]) {
- min_i = j;
- }
- }
- /* swap the indices */
- if (min_i != i) {
- SWAP(scores[min_i], scores[i]);
- SWAP(ns->addr[min_i], ns->addr[i]);
- }
- }
-
- if (count > 0) {
- ns->score = scores[0];
- ns->reputation = 0;
- }
-
- return kr_ok();
-}
+++ /dev/null
-/* Copyright (C) 2014-2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
- * SPDX-License-Identifier: GPL-3.0-or-later
- */
-
-#pragma once
-
-#include <netinet/in.h>
-#include <sys/socket.h>
-#include <libknot/dname.h>
-#include <limits.h>
-
-#include "lib/defines.h"
-#include "lib/generic/lru.h"
-
-struct kr_query;
-
-/**
- * NS RTT score (special values).
- * @note RTT is measured in milliseconds.
- */
-enum kr_ns_score {
- KR_NS_MAX_SCORE = 20 * KR_CONN_RTT_MAX, /* max possible value */
- KR_NS_FWD_TIMEOUT = (95 * 10000) / 100, /* timeout for upstream recursor,
- * 95 percents from max resolution time */
- KR_NS_TIMEOUT = (95 * KR_CONN_RTT_MAX) / 100, /* timeout for upstream auth */
- KR_NS_LONG = (3 * KR_NS_TIMEOUT) / 4,
- KR_NS_UNKNOWN = KR_NS_TIMEOUT / 2,
- KR_NS_PENALTY = 100,
- KR_NS_GLUED = 10
-};
-
-/**
- * See kr_nsrep_update_rtt()
- */
-#define KR_NS_DEAD (((KR_NS_TIMEOUT * 4) + 3) / 3)
-#define KR_NS_FWD_DEAD (((KR_NS_FWD_TIMEOUT * 4) + 3) / 3)
-
-/** If once NS was marked as "timeouted", it won't participate in NS elections
- * at least KR_NS_TIMEOUT_RETRY_INTERVAL milliseconds (now: one second). */
-#define KR_NS_TIMEOUT_RETRY_INTERVAL 1000
-
-/**
- * NS QoS flags.
- */
-enum kr_ns_rep {
- KR_NS_NOIP4 = 1 << 0, /**< NS has no IPv4 */
- KR_NS_NOIP6 = 1 << 1, /**< NS has no IPv6 */
- KR_NS_NOEDNS = 1 << 2 /**< NS has no EDNS support */
-};
-
-/**
- * NS RTT update modes.
- * First update is always KR_NS_RESET unless
- * KR_NS_UPDATE_NORESET mode had choosen.
- */
-enum kr_ns_update_mode {
- KR_NS_UPDATE = 0, /**< Update as smooth over last two measurements */
- KR_NS_UPDATE_NORESET, /**< Same as KR_NS_UPDATE, but disable fallback to
- * KR_NS_RESET on newly added entries.
- * Zero is used as initial value. */
- KR_NS_RESET, /**< Set to given value */
- KR_NS_ADD, /**< Increment current value */
- KR_NS_MAX /**< Set to maximum of current/proposed value. */
-};
-
-struct kr_nsrep_rtt_lru_entry {
- unsigned score; /* combined rtt */
- uint64_t tout_timestamp; /* The time when score became
- * greater or equal then KR_NS_TIMEOUT.
- * Is meaningful only when score >= KR_NS_TIMEOUT */
-};
-
-typedef struct kr_nsrep_rtt_lru_entry kr_nsrep_rtt_lru_entry_t;
-
-/**
- * NS QoS tracking.
- */
-typedef lru_t(kr_nsrep_rtt_lru_entry_t) kr_nsrep_rtt_lru_t;
-
-/**
- * NS reputation tracking.
- */
-typedef lru_t(unsigned) kr_nsrep_lru_t;
-
-/* Maximum count of addresses probed in one go (last is left empty) */
-#define KR_NSREP_MAXADDR 4
-
-/**
- * Name server representation.
- * Contains extra information about the name server, e.g. score
- * or other metadata.
- */
-struct kr_nsrep
-{
- unsigned score; /**< NS score */
- unsigned reputation; /**< NS reputation */
- const knot_dname_t *name; /**< NS name */
- struct kr_context *ctx; /**< Resolution context */
- union inaddr addr[KR_NSREP_MAXADDR]; /**< NS address(es) */
-};
-
-/**
- * Set given NS address. (Very low-level access to the list.)
- * @param qry updated query
- * @param index index of the updated target
- * @param sock socket address to use (sockaddr_in or sockaddr_in6 or NULL)
- * @return 0 or an error code, in particular kr_error(ENOENT) for net.ipvX
- */
-KR_EXPORT
-int kr_nsrep_set(struct kr_query *qry, size_t index, const struct sockaddr *sock);
-
-/**
- * Elect best nameserver/address pair from the nsset.
- * @param qry updated query
- * @param ctx resolution context
- * @return 0 or an error code
- */
-KR_EXPORT
-int kr_nsrep_elect(struct kr_query *qry, struct kr_context *ctx);
-
-/**
- * Elect best nameserver/address pair from the nsset.
- * @param qry updated query
- * @param ctx resolution context
- * @return 0 or an error code
- */
-KR_EXPORT
-int kr_nsrep_elect_addr(struct kr_query *qry, struct kr_context *ctx);
-
-/**
- * Update NS address RTT information.
- *
- * @brief In KR_NS_UPDATE mode reputation is smoothed over last N measurements.
- *
- * @param ns updated NS representation
- * @param addr chosen address (NULL for first)
- * @param score new score (i.e. RTT), see enum kr_ns_score
- * @param cache RTT LRU cache
- * @param umode update mode (KR_NS_UPDATE or KR_NS_RESET or KR_NS_ADD)
- * @return 0 on success, error code on failure
- */
-KR_EXPORT
-int kr_nsrep_update_rtt(struct kr_nsrep *ns, const struct sockaddr *addr,
- unsigned score, kr_nsrep_rtt_lru_t *cache, int umode);
-
-/**
- * Update NSSET reputation information.
- *
- * @param ns updated NS representation
- * @param reputation combined reputation flags, see enum kr_ns_rep
- * @param cache LRU cache
- * @return 0 on success, error code on failure
- */
-KR_EXPORT
-int kr_nsrep_update_rep(struct kr_nsrep *ns, unsigned reputation, kr_nsrep_lru_t *cache);
-/**
- * Copy NSSET reputation information and resets score.
- *
- * @param dst updated NS representation
- * @param src source NS representation
- * @return 0 on success, error code on failure
- */
-int kr_nsrep_copy_set(struct kr_nsrep *dst, const struct kr_nsrep *src);
-
-/**
- * Sort addresses in the query nsrep list by cached RTT.
- * if RTT is greater then KR_NS_TIMEOUT, address will placed at the beginning of the
- * nsrep list once in cache.ns_tout() milliseconds. Otherwise it will be sorted
- * as if it has cached RTT equal to KR_NS_MAX_SCORE + 1.
- * @param ns updated kr_nsrep
- * @param ctx name resolution context.
- * @return 0 or an error code
- * @note ns reputation is zeroed and score is set to KR_NS_MAX_SCORE + 1.
- */
-KR_EXPORT
-int kr_nsrep_sort(struct kr_nsrep *ns, struct kr_context *ctx);
#include <libknot/rrtype/rdname.h>
#include <libknot/descriptor.h>
#include <ucw/mempool.h>
+#include <sys/socket.h>
#include "kresconfig.h"
#include "lib/resolve.h"
#include "lib/layer.h"
return;
}
assert(qname);
- const int len = knot_dname_size(qname) - 2; /* Skip first, last label. */
+	const int len = knot_dname_size(qname) - 2; /* Skip the first byte (a label length) and the last byte (the root label). */
for (int i = 0; i < len; ++i) {
/* Note: this relies on the fact that correct label lengths
* can't pass the isletter() test (by "luck"). */
}
}
-/** Invalidate current NS/addr pair. */
-static int invalidate_ns(struct kr_rplan *rplan, struct kr_query *qry)
-{
- if (qry->ns.addr[0].ip.sa_family != AF_UNSPEC) {
- const char *addr = kr_inaddr(&qry->ns.addr[0].ip);
- int addr_len = kr_inaddr_len(&qry->ns.addr[0].ip);
- int ret = kr_zonecut_del(&qry->zone_cut, qry->ns.name, addr, addr_len);
- /* Also remove it from the qry->ns.addr array.
- * That's useful at least for STUB and FORWARD modes. */
- memmove(qry->ns.addr, qry->ns.addr + 1,
- sizeof(qry->ns.addr[0]) * (KR_NSREP_MAXADDR - 1));
- return ret;
- } else {
- return kr_zonecut_del_all(&qry->zone_cut, qry->ns.name);
- }
-}
-
/** This turns off QNAME minimisation if there is a non-terminal between the current zone cut and the name target.
 * It saves several minimisation steps, as the zone cut is likely the final one.
*/
return KR_STATE_PRODUCE;
}
-static int ns_resolve_addr(struct kr_query *qry, struct kr_request *req)
-{
- struct kr_rplan *rplan = &req->rplan;
- struct kr_context *ctx = req->ctx;
-
-
- /* Start NS queries from root, to avoid certain cases
- * where a NS drops out of cache and the rest is unavailable,
- * this would lead to dependency loop in current zone cut.
- * Prefer IPv6 and continue with IPv4 if not available.
- */
- uint16_t next_type = 0;
- if (!(qry->flags.AWAIT_IPV6) &&
- !(ctx->options.NO_IPV6)) {
- next_type = KNOT_RRTYPE_AAAA;
- qry->flags.AWAIT_IPV6 = true;
- } else if (!(qry->flags.AWAIT_IPV4) &&
- !(ctx->options.NO_IPV4)) {
- next_type = KNOT_RRTYPE_A;
- qry->flags.AWAIT_IPV4 = true;
- /* Hmm, no useable IPv6 then. */
- qry->ns.reputation |= KR_NS_NOIP6;
- kr_nsrep_update_rep(&qry->ns, qry->ns.reputation, ctx->cache_rep);
- }
- /* Bail out if the query is already pending or dependency loop. */
- if (!next_type || kr_rplan_satisfies(qry->parent, qry->ns.name, KNOT_CLASS_IN, next_type)) {
- /* Fall back to SBELT if root server query fails. */
- if (!next_type && qry->zone_cut.name[0] == '\0') {
- VERBOSE_MSG(qry, "=> fallback to root hints\n");
- kr_zonecut_set_sbelt(ctx, &qry->zone_cut);
- qry->flags.NO_THROTTLE = true; /* Pick even bad SBELT servers */
- return kr_error(EAGAIN);
- }
- /* No IPv4 nor IPv6, flag server as unusable. */
- ++req->count_no_nsaddr;
- VERBOSE_MSG(qry, "=> unresolvable NS address, bailing out (counter: %u)\n",
- req->count_no_nsaddr);
- qry->ns.reputation |= KR_NS_NOIP4 | KR_NS_NOIP6;
- kr_nsrep_update_rep(&qry->ns, qry->ns.reputation, ctx->cache_rep);
- invalidate_ns(rplan, qry);
- return kr_error(EHOSTUNREACH);
- }
- /* Push new query to the resolution plan */
- struct kr_query *next =
- kr_rplan_push(rplan, qry, qry->ns.name, KNOT_CLASS_IN, next_type);
- if (!next) {
- return kr_error(ENOMEM);
- }
- next->flags.NONAUTH = true;
-
- /* At the root level with no NS addresses, add SBELT subrequest. */
- int ret = 0;
- if (qry->zone_cut.name[0] == '\0') {
- ret = kr_zonecut_set_sbelt(ctx, &next->zone_cut);
- if (ret == 0) { /* Copy TA and key since it's the same cut to avoid lookup. */
- kr_zonecut_copy_trust(&next->zone_cut, &qry->zone_cut);
- kr_zonecut_set_sbelt(ctx, &qry->zone_cut); /* Add SBELT to parent in case query fails. */
- qry->flags.NO_THROTTLE = true; /* Pick even bad SBELT servers */
- }
- } else {
- next->flags.AWAIT_CUT = true;
- }
- return ret;
-}
-
static int edns_put(knot_pkt_t *pkt, bool reclaim)
{
if (!pkt->opt_rr) {
return request->state;
}
-knot_pkt_t * kr_request_ensure_answer(struct kr_request *request)
+knot_pkt_t *kr_request_ensure_answer(struct kr_request *request)
{
if (request->answer)
return request->answer;
return request->answer = NULL;
}
-KR_PURE static bool kr_inaddr_equal(const struct sockaddr *a, const struct sockaddr *b)
-{
- const int a_len = kr_inaddr_len(a);
- const int b_len = kr_inaddr_len(b);
- return a_len == b_len && memcmp(kr_inaddr(a), kr_inaddr(b), a_len) == 0;
-}
-
-static void update_nslist_rtt(struct kr_context *ctx, struct kr_query *qry, const struct sockaddr *src)
-{
- /* Do not track in safe mode. */
- if (qry->flags.SAFEMODE) {
- return;
- }
-
- /* Calculate total resolution time from the time the query was generated. */
- uint64_t elapsed = kr_now() - qry->timestamp_mono;
- elapsed = elapsed > UINT_MAX ? UINT_MAX : elapsed;
-
- /* NSs in the preference list prior to the one who responded will be penalised
- * with the RETRY timer interval. This is because we know they didn't respond
- * for N retries, so their RTT must be at least N * RETRY.
- * The NS in the preference list that responded will have RTT relative to the
- * time when the query was sent out, not when it was originated.
- */
- for (size_t i = 0; i < KR_NSREP_MAXADDR; ++i) {
- const struct sockaddr *addr = &qry->ns.addr[i].ip;
- if (addr->sa_family == AF_UNSPEC) {
- break;
- }
- /* If this address is the source of the answer, update its RTT */
- if (kr_inaddr_equal(src, addr)) {
- kr_nsrep_update_rtt(&qry->ns, addr, elapsed, ctx->cache_rtt, KR_NS_UPDATE);
- WITH_VERBOSE(qry) {
- char addr_str[INET6_ADDRSTRLEN];
- inet_ntop(addr->sa_family, kr_inaddr(addr), addr_str, sizeof(addr_str));
- VERBOSE_MSG(qry, "<= server: '%s' rtt: %"PRIu64" ms\n",
- addr_str, elapsed);
- }
- } else {
- /* Response didn't come from this IP, but we know the RTT must be at least
- * several RETRY timer tries, e.g. if we have addresses [a, b, c] and we have
- * tried [a, b] when the answer from 'a' came after 350ms, then we know
- * that 'b' didn't respond for at least 350 - (1 * 300) ms. We can't say that
- * its RTT is 50ms, but we can say that its score shouldn't be less than 50. */
- kr_nsrep_update_rtt(&qry->ns, addr, elapsed, ctx->cache_rtt, KR_NS_MAX);
- WITH_VERBOSE(qry) {
- char addr_str[INET6_ADDRSTRLEN];
- inet_ntop(addr->sa_family, kr_inaddr(addr), addr_str, sizeof(addr_str));
- VERBOSE_MSG(qry, "<= server: '%s' rtt: >= %"PRIu64" ms\n",
- addr_str, elapsed);
- }
- }
- /* Subtract query start time from elapsed time */
- if (elapsed < KR_CONN_RETRY) {
- break;
- }
- elapsed = elapsed - KR_CONN_RETRY;
- }
-}
-
-static void update_nslist_score(struct kr_request *request, struct kr_query *qry, const struct sockaddr *src, knot_pkt_t *packet)
-{
- struct kr_context *ctx = request->ctx;
- /* On successful answer, update preference list RTT and penalise timer */
- if (!(request->state & KR_STATE_FAIL)) {
- /* Update RTT information for preference list */
- update_nslist_rtt(ctx, qry, src);
- /* Do not complete NS address resolution on soft-fail. */
- const int rcode = packet ? knot_wire_get_rcode(packet->wire) : 0;
- if (rcode != KNOT_RCODE_SERVFAIL && rcode != KNOT_RCODE_REFUSED) {
- qry->flags.AWAIT_IPV6 = false;
- qry->flags.AWAIT_IPV4 = false;
- } else { /* Penalize SERVFAILs. */
- kr_nsrep_update_rtt(&qry->ns, src, KR_NS_PENALTY, ctx->cache_rtt, KR_NS_ADD);
- }
- }
-}
-
static bool resolution_time_exceeded(struct kr_query *qry, uint64_t now)
{
uint64_t resolving_time = now - qry->creation_time_mono;
return false;
}
-int kr_resolve_consume(struct kr_request *request, const struct sockaddr *src, knot_pkt_t *packet)
+int kr_resolve_consume(struct kr_request *request, struct kr_transport **transport, knot_pkt_t *packet)
{
struct kr_rplan *rplan = &request->rplan;
}
bool tried_tcp = (qry->flags.TCP);
if (!packet || packet->size == 0) {
- if (tried_tcp) {
- request->state = KR_STATE_FAIL;
- } else {
- qry->flags.TCP = true;
- }
+ return KR_STATE_PRODUCE;
} else {
/* Packet cleared, derandomize QNAME. */
knot_dname_t *qname_raw = knot_pkt_qname(packet);
} else {
/* Fill in source and latency information. */
request->upstream.rtt = kr_now() - qry->timestamp_mono;
- request->upstream.addr = src;
+ request->upstream.transport = transport ? *transport : NULL;
ITERATE_LAYERS(request, qry, consume, packet);
/* Clear temporary information */
- request->upstream.addr = NULL;
+ request->upstream.transport = NULL;
request->upstream.rtt = 0;
}
}
- /* Track RTT for iterative answers */
- if (src && !(qry->flags.CACHED)) {
- update_nslist_score(request, qry, src, packet);
+ if (transport && !qry->flags.CACHED) {
+ if (!(request->state & KR_STATE_FAIL)) {
+ /* Do not complete NS address resolution on soft-fail. */
+ const int rcode = packet ? knot_wire_get_rcode(packet->wire) : 0;
+ if (rcode != KNOT_RCODE_SERVFAIL && rcode != KNOT_RCODE_REFUSED) {
+ qry->flags.AWAIT_IPV6 = false;
+ qry->flags.AWAIT_IPV4 = false;
+ }
+ }
}
- /* Resolution failed, invalidate current NS. */
+
if (request->state & KR_STATE_FAIL) {
- invalidate_ns(rplan, qry);
qry->flags.RESOLVED = false;
}
- /* For multiple errors in a row; invalidate_ns() is not enough. */
if (!qry->flags.CACHED) {
if (request->state & KR_STATE_FAIL) {
if (++request->count_fail_row > KR_CONSUME_FAIL_ROW_LIMIT) {
/* Do not finish with bogus answer. */
if (qry->flags.DNSSEC_BOGUS) {
- return KR_STATE_FAIL;
+ if (qry->flags.FORWARD || qry->flags.STUB) {
+ return KR_STATE_FAIL;
+ }
+ /* Other servers might not have broken DNSSEC. */
+ qry->flags.DNSSEC_BOGUS = false;
+ return KR_STATE_PRODUCE;
}
return kr_rplan_empty(&request->rplan) ? KR_STATE_DONE : KR_STATE_PRODUCE;
return trust_chain_check(request, qry);
}
-int kr_resolve_produce(struct kr_request *request, struct sockaddr **dst, int *type, knot_pkt_t *packet)
+
+static int ns_resolve_addr(struct kr_query *qry, struct kr_request *param, struct kr_transport *transport, uint16_t next_type)
+{
+ struct kr_rplan *rplan = &param->rplan;
+ struct kr_context *ctx = param->ctx;
+
+
+ /* Start NS queries from the root to avoid certain cases
+ * where an NS drops out of cache and the rest is unavailable;
+ * that would lead to a dependency loop in the current zone cut.
+ */
+
+ /* Bail out if the query is already pending or dependency loop. */
+ if (!next_type || kr_rplan_satisfies(qry->parent, transport->ns_name, KNOT_CLASS_IN, next_type)) {
+ /* Fall back to SBELT if root server query fails. */
+ if (!next_type && qry->zone_cut.name[0] == '\0') {
+ VERBOSE_MSG(qry, "=> fallback to root hints\n");
+ kr_zonecut_set_sbelt(ctx, &qry->zone_cut);
+ qry->flags.NO_THROTTLE = true; /* Pick even bad SBELT servers */
+ return kr_error(EAGAIN);
+ }
+ /* No IPv4 nor IPv6, flag server as unusable. */
+ VERBOSE_MSG(qry, "=> unresolvable NS address, bailing out\n");
+ kr_zonecut_del_all(&qry->zone_cut, transport->ns_name);
+ return kr_error(EHOSTUNREACH);
+ }
+ /* Push new query to the resolution plan */
+ struct kr_query *next =
+ kr_rplan_push(rplan, qry, transport->ns_name, KNOT_CLASS_IN, next_type);
+ if (!next) {
+ return kr_error(ENOMEM);
+ }
+ next->flags.NONAUTH = true;
+
+ /* At the root level with no NS addresses, add SBELT subrequest. */
+ int ret = 0;
+ if (qry->zone_cut.name[0] == '\0') {
+ ret = kr_zonecut_set_sbelt(ctx, &next->zone_cut);
+ if (ret == 0) { /* Copy TA and key since it's the same cut to avoid lookup. */
+ kr_zonecut_copy_trust(&next->zone_cut, &qry->zone_cut);
+ kr_zonecut_set_sbelt(ctx, &qry->zone_cut); /* Add SBELT to parent in case query fails. */
+ qry->flags.NO_THROTTLE = true; /* Pick even bad SBELT servers */
+ }
+ } else {
+ next->flags.AWAIT_CUT = true;
+ }
+
+ if (ret == 0) {
+ if (next_type == KNOT_RRTYPE_AAAA) {
+ qry->flags.AWAIT_IPV6 = true;
+ } else {
+ qry->flags.AWAIT_IPV4 = true;
+ }
+ }
+
+ return ret;
+}
+
+int kr_resolve_produce(struct kr_request *request, struct kr_transport **transport, knot_pkt_t *packet)
{
struct kr_rplan *rplan = &request->rplan;
- unsigned ns_election_iter = 0;
/* No query left for resolution */
if (kr_rplan_empty(rplan)) {
return KR_STATE_FAIL;
}
- /* If we have deferred answers, resume them. */
+
struct kr_query *qry = array_tail(rplan->pending);
+
+ /* Initialize server selection */
+ if (!qry->server_selection.initialized) {
+ kr_server_selection_init(qry);
+ }
+
+ /* If we have deferred answers, resume them. */
if (qry->deferred != NULL) {
/* @todo: Refactoring validator, check trust chain before resuming. */
int state = 0;
}
}
-ns_election:
-
- if (unlikely(request->count_no_nsaddr >= KR_COUNT_NO_NSADDR_LIMIT)) {
- VERBOSE_MSG(qry, "=> too many unresolvable NSs, bail out "
- "(mitigation for NXNSAttack CVE-2020-12667)\n");
- return KR_STATE_FAIL;
- }
- /* If the query has already selected a NS and is waiting for IPv4/IPv6 record,
- * elect best address only, otherwise elect a completely new NS.
- */
- if(++ns_election_iter >= KR_ITER_LIMIT) {
- VERBOSE_MSG(qry, "=> couldn't converge NS selection, bail out\n");
- return KR_STATE_FAIL;
- }
const struct kr_qflags qflg = qry->flags;
const bool retry = qflg.TCP || qflg.BADCOOKIE_AGAIN;
- if (qflg.AWAIT_IPV4 || qflg.AWAIT_IPV6) {
- kr_nsrep_elect_addr(qry, request->ctx);
- } else if (qflg.FORWARD || qflg.STUB) {
- kr_nsrep_sort(&qry->ns, request->ctx);
- if (qry->ns.score > KR_NS_MAX_SCORE) {
- /* At the moment all NS have bad reputation.
- * But there can be existing connections*/
- VERBOSE_MSG(qry, "=> no valid NS left\n");
- return KR_STATE_FAIL;
- }
- } else if (!qry->ns.name || !retry) { /* Keep NS when requerying/stub/badcookie. */
+ if (!qflg.FORWARD && !qflg.STUB && !retry) { /* Keep NS when requerying/stub/badcookie. */
/* Root DNSKEY must be fetched from the hints to avoid chicken and egg problem. */
if (qry->sname[0] == '\0' && qry->stype == KNOT_RRTYPE_DNSKEY) {
kr_zonecut_set_sbelt(request->ctx, &qry->zone_cut);
qry->flags.NO_THROTTLE = true; /* Pick even bad SBELT servers */
}
- kr_nsrep_elect(qry, request->ctx);
- if (qry->ns.score > KR_NS_MAX_SCORE) {
- if (kr_zonecut_is_empty(&qry->zone_cut)) {
- VERBOSE_MSG(qry, "=> no NS with an address\n");
- } else {
- VERBOSE_MSG(qry, "=> no valid NS left\n");
- }
- if (!qry->flags.NO_NS_FOUND) {
- qry->flags.NO_NS_FOUND = true;
- } else {
- ITERATE_LAYERS(request, qry, reset);
- kr_rplan_pop(rplan, qry);
- }
- return KR_STATE_PRODUCE;
- }
}
- /* Resolve address records */
- if (qry->ns.addr[0].ip.sa_family == AF_UNSPEC) {
- int ret = ns_resolve_addr(qry, request);
- if (ret != 0) {
- qry->flags.AWAIT_IPV6 = false;
- qry->flags.AWAIT_IPV4 = false;
- qry->flags.TCP = false;
- qry->ns.name = NULL;
- goto ns_election; /* Must try different NS */
+ qry->server_selection.choose_transport(qry, transport);
+
+ if (*transport == NULL) {
+ /* Properly signal to serve_stale module. */
+ if (qry->flags.NO_NS_FOUND) {
+ ITERATE_LAYERS(request, qry, reset);
+ kr_rplan_pop(rplan, qry);
+ } else {
+ /* FIXME: This is probably quite inefficient:
+ * we go through the whole qr_task_step loop just because of the serve_stale
+ * module which might not even be loaded. */
+ qry->flags.NO_NS_FOUND = true;
}
+ return KR_STATE_PRODUCE;
+ }
+
+ if ((*transport)->protocol == KR_TRANSPORT_RESOLVE_A || (*transport)->protocol == KR_TRANSPORT_RESOLVE_AAAA) {
+ uint16_t type = (*transport)->protocol == KR_TRANSPORT_RESOLVE_A ? KNOT_RRTYPE_A : KNOT_RRTYPE_AAAA;
+ ns_resolve_addr(qry, qry->request, *transport, type);
ITERATE_LAYERS(request, qry, reset);
return KR_STATE_PRODUCE;
}
+ qry->flags.SAFEMODE = qry->flags.SAFEMODE || (*transport)->safe_mode;
+
/* Randomize query case (if not in safe mode or turned off) */
qry->secret = (qry->flags.SAFEMODE || qry->flags.NO_0X20)
? 0 : kr_rand_bytes(sizeof(qry->secret));
* kr_resolve_checkout().
*/
qry->timestamp_mono = kr_now();
- *dst = &qry->ns.addr[0].ip;
- *type = (qry->flags.TCP) ? SOCK_STREAM : SOCK_DGRAM;
return request->state;
}
#endif /* ENABLE_COOKIES */
int kr_resolve_checkout(struct kr_request *request, const struct sockaddr *src,
- struct sockaddr *dst, int type, knot_pkt_t *packet)
+ struct kr_transport *transport, knot_pkt_t *packet)
{
/* @todo: Update documentation if this function becomes approved. */
* actual cookie. If we don't know the server address then we
* also don't know the actual cookie size.
*/
- if (!outbound_request_update_cookies(request, src, dst)) {
+ if (!outbound_request_update_cookies(request, src, &transport->address.ip)) {
return kr_error(EINVAL);
}
}
/* Run the checkout layers and cancel on failure.
* The checkout layer doesn't persist the state, so canceled subrequests
* don't affect the resolution or rest of the processing. */
+ int type = -1;
+ switch(transport->protocol) {
+ case KR_TRANSPORT_UDP:
+ type = SOCK_DGRAM;
+ break;
+ case KR_TRANSPORT_TCP:
+ case KR_TRANSPORT_TLS:
+ type = SOCK_STREAM;
+ break;
+ default:
+ assert(0);
+ }
int state = request->state;
- ITERATE_LAYERS(request, qry, checkout, packet, dst, type);
+ ITERATE_LAYERS(request, qry, checkout, packet, &transport->address.ip, type);
if (request->state & KR_STATE_FAIL) {
request->state = state; /* Restore */
return kr_error(ECANCELED);
WITH_VERBOSE(qry) {
KR_DNAME_GET_STR(qname_str, knot_pkt_qname(packet));
+ KR_DNAME_GET_STR(ns_name, transport->ns_name);
KR_DNAME_GET_STR(zonecut_str, qry->zone_cut.name);
KR_RRTYPE_GET_STR(type_str, knot_pkt_qtype(packet));
+ const char *ns_str = kr_straddr(&transport->address.ip);
- for (size_t i = 0; i < KR_NSREP_MAXADDR; ++i) {
- struct sockaddr *addr = &qry->ns.addr[i].ip;
- if (addr->sa_family == AF_UNSPEC) {
- break;
- }
- if (!kr_inaddr_equal(dst, addr)) {
- continue;
- }
- const char *ns_str = kr_straddr(addr);
- VERBOSE_MSG(qry,
- "=> id: '%05u' querying: '%s' score: %u zone cut: '%s' "
+ VERBOSE_MSG(qry,
+ "=> id: '%05u' querying: '%s'@'%s' zone cut: '%s' "
"qname: '%s' qtype: '%s' proto: '%s'\n",
- qry->id, ns_str ? ns_str : "", qry->ns.score, zonecut_str,
+ qry->id, ns_name, ns_str ? ns_str : "", zonecut_str,
qname_str, type_str, (qry->flags.TCP) ? "tcp" : "udp");
-
- break;
- }}
+ }
return kr_ok();
}
#include "lib/layer.h"
#include "lib/generic/map.h"
#include "lib/generic/array.h"
-#include "lib/nsrep.h"
+#include "lib/selection.h"
#include "lib/rplan.h"
#include "lib/module.h"
#include "lib/cache/api.h"
map_t negative_anchors;
struct kr_zonecut root_hints;
struct kr_cache cache;
- kr_nsrep_rtt_lru_t *cache_rtt;
unsigned cache_rtt_tout_retry_interval;
- kr_nsrep_lru_t *cache_rep;
module_array_t *modules;
/* The cookie context structure should not be held within the cookies
* module because of better access. */
bool xdp:1; /**< true if the request is on AF_XDP; only meaningful if (dst_addr). */
};
+typedef bool (*addr_info_f)(struct sockaddr*);
+typedef void (*async_resolution_f)(knot_dname_t*, enum knot_rr_type);
+typedef array_t(union inaddr) inaddr_array_t;
+
/**
* Name resolution request.
*
} qsource;
struct {
unsigned rtt; /**< Current upstream RTT */
- const struct sockaddr *addr; /**< Current upstream address */
+ const struct kr_transport *transport; /**< Current upstream transport */
} upstream; /**< Upstream information, valid only in consume() phase */
struct kr_qflags options;
int state;
int vars_ref; /**< Reference to per-request variable table. LUA_NOREF if not set. */
knot_mm_t pool;
unsigned int uid; /**< for logging purposes only */
+ struct {
+ addr_info_f is_tls_capable;
+ addr_info_f is_tcp_connected;
+ addr_info_f is_tcp_waiting;
+ inaddr_array_t forwarding_targets; /**< When forwarding, possible targets are put here */
+ } selection_context;
unsigned int count_no_nsaddr;
unsigned int count_fail_row;
alloc_wire_f alloc_wire_cb; /**< CB to allocate answer wire (can be NULL). */
* @return any state
*/
KR_EXPORT
-int kr_resolve_consume(struct kr_request *request, const struct sockaddr *src, knot_pkt_t *packet);
+int kr_resolve_consume(struct kr_request *request, struct kr_transport **transport, knot_pkt_t *packet);
/**
* Produce either next additional query or finish.
* @return any state
*/
KR_EXPORT
-int kr_resolve_produce(struct kr_request *request, struct sockaddr **dst, int *type, knot_pkt_t *packet);
+int kr_resolve_produce(struct kr_request *request, struct kr_transport **transport, knot_pkt_t *packet);
/**
* Finalises the outbound query packet with the knowledge of the IP addresses.
*/
KR_EXPORT
int kr_resolve_checkout(struct kr_request *request, const struct sockaddr *src,
- struct sockaddr *dst, int type, knot_pkt_t *packet);
+ struct kr_transport *transport, knot_pkt_t *packet);
/**
* Finish resolution and commit results if the state is DONE.
*/
KR_EXPORT KR_PURE
knot_mm_t *kr_resolve_pool(struct kr_request *request);
-
qry->flags = rplan->request->options;
qry->parent = parent;
qry->request = rplan->request;
- qry->ns.ctx = rplan->request->ctx;
- qry->ns.addr[0].ip.sa_family = AF_UNSPEC;
+
gettimeofday(&qry->timestamp, NULL);
qry->timestamp_mono = kr_now();
qry->creation_time_mono = parent ? parent->creation_time_mono : qry->timestamp_mono;
kr_zonecut_init(&qry->zone_cut, (const uint8_t *)"", rplan->pool);
qry->reorder = qry->flags.REORDER_RR ? kr_rand_bytes(sizeof(qry->reorder)) : 0;
- /* When forwarding, keep the nameserver addresses. */
- if (parent && parent->flags.FORWARD && qry->flags.FORWARD) {
- ret = kr_nsrep_copy_set(&qry->ns, &parent->ns);
- if (ret) {
- query_free(rplan->pool, qry);
- return NULL;
- }
- }
assert((rplan->pending.len == 0 && rplan->resolved.len == 0)
== (rplan->initial == NULL));
#include <libknot/dname.h>
#include <libknot/codes.h>
+#include "lib/selection.h"
#include "lib/cache/api.h"
#include "lib/zonecut.h"
-#include "lib/nsrep.h"
/** Query flags */
struct kr_qflags {
struct kr_query *cname_parent;
struct kr_request *request; /**< Parent resolution request. */
kr_stale_cb stale_cb; /**< See the type */
- /* Beware: this must remain the last, because of lua bindings. */
- struct kr_nsrep ns;
+ struct kr_server_selection server_selection;
};
/** @cond internal Array of queries. */
--- /dev/null
+#include <libknot/dname.h>
+
+#include "lib/selection.h"
+#include "lib/selection_forward.h"
+#include "lib/selection_iter.h"
+#include "lib/generic/pack.h"
+#include "lib/generic/trie.h"
+#include "lib/rplan.h"
+#include "lib/cache/api.h"
+#include "lib/resolve.h"
+
+#include "daemon/worker.h"
+#include "daemon/tls.h"
+
+#include "lib/utils.h"
+
+#define VERBOSE_MSG(qry, ...) QRVERBOSE((qry), "slct", __VA_ARGS__)
+
+/** @internal Macro to set address structure. */
+#define ADDR_SET(sa, family, addr, len, port) do {\
+ memcpy(&sa ## _addr, (addr), (len)); \
+ sa ## _family = (family); \
+ sa ## _port = htons(port); \
+} while (0)
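+/* For example, ADDR_SET(dst->ip4.sin, AF_INET, bytes, len, 0) expands,
+ * via token pasting on the `sin` member prefix, to:
+ *	memcpy(&dst->ip4.sin_addr, (bytes), (len));
+ *	dst->ip4.sin_family = AF_INET;
+ *	dst->ip4.sin_port = htons(0);
+ */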
+
+#define DEFAULT_TIMEOUT 800
+#define MAX_TIMEOUT 10000
+#define MAX_BACKOFF 5
+#define MINIMAL_TIMEOUT_ADDITION 20
+
+/* After TCP_TIMEOUT_THRESHOLD timeouts on one transport, we'll switch to TCP. */
+#define TCP_TIMEOUT_THRESHOLD 2
+/* If the expected RTT is over TCP_RTT_THRESHOLD we switch to TCP instead. */
+#define TCP_RTT_THRESHOLD 2000
+
+/* Define ε for the ε-greedy algorithm (see select_transport)
+ * as ε = EPSILON_NOMIN/EPSILON_DENOM; with 1/20 we explore a random
+ * choice in roughly 5% of selections. */
+#define EPSILON_NOMIN 1
+#define EPSILON_DENOM 20
+
+/* Simple cache interface follows */
+
+#define KEY_PREFIX 'S'
+
+void *prefix_key(const uint8_t *ip, size_t len)
+{
+ void *key = malloc(len + 1);
+ *(char *)key = KEY_PREFIX;
+ memcpy((uint8_t *)key + 1, ip, len);
+ return key;
+}
+
+#undef KEY_PREFIX
+
+/* The first timeout value is calculated by calc_timeout() as
+ * SRTT + 4 * VARIANCE = 0 + 4 * (DEFAULT_TIMEOUT / 4), i.e. exactly DEFAULT_TIMEOUT. */
+static const struct rtt_state default_rtt_state = { .srtt = 0,
+ .variance =
+ DEFAULT_TIMEOUT / 4,
+ .consecutive_timeouts = 0,
+ .dead_since = 0 };
+
+/* Note that this opens a cache transaction, which is usually closed by calling
+ * `put_rtt_state`, i.e. the caller is responsible for closing it
+ * (e.g. by calling kr_cache_commit). */
+struct rtt_state get_rtt_state(const uint8_t *ip, size_t len,
+ struct kr_cache *cache)
+{
+ struct rtt_state state;
+ knot_db_val_t value;
+ knot_db_t *db = cache->db;
+ struct kr_cdb_stats *stats = &cache->stats;
+ uint8_t *prefixed_ip = prefix_key(ip, len);
+
+ knot_db_val_t key = { .len = len + 1, .data = prefixed_ip };
+
+ if (cache->api->read(db, stats, &key, &value, 1)) {
+ state = default_rtt_state;
+ } else {
+ assert(value.len == sizeof(struct rtt_state));
+ state = *(struct rtt_state *)value.data;
+ }
+
+ free(prefixed_ip);
+ return state;
+}
+
+int put_rtt_state(const uint8_t *ip, size_t len, struct rtt_state state,
+ struct kr_cache *cache)
+{
+ knot_db_t *db = cache->db;
+ struct kr_cdb_stats *stats = &cache->stats;
+ uint8_t *prefixed_ip = prefix_key(ip, len);
+
+ knot_db_val_t key = { .len = len + 1, .data = prefixed_ip };
+ knot_db_val_t value = { .len = sizeof(struct rtt_state),
+ .data = &state };
+
+ int ret = cache->api->write(db, stats, &key, &value, 1);
+ cache->api->commit(db, stats);
+
+ free(prefixed_ip);
+ return ret;
+}
+
+void bytes_to_ip(uint8_t *bytes, size_t len, union inaddr *dst)
+{
+ switch (len) {
+ case sizeof(struct in_addr):
+ ADDR_SET(dst->ip4.sin, AF_INET, bytes, len, 0);
+ break;
+ case sizeof(struct in6_addr):
+ ADDR_SET(dst->ip6.sin6, AF_INET6, bytes, len, 0);
+ break;
+ default:
+ assert(0);
+ }
+}
+
+uint8_t *ip_to_bytes(const union inaddr *src, size_t len)
+{
+ switch (len) {
+ case sizeof(struct in_addr):
+ return (uint8_t *)&src->ip4.sin_addr;
+ case sizeof(struct in6_addr):
+ return (uint8_t *)&src->ip6.sin6_addr;
+ default:
+ assert(0);
+ return NULL; /* Unsupported address length. */
+ }
+}
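+
+/* Illustrative round trip (assuming `raw` holds 4 IPv4 address bytes):
+ *	union inaddr a;
+ *	bytes_to_ip(raw, sizeof(struct in_addr), &a);
+ *	uint8_t *raw_again = ip_to_bytes(&a, sizeof(struct in_addr));
+ * `raw_again` then points at the copy of those 4 bytes inside `a`. */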
+
+static bool no_rtt_info(struct rtt_state s)
+{
+ return s.srtt == 0 && s.consecutive_timeouts == 0;
+}
+
+static unsigned back_off_timeout(uint32_t to, int pow)
+{
+ if (pow > MAX_BACKOFF) {
+ to *= 1 << MAX_BACKOFF;
+ } else {
+ to *= (1 << pow);
+ }
+ if (to > MAX_TIMEOUT) {
+ to = MAX_TIMEOUT;
+ }
+ return to;
+}
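+
+/* For example, with DEFAULT_TIMEOUT = 800 ms, a server that has already timed
+ * out twice in this query gets back_off_timeout(800, 2) = 800 * (1 << 2) =
+ * 3200 ms; the multiplier is capped at 1 << MAX_BACKOFF and the result at
+ * MAX_TIMEOUT. */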
+
+/* This follows RFC 6298, sec. 2, apart from the default timeout value
+ * and the minimal variance addition. */
+static unsigned calc_timeout(struct rtt_state state)
+{
+ int32_t timeout =
+ state.srtt + MAX(4 * state.variance, MINIMAL_TIMEOUT_ADDITION);
+ return back_off_timeout(timeout, state.consecutive_timeouts);
+}
+
+/* This follows RFC 6298, sec. 2, though with slightly different smoothing constants. */
+static struct rtt_state calc_rtt_state(struct rtt_state old, unsigned new_rtt)
+{
+ if (no_rtt_info(old)) {
+ return (struct rtt_state){ new_rtt, new_rtt / 2, 0 };
+ }
+
+ struct rtt_state ret;
+
+ ret.srtt = (int32_t)(0.75 * old.srtt + 0.25 * new_rtt);
+ ret.variance = (int32_t)(0.875 * old.variance +
+ 0.125 * abs(old.srtt - (int32_t)new_rtt));
+ ret.consecutive_timeouts = 0;
+
+ return ret;
+}
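+
+/* Worked example: for an old state {srtt = 100, variance = 20} (ms) and a new
+ * measurement of 180 ms, the update gives
+ *	srtt     = 0.75 * 100 + 0.25 * 180          = 120
+ *	variance = 0.875 * 20 + 0.125 * |100 - 180| = 27 (truncated)
+ * and calc_timeout() would then yield 120 + MAX(4 * 27, 20) = 228 ms
+ * (no back-off, since consecutive_timeouts is reset to 0). */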
+
+/**
+ * @internal Invalidate addresses which should be considered dead
+ */
+static void invalidate_dead_upstream(struct address_state *state,
+ unsigned int retry_timeout)
+{
+ if (kr_now() - state->rtt_state.dead_since < retry_timeout) {
+ state->generation = -1;
+ }
+}
+
+/**
+ * @internal Check if IP address is TLS capable.
+ *
+ * @p req has to have the selection_context properly initialized.
+ */
+static void check_tls_capable(struct address_state *address_state,
+ struct kr_request *req, struct sockaddr *address)
+{
+ address_state->tls_capable =
+ req->selection_context.is_tls_capable ?
+ req->selection_context.is_tls_capable(address) :
+ false;
+}
+
+#if 0
+/* TODO: uncomment this once we actually use the information it collects. */
+/**
+ * Check if there is an existing TCP connection to this address.
+ *
+ * @p req has to have the selection_context properly initialized.
+ */
+void check_tcp_connections(struct address_state *address_state, struct kr_request *req, struct sockaddr *address) {
+ address_state->tcp_connected = req->selection_context.is_tcp_connected ? req->selection_context.is_tcp_connected(address) : false;
+ address_state->tcp_waiting = req->selection_context.is_tcp_waiting ? req->selection_context.is_tcp_waiting(address) : false;
+}
+#endif
+
+/**
+ * @internal Invalidate address if the respective IP version is disabled.
+ */
+static void check_network_settings(struct address_state *address_state,
+ size_t address_len, bool no_ipv4, bool no_ipv6)
+{
+ if (no_ipv4 && address_len == sizeof(struct in_addr)) {
+ address_state->generation = -1;
+ }
+ if (no_ipv6 && address_len == sizeof(struct in6_addr)) {
+ address_state->generation = -1;
+ }
+}
+
+void update_address_state(struct address_state *state, uint8_t *address,
+ size_t address_len, struct kr_query *qry)
+{
+ union inaddr tmp_address;
+ bytes_to_ip(address, address_len, &tmp_address);
+ check_tls_capable(state, qry->request, &tmp_address.ip);
+ /* TODO: uncomment this once we actually use the information it collects
+ check_tcp_connections(address_state, qry->request, &tmp_address.ip);
+ */
+ check_network_settings(state, address_len, qry->flags.NO_IPV4,
+ qry->flags.NO_IPV6);
+ state->rtt_state =
+ get_rtt_state(address, address_len, &qry->request->ctx->cache);
+ invalidate_dead_upstream(
+ state, qry->request->ctx->cache_rtt_tout_retry_interval);
+#ifdef SELECTION_CHOICE_LOGGING
+ // This is sometimes useful for debugging, but usually too verbose
+ WITH_VERBOSE(qry)
+ {
+ const char *ns_str = kr_straddr(&tmp_address.ip);
+ VERBOSE_MSG(qry, "rtt of %s is %d, variance is %d\n", ns_str,
+ state->rtt_state.srtt, state->rtt_state.variance);
+ }
+#endif
+}
+
+static int cmp_choices(const void *a, const void *b)
+{
+ struct choice *a_ = (struct choice *)a;
+ struct choice *b_ = (struct choice *)b;
+
+ int diff;
+ /* Address with no RTT information is better than address
+ * with some information. */
+ if ((diff = no_rtt_info(b_->address_state->rtt_state) -
+ no_rtt_info(a_->address_state->rtt_state))) {
+ return diff;
+ }
+ /* Address with less errors is better. */
+ if ((diff = a_->address_state->error_count -
+ b_->address_state->error_count)) {
+ return diff;
+ }
+ /* Address with smaller expected timeout is better. */
+ if ((diff = calc_timeout(a_->address_state->rtt_state) -
+ calc_timeout(b_->address_state->rtt_state))) {
+ return diff;
+ }
+ return 0;
+}
+
+/* Fisher-Yates shuffle of the choices */
+static void shuffle_choices(struct choice choices[], int choices_len)
+{
+ struct choice tmp;
+ for (int i = choices_len - 1; i > 0; i--) {
+ int j = kr_rand_bytes(1) % (i + 1);
+ tmp = choices[i];
+ choices[i] = choices[j];
+ choices[j] = tmp;
+ }
+}
+
+/* Performs the actual selection (currently a variation on epsilon-greedy). */
+struct kr_transport *select_transport(struct choice choices[], int choices_len,
+ struct to_resolve unresolved[],
+ int unresolved_len, int timeouts,
+ struct knot_mm *mempool, bool tcp,
+ size_t *choice_index)
+{
+ if (!choices_len && !unresolved_len) {
+ /* There is nothing to choose from */
+ return NULL;
+ }
+
+ struct kr_transport *transport =
+ mm_alloc(mempool, sizeof(struct kr_transport));
+ memset(transport, 0, sizeof(struct kr_transport));
+
+ int choice = 0;
+ if (kr_rand_coin(EPSILON_NOMIN, EPSILON_DENOM) || choices_len == 0) {
+ /* "EXPLORE":
+ * randomly choose some option
+ * (including resolution of some new name). */
+ int index = kr_rand_bytes(1) % (choices_len + unresolved_len);
+ if (index < unresolved_len) {
+ // We will resolve a new NS name
+ *transport = (struct kr_transport){
+ .protocol = unresolved[index].type,
+ .ns_name = unresolved[index].name
+ };
+ return transport;
+ } else {
+ choice = index - unresolved_len;
+ }
+ } else {
+ /* "EXPLOIT":
+ * choose a resolved address which seems best right now. */
+ shuffle_choices(choices, choices_len);
+ /* If there are some addresses with no rtt_info we try them
+ * first (see cmp_choices). So unknown servers are chosen
+ * *before* the best known server. This ensures that every option
+ * is tried before going back to one that was tried before. */
+ qsort(choices, choices_len, sizeof(struct choice), cmp_choices);
+ choice = 0;
+ }
+
+ struct choice *chosen = &choices[choice];
+
+ /* Don't try the same server again when there are other choices to be explored */
+ if (chosen->address_state->error_count && unresolved_len) {
+ int index = kr_rand_bytes(1) % unresolved_len;
+ *transport = (struct kr_transport){
+ .ns_name = unresolved[index].name,
+ .protocol = unresolved[index].type,
+ };
+ return transport;
+ }
+
+ unsigned timeout;
+ if (no_rtt_info(chosen->address_state->rtt_state)) {
+ /* Exponential back-off when retrying after timeout and choosing
+ * an unknown server. */
+ timeout = back_off_timeout(DEFAULT_TIMEOUT, timeouts);
+ } else {
+ timeout = calc_timeout(chosen->address_state->rtt_state);
+ }
+
+ enum kr_transport_protocol protocol;
+ if (chosen->address_state->tls_capable) {
+ protocol = KR_TRANSPORT_TLS;
+ } else if (tcp ||
+ chosen->address_state->errors[KR_SELECTION_QUERY_TIMEOUT] >= TCP_TIMEOUT_THRESHOLD ||
+ timeout > TCP_RTT_THRESHOLD) {
+ protocol = KR_TRANSPORT_TCP;
+ } else {
+ protocol = KR_TRANSPORT_UDP;
+ }
+
+ *transport = (struct kr_transport){
+ .ns_name = chosen->address_state->ns_name,
+ .protocol = protocol,
+ .timeout = timeout,
+ .safe_mode =
+ chosen->address_state->errors[KR_SELECTION_FORMERROR],
+ };
+
+ int port;
+ if (!(port = chosen->port)) {
+ switch (transport->protocol) {
+ case KR_TRANSPORT_TLS:
+ port = KR_DNS_TLS_PORT;
+ break;
+ case KR_TRANSPORT_UDP:
+ case KR_TRANSPORT_TCP:
+ port = KR_DNS_PORT;
+ break;
+ default:
+ assert(0);
+ break;
+ }
+ }
+
+ switch (chosen->address_len) {
+ case sizeof(struct in_addr):
+ ADDR_SET(transport->address.ip4.sin, AF_INET, chosen->address,
+ chosen->address_len, port);
+ transport->address_len = chosen->address_len;
+ break;
+ case sizeof(struct in6_addr):
+ ADDR_SET(transport->address.ip6.sin6, AF_INET6, chosen->address,
+ chosen->address_len, port);
+ transport->address_len = chosen->address_len;
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ if (choice_index) {
+ *choice_index = chosen->address_state->choice_array_index;
+ }
+
+ return transport;
+}
+
+void update_rtt(struct kr_query *qry, struct address_state *addr_state,
+ const struct kr_transport *transport, unsigned rtt)
+{
+ if (!transport || !addr_state) {
+ /* Answers from cache have NULL transport, ignore them. */
+ return;
+ }
+
+ struct kr_cache *cache = &qry->request->ctx->cache;
+
+ uint8_t *address =
+ ip_to_bytes(&transport->address, transport->address_len);
+ /* This construct is a bit racy since the global state may change
+ * between calls to `get_rtt_state` and `put_rtt_state` but we don't
+ * care that much since it is rare and we only risk a slightly suboptimal
+ * transport choice. */
+ struct rtt_state cur_rtt_state =
+ get_rtt_state(address, transport->address_len, cache);
+ struct rtt_state new_rtt_state = calc_rtt_state(cur_rtt_state, rtt);
+ put_rtt_state(address, transport->address_len, new_rtt_state, cache);
+
+ WITH_VERBOSE(qry)
+ {
+ KR_DNAME_GET_STR(ns_name, transport->ns_name);
+ KR_DNAME_GET_STR(zonecut_str, qry->zone_cut.name);
+ const char *ns_str = kr_straddr(&transport->address.ip);
+
+ VERBOSE_MSG(
+ qry,
+ "=> id: '%05u' updating: '%s'@'%s' zone cut: '%s' with rtt %u to srtt: %d and variance: %d \n",
+ qry->id, ns_name, ns_str ? ns_str : "", zonecut_str,
+ rtt, new_rtt_state.srtt, new_rtt_state.variance);
+ }
+}
+
+static void cache_timeout(const struct kr_transport *transport,
+ struct address_state *addr_state, struct kr_cache *cache)
+{
+ if (transport->deduplicated) {
+ /* Transport was chosen by a different query, that one will
+ * cache the result. */
+ return;
+ }
+
+ uint8_t *address =
+ ip_to_bytes(&transport->address, transport->address_len);
+ struct rtt_state old_state = addr_state->rtt_state;
+ struct rtt_state cur_state =
+ get_rtt_state(address, transport->address_len, cache);
+
+ /* We could lose an update from another process by doing this,
+ * but at least the timeout count can't blow up. */
+ if (cur_state.consecutive_timeouts == old_state.consecutive_timeouts) {
+ if (++cur_state.consecutive_timeouts >=
+ KR_NS_TIMEOUT_ROW_DEAD) {
+ cur_state.dead_since = kr_now();
+ }
+ put_rtt_state(address, transport->address_len, cur_state,
+ cache);
+ } else {
+ /* `get_rtt_state` opens a cache transaction, we have to end it. */
+ kr_cache_commit(cache);
+ }
+}
+
+void error(struct kr_query *qry, struct address_state *addr_state,
+ const struct kr_transport *transport,
+ enum kr_selection_error sel_error)
+{
+ if (!transport || !addr_state) {
+ /* Answers from cache have NULL transport, ignore them. */
+ return;
+ }
+
+ if (sel_error >= KR_SELECTION_NUMBER_OF_ERRORS) {
+ assert(0);
+ }
+
+ if (sel_error == KR_SELECTION_QUERY_TIMEOUT) {
+ qry->server_selection.local_state->timeouts++;
+ // Make sure the transport was chosen by this query
+ if (!transport->deduplicated) {
+ cache_timeout(transport, addr_state,
+ &qry->request->ctx->cache);
+ }
+ }
+
+ if (sel_error == KR_SELECTION_TRUNCATED &&
+ transport->protocol == KR_TRANSPORT_UDP) {
+ /* Don't punish the server that told us to switch to TCP. */
+ qry->server_selection.local_state->truncated = true;
+ } else {
+ if (sel_error == KR_SELECTION_TRUNCATED) {
+ /* TRUNCATED over TCP/TLS, upstream is broken. */
+ addr_state->unrecoverable_errors++;
+ }
+
+ if (UNRECOVERABLE_ERRORS[sel_error]) {
+ addr_state->unrecoverable_errors++;
+ }
+
+ if (sel_error == KR_SELECTION_FORMERROR && transport->safe_mode) {
+ addr_state->unrecoverable_errors++;
+ }
+
+ addr_state->errors[sel_error]++;
+ addr_state->error_count++;
+ }
+
+ WITH_VERBOSE(qry)
+ {
+ KR_DNAME_GET_STR(ns_name, transport->ns_name);
+ KR_DNAME_GET_STR(zonecut_str, qry->zone_cut.name);
+ const char *ns_str = kr_straddr(&transport->address.ip);
+
+ VERBOSE_MSG(
+ qry,
+ "=> id: '%05u' noting selection error: '%s'@'%s' zone cut: '%s' error no.:%d\n",
+ qry->id, ns_name, ns_str ? ns_str : "", zonecut_str,
+ sel_error);
+ }
+}
+
+void kr_server_selection_init(struct kr_query *qry)
+{
+ struct knot_mm *mempool = &qry->request->pool;
+ if (qry->flags.FORWARD || qry->flags.STUB) {
+ qry->server_selection = (struct kr_server_selection){
+ .initialized = true,
+ .choose_transport = forward_choose_transport,
+ .update_rtt = forward_update_rtt,
+ .error = forward_error,
+ .local_state =
+ mm_alloc(mempool, sizeof(struct local_state)),
+ };
+ memset(qry->server_selection.local_state, 0,
+ sizeof(struct local_state));
+ forward_local_state_alloc(
+ mempool, &qry->server_selection.local_state->private,
+ qry->request);
+ } else {
+ qry->server_selection = (struct kr_server_selection){
+ .initialized = true,
+ .choose_transport = iter_choose_transport,
+ .update_rtt = iter_update_rtt,
+ .error = iter_error,
+ .local_state =
+ mm_alloc(mempool, sizeof(struct local_state)),
+ };
+ memset(qry->server_selection.local_state, 0,
+ sizeof(struct local_state));
+ iter_local_state_alloc(
+ mempool, &qry->server_selection.local_state->private);
+ }
+}
+
+int kr_forward_add_target(struct kr_request *req, const struct sockaddr *sock)
+{
+ if (!req->selection_context.forwarding_targets.at) {
+ return kr_error(EINVAL);
+ }
+
+ union inaddr address;
+
+ switch (sock->sa_family) {
+ case AF_INET:
+ if (req->options.NO_IPV4)
+ return kr_error(EINVAL);
+ address.ip4 = *(const struct sockaddr_in *)sock;
+ break;
+ case AF_INET6:
+ if (req->options.NO_IPV6)
+ return kr_error(EINVAL);
+ address.ip6 = *(const struct sockaddr_in6 *)sock;
+ break;
+ default:
+ return kr_error(EINVAL);
+ }
+
+ array_push_mm(req->selection_context.forwarding_targets, address,
+ kr_memreserve, &req->pool);
+ return kr_ok();
+}
--- /dev/null
+/* Copyright (C) 2014-2020 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+ * SPDX-License-Identifier: GPL-3.0-or-later
+ */
+
+#pragma once
+
+/**
+ * @file selection.h
+ * Provides server selection API (see `kr_server_selection`) and functions common to both implementations.
+ */
+
+#include "lib/cache/api.h"
+
+/* After KR_NS_TIMEOUT_ROW_DEAD consecutive timeouts, we consider the upstream IP dead for KR_NS_TIMEOUT_RETRY_INTERVAL ms */
+#define KR_NS_TIMEOUT_ROW_DEAD 4
+#define KR_NS_TIMEOUT_RETRY_INTERVAL 1000
+
+/**
+ * These errors are to be reported as feedback to server selection.
+ * See `kr_server_selection::error` for more details.
+ */
+enum kr_selection_error {
+ KR_SELECTION_OK = 0,
+
+ // Network errors
+ KR_SELECTION_QUERY_TIMEOUT,
+ KR_SELECTION_TLS_HANDSHAKE_FAILED,
+ KR_SELECTION_TCP_CONNECT_FAILED,
+ KR_SELECTION_TCP_CONNECT_TIMEOUT,
+
+ // RCODEs
+ KR_SELECTION_REFUSED,
+ KR_SELECTION_SERVFAIL,
+ KR_SELECTION_FORMERROR,
+ KR_SELECTION_NOTIMPL,
+ KR_SELECTION_OTHER_RCODE,
+
+ // DNS errors
+ KR_SELECTION_TRUNCATED,
+ KR_SELECTION_DNSSEC_ERROR,
+ KR_SELECTION_LAME_DELEGATION,
+ /** Too long chain, or cycle. */
+ KR_SELECTION_BAD_CNAME,
+
+ /** Leave this last, as it is used as array size. */
+ KR_SELECTION_NUMBER_OF_ERRORS
+};
+
+enum kr_transport_protocol {
+ /** Selected NS name has no known IPv4 address; it has to be resolved first. */
+ KR_TRANSPORT_RESOLVE_A,
+ /** Selected NS name has no known IPv6 address; it has to be resolved first. */
+ KR_TRANSPORT_RESOLVE_AAAA,
+ KR_TRANSPORT_UDP,
+ KR_TRANSPORT_TCP,
+ KR_TRANSPORT_TLS,
+};
+
+/**
+ * Output of the selection algorithm.
+ */
+struct kr_transport {
+ knot_dname_t *ns_name; /**< Set to "." for forwarding targets.*/
+ union inaddr address;
+ size_t address_len;
+ enum kr_transport_protocol protocol;
+ unsigned timeout; /**< Timeout in ms to be set for UDP transmission. */
+ /** True iff the transport was set in worker.c:subreq_finalize,
+ * which means it may differ from the originally chosen one. */
+ bool deduplicated;
+ bool safe_mode; /**< Turn on SAFEMODE for this transport */
+};
+
+struct local_state {
+ int timeouts; /**< Number of timeouts that occurred while resolving this query. */
+ bool truncated; /**< Query was truncated, switch to TCP. */
+ void *private; /**< Inner state of the implementation.*/
+};
+
+/**
+ * Specifies an API for selecting transports and giving feedback on the choices.
+ *
+ * The function pointers are to be used throughout the resolver whenever some information
+ * about the transport is obtained, e.g. the RTT in `worker.c` or the RCODE in `iterate.c`.
+ */
+struct kr_server_selection {
+ bool initialized;
+ /**
+ * Puts a pointer to the next transport for @p qry into @p transport.
+ *
+ * Allocates a new kr_transport in the request's mempool and chooses the transport to be used for this query.
+ * Selection may fail, so @p transport can be set to NULL.
+ *
+ * @param transport filled with a pointer to the chosen transport, or NULL on failure
+ */
+ void (*choose_transport)(struct kr_query *qry,
+ struct kr_transport **transport);
+ /** Report back the RTT of network operation for transport in ms. */
+ void (*update_rtt)(struct kr_query *qry,
+ const struct kr_transport *transport, unsigned rtt);
+ /** Report back an error encountered with the chosen transport. See `enum kr_selection_error`. */
+ void (*error)(struct kr_query *qry,
+ const struct kr_transport *transport,
+ enum kr_selection_error error);
+
+ struct local_state *local_state;
+};
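+
+/*
+ * A rough sketch of the intended calling pattern (illustrative only; the real
+ * call sites live in the resolver and worker code, and `answered` and
+ * `measured_rtt` below are placeholders):
+ *
+ *	struct kr_transport *transport = NULL;
+ *	qry->server_selection.choose_transport(qry, &transport);
+ *	if (transport) {
+ *		// ... send the query over the chosen transport ...
+ *		if (answered)
+ *			qry->server_selection.update_rtt(qry, transport, measured_rtt);
+ *		else
+ *			qry->server_selection.error(qry, transport, KR_SELECTION_QUERY_TIMEOUT);
+ *	}
+ */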
+
+/**
+ * @brief Initialize the server selection API for @p qry.
+ *
+ * The implementation is to be chosen based on qry->flags.
+ */
+KR_EXPORT
+void kr_server_selection_init(struct kr_query *qry);
+
+/**
+ * @brief Add forwarding target to request.
+ *
+ * This is exposed to Lua in order to add forwarding targets to the request.
+ * These are then shared by all the queries in that request.
+ */
+KR_EXPORT
+int kr_forward_add_target(struct kr_request *req, const struct sockaddr *sock);
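+
+/*
+ * Illustrative sketch only (not part of the API): adding 192.0.2.1#53 as a
+ * forwarding target from C, assuming `req->selection_context.forwarding_targets`
+ * has already been initialized; in practice this is typically driven from Lua.
+ *
+ *	struct sockaddr_in sin = { 0 };
+ *	sin.sin_family = AF_INET;
+ *	sin.sin_port = htons(53);
+ *	inet_pton(AF_INET, "192.0.2.1", &sin.sin_addr);
+ *	kr_forward_add_target(req, (struct sockaddr *)&sin);
+ */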
+
+/**
+ * To be held per IP address in the global LMDB cache
+ */
+struct rtt_state {
+ int32_t srtt;
+ int32_t variance;
+ int32_t consecutive_timeouts;
+ /** Timestamp of when this IP was pronounced dead, based on KR_NS_TIMEOUT_ROW_DEAD. */
+ uint64_t dead_since;
+};
+
+/**
+ * @brief To be held per IP address and locally "inside" query.
+ */
+struct address_state {
+ /** Used to distinguish old and valid records in local_state. */
+ unsigned int generation;
+ struct rtt_state rtt_state;
+ knot_dname_t *ns_name;
+ bool tls_capable : 1;
+ /* TODO: uncomment these once we actually use this information in selection
+ bool tcp_waiting : 1;
+ bool tcp_connected : 1;
+ */
+ int choice_array_index;
+ int error_count;
+ int unrecoverable_errors;
+ int errors[KR_SELECTION_NUMBER_OF_ERRORS];
+};
+
+/**
+ * @brief An array of these is one of the inputs for the actual selection algorithm (`select_transport`).
+ */
+struct choice {
+ uint8_t *address;
+ size_t address_len;
+ struct address_state *address_state;
+ /** used to overwrite the port number;
+ * if zero, `select_transport` determines it. */
+ uint16_t port;
+};
+
+/**
+ * @brief An array of these describes the names that still need to be resolved (i.e. names we have no address for).
+ */
+struct to_resolve {
+ knot_dname_t *name;
+ /** Either KR_TRANSPORT_RESOLVE_A or KR_TRANSPORT_RESOLVE_AAAA is valid here. */
+ enum kr_transport_protocol type;
+};
+
+/**
+ * @brief Based on passed choices, choose the next transport.
+ *
+ * Common function to both implementations (iteration and forwarding).
+ * The `*_choose_transport` functions from `selection_*.h` preprocess the input for this one.
+ *
+ * @param choices Options to choose from, see struct above
+ * @param unresolved Array of names that still need to be resolved (i.e. we have no A/AAAA record for them)
+ * @param timeouts Number of timeouts that occurred in this query (used for exponential backoff)
+ * @param mempool Memory context of the current request
+ * @param tcp Force TCP as the transport protocol
+ * @param[out] choice_index Optionally, the index of the chosen transport in the @p choices array is stored here.
+ * @return Chosen transport or NULL when no choice is viable
+ */
+struct kr_transport *select_transport(struct choice choices[], int choices_len,
+ struct to_resolve unresolved[],
+ int unresolved_len, int timeouts,
+ struct knot_mm *mempool, bool tcp,
+ size_t *choice_index);
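+
+/*
+ * Minimal usage sketch (illustrative only): one already-resolved address and
+ * one name still waiting for its AAAA record. `raw_ip`, `addr_state`, `ns` and
+ * `req` are assumed to be prepared by the caller; see selection_iter.c and
+ * selection_forward.c for the real preprocessing.
+ *
+ *	struct choice choices[] = { {
+ *		.address = raw_ip, .address_len = sizeof(struct in_addr),
+ *		.address_state = addr_state, .port = 0,
+ *	} };
+ *	struct to_resolve unresolved[] = {
+ *		{ .name = ns, .type = KR_TRANSPORT_RESOLVE_AAAA },
+ *	};
+ *	struct kr_transport *t = select_transport(choices, 1, unresolved, 1,
+ *						  0, &req->pool, false, NULL);
+ */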
+
+/**
+ * Common part of RTT feedback mechanism. Notes RTT to global cache.
+ */
+void update_rtt(struct kr_query *qry, struct address_state *addr_state,
+ const struct kr_transport *transport, unsigned rtt);
+
+/**
+ * Common part of error feedback mechanism.
+ */
+void error(struct kr_query *qry, struct address_state *addr_state,
+ const struct kr_transport *transport,
+ enum kr_selection_error sel_error);
+
+/**
+ * Get RTT state from cache. Returns `default_rtt_state` on unknown addresses.
+ */
+struct rtt_state get_rtt_state(const uint8_t *ip, size_t len,
+ struct kr_cache *cache);
+
+int put_rtt_state(const uint8_t *ip, size_t len, struct rtt_state state,
+ struct kr_cache *cache);
+
+/**
+ * @internal Helper function for conversion between different IP representations.
+ */
+void bytes_to_ip(uint8_t *bytes, size_t len, union inaddr *dst);
+
+/**
+ * @internal Helper function for conversion between different IP representations.
+ */
+uint8_t *ip_to_bytes(const union inaddr *src, size_t len);
+
+/**
+ * @internal Fetch per-address information from various sources.
+ */
+void update_address_state(struct address_state *state, uint8_t *address,
+ size_t address_len, struct kr_query *qry);
--- /dev/null
+/* Copyright (C) 2014-2020 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+ * SPDX-License-Identifier: GPL-3.0-or-later
+ */
+
+#include "lib/selection_forward.h"
+#include "lib/resolve.h"
+
+#define VERBOSE_MSG(qry, ...) QRVERBOSE((qry), "slct", __VA_ARGS__)
+
+#define FORWARDING_TIMEOUT 2000
+
+struct forward_local_state {
+ inaddr_array_t *targets;
+ struct address_state *addr_states;
+ /** Index of last choice in the targets array, used for error reporting. */
+ size_t last_choice_index;
+};
+
+void forward_local_state_alloc(struct knot_mm *mm, void **local_state,
+ struct kr_request *req)
+{
+ assert(req->selection_context.forwarding_targets.at);
+ *local_state = mm_alloc(mm, sizeof(struct forward_local_state));
+ memset(*local_state, 0, sizeof(struct forward_local_state));
+
+ struct forward_local_state *forward_state =
+ (struct forward_local_state *)*local_state;
+ forward_state->targets = &req->selection_context.forwarding_targets;
+
+ forward_state->addr_states = mm_alloc(
+ mm, sizeof(struct address_state) * forward_state->targets->len);
+ memset(forward_state->addr_states, 0,
+ sizeof(struct address_state) * forward_state->targets->len);
+}
+
+void forward_choose_transport(struct kr_query *qry,
+ struct kr_transport **transport)
+{
+ struct forward_local_state *local_state =
+ qry->server_selection.local_state->private;
+ struct choice choices[local_state->targets->len];
+ int valid = 0;
+
+ for (int i = 0; i < local_state->targets->len; i++) {
+ union inaddr *address = &local_state->targets->at[i];
+ size_t addr_len;
+ uint16_t port;
+ switch (address->ip.sa_family) {
+ case AF_INET:
+ port = ntohs(address->ip4.sin_port);
+ addr_len = sizeof(struct in_addr);
+ break;
+ case AF_INET6:
+ port = ntohs(address->ip6.sin6_port);
+ addr_len = sizeof(struct in6_addr);
+ break;
+ default:
+ assert(0);
+ }
+
+ struct address_state *addr_state = &local_state->addr_states[i];
+ addr_state->ns_name = (knot_dname_t *)"";
+
+ update_address_state(addr_state, ip_to_bytes(address, addr_len),
+ addr_len, qry);
+
+ if (addr_state->generation == -1) {
+ continue;
+ }
+ addr_state->choice_array_index = i;
+
+ choices[valid++] = (struct choice){
+ .address = ip_to_bytes(address, addr_len),
+ .address_len = addr_len,
+ .address_state = addr_state,
+ .port = port,
+ };
+ }
+
+ bool tcp =
+ qry->flags.TCP | qry->server_selection.local_state->truncated;
+ *transport =
+ select_transport(choices, valid, NULL, 0,
+ qry->server_selection.local_state->timeouts,
+ &qry->request->pool, tcp,
+ &local_state->last_choice_index);
+ if (*transport) {
+ /* Set a static timeout for forwarding; there is no point in it being
+ * dynamic, since the measured response time of a forwarding target
+ * reflects its own upstream resolution rather than the network RTT
+ * to it. */
+ (*transport)->timeout = FORWARDING_TIMEOUT;
+ /* We need to propagate this to flags since it's used in other
+ * parts of the resolver (e.g. logging and stats). */
+ qry->flags.TCP = tcp;
+ }
+}
+
+void forward_error(struct kr_query *qry, const struct kr_transport *transport,
+ enum kr_selection_error sel_error)
+{
+ if (!qry->server_selection.initialized) {
+ return;
+ }
+ struct forward_local_state *local_state =
+ qry->server_selection.local_state->private;
+ struct address_state *addr_state =
+ &local_state->addr_states[local_state->last_choice_index];
+ error(qry, addr_state, transport, sel_error);
+}
+
+void forward_update_rtt(struct kr_query *qry,
+ const struct kr_transport *transport, unsigned rtt)
+{
+ if (!qry->server_selection.initialized) {
+ return;
+ }
+
+ if (!transport) {
+ return;
+ }
+
+ struct forward_local_state *local_state =
+ qry->server_selection.local_state->private;
+ struct address_state *addr_state =
+ &local_state->addr_states[local_state->last_choice_index];
+
+ update_rtt(qry, addr_state, transport, rtt);
+}
\ No newline at end of file
--- /dev/null
+/* Copyright (C) 2014-2020 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+ * SPDX-License-Identifier: GPL-3.0-or-later
+ */
+
+#pragma once
+
+#include "lib/selection.h"
+#include "lib/resolve.h"
+
+void forward_local_state_alloc(struct knot_mm *mm, void **local_state,
+ struct kr_request *req);
+void forward_choose_transport(struct kr_query *qry,
+ struct kr_transport **transport);
+void forward_error(struct kr_query *qry, const struct kr_transport *transport,
+ enum kr_selection_error sel_error);
+void forward_update_rtt(struct kr_query *qry,
+ const struct kr_transport *transport, unsigned rtt);
\ No newline at end of file
--- /dev/null
+/* Copyright (C) 2014-2020 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+ * SPDX-License-Identifier: GPL-3.0-or-later
+ */
+
+#include "lib/selection_iter.h"
+#include "lib/selection.h"
+
+#include "lib/generic/trie.h"
+#include "lib/generic/pack.h"
+#include "lib/zonecut.h"
+#include "lib/resolve.h"
+
+#define VERBOSE_MSG(qry, ...) QRVERBOSE((qry), "slct", __VA_ARGS__)
+
+// To be held per query and locally
+struct iter_local_state {
+ trie_t *names;
+ trie_t *addresses;
+ knot_dname_t *zonecut;
+ /** Used to distinguish old and valid records in tries. */
+ unsigned int generation;
+ enum kr_selection_error last_error;
+ unsigned int no_ns_addr_count;
+};
+
+enum record_state { RECORD_UNKNOWN, RECORD_RESOLVED, RECORD_TRIED };
+
+// To be held per NS name and locally
+struct iter_name_state {
+ unsigned int generation;
+ enum record_state a_state;
+ enum record_state aaaa_state;
+};
+
+void iter_local_state_alloc(struct knot_mm *mm, void **local_state)
+{
+ *local_state = mm_alloc(mm, sizeof(struct iter_local_state));
+ memset(*local_state, 0, sizeof(struct iter_local_state));
+}
+
+static struct address_state *get_address_state(struct iter_local_state *local_state,
+ const struct kr_transport *transport)
+{
+ if (!transport) {
+ return NULL;
+ }
+
+ trie_t *addresses = local_state->addresses;
+ uint8_t *address =
+ ip_to_bytes(&transport->address, transport->address_len);
+
+ trie_val_t *address_state = trie_get_try(addresses, (char *)address,
+ transport->address_len);
+
+ if (!address_state) {
+ if (transport->deduplicated) {
+ /* Transport was chosen by a different query. */
+ return NULL;
+ }
+
+ assert(0);
+ }
+ return (struct address_state *)*address_state;
+}
+
+static bool zonecut_changed(knot_dname_t *new, knot_dname_t *old)
+{
+ return knot_dname_cmp(old, new);
+}
+
+static void unpack_state_from_zonecut(struct iter_local_state *local_state,
+ struct kr_query *qry)
+{
+ struct kr_zonecut *zonecut = &qry->zone_cut;
+ struct knot_mm *mm = &qry->request->pool;
+
+ bool zcut_changed = false;
+ if (local_state->names == NULL || local_state->addresses == NULL) {
+ /* Local state initialization. */
+ memset(local_state, 0, sizeof(struct iter_local_state));
+ local_state->names = trie_create(mm);
+ local_state->addresses = trie_create(mm);
+ } else {
+ zcut_changed = zonecut_changed(zonecut->name, local_state->zonecut);
+ }
+ local_state->zonecut = zonecut->name;
+ local_state->generation++;
+
+ if (zcut_changed) {
+ local_state->no_ns_addr_count = 0;
+ }
+
+ trie_it_t *it;
+ unsigned int current_generation = local_state->generation;
+
+ for (it = trie_it_begin(zonecut->nsset); !trie_it_finished(it); trie_it_next(it)) {
+ knot_dname_t *dname = (knot_dname_t *)trie_it_key(it, NULL);
+ pack_t *addresses = (pack_t *)*trie_it_val(it);
+
+ trie_val_t *val = trie_get_ins(local_state->names, (char *)dname,
+ knot_dname_size(dname));
+ if (!*val) {
+ /* We encountered this name for the first time. */
+ *val = mm_alloc(mm, sizeof(struct iter_name_state));
+ memset(*val, 0, sizeof(struct iter_name_state));
+ }
+ struct iter_name_state *name_state = *(struct iter_name_state **)val;
+ name_state->generation = current_generation;
+
+ if (zcut_changed) {
+ /* Set the name as unresolved since its addresses might have fallen
+ * out of the cache (TTL expired). */
+ name_state->a_state = RECORD_UNKNOWN;
+ name_state->aaaa_state = RECORD_UNKNOWN;
+ }
+
+ if (addresses->len == 0) {
+ continue;
+ }
+
+ /* We have some addresses to work with, let's iterate over them. */
+ for (uint8_t *obj = pack_head(*addresses); obj != pack_tail(*addresses);
+ obj = pack_obj_next(obj)) {
+ uint8_t *address = pack_obj_val(obj);
+ size_t address_len = pack_obj_len(obj);
+ trie_val_t *tval = trie_get_ins(local_state->addresses,
+ (char *)address,
+ address_len);
+ if (!*tval) {
+ /* We have not seen this address before. */
+ *tval = mm_alloc(mm, sizeof(struct address_state));
+ memset(*tval, 0, sizeof(struct address_state));
+ }
+ struct address_state *address_state = (*(struct address_state **)tval);
+ address_state->generation = current_generation;
+ address_state->ns_name = dname;
+
+ if (address_len == sizeof(struct in_addr)) {
+ name_state->a_state = RECORD_RESOLVED;
+ } else if (address_len == sizeof(struct in6_addr)) {
+ name_state->aaaa_state = RECORD_RESOLVED;
+ }
+ update_address_state(address_state, address, address_len, qry);
+ }
+ }
+ trie_it_free(it);
+}
+
+static int get_valid_addresses(struct iter_local_state *local_state,
+ struct choice choices[])
+{
+ unsigned count = 0;
+ trie_it_t *it;
+ for (it = trie_it_begin(local_state->addresses); !trie_it_finished(it);
+ trie_it_next(it)) {
+ size_t address_len;
+ uint8_t *address = (uint8_t *)trie_it_key(it, &address_len);
+ struct address_state *address_state =
+ (struct address_state *)*trie_it_val(it);
+ if (address_state->generation == local_state->generation &&
+ !address_state->unrecoverable_errors) {
+ choices[count] = (struct choice){
+ .address = address,
+ .address_len = address_len,
+ .address_state = address_state,
+ };
+ count++;
+ }
+ }
+ trie_it_free(it);
+ return count;
+}
+
+static int get_resolvable_names(struct iter_local_state *local_state,
+ struct to_resolve resolvable[], struct kr_query *qry)
+{
+ /* Further resolution is not possible until we get `. DNSKEY` record;
+ * we have to choose one of the known addresses here. */
+ if (qry->sname[0] == '\0' && qry->stype == KNOT_RRTYPE_DNSKEY) {
+ return 0;
+ }
+
+ unsigned count = 0;
+ trie_it_t *it;
+ for (it = trie_it_begin(local_state->names); !trie_it_finished(it);
+ trie_it_next(it)) {
+ struct iter_name_state *name_state =
+ *(struct iter_name_state **)trie_it_val(it);
+ if (name_state->generation == local_state->generation) {
+ knot_dname_t *name = (knot_dname_t *)trie_it_key(it, NULL);
+ if (qry->stype == KNOT_RRTYPE_DNSKEY &&
+ knot_dname_in_bailiwick(name, qry->sname) > 0) {
+ /* Resolving `domain. DNSKEY` must not trigger the
+ * resolution of `sub.domain. A/AAAA` since that
+ * would cause a cycle. */
+ continue;
+ }
+
+ /* FIXME: kr_rplan_satisfies(qry,…) should have been here, but that leads to failures on
+ * iter_ns_badip.rpl because the test requires the resolver to switch to the parent
+ * side after a record in cache expires. The only way to do this in the current zonecut
+ * setup is to requery the same query twice in a row, so we have to allow that and only
+ * check the rplan from the parent upwards.
+ */
+ bool a_in_rplan = kr_rplan_satisfies(qry->parent, name,
+ KNOT_CLASS_IN,
+ KNOT_RRTYPE_A);
+ bool aaaa_in_rplan =
+ kr_rplan_satisfies(qry->parent, name,
+ KNOT_CLASS_IN,
+ KNOT_RRTYPE_AAAA);
+
+ if (name_state->a_state == RECORD_UNKNOWN &&
+ !qry->flags.NO_IPV4 && !a_in_rplan) {
+ resolvable[count++] = (struct to_resolve){
+ name, KR_TRANSPORT_RESOLVE_A
+ };
+ }
+
+ if (name_state->aaaa_state == RECORD_UNKNOWN &&
+ !qry->flags.NO_IPV6 && !aaaa_in_rplan) {
+ resolvable[count++] = (struct to_resolve){
+ name, KR_TRANSPORT_RESOLVE_AAAA
+ };
+ }
+ }
+ }
+ trie_it_free(it);
+ return count;
+}
+
+static void update_name_state(knot_dname_t *name, enum kr_transport_protocol type,
+ trie_t *names)
+{
+ size_t name_len = knot_dname_size(name);
+ trie_val_t *val = trie_get_try(names, (char *)name, name_len);
+
+ if (!val) {
+ return;
+ }
+
+ struct iter_name_state *name_state = (struct iter_name_state *)*val;
+ switch (type) {
+ case KR_TRANSPORT_RESOLVE_A:
+ name_state->a_state = RECORD_TRIED;
+ break;
+ case KR_TRANSPORT_RESOLVE_AAAA:
+ name_state->aaaa_state = RECORD_TRIED;
+ break;
+ default:
+ assert(0);
+ }
+}
+
+void iter_choose_transport(struct kr_query *qry,
+ struct kr_transport **transport)
+{
+ struct knot_mm *mempool = &qry->request->pool;
+ struct iter_local_state *local_state =
+ (struct iter_local_state *)
+ qry->server_selection.local_state->private;
+
+ unpack_state_from_zonecut(local_state, qry);
+
+ struct choice choices[trie_weight(local_state->addresses)];
+ /* We may try to resolve both an A and an AAAA record for each name,
+ * hence the 2 * trie_weight(…) here. */
+ struct to_resolve resolvable[2 * trie_weight(local_state->names)];
+
+ // Filter valid addresses and names from the tries
+ int choices_len = get_valid_addresses(local_state, choices);
+ int resolvable_len = get_resolvable_names(local_state, resolvable, qry);
+
+ if (choices_len || resolvable_len) {
+ bool tcp = qry->flags.TCP |
+ qry->server_selection.local_state->truncated;
+ *transport = select_transport(
+ choices, choices_len, resolvable, resolvable_len,
+ qry->server_selection.local_state->timeouts, mempool,
+ tcp, NULL);
+ if (*transport) {
+ switch ((*transport)->protocol) {
+ case KR_TRANSPORT_RESOLVE_A:
+ case KR_TRANSPORT_RESOLVE_AAAA:
+ /* Record that we tried resolving this name so we don't try it again. */
+ update_name_state((*transport)->ns_name,
+ (*transport)->protocol,
+ local_state->names);
+ break;
+ case KR_TRANSPORT_TLS:
+ case KR_TRANSPORT_TCP:
+ /* We need to propagate this to flags since it's used in
+ * other parts of the resolver. */
+ qry->flags.TCP = true;
+ break;
+ default:
+ break;
+ }
+ }
+ } else {
+ *transport = NULL;
+ /* The last selected server had broken DNSSEC and we have no more
+ * servers to ask. Signal this to the rest of the resolver by
+ * setting the DNSSEC_BOGUS flag. */
+ if (local_state->last_error == KR_SELECTION_DNSSEC_ERROR) {
+ qry->flags.DNSSEC_BOGUS = true;
+ }
+ }
+
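+ /* Mitigation for NXNSAttack (CVE-2020-12667): if selection keeps choosing
+ * to resolve NS addresses instead of contacting a server, give up after
+ * KR_COUNT_NO_NSADDR_LIMIT attempts to bound the amplification. */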
+ bool nxnsattack_mitigation = false;
+ enum kr_transport_protocol proto =
+ *transport ? (*transport)->protocol : -1;
+ if (proto == KR_TRANSPORT_RESOLVE_A || proto == KR_TRANSPORT_RESOLVE_AAAA) {
+ if (++local_state->no_ns_addr_count > KR_COUNT_NO_NSADDR_LIMIT) {
+ *transport = NULL;
+ nxnsattack_mitigation = true;
+ }
+ }
+
+ WITH_VERBOSE(qry)
+ {
+ KR_DNAME_GET_STR(zonecut_str, qry->zone_cut.name);
+ if (*transport) {
+ KR_DNAME_GET_STR(ns_name, (*transport)->ns_name);
+ const char *ns_str = kr_straddr(&(*transport)->address.ip);
+ const char *ip_version;
+ switch (proto) {
+ case KR_TRANSPORT_RESOLVE_A:
+ case KR_TRANSPORT_RESOLVE_AAAA:
+ ip_version = (proto == KR_TRANSPORT_RESOLVE_A) ? "A" : "AAAA";
+ VERBOSE_MSG(qry, "=> id: '%05u' choosing to resolve %s: '%s' zone cut: '%s'\n",
+ qry->id, ip_version, ns_name, zonecut_str);
+ break;
+ default:
+ VERBOSE_MSG(qry, "=> id: '%05u' choosing: '%s'@'%s' with timeout %u ms zone cut: '%s'%s\n",
+ qry->id, ns_name, ns_str ? ns_str : "", (*transport)->timeout, zonecut_str,
+ (*transport)->safe_mode ? " SAFEMODE" : "");
+ break;
+ }
+ } else {
+ VERBOSE_MSG(qry, "=> id: '%05u' no suitable transport, zone cut: '%s'%s\n",
+ qry->id, zonecut_str, nxnsattack_mitigation ? " (stopped due to mitigation for NXNSAttack CVE-2020-12667)" : "");
+ }
+ }
+}
+
+void iter_error(struct kr_query *qry, const struct kr_transport *transport,
+ enum kr_selection_error sel_error)
+{
+ if (!qry->server_selection.initialized) {
+ return;
+ }
+ struct iter_local_state *local_state = qry->server_selection.local_state->private;
+ struct address_state *addr_state = get_address_state(local_state, transport);
+ local_state->last_error = sel_error;
+ error(qry, addr_state, transport, sel_error);
+}
+
+void iter_update_rtt(struct kr_query *qry, const struct kr_transport *transport,
+ unsigned rtt)
+{
+ if (!qry->server_selection.initialized) {
+ return;
+ }
+ struct iter_local_state *local_state = qry->server_selection.local_state->private;
+ struct address_state *addr_state = get_address_state(local_state, transport);
+ update_rtt(qry, addr_state, transport, rtt);
+}
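+
+/* Wiring sketch (illustrative only; the actual assignment happens in the
+ * selection code that initializes qry->server_selection for iterative queries):
+ *
+ *	qry->server_selection.choose_transport = iter_choose_transport;
+ *	qry->server_selection.update_rtt = iter_update_rtt;
+ *	qry->server_selection.error = iter_error;
+ *	qry->server_selection.initialized = true;
+ */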
--- /dev/null
+/* Copyright (C) 2014-2020 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+ * SPDX-License-Identifier: GPL-3.0-or-later
+ */
+
+#pragma once
+
+#include "lib/selection.h"
+
+/**
+ * Errors marked `true` are unrecoverable: once one of them is encountered,
+ * there is no point in asking this server again.
+ */
+static const bool UNRECOVERABLE_ERRORS[] = {
+ [KR_SELECTION_QUERY_TIMEOUT] = false,
+ [KR_SELECTION_TLS_HANDSHAKE_FAILED] = false,
+ [KR_SELECTION_TCP_CONNECT_FAILED] = false,
+ [KR_SELECTION_TCP_CONNECT_TIMEOUT] = false,
+ [KR_SELECTION_REFUSED] = true,
+ [KR_SELECTION_SERVFAIL] = true,
+ [KR_SELECTION_FORMERROR] = false,
+ [KR_SELECTION_NOTIMPL] = true,
+ [KR_SELECTION_OTHER_RCODE] = true,
+ [KR_SELECTION_TRUNCATED] = false,
+ [KR_SELECTION_DNSSEC_ERROR] = true,
+ [KR_SELECTION_LAME_DELEGATION] = true,
+ [KR_SELECTION_BAD_CNAME] = true,
+};
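+
+/* Usage sketch (illustrative only; the real bookkeeping lives in the selection
+ * code): when an error is reported for an address, the table is meant to be
+ * consulted roughly as
+ *
+ *	if (UNRECOVERABLE_ERRORS[sel_error])
+ *		address_state->unrecoverable_errors++;
+ *
+ * so that such an address is skipped by later transport choices. */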
+
+void iter_local_state_alloc(struct knot_mm *mm, void **local_state);
+void iter_choose_transport(struct kr_query *qry,
+ struct kr_transport **transport);
+void iter_error(struct kr_query *qry, const struct kr_transport *transport,
+ enum kr_selection_error sel_error);
+void iter_update_rtt(struct kr_query *qry, const struct kr_transport *transport,
+ unsigned rtt);
\ No newline at end of file
#include "lib/defines.h"
#include "lib/generic/array.h"
#include "lib/module.h"
-#include "lib/nsrep.h"
+#include "lib/selection.h"
#include "lib/resolve.h"
#include <gnutls/gnutls.h>
free((void *)what);
}
+// Use this for allocations with mm (a knot_mm_t mempool).
static inline void *mm_alloc(knot_mm_t *mm, size_t size)
{
if (mm) return mm->alloc(mm->ctx, size);
void *mm_realloc(knot_mm_t *mm, void *what, size_t size, size_t prev_size);
/** Trivial malloc() wrapper. */
+// Use mm_alloc() for allocations into a mempool.
void *mm_malloc(void *ctx, size_t n);
/** posix_memalign() wrapper. */
void *mm_malloc_aligned(void *ctx, size_t n);
(int)rd->len, (int)rrtype);
continue;
}
- /* Check RTT cache - whether the IP is usable or not. */
- kr_nsrep_rtt_lru_entry_t *rtt_e = ctx->cache_rtt
- ? lru_get_try(ctx->cache_rtt, (const char *)rd->data, rd->len)
- : NULL;
- const bool unusable = rtt_e && rtt_e->score >= KR_NS_TIMEOUT
- && qry->creation_time_mono
- < rtt_e->tout_timestamp + ctx->cache_rtt_tout_retry_interval;
- if (!unusable) {
- result = AI_OK;
- ++usable_cnt;
- }
+ result = AI_OK;
+ ++usable_cnt;
ret = pack_obj_push(addrs, rd->data, rd->len);
assert(!ret); /* didn't fit because of incorrectly reserved memory */
pack_init(**pack);
addrset_info_t infos[2];
+
- /* Fetch NS reputation and decide whether to prefetch A/AAAA records. */
+ /* Fetch the NS addresses (A and AAAA records). */
- unsigned *cached = lru_get_try(ctx->cache_rep,
- (const char *)ns_name, ns_size);
- unsigned reputation = (cached) ? *cached : 0;
- infos[0] = (reputation & KR_NS_NOIP4) || qry->flags.NO_IPV4
- ? AI_REPUT
- : fetch_addr(*pack, ns_name, KNOT_RRTYPE_A, cut->pool, qry);
- infos[1] = (reputation & KR_NS_NOIP6) || qry->flags.NO_IPV6
- ? AI_REPUT
- : fetch_addr(*pack, ns_name, KNOT_RRTYPE_AAAA, cut->pool, qry);
+ infos[0] = fetch_addr(*pack, ns_name, KNOT_RRTYPE_A, cut->pool, qry);
+ infos[1] = fetch_addr(*pack, ns_name, KNOT_RRTYPE_AAAA, cut->pool, qry);
#if 0 /* rather unlikely to be useful unless changing some zcut code */
WITH_VERBOSE(qry) {
local expected = {
[1] = {
['type'] = 'DNSKEY',
- ['count'] = 2,
+ ['count'] = 8, -- a trade-off so that we do not hard-fail on DNSSEC errors
['name'] = '.',
}
}
policy.STUB('192.0.2.1@5353'),
{todname('1.168.192.in-addr.arpa')}))
+.. note:: Forwarding targets must support
+ `EDNS <https://en.wikipedia.org/wiki/Extension_mechanisms_for_DNS>`_ and
+ `0x20 randomization <https://tools.ietf.org/html/draft-vixie-dnsext-dns0x20-00>`_.
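+
+As an illustration, a minimal forwarding policy might look as follows (the
+addresses and the ``example.com`` suffix are placeholders; note that the former
+limit of four targets per rule no longer applies):
+
+.. code-block:: lua
+
+   policy.add(policy.suffix(
+        policy.FORWARD({'192.0.2.1', '2001:db8::1'}),
+        {todname('example.com')}))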
+
+
.. _tls-forwarding:
Forwarding over TLS protocol (DNS-over-TLS)
end
-- Override the list of nameservers (forwarders)
-local function set_nslist(qry, list)
+local function set_nslist(req, list)
local ns_i = 0
for _, ns in ipairs(list) do
- -- kr_nsrep_set() can return kr_error(ENOENT), it's OK
- if ffi.C.kr_nsrep_set(qry, ns_i, ns) == 0 then
+ if ffi.C.kr_forward_add_target(req, ns) == 0 then
ns_i = ns_i + 1
end
end
- -- If less than maximum NSs, insert guard to terminate the list
- if ns_i < 3 then
- assert(ffi.C.kr_nsrep_set(qry, ns_i, nil) == 0);
- end
if ns_i == 0 then
-- would use assert() but don't want to compose the message if not triggered
error('no usable address in NS set (check net.ipv4 and '
if type(target) == 'table' then
for _, v in pairs(target) do
table.insert(list, addr2sock(v, 53))
- assert(#list <= 4, 'at most 4 STUB targets are supported')
end
else
table.insert(list, addr2sock(target, 53))
-- Switch mode to stub resolver, do not track origin zone cut since it's not real authority NS
qry.flags.STUB = true
qry.flags.ALWAYS_CUT = false
- set_nslist(qry, list)
+ set_nslist(req, list)
return state
end
end
if type(target) == 'table' then
for _, v in pairs(target) do
table.insert(list, addr2sock(v, 53))
- assert(#list <= 4, 'at most 4 FORWARD targets are supported')
end
else
table.insert(list, addr2sock(target, 53))
qry.flags.ALWAYS_CUT = false
qry.flags.NO_MINIMIZE = true
qry.flags.AWAIT_CUT = true
- set_nslist(qry, list)
+ set_nslist(req, list)
return state
end
end
function policy.TLS_FORWARD(targets)
if type(targets) ~= 'table' or #targets < 1 then
error('TLS_FORWARD argument must be a non-empty table')
- elseif #targets > 4 then
- error('TLS_FORWARD supports at most four targets (in a single call)')
end
local sockaddr_c_set = {}
qry.flags.AWAIT_CUT = true
req.options.TCP = true
qry.flags.TCP = true
- set_nslist(qry, nslist)
+ set_nslist(req, nslist)
return state
end
end
{
struct kr_request *req = ctx->req;
struct kr_query *qry = req->current_query;
- if (qry->flags.CACHED || !req->upstream.addr) {
+ if (qry->flags.CACHED || !req->upstream.transport) {
return ctx->state;
}
/* The socket address is encoded into a sockaddr_in6 struct, which
* unions with sockaddr_in; the two differ in sa_family. */
struct sockaddr_in6 *e = &data->upstreams.q.at[data->upstreams.head];
- const struct sockaddr *src = req->upstream.addr;
- switch (src->sa_family) {
- case AF_INET: memcpy(e, src, sizeof(struct sockaddr_in)); break;
- case AF_INET6: memcpy(e, src, sizeof(struct sockaddr_in6)); break;
- default: return ctx->state;
+ const union inaddr *src = &req->upstream.transport->address;
+ switch (src->ip.sa_family) {
+ case AF_INET: memcpy(e, &src->ip4, sizeof(src->ip4)); break;
+ case AF_INET6: memcpy(e, &src->ip6, sizeof(src->ip6)); break;
+ default: return ctx->state;
}
/* Replace port number with the RTT information (cap is UINT16_MAX milliseconds) */
e->sin6_port = req->upstream.rtt;
end
)
- for delay = 0.1, 4, 0.5 do -- total max 14.9s in 8 steps
+ for delay = 0.1, 5, 0.5 do -- total max 23.5s in 10 steps
if done then return end
worker.sleep(delay)
end