#include "contrib/ucw/lib.h"
#include "daemon/engine.h"
-#include "lib/nsrep.h"
+#include "lib/selection.h"
typedef kr_nsrep_lru_t lru_bench_t;
/* Clear reputation tables */
struct kr_context *ctx = &the_worker->engine->resolver;
- lru_reset(ctx->cache_rtt);
- lru_reset(ctx->cache_rep);
lru_reset(ctx->cache_cookie);
lua_pushboolean(L, true);
return 1;
#include "kresconfig.h"
#include "daemon/engine.h"
#include "daemon/ffimodule.h"
-#include "daemon/worker.h"
-#include "lib/nsrep.h"
+#include "lib/selection.h"
#include "lib/cache/api.h"
#include "lib/defines.h"
#include "lib/cache/cdb_lmdb.h"
engine->resolver.negative_anchors = map_make(NULL);
engine->resolver.pool = engine->pool;
engine->resolver.modules = &engine->modules;
- engine->resolver.cache_rtt_tout_retry_interval = KR_NS_TIMEOUT_RETRY_INTERVAL;
/* Create OPT RR */
engine->resolver.downstream_opt_rr = mm_alloc(engine->pool, sizeof(knot_rrset_t));
engine->resolver.upstream_opt_rr = mm_alloc(engine->pool, sizeof(knot_rrset_t));
engine->resolver.tls_padding = -1;
/* Empty init; filled via ./lua/postconfig.lua */
kr_zonecut_init(&engine->resolver.root_hints, (const uint8_t *)"", engine->pool);
- /* Open NS rtt + reputation cache */
- lru_create(&engine->resolver.cache_rtt, LRU_RTT_SIZE, NULL, NULL);
- lru_create(&engine->resolver.cache_rep, LRU_REP_SIZE, NULL, NULL);
lru_create(&engine->resolver.cache_cookie, LRU_COOKIES_SIZE, NULL, NULL);
/* Load basic modules */
kr_cache_close(&engine->resolver.cache);
/* The LRUs are currently malloc-ated and need to be freed. */
- lru_free(engine->resolver.cache_rtt);
- lru_free(engine->resolver.cache_rep);
lru_free(engine->resolver.cache_cookie);
network_deinit(&engine->net);
extern const knot_dump_style_t KNOT_DUMP_STYLE_DEFAULT;
struct kr_cdb_api {};
struct lru {};
+typedef enum {KNOT_ANSWER, KNOT_AUTHORITY, KNOT_ADDITIONAL} knot_section_t;
+typedef struct {
+ uint16_t pos;
+ uint16_t flags;
+ uint16_t compress_ptr[16];
+} knot_rrinfo_t;
+typedef unsigned char knot_dname_t;
typedef struct knot_mm {
void *ctx, *alloc, *free;
typedef void (*map_free_f)(void *baton, void *ptr);
typedef void (*trace_log_f) (const struct kr_request *, const char *);
typedef void (*trace_callback_f)(struct kr_request *);
-typedef enum {KNOT_ANSWER, KNOT_AUTHORITY, KNOT_ADDITIONAL} knot_section_t;
-typedef struct {
- uint16_t pos;
- uint16_t flags;
- uint16_t compress_ptr[16];
-} knot_rrinfo_t;
-typedef unsigned char knot_dname_t;
+typedef bool (*addr_info_f)(struct sockaddr*);
+typedef void (*async_resolution_f)(knot_dname_t*, enum knot_rr_type);
typedef struct {
knot_dname_t *_owner;
uint32_t _ttl;
} qsource;
struct {
unsigned int rtt;
- const struct sockaddr *addr;
+ const struct kr_transport *transport;
} upstream;
struct kr_qflags options;
int state;
trace_callback_f trace_finish;
int vars_ref;
knot_mm_t pool;
+ struct {
+ addr_info_f is_tls_capable;
+ addr_info_f is_tcp_connected;
+ addr_info_f is_tcp_waiting;
+ async_resolution_f async_ns_resolution;
+ union inaddr *forwarding_targets;
+ size_t forward_targets_num;
+ } selection_context;
unsigned int uid;
unsigned int count_no_nsaddr;
unsigned int count_fail_row;
void *lib;
void *data;
};
+struct kr_server_selection {
+ _Bool initialized;
+ void (*choose_transport)(struct kr_query *, struct kr_transport **);
+ void (*success)(struct kr_query *, const struct kr_transport *);
+ void (*update_rtt)(struct kr_query *, const struct kr_transport *, unsigned int);
+ void (*error)(struct kr_query *, const struct kr_transport *, enum kr_selection_error);
+ void *local_state;
+};
kr_layer_t kr_layer_t_static;
typedef int32_t (*kr_stale_cb)(int32_t ttl, const knot_dname_t *owner, uint16_t type,
const struct kr_query *qry);
void kr_rrset_init(knot_rrset_t *rrset, knot_dname_t *owner,
uint16_t type, uint16_t rclass, uint32_t ttl);
-struct kr_nsrep {
- unsigned int score;
- unsigned int reputation;
- const knot_dname_t *name;
- struct kr_context *ctx;
- /* beware: hidden stub, to avoid hardcoding sockaddr lengths */
-};
struct kr_query {
struct kr_query *parent;
knot_dname_t *sname;
struct kr_query *cname_parent;
struct kr_request *request;
kr_stale_cb stale_cb;
- struct kr_nsrep ns;
+ struct kr_server_selection server_selection;
};
struct kr_context {
struct kr_qflags options;
map_t negative_anchors;
struct kr_zonecut root_hints;
struct kr_cache cache;
+ unsigned int cache_rtt_tout_retry_interval;
char _stub[];
};
+struct kr_transport {
+ knot_dname_t *name;
+ /* beware: hidden stub, to avoid hardcoding sockaddr lengths */
+};
const char *knot_strerror(int);
knot_dname_t *knot_dname_copy(const knot_dname_t *, knot_mm_t *);
knot_dname_t *knot_dname_from_str(uint8_t *, const char *, size_t);
int kr_rplan_pop(struct kr_rplan *, struct kr_query *);
struct kr_query *kr_rplan_resolved(struct kr_rplan *);
struct kr_query *kr_rplan_last(struct kr_rplan *);
-int kr_nsrep_set(struct kr_query *, size_t, const struct sockaddr *);
+int kr_forward_add_target(struct kr_request *, size_t, const struct sockaddr *);
void kr_log_req(const struct kr_request * const, uint32_t, const unsigned int, const char *, const char *, ...);
void kr_log_q(const struct kr_query * const, const char *, const char *, ...);
int kr_make_query(struct kr_query *, knot_pkt_t *);
struct lru {};
"
+${CDEFS} ${LIBKRES} types <<-EOF
+ knot_section_t
+ knot_rrinfo_t
+ knot_dname_t
+ #knot_rdata_t
+ #knot_rdataset_t
+EOF
+
# The generator doesn't work well with typedefs of functions.
printf "
typedef struct knot_mm {
typedef void (*map_free_f)(void *baton, void *ptr);
typedef void (*trace_log_f) (const struct kr_request *, const char *);
typedef void (*trace_callback_f)(struct kr_request *);
+typedef bool (*addr_info_f)(struct sockaddr*);
+typedef void (*async_resolution_f)(knot_dname_t*, enum knot_rr_type);
"
-${CDEFS} ${LIBKRES} types <<-EOF
- knot_section_t
- knot_rrinfo_t
- knot_dname_t
- #knot_rdata_t
- #knot_rdataset_t
-EOF
-
genResType() {
echo "$1" | ${CDEFS} ${LIBKRES} types
}
# lib/module.h
struct kr_prop
struct kr_module
+ struct kr_server_selection
EOF
# a static variable; the line might not be simple to generate
## Some definitions would need too many deps, so shorten them.
-genResType "struct kr_nsrep" | sed '/union/,$ d'
-printf "\t/* beware: hidden stub, to avoid hardcoding sockaddr lengths */\n};\n"
-
genResType "struct kr_query"
-genResType "struct kr_context" | sed '/kr_nsrep_rtt_lru_t/,$ d'
+genResType "struct kr_context" | sed '/module_array_t/,$ d'
printf "\tchar _stub[];\n};\n"
+
+echo "struct kr_transport" | ${CDEFS} ${KRESD} types | sed '/union /,$ d'
+printf "\t/* beware: hidden stub, to avoid hardcoding sockaddr lengths */\n};\n"
+
## libknot API
${CDEFS} libknot functions <<-EOF
# Utils
kr_rplan_pop
kr_rplan_resolved
kr_rplan_last
-# Nameservers
- kr_nsrep_set
+# Forwarding
+ kr_forward_add_target
# Utils
kr_log_req
kr_log_q
echo "struct qr_task" | ${CDEFS} ${KRESD} types | sed '/pktbuf/,$ d'
printf "\t/* beware: hidden stub, to avoid qr_tasklist_t */\n};\n"
+
${CDEFS} ${KRESD} functions <<-EOF
worker_resolve_exec
worker_resolve_mk_pkt
qr_tasklist_t waiting;
struct session *pending[MAX_PENDING];
uint16_t pending_count;
- uint16_t addrlist_count;
- uint16_t addrlist_turn;
+ // uint16_t addrlist_count;
+ // uint16_t addrlist_turn;
uint16_t timeouts;
uint16_t iter_count;
- struct sockaddr *addrlist;
+ // struct sockaddr *addrlist;
uint32_t refs;
bool finished : 1;
bool leading : 1;
uint64_t creation_time;
+ uint64_t send_time;
+ uint64_t recv_time;
+ struct kr_transport *transport;
};
const struct sockaddr *addr, knot_pkt_t *pkt);
static int qr_task_finalize(struct qr_task *task, int state);
static void qr_task_complete(struct qr_task *task);
-static struct session* worker_find_tcp_connected(struct worker_ctx *worker,
+struct session* worker_find_tcp_connected(struct worker_ctx *worker,
const struct sockaddr *addr);
static int worker_add_tcp_waiting(struct worker_ctx *worker,
const struct sockaddr *addr,
struct session *session);
-static struct session* worker_find_tcp_waiting(struct worker_ctx *worker,
+struct session* worker_find_tcp_waiting(struct worker_ctx *worker,
const struct sockaddr *addr);
static void on_tcp_connect_timeout(uv_timer_t *timer);
-static void on_retransmit(uv_timer_t *req);
+static void on_udp_timeout(uv_timer_t *req);
static void subreq_finalize(struct qr_task *task, const struct sockaddr *packet_source, knot_pkt_t *pkt);
knot_pkt_qtype(pkt), knot_pkt_qtype(pkt));
}
+/* Helper functions for transport selection */
+/** Report whether a TLS client parameter entry exists for `address`.
+ *
+ * The lookup goes through tls_client_param_get() on the worker's
+ * net.tls_client_params table.
+ * @return true if an entry was found, false otherwise.
+ */
+static inline bool is_tls_capable(struct sockaddr *address) {
+	return tls_client_param_get(the_worker->engine->net.tls_client_params, address) != NULL;
+}
+
+/** Report whether worker_find_tcp_connected() yields a session for `address`. */
+static inline bool is_tcp_connected(struct sockaddr *address) {
+	struct session *found = worker_find_tcp_connected(the_worker, address);
+	return found != NULL;
+}
+
+/** Report whether worker_find_tcp_waiting() yields a session for `address`. */
+static inline bool is_tcp_waiting(struct sockaddr *address) {
+	struct session *found = worker_find_tcp_waiting(the_worker, address);
+	return found != NULL;
+}
+
+/** Start a resolution of `name`/`type` (class IN) with all query flags cleared.
+ *
+ * Builds a query packet with worker_resolve_mk_pkt_dname() and passes it to
+ * worker_resolve_start(); the task returned from there is not inspected here.
+ * Does nothing if the packet cannot be constructed.
+ */
+void async_ns_resolution(knot_dname_t *name, enum knot_rr_type type) {
+	struct kr_qflags flags;
+	memset(&flags, 0, sizeof(flags));
+	knot_pkt_t *pkt = worker_resolve_mk_pkt_dname(name, type, KNOT_CLASS_IN, &flags);
+	if (!pkt) {
+		/* Packet construction failed: nothing to start, nothing to release. */
+		return;
+	}
+	worker_resolve_start(pkt, flags);
+	/* The packet is created by knot_pkt_new(), so release it with the matching
+	 * knot_pkt_free(); a plain free() would only release the top-level struct. */
+	knot_pkt_free(pkt);
+}
+
/** Create and initialize a request_ctx (on a fresh mempool).
*
* handle and addr point to the source of the request, and they are NULL
req->qsource.addr = &ctx->source.addr.ip;
}
+ req->selection_context.is_tls_capable = is_tls_capable;
+ req->selection_context.is_tcp_connected = is_tcp_connected;
+ req->selection_context.is_tcp_waiting = is_tcp_waiting;
+
worker->stats.rconcurrent += 1;
return ctx;
/* This is called when we send subrequest / answer */
int qr_task_on_send(struct qr_task *task, uv_handle_t *handle, int status)
{
-
if (task->finished) {
assert(task->leading == false);
qr_task_complete(task);
assert(s);
if (handle->type == UV_UDP && session_flags(s)->outgoing) {
- /* Start the timeout timer for UDP here, since this is the closest
- * to the wire we can get. */
- struct kr_request *req = &task->ctx->req;
- /* Check current query NSLIST */
- struct kr_query *qry = array_tail(req->rplan.pending);
+ // This should ensure that we are only dealing with our question to upstream
+ assert(!knot_wire_get_qr(task->pktbuf->wire));
+ // start the timer
+ struct kr_query *qry = array_tail(task->ctx->req.rplan.pending);
assert(qry != NULL);
- /* Retransmit at default interval, or more frequently if the mean
- * RTT of the server is better. If the server is glued, use default rate. */
- size_t timeout = qry->ns.score;
- if (timeout > KR_NS_GLUED) {
- /* We don't have information about variance in RTT, expect +10ms */
- timeout = MIN(qry->ns.score + 10, KR_CONN_RETRY);
- } else {
- timeout = KR_CONN_RETRY;
- }
- int ret = session_timer_start(s, on_retransmit, timeout, 0);
+ size_t timeout = task->transport->timeout;
+ int ret = session_timer_start(s, on_udp_timeout, timeout, 0);
/* Start next step with timeout, fatal if can't start a timer. */
if (ret != 0) {
- subreq_finalize(task, &qry->ns.addr->ip, task->pktbuf);
+ subreq_finalize(task, &task->transport->address.ip, task->pktbuf);
qr_task_finalize(task, KR_STATE_FAIL);
}
}
qr_task_ref(task);
struct worker_ctx *worker = ctx->worker;
+ /* Note time for upstream RTT */
+ task->send_time = kr_now();
+ task->recv_time = 0; // task structure is being reused so we have to zero this out here
/* Send using given protocol */
assert(!session_flags(session)->closing);
if (session_flags(session)->has_http) {
if (status) {
struct qr_task *task = session_waitinglist_get(session);
if (task) {
- struct kr_qflags *options = &task->ctx->req.options;
- unsigned score = options->FORWARD || options->STUB ? KR_NS_FWD_DEAD : KR_NS_DEAD;
- kr_nsrep_update_rtt(NULL, peer, score,
- the_worker->engine->resolver.cache_rtt,
- KR_NS_UPDATE_NORESET);
+ // TLS handshake failed, report it to server selection
+ struct kr_query *qry = array_tail(task->ctx->req.rplan.pending);
+ qry->server_selection.error(qry, task->transport, KR_SELECTION_TLS_HANDSHAKE_FAILED);
}
#ifndef NDEBUG
else {
worker_del_tcp_waiting(worker, peer);
struct qr_task *task = session_waitinglist_get(session);
if (task && status != UV_ETIMEDOUT) {
- /* Penalize upstream.
- * In case of UV_ETIMEDOUT upstream has been
- * already penalized in on_tcp_connect_timeout() */
- struct kr_qflags *options = &task->ctx->req.options;
- unsigned score = options->FORWARD || options->STUB ? KR_NS_FWD_DEAD : KR_NS_DEAD;
- kr_nsrep_update_rtt(NULL, peer, score,
- worker->engine->resolver.cache_rtt,
- KR_NS_UPDATE_NORESET);
+ struct kr_query *qry = array_tail(task->ctx->req.rplan.pending);
+ qry->server_selection.error(qry, task->transport, KR_SELECTION_TCP_CONNECT_FAILED);
}
assert(session_tasklist_is_empty(session));
session_waitinglist_retry(session, false);
peer_str ? peer_str : "");
}
- unsigned score = qry->flags.FORWARD || qry->flags.STUB ? KR_NS_FWD_DEAD : KR_NS_DEAD;
- kr_nsrep_update_rtt(NULL, peer, score,
- worker->engine->resolver.cache_rtt,
- KR_NS_UPDATE_NORESET);
+ qry->server_selection.error(qry, task->transport, KR_SELECTION_TCP_CONNECT_TIMEOUT);
worker->stats.timeout += session_waitinglist_get_len(session);
session_waitinglist_retry(session, true);
uv_timer_stop(timer);
- /* Penalize all tried nameservers with a timeout. */
struct qr_task *task = session_tasklist_get_first(session);
struct worker_ctx *worker = task->ctx->worker;
+
if (task->leading && task->pending_count > 0) {
struct kr_query *qry = array_tail(task->ctx->req.rplan.pending);
- struct sockaddr_in6 *addrlist = (struct sockaddr_in6 *)task->addrlist;
- for (uint16_t i = 0; i < MIN(task->pending_count, task->addrlist_count); ++i) {
- struct sockaddr *choice = (struct sockaddr *)(&addrlist[i]);
- WITH_VERBOSE(qry) {
- char *addr_str = kr_straddr(choice);
- VERBOSE_MSG(qry, "=> server: '%s' flagged as 'bad'\n", addr_str ? addr_str : "");
- }
- unsigned score = qry->flags.FORWARD || qry->flags.STUB ? KR_NS_FWD_DEAD : KR_NS_DEAD;
- kr_nsrep_update_rtt(&qry->ns, choice, score,
- worker->engine->resolver.cache_rtt,
- KR_NS_UPDATE_NORESET);
- }
+ qry->server_selection.error(qry, task->transport, KR_SELECTION_TIMEOUT);
}
+
task->timeouts += 1;
worker->stats.timeout += 1;
qr_task_step(task, NULL, NULL);
}
-static uv_handle_t *retransmit(struct qr_task *task)
+static uv_handle_t *transmit(struct qr_task *task)
{
uv_handle_t *ret = NULL;
- if (task && task->addrlist && task->addrlist_count > 0) {
- struct sockaddr_in6 *choice = &((struct sockaddr_in6 *)task->addrlist)[task->addrlist_turn];
+
+ if (task) {
+ struct kr_transport* transport = task->transport;
+
+ struct sockaddr_in6 *choice = (struct sockaddr_in6 *)&transport->address;
+
if (!choice) {
return ret;
}
}
/* Checkout answer before sending it */
struct request_ctx *ctx = task->ctx;
- if (kr_resolve_checkout(&ctx->req, NULL, (struct sockaddr *)choice, SOCK_DGRAM, task->pktbuf) != 0) {
+ if (kr_resolve_checkout(&ctx->req, NULL, transport, task->pktbuf) != 0) {
return ret;
}
ret = ioreq_spawn(ctx->worker, SOCK_DGRAM, choice->sin6_family, false, false);
} else {
task->pending[task->pending_count] = session;
task->pending_count += 1;
- task->addrlist_turn = (task->addrlist_turn + 1) %
- task->addrlist_count; /* Round robin */
session_start_read(session); /* Start reading answer */
}
}
return ret;
}
-static void on_retransmit(uv_timer_t *req)
-{
- struct session *session = req->data;
- assert(session_tasklist_get_len(session) == 1);
-
- uv_timer_stop(req);
- struct qr_task *task = session_tasklist_get_first(session);
- if (retransmit(task) == NULL) {
- /* Not possible to spawn request, start timeout timer with remaining deadline. */
- struct kr_qflags *options = &task->ctx->req.options;
- uint64_t timeout = options->FORWARD || options->STUB ? KR_NS_FWD_TIMEOUT / 2 :
- KR_CONN_RTT_MAX - task->pending_count * KR_CONN_RETRY;
- uv_timer_start(req, on_udp_timeout, timeout, 0);
- } else {
- uv_timer_start(req, on_retransmit, KR_CONN_RETRY, 0);
- }
-}
static void subreq_finalize(struct qr_task *task, const struct sockaddr *packet_source, knot_pkt_t *pkt)
{
struct kr_query *qry = array_tail(follower->ctx->req.rplan.pending);
qry->id = leader_qry->id;
qry->secret = leader_qry->secret;
+ follower->transport = task->transport;
leader_qry->secret = 0; /* Next will be already decoded */
}
qr_task_step(follower, packet_source, pkt);
return kr_ok(); /* Will be notified when outgoing query finishes. */
}
/* Start transmitting */
- uv_handle_t *handle = retransmit(task);
+ uv_handle_t *handle = transmit(task);
if (handle == NULL) {
subreq_finalize(task, packet_source, packet);
return qr_task_finalize(task, KR_STATE_FAIL);
worker_del_tcp_waiting(worker, addr);
free(conn);
session_close(session);
- unsigned score = qry->flags.FORWARD || qry->flags.STUB ? KR_NS_FWD_DEAD : KR_NS_DEAD;
- kr_nsrep_update_rtt(NULL, peer, score,
- worker->engine->resolver.cache_rtt,
- KR_NS_UPDATE_NORESET);
- WITH_VERBOSE (qry) {
- const char *peer_str = kr_straddr(peer);
- kr_log_verbose( "[wrkr]=> connect to '%s' failed (%s), flagged as 'bad'\n",
- peer_str ? peer_str : "", uv_strerror(ret));
- }
+ qry->server_selection.error(qry, task->transport, KR_SELECTION_TCP_CONNECT_FAILED);
return kr_error(EAGAIN);
}
assert(task->pending_count == 0);
/* target */
- const struct sockaddr *addr = task->addrlist;
+ const struct sockaddr *addr = &task->transport->address.ip;
if (addr->sa_family == AF_UNSPEC) {
/* Target isn't defined. Finalize task with SERVFAIL.
* Although task->pending_count is zero, there are can be followers,
}
/* Checkout task before connecting */
struct request_ctx *ctx = task->ctx;
- if (kr_resolve_checkout(&ctx->req, NULL, (struct sockaddr *)addr,
- SOCK_STREAM, task->pktbuf) != 0) {
+ if (kr_resolve_checkout(&ctx->req, NULL, task->transport, task->pktbuf) != 0) {
subreq_finalize(task, packet_source, packet);
return qr_task_finalize(task, KR_STATE_FAIL);
}
assert(ctx);
struct kr_request *req = &ctx->req;
struct worker_ctx *worker = ctx->worker;
- int sock_type = -1;
- task->addrlist = NULL;
- task->addrlist_count = 0;
- task->addrlist_turn = 0;
if (worker->too_many_open) {
/* */
} else {
if (packet && kr_rplan_empty(rplan)) {
/* new query; TODO - make this detection more obvious */
- kr_resolve_consume(req, packet_source, packet);
+ kr_resolve_consume(req, &task->transport, packet);
}
return qr_task_finalize(task, KR_STATE_FAIL);
}
}
- int state = kr_resolve_consume(req, packet_source, packet);
+ // Report network RTT back to server selection
+ if (task->send_time && task->recv_time) {
+ struct kr_query *qry = array_tail(req->rplan.pending);
+ qry->server_selection.update_rtt(qry, task->transport, task->recv_time - task->send_time);
+ }
+
+ int state = kr_resolve_consume(req, &task->transport, packet);
+
+ task->transport = NULL;
while (state == KR_STATE_PRODUCE) {
- state = kr_resolve_produce(req, &task->addrlist,
- &sock_type, task->pktbuf);
+ state = kr_resolve_produce(req, &task->transport, task->pktbuf);
if (unlikely(++task->iter_count > KR_ITER_LIMIT ||
task->timeouts >= KR_TIMEOUT_LIMIT)) {
#ifndef NOVERBOSELOG
struct kr_rplan *rplan = &req->rplan;
- struct kr_query *last = kr_rplan_last(rplan);
+ struct kr_query *last = kr_rplan_last(rplan);
if (task->iter_count > KR_ITER_LIMIT) {
VERBOSE_MSG(last, "canceling query due to exceeded iteration count limit of %d\n", KR_ITER_LIMIT);
}
/* We're done, no more iterations needed */
if (state & (KR_STATE_DONE|KR_STATE_FAIL)) {
return qr_task_finalize(task, state);
- } else if (!task->addrlist || sock_type < 0) {
+ } else if (!task->transport->protocol) {
return qr_task_step(task, NULL, NULL);
}
- /* Count available address choices */
- struct sockaddr_in6 *choice = (struct sockaddr_in6 *)task->addrlist;
- for (size_t i = 0; i < KR_NSREP_MAXADDR && choice->sin6_family != AF_UNSPEC; ++i) {
- task->addrlist_count += 1;
- choice += 1;
- }
-
- /* Upgrade to TLS if the upstream address is configured as DoT capable. */
- if (task->addrlist_count > 0 && kr_inaddr_port(task->addrlist) == KR_DNS_PORT) {
- /* TODO if there are multiple addresses (task->addrlist_count > 1)
- * check all of them. */
- struct network *net = &worker->engine->net;
- /* task->addrlist has to contain TLS port before tls_client_param_get() call */
- kr_inaddr_set_port(task->addrlist, KR_DNS_TLS_PORT);
- tls_client_param_t *tls_entry =
- tls_client_param_get(net->tls_client_params, task->addrlist);
- if (tls_entry) {
- packet_source = NULL;
- sock_type = SOCK_STREAM;
- /* TODO in this case in tcp_task_make_connection() will be performed
- * redundant map_get() call. */
- } else {
- /* The function is fairly cheap, so we just change there and back. */
- kr_inaddr_set_port(task->addrlist, KR_DNS_PORT);
- }
- }
-
- int ret = 0;
- if (sock_type == SOCK_DGRAM) {
- /* Start fast retransmit with UDP. */
- ret = udp_task_step(task, packet_source, packet);
- } else {
- /* TCP. Connect to upstream or send the query if connection already exists. */
- assert (sock_type == SOCK_STREAM);
- ret = tcp_task_step(task, packet_source, packet);
+ switch (task->transport->protocol)
+ {
+ case KR_TRANSPORT_UDP:
+ return udp_task_step(task, packet_source, packet);
+ break;
+ case KR_TRANSPORT_TCP: // fall through
+ case KR_TRANSPORT_TLS:
+ return tcp_task_step(task, packet_source, packet);
+ default:
+ assert(0);
+ break;
}
- return ret;
}
static int parse_packet(knot_pkt_t *query)
}
assert(!session_flags(session)->closing);
addr = peer;
+ /* Note receive time for RTT calculation */
+ task->recv_time = kr_now();
}
assert(uv_is_closing(session_get_handle(session)) == false);
/* Packet was successfully parsed.
* Task was created (found). */
session_touch(session);
+
/* Consume input and produce next message */
return qr_task_step(task, addr, pkt);
}
return map_del_tcp_session(&worker->tcp_connected, addr);
}
-static struct session* worker_find_tcp_connected(struct worker_ctx *worker,
+struct session* worker_find_tcp_connected(struct worker_ctx *worker,
const struct sockaddr* addr)
{
return map_find_tcp_session(&worker->tcp_connected, addr);
return map_del_tcp_session(&worker->tcp_waiting, addr);
}
-static struct session* worker_find_tcp_waiting(struct worker_ctx *worker,
+struct session* worker_find_tcp_waiting(struct worker_ctx *worker,
const struct sockaddr* addr)
{
return map_find_tcp_session(&worker->tcp_waiting, addr);
return kr_ok();
}
-knot_pkt_t * worker_resolve_mk_pkt(const char *qname_str, uint16_t qtype, uint16_t qclass,
+knot_pkt_t *worker_resolve_mk_pkt_dname(knot_dname_t *qname, uint16_t qtype, uint16_t qclass,
const struct kr_qflags *options)
{
- uint8_t qname[KNOT_DNAME_MAXLEN];
- if (!knot_dname_from_str(qname, qname_str, sizeof(qname)))
- return NULL;
knot_pkt_t *pkt = knot_pkt_new(NULL, KNOT_EDNS_MAX_UDP_PAYLOAD, NULL);
if (!pkt)
return NULL;
return pkt;
}
+/* Convert the textual name into a stack buffer with knot_dname_from_str() and
+ * delegate packet construction to worker_resolve_mk_pkt_dname().
+ * Returns NULL when the name conversion fails. */
+knot_pkt_t *worker_resolve_mk_pkt(const char *qname_str, uint16_t qtype, uint16_t qclass,
+				   const struct kr_qflags *options)
+{
+	uint8_t wire_name[KNOT_DNAME_MAXLEN];
+	const knot_dname_t *parsed = knot_dname_from_str(wire_name, qname_str, sizeof(wire_name));
+	return parsed ? worker_resolve_mk_pkt_dname(wire_name, qtype, qclass, options) : NULL;
+}
+
struct qr_task *worker_resolve_start(knot_pkt_t *query, struct kr_qflags options)
{
struct worker_ctx *worker = the_worker;
*/
int worker_end_tcp(struct session *session);
+KR_EXPORT knot_pkt_t *worker_resolve_mk_pkt_dname(knot_dname_t *qname, uint16_t qtype, uint16_t qclass,
+ const struct kr_qflags *options);
+
/**
* Create a packet suitable for worker_resolve_start(). All in malloc() memory.
*/
const struct sockaddr *addr);
int worker_del_tcp_waiting(struct worker_ctx *worker,
const struct sockaddr* addr);
+struct session* worker_find_tcp_waiting(struct worker_ctx *worker,
+ const struct sockaddr* addr);
+struct session* worker_find_tcp_connected(struct worker_ctx *worker,
+ const struct sockaddr* addr);
knot_pkt_t *worker_task_get_pktbuf(const struct qr_task *task);
struct request_ctx *worker_task_get_request(struct qr_task *task);
/** @cond internal */
/** Number of request within timeout window. */
-#define MAX_PENDING KR_NSREP_MAXADDR
+#define MAX_PENDING 4
/** Maximum response time from TCP upstream, milliseconds */
#define MAX_TCP_INACTIVITY (KR_RESOLVE_TIME_LIMIT + KR_CONN_RTT_MAX)
*/
#include <inttypes.h> /* PRIu64 */
+#include <limits.h>
#include <stdlib.h>
#include <uv.h>
#include <ucw/mempool.h>
#include "lib/resolve.h"
#include "lib/rplan.h"
#include "lib/defines.h"
-#include "lib/nsrep.h"
+#include "lib/selection.h"
#include "lib/module.h"
#include "lib/dnssec/ta.h"
if (!is_authoritative(pkt, query)) {
if (!(query->flags.FORWARD) &&
pkt_class & (PKT_NXDOMAIN|PKT_NODATA)) {
+ query->server_selection.error(query, req->upstream.transport, KR_SELECTION_LAME_DELEGATION);
VERBOSE_MSG("<= lame response: non-auth sent negative response\n");
return KR_STATE_FAIL;
}
if (query->flags.FORWARD) {
next->forward_flags.CNAME = true;
- if (query->parent == NULL) {
- state = kr_nsrep_copy_set(&next->ns, &query->ns);
- if (state != kr_ok()) {
- return KR_STATE_FAIL;
- }
- }
}
next->cname_parent = query;
/* Want DNSSEC if and only if it's posible to secure
} else if (!is_paired_to_query(pkt, query)) {
WITH_VERBOSE(query) {
const char *ns_str =
- req->upstream.addr ? kr_straddr(req->upstream.addr) : "(internal)";
+ req->upstream.transport ? kr_straddr(&req->upstream.transport->address.ip) : "(internal)";
VERBOSE_MSG("<= ignoring mismatching response from %s\n",
ns_str ? ns_str : "(kr_straddr failed)");
}
VERBOSE_MSG("<= truncated response, failover to TCP\n");
if (query) {
/* Fail if already on TCP. */
- if (query->flags.TCP) {
+ if (req->upstream.transport->protocol != KR_TRANSPORT_UDP) {
VERBOSE_MSG("<= TC=1 with TCP, bailing out\n");
+ query->server_selection.error(query, req->upstream.transport, KR_SELECTION_TRUNCATED);
return resolve_error(pkt, req);
}
- query->flags.TCP = true;
+ query->server_selection.error(query, req->upstream.transport, KR_SELECTION_TRUNCATED);
}
return KR_STATE_CONSUME;
}
const knot_lookup_t *rcode = knot_lookup_by_id(knot_rcode_names, knot_wire_get_rcode(pkt->wire));
#endif
+ // We can't return directly from the switch because we have to give feedback to server selection first
+ int ret = 0;
+ int selection_error = -1;
+
/* Check response code. */
switch(knot_wire_get_rcode(pkt->wire)) {
case KNOT_RCODE_NOERROR:
knot_wire_set_rcode(req->answer->wire, KNOT_RCODE_YXDOMAIN);
break;
case KNOT_RCODE_REFUSED:
+ if (query->flags.STUB) {
+ /* just pass answer through if in stub mode */
+ break;
+ }
+ selection_error = KR_SELECTION_REFUSED;
+ VERBOSE_MSG("<= rcode: %s\n", rcode ? rcode->name : "??");
+ ret = resolve_badmsg(pkt, req, query);
+ break;
case KNOT_RCODE_SERVFAIL:
if (query->flags.STUB) {
/* just pass answer through if in stub mode */
break;
}
- /* fall through */
+ selection_error = KR_SELECTION_SERVFAIL;
+ VERBOSE_MSG("<= rcode: %s\n", rcode ? rcode->name : "??");
+ ret = resolve_badmsg(pkt, req, query);
+ break;
case KNOT_RCODE_FORMERR:
+ selection_error = KR_SELECTION_FORMERROR;
+ VERBOSE_MSG("<= rcode: %s\n", rcode ? rcode->name : "??");
+ ret = resolve_badmsg(pkt, req, query);
+ break;
case KNOT_RCODE_NOTIMPL:
+ selection_error = KR_SELECTION_NOTIMPL;
VERBOSE_MSG("<= rcode: %s\n", rcode ? rcode->name : "??");
- return resolve_badmsg(pkt, req, query);
+ ret = resolve_badmsg(pkt, req, query);
+ break;
default:
+ selection_error = KR_SELECTION_OTHER_RCODE;
VERBOSE_MSG("<= rcode: %s\n", rcode ? rcode->name : "??");
- return resolve_error(pkt, req);
+ ret = resolve_error(pkt, req);
+ break;
+ }
+
+ if (query->server_selection.initialized) {
+ if (selection_error != -1) {
+ query->server_selection.error(query, req->upstream.transport, selection_error);
+ } else {
+ // Is this even true? Is this necessary?
+ query->server_selection.success(query, req->upstream.transport);
+ }
+ }
+
+ if (ret) {
+ return ret;
}
int state;
(void)0;
ranked_rr_array_t *selected[] = kr_request_selected(req);
for (knot_section_t i = KNOT_ANSWER; i <= KNOT_ADDITIONAL; ++i) {
- int ret = kr_ranked_rrarray_finalize(selected[i], query->uid, &req->pool);
+ ret = kr_ranked_rrarray_finalize(selected[i], query->uid, &req->pool);
if (unlikely(ret)) {
return KR_STATE_FAIL;
}
#include "lib/utils.h"
#include "lib/defines.h"
#include "lib/module.h"
+#include "lib/selection.h"
#define VERBOSE_MSG(qry, ...) QRVERBOSE(qry, "vldr", __VA_ARGS__)
return NULL;
}
}
- return new_ds;
+ return new_ds;
}
static void mark_insecure_parents(const struct kr_query *qry)
return ctx->state;
}
+/** Consume-layer wrapper around validate() that reports DNSSEC failures to server selection.
+ *
+ * Runs validate() and returns its state unchanged. When that state has
+ * KR_STATE_FAIL set and the current query is flagged DNSSEC_BOGUS, the
+ * upstream transport is reported with KR_SELECTION_DNSSEC_ERROR.
+ */
+static int validate_wrapper(kr_layer_t *ctx, knot_pkt_t *pkt) {
+	int ret = validate(ctx, pkt);
+	struct kr_request *req = ctx->req;
+	struct kr_query *qry = req->current_query;
+	const bool bogus_failure = (ret & KR_STATE_FAIL) && qry->flags.DNSSEC_BOGUS;
+	/* Only call into server selection once it is marked initialized, so an
+	 * unset error callback is never invoked. */
+	if (bogus_failure && qry->server_selection.initialized) {
+		qry->server_selection.error(qry, req->upstream.transport, KR_SELECTION_DNSSEC_ERROR);
+	}
+	return ret;
+}
+
+
/** Module implementation. */
int validate_init(struct kr_module *self)
{
static const kr_layer_api_t layer = {
- .consume = &validate,
+ .consume = &validate_wrapper,
.answer_finalize = &hide_bogus,
};
self->layer = &layer;
'layer/iterate.c',
'layer/validate.c',
'module.c',
- 'nsrep.c',
'resolve.c',
'rplan.c',
+ 'selection.c',
+ 'selection_forward.c',
+ 'selection_iter.c',
'utils.c',
'zonecut.c',
])
'layer.h',
'layer/iterate.h',
'module.h',
- 'nsrep.h',
'resolve.h',
'rplan.h',
+ 'selection.h',
+ 'selection_forward.h',
+ 'selection_iter.h',
'utils.h',
'zonecut.h',
])
+++ /dev/null
-/* Copyright (C) 2014-2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
- * SPDX-License-Identifier: GPL-3.0-or-later
- */
-
-#include <assert.h>
-#include <sys/socket.h>
-#include <netinet/in.h>
-#include <netdb.h>
-
-#include <arpa/inet.h>
-
-#include "lib/nsrep.h"
-#include "lib/rplan.h"
-#include "lib/resolve.h"
-#include "lib/defines.h"
-#include "lib/generic/pack.h"
-#include "contrib/ucw/lib.h"
-
-/** Some built-in unfairness ... */
-#ifndef FAVOUR_IPV6
-#define FAVOUR_IPV6 20 /* 20ms bonus for v6 */
-#endif
-
-/** @internal Macro to set address structure. */
-#define ADDR_SET(sa, family, addr, len, port) do {\
- memcpy(&sa ## _addr, (addr), (len)); \
- sa ## _family = (family); \
- sa ## _port = htons(port); \
-} while (0)
-
-/** Update nameserver representation with current name/address pair. */
-static void update_nsrep(struct kr_nsrep *ns, size_t pos, uint8_t *addr, size_t addr_len, int port)
-{
- if (addr == NULL) {
- ns->addr[pos].ip.sa_family = AF_UNSPEC;
- return;
- }
-
- /* Rotate previous addresses to the right. */
- memmove(ns->addr + pos + 1, ns->addr + pos, (KR_NSREP_MAXADDR - pos - 1) * sizeof(ns->addr[0]));
-
- switch(addr_len) {
- case sizeof(struct in_addr):
- ADDR_SET(ns->addr[pos].ip4.sin, AF_INET, addr, addr_len, port); break;
- case sizeof(struct in6_addr):
- ADDR_SET(ns->addr[pos].ip6.sin6, AF_INET6, addr, addr_len, port); break;
- default: assert(0); break;
- }
-}
-
-static void update_nsrep_set(struct kr_nsrep *ns, const knot_dname_t *name, uint8_t *addr[], unsigned score)
-{
- /* NSLIST is not empty, empty NS cannot be a leader. */
- if (!addr[0] && ns->addr[0].ip.sa_family != AF_UNSPEC) {
- return;
- }
- /* Set new NS leader */
- ns->name = name;
- ns->score = score;
- for (size_t i = 0; i < KR_NSREP_MAXADDR; ++i) {
- if (addr[i]) {
- void *addr_val = pack_obj_val(addr[i]);
- size_t len = pack_obj_len(addr[i]);
- update_nsrep(ns, i, addr_val, len, KR_DNS_PORT);
- } else {
- break;
- }
- }
-}
-
-#undef ADDR_SET
-
-/**
- * \param addr_set pack with one IP address per element */
-static unsigned eval_addr_set(const pack_t *addr_set, struct kr_context *ctx,
- struct kr_qflags opts, unsigned score, uint8_t *addr[])
-{
- kr_nsrep_rtt_lru_t *rtt_cache = ctx->cache_rtt;
- kr_nsrep_rtt_lru_entry_t *rtt_cache_entry_ptr[KR_NSREP_MAXADDR] = { NULL, };
- assert (KR_NSREP_MAXADDR >= 2);
- unsigned rtt_cache_entry_score[KR_NSREP_MAXADDR] = { score, KR_NS_MAX_SCORE + 1, };
- uint64_t now = kr_now();
-
- /* Name server is better candidate if it has address record. */
- for (uint8_t *it = pack_head(*addr_set); it != pack_tail(*addr_set);
- it = pack_obj_next(it)) {
- void *val = pack_obj_val(it);
- size_t len = pack_obj_len(it);
- unsigned favour = 0;
- bool is_valid = false;
- /* Check if the address isn't disabled. */
- if (len == sizeof(struct in6_addr)) {
- is_valid = !(opts.NO_IPV6);
- favour = FAVOUR_IPV6;
- } else if (len == sizeof(struct in_addr)) {
- is_valid = !(opts.NO_IPV4);
- } else {
- assert(!EINVAL);
- is_valid = false;
- }
-
- if (!is_valid) {
- continue;
- }
-
- /* Get score for the current address. */
- kr_nsrep_rtt_lru_entry_t *cached = rtt_cache ?
- lru_get_try(rtt_cache, val, len) :
- NULL;
- unsigned cur_addr_score = KR_NS_GLUED;
- if (cached) {
- cur_addr_score = cached->score;
- if (cached->score >= KR_NS_TIMEOUT) {
- /* If NS once was marked as "timeouted",
- * it won't participate in NS elections
- * at least ctx->cache_rtt_tout_retry_interval milliseconds. */
- uint64_t elapsed = now - cached->tout_timestamp;
- elapsed = elapsed > UINT_MAX ? UINT_MAX : elapsed;
- if (elapsed > ctx->cache_rtt_tout_retry_interval) {
- /* Select this NS for probing in this particular query,
- * but don't change the cached score.
- * For other queries this NS will remain "timeouted". */
- cur_addr_score = KR_NS_LONG - 1;
- }
- }
- }
-
- /* We can't always use favour. If these conditions held:
- *
- * rtt_cache_entry_score[i] < KR_NS_TIMEOUT
- * rtt_cache_entry_score[i] + favour > KR_NS_TIMEOUT
- * cur_addr_score < rtt_cache_entry_score[i] + favour
- *
- * we would prefer "certainly dead" cur_addr_score
- * instead of "almost dead but alive" rtt_cache_entry_score[i]
- */
- const unsigned cur_favour = cur_addr_score < KR_NS_TIMEOUT ? favour : 0;
- for (size_t i = 0; i < KR_NSREP_MAXADDR; ++i) {
- if (cur_addr_score >= rtt_cache_entry_score[i] + cur_favour)
- continue;
-
- /* Shake down previous contenders */
- for (size_t j = KR_NSREP_MAXADDR - 1; j > i; --j) {
- addr[j] = addr[j - 1];
- rtt_cache_entry_ptr[j] = rtt_cache_entry_ptr[j - 1];
- rtt_cache_entry_score[j] = rtt_cache_entry_score[j - 1];
- }
- addr[i] = it;
- rtt_cache_entry_score[i] = cur_addr_score;
- rtt_cache_entry_ptr[i] = cached;
- break;
- }
- }
-
- /* At this point, rtt_cache_entry_ptr contains up to KR_NSREP_MAXADDR
- * pointers to the rtt cache entries with the best scores for the given addr_set.
- * Check if there are timeouted NS. */
-
- for (size_t i = 0; i < KR_NSREP_MAXADDR; ++i) {
- if (rtt_cache_entry_ptr[i] == NULL)
- continue;
- if (rtt_cache_entry_ptr[i]->score < KR_NS_TIMEOUT)
- continue;
-
- uint64_t elapsed = now - rtt_cache_entry_ptr[i]->tout_timestamp;
- elapsed = elapsed > UINT_MAX ? UINT_MAX : elapsed;
- if (elapsed <= ctx->cache_rtt_tout_retry_interval)
- continue;
-
- /* rtt_cache_entry_ptr[i] points to "timeouted" rtt cache entry.
- * The period of the ban on participation in elections has expired. */
-
- if (VERBOSE_STATUS) {
- void *val = pack_obj_val(addr[i]);
- size_t len = pack_obj_len(addr[i]);
- char sa_str[INET6_ADDRSTRLEN];
- int af = (len == sizeof(struct in6_addr)) ? AF_INET6 : AF_INET;
- inet_ntop(af, val, sa_str, sizeof(sa_str));
- kr_log_verbose("[ ][nsre] probing timeouted NS: %s, score %i\n",
- sa_str, rtt_cache_entry_ptr[i]->score);
- }
-
- rtt_cache_entry_ptr[i]->tout_timestamp = now;
- }
-
- return rtt_cache_entry_score[0];
-}
-
-static int eval_nsrep(const knot_dname_t *owner, const pack_t *addr_set, struct kr_query *qry)
-{
- struct kr_nsrep *ns = &qry->ns;
- struct kr_context *ctx = ns->ctx;
- unsigned score = KR_NS_MAX_SCORE;
- unsigned reputation = 0;
- uint8_t *addr_choice[KR_NSREP_MAXADDR] = { NULL, };
-
- /* Fetch NS reputation */
- if (ctx->cache_rep) {
- unsigned *cached = lru_get_try(ctx->cache_rep, (const char *)owner,
- knot_dname_size(owner));
- if (cached) {
- reputation = *cached;
- }
- }
-
- /* Favour nameservers with unknown addresses to probe them,
- * otherwise discover the current best address for the NS. */
- if (addr_set->len == 0) {
- score = KR_NS_UNKNOWN;
- /* If the server doesn't have IPv6, give it disadvantage. */
- if (reputation & KR_NS_NOIP6) {
- score += FAVOUR_IPV6;
- /* If the server is unknown but has rep record, treat it as timeouted */
- if (reputation & KR_NS_NOIP4) {
- score = KR_NS_UNKNOWN;
- /* Try to start with clean slate */
- if (!(qry->flags.NO_IPV6)) {
- reputation &= ~KR_NS_NOIP6;
- }
- if (!(qry->flags.NO_IPV4)) {
- reputation &= ~KR_NS_NOIP4;
- }
- }
- }
- } else {
- score = eval_addr_set(addr_set, ctx, qry->flags, score, addr_choice);
- }
-
- /* Probabilistic bee foraging strategy (naive).
- * The fastest NS is preferred by workers until it is depleted (timeouts or degrades),
- * at the same time long distance scouts probe other sources (low probability).
- * Servers on TIMEOUT will not have probed at all.
- * Servers with score above KR_NS_LONG will have periodically removed from
- * reputation cache, so that kresd can reprobe them. */
- if (score >= KR_NS_TIMEOUT) {
- return kr_ok();
- } else if (score <= ns->score &&
- (score < KR_NS_LONG || qry->flags.NO_THROTTLE)) {
- update_nsrep_set(ns, owner, addr_choice, score);
- ns->reputation = reputation;
- } else if (kr_rand_coin(1, 10) &&
- !kr_rand_coin(score, KR_NS_MAX_SCORE)) {
- /* With 10% chance probe server with a probability
- * given by its RTT / MAX_RTT. */
- update_nsrep_set(ns, owner, addr_choice, score);
- ns->reputation = reputation;
- return 1; /* Stop evaluation */
- } else if (ns->score > KR_NS_MAX_SCORE) {
- /* Check if any server was already selected.
- * If no, pick current server and continue evaluation. */
- update_nsrep_set(ns, owner, addr_choice, score);
- ns->reputation = reputation;
- }
-
- return kr_ok();
-}
-
-int kr_nsrep_set(struct kr_query *qry, size_t index, const struct sockaddr *sock)
-{
- if (!qry) {
- return kr_error(EINVAL);
- }
- if (index >= KR_NSREP_MAXADDR) {
- return kr_error(ENOSPC);
- }
-
- if (!sock) {
- qry->ns.name = (const uint8_t *)"";
- qry->ns.addr[index].ip.sa_family = AF_UNSPEC;
- return kr_ok();
- }
-
- switch (sock->sa_family) {
- case AF_INET:
- if (qry->flags.NO_IPV4) {
- return kr_error(ENOENT);
- }
- qry->ns.addr[index].ip4 = *(const struct sockaddr_in *)sock;
- break;
- case AF_INET6:
- if (qry->flags.NO_IPV6) {
- return kr_error(ENOENT);
- }
- qry->ns.addr[index].ip6 = *(const struct sockaddr_in6 *)sock;
- break;
- default:
- qry->ns.addr[index].ip.sa_family = AF_UNSPEC;
- return kr_error(EINVAL);
- }
-
- qry->ns.name = (const uint8_t *)"";
- /* Reset score on first entry */
- if (index == 0) {
- qry->ns.score = KR_NS_UNKNOWN;
- qry->ns.reputation = 0;
- }
-
- /* Retrieve RTT from cache */
- struct kr_context *ctx = qry->ns.ctx;
- kr_nsrep_rtt_lru_entry_t *rtt_cache_entry = ctx
- ? lru_get_try(ctx->cache_rtt, kr_inaddr(sock), kr_family_len(sock->sa_family))
- : NULL;
- if (rtt_cache_entry) {
- qry->ns.score = MIN(qry->ns.score, rtt_cache_entry->score);
- }
-
- return kr_ok();
-}
-
-#define ELECT_INIT(ns, ctx_) do { \
- (ns)->ctx = (ctx_); \
- (ns)->addr[0].ip.sa_family = AF_UNSPEC; \
- (ns)->reputation = 0; \
- (ns)->score = KR_NS_MAX_SCORE + 1; \
-} while (0)
-
-int kr_nsrep_elect(struct kr_query *qry, struct kr_context *ctx)
-{
- if (!qry || !ctx) {
- //assert(!EINVAL);
- return kr_error(EINVAL);
- }
-
- // First we dump the nsset into a temporary array
- const int nsset_len = trie_weight(qry->zone_cut.nsset);
- struct {
- const knot_dname_t *name;
- const pack_t *addrs;
- } nsset[nsset_len];
-
- trie_it_t *it;
- int i = 0;
- for (it = trie_it_begin(qry->zone_cut.nsset); !trie_it_finished(it);
- trie_it_next(it), ++i) {
- /* we trust it's a correct dname */
- nsset[i].name = (const knot_dname_t *)trie_it_key(it, NULL);
- nsset[i].addrs = (const pack_t *)*trie_it_val(it);
- }
- trie_it_free(it);
- assert(i == nsset_len);
-
- // Now we sort it randomly, by select-sort.
- for (i = 0; i < nsset_len - 1; ++i) {
- // The winner for position i will be uniformly chosen from indices >= i
- const int j = i + kr_rand_bytes(1) % (nsset_len - i);
- // Now we swap the winner with index i
- if (i == j) continue;
- __typeof__((nsset[i])) tmp = nsset[i];
- nsset[i] = nsset[j];
- nsset[j] = tmp;
- }
-
- // Finally we run the original algorithm, in this randomized order.
- struct kr_nsrep *ns = &qry->ns;
- ELECT_INIT(ns, ctx);
- int ret = kr_ok();
- for (i = 0; i < nsset_len; ++i) {
- ret = eval_nsrep(nsset[i].name, nsset[i].addrs, qry);
- if (ret) break;
- }
-
- if (qry->ns.score <= KR_NS_MAX_SCORE && qry->ns.score >= KR_NS_LONG) {
- /* This is a low-reliability probe,
- * go with TCP to get ICMP reachability check. */
- qry->flags.TCP = true;
- }
- return ret;
-}
-
-int kr_nsrep_elect_addr(struct kr_query *qry, struct kr_context *ctx)
-{
- if (!qry || !ctx) {
- //assert(!EINVAL);
- return kr_error(EINVAL);
- }
-
- /* Get address list for this NS */
- struct kr_nsrep *ns = &qry->ns;
- ELECT_INIT(ns, ctx);
- pack_t *addr_set = kr_zonecut_find(&qry->zone_cut, ns->name);
- if (!addr_set) {
- return kr_error(ENOENT);
- }
- /* Evaluate addr list */
- uint8_t *addr_choice[KR_NSREP_MAXADDR] = { NULL, };
- unsigned score = eval_addr_set(addr_set, ctx, qry->flags, ns->score, addr_choice);
- update_nsrep_set(ns, ns->name, addr_choice, score);
- return kr_ok();
-}
-
-#undef ELECT_INIT
-
-int kr_nsrep_update_rtt(struct kr_nsrep *ns, const struct sockaddr *addr,
- unsigned score, kr_nsrep_rtt_lru_t *cache, int umode)
-{
- if (!cache || umode > KR_NS_MAX || umode < 0) {
- return kr_error(EINVAL);
- }
-
- /* Get `addr`, and later its raw string. */
- if (addr) {
- /* Caller provided specific address, OK. */
- } else if (ns != NULL) {
- addr = &ns->addr[0].ip;
- } else {
- assert(false && "kr_nsrep_update_rtt: don't know what address to update");
- return kr_error(EINVAL);
- }
- const char *addr_in = kr_inaddr(addr);
- size_t addr_len = kr_inaddr_len(addr);
- if (!addr_in || addr_len <= 0) {
- assert(false && "kr_nsrep_update_rtt: incorrect address");
- return kr_error(EINVAL);
- }
-
- bool is_new_entry = false;
- kr_nsrep_rtt_lru_entry_t *cur = lru_get_new(cache, addr_in, addr_len,
- (&is_new_entry));
- if (!cur) {
- return kr_ok();
- }
- if (score <= KR_NS_GLUED) {
- score = KR_NS_GLUED + 1;
- }
- /* If there's nothing to update, we reset it unless KR_NS_UPDATE_NORESET
- * mode was requested. New items are zeroed by LRU automatically. */
- if (is_new_entry && umode != KR_NS_UPDATE_NORESET) {
- umode = KR_NS_RESET;
- }
- unsigned new_score = 0;
- /* Update score, by default smooth over last two measurements. */
- switch (umode) {
- case KR_NS_UPDATE:
- case KR_NS_UPDATE_NORESET:
- new_score = (cur->score + score) / 2; break;
- case KR_NS_RESET: new_score = score; break;
- case KR_NS_ADD: new_score = MIN(KR_NS_MAX_SCORE - 1, cur->score + score); break;
- case KR_NS_MAX: new_score = MAX(cur->score, score); break;
- default: return kr_error(EINVAL);
- }
- /* Score limits */
- if (new_score > KR_NS_MAX_SCORE) {
- new_score = KR_NS_MAX_SCORE;
- }
- if (new_score >= KR_NS_TIMEOUT && cur->score < KR_NS_TIMEOUT) {
- /* Set the timestamp only when NS became "timeouted" */
- cur->tout_timestamp = kr_now();
- }
- cur->score = new_score;
- return kr_ok();
-}
-
-int kr_nsrep_update_rep(struct kr_nsrep *ns, unsigned reputation, kr_nsrep_lru_t *cache)
-{
- if (!ns || !cache ) {
- return kr_error(EINVAL);
- }
-
- /* Store in the struct */
- ns->reputation = reputation;
- /* Store reputation in the LRU cache */
- unsigned *cur = lru_get_new(cache, (const char *)ns->name,
- knot_dname_size(ns->name), NULL);
- if (cur) {
- *cur = reputation;
- }
- return kr_ok();
-}
-
-int kr_nsrep_copy_set(struct kr_nsrep *dst, const struct kr_nsrep *src)
-{
- if (!dst || !src ) {
- return kr_error(EINVAL);
- }
-
- memcpy(dst, src, sizeof(struct kr_nsrep));
- dst->name = (const uint8_t *)"";
- dst->score = KR_NS_UNKNOWN;
- dst->reputation = 0;
-
- return kr_ok();
-}
-
-int kr_nsrep_sort(struct kr_nsrep *ns, struct kr_context *ctx)
-{
- if (!ns || !ctx) {
- assert(false);
- return kr_error(EINVAL);
- }
-
- kr_nsrep_rtt_lru_t *rtt_cache = ctx->cache_rtt;
-
- ns->reputation = 0;
- ns->score = KR_NS_MAX_SCORE + 1;
-
- if (ns->addr[0].ip.sa_family == AF_UNSPEC) {
- return kr_error(EINVAL);
- }
-
- /* Compute the scores. Unfortunately there's no space for scores
- * along the addresses. */
- unsigned scores[KR_NSREP_MAXADDR];
- int i;
- bool timeouted_address_is_already_selected = false;
- for (i = 0; i < KR_NSREP_MAXADDR; ++i) {
- const struct sockaddr *sa = &ns->addr[i].ip;
- if (sa->sa_family == AF_UNSPEC) {
- break;
- }
- kr_nsrep_rtt_lru_entry_t *rtt_cache_entry = lru_get_try(rtt_cache,
- kr_inaddr(sa),
- kr_family_len(sa->sa_family));
- if (!rtt_cache_entry) {
- scores[i] = 1; /* prefer unknown to probe RTT */
- } else if (rtt_cache_entry->score < KR_NS_FWD_TIMEOUT) {
- /* some probability to bump bad ones up for re-probe */
- scores[i] = rtt_cache_entry->score;
- /* The lower the rtt, the more likely it will be selected. */
- if (!kr_rand_coin(rtt_cache_entry->score, KR_NS_FWD_TIMEOUT)) {
- scores[i] = 1;
- }
- } else {
- uint64_t now = kr_now();
- uint64_t elapsed = now - rtt_cache_entry->tout_timestamp;
- scores[i] = KR_NS_MAX_SCORE + 1;
- elapsed = elapsed > UINT_MAX ? UINT_MAX : elapsed;
- if (elapsed > ctx->cache_rtt_tout_retry_interval &&
- !timeouted_address_is_already_selected) {
- scores[i] = 1;
- rtt_cache_entry->tout_timestamp = now;
- timeouted_address_is_already_selected = true;
- }
- }
-
- /* Give advantage to IPv6. */
- if (scores[i] <= KR_NS_MAX_SCORE && sa->sa_family == AF_INET) {
- scores[i] += FAVOUR_IPV6;
- }
-
- if (VERBOSE_STATUS) {
- kr_log_verbose("[ ][nsre] score %d for %s;\t cached RTT: %d\n",
- scores[i], kr_straddr(sa),
- rtt_cache_entry ? rtt_cache_entry->score : -1);
- }
- }
-
- /* Select-sort the addresses. */
- const int count = i;
- for (i = 0; i < count - 1; ++i) {
- /* find min from i onwards */
- int min_i = i;
- for (int j = i + 1; j < count; ++j) {
- if (scores[j] < scores[min_i]) {
- min_i = j;
- }
- }
- /* swap the indices */
- if (min_i != i) {
- SWAP(scores[min_i], scores[i]);
- SWAP(ns->addr[min_i], ns->addr[i]);
- }
- }
-
- if (count > 0) {
- ns->score = scores[0];
- ns->reputation = 0;
- }
-
- return kr_ok();
-}
+++ /dev/null
-/* Copyright (C) 2014-2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
- * SPDX-License-Identifier: GPL-3.0-or-later
- */
-
-#pragma once
-
-#include <netinet/in.h>
-#include <sys/socket.h>
-#include <libknot/dname.h>
-#include <limits.h>
-
-#include "lib/defines.h"
-#include "lib/generic/lru.h"
-
-struct kr_query;
-
-/**
- * NS RTT score (special values).
- * @note RTT is measured in milliseconds.
- */
-enum kr_ns_score {
- KR_NS_MAX_SCORE = 20 * KR_CONN_RTT_MAX, /* max possible value */
- KR_NS_FWD_TIMEOUT = (95 * 10000) / 100, /* timeout for upstream recursor,
- * 95 percents from max resolution time */
- KR_NS_TIMEOUT = (95 * KR_CONN_RTT_MAX) / 100, /* timeout for upstream auth */
- KR_NS_LONG = (3 * KR_NS_TIMEOUT) / 4,
- KR_NS_UNKNOWN = KR_NS_TIMEOUT / 2,
- KR_NS_PENALTY = 100,
- KR_NS_GLUED = 10
-};
-
-/**
- * See kr_nsrep_update_rtt()
- */
-#define KR_NS_DEAD (((KR_NS_TIMEOUT * 4) + 3) / 3)
-#define KR_NS_FWD_DEAD (((KR_NS_FWD_TIMEOUT * 4) + 3) / 3)
-
-/** If once NS was marked as "timeouted", it won't participate in NS elections
- * at least KR_NS_TIMEOUT_RETRY_INTERVAL milliseconds (now: one second). */
-#define KR_NS_TIMEOUT_RETRY_INTERVAL 1000
-
-/**
- * NS QoS flags.
- */
-enum kr_ns_rep {
- KR_NS_NOIP4 = 1 << 0, /**< NS has no IPv4 */
- KR_NS_NOIP6 = 1 << 1, /**< NS has no IPv6 */
- KR_NS_NOEDNS = 1 << 2 /**< NS has no EDNS support */
-};
-
-/**
- * NS RTT update modes.
- * First update is always KR_NS_RESET unless
- * KR_NS_UPDATE_NORESET mode had choosen.
- */
-enum kr_ns_update_mode {
- KR_NS_UPDATE = 0, /**< Update as smooth over last two measurements */
- KR_NS_UPDATE_NORESET, /**< Same as KR_NS_UPDATE, but disable fallback to
- * KR_NS_RESET on newly added entries.
- * Zero is used as initial value. */
- KR_NS_RESET, /**< Set to given value */
- KR_NS_ADD, /**< Increment current value */
- KR_NS_MAX /**< Set to maximum of current/proposed value. */
-};
-
-struct kr_nsrep_rtt_lru_entry {
- unsigned score; /* combined rtt */
- uint64_t tout_timestamp; /* The time when score became
- * greater or equal then KR_NS_TIMEOUT.
- * Is meaningful only when score >= KR_NS_TIMEOUT */
-};
-
-typedef struct kr_nsrep_rtt_lru_entry kr_nsrep_rtt_lru_entry_t;
-
-/**
- * NS QoS tracking.
- */
-typedef lru_t(kr_nsrep_rtt_lru_entry_t) kr_nsrep_rtt_lru_t;
-
-/**
- * NS reputation tracking.
- */
-typedef lru_t(unsigned) kr_nsrep_lru_t;
-
-/* Maximum count of addresses probed in one go (last is left empty) */
-#define KR_NSREP_MAXADDR 4
-
-/**
- * Name server representation.
- * Contains extra information about the name server, e.g. score
- * or other metadata.
- */
-struct kr_nsrep
-{
- unsigned score; /**< NS score */
- unsigned reputation; /**< NS reputation */
- const knot_dname_t *name; /**< NS name */
- struct kr_context *ctx; /**< Resolution context */
- union inaddr addr[KR_NSREP_MAXADDR]; /**< NS address(es) */
-};
-
-/**
- * Set given NS address. (Very low-level access to the list.)
- * @param qry updated query
- * @param index index of the updated target
- * @param sock socket address to use (sockaddr_in or sockaddr_in6 or NULL)
- * @return 0 or an error code, in particular kr_error(ENOENT) for net.ipvX
- */
-KR_EXPORT
-int kr_nsrep_set(struct kr_query *qry, size_t index, const struct sockaddr *sock);
-
-/**
- * Elect best nameserver/address pair from the nsset.
- * @param qry updated query
- * @param ctx resolution context
- * @return 0 or an error code
- */
-KR_EXPORT
-int kr_nsrep_elect(struct kr_query *qry, struct kr_context *ctx);
-
-/**
- * Elect best nameserver/address pair from the nsset.
- * @param qry updated query
- * @param ctx resolution context
- * @return 0 or an error code
- */
-KR_EXPORT
-int kr_nsrep_elect_addr(struct kr_query *qry, struct kr_context *ctx);
-
-/**
- * Update NS address RTT information.
- *
- * @brief In KR_NS_UPDATE mode reputation is smoothed over last N measurements.
- *
- * @param ns updated NS representation
- * @param addr chosen address (NULL for first)
- * @param score new score (i.e. RTT), see enum kr_ns_score
- * @param cache RTT LRU cache
- * @param umode update mode (KR_NS_UPDATE or KR_NS_RESET or KR_NS_ADD)
- * @return 0 on success, error code on failure
- */
-KR_EXPORT
-int kr_nsrep_update_rtt(struct kr_nsrep *ns, const struct sockaddr *addr,
- unsigned score, kr_nsrep_rtt_lru_t *cache, int umode);
-
-/**
- * Update NSSET reputation information.
- *
- * @param ns updated NS representation
- * @param reputation combined reputation flags, see enum kr_ns_rep
- * @param cache LRU cache
- * @return 0 on success, error code on failure
- */
-KR_EXPORT
-int kr_nsrep_update_rep(struct kr_nsrep *ns, unsigned reputation, kr_nsrep_lru_t *cache);
-/**
- * Copy NSSET reputation information and resets score.
- *
- * @param dst updated NS representation
- * @param src source NS representation
- * @return 0 on success, error code on failure
- */
-int kr_nsrep_copy_set(struct kr_nsrep *dst, const struct kr_nsrep *src);
-
-/**
- * Sort addresses in the query nsrep list by cached RTT.
- * if RTT is greater then KR_NS_TIMEOUT, address will placed at the beginning of the
- * nsrep list once in cache.ns_tout() milliseconds. Otherwise it will be sorted
- * as if it has cached RTT equal to KR_NS_MAX_SCORE + 1.
- * @param ns updated kr_nsrep
- * @param ctx name resolution context.
- * @return 0 or an error code
- * @note ns reputation is zeroed and score is set to KR_NS_MAX_SCORE + 1.
- */
-KR_EXPORT
-int kr_nsrep_sort(struct kr_nsrep *ns, struct kr_context *ctx);
#include <libknot/rrtype/rdname.h>
#include <libknot/descriptor.h>
#include <ucw/mempool.h>
+#include <sys/socket.h>
#include "kresconfig.h"
#include "lib/resolve.h"
#include "lib/layer.h"
if (mod->layer) { \
struct kr_layer layer = {.state = (r)->state, .api = mod->layer, .req = (r)}; \
if (layer.api && layer.api->func) { \
+ /*printf("%s %s\n", STRINGIFY(func), (mod->name));*/ \
(r)->state = layer.api->func(&layer, ##__VA_ARGS__); \
+ /*printf("%s %s %x\n", STRINGIFY(func), (mod->name), (r->state));*/ \
if ((r)->state == KR_STATE_YIELD) { \
+ /*printf("%s_yield %s\n", STRINGIFY(func), (mod->name));*/ \
func ## _yield(&layer, ##__VA_ARGS__); \
break; \
} \
return;
}
assert(qname);
- const int len = knot_dname_size(qname) - 2; /* Skip first, last label. */
+ const int len = knot_dname_size(qname) - 2; /* Skip first, last label. First is length, last is always root */
for (int i = 0; i < len; ++i) {
/* Note: this relies on the fact that correct label lengths
* can't pass the isletter() test (by "luck"). */
}
}
-/** Invalidate current NS/addr pair. */
-static int invalidate_ns(struct kr_rplan *rplan, struct kr_query *qry)
-{
- if (qry->ns.addr[0].ip.sa_family != AF_UNSPEC) {
- const char *addr = kr_inaddr(&qry->ns.addr[0].ip);
- int addr_len = kr_inaddr_len(&qry->ns.addr[0].ip);
- int ret = kr_zonecut_del(&qry->zone_cut, qry->ns.name, addr, addr_len);
- /* Also remove it from the qry->ns.addr array.
- * That's useful at least for STUB and FORWARD modes. */
- memmove(qry->ns.addr, qry->ns.addr + 1,
- sizeof(qry->ns.addr[0]) * (KR_NSREP_MAXADDR - 1));
- return ret;
- } else {
- return kr_zonecut_del_all(&qry->zone_cut, qry->ns.name);
- }
-}
-
/** This turns of QNAME minimisation if there is a non-terminal between current zone cut, and name target.
* It save several minimization steps, as the zone cut is likely final one.
*/
return KR_STATE_PRODUCE;
}
-static int ns_resolve_addr(struct kr_query *qry, struct kr_request *req)
-{
- struct kr_rplan *rplan = &req->rplan;
- struct kr_context *ctx = req->ctx;
-
-
- /* Start NS queries from root, to avoid certain cases
- * where a NS drops out of cache and the rest is unavailable,
- * this would lead to dependency loop in current zone cut.
- * Prefer IPv6 and continue with IPv4 if not available.
- */
- uint16_t next_type = 0;
- if (!(qry->flags.AWAIT_IPV6) &&
- !(ctx->options.NO_IPV6)) {
- next_type = KNOT_RRTYPE_AAAA;
- qry->flags.AWAIT_IPV6 = true;
- } else if (!(qry->flags.AWAIT_IPV4) &&
- !(ctx->options.NO_IPV4)) {
- next_type = KNOT_RRTYPE_A;
- qry->flags.AWAIT_IPV4 = true;
- /* Hmm, no useable IPv6 then. */
- qry->ns.reputation |= KR_NS_NOIP6;
- kr_nsrep_update_rep(&qry->ns, qry->ns.reputation, ctx->cache_rep);
- }
- /* Bail out if the query is already pending or dependency loop. */
- if (!next_type || kr_rplan_satisfies(qry->parent, qry->ns.name, KNOT_CLASS_IN, next_type)) {
- /* Fall back to SBELT if root server query fails. */
- if (!next_type && qry->zone_cut.name[0] == '\0') {
- VERBOSE_MSG(qry, "=> fallback to root hints\n");
- kr_zonecut_set_sbelt(ctx, &qry->zone_cut);
- qry->flags.NO_THROTTLE = true; /* Pick even bad SBELT servers */
- return kr_error(EAGAIN);
- }
- /* No IPv4 nor IPv6, flag server as unusable. */
- ++req->count_no_nsaddr;
- VERBOSE_MSG(qry, "=> unresolvable NS address, bailing out (counter: %u)\n",
- req->count_no_nsaddr);
- qry->ns.reputation |= KR_NS_NOIP4 | KR_NS_NOIP6;
- kr_nsrep_update_rep(&qry->ns, qry->ns.reputation, ctx->cache_rep);
- invalidate_ns(rplan, qry);
- return kr_error(EHOSTUNREACH);
- }
- /* Push new query to the resolution plan */
- struct kr_query *next =
- kr_rplan_push(rplan, qry, qry->ns.name, KNOT_CLASS_IN, next_type);
- if (!next) {
- return kr_error(ENOMEM);
- }
- next->flags.NONAUTH = true;
-
- /* At the root level with no NS addresses, add SBELT subrequest. */
- int ret = 0;
- if (qry->zone_cut.name[0] == '\0') {
- ret = kr_zonecut_set_sbelt(ctx, &next->zone_cut);
- if (ret == 0) { /* Copy TA and key since it's the same cut to avoid lookup. */
- kr_zonecut_copy_trust(&next->zone_cut, &qry->zone_cut);
- kr_zonecut_set_sbelt(ctx, &qry->zone_cut); /* Add SBELT to parent in case query fails. */
- qry->flags.NO_THROTTLE = true; /* Pick even bad SBELT servers */
- }
- } else {
- next->flags.AWAIT_CUT = true;
- }
- return ret;
-}
-
static int edns_put(knot_pkt_t *pkt, bool reclaim)
{
if (!pkt->opt_rr) {
return knot_pkt_put(pkt, KNOT_COMPR_HINT_NONE, pkt->opt_rr, KNOT_PF_FREE);
}
+
+
/** Removes last EDNS OPT RR written to the packet. */
static int edns_erase_and_reserve(knot_pkt_t *pkt)
{
return request->state;
}
-KR_PURE static bool kr_inaddr_equal(const struct sockaddr *a, const struct sockaddr *b)
-{
- const int a_len = kr_inaddr_len(a);
- const int b_len = kr_inaddr_len(b);
- return a_len == b_len && memcmp(kr_inaddr(a), kr_inaddr(b), a_len) == 0;
-}
-
-static void update_nslist_rtt(struct kr_context *ctx, struct kr_query *qry, const struct sockaddr *src)
-{
- /* Do not track in safe mode. */
- if (qry->flags.SAFEMODE) {
- return;
- }
-
- /* Calculate total resolution time from the time the query was generated. */
- uint64_t elapsed = kr_now() - qry->timestamp_mono;
- elapsed = elapsed > UINT_MAX ? UINT_MAX : elapsed;
-
- /* NSs in the preference list prior to the one who responded will be penalised
- * with the RETRY timer interval. This is because we know they didn't respond
- * for N retries, so their RTT must be at least N * RETRY.
- * The NS in the preference list that responded will have RTT relative to the
- * time when the query was sent out, not when it was originated.
- */
- for (size_t i = 0; i < KR_NSREP_MAXADDR; ++i) {
- const struct sockaddr *addr = &qry->ns.addr[i].ip;
- if (addr->sa_family == AF_UNSPEC) {
- break;
- }
- /* If this address is the source of the answer, update its RTT */
- if (kr_inaddr_equal(src, addr)) {
- kr_nsrep_update_rtt(&qry->ns, addr, elapsed, ctx->cache_rtt, KR_NS_UPDATE);
- WITH_VERBOSE(qry) {
- char addr_str[INET6_ADDRSTRLEN];
- inet_ntop(addr->sa_family, kr_inaddr(addr), addr_str, sizeof(addr_str));
- VERBOSE_MSG(qry, "<= server: '%s' rtt: %"PRIu64" ms\n",
- addr_str, elapsed);
- }
- } else {
- /* Response didn't come from this IP, but we know the RTT must be at least
- * several RETRY timer tries, e.g. if we have addresses [a, b, c] and we have
- * tried [a, b] when the answer from 'a' came after 350ms, then we know
- * that 'b' didn't respond for at least 350 - (1 * 300) ms. We can't say that
- * its RTT is 50ms, but we can say that its score shouldn't be less than 50. */
- kr_nsrep_update_rtt(&qry->ns, addr, elapsed, ctx->cache_rtt, KR_NS_MAX);
- WITH_VERBOSE(qry) {
- char addr_str[INET6_ADDRSTRLEN];
- inet_ntop(addr->sa_family, kr_inaddr(addr), addr_str, sizeof(addr_str));
- VERBOSE_MSG(qry, "<= server: '%s' rtt: >= %"PRIu64" ms\n",
- addr_str, elapsed);
- }
- }
- /* Subtract query start time from elapsed time */
- if (elapsed < KR_CONN_RETRY) {
- break;
- }
- elapsed = elapsed - KR_CONN_RETRY;
- }
-}
-
-static void update_nslist_score(struct kr_request *request, struct kr_query *qry, const struct sockaddr *src, knot_pkt_t *packet)
-{
- struct kr_context *ctx = request->ctx;
- /* On successful answer, update preference list RTT and penalise timer */
- if (!(request->state & KR_STATE_FAIL)) {
- /* Update RTT information for preference list */
- update_nslist_rtt(ctx, qry, src);
- /* Do not complete NS address resolution on soft-fail. */
- const int rcode = packet ? knot_wire_get_rcode(packet->wire) : 0;
- if (rcode != KNOT_RCODE_SERVFAIL && rcode != KNOT_RCODE_REFUSED) {
- qry->flags.AWAIT_IPV6 = false;
- qry->flags.AWAIT_IPV4 = false;
- } else { /* Penalize SERVFAILs. */
- kr_nsrep_update_rtt(&qry->ns, src, KR_NS_PENALTY, ctx->cache_rtt, KR_NS_ADD);
- }
- }
-}
-
static bool resolution_time_exceeded(struct kr_query *qry, uint64_t now)
{
uint64_t resolving_time = now - qry->creation_time_mono;
return false;
}
-int kr_resolve_consume(struct kr_request *request, const struct sockaddr *src, knot_pkt_t *packet)
+int kr_resolve_consume(struct kr_request *request, struct kr_transport **transport, knot_pkt_t *packet)
{
struct kr_rplan *rplan = &request->rplan;
}
bool tried_tcp = (qry->flags.TCP);
if (!packet || packet->size == 0) {
- if (tried_tcp) {
- request->state = KR_STATE_FAIL;
- } else {
- qry->flags.TCP = true;
- }
+ return KR_STATE_PRODUCE;
} else {
/* Packet cleared, derandomize QNAME. */
knot_dname_t *qname_raw = knot_pkt_qname(packet);
} else {
/* Fill in source and latency information. */
request->upstream.rtt = kr_now() - qry->timestamp_mono;
- request->upstream.addr = src;
+ request->upstream.transport = transport ? *transport : NULL;
ITERATE_LAYERS(request, qry, consume, packet);
/* Clear temporary information */
- request->upstream.addr = NULL;
+ request->upstream.transport = NULL;
request->upstream.rtt = 0;
}
}
- /* Track RTT for iterative answers */
- if (src && !(qry->flags.CACHED)) {
- update_nslist_score(request, qry, src, packet);
- }
- /* Resolution failed, invalidate current NS. */
- if (request->state & KR_STATE_FAIL) {
- invalidate_ns(rplan, qry);
- qry->flags.RESOLVED = false;
- }
-
- /* For multiple errors in a row; invalidate_ns() is not enough. */
- if (!qry->flags.CACHED) {
- if (request->state & KR_STATE_FAIL) {
- if (++request->count_fail_row > KR_CONSUME_FAIL_ROW_LIMIT) {
- if (VERBOSE_STATUS || kr_log_rtrace_enabled(request)) {
- kr_log_req(request, 0, 2, "resl",
- "=> too many failures in a row, "
- "bail out (mitigation for NXNSAttack "
- "CVE-2020-12667)\n");
- }
- return KR_STATE_FAIL;
- }
- } else {
- request->count_fail_row = 0;
- }
- }
-
/* Pop query if resolved. */
if (request->state == KR_STATE_YIELD) {
return KR_STATE_PRODUCE; /* Requery */
return trust_chain_check(request, qry);
}
-int kr_resolve_produce(struct kr_request *request, struct sockaddr **dst, int *type, knot_pkt_t *packet)
+
+/**
+ * Plan a sub-query that resolves the address of the name server named in `transport`.
+ *
+ * AAAA is requested first and A second; each is attempted at most once per
+ * query (tracked by the AWAIT_IPV6 / AWAIT_IPV4 flags) and only when the
+ * corresponding NO_IPV6 / NO_IPV4 option is not set.
+ *
+ * @param qry       query that needs the name server address
+ * @param param     request owning the resolution plan and the resolver context
+ * @param transport selected transport; only its `name` is read here
+ * @return 0 when a sub-query was pushed (or the result of kr_zonecut_set_sbelt()
+ *         for a root-level sub-query), kr_error(EAGAIN) after falling back to
+ *         root hints, kr_error(EHOSTUNREACH) when the name cannot be resolved,
+ *         kr_error(ENOMEM) when the sub-query could not be pushed
+ */
+int ns_resolve_addr(struct kr_query *qry, struct kr_request *param, struct kr_transport *transport)
+{
+	struct kr_rplan *rplan = &param->rplan;
+	struct kr_context *ctx = param->ctx;
+
+	/* Start NS queries from root, to avoid certain cases
+	 * where a NS drops out of cache and the rest is unavailable,
+	 * this would lead to dependency loop in current zone cut.
+	 * Prefer IPv6 and continue with IPv4 if not available.
+	 */
+	uint16_t next_type = 0;
+	if (!(qry->flags.AWAIT_IPV6) &&
+	    !(ctx->options.NO_IPV6)) {
+		next_type = KNOT_RRTYPE_AAAA;
+		qry->flags.AWAIT_IPV6 = true;
+	} else if (!(qry->flags.AWAIT_IPV4) &&
+		   !(ctx->options.NO_IPV4)) {
+		next_type = KNOT_RRTYPE_A;
+		qry->flags.AWAIT_IPV4 = true;
+	}
+	/* Bail out if the query is already pending or dependency loop. */
+	if (!next_type || kr_rplan_satisfies(qry->parent, transport->name, KNOT_CLASS_IN, next_type)) {
+		/* Fall back to SBELT if root server query fails. */
+		if (!next_type && qry->zone_cut.name[0] == '\0') {
+			VERBOSE_MSG(qry, "=> fallback to root hints\n");
+			kr_zonecut_set_sbelt(ctx, &qry->zone_cut);
+			qry->flags.NO_THROTTLE = true; /* Pick even bad SBELT servers */
+			return kr_error(EAGAIN);
+		}
+		/* No IPv4 nor IPv6, flag server as unusable. */
+		VERBOSE_MSG(qry, "=> unresolvable NS address, bailing out\n");
+		kr_zonecut_del_all(&qry->zone_cut, transport->name);
+		return kr_error(EHOSTUNREACH);
+	}
+	/* Push new query to the resolution plan */
+	struct kr_query *next =
+		kr_rplan_push(rplan, qry, transport->name, KNOT_CLASS_IN, next_type);
+	if (!next) {
+		return kr_error(ENOMEM);
+	}
+	next->flags.NONAUTH = true;
+
+	/* At the root level with no NS addresses, add SBELT subrequest. */
+	int ret = 0;
+	if (qry->zone_cut.name[0] == '\0') {
+		ret = kr_zonecut_set_sbelt(ctx, &next->zone_cut);
+		if (ret == 0) { /* Copy TA and key since it's the same cut to avoid lookup. */
+			kr_zonecut_copy_trust(&next->zone_cut, &qry->zone_cut);
+			kr_zonecut_set_sbelt(ctx, &qry->zone_cut); /* Add SBELT to parent in case query fails. */
+			qry->flags.NO_THROTTLE = true; /* Pick even bad SBELT servers */
+		}
+	} else {
+		next->flags.AWAIT_CUT = true;
+	}
+
+	return ret;
+}
+
+int kr_resolve_produce(struct kr_request *request, struct kr_transport **transport, knot_pkt_t *packet)
{
struct kr_rplan *rplan = &request->rplan;
- unsigned ns_election_iter = 0;
/* No query left for resolution */
if (kr_rplan_empty(rplan)) {
return KR_STATE_FAIL;
}
- /* If we have deferred answers, resume them. */
+
struct kr_query *qry = array_tail(rplan->pending);
+
+ /* Initialize server selection */
+ if (!qry->server_selection.initialized) {
+ kr_server_selection_init(qry);
+ }
+
+ /* If we have deferred answers, resume them. */
if (qry->deferred != NULL) {
/* @todo: Refactoring validator, check trust chain before resuming. */
int state = 0;
}
}
-ns_election:
-
- if (unlikely(request->count_no_nsaddr >= KR_COUNT_NO_NSADDR_LIMIT)) {
- VERBOSE_MSG(qry, "=> too many unresolvable NSs, bail out "
- "(mitigation for NXNSAttack CVE-2020-12667)\n");
- return KR_STATE_FAIL;
- }
- /* If the query has already selected a NS and is waiting for IPv4/IPv6 record,
- * elect best address only, otherwise elect a completely new NS.
- */
- if(++ns_election_iter >= KR_ITER_LIMIT) {
- VERBOSE_MSG(qry, "=> couldn't converge NS selection, bail out\n");
- return KR_STATE_FAIL;
- }
const struct kr_qflags qflg = qry->flags;
const bool retry = qflg.TCP || qflg.BADCOOKIE_AGAIN;
- if (qflg.AWAIT_IPV4 || qflg.AWAIT_IPV6) {
- kr_nsrep_elect_addr(qry, request->ctx);
- } else if (qflg.FORWARD || qflg.STUB) {
- kr_nsrep_sort(&qry->ns, request->ctx);
- if (qry->ns.score > KR_NS_MAX_SCORE) {
- /* At the moment all NS have bad reputation.
- * But there can be existing connections*/
- VERBOSE_MSG(qry, "=> no valid NS left\n");
- return KR_STATE_FAIL;
- }
- } else if (!qry->ns.name || !retry) { /* Keep NS when requerying/stub/badcookie. */
+ if (!qflg.FORWARD && !qflg.STUB && !retry) { /* Keep NS when requerying/stub/badcookie. */
/* Root DNSKEY must be fetched from the hints to avoid chicken and egg problem. */
if (qry->sname[0] == '\0' && qry->stype == KNOT_RRTYPE_DNSKEY) {
kr_zonecut_set_sbelt(request->ctx, &qry->zone_cut);
qry->flags.NO_THROTTLE = true; /* Pick even bad SBELT servers */
}
- kr_nsrep_elect(qry, request->ctx);
- if (qry->ns.score > KR_NS_MAX_SCORE) {
- if (kr_zonecut_is_empty(&qry->zone_cut)) {
- VERBOSE_MSG(qry, "=> no NS with an address\n");
- } else {
- VERBOSE_MSG(qry, "=> no valid NS left\n");
- }
- if (!qry->flags.NO_NS_FOUND) {
- qry->flags.NO_NS_FOUND = true;
- } else {
- ITERATE_LAYERS(request, qry, reset);
- kr_rplan_pop(rplan, qry);
- }
- return KR_STATE_PRODUCE;
- }
}
- /* Resolve address records */
- if (qry->ns.addr[0].ip.sa_family == AF_UNSPEC) {
- int ret = ns_resolve_addr(qry, request);
- if (ret != 0) {
- qry->flags.AWAIT_IPV6 = false;
+ qry->server_selection.choose_transport(qry, transport);
+
+ if (*transport == NULL) {
+ // There is no point in continuing.
+ return KR_STATE_FAIL;
+ }
+
+ if ((*transport)->protocol == KR_TRANSPORT_NOADDR) {
+ int ret = ns_resolve_addr(qry, qry->request, *transport);
+ if (ret) {
qry->flags.AWAIT_IPV4 = false;
- qry->flags.TCP = false;
- qry->ns.name = NULL;
- goto ns_election; /* Must try different NS */
+ qry->flags.AWAIT_IPV6 = false;
}
ITERATE_LAYERS(request, qry, reset);
return KR_STATE_PRODUCE;
* kr_resolve_checkout().
*/
qry->timestamp_mono = kr_now();
- *dst = &qry->ns.addr[0].ip;
- *type = (qry->flags.TCP) ? SOCK_STREAM : SOCK_DGRAM;
return request->state;
}
#endif /* defined(ENABLE_COOKIES) */
int kr_resolve_checkout(struct kr_request *request, const struct sockaddr *src,
- struct sockaddr *dst, int type, knot_pkt_t *packet)
+ struct kr_transport *transport, knot_pkt_t *packet)
{
/* @todo: Update documentation if this function becomes approved. */
* actual cookie. If we don't know the server address then we
* also don't know the actual cookie size.
*/
- if (!outbound_request_update_cookies(request, src, dst)) {
+ if (!outbound_request_update_cookies(request, src, &transport->address.ip)) {
return kr_error(EINVAL);
}
}
/* Run the checkout layers and cancel on failure.
* The checkout layer doesn't persist the state, so canceled subrequests
* don't affect the resolution or rest of the processing. */
+	/* Map the selected transport protocol to the socket type handed to the checkout layers. */
+	int type = -1;
+	switch (transport->protocol) {
+	case KR_TRANSPORT_UDP:
+		type = SOCK_DGRAM;
+		break;
+	case KR_TRANSPORT_TCP:
+	case KR_TRANSPORT_TLS:
+		/* TCP and TLS are stream transports. SOCK_PACKET (used here before) is an
+		 * obsolete, Linux-only packet-socket type and not what the replaced
+		 * `TCP ? SOCK_STREAM : SOCK_DGRAM` expression produced. */
+		type = SOCK_STREAM;
+		break;
+	default:
+		assert(0);
+	}
int state = request->state;
- ITERATE_LAYERS(request, qry, checkout, packet, dst, type);
+ ITERATE_LAYERS(request, qry, checkout, packet, &transport->address.ip, type);
if (request->state & KR_STATE_FAIL) {
request->state = state; /* Restore */
return kr_error(ECANCELED);
WITH_VERBOSE(qry) {
KR_DNAME_GET_STR(qname_str, knot_pkt_qname(packet));
+ KR_DNAME_GET_STR(ns_name, transport->name);
KR_DNAME_GET_STR(zonecut_str, qry->zone_cut.name);
KR_RRTYPE_GET_STR(type_str, knot_pkt_qtype(packet));
+ const char *ns_str = kr_straddr(&transport->address.ip);
- for (size_t i = 0; i < KR_NSREP_MAXADDR; ++i) {
- struct sockaddr *addr = &qry->ns.addr[i].ip;
- if (addr->sa_family == AF_UNSPEC) {
- break;
- }
- if (!kr_inaddr_equal(dst, addr)) {
- continue;
- }
- const char *ns_str = kr_straddr(addr);
- VERBOSE_MSG(qry,
- "=> id: '%05u' querying: '%s' score: %u zone cut: '%s' "
+ VERBOSE_MSG(qry,
+ "=> id: '%05u' querying: '%s'@'%s' zone cut: '%s' "
"qname: '%s' qtype: '%s' proto: '%s'\n",
- qry->id, ns_str ? ns_str : "", qry->ns.score, zonecut_str,
+ qry->id, ns_name, ns_str ? ns_str : "", zonecut_str,
qname_str, type_str, (qry->flags.TCP) ? "tcp" : "udp");
-
- break;
- }}
+ }
return kr_ok();
}
#include "lib/layer.h"
#include "lib/generic/map.h"
#include "lib/generic/array.h"
-#include "lib/nsrep.h"
+#include "lib/selection.h"
#include "lib/rplan.h"
#include "lib/module.h"
#include "lib/cache/api.h"
map_t negative_anchors;
struct kr_zonecut root_hints;
struct kr_cache cache;
- kr_nsrep_rtt_lru_t *cache_rtt;
unsigned cache_rtt_tout_retry_interval;
- kr_nsrep_lru_t *cache_rep;
module_array_t *modules;
/* The cookie context structure should not be held within the cookies
* module because of better access. */
bool http:1; /**< true if the request is on HTTP; only meaningful if (dst_addr). */
};
+typedef bool (*addr_info_f)(struct sockaddr*);
+typedef void (*async_resolution_f)(knot_dname_t*, enum knot_rr_type);
+
/**
* Name resolution request.
*
} qsource;
struct {
unsigned rtt; /**< Current upstream RTT */
- const struct sockaddr *addr; /**< Current upstream address */
+ const struct kr_transport *transport; /**< Current upstream transport */
} upstream; /**< Upstream information, valid only in consume() phase */
struct kr_qflags options;
int state;
trace_callback_f trace_finish; /**< Request finish tracepoint */
int vars_ref; /**< Reference to per-request variable table. LUA_NOREF if not set. */
knot_mm_t pool;
+ struct {
+ addr_info_f is_tls_capable;
+ addr_info_f is_tcp_connected;
+ addr_info_f is_tcp_waiting;
+ async_resolution_f async_ns_resolution;
+ union inaddr *forwarding_targets; /**< When forwarding, possible targets are put here */
+ size_t forward_targets_num;
+ } selection_context;
unsigned int uid; /** for logging purposes only */
unsigned int count_no_nsaddr;
unsigned int count_fail_row;
* @return any state
*/
KR_EXPORT
-int kr_resolve_consume(struct kr_request *request, const struct sockaddr *src, knot_pkt_t *packet);
+int kr_resolve_consume(struct kr_request *request, struct kr_transport **transport, knot_pkt_t *packet);
/**
* Produce either next additional query or finish.
* @return any state
*/
KR_EXPORT
-int kr_resolve_produce(struct kr_request *request, struct sockaddr **dst, int *type, knot_pkt_t *packet);
+int kr_resolve_produce(struct kr_request *request, struct kr_transport **transport, knot_pkt_t *packet);
/**
* Finalises the outbound query packet with the knowledge of the IP addresses.
*/
KR_EXPORT
int kr_resolve_checkout(struct kr_request *request, const struct sockaddr *src,
- struct sockaddr *dst, int type, knot_pkt_t *packet);
+ struct kr_transport *transport, knot_pkt_t *packet);
/**
* Finish resolution and commit results if the state is DONE.
*/
KR_EXPORT KR_PURE
knot_mm_t *kr_resolve_pool(struct kr_request *request);
-
qry->flags = rplan->request->options;
qry->parent = parent;
qry->request = rplan->request;
- qry->ns.ctx = rplan->request->ctx;
- qry->ns.addr[0].ip.sa_family = AF_UNSPEC;
+
gettimeofday(&qry->timestamp, NULL);
qry->timestamp_mono = kr_now();
qry->creation_time_mono = parent ? parent->creation_time_mono : qry->timestamp_mono;
kr_zonecut_init(&qry->zone_cut, (const uint8_t *)"", rplan->pool);
qry->reorder = qry->flags.REORDER_RR ? kr_rand_bytes(sizeof(qry->reorder)) : 0;
- /* When forwarding, keep the nameserver addresses. */
- if (parent && parent->flags.FORWARD && qry->flags.FORWARD) {
- ret = kr_nsrep_copy_set(&qry->ns, &parent->ns);
- if (ret) {
- query_free(rplan->pool, qry);
- return NULL;
- }
- }
assert((rplan->pending.len == 0 && rplan->resolved.len == 0)
== (rplan->initial == NULL));
#include <libknot/dname.h>
#include <libknot/codes.h>
+#include "lib/selection.h"
#include "lib/cache/api.h"
#include "lib/zonecut.h"
-#include "lib/nsrep.h"
/** Query flags */
struct kr_qflags {
struct kr_query *cname_parent;
struct kr_request *request; /**< Parent resolution request. */
kr_stale_cb stale_cb; /**< See the type */
- /* Beware: this must remain the last, because of lua bindings. */
- struct kr_nsrep ns;
+ struct kr_server_selection server_selection;
};
/** @cond internal Array of queries. */
--- /dev/null
+#include <libknot/dname.h>
+
+#include "lib/selection.h"
+#include "lib/selection_forward.h"
+#include "lib/selection_iter.h"
+#include "lib/generic/pack.h"
+#include "lib/generic/trie.h"
+#include "lib/rplan.h"
+#include "lib/cache/api.h"
+#include "lib/resolve.h"
+
+#include "daemon/worker.h"
+#include "daemon/tls.h"
+
+#include "lib/utils.h"
+
+#define VERBOSE_MSG(qry, ...) QRVERBOSE((qry), "nsrep", __VA_ARGS__)
+
+/** @internal Macro to set address structure. */
+#define ADDR_SET(sa, family, addr, len, port) do {\
+ memcpy(&sa ## _addr, (addr), (len)); \
+ sa ## _family = (family); \
+ sa ## _port = htons(port); \
+} while (0)
+
+/* Simple cache interface follows */
+
+#define KEY_PREFIX 'S'
+
+/**
+ * Build a cache key for an IP address: the KEY_PREFIX byte followed by the raw address bytes.
+ *
+ * @param ip  raw address bytes
+ * @param len number of bytes in `ip`
+ * @return malloc()-ed buffer of `len + 1` bytes that the caller must free(),
+ *         or NULL when the allocation failed
+ */
+void *prefix_key(const uint8_t *ip, size_t len) {
+	/* Byte-typed pointer: arithmetic on `void *` is only a GNU extension. */
+	uint8_t *key = malloc(len + 1);
+	if (!key) {
+		return NULL;
+	}
+	key[0] = KEY_PREFIX;
+	memcpy(key + 1, ip, len);
+	return key;
+}
+
+/* The macro defined above is KEY_PREFIX; `#undef PREFIX` was a no-op. */
+#undef KEY_PREFIX
+
+/**
+ * Look up the stored RTT statistics of an IP address in the cache.
+ *
+ * @param ip    raw address bytes
+ * @param len   number of bytes in `ip`
+ * @param cache cache whose backend is queried
+ * @return the stored state, or {-1, -1} when nothing usable is stored
+ *         (read failure, unexpected record size, or key allocation failure)
+ */
+struct rtt_state get_rtt_state(const uint8_t *ip, size_t len, struct kr_cache *cache) {
+	/* {-1, -1} is the "no measurement" marker tested by calc_timeout() and calc_rtt_state(). */
+	const struct rtt_state no_value = {-1, -1};
+	struct rtt_state state = no_value;
+
+	uint8_t *prefixed_ip = prefix_key(ip, len);
+	if (!prefixed_ip) {
+		return no_value;
+	}
+
+	knot_db_t *db = cache->db;
+	struct kr_cdb_stats *stats = &cache->stats;
+	knot_db_val_t key = {.len = len + 1, .data = prefixed_ip};
+	knot_db_val_t value = {.len = 0, .data = NULL};
+
+	/* A non-zero return from read() means "no value", as before.  The size is
+	 * now checked at run time (not only by assert), and the record is copied
+	 * out with memcpy because the backend's buffer need not be aligned. */
+	if (cache->api->read(db, stats, &key, &value, 1) == 0
+	    && value.data != NULL
+	    && value.len == sizeof(struct rtt_state)) {
+		memcpy(&state, value.data, sizeof(state));
+	}
+
+	free(prefixed_ip);
+	return state;
+}
+
+/**
+ * Store RTT statistics for an IP address in the cache and commit the write.
+ *
+ * @param ip    raw address bytes
+ * @param len   number of bytes in `ip`
+ * @param state statistics to store
+ * @param cache cache whose backend is written
+ * @return 0 on success, kr_error(ENOMEM) when the key could not be allocated,
+ *         otherwise the error returned by the backend's write() or commit()
+ */
+int put_rtt_state(const uint8_t *ip, size_t len, struct rtt_state state, struct kr_cache *cache) {
+	uint8_t *prefixed_ip = prefix_key(ip, len);
+	if (!prefixed_ip) {
+		return kr_error(ENOMEM);
+	}
+
+	knot_db_t *db = cache->db;
+	struct kr_cdb_stats *stats = &cache->stats;
+	knot_db_val_t key = {.len = len + 1, .data = prefixed_ip};
+	knot_db_val_t value = {.len = sizeof(struct rtt_state), .data = &state};
+
+	int ret = cache->api->write(db, stats, &key, &value, 1);
+	/* Commit unconditionally, as before; report a commit failure only when
+	 * the write itself succeeded so the first error is the one returned. */
+	int commit_ret = cache->api->commit(db, stats);
+	if (ret == 0) {
+		ret = commit_ret;
+	}
+
+	free(prefixed_ip);
+	return ret;
+}
+
+/* IP helper functions */
+
+/**
+ * Fill a socket address from raw address bytes, with port 0.
+ *
+ * The address family is derived from `len`: sizeof(struct in_addr) selects
+ * AF_INET, sizeof(struct in6_addr) selects AF_INET6; any other length asserts.
+ * Only the address, family and port members of `dst` are written.
+ */
+void bytes_to_ip(uint8_t *bytes, size_t len, union inaddr *dst) {
+	if (len == sizeof(struct in_addr)) {
+		ADDR_SET(dst->ip4.sin, AF_INET, bytes, len, 0);
+	} else if (len == sizeof(struct in6_addr)) {
+		ADDR_SET(dst->ip6.sin6, AF_INET6, bytes, len, 0);
+	} else {
+		assert(0);
+	}
+}
+
+/**
+ * Return a pointer to the raw address bytes inside a socket address.
+ *
+ * @param src socket address to look into
+ * @param len sizeof(struct in_addr) for IPv4 or sizeof(struct in6_addr) for IPv6
+ * @return pointer into `src` (not a copy), or NULL for any other `len`
+ */
+uint8_t* ip_to_bytes(const union inaddr *src, size_t len) {
+	switch(len) {
+	case sizeof(struct in_addr):
+		return (uint8_t *)&src->ip4.sin_addr;
+	case sizeof(struct in6_addr):
+		return (uint8_t *)&src->ip6.sin6_addr;
+	default:
+		assert(0);
+		/* With NDEBUG the assert vanishes; falling off the end of a
+		 * non-void function and using the result is undefined behaviour. */
+		return NULL;
+	}
+}
+
+#define DEFAULT_TIMEOUT 200
+#define MINIMAL_TIMEOUT_ADDITION 20
+
+/**
+ * Timeout to use for a server, derived from its RTT statistics.
+ *
+ * Follows RFC2988, sec. 2, except for the default timeout value and the
+ * minimal variance term: DEFAULT_TIMEOUT is returned while no measurement
+ * exists (state == {-1, -1}); otherwise the result is
+ * srtt + max(4 * variance, MINIMAL_TIMEOUT_ADDITION).
+ */
+int32_t calc_timeout(struct rtt_state state) {
+	const bool no_measurement = (state.srtt == -1) && (state.variance == -1);
+	if (no_measurement) {
+		return DEFAULT_TIMEOUT;
+	}
+	const int32_t margin = MAX(4 * state.variance, MINIMAL_TIMEOUT_ADDITION);
+	return state.srtt + margin;
+}
+
+/**
+ * Fold a new RTT sample into the smoothed RTT statistics (RFC2988, sec. 2).
+ *
+ * First sample (old == {-1, -1}):  srtt = R, variance = R / 2.
+ * Later samples, with alpha = 1/8 and beta = 1/4 as the RFC specifies:
+ *   variance = (1 - beta)  * variance + beta  * |srtt - R|
+ *   srtt     = (1 - alpha) * srtt     + alpha * R
+ * Both lines use the OLD srtt.
+ *
+ * @param old     previous statistics, or {-1, -1} when there are none
+ * @param new_rtt newly measured round-trip time
+ * @return the updated statistics
+ */
+struct rtt_state calc_rtt_state(struct rtt_state old, unsigned new_rtt) {
+	if (old.srtt == -1 && old.variance == -1) {
+		return (struct rtt_state){new_rtt, new_rtt/2};
+	}
+
+	/* Subtract in signed arithmetic: `old.srtt - new_rtt` would be computed
+	 * as unsigned and abs() of that relies on implementation-defined conversion. */
+	const int32_t sample = (int32_t)new_rtt;
+	struct rtt_state ret;
+
+	/* The gains were previously swapped (1/4 on srtt, 1/8 on variance). */
+	ret.variance = 0.75 * old.variance + 0.25 * abs(old.srtt - sample);
+	ret.srtt = 0.875 * old.srtt + 0.125 * sample;
+
+	return ret;
+}
+
+/**
+ * Record in `address_state` whether `address` is TLS capable, as reported by the
+ * request's `is_tls_capable` callback; false when no callback is installed.
+ */
+void check_tls_capable(struct address_state *address_state, struct kr_request *req, struct sockaddr *address) {
+	if (req->selection_context.is_tls_capable) {
+		address_state->tls_capable = req->selection_context.is_tls_capable(address);
+	} else {
+		address_state->tls_capable = false;
+	}
+}
+
+/**
+ * Record in `address_state` the TCP connection status of `address`, as reported
+ * by the request's `is_tcp_connected` and `is_tcp_waiting` callbacks; a missing
+ * callback yields false for the corresponding flag.
+ */
+void check_tcp_connections(struct address_state *address_state, struct kr_request *req, struct sockaddr *address) {
+	if (req->selection_context.is_tcp_connected) {
+		address_state->tcp_connected = req->selection_context.is_tcp_connected(address);
+	} else {
+		address_state->tcp_connected = false;
+	}
+
+	if (req->selection_context.is_tcp_waiting) {
+		address_state->tcp_waiting = req->selection_context.is_tcp_waiting(address);
+	} else {
+		address_state->tcp_waiting = false;
+	}
+}
+
+/**
+ * Invalidate an address whose family is disabled.
+ *
+ * The family is inferred from `address_len`; when it is disabled through
+ * `no_ipv4` / `no_ipv6`, `generation` is set to -1, which callers test for.
+ */
+void check_network_settings(struct address_state *address_state, size_t address_len, bool no_ipv4, bool no_ipv6) {
+	const bool is_ipv4 = (address_len == sizeof(struct in_addr));
+	const bool is_ipv6 = (address_len == sizeof(struct in6_addr));
+
+	if (no_ipv4 && is_ipv4) {
+		address_state->generation = -1; // IPv4 is disabled in flags
+	}
+	if (no_ipv6 && is_ipv6) {
+		address_state->generation = -1; // IPv6 is disabled in flags
+	}
+}
+
+/**
+ * qsort() comparator for `struct choice`: fewer recorded errors sorts first,
+ * ties are broken by the smaller calc_timeout() of the RTT statistics.
+ */
+int cmp_choices(const void *a, const void *b) {
+	const struct address_state *lhs = ((const struct choice *)a)->address_state;
+	const struct address_state *rhs = ((const struct choice *)b)->address_state;
+
+	const int error_diff = lhs->error_count - rhs->error_count;
+	if (error_diff != 0) {
+		return error_diff;
+	}
+	/* Zero here means the two choices compare equal. */
+	return calc_timeout(lhs->rtt_state) - calc_timeout(rhs->rtt_state);
+}
+
+#define ERROR_LIMIT 2
+
+/**
+ * Perform the actual selection (currently epsilon-greedy with epsilon = 0.05).
+ *
+ * With probability 1/20, or always when there are no address choices, a random
+ * entry is drawn from the unresolved names and the address choices together
+ * (EXPLORE).  Otherwise `choices` is sorted in place with cmp_choices() and the
+ * best entry is taken unless it has more than ERROR_LIMIT errors (EXPLOIT).
+ *
+ * @param choices           candidate addresses; reordered in place by the EXPLOIT path
+ * @param choices_len       number of entries in `choices`
+ * @param unresolved        NS names without a known address (may be NULL when `unresolved_len` is 0)
+ * @param unresolved_len    number of entries in `unresolved`
+ * @param mempool           pool the returned transport is allocated from
+ * @param tcp               use KR_TRANSPORT_TCP instead of KR_TRANSPORT_UDP
+ * @param out_forward_index if not NULL, receives the chosen address' `forward_index`
+ *                          (left untouched when a name or nothing is returned)
+ * @return a transport with an address; a KR_TRANSPORT_NOADDR transport naming an
+ *         NS to resolve; or NULL when there is nothing usable to choose
+ */
+struct kr_transport *choose_transport(struct choice choices[],
+				      int choices_len,
+				      knot_dname_t **unresolved,
+				      int unresolved_len,
+				      struct knot_mm *mempool,
+				      bool tcp,
+				      size_t *out_forward_index) {
+
+	const int total = choices_len + unresolved_len;
+	if (total <= 0) {
+		/* Nothing to pick from; the EXPLORE branch below would compute `% 0`. */
+		return NULL;
+	}
+
+	struct kr_transport *transport = mm_alloc(mempool, sizeof(struct kr_transport));
+	if (!transport) {
+		return NULL;
+	}
+	memset(transport, 0, sizeof(struct kr_transport));
+	int choice = 0;
+
+	if (kr_rand_coin(1, 20) || choices_len == 0) {
+		// EXPLORE
+		int index = kr_rand_bytes(1) % total;
+		if (index < unresolved_len) {
+			// We will resolve a new NS name
+			*transport = (struct kr_transport) {
+				.protocol = KR_TRANSPORT_NOADDR,
+				.name = unresolved[index]
+			};
+			return transport;
+		}
+		choice = index - unresolved_len;
+	} else {
+		// EXPLOIT
+		qsort(choices, choices_len, sizeof(struct choice), cmp_choices);
+		if (choices[0].address_state->error_count > ERROR_LIMIT) {
+			return NULL;
+		}
+		choice = 0;
+	}
+
+	*transport = (struct kr_transport) {
+		.name = choices[choice].address_state->name,
+		.protocol = tcp ? KR_TRANSPORT_TCP : KR_TRANSPORT_UDP,
+		.timeout = calc_timeout(choices[choice].address_state->rtt_state),
+	};
+
+	/* Initialised so the value is defined even if the assert below is compiled out. */
+	int port = KR_DNS_PORT;
+	switch (transport->protocol)
+	{
+	case KR_TRANSPORT_TLS:
+		port = KR_DNS_TLS_PORT;
+		break;
+	case KR_TRANSPORT_UDP:
+	case KR_TRANSPORT_TCP:
+		port = KR_DNS_PORT;
+		break;
+	default:
+		assert(0);
+		break;
+	}
+
+	switch (choices[choice].address_len)
+	{
+	case sizeof(struct in_addr):
+		ADDR_SET(transport->address.ip4.sin, AF_INET, choices[choice].address, choices[choice].address_len, port);
+		transport->address_len = choices[choice].address_len;
+		break;
+	case sizeof(struct in6_addr):
+		ADDR_SET(transport->address.ip6.sin6, AF_INET6, choices[choice].address, choices[choice].address_len, port);
+		transport->address_len = choices[choice].address_len;
+		break;
+	default:
+		assert(0);
+		/* Do not hand out a transport whose address was never filled in. */
+		return NULL;
+	}
+
+	if (out_forward_index) {
+		*out_forward_index = (size_t)choices[choice].address_state->forward_index;
+	}
+
+	return transport;
+}
+
+/**
+ * Fold a measured RTT into the statistics of the transport's address and store
+ * the result in the request's cache.  Does nothing when `transport` is NULL.
+ *
+ * @param qry        query the measurement belongs to (cache access and logging)
+ * @param addr_state local state of the address; its `rtt_state` is the starting point
+ * @param transport  transport the answer arrived over
+ * @param rtt        measured round-trip time
+ */
+void update_rtt(struct kr_query *qry, struct address_state *addr_state, const struct kr_transport *transport, unsigned rtt) {
+	if (transport == NULL) {
+		return;
+	}
+
+	const struct rtt_state new_rtt_state = calc_rtt_state(addr_state->rtt_state, rtt);
+	put_rtt_state(ip_to_bytes(&transport->address, transport->address_len),
+		      transport->address_len, new_rtt_state,
+		      &qry->request->ctx->cache);
+
+	WITH_VERBOSE(qry) {
+		KR_DNAME_GET_STR(ns_name, transport->name);
+		KR_DNAME_GET_STR(zonecut_str, qry->zone_cut.name);
+		const char *ns_str = kr_straddr(&transport->address.ip);
+
+		VERBOSE_MSG(qry,
+			"=> id: '%05u' updating: '%s'@'%s' zone cut: '%s' with rtt %u to srtt: %d and variance: %d \n",
+			qry->id, ns_name, ns_str ? ns_str : "", zonecut_str, rtt, new_rtt_state.srtt, new_rtt_state.variance);
+	}
+}
+
+
+/**
+ * Record a selection error against an address.
+ *
+ * Increments both the per-kind counter and the total `error_count` of
+ * `addr_state`.  Does nothing when `transport` is NULL or `sel_error` is not a
+ * valid error kind.
+ *
+ * @param qry        query the error belongs to (logging only)
+ * @param addr_state local state of the address the error is charged to
+ * @param transport  transport that failed
+ * @param sel_error  kind of error; must be below KR_SELECTION_NUMBER_OF_ERRORS
+ */
+void error(struct kr_query *qry, struct address_state *addr_state, const struct kr_transport *transport, enum kr_selection_error sel_error) {
+	if (!transport) {
+		return;
+	}
+
+	/* The unsigned cast also rejects negative values if the enum is signed. */
+	if ((unsigned)sel_error >= KR_SELECTION_NUMBER_OF_ERRORS) {
+		assert(0);
+		/* With NDEBUG the assert vanishes; never index errors[] out of bounds. */
+		return;
+	}
+
+	addr_state->errors[sel_error]++;
+	addr_state->error_count++;
+
+	WITH_VERBOSE(qry) {
+		KR_DNAME_GET_STR(ns_name, transport->name);
+		KR_DNAME_GET_STR(zonecut_str, qry->zone_cut.name);
+		const char *ns_str = kr_straddr(&transport->address.ip);
+
+		VERBOSE_MSG(qry,
+			"=> id: '%05u' noting selection error: '%s'@'%s' zone cut: '%s' error no.:%d\n",
+			qry->id, ns_name, ns_str ? ns_str : "", zonecut_str, sel_error);
+	}
+}
+
+
+
+/**
+ * Initialize the server selection structure inside `qry`.
+ *
+ * Queries with the FORWARD or STUB flag get the forward_* callbacks, all other
+ * queries get the iter_* callbacks.  The matching local state is then set up
+ * from the request's memory pool.
+ */
+void kr_server_selection_init(struct kr_query *qry) {
+	struct knot_mm *mempool = &qry->request->pool;
+	struct kr_server_selection *selection = &qry->server_selection;
+	const bool forwarding = qry->flags.FORWARD || qry->flags.STUB;
+
+	if (forwarding) {
+		*selection = (struct kr_server_selection){
+			.initialized = true,
+			.choose_transport = forward_choose_transport,
+			.success = forward_success,
+			.update_rtt = forward_update_rtt,
+			.error = forward_error,
+			.local_state = NULL,
+		};
+		forward_local_state_init(mempool, &selection->local_state, qry->request);
+		return;
+	}
+
+	*selection = (struct kr_server_selection){
+		.initialized = true,
+		.choose_transport = iter_choose_transport,
+		.success = iter_success,
+		.update_rtt = iter_update_rtt,
+		.error = iter_error,
+		.local_state = NULL,
+	};
+	iter_local_state_init(mempool, &selection->local_state);
+}
+
+/**
+ * Store one forwarding target of a request.
+ *
+ * The target array is allocated lazily from the request pool on the first call,
+ * sized by `selection_context.forward_targets_num`, and zero-filled so slots that
+ * are never set have a defined (AF_UNSPEC) family.
+ *
+ * @param req   request whose selection context receives the target
+ * @param index slot to fill; must be below `forward_targets_num`
+ * @param sock  target address, AF_INET or AF_INET6
+ * @return kr_ok() on success, kr_error(EINVAL) for a NULL address, an
+ *         out-of-range index or an unsupported address family,
+ *         kr_error(ENOMEM) when the array could not be allocated
+ */
+int kr_forward_add_target(struct kr_request *req, size_t index, const struct sockaddr *sock) {
+	const size_t target_count = req->selection_context.forward_targets_num;
+	if (!sock || index >= target_count) {
+		/* Previously an out-of-range index wrote past the end of the array. */
+		return kr_error(EINVAL);
+	}
+
+	if (!req->selection_context.forwarding_targets) {
+		const size_t array_size = target_count * sizeof(union inaddr);
+		union inaddr *targets = mm_alloc(&req->pool, array_size);
+		if (!targets) {
+			return kr_error(ENOMEM);
+		}
+		memset(targets, 0, array_size);
+		req->selection_context.forwarding_targets = targets;
+	}
+
+	switch (sock->sa_family) {
+	case AF_INET:
+		req->selection_context.forwarding_targets[index].ip4 = *(const struct sockaddr_in *)sock;
+		break;
+	case AF_INET6:
+		req->selection_context.forwarding_targets[index].ip6 = *(const struct sockaddr_in6 *)sock;
+		break;
+	default:
+		return kr_error(EINVAL);
+	}
+
+	return kr_ok();
+}
+
--- /dev/null
+/* Copyright (C) 2014-2020 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+ * SPDX-License-Identifier: GPL-3.0-or-later
+ */
+
+#pragma once
+
+#include "lib/cache/api.h"
+
+/**
+ * Kinds of failure that can be recorded against a selected transport.
+ *
+ * The values are used as indices into the per-address `errors` counter array of
+ * struct address_state, which is why KR_SELECTION_NUMBER_OF_ERRORS has to stay
+ * the last enumerator.
+ */
+enum kr_selection_error {
+ // Network errors
+ KR_SELECTION_TIMEOUT,
+ KR_SELECTION_TLS_HANDSHAKE_FAILED,
+ KR_SELECTION_TCP_CONNECT_FAILED,
+ KR_SELECTION_TCP_CONNECT_TIMEOUT,
+
+ // RCODEs
+ KR_SELECTION_REFUSED,
+ KR_SELECTION_SERVFAIL,
+ KR_SELECTION_FORMERROR,
+ KR_SELECTION_NOTIMPL,
+ KR_SELECTION_OTHER_RCODE,
+ KR_SELECTION_TRUNCATED,
+
+ // DNS errors
+ KR_SELECTION_DNSSEC_ERROR,
+ KR_SELECTION_LAME_DELEGATION,
+
+ KR_SELECTION_NUMBER_OF_ERRORS // Leave this last as it is used as array size.
+};
+
+/**
+ * Protocol of a selected transport.
+ *
+ * KR_TRANSPORT_NOADDR is deliberately 0, the value a zero-filled
+ * struct kr_transport carries.  choose_transport() returns it together with an
+ * NS name when that name still has to be resolved to an address.
+ */
+enum kr_transport_protocol {
+ KR_TRANSPORT_NOADDR = 0,
+ KR_TRANSPORT_UDP,
+ KR_TRANSPORT_TCP,
+ KR_TRANSPORT_TLS,
+};
+
+/**
+ * Result of server selection: which server to contact and how.
+ */
+struct kr_transport {
+ // NS name the transport belongs to; the only meaningful member besides
+ // `protocol` when protocol == KR_TRANSPORT_NOADDR
+ knot_dname_t *name;
+ // Socket address of the server (address, family and port are filled by choose_transport())
+ union inaddr address;
+ // Length of the raw address: sizeof(struct in_addr) or sizeof(struct in6_addr)
+ size_t address_len;
+ enum kr_transport_protocol protocol;
+ // Taken from calc_timeout() of the address' RTT statistics
+ // NOTE(review): unit is presumably milliseconds -- confirm against the caller
+ unsigned timeout;
+};
+
+/**
+ * Per-query server selection interface.
+ *
+ * kr_server_selection_init() fills the callbacks with either the forward_* or
+ * the iter_* implementations and sets `initialized`.
+ */
+struct kr_server_selection
+{
+ // Set by kr_server_selection_init(); checked before the callbacks are used
+ bool initialized;
+ // Stores the chosen transport in *transport; NULL means nothing could be chosen
+ void (*choose_transport)(struct kr_query *qry, struct kr_transport **transport);
+ void (*success)(struct kr_query *qry, const struct kr_transport *transport);
+ void (*update_rtt)(struct kr_query *qry, const struct kr_transport *transport, unsigned rtt);
+ void (*error)(struct kr_query *qry, const struct kr_transport *transport, enum kr_selection_error error);
+
+ // Implementation-private state; allocated by the matching *_local_state_init()
+ void *local_state;
+};
+
+// Initialize server selection structure inside qry.
+KR_EXPORT
+void kr_server_selection_init(struct kr_query *qry);
+
+KR_EXPORT
+int kr_forward_add_target(struct kr_request *req, size_t index, const struct sockaddr *sock);
+
+// To be held per IP address in the global LMDB cache.
+// The pair {-1, -1} is used as the "no value stored yet" marker
+// (see get_rtt_state(), calc_timeout() and calc_rtt_state()).
+struct rtt_state {
+ // Smoothed round-trip time
+ int32_t srtt;
+ // Round-trip time variance
+ int32_t variance;
+};
+
+// To be held per IP address and locally
+struct address_state {
+ // Set to -1 (wraps to UINT_MAX) by check_network_settings() to invalidate the address
+ unsigned int generation;
+ // RTT statistics loaded from the cache via get_rtt_state()
+ struct rtt_state rtt_state;
+ // NS name this address belongs to; copied into the chosen kr_transport
+ knot_dname_t *name;
+ // Results of the request's is_tls_capable / is_tcp_waiting / is_tcp_connected callbacks
+ bool tls_capable : 1;
+ bool tcp_waiting : 1;
+ bool tcp_connected : 1;
+
+ // Index of this address among the forwarding targets (set by forward_choose_transport())
+ int forward_index;
+ // Total number of errors recorded by error()
+ int error_count;
+ // Per-kind error counters, indexed by enum kr_selection_error
+ int errors[KR_SELECTION_NUMBER_OF_ERRORS];
+};
+
+// Array of these is one of inputs for the actual selection algorithm (`iter_get_best_transport`)
+// NOTE(review): the function consuming `struct choice[]` in this header is
+// `choose_transport`; the name above looks stale -- confirm.
+struct choice {
+ // Raw address bytes (not a sockaddr); not owned by this struct
+ uint8_t *address;
+ // sizeof(struct in_addr) or sizeof(struct in6_addr)
+ size_t address_len;
+ // Local state of the address; used for sorting and for building the transport
+ struct address_state *address_state;
+};
+
+struct kr_transport *choose_transport(struct choice choices[],
+ int choices_len,
+ knot_dname_t **unresolved,
+ int unresolved_len,
+ struct knot_mm *mempool,
+ bool tcp,
+ size_t *out_forward_index);
+void update_rtt(struct kr_query *qry, struct address_state *addr_state, const struct kr_transport *transport, unsigned rtt);
+void error(struct kr_query *qry, struct address_state *addr_state, const struct kr_transport *transport, enum kr_selection_error sel_error);
+
+struct rtt_state get_rtt_state(const uint8_t *ip, size_t len, struct kr_cache *cache);
+int put_rtt_state(const uint8_t *ip, size_t len, struct rtt_state state, struct kr_cache *cache);
+
+void bytes_to_ip(uint8_t *bytes, size_t len, union inaddr *dst);
+uint8_t* ip_to_bytes(const union inaddr *src, size_t len);
+
+void check_tls_capable(struct address_state *address_state, struct kr_request *req, struct sockaddr *address);
+void check_tcp_connections(struct address_state *address_state, struct kr_request *req, struct sockaddr *address);
+void check_network_settings(struct address_state *address_state, size_t address_len, bool no_ipv4, bool no_ipv6);
+
+
--- /dev/null
+/* Copyright (C) 2014-2020 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+ * SPDX-License-Identifier: GPL-3.0-or-later
+ */
+
+#include "lib/selection_forward.h"
+#include "lib/resolve.h"
+
+#define VERBOSE_MSG(qry, ...) QRVERBOSE((qry), "nsrep", __VA_ARGS__)
+
+// Per-query local state of server selection in forwarding mode
+struct forward_local_state {
+ // Forwarding targets; points at the request's selection_context.forwarding_targets
+ union inaddr *targets;
+ // Number of entries in `targets` and in `addr_states`
+ size_t target_num;
+ // One address_state per target, same indexing as `targets`
+ struct address_state *addr_states;
+ // Index written by the most recent choose_transport() call through its
+ // out_forward_index argument; used to charge errors and RTT updates
+ size_t last_choice_index;
+};
+
+/**
+ * Allocate and zero the forwarding-mode local state from `mm` and store it in
+ * `*local_state`.
+ *
+ * The targets are not copied: the state refers to the array held in the
+ * request's selection context, which must already be set.  One zeroed
+ * address_state is allocated per target.
+ */
+void forward_local_state_init(struct knot_mm *mm, void **local_state, struct kr_request *req) {
+	assert(req->selection_context.forwarding_targets);
+
+	struct forward_local_state *forward_state = mm_alloc(mm, sizeof(*forward_state));
+	*local_state = forward_state;
+	memset(forward_state, 0, sizeof(*forward_state));
+
+	forward_state->targets = req->selection_context.forwarding_targets;
+	forward_state->target_num = req->selection_context.forward_targets_num;
+
+	const size_t states_size = sizeof(struct address_state) * forward_state->target_num;
+	forward_state->addr_states = mm_alloc(mm, states_size);
+	memset(forward_state->addr_states, 0, states_size);
+}
+
+/**
+ * Choose a transport among the forwarding targets of the query's request.
+ *
+ * Every target is refreshed (TLS/TCP status, network settings, cached RTT) and,
+ * unless invalidated, offered to choose_transport().  `*transport` is set to
+ * the result, or to NULL when there is no usable target.
+ */
+void forward_choose_transport(struct kr_query *qry, struct kr_transport **transport) {
+	struct forward_local_state *local_state = qry->server_selection.local_state;
+	if (local_state->target_num == 0) {
+		/* A zero-length VLA is undefined behaviour, and there is nothing to choose. */
+		*transport = NULL;
+		return;
+	}
+
+	struct choice choices[local_state->target_num];
+	int valid = 0;
+
+	for (size_t i = 0; i < local_state->target_num; i++) {
+		union inaddr *address = &local_state->targets[i];
+		size_t addr_len;
+		switch (address->ip.sa_family) {
+		case AF_INET:
+			addr_len = sizeof(struct in_addr);
+			break;
+		case AF_INET6:
+			addr_len = sizeof(struct in6_addr);
+			break;
+		default:
+			assert(0);
+			/* With NDEBUG: skip the slot instead of using an uninitialized addr_len. */
+			continue;
+		}
+
+		struct address_state *addr_state = &local_state->addr_states[i];
+		addr_state->name = (knot_dname_t *)"";
+		check_tls_capable(addr_state, qry->request, &address->ip);
+		check_tcp_connections(addr_state, qry->request, &address->ip);
+		check_network_settings(addr_state, addr_len, qry->flags.NO_IPV4, qry->flags.NO_IPV6);
+
+		if(addr_state->generation == -1) {
+			continue;
+		}
+		addr_state->forward_index = i;
+
+		addr_state->rtt_state = get_rtt_state(ip_to_bytes(address, addr_len), addr_len, &qry->request->ctx->cache);
+		const char *ns_str = kr_straddr(&address->ip);
+		if (VERBOSE_STATUS) {
+			printf("[nsrep] rtt of %s is %d, variance is %d\n", ns_str, addr_state->rtt_state.srtt, addr_state->rtt_state.variance);
+		}
+
+		choices[valid++] = (struct choice){
+			.address = ip_to_bytes(address, addr_len),
+			.address_len = addr_len,
+			.address_state = addr_state,
+		};
+	}
+
+	if (valid == 0) {
+		/* No usable target: choose_transport() would draw from an empty set. */
+		*transport = NULL;
+		return;
+	}
+
+	*transport = choose_transport(choices, valid, NULL, 0, &qry->request->pool, qry->flags.TCP, &local_state->last_choice_index);
+}
+
+/** Success callback of forwarding mode; intentionally does nothing. */
+void forward_success(struct kr_query *qry, const struct kr_transport *transport) {
+	(void)qry;
+	(void)transport;
+}
+
+/**
+ * Error callback of forwarding mode: charge `sel_error` to the target recorded
+ * as the last choice in the query's local state.
+ */
+void forward_error(struct kr_query *qry, const struct kr_transport *transport, enum kr_selection_error sel_error) {
+	struct forward_local_state *state = qry->server_selection.local_state;
+	const size_t last = state->last_choice_index;
+
+	error(qry, &state->addr_states[last], transport, sel_error);
+}
+
+/**
+ * RTT callback of forwarding mode: fold `rtt` into the statistics of the target
+ * recorded as the last choice.  Does nothing when `transport` is NULL.
+ */
+void forward_update_rtt(struct kr_query *qry, const struct kr_transport *transport, unsigned rtt) {
+	if (transport == NULL) {
+		return;
+	}
+
+	struct forward_local_state *state = qry->server_selection.local_state;
+	const size_t last = state->last_choice_index;
+
+	update_rtt(qry, &state->addr_states[last], transport, rtt);
+}
\ No newline at end of file
--- /dev/null
+/* Copyright (C) 2014-2020 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+ * SPDX-License-Identifier: GPL-3.0-or-later
+ */
+
+#pragma once
+
+#include "lib/selection.h"
+#include "lib/resolve.h"
+
+void forward_local_state_init(struct knot_mm *mm, void **local_state, struct kr_request *req);
+void forward_choose_transport(struct kr_query *qry, struct kr_transport **transport);
+void forward_success(struct kr_query *qry, const struct kr_transport *transport);
+void forward_error(struct kr_query *qry, const struct kr_transport *transport, enum kr_selection_error sel_error);
+void forward_update_rtt(struct kr_query *qry, const struct kr_transport *transport, unsigned rtt);
\ No newline at end of file
--- /dev/null
+/* Copyright (C) 2014-2020 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+ * SPDX-License-Identifier: GPL-3.0-or-later
+ */
+
+#include "lib/selection_iter.h"
+#include "lib/selection.h"
+
+#include "lib/generic/trie.h"
+#include "lib/generic/pack.h"
+#include "lib/zonecut.h"
+#include "lib/resolve.h"
+
+#define VERBOSE_MSG(qry, ...) QRVERBOSE((qry), "nsrep", __VA_ARGS__)
+
+// To be held per query and locally
+struct iter_local_state {
+ // NS names of the zone cut that have no address yet (keyed by dname)
+ trie_t *unresolved_names;
+ // Known addresses, keyed by raw address bytes; values are struct address_state *
+ trie_t *addresses;
+ unsigned int generation; // Used to distinguish old and valid records in tries
+ // Copy of the zone cut name the tries were built for; a change resets the state
+ knot_dname_t *zonecut_name;
+};
+
+// To be held per NS name and locally
+struct iter_name_state {
+ // NOTE(review): presumably compared with iter_local_state.generation to tell
+ // stale names from current ones -- confirm against the code using it
+ unsigned int generation;
+};
+
+/**
+ * Allocate a zero-filled iter_local_state from `mm` and store it in `*local_state`.
+ */
+void iter_local_state_init(struct knot_mm *mm, void **local_state) {
+	const size_t state_size = sizeof(struct iter_local_state);
+
+	*local_state = mm_alloc(mm, state_size);
+	memset(*local_state, 0, state_size);
+}
+
+/**
+ * Look up the local state of the transport's address in the `addresses` trie.
+ *
+ * The address is expected to be present; a missing entry trips an assertion.
+ */
+struct address_state *get_address_state(struct iter_local_state *local_state, const struct kr_transport *transport) {
+	uint8_t *key = ip_to_bytes(&transport->address, transport->address_len);
+	trie_val_t *slot = trie_get_try(local_state->addresses, (char *)key, transport->address_len);
+
+	if (slot == NULL) {
+		assert(0);
+	}
+	return (struct address_state *)*slot;
+}
+
+/* Tell whether the zone cut name `new` differs from the remembered name `old`,
+ * i.e. whether knot_dname_cmp() reports anything but equality. */
+bool zonecut_changed(knot_dname_t *new, knot_dname_t *old) {
+	const int order = knot_dname_cmp(old, new);
+	return order != 0;
+}
+
+/**
+ * Refresh the RTT estimate of every tracked address from the cache.
+ *
+ * Walks local_state->addresses and stores the result of get_rtt_state() in
+ * each entry; in verbose mode the refreshed values are printed as well.
+ *
+ * @param local_state per-query iterator state whose address trie is updated
+ * @param cache       cache passed through to get_rtt_state()
+ */
+void iter_update_state_from_rtt_cache(struct iter_local_state *local_state, struct kr_cache *cache) {
+	trie_it_t *it;
+	for (it = trie_it_begin(local_state->addresses); !trie_it_finished(it); trie_it_next(it)) {
+		size_t address_len;
+		uint8_t *address = (uint8_t *)trie_it_key(it, &address_len);
+		struct address_state *address_state = (struct address_state *)*trie_it_val(it);
+		address_state->rtt_state = get_rtt_state(address, address_len, cache);
+
+		if (VERBOSE_STATUS) {
+			// Format the address only when it is actually going to be printed.
+			union inaddr addr;
+			bytes_to_ip(address, address_len, &addr);
+			const char *ns_str = kr_straddr(&addr.ip);
+			// Same NULL guard as in iter_choose_transport(): "%s" must not receive NULL.
+			printf("[nsrep] rtt of %s is %d, variance is %d\n", ns_str ? ns_str : "", address_state->rtt_state.srtt, address_state->rtt_state.variance);
+		}
+	}
+	trie_it_free(it);
+}
+
+// Bring the per-query state in line with `zonecut`: start over when the cut name changed or the tries do not exist yet, then stamp every NS name/address found in zonecut->nsset with a fresh generation.
+void iter_update_state_from_zonecut(struct iter_local_state *local_state, struct kr_zonecut *zonecut, struct knot_mm *mm) {
+ if (zonecut_changed(zonecut->name, local_state->zonecut_name) ||
+ local_state->unresolved_names == NULL || local_state->addresses == NULL) {
+ // Local state initialization (the memset also resets `generation` to 0 and drops the old trie pointers without freeing them here)
+ memset(local_state, 0, sizeof(struct iter_local_state));
+ local_state->unresolved_names = trie_create(mm);
+ local_state->addresses = trie_create(mm);
+ local_state->zonecut_name = knot_dname_copy(zonecut->name, mm); // own copy of the cut name, compared against on the next call
+ }
+ // Start a new generation: entries not re-stamped below keep an older number and are skipped by iter_choose_transport()
+ local_state->generation++;
+
+ trie_it_t *it;
+ unsigned int current_generation = local_state->generation;
+
+ for(it = trie_it_begin(zonecut->nsset); !trie_it_finished(it); trie_it_next(it)) {
+ knot_dname_t *dname = (knot_dname_t *)trie_it_key(it, NULL);
+ pack_t *addresses = (pack_t *)*trie_it_val(it);
+
+ if (addresses->len == 0) {
+ // Name with no address
+ trie_val_t *val = trie_get_ins(local_state->unresolved_names, (char *)dname, knot_dname_size(dname));
+ if (!*val) {
+ // that we encountered for the first time
+ *val = mm_alloc(mm, sizeof(struct iter_name_state));
+ memset(*val, 0, sizeof(struct iter_name_state));
+ }
+ (*(struct iter_name_state **)val)->generation = current_generation;
+ } else {
+ // We have some addresses to work with, let's iterate over them
+ for(uint8_t *obj = pack_head(*addresses); obj != pack_tail(*addresses); obj = pack_obj_next(obj)) {
+ uint8_t *address = (uint8_t *)pack_obj_val(obj);
+ size_t address_len = pack_obj_len(obj);
+ trie_val_t *val = trie_get_ins(local_state->addresses, (char *)address, address_len);
+ if (!*val) {
+ // We have not seen this address before.
+ *val = mm_alloc(mm, sizeof(struct address_state));
+ memset(*val, 0, sizeof(struct address_state));
+ }
+ struct address_state *address_state = (*(struct address_state **)val);
+ address_state->generation = current_generation;
+ address_state->name = dname; // borrowed: this is the key pointer handed out by the nsset iterator, not a copy
+ }
+ }
+ }
+
+ trie_it_free(it);
+}
+
+/**
+ * Pick the transport for the next upstream query of `qry` in iterative mode.
+ *
+ * Steps: refresh the local state from the zone cut and from the RTT cache,
+ * run the check_tls_capable() / check_tcp_connections() /
+ * check_network_settings() helpers on every known address, collect the
+ * addresses and unresolved NS names stamped with the current generation and
+ * let choose_transport() decide among them.
+ *
+ * @param qry       query to choose for; qry->server_selection.local_state must
+ *                  point to an initialised struct iter_local_state
+ * @param transport out: the chosen transport, or NULL when there is neither a
+ *                  current address nor a current NS name left to resolve
+ */
+void iter_choose_transport(struct kr_query *qry, struct kr_transport **transport) {
+	struct knot_mm *mempool = qry->request->rplan.pool;
+	struct iter_local_state *local_state = (struct iter_local_state *)qry->server_selection.local_state;
+
+	iter_update_state_from_zonecut(local_state, &qry->zone_cut, mempool);
+	iter_update_state_from_rtt_cache(local_state, &qry->request->ctx->cache);
+
+	trie_it_t *it;
+	for (it = trie_it_begin(local_state->addresses); !trie_it_finished(it); trie_it_next(it)) {
+		size_t address_len;
+		uint8_t *address = (uint8_t *)trie_it_key(it, &address_len);
+
+		union inaddr tmp_address;
+		bytes_to_ip(address, address_len, &tmp_address);
+
+		struct address_state *address_state = (struct address_state *)*trie_it_val(it);
+		check_tls_capable(address_state, qry->request, &tmp_address.ip);
+		check_tcp_connections(address_state, qry->request, &tmp_address.ip);
+		check_network_settings(address_state, address_len, qry->flags.NO_IPV4, qry->flags.NO_IPV6);
+	}
+	trie_it_free(it);
+
+	// also take qry->flags.TCP into consideration (do that in the actual choosing function)
+
+	int num_addresses = trie_weight(local_state->addresses);
+	int num_unresolved_names = trie_weight(local_state->unresolved_names);
+
+	// A variable-length array must have a size greater than zero (C11 6.7.6.2)
+	// and either trie may well be empty, so always reserve at least one slot.
+	struct choice choices[num_addresses > 0 ? num_addresses : 1]; // Some will get unused, oh well
+	knot_dname_t *unresolved_names[num_unresolved_names > 0 ? num_unresolved_names : 1];
+
+	// Only entries stamped during this call's zone cut refresh are candidates.
+	int valid_addresses = 0;
+	for (it = trie_it_begin(local_state->addresses); !trie_it_finished(it); trie_it_next(it)) {
+		size_t address_len;
+		uint8_t *address = (uint8_t *)trie_it_key(it, &address_len);
+		struct address_state *address_state = (struct address_state *)*trie_it_val(it);
+		if (address_state->generation == local_state->generation) {
+			choices[valid_addresses].address = address;
+			choices[valid_addresses].address_len = address_len;
+			choices[valid_addresses].address_state = address_state;
+			valid_addresses++;
+		}
+	}
+	trie_it_free(it);
+
+	int to_resolve = 0;
+	for (it = trie_it_begin(local_state->unresolved_names); !trie_it_finished(it); trie_it_next(it)) {
+		struct iter_name_state *name_state = *(struct iter_name_state **)trie_it_val(it);
+		if (name_state->generation == local_state->generation) {
+			knot_dname_t *name = (knot_dname_t *)trie_it_key(it, NULL);
+			unresolved_names[to_resolve++] = name;
+		}
+	}
+	trie_it_free(it);
+
+	if (valid_addresses || to_resolve) {
+		*transport = choose_transport(choices, valid_addresses, unresolved_names, to_resolve, mempool, qry->flags.TCP, NULL);
+	} else {
+		*transport = NULL;
+	}
+
+	WITH_VERBOSE(qry) {
+		KR_DNAME_GET_STR(zonecut_str, qry->zone_cut.name);
+		if (*transport) {
+			KR_DNAME_GET_STR(ns_name, (*transport)->name);
+			const char *ns_str = kr_straddr(&(*transport)->address.ip);
+			VERBOSE_MSG(qry,
+				"=> id: '%05u' choosing: '%s'@'%s' zone cut: '%s'\n",
+				qry->id, ns_name, ns_str ? ns_str : "", zonecut_str);
+		} else {
+			VERBOSE_MSG(qry,
+				"=> id: '%05u' no suitable transport, zone cut: '%s'\n",
+				qry->id, zonecut_str);
+		}
+	}
+}
+
+/* Success callback of the iterator strategy; deliberately does nothing. */
+void iter_success(struct kr_query *qry, const struct kr_transport *transport) {
+	(void)qry;
+	(void)transport;
+}
+
+/* Error callback of the iterator strategy: resolve the per-address state of
+ * `transport` in the query's local selection state and pass it to error(). */
+void iter_error(struct kr_query *qry, const struct kr_transport *transport, enum kr_selection_error sel_error) {
+	struct iter_local_state *state = qry->server_selection.local_state;
+	error(qry, get_address_state(state, transport), transport, sel_error);
+}
+
+/* RTT callback of the iterator strategy: resolve the per-address state of
+ * `transport` in the query's local selection state and pass it, together
+ * with the measured `rtt`, to update_rtt(). */
+void iter_update_rtt(struct kr_query *qry, const struct kr_transport *transport, unsigned rtt) {
+	struct iter_local_state *state = qry->server_selection.local_state;
+	update_rtt(qry, get_address_state(state, transport), transport, rtt);
+}
--- /dev/null
+/* Copyright (C) 2014-2020 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+ * SPDX-License-Identifier: GPL-3.0-or-later
+ */
+
+#pragma once
+
+#include "lib/selection.h"
+
+void iter_local_state_init(struct knot_mm *mm, void **local_state); // allocates a zeroed struct iter_local_state from `mm` into *local_state
+void iter_choose_transport(struct kr_query *qry, struct kr_transport **transport); // refreshes state from the zone cut and RTT cache, then sets *transport (NULL if nothing is usable)
+void iter_success(struct kr_query *qry, const struct kr_transport *transport); // currently a no-op
+void iter_error(struct kr_query *qry, const struct kr_transport *transport, enum kr_selection_error sel_error); // looks up the address state of `transport` and forwards to error()
+void iter_update_rtt(struct kr_query *qry, const struct kr_transport *transport, unsigned rtt); // looks up the address state of `transport` and forwards to update_rtt()
\ No newline at end of file
#include "lib/defines.h"
#include "lib/generic/array.h"
#include "lib/module.h"
-#include "lib/nsrep.h"
+#include "lib/selection.h"
#include "lib/resolve.h"
#include <gnutls/gnutls.h>
free((void *)what);
}
+// Use this for allocations with mm.
static inline void *mm_alloc(knot_mm_t *mm, size_t size)
{
if (mm) return mm->alloc(mm->ctx, size);
void *mm_realloc(knot_mm_t *mm, void *what, size_t size, size_t prev_size);
/** Trivial malloc() wrapper. */
+// Use mm_alloc for allocations into mempool
void *mm_malloc(void *ctx, size_t n);
/** posix_memalign() wrapper. */
void *mm_malloc_aligned(void *ctx, size_t n);
(int)rd->len, (int)rrtype);
continue;
}
- /* Check RTT cache - whether the IP is usable or not. */
- kr_nsrep_rtt_lru_entry_t *rtt_e = ctx->cache_rtt
- ? lru_get_try(ctx->cache_rtt, (const char *)rd->data, rd->len)
- : NULL;
- const bool unusable = rtt_e && rtt_e->score >= KR_NS_TIMEOUT
- && qry->creation_time_mono
- < rtt_e->tout_timestamp + ctx->cache_rtt_tout_retry_interval;
- if (!unusable) {
- result = AI_OK;
- ++usable_cnt;
- }
+ result = AI_OK;
+ ++usable_cnt;
ret = pack_obj_push(addrs, rd->data, rd->len);
assert(!ret); /* didn't fit because of incorrectly reserved memory */
pack_init(**pack);
addrset_info_t infos[2];
+
/* Fetch NS reputation and decide whether to prefetch A/AAAA records. */
- unsigned *cached = lru_get_try(ctx->cache_rep,
- (const char *)ns_name, ns_size);
- unsigned reputation = (cached) ? *cached : 0;
- infos[0] = (reputation & KR_NS_NOIP4) || qry->flags.NO_IPV4
- ? AI_REPUT
- : fetch_addr(*pack, ns_name, KNOT_RRTYPE_A, cut->pool, qry);
- infos[1] = (reputation & KR_NS_NOIP6) || qry->flags.NO_IPV6
- ? AI_REPUT
- : fetch_addr(*pack, ns_name, KNOT_RRTYPE_AAAA, cut->pool, qry);
+ infos[0] = fetch_addr(*pack, ns_name, KNOT_RRTYPE_A, cut->pool, qry);
+ infos[1] = fetch_addr(*pack, ns_name, KNOT_RRTYPE_AAAA, cut->pool, qry);
#if 0 /* rather unlikely to be useful unless changing some zcut code */
WITH_VERBOSE(qry) {
end
-- Override the list of nameservers (forwarders)
-local function set_nslist(qry, list)
+local function set_nslist(req, list)
+ req.selection_context.forward_targets_num = #list
local ns_i = 0
for _, ns in ipairs(list) do
- -- kr_nsrep_set() can return kr_error(ENOENT), it's OK
- if ffi.C.kr_nsrep_set(qry, ns_i, ns) == 0 then
+ if ffi.C.kr_forward_add_target(req, ns_i, ns) == 0 then
ns_i = ns_i + 1
end
end
- -- If less than maximum NSs, insert guard to terminate the list
- if ns_i < 3 then
- assert(ffi.C.kr_nsrep_set(qry, ns_i, nil) == 0);
- end
if ns_i == 0 then
-- would use assert() but don't want to compose the message if not triggered
error('no usable address in NS set (check net.ipv4 and '
if type(target) == 'table' then
for _, v in pairs(target) do
table.insert(list, addr2sock(v, 53))
- assert(#list <= 4, 'at most 4 STUB targets are supported')
end
else
table.insert(list, addr2sock(target, 53))
-- Switch mode to stub resolver, do not track origin zone cut since it's not real authority NS
qry.flags.STUB = true
qry.flags.ALWAYS_CUT = false
- set_nslist(qry, list)
+ set_nslist(req, list)
return state
end
end
if type(target) == 'table' then
for _, v in pairs(target) do
table.insert(list, addr2sock(v, 53))
- assert(#list <= 4, 'at most 4 FORWARD targets are supported')
end
else
table.insert(list, addr2sock(target, 53))
qry.flags.ALWAYS_CUT = false
qry.flags.NO_MINIMIZE = true
qry.flags.AWAIT_CUT = true
- set_nslist(qry, list)
+ set_nslist(req, list)
return state
end
end
{
struct kr_request *req = ctx->req;
struct kr_query *qry = req->current_query;
- if (qry->flags.CACHED || !req->upstream.addr) {
+ if (qry->flags.CACHED || !req->upstream.transport) {
return ctx->state;
}
/* Socket address is encoded into sockaddr_in6 struct that
* unions with sockaddr_in and differ in sa_family */
struct sockaddr_in6 *e = &data->upstreams.q.at[data->upstreams.head];
- const struct sockaddr *src = req->upstream.addr;
+ const struct sockaddr *src = &req->upstream.transport->address.ip;
switch (src->sa_family) {
case AF_INET: memcpy(e, src, sizeof(struct sockaddr_in)); break;
case AF_INET6: memcpy(e, src, sizeof(struct sockaddr_in6)); break;