knot_mm_t mm;
knot_compr_t compr;
};
+typedef struct {
+ uint16_t family;
+ uint8_t source_len;
+ uint8_t scope_len;
+ uint8_t address[16];
+} knot_edns_client_subnet_t;
typedef struct {
void *root;
struct knot_mm *pool;
trace_log_f trace_log;
trace_callback_f trace_finish;
int vars_ref;
+ int cache_scope_len_bits;
+ const uint8_t *cache_scope;
knot_mm_t pool;
};
enum kr_rank {KR_RANK_INITIAL, KR_RANK_OMIT, KR_RANK_TRY, KR_RANK_INDET = 4, KR_RANK_BOGUS, KR_RANK_MISMATCH, KR_RANK_MISSING, KR_RANK_INSECURE, KR_RANK_AUTH = 16, KR_RANK_SECURE = 32};
knot_pkt_t *knot_pkt_new(void *, uint16_t, knot_mm_t *);
void knot_pkt_free(knot_pkt_t *);
int knot_pkt_parse(knot_pkt_t *, unsigned int);
+int knot_pkt_reserve(knot_pkt_t *pkt, uint16_t size);
+uint8_t knot_edns_get_version(const knot_rrset_t *);
+uint16_t knot_edns_get_payload(const knot_rrset_t *);
+bool knot_edns_has_option(const knot_rrset_t *, uint16_t);
+uint8_t *knot_edns_get_option(const knot_rrset_t *, uint16_t);
+int knot_edns_add_option(knot_rrset_t *, uint16_t, uint16_t, const uint8_t *, knot_mm_t *);
+uint16_t knot_edns_client_subnet_size(const knot_edns_client_subnet_t *);
+int knot_edns_client_subnet_write(uint8_t *, uint16_t, const knot_edns_client_subnet_t *);
+int knot_edns_client_subnet_parse(knot_edns_client_subnet_t *, const uint8_t *, uint16_t);
struct kr_rplan *kr_resolve_plan(struct kr_request *);
knot_mm_t *kr_resolve_pool(struct kr_request *);
struct kr_query *kr_rplan_push(struct kr_rplan *, struct kr_query *, const knot_dname_t *, uint16_t, uint16_t);
struct knot_compr
knot_compr_t
struct knot_pkt
+ knot_edns_client_subnet_t
# generics
map_t
# libkres
knot_pkt_new
knot_pkt_free
knot_pkt_parse
+ knot_pkt_reserve
+# OPT
+ knot_edns_get_version
+ knot_edns_get_payload
+ knot_edns_has_option
+ knot_edns_get_option
+ knot_edns_add_option
+ knot_edns_client_subnet_size
+ knot_edns_client_subnet_write
+ knot_edns_client_subnet_parse
+ knot_edns_client_subnet_set_addr
EOF
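A hypothetical usage sketch (not part of the patch): with the helpers exported above, a module could build an ECS option for the client's address and attach it to a query's OPT RR roughly as below. KNOT_EDNS_OPTION_CLIENT_SUBNET and knot_edns_client_subnet_set_addr() are assumed to come from libknot, and put_client_subnet() is an illustrative name.

static int put_client_subnet(knot_pkt_t *pkt, const struct sockaddr_storage *addr,
                             uint8_t source_prefix)
{
	knot_edns_client_subnet_t ecs = { 0 };
	int ret = knot_edns_client_subnet_set_addr(&ecs, addr);  /* fills family + address */
	if (ret != 0) {
		return ret;
	}
	ecs.source_len = source_prefix;  /* narrow to the advertised prefix, e.g. 24 */
	ecs.scope_len = 0;               /* always 0 in queries (RFC 7871) */

	uint8_t wire[2 + 1 + 1 + 16];    /* family + prefix lengths + up to a full IPv6 address */
	const uint16_t len = knot_edns_client_subnet_size(&ecs);
	ret = knot_edns_client_subnet_write(wire, sizeof(wire), &ecs);
	if (ret != 0) {
		return ret;
	}
	return knot_edns_add_option(pkt->opt_rr, KNOT_EDNS_OPTION_CLIENT_SUBNET,
	                            len, wire, &pkt->mm);
}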
## libkres API
return kr_ok();
}
-int kr_cache_insert_rr(struct kr_cache *cache, const knot_rrset_t *rr, const knot_rrset_t *rrsig, uint8_t rank, uint32_t timestamp)
+int kr_cache_insert_rr(struct kr_cache *cache, const knot_rrset_t *rr, const knot_rrset_t *rrsig,
+ uint8_t rank, uint32_t timestamp, const uint8_t *scope, int scope_len_bits)
{
int err = stash_rrset_precond(rr, NULL);
if (err <= 0) {
return kr_ok();
}
+int cache_key_write_scope(struct key *k, size_t off, const uint8_t *scope, int scope_len_bits)
+{
+ const int scope_len_bytes = (scope_len_bits + 7) / 8;
+ if (!k || !scope || off + scope_len_bytes + 1 > KR_CACHE_KEY_MAXLEN) {
+ return kr_error(EINVAL);
+ }
+
+ /* Write scope at current offset */
+ memmove(k->buf + off, scope, scope_len_bytes);
+
+ /* Write a terminal byte to distinguish 'no scope' from 'global scope' */
+ k->buf[off + scope_len_bytes] = '\0';
+
+ return scope_len_bytes + 1;
+}
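To make the scope encoding concrete, a small illustration with hypothetical values: a /24 IPv4 scope covering 192.168.1.0 occupies three address bytes plus the terminator, so the function appends four bytes and reports that length to the caller.

	const uint8_t scope[] = { 192, 168, 1 };            /* significant bytes, MSB first */
	int used = cache_key_write_scope(k, off, scope, 24);
	/* used == 4; k->buf[off .. off+3] now holds { 192, 168, 1, 0x00 } */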
+
/** Like key_exact_type() but omits a couple checks not holding for pkt cache. */
-knot_db_val_t key_exact_type_maypkt(struct key *k, uint16_t type)
+knot_db_val_t key_exact_type_maypkt(struct key *k, uint16_t type, const uint8_t *scope, int scope_len_bits)
{
assert(check_rrtype(type, NULL));
+ if (!is_scopable_type(type)) {
+ scope = NULL;
+ scope_len_bits = 0;
+ }
+
switch (type) {
case KNOT_RRTYPE_RRSIG: /* no RRSIG query caching, at least for now */
assert(false);
int name_len = k->buf[0];
k->buf[name_len + 1] = 0; /* make sure different names can never match */
k->buf[name_len + 2] = 'E'; /* tag for exact name+type matches */
- memcpy(k->buf + name_len + 3, &type, 2);
+
+ size_t off = name_len + 3;
+ memcpy(k->buf + off, &type, sizeof(type));
k->type = type;
- /* CACHE_KEY_DEF: key == dname_lf + '\0' + 'E' + RRTYPE */
- return (knot_db_val_t){ k->buf + 1, name_len + 4 };
-}
+ off += sizeof(type);
+
+ int ret = cache_key_write_scope(k, off, scope, scope_len_bits);
+ if (ret > 0) {
+ off += ret;
+ }
+ /* CACHE_KEY_DEF: key == dname_lf + '\0' + 'E' + RRTYPE + scope */
+ return (knot_db_val_t){ k->buf + 1, off - 1 };
+}
/** The inside for cache_peek(); implementation separated to ./peek.c */
int peek_nosync(kr_layer_t *ctx, knot_pkt_t *pkt);
VERBOSE_MSG(qry, "=> stashing RRs errored out\n");
goto finally;
}
- cache->stats.insert += 1;
/* LATER(optim.): maybe filter out some type-rank combinations
* that won't be useful as separate RRsets. */
}
/* Construct the key under which RRs will be stored,
* and add corresponding nsec_pmap item (if necessary). */
+ int used_scope_len = -1;
struct key k_storage, *k = &k_storage;
knot_db_val_t key;
switch (rr->type) {
assert(!ret);
return kr_error(ret);
}
- key = key_exact_type(k, rr->type);
+ /* Scope the record if authoritative, and scopeable type */
+ const uint8_t *scope = NULL;
+ int scope_len = 0;
+ if (qry) {
+ struct kr_request *req = qry->request;
+ /* Exclude infrastructure service requests (e.g. A/AAAA for an NS set)
+ * and exclude non-authoritative data (records from other sections)
+ */
+ if (!qry->parent && kr_rank_test(rank, KR_RANK_AUTH) && is_scopable_type(rr->type)) {
+ scope = req->cache_scope;
+ scope_len = req->cache_scope_len_bits;
+ used_scope_len = scope_len;
+ }
+ }
+
+ key = key_exact_type(k, rr->type, scope, scope_len);
}
/* Compute materialized sizes of the new data. */
|| rr->type == KNOT_RRTYPE_NS) {
auto_free char *type_str = kr_rrtype_text(rr->type),
*encl_str = kr_dname_text(encloser);
- VERBOSE_MSG(qry, "=> stashed %s%s %s, rank 0%.2o, "
+	VERBOSE_MSG(qry, "=> stashed %s%s %s, rank 0%.2o, scoped: %d, "
"%d B total, incl. %d RRSIGs\n",
- (wild_labels ? "*." : ""), encl_str, type_str, rank,
+ (wild_labels ? "*." : ""), encl_str, type_str, rank, used_scope_len,
(int)val_new_entry.len, (rr_sigs ? rr_sigs->rrs.count : 0)
);
} }
struct key k_storage, *k = &k_storage;
int ret = kr_dname_lf(k->buf, dname, false);
if (ret) return kr_error(ret);
- knot_db_val_t key = key_exact_type(k, KNOT_RRTYPE_NS);
+ knot_db_val_t key = key_exact_type(k, KNOT_RRTYPE_NS, NULL, 0);
knot_db_val_t val_orig = { NULL, 0 };
ret = cache_op(cache, read, &key, &val_orig, 1);
if (ret && ret != -ABS(ENOENT)) {
int ret = kr_dname_lf(k->buf, name, false);
if (ret) return kr_error(ret);
- knot_db_val_t key = key_exact_type(k, type);
+ knot_db_val_t key = key_exact_type(k, type, NULL, 0);
knot_db_val_t val = { NULL, 0 };
ret = cache_op(cache, read, &key, &val, 1);
if (!ret) ret = entry_h_seek(&val, type);
int ret = kr_dname_lf(k->buf, name, false);
if (ret) return kr_error(ret);
- knot_db_val_t key = key_exact_type(k, type);
+ knot_db_val_t key = key_exact_type(k, type, NULL, 0);
return cache_op(cache, remove, &key, 1);
}
if (ret) return kr_error(ret);
// use a mock type
- knot_db_val_t key = key_exact_type(k, KNOT_RRTYPE_A);
+ knot_db_val_t key = key_exact_type(k, KNOT_RRTYPE_A, NULL, 0);
/* CACHE_KEY_DEF */
key.len -= sizeof(uint16_t); /* the type */
if (!exact_name) {
* @param rrsig RRSIG for inserted RRSet (optional)
* @param rank rank of the data
* @param timestamp current time
+ * @param scope cache scope of the record (significant address bytes), or NULL
+ * @param scope_len_bits length of the cache scope in bits
* @return 0 or an errcode
*/
KR_EXPORT
-int kr_cache_insert_rr(struct kr_cache *cache, const knot_rrset_t *rr, const knot_rrset_t *rrsig, uint8_t rank, uint32_t timestamp);
+int kr_cache_insert_rr(struct kr_cache *cache, const knot_rrset_t *rr, const knot_rrset_t *rrsig,
+ uint8_t rank, uint32_t timestamp, const uint8_t *scope, int scope_len_bits);
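A hypothetical call with the extended signature, caching an answer under the /24 network it was answered for (cache, rr, rrsig and qry are assumed to be in scope):

	const uint8_t scope[] = { 192, 0, 2 };  /* 192.0.2.0/24 */
	int ret = kr_cache_insert_rr(cache, rr, rrsig, KR_RANK_AUTH | KR_RANK_SECURE,
	                             qry->timestamp.tv_sec, scope, 24);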
/**
* Clear all items from the cache.
assert(owner == NULL);
return;
}
- key = key_exact_type_maypkt(k, pkt_type);
+ key = key_exact_type_maypkt(k, pkt_type, NULL, 0);
/* For now we stash the full packet byte-exactly as it came from upstream. */
const uint16_t pkt_size = pkt->size;
static const int NSEC3_HASH_LEN = 20,
NSEC3_HASH_TXT_LEN = 32;
+/**
+ * This does not implement RFC 7871 (https://datatracker.ietf.org/doc/rfc7871/), section 7.3.1, exactly.
+ * That section scopes answer-section records to the given network; only DNSSEC records and records
+ * from non-answer sections are exempt. However, ECS is used almost exclusively for traffic engineering,
+ * and many record types are not meant for that. An NS record can also show up in the answer section
+ * in a parent-child setup, but it should not be scoped. Only A, AAAA and CNAME records are scoped here.
+ */
+static inline bool is_scopable_type(uint16_t type)
+{
+ return type == KNOT_RRTYPE_A || type == KNOT_RRTYPE_AAAA || type == KNOT_RRTYPE_CNAME;
+}
+
+/**
+ * Write the cache key scope after the formatted lookup key.
+ * For exact-match ('E') entries, k->buf looks roughly like this:
+ *   off    len (bytes)
+ *   0      1      domain name length (d)
+ *   1      d      domain name in lookup format (d = 0 .. 255)
+ *   d+1    1      terminator \x00
+ *   d+2    1      tag 'E' (exact name+type match)
+ *   d+3    2      RRTYPE (native byte order)
+ *   d+5    s+1    cache scope bytes (s = 0 .. 16) plus a terminating \x00,
+ *                 written by this function; omitted entirely when the entry is unscoped
+ * The key handed to the database starts at k->buf + 1, i.e. without the length byte.
+ */
+int cache_key_write_scope(struct key *k, size_t off, const uint8_t *scope, int scope_len_bits);
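For illustration (hypothetical name and values), an exact-match key for an A record scoped to 192.168.1.0/24 ends up as

	dname_lf("example.com") '\0' 'E' <RRTYPE, 2 B> 192 168 1 '\0'

while the same record cached without a scope ends right after the RRTYPE bytes, and a record cached in the global scope (/0) ends with a lone '\0', so the three variants never share a key.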
+
/** Finish constructing string key for exact search.
 * It's assumed that kr_dname_lf(k->buf, owner, *) has been run.
*/
-knot_db_val_t key_exact_type_maypkt(struct key *k, uint16_t type);
+knot_db_val_t key_exact_type_maypkt(struct key *k, uint16_t type, const uint8_t *scope, int scope_len_bits);
/** Like key_exact_type_maypkt but with extra checks if used for RRs only. */
-static inline knot_db_val_t key_exact_type(struct key *k, uint16_t type)
+static inline knot_db_val_t key_exact_type(struct key *k, uint16_t type, const uint8_t *scope, int scope_len_bits)
{
switch (type) {
/* Sanity check: forbidden types represented in other way(s). */
assert(false);
return (knot_db_val_t){ NULL, 0 };
}
- return key_exact_type_maypkt(k, type);
+ return key_exact_type_maypkt(k, type, scope, scope_len_bits);
}
return KR_RANK_INITIAL | KR_RANK_AUTH;
}
+/**
+ * Return cache scope as a hexstring.
+ */
+static char *cache_scope_hex(const uint8_t *scope, int scope_len_bits)
+{
+ const int len = (scope_len_bits + 7) / 8;
+	char *hex_str = calloc(1, len * 2 + 1);
+	if (!hex_str) {
+		return NULL;
+	}
+ for (int i = 0; i < len; ++i) {
+ snprintf(hex_str + (i * 2), 3, "%02x", scope[i]);
+ }
+ return hex_str;
+}
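For example (hypothetical values), a /24 scope of 192.168.1.0 renders as "c0a801":

	const uint8_t scope[] = { 192, 168, 1 };
	auto_free char *hex = cache_scope_hex(scope, 24);  /* "c0a801" */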
/** Almost whole .produce phase for the cache module.
* \note we don't transition to KR_STATE_FAIL even in case of "unexpected errors".
/**** 1. find the name or the closest (available) zone, not considering wildcards
**** 1a. exact name+type match (can be negative answer in insecure zones) */
- knot_db_val_t key = key_exact_type_maypkt(k, qry->stype);
+ knot_db_val_t key = key_exact_type_maypkt(k, qry->stype, req->cache_scope, req->cache_scope_len_bits);
knot_db_val_t val = { NULL, 0 };
ret = cache_op(cache, read, &key, &val, 1);
+	/* If the name is expected to be scoped, but there's no scoped result in the cache,
+	 * check the global scope, as the name may not be scoped by the server. */
+	if (req->cache_scope != NULL && ret == -abs(ENOENT)) {
+ /* Retry using global scope */
+ VERBOSE_MSG(qry, "=> searching global scope /0\n");
+ key = key_exact_type_maypkt(k, qry->stype, req->cache_scope, 0);
+ ret = cache_op(cache, read, &key, &val, 1);
+ }
if (!ret) {
/* found an entry: test conditions, materialize into pkt, etc. */
ret = found_exact_hit(ctx, pkt, val, lowest_rank);
assert(false);
return ctx->state;
} else if (!ret) {
+ WITH_VERBOSE(qry) {
+ if (req->cache_scope && is_scopable_type(qry->stype)) {
+ auto_free char *hex_str = cache_scope_hex(req->cache_scope, req->cache_scope_len_bits);
+ VERBOSE_MSG(qry, "=> found exact match in scope %s/%d\n", hex_str, req->cache_scope_len_bits);
+ }
+ }
cache->stats.hit += 1;
return KR_STATE_DONE;
}
/* Assuming k->buf still starts with zone's prefix,
* look up the SOA in cache. */
k->buf[0] = k->zlf_len;
- key = key_exact_type(k, KNOT_RRTYPE_SOA);
+ key = key_exact_type(k, KNOT_RRTYPE_SOA, NULL, 0);
knot_db_val_t val = { NULL, 0 };
ret = cache_op(cache, read, &key, &val, 1);
const struct entry_h *eh;
if (qf->DNSSEC_INSECURE) {
qf->DNSSEC_WANT = false;
}
- VERBOSE_MSG(qry, "=> satisfied by exact %s: rank 0%.2o, new TTL %d\n",
- (type == KNOT_RRTYPE_CNAME ? "CNAME" : "RRset"),
- eh->rank, new_ttl);
+ WITH_VERBOSE(qry) {
+ auto_free char *scope_hex = NULL;
+ if (req->cache_scope && is_scopable_type(type)) {
+ scope_hex = cache_scope_hex(req->cache_scope, req->cache_scope_len_bits);
+ }
+ VERBOSE_MSG(qry, "=> satisfied by exact RR or CNAME: rank 0%.2o, new TTL %d, scope %s/%d\n",
+ eh->rank, new_ttl, scope_hex ? scope_hex : "", scope_hex ? req->cache_scope_len_bits : 0);
+ }
return kr_ok();
}
#undef CHECK_RET
const uint16_t type, const uint8_t lowest_rank,
const struct kr_query *qry, struct kr_cache *cache)
{
- knot_db_val_t key = key_exact_type(k, type);
+ knot_db_val_t key = key_exact_type(k, type, NULL, 0);
/* Find the record. */
knot_db_val_t val = { NULL, 0 };
int ret = cache_op(cache, read, &key, &val, 1);
struct kr_query *qry, const bool only_NS, const bool is_DS)
{
/* get the current timestamp */
+ const uint8_t *cache_scope = NULL;
+ int cache_scope_len_bits = 0;
uint32_t timestamp;
if (qry) {
timestamp = qry->timestamp.tv_sec;
+ cache_scope = qry->request->cache_scope;
+ cache_scope_len_bits = qry->request->cache_scope_len_bits;
} else {
struct timeval tv;
if (gettimeofday(&tv, NULL)) return kr_error(errno);
/* Inspect the NS/xNAME entries, shortening by a label on each iteration. */
do {
k->buf[0] = zlf_len;
- knot_db_val_t key = key_exact_type(k, KNOT_RRTYPE_NS);
+		/* For the exact match, look for a CNAME to allow scoping; otherwise look for NS.
+		 * If the scoped CNAME lookup finds nothing, we fall back to the NS key below,
+		 * so the entry is scoped whenever possible.
+		 */
+ const uint16_t find_type = exact_match ? KNOT_RRTYPE_CNAME : KNOT_RRTYPE_NS;
+ knot_db_val_t key = key_exact_type(k, find_type, cache_scope, cache_scope_len_bits);
knot_db_val_t val;
int ret = cache_op(cache, read, &key, &val, 1);
+	/* If the scoped lookup found no immediate match, retry in the global scope */
+ if (exact_match && cache_scope != NULL && ret == -abs(ENOENT)) {
+ key = key_exact_type_maypkt(k, KNOT_RRTYPE_NS, cache_scope, 0);
+ ret = cache_op(cache, read, &key, &val, 1);
+ }
if (ret == -abs(ENOENT)) goto next_label;
if (ret) {
assert(!ret);
knot_edns_set_do(pkt->opt_rr);
knot_wire_set_cd(pkt->wire);
}
- ret = edns_put(pkt);
}
}
return ret;
}
struct kr_query *qry = array_tail(rplan->pending);
- /* Run the checkout layers and cancel on failure.
- * The checkout layer doesn't persist the state, so canceled subrequests
- * don't affect the resolution or rest of the processing. */
- int state = request->state;
- ITERATE_LAYERS(request, qry, checkout, packet, dst, type);
- if (request->state == KR_STATE_FAIL) {
- request->state = state; /* Restore */
- return kr_error(ECANCELED);
- }
-
#if defined(ENABLE_COOKIES)
/* Update DNS cookies in request. */
if (type == SOCK_DGRAM) { /* @todo: Add cookies also over TCP? */
return kr_error(EINVAL);
}
+ /* Run the checkout layers and cancel on failure.
+ * The checkout layer doesn't persist the state, so canceled subrequests
+ * don't affect the resolution or rest of the processing. */
+ int state = request->state;
+ ITERATE_LAYERS(request, qry, checkout, packet, dst, type);
+ if (request->state == KR_STATE_FAIL) {
+ request->state = state; /* Restore */
+ return kr_error(ECANCELED);
+ }
+
+	/* Write the OPT RR to the packet unless in safemode */
+ if (!(qry->flags.SAFEMODE)) {
+ ret = edns_put(packet);
+ if (ret != 0) {
+ return kr_error(EINVAL);
+ }
+ }
+
WITH_VERBOSE(qry) {
char ns_str[INET6_ADDRSTRLEN];
trace_log_f trace_log; /**< Logging tracepoint */
trace_callback_f trace_finish; /**< Request finish tracepoint */
int vars_ref; /**< Reference to per-request variable table. LUA_NOREF if not set. */
+ int cache_scope_len_bits; /**< Cache scope length (bits) */
+ const uint8_t *cache_scope; /**< Cache scope for the request */
knot_mm_t pool;
};
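A module can opt a request into scoped caching by filling these two fields, typically from the client's ECS option. A minimal hypothetical sketch of a layer's .begin hook follows; scope_buf stands for a module-owned buffer of significant address bytes (most significant first) that outlives the request:

static int begin(kr_layer_t *ctx)
{
	struct kr_request *req = ctx->req;
	req->cache_scope = scope_buf;
	req->cache_scope_len_bits = 24;
	return ctx->state;
}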
assert_int_not_equal(kr_cache_peek(cache, KR_CACHE_USER, NULL, KNOT_RRTYPE_TSIG, &entry, &timestamp), 0);
assert_int_not_equal(kr_cache_peek_rr(NULL, NULL, NULL, NULL, NULL), 0);
assert_int_not_equal(kr_cache_peek_rr(cache, NULL, NULL, NULL, NULL), 0);
- assert_int_not_equal(kr_cache_insert_rr(cache, NULL, 0, 0, 0), 0);
- assert_int_not_equal(kr_cache_insert_rr(NULL, NULL, 0, 0, 0), 0);
+ assert_int_not_equal(kr_cache_insert_rr(cache, NULL, 0, 0, 0, 0, 0), 0);
+ assert_int_not_equal(kr_cache_insert_rr(NULL, NULL, 0, 0, 0, 0, 0), 0);
assert_int_not_equal(kr_cache_insert(NULL, KR_CACHE_USER, dname,
KNOT_RRTYPE_TSIG, &global_fake_ce, global_namedb_data), 0);
assert_int_not_equal(kr_cache_insert(cache, KR_CACHE_USER, NULL,
{
test_random_rr(&global_rr, CACHE_TTL);
struct kr_cache *cache = (*state);
- int ret = kr_cache_insert_rr(cache, &global_rr, 0, 0, CACHE_TIME);
+ int ret = kr_cache_insert_rr(cache, &global_rr, 0, 0, CACHE_TIME, 0, 0);
assert_int_equal(ret, 0);
kr_cache_sync(cache);
}
for (unsigned i = 0; i < CACHE_SIZE; ++i) {
knot_rrset_t rr;
test_random_rr(&rr, CACHE_TTL);
- ret = kr_cache_insert_rr(cache, &rr, 0, 0, CACHE_TTL - 1);
+ ret = kr_cache_insert_rr(cache, &rr, 0, 0, CACHE_TTL - 1, 0, 0);
if (ret != 0) {
break;
}