From b11c237679e7d49ead619eae3f6ad40a7a6912c5 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Marek=20Vavrus=CC=8Ca?=
Date: Mon, 9 Apr 2018 23:11:16 -0700
Subject: [PATCH] implement basic infrastructure for scoped cache

This commit adds support for a scoped cache: keys can be tagged with a
scope, so that the same key can exist in multiple scopes and return a
value based on the scope that is set. This is practically required for
scoping by subnet in ECS (EDNS Client Subnet), but it does not
implement ECS completely; it is just a framework that makes something
like ECS possible in a module.

The scope search is currently non-exhaustive: it either returns a value
bound to the given scope or looks into the global scope, nothing in
between.
---
 daemon/lua/kres-gen.lua |  17 ++++++
 daemon/lua/kres-gen.sh  |  12 ++++
 lib/cache/api.c         | 125 ++++++++++++++++++++++++++++++++++------
 lib/cache/api.h         |   5 +-
 lib/cache/entry_pkt.c   |   2 +-
 lib/cache/impl.h        |  28 ++++++++-
 lib/resolve.c           |  27 ++++++---
 lib/resolve.h           |   2 +
 tests/test_cache.c      |   8 +--
 9 files changed, 193 insertions(+), 33 deletions(-)

diff --git a/daemon/lua/kres-gen.lua b/daemon/lua/kres-gen.lua
index 885076360..23b24f294 100644
--- a/daemon/lua/kres-gen.lua
+++ b/daemon/lua/kres-gen.lua
@@ -57,6 +57,12 @@ struct knot_pkt {
 	char _stub[]; /* TMP: do NOT replace yet (changed in libknot-2.6.0) */
 };
 typedef struct knot_pkt knot_pkt_t;
+typedef struct {
+	uint16_t family;
+	uint8_t source_len;
+	uint8_t scope_len;
+	uint8_t address[16];
+} knot_edns_client_subnet_t;
 typedef struct {
 	void *root;
 	struct knot_mm *pool;
@@ -169,6 +175,8 @@ struct kr_request {
 	int has_tls;
 	trace_log_f trace_log;
 	trace_callback_f trace_finish;
+	int cache_scope_len_bits;
+	const uint8_t *cache_scope;
 	knot_mm_t pool;
 };
 enum kr_rank {KR_RANK_INITIAL, KR_RANK_OMIT, KR_RANK_TRY, KR_RANK_INDET = 4, KR_RANK_BOGUS, KR_RANK_MISMATCH, KR_RANK_MISSING, KR_RANK_INSECURE, KR_RANK_AUTH = 16, KR_RANK_SECURE = 32};
@@ -267,6 +275,15 @@ const knot_pktsection_t *knot_pkt_section(const knot_pkt_t *, knot_section_t);
 knot_pkt_t *knot_pkt_new(void *, uint16_t, knot_mm_t *);
 void knot_pkt_free(knot_pkt_t **);
 int knot_pkt_parse(knot_pkt_t *, unsigned int);
+int knot_pkt_reserve(knot_pkt_t *pkt, uint16_t size);
+uint8_t knot_edns_get_version(const knot_rrset_t *);
+uint16_t knot_edns_get_payload(const knot_rrset_t *);
+bool knot_edns_has_option(const knot_rrset_t *, uint16_t);
+uint8_t *knot_edns_get_option(const knot_rrset_t *, uint16_t);
+int knot_edns_add_option(knot_rrset_t *, uint16_t, uint16_t, const uint8_t *, knot_mm_t *);
+uint16_t knot_edns_client_subnet_size(const knot_edns_client_subnet_t *);
+int knot_edns_client_subnet_write(uint8_t *, uint16_t, const knot_edns_client_subnet_t *);
+int knot_edns_client_subnet_parse(knot_edns_client_subnet_t *, const uint8_t *, uint16_t);
 struct kr_rplan *kr_resolve_plan(struct kr_request *);
 knot_mm_t *kr_resolve_pool(struct kr_request *);
 struct kr_query *kr_rplan_push(struct kr_rplan *, struct kr_query *, const knot_dname_t *, uint16_t, uint16_t);
diff --git a/daemon/lua/kres-gen.sh b/daemon/lua/kres-gen.sh
index 5752f2e03..aa6e4391f 100755
--- a/daemon/lua/kres-gen.sh
+++ b/daemon/lua/kres-gen.sh
@@ -49,6 +49,7 @@ typedef void (*trace_callback_f)(struct kr_request *);
 	knot_pktsection_t
 	struct knot_pkt
 	knot_pkt_t
+	knot_edns_client_subnet_t
 # generics
 	map_t
 # libkres
@@ -127,6 +128,17 @@ printf "\tchar _stub[];\n};\n"
 	knot_pkt_new
 	knot_pkt_free
 	knot_pkt_parse
+	knot_pkt_reserve
+# OPT
+	knot_edns_get_version
+	knot_edns_get_payload
+	knot_edns_has_option
+	knot_edns_get_option
+	knot_edns_add_option
+	knot_edns_client_subnet_size
+	knot_edns_client_subnet_write
+	knot_edns_client_subnet_parse
+	knot_edns_client_subnet_set_addr
 EOF

 ## libkres API
diff --git a/lib/cache/api.c b/lib/cache/api.c
index 63e205740..d0d60d274 100644
--- a/lib/cache/api.c
+++ b/lib/cache/api.c
@@ -154,7 +154,8 @@ int kr_cache_sync(struct kr_cache *cache)
 	return kr_ok();
 }
 
-int kr_cache_insert_rr(struct kr_cache *cache, const knot_rrset_t *rr, const knot_rrset_t *rrsig, uint8_t rank, uint32_t timestamp)
+int kr_cache_insert_rr(struct kr_cache *cache, const knot_rrset_t *rr, const knot_rrset_t *rrsig,
+		       uint8_t rank, uint32_t timestamp, const uint8_t *scope, int scope_len_bits)
 {
 	int err = stash_rrset_precond(rr, NULL);
 	if (err <= 0) {
@@ -279,9 +280,43 @@ static bool check_rrtype(uint16_t type, const struct kr_query *qry/*logging*/)
 	return ret;
 }
 
+/**
+ * Return the cache scope as a hex string.
+ */
+static char *cache_scope_hex(const uint8_t *scope, int scope_len_bits)
+{
+	const int len = (scope_len_bits + 7) / 8;
+	char *hex_str = calloc(1, len * 2 + 1);
+	for (int i = 0; i < len; ++i) {
+		snprintf(hex_str + (i * 2), 3, "%02x", scope[i]);
+	}
+	return hex_str;
+}
+
+int cache_key_write_scope(struct key *k, size_t off, const uint8_t *scope, int scope_len_bits)
+{
+	const int scope_len_bytes = (scope_len_bits + 7) / 8;
+	if (!k || !scope || off + scope_len_bytes + 1 > KR_CACHE_KEY_MAXLEN) {
+		return kr_error(EINVAL);
+	}
+
+	/* Write the scope at the current offset */
+	memmove(k->buf + off, scope, scope_len_bytes);
+
+	/* Write a terminal byte to distinguish 'no scope' from 'global scope' */
+	k->buf[off + scope_len_bytes] = '\0';
+
+	return scope_len_bytes + 1;
+}
+
 /** Like key_exact_type() but omits a couple checks not holding for pkt cache. */
-knot_db_val_t key_exact_type_maypkt(struct key *k, uint16_t type)
+knot_db_val_t key_exact_type_maypkt(struct key *k, uint16_t type, const uint8_t *scope, int scope_len_bits)
 {
+	if (!is_scopable_type(type)) {
+		scope = NULL;
+		scope_len_bits = 0;
+	}
+
 	assert(check_rrtype(type, NULL));
 	switch (type) {
 	case KNOT_RRTYPE_RRSIG: /* no RRSIG query caching, at least for now */
@@ -298,14 +333,23 @@ knot_db_val_t key_exact_type_maypkt(struct key *k, uint16_t type)
 	int name_len = k->buf[0];
 	k->buf[name_len + 1] = 0; /* make sure different names can never match */
 	k->buf[name_len + 2] = 'E'; /* tag for exact name+type matches */
-	memcpy(k->buf + name_len + 3, &type, 2);
+
+	size_t off = name_len + 3;
+	memcpy(k->buf + off, &type, sizeof(type));
 	k->type = type;
-	/* CACHE_KEY_DEF: key == dname_lf + '\0' + 'E' + RRTYPE */
-	return (knot_db_val_t){ k->buf + 1, name_len + 4 };
+	off += sizeof(type);
+
+	int ret = cache_key_write_scope(k, off, scope, scope_len_bits);
+	if (ret > 0) {
+		off += ret;
+	}
+
+	/* CACHE_KEY_DEF: key == dname_lf + '\0' + 'E' + RRTYPE + scope */
+	return (knot_db_val_t){ k->buf + 1, off - 1 };
 }
 
 /** Like key_exact_type_maypkt but with extra checks if used for RRs only. */
-static knot_db_val_t key_exact_type(struct key *k, uint16_t type)
+static knot_db_val_t key_exact_type(struct key *k, uint16_t type, const uint8_t *scope, int scope_len_bits)
 {
 	switch (type) {
 	/* Sanity check: forbidden types represented in other way(s). */
@@ -314,7 +358,7 @@ static knot_db_val_t key_exact_type(struct key *k, uint16_t type)
 		assert(false);
 		return (knot_db_val_t){ NULL, 0 };
 	}
-	return key_exact_type_maypkt(k, type);
+	return key_exact_type_maypkt(k, type, scope, scope_len_bits);
 }
 
@@ -381,9 +425,17 @@ static int cache_peek_real(kr_layer_t *ctx, knot_pkt_t *pkt)
 	/** 1. find the name or the closest (available) zone, not considering wildcards
 	 *   1a. exact name+type match (can be negative answer in insecure zones)
 	 */
-	knot_db_val_t key = key_exact_type_maypkt(k, qry->stype);
+	knot_db_val_t key = key_exact_type_maypkt(k, qry->stype, req->cache_scope, req->cache_scope_len_bits);
 	knot_db_val_t val = { NULL, 0 };
 	ret = cache_op(cache, read, &key, &val, 1);
+	/* If the name is expected to be scoped but there is no scoped result
+	 * in the cache, check the global scope, as the name may not be scoped
+	 * by the server. */
+	if (req->cache_scope != NULL && ret == -abs(ENOENT)) {
+		/* Retry using the global scope */
+		VERBOSE_MSG(qry, "=> searching global scope /0\n");
+		key = key_exact_type_maypkt(k, qry->stype, req->cache_scope, 0);
+		ret = cache_op(cache, read, &key, &val, 1);
+	}
 	if (!ret) {
 		/* found an entry: test conditions, materialize into pkt, etc. */
 		ret = found_exact_hit(ctx, pkt, val, lowest_rank);
@@ -394,6 +446,12 @@ static int cache_peek_real(kr_layer_t *ctx, knot_pkt_t *pkt)
 		assert(false);
 		return ctx->state;
 	} else if (!ret) {
+		WITH_VERBOSE(qry) {
+			if (req->cache_scope && is_scopable_type(qry->stype)) {
+				auto_free char *hex_str = cache_scope_hex(req->cache_scope, req->cache_scope_len_bits);
+				VERBOSE_MSG(qry, "=> found exact match in scope %s/%d\n", hex_str, req->cache_scope_len_bits);
+			}
+		}
 		return KR_STATE_DONE;
 	}
 
@@ -562,7 +620,7 @@ do_soa:
 	/* Assuming k->buf still starts with zone's prefix,
 	 * look up the SOA in cache. */
 	k->buf[0] = k->zlf_len;
-	key = key_exact_type(k, KNOT_RRTYPE_SOA);
+	key = key_exact_type(k, KNOT_RRTYPE_SOA, NULL, 0);
 	knot_db_val_t val = { NULL, 0 };
 	ret = cache_op(cache, read, &key, &val, 1);
 	const struct entry_h *eh;
@@ -735,6 +793,7 @@ static ssize_t stash_rrset(struct kr_cache *cache, const struct kr_query *qry, c
 	int ret = 0;
 
 	/* Construct the key under which RRs will be stored. */
+	int used_scope_len = -1;
 	struct key k_storage, *k = &k_storage;
 	knot_db_val_t key;
 	switch (rr->type) {
@@ -756,7 +815,22 @@ static ssize_t stash_rrset(struct kr_cache *cache, const struct kr_query *qry, c
 			assert(!ret);
 			return kr_error(ret);
 		}
-		key = key_exact_type(k, rr->type);
+		/* Scope the record if it is authoritative and of a scopable type */
+		const uint8_t *scope = NULL;
+		int scope_len = 0;
+		if (qry) {
+			struct kr_request *req = qry->request;
+			/* Exclude infrastructure service requests (e.g. A/AAAA for an NS set)
+			 * and exclude non-authoritative data (records from other sections)
+			 */
+			if (!qry->parent && kr_rank_test(rank, KR_RANK_AUTH) && is_scopable_type(rr->type)) {
+				scope = req->cache_scope;
+				scope_len = req->cache_scope_len_bits;
+				used_scope_len = scope_len;
+			}
+		}
+
+		key = key_exact_type(k, rr->type, scope, scope_len);
 	}
 
 	/* Compute materialized sizes of the new data. */
@@ -811,10 +885,10 @@ static ssize_t stash_rrset(struct kr_cache *cache, const struct kr_query *qry, c
 		}
 		auto_free char *type_str = kr_rrtype_text(rr->type),
 			*encl_str = kr_dname_text(encloser);
-		VERBOSE_MSG(qry, "=> stashed rank: 0%.2o, %s %s%s "
+		VERBOSE_MSG(qry, "=> stashed rank: 0%.2o, %s %s%s, scoped: %d "
 			"(%d B total, incl. %d RRSIGs)\n",
 			rank, type_str, (wild_labels ? "*." : ""), encl_str,
-			(int)val_new_entry.len, (rr_sigs ? rr_sigs->rrs.rr_count : 0)
+			used_scope_len, (int)val_new_entry.len, (rr_sigs ? rr_sigs->rrs.rr_count : 0)
 			);
 	}
 
@@ -902,8 +976,15 @@ static int answer_simple_hit(kr_layer_t *ctx, knot_pkt_t *pkt, uint16_t type,
 	if (qry->flags.DNSSEC_INSECURE) {
 		qry->flags.DNSSEC_WANT = false;
 	}
-	VERBOSE_MSG(qry, "=> satisfied by exact RR or CNAME: rank 0%.2o, new TTL %d\n",
-		eh->rank, new_ttl);
+
+	WITH_VERBOSE(qry) {
+		auto_free char *scope_hex = NULL;
+		if (req->cache_scope && is_scopable_type(type)) {
+			scope_hex = cache_scope_hex(req->cache_scope, req->cache_scope_len_bits);
+		}
+		VERBOSE_MSG(qry, "=> satisfied by exact RR or CNAME: rank 0%.2o, new TTL %d, scope %s/%d\n",
+			eh->rank, new_ttl, scope_hex ? scope_hex : "", scope_hex ? req->cache_scope_len_bits : 0);
+	}
 	return kr_ok();
 }
 #undef CHECK_RET
@@ -954,7 +1035,8 @@ static int try_wild(struct key *k, struct answer *ans, const knot_dname_t *clenc
 		const uint16_t type, const uint8_t lowest_rank,
 		const struct kr_query *qry, struct kr_cache *cache)
 {
-	knot_db_val_t key = key_exact_type(k, type);
+	const struct kr_request *req = qry->request;
+	knot_db_val_t key = key_exact_type(k, type, req->cache_scope, req->cache_scope_len_bits);
 	/* Find the record. */
 	knot_db_val_t val = { NULL, 0 };
 	int ret = cache_op(cache, read, &key, &val, 1);
@@ -1012,7 +1094,7 @@ static int peek_exact_real(struct kr_cache *cache, const knot_dname_t *name, uin
 	int ret = kr_dname_lf(k->buf, name, false);
 	if (ret) return kr_error(ret);
 
-	knot_db_val_t key = key_exact_type(k, type);
+	knot_db_val_t key = key_exact_type(k, type, NULL, 0);
 	knot_db_val_t val = { NULL, 0 };
 	ret = cache_op(cache, read, &key, &val, 1);
 	if (!ret) ret = entry_h_seek(&val, type);
@@ -1070,9 +1152,18 @@ static knot_db_val_t closest_NS(kr_layer_t *ctx, struct key *k)
 	/* Inspect the NS/xNAME entries, shortening by a label on each iteration. */
 	do {
 		k->buf[0] = zlf_len;
-		knot_db_val_t key = key_exact_type(k, KNOT_RRTYPE_NS);
+		/* Look for a CNAME on the exact match to allow scoping, NS otherwise.
+		 * The CNAME is going to get rewritten to an NS key, but it will be scoped if possible.
+		 */
+		const uint16_t find_type = exact_match ? KNOT_RRTYPE_CNAME : KNOT_RRTYPE_NS;
+		knot_db_val_t key = key_exact_type(k, find_type, req->cache_scope, req->cache_scope_len_bits);
 		knot_db_val_t val = VAL_EMPTY;
 		int ret = cache_op(cache, read, &key, &val, 1);
+		/* Try the global scope if scoped, but no immediate match was found */
+		if (exact_match && req->cache_scope != NULL && ret == -abs(ENOENT)) {
+			key = key_exact_type_maypkt(k, KNOT_RRTYPE_NS, req->cache_scope, 0);
+			ret = cache_op(cache, read, &key, &val, 1);
+		}
 		if (ret == -abs(ENOENT)) goto next_label;
 		if (ret) {
 			assert(!ret);
diff --git a/lib/cache/api.h b/lib/cache/api.h
index 35ac4ba75..fac1b2252 100644
--- a/lib/cache/api.h
+++ b/lib/cache/api.h
@@ -99,10 +99,13 @@ static inline void kr_cache_make_checkpoint(struct kr_cache *cache)
  * @param rrsig RRSIG for inserted RRSet (optional)
  * @param rank rank of the data
  * @param timestamp current time
+ * @param scope scope of the record
+ * @param scope_len_bits length of the scope in bits
  * @return 0 or an errcode
  */
 KR_EXPORT
-int kr_cache_insert_rr(struct kr_cache *cache, const knot_rrset_t *rr, const knot_rrset_t *rrsig, uint8_t rank, uint32_t timestamp);
+int kr_cache_insert_rr(struct kr_cache *cache, const knot_rrset_t *rr, const knot_rrset_t *rrsig,
+		       uint8_t rank, uint32_t timestamp, const uint8_t *scope, int scope_len_bits);
 
 /**
  * Clear all items from the cache.
diff --git a/lib/cache/entry_pkt.c b/lib/cache/entry_pkt.c
index 972bae62f..ad6e64779 100644
--- a/lib/cache/entry_pkt.c
+++ b/lib/cache/entry_pkt.c
@@ -135,7 +135,7 @@ void stash_pkt(const knot_pkt_t *pkt, const struct kr_query *qry,
 		assert(owner == NULL);
 		return;
 	}
-	key = key_exact_type_maypkt(k, pkt_type);
+	key = key_exact_type_maypkt(k, pkt_type, NULL, 0);
 
 	/* For now we stash the full packet byte-exactly as it came from upstream. */
 	const uint16_t pkt_size = pkt->size;
diff --git a/lib/cache/impl.h b/lib/cache/impl.h
index 9b0003430..7087aaf25 100644
--- a/lib/cache/impl.h
+++ b/lib/cache/impl.h
@@ -88,10 +88,36 @@ static inline size_t key_nwz_off(const struct key *k)
 	return k->zlf_len + 2;
 }
 
+/**
+ * This does not exactly implement https://datatracker.ietf.org/doc/rfc7871/
+ * (section 7.3.1): there, only DNSSEC records and records from non-answer
+ * sections are exempt from being scoped to the given network. However, ECS
+ * is used almost exclusively for traffic engineering, and many types are not
+ * meant for that. The NS record can also show up in the answer section in a
+ * parent-child setup, but it should not be scoped.
+ */
+static inline bool is_scopable_type(uint16_t type)
+{
+	return type == KNOT_RRTYPE_A || type == KNOT_RRTYPE_AAAA || type == KNOT_RRTYPE_CNAME;
+}
+
+/**
+ * Write the cache key scope after the formatted lookup key.
+ * The lookup key in k->buf looks roughly like this:
+ *  off -- len (bytes)
+ *  0   .. 1  domain name length (d)
+ *  1   .. d  domain name in LF format (d = 0 .. 255)
+ *      .. 1  name terminator '\x00'
+ *      .. 1  tag ('E' or '1')
+ *
+ * The 'E' tag is followed by additional information:
+ *      .. 2  RR type
+ *      .. s  cache scope (e.g. [192 168 1], s = 0 .. 16)
+ *      .. 1  scope terminator '\x00' (distinguishes 'no scope' from 'global scope')
+ */
+int cache_key_write_scope(struct key *k, size_t off, const uint8_t *scope, int scope_len_bits);
+
 /** Finish constructing string key for for exact search.
  * It's assumed that kr_dname_lf(k->buf, owner, *) had been ran. */
-knot_db_val_t key_exact_type_maypkt(struct key *k, uint16_t type);
+knot_db_val_t key_exact_type_maypkt(struct key *k, uint16_t type, const uint8_t *scope, int scope_len_bits);
 
 
 /* entry_h chaining; implementation in ./entry_list.c */
diff --git a/lib/resolve.c b/lib/resolve.c
index f93e30cf3..7e97c1b8f 100644
--- a/lib/resolve.c
+++ b/lib/resolve.c
@@ -707,7 +707,6 @@ static int query_finalize(struct kr_request *request, struct kr_query *qry, knot
 				knot_edns_set_do(pkt->opt_rr);
 				knot_wire_set_cd(pkt->wire);
 			}
-			ret = edns_put(pkt);
 		}
 	}
 	return ret;
 }
@@ -1535,14 +1534,6 @@ int kr_resolve_checkout(struct kr_request *request, struct sockaddr *src,
 	}
 	struct kr_query *qry = array_tail(rplan->pending);
 
-	/* Run the checkout layers and cancel on failure. */
-	int state = request->state;
-	ITERATE_LAYERS(request, qry, checkout, packet, dst, type);
-	if (request->state == KR_STATE_FAIL) {
-		request->state = state; /* Restore */
-		return kr_error(ECANCELED);
-	}
-
 #if defined(ENABLE_COOKIES)
 	/* Update DNS cookies in request. */
 	if (type == SOCK_DGRAM) { /* @todo: Add cookies also over TCP? */
@@ -1562,6 +1553,24 @@ int kr_resolve_checkout(struct kr_request *request, struct sockaddr *src,
 		return kr_error(EINVAL);
 	}
 
+	/* Run the checkout layers and cancel on failure.
+	 * The checkout layer doesn't persist the state, so canceled subrequests
+	 * don't affect the resolution or the rest of the processing.
+	 */
+	int state = request->state;
+	ITERATE_LAYERS(request, qry, checkout, packet, dst, type);
+	if (request->state == KR_STATE_FAIL) {
+		request->state = state; /* Restore */
+		return kr_error(ECANCELED);
+	}
+
+	/* Write down OPT unless in safemode */
+	if (!(qry->flags.SAFEMODE)) {
+		ret = edns_put(packet);
+		if (ret != 0) {
+			return kr_error(EINVAL);
+		}
+	}
+
 	WITH_VERBOSE(qry) {
 		char qname_str[KNOT_DNAME_MAXLEN], zonecut_str[KNOT_DNAME_MAXLEN], ns_str[INET6_ADDRSTRLEN], type_str[16];
 		knot_dname_to_str(qname_str, knot_pkt_qname(packet), sizeof(qname_str));
diff --git a/lib/resolve.h b/lib/resolve.h
index 011679ec6..5a1d0808e 100644
--- a/lib/resolve.h
+++ b/lib/resolve.h
@@ -215,6 +215,8 @@ struct kr_request {
 	int has_tls;
 	trace_log_f trace_log;         /**< Logging tracepoint */
 	trace_callback_f trace_finish; /**< Request finish tracepoint */
+	int cache_scope_len_bits;      /**< Cache scope length (bits) */
+	const uint8_t *cache_scope;    /**< Cache scope for the request */
 	knot_mm_t pool;
 };
 
diff --git a/tests/test_cache.c b/tests/test_cache.c
index 47dafc500..4d5dc693b 100644
--- a/tests/test_cache.c
+++ b/tests/test_cache.c
@@ -207,8 +207,8 @@ static void test_invalid(void **state)
 	assert_int_not_equal(kr_cache_peek(cache, KR_CACHE_USER, NULL, KNOT_RRTYPE_TSIG, &entry, &timestamp), 0);
 	assert_int_not_equal(kr_cache_peek_rr(NULL, NULL, NULL, NULL, NULL), 0);
 	assert_int_not_equal(kr_cache_peek_rr(cache, NULL, NULL, NULL, NULL), 0);
-	assert_int_not_equal(kr_cache_insert_rr(cache, NULL, 0, 0, 0), 0);
-	assert_int_not_equal(kr_cache_insert_rr(NULL, NULL, 0, 0, 0), 0);
+	assert_int_not_equal(kr_cache_insert_rr(cache, NULL, 0, 0, 0, 0, 0), 0);
+	assert_int_not_equal(kr_cache_insert_rr(NULL, NULL, 0, 0, 0, 0, 0), 0);
 	assert_int_not_equal(kr_cache_insert(NULL, KR_CACHE_USER, dname,
 		KNOT_RRTYPE_TSIG, &global_fake_ce, global_namedb_data), 0);
 	assert_int_not_equal(kr_cache_insert(cache, KR_CACHE_USER, NULL,
@@ -227,7 +227,7 @@ static void test_insert_rr(void **state)
 {
 	test_random_rr(&global_rr, CACHE_TTL);
 	struct kr_cache *cache = (*state);
-	int ret = kr_cache_insert_rr(cache, &global_rr, 0, 0, CACHE_TIME);
+	int ret = kr_cache_insert_rr(cache, &global_rr, 0, 0, CACHE_TIME, 0, 0);
 	assert_int_equal(ret, 0);
 	kr_cache_sync(cache);
 }
@@ -326,7 +326,7 @@ static void test_fill(void **state)
 	for (unsigned i = 0; i < CACHE_SIZE; ++i) {
 		knot_rrset_t rr;
 		test_random_rr(&rr, CACHE_TTL);
-		ret = kr_cache_insert_rr(cache, &rr, 0, 0, CACHE_TTL - 1);
+		ret = kr_cache_insert_rr(cache, &rr, 0, 0, CACHE_TTL - 1, 0, 0);
 		if (ret != 0) {
 			break;
 		}
-- 
2.47.2
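
Three standalone sketches follow; they are illustrative notes, not part of the patch, and every hard-coded name, buffer, and constant in them is an assumption. The first one rebuilds the scoped key layout produced by key_exact_type_maypkt() and cache_key_write_scope(), i.e. CACHE_KEY_DEF: dname_lf + '\0' + 'E' + RRTYPE + scope + '\0', for www.example.com/A under the scope 192.168.1.0/24.

/* Standalone sketch of the scoped cache key layout. The LF-format name
 * for "www.example.com" is hard-coded; in libkres it is produced by
 * kr_dname_lf(). */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	const uint8_t dname_lf[] = "com\0example\0www";   /* labels reversed */
	const size_t name_len = sizeof(dname_lf) - 1;     /* 15 */
	const uint16_t type = 1;                          /* KNOT_RRTYPE_A */
	const uint8_t scope[] = { 192, 168, 1 };          /* 192.168.1.0/24 */
	const int scope_len_bits = 24;
	const int scope_len_bytes = (scope_len_bits + 7) / 8;

	uint8_t key[300];
	size_t off = 0;
	memcpy(key + off, dname_lf, name_len);  off += name_len;
	key[off++] = '\0';  /* make sure different names can never match */
	key[off++] = 'E';   /* tag for exact name+type matches */
	memcpy(key + off, &type, sizeof(type)); off += sizeof(type); /* host order, as in the patch */
	memcpy(key + off, scope, scope_len_bytes); off += scope_len_bytes;
	key[off++] = '\0';  /* terminal byte: 'global scope' rather than 'no scope' */

	printf("scoped key is %zu bytes:", off);
	for (size_t i = 0; i < off; ++i)
		printf(" %02x", key[i]);
	printf("\n");
	return 0;
}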
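
The second sketch mirrors the read-side fallback used in cache_peek_real() and closest_NS(): look up the key carrying the client scope first and, on ENOENT, retry with the global scope /0 (scope pointer set, zero bits, so the '\0' scope terminator is still appended and the key differs from an unscoped one). The tiny_put()/tiny_get() table is a hypothetical stand-in for cache_op(); only the fallback control flow follows the patch.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct kv { uint8_t key[64]; size_t klen; const char *val; };
static struct kv table[8];
static size_t table_len;

static void tiny_put(const uint8_t *key, size_t klen, const char *val)
{
	memcpy(table[table_len].key, key, klen);
	table[table_len].klen = klen;
	table[table_len].val = val;
	++table_len;
}

static const char *tiny_get(const uint8_t *key, size_t klen)
{
	for (size_t i = 0; i < table_len; ++i)
		if (table[i].klen == klen && !memcmp(table[i].key, key, klen))
			return table[i].val;
	return NULL;
}

/* Append the scope suffix: scope bytes plus the '\0' terminator.
 * A global-scope key (scope set, 0 bits) still gets the terminator. */
static size_t key_with_scope(uint8_t *dst, const uint8_t *base, size_t base_len,
                             const uint8_t *scope, int scope_len_bits)
{
	size_t off = base_len;
	memcpy(dst, base, base_len);
	if (scope) {
		const int bytes = (scope_len_bits + 7) / 8;
		memcpy(dst + off, scope, bytes);
		off += bytes;
		dst[off++] = '\0';
	}
	return off;
}

int main(void)
{
	/* Illustrative unscoped key prefix: dname_lf + '\0' + 'E' + RRTYPE */
	const uint8_t base[] = "com\0example\0www\0E\x01\x00";
	const size_t base_len = sizeof(base) - 1;
	const uint8_t subnet[] = { 192, 168, 1 };
	uint8_t key[64];
	size_t klen;

	/* The record was stashed under the global scope only (0 bits). */
	klen = key_with_scope(key, base, base_len, subnet, 0);
	tiny_put(key, klen, "192.0.2.1");

	/* Peek: try the client's /24 scope first... */
	klen = key_with_scope(key, base, base_len, subnet, 24);
	const char *val = tiny_get(key, klen);
	if (!val) {
		/* ...then fall back to the global scope /0, as the patch does. */
		klen = key_with_scope(key, base, base_len, subnet, 0);
		val = tiny_get(key, klen);
	}
	printf("answer: %s\n", val ? val : "ENOENT");
	return 0;
}

Run as-is, this prints "answer: 192.0.2.1": the /24 probe misses and the global entry answers, which is exactly the non-exhaustive search described in the commit message.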
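
The third sketch models the write-side gating in stash_rrset(): a record is stored under the request scope only when it answers the client's own query (qry->parent == NULL), carries KR_RANK_AUTH, and is of a type accepted by is_scopable_type(); everything else stays in the global scope. The types and rank constant below are minimal stand-ins for the libknot/libkres definitions.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

enum { RRTYPE_A = 1, RRTYPE_NS = 2, RRTYPE_CNAME = 5, RRTYPE_AAAA = 28 };
enum { RANK_AUTH = 16 };  /* mirrors KR_RANK_AUTH from the kr_rank enum */

/* Mirrors is_scopable_type(): only address records and CNAME follow the scope. */
static bool is_scopable_type(uint16_t type)
{
	return type == RRTYPE_A || type == RRTYPE_AAAA || type == RRTYPE_CNAME;
}

struct toy_query { bool has_parent; };

static bool should_scope(const struct toy_query *qry, uint8_t rank, uint16_t type)
{
	/* Infrastructure subrequests (e.g. NS address lookups) have a parent
	 * query and stay in the global scope, as do non-authoritative records. */
	return !qry->has_parent && (rank & RANK_AUTH) && is_scopable_type(type);
}

int main(void)
{
	const struct toy_query client = { .has_parent = false };
	const struct toy_query infra  = { .has_parent = true };

	printf("client A, AUTH     -> %s\n", should_scope(&client, RANK_AUTH, RRTYPE_A) ? "scoped" : "global");
	printf("client NS, AUTH    -> %s\n", should_scope(&client, RANK_AUTH, RRTYPE_NS) ? "scoped" : "global");
	printf("infra AAAA, AUTH   -> %s\n", should_scope(&infra, RANK_AUTH, RRTYPE_AAAA) ? "scoped" : "global");
	printf("client A, non-AUTH -> %s\n", should_scope(&client, 0, RRTYPE_A) ? "scoped" : "global");
	return 0;
}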