From: Vladimír Čunát Date: Thu, 12 Jul 2018 14:20:20 +0000 (+0200) Subject: cache: rdataset layout changes in knot 2.7 X-Git-Tag: v3.0.0~6^2~20 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=aac0d54cf585b42562ff2df5d7d581a1a0989da8;p=thirdparty%2Fknot-resolver.git cache: rdataset layout changes in knot 2.7 --- diff --git a/lib/cache/api.c b/lib/cache/api.c index 63fe1caf7..c848cd331 100644 --- a/lib/cache/api.c +++ b/lib/cache/api.c @@ -49,7 +49,7 @@ /** Cache version */ -static const uint16_t CACHE_VERSION = 4; +static const uint16_t CACHE_VERSION = 5; /** Key size */ #define KEY_HSIZE (sizeof(uint8_t) + sizeof(uint16_t)) #define KEY_SIZE (KEY_HSIZE + KNOT_DNAME_MAXLEN) @@ -501,6 +501,7 @@ static ssize_t stash_rrset(struct kr_cache *cache, const struct kr_query *qry, /* Compute materialized sizes of the new data. */ const knot_rdataset_t *rds_sigs = rr_sigs ? &rr_sigs->rrs : NULL; const int rr_ssize = rdataset_dematerialize_size(&rr->rrs); + assert(rr_ssize == to_even(rr_ssize)); knot_db_val_t val_new_entry = { .data = NULL, .len = offsetof(struct entry_h, data) + rr_ssize @@ -532,6 +533,19 @@ static ssize_t stash_rrset(struct kr_cache *cache, const struct kr_query *qry, } assert(entry_h_consistent(val_new_entry, rr->type)); + #if 0 /* Occasionally useful when debugging some kinds of changes. */ + { + kr_cache_sync(cache); + knot_db_val_t val = { NULL, 0 }; + ret = cache_op(cache, read, &key, &val, 1); + if (ret != kr_error(ENOENT)) { // ENOENT might happen in some edge case, I guess + assert(!ret); + entry_list_t el; + entry_list_parse(val, el); + } + } + #endif + /* Update metrics */ cache->stats.insert += 1; diff --git a/lib/cache/entry_list.c b/lib/cache/entry_list.c index e6e1a2279..6a5001c1b 100644 --- a/lib/cache/entry_list.c +++ b/lib/cache/entry_list.c @@ -44,7 +44,7 @@ void entry_list_memcpy(struct entry_apex *ea, entry_list_t list) } else { list[i].data = it; } - it += list[i].len; + it += to_even(list[i].len); } } @@ -85,7 +85,7 @@ int entry_list_parse(const knot_db_val_t val, entry_list_t list) default: return kr_error(EILSEQ); }; - it += list[i].len; + it += to_even(list[i].len); } /* Parse every entry_h. */ for (int i = ENTRY_APEX_NSECS_CNT; i < EL_LENGTH; ++i) { @@ -112,7 +112,7 @@ int entry_list_parse(const knot_db_val_t val, entry_list_t list) return kr_error(len); } list[i].len = len; - it += len; + it += to_even(len); } assert(it == it_bound); return kr_ok(); @@ -132,24 +132,22 @@ static int entry_h_len(const knot_db_val_t val) if (!eh->is_packet) { /* Positive RRset + its RRsig set (may be empty). */ int sets = 2; while (sets-- > 0) { - if (d + 2 > data_bound) return kr_error(EILSEQ); - uint16_t rr_count; - memcpy(&rr_count, d, sizeof(rr_count)); - d += sizeof(rr_count); - for (int i = 0; i < rr_count; ++i) { - if (d + 2 > data_bound) return kr_error(EILSEQ); - uint16_t len; - memcpy(&len, d, sizeof(len)); - d += 2 + len; + d += rdataset_dematerialized_size(d); + if (d > data_bound) { + assert(!EILSEQ); + return kr_error(EILSEQ); } } } else { /* A "packet" (opaque ATM). */ - if (d + 2 > data_bound) return kr_error(EILSEQ); uint16_t len; + if (d + sizeof(len) > data_bound) return kr_error(EILSEQ); memcpy(&len, d, sizeof(len)); - d += 2 + len; + d += 2 + to_even(len); + } + if (d > data_bound) { + assert(!EILSEQ); + return kr_error(EILSEQ); } - if (d > data_bound) return kr_error(EILSEQ); return d - (uint8_t *)val.data; } diff --git a/lib/cache/entry_rr.c b/lib/cache/entry_rr.c index af2e95736..f62bef468 100644 --- a/lib/cache/entry_rr.c +++ b/lib/cache/entry_rr.c @@ -23,23 +23,24 @@ int rdataset_dematerialize(const knot_rdataset_t *rds, uint8_t * restrict data) { - //const void *data0 = data; - assert(data); + /* FIXME: either give up on even alignment and thus direct usability + * of rdatasets as they are in lmdb, or align inside cdb_* functions + * (request sizes one byte longer and shift iff on an odd address). */ + //if ((size_t)data & 1) VERBOSE_MSG(NULL, "dematerialize: odd address\n"); + //const uint8_t *data0 = data; if (!data) { + assert(data); return kr_error(EINVAL); } const uint16_t rr_count = rds ? rds->rr_count : 0; memcpy(data, &rr_count, sizeof(rr_count)); data += sizeof(rr_count); - - knot_rdata_t *rd = rds ? rds->data : NULL; - for (int i = 0; i < rr_count; ++i, rd = kr_rdataset_next(rd)) { - const uint16_t len = rd->len; - memcpy(data, &len, sizeof(len)); - data += sizeof(len); - memcpy(data, rd->data, len); - data += len; + if (rr_count) { + size_t size = knot_rdataset_size(rds); + memcpy(data, rds->data, size); + data += size; } + //VERBOSE_MSG(NULL, "dematerialized to %d B\n", (int)(data - data0)); //return data - data0; return kr_ok(); } @@ -50,8 +51,8 @@ int rdataset_dematerialize(const knot_rdataset_t *rds, uint8_t * restrict data) static int rdataset_materialize(knot_rdataset_t * restrict rds, const uint8_t * const data, const uint8_t *data_bound, knot_mm_t *pool) { - /* FIXME: rdataset_t and cache's rdataset have the same binary format now */ - assert(rds && data && data_bound && data_bound > data && !rds->data); + assert(rds && data && data_bound && data_bound > data && !rds->data + /*&& !((size_t)data & 1)*/); assert(pool); /* not required, but that's our current usage; guard leaks */ const uint8_t *d = data; /* iterates over the cache data */ { @@ -64,34 +65,22 @@ static int rdataset_materialize(knot_rdataset_t * restrict rds, const uint8_t * } } /* First sum up the sizes for wire format length. */ - size_t rdata_len_sum = 0; - for (int i = 0; i < rds->rr_count; ++i) { - if (d + 2 > data_bound) { - VERBOSE_MSG(NULL, "materialize: EILSEQ!\n"); - return kr_error(EILSEQ); - } - uint16_t len; - memcpy(&len, d, sizeof(len)); - d += sizeof(len) + len; - rdata_len_sum += len; + const knot_rdataset_t rds_tmp = { + .rr_count = rds->rr_count, + .data = (knot_rdata_t *)d, + }; + size_t rds_size = knot_rdataset_size(&rds_tmp); /* TODO: we might overrun here already, + but we need to trust cache anyway...*/ + if (d + rds_size > data_bound) { + VERBOSE_MSG(NULL, "materialize: EILSEQ!\n"); + return kr_error(EILSEQ); } - /* Each item in knot_rdataset_t needs rdlength (2B) + rdata */ - rds->data = mm_alloc(pool, rdata_len_sum + (size_t)rds->rr_count * 2); + rds->data = mm_alloc(pool, rds_size); if (!rds->data) { return kr_error(ENOMEM); } - /* Construct the output, one "RR" at a time. */ - d = data + KR_CACHE_RR_COUNT_SIZE; - knot_rdata_t *d_out = rds->data; /* iterates over the output being materialized */ - for (int i = 0; i < rds->rr_count; ++i) { - uint16_t len; - memcpy(&len, d, sizeof(len)); - d += sizeof(len); - knot_rdata_init(d_out, len, d); - d += len; - //d_out = kr_rdataset_next(d_out); - d_out += 2 + len; /* rdlen + rdata */ - } + memcpy(rds->data, d, rds_size); + d += rds_size; //VERBOSE_MSG(NULL, "materialized from %d B\n", (int)(d - data)); return d - data; } diff --git a/lib/cache/impl.h b/lib/cache/impl.h index 88513d030..ab4c9149c 100644 --- a/lib/cache/impl.h +++ b/lib/cache/impl.h @@ -54,6 +54,7 @@ struct entry_h { uint8_t rank : 6; /**< See enum kr_rank */ bool is_packet : 1; /**< Negative-answer packet for insecure/bogus name. */ bool has_optout : 1; /**< Only for packets; persisted DNSSEC_OPTOUT. */ + uint8_t _pad; /**< We need even alignment for data now. */ uint8_t data[]; }; struct entry_apex; @@ -226,11 +227,16 @@ int entry_h_splice( /** Parse an entry_apex into individual items. @return error code. */ int entry_list_parse(const knot_db_val_t val, entry_list_t list); +static inline size_t to_even(size_t n) +{ + return n + (n & 1); +} + static inline int entry_list_serial_size(const entry_list_t list) { int size = offsetof(struct entry_apex, data); for (int i = 0; i < EL_LENGTH; ++i) { - size += list[i].len; + size += to_even(list[i].len); } return size; } @@ -281,16 +287,22 @@ int32_t get_new_ttl(const struct entry_h *entry, const struct kr_query *qry, /** Size of the RR count field */ #define KR_CACHE_RR_COUNT_SIZE sizeof(uint16_t) -/** Compute size of dematerialized rdataset. NULL is accepted as empty set. */ +/** Compute size of serialized rdataset. NULL is accepted as empty set. */ static inline int rdataset_dematerialize_size(const knot_rdataset_t *rds) { - return KR_CACHE_RR_COUNT_SIZE + (rds == NULL ? 0 - : knot_rdataset_size(rds) - 4 * rds->rr_count /*TTLs*/); + return KR_CACHE_RR_COUNT_SIZE + (rds == NULL ? 0 : knot_rdataset_size(rds)); } -/** Dematerialize a rdataset. */ -int rdataset_dematerialize(const knot_rdataset_t *rds, uint8_t * restrict data); +static inline int rdataset_dematerialized_size(const uint8_t *data) +{ + knot_rdataset_t rds; + memcpy(&rds.rr_count, data, sizeof(rds.rr_count)); + rds.data = (knot_rdata_t *)(data + sizeof(rds.rr_count)); + return sizeof(rds.rr_count) + knot_rdataset_size(&rds); +} +/** Serialize an rdataset. */ +int rdataset_dematerialize(const knot_rdataset_t *rds, uint8_t * restrict data); /** Partially constructed answer when gathering RRsets from cache. */ diff --git a/lib/cache/nsec3.c b/lib/cache/nsec3.c index 58f7f562d..27d065eac 100644 --- a/lib/cache/nsec3.c +++ b/lib/cache/nsec3.c @@ -200,7 +200,7 @@ static const char * find_leq_NSEC3(struct kr_cache *cache, const struct kr_query } /* The NSEC3 starts strictly before our target name; * now check that it still belongs into that zone and chain. */ - const knot_rdata_t *nsec_p_raw = eh->data + KR_CACHE_RR_COUNT_SIZE + const uint8_t *nsec_p_raw = eh->data + KR_CACHE_RR_COUNT_SIZE + 2 /* RDLENGTH from rfc1034 */; const int nsec_p_len = nsec_p_rdlen(nsec_p_raw); const bool same_chain = key_found.len == hash_off + NSEC3_HASH_LEN diff --git a/modules/priming/priming.lua b/modules/priming/priming.lua index 5825b564a..2d8046722 100644 --- a/modules/priming/priming.lua +++ b/modules/priming/priming.lua @@ -12,6 +12,8 @@ internal.to_resolve = 0 -- number of pending queries to A or AAAA internal.prime = {} -- function triggering priming query internal.event = nil -- stores event id +local knot_rdata_t_p = ffi.typeof('knot_rdata_t *'); + -- Copy hints from nsset table to resolver engine -- These addresses replace root hints loaded by default from file. -- They are stored outside cache and cache flush will not affect them. @@ -21,7 +23,7 @@ local function publish_hints(nsset) ffi.C.kr_zonecut_set(roothints, kres.str2dname(".")) for dname, addresses in pairs(nsset) do for _, rdata_addr in pairs(addresses) do - ffi.C.kr_zonecut_add(roothints, dname, rdata_addr) + ffi.C.kr_zonecut_add(roothints, dname, ffi.cast(knot_rdata_t_p, rdata_addr)) end end end @@ -50,7 +52,7 @@ local function address_callback(pkt, req) local rr = section[i] if rr.type == kres.type.A or rr.type == kres.type.AAAA then for k = 0, rr.rrs.rr_count-1 do - table.insert(internal.nsset[rr:owner()], rr.rrs:rdata(k)) + table.insert(internal.nsset[rr:owner()], rr:rdata(k)) end end end