};
struct kr_cache_top_context {
uint32_t bloom[32];
- uint32_t cnt;
};
struct kr_request {
struct kr_context *ctx;
double usage_percent;
};
typedef struct uv_timer_s uv_timer_t;
+struct mmapped {
+ void *mem;
+ size_t size;
+ int fd;
+ _Bool write_lock;
+ _Bool persistent;
+};
+struct kr_cache_top {
+ struct mmapped mmapped;
+ struct top_data *data;
+ struct kr_cache_top_context *ctx;
+};
struct kr_cache {
kr_cdb_pt db;
const struct kr_cdb_api *api;
struct kr_cdb_stats stats;
uint32_t ttl_min;
uint32_t ttl_max;
+ struct kr_cache_top top;
struct timeval checkpoint_walltime;
uint64_t checkpoint_monotime;
uv_timer_t *health_timer;
typedef kr_cdb_pt
struct kr_cdb_stats
typedef uv_timer_t
+ struct mmapped
+ struct kr_cache_top
struct kr_cache
# lib/layer.h
kr_layer_t
if (opts->maxsize && (maxsize > opts->maxsize)) {
kr_log_warning(CACHE,
"Warning: real cache size is %zu instead of the requested %zu bytes, removing all data.",
- maxsize, opts->maxsize, fpath);
+ maxsize, opts->maxsize);
cache_op(cache, clear, opts->maxsize);
maxsize = cache->api->get_maxsize(cache->db);
}
#include "contrib/ucw/lib.h"
#include "lib/cache/cdb_lmdb.h"
#include "lib/cache/cdb_api.h"
-#include "lib/cache/top.h"
#include "lib/utils.h"
-#define kr_cache_top_access_cdb(...) { if (env->is_cache) kr_cache_top_access_cdb(__VA_ARGS__); } // TODO remove
-
/// A hacky way allowing usual usage of kr_log_error(MDB, ...)
/// while differentiating between cache and rules in the produced logs.
#define LOG_GRP_MDB (env->is_cache ? LOG_GRP_CACHE : LOG_GRP_RULES)
} txn;
bool is_cache; /**< cache vs. rules; from struct kr_cdb_opts::is_cache */
- struct kr_cache_top *top; // TODO remove
/* Cached part of struct stat for data.mdb. */
dev_t st_dev;
MDB_val _key = val_knot2mdb(key[i]);
MDB_val _val = val_knot2mdb(val[i]);
stats->read++;
- kr_cache_top_access_cdb(env->top, _key.mv_data, _key.mv_size, "readv");
ret = mdb_get(txn, env->dbi, &_key, &_val);
if (ret != MDB_SUCCESS) {
if (ret == MDB_NOTFOUND) {
MDB_val _key = val_knot2mdb(*key);
MDB_val _val = val_knot2mdb(*val);
stats->write++;
- kr_cache_top_access_cdb(env->top, _key.mv_data, _key.mv_size, "write");
/* This is an LMDB-specific optimisation:
* if the caller specifies a value with NULL data and non-zero length,
MDB_val cur_key = val_knot2mdb(*key);
MDB_val cur_val = { 0, NULL };
stats->match++;
- kr_cache_top_access_cdb(env->top, cur_key.mv_data, cur_key.mv_size, "match-prefix");
ret = mdb_cursor_get(cur, &cur_key, &cur_val, MDB_SET_RANGE);
if (ret != MDB_SUCCESS) {
mdb_cursor_close(cur);
}
/* Add to result set */
if (results < maxcount) {
- kr_cache_top_access_cdb(env->top, cur_key.mv_data, cur_key.mv_size, "matched");
keyval[results][0] = val_mdb2knot(cur_key);
keyval[results][1] = val_mdb2knot(cur_val);
++results;
MDB_val key2_m = val_knot2mdb(*key);
MDB_val val2_m = { 0, NULL };
stats->read_leq++;
- kr_cache_top_access_cdb(env->top, key2_m.mv_data, key2_m.mv_size, "leq-query");
ret = mdb_cursor_get(curs, &key2_m, &val2_m, MDB_SET_RANGE);
if (ret) goto failure;
/* test for equality //:unlikely */
ret = 1;
success:
/* finalize the output */
- kr_cache_top_access_cdb(env->top, key2_m.mv_data, key2_m.mv_size, "leq");
*key = val_mdb2knot(key2_m);
*val = val_mdb2knot(val2_m);
return ret;
MDB_val key2_m = val_knot2mdb(*key);
MDB_val val2_m = { 0, NULL };
stats->read_less++;
- kr_cache_top_access_cdb(env->top, key2_m.mv_data, key2_m.mv_size, "less-query");
// It could stay on the same `key` if MDB_PREV was used.
ret = mdb_cursor_get(curs, &key2_m, &val2_m, MDB_PREV_NODUP);
if (!ret) {
/* finalize the output */
- kr_cache_top_access_cdb(env->top, key2_m.mv_data, key2_m.mv_size, "less");
*key = val_mdb2knot(key2_m);
*val = val_mdb2knot(val2_m);
return 1;
#include "lib/mmapped.h"
#include "lib/kru.h"
-// #ifdef LOG_GRP_MDB
-#define VERBOSE_LOG(...) printf("GC KRU " __VA_ARGS__)
-
#define FILE_FORMAT_VERSION 1 // fail if different
-
#define KRU_CAPACITY(cache_size) (cache_size / 128) // KRU size is approx. (8 * capacity) B
// average entry size seems to be 100-200 B,
// make KRU capacity between cache_size/128 and cache_size/64 (power of two)
- // -> KRU size: between cache_size/16 and cache_size/8 (cache data size is the rest)
+ // -> KRU size: between cache_size/16 and cache_size/8 (LMDB size is the rest)
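+	//    e.g. a 100 MiB cache -> capacity 2^20 (the power of two between
+	//    100 Mi/128 and 100 Mi/64) -> KRU size ~8 MiB, i.e. ~8 % of the cache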
#define TICK_SEC 1
#define NORMAL_SIZE (150 + KR_CACHE_SIZE_OVERHEAD) // B; normal size of cache entry
// used as baseline for the following
// -> rate limit: 1/2 per sec (more frequent accesses are incomparable)
// -> half-life: ~5h 3min
-
static inline uint32_t ticks_now(void)
{
	struct timespec now_ts = {0};
	clock_gettime(CLOCK_REALTIME, &now_ts);  // wall clock, so that ticks stay meaningful across restarts
	return now_ts.tv_sec / TICK_SEC;
}
-static inline bool first_access_ro(struct kr_cache_top_context *ctx, kru_hash_t hash) {
- // struct kr_cache_top_context { uint64_t bloom[4]; }
+static inline bool first_access_ro(struct kr_cache_top_context *ctx, kru_hash_t hash)
+{
+ // struct kr_cache_top_context { uint32_t bloom[32]; }
static_assert(sizeof(((struct kr_cache_top_context *)0)->bloom[0]) * 8 == 32);
static_assert(sizeof(((struct kr_cache_top_context *)0)->bloom) * 8 == 32 * 32);
// expected around 40 unique cache accesses per request context, possibly up to ~200;
return !accessed;
}
-static inline bool first_access(struct kr_cache_top_context *ctx, kru_hash_t hash, bool *overfull) {
+static inline bool first_access(struct kr_cache_top_context *ctx, kru_hash_t hash)
+{
if (!first_access_ro(ctx, hash)) return false;
uint8_t *h = (uint8_t *)&hash;
static_assert(sizeof(kru_hash_t) >= 8);
- { // temporal statistics, TODO remove
- int ones = 0;
- for (int i = 0; i < 32; i++) {
- ones += __builtin_popcount(ctx->bloom[i]);
- }
- double collision_prob = ones / 1024.0; // 1-bit collision
- collision_prob *= collision_prob; // 2-bit collision
- collision_prob *= collision_prob; // 4-bit collision
-
- if (collision_prob > 0.1) {
- VERBOSE_LOG("BLOOM %d unique accesses, collision prob. %5.3f %% (%d/1024 ones)\n", ctx->cnt, 100.0 * collision_prob, ones);
- *overfull = true;
- }
- ctx->cnt++;
- }
-
ctx->bloom[h[0] % 32] |= 1u << (h[1] % 32);
ctx->bloom[h[2] % 32] |= 1u << (h[3] % 32);
ctx->bloom[h[4] % 32] |= 1u << (h[5] % 32);
return true;
}
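+
+/* Illustrative estimate, not used by the code: false-positive probability of
+ * the 1024-bit bloom[] above, assuming 4 hash-derived bits per access
+ * (the removed statistics block above implies 4 probed bits):
+ *
+ *	static double bloom_false_positive(int unique_accesses)
+ *	{
+ *		const int m = 1024, k = 4;
+ *		double zeros = 1.0;  // P[a given bit is still zero]
+ *		for (int i = 0; i < k * unique_accesses; ++i)
+ *			zeros *= 1.0 - 1.0 / m;
+ *		double fp = 1.0;     // P[all k probed bits are already set]
+ *		for (int i = 0; i < k; ++i)
+ *			fp *= 1.0 - zeros;
+ *		return fp;  // ~0.04 % at 40 unique accesses, ~9 % at 200
+ *	}
+ */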
-
static inline void get_size_capacity(size_t cache_size, size_t *top_size, size_t *capacity_log)
{
*top_size = 0;
*top_size = offsetof(struct top_data, kru) + KRU.get_size(*capacity_log);
}
-int kr_cache_top_get_size(size_t cache_size)
+size_t kr_cache_top_get_size(size_t cache_size)
{
size_t top_size, capacity_log;
get_size_capacity(cache_size, &top_size, &capacity_log);
get_size_capacity(cache_size, &size, &capacity_log);
} // else use existing file settings
- VERBOSE_LOG("INIT, cache size %d, KRU capacity_log %d\n", cache_size, capacity_log);
-
-
struct top_data header = {
.version = (FILE_FORMAT_VERSION << 1) | kru_using_avx2(),
.base_price_norm = BASE_PRICE * NORMAL_SIZE,
header_size = offsetof(struct top_data, base_price_norm);
}
- VERBOSE_LOG("INIT mmapped_init\n");
int state = mmapped_init(&top->mmapped, mmap_file, size, &header, header_size, true);
top->data = top->mmapped.mem;
bool using_existing = false;
// try using existing data
if ((state >= 0) && (state & MMAPPED_EXISTING)) {
- if (!KRU.check_size((struct kru *)top->data->kru, top->mmapped.size - offsetof(struct top_data, kru))) {
- VERBOSE_LOG("INIT reset, wrong size\n");
+ if (!KRU.check_size((struct kru *)top->data->kru, (ptrdiff_t)top->mmapped.size - offsetof(struct top_data, kru))) {
state = mmapped_init_reset(&top->mmapped, mmap_file, size, &header, header_size);
top->data = top->mmapped.mem;
} else {
using_existing = true;
- VERBOSE_LOG("INIT finish existing\n");
state = mmapped_init_finish(&top->mmapped);
}
}
state = kr_error(EINVAL);
goto fail;
}
- kr_assert(KRU.check_size((struct kru *)top->data->kru, top->mmapped.size - offsetof(struct top_data, kru)));
+ kr_assert(KRU.check_size((struct kru *)top->data->kru, (ptrdiff_t)top->mmapped.size - offsetof(struct top_data, kru)));
- VERBOSE_LOG("INIT finish new\n");
state = mmapped_init_finish(&top->mmapped);
}
return 0;
fail:
- VERBOSE_LOG("INIT error, deinit\n");
kr_cache_top_deinit(top);
kr_log_crit(SYSTEM, "Initialization of cache top failed.\n");
return state;
}
-void kr_cache_top_deinit(struct kr_cache_top *top) {
+void kr_cache_top_deinit(struct kr_cache_top *top)
+{
top->data = NULL;
mmapped_deinit(&top->mmapped);
}
/* text mode: '\0' -> '|'
* hex bytes: <x00010203x>
* decimal bytes: <0.1.2.3>
+ * CACHE_KEY_DEF
*/
-char *kr_cache_top_strkey(void *key, size_t len) {
+char *kr_cache_top_strkey(void *key, size_t len)
+{
static char str[4 * KR_CACHE_KEY_MAXLEN + 1];
if (4 * len + 1 > sizeof(str)) len = (sizeof(str) - 1) / 4;
unsigned char *k = key;
void kr_cache_top_access(struct kr_cache_top *top, void *key, size_t key_len, size_t data_size, char *debug_label)
{
- bool bloom_overfull = false; // XXX tmp
kru_hash_t hash = KRU.hash_bytes((struct kru *)&top->data->kru, (uint8_t *)key, key_len);
- const bool unique = top->ctx ? first_access(top->ctx, hash, &bloom_overfull) : true;
- const size_t size = kr_cache_top_entry_size(key_len, data_size);
- if (unique) {
- const kru_price_t price = kr_cache_top_entry_price(top, size);
- KRU.load_hash((struct kru *)&top->data->kru, ticks_now(), hash, price);
- }
- if (bloom_overfull) {
- VERBOSE_LOG("ACCESS %-19s%4d B %-5s %s\n", debug_label, size,
- !top->ctx ? "NO_CTX" : unique ? "" : "SKIP",
- kr_cache_top_strkey(key, key_len));
- }
-}
-
-// temporal logging one level under _access
-void kr_cache_top_access_cdb(struct kr_cache_top *top, void *key, size_t len, char *debug_label)
-{
+ const bool unique = top->ctx ? first_access(top->ctx, hash) : true;
+ if (!unique) return;
- // VERBOSE_LOG("ACCESS %-17s %s\n", debug_label, kr_cache_top_strkey(key, len));
+ const size_t size = kr_cache_top_entry_size(key_len, data_size);
+ const kru_price_t price = kr_cache_top_entry_price(top, size);
+ KRU.load_hash((struct kru *)&top->data->kru, ticks_now(), hash, price);
}
struct kr_cache_top_context *kr_cache_top_context_switch(struct kr_cache_top *top,
return old_ctx;
}
-uint16_t kr_cache_top_load(struct kr_cache_top *top, void *key, size_t len) {
+uint16_t kr_cache_top_load(struct kr_cache_top *top, void *key, size_t len)
+{
kru_hash_t hash = KRU.hash_bytes((struct kru *)&top->data->kru, (uint8_t *)key, len);
- uint16_t load = KRU.load_hash((struct kru *)&top->data->kru, ticks_now(), hash, 0);
-
- // VERBOSE_LOG("LOAD %s -> %d\n", kr_cache_top_strkey(key, len), load);
- return load;
+ return KRU.load_hash((struct kru *)&top->data->kru, ticks_now(), hash, 0);
}
* SPDX-License-Identifier: GPL-3.0-or-later
*/
+/// Top uses KRU to maintain statistics about recently used cache entries
+/// for deciding what to evict during garbage collection.
+///
+/// The statistics are stored persistently beside the LMDB data file;
+/// their half-life is currently 5 hours, so that roughly the last ~3 days of traffic are taken into account.
+/// Each accessed cache entry is counted only once within a single request context,
+/// and the price of an access is inversely proportional to the size of the cache entry;
+/// thus accesses per byte are the measure.
+///
+/// The keys currently stored in KRU and in the cache need not correspond exactly.
+/// It is possible that a key is reinserted into the cache after having been evicted,
+/// likely due to an expired TTL, while it still has a high load assigned in KRU.
+/// Or the KRU load of a key may have decayed to zero after hours or days of inactivity,
+/// while there was no need to remove the (possibly expired) entry from the cache.
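+///
+/// For example, with the ~5 h half-life a load decays over 3 days (~72 h)
+/// by a factor of about 2^(72/5), i.e. roughly 20000-fold, so even the maximal
+/// 16-bit load fades to zero within a few days without further accesses.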
+
#pragma once
#include "lib/mmapped.h"
#include "lib/kru.h"
+/// Data related to an open cache.
struct kr_cache_top {
struct mmapped mmapped;
struct top_data *data;
struct kr_cache_top_context *ctx;
};
-struct kr_cache_top_context {
- uint32_t bloom[32]; // size of just one cache-line, but probably not aligned (neither kr_request is)
- uint32_t cnt; // TODO remove this (and propagate to kres-gen)
-};
-
+/// Part of the previous struct, shared between all processes.
struct top_data {
uint32_t version;
uint32_t base_price_norm;
_Alignas(64) uint8_t kru[];
};
+/// Part of kr_request, used to avoid counting repeated cache accesses multiple times during a single request.
+struct kr_cache_top_context {
+ uint32_t bloom[32];
+};
+
#define KR_CACHE_SIZE_OVERHEAD 16 // B, just guess, probably more; size = key + data + DB overhead
+/// Approximate size of a cache entry.
static inline size_t kr_cache_top_entry_size(size_t key_len, size_t data_size) {
return key_len + data_size + KR_CACHE_SIZE_OVERHEAD;
}
+
+/// Price of a cache entry access in KRU based on the entry size.
static inline kru_price_t kr_cache_top_entry_price(struct kr_cache_top *top, size_t size) {
return top->data->base_price_norm / size;
}
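+
+/* For illustration: base_price_norm == BASE_PRICE * NORMAL_SIZE (see top.c),
+ * so an entry of NORMAL_SIZE is charged exactly BASE_PRICE per access,
+ * while an entry twice that size is charged BASE_PRICE / 2. */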
+/// Size of the top data as part of the cache size; LMDB should occupy the rest.
+/// Currently between 6 and 13 %.
KR_EXPORT
-int kr_cache_top_get_size(size_t cache_size);
+size_t kr_cache_top_get_size(size_t cache_size);
+/// Initialize memory shared between processes, possibly using existing data in mmap_file.
+/// If cache_size differs from the previously used value, the data are cleared;
+/// otherwise, they persist across restarts.
KR_EXPORT
int kr_cache_top_init(struct kr_cache_top *top, char *mmap_file, size_t cache_size);
+/// Deinitialize shared memory, keeping the data stored in file.
KR_EXPORT
void kr_cache_top_deinit(struct kr_cache_top *top);
-KR_EXPORT
-void kr_cache_top_access_cdb(struct kr_cache_top *top, void *key, size_t len, char *debug_label); // temporal, TODO remove
-
+/// Charge a cache access to the accessed key,
+/// unless the key was already accessed in the current request context.
KR_EXPORT
void kr_cache_top_access(struct kr_cache_top *top, void *key, size_t key_len, size_t data_size, char *debug_label);
+ // debug_label is currently not used, TODO remove?
+/// Get current KRU load value assigned to the given cache entry key.
KR_EXPORT
uint16_t kr_cache_top_load(struct kr_cache_top *top, void *key, size_t len);
-// ctx has to be kept valid until next call
+/// Switch the request context; ctx has to be kept valid until the next call.
+/// The context of a new kr_request has to be initialized with zeroes.
+/// Use NULL as ctx to stop using the current context;
+/// all cache accesses in such a state are considered unique,
+/// but no such access is expected to happen.
+/// Returns the previous context.
KR_EXPORT
struct kr_cache_top_context *kr_cache_top_context_switch(struct kr_cache_top *top, struct kr_cache_top_context *ctx, char *debug_label);
+ // debug_label is currently not used, TODO remove?
+/// Return a readable string representation of a cache key in statically allocated memory.
+/// By default, printable characters are kept unchanged and zero bytes are printed as '|'.
+/// Where numeric values are expected (CACHE_KEY_DEF) or non-printable characters occur,
+/// bytes are printed either as decimals in the form <0.1.2> or as hexadecimals in the form <x000102x>.
+/// The decimal form is used for RRTYPEs and IPv4 addresses; the hexadecimal one for NSEC3 hashes,
+/// IPv6 addresses and unexpected unprintable characters, as well as for '|', '<', '>' to keep the output unambiguous.
KR_EXPORT
char *kr_cache_top_strkey(void *key, size_t len);
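+
+/* A minimal usage sketch (illustrative only; key/key_len/data_size/cache_size
+ * supplied by the caller, file name hypothetical, error handling elided):
+ *
+ *	struct kr_cache_top top;
+ *	if (kr_cache_top_init(&top, "cache.top", cache_size) != 0)
+ *		return;
+ *	struct kr_cache_top_context req_ctx = { 0 };  // zeroed context per request
+ *	struct kr_cache_top_context *prev = kr_cache_top_context_switch(&top, &req_ctx, NULL);
+ *	kr_cache_top_access(&top, key, key_len, data_size, "example");  // on each cache access
+ *	uint16_t load = kr_cache_top_load(&top, key, key_len);          // e.g. during GC
+ *	kr_cache_top_context_switch(&top, prev, NULL);
+ *	kr_cache_top_deinit(&top);
+ */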
/// Verify that the given KRU structure expects a memory area of exactly the given size;
/// it accesses just the first size bytes of kru.
/// If false is returned, the memory is corrupted and calling other methods may cause SIGSEGV.
- bool (*check_size)(struct kru *kru, size_t size);
+ bool (*check_size)(struct kru *kru, ptrdiff_t size);
/// Determine if a key should get limited (and update the KRU).
/// key needs to be aligned to a multiple of 16 bytes.
void (*load_multi_prefix)(struct kru *kru, uint32_t time_now,
uint8_t namespace, uint8_t key[static 16], uint8_t *prefixes, kru_price_t *prices, size_t queries_cnt, uint16_t *loads_out);
- // TODO
- /// Compute 64-bit hash to be used in load_hash.
+	/// Compute a 64-bit hash of an arbitrary-size byte array, to be used in the load_hash function.
	/// The key need not be aligned as we always use the unoptimized variant here.
kru_hash_t (*hash_bytes)(struct kru *kru, uint8_t *key, size_t key_size);
+
+ /// Single query based on the hash computed by the previous function.
+ /// Returns the final value of the counter normalized to the limit 2^16.
+	/// Set price to zero to skip updating; otherwise, KRU is always updated, using the maximal allowed value on overflow.
uint16_t (*load_hash)(struct kru *kru, uint32_t time_now, kru_hash_t hash, kru_price_t price);
};
+ sizeof(struct load_cl) * TABLE_COUNT * (1 << loads_bits);
}
-static bool kru_check_size(struct kru *kru, size_t size) {
+static bool kru_check_size(struct kru *kru, ptrdiff_t size)
+{
-	if (size < sizeof(struct kru)) return false;
+	// the cast on sizeof keeps the comparison signed, so negative sizes
+	// (e.g. an underflowed subtraction at a call site) are rejected too
+	if (size < (ptrdiff_t)sizeof(struct kru)) return false;
return size == kru_get_size(kru->loads_bits + LOADS_CAPACITY_SHIFT);
}
ctx->id = hash;
}
-static kru_hash_t kru_hash_bytes(struct kru *kru, uint8_t *key, size_t key_size) {
+static kru_hash_t kru_hash_bytes(struct kru *kru, uint8_t *key, size_t key_size)
+{
// Obtain hash of *buf.
kru_hash_t hash;
static_assert(sizeof(kru_hash_t) * 8 <= 64);
mmapped_init_finish
}
if (<0) fail + return
- assert(==0) // if both _finish above were used
+ assertion (==0) // if both _finish above were used
// done
non-persistent case:
mmapped_init_finish
}
if (<0) fail + return
- assert(==0) // no other outcome if both _finish above were used
+ assertion (==0) // no other outcome if both _finish above were used
// done
*/
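+
+/* A simplified code sketch of the persistent flow above (cf. kr_cache_top_init
+ * in top.c; error handling shortened; data_is_usable stands for a caller-specific
+ * check such as KRU.check_size; assumes mmapped_init_reset reports the data as fresh):
+ *
+ *	struct mmapped m;
+ *	int state = mmapped_init(&m, path, size, &header, header_size, true);
+ *	if (state >= 0 && (state & MMAPPED_EXISTING)) {
+ *		if (data_is_usable(m.mem))
+ *			state = mmapped_init_finish(&m);
+ *		else  // incompatible data -> reset the file to the new header
+ *			state = mmapped_init_reset(&m, path, size, &header, header_size);
+ *	}
+ *	if (state >= 0 && !(state & MMAPPED_EXISTING))
+ *		state = mmapped_init_finish(&m);  // publish the freshly initialized data
+ *	if (state < 0) mmapped_deinit(&m);
+ */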
#include "lib/cache/top.h"
#include "utils/cache_gc/db.h"
-static bool rrtype_is_infrastructure(uint16_t r) // currently unused
+static inline int load2cat(uint16_t load) // -> 0..64, reversed
{
- switch (r) {
- case KNOT_RRTYPE_NS:
- case KNOT_RRTYPE_DS:
- case KNOT_RRTYPE_DNSKEY:
- case KNOT_RRTYPE_A:
- case KNOT_RRTYPE_AAAA:
- return true;
- default:
- return false;
- }
-}
-
-static unsigned int get_random(int to) // currently unused
-{
- // We don't need these to be really unpredictable,
- // but this should be cheap enough not to be noticeable.
- return kr_rand_bytes(1) % to;
-}
-
-static inline int load2cat(uint16_t load) { // 0..64, reversed
const uint32_t load32 = ((uint32_t)load << 16) | 0xFFFF;
const int leading_zeroes = __builtin_clz(load32); // 0..16
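+	// intuition: the higher the load, the lower the category (evicted later);
+	// roughly, the maximal load 0xFFFF maps to 0 and a zero load to 64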
const int logss2 = // 0, 4, 6, 8..64; approx of log with base 2^{1/4}
{
category_t res; // 0..(CATEGORIES - 1), highest will be dropped first
- if (!info->valid)
+ if (!info->valid) {
+ // invalid entries will be evicted first
return CATEGORIES - 1;
+ }
uint16_t load = kr_cache_top_load(top, key, key_len);
res = load2cat(load); // 0..64
- if (info->rrtype == KNOT_CACHE_RTT) {
- // TODO some correction here?
- } else {
- if (info->expires_in <= 0) {
- // evict all expired before any non-expired
- res = res / 2 + 65; // 65..94
- }
+ if ((info->rrtype != KNOT_CACHE_RTT) && (info->expires_in <= 0)) {
+ // evict all expired before any non-expired (incl. RTT)
+ res = res / 2 + 65; // 65..97
}
- static_assert(CATEGORIES - 1 > 94);
+ static_assert(CATEGORIES - 1 > 97);
const kru_price_t price = kr_cache_top_entry_price(top, info->entry_size);
const double accesses = (double)((kru_price_t)load << (KRU_PRICE_BITS - 16)) / price;