From: Lukáš Ondráček Date: Wed, 5 Jun 2024 18:26:15 +0000 (+0200) Subject: ratelimiting: moving mmapping to daemon/mmapped X-Git-Tag: v6.0.9~1^2~44 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=e7761ef12e12f79effed618356f8cc2fd9966071;p=thirdparty%2Fknot-resolver.git ratelimiting: moving mmapping to daemon/mmapped --- diff --git a/daemon/lua/kres-gen-30.lua b/daemon/lua/kres-gen-30.lua index c7df184c6..b5ff14784 100644 --- a/daemon/lua/kres-gen-30.lua +++ b/daemon/lua/kres-gen-30.lua @@ -581,7 +581,7 @@ knot_pkt_t *worker_resolve_mk_pkt(const char *, uint16_t, uint16_t, const struct struct qr_task *worker_resolve_start(knot_pkt_t *, struct kr_qflags); int zi_zone_import(const zi_config_t); _Bool ratelimiting_request_begin(struct kr_request *); -void ratelimiting_init(const char *, size_t, uint32_t, uint32_t, int); +int ratelimiting_init(const char *, size_t, uint32_t, uint32_t, int); struct engine { char _stub[]; }; diff --git a/daemon/lua/kres-gen-31.lua b/daemon/lua/kres-gen-31.lua index 28c9c5a62..51f93dc73 100644 --- a/daemon/lua/kres-gen-31.lua +++ b/daemon/lua/kres-gen-31.lua @@ -581,7 +581,7 @@ knot_pkt_t *worker_resolve_mk_pkt(const char *, uint16_t, uint16_t, const struct struct qr_task *worker_resolve_start(knot_pkt_t *, struct kr_qflags); int zi_zone_import(const zi_config_t); _Bool ratelimiting_request_begin(struct kr_request *); -void ratelimiting_init(const char *, size_t, uint32_t, uint32_t, int); +int ratelimiting_init(const char *, size_t, uint32_t, uint32_t, int); struct engine { char _stub[]; }; diff --git a/daemon/lua/kres-gen-32.lua b/daemon/lua/kres-gen-32.lua index cbf4b3042..35acdeb19 100644 --- a/daemon/lua/kres-gen-32.lua +++ b/daemon/lua/kres-gen-32.lua @@ -582,7 +582,7 @@ knot_pkt_t *worker_resolve_mk_pkt(const char *, uint16_t, uint16_t, const struct struct qr_task *worker_resolve_start(knot_pkt_t *, struct kr_qflags); int zi_zone_import(const zi_config_t); _Bool ratelimiting_request_begin(struct kr_request *); -void ratelimiting_init(const char *, size_t, uint32_t, uint32_t, int); +int ratelimiting_init(const char *, size_t, uint32_t, uint32_t, int); struct engine { char _stub[]; }; diff --git a/daemon/meson.build b/daemon/meson.build index 87475ea3d..332890b46 100644 --- a/daemon/meson.build +++ b/daemon/meson.build @@ -13,6 +13,7 @@ kresd_src = files([ 'ffimodule.c', 'io.c', 'main.c', + 'mmapped.c', 'network.c', 'proxyv2.c', 'ratelimiting.c', @@ -32,10 +33,10 @@ endif c_src_lint += kresd_src unit_tests += [ - ['ratelimiting', files('ratelimiting.test/tests.c') + libkres_src ], + ['ratelimiting', files('ratelimiting.test/tests.c', 'mmapped.c') + libkres_src ], # parallel tests timeouts under valgrind; they checks mainly for race conditions, which is not needed there - ['ratelimiting-parallel', files('ratelimiting.test/tests-parallel.c') + libkres_src, ['skip_valgrind']] + ['ratelimiting-parallel', files('ratelimiting.test/tests-parallel.c', 'mmapped.c') + libkres_src, ['skip_valgrind']] ] config_tests += [ diff --git a/daemon/mmapped.c b/daemon/mmapped.c new file mode 100644 index 000000000..67309c588 --- /dev/null +++ b/daemon/mmapped.c @@ -0,0 +1,109 @@ +#include +#include +#include +#include + +#include "daemon/mmapped.h" +#include "lib/utils.h" + +static inline bool fcntl_flock_whole(int fd, short int type, bool wait) +{ + struct flock fl = { + .l_type = type, // F_WRLCK, F_RDLCK, F_UNLCK + .l_whence = SEEK_SET, + .l_start = 0, + .l_len = 0 }; + return fcntl(fd, (wait ? F_SETLKW : F_SETLK), &fl) != -1; +} + +int mmapped_init(struct mmapped *mmapped, const char *mmap_file, size_t size, void *header, size_t header_size) +{ + int ret = 0; + int fd = mmapped->fd = open(mmap_file, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR); + if (fd == -1) { + ret = kr_error(errno); + kr_log_crit(SYSTEM, "Cannot open file %s with shared data: %s\n", + mmap_file, strerror(errno)); + goto fail; + } + + // try to acquire write lock; copy header on success + if (fcntl_flock_whole(fd, F_WRLCK, false)) { + if (ftruncate(fd, 0) == -1 || ftruncate(fd, size) == -1) { // get all zeroed + ret = kr_error(errno); + kr_log_crit(SYSTEM, "Cannot change size of file %s containing shared data: %s\n", + mmap_file, strerror(errno)); + goto fail; + } + mmapped->mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + if (mmapped->mem == MAP_FAILED) goto fail_errno; + + memcpy(mmapped->mem, header, header_size); + + return MMAPPED_WAS_FIRST; + } + + // wait for acquiring shared lock; check header on success + if (!fcntl_flock_whole(fd, F_RDLCK, true)) goto fail_errno; + + struct stat s; + bool succ = (fstat(fd, &s) == 0); + if (!succ) goto fail_errno; + if (s.st_size != size) goto fail_header_mismatch; + + mmapped->mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + if (mmapped->mem == MAP_FAILED) goto fail_errno; + if (memcmp(mmapped->mem, header, header_size) != 0) { + munmap(mmapped->mem, size); + goto fail_header_mismatch; + } + + return 0; + + +fail_header_mismatch: + kr_log_crit(SYSTEM, "Another instance of kresd uses file %s with different configuration.", mmap_file); + errno = EUCLEAN; + +fail_errno: + ret = kr_error(errno); + +fail: + if (fd >= 0) { + fcntl_flock_whole(fd, F_UNLCK, false); + close(fd); + } + mmapped->mem = NULL; + return ret; +} + +int mmapped_init_continue(struct mmapped *mmapped) +{ + if (!fcntl_flock_whole(mmapped->fd, F_RDLCK, false)) return kr_error(errno); + return 0; +} + +void mmapped_deinit(struct mmapped *mmapped) +{ + if (mmapped->mem == NULL) return; + int fd = mmapped->fd; + + munmap(mmapped->mem, mmapped->size); + mmapped->mem = NULL; + + fcntl_flock_whole(fd, F_UNLCK, false); + + // remove file data unless it is still locked by other processes + if (fcntl_flock_whole(fd, F_WRLCK, false)) { + + /* If the configuration is updated at runtime, manager may remove the file + * and the new processes create it again while old processes are still using the old data. + * Here we keep zero-size file not to accidentally remove the new file instead of the old one. + * Still truncating the file will cause currently starting processes waiting for read lock on the same file to fail, + * but such processes are not expected to exist. */ + ftruncate(fd, 0); + + fcntl_flock_whole(fd, F_UNLCK, false); + } + close(fd); +} diff --git a/daemon/mmapped.h b/daemon/mmapped.h new file mode 100644 index 000000000..dd55903a5 --- /dev/null +++ b/daemon/mmapped.h @@ -0,0 +1,26 @@ +#include + +#define MMAPPED_WAS_FIRST 1 + +struct mmapped { + void *mem; + size_t size; + int fd; +}; + +/* Initialize/Use file data as mmapped memory. + * + * If write flock can be acquired, the file is resized, zeroed and mmapped, + * header is copied at its beginning and MMAPPED_WAS_FIRST is returned; + * you should finish initialization and call mmapped_init_continue to degrade flock to shared. + * Otherwise, it waits for shared flock, calls mmap, verifies that header is byte-wise identical and returns zero. + * On header mismatch, kr_error(EUCLEAN) is returned; on a system error, kr_error(errno) is returned. */ +int mmapped_init(struct mmapped *mmapped, const char *mmap_file, size_t size, void *header, size_t header_size); + +/* Degrade flock to shared after getting MMAPPED_WAS_FIRST from mmapped_init. + * + * Returns zero on success and kr_error(errno) on system error. */ +int mmapped_init_continue(struct mmapped *mmapped); + +/* Free mmapped memory and truncate underlying file to zero size unless it is used by other processes. */ +void mmapped_deinit(struct mmapped *mmapped); diff --git a/daemon/ratelimiting.c b/daemon/ratelimiting.c index fc89497fc..320f0e18f 100644 --- a/daemon/ratelimiting.c +++ b/daemon/ratelimiting.c @@ -1,12 +1,9 @@ -#include -#include - #include "daemon/ratelimiting.h" +#include "daemon/mmapped.h" #include "lib/kru.h" #include "lib/utils.h" #include "lib/resolve.h" - #define RRL_V4_PREFIXES (uint8_t[]) { 18, 20, 24, 32 } #define RRL_V4_RATE_MULT (kru_price_t[]) { 768, 256, 32, 1 } @@ -17,7 +14,7 @@ #define RRL_V6_PREFIXES_CNT (sizeof(RRL_V6_PREFIXES) / sizeof(*RRL_V6_PREFIXES)) #define RRL_MAX_PREFIXES_CNT ((RRL_V4_PREFIXES_CNT > RRL_V6_PREFIXES_CNT) ? RRL_V4_PREFIXES_CNT : RRL_V6_PREFIXES_CNT) -struct rrl { // TODO rename? +struct ratelimiting { size_t capacity; uint32_t instant_limit; uint32_t rate_limit; @@ -27,9 +24,8 @@ struct rrl { // TODO rename? kru_price_t v6_prices[RRL_V6_PREFIXES_CNT]; uint8_t kru[] ALIGNED(64); }; -struct rrl *the_rrl = NULL; -int the_rrl_fd = -1; -char *the_rrl_mmap_file = NULL; +struct ratelimiting *ratelimiting = NULL; +struct mmapped ratelimiting_mmapped = {0}; /// return whether we're using optimized variant right now static bool using_avx2(void) @@ -39,130 +35,77 @@ static bool using_avx2(void) return result; } -void ratelimiting_init(const char *mmap_file, size_t capacity, uint32_t instant_limit, uint32_t rate_limit, int tc_limit_perc) +int ratelimiting_init(const char *mmap_file, size_t capacity, uint32_t instant_limit, uint32_t rate_limit, int tc_limit_perc) { - int fd = the_rrl_fd = open(mmap_file, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR); - if (fd == -1) { - kr_log_crit(SYSTEM, "Cannot open file %s containing shared rate-limiting data: %s\n", - mmap_file, strerror(errno)); - abort(); - } - - the_rrl_mmap_file = malloc(strlen(mmap_file) + 1); - strcpy(the_rrl_mmap_file, mmap_file); size_t capacity_log = 0; for (size_t c = capacity - 1; c > 0; c >>= 1) capacity_log++; - size_t size = offsetof(struct rrl, kru) + KRU.get_size(capacity_log); + size_t size = offsetof(struct ratelimiting, kru) + KRU.get_size(capacity_log); + size_t header_size = offsetof(struct ratelimiting, v4_prices); - uint16_t tc_limit = (tc_limit_perc == 100 ? -1 : ((uint32_t)tc_limit_perc << 16) / 100); + struct ratelimiting header = { + .capacity = capacity, + .instant_limit = instant_limit, + .rate_limit = rate_limit, + .tc_limit = (tc_limit_perc == 100 ? -1 : ((uint32_t)tc_limit_perc << 16) / 100), + .using_avx2 = using_avx2() + }; - // try to acquire write lock; initialize KRU on success - struct flock fl = { - .l_type = F_WRLCK, - .l_whence = SEEK_SET, - .l_start = 0, - .l_len = 0 }; - if (fcntl(fd, F_SETLK, &fl) != -1) { + int ret = mmapped_init(&ratelimiting_mmapped, mmap_file, size, &header, header_size); + if (ret == MMAPPED_WAS_FIRST) { kr_log_info(SYSTEM, "Initializing rate-limiting...\n"); - if (ftruncate(fd, 0) == -1 || ftruncate(fd, size) == -1) { // get all zeroed - kr_log_crit(SYSTEM, "Cannot change size of file %s containing shared rate-limiting data: %s\n", - mmap_file, strerror(errno)); - abort(); - } - the_rrl = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); - kr_require(the_rrl != MAP_FAILED); - the_rrl->capacity = capacity; - the_rrl->instant_limit = instant_limit; - the_rrl->rate_limit = rate_limit; - the_rrl->tc_limit = tc_limit; - the_rrl->using_avx2 = using_avx2(); + ratelimiting = ratelimiting_mmapped.mem; const kru_price_t base_price = KRU_LIMIT / instant_limit; const kru_price_t max_decay = rate_limit > 1000ll * instant_limit ? base_price : (uint64_t) base_price * rate_limit / 1000; - bool succ = KRU.initialize((struct kru *)the_rrl->kru, capacity_log, max_decay); + bool succ = KRU.initialize((struct kru *)ratelimiting->kru, capacity_log, max_decay); kr_require(succ); for (size_t i = 0; i < RRL_V4_PREFIXES_CNT; i++) { - the_rrl->v4_prices[i] = base_price / RRL_V4_RATE_MULT[i]; + ratelimiting->v4_prices[i] = base_price / RRL_V4_RATE_MULT[i]; } for (size_t i = 0; i < RRL_V6_PREFIXES_CNT; i++) { - the_rrl->v6_prices[i] = base_price / RRL_V6_RATE_MULT[i]; + ratelimiting->v6_prices[i] = base_price / RRL_V6_RATE_MULT[i]; } - fl.l_type = F_RDLCK; - succ = (fcntl(fd, F_SETLK, &fl) != -1); - kr_require(succ); - kr_log_info(SYSTEM, "Rate-limiting initialized (%s).\n", (the_rrl->using_avx2 ? "AVX2" : "generic")); + ret = mmapped_init_continue(&ratelimiting_mmapped); + if (ret != 0) goto fail; - return; - } + kr_log_info(SYSTEM, "Rate-limiting initialized (%s).\n", (ratelimiting->using_avx2 ? "AVX2" : "generic")); + return 0; + } else if (ret == 0) { + ratelimiting = ratelimiting_mmapped.mem; + kr_log_info(SYSTEM, "Using existing rate-limiting data (%s).\n", (ratelimiting->using_avx2 ? "AVX2" : "generic")); + return 0; + } // else fail - // wait for acquiring shared lock; check KRU parameters on success - fl.l_type = F_RDLCK; - if (fcntl(fd, F_SETLKW, &fl) != -1) { - kr_log_info(SYSTEM, "Checking existing RRL data...\n"); - struct stat s; - bool succ = (fstat(fd, &s) == 0); - kr_require(succ); - if (s.st_size != size) goto check_fail; - the_rrl = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); - kr_require(the_rrl != MAP_FAILED); - if ((the_rrl->capacity != capacity) || (the_rrl->instant_limit != instant_limit) || - (the_rrl->rate_limit != rate_limit) || (the_rrl->tc_limit != tc_limit)) goto check_fail; - if (using_avx2() != the_rrl->using_avx2) goto check_fail; - kr_log_info(SYSTEM, "Using existing RRL data.\n"); - - return; - } +fail: - kr_require(false); // we can get here for example if signal interrupt is received during fcntl + kr_log_crit(SYSTEM, "Initialization of shared rate-limiting data failed.\n"); -check_fail: - kr_log_crit(SYSTEM, "Another instance of kresd uses rate-limiting with different configuration, cannot share data in %s.", mmap_file); - abort(); + return ret; } void ratelimiting_deinit(void) { - if (the_rrl == NULL) return; - int fd = the_rrl_fd; - - struct flock fl = { - .l_type = F_UNLCK, - .l_whence = SEEK_SET, - .l_start = 0, - .l_len = 0 }; - fcntl(fd, F_SETLK, &fl); // unlock - - fl.l_type = F_WRLCK; - if (fcntl(fd, F_SETLK, &fl) != -1) { - - /* If the RRL configuration is updated at runtime, manager removes the file - * and the new processes create it again while old processes are still using the old data. - * Here we keep zero-size file not to accidentally remove the new file instead of the old one. - * Still truncating the file will cause currently starting processes waiting for read lock on the same file to fail, - * but such processes are not expected to exist. */ - ftruncate(fd, 0); - - fl.l_type = F_UNLCK; - fcntl(fd, F_SETLK, &fl); - } + if (ratelimiting == NULL) return; - the_rrl = NULL; + mmapped_deinit(&ratelimiting_mmapped); + ratelimiting = NULL; } + bool ratelimiting_request_begin(struct kr_request *req) { if (!req->qsource.addr) return false; // don't consider internal requests uint8_t limited = 0; // 0: not limited, 1: truncated, 2: no answer - if (the_rrl) { + if (ratelimiting) { uint8_t key[16] ALIGNED(16) = {0, }; uint8_t limited_prefix; uint16_t max_final_load = 0; @@ -170,16 +113,16 @@ bool ratelimiting_request_begin(struct kr_request *req) struct sockaddr_in6 *ipv6 = (struct sockaddr_in6 *)req->qsource.addr; memcpy(key, &ipv6->sin6_addr, 16); - limited_prefix = KRU.limited_multi_prefix_or((struct kru *)the_rrl->kru, kr_now(), - 1, key, RRL_V6_PREFIXES, the_rrl->v6_prices, RRL_V6_PREFIXES_CNT, &max_final_load); + limited_prefix = KRU.limited_multi_prefix_or((struct kru *)ratelimiting->kru, kr_now(), + 1, key, RRL_V6_PREFIXES, ratelimiting->v6_prices, RRL_V6_PREFIXES_CNT, &max_final_load); } else { struct sockaddr_in *ipv4 = (struct sockaddr_in *)req->qsource.addr; memcpy(key, &ipv4->sin_addr, 4); // TODO append port? - limited_prefix = KRU.limited_multi_prefix_or((struct kru *)the_rrl->kru, kr_now(), - 0, key, RRL_V4_PREFIXES, the_rrl->v4_prices, RRL_V4_PREFIXES_CNT, &max_final_load); + limited_prefix = KRU.limited_multi_prefix_or((struct kru *)ratelimiting->kru, kr_now(), + 0, key, RRL_V4_PREFIXES, ratelimiting->v4_prices, RRL_V4_PREFIXES_CNT, &max_final_load); } - limited = (limited_prefix ? 2 : (max_final_load > the_rrl->tc_limit ? 1 : 0)); + limited = (limited_prefix ? 2 : (max_final_load > ratelimiting->tc_limit ? 1 : 0)); } if (!limited) return false; diff --git a/daemon/ratelimiting.h b/daemon/ratelimiting.h index 406431b5a..8322772d2 100644 --- a/daemon/ratelimiting.h +++ b/daemon/ratelimiting.h @@ -7,7 +7,7 @@ struct kr_request; * The existing data are used if another instance is already using the file * and it was initialized with the same parameters; it fails on mismatch. */ KR_EXPORT -void ratelimiting_init(const char *mmap_file, size_t capacity, uint32_t instant_limit, uint32_t rate_limit, int tc_limit_perc); +int ratelimiting_init(const char *mmap_file, size_t capacity, uint32_t instant_limit, uint32_t rate_limit, int tc_limit_perc); /** Do rate-limiting, during knot_layer_api::begin. */ KR_EXPORT diff --git a/daemon/ratelimiting.test/tests.inc.c b/daemon/ratelimiting.test/tests.inc.c index 14a5a9c08..9e56bc67f 100644 --- a/daemon/ratelimiting.test/tests.inc.c +++ b/daemon/ratelimiting.test/tests.inc.c @@ -98,10 +98,10 @@ static void test_rrl(void **state) { ratelimiting_init(mmap_file, RRL_TABLE_SIZE, RRL_INSTANT_LIMIT, RRL_RATE_LIMIT, 100); if (KRU.initialize == KRU_GENERIC.initialize) { - struct kru_generic *kru = (struct kru_generic *) the_rrl->kru; + struct kru_generic *kru = (struct kru_generic *) ratelimiting->kru; memset(&kru->hash_key, RRL_SEED_GENERIC, sizeof(kru->hash_key)); } else if (KRU.initialize == KRU_AVX2.initialize) { - struct kru_avx2 *kru = (struct kru_avx2 *) the_rrl->kru; + struct kru_avx2 *kru = (struct kru_avx2 *) ratelimiting->kru; memset(&kru->hash_key, RRL_SEED_AVX2, sizeof(kru->hash_key)); } else { assert(0); diff --git a/manager/knot_resolver_manager/datamodel/templates/rate_limiting.lua.j2 b/manager/knot_resolver_manager/datamodel/templates/rate_limiting.lua.j2 index dfd6b7705..096c7f3c2 100644 --- a/manager/knot_resolver_manager/datamodel/templates/rate_limiting.lua.j2 +++ b/manager/knot_resolver_manager/datamodel/templates/rate_limiting.lua.j2 @@ -1,10 +1,10 @@ {% from 'macros/common_macros.lua.j2' import boolean %} {% if cfg.rate_limiting.rate_limit -%} -C.ratelimiting_init( +assert(C.ratelimiting_init( '{{ cfg.rundir }}/ratelimiting', {{ cfg.rate_limiting.capacity }}, {{ cfg.rate_limiting.instant_limit }}, {{ cfg.rate_limiting.rate_limit }}, - {{ cfg.rate_limiting.tc_limit_perc }}) + {{ cfg.rate_limiting.tc_limit_perc }}) == 0) {%- endif %}