From fa45e59bcf8423496b085a5817adaff8e014743c Mon Sep 17 00:00:00 2001 From: =?utf8?q?Luk=C3=A1=C5=A1=20Ondr=C3=A1=C4=8Dek?= Date: Thu, 9 Jan 2025 01:04:09 +0100 Subject: [PATCH] daemon/defer: add hard-timeout for interrupting expensive computations --- daemon/defer.c | 36 ++++++++++++++++++- daemon/defer.h | 13 ++++--- daemon/lua/kres-gen-33.lua | 2 +- daemon/main.c | 4 +-- doc/_static/config.schema.json | 13 +++++-- doc/user/config-defer.rst | 26 +++++++++++++- .../knot_resolver/datamodel/defer_schema.py | 2 ++ .../datamodel/templates/defer.lua.j2 | 3 +- 8 files changed, 86 insertions(+), 13 deletions(-) diff --git a/daemon/defer.c b/daemon/defer.c index aeaded206..42bd5c11d 100644 --- a/daemon/defer.c +++ b/daemon/defer.c @@ -4,6 +4,7 @@ #include #include +#include <signal.h> #include "daemon/defer.h" #include "daemon/session2.h" #include "daemon/udp_queue.h" @@ -63,6 +64,7 @@ struct defer { size_t capacity; kru_price_t max_decay; uint32_t log_period; + uint32_t hard_timeout; int cpus; bool using_avx2; _Atomic uint32_t log_time; @@ -642,9 +644,36 @@ static void defer_queues_idle(uv_idle_t *handle) VERBOSE_LOG("POLL\n"); } +static void defer_alarm(int signum) /* SIGALRM handler; also called once with signum == 0 from defer_init to arm the first alarm. NOTE(review): the logging calls made here may not be async-signal-safe; confirm this is acceptable. */ +{ + if (!defer || (defer->hard_timeout == 0)) return; + + uint64_t elapsed = 0; + if (defer_sample_state.is_accounting) { + elapsed = defer_get_stamp() - defer_sample_state.stamp; + VERBOSE_LOG("SIGALRM %s, host %s used %.3f s of cpu time on ongoing operation\n", + signum ? "received" : "initialized", + kr_straddr(&defer_sample_state.addr.ip), elapsed / 1000000000.0); // XXX + } else { + VERBOSE_LOG("SIGALRM %s, no measuring in progress\n", + signum ? "received" : "initialized"); + } + int64_t rest_to_timeout_ms = defer->hard_timeout - elapsed / 1000000; // hard_timeout is in ms, elapsed is in ns + if (rest_to_timeout_ms <= 0) { + uv_update_time(uv_default_loop()); // TODO more conceptual solution? 
+ defer_charge(elapsed, &defer_sample_state.addr, defer_sample_state.stream); + kr_log_crit(DEFER, "Host %s used %0.3f s of cpu time continuously, interrupting kresd.\n", + kr_straddr(&defer_sample_state.addr.ip), elapsed / 1000000000.0); + classify(&defer_sample_state.addr, defer_sample_state.stream); // XXX + __sync_synchronize(); + abort(); + } + alarm((rest_to_timeout_ms + 999) / 1000); +} /// Initialize shared memory, queues. To be called from Lua. -int defer_init(const char *mmap_file, uint32_t log_period, int cpus) // TODO possibly remove cpus; not needed +int defer_init(const char *mmap_file, uint32_t log_period, uint32_t hard_timeout, int cpus) + // TODO possibly remove cpus; not needed { defer_initialized = true; if (mmap_file == NULL) { @@ -662,6 +691,7 @@ int defer_init(const char *mmap_file, uint32_t log_period, int cpus) // TODO po .capacity = KRU_CAPACITY, .max_decay = MAX_DECAY, .log_period = log_period, + .hard_timeout = hard_timeout, .cpus = cpus, .using_avx2 = using_avx2(), }; @@ -676,6 +706,7 @@ int defer_init(const char *mmap_file, uint32_t log_period, int cpus) // TODO po sizeof(header.capacity) + sizeof(header.max_decay) + sizeof(header.log_period) + + sizeof(header.hard_timeout) + sizeof(header.cpus), "detected padding with undefined data inside mmapped header"); @@ -713,6 +744,9 @@ int defer_init(const char *mmap_file, uint32_t log_period, int cpus) // TODO po for (size_t i = 0; i < QUEUES_CNT; i++) queue_init(queues[i]); + signal(SIGALRM, defer_alarm); + defer_alarm(0); + return 0; fail: diff --git a/daemon/defer.h b/daemon/defer.h index e6ade87aa..71769c6f4 100644 --- a/daemon/defer.h +++ b/daemon/defer.h @@ -9,9 +9,9 @@ /// Initialize defer, incl. shared memory with KRU, excl. idle. KR_EXPORT -int defer_init(const char *mmap_file, uint32_t log_period, int cpus); +int defer_init(const char *mmap_file, uint32_t log_period, uint32_t hard_timeout, int cpus); -/// Initialize idle. +/// Initialize idle and SIGALRM handler. 
int defer_init_idle(uv_loop_t *loop); /// Deinitialize shared memory. @@ -92,9 +92,10 @@ static inline void defer_sample_start_stamp(uint64_t stamp) { if (!defer) return; kr_assert(!defer_sample_state.is_accounting); - defer_sample_state.is_accounting = true; defer_sample_state.stamp = stamp; defer_sample_state.addr.ip.sa_family = AF_UNSPEC; + __sync_synchronize(); + defer_sample_state.is_accounting = true; } /// Internal; stop accounting work at specified timestamp and charge the source if applicable. @@ -103,6 +104,7 @@ static inline void defer_sample_stop_stamp(uint64_t stamp) if (!defer) return; kr_assert(defer_sample_state.is_accounting); defer_sample_state.is_accounting = false; + __sync_synchronize(); if (defer_sample_state.addr.ip.sa_family == AF_UNSPEC) return; @@ -159,7 +161,10 @@ static inline void defer_sample_stop(defer_sample_state_t *prev_state, bool reus // resume if (prev_state) { - defer_sample_state = *prev_state; + defer_sample_state.addr = prev_state->addr; + defer_sample_state.stream = prev_state->stream; defer_sample_state.stamp = stamp; + __sync_synchronize(); + defer_sample_state.is_accounting = prev_state->is_accounting; } } diff --git a/daemon/lua/kres-gen-33.lua b/daemon/lua/kres-gen-33.lua index 8147af88f..f77900279 100644 --- a/daemon/lua/kres-gen-33.lua +++ b/daemon/lua/kres-gen-33.lua @@ -620,7 +620,7 @@ struct qr_task *worker_resolve_start(knot_pkt_t *, struct kr_qflags); int zi_zone_import(const zi_config_t); _Bool ratelimiting_request_begin(struct kr_request *); int ratelimiting_init(const char *, size_t, uint32_t, uint32_t, uint16_t, uint32_t, _Bool); -int defer_init(const char *, uint32_t, int); +int defer_init(const char *, uint32_t, uint32_t, int); void defer_set_price_factor16(struct kr_request *, uint32_t); struct engine { char _stub[]; diff --git a/daemon/main.c b/daemon/main.c index a7b9c92b6..5f0717d11 100644 --- a/daemon/main.c +++ b/daemon/main.c @@ -553,7 +553,7 @@ int main(int argc, char **argv) uv_loop_t *loop = 
uv_default_loop(); /* Catch some signals. */ - uv_signal_t sigint, sigterm, sigchld; + uv_signal_t sigint, sigterm, sigchld; // +SIGALRM handled by defer if (true) ret = uv_signal_init(loop, &sigint); if (!ret) ret = uv_signal_init(loop, &sigterm); if (!ret) ret = uv_signal_init(loop, &sigchld); @@ -618,7 +618,7 @@ int main(int argc, char **argv) if (!defer_initialized) { kr_log_warning(SYSTEM, "Prioritization not initialized from Lua, using hardcoded default.\n"); - ret = defer_init("defer", 1, 1); + ret = defer_init("defer", 1, 0, 1); if (ret) { ret = EXIT_FAILURE; goto cleanup; diff --git a/doc/_static/config.schema.json b/doc/_static/config.schema.json index 0bedbbc4e..b9063fd4b 100644 --- a/doc/_static/config.schema.json +++ b/doc/_static/config.schema.json @@ -374,7 +374,7 @@ } }, "default": { - "files_watchdog": true, + "files_watchdog": false, "cert_file": null, "key_file": null, "sticket_secret": null, @@ -533,7 +533,7 @@ }, "address_renumbering": null, "tls": { - "files_watchdog": true, + "files_watchdog": false, "cert_file": null, "key_file": null, "sticket_secret": null, @@ -1751,11 +1751,18 @@ "pattern": "^(\\d+)(us|ms|s|m|h|d)$", "description": "Minimal time between two log messages, or '0s' to disable.", "default": "0s" + }, + "hard-timeout": { + "type": "string", + "pattern": "^(\\d+)(us|ms|s|m|h|d)$", + "description": "If a measured operation lasts longer, kresd is interrupted; use '0s' to disable.", + "default": "0s" } }, "default": { "enabled": false, - "log_period": "0s" + "log_period": "0s", + "hard_timeout": "0s" } }, "lua": { diff --git a/doc/user/config-defer.rst b/doc/user/config-defer.rst index 4d26ee403..2baafa958 100644 --- a/doc/user/config-defer.rst +++ b/doc/user/config-defer.rst @@ -8,6 +8,7 @@ Request prioritization (defer) Defer tries to mitigate DoS attacks by measuring cpu time consumption of different hosts and networks and deferring future requests from the same origin. 
If there is not enough time to process all the requests, the lowest priority ones are dropped. +It also allows setting a hard timeout on a continuous computation on a single request. The time measurements are taken into account only for TCP-based queries (including DoT and DoH), as the source address of plain UDP can be forged. @@ -46,6 +47,30 @@ The limits can be adjusted for different packet origins using :option:`price-fac and sources with more dropped queries have greater probability to be chosen. +.. option:: defer/hard-timeout: