From: Vsevolod Stakhov Date: Sun, 3 Apr 2022 11:36:55 +0000 (+0100) Subject: [Rework] Reimplement saving/loading the cache items X-Git-Tag: 3.3~293^2~45 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=562b5dd0fd72065176ade3616e2bafe3618879a8;p=thirdparty%2Frspamd.git [Rework] Reimplement saving/loading the cache items --- diff --git a/src/libserver/symcache/symcache_impl.cxx b/src/libserver/symcache/symcache_impl.cxx index aaf8b0cdd4..7836c80b93 100644 --- a/src/libserver/symcache/symcache_impl.cxx +++ b/src/libserver/symcache/symcache_impl.cxx @@ -142,7 +142,7 @@ auto symcache::load_items() -> bool return false; } - const auto *hdr = (struct symcache_header *)cached_map->get_map(); + const auto *hdr = (struct symcache_header *) cached_map->get_map(); if (memcmp(hdr->magic, symcache_magic, sizeof(symcache_magic)) != 0) { @@ -152,7 +152,7 @@ auto symcache::load_items() -> bool } auto *parser = ucl_parser_new(0); - const auto *p = (const std::uint8_t *)(hdr + 1); + const auto *p = (const std::uint8_t *) (hdr + 1); if (!ucl_parser_add_chunk(parser, p, cached_map->get_size() - sizeof(*hdr))) { msg_info_cache ("cannot use file %s, cannot parse: %s", cfg->cache_filename, @@ -222,15 +222,13 @@ auto symcache::load_items() -> bool } if (item->is_virtual() && !(item->type & SYMBOL_TYPE_GHOST)) { - g_assert (item->specific.virtual.parent < (gint)cache->items_by_id->len); - parent = g_ptr_array_index (cache->items_by_id, - item->specific.virtual.parent); - item->specific.virtual.parent_item = parent; + const auto &parent = item->get_parent(*this); - if (parent->st->weight < item->st->weight) { - parent->st->weight = item->st->weight; + if (parent) { + if (parent->st->weight < item->st->weight) { + parent->st->weight = item->st->weight; + } } - /* * We maintain avg_time for virtual symbols equal to the * parent item avg_time @@ -238,8 +236,8 @@ auto symcache::load_items() -> bool item->st->avg_time = parent->st->avg_time; } - cache->total_weight += fabs(item->st->weight); - cache->total_hits += item->st->total_hits; + total_weight += fabs(item->st->weight); + total_hits += item->st->total_hits; } } @@ -249,41 +247,113 @@ auto symcache::load_items() -> bool return true; } -auto symcache::get_item_by_id(int id, bool resolve_parent) const -> const cache_item_ptr & +template +static constexpr auto round_to_hundreds(T x) +{ + return (::floor(x) * 100.0) / 100.0; +} + +bool symcache::save_items() const +{ + auto file_sink = util::raii_file_sink::create(cfg->cache_filename, + O_WRONLY | O_TRUNC, 00644); + + if (!file_sink.has_value()) { + if (errno == EEXIST) { + /* Some other process is already writing data, give up silently */ + return false; + } + + msg_err_cache("%s", file_sink.error().c_str()); + + return false; + } + + struct symcache_header hdr; + memset(&hdr, 0, sizeof(hdr)); + memcpy(hdr.magic, symcache_magic, sizeof(symcache_magic)); + + if (write(file_sink->get_fd(), &hdr, sizeof(hdr)) == -1) { + msg_err_cache("cannot write to file %s, error %d, %s", cfg->cache_filename, + errno, strerror(errno)); + + return false; + } + + auto *top = ucl_object_typed_new(UCL_OBJECT); + + for (const auto &it : items_by_symbol) { + auto item = it.second; + auto elt = ucl_object_typed_new(UCL_OBJECT); + ucl_object_insert_key(elt, + ucl_object_fromdouble(round_to_hundreds(item->st->weight)), + "weight", 0, false); + ucl_object_insert_key(elt, + ucl_object_fromdouble(round_to_hundreds(item->st->time_counter.mean)), + "time", 0, false); + ucl_object_insert_key(elt, ucl_object_fromint(item->st->total_hits), + "count", 0, false); + + auto *freq = ucl_object_typed_new(UCL_OBJECT); + ucl_object_insert_key(freq, + ucl_object_fromdouble(round_to_hundreds(item->st->frequency_counter.mean)), + "avg", 0, false); + ucl_object_insert_key(freq, + ucl_object_fromdouble(round_to_hundreds(item->st->frequency_counter.stddev)), + "stddev", 0, false); + ucl_object_insert_key(elt, freq, "frequency", 0, false); + + ucl_object_insert_key(top, elt, it.first.data(), 0, true); + } + + auto fp = fdopen(file_sink->get_fd(), "a"); + auto *efunc = ucl_object_emit_file_funcs(fp); + auto ret = ucl_object_emit_full(top, UCL_EMIT_JSON_COMPACT, efunc, nullptr); + ucl_object_emit_funcs_free(efunc); + ucl_object_unref(top); + fclose(fp); + + return ret; +} + + +auto symcache::get_item_by_id(int id, bool resolve_parent) const -> const cache_item * { if (id < 0 || id >= items_by_id.size()) { + g_error("internal error: requested item with id %d, when we have just %d items in the cache", + id, (int)items_by_id.size()); g_abort(); } auto &ret = items_by_id[id]; if (!ret) { - g_abort(); + return nullptr; } if (resolve_parent && ret->is_virtual()) { return ret->get_parent(*this); } - return ret; + return ret.get(); } -auto cache_item::get_parent(const symcache &cache) const -> const cache_item_ptr & +auto cache_item::get_parent(const symcache &cache) const -> const cache_item * { if (is_virtual()) { const auto &virtual_sp = std::get(specific); - return virtual_sp.get_parent() + return virtual_sp.get_parent(cache); } - return cache_item_ptr{nullptr}; + return nullptr; } -auto virtual_item::get_parent(const symcache &cache) const -> const cache_item_ptr & +auto virtual_item::get_parent(const symcache &cache) const -> const cache_item * { if (parent) { - return parent; + return parent.get(); } return cache.get_item_by_id(parent_id, false); diff --git a/src/libserver/symcache/symcache_internal.hxx b/src/libserver/symcache/symcache_internal.hxx index a1207fc972..420004064a 100644 --- a/src/libserver/symcache/symcache_internal.hxx +++ b/src/libserver/symcache/symcache_internal.hxx @@ -36,24 +36,24 @@ #include "lua/lua_common.h" #define msg_err_cache(...) rspamd_default_log_function (G_LOG_LEVEL_CRITICAL, \ - cache->static_pool->tag.tagname, cache->cfg->checksum, \ - G_STRFUNC, \ + static_pool->tag.tagname, cfg->checksum, \ + RSPAMD_LOG_FUNC, \ __VA_ARGS__) #define msg_warn_cache(...) rspamd_default_log_function (G_LOG_LEVEL_WARNING, \ static_pool->tag.tagname, cfg->checksum, \ - G_STRFUNC, \ + RSPAMD_LOG_FUNC, \ __VA_ARGS__) #define msg_info_cache(...) rspamd_default_log_function (G_LOG_LEVEL_INFO, \ static_pool->tag.tagname, cfg->checksum, \ - G_STRFUNC, \ + RSPAMD_LOG_FUNC, \ __VA_ARGS__) #define msg_debug_cache(...) rspamd_conditional_debug_fast (NULL, NULL, \ rspamd_symcache_log_id, "symcache", cfg->checksum, \ - G_STRFUNC, \ + RSPAMD_LOG_FUNC, \ __VA_ARGS__) #define msg_debug_cache_task(...) rspamd_conditional_debug_fast (NULL, NULL, \ rspamd_symcache_log_id, "symcache", task->task_pool->tag.uid, \ - G_STRFUNC, \ + RSPAMD_LOG_FUNC, \ __VA_ARGS__) namespace rspamd::symcache { @@ -214,7 +214,7 @@ public: // TODO } - auto get_parent(const symcache &cache) const -> const cache_item_ptr&; + auto get_parent(const symcache &cache) const -> const cache_item *; }; struct cache_item { @@ -253,7 +253,7 @@ struct cache_item { std::vector rdeps; auto is_virtual() const -> bool { return std::holds_alternative(specific); } - auto get_parent(const symcache &cache) const -> const cache_item_ptr &; + auto get_parent(const symcache &cache) const -> const cache_item *; }; struct delayed_cache_dependency { @@ -306,6 +306,7 @@ private: private: /* Internal methods */ auto load_items() -> bool; + auto save_items() const -> bool; public: explicit symcache(struct rspamd_config *cfg) : cfg(cfg) { @@ -326,7 +327,7 @@ public: } } - auto get_item_by_id(int id, bool resolve_parent) const -> const cache_item_ptr &; + auto get_item_by_id(int id, bool resolve_parent) const -> const cache_item *; /* * Initialises the symbols cache, must be called after all symbols are added