From: Vsevolod Stakhov Date: Tue, 6 Jan 2026 12:41:33 +0000 (+0000) Subject: [Fix] Prevent hs_helper from deleting multipattern cache files X-Git-Tag: 4.0.0~208^2~19 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=2fd97fa9ddbf6a98ea64aa08d145f349e933a98d;p=thirdparty%2Frspamd.git [Fix] Prevent hs_helper from deleting multipattern cache files Add rspamd_hyperscan_is_file_known() API to check if a file is in the known hyperscan files cache. Modify hs_helper cleanup to skip files that are known (e.g., multipattern TLD cache files) even if they aren't part of the re_cache. --- diff --git a/src/hs_helper.c b/src/hs_helper.c index e0ef0d298c..026de81d5c 100644 --- a/src/hs_helper.c +++ b/src/hs_helper.c @@ -212,8 +212,9 @@ rspamd_hs_helper_cleanup_dir(struct hs_helper_ctx *ctx, gboolean forced) } if (forced || - !rspamd_re_cache_is_valid_hyperscan_file(ctx->cfg->re_cache, - globbuf.gl_pathv[i], TRUE, TRUE, &err)) { + (!rspamd_re_cache_is_valid_hyperscan_file(ctx->cfg->re_cache, + globbuf.gl_pathv[i], TRUE, TRUE, &err) && + !rspamd_hyperscan_is_file_known(globbuf.gl_pathv[i]))) { if (unlink(globbuf.gl_pathv[i]) == -1) { msg_err("cannot unlink %s: %s; reason for expiration: %e", globbuf.gl_pathv[i], strerror(errno), err); diff --git a/src/libserver/hyperscan_tools.cxx b/src/libserver/hyperscan_tools.cxx index a814e1cfab..2dd13da079 100644 --- a/src/libserver/hyperscan_tools.cxx +++ b/src/libserver/hyperscan_tools.cxx @@ -210,6 +210,21 @@ public: known_cached_files.erase(fpath.string()); } + auto is_file_known(const char *fname) -> bool + { + auto fpath = std::filesystem::path{fname}; + std::error_code ec; + + fpath = std::filesystem::canonical(fpath, ec); + + if (ec && ec.value() != 0) { + /* File doesn't exist or can't be canonicalized - not known */ + return false; + } + + return known_cached_files.contains(fpath.string()); + } + auto cleanup_maybe() -> void { auto env_cleanup_disable = std::getenv("RSPAMD_NO_CLEANUP"); @@ -807,6 +822,11 @@ void rspamd_hyperscan_cleanup_maybe(void) rspamd::util::hs_known_files_cache::get().cleanup_maybe(); } +gboolean rspamd_hyperscan_is_file_known(const char *fname) +{ + return rspamd::util::hs_known_files_cache::get().is_file_known(fname); +} + void rspamd_hyperscan_notice_loaded(void) { rspamd::util::hs_known_files_cache::get().notice_loaded(); diff --git a/src/libserver/hyperscan_tools.h b/src/libserver/hyperscan_tools.h index e923323161..45c88ec29c 100644 --- a/src/libserver/hyperscan_tools.h +++ b/src/libserver/hyperscan_tools.h @@ -70,6 +70,13 @@ void rspamd_hyperscan_notice_loaded(void); */ void rspamd_hyperscan_cleanup_maybe(void); +/** + * Check if a file is known to the hyperscan cache (has been noticed) + * @param fname path to check + * @return TRUE if the file is known + */ +gboolean rspamd_hyperscan_is_file_known(const char *fname); + /** * Get a platform identifier string for hyperscan cache keys. * This includes the hyperscan version, platform tune, and CPU features.