git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
[Fix] Prevent hs_helper from deleting multipattern cache files
author: Vsevolod Stakhov <vsevolod@rspamd.com>
Tue, 6 Jan 2026 12:41:33 +0000 (12:41 +0000)
committer: Vsevolod Stakhov <vsevolod@rspamd.com>
Tue, 6 Jan 2026 12:41:33 +0000 (12:41 +0000)
Add rspamd_hyperscan_is_file_known() API to check if a file is in the
known hyperscan files cache. Modify hs_helper cleanup to skip files
that are known (e.g., multipattern TLD cache files) even if they
aren't part of the re_cache.

src/hs_helper.c
src/libserver/hyperscan_tools.cxx
src/libserver/hyperscan_tools.h

index e0ef0d298c5facfd5a0e2b03e8ff47dfc3b3a565..026de81d5cd9b37deb41f93b1f674255d86bb870 100644 (file)
@@ -212,8 +212,9 @@ rspamd_hs_helper_cleanup_dir(struct hs_helper_ctx *ctx, gboolean forced)
                        }
 
                        if (forced ||
-                               !rspamd_re_cache_is_valid_hyperscan_file(ctx->cfg->re_cache,
-                                                                                                                globbuf.gl_pathv[i], TRUE, TRUE, &err)) {
+                               (!rspamd_re_cache_is_valid_hyperscan_file(ctx->cfg->re_cache,
+                                                                                                                 globbuf.gl_pathv[i], TRUE, TRUE, &err) &&
+                                !rspamd_hyperscan_is_file_known(globbuf.gl_pathv[i]))) {
                                if (unlink(globbuf.gl_pathv[i]) == -1) {
                                        msg_err("cannot unlink %s: %s; reason for expiration: %e", globbuf.gl_pathv[i],
                                                        strerror(errno), err);
index a814e1cfab998faa7aecd1fdaffa78b94017e9fc..2dd13da07953886deb221b94554bd7feccf40e88 100644 (file)
@@ -210,6 +210,21 @@ public:
                known_cached_files.erase(fpath.string());
        }
 
+       auto is_file_known(const char *fname) -> bool
+       {
+               auto fpath = std::filesystem::path{fname};
+               std::error_code ec;
+
+               fpath = std::filesystem::canonical(fpath, ec);
+
+               if (ec && ec.value() != 0) {
+                       /* File doesn't exist or can't be canonicalized - not known */
+                       return false;
+               }
+
+               return known_cached_files.contains(fpath.string());
+       }
+
        auto cleanup_maybe() -> void
        {
                auto env_cleanup_disable = std::getenv("RSPAMD_NO_CLEANUP");
@@ -807,6 +822,11 @@ void rspamd_hyperscan_cleanup_maybe(void)
        rspamd::util::hs_known_files_cache::get().cleanup_maybe();
 }
 
+/*
+ * C-callable wrapper: forwards the lookup to the cache object returned by
+ * hs_known_files_cache::get() and returns its answer as a gboolean.
+ */
+gboolean rspamd_hyperscan_is_file_known(const char *fname)
+{
+       auto &&known_files = rspamd::util::hs_known_files_cache::get();
+
+       return known_files.is_file_known(fname);
+}
+
 void rspamd_hyperscan_notice_loaded(void)
 {
        rspamd::util::hs_known_files_cache::get().notice_loaded();
index e9233231614b78605fa49e21b449fc0a177b9e6c..45c88ec29c127da57d8b37e7e1ef2894023f661b 100644 (file)
@@ -70,6 +70,13 @@ void rspamd_hyperscan_notice_loaded(void);
  */
 void rspamd_hyperscan_cleanup_maybe(void);
 
+/**
+ * Check if a file is known to the hyperscan cache (has been noticed).
+ * The path is canonicalized before the lookup, so a file that does not
+ * exist or cannot be canonicalized is reported as not known.
+ * @param fname path to check
+ * @return TRUE if the file is known, FALSE otherwise
+ */
+gboolean rspamd_hyperscan_is_file_known(const char *fname);
+
 /**
  * Get a platform identifier string for hyperscan cache keys.
  * This includes the hyperscan version, platform tune, and CPU features.