From f661188f499424e58ada563f9f972b18a862c3b1 Mon Sep 17 00:00:00 2001 From: Vsevolod Stakhov Date: Mon, 3 Nov 2025 14:50:16 +0000 Subject: [PATCH] [Fix] Recreate invalid unserialized Hyperscan cache files on version mismatch When Hyperscan library is updated, previously cached .unser files become invalid due to version mismatch. Previously, these files would remain unusable, forcing fallback to slower deserialization on every load. This commit adds automatic detection and recreation of invalid unserialized files: - Extract file creation logic into create_unserialized_file() helper function - Add error handler that deletes and recreates invalid .unser files - Maintain file locking protection against concurrent process access - Fall back to serialized version if recreation fails This ensures cache files are automatically updated after Hyperscan upgrades while protecting against race conditions in multi-process environments. --- src/libserver/hyperscan_tools.cxx | 221 ++++++++++++++++++------------ 1 file changed, 131 insertions(+), 90 deletions(-) diff --git a/src/libserver/hyperscan_tools.cxx b/src/libserver/hyperscan_tools.cxx index 0217610476..a6f5bfdf88 100644 --- a/src/libserver/hyperscan_tools.cxx +++ b/src/libserver/hyperscan_tools.cxx @@ -426,6 +426,104 @@ hs_shared_from_serialized(hs_known_files_cache &hs_cache, raii_mmaped_file &&map return tl::expected{tl::in_place, target, map.get_file().get_name().data()}; } +#if defined(HS_MAJOR) && defined(HS_MINOR) && HS_MAJOR >= 5 && HS_MINOR >= 4 +/** + * Helper function to create unserialized hyperscan database file from serialized data + * This function handles the entire process of creating a temporary file, deserializing + * the database into it, and atomically replacing the target file. + */ +template +static auto +create_unserialized_file(const char *unserialized_fname, T &&cached_serialized, std::int64_t offset) -> tl::expected +{ + const auto *log_func = RSPAMD_LOG_FUNC; + return raii_locked_file::create(unserialized_fname, O_CREAT | O_RDWR | O_EXCL, 00644) + .and_then([&](auto &&new_file_locked) -> tl::expected { + auto tmpfile_pattern = fmt::format("{}{}hsmp-XXXXXXXXXXXXXXXXXX", + cached_serialized.get_file().get_dir(), G_DIR_SEPARATOR); + auto tmpfile = raii_locked_file::mkstemp(tmpfile_pattern.data(), O_CREAT | O_RDWR | O_EXCL, 00644); + + if (!tmpfile) { + return tl::make_unexpected(tmpfile.error()); + } + + auto &tmpfile_checked = tmpfile.value(); + auto tmpfile_name = std::string{tmpfile_checked.get_name()}; + std::size_t unserialized_size; + + if (auto ret = hs_serialized_database_size(((const char *) cached_serialized.get_map()) + offset, + cached_serialized.get_size() - offset, &unserialized_size); + ret != HS_SUCCESS) { + return tl::make_unexpected(error{ + fmt::format("cannot get unserialized database size: {}", ret), + EINVAL, + error_category::IMPORTANT}); + } + + msg_debug_hyperscan_lambda("multipattern: create new database in %s; %Hz size", + tmpfile_name.c_str(), unserialized_size); + + void *buf; +#ifdef HAVE_GETPAGESIZE + auto page_size = getpagesize(); +#else + auto page_size = sysconf(_SC_PAGESIZE); +#endif + if (page_size == -1) { + page_size = 4096; + } + auto errcode = posix_memalign(&buf, page_size, unserialized_size); + if (errcode != 0 || buf == nullptr) { + return tl::make_unexpected(error{"Cannot allocate memory", + errno, error_category::CRITICAL}); + } + + if (auto ret = hs_deserialize_database_at(((const char *) cached_serialized.get_map()) + offset, + cached_serialized.get_size() - offset, (hs_database_t *) buf); + ret != HS_SUCCESS) { + free(buf); + return tl::make_unexpected(error{ + fmt::format("cannot deserialize hyperscan database: {}", ret), ret}); + } + + if (write(tmpfile_checked.get_fd(), buf, unserialized_size) == -1) { + free(buf); + return tl::make_unexpected(error{fmt::format("cannot write to {}: {}", + tmpfile_name, ::strerror(errno)), + errno, error_category::CRITICAL}); + } + + free(buf); + /* + * Unlink target file before renaming to avoid race condition. + * new_file_locked will have flock on that file, so it will be + * replaced after unlink safely, and also unlocked. + */ + (void) unlink(unserialized_fname); + if (rename(tmpfile_name.c_str(), unserialized_fname) == -1) { + if (errno != EEXIST) { + msg_info_hyperscan_lambda("cannot rename %s -> %s: %s", + tmpfile_name.c_str(), + unserialized_fname, + strerror(errno)); + } + } + else { + /* Unlock file but mark it as immortal first to avoid deletion */ + tmpfile_checked.make_immortal(); + (void) tmpfile_checked.unlock(); + } + + /* Reopen in RO mode */ + return raii_file::open(unserialized_fname, O_RDONLY); + }) + .or_else([&](auto unused) -> tl::expected { + // Cannot create file, so try to open it in RO mode + return raii_file::open(unserialized_fname, O_RDONLY); + }); +} +#endif + auto load_cached_hs_file(const char *fname, std::int64_t offset = 0) -> tl::expected { auto &hs_cache = hs_known_files_cache::get(); @@ -438,96 +536,7 @@ auto load_cached_hs_file(const char *fname, std::int64_t offset = 0) -> tl::expe } #if defined(HS_MAJOR) && defined(HS_MINOR) && HS_MAJOR >= 5 && HS_MINOR >= 4 auto unserialized_fname = fmt::format("{}.unser", fname); - auto unserialized_file = raii_locked_file::create(unserialized_fname.c_str(), O_CREAT | O_RDWR | O_EXCL, - 00644) - .and_then([&](auto &&new_file_locked) -> tl::expected { - auto tmpfile_pattern = fmt::format("{}{}hsmp-XXXXXXXXXXXXXXXXXX", - cached_serialized.get_file().get_dir(), G_DIR_SEPARATOR); - auto tmpfile = raii_locked_file::mkstemp(tmpfile_pattern.data(), O_CREAT | O_RDWR | O_EXCL, - 00644); - - if (!tmpfile) { - return tl::make_unexpected(tmpfile.error()); - } - else { - auto &tmpfile_checked = tmpfile.value(); - // Store owned string - auto tmpfile_name = std::string{tmpfile_checked.get_name()}; - std::size_t unserialized_size; - - if (auto ret = hs_serialized_database_size(((const char *) cached_serialized.get_map()) + offset, - cached_serialized.get_size() - offset, &unserialized_size); - ret != HS_SUCCESS) { - return tl::make_unexpected(error{ - fmt::format("cannot get unserialized database size: {}", ret), - EINVAL, - error_category::IMPORTANT}); - } - - msg_debug_hyperscan_lambda("multipattern: create new database in %s; %Hz size", - tmpfile_name.c_str(), unserialized_size); - void *buf; -#ifdef HAVE_GETPAGESIZE - auto page_size = getpagesize(); -#else - auto page_size = sysconf(_SC_PAGESIZE); -#endif - if (page_size == -1) { - page_size = 4096; - } - auto errcode = posix_memalign(&buf, page_size, unserialized_size); - if (errcode != 0 || buf == nullptr) { - return tl::make_unexpected(error{"Cannot allocate memory", - errno, error_category::CRITICAL}); - } - - if (auto ret = hs_deserialize_database_at(((const char *) cached_serialized.get_map()) + offset, - cached_serialized.get_size() - offset, (hs_database_t *) buf); - ret != HS_SUCCESS) { - return tl::make_unexpected(error{ - fmt::format("cannot deserialize hyperscan database: {}", ret), ret}); - } - else { - if (write(tmpfile_checked.get_fd(), buf, unserialized_size) == -1) { - free(buf); - return tl::make_unexpected(error{fmt::format("cannot write to {}: {}", - tmpfile_name, ::strerror(errno)), - errno, error_category::CRITICAL}); - } - else { - free(buf); - /* - * Unlink target file before renaming to avoid - * race condition. - * So what we have is that `new_file_locked` - * will have flock on that file, so it will be - * replaced after unlink safely, and also unlocked. - */ - (void) unlink(unserialized_fname.c_str()); - if (rename(tmpfile_name.c_str(), - unserialized_fname.c_str()) == -1) { - if (errno != EEXIST) { - msg_info_hyperscan_lambda("cannot rename %s -> %s: %s", - tmpfile_name.c_str(), - unserialized_fname.c_str(), - strerror(errno)); - } - } - else { - /* Unlock file but mark it as immortal first to avoid deletion */ - tmpfile_checked.make_immortal(); - (void) tmpfile_checked.unlock(); - } - } - } - /* Reopen in RO mode */ - return raii_file::open(unserialized_fname.c_str(), O_RDONLY); - }; - }) - .or_else([&](auto unused) -> tl::expected { - // Cannot create file, so try to open it in RO mode - return raii_file::open(unserialized_fname.c_str(), O_RDONLY); - }); + auto unserialized_file = create_unserialized_file(unserialized_fname.c_str(), std::forward(cached_serialized), offset); tl::expected ret; @@ -547,6 +556,38 @@ auto load_cached_hs_file(const char *fname, std::int64_t offset = 0) -> tl::expe ret = raii_mmaped_file::mmap_shared(std::move(unserialized_checked), PROT_READ) .and_then([&](U &&mmapped_unserialized) -> auto { return hs_shared_from_unserialized(hs_cache, std::forward(mmapped_unserialized)); + }) + .or_else([&](const error &err) -> tl::expected { + // If validation failed (e.g., version mismatch), try to recreate the file + msg_info_hyperscan_lambda("unserialized hyperscan file %s is invalid (%s), attempting to recreate", + unserialized_fname.c_str(), err.error_message.data()); + + // Delete the invalid file + if (unlink(unserialized_fname.c_str()) == -1) { + msg_debug_hyperscan_lambda("cannot unlink invalid unserialized file %s: %s", + unserialized_fname.c_str(), strerror(errno)); + } + + // Try to create it again + auto recreated_file = create_unserialized_file(unserialized_fname.c_str(), + std::forward(cached_serialized), offset); + + if (recreated_file.has_value() && recreated_file.value().get_size() > 0) { + return raii_mmaped_file::mmap_shared(std::move(recreated_file.value()), PROT_READ) + .and_then([&](V &&mmapped_recreated) -> auto { + return hs_shared_from_unserialized(hs_cache, std::forward(mmapped_recreated)); + }) + .or_else([&](const error &recreate_err) -> tl::expected { + // If recreation also failed, fall back to serialized version + msg_info_hyperscan_lambda("failed to recreate unserialized file, using serialized version: %s", + recreate_err.error_message.data()); + return hs_shared_from_serialized(hs_cache, std::forward(cached_serialized), offset); + }); + } + + // If we couldn't recreate, fall back to serialized version + msg_debug_hyperscan_lambda("falling back to serialized version"); + return hs_shared_from_serialized(hs_cache, std::forward(cached_serialized), offset); }); } } -- 2.47.3