]> git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
[Feature] Enhance hyperscan cache debug logging and correlation
authorVsevolod Stakhov <vsevolod@rspamd.com>
Sat, 10 Jan 2026 15:58:40 +0000 (15:58 +0000)
committerVsevolod Stakhov <vsevolod@rspamd.com>
Sat, 10 Jan 2026 15:58:40 +0000 (15:58 +0000)
- Add entity_name parameter to async cache API for better traceability
- Correlate cache requests with callbacks (show entity/key in both)
- Use rspamd_zhs prefix by default for compressed Redis data
- Switch to idiomatic lua_util.debugm for Lua debug logging
- Log Redis backend config (prefix, ttl, compression) on creation

25 files changed:
.factory/commands/build-and-test.md [new file with mode: 0644]
.factory/commands/build.md [new file with mode: 0644]
.factory/commands/check-code.md [new file with mode: 0644]
.factory/commands/create-release.md [new file with mode: 0644]
.factory/commands/format-code.md [new file with mode: 0644]
.factory/commands/prepare-commit.md [new file with mode: 0644]
.factory/commands/remember.md [new file with mode: 0644]
.factory/commands/review-pr.md [new file with mode: 0644]
.factory/commands/test-lua.md [new file with mode: 0644]
.factory/commands/test.md [new file with mode: 0644]
.factory/memories.md [new file with mode: 0644]
.factory/rules/project_context.md [new file with mode: 0644]
lualib/lua_hs_cache.lua
src/hs_helper.c
src/libserver/hs_cache_backend.c
src/libserver/hs_cache_backend.h
src/libserver/hyperscan_tools.cxx
src/libserver/hyperscan_tools.h
src/libserver/maps/map_helpers.c
src/libserver/maps/map_helpers.h
src/libserver/re_cache.c
src/libserver/rspamd_control.c
src/libserver/rspamd_control.h
src/libserver/worker_util.c
src/libutil/multipattern.c

diff --git a/.factory/commands/build-and-test.md b/.factory/commands/build-and-test.md
new file mode 100644 (file)
index 0000000..3457d50
--- /dev/null
@@ -0,0 +1,18 @@
+---
+description: Build Rspamd and run all unit tests
+---
+
+Complete build and test workflow for Rspamd:
+
+1. Build project:
+   - `cd ~/rspamd.build`
+   - `ninja -j8 install`
+2. If build succeeds, run unit tests:
+   - C/C++ tests: `test/rspamd-test-cxx`
+   - Lua tests: `test/rspamd-test -p /rspamd/lua`
+3. Report comprehensive results:
+   - Build status
+   - Test results
+   - Any failures or errors
+
+This is the standard pre-commit verification workflow.
diff --git a/.factory/commands/build.md b/.factory/commands/build.md
new file mode 100644 (file)
index 0000000..a58a6a7
--- /dev/null
@@ -0,0 +1,12 @@
+---
+description: Build and install Rspamd using ninja
+---
+
+Build and install Rspamd:
+
+1. Navigate to build directory: `~/rspamd.build`
+2. Run: `ninja -j8 install`
+3. Report build status and any errors
+4. If build succeeds, suggest running tests
+
+This uses the project's out-of-source build system with Ninja.
diff --git a/.factory/commands/check-code.md b/.factory/commands/check-code.md
new file mode 100644 (file)
index 0000000..5ce351b
--- /dev/null
@@ -0,0 +1,24 @@
+---
+description: Run all code quality checks (luacheck, clang-format check)
+---
+
+Run the following code quality checks for Rspamd project:
+
+1. For Lua files: Run `luacheck src/plugins/lua/ lualib/ rules/` from project root
+2. For C/C++ files: Check if clang-format would make changes (dry-run)
+3. Report any issues found
+4. Suggest fixes if there are problems
+
+# Remove AI code slop
+
+Check the diff against main, and remove all AI generated slop introduced in this branch.
+
+This includes:
+- Extra comments that a human wouldn't add or is inconsistent with the rest of the file
+- Extra defensive checks or try/catch blocks that are abnormal for that area of the codebase (especially if called by trusted / validated codepaths)
+- Casts to any to get around type issues
+- Any other style that is inconsistent with the file
+
+Report at the end with only a 1-3 sentence summary of what you changed
+
+Focus on files that have been modified in the current git working directory.
diff --git a/.factory/commands/create-release.md b/.factory/commands/create-release.md
new file mode 100644 (file)
index 0000000..6a55a71
--- /dev/null
@@ -0,0 +1,36 @@
+---
+description: Guide through creating a new Rspamd release
+---
+
+Guide through the Rspamd release process:
+
+1. Ask for the new version number (X.Y.Z)
+2. Show recent commits since last release to help write ChangeLog
+3. Help update ChangeLog with proper format:
+   ```
+   X.Y.Z: DD MMM YYYY
+     * [Feature] Feature description
+     * [Fix] Fix description
+   ```
+4. Update version in CMakeLists.txt
+5. Create release commit with full changelog:
+   ```
+   git commit --no-verify -S -m "Release X.Y.Z
+
+   * [Feature] ...
+   * [Fix] ..."
+   ```
+6. Create signed release tag:
+   ```
+   git tag -s X.Y.Z -m "Rspamd X.Y.Z
+
+   Main features:
+   * ...
+
+   Critical fixes:
+   * ..."
+   ```
+7. Create follow-up commit updating version for next dev cycle
+8. Remind to push commits and tags
+
+Walk through each step interactively.
diff --git a/.factory/commands/format-code.md b/.factory/commands/format-code.md
new file mode 100644 (file)
index 0000000..aca6dcb
--- /dev/null
@@ -0,0 +1,12 @@
+---
+description: Format code according to project style (clang-format for C/C++)
+---
+
+Format code files according to Rspamd project style:
+
+1. Identify modified C/C++ files in the current git working directory
+2. Run `clang-format -i` on those files using the `.clang-format` config
+3. Report what was formatted
+4. For Lua files, suggest running luacheck but don't auto-fix
+
+Make sure to use the `.clang-format` file in the project root.
\ No newline at end of file
diff --git a/.factory/commands/prepare-commit.md b/.factory/commands/prepare-commit.md
new file mode 100644 (file)
index 0000000..b8f952a
--- /dev/null
@@ -0,0 +1,37 @@
+---
+description: Prepare for commit - build, test, format, check, suggest commit message
+---
+
+# Remove AI code slop
+
+Check the diff against main, and remove all AI generated slop introduced in this branch.
+
+This includes:
+- Extra comments that a human wouldn't add or is inconsistent with the rest of the file
+- Extra defensive checks or try/catch blocks that are abnormal for that area of the codebase (especially if called by trusted / validated codepaths)
+- Casts to any to get around type issues
+- Any other style that is inconsistent with the file
+
+Report at the end with only a 1-3 sentence summary of what you changed
+
+Complete pre-commit workflow for Rspamd project:
+
+1. Check git status to see what files are staged/modified
+2. **Code formatting and checks**:
+   - C/C++ files: Run clang-format
+   - Lua files: Run luacheck and report issues
+   - Stage formatted files if needed
+3. **Build project**:
+   - `cd ~/rspamd.build && ninja -j8 install`
+   - Report build status
+4. **Run unit tests** (if build succeeds):
+   - C/C++ tests: `test/rspamd-test-cxx`
+   - Lua tests: `test/rspamd-test -p /rspamd/lua`
+   - Report test results
+5. **Suggest commit message** following Rspamd format:
+   - Use appropriate tag: [Feature], [Fix], [Minor], [Test], [Conf], etc.
+   - Write clear, concise description
+   - Remind to use `git commit -S` for GPG signing
+6. Ask if the user wants to proceed with the commit
+
+**NEVER MENTION** generated by or coauthored by.
\ No newline at end of file
diff --git a/.factory/commands/remember.md b/.factory/commands/remember.md
new file mode 100644 (file)
index 0000000..4a1dd30
--- /dev/null
@@ -0,0 +1,10 @@
+---
+description: Save a memory to your memories file
+argument-hint: <what to remember>
+---
+
+Add this to project memories file (.factory/memories.md):
+
+$ARGUMENTS
+
+Format it appropriately based on whether it's a preference, decision, or learning. Include today's date.
diff --git a/.factory/commands/review-pr.md b/.factory/commands/review-pr.md
new file mode 100644 (file)
index 0000000..c53d5e8
--- /dev/null
@@ -0,0 +1,19 @@
+---
+description: Review a GitHub PR for code quality and project standards
+---
+
+Review a GitHub pull request for Rspamd project compliance.
+
+Given a PR number or URL:
+
+1. Fetch PR details using `gh pr view <number>`
+2. Check PR title follows commit message format ([Tag] description)
+3. Review changed files for:
+   - C/C++ files: clang-format compliance, no std::unordered_map usage
+   - Lua files: luacheck compliance, proper rspamd_logger usage
+   - Commit messages: proper tags and GPG signatures
+4. Check if tests are included for new features
+5. Provide detailed feedback on what needs to be fixed
+6. Suggest improvements following project guidelines
+
+Be thorough but constructive in the review.
diff --git a/.factory/commands/test-lua.md b/.factory/commands/test-lua.md
new file mode 100644 (file)
index 0000000..e6eb764
--- /dev/null
@@ -0,0 +1,13 @@
+---
+description: Test Lua code changes with luacheck and functional tests
+---
+
+Test Lua code changes in Rspamd:
+
+1. Run `luacheck src/plugins/lua/ lualib/ rules/` from project root
+2. Report any issues found
+3. If specific Lua files were modified, offer to run related functional tests
+4. Check if test files need to be updated for the changes
+5. Suggest creating new tests if adding new functionality
+
+Provide clear feedback on what needs to be fixed.
\ No newline at end of file
diff --git a/.factory/commands/test.md b/.factory/commands/test.md
new file mode 100644 (file)
index 0000000..6add36c
--- /dev/null
@@ -0,0 +1,13 @@
+---
+description: Run Rspamd unit tests (C/C++ and Lua)
+---
+
+Run Rspamd unit tests:
+
+1. First, ensure the project is built (`ninja -j8 install` in ~/rspamd.build)
+2. Run C/C++ unit tests: `test/rspamd-test-cxx`
+3. Run Lua unit tests: `test/rspamd-test -p /rspamd/lua`
+4. Report results from both test suites
+5. If tests fail, provide details on failures
+
+**Note**: Functional tests are run manually only, not part of this command.
diff --git a/.factory/memories.md b/.factory/memories.md
new file mode 100644 (file)
index 0000000..17ca813
--- /dev/null
@@ -0,0 +1,12 @@
+# Project Memories
+
+## Tools & Preferences
+
+- **2026-01-09**: Use `gh` CLI to operate with GitHub API: getting PRs, issues, etc.
+
+## Project Standards
+
+- **2026-01-09**: Current year is 2026 - use this for copyright headers and date references.
+- **2026-01-09**: Redis sync API is actually a coroutines API - it is unstable and fragile. Refrain from using it anywhere except in `rspamadm` utility.
+- **2026-01-09**: When performing Lua `pcall` from C, use `rspamd_lua_traceback` as the message handler (errfunc) to preserve stack traces for debugging.
+- **2026-01-09**: In Lua C API prefer using `lua_check_text_or_string` to accept both `rspamd{text}` userdata and Lua native strings (string interning is expensive in Lua).
diff --git a/.factory/rules/project_context.md b/.factory/rules/project_context.md
new file mode 100644 (file)
index 0000000..de280a6
--- /dev/null
@@ -0,0 +1,186 @@
+# Rspamd Project Context
+
+This document contains project-specific guidelines and requirements for the Rspamd mail processing system.
+
+## Project Overview
+
+Rspamd is a fast, free and open-source spam filtering system. The codebase consists of:
+- **C/C++ code**: Core functionality in `src/`
+- **Lua code**: Plugins, libraries, and rules in `src/plugins/lua/`, `lualib/`, `rules/`
+- **Configuration**: UCL-based configuration in `conf/`
+- **Tests**: Functional and unit tests in `test/`
+
+## Code Style and Quality
+
+### C and C++ Code
+
+- **Formatting**: Always run `clang-format` using the `.clang-format` file in project root before every commit
+- **Hash Maps**: DO NOT use C++ standard library hash maps (`std::unordered_map`, `std::hash`)
+  - Always use containers from `contrib/ankerl/unordered_dense` for maps/sets and related hashes
+- **Logging**: All debug logging functions use custom printf format implementation
+  - Read comments in `src/libutil/printf.h` before adding logging code
+
+### Lua Code
+
+- **Linting**: Run `luacheck src/plugins/lua/ lualib/ rules/` before every commit
+  - Change to project root directory before running luacheck
+  - Resolve all warnings except those explicitly permitted by project exceptions
+- **Logging**: `rspamd_logger` uses `%s` format strings for all argument placeholders
+
+## Commit Message Format
+
+All commits MUST follow structured format with tags:
+
+### Commit Tags
+
+- `[Feature]` - New features and capabilities
+- `[Fix]` - Bug fixes and corrections
+- `[CritFix]` - Critical bug fixes needing immediate attention
+- `[Minor]` - Minor changes, tweaks, or version updates (prefer for whitespace, nil checks, etc.)
+- `[Project]` - Project-wide changes, refactoring, or infrastructure
+- `[Rework]` - Major reworking of existing functionality
+- `[Conf]` - Configuration changes or updates
+- `[Test]` - Test additions or modifications
+- `[Rules]` - Changes to spam detection rules
+
+**NEVER MENTION** generated by or coauthored by.
+
+### Examples
+
+Single-line commits:
+```
+[Fix] Fix memory leak in dkim module
+[Feature] Add support for encrypted maps
+[Minor] Add missing cmath include
+```
+
+Multi-line commits (releases):
+```
+Release X.Y.Z
+
+* [Feature] First feature description
+* [Fix] First fix description
+```
+
+### GPG Signing
+
+**ALL commits and tags MUST be GPG-signed:**
+- Commits: `git commit -S`
+- Tags: `git tag -s <tagname>`
+- Verify: `git log --show-signature` or `git tag -v <tagname>`
+
+## Pre-commit Checks
+
+Pre-commit hooks verify:
+- Trailing whitespace
+- Line endings
+- ClangFormat
+- LuaCheck
+
+Use `--no-verify` only when necessary and ensure code quality manually.
+
+## Release Process
+
+### 1. Update ChangeLog
+
+Format:
+```
+X.Y.Z: DD MMM YYYY
+  * [Feature] Feature description
+  * [Fix] Fix description
+```
+
+Rules:
+- Date format: `DD MMM YYYY` (e.g., `30 Sep 2025`)
+- Each entry: `  * [Tag]` (two spaces, asterisk, space, tag)
+- Group by tag type
+- Keep descriptions concise
+
+### 2. Create Release Commit
+
+```bash
+git add ChangeLog
+git commit --no-verify -S -m "Release X.Y.Z
+
+* [Feature] Feature 1
+* [Fix] Fix 1
+..."
+```
+
+### 3. Create Release Tag
+
+```bash
+git tag -s X.Y.Z -m "Rspamd X.Y.Z
+
+Brief summary.
+
+Main features:
+* Feature 1
+
+Critical fixes:
+* Fix 1"
+```
+
+### 4. Update Version
+
+After release, increment version in `CMakeLists.txt`:
+```bash
+git add CMakeLists.txt
+git commit --no-verify -S -m "[Minor] Update version of rspamd to X.Y.Z"
+```
+
+## Version Numbers
+
+Defined in `CMakeLists.txt`:
+- **MAJOR**: Incompatible API changes
+- **MINOR**: New features (backward-compatible)
+- **PATCH**: Backward-compatible bug fixes
+
+## Build System
+
+### Build Directory
+- Build directory: `~/rspamd.build` (separate from source tree)
+- Use out-of-source builds with CMake + Ninja
+
+### Build and Install
+```bash
+cd ~/rspamd.build
+ninja -j8 install
+```
+
+### Testing
+
+**Unit Tests (C/C++):**
+```bash
+test/rspamd-test-cxx
+```
+
+**Unit Tests (Lua):**
+```bash
+test/rspamd-test -p /rspamd/lua
+```
+
+**Functional Tests:**
+- Run manually only (not automated in development workflow)
+- Located in `test/functional/`
+
+### Pre-commit Workflow
+1. Make changes in source directory
+2. Build: `cd ~/rspamd.build && ninja -j8 install`
+3. Run unit tests:
+   - C/C++: `test/rspamd-test-cxx`
+   - Lua: `test/rspamd-test -p /rspamd/lua`
+4. For Lua changes: `luacheck src/plugins/lua/ lualib/ rules/`
+5. For C/C++ changes: Check `clang-format` compliance
+6. Commit with GPG signature: `git commit -S -m "[Tag] Description"`
+
+**Note**: Do NOT use `luac` for syntax checking - use the project's test suite instead.
+
+## General Principles
+
+- Write clear, descriptive commit messages
+- One logical change per commit
+- Reference issue numbers when applicable
+- Keep commit history clean and meaningful
+- Do not introduce changes conflicting with these rules
+- When unsure, consult maintainers or in-code comments
index c55f8d7f574ccdf522518a55e8a055ab10fd3ec1..89ef5e1ab321a1c81af0e3a8f35c4bde54af2647 100644 (file)
@@ -17,10 +17,12 @@ limitations under the License.
 local logger = require "rspamd_logger"
 local rspamd_util = require "rspamd_util"
 local lua_redis = require "lua_redis"
+local lua_util = require "lua_util"
 local rspamd_http = require "rspamd_http"
 
 local exports = {}
-local N = "lua_hs_cache"
+-- Use "hyperscan" module name so debug output is enabled together with C code
+local N = "hyperscan"
 
 -- File backend
 local file_backend = {}
@@ -56,8 +58,10 @@ function file_backend:exists(cache_key, platform_id, callback)
   local stat = rspamd_util.stat(path)
 
   if stat then
+    lua_util.debugm(N, "file exists check: %s found, size: %d", path, stat.size)
     callback(nil, true, { size = stat.size, mtime = stat.mtime })
   else
+    lua_util.debugm(N, "file exists check: %s not found", path)
     callback(nil, false, nil)
   end
 end
@@ -65,11 +69,14 @@ end
 function file_backend:load(cache_key, platform_id, callback)
   local path = self:_get_path(cache_key, platform_id)
 
+  lua_util.debugm(N, "file load from: %s", path)
+
   local data, err = rspamd_util.read_file(path)
   if data then
-    logger.debugx(N, "loaded %d bytes from %s", #data, path)
+    lua_util.debugm(N, "file loaded %d bytes from %s", #data, path)
     callback(nil, data)
   else
+    lua_util.debugm(N, "file load failed from %s: %s", path, err or "file not found")
     callback(err or "file not found", nil)
   end
 end
@@ -86,7 +93,7 @@ function file_backend:store(cache_key, platform_id, data, _ttl, callback)
   if ok then
     local renamed, rename_err = os.rename(tmp_path, path)
     if renamed then
-      logger.debugx(N, "stored %d bytes to %s", #data, path)
+      lua_util.debugm(N, "stored %d bytes to %s", #data, path)
       callback(nil)
     else
       os.remove(tmp_path)
@@ -102,7 +109,7 @@ function file_backend:delete(cache_key, platform_id, callback)
   local ok, err = os.remove(path)
 
   if ok then
-    logger.debugx(N, "deleted %s", path)
+    lua_util.debugm(N, "deleted %s", path)
     callback(nil)
   else
     callback(err or "delete failed")
@@ -111,7 +118,9 @@ end
 
 function file_backend:exists_sync(cache_key, platform_id)
   local path = self:_get_path(cache_key, platform_id)
-  return rspamd_util.stat(path) ~= nil, nil
+  local exists = rspamd_util.stat(path) ~= nil
+  lua_util.debugm(N, "file sync exists check: %s %s", path, exists and "found" or "not found")
+  return exists, nil
 end
 
 function file_backend:save_async(cache_key, platform_id, data, callback)
@@ -129,25 +138,36 @@ end
 
 function file_backend:load_sync(cache_key, platform_id)
   local path = self:_get_path(cache_key, platform_id)
-  return rspamd_util.read_file(path)
+  lua_util.debugm(N, "file sync load from: %s", path)
+  local data, err = rspamd_util.read_file(path)
+  if data then
+    lua_util.debugm(N, "file sync loaded %d bytes from %s", #data, path)
+  else
+    lua_util.debugm(N, "file sync load failed from %s: %s", path, err or "file not found")
+  end
+  return data, err
 end
 
 function file_backend:save_sync(cache_key, platform_id, data)
   local path = self:_get_path(cache_key, platform_id)
+  lua_util.debugm(N, "file sync save to: %s, size: %d bytes", path, #data)
   self:_ensure_dir(path)
 
   local tmp_path = path .. ".tmp." .. rspamd_util.random_hex(8)
   local ok, err = rspamd_util.write_file(tmp_path, data)
   if not ok then
+    lua_util.debugm(N, "file sync write failed to %s: %s", tmp_path, err)
     return false, err
   end
 
   local renamed, rename_err = os.rename(tmp_path, path)
   if not renamed then
+    lua_util.debugm(N, "file sync rename failed %s -> %s: %s", tmp_path, path, rename_err)
     os.remove(tmp_path)
     return false, rename_err
   end
 
+  lua_util.debugm(N, "file sync stored %d bytes to %s", #data, path)
   return true, nil
 end
 
@@ -157,9 +177,20 @@ redis_backend.__index = redis_backend
 
 function redis_backend.new(config)
   local self = setmetatable({}, redis_backend)
-  self.redis_params = lua_redis.parse_redis_server('hyperscan', config)
+
+  -- Redis config can be:
+  -- 1. In a 'redis' sub-section of hs_helper worker options
+  -- 2. Directly in the hs_helper worker options (servers, write_servers, etc.)
+  -- 3. Fallback to global 'redis' configuration section
+  local redis_opts = config.redis or config
+  self.redis_params = lua_redis.parse_redis_server(nil, redis_opts, true)
   if not self.redis_params then
-    self.redis_params = lua_redis.parse_redis_server(nil, config)
+    -- Fallback to global redis config
+    self.redis_params = lua_redis.parse_redis_server('redis')
+  end
+
+  if not self.redis_params then
+    logger.warnx(N, "redis backend: no redis configuration found in hs_helper worker or global redis section")
   end
 
   if config.ev_base and self.redis_params then
@@ -172,10 +203,18 @@ function redis_backend.new(config)
     self.config = config
   end
 
-  self.prefix = config.prefix or 'rspamd_hs'
-  self.default_ttl = config.ttl or (86400 * 30) -- 30 days default
-  self.refresh_ttl = config.refresh_ttl ~= false -- Refresh TTL on read by default
-  self.use_compression = config.compression ~= false
+  -- Config options can be in redis sub-section or at top level
+  local opts = config.redis or config
+  self.default_ttl = opts.ttl or config.ttl or (86400 * 30) -- 30 days default
+  self.refresh_ttl = (opts.refresh_ttl ~= false) and (config.refresh_ttl ~= false)
+  self.use_compression = (opts.compression ~= false) and (config.compression ~= false)
+  -- Use different default prefix for compressed (rspamd_zhs) vs uncompressed (rspamd_hs)
+  local default_prefix = self.use_compression and 'rspamd_zhs' or 'rspamd_hs'
+  self.prefix = opts.prefix or config.prefix or default_prefix
+
+  lua_util.debugm(N, "redis backend config: prefix=%s, ttl=%s, refresh_ttl=%s, compression=%s",
+      self.prefix, self.default_ttl, self.refresh_ttl, self.use_compression)
+
   return self
 end
 
@@ -191,13 +230,17 @@ function redis_backend:exists(cache_key, platform_id, callback)
     return
   end
 
+  lua_util.debugm(N, "redis EXISTS check for key: %s", key)
+
   local attrs = {
     ev_base = self.redis_params.ev_base,
     config = self.config,
     callback = function(err, data)
       if err then
+        lua_util.debugm(N, "redis EXISTS failed for key %s: %s", key, err)
         callback(err, false, nil)
       else
+        lua_util.debugm(N, "redis EXISTS result for key %s: %s", key, data == 1 and "found" or "not found")
         callback(nil, data == 1, nil)
       end
     end
@@ -218,8 +261,10 @@ function redis_backend:load(cache_key, platform_id, callback)
   -- Use GETEX to refresh TTL on read if enabled
   local req
   if self.refresh_ttl then
+    lua_util.debugm(N, "redis GETEX (with TTL refresh %d) for key: %s", self.default_ttl, key)
     req = {'GETEX', key, 'EX', tostring(self.default_ttl)}
   else
+    lua_util.debugm(N, "redis GET for key: %s", key)
     req = {'GET', key}
   end
 
@@ -228,25 +273,28 @@ function redis_backend:load(cache_key, platform_id, callback)
     config = self.config,
     callback = function(err, data)
       if err then
+        lua_util.debugm(N, "redis GET failed for key %s: %s", key, err)
         callback(err, nil)
       elseif not data then
+        lua_util.debugm(N, "redis cache miss for key %s", key)
         callback("not found", nil)
-             else
-               -- Decompress if needed
-               if self.use_compression then
-                 local decompress_err, decompressed = rspamd_util.zstd_decompress(data)
-                 if not decompress_err and decompressed then
-                   logger.debugx(N, "loaded and decompressed %d -> %d bytes from redis key %s",
-                       #data, #decompressed, key)
-                   callback(nil, decompressed)
-                 else
-                   callback(decompress_err or "decompression failed", nil)
-                 end
-               else
-                 logger.debugx(N, "loaded %d bytes from redis key %s", #data, key)
-                 callback(nil, data)
-               end
-             end
+      else
+        -- Decompress if needed
+        if self.use_compression then
+          local decompress_err, decompressed = rspamd_util.zstd_decompress(data)
+          if not decompress_err and decompressed then
+            lua_util.debugm(N, "redis loaded and decompressed %d -> %d bytes from key %s (compression ratio: %.1f%%)",
+                #data, #decompressed, key, (1 - #data / #decompressed) * 100)
+            callback(nil, decompressed)
+          else
+            lua_util.debugm(N, "redis decompression failed for key %s: %s", key, decompress_err)
+            callback(decompress_err or "decompression failed", nil)
+          end
+        else
+          lua_util.debugm(N, "redis loaded %d bytes (uncompressed) from key %s", #data, key)
+          callback(nil, data)
+        end
+      end
     end
   }
 
@@ -262,13 +310,16 @@ function redis_backend:store(cache_key, platform_id, data, ttl, callback)
     return
   end
 
+  lua_util.debugm(N, "redis SETEX for key: %s, original size: %d bytes, TTL: %d, compression: %s",
+      key, #data, actual_ttl, self.use_compression and "enabled" or "disabled")
+
   local store_data = data
   -- Compress if enabled
   if self.use_compression then
     local compressed, compress_err = rspamd_util.zstd_compress(data)
     if compressed then
-      logger.debugx(N, "compressed %d -> %d bytes (%.1f%% reduction)",
-          #data, #compressed, (1 - #compressed / #data) * 100)
+      lua_util.debugm(N, "redis compressed %d -> %d bytes (%.1f%% size reduction) for key %s",
+          #data, #compressed, (1 - #compressed / #data) * 100, key)
       store_data = compressed
     else
       logger.warnx(N, "compression failed: %s, storing uncompressed", compress_err)
@@ -280,9 +331,10 @@ function redis_backend:store(cache_key, platform_id, data, ttl, callback)
     config = self.config,
     callback = function(err)
       if err then
+        lua_util.debugm(N, "redis SETEX failed for key %s: %s", key, err)
         callback(err)
       else
-        logger.debugx(N, "stored %d bytes to redis key %s with TTL %d",
+        lua_util.debugm(N, "redis stored %d bytes to key %s with TTL %d",
             #store_data, key, actual_ttl)
         callback(nil)
       end
@@ -301,14 +353,17 @@ function redis_backend:delete(cache_key, platform_id, callback)
     return
   end
 
+  lua_util.debugm(N, "redis DEL for key: %s", key)
+
   local attrs = {
     ev_base = self.redis_params.ev_base,
     config = self.config,
     callback = function(err)
       if err then
+        lua_util.debugm(N, "redis DEL failed for key %s: %s", key, err)
         callback(err)
       else
-        logger.debugx(N, "deleted redis key %s", key)
+        lua_util.debugm(N, "redis deleted key %s", key)
         callback(nil)
       end
     end
@@ -318,74 +373,6 @@ function redis_backend:delete(cache_key, platform_id, callback)
   lua_redis.request(self.redis_params, attrs, req)
 end
 
--- Synchronous methods for C backend interface
-function redis_backend:exists_sync(cache_key, platform_id)
-  local key = self:_get_key(cache_key, platform_id)
-
-  if not self.redis_params then
-    return false, "redis not configured"
-  end
-
-  local ret, conn = lua_redis.redis_connect_sync(self.redis_params, false, key,
-      self.config or rspamd_config, self.redis_params.ev_base)
-  if not ret then
-    return false, "cannot connect to redis"
-  end
-
-  conn:add_cmd('EXISTS', { key })
-  local ok, result = conn:exec()
-  if not ok then
-    return false, "redis EXISTS failed"
-  end
-
-  return result == 1, nil
-end
-
-function redis_backend:load_sync(cache_key, platform_id)
-  local key = self:_get_key(cache_key, platform_id)
-
-  if not self.redis_params then
-    return nil, "redis not configured"
-  end
-
-  local ret, conn = lua_redis.redis_connect_sync(self.redis_params, false, key,
-      self.config or rspamd_config, self.redis_params.ev_base)
-  if not ret then
-    return nil, "cannot connect to redis"
-  end
-
-  -- Use GETEX to refresh TTL on read if enabled
-  if self.refresh_ttl then
-    conn:add_cmd('GETEX', { key, 'EX', tostring(self.default_ttl) })
-  else
-    conn:add_cmd('GET', { key })
-  end
-
-  local ok, data = conn:exec()
-  if not ok then
-    return nil, "redis GET failed"
-  end
-
-  if not data then
-    return nil, nil -- Cache miss, not an error
-  end
-
-  -- Decompress if needed
-  if self.use_compression then
-    local decompress_err, decompressed = rspamd_util.zstd_decompress(data)
-    if not decompress_err and decompressed then
-      logger.debugx(N, "loaded and decompressed %d -> %d bytes from redis key %s",
-          #data, #decompressed, key)
-      return decompressed, nil
-    end
-
-    return nil, decompress_err or "decompression failed"
-  else
-    logger.debugx(N, "loaded %d bytes from redis key %s", #data, key)
-    return data, nil
-  end
-end
-
 function redis_backend:save_async(cache_key, platform_id, data, callback)
   self:store(cache_key, platform_id, data, nil, callback)
 end
@@ -398,54 +385,20 @@ function redis_backend:exists_async(cache_key, platform_id, callback)
   self:exists(cache_key, platform_id, callback)
 end
 
-function redis_backend:save_sync(cache_key, platform_id, data)
-  local key = self:_get_key(cache_key, platform_id)
-
-  if not self.redis_params then
-    return false, "redis not configured"
-  end
-
-  local ret, conn = lua_redis.redis_connect_sync(self.redis_params, true, key,
-      self.config or rspamd_config, self.redis_params.ev_base)
-  if not ret then
-    return false, "cannot connect to redis"
-  end
-
-  local store_data = data
-  -- Compress if enabled
-  if self.use_compression then
-    local compressed, compress_err = rspamd_util.zstd_compress(data)
-    if compressed then
-      logger.debugx(N, "compressed %d -> %d bytes (%.1f%% reduction)",
-          #data, #compressed, (1 - #compressed / #data) * 100)
-      store_data = compressed
-    else
-      logger.warnx(N, "compression failed: %s, storing uncompressed", compress_err)
-    end
-  end
-
-  conn:add_cmd('SETEX', { key, tostring(self.default_ttl), store_data })
-  local ok, result = conn:exec()
-  if not ok then
-    return false, "redis SETEX failed: " .. tostring(result)
-  end
-
-  logger.debugx(N, "stored %d bytes to redis key %s with TTL %d",
-      #store_data, key, self.default_ttl)
-  return true, nil
-end
-
 -- HTTP backend
 local http_backend = {}
 http_backend.__index = http_backend
 
 function http_backend.new(config)
   local self = setmetatable({}, http_backend)
-  self.base_url = config.base_url or config.url
-  self.timeout = config.timeout or 30
-  self.auth_header = config.auth_header
-  self.auth_value = config.auth_value
-  self.use_compression = config.compression ~= false
+
+  -- HTTP config can be in 'http' sub-section or at top level
+  local opts = config.http or config
+  self.base_url = opts.base_url or opts.url
+  self.timeout = opts.timeout or config.timeout or 30
+  self.auth_header = opts.auth_header or config.auth_header
+  self.auth_value = opts.auth_value or config.auth_value
+  self.use_compression = (opts.compression ~= false) and (config.compression ~= false)
   return self
 end
 
@@ -464,6 +417,8 @@ end
 function http_backend:exists(cache_key, platform_id, callback)
   local url = self:_get_url(cache_key, platform_id)
 
+  lua_util.debugm(N, "http HEAD check for url: %s", url)
+
   rspamd_http.request({
     url = url,
     method = 'HEAD',
@@ -471,11 +426,14 @@ function http_backend:exists(cache_key, platform_id, callback)
     timeout = self.timeout,
     callback = function(err, code, _, headers)
       if err then
+        lua_util.debugm(N, "http HEAD failed for %s: %s", url, err)
         callback(err, false, nil)
       elseif code == 200 then
         local size = headers and headers['content-length']
+        lua_util.debugm(N, "http HEAD found %s, size: %s", url, size or "unknown")
         callback(nil, true, { size = tonumber(size) })
       else
+        lua_util.debugm(N, "http HEAD not found %s (code: %d)", url, code)
         callback(nil, false, nil)
       end
     end
@@ -485,13 +443,16 @@ end
 function http_backend:load(cache_key, platform_id, callback)
   local url = self:_get_url(cache_key, platform_id)
 
+  lua_util.debugm(N, "http GET for url: %s", url)
+
   rspamd_http.request({
     url = url,
     method = 'GET',
     headers = self:_get_headers(),
     timeout = self.timeout,
-           callback = function(err, code, body, headers)
+    callback = function(err, code, body, headers)
       if err then
+        lua_util.debugm(N, "http GET failed for %s: %s", url, err)
         callback(err, nil)
       elseif code == 200 and body then
         -- Check if content is compressed
@@ -499,16 +460,22 @@ function http_backend:load(cache_key, platform_id, callback)
         if content_encoding == 'zstd' or self.use_compression then
           local decompress_err, decompressed = rspamd_util.zstd_decompress(body)
           if not decompress_err and decompressed then
-                   callback(nil, decompressed)
+            lua_util.debugm(N, "http loaded and decompressed %d -> %d bytes from %s",
+                #body, #decompressed, url)
+            callback(nil, decompressed)
           else
-                   callback(nil, body)
+            lua_util.debugm(N, "http loaded %d bytes (no decompression) from %s", #body, url)
+            callback(nil, body)
           end
         else
-                 callback(nil, body)
+          lua_util.debugm(N, "http loaded %d bytes from %s", #body, url)
+          callback(nil, body)
         end
       elseif code == 404 then
+        lua_util.debugm(N, "http cache miss (404) for %s", url)
         callback("not found", nil)
       else
+        lua_util.debugm(N, "http GET failed for %s: HTTP %d", url, code)
         callback(string.format("HTTP %d", code), nil)
       end
     end
@@ -519,10 +486,15 @@ function http_backend:store(cache_key, platform_id, data, ttl, callback)
   local url = self:_get_url(cache_key, platform_id)
   local headers = self:_get_headers()
 
+  lua_util.debugm(N, "http PUT for url: %s, original size: %d bytes, compression: %s",
+      url, #data, self.use_compression and "enabled" or "disabled")
+
   local store_data = data
   if self.use_compression then
     local compressed = rspamd_util.zstd_compress(data)
     if compressed then
+      lua_util.debugm(N, "http compressed %d -> %d bytes (%.1f%% size reduction) for %s",
+          #data, #compressed, (1 - #compressed / #data) * 100, url)
       store_data = compressed
       headers['Content-Encoding'] = 'zstd'
     end
@@ -540,10 +512,13 @@ function http_backend:store(cache_key, platform_id, data, ttl, callback)
     timeout = self.timeout,
     callback = function(err, code)
       if err then
+        lua_util.debugm(N, "http PUT failed for %s: %s", url, err)
         callback(err)
       elseif code >= 200 and code < 300 then
+        lua_util.debugm(N, "http stored %d bytes to %s", #store_data, url)
         callback(nil)
       else
+        lua_util.debugm(N, "http PUT failed for %s: HTTP %d", url, code)
         callback(string.format("HTTP %d", code))
       end
     end
@@ -553,6 +528,8 @@ end
 function http_backend:delete(cache_key, platform_id, callback)
   local url = self:_get_url(cache_key, platform_id)
 
+  lua_util.debugm(N, "http DELETE for url: %s", url)
+
   rspamd_http.request({
     url = url,
     method = 'DELETE',
@@ -560,10 +537,13 @@ function http_backend:delete(cache_key, platform_id, callback)
     timeout = self.timeout,
     callback = function(err, code)
       if err then
+        lua_util.debugm(N, "http DELETE failed for %s: %s", url, err)
         callback(err)
       elseif code >= 200 and code < 300 or code == 404 then
+        lua_util.debugm(N, "http deleted %s", url)
         callback(nil)
       else
+        lua_util.debugm(N, "http DELETE failed for %s: HTTP %d", url, code)
         callback(string.format("HTTP %d", code))
       end
     end
@@ -582,14 +562,28 @@ end
 function exports.create_backend(config)
   local backend_type = config.backend or config.cache_backend or 'file'
 
+  lua_util.debugm(N, "creating hyperscan cache backend: %s", backend_type)
+
+  -- Always pass full config - backends will extract what they need
+  -- (config contains ev_base, rspamd_config at top level, plus optional
+  -- redis/http sub-sections for backend-specific settings)
   if backend_type == 'file' then
-    return file_backend.new(config)
+    local be = file_backend.new(config)
+    lua_util.debugm(N, "file backend created, cache_dir: %s", be.cache_dir or "not set")
+    return be
   elseif backend_type == 'redis' then
-    local redis_config = config.redis or config
-    return redis_backend.new(redis_config)
+    local be = redis_backend.new(config)
+    if be.redis_params then
+      lua_util.debugm(N, "redis backend created, prefix: %s, compression: %s",
+          be.prefix, be.use_compression and "enabled" or "disabled")
+    else
+      logger.errx(N, "redis backend created but no redis params - operations will fail!")
+    end
+    return be
   elseif backend_type == 'http' then
-    local http_config = config.http or config
-    return http_backend.new(http_config)
+    local be = http_backend.new(config)
+    lua_util.debugm(N, "http backend created, base_url: %s", be.base_url or "not set")
+    return be
   else
     logger.errx(N, "unknown hyperscan cache backend: %s, falling back to file", backend_type)
     return file_backend.new(config)
index 56c4b9f5a8a9dcafcca418d6594baf4d8f61c881..48b2f8a9873de7617d944fe285266cf85d456445 100644 (file)
@@ -21,6 +21,7 @@
 #include "libserver/worker_util.h"
 #include "libserver/rspamd_control.h"
 #include "libserver/hs_cache_backend.h"
+#include "libserver/maps/map_helpers.h"
 #include "lua/lua_common.h"
 #include "lua/lua_classnames.h"
 #include "unix-std.h"
@@ -321,7 +322,7 @@ rspamd_rs_send_final_notification(struct rspamd_hs_helper_compile_cbdata *cbd)
 
        /* Don't send if worker is terminating */
        if (worker->state != rspamd_worker_state_running) {
-               msg_info("skipping final notification, worker is terminating");
+               msg_debug_hyperscan("skipping final notification, worker is terminating");
                g_free(cbd);
                ev_timer_stop(ctx->event_loop, &ctx->recompile_timer);
                return;
@@ -337,8 +338,8 @@ rspamd_rs_send_final_notification(struct rspamd_hs_helper_compile_cbdata *cbd)
        rspamd_srv_send_command(worker,
                                                        ctx->event_loop, &srv_cmd, -1, NULL, NULL);
 
-       msg_info("sent final hyperscan loaded notification (%d total expressions compiled)",
-                        cbd->total_compiled);
+       msg_debug_hyperscan("sent final hyperscan loaded notification (%d total expressions compiled)",
+                                               cbd->total_compiled);
 
        g_free(cbd);
        ev_timer_stop(ctx->event_loop, &ctx->recompile_timer);
@@ -373,8 +374,8 @@ rspamd_rs_compile_scoped_cb(const char *scope, unsigned int ncompiled, GError *e
 
                        /* Re-check state before sending - could have changed during compilation */
                        if (worker->state != rspamd_worker_state_running) {
-                               msg_info("skipping scope notification for %s, worker is terminating",
-                                                scope ? scope : "default");
+                               msg_debug_hyperscan("skipping scope notification for %s, worker is terminating",
+                                                                       scope ? scope : "default");
                                compile_cbd->scopes_remaining--;
                                if (compile_cbd->scopes_remaining == 0) {
                                        g_free(compile_cbd);
@@ -399,8 +400,8 @@ rspamd_rs_compile_scoped_cb(const char *scope, unsigned int ncompiled, GError *e
                        rspamd_srv_send_command(worker,
                                                                        ctx->event_loop, &srv_cmd, -1, NULL, NULL);
 
-                       msg_info("compiled %d regular expressions for scope %s",
-                                        ncompiled, scope ? scope : "default");
+                       msg_debug_hyperscan("compiled %d regular expressions for scope %s",
+                                                               ncompiled, scope ? scope : "default");
                }
        }
 
@@ -408,15 +409,15 @@ rspamd_rs_compile_scoped_cb(const char *scope, unsigned int ncompiled, GError *e
 
        if (compile_cbd->scopes_remaining == 0) {
                if (compile_cbd->workers_ready) {
-                       msg_info("compiled %d total regular expressions to the hyperscan tree, "
-                                        "send final notification",
-                                        compile_cbd->total_compiled);
+                       msg_debug_hyperscan("compiled %d total regular expressions to the hyperscan tree, "
+                                                               "send final notification",
+                                                               compile_cbd->total_compiled);
                        rspamd_rs_send_final_notification(compile_cbd);
                }
                else {
-                       msg_info("compiled %d total regular expressions to the hyperscan tree, "
-                                        "waiting for workers to be ready before sending notification",
-                                        compile_cbd->total_compiled);
+                       msg_debug_hyperscan("compiled %d total regular expressions to the hyperscan tree, "
+                                                               "waiting for workers to be ready before sending notification",
+                                                               compile_cbd->total_compiled);
                        ctx->loaded = TRUE;
                }
        }
@@ -439,7 +440,7 @@ rspamd_rs_send_single_notification(struct rspamd_hs_helper_single_compile_cbdata
 
        /* Don't send if worker is terminating */
        if (worker->state != rspamd_worker_state_running) {
-               msg_info("skipping single notification, worker is terminating");
+               msg_debug_hyperscan("skipping single notification, worker is terminating");
                g_free(cbd);
                return;
        }
@@ -454,7 +455,7 @@ rspamd_rs_send_single_notification(struct rspamd_hs_helper_single_compile_cbdata
        rspamd_srv_send_command(worker,
                                                        ctx->event_loop, &srv_cmd, -1, NULL, NULL);
 
-       msg_info("sent hyperscan loaded notification");
+       msg_debug_hyperscan("sent hyperscan loaded notification");
 
        g_free(cbd);
        ev_timer_again(ctx->event_loop, &ctx->recompile_timer);
@@ -489,15 +490,15 @@ rspamd_rs_compile_cb(unsigned int ncompiled, GError *err, void *cbd)
        timer_cbd->workers_ready = compile_cbd->workers_ready;
 
        if (timer_cbd->workers_ready) {
-               msg_info("compiled %d regular expressions to the hyperscan tree, "
-                                "send loaded notification",
-                                ncompiled);
+               msg_debug_hyperscan("compiled %d regular expressions to the hyperscan tree, "
+                                                       "send loaded notification",
+                                                       ncompiled);
                rspamd_rs_send_single_notification(timer_cbd);
        }
        else {
-               msg_info("compiled %d regular expressions to the hyperscan tree, "
-                                "waiting for workers to be ready before sending notification",
-                                ncompiled);
+               msg_debug_hyperscan("compiled %d regular expressions to the hyperscan tree, "
+                                                       "waiting for workers to be ready before sending notification",
+                                                       ncompiled);
                ctx->loaded = TRUE;
        }
 
@@ -512,8 +513,8 @@ rspamd_rs_compile(struct hs_helper_ctx *ctx, struct rspamd_worker *worker,
                return FALSE;
        }
 
-       msg_info("starting hyperscan compilation (forced: %s, workers_ready: %s)",
-                        forced ? "yes" : "no", ctx->workers_ready ? "yes" : "no");
+       msg_debug_hyperscan("starting hyperscan compilation (forced: %s, workers_ready: %s)",
+                                               forced ? "yes" : "no", ctx->workers_ready ? "yes" : "no");
 
 #if !defined(__aarch64__) && !defined(__powerpc64__)
        if (!(ctx->cfg->libs_ctx->crypto_ctx->cpu_config & CPUID_SSSE3)) {
@@ -605,7 +606,7 @@ rspamd_rs_compile(struct hs_helper_ctx *ctx, struct rspamd_worker *worker,
                                                                                                                        compile_cbd);
                }
                else {
-                       msg_debug("skipping unloaded scope: %s", scope_name);
+                       msg_debug_hyperscan("skipping unloaded scope: %s", scope_name);
                }
        }
        return TRUE;
@@ -670,7 +671,7 @@ rspamd_hs_helper_mp_send_notification(struct hs_helper_ctx *ctx,
                                   sizeof(srv_cmd.cmd.mp_loaded.cache_dir));
 
        rspamd_srv_send_command(worker, ctx->event_loop, &srv_cmd, -1, NULL, NULL);
-       msg_info("sent multipattern loaded notification for '%s'", name);
+       msg_debug_hyperscan("sent multipattern loaded notification for '%s'", name);
 }
 
 static void
@@ -710,7 +711,7 @@ rspamd_hs_helper_mp_exists_cb(gboolean success,
        (void) error;
 
        if (exists) {
-               msg_info("multipattern cache already exists for '%s', skipping compilation", entry->name);
+               msg_debug_hyperscan("multipattern cache already exists for '%s', skipping compilation", entry->name);
                rspamd_hs_helper_mp_send_notification(mpctx->ctx, mpctx->worker, entry->name);
                mpctx->idx++;
                rspamd_hs_helper_compile_pending_multipatterns_next(mpctx);
@@ -728,7 +729,7 @@ static void
 rspamd_hs_helper_compile_pending_multipatterns_next(struct rspamd_hs_helper_mp_async_ctx *mpctx)
 {
        if (mpctx->worker->state != rspamd_worker_state_running) {
-               msg_info("worker terminating, stopping multipattern compilation");
+               msg_debug_hyperscan("worker terminating, stopping multipattern compilation");
                goto done;
        }
 
@@ -738,13 +739,14 @@ rspamd_hs_helper_compile_pending_multipatterns_next(struct rspamd_hs_helper_mp_a
 
        struct rspamd_multipattern_pending *entry = &mpctx->pending[mpctx->idx];
        unsigned int npatterns = rspamd_multipattern_get_npatterns(entry->mp);
-       msg_info("processing multipattern '%s' with %ud patterns", entry->name, npatterns);
+       msg_debug_hyperscan("processing multipattern '%s' with %ud patterns", entry->name, npatterns);
 
        if (rspamd_hs_cache_has_lua_backend()) {
                char cache_key[rspamd_cryptobox_HASHBYTES * 2 + 1];
                rspamd_snprintf(cache_key, sizeof(cache_key), "%*xs",
                                                (int) sizeof(entry->hash) / 2, entry->hash);
-               rspamd_hs_cache_lua_exists_async(cache_key, rspamd_hs_helper_mp_exists_cb, mpctx);
+               rspamd_hs_cache_lua_exists_async(cache_key, entry->name,
+                                                                                rspamd_hs_helper_mp_exists_cb, mpctx);
                return;
        }
 
@@ -755,8 +757,8 @@ rspamd_hs_helper_compile_pending_multipatterns_next(struct rspamd_hs_helper_mp_a
                rspamd_snprintf(fp, sizeof(fp), "%s/%*xs.hs", mpctx->ctx->hs_dir,
                                                (int) sizeof(entry->hash) / 2, entry->hash);
                if (access(fp, R_OK) == 0) {
-                       msg_info("cache file %s already exists for multipattern '%s', skipping compilation",
-                                        fp, entry->name);
+                       msg_debug_hyperscan("cache file %s already exists for multipattern '%s', skipping compilation",
+                                                               fp, entry->name);
                }
                else {
                        rspamd_worker_set_busy(mpctx->worker, mpctx->ctx->event_loop, "compile multipattern");
@@ -791,11 +793,11 @@ rspamd_hs_helper_compile_pending_multipatterns(struct hs_helper_ctx *ctx,
 
        pending = rspamd_multipattern_get_pending(&count);
        if (pending == NULL || count == 0) {
-               msg_debug("no pending multipattern compilations");
+               msg_debug_hyperscan("no pending multipattern compilations");
                return;
        }
 
-       msg_info("processing %ud pending multipattern compilations", count);
+       msg_debug_hyperscan("processing %ud pending multipattern compilations", count);
 
        struct rspamd_hs_helper_mp_async_ctx *mpctx = g_malloc0(sizeof(*mpctx));
        mpctx->ctx = ctx;
@@ -806,6 +808,168 @@ rspamd_hs_helper_compile_pending_multipatterns(struct hs_helper_ctx *ctx,
 
        rspamd_hs_helper_compile_pending_multipatterns_next(mpctx);
 }
+
+/*
+ * Compile pending regexp maps that were queued during initialization
+ */
+
+struct rspamd_hs_helper_remap_async_ctx {
+       struct hs_helper_ctx *ctx;
+       struct rspamd_worker *worker;
+       struct rspamd_regexp_map_pending *pending;
+       unsigned int count;
+       unsigned int idx;
+};
+
+static void rspamd_hs_helper_compile_pending_regexp_maps_next(struct rspamd_hs_helper_remap_async_ctx *rmctx);
+
+static void
+rspamd_hs_helper_remap_send_notification(struct hs_helper_ctx *ctx,
+                                                                                struct rspamd_worker *worker,
+                                                                                const char *name)
+{
+       struct rspamd_srv_command srv_cmd;
+
+       memset(&srv_cmd, 0, sizeof(srv_cmd));
+       srv_cmd.type = RSPAMD_SRV_REGEXP_MAP_LOADED;
+       rspamd_strlcpy(srv_cmd.cmd.re_map_loaded.name, name,
+                                  sizeof(srv_cmd.cmd.re_map_loaded.name));
+       rspamd_strlcpy(srv_cmd.cmd.re_map_loaded.cache_dir, ctx->hs_dir,
+                                  sizeof(srv_cmd.cmd.re_map_loaded.cache_dir));
+
+       rspamd_srv_send_command(worker, ctx->event_loop, &srv_cmd, -1, NULL, NULL);
+       msg_debug_hyperscan("sent regexp map loaded notification for '%s'", name);
+}
+
+static void
+rspamd_hs_helper_remap_compiled_cb(struct rspamd_regexp_map_helper *re_map,
+                                                                  gboolean success,
+                                                                  GError *err,
+                                                                  void *ud)
+{
+       struct rspamd_hs_helper_remap_async_ctx *rmctx = ud;
+       struct rspamd_regexp_map_pending *entry = &rmctx->pending[rmctx->idx];
+
+       (void) re_map;
+       rspamd_worker_set_busy(rmctx->worker, rmctx->ctx->event_loop, NULL);
+
+       if (!success) {
+               msg_err("failed to compile regexp map '%s': %e", entry->name, err);
+       }
+       else {
+               rspamd_hs_helper_remap_send_notification(rmctx->ctx, rmctx->worker, entry->name);
+       }
+
+       rmctx->idx++;
+       rspamd_hs_helper_compile_pending_regexp_maps_next(rmctx);
+}
+
+static void
+rspamd_hs_helper_remap_exists_cb(gboolean success,
+                                                                const unsigned char *data,
+                                                                gsize len,
+                                                                const char *error,
+                                                                void *ud)
+{
+       struct rspamd_hs_helper_remap_async_ctx *rmctx = ud;
+       struct rspamd_regexp_map_pending *entry = &rmctx->pending[rmctx->idx];
+       bool exists = (success && data == NULL && len == 1);
+
+       (void) error;
+
+       if (exists) {
+               msg_debug_hyperscan("regexp map cache already exists for '%s', skipping compilation", entry->name);
+               rspamd_hs_helper_remap_send_notification(rmctx->ctx, rmctx->worker, entry->name);
+               rmctx->idx++;
+               rspamd_hs_helper_compile_pending_regexp_maps_next(rmctx);
+               return;
+       }
+
+       /* Need to compile+store */
+       rspamd_worker_set_busy(rmctx->worker, rmctx->ctx->event_loop, "compile regexp map");
+       rspamd_regexp_map_compile_hs_to_cache_async(entry->re_map, rmctx->ctx->hs_dir,
+                                                                                               rmctx->ctx->event_loop,
+                                                                                               rspamd_hs_helper_remap_compiled_cb, rmctx);
+}
+
+static void
+rspamd_hs_helper_compile_pending_regexp_maps_next(struct rspamd_hs_helper_remap_async_ctx *rmctx)
+{
+       if (rmctx->worker->state != rspamd_worker_state_running) {
+               msg_debug_hyperscan("worker terminating, stopping regexp map compilation");
+               goto done;
+       }
+
+       if (rmctx->idx >= rmctx->count) {
+               goto done;
+       }
+
+       struct rspamd_regexp_map_pending *entry = &rmctx->pending[rmctx->idx];
+       msg_debug_hyperscan("processing regexp map '%s'", entry->name);
+
+       if (rspamd_hs_cache_has_lua_backend()) {
+               char cache_key[rspamd_cryptobox_HASHBYTES * 2 + 1];
+               rspamd_snprintf(cache_key, sizeof(cache_key), "%*xs",
+                                               (int) sizeof(entry->hash) / 2, entry->hash);
+               rspamd_hs_cache_lua_exists_async(cache_key, entry->name,
+                                                                                rspamd_hs_helper_remap_exists_cb, rmctx);
+               return;
+       }
+
+       /* File backend path: check if cache file exists */
+       {
+               char fp[PATH_MAX];
+               GError *err = NULL;
+               rspamd_snprintf(fp, sizeof(fp), "%s/%*xs.hsmc", rmctx->ctx->hs_dir,
+                                               (int) sizeof(entry->hash) / 2, entry->hash);
+               if (access(fp, R_OK) == 0) {
+                       msg_debug_hyperscan("cache file %s already exists for regexp map '%s', skipping compilation",
+                                                               fp, entry->name);
+               }
+               else {
+                       rspamd_worker_set_busy(rmctx->worker, rmctx->ctx->event_loop, "compile regexp map");
+                       if (!rspamd_regexp_map_compile_hs_to_cache(entry->re_map, rmctx->ctx->hs_dir, &err)) {
+                               msg_err("failed to compile regexp map '%s': %e", entry->name, err);
+                               if (err) g_error_free(err);
+                       }
+                       rspamd_worker_set_busy(rmctx->worker, rmctx->ctx->event_loop, NULL);
+               }
+
+               rspamd_hs_helper_remap_send_notification(rmctx->ctx, rmctx->worker, entry->name);
+               rmctx->idx++;
+               rspamd_hs_helper_compile_pending_regexp_maps_next(rmctx);
+               return;
+       }
+
+done:
+       rspamd_regexp_map_clear_pending();
+       g_free(rmctx);
+}
+
+static void
+rspamd_hs_helper_compile_pending_regexp_maps(struct hs_helper_ctx *ctx,
+                                                                                        struct rspamd_worker *worker)
+{
+       struct rspamd_regexp_map_pending *pending;
+       unsigned int count = 0;
+
+       pending = rspamd_regexp_map_get_pending(&count);
+       if (pending == NULL || count == 0) {
+               msg_debug_hyperscan("no pending regexp map compilations");
+               return;
+       }
+
+       msg_debug_hyperscan("processing %ud pending regexp map compilations", count);
+
+       struct rspamd_hs_helper_remap_async_ctx *rmctx = g_malloc0(sizeof(*rmctx));
+       rmctx->ctx = ctx;
+       rmctx->worker = worker;
+       rmctx->pending = pending;
+       rmctx->count = count;
+       rmctx->idx = 0;
+
+       rspamd_hs_helper_compile_pending_regexp_maps_next(rmctx);
+}
 #endif
 
 static gboolean
@@ -818,9 +982,9 @@ rspamd_hs_helper_workers_spawned(struct rspamd_main *rspamd_main,
        struct rspamd_control_reply rep;
        struct hs_helper_ctx *ctx = ud;
 
-       msg_info("received workers_spawned notification (%d workers); hyperscan compilation finished: %s",
-                        cmd->cmd.workers_spawned.workers_count,
-                        ctx->loaded ? "yes" : "no");
+       msg_debug_hyperscan("received workers_spawned notification (%d workers); hyperscan compilation finished: %s",
+                                               cmd->cmd.workers_spawned.workers_count,
+                                               ctx->loaded ? "yes" : "no");
 
        /* Mark that workers are ready */
        ctx->workers_ready = TRUE;
@@ -857,20 +1021,23 @@ rspamd_hs_helper_workers_spawned(struct rspamd_main *rspamd_main,
                rspamd_srv_send_command(worker,
                                                                ctx->event_loop, &srv_cmd, -1, NULL, NULL);
 
-               msg_info("sent delayed hyperscan loaded notification after workers spawned");
+               msg_debug_hyperscan("sent delayed hyperscan loaded notification after workers spawned");
                ctx->loaded = FALSE; /* Reset to avoid duplicate notifications */
        }
        else if (!ctx->loaded && worker->state == rspamd_worker_state_running) {
                /* Start initial compilation now that workers are ready */
-               msg_info("starting initial hyperscan compilation after workers spawned");
+               msg_debug_hyperscan("starting initial hyperscan compilation after workers spawned");
                if (!rspamd_rs_compile(ctx, worker, FALSE)) {
-                       msg_warn("initial hyperscan compilation failed or not needed");
+                       msg_debug_hyperscan("initial hyperscan compilation skipped or not needed");
                }
        }
 
 #ifdef WITH_HYPERSCAN
        /* Process pending multipattern compilations (e.g., TLD patterns) */
        rspamd_hs_helper_compile_pending_multipatterns(ctx, worker);
+
+       /* Process pending regexp map compilations */
+       rspamd_hs_helper_compile_pending_regexp_maps(ctx, worker);
 #endif
 
        if (attached_fd != -1) {
@@ -897,8 +1064,8 @@ rspamd_hs_helper_timer(EV_P_ ev_timer *w, int revents)
        tim = rspamd_time_jitter(ctx->recompile_time, 0);
        w->repeat = tim;
 
-       msg_info("periodic recompilation timer triggered (workers_ready: %s)",
-                        ctx->workers_ready ? "yes" : "no");
+       msg_debug_hyperscan("periodic recompilation timer triggered (workers_ready: %s)",
+                                               ctx->workers_ready ? "yes" : "no");
        rspamd_rs_compile(ctx, worker, FALSE);
 }
 
@@ -941,11 +1108,11 @@ start_hs_helper(struct rspamd_worker *worker)
 
        ctx->recompile_timer.data = worker;
        tim = rspamd_time_jitter(ctx->recompile_time, 0);
-       msg_info("setting up recompile timer for %.1f seconds", tim);
+       msg_debug_hyperscan("setting up recompile timer for %.1f seconds", tim);
        ev_timer_init(&ctx->recompile_timer, rspamd_hs_helper_timer, tim, 0.0);
        ev_timer_start(ctx->event_loop, &ctx->recompile_timer);
 
-       msg_info("hs_helper starting event loop");
+       msg_debug_hyperscan("hs_helper starting event loop");
        ev_loop(ctx->event_loop, 0);
        rspamd_worker_block_signals();
 
index 83ae5becca6836915c497c688fdbaa22cb8148ae..aeb5195a9e38f6978df27d7bb2e109346633dc8d 100644 (file)
@@ -115,6 +115,9 @@ rspamd_hs_cache_try_init_lua_backend_with_opts(struct rspamd_config *cfg,
 
        /* Call create_backend(config) */
        if (lua_pcall(L, 1, 1, err_idx) != 0) {
+               const char *lua_err = lua_tostring(L, -1);
+               msg_err("failed to create hyperscan cache backend '%s': %s",
+                               backend_name, lua_err ? lua_err : "unknown error");
                lua_settop(L, err_idx - 1);
                return FALSE;
        }
@@ -126,6 +129,8 @@ rspamd_hs_cache_try_init_lua_backend_with_opts(struct rspamd_config *cfg,
        rspamd_hs_cache_set_lua_backend(L, ref, platform_id);
        lua_settop(L, err_idx - 1);
 
+       msg_debug_hyperscan("initialized hyperscan cache backend: %s", backend_name);
+
        return TRUE;
 }
 
@@ -224,220 +229,13 @@ rspamd_hs_cache_try_init_lua_backend(struct rspamd_config *cfg,
        return rspamd_hs_cache_try_init_lua_backend_with_opts(cfg, ev_base, opts, backend_name, cache_dir);
 }
 
-gboolean
-rspamd_hs_cache_lua_save(const char *cache_key,
-                                                const unsigned char *data,
-                                                gsize len,
-                                                GError **err)
-{
-       lua_State *L = lua_backend_L;
-
-       if (rspamd_current_worker && rspamd_current_worker->state != rspamd_worker_state_running) {
-               g_set_error(err, g_quark_from_static_string("hs_cache"), ECANCELED,
-                                       "worker is terminating");
-               return FALSE;
-       }
-
-       if (!rspamd_hs_cache_has_lua_backend()) {
-               g_set_error(err, g_quark_from_static_string("hs_cache"), EINVAL,
-                                       "Lua backend not initialized");
-               return FALSE;
-       }
-
-       /* Get backend object */
-       lua_rawgeti(L, LUA_REGISTRYINDEX, lua_backend_ref);
-       if (!lua_istable(L, -1)) {
-               lua_pop(L, 1);
-               g_set_error(err, g_quark_from_static_string("hs_cache"), EINVAL,
-                                       "Invalid Lua backend reference");
-               return FALSE;
-       }
-
-       /* Get save_sync method */
-       lua_getfield(L, -1, "save_sync");
-       if (!lua_isfunction(L, -1)) {
-               lua_pop(L, 2);
-               g_set_error(err, g_quark_from_static_string("hs_cache"), EINVAL,
-                                       "Lua backend has no save_sync method");
-               return FALSE;
-       }
-
-       /* Push self (backend object) */
-       lua_pushvalue(L, -2);
-       /* Push cache_key */
-       lua_pushstring(L, cache_key);
-       /* Push platform_id */
-       lua_pushstring(L, lua_backend_platform_id ? lua_backend_platform_id : "default");
-       /* Push data as string */
-       lua_pushlstring(L, (const char *) data, len);
-
-       /* Call backend:save_sync(cache_key, platform_id, data) */
-       if (lua_pcall(L, 4, 2, 0) != 0) {
-               const char *lua_err = lua_tostring(L, -1);
-               g_set_error(err, g_quark_from_static_string("hs_cache"), EINVAL,
-                                       "Lua save_sync failed: %s", lua_err ? lua_err : "unknown error");
-               lua_pop(L, 2); /* error + backend table */
-               return FALSE;
-       }
-
-       /* Check result: returns success, error_message */
-       gboolean success = lua_toboolean(L, -2);
-       if (!success) {
-               const char *lua_err = lua_tostring(L, -1);
-               g_set_error(err, g_quark_from_static_string("hs_cache"), EINVAL,
-                                       "Lua backend save failed: %s", lua_err ? lua_err : "unknown error");
-       }
-
-       lua_pop(L, 3); /* result, error, backend table */
-       return success;
-}
-
-gboolean
-rspamd_hs_cache_lua_load(const char *cache_key,
-                                                unsigned char **data,
-                                                gsize *len,
-                                                GError **err)
-{
-       lua_State *L = lua_backend_L;
-
-       if (rspamd_current_worker && rspamd_current_worker->state != rspamd_worker_state_running) {
-               g_set_error(err, g_quark_from_static_string("hs_cache"), ECANCELED,
-                                       "worker is terminating");
-               return FALSE;
-       }
-
-       if (!rspamd_hs_cache_has_lua_backend()) {
-               g_set_error(err, g_quark_from_static_string("hs_cache"), EINVAL,
-                                       "Lua backend not initialized");
-               return FALSE;
-       }
-
-       /* Get backend object */
-       lua_rawgeti(L, LUA_REGISTRYINDEX, lua_backend_ref);
-       if (!lua_istable(L, -1)) {
-               lua_pop(L, 1);
-               g_set_error(err, g_quark_from_static_string("hs_cache"), EINVAL,
-                                       "Invalid Lua backend reference");
-               return FALSE;
-       }
-
-       /* Get load_sync method */
-       lua_getfield(L, -1, "load_sync");
-       if (!lua_isfunction(L, -1)) {
-               lua_pop(L, 2);
-               g_set_error(err, g_quark_from_static_string("hs_cache"), EINVAL,
-                                       "Lua backend has no load_sync method");
-               return FALSE;
-       }
-
-       /* Push self (backend object) */
-       lua_pushvalue(L, -2);
-       /* Push cache_key */
-       lua_pushstring(L, cache_key);
-       /* Push platform_id */
-       lua_pushstring(L, lua_backend_platform_id ? lua_backend_platform_id : "default");
-
-       /* Call backend:load_sync(cache_key, platform_id) */
-       if (lua_pcall(L, 3, 2, 0) != 0) {
-               const char *lua_err = lua_tostring(L, -1);
-               g_set_error(err, g_quark_from_static_string("hs_cache"), EINVAL,
-                                       "Lua load_sync failed: %s", lua_err ? lua_err : "unknown error");
-               lua_pop(L, 2); /* error + backend table */
-               return FALSE;
-       }
-
-       /* Check result: returns data_or_nil, error_message */
-       if (lua_isnil(L, -2)) {
-               const char *lua_err = lua_tostring(L, -1);
-               if (lua_err) {
-                       g_set_error(err, g_quark_from_static_string("hs_cache"), ENOENT,
-                                               "Lua backend load failed: %s", lua_err);
-               }
-               /* Not an error - cache miss */
-               lua_pop(L, 3); /* nil, error, backend table */
-               *data = NULL;
-               *len = 0;
-               return TRUE; /* Cache miss is not an error */
-       }
-
-       /* Get data */
-       size_t data_len;
-       const char *lua_data = lua_tolstring(L, -2, &data_len);
-       if (lua_data && data_len > 0) {
-               *data = g_malloc(data_len);
-               memcpy(*data, lua_data, data_len);
-               *len = data_len;
-       }
-       else {
-               *data = NULL;
-               *len = 0;
-       }
-
-       lua_pop(L, 3); /* data, error/nil, backend table */
-       return TRUE;
-}
-
-gboolean
-rspamd_hs_cache_lua_exists(const char *cache_key, GError **err)
-{
-       lua_State *L = lua_backend_L;
-
-       if (rspamd_current_worker && rspamd_current_worker->state != rspamd_worker_state_running) {
-               g_set_error(err, g_quark_from_static_string("hs_cache"), ECANCELED,
-                                       "worker is terminating");
-               return FALSE;
-       }
-
-       if (!rspamd_hs_cache_has_lua_backend()) {
-               g_set_error(err, g_quark_from_static_string("hs_cache"), EINVAL,
-                                       "Lua backend not initialized");
-               return FALSE;
-       }
-
-       /* Get backend object */
-       lua_rawgeti(L, LUA_REGISTRYINDEX, lua_backend_ref);
-       if (!lua_istable(L, -1)) {
-               lua_pop(L, 1);
-               g_set_error(err, g_quark_from_static_string("hs_cache"), EINVAL,
-                                       "Invalid Lua backend reference");
-               return FALSE;
-       }
-
-       /* Get exists_sync method */
-       lua_getfield(L, -1, "exists_sync");
-       if (!lua_isfunction(L, -1)) {
-               lua_pop(L, 2);
-               g_set_error(err, g_quark_from_static_string("hs_cache"), EINVAL,
-                                       "Lua backend has no exists_sync method");
-               return FALSE;
-       }
-
-       /* Push self (backend object) */
-       lua_pushvalue(L, -2);
-       /* Push cache_key */
-       lua_pushstring(L, cache_key);
-       /* Push platform_id */
-       lua_pushstring(L, lua_backend_platform_id ? lua_backend_platform_id : "default");
-
-       /* Call backend:exists_sync(cache_key, platform_id) */
-       if (lua_pcall(L, 3, 2, 0) != 0) {
-               const char *lua_err = lua_tostring(L, -1);
-               g_set_error(err, g_quark_from_static_string("hs_cache"), EINVAL,
-                                       "Lua exists_sync failed: %s", lua_err ? lua_err : "unknown error");
-               lua_pop(L, 2);
-               return FALSE;
-       }
-
-       gboolean exists = lua_toboolean(L, -2);
-       lua_pop(L, 3); /* result, error/nil, backend table */
-       return exists;
-}
-
 static int
 lua_hs_cache_async_callback(lua_State *L)
 {
        rspamd_hs_cache_async_cb cb = (rspamd_hs_cache_async_cb) lua_touserdata(L, lua_upvalueindex(1));
        void *ud = lua_touserdata(L, lua_upvalueindex(2));
+       const char *entity_name = lua_tostring(L, lua_upvalueindex(3));
+       const char *cache_key = lua_tostring(L, lua_upvalueindex(4));
        const char *err = lua_tostring(L, 1);
        const unsigned char *data = NULL;
        size_t len = 0;
@@ -457,6 +255,11 @@ lua_hs_cache_async_callback(lua_State *L)
                }
        }
 
+       msg_debug_hyperscan("async_callback: entity='%s', key=%s, success=%s, len=%z, err=%s",
+                                               entity_name ? entity_name : "unknown",
+                                               cache_key ? cache_key : "unknown",
+                                               err == NULL ? "yes" : "no", len, err ? err : "(none)");
+
        if (cb) {
                cb(err == NULL, data, len, err, ud);
        }
@@ -465,6 +268,7 @@ lua_hs_cache_async_callback(lua_State *L)
 }
 
 void rspamd_hs_cache_lua_save_async(const char *cache_key,
+                                                                       const char *entity_name,
                                                                        const unsigned char *data,
                                                                        gsize len,
                                                                        rspamd_hs_cache_async_cb cb,
@@ -473,12 +277,17 @@ void rspamd_hs_cache_lua_save_async(const char *cache_key,
        lua_State *L = lua_backend_L;
        int err_idx;
 
+       msg_debug_hyperscan("save_async: entity='%s', key=%s, len=%z",
+                                               entity_name ? entity_name : "unknown", cache_key, len);
+
        if (rspamd_current_worker && rspamd_current_worker->state != rspamd_worker_state_running) {
+               msg_debug_hyperscan("save_async: worker terminating, skipping");
                if (cb) cb(FALSE, NULL, 0, "worker is terminating", ud);
                return;
        }
 
        if (!rspamd_hs_cache_has_lua_backend()) {
+               msg_debug_hyperscan("save_async: no Lua backend");
                if (cb) cb(FALSE, NULL, 0, "Lua backend not initialized", ud);
                return;
        }
@@ -511,10 +320,12 @@ void rspamd_hs_cache_lua_save_async(const char *cache_key,
        /* Push data */
        lua_pushlstring(L, (const char *) data, len);
 
-       /* Push callback wrapper */
+       /* Push callback wrapper with 4 upvalues: cb, ud, entity_name, cache_key */
        lua_pushlightuserdata(L, (void *) cb);
        lua_pushlightuserdata(L, ud);
-       lua_pushcclosure(L, lua_hs_cache_async_callback, 2);
+       lua_pushstring(L, entity_name ? entity_name : "unknown");
+       lua_pushstring(L, cache_key);
+       lua_pushcclosure(L, lua_hs_cache_async_callback, 4);
 
        /* Call backend:save_async(cache_key, platform_id, data, callback) */
        if (lua_pcall(L, 5, 0, err_idx) != 0) {
@@ -528,13 +339,18 @@ void rspamd_hs_cache_lua_save_async(const char *cache_key,
 }
 
 void rspamd_hs_cache_lua_load_async(const char *cache_key,
+                                                                       const char *entity_name,
                                                                        rspamd_hs_cache_async_cb cb,
                                                                        void *ud)
 {
        lua_State *L = lua_backend_L;
        int err_idx;
 
+       msg_debug_hyperscan("load_async: entity='%s', key=%s",
+                                               entity_name ? entity_name : "unknown", cache_key);
+
        if (rspamd_current_worker && rspamd_current_worker->state != rspamd_worker_state_running) {
+               msg_debug_hyperscan("load_async: worker terminating, skipping");
                if (cb) cb(FALSE, NULL, 0, "worker is terminating", ud);
                return;
        }
@@ -570,10 +386,12 @@ void rspamd_hs_cache_lua_load_async(const char *cache_key,
        /* Push platform_id */
        lua_pushstring(L, lua_backend_platform_id ? lua_backend_platform_id : "default");
 
-       /* Push callback wrapper */
+       /* Push callback wrapper with 4 upvalues: cb, ud, entity_name, cache_key */
        lua_pushlightuserdata(L, (void *) cb);
        lua_pushlightuserdata(L, ud);
-       lua_pushcclosure(L, lua_hs_cache_async_callback, 2);
+       lua_pushstring(L, entity_name ? entity_name : "unknown");
+       lua_pushstring(L, cache_key);
+       lua_pushcclosure(L, lua_hs_cache_async_callback, 4);
 
        /* Call backend:load_async(cache_key, platform_id, callback) */
        if (lua_pcall(L, 4, 0, err_idx) != 0) {
@@ -587,18 +405,24 @@ void rspamd_hs_cache_lua_load_async(const char *cache_key,
 }
 
 void rspamd_hs_cache_lua_exists_async(const char *cache_key,
+                                                                         const char *entity_name,
                                                                          rspamd_hs_cache_async_cb cb,
                                                                          void *ud)
 {
        lua_State *L = lua_backend_L;
        int err_idx;
 
+       msg_debug_hyperscan("exists_async: entity='%s', key=%s",
+                                               entity_name ? entity_name : "unknown", cache_key);
+
        if (rspamd_current_worker && rspamd_current_worker->state != rspamd_worker_state_running) {
+               msg_debug_hyperscan("exists_async: worker terminating, skipping");
                if (cb) cb(FALSE, NULL, 0, "worker is terminating", ud);
                return;
        }
 
        if (!rspamd_hs_cache_has_lua_backend()) {
+               msg_debug_hyperscan("exists_async: no Lua backend");
                if (cb) cb(FALSE, NULL, 0, "Lua backend not initialized", ud);
                return;
        }
@@ -629,10 +453,12 @@ void rspamd_hs_cache_lua_exists_async(const char *cache_key,
        /* Push platform_id */
        lua_pushstring(L, lua_backend_platform_id ? lua_backend_platform_id : "default");
 
-       /* Push callback wrapper */
+       /* Push callback wrapper with 4 upvalues: cb, ud, entity_name, cache_key */
        lua_pushlightuserdata(L, (void *) cb);
        lua_pushlightuserdata(L, ud);
-       lua_pushcclosure(L, lua_hs_cache_async_callback, 2);
+       lua_pushstring(L, entity_name ? entity_name : "unknown");
+       lua_pushstring(L, cache_key);
+       lua_pushcclosure(L, lua_hs_cache_async_callback, 4);
 
        /* Call backend:exists_async(cache_key, platform_id, callback) */
        if (lua_pcall(L, 4, 0, err_idx) != 0) {
index 682e616801e56c312bc092cc42fcec80dd14e18e..6a87d7b54d3c900814af3565eaaf14ebbe32302e 100644 (file)
@@ -134,40 +134,6 @@ gboolean rspamd_hs_cache_has_lua_backend(void);
 gboolean rspamd_hs_cache_try_init_lua_backend(struct rspamd_config *cfg,
                                                                                          struct ev_loop *ev_base);
 
-/**
- * Save data to cache via Lua backend (synchronous)
- * @param cache_key unique cache key (hash)
- * @param data serialized hyperscan data
- * @param len data length
- * @param err error output
- * @return TRUE on success
- */
-gboolean rspamd_hs_cache_lua_save(const char *cache_key,
-                                                                 const unsigned char *data,
-                                                                 gsize len,
-                                                                 GError **err);
-
-/**
- * Load data from cache via Lua backend (synchronous)
- * @param cache_key unique cache key (hash)
- * @param data output data (caller must g_free)
- * @param len output data length
- * @param err error output
- * @return TRUE on success (including cache miss with data=NULL)
- */
-gboolean rspamd_hs_cache_lua_load(const char *cache_key,
-                                                                 unsigned char **data,
-                                                                 gsize *len,
-                                                                 GError **err);
-
-/**
- * Check if cache entry exists via Lua backend (synchronous)
- * @param cache_key unique cache key (hash)
- * @param err error output
- * @return TRUE if exists
- */
-gboolean rspamd_hs_cache_lua_exists(const char *cache_key, GError **err);
-
 /**
  * Async callback type
  * @param success TRUE if operation succeeded
@@ -185,12 +151,14 @@ typedef void (*rspamd_hs_cache_async_cb)(gboolean success,
 /**
  * Save data to cache via Lua backend (asynchronous)
  * @param cache_key unique cache key (hash)
+ * @param entity_name human-readable name of the entity (e.g., multimap name, re class)
  * @param data serialized hyperscan data
  * @param len data length
  * @param cb completion callback
  * @param ud userdata
  */
 void rspamd_hs_cache_lua_save_async(const char *cache_key,
+                                                                       const char *entity_name,
                                                                        const unsigned char *data,
                                                                        gsize len,
                                                                        rspamd_hs_cache_async_cb cb,
@@ -199,20 +167,24 @@ void rspamd_hs_cache_lua_save_async(const char *cache_key,
 /**
  * Load data from cache via Lua backend (asynchronous)
  * @param cache_key unique cache key (hash)
+ * @param entity_name human-readable name of the entity (e.g., multimap name, re class)
  * @param cb completion callback
  * @param ud userdata
  */
 void rspamd_hs_cache_lua_load_async(const char *cache_key,
+                                                                       const char *entity_name,
                                                                        rspamd_hs_cache_async_cb cb,
                                                                        void *ud);
 
 /**
  * Check if cache entry exists via Lua backend (asynchronous)
  * @param cache_key unique cache key (hash)
+ * @param entity_name human-readable name of the entity (e.g., multimap name, re class)
  * @param cb completion callback (len will be 1 if exists, 0 otherwise)
  * @param ud userdata
  */
 void rspamd_hs_cache_lua_exists_async(const char *cache_key,
+                                                                         const char *entity_name,
                                                                          rspamd_hs_cache_async_cb cb,
                                                                          void *ud);
 
index 5494595a748939e6a0200f3b8be254434dd84ee4..fcb83d1866cc61ac6546ee6091a382556efd8859 100644 (file)
@@ -37,8 +37,6 @@
 #include "rspamd_control.h"
 #include "cryptobox.h"
 
-#define HYPERSCAN_LOG_TAG "hsxxxx"
-
 #ifdef HS_MAJOR
 #ifndef HS_VERSION_32BIT
 #define HS_VERSION_32BIT ((HS_MAJOR << 24) | (HS_MINOR << 16) | (HS_PATCH << 8) | 0)
                                                                                                                   "hyperscan", HYPERSCAN_LOG_TAG, \
                                                                                                                   RSPAMD_LOG_FUNC,                \
                                                                                                                   __VA_ARGS__)
-#define msg_debug_hyperscan(...) rspamd_conditional_debug_fast(nullptr, nullptr,                                        \
-                                                                                                                          rspamd_hyperscan_log_id, "hyperscan", HYPERSCAN_LOG_TAG, \
-                                                                                                                          RSPAMD_LOG_FUNC,                                         \
-                                                                                                                          __VA_ARGS__)
 #define msg_debug_hyperscan_lambda(...) rspamd_conditional_debug_fast(nullptr, nullptr,                                        \
                                                                                                                                          rspamd_hyperscan_log_id, "hyperscan", HYPERSCAN_LOG_TAG, \
                                                                                                                                          log_func,                                                \
index 45c88ec29c127da57d8b37e7e1ef2894023f661b..255e334e0466c59ba5b34480953051ec87cda2a4 100644 (file)
 #ifdef WITH_HYPERSCAN
 
 #include "hs.h"
+#include "logger.h"
 
 G_BEGIN_DECLS
 
+EXTERN_LOG_MODULE_DEF(hyperscan);
+
+#define HYPERSCAN_LOG_TAG "hsxxxx"
+
+#define msg_debug_hyperscan(...) rspamd_conditional_debug_fast(NULL, NULL,                                              \
+                                                                                                                          rspamd_hyperscan_log_id, "hyperscan", HYPERSCAN_LOG_TAG, \
+                                                                                                                          RSPAMD_LOG_FUNC, __VA_ARGS__)
+
 /**
  * Opaque structure that represents hyperscan (maybe shared/cached database)
  */
index 7e7d701740723e89e1c9bcd3a5778310cc9263ac..3f5598b2de998a0f006d3200958fbe6606c97559 100644 (file)
@@ -27,6 +27,8 @@
 #ifdef WITH_HYPERSCAN
 #include "hs.h"
 #include "hyperscan_tools.h"
+#include "hs_cache_backend.h"
+#include "unix-std.h"
 #endif
 #ifndef WITH_PCRE2
 #include <pcre.h>
@@ -1150,13 +1152,17 @@ rspamd_re_map_finalize(struct rspamd_regexp_map_helper *re_map)
 #ifdef WITH_HYPERSCAN
        unsigned int i;
        hs_platform_info_t plt;
-       hs_compile_error_t *err;
        struct rspamd_map *map;
        rspamd_regexp_t *re;
        int pcre_flags;
 
        map = re_map->map;
 
+       if (re_map->regexps == NULL || re_map->regexps->len == 0) {
+               msg_err_map("regexp map is empty");
+               return;
+       }
+
 #if !defined(__aarch64__) && !defined(__powerpc64__)
        if (!(map->cfg->libs_ctx->crypto_ctx->cpu_config & CPUID_SSSE3)) {
                msg_info_map("disable hyperscan for map %s, ssse3 instructions are not supported by CPU",
@@ -1170,6 +1176,7 @@ rspamd_re_map_finalize(struct rspamd_regexp_map_helper *re_map)
                return;
        }
 
+       /* Prepare patterns for hyperscan compilation */
        re_map->patterns = g_new(char *, re_map->regexps->len);
        re_map->flags = g_new(int, re_map->regexps->len);
        re_map->ids = g_new(int, re_map->regexps->len);
@@ -1222,59 +1229,17 @@ rspamd_re_map_finalize(struct rspamd_regexp_map_helper *re_map)
                re_map->ids[i] = i;
        }
 
-       if (re_map->regexps->len > 0 && re_map->patterns) {
-
-               if (!rspamd_try_load_re_map_cache(re_map)) {
-                       double ts1 = rspamd_get_ticks(FALSE);
-                       hs_database_t *hs_db = NULL;
-
-                       if (hs_compile_multi((const char **) re_map->patterns,
-                                                                re_map->flags,
-                                                                re_map->ids,
-                                                                re_map->regexps->len,
-                                                                HS_MODE_BLOCK,
-                                                                &plt,
-                                                                &hs_db,
-                                                                &err) != HS_SUCCESS) {
-
-                               msg_err_map("cannot create tree of regexp when processing '%s': %s",
-                                                       err->expression >= 0 ? re_map->patterns[err->expression] : "unknown regexp", err->message);
-                               re_map->hs_db = NULL;
-                               hs_free_compile_error(err);
-
-                               return;
-                       }
+       /*
+        * Instead of compiling hyperscan synchronously here (which blocks the main process),
+        * we add this map to the pending compilation queue. The hs_helper worker will compile
+        * the hyperscan database asynchronously and notify workers when it's ready.
+        *
+        * In the meantime, we use PCRE fallback via the regexps array.
+        */
+       msg_info_map("regexp map %s (%ud patterns) queued for async hyperscan compilation, using PCRE fallback",
+                                map->name, re_map->regexps->len);
 
-                       if (re_map->map->cfg->hs_cache_dir) {
-                               char fpath[PATH_MAX];
-                               rspamd_snprintf(fpath, sizeof(fpath), "%s/%*xs.hsmc",
-                                                               re_map->map->cfg->hs_cache_dir,
-                                                               (int) rspamd_cryptobox_HASHBYTES / 2, re_map->re_digest);
-                               re_map->hs_db = rspamd_hyperscan_from_raw_db(hs_db, fpath);
-                       }
-                       else {
-                               re_map->hs_db = rspamd_hyperscan_from_raw_db(hs_db, NULL);
-                       }
-
-                       ts1 = (rspamd_get_ticks(FALSE) - ts1) * 1000.0;
-                       msg_info_map("hyperscan compiled %d regular expressions from %s in %.1f ms",
-                                                re_map->regexps->len, re_map->map->name, ts1);
-                       rspamd_try_save_re_map_cache(re_map);
-               }
-               else {
-                       msg_info_map("hyperscan read %d cached regular expressions from %s",
-                                                re_map->regexps->len, re_map->map->name);
-               }
-
-               if (hs_alloc_scratch(rspamd_hyperscan_get_database(re_map->hs_db), &re_map->hs_scratch) != HS_SUCCESS) {
-                       msg_err_map("cannot allocate scratch space for hyperscan");
-                       rspamd_hyperscan_free(re_map->hs_db, true);
-                       re_map->hs_db = NULL;
-               }
-       }
-       else {
-               msg_err_map("regexp map is empty");
-       }
+       rspamd_regexp_map_add_pending(re_map, map->name);
 #endif
 }
 
@@ -1850,3 +1815,567 @@ rspamd_match_cdb_map(struct rspamd_cdb_map_helper *map,
 
        return NULL;
 }
+
+/*
+ * Pending regexp map compilation queue for deferred HS compilation
+ */
+static GArray *pending_regexp_maps = NULL;
+
+void rspamd_regexp_map_add_pending(struct rspamd_regexp_map_helper *re_map,
+                                                                  const char *name)
+{
+       struct rspamd_regexp_map_pending entry;
+       struct rspamd_map *map;
+
+       g_assert(re_map != NULL);
+       g_assert(name != NULL);
+
+       map = re_map->map;
+
+       if (pending_regexp_maps == NULL) {
+               pending_regexp_maps = g_array_new(FALSE, FALSE,
+                                                                                 sizeof(struct rspamd_regexp_map_pending));
+       }
+
+       entry.re_map = re_map;
+       entry.name = g_strdup(name);
+       rspamd_regexp_map_get_hash(re_map, entry.hash);
+
+       g_array_append_val(pending_regexp_maps, entry);
+
+       msg_info_map("added regexp map '%s' (%ud patterns) to pending compilation queue",
+                                name, re_map->regexps ? re_map->regexps->len : 0);
+}
+
+struct rspamd_regexp_map_pending *
+rspamd_regexp_map_get_pending(unsigned int *count)
+{
+       if (pending_regexp_maps == NULL || pending_regexp_maps->len == 0) {
+               *count = 0;
+               return NULL;
+       }
+
+       *count = pending_regexp_maps->len;
+       return (struct rspamd_regexp_map_pending *) pending_regexp_maps->data;
+}
+
+void rspamd_regexp_map_clear_pending(void)
+{
+       if (pending_regexp_maps == NULL) {
+               return;
+       }
+
+       for (unsigned int i = 0; i < pending_regexp_maps->len; i++) {
+               struct rspamd_regexp_map_pending *entry;
+
+               entry = &g_array_index(pending_regexp_maps,
+                                                          struct rspamd_regexp_map_pending, i);
+               g_free(entry->name);
+       }
+
+       g_array_free(pending_regexp_maps, TRUE);
+       pending_regexp_maps = NULL;
+}
+
+struct rspamd_regexp_map_helper *
+rspamd_regexp_map_find_pending(const char *name)
+{
+       if (pending_regexp_maps == NULL || name == NULL) {
+               return NULL;
+       }
+
+       for (unsigned int i = 0; i < pending_regexp_maps->len; i++) {
+               struct rspamd_regexp_map_pending *entry;
+
+               entry = &g_array_index(pending_regexp_maps,
+                                                          struct rspamd_regexp_map_pending, i);
+               if (strcmp(entry->name, name) == 0) {
+                       return entry->re_map;
+               }
+       }
+
+       return NULL;
+}
+
+void rspamd_regexp_map_get_hash(struct rspamd_regexp_map_helper *re_map,
+                                                               unsigned char *hash_out)
+{
+       g_assert(re_map != NULL);
+       g_assert(hash_out != NULL);
+
+       memcpy(hash_out, re_map->re_digest, rspamd_cryptobox_HASHBYTES);
+}
+
+#ifdef WITH_HYPERSCAN
+
+gboolean
+rspamd_regexp_map_compile_hs_to_cache(struct rspamd_regexp_map_helper *re_map,
+                                                                         const char *cache_dir,
+                                                                         GError **err)
+{
+       hs_platform_info_t plt;
+       hs_compile_error_t *hs_errors = NULL;
+       hs_database_t *db = NULL;
+       char *bytes = NULL;
+       gsize len;
+       char fp[PATH_MAX], np[PATH_MAX];
+       int fd;
+       struct rspamd_map *map;
+
+       g_assert(re_map != NULL);
+       g_assert(cache_dir != NULL);
+
+       map = re_map->map;
+
+       if (re_map->regexps == NULL || re_map->regexps->len == 0) {
+               g_set_error(err, g_quark_from_static_string("regexp_map"), EINVAL,
+                                       "regexp map is empty");
+               return FALSE;
+       }
+
+       /* Patterns must already be prepared */
+       if (re_map->patterns == NULL) {
+               g_set_error(err, g_quark_from_static_string("regexp_map"), EINVAL,
+                                       "regexp map patterns not prepared");
+               return FALSE;
+       }
+
+       if (hs_populate_platform(&plt) != HS_SUCCESS) {
+               g_set_error(err, g_quark_from_static_string("regexp_map"), EINVAL,
+                                       "cannot populate hyperscan platform");
+               return FALSE;
+       }
+
+       if (hs_compile_multi((const char **) re_map->patterns,
+                                                re_map->flags,
+                                                re_map->ids,
+                                                re_map->regexps->len,
+                                                HS_MODE_BLOCK,
+                                                &plt,
+                                                &db,
+                                                &hs_errors) != HS_SUCCESS) {
+               g_set_error(err, g_quark_from_static_string("regexp_map"), EINVAL,
+                                       "cannot compile hyperscan database: %s (pattern %d)",
+                                       hs_errors ? hs_errors->message : "unknown error",
+                                       hs_errors ? hs_errors->expression : -1);
+               if (hs_errors) {
+                       hs_free_compile_error(hs_errors);
+               }
+               return FALSE;
+       }
+
+       if (hs_serialize_database(db, &bytes, &len) != HS_SUCCESS) {
+               g_set_error(err, g_quark_from_static_string("regexp_map"), EINVAL,
+                                       "cannot serialize hyperscan database");
+               hs_free_database(db);
+               return FALSE;
+       }
+
+       hs_free_database(db);
+
+       /* Write to temp file and rename atomically */
+       rspamd_snprintf(fp, sizeof(fp), "%s/hsmc-XXXXXXXXXXXXX", cache_dir);
+
+       if ((fd = g_mkstemp_full(fp, O_WRONLY | O_CREAT | O_EXCL, 00644)) == -1) {
+               g_set_error(err, g_quark_from_static_string("regexp_map"), errno,
+                                       "cannot create temp file %s: %s", fp, strerror(errno));
+               free(bytes);
+               return FALSE;
+       }
+
+       if (write(fd, bytes, len) != (ssize_t) len) {
+               g_set_error(err, g_quark_from_static_string("regexp_map"), errno,
+                                       "cannot write to %s: %s", fp, strerror(errno));
+               close(fd);
+               unlink(fp);
+               free(bytes);
+               return FALSE;
+       }
+
+       free(bytes);
+       fsync(fd);
+       close(fd);
+
+       rspamd_snprintf(np, sizeof(np), "%s/%*xs.hsmc",
+                                       cache_dir,
+                                       (int) rspamd_cryptobox_HASHBYTES / 2, re_map->re_digest);
+
+       if (rename(fp, np) == -1) {
+               g_set_error(err, g_quark_from_static_string("regexp_map"), errno,
+                                       "cannot rename %s to %s: %s", fp, np, strerror(errno));
+               unlink(fp);
+               return FALSE;
+       }
+
+       msg_info_map("written cached hyperscan data for %s to %s (%Hz length)",
+                                map ? map->name : "unknown", np, len);
+       rspamd_hyperscan_notice_known(np);
+
+       return TRUE;
+}
+
+struct rspamd_regexp_map_async_compile_ctx {
+       struct rspamd_regexp_map_helper *re_map;
+       rspamd_regexp_map_hs_cache_cb_t cb;
+       void *ud;
+       char *cache_dir;
+       unsigned char *serialized_db;
+       gsize serialized_len;
+};
+
+static void
+rspamd_regexp_map_async_store_cb(gboolean success,
+                                                                const unsigned char *data,
+                                                                gsize len,
+                                                                const char *error,
+                                                                void *ud)
+{
+       struct rspamd_regexp_map_async_compile_ctx *ctx = ud;
+       GError *err = NULL;
+
+       (void) data;
+       (void) len;
+
+       if (!success) {
+               g_set_error(&err, g_quark_from_static_string("regexp_map"), EINVAL,
+                                       "failed to store regexp map to cache backend: %s",
+                                       error ? error : "unknown error");
+       }
+
+       if (ctx->cb) {
+               ctx->cb(ctx->re_map, success, err, ctx->ud);
+       }
+
+       if (err) {
+               g_error_free(err);
+       }
+
+       g_free(ctx->serialized_db);
+       g_free(ctx->cache_dir);
+       g_free(ctx);
+}
+
+void rspamd_regexp_map_compile_hs_to_cache_async(struct rspamd_regexp_map_helper *re_map,
+                                                                                                const char *cache_dir,
+                                                                                                struct ev_loop *event_loop,
+                                                                                                rspamd_regexp_map_hs_cache_cb_t cb,
+                                                                                                void *ud)
+{
+       GError *err = NULL;
+       struct rspamd_map *map;
+
+       g_assert(re_map != NULL);
+       g_assert(cache_dir != NULL);
+
+       map = re_map->map;
+
+       if (!rspamd_hs_cache_has_lua_backend()) {
+               /* Synchronous file backend */
+               gboolean success = rspamd_regexp_map_compile_hs_to_cache(re_map, cache_dir, &err);
+               if (cb) {
+                       cb(re_map, success, err, ud);
+               }
+               if (err) {
+                       g_error_free(err);
+               }
+               return;
+       }
+
+       /* Async Lua backend path */
+       hs_platform_info_t plt;
+       hs_compile_error_t *hs_errors = NULL;
+       hs_database_t *db = NULL;
+       char *bytes = NULL;
+       gsize len;
+
+       if (re_map->regexps == NULL || re_map->regexps->len == 0) {
+               g_set_error(&err, g_quark_from_static_string("regexp_map"), EINVAL,
+                                       "regexp map is empty");
+               if (cb) {
+                       cb(re_map, FALSE, err, ud);
+               }
+               g_error_free(err);
+               return;
+       }
+
+       if (re_map->patterns == NULL) {
+               g_set_error(&err, g_quark_from_static_string("regexp_map"), EINVAL,
+                                       "regexp map patterns not prepared");
+               if (cb) {
+                       cb(re_map, FALSE, err, ud);
+               }
+               g_error_free(err);
+               return;
+       }
+
+       if (hs_populate_platform(&plt) != HS_SUCCESS) {
+               g_set_error(&err, g_quark_from_static_string("regexp_map"), EINVAL,
+                                       "cannot populate hyperscan platform");
+               if (cb) {
+                       cb(re_map, FALSE, err, ud);
+               }
+               g_error_free(err);
+               return;
+       }
+
+       if (hs_compile_multi((const char **) re_map->patterns,
+                                                re_map->flags,
+                                                re_map->ids,
+                                                re_map->regexps->len,
+                                                HS_MODE_BLOCK,
+                                                &plt,
+                                                &db,
+                                                &hs_errors) != HS_SUCCESS) {
+               g_set_error(&err, g_quark_from_static_string("regexp_map"), EINVAL,
+                                       "cannot compile hyperscan database: %s (pattern %d)",
+                                       hs_errors ? hs_errors->message : "unknown error",
+                                       hs_errors ? hs_errors->expression : -1);
+               if (hs_errors) {
+                       hs_free_compile_error(hs_errors);
+               }
+               if (cb) {
+                       cb(re_map, FALSE, err, ud);
+               }
+               g_error_free(err);
+               return;
+       }
+
+       if (hs_serialize_database(db, &bytes, &len) != HS_SUCCESS) {
+               g_set_error(&err, g_quark_from_static_string("regexp_map"), EINVAL,
+                                       "cannot serialize hyperscan database");
+               hs_free_database(db);
+               if (cb) {
+                       cb(re_map, FALSE, err, ud);
+               }
+               g_error_free(err);
+               return;
+       }
+
+       hs_free_database(db);
+
+       msg_info_map("compiled hyperscan database for %s (%Hz bytes), storing via Lua backend",
+                                map ? map->name : "unknown", len);
+
+       char cache_key[rspamd_cryptobox_HASHBYTES * 2 + 1];
+       rspamd_snprintf(cache_key, sizeof(cache_key), "%*xs",
+                                       (int) rspamd_cryptobox_HASHBYTES / 2, re_map->re_digest);
+
+       struct rspamd_regexp_map_async_compile_ctx *ctx = g_malloc0(sizeof(*ctx));
+       ctx->re_map = re_map;
+       ctx->cb = cb;
+       ctx->ud = ud;
+       ctx->cache_dir = g_strdup(cache_dir);
+       ctx->serialized_db = (unsigned char *) bytes;
+       ctx->serialized_len = len;
+
+       rspamd_hs_cache_lua_save_async(cache_key,
+                                                                  re_map->map ? re_map->map->name : "regexp_map",
+                                                                  ctx->serialized_db, ctx->serialized_len,
+                                                                  rspamd_regexp_map_async_store_cb, ctx);
+}
+
+gboolean
+rspamd_regexp_map_load_from_cache(struct rspamd_regexp_map_helper *re_map,
+                                                                 const char *cache_dir)
+{
+       char fp[PATH_MAX];
+       struct rspamd_map *map;
+
+       g_assert(re_map != NULL);
+       g_assert(cache_dir != NULL);
+
+       map = re_map->map;
+
+       rspamd_snprintf(fp, sizeof(fp), "%s/%*xs.hsmc",
+                                       cache_dir,
+                                       (int) rspamd_cryptobox_HASHBYTES / 2, re_map->re_digest);
+
+       rspamd_hyperscan_t *hs_db = rspamd_hyperscan_maybe_load(fp, 0);
+
+       if (hs_db == NULL) {
+               msg_info_map("cannot load hyperscan database from %s for %s",
+                                        fp, map ? map->name : "unknown");
+               return FALSE;
+       }
+
+       /* Free old database if any */
+       if (re_map->hs_db != NULL) {
+               rspamd_hyperscan_free(re_map->hs_db, true);
+               re_map->hs_db = NULL;
+       }
+
+       if (re_map->hs_scratch != NULL) {
+               hs_free_scratch(re_map->hs_scratch);
+               re_map->hs_scratch = NULL;
+       }
+
+       re_map->hs_db = hs_db;
+
+       if (hs_alloc_scratch(rspamd_hyperscan_get_database(re_map->hs_db),
+                                                &re_map->hs_scratch) != HS_SUCCESS) {
+               msg_err_map("cannot allocate scratch space for hyperscan");
+               rspamd_hyperscan_free(re_map->hs_db, true);
+               re_map->hs_db = NULL;
+               return FALSE;
+       }
+
+       msg_info_map("loaded hyperscan database from %s for %s",
+                                fp, map ? map->name : "unknown");
+
+       return TRUE;
+}
+
+struct rspamd_regexp_map_async_load_ctx {
+       struct rspamd_regexp_map_helper *re_map;
+       void (*cb)(gboolean success, void *ud);
+       void *ud;
+       char *cache_dir;
+};
+
+static void
+rspamd_regexp_map_async_load_cb(gboolean success,
+                                                               const unsigned char *data,
+                                                               gsize len,
+                                                               const char *error,
+                                                               void *ud)
+{
+       struct rspamd_regexp_map_async_load_ctx *ctx = ud;
+       struct rspamd_map *map = ctx->re_map->map;
+       gboolean result = FALSE;
+
+       if (!success || data == NULL || len == 0) {
+               msg_warn_map("failed to load regexp map from cache backend: %s",
+                                        error ? error : "no data");
+       }
+       else {
+               hs_database_t *db = NULL;
+
+               if (hs_deserialize_database((const char *) data, len, &db) != HS_SUCCESS) {
+                       msg_err_map("cannot deserialize hyperscan database from cache backend");
+               }
+               else {
+                       /* Free old database if any */
+                       if (ctx->re_map->hs_db != NULL) {
+                               rspamd_hyperscan_free(ctx->re_map->hs_db, true);
+                               ctx->re_map->hs_db = NULL;
+                       }
+
+                       if (ctx->re_map->hs_scratch != NULL) {
+                               hs_free_scratch(ctx->re_map->hs_scratch);
+                               ctx->re_map->hs_scratch = NULL;
+                       }
+
+                       ctx->re_map->hs_db = rspamd_hyperscan_from_raw_db(db, NULL);
+
+                       if (hs_alloc_scratch(rspamd_hyperscan_get_database(ctx->re_map->hs_db),
+                                                                &ctx->re_map->hs_scratch) != HS_SUCCESS) {
+                               msg_err_map("cannot allocate scratch space for hyperscan");
+                               rspamd_hyperscan_free(ctx->re_map->hs_db, true);
+                               ctx->re_map->hs_db = NULL;
+                       }
+                       else {
+                               msg_info_map("loaded hyperscan database from cache backend for %s",
+                                                        map ? map->name : "unknown");
+                               result = TRUE;
+                       }
+               }
+       }
+
+       if (ctx->cb) {
+               ctx->cb(result, ctx->ud);
+       }
+
+       g_free(ctx->cache_dir);
+       g_free(ctx);
+}
+
+void rspamd_regexp_map_load_from_cache_async(struct rspamd_regexp_map_helper *re_map,
+                                                                                        const char *cache_dir,
+                                                                                        struct ev_loop *event_loop,
+                                                                                        void (*cb)(gboolean success, void *ud),
+                                                                                        void *ud)
+{
+       g_assert(re_map != NULL);
+       g_assert(cache_dir != NULL);
+
+       if (!rspamd_hs_cache_has_lua_backend()) {
+               /* Synchronous file backend */
+               gboolean success = rspamd_regexp_map_load_from_cache(re_map, cache_dir);
+               if (cb) {
+                       cb(success, ud);
+               }
+               return;
+       }
+
+       /* Async Lua backend path */
+       char cache_key[rspamd_cryptobox_HASHBYTES * 2 + 1];
+       rspamd_snprintf(cache_key, sizeof(cache_key), "%*xs",
+                                       (int) rspamd_cryptobox_HASHBYTES / 2, re_map->re_digest);
+
+       struct rspamd_regexp_map_async_load_ctx *ctx = g_malloc0(sizeof(*ctx));
+       ctx->re_map = re_map;
+       ctx->cb = cb;
+       ctx->ud = ud;
+       ctx->cache_dir = g_strdup(cache_dir);
+
+       rspamd_hs_cache_lua_load_async(cache_key,
+                                                                  re_map->map ? re_map->map->name : "regexp_map",
+                                                                  rspamd_regexp_map_async_load_cb, ctx);
+}
+
+#else /* !WITH_HYPERSCAN */
+
+gboolean
+rspamd_regexp_map_compile_hs_to_cache(struct rspamd_regexp_map_helper *re_map,
+                                                                         const char *cache_dir,
+                                                                         GError **err)
+{
+       (void) re_map;
+       (void) cache_dir;
+       g_set_error(err, g_quark_from_static_string("regexp_map"), ENOTSUP,
+                               "hyperscan not supported");
+       return FALSE;
+}
+
+void rspamd_regexp_map_compile_hs_to_cache_async(struct rspamd_regexp_map_helper *re_map,
+                                                                                                const char *cache_dir,
+                                                                                                struct ev_loop *event_loop,
+                                                                                                rspamd_regexp_map_hs_cache_cb_t cb,
+                                                                                                void *ud)
+{
+       (void) re_map;
+       (void) cache_dir;
+       (void) event_loop;
+       GError *err = NULL;
+       g_set_error(&err, g_quark_from_static_string("regexp_map"), ENOTSUP,
+                               "hyperscan not supported");
+       if (cb) {
+               cb(re_map, FALSE, err, ud);
+       }
+       g_error_free(err);
+}
+
+gboolean
+rspamd_regexp_map_load_from_cache(struct rspamd_regexp_map_helper *re_map,
+                                                                 const char *cache_dir)
+{
+       (void) re_map;
+       (void) cache_dir;
+       return FALSE;
+}
+
+void rspamd_regexp_map_load_from_cache_async(struct rspamd_regexp_map_helper *re_map,
+                                                                                        const char *cache_dir,
+                                                                                        struct ev_loop *event_loop,
+                                                                                        void (*cb)(gboolean success, void *ud),
+                                                                                        void *ud)
+{
+       (void) re_map;
+       (void) cache_dir;
+       (void) event_loop;
+       if (cb) {
+               cb(FALSE, ud);
+       }
+}
+
+#endif /* WITH_HYPERSCAN */
index 41e352c015ffcdd04c6dea7abebc13f52b6c5b02..dea754e56be7a233015f66a6f12611b1c4515357 100644 (file)
@@ -38,6 +38,7 @@ extern "C" {
 struct rspamd_radix_map_helper;
 struct rspamd_hash_map_helper;
 struct rspamd_regexp_map_helper;
+struct ev_loop;
 struct rspamd_cdb_map_helper;
 struct rspamd_map_helper_value;
 
@@ -262,6 +263,103 @@ void rspamd_map_helper_insert_re(gpointer st, gconstpointer key, gconstpointer v
  */
 void rspamd_map_helper_destroy_regexp(struct rspamd_regexp_map_helper *re_map);
 
+/**
+ * Pending regexp map entry for deferred HS compilation
+ */
+struct rspamd_regexp_map_pending {
+       struct rspamd_regexp_map_helper *re_map;
+       char *name;             /* Map identifier for logging/IPC */
+       unsigned char hash[64]; /* Cache key hash (rspamd_cryptobox_HASHBYTES) */
+};
+
+/**
+ * Add regexp map to pending compilation queue.
+ * Called during initialization when hs_helper is not yet available.
+ * @param re_map regexp map helper
+ * @param name identifier for this map (e.g., map name)
+ */
+void rspamd_regexp_map_add_pending(struct rspamd_regexp_map_helper *re_map,
+                                                                  const char *name);
+
+/**
+ * Get list of pending regexp map compilations.
+ * Returns array of rspamd_regexp_map_pending, caller must free array (not contents).
+ * @param count output: number of pending entries
+ * @return array of pending entries or NULL if none
+ */
+struct rspamd_regexp_map_pending *rspamd_regexp_map_get_pending(unsigned int *count);
+
+/**
+ * Clear pending queue after hs_helper has processed it.
+ */
+void rspamd_regexp_map_clear_pending(void);
+
+/**
+ * Find a pending regexp map by name.
+ * @param name identifier
+ * @return regexp map helper or NULL if not found
+ */
+struct rspamd_regexp_map_helper *rspamd_regexp_map_find_pending(const char *name);
+
+/**
+ * Get hash/digest from regexp map for cache key generation.
+ * @param re_map
+ * @param hash_out output buffer (must be rspamd_cryptobox_HASHBYTES)
+ */
+void rspamd_regexp_map_get_hash(struct rspamd_regexp_map_helper *re_map,
+                                                               unsigned char *hash_out);
+
+/**
+ * Compile hyperscan database for regexp map and save to cache.
+ * This is called by hs_helper for async compilation.
+ * @param re_map regexp map helper
+ * @param cache_dir directory to save cache file
+ * @param err error output
+ * @return TRUE on success
+ */
+gboolean rspamd_regexp_map_compile_hs_to_cache(struct rspamd_regexp_map_helper *re_map,
+                                                                                          const char *cache_dir,
+                                                                                          GError **err);
+
+typedef void (*rspamd_regexp_map_hs_cache_cb_t)(struct rspamd_regexp_map_helper *re_map,
+                                                                                               gboolean success,
+                                                                                               GError *err,
+                                                                                               void *ud);
+
+/**
+ * Compile regexp map HS database and store it in the configured HS cache backend.
+ * If Lua backend is enabled, store is done asynchronously and callback is invoked on completion.
+ * For file backend, compilation+store is synchronous and callback is invoked immediately.
+ */
+void rspamd_regexp_map_compile_hs_to_cache_async(struct rspamd_regexp_map_helper *re_map,
+                                                                                                const char *cache_dir,
+                                                                                                struct ev_loop *event_loop,
+                                                                                                rspamd_regexp_map_hs_cache_cb_t cb,
+                                                                                                void *ud);
+
+/**
+ * Load hyperscan database from cache for regexp map.
+ * This is called by workers when they receive notification that
+ * hs_helper has compiled a regexp map database.
+ * @param re_map regexp map helper
+ * @param cache_dir directory containing cache files
+ * @return TRUE if loaded successfully
+ */
+gboolean rspamd_regexp_map_load_from_cache(struct rspamd_regexp_map_helper *re_map,
+                                                                                  const char *cache_dir);
+
+/**
+ * Asynchronously load hyperscan database for a regexp map from the configured
+ * HS cache backend (Lua backend if present, otherwise filesystem).
+ *
+ * The callback is invoked when hot-swap has been attempted.
+ */
+void rspamd_regexp_map_load_from_cache_async(struct rspamd_regexp_map_helper *re_map,
+                                                                                        const char *cache_dir,
+                                                                                        struct ev_loop *event_loop,
+                                                                                        void (*cb)(gboolean success, void *ud),
+                                                                                        void *ud);
+
 #ifdef __cplusplus
 }
 #endif
index 7dda95a55de860c1a87ba1281a4c52277587a644..e8ad04723d64e88bca119f10b093212dc678b38a 100644 (file)
@@ -2368,7 +2368,17 @@ rspamd_re_cache_compile_timer_cb(EV_P_ ev_timer *w, int revents)
                        ctx->cbdata = cbdata;
                        ctx->loop = loop;
                        ctx->w = w;
-                       rspamd_hs_cache_lua_exists_async(re_class->hash, rspamd_re_cache_exists_cb, ctx);
+                       char entity_name[256];
+                       if (re_class->type_len > 0) {
+                               rspamd_snprintf(entity_name, sizeof(entity_name), "re_class:%s(%*s)",
+                                                               rspamd_re_cache_type_to_string(re_class->type),
+                                                               (int) re_class->type_len - 1, re_class->type_data);
+                       }
+                       else {
+                               rspamd_snprintf(entity_name, sizeof(entity_name), "re_class:%s",
+                                                               rspamd_re_cache_type_to_string(re_class->type));
+                       }
+                       rspamd_hs_cache_lua_exists_async(re_class->hash, entity_name, rspamd_re_cache_exists_cb, ctx);
                        ev_timer_stop(EV_A_ w);
                        return;
                }
@@ -2669,7 +2679,17 @@ rspamd_re_cache_compile_timer_cb(EV_P_ ev_timer *w, int revents)
                        ctx->w = w;
                        ctx->n = n;
 
-                       rspamd_hs_cache_lua_save_async(re_class->hash, combined, total_len, rspamd_re_cache_save_cb, ctx);
+                       char entity_name[256];
+                       if (re_class->type_len > 0) {
+                               rspamd_snprintf(entity_name, sizeof(entity_name), "re_class:%s(%*s)",
+                                                               rspamd_re_cache_type_to_string(re_class->type),
+                                                               (int) re_class->type_len - 1, re_class->type_data);
+                       }
+                       else {
+                               rspamd_snprintf(entity_name, sizeof(entity_name), "re_class:%s",
+                                                               rspamd_re_cache_type_to_string(re_class->type));
+                       }
+                       rspamd_hs_cache_lua_save_async(re_class->hash, entity_name, combined, total_len, rspamd_re_cache_save_cb, ctx);
 
                        g_free(combined);
                        CLEANUP_ALLOCATED(false);
@@ -3616,7 +3636,17 @@ void rspamd_re_cache_load_hyperscan_scoped_async(struct rspamd_re_cache *cache_h
                        item->cache_key = g_strdup(re_class->hash);
                        sctx->pending++;
                        sctx->total++;
-                       rspamd_hs_cache_lua_load_async(item->cache_key, rspamd_re_cache_hs_load_cb, item);
+                       char entity_name[256];
+                       if (re_class->type_len > 0) {
+                               rspamd_snprintf(entity_name, sizeof(entity_name), "re_class:%s(%*s)",
+                                                               rspamd_re_cache_type_to_string(re_class->type),
+                                                               (int) re_class->type_len - 1, re_class->type_data);
+                       }
+                       else {
+                               rspamd_snprintf(entity_name, sizeof(entity_name), "re_class:%s",
+                                                               rspamd_re_cache_type_to_string(re_class->type));
+                       }
+                       rspamd_hs_cache_lua_load_async(item->cache_key, entity_name, rspamd_re_cache_hs_load_cb, item);
                }
 
                if (sctx->pending == 0) {
index c1f9528b6f7778d71e6d48b56391495afafec34e..586774ff5b3dcea1667320b72e30f80aa99774be 100644 (file)
@@ -1175,6 +1175,24 @@ rspamd_srv_handler(EV_P_ ev_io *w, int revents)
                                                           sizeof(wcmd.cmd.mp_loaded.cache_dir));
                                rspamd_control_broadcast_cmd(rspamd_main, &wcmd, rfd,
                                                                                         rspamd_control_ignore_io_handler, NULL, worker->pid);
+#endif
+                               break;
+                       case RSPAMD_SRV_REGEXP_MAP_LOADED:
+#ifdef WITH_HYPERSCAN
+                               msg_info_main("received regexp map loaded notification for '%s' from %s",
+                                                         cmd.cmd.re_map_loaded.name, cmd.cmd.re_map_loaded.cache_dir);
+
+                               /* Broadcast command to all workers */
+                               memset(&wcmd, 0, sizeof(wcmd));
+                               wcmd.type = RSPAMD_CONTROL_REGEXP_MAP_LOADED;
+                               rspamd_strlcpy(wcmd.cmd.re_map_loaded.name,
+                                                          cmd.cmd.re_map_loaded.name,
+                                                          sizeof(wcmd.cmd.re_map_loaded.name));
+                               rspamd_strlcpy(wcmd.cmd.re_map_loaded.cache_dir,
+                                                          cmd.cmd.re_map_loaded.cache_dir,
+                                                          sizeof(wcmd.cmd.re_map_loaded.cache_dir));
+                               rspamd_control_broadcast_cmd(rspamd_main, &wcmd, rfd,
+                                                                                        rspamd_control_ignore_io_handler, NULL, worker->pid);
 #endif
                                break;
                        case RSPAMD_SRV_MONITORED_CHANGE:
@@ -1562,6 +1580,9 @@ rspamd_control_command_to_string(enum rspamd_control_type cmd)
        case RSPAMD_CONTROL_MULTIPATTERN_LOADED:
                reply = "multipattern_loaded";
                break;
+       case RSPAMD_CONTROL_REGEXP_MAP_LOADED:
+               reply = "regexp_map_loaded";
+               break;
        default:
                break;
        }
@@ -1607,6 +1628,9 @@ const char *rspamd_srv_command_to_string(enum rspamd_srv_type cmd)
        case RSPAMD_SRV_MULTIPATTERN_LOADED:
                reply = "multipattern_loaded";
                break;
+       case RSPAMD_SRV_REGEXP_MAP_LOADED:
+               reply = "regexp_map_loaded";
+               break;
        case RSPAMD_SRV_BUSY:
                reply = "busy";
                break;
index 0cb2ce8b2370b269cec6c8645b3020af24afae40..d68a1387559f37bba552de8639f47f51667417c5 100644 (file)
@@ -40,6 +40,7 @@ enum rspamd_control_type {
        RSPAMD_CONTROL_WORKERS_SPAWNED,
        RSPAMD_CONTROL_COMPOSITES_STATS,
        RSPAMD_CONTROL_MULTIPATTERN_LOADED,
+       RSPAMD_CONTROL_REGEXP_MAP_LOADED,
        RSPAMD_CONTROL_MAX
 };
 
@@ -55,6 +56,7 @@ enum rspamd_srv_type {
        RSPAMD_SRV_FUZZY_BLOCKED,       /* Used to notify main process about a blocked ip */
        RSPAMD_SRV_WORKERS_SPAWNED,     /* Used to notify workers that all workers have been spawned */
        RSPAMD_SRV_MULTIPATTERN_LOADED, /* Multipattern HS compiled and ready */
+       RSPAMD_SRV_REGEXP_MAP_LOADED,   /* Regexp map HS compiled and ready */
        RSPAMD_SRV_BUSY,                /* Worker is busy with long-running operation */
 };
 
@@ -86,6 +88,10 @@ struct rspamd_control_command {
                        char name[64];
                        char cache_dir[CONTROL_PATHLEN];
                } mp_loaded;
+               struct {
+                       char name[CONTROL_PATHLEN]; /* Map name */
+                       char cache_dir[CONTROL_PATHLEN];
+               } re_map_loaded;
                struct {
                        char tag[32];
                        gboolean alive;
@@ -240,6 +246,11 @@ struct rspamd_srv_command {
                        char name[64];
                        char cache_dir[CONTROL_PATHLEN];
                } mp_loaded;
+               /* Sent when a regexp map hyperscan db is compiled */
+               struct {
+                       char name[CONTROL_PATHLEN]; /* Map name */
+                       char cache_dir[CONTROL_PATHLEN];
+               } re_map_loaded;
                struct {
                        gboolean is_busy;
                        char reason[32];
index 5f0fa665e3ef116f129ec25bf71d0aaad469d7ed..298ba4af45d83081abdbf956e5c8cbe08c997f28 100644 (file)
 #include "ottery.h"
 #include "rspamd_control.h"
 #include "hs_cache_backend.h"
+#include "hyperscan_tools.h"
 #include "libserver/maps/map.h"
 #include "libserver/maps/map_private.h"
+#include "libserver/maps/map_helpers.h"
 #include "libserver/http/http_private.h"
 #include "libserver/http/http_router.h"
 #include "libutil/rrd.h"
@@ -1918,12 +1920,12 @@ rspamd_worker_multipattern_async_loaded(gboolean success, void *ud)
        struct rspamd_worker_mp_async_cbdata *cbd = (struct rspamd_worker_mp_async_cbdata *) ud;
 
        if (success) {
-               msg_info("multipattern '%s' hot-swapped to hyperscan (backend)", cbd->name);
+               msg_debug_hyperscan("multipattern '%s' hot-swapped to hyperscan (backend)", cbd->name);
        }
        else {
                /* Try file fallback if available */
                if (cbd->mp && cbd->cache_dir && rspamd_multipattern_load_from_cache(cbd->mp, cbd->cache_dir)) {
-                       msg_info("multipattern '%s' hot-swapped to hyperscan (file fallback)", cbd->name);
+                       msg_debug_hyperscan("multipattern '%s' hot-swapped to hyperscan (file fallback)", cbd->name);
                }
                else {
                        msg_warn("failed to hot-swap multipattern '%s' to hyperscan, continuing with ACISM fallback",
@@ -1949,11 +1951,11 @@ rspamd_worker_hyperscan_ready(struct rspamd_main *rspamd_main,
        memset(&rep, 0, sizeof(rep));
        rep.type = RSPAMD_CONTROL_HYPERSCAN_LOADED;
 
+       msg_debug_hyperscan("received hyperscan loaded notification, cache_dir=%s, forced=%d",
+                                               cmd->cmd.hs_loaded.cache_dir, cmd->cmd.hs_loaded.forced);
+
        if (rspamd_hs_cache_has_lua_backend()) {
-               /*
-                * Backend-based hot-swap: schedule async loads to avoid blocking control pipe.
-                * Reply immediately.
-                */
+               msg_debug_hyperscan("using async backend-based hyperscan loading");
                rspamd_re_cache_load_hyperscan_scoped_async(cache, worker->srv->event_loop,
                                                                                                        cmd->cmd.hs_loaded.cache_dir, false);
                rep.reply.hs_loaded.status = 0;
@@ -1962,15 +1964,15 @@ rspamd_worker_hyperscan_ready(struct rspamd_main *rspamd_main,
                /* File-based loading (legacy, synchronous) */
                if (cmd->cmd.hs_loaded.scope[0] != '\0') {
                        const char *scope = cmd->cmd.hs_loaded.scope;
-                       msg_info("loading hyperscan expressions for scope '%s' after receiving compilation notice", scope);
+                       msg_debug_hyperscan("loading hyperscan expressions for scope '%s' after receiving compilation notice", scope);
                        rep.reply.hs_loaded.status = rspamd_re_cache_load_hyperscan_scoped(
                                cache, cmd->cmd.hs_loaded.cache_dir, false);
                }
                else {
                        if (rspamd_re_cache_is_hs_loaded(cache) != RSPAMD_HYPERSCAN_LOADED_FULL ||
                                cmd->cmd.hs_loaded.forced) {
-                               msg_info("loading hyperscan expressions after receiving compilation notice: %s",
-                                                (rspamd_re_cache_is_hs_loaded(cache) != RSPAMD_HYPERSCAN_LOADED_FULL) ? "new db" : "forced update");
+                               msg_debug_hyperscan("loading hyperscan expressions after receiving compilation notice: %s",
+                                                                       (rspamd_re_cache_is_hs_loaded(cache) != RSPAMD_HYPERSCAN_LOADED_FULL) ? "new db" : "forced update");
                                rep.reply.hs_loaded.status = rspamd_re_cache_load_hyperscan(
                                        worker->srv->cfg->re_cache, cmd->cmd.hs_loaded.cache_dir, false);
                        }
@@ -2004,10 +2006,14 @@ rspamd_worker_multipattern_ready(struct rspamd_main *rspamd_main,
        memset(&rep, 0, sizeof(rep));
        rep.type = RSPAMD_CONTROL_MULTIPATTERN_LOADED;
 
+       msg_debug_hyperscan("received multipattern loaded notification for '%s', cache_dir=%s",
+                                               name, cache_dir);
+
        mp = rspamd_multipattern_find_pending(name);
 
        if (mp != NULL) {
                if (rspamd_hs_cache_has_lua_backend()) {
+                       msg_debug_hyperscan("using async backend-based multipattern loading for '%s'", name);
                        struct rspamd_worker_mp_async_cbdata *cbd = g_malloc0(sizeof(*cbd));
                        cbd->name = g_strdup(name);
                        cbd->cache_dir = g_strdup(cache_dir);
@@ -2018,7 +2024,7 @@ rspamd_worker_multipattern_ready(struct rspamd_main *rspamd_main,
                }
                else {
                        if (rspamd_multipattern_load_from_cache(mp, cache_dir)) {
-                               msg_info("multipattern '%s' hot-swapped to hyperscan", name);
+                               msg_debug_hyperscan("multipattern '%s' hot-swapped to hyperscan", name);
                                rep.reply.hs_loaded.status = 0;
                        }
                        else {
@@ -2044,6 +2050,90 @@ rspamd_worker_multipattern_ready(struct rspamd_main *rspamd_main,
 
        return TRUE;
 }
+
+struct rspamd_worker_remap_async_cbdata {
+       char *name;
+       char *cache_dir;
+       struct rspamd_regexp_map_helper *re_map;
+};
+
+static void
+rspamd_worker_regexp_map_async_loaded(gboolean success, void *ud)
+{
+       struct rspamd_worker_remap_async_cbdata *cbd = ud;
+
+       if (success) {
+               msg_debug_hyperscan("regexp map '%s' hot-swapped to hyperscan (async)", cbd->name);
+       }
+       else {
+               msg_warn("failed to load regexp map '%s' from cache backend, continuing with PCRE fallback",
+                                cbd->name);
+       }
+
+       g_free(cbd->name);
+       g_free(cbd->cache_dir);
+       g_free(cbd);
+}
+
+static gboolean
+rspamd_worker_regexp_map_ready(struct rspamd_main *rspamd_main,
+                                                          struct rspamd_worker *worker, int fd,
+                                                          int attached_fd,
+                                                          struct rspamd_control_command *cmd,
+                                                          gpointer ud)
+{
+       struct rspamd_control_reply rep;
+       struct rspamd_regexp_map_helper *re_map;
+       const char *name = cmd->cmd.re_map_loaded.name;
+       const char *cache_dir = cmd->cmd.re_map_loaded.cache_dir;
+
+       memset(&rep, 0, sizeof(rep));
+       rep.type = RSPAMD_CONTROL_REGEXP_MAP_LOADED;
+
+       msg_debug_hyperscan("received regexp map loaded notification for '%s', cache_dir=%s",
+                                               name, cache_dir);
+
+       re_map = rspamd_regexp_map_find_pending(name);
+
+       if (re_map != NULL) {
+               if (rspamd_hs_cache_has_lua_backend()) {
+                       msg_debug_hyperscan("using async backend-based regexp map loading for '%s'", name);
+                       struct rspamd_worker_remap_async_cbdata *cbd = g_malloc0(sizeof(*cbd));
+                       cbd->name = g_strdup(name);
+                       cbd->cache_dir = g_strdup(cache_dir);
+                       cbd->re_map = re_map;
+                       rspamd_regexp_map_load_from_cache_async(re_map, cache_dir, worker->srv->event_loop,
+                                                                                                       rspamd_worker_regexp_map_async_loaded, cbd);
+                       rep.reply.hs_loaded.status = 0;
+               }
+               else {
+                       if (rspamd_regexp_map_load_from_cache(re_map, cache_dir)) {
+                               msg_debug_hyperscan("regexp map '%s' hot-swapped to hyperscan", name);
+                               rep.reply.hs_loaded.status = 0;
+                       }
+                       else {
+                               msg_warn("failed to load regexp map '%s' from cache, continuing with PCRE fallback",
+                                                name);
+                               rep.reply.hs_loaded.status = ENOENT;
+                       }
+               }
+       }
+       else {
+               msg_warn("received regexp map notification for unknown '%s'", name);
+               rep.reply.hs_loaded.status = ENOENT;
+       }
+
+       if (write(fd, &rep, sizeof(rep)) != sizeof(rep)) {
+               msg_err("cannot write reply to the control socket: %s",
+                               strerror(errno));
+       }
+
+       if (attached_fd >= 0) {
+               close(attached_fd);
+       }
+
+       return TRUE;
+}
 #endif /* With Hyperscan */
 
 gboolean
@@ -2144,6 +2234,10 @@ void rspamd_worker_init_scanner(struct rspamd_worker *worker,
                                                                                  RSPAMD_CONTROL_MULTIPATTERN_LOADED,
                                                                                  rspamd_worker_multipattern_ready,
                                                                                  NULL);
+       rspamd_control_worker_add_cmd_handler(worker,
+                                                                                 RSPAMD_CONTROL_REGEXP_MAP_LOADED,
+                                                                                 rspamd_worker_regexp_map_ready,
+                                                                                 NULL);
 #endif
        rspamd_control_worker_add_cmd_handler(worker,
                                                                                  RSPAMD_CONTROL_LOG_PIPE,
index c5eeda5abf938295679add2d0cb59f9e2d0a257f..127233c876cd07640ac79da0cdf745b039ba1408 100644 (file)
@@ -1584,7 +1584,7 @@ void rspamd_multipattern_compile_hs_to_cache_async(struct rspamd_multipattern *m
        ctx->cb = cb;
        ctx->ud = ud;
 
-       rspamd_hs_cache_lua_save_async(cache_key, (const unsigned char *) bytes, len,
+       rspamd_hs_cache_lua_save_async(cache_key, "multipattern", (const unsigned char *) bytes, len,
                                                                   rspamd_multipattern_hs_cache_save_cb, ctx);
 
        g_free(bytes);
@@ -1762,7 +1762,7 @@ void rspamd_multipattern_load_from_cache_async(struct rspamd_multipattern *mp,
                ctx->cb = cb;
                ctx->ud = ud;
                (void) event_loop;
-               rspamd_hs_cache_lua_load_async(ctx->cache_key, rspamd_multipattern_load_from_cache_cb, ctx);
+               rspamd_hs_cache_lua_load_async(ctx->cache_key, "multipattern", rspamd_multipattern_load_from_cache_cb, ctx);
                return;
        }