--[[
  Provenance: recovered from an upstream commit diff in which this file was
  deleted.

    From:      Vsevolod Stakhov
    Date:      Mon, 13 Oct 2025 15:54:06 +0000 (+0100)
    Subject:   [Minor] Remove irrelevant file
    X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=46d94f8ae0e7fcceb61d30d5f2670cd113f85677;p=thirdparty%2Frspamd.git
    Path:      test/lua/unit/get_html_urls_async_example.lua
               (deleted file mode 100644, index 90d44b6191..0000000000)
]]

--[[
  Async HTML URL Rewriting Example

  This is an example demonstrating how to use task:get_html_urls() with
  async operations to batch-check URLs against an external service before
  rewriting them.

  Usage pattern:
  1. Extract all URLs from HTML parts using task:get_html_urls()
  2. Send all URLs to an external service via async HTTP/Redis/etc
  3. Receive URL replacements from the service
  4. Apply rewrites using task:rewrite_html_urls() with a lookup table
]]

-- The logger is used by every rule below, so it is required once at file
-- scope instead of being relied upon as an implicit global.
local rspamd_logger = require "rspamd_logger"

--------------------------------------------------------------------------
-- Helper functions
--
-- These are declared *before* the rule registrars: a `local function` is
-- only visible to code that follows its declaration, so closures defined
-- earlier in the file would otherwise look the names up as (nil) globals.
--------------------------------------------------------------------------

--- Return the keys of a table as a list of strings.
-- @param t table to inspect
-- @return list of `tostring(key)` values (order unspecified, as with pairs)
local function table_keys(t)
  local keys = {}
  for k in pairs(t) do
    keys[#keys + 1] = tostring(k)
  end
  return keys
end

--- Count the entries of a replacement map.
-- `#` cannot be used here because the map is keyed by URL, not by index.
-- @param replacements table mapping original URL -> replacement URL
-- @return number of entries
local function count_rewrites(replacements)
  local count = 0
  for _ in pairs(replacements) do
    count = count + 1
  end
  return count
end

--- Flatten the per-part result of task:get_html_urls() into one list.
-- Each URL is listed once, so repeated links are not sent to the external
-- service (or to Redis) several times.
-- @param urls_by_part table of part id -> list of url info tables; only
--        the `url` field of each entry is read
-- @return list of unique URLs in first-seen order
local function collect_html_urls(urls_by_part)
  local urls, seen = {}, {}
  for _, url_list in pairs(urls_by_part) do
    for _, url_info in ipairs(url_list) do
      local url = url_info.url
      if url and not seen[url] then
        seen[url] = true
        urls[#urls + 1] = url
      end
    end
  end
  return urls
end

--- Apply a replacement map to the HTML parts of a task.
-- @param task rspamd task
-- @param replacements table mapping original URL -> replacement URL
-- @return whatever task:rewrite_html_urls() returns
local function apply_replacements(task, replacements)
  return task:rewrite_html_urls(function(_, url)
    -- Simple lookup: nil means "leave this URL untouched"
    return replacements[url]
  end)
end

--------------------------------------------------------------------------
-- Example rule: batch-check URLs against an HTTP service
--------------------------------------------------------------------------

--- Register a postfilter that asks an HTTP service which URLs to rewrite.
-- @param rspamd_config rspamd configuration object
local function register_async_url_rewriter(rspamd_config)
  rspamd_config:register_symbol({
    name = 'ASYNC_URL_REWRITER',
    type = 'postfilter',
    callback = function(task)
      -- Step 1: Extract all URLs from HTML parts
      local urls_by_part = task:get_html_urls()

      if not urls_by_part then
        return -- No HTML URLs to process
      end

      -- Flatten URLs for a batched API request
      local all_urls = collect_html_urls(urls_by_part)

      if #all_urls == 0 then
        return
      end

      rspamd_logger.infox(task, "Found %s HTML URLs to check", #all_urls)

      -- Step 2: Make async request to URL checking service
      local http = require "rspamd_http"
      local ucl = require "ucl"

      local function on_response(err, code, body)
        if err then
          rspamd_logger.errx(task, 'URL check failed: %s', err)
          return
        end

        if code ~= 200 then
          rspamd_logger.errx(task, 'URL check service returned HTTP %s', code)
          return
        end

        -- Step 3: Parse response containing URL replacements
        local parser = ucl.parser()
        local ok, parse_err = parser:parse_string(body)

        if not ok then
          rspamd_logger.errx(task, 'Failed to parse response: %s', parse_err)
          return
        end

        local response = parser:get_object()

        -- A syntactically valid reply may still be a scalar or lack the
        -- `urls` map; treat both as "nothing to rewrite".
        local results = type(response) == 'table' and response.urls or nil
        if type(results) ~= 'table' then
          return
        end

        -- Build replacement map: original_url -> new_url
        local replacements = {}

        for original_url, result in pairs(results) do
          -- Skip malformed entries instead of failing on a field access
          if type(result) == 'table' then
            if result.action == 'rewrite' and result.new_url then
              replacements[original_url] = result.new_url
              rspamd_logger.infox(task, "Will rewrite %s -> %s",
                  original_url, result.new_url)
            elseif result.action == 'block' then
              -- Redirect blocked URLs to warning page
              replacements[original_url] = 'https://warning.example.com/blocked'
              rspamd_logger.infox(task, "Blocking URL %s", original_url)

              -- Optionally set a symbol
              -- NOTE(review): BLOCKED_URL is not registered in this file;
              -- confirm it is known to the configuration.
              task:insert_result('BLOCKED_URL', 1.0, original_url)
            end
          end
        end

        -- Step 4: Apply rewrites using lookup table callback
        if next(replacements) then
          local rewritten = apply_replacements(task, replacements)

          if rewritten then
            rspamd_logger.infox(task, 'Rewritten URLs in parts: %s',
                table.concat(table_keys(rewritten), ', '))

            -- Optionally set a symbol to track rewrites
            -- NOTE(review): URL_REWRITTEN is not registered in this file;
            -- confirm it is known to the configuration.
            task:insert_result('URL_REWRITTEN', 1.0,
                string.format('%d URLs', count_rewrites(replacements)))
          end
        end
      end

      http.request({
        task = task,
        url = 'http://url-checker.example.com/api/check-batch',
        callback = on_response,

        -- Request configuration
        headers = {
          ['Content-Type'] = 'application/json',
          ['Authorization'] = 'Bearer YOUR_API_TOKEN'
        },
        body = ucl.to_format({
          urls = all_urls,
          -- Include additional context if needed
          message_id = task:get_message_id(),
          from = (task:get_from('smtp') or {})[1]
        }, 'json'),
        timeout = 5.0
      })
    end,
    priority = 10 -- Postfilter priority
  })
end

--[[
  Alternative: Using Redis for caching URL check results
]]

--- Register a postfilter that looks replacements up in Redis via MGET.
-- @param rspamd_config rspamd configuration object
-- @param opts optional table; `opts.host` and `opts.timeout` are forwarded
--        to rspamd_redis.make_request. Omitting it keeps the request exactly
--        as the original example issued it.
--        NOTE(review): rspamd_redis.make_request is documented as needing a
--        server address - confirm and pass `opts.host` in real deployments.
local function register_redis_cached_url_rewriter(rspamd_config, opts)
  opts = opts or {}

  rspamd_config:register_symbol({
    name = 'REDIS_CACHED_URL_REWRITER',
    type = 'postfilter',
    callback = function(task)
      local redis = require "rspamd_redis"
      local urls_by_part = task:get_html_urls()

      if not urls_by_part then
        return
      end

      -- Collect all URLs
      local all_urls = collect_html_urls(urls_by_part)

      if #all_urls == 0 then
        return
      end

      -- Build Redis MGET command to check all URLs at once; redis_keys[i]
      -- corresponds to all_urls[i], which the callback relies on.
      local redis_keys = {}
      for i, url in ipairs(all_urls) do
        redis_keys[i] = 'url:rewrite:' .. url
      end

      local scheduled = redis.make_request({
        task = task,
        host = opts.host,
        timeout = opts.timeout,
        cmd = 'MGET',
        args = redis_keys,
        callback = function(err, data)
          if err then
            rspamd_logger.errx(task, 'Redis error: %s', err)
            return
          end

          if type(data) ~= 'table' then
            return
          end

          -- Build replacement map from Redis results. Only non-empty
          -- strings count as replacements: a missing key may come back as
          -- a non-string placeholder, which is truthy in Lua and must not
          -- be written into the message.
          local replacements = {}
          for i, url in ipairs(all_urls) do
            local value = data[i]
            if type(value) == 'string' and value ~= '' then
              replacements[url] = value
            end
          end

          -- Apply rewrites
          if next(replacements) then
            local rewritten = apply_replacements(task, replacements)

            if rewritten then
              -- `%s` matches the placeholder style of every other logger
              -- call in this file.
              rspamd_logger.infox(task, 'Applied %s URL rewrites from Redis',
                  count_rewrites(replacements))
            end
          end
        end
      })

      if not scheduled then
        rspamd_logger.errx(task, 'Cannot schedule Redis request for URL rewrites')
      end
    end
  })
end

--[[
  Simpler example: Rewrite specific domains without external service
]]

--- Register a postfilter that redirects URLs mentioning blocked domains.
-- @param rspamd_config rspamd configuration object
local function register_simple_domain_rewriter(rspamd_config)
  -- Mapping of domains to redirect targets
  local domain_redirects = {
    ['evil.com'] = 'https://warning.example.com/blocked?domain=evil.com',
    ['phishing.net'] = 'https://warning.example.com/blocked?domain=phishing.net',
  }

  -- Return the redirect target for `url`, or nil when no blocked domain
  -- occurs in it. This is a plain substring search (no pattern magic), so
  -- it also matches the domain appearing in a path or as part of a longer
  -- host name. If several domains match, which one wins is unspecified
  -- (pairs order).
  local function find_redirect(url)
    for blocked_domain, redirect_url in pairs(domain_redirects) do
      if url:find(blocked_domain, 1, true) then
        return redirect_url
      end
    end
    return nil
  end

  -- True as soon as any URL in any part needs rewriting; returning from
  -- the nested loops stops the scan at the first hit.
  local function any_blocked(urls_by_part)
    for _, url_list in pairs(urls_by_part) do
      for _, url_info in ipairs(url_list) do
        if find_redirect(url_info.url) then
          return true
        end
      end
    end
    return false
  end

  rspamd_config:register_symbol({
    name = 'SIMPLE_DOMAIN_REWRITER',
    type = 'postfilter',
    callback = function(task)
      local urls_by_part = task:get_html_urls()

      if not urls_by_part then
        return
      end

      -- Check if any URLs match blocked domains
      if not any_blocked(urls_by_part) then
        return
      end

      -- Apply rewrites
      local rewritten = task:rewrite_html_urls(function(_, url)
        return find_redirect(url) -- nil: don't rewrite
      end)

      if rewritten then
        -- NOTE(review): DOMAIN_REWRITTEN is not registered in this file;
        -- confirm it is known to the configuration.
        task:insert_result('DOMAIN_REWRITTEN', 1.0)
      end
    end
  })
end

return {
  register_async_url_rewriter = register_async_url_rewriter,
  register_redis_cached_url_rewriter = register_redis_cached_url_rewriter,
  register_simple_domain_rewriter = register_simple_domain_rewriter,
}