]> git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
[Minor] Remove irrelevant file
authorVsevolod Stakhov <vsevolod@rspamd.com>
Mon, 13 Oct 2025 15:54:06 +0000 (16:54 +0100)
committerVsevolod Stakhov <vsevolod@rspamd.com>
Mon, 13 Oct 2025 15:54:06 +0000 (16:54 +0100)
test/lua/unit/get_html_urls_async_example.lua [deleted file]

diff --git a/test/lua/unit/get_html_urls_async_example.lua b/test/lua/unit/get_html_urls_async_example.lua
deleted file mode 100644 (file)
index 90d44b6..0000000
+++ /dev/null
@@ -1,273 +0,0 @@
---[[
-  Async HTML URL Rewriting Example
-
-  This is an example demonstrating how to use task:get_html_urls() with
-  async operations to batch-check URLs against an external service before
-  rewriting them.
-
-  Usage pattern:
-  1. Extract all URLs from HTML parts using task:get_html_urls()
-  2. Send all URLs to external service via async HTTP/Redis/etc
-  3. Receive URL replacements from service
-  4. Apply rewrites using task:rewrite_html_urls() with lookup table
-]]
-
--- Example rule implementation
-local function register_async_url_rewriter(rspamd_config)
-  rspamd_config:register_symbol({
-    name = 'ASYNC_URL_REWRITER',
-    type = 'postfilter',
-    callback = function(task)
-      -- Step 1: Extract all URLs from HTML parts
-      local urls_by_part = task:get_html_urls()
-
-      if not urls_by_part then
-        return -- No HTML URLs to process
-      end
-
-      -- Flatten URLs for batched API request
-      local all_urls = {}
-      local url_to_info = {}
-
-      for part_id, url_list in pairs(urls_by_part) do
-        for _, url_info in ipairs(url_list) do
-          table.insert(all_urls, url_info.url)
-          url_to_info[url_info.url] = url_info
-        end
-      end
-
-      if #all_urls == 0 then
-        return
-      end
-
-      rspamd_logger.infox(task, "Found %s HTML URLs to check", #all_urls)
-
-      -- Step 2: Make async request to URL checking service
-      local http = require "rspamd_http"
-      local ucl = require "ucl"
-
-      http.request({
-        task = task,
-        url = 'http://url-checker.example.com/api/check-batch',
-        callback = function(err, code, body)
-          if err then
-            rspamd_logger.errx(task, 'URL check failed: %s', err)
-            return
-          end
-
-          if code ~= 200 then
-            rspamd_logger.errx(task, 'URL check service returned HTTP %s', code)
-            return
-          end
-
-          -- Step 3: Parse response containing URL replacements
-          local parser = ucl.parser()
-          local ok, parse_err = parser:parse_string(body)
-
-          if not ok then
-            rspamd_logger.errx(task, 'Failed to parse response: %s', parse_err)
-            return
-          end
-
-          local response = parser:get_object()
-
-          -- Build replacement map: original_url -> new_url
-          local replacements = {}
-
-          for original_url, result in pairs(response.urls or {}) do
-            if result.action == 'rewrite' and result.new_url then
-              replacements[original_url] = result.new_url
-              rspamd_logger.infox(task, "Will rewrite %s -> %s",
-                                 original_url, result.new_url)
-            elseif result.action == 'block' then
-              -- Redirect blocked URLs to warning page
-              replacements[original_url] = 'https://warning.example.com/blocked'
-              rspamd_logger.infox(task, "Blocking URL %s", original_url)
-
-              -- Optionally set a symbol
-              task:insert_result('BLOCKED_URL', 1.0, original_url)
-            end
-          end
-
-          -- Step 4: Apply rewrites using lookup table callback
-          if next(replacements) then
-            local rewritten = task:rewrite_html_urls(function(task, url)
-              -- Simple lookup - returns nil if URL shouldn't be rewritten
-              return replacements[url]
-            end)
-
-            if rewritten then
-              rspamd_logger.infox(task, 'Rewritten URLs in parts: %s',
-                                 table.concat(table_keys(rewritten), ', '))
-
-              -- Optionally set a symbol to track rewrites
-              task:insert_result('URL_REWRITTEN', 1.0,
-                                string.format('%d URLs', count_rewrites(replacements)))
-            end
-          end
-        end,
-
-        -- Request configuration
-        headers = {
-          ['Content-Type'] = 'application/json',
-          ['Authorization'] = 'Bearer YOUR_API_TOKEN'
-        },
-        body = ucl.to_format({
-          urls = all_urls,
-          -- Include additional context if needed
-          message_id = task:get_message_id(),
-          from = (task:get_from('smtp') or {})[1]
-        }, 'json'),
-        timeout = 5.0
-      })
-    end,
-    priority = 10 -- Postfilter priority
-  })
-end
-
--- Helper functions
-local function table_keys(t)
-  local keys = {}
-  for k, _ in pairs(t) do
-    table.insert(keys, tostring(k))
-  end
-  return keys
-end
-
-local function count_rewrites(replacements)
-  local count = 0
-  for _, _ in pairs(replacements) do
-    count = count + 1
-  end
-  return count
-end
-
---[[
-  Alternative: Using Redis for caching URL check results
-]]
-
-local function register_redis_cached_url_rewriter(rspamd_config)
-  rspamd_config:register_symbol({
-    name = 'REDIS_CACHED_URL_REWRITER',
-    type = 'postfilter',
-    callback = function(task)
-      local redis = require "rspamd_redis"
-      local urls_by_part = task:get_html_urls()
-
-      if not urls_by_part then
-        return
-      end
-
-      -- Collect all URLs
-      local all_urls = {}
-      for part_id, url_list in pairs(urls_by_part) do
-        for _, url_info in ipairs(url_list) do
-          table.insert(all_urls, url_info.url)
-        end
-      end
-
-      if #all_urls == 0 then
-        return
-      end
-
-      -- Build Redis MGET command to check all URLs at once
-      local redis_keys = {}
-      for _, url in ipairs(all_urls) do
-        table.insert(redis_keys, 'url:rewrite:' .. url)
-      end
-
-      redis.make_request({
-        task = task,
-        cmd = 'MGET',
-        args = redis_keys,
-        callback = function(err, data)
-          if err then
-            rspamd_logger.errx(task, 'Redis error: %s', err)
-            return
-          end
-
-          -- Build replacement map from Redis results
-          local replacements = {}
-          for i, url in ipairs(all_urls) do
-            if data[i] and data[i] ~= '' then
-              replacements[url] = data[i]
-            end
-          end
-
-          -- Apply rewrites
-          if next(replacements) then
-            local rewritten = task:rewrite_html_urls(function(task, url)
-              return replacements[url]
-            end)
-
-            if rewritten then
-              rspamd_logger.infox(task, 'Applied %d URL rewrites from Redis',
-                                 count_rewrites(replacements))
-            end
-          end
-        end
-      })
-    end
-  })
-end
-
---[[
-  Simpler example: Rewrite specific domains without external service
-]]
-
-local function register_simple_domain_rewriter(rspamd_config)
-  -- Mapping of domains to redirect targets
-  local domain_redirects = {
-    ['evil.com'] = 'https://warning.example.com/blocked?domain=evil.com',
-    ['phishing.net'] = 'https://warning.example.com/blocked?domain=phishing.net',
-  }
-
-  rspamd_config:register_symbol({
-    name = 'SIMPLE_DOMAIN_REWRITER',
-    type = 'postfilter',
-    callback = function(task)
-      local urls_by_part = task:get_html_urls()
-
-      if not urls_by_part then
-        return
-      end
-
-      -- Check if any URLs match blocked domains
-      local needs_rewrite = false
-      for part_id, url_list in pairs(urls_by_part) do
-        for _, url_info in ipairs(url_list) do
-          for blocked_domain, _ in pairs(domain_redirects) do
-            if url_info.url:find(blocked_domain, 1, true) then
-              needs_rewrite = true
-              break
-            end
-          end
-        end
-      end
-
-      if not needs_rewrite then
-        return
-      end
-
-      -- Apply rewrites
-      local rewritten = task:rewrite_html_urls(function(task, url)
-        for blocked_domain, redirect_url in pairs(domain_redirects) do
-          if url:find(blocked_domain, 1, true) then
-            return redirect_url
-          end
-        end
-        return nil -- Don't rewrite
-      end)
-
-      if rewritten then
-        task:insert_result('DOMAIN_REWRITTEN', 1.0)
-      end
-    end
-  })
-end
-
-return {
-  register_async_url_rewriter = register_async_url_rewriter,
-  register_redis_cached_url_rewriter = register_redis_cached_url_rewriter,
-  register_simple_domain_rewriter = register_simple_domain_rewriter,
-}