]> git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
[Minor] url_redirector: skip non-HTTP(S) URLs in http_walk 6035/head
authorAlexander Moisseev <moiseev@mezonplus.ru>
Tue, 12 May 2026 17:17:27 +0000 (20:17 +0300)
committerAlexander Moisseev <moiseev@mezonplus.ru>
Tue, 12 May 2026 17:48:46 +0000 (20:48 +0300)
Non-HTTP(S) schemes (such as tel:, mailto:, etc.) cannot have HTTP
redirects. Attempting to follow them in http_walk is unnecessary and
could potentially lead to errors. This change skips these URLs early
in the redirect chain walk and emits the URL_REDIRECTOR_NON_HTTP
virtual symbol with a single option in the format:

  scheme=http_chain->non_http_url

e.g.: telephone=click.example.com->tel:+71234567890

src/plugins/lua/url_redirector.lua
test/functional/cases/167_url_redirector_non_http_scheme.robot [new file with mode: 0644]
test/functional/messages/redir_chain_tel_url.eml [new file with mode: 0644]
test/functional/messages/redir_multi_non_http.eml [new file with mode: 0644]
test/functional/messages/redir_tel_url.eml [new file with mode: 0644]
test/functional/util/dummy_http.py

index d5b0892557411296e52674add086e474f7a9aced..dee021ea0a4d11a2c7f5949ddd74deb27fa43d37 100644 (file)
@@ -57,6 +57,7 @@ local settings = {
   user_agent = default_ua,
   redirector_symbol = nil, -- insert symbol if redirected url has been found
   redirector_symbol_nested = "URL_REDIRECTOR_NESTED", -- insert symbol if nested limit has been reached
+  redirector_symbol_non_http = "URL_REDIRECTOR_NON_HTTP", -- HTTP -> non-HTTP(S) redirect detected
   redirectors_only = true, -- follow merely redirectors
   top_urls_key = 'rdr:top_urls', -- key for top urls
   top_urls_count = 200, -- how many top urls to save
@@ -108,13 +109,17 @@ local function encode_url_for_redirect(url_str)
   return encoded
 end
 
--- Build a 'host1->host2->...' string from a chain of URL objects, used as
--- the symbol option for redirector_symbol_nested. Mirrors the format that
--- apply_redirect_chain emits for redirector_symbol.
+-- Build a 'host1->host2->...' string from a chain of URL objects.
+-- Includes scheme for non-HTTP(S) URLs to distinguish them.
 local function chain_hosts_string(chain)
   local hosts = {}
   for i = 1, #chain do
-    hosts[i] = chain[i]:get_host() or '?'
+    local proto = chain[i]:get_protocol()
+    if proto ~= 'http' and proto ~= 'https' then
+      hosts[i] = chain[i]:get_text()
+    else
+      hosts[i] = chain[i]:get_host() or '?'
+    end
   end
   return table.concat(hosts, '->')
 end
@@ -175,7 +180,10 @@ local function apply_redirect_chain(task, chain)
     chain[i]:set_redirected(chain[i + 1], mempool)
   end
   for i = 2, #chain do
-    task:inject_url(chain[i])
+    local proto = chain[i]:get_protocol()
+    if proto == 'http' or proto == 'https' then
+      task:inject_url(chain[i])
+    end
   end
   if settings.redirector_symbol then
     task:insert_result(settings.redirector_symbol, 1.0,
@@ -230,7 +238,7 @@ local function cache_chain_to_redis(task, chain, terminal_prefix)
 
   local function write_link(prev_url, next_url, marker)
     local link_key = cache_key_for_url(tostring(prev_url))
-    local next_str = tostring(next_url)
+    local next_str = next_url:get_text()
     local cache_value
     if marker then
       cache_value = string.format('^%s:%s', marker, next_str)
@@ -296,9 +304,9 @@ local http_walk
 -- Terminal exit for step(): write back if we extended via HTTP this scan,
 -- else just apply. Hoisted as a free function so step()'s recursive cache
 -- hops don't allocate a fresh closure per call.
-local function step_finish(task, chain, http_extended)
+local function step_finish(task, chain, http_extended, terminal_prefix)
   if http_extended then
-    finalize_chain(task, chain, nil)
+    finalize_chain(task, chain, terminal_prefix)
   else
     apply_redirect_chain(task, chain)
   end
@@ -369,7 +377,7 @@ step = function(task, orig_url, chain, seen, data, ntries, http_extended)
 
   local prefix, val = nil, data
   if data:sub(1, 1) == '^' then
-    local p, v = data:match('^%^(%a+):(.+)$')
+    local p, v = data:match('^%^([%w_]+):(.+)$')
     if p then
       prefix, val = p, v
     end
@@ -409,6 +417,15 @@ step = function(task, orig_url, chain, seen, data, ntries, http_extended)
     return
   end
 
+  if prefix == 'non_http' then
+    local rscheme = hop:get_protocol() or val:match('^([^:]+)')
+    -- chain already includes hop (appended via chain_append above)
+    task:insert_result(settings.redirector_symbol_non_http, 1.0,
+        string.format('%s=%s', rscheme, chain_hosts_string(chain)))
+    step_finish(task, chain, http_extended, 'non_http')
+    return
+  end
+
   -- Plain terminal: chain fully resolved, apply (and persist if extended).
   step_finish(task, chain, http_extended)
 end
@@ -495,6 +512,16 @@ http_walk = function(task, orig_url, url, ntries, chain, seen)
       end
 
       if redir_url then
+        local rscheme = redir_url:get_protocol()
+        if rscheme ~= 'http' and rscheme ~= 'https' then
+          lua_util.debugm(N, task, 'stop resolving redirects: %s has non-http(s) scheme %s', loc, rscheme)
+          chain_append(chain, redir_url)
+          task:insert_result(settings.redirector_symbol_non_http, 1.0,
+              string.format('%s=%s', rscheme, chain_hosts_string(chain)))
+          finalize_chain(task, chain, 'non_http')
+          return
+        end
+
         local should_follow
         if settings.redirectors_only then
           should_follow = settings.redirector_hosts_map:get_key(redir_url:get_host()) ~= nil
@@ -546,6 +573,21 @@ http_walk = function(task, orig_url, url, ntries, chain, seen)
           chain_append(chain, redir_url)
           finalize_chain(task, chain, nil)
         end
+      elseif loc then
+        local raw_scheme = loc:match('^([A-Za-z][A-Za-z0-9+%-.]*):')
+        if raw_scheme and raw_scheme ~= 'http' and raw_scheme ~= 'https' then
+          lua_util.debugm(N, task, 'stop resolving redirects: %s has non-http(s) scheme %s (unparseable url)', loc, raw_scheme)
+          -- loc cannot be parsed into a URL object, so it cannot be appended to
+          -- chain or cached with a ^non_http marker. Emit the symbol now and cache
+          -- as a normal terminal; future scans within the TTL won't re-emit it.
+          task:insert_result(settings.redirector_symbol_non_http, 1.0,
+              string.format('%s=%s->%s', raw_scheme, chain_hosts_string(chain), loc))
+          finalize_chain(task, chain, nil)
+        else
+          lua_util.debugm(N, task, 'no location, headers: %s', headers)
+          chain_append(chain, url)
+          finalize_chain(task, chain, nil)
+        end
       else
         lua_util.debugm(N, task, 'no location, headers: %s', headers)
         chain_append(chain, url)
@@ -789,6 +831,13 @@ if opts then
         score = 0,
       }
 
+      rspamd_config:register_symbol {
+        name = settings.redirector_symbol_non_http,
+        type = 'virtual',
+        parent = id,
+        score = 0,
+      }
+
       if settings.redirector_symbol then
         rspamd_config:register_symbol {
           name = settings.redirector_symbol,
diff --git a/test/functional/cases/167_url_redirector_non_http_scheme.robot b/test/functional/cases/167_url_redirector_non_http_scheme.robot
new file mode 100644 (file)
index 0000000..f0dee13
--- /dev/null
@@ -0,0 +1,74 @@
+*** Settings ***
+Suite Setup     Urlredirector Setup
+Suite Teardown  Urlredirector Teardown
+Library         Process
+Library         ${RSPAMD_TESTDIR}/lib/rspamd.py
+Resource        ${RSPAMD_TESTDIR}/lib/rspamd.robot
+Variables       ${RSPAMD_TESTDIR}/lib/vars.py
+
+*** Variables ***
+${CONFIG}               ${RSPAMD_TESTDIR}/configs/url_redirector.conf
+${TEL_MESSAGE}          ${RSPAMD_TESTDIR}/messages/redir_tel_url.eml
+${CHAIN_TEL_MESSAGE}    ${RSPAMD_TESTDIR}/messages/redir_chain_tel_url.eml
+${MULTI_NON_HTTP_MESSAGE}    ${RSPAMD_TESTDIR}/messages/redir_multi_non_http.eml
+${REDIS_SCOPE}          Suite
+${RSPAMD_SCOPE}         Suite
+${RSPAMD_URL_TLD}       ${RSPAMD_TESTDIR}/../lua/unit/test_tld.dat
+${SETTINGS}             {symbols_enabled=[URL_REDIRECTOR_CHECK]}
+
+*** Test Cases ***
+SKIP NON-HTTP SCHEME REDIRECT
+  # Test that url_redirector skips non-HTTP(S) schemes like tel:
+  # The dummy HTTP server redirects /tel_redirect to tel:88006007775
+  # url_redirector should follow the first redirect to 127.0.0.1:18080/tel_redirect
+  # but then stop when it encounters the tel: scheme and not attempt HTTP request
+  Scan File  ${TEL_MESSAGE}  Flags=ext_urls  Settings=${SETTINGS}
+  # The original URL should be processed
+  Expect Extended URL  http://127.0.0.1:18080/tel_redirect
+  Expect Symbol With Exact Options  URL_REDIRECTOR_NON_HTTP  telephone=127.0.0.1->tel:88006007775
+  Do Not Expect Added URL  tel:88006007775
+
+SKIP NON-HTTP SCHEME REDIRECT WITH INTERMEDIATE HOPS
+  # Test that url_redirector traverses intermediate HTTP hops and still detects the
+  # non-HTTP(S) terminal. chain_intermediate_1 -> chain_intermediate_2 -> tel:88006007776.
+  # Intermediate redirector hops are not saved to chain by default (redirectors=false),
+  # so the chain string only shows the original redirector host and the tel: target.
+  Scan File  ${CHAIN_TEL_MESSAGE}  Flags=ext_urls  Settings=${SETTINGS}
+  Expect Extended URL  http://127.0.0.1:18080/chain_intermediate_1
+  Expect Symbol With Exact Options  URL_REDIRECTOR_NON_HTTP  telephone=127.0.0.1->tel:88006007776
+  Do Not Expect Added URL  tel:88006007776
+
+MULTIPLE NON-HTTP REDIRECT TARGETS
+  # Test that a single message with several redirector URLs each pointing to a different
+  # non-HTTP scheme accumulates all scheme options in URL_REDIRECTOR_NON_HTTP.
+  # tel_redirect -> tel:88006007775 (rspamd scheme: telephone)
+  # mailto_redirect -> mailto:user@example.net (rspamd scheme: mailto)
+  Scan File  ${MULTI_NON_HTTP_MESSAGE}  Flags=ext_urls  Settings=${SETTINGS}
+  Expect Extended URL  http://127.0.0.1:18080/tel_redirect
+  Expect Extended URL  http://127.0.0.1:18080/mailto_redirect
+  Expect Symbol With Exact Options  URL_REDIRECTOR_NON_HTTP
+  ...  telephone=127.0.0.1->tel:88006007775
+  ...  mailto=127.0.0.1->mailto:user@example.net
+  Do Not Expect Added URL  tel:88006007775
+  Do Not Expect Added URL  mailto:user@example.net
+
+*** Keywords ***
+Urlredirector Setup
+  Run Dummy Http
+  Rspamd Redis Setup
+
+Urlredirector Teardown
+  Rspamd Redis Teardown
+  Dummy Http Teardown
+  Terminate All Processes    kill=True
+
+Do Not Expect Added URL
+  [Arguments]  ${url}
+  ${found_url} =  Set Variable  ${FALSE}
+  ${url_list} =  Convert To List  ${SCAN_RESULT}[urls]
+  FOR  ${item}  IN  @{url_list}
+    ${d} =  Convert To Dictionary  ${item}
+    ${found_url} =  Evaluate  "${d}[url]" == "${url}"
+    Exit For Loop If  ${found_url} == ${TRUE}
+  END
+  Should Be True  not ${found_url}  msg="URL ${url} should NOT be found but it was"
diff --git a/test/functional/messages/redir_chain_tel_url.eml b/test/functional/messages/redir_chain_tel_url.eml
new file mode 100644 (file)
index 0000000..2c63e45
--- /dev/null
@@ -0,0 +1,7 @@
+Content-type: text/html\r
+\r
+<html>\r
+<body>\r
+<a href="http://127.0.0.1:18080/chain_intermediate_1">Call us via chain</a>\r
+</body>\r
+</html>
\ No newline at end of file
diff --git a/test/functional/messages/redir_multi_non_http.eml b/test/functional/messages/redir_multi_non_http.eml
new file mode 100644 (file)
index 0000000..6d82b92
--- /dev/null
@@ -0,0 +1,8 @@
+Content-type: text/html
+
+<html>
+<body>
+<a href="http://127.0.0.1:18080/tel_redirect">Call us</a>
+<a href="http://127.0.0.1:18080/mailto_redirect">Email us</a>
+</body>
+</html>
diff --git a/test/functional/messages/redir_tel_url.eml b/test/functional/messages/redir_tel_url.eml
new file mode 100644 (file)
index 0000000..13f08e6
--- /dev/null
@@ -0,0 +1,7 @@
+Content-type: text/html
+
+<html>
+<body>
+<a href="http://127.0.0.1:18080/tel_redirect">Call us</a>
+</body>
+</html>
index 3358a0daed4fbbd3621b9426c75e202aaad0e1dd..4a213b67532a0dd7f355149f81935cb3744f5cf0 100755 (executable)
@@ -102,6 +102,18 @@ class MainHandler(tornado.web.RequestHandler):
         elif path == "/redirect4":
             # Send an HTTP redirect to the bind address of the server
             self.redirect(f"{self.request.protocol}://{self.request.host}/redirect3")
+        elif path == "/tel_redirect":
+            # Send an HTTP redirect to a tel: URL (non-HTTP scheme)
+            self.redirect("tel:88006007775")
+        elif path == "/mailto_redirect":
+            # Send an HTTP redirect to a mailto: URL (non-HTTP scheme)
+            self.redirect("mailto:user@example.net")
+        elif path == "/chain_intermediate_1":
+            # Intermediate hop to chain_to_tel2
+            self.redirect(f"{self.request.protocol}://{self.request.host}/chain_intermediate_2")
+        elif path == "/chain_intermediate_2":
+            # Final hop to a tel: URL
+            self.redirect("tel:88006007776")
         elif path == "/chain1":
             # Intermediate hop to chain2
             self.redirect(f"{self.request.protocol}://{self.request.host}/chain2")