From: Alexander Moisseev Date: Tue, 12 May 2026 17:17:27 +0000 (+0300) Subject: [Minor] url_redirector: skip non-HTTP(S) URLs in http_walk X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=780290d0ea404ad23a4464202f212c6782b730e3;p=thirdparty%2Frspamd.git [Minor] url_redirector: skip non-HTTP(S) URLs in http_walk Non-HTTP(S) schemes (such as tel:, mailto:, etc.) cannot have HTTP redirects. Attempting to follow them in http_walk is unnecessary and could potentially lead to errors. This change skips these URLs early in the redirect chain walk and emits the URL_REDIRECTOR_NON_HTTP virtual symbol with a single option in the format: scheme=http_chain->non_http_url e.g.: telephone=click.example.com->tel:+71234567890 --- diff --git a/src/plugins/lua/url_redirector.lua b/src/plugins/lua/url_redirector.lua index d5b0892557..dee021ea0a 100644 --- a/src/plugins/lua/url_redirector.lua +++ b/src/plugins/lua/url_redirector.lua @@ -57,6 +57,7 @@ local settings = { user_agent = default_ua, redirector_symbol = nil, -- insert symbol if redirected url has been found redirector_symbol_nested = "URL_REDIRECTOR_NESTED", -- insert symbol if nested limit has been reached + redirector_symbol_non_http = "URL_REDIRECTOR_NON_HTTP", -- HTTP -> non-HTTP(S) redirect detected redirectors_only = true, -- follow merely redirectors top_urls_key = 'rdr:top_urls', -- key for top urls top_urls_count = 200, -- how many top urls to save @@ -108,13 +109,17 @@ local function encode_url_for_redirect(url_str) return encoded end --- Build a 'host1->host2->...' string from a chain of URL objects, used as --- the symbol option for redirector_symbol_nested. Mirrors the format that --- apply_redirect_chain emits for redirector_symbol. +-- Build a 'host1->host2->...' string from a chain of URL objects. +-- Includes scheme for non-HTTP(S) URLs to distinguish them. local function chain_hosts_string(chain) local hosts = {} for i = 1, #chain do - hosts[i] = chain[i]:get_host() or '?' + local proto = chain[i]:get_protocol() + if proto ~= 'http' and proto ~= 'https' then + hosts[i] = chain[i]:get_text() + else + hosts[i] = chain[i]:get_host() or '?' + end end return table.concat(hosts, '->') end @@ -175,7 +180,10 @@ local function apply_redirect_chain(task, chain) chain[i]:set_redirected(chain[i + 1], mempool) end for i = 2, #chain do - task:inject_url(chain[i]) + local proto = chain[i]:get_protocol() + if proto == 'http' or proto == 'https' then + task:inject_url(chain[i]) + end end if settings.redirector_symbol then task:insert_result(settings.redirector_symbol, 1.0, @@ -230,7 +238,7 @@ local function cache_chain_to_redis(task, chain, terminal_prefix) local function write_link(prev_url, next_url, marker) local link_key = cache_key_for_url(tostring(prev_url)) - local next_str = tostring(next_url) + local next_str = next_url:get_text() local cache_value if marker then cache_value = string.format('^%s:%s', marker, next_str) @@ -296,9 +304,9 @@ local http_walk -- Terminal exit for step(): write back if we extended via HTTP this scan, -- else just apply. Hoisted as a free function so step()'s recursive cache -- hops don't allocate a fresh closure per call. -local function step_finish(task, chain, http_extended) +local function step_finish(task, chain, http_extended, terminal_prefix) if http_extended then - finalize_chain(task, chain, nil) + finalize_chain(task, chain, terminal_prefix) else apply_redirect_chain(task, chain) end @@ -369,7 +377,7 @@ step = function(task, orig_url, chain, seen, data, ntries, http_extended) local prefix, val = nil, data if data:sub(1, 1) == '^' then - local p, v = data:match('^%^(%a+):(.+)$') + local p, v = data:match('^%^([%w_]+):(.+)$') if p then prefix, val = p, v end @@ -409,6 +417,15 @@ step = function(task, orig_url, chain, seen, data, ntries, http_extended) return end + if prefix == 'non_http' then + local rscheme = hop:get_protocol() or val:match('^([^:]+)') + -- chain already includes hop (appended via chain_append above) + task:insert_result(settings.redirector_symbol_non_http, 1.0, + string.format('%s=%s', rscheme, chain_hosts_string(chain))) + step_finish(task, chain, http_extended, 'non_http') + return + end + -- Plain terminal: chain fully resolved, apply (and persist if extended). step_finish(task, chain, http_extended) end @@ -495,6 +512,16 @@ http_walk = function(task, orig_url, url, ntries, chain, seen) end if redir_url then + local rscheme = redir_url:get_protocol() + if rscheme ~= 'http' and rscheme ~= 'https' then + lua_util.debugm(N, task, 'stop resolving redirects: %s has non-http(s) scheme %s', loc, rscheme) + chain_append(chain, redir_url) + task:insert_result(settings.redirector_symbol_non_http, 1.0, + string.format('%s=%s', rscheme, chain_hosts_string(chain))) + finalize_chain(task, chain, 'non_http') + return + end + local should_follow if settings.redirectors_only then should_follow = settings.redirector_hosts_map:get_key(redir_url:get_host()) ~= nil @@ -546,6 +573,21 @@ http_walk = function(task, orig_url, url, ntries, chain, seen) chain_append(chain, redir_url) finalize_chain(task, chain, nil) end + elseif loc then + local raw_scheme = loc:match('^([A-Za-z][A-Za-z0-9+%-.]*):') + if raw_scheme and raw_scheme ~= 'http' and raw_scheme ~= 'https' then + lua_util.debugm(N, task, 'stop resolving redirects: %s has non-http(s) scheme %s (unparseable url)', loc, raw_scheme) + -- loc cannot be parsed into a URL object, so it cannot be appended to + -- chain or cached with a ^non_http marker. Emit the symbol now and cache + -- as a normal terminal; future scans within the TTL won't re-emit it. + task:insert_result(settings.redirector_symbol_non_http, 1.0, + string.format('%s=%s->%s', raw_scheme, chain_hosts_string(chain), loc)) + finalize_chain(task, chain, nil) + else + lua_util.debugm(N, task, 'no location, headers: %s', headers) + chain_append(chain, url) + finalize_chain(task, chain, nil) + end else lua_util.debugm(N, task, 'no location, headers: %s', headers) chain_append(chain, url) @@ -789,6 +831,13 @@ if opts then score = 0, } + rspamd_config:register_symbol { + name = settings.redirector_symbol_non_http, + type = 'virtual', + parent = id, + score = 0, + } + if settings.redirector_symbol then rspamd_config:register_symbol { name = settings.redirector_symbol, diff --git a/test/functional/cases/167_url_redirector_non_http_scheme.robot b/test/functional/cases/167_url_redirector_non_http_scheme.robot new file mode 100644 index 0000000000..f0dee13093 --- /dev/null +++ b/test/functional/cases/167_url_redirector_non_http_scheme.robot @@ -0,0 +1,74 @@ +*** Settings *** +Suite Setup Urlredirector Setup +Suite Teardown Urlredirector Teardown +Library Process +Library ${RSPAMD_TESTDIR}/lib/rspamd.py +Resource ${RSPAMD_TESTDIR}/lib/rspamd.robot +Variables ${RSPAMD_TESTDIR}/lib/vars.py + +*** Variables *** +${CONFIG} ${RSPAMD_TESTDIR}/configs/url_redirector.conf +${TEL_MESSAGE} ${RSPAMD_TESTDIR}/messages/redir_tel_url.eml +${CHAIN_TEL_MESSAGE} ${RSPAMD_TESTDIR}/messages/redir_chain_tel_url.eml +${MULTI_NON_HTTP_MESSAGE} ${RSPAMD_TESTDIR}/messages/redir_multi_non_http.eml +${REDIS_SCOPE} Suite +${RSPAMD_SCOPE} Suite +${RSPAMD_URL_TLD} ${RSPAMD_TESTDIR}/../lua/unit/test_tld.dat +${SETTINGS} {symbols_enabled=[URL_REDIRECTOR_CHECK]} + +*** Test Cases *** +SKIP NON-HTTP SCHEME REDIRECT + # Test that url_redirector skips non-HTTP(S) schemes like tel: + # The dummy HTTP server redirects /tel_redirect to tel:88006007775 + # url_redirector should follow the first redirect to 127.0.0.1:18080/tel_redirect + # but then stop when it encounters the tel: scheme and not attempt HTTP request + Scan File ${TEL_MESSAGE} Flags=ext_urls Settings=${SETTINGS} + # The original URL should be processed + Expect Extended URL http://127.0.0.1:18080/tel_redirect + Expect Symbol With Exact Options URL_REDIRECTOR_NON_HTTP telephone=127.0.0.1->tel:88006007775 + Do Not Expect Added URL tel:88006007775 + +SKIP NON-HTTP SCHEME REDIRECT WITH INTERMEDIATE HOPS + # Test that url_redirector traverses intermediate HTTP hops and still detects the + # non-HTTP(S) terminal. chain_intermediate_1 -> chain_intermediate_2 -> tel:88006007776. + # Intermediate redirector hops are not saved to chain by default (redirectors=false), + # so the chain string only shows the original redirector host and the tel: target. + Scan File ${CHAIN_TEL_MESSAGE} Flags=ext_urls Settings=${SETTINGS} + Expect Extended URL http://127.0.0.1:18080/chain_intermediate_1 + Expect Symbol With Exact Options URL_REDIRECTOR_NON_HTTP telephone=127.0.0.1->tel:88006007776 + Do Not Expect Added URL tel:88006007776 + +MULTIPLE NON-HTTP REDIRECT TARGETS + # Test that a single message with several redirector URLs each pointing to a different + # non-HTTP scheme accumulates all scheme options in URL_REDIRECTOR_NON_HTTP. + # tel_redirect -> tel:88006007775 (rspamd scheme: telephone) + # mailto_redirect -> mailto:user@example.net (rspamd scheme: mailto) + Scan File ${MULTI_NON_HTTP_MESSAGE} Flags=ext_urls Settings=${SETTINGS} + Expect Extended URL http://127.0.0.1:18080/tel_redirect + Expect Extended URL http://127.0.0.1:18080/mailto_redirect + Expect Symbol With Exact Options URL_REDIRECTOR_NON_HTTP + ... telephone=127.0.0.1->tel:88006007775 + ... mailto=127.0.0.1->mailto:user@example.net + Do Not Expect Added URL tel:88006007775 + Do Not Expect Added URL mailto:user@example.net + +*** Keywords *** +Urlredirector Setup + Run Dummy Http + Rspamd Redis Setup + +Urlredirector Teardown + Rspamd Redis Teardown + Dummy Http Teardown + Terminate All Processes kill=True + +Do Not Expect Added URL + [Arguments] ${url} + ${found_url} = Set Variable ${FALSE} + ${url_list} = Convert To List ${SCAN_RESULT}[urls] + FOR ${item} IN @{url_list} + ${d} = Convert To Dictionary ${item} + ${found_url} = Evaluate "${d}[url]" == "${url}" + Exit For Loop If ${found_url} == ${TRUE} + END + Should Be True not ${found_url} msg="URL ${url} should NOT be found but it was" diff --git a/test/functional/messages/redir_chain_tel_url.eml b/test/functional/messages/redir_chain_tel_url.eml new file mode 100644 index 0000000000..2c63e4563b --- /dev/null +++ b/test/functional/messages/redir_chain_tel_url.eml @@ -0,0 +1,7 @@ +Content-type: text/html + + + +Call us via chain + + \ No newline at end of file diff --git a/test/functional/messages/redir_multi_non_http.eml b/test/functional/messages/redir_multi_non_http.eml new file mode 100644 index 0000000000..6d82b9298e --- /dev/null +++ b/test/functional/messages/redir_multi_non_http.eml @@ -0,0 +1,8 @@ +Content-type: text/html + + + +Call us +Email us + + diff --git a/test/functional/messages/redir_tel_url.eml b/test/functional/messages/redir_tel_url.eml new file mode 100644 index 0000000000..13f08e64cb --- /dev/null +++ b/test/functional/messages/redir_tel_url.eml @@ -0,0 +1,7 @@ +Content-type: text/html + + + +Call us + + diff --git a/test/functional/util/dummy_http.py b/test/functional/util/dummy_http.py index 3358a0daed..4a213b6753 100755 --- a/test/functional/util/dummy_http.py +++ b/test/functional/util/dummy_http.py @@ -102,6 +102,18 @@ class MainHandler(tornado.web.RequestHandler): elif path == "/redirect4": # Send an HTTP redirect to the bind address of the server self.redirect(f"{self.request.protocol}://{self.request.host}/redirect3") + elif path == "/tel_redirect": + # Send an HTTP redirect to a tel: URL (non-HTTP scheme) + self.redirect("tel:88006007775") + elif path == "/mailto_redirect": + # Send an HTTP redirect to a mailto: URL (non-HTTP scheme) + self.redirect("mailto:user@example.net") + elif path == "/chain_intermediate_1": + # Intermediate hop to chain_to_tel2 + self.redirect(f"{self.request.protocol}://{self.request.host}/chain_intermediate_2") + elif path == "/chain_intermediate_2": + # Final hop to a tel: URL + self.redirect("tel:88006007776") elif path == "/chain1": # Intermediate hop to chain2 self.redirect(f"{self.request.protocol}://{self.request.host}/chain2")