user_agent = default_ua,
redirector_symbol = nil, -- insert symbol if redirected url has been found
redirector_symbol_nested = "URL_REDIRECTOR_NESTED", -- insert symbol if nested limit has been reached
+ redirector_symbol_non_http = "URL_REDIRECTOR_NON_HTTP", -- HTTP -> non-HTTP(S) redirect detected
redirectors_only = true, -- follow merely redirectors
top_urls_key = 'rdr:top_urls', -- key for top urls
top_urls_count = 200, -- how many top urls to save
return encoded
end
--- Build a 'host1->host2->...' string from a chain of URL objects.
-- HTTP(S) hops are rendered as their host; every other hop is rendered as its
-- full URL text, so the scheme stays visible and the entry can be told apart
-- from an ordinary host.
-- @param chain array of URL objects; get_protocol(), get_host() and
--   get_text() are called on each entry
-- @return string the entries joined with '->' ('' for an empty chain)
local function chain_hosts_string(chain)
  local hosts = {}
  for i = 1, #chain do
    local hop = chain[i]
    local proto = hop:get_protocol()
    local label
    if proto == 'http' or proto == 'https' then
      label = hop:get_host()
    else
      label = hop:get_text()
    end
    -- A nil entry would leave a hole in `hosts`, making table.concat either
    -- raise or silently truncate, so both branches share the '?' placeholder.
    hosts[i] = label or '?'
  end
  return table.concat(hosts, '->')
end
chain[i]:set_redirected(chain[i + 1], mempool)
end
for i = 2, #chain do
- task:inject_url(chain[i])
+ local proto = chain[i]:get_protocol()
+ if proto == 'http' or proto == 'https' then
+ task:inject_url(chain[i])
+ end
end
if settings.redirector_symbol then
task:insert_result(settings.redirector_symbol, 1.0,
local function write_link(prev_url, next_url, marker)
local link_key = cache_key_for_url(tostring(prev_url))
- local next_str = tostring(next_url)
+ local next_str = next_url:get_text()
local cache_value
if marker then
cache_value = string.format('^%s:%s', marker, next_str)
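-- Terminal exit for step(): if the chain was extended via HTTP during this
-- scan, write it back through finalize_chain (forwarding the optional
-- terminal_prefix marker); otherwise just apply it. Hoisted as a free
-- function so step()'s recursive cache hops don't allocate a fresh closure
-- per call.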
-local function step_finish(task, chain, http_extended)
+local function step_finish(task, chain, http_extended, terminal_prefix)
if http_extended then
- finalize_chain(task, chain, nil)
+ finalize_chain(task, chain, terminal_prefix)
else
apply_redirect_chain(task, chain)
end
local prefix, val = nil, data
if data:sub(1, 1) == '^' then
- local p, v = data:match('^%^(%a+):(.+)$')
+ local p, v = data:match('^%^([%w_]+):(.+)$')
if p then
prefix, val = p, v
end
return
end
+ if prefix == 'non_http' then
+ local rscheme = hop:get_protocol() or val:match('^([^:]+)')
+ -- chain already includes hop (appended via chain_append above)
+ task:insert_result(settings.redirector_symbol_non_http, 1.0,
+ string.format('%s=%s', rscheme, chain_hosts_string(chain)))
+ step_finish(task, chain, http_extended, 'non_http')
+ return
+ end
+
-- Plain terminal: chain fully resolved, apply (and persist if extended).
step_finish(task, chain, http_extended)
end
end
if redir_url then
+ local rscheme = redir_url:get_protocol()
+ if rscheme ~= 'http' and rscheme ~= 'https' then
+ lua_util.debugm(N, task, 'stop resolving redirects: %s has non-http(s) scheme %s', loc, rscheme)
+ chain_append(chain, redir_url)
+ task:insert_result(settings.redirector_symbol_non_http, 1.0,
+ string.format('%s=%s', rscheme, chain_hosts_string(chain)))
+ finalize_chain(task, chain, 'non_http')
+ return
+ end
+
local should_follow
if settings.redirectors_only then
should_follow = settings.redirector_hosts_map:get_key(redir_url:get_host()) ~= nil
chain_append(chain, redir_url)
finalize_chain(task, chain, nil)
end
+ elseif loc then
+ local raw_scheme = loc:match('^([A-Za-z][A-Za-z0-9+%-.]*):')
+ if raw_scheme and raw_scheme ~= 'http' and raw_scheme ~= 'https' then
+ lua_util.debugm(N, task, 'stop resolving redirects: %s has non-http(s) scheme %s (unparseable url)', loc, raw_scheme)
+ -- loc cannot be parsed into a URL object, so it cannot be appended to
+ -- chain or cached with a ^non_http marker. Emit the symbol now and cache
+ -- as a normal terminal; future scans within the TTL won't re-emit it.
+ task:insert_result(settings.redirector_symbol_non_http, 1.0,
+ string.format('%s=%s->%s', raw_scheme, chain_hosts_string(chain), loc))
+ finalize_chain(task, chain, nil)
+ else
+ lua_util.debugm(N, task, 'no location, headers: %s', headers)
+ chain_append(chain, url)
+ finalize_chain(task, chain, nil)
+ end
else
lua_util.debugm(N, task, 'no location, headers: %s', headers)
chain_append(chain, url)
score = 0,
}
+ rspamd_config:register_symbol {
+ name = settings.redirector_symbol_non_http,
+ type = 'virtual',
+ parent = id,
+ score = 0,
+ }
+
if settings.redirector_symbol then
rspamd_config:register_symbol {
name = settings.redirector_symbol,
--- /dev/null
*** Settings ***
Suite Setup     Urlredirector Setup
Suite Teardown  Urlredirector Teardown
Library         Process
Library         ${RSPAMD_TESTDIR}/lib/rspamd.py
Resource        ${RSPAMD_TESTDIR}/lib/rspamd.robot
Variables       ${RSPAMD_TESTDIR}/lib/vars.py

*** Variables ***
${CONFIG}                  ${RSPAMD_TESTDIR}/configs/url_redirector.conf
${TEL_MESSAGE}             ${RSPAMD_TESTDIR}/messages/redir_tel_url.eml
${CHAIN_TEL_MESSAGE}       ${RSPAMD_TESTDIR}/messages/redir_chain_tel_url.eml
${MULTI_NON_HTTP_MESSAGE}  ${RSPAMD_TESTDIR}/messages/redir_multi_non_http.eml
${REDIS_SCOPE}             Suite
${RSPAMD_SCOPE}            Suite
${RSPAMD_URL_TLD}          ${RSPAMD_TESTDIR}/../lua/unit/test_tld.dat
${SETTINGS}                {symbols_enabled=[URL_REDIRECTOR_CHECK]}

*** Test Cases ***
SKIP NON-HTTP SCHEME REDIRECT
  # Test that url_redirector skips non-HTTP(S) schemes like tel:
  # The dummy HTTP server redirects /tel_redirect to tel:88006007775.
  # url_redirector should follow the first redirect to 127.0.0.1:18080/tel_redirect
  # but then stop when it encounters the tel: scheme, without attempting an
  # HTTP request to it.
  Scan File  ${TEL_MESSAGE}  Flags=ext_urls  Settings=${SETTINGS}
  # The original URL should be processed
  Expect Extended URL  http://127.0.0.1:18080/tel_redirect
  Expect Symbol With Exact Options  URL_REDIRECTOR_NON_HTTP  telephone=127.0.0.1->tel:88006007775
  Do Not Expect Added URL  tel:88006007775

SKIP NON-HTTP SCHEME REDIRECT WITH INTERMEDIATE HOPS
  # Test that url_redirector traverses intermediate HTTP hops and still detects the
  # non-HTTP(S) terminal: chain_intermediate_1 -> chain_intermediate_2 -> tel:88006007776.
  # Intermediate redirector hops are not saved to chain by default (redirectors=false),
  # so the chain string only shows the original redirector host and the tel: target.
  Scan File  ${CHAIN_TEL_MESSAGE}  Flags=ext_urls  Settings=${SETTINGS}
  Expect Extended URL  http://127.0.0.1:18080/chain_intermediate_1
  Expect Symbol With Exact Options  URL_REDIRECTOR_NON_HTTP  telephone=127.0.0.1->tel:88006007776
  Do Not Expect Added URL  tel:88006007776

MULTIPLE NON-HTTP REDIRECT TARGETS
  # Test that a single message with several redirector URLs, each pointing to a different
  # non-HTTP scheme, accumulates all scheme options in URL_REDIRECTOR_NON_HTTP.
  #   tel_redirect    -> tel:88006007775          (rspamd scheme: telephone)
  #   mailto_redirect -> mailto:user@example.net  (rspamd scheme: mailto)
  Scan File  ${MULTI_NON_HTTP_MESSAGE}  Flags=ext_urls  Settings=${SETTINGS}
  Expect Extended URL  http://127.0.0.1:18080/tel_redirect
  Expect Extended URL  http://127.0.0.1:18080/mailto_redirect
  Expect Symbol With Exact Options  URL_REDIRECTOR_NON_HTTP
  ...  telephone=127.0.0.1->tel:88006007775
  ...  mailto=127.0.0.1->mailto:user@example.net
  Do Not Expect Added URL  tel:88006007775
  Do Not Expect Added URL  mailto:user@example.net

*** Keywords ***
Urlredirector Setup
  Run Dummy Http
  Rspamd Redis Setup

Urlredirector Teardown
  Rspamd Redis Teardown
  Dummy Http Teardown
  Terminate All Processes  kill=True

Do Not Expect Added URL
  [Documentation]  Fails if any entry of ${SCAN_RESULT}[urls] has a `url` field equal to ${url}.
  [Arguments]  ${url}
  ${found_url} =  Set Variable  ${FALSE}
  ${url_list} =  Convert To List  ${SCAN_RESULT}[urls]
  FOR  ${item}  IN  @{url_list}
    ${d} =  Convert To Dictionary  ${item}
    # Use Evaluate's $var syntax so the values are compared as objects instead
    # of being pasted into the expression text, where a quote or backslash in
    # a URL would break or change the expression.
    ${found_url} =  Evaluate  $d['url'] == $url
    Exit For Loop If  ${found_url} == ${TRUE}
  END
  Should Be True  not ${found_url}  msg="URL ${url} should NOT be found but it was"