From: Vsevolod Stakhov
Date: Sat, 15 Nov 2025 09:49:22 +0000 (+0000)
Subject: [Test] Add comprehensive tests for URL deep processing
X-Git-Tag: 3.14.1~15^2~5
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=7028883c8f38689ffe58aaf806f348f2cb019507;p=thirdparty%2Frspamd.git

[Test] Add comprehensive tests for URL deep processing

Unit tests (test/lua/unit/lua_url_filter.lua):
- filter_url_string basic validation (normal, long user, multiple @)
- filter_url with URL objects
- UTF-8 validation (ASCII, Cyrillic, Japanese, invalid)
- Custom filter registration and chaining
- Issue #5731 regression test (oversized user parsing)

Functional tests (test/functional/cases/001_merged/400_url_suspect.robot):
- Moved to 001_merged for shared setup/teardown
- Long user field (80 chars) - URL_USER_LONG
- Very long user field (300 chars) - URL_USER_VERY_LONG
- Numeric IP - URL_NUMERIC_IP
- Numeric IP with user - URL_NUMERIC_IP_USER
- Suspicious TLD - URL_SUSPICIOUS_TLD
- Multiple @ signs - URL_MULTIPLE_AT_SIGNS
- Normal URLs (no false positives)
- All tests verify R_SUSPICIOUS_URL backward compatibility

Test messages (test/functional/messages/):
- url_suspect_long_user.eml (80-char user)
- url_suspect_very_long_user.eml (300-char user)
- url_suspect_numeric_ip.eml
- url_suspect_numeric_ip_user.eml
- url_suspect_bad_tld.eml
- url_suspect_multiple_at.eml
- url_suspect_normal.eml

Config:
- Enable url_suspect plugin in merged-override.conf
- Add Robot Framework outputs to gitignore
---

diff --git a/.gitignore b/.gitignore
index 1105dfcec9..e65490420b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -31,3 +31,7 @@ luacov.stats.out
 .Trashes
 ehthumbs.db
 Thumbs.db
+# Robot Framework outputs
+/log.html
+/report.html
+/output.xml
diff --git a/test/functional/cases/001_merged/400_url_suspect.robot b/test/functional/cases/001_merged/400_url_suspect.robot
new file mode 100644
index 0000000000..da469d94ac
--- /dev/null
+++ b/test/functional/cases/001_merged/400_url_suspect.robot
@@ -0,0 +1,56 @@
+*** Settings ***
+Library         ${RSPAMD_TESTDIR}/lib/rspamd.py
+Resource        ${RSPAMD_TESTDIR}/lib/rspamd.robot
+Variables       ${RSPAMD_TESTDIR}/lib/vars.py
+
+*** Test Cases ***
+
+URL Suspect - Issue 5731 - Long User Field
+  # Test that URLs with oversized user fields are parsed and scored
+  Scan File  ${RSPAMD_TESTDIR}/messages/url_suspect_long_user.eml
+  Expect Symbol With Exact Options  URL_USER_LONG  80
+  Do Not Expect Symbol  URL_USER_VERY_LONG
+  # Should also generate R_SUSPICIOUS_URL for backward compatibility
+  Expect Symbol  R_SUSPICIOUS_URL
+
+URL Suspect - Very Long User Field
+  # Test that very long user fields get appropriate symbol
+  Scan File  ${RSPAMD_TESTDIR}/messages/url_suspect_very_long_user.eml
+  Expect Symbol With Exact Options  URL_USER_VERY_LONG  300
+  Expect Symbol  R_SUSPICIOUS_URL
+
+URL Suspect - Numeric IP
+  # Test numeric IP detection
+  Scan File  ${RSPAMD_TESTDIR}/messages/url_suspect_numeric_ip.eml
+  Expect Symbol  URL_NUMERIC_IP
+  Do Not Expect Symbol  URL_NUMERIC_IP_USER
+
+URL Suspect - Numeric IP with User
+  # Test numeric IP with user field (more suspicious)
+  Scan File  ${RSPAMD_TESTDIR}/messages/url_suspect_numeric_ip_user.eml
+  Expect Symbol  URL_NUMERIC_IP_USER
+  Expect Symbol  R_SUSPICIOUS_URL
+
+URL Suspect - Suspicious TLD
+  # Test suspicious TLD detection
+  Scan File  ${RSPAMD_TESTDIR}/messages/url_suspect_bad_tld.eml
+  Expect Symbol  URL_SUSPICIOUS_TLD
+  Expect Symbol  R_SUSPICIOUS_URL
+
+URL Suspect - Multiple At Signs
+  # Test multiple @ sign detection
+  Scan File  ${RSPAMD_TESTDIR}/messages/url_suspect_multiple_at.eml
+  Expect Symbol  URL_MULTIPLE_AT_SIGNS
+  Expect Symbol  R_SUSPICIOUS_URL
+
+URL Suspect - Normal URL
+  # Test that normal URLs don't trigger symbols; covers every symbol the
+  # other cases in this suite expect, so a false positive in any of them fails
+  Scan File  ${RSPAMD_TESTDIR}/messages/url_suspect_normal.eml
+  Do Not Expect Symbol  URL_USER_PASSWORD
+  Do Not Expect Symbol  URL_USER_LONG
+  Do Not Expect Symbol  URL_USER_VERY_LONG
+  Do Not Expect Symbol  URL_NUMERIC_IP
+  Do Not Expect Symbol  URL_NUMERIC_IP_USER
+  Do Not Expect Symbol  URL_SUSPICIOUS_TLD
+  Do Not Expect Symbol  URL_MULTIPLE_AT_SIGNS
+  Do Not Expect Symbol  R_SUSPICIOUS_URL
diff --git 
a/test/functional/configs/merged-override.conf b/test/functional/configs/merged-override.conf index f2bfd3e03f..a5ddc52790 100644 --- a/test/functional/configs/merged-override.conf +++ b/test/functional/configs/merged-override.conf @@ -461,3 +461,8 @@ EOD; } } } + +# URL suspect plugin for testing +url_suspect { + enabled = true; +} diff --git a/test/functional/messages/url_suspect_bad_tld.eml b/test/functional/messages/url_suspect_bad_tld.eml new file mode 100644 index 0000000000..199c16d5ef --- /dev/null +++ b/test/functional/messages/url_suspect_bad_tld.eml @@ -0,0 +1,11 @@ +From: sender@example.com +To: victim@example.com +Subject: Test Suspicious TLD +Content-Type: text/html; charset=utf-8 + + + +

Click this link:

+Click Here + + diff --git a/test/functional/messages/url_suspect_long_user.eml b/test/functional/messages/url_suspect_long_user.eml new file mode 100644 index 0000000000..cb8b523bd2 --- /dev/null +++ b/test/functional/messages/url_suspect_long_user.eml @@ -0,0 +1,11 @@ +From: sender@example.com +To: victim@example.com +Subject: Test Issue #5731 - Long User Field +Content-Type: text/html; charset=utf-8 + + + +

Click this link:

+Click Here + + diff --git a/test/functional/messages/url_suspect_multiple_at.eml b/test/functional/messages/url_suspect_multiple_at.eml new file mode 100644 index 0000000000..59fdc98a79 --- /dev/null +++ b/test/functional/messages/url_suspect_multiple_at.eml @@ -0,0 +1,11 @@ +From: sender@example.com +To: victim@example.com +Subject: Test Multiple At Signs +Content-Type: text/html; charset=utf-8 + + + +

Click this link:

+Click Here + + diff --git a/test/functional/messages/url_suspect_normal.eml b/test/functional/messages/url_suspect_normal.eml new file mode 100644 index 0000000000..7e9788f91c --- /dev/null +++ b/test/functional/messages/url_suspect_normal.eml @@ -0,0 +1,11 @@ +From: sender@example.com +To: victim@example.com +Subject: Test Normal URL +Content-Type: text/html; charset=utf-8 + + + +

Visit our website:

+Example Site + + diff --git a/test/functional/messages/url_suspect_numeric_ip.eml b/test/functional/messages/url_suspect_numeric_ip.eml new file mode 100644 index 0000000000..77de02102e --- /dev/null +++ b/test/functional/messages/url_suspect_numeric_ip.eml @@ -0,0 +1,11 @@ +From: sender@example.com +To: victim@example.com +Subject: Test Numeric IP +Content-Type: text/html; charset=utf-8 + + + +

Click this link:

+Click Here + + diff --git a/test/functional/messages/url_suspect_numeric_ip_user.eml b/test/functional/messages/url_suspect_numeric_ip_user.eml new file mode 100644 index 0000000000..8718e8330d --- /dev/null +++ b/test/functional/messages/url_suspect_numeric_ip_user.eml @@ -0,0 +1,11 @@ +From: sender@example.com +To: victim@example.com +Subject: Test Numeric IP with User +Content-Type: text/html; charset=utf-8 + + + +

Click this link:

+Click Here + + diff --git a/test/functional/messages/url_suspect_very_long_user.eml b/test/functional/messages/url_suspect_very_long_user.eml new file mode 100644 index 0000000000..5791ddc155 --- /dev/null +++ b/test/functional/messages/url_suspect_very_long_user.eml @@ -0,0 +1,11 @@ +From: sender@example.com +To: victim@example.com +Subject: Test Very Long User Field +Content-Type: text/html; charset=utf-8 + + + +

Click this link:

+Click Here
+
+
diff --git a/test/lua/unit/lua_url_filter.lua b/test/lua/unit/lua_url_filter.lua
new file mode 100644
index 0000000000..b8a646e808
--- /dev/null
+++ b/test/lua/unit/lua_url_filter.lua
@@ -0,0 +1,156 @@
+-- URL filter tests
+--
+-- Covers lua_url_filter.filter_url_string / filter_url result codes, UTF-8
+-- handling and custom filter registration.
+
+context("URL filter functions", function()
+  local lua_url_filter = require("lua_url_filter")
+  local url = require("rspamd_url")
+  local mpool = require("rspamd_mempool")
+  local test_helper = require("rspamd_test_helper")
+  local logger = require("rspamd_logger")
+
+  test_helper.init_url_parser()
+
+  local pool = mpool.create()
+
+  -- Result codes the assertions below expect from the filter functions
+  local ACCEPT = 0
+  local SUSPICIOUS = 1
+  local REJECT = 2
+
+  -- Test filter_url_string basic validation
+  -- Each case: { url string, flags, expected result, description }
+  local filter_cases = {
+    -- Normal URLs - should accept
+    { "http://example.com", 0, ACCEPT, "normal URL" },
+    { "https://www.example.com/path", 0, ACCEPT, "normal HTTPS URL" },
+    { "ftp://ftp.example.com", 0, ACCEPT, "normal FTP URL" },
+
+    -- Long user fields - should be suspicious or rejected
+    { "http://" .. string.rep("a", 100) .. "@example.com", 0, SUSPICIOUS, "100-char user (suspicious)" },
+    { "http://" .. string.rep("a", 300) .. "@example.com", 0, SUSPICIOUS, "300-char user (suspicious)" },
+    { "http://" .. string.rep("a", 600) .. "@example.com", 0, REJECT, "600-char user (reject)" },
+
+    -- Multiple @ signs
+    { "http://user@@example.com", 0, SUSPICIOUS, "double @ sign" },
+    { "http://user@host@example.com", 0, SUSPICIOUS, "multiple @ signs" },
+    { "http://" .. string.rep("@", 25) .. "example.com", 0, REJECT, ">20 @ signs (reject)" },
+
+    -- Very long URLs
+    { "http://example.com/" .. string.rep("a", 2100), 0, REJECT, ">2048 char URL (reject)" },
+
+    -- Control characters (should reject)
+    { "http://example.com/\x00test", 0, REJECT, "URL with null byte" },
+    { "http://example.com/\x1ftest", 0, REJECT, "URL with control char" },
+  }
+
+  for _, c in ipairs(filter_cases) do
+    test("filter_url_string: " .. c[4], function()
+      local result = lua_url_filter.filter_url_string(c[1], c[2])
+      assert_equal(c[3], result,
+          logger.slog('expected result %s, but got %s for "%s"',
+              c[3], result, c[4]))
+    end)
+  end
+
+  -- Test filter_url with URL objects
+  -- Each case: { url string, expected result, description }
+  local url_object_cases = {
+    { "http://example.com", ACCEPT, "normal URL object" },
+    { "http://" .. string.rep("a", 150) .. "@example.com", SUSPICIOUS, "long user in URL object" },
+  }
+
+  for _, c in ipairs(url_object_cases) do
+    test("filter_url: " .. c[3], function()
+      local parsed_url = url.create(pool, c[1])
+      assert_not_nil(parsed_url, "failed to parse: " .. c[1])
+
+      local result = lua_url_filter.filter_url(parsed_url)
+      assert_equal(c[2], result,
+          logger.slog('expected result %s, but got %s for "%s"',
+              c[2], result, c[3]))
+    end)
+  end
+
+  -- Test UTF-8 validation
+  -- Each case: { url string, expected result, description }
+  local utf8_cases = {
+    { "http://example.com/valid", ACCEPT, "valid ASCII" },
+    { "http://example.com/Тест", ACCEPT, "valid UTF-8 Cyrillic" },
+    { "http://example.com/日本語", ACCEPT, "valid UTF-8 Japanese" },
+    { "http://example.com/\xFF\xFE", REJECT, "invalid UTF-8" },
+  }
+
+  for _, c in ipairs(utf8_cases) do
+    test("UTF-8 validation: " .. c[3], function()
+      local result = lua_url_filter.filter_url_string(c[1], 0)
+      assert_equal(c[2], result,
+          logger.slog('expected result %s, but got %s for "%s"',
+              c[2], result, c[3]))
+    end)
+  end
+
+  -- Test oversized user field (issue #5731)
+  -- Defined before the register_filter tests: filters registered there are
+  -- never removed in this file and one of them always returns REJECT, which
+  -- could otherwise override the built-in result checked here.
+  test("issue #5731 - oversized user field parsing", function()
+    local long_user = string.rep("a", 80)
+    local url_str = "http://" .. long_user .. ":password@example.com/path"
+
+    local result = lua_url_filter.filter_url_string(url_str, 0)
+
+    -- Should be SUSPICIOUS, not REJECT, allowing C parser to continue
+    assert_equal(SUSPICIOUS, result,
+        "80-char user should be SUSPICIOUS, allowing parsing to continue")
+
+    -- Verify URL can still be parsed by C parser
+    local parsed_url = url.create(pool, url_str)
+    assert_not_nil(parsed_url, "URL with 80-char user should be parseable")
+
+    local t = parsed_url:to_table()
+    assert_equal("example.com", t.host, "host should be parsed correctly")
+  end)
+
+  -- Test custom filter registration
+  test("register custom filter", function()
+    local called = false
+    local custom_filter = function(url_str, flags)
+      called = true
+      if url_str:match("blocked") then
+        return REJECT
+      end
+      return ACCEPT
+    end
+
+    lua_url_filter.register_filter(custom_filter)
+
+    local result = lua_url_filter.filter_url_string("http://blocked.example.com", 0)
+    assert_true(called, "custom filter was not called")
+    assert_equal(REJECT, result, "custom filter did not reject")
+  end)
+
+  -- Test filter chaining
+  test("filter chaining stops on REJECT", function()
+    local filter1_called = false
+    local filter2_called = false
+
+    lua_url_filter.register_filter(function(url_str, flags)
+      filter1_called = true
+      return REJECT
+    end)
+
+    lua_url_filter.register_filter(function(url_str, flags)
+      filter2_called = true
+      return ACCEPT
+    end)
+
+    local result = lua_url_filter.filter_url_string("http://example.com", 0)
+
+    assert_true(filter1_called, "first filter not called")
+    assert_false(filter2_called, "second filter called despite REJECT")
+    -- The rejecting filter's verdict must also be what the caller receives
+    assert_equal(REJECT, result, "chain did not return REJECT")
+  end)
+
+end)