git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
[Test] Add comprehensive tests for URL deep processing
author Vsevolod Stakhov <vsevolod@rspamd.com>
Sat, 15 Nov 2025 09:49:22 +0000 (09:49 +0000)
committer Vsevolod Stakhov <vsevolod@rspamd.com>
Sat, 15 Nov 2025 09:49:22 +0000 (09:49 +0000)
Unit tests (test/lua/unit/lua_url_filter.lua):
- filter_url_string basic validation (normal, long user, multiple @, over-long URL, control characters)
- filter_url with URL objects
- UTF-8 validation (ASCII, Cyrillic, Japanese, invalid)
- Custom filter registration and chaining
- Issue #5731 regression test (oversized user parsing)

Functional tests (test/functional/cases/001_merged/400_url_suspect.robot):
- Moved to 001_merged for shared setup/teardown
- Long user field (80 chars) - URL_USER_LONG
- Very long user field (300 chars) - URL_USER_VERY_LONG
- Numeric IP - URL_NUMERIC_IP
- Numeric IP with user - URL_NUMERIC_IP_USER
- Suspicious TLD - URL_SUSPICIOUS_TLD
- Multiple @ signs - URL_MULTIPLE_AT_SIGNS
- Normal URLs (no false positives)
- Suspicious-URL tests (except plain numeric IP) verify R_SUSPICIOUS_URL backward compatibility; the normal-URL test verifies it is absent

Test messages (test/functional/messages/):
- url_suspect_long_user.eml (80-char user)
- url_suspect_very_long_user.eml (300-char user)
- url_suspect_numeric_ip.eml
- url_suspect_numeric_ip_user.eml
- url_suspect_bad_tld.eml
- url_suspect_multiple_at.eml
- url_suspect_normal.eml

Config:
- Enable url_suspect plugin in merged-override.conf
- Add Robot Framework outputs to gitignore

.gitignore
test/functional/cases/001_merged/400_url_suspect.robot [new file with mode: 0644]
test/functional/configs/merged-override.conf
test/functional/messages/url_suspect_bad_tld.eml [new file with mode: 0644]
test/functional/messages/url_suspect_long_user.eml [new file with mode: 0644]
test/functional/messages/url_suspect_multiple_at.eml [new file with mode: 0644]
test/functional/messages/url_suspect_normal.eml [new file with mode: 0644]
test/functional/messages/url_suspect_numeric_ip.eml [new file with mode: 0644]
test/functional/messages/url_suspect_numeric_ip_user.eml [new file with mode: 0644]
test/functional/messages/url_suspect_very_long_user.eml [new file with mode: 0644]
test/lua/unit/lua_url_filter.lua [new file with mode: 0644]

index 1105dfcec991e1f621009326f5310371e368521b..e65490420b3c9a115aeae599d903f34219387eeb 100644 (file)
@@ -31,3 +31,7 @@ luacov.stats.out
 .Trashes
 ehthumbs.db
 Thumbs.db
+# Robot Framework outputs
+/log.html
+/report.html
+/output.xml
diff --git a/test/functional/cases/001_merged/400_url_suspect.robot b/test/functional/cases/001_merged/400_url_suspect.robot
new file mode 100644 (file)
index 0000000..da469d9
--- /dev/null
@@ -0,0 +1,52 @@
+*** Settings ***
+Library         ${RSPAMD_TESTDIR}/lib/rspamd.py
+Resource        ${RSPAMD_TESTDIR}/lib/rspamd.robot
+Variables       ${RSPAMD_TESTDIR}/lib/vars.py
+
+*** Test Cases ***
+
+URL Suspect - Issue 5731 - Long User Field
+  # Test that URLs with oversized user fields are parsed and scored
+  # Regression case for issue 5731: the fixture links to a URL with a long user part before the @.
+  Scan File  ${RSPAMD_TESTDIR}/messages/url_suspect_long_user.eml
+  # NOTE(review): the option "80" is presumably the user-field length reported by the plugin - confirm.
+  Expect Symbol With Exact Options  URL_USER_LONG  80
+  # The "very long" symbol must not fire for this fixture.
+  Do Not Expect Symbol  URL_USER_VERY_LONG
+  # Should also generate R_SUSPICIOUS_URL for backward compatibility
+  Expect Symbol  R_SUSPICIOUS_URL
+
+URL Suspect - Very Long User Field
+  # Test that very long user fields get appropriate symbol
+  Scan File  ${RSPAMD_TESTDIR}/messages/url_suspect_very_long_user.eml
+  # NOTE(review): the option "300" is presumably the user-field length reported by the plugin - confirm.
+  Expect Symbol With Exact Options  URL_USER_VERY_LONG  300
+  # NOTE(review): URL_USER_LONG is not asserted either way here - confirm whether both symbols may fire together.
+  Expect Symbol  R_SUSPICIOUS_URL
+
+URL Suspect - Numeric IP
+  # Test numeric IP detection
+  # The fixture links to http://192.168.1.100/phishing (IP host, no user part).
+  Scan File  ${RSPAMD_TESTDIR}/messages/url_suspect_numeric_ip.eml
+  Expect Symbol  URL_NUMERIC_IP
+  # Without a user part the stricter "IP with user" symbol must stay silent.
+  Do Not Expect Symbol  URL_NUMERIC_IP_USER
+  # NOTE(review): R_SUSPICIOUS_URL is not asserted either way in this case - confirm whether that is intentional.
+
+URL Suspect - Numeric IP with User
+  # Test numeric IP with user field (more suspicious)
+  # The fixture links to http://admin@192.168.1.100/phishing (IP host plus a user part).
+  Scan File  ${RSPAMD_TESTDIR}/messages/url_suspect_numeric_ip_user.eml
+  Expect Symbol  URL_NUMERIC_IP_USER
+  # NOTE(review): plain URL_NUMERIC_IP is not asserted either way here - confirm the expected interaction.
+  Expect Symbol  R_SUSPICIOUS_URL
+
+URL Suspect - Suspicious TLD
+  # Test suspicious TLD detection
+  # The fixture links to http://phishing.tk/page.
+  # NOTE(review): presumably ".tk" is in the plugin's default suspicious-TLD list - confirm against the plugin config.
+  Scan File  ${RSPAMD_TESTDIR}/messages/url_suspect_bad_tld.eml
+  Expect Symbol  URL_SUSPICIOUS_TLD
+  # The legacy symbol is expected alongside the specific one.
+  Expect Symbol  R_SUSPICIOUS_URL
+
+URL Suspect - Multiple At Signs
+  # Test multiple @ sign detection
+  # The fixture links to http://user@@@phishing.com/page (three consecutive @ signs).
+  Scan File  ${RSPAMD_TESTDIR}/messages/url_suspect_multiple_at.eml
+  Expect Symbol  URL_MULTIPLE_AT_SIGNS
+  # The legacy symbol is expected alongside the specific one.
+  Expect Symbol  R_SUSPICIOUS_URL
+
+URL Suspect - Normal URL
+  # Test that normal URLs don't trigger symbols
+  # The fixture links to https://www.example.com/page: no user part, no IP host, ordinary TLD.
+  Scan File  ${RSPAMD_TESTDIR}/messages/url_suspect_normal.eml
+  # Guard against false positives for every url_suspect symbol exercised by this suite,
+  # not only a subset of them.
+  Do Not Expect Symbol  URL_USER_PASSWORD
+  Do Not Expect Symbol  URL_USER_LONG
+  Do Not Expect Symbol  URL_USER_VERY_LONG
+  Do Not Expect Symbol  URL_NUMERIC_IP
+  Do Not Expect Symbol  URL_NUMERIC_IP_USER
+  Do Not Expect Symbol  URL_SUSPICIOUS_TLD
+  Do Not Expect Symbol  URL_MULTIPLE_AT_SIGNS
+  # The legacy symbol must stay silent as well.
+  Do Not Expect Symbol  R_SUSPICIOUS_URL
index f2bfd3e03f5c1e41bc7eddf1dbdcbebaaf531b1e..a5ddc527900e1868de3c43fa59d8c1edc90dcb66 100644 (file)
@@ -461,3 +461,8 @@ EOD;
     }
   }
 }
+
+# URL suspect plugin for testing
+url_suspect {
+  enabled = true;
+}
diff --git a/test/functional/messages/url_suspect_bad_tld.eml b/test/functional/messages/url_suspect_bad_tld.eml
new file mode 100644 (file)
index 0000000..199c16d
--- /dev/null
@@ -0,0 +1,11 @@
+From: sender@example.com
+To: victim@example.com
+Subject: Test Suspicious TLD
+Content-Type: text/html; charset=utf-8
+
+<html>
+<body>
+<p>Click this link:</p>
+<a href="http://phishing.tk/page">Click Here</a>
+</body>
+</html>
diff --git a/test/functional/messages/url_suspect_long_user.eml b/test/functional/messages/url_suspect_long_user.eml
new file mode 100644 (file)
index 0000000..cb8b523
--- /dev/null
@@ -0,0 +1,11 @@
+From: sender@example.com
+To: victim@example.com
+Subject: Test Issue #5731 - Long User Field
+Content-Type: text/html; charset=utf-8
+
+<html>
+<body>
+<p>Click this link:</p>
+<a href="https://XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX@phishing.com/page">Click Here</a>
+</body>
+</html>
diff --git a/test/functional/messages/url_suspect_multiple_at.eml b/test/functional/messages/url_suspect_multiple_at.eml
new file mode 100644 (file)
index 0000000..59fdc98
--- /dev/null
@@ -0,0 +1,11 @@
+From: sender@example.com
+To: victim@example.com
+Subject: Test Multiple At Signs
+Content-Type: text/html; charset=utf-8
+
+<html>
+<body>
+<p>Click this link:</p>
+<a href="http://user@@@phishing.com/page">Click Here</a>
+</body>
+</html>
diff --git a/test/functional/messages/url_suspect_normal.eml b/test/functional/messages/url_suspect_normal.eml
new file mode 100644 (file)
index 0000000..7e9788f
--- /dev/null
@@ -0,0 +1,11 @@
+From: sender@example.com
+To: victim@example.com
+Subject: Test Normal URL
+Content-Type: text/html; charset=utf-8
+
+<html>
+<body>
+<p>Visit our website:</p>
+<a href="https://www.example.com/page">Example Site</a>
+</body>
+</html>
diff --git a/test/functional/messages/url_suspect_numeric_ip.eml b/test/functional/messages/url_suspect_numeric_ip.eml
new file mode 100644 (file)
index 0000000..77de021
--- /dev/null
@@ -0,0 +1,11 @@
+From: sender@example.com
+To: victim@example.com
+Subject: Test Numeric IP
+Content-Type: text/html; charset=utf-8
+
+<html>
+<body>
+<p>Click this link:</p>
+<a href="http://192.168.1.100/phishing">Click Here</a>
+</body>
+</html>
diff --git a/test/functional/messages/url_suspect_numeric_ip_user.eml b/test/functional/messages/url_suspect_numeric_ip_user.eml
new file mode 100644 (file)
index 0000000..8718e83
--- /dev/null
@@ -0,0 +1,11 @@
+From: sender@example.com
+To: victim@example.com
+Subject: Test Numeric IP with User
+Content-Type: text/html; charset=utf-8
+
+<html>
+<body>
+<p>Click this link:</p>
+<a href="http://admin@192.168.1.100/phishing">Click Here</a>
+</body>
+</html>
diff --git a/test/functional/messages/url_suspect_very_long_user.eml b/test/functional/messages/url_suspect_very_long_user.eml
new file mode 100644 (file)
index 0000000..5791ddc
--- /dev/null
@@ -0,0 +1,11 @@
+From: sender@example.com
+To: victim@example.com
+Subject: Test Very Long User Field
+Content-Type: text/html; charset=utf-8
+
+<html>
+<body>
+<p>Click this link:</p>
+<a href="https://XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX@phishing.com/page">Click Here</a>
+</body>
+</html>
diff --git a/test/lua/unit/lua_url_filter.lua b/test/lua/unit/lua_url_filter.lua
new file mode 100644 (file)
index 0000000..b8a646e
--- /dev/null
@@ -0,0 +1,145 @@
+-- URL filter tests
+
+context("URL filter functions", function()
+  local lua_url_filter = require("lua_url_filter")
+  local url = require("rspamd_url")
+  local mpool = require("rspamd_mempool")
+  local test_helper = require("rspamd_test_helper")
+  local logger = require("rspamd_logger")
+
+  -- Initialise the URL parser through the test helper before any URL is created below.
+  test_helper.init_url_parser()
+
+  -- Memory pool handed to every url.create() call in this context.
+  -- NOTE(review): no cleanup of this pool is visible in this file - confirm the intended lifetime.
+  local pool = mpool.create()
+
+  -- Verdict codes the assertions below compare against.
+  -- NOTE(review): presumably these mirror values exported by lua_url_filter - confirm, and
+  -- consider referencing the module's own constants so the two cannot drift apart.
+  local ACCEPT = 0
+  local SUSPICIOUS = 1
+  local REJECT = 2
+
+  -- Table-driven checks for filter_url_string.
+  -- Row layout: { url string, flags argument, expected verdict, test label }.
+  local function user_url(len)
+    -- URL whose user part is `len` repetitions of "a"
+    return "http://" .. string.rep("a", len) .. "@example.com"
+  end
+
+  local filter_cases = {
+    -- Plain URLs: expected to be accepted
+    { "http://example.com", 0, ACCEPT, "normal URL" },
+    { "https://www.example.com/path", 0, ACCEPT, "normal HTTPS URL" },
+    { "ftp://ftp.example.com", 0, ACCEPT, "normal FTP URL" },
+
+    -- Growing user part: suspicious at 100 and 300 chars, rejected at 600
+    { user_url(100), 0, SUSPICIOUS, "100-char user (suspicious)" },
+    { user_url(300), 0, SUSPICIOUS, "300-char user (suspicious)" },
+    { user_url(600), 0, REJECT, "600-char user (reject)" },
+
+    -- Repeated @ separators
+    { "http://user@@example.com", 0, SUSPICIOUS, "double @ sign" },
+    { "http://user@host@example.com", 0, SUSPICIOUS, "multiple @ signs" },
+    { "http://" .. string.rep("@", 25) .. "example.com", 0, REJECT, ">20 @ signs (reject)" },
+
+    -- Over-long URL (2100-char path)
+    { "http://example.com/" .. string.rep("a", 2100), 0, REJECT, ">2048 char URL (reject)" },
+
+    -- Control bytes embedded in the path
+    { "http://example.com/\x00test", 0, REJECT, "URL with null byte" },
+    { "http://example.com/\x1ftest", 0, REJECT, "URL with control char" },
+  }
+
+  -- One test per row; the failure message carries the label rather than the
+  -- (possibly very long) URL itself.
+  for _, row in ipairs(filter_cases) do
+    local input, flags, expected, label = row[1], row[2], row[3], row[4]
+    test("filter_url_string: " .. label, function()
+      local verdict = lua_url_filter.filter_url_string(input, flags)
+      local msg = logger.slog('expected result %s, but got %s for "%s"',
+          expected, verdict, label)
+      assert_equal(expected, verdict, msg)
+    end)
+  end
+
+  -- filter_url is exercised with parsed URL objects instead of raw strings.
+  -- Row layout: { url string to parse, expected verdict, test label }.
+  local url_object_cases = {
+    { "http://example.com", ACCEPT, "normal URL object" },
+    { "http://" .. string.rep("a", 150) .. "@example.com", SUSPICIOUS, "long user in URL object" },
+  }
+
+  for _, row in ipairs(url_object_cases) do
+    local input, expected, label = row[1], row[2], row[3]
+    test("filter_url: " .. label, function()
+      -- The URL has to parse before it can be handed to the filter.
+      local url_obj = url.create(pool, input)
+      assert_not_nil(url_obj, "failed to parse: " .. input)
+
+      local verdict = lua_url_filter.filter_url(url_obj)
+      local msg = logger.slog('expected result %s, but got %s for "%s"',
+          expected, verdict, label)
+      assert_equal(expected, verdict, msg)
+    end)
+  end
+
+  -- UTF-8 handling in filter_url_string: well-formed multi-byte paths are
+  -- expected to be accepted, a malformed byte sequence to be rejected.
+  -- Row layout: { url string, expected verdict, test label }.
+  local utf8_cases = {
+    { "http://example.com/valid", ACCEPT, "valid ASCII" },
+    { "http://example.com/Тест", ACCEPT, "valid UTF-8 Cyrillic" },
+    { "http://example.com/日本語", ACCEPT, "valid UTF-8 Japanese" },
+    { "http://example.com/\xFF\xFE", REJECT, "invalid UTF-8" },
+  }
+
+  for _, row in ipairs(utf8_cases) do
+    local input, expected, label = row[1], row[2], row[3]
+    test("UTF-8 validation: " .. label, function()
+      -- Flags are always 0 for these cases.
+      local verdict = lua_url_filter.filter_url_string(input, 0)
+      local msg = logger.slog('expected result %s, but got %s for "%s"',
+          expected, verdict, label)
+      assert_equal(expected, verdict, msg)
+    end)
+  end
+
+  -- A filter added through register_filter must be invoked by
+  -- filter_url_string and its REJECT verdict must be returned to the caller.
+  -- NOTE(review): no unregister call is visible here, so the filter presumably
+  -- stays registered for the tests that follow - confirm this is acceptable.
+  test("register custom filter", function()
+    local was_invoked = false
+
+    local function reject_blocked(url_str, _)
+      was_invoked = true
+      -- Anything containing "blocked" is refused, everything else passes.
+      if not url_str:match("blocked") then
+        return ACCEPT
+      end
+      return REJECT
+    end
+
+    lua_url_filter.register_filter(reject_blocked)
+
+    local verdict = lua_url_filter.filter_url_string("http://blocked.example.com", 0)
+    assert_true(was_invoked, "custom filter was not called")
+    assert_equal(REJECT, verdict, "custom filter did not reject")
+  end)
+
+  -- Once one registered filter returns REJECT, filters registered after it
+  -- must not be consulted.
+  -- NOTE(review): both filters are left registered after this test, and the
+  -- first one rejects every URL - confirm later tests are not affected by it.
+  test("filter chaining stops on REJECT", function()
+    local invoked = { first = false, second = false }
+
+    local function always_reject(_, _)
+      invoked.first = true
+      return REJECT
+    end
+
+    local function always_accept(_, _)
+      invoked.second = true
+      return ACCEPT
+    end
+
+    -- Registration order matters: the rejecting filter goes in first.
+    lua_url_filter.register_filter(always_reject)
+    lua_url_filter.register_filter(always_accept)
+
+    lua_url_filter.filter_url_string("http://example.com", 0)
+
+    assert_true(invoked.first, "first filter not called")
+    assert_false(invoked.second, "second filter called despite REJECT")
+  end)
+
+  -- Regression test for issue #5731: an 80-character user part must be
+  -- flagged SUSPICIOUS rather than REJECT, and the URL must still be parsed
+  -- by url.create with the correct host.
+  test("issue #5731 - oversized user field parsing", function()
+    local url_str = "http://" .. string.rep("a", 80) .. ":password@example.com/path"
+
+    -- Step 1: the string filter flags the URL but does not reject it.
+    local verdict = lua_url_filter.filter_url_string(url_str, 0)
+    assert_equal(SUSPICIOUS, verdict,
+        "80-char user should be SUSPICIOUS, allowing parsing to continue")
+
+    -- Step 2: the parser still produces a URL object.
+    local url_obj = url.create(pool, url_str)
+    assert_not_nil(url_obj, "URL with 80-char user should be parseable")
+
+    -- Step 3: the host is what follows the user:password@ prefix.
+    local fields = url_obj:to_table()
+    assert_equal("example.com", fields.host, "host should be parsed correctly")
+  end)
+
+end)