From: Vsevolod Stakhov Date: Fri, 25 Jul 2025 07:48:48 +0000 (+0100) Subject: [Test] Add multiclass tests X-Git-Tag: 3.13.0~38^2~15 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=b70a2f928f3f03c493547a45668bc66a934e726f;p=thirdparty%2Frspamd.git [Test] Add multiclass tests --- diff --git a/test/functional/cases/110_statistics/300-multiclass-redis.robot b/test/functional/cases/110_statistics/300-multiclass-redis.robot new file mode 100644 index 0000000000..1663a78258 --- /dev/null +++ b/test/functional/cases/110_statistics/300-multiclass-redis.robot @@ -0,0 +1,60 @@ +*** Settings *** +Documentation Multiclass Bayes Classification Tests with Redis Backend +Suite Setup Rspamd Redis Setup +Suite Teardown Rspamd Redis Teardown +Resource multiclass_lib.robot + +*** Variables *** +${RSPAMD_REDIS_SERVER} ${RSPAMD_REDIS_ADDR}:${RSPAMD_REDIS_PORT} +${RSPAMD_STATS_HASH} siphash + +*** Test Cases *** +Multiclass Basic Learning and Classification + [Documentation] Test basic multiclass learning and classification + [Tags] multiclass basic learning + Multiclass Basic Learn Test + +Multiclass Legacy Compatibility + [Documentation] Test that old learn_spam/learn_ham commands still work + [Tags] multiclass compatibility legacy + Multiclass Legacy Compatibility Test + +Multiclass Relearn + [Documentation] Test reclassifying messages to different classes + [Tags] multiclass relearn + Multiclass Relearn Test + +Multiclass Cross-Class Learning + [Documentation] Test learning message as different class than expected + [Tags] multiclass cross-learn + Multiclass Cross-Learn Test + +Multiclass Unlearn + [Documentation] Test unlearning (learning message as different class) + [Tags] multiclass unlearn + Multiclass Unlearn Test + +Multiclass Statistics + [Documentation] Test that statistics show all class information + [Tags] multiclass statistics + Multiclass Stats Test + +Multiclass Performance + [Documentation] Test classification performance with multiple classes + [Tags] multiclass performance + Multiclass Performance Test 50 + +Per-User Multiclass Learning + [Documentation] Test per-user multiclass classification + [Tags] multiclass per-user + [Setup] Set Suite Variable ${RSPAMD_STATS_PER_USER} 1 + Multiclass Basic Learn Test user@example.com + [Teardown] Set Suite Variable ${RSPAMD_STATS_PER_USER} ${EMPTY} + +Multiclass Empty Part Test + [Documentation] Test multiclass learning with empty parts + [Tags] multiclass empty-part + Set Test Variable ${MESSAGE} ${RSPAMD_TESTDIR}/messages/empty_part.eml + Learn Multiclass ${EMPTY} spam ${MESSAGE} + Scan File ${MESSAGE} + Expect Symbol BAYES_SPAM \ No newline at end of file diff --git a/test/functional/cases/110_statistics/310-multiclass-migration.robot b/test/functional/cases/110_statistics/310-multiclass-migration.robot new file mode 100644 index 0000000000..ef13880054 --- /dev/null +++ b/test/functional/cases/110_statistics/310-multiclass-migration.robot @@ -0,0 +1,116 @@ +*** Settings *** +Documentation Multiclass Bayes Migration Tests +Suite Setup Rspamd Redis Setup +Suite Teardown Rspamd Redis Teardown +Resource multiclass_lib.robot +Resource lib.robot + +*** Variables *** +${RSPAMD_REDIS_SERVER} ${RSPAMD_REDIS_ADDR}:${RSPAMD_REDIS_PORT} +${RSPAMD_STATS_HASH} siphash +${BINARY_CONFIG} ${RSPAMD_TESTDIR}/configs/stats.conf +${MULTICLASS_CONFIG} ${RSPAMD_TESTDIR}/configs/multiclass_bayes.conf + +*** Test Cases *** +Binary to Multiclass Migration + [Documentation] Test migration from binary to multiclass configuration + [Tags] migration binary-to-multiclass + + # First, start with binary configuration and learn some data + Set Suite Variable ${CONFIG} ${BINARY_CONFIG} + Rspamd Redis Teardown + Rspamd Redis Setup + + # Learn with binary system + Learn Test + + # Now switch to multiclass configuration + Set Suite Variable ${CONFIG} ${MULTICLASS_CONFIG} + Rspamd Teardown + Rspamd Setup + + # Should still work with existing data + Scan File ${MESSAGE_SPAM} + Expect Symbol BAYES_SPAM + Scan File ${MESSAGE_HAM} + Expect Symbol BAYES_HAM + + # Should be able to add new classes + Learn Multiclass ${EMPTY} newsletter ${MESSAGE_NEWSLETTER} + Scan File ${MESSAGE_NEWSLETTER} + Expect Symbol BAYES_NEWSLETTER + +Configuration Validation + [Documentation] Test multiclass configuration validation + [Tags] configuration validation + + # Test that configuration loads without errors + ${result} = Run Process rspamd -t -c ${MULTICLASS_CONFIG} + Should Be Equal As Integers ${result.rc} 0 msg=Configuration validation failed: ${result.stderr} + +Redis Data Format Migration + [Documentation] Test that Redis data format is properly migrated + [Tags] migration redis data-format + + # Start with binary data + Set Suite Variable ${CONFIG} ${BINARY_CONFIG} + Rspamd Redis Teardown + Rspamd Redis Setup + Learn Test + + # Check binary format in Redis + ${redis_result} = Run Process redis-cli -p ${RSPAMD_REDIS_PORT} KEYS *_learns + Should Contain ${redis_result.stdout} _learns + + # Switch to multiclass + Set Suite Variable ${CONFIG} ${MULTICLASS_CONFIG} + Rspamd Teardown + Rspamd Setup + + # Data should still be accessible + Scan File ${MESSAGE_SPAM} + Expect Symbol BAYES_SPAM + +Backward Compatibility + [Documentation] Test that multiclass system maintains backward compatibility + [Tags] compatibility backward + + # Use multiclass config but test old commands + Learn ${EMPTY} spam ${MESSAGE_SPAM} + Learn ${EMPTY} ham ${MESSAGE_HAM} + + # Should work the same as before + Scan File ${MESSAGE_SPAM} + Expect Symbol BAYES_SPAM + Scan File ${MESSAGE_HAM} + Expect Symbol BAYES_HAM + +Class Label Validation + [Documentation] Test class label validation and error handling + [Tags] validation class-labels + + # This would test invalid class names, duplicate labels, etc. + # Implementation depends on how validation errors are exposed + ${result} = Run Rspamc -h ${RSPAMD_LOCAL_ADDR}:${RSPAMD_PORT_CONTROLLER} learn_class:invalid-class-name ${MESSAGE_SPAM} + Should Not Be Equal As Integers ${result.rc} 0 msg=Should reject invalid class name + +Multiclass Stats Format + [Documentation] Test that stats output shows multiclass information + [Tags] statistics multiclass-format + + # Learn some data across multiple classes + Learn Multiclass ${EMPTY} spam ${MESSAGE_SPAM} + Learn Multiclass ${EMPTY} ham ${MESSAGE_HAM} + Learn Multiclass ${EMPTY} newsletter ${MESSAGE_NEWSLETTER} + + # Check stats format + ${result} = Run Rspamc -h ${RSPAMD_LOCAL_ADDR}:${RSPAMD_PORT_CONTROLLER} stat + Check Rspamc ${result} + + # Should show all classes in stats + Should Contain ${result.stdout} spam + Should Contain ${result.stdout} ham + Should Contain ${result.stdout} newsletter + + # Should show learn counts + Should Match Regexp ${result.stdout} learned.*\\d+ \ No newline at end of file diff --git a/test/functional/cases/110_statistics/multiclass_lib.robot b/test/functional/cases/110_statistics/multiclass_lib.robot new file mode 100644 index 0000000000..e6e1788d40 --- /dev/null +++ b/test/functional/cases/110_statistics/multiclass_lib.robot @@ -0,0 +1,196 @@ +*** Settings *** +Library ${RSPAMD_TESTDIR}/lib/rspamd.py +Resource ${RSPAMD_TESTDIR}/lib/rspamd.robot +Variables ${RSPAMD_TESTDIR}/lib/vars.py + +*** Variables *** +${CONFIG} ${RSPAMD_TESTDIR}/configs/multiclass_bayes.conf +${MESSAGE_HAM} ${RSPAMD_TESTDIR}/messages/ham.eml +${MESSAGE_SPAM} ${RSPAMD_TESTDIR}/messages/spam_message.eml +${MESSAGE_NEWSLETTER} ${RSPAMD_TESTDIR}/messages/newsletter.eml +${MESSAGE_TRANSACTIONAL} ${RSPAMD_TESTDIR}/messages/transactional.eml +${REDIS_SCOPE} Suite +${RSPAMD_REDIS_SERVER} null +${RSPAMD_SCOPE} Suite +${RSPAMD_STATS_BACKEND} redis +${RSPAMD_STATS_HASH} null +${RSPAMD_STATS_KEY} null +${RSPAMD_STATS_PER_USER} ${EMPTY} + +*** Keywords *** +Learn Multiclass + [Arguments] ${user} ${class} ${message} + IF "${user}" + ${result} = Run Rspamc -d ${user} -h ${RSPAMD_LOCAL_ADDR}:${RSPAMD_PORT_CONTROLLER} learn_class:${class} ${message} + ELSE + ${result} = Run Rspamc -h ${RSPAMD_LOCAL_ADDR}:${RSPAMD_PORT_CONTROLLER} learn_class:${class} ${message} + END + Check Rspamc ${result} + +Learn Multiclass Legacy + [Arguments] ${user} ${class} ${message} + # Test backward compatibility with old learn_spam/learn_ham commands + IF "${user}" + ${result} = Run Rspamc -d ${user} -h ${RSPAMD_LOCAL_ADDR}:${RSPAMD_PORT_CONTROLLER} learn_${class} ${message} + ELSE + ${result} = Run Rspamc -h ${RSPAMD_LOCAL_ADDR}:${RSPAMD_PORT_CONTROLLER} learn_${class} ${message} + END + Check Rspamc ${result} + +Multiclass Basic Learn Test + [Arguments] ${user}=${EMPTY} + Set Suite Variable ${RSPAMD_STATS_LEARNTEST} 0 + Set Test Variable ${kwargs} &{EMPTY} + IF "${user}" + Set To Dictionary ${kwargs} Deliver-To=${user} + END + + # Learn all classes + Learn Multiclass ${user} spam ${MESSAGE_SPAM} + Learn Multiclass ${user} ham ${MESSAGE_HAM} + Learn Multiclass ${user} newsletter ${MESSAGE_NEWSLETTER} + Learn Multiclass ${user} transactional ${MESSAGE_TRANSACTIONAL} + + # Test classification + Scan File ${MESSAGE_SPAM} &{kwargs} + Expect Symbol BAYES_SPAM + + Scan File ${MESSAGE_HAM} &{kwargs} + Expect Symbol BAYES_HAM + + Scan File ${MESSAGE_NEWSLETTER} &{kwargs} + Expect Symbol BAYES_NEWSLETTER + + Scan File ${MESSAGE_TRANSACTIONAL} &{kwargs} + Expect Symbol BAYES_TRANSACTIONAL + + Set Suite Variable ${RSPAMD_STATS_LEARNTEST} 1 + +Multiclass Legacy Compatibility Test + [Arguments] ${user}=${EMPTY} + Set Test Variable ${kwargs} &{EMPTY} + IF "${user}" + Set To Dictionary ${kwargs} Deliver-To=${user} + END + + # Test legacy learn_spam and learn_ham commands still work + Learn Multiclass Legacy ${user} spam ${MESSAGE_SPAM} + Learn Multiclass Legacy ${user} ham ${MESSAGE_HAM} + + # Should still classify correctly + Scan File ${MESSAGE_SPAM} &{kwargs} + Expect Symbol BAYES_SPAM + + Scan File ${MESSAGE_HAM} &{kwargs} + Expect Symbol BAYES_HAM + +Multiclass Relearn Test + [Arguments] ${user}=${EMPTY} + IF ${RSPAMD_STATS_LEARNTEST} == 0 + Fail "Learn test was not run" + END + + Set Test Variable ${kwargs} &{EMPTY} + IF "${user}" + Set To Dictionary ${kwargs} Deliver-To=${user} + END + + # Relearn spam message as ham + Learn Multiclass ${user} ham ${MESSAGE_SPAM} + + # Should now classify as ham or at least not spam + Scan File ${MESSAGE_SPAM} &{kwargs} + ${pass} = Run Keyword And Return Status Expect Symbol BAYES_HAM + IF ${pass} + Pass Execution Successfully reclassified spam as ham + END + Do Not Expect Symbol BAYES_SPAM + +Multiclass Cross-Learn Test + [Arguments] ${user}=${EMPTY} + Set Test Variable ${kwargs} &{EMPTY} + IF "${user}" + Set To Dictionary ${kwargs} Deliver-To=${user} + END + + # Learn newsletter message as transactional + Learn Multiclass ${user} transactional ${MESSAGE_NEWSLETTER} + + # Should classify as transactional, not newsletter + Scan File ${MESSAGE_NEWSLETTER} &{kwargs} + Expect Symbol BAYES_TRANSACTIONAL + Do Not Expect Symbol BAYES_NEWSLETTER + +Multiclass Unlearn Test + [Arguments] ${user}=${EMPTY} + Set Test Variable ${kwargs} &{EMPTY} + IF "${user}" + Set To Dictionary ${kwargs} Deliver-To=${user} + END + + # First learn spam + Learn Multiclass ${user} spam ${MESSAGE_SPAM} + Scan File ${MESSAGE_SPAM} &{kwargs} + Expect Symbol BAYES_SPAM + + # Then unlearn spam (learn as ham) + Learn Multiclass ${user} ham ${MESSAGE_SPAM} + + # Should no longer classify as spam + Scan File ${MESSAGE_SPAM} &{kwargs} + Do Not Expect Symbol BAYES_SPAM + +Check Multiclass Results + [Arguments] ${result} ${expected_class} + # Check that scan result contains expected class information + Should Contain ${result.stdout} BAYES_${expected_class.upper()} + # Check for multiclass result format [class_name] + Should Match Regexp ${result.stdout} BAYES_${expected_class.upper()}.*\\[${expected_class}\\] + +Multiclass Stats Test + # Check that rspamc stat shows learning counts for all classes + ${result} = Run Rspamc -h ${RSPAMD_LOCAL_ADDR}:${RSPAMD_PORT_CONTROLLER} stat + Check Rspamc ${result} + + # Should show statistics for all classes + Should Contain ${result.stdout} spam + Should Contain ${result.stdout} ham + Should Contain ${result.stdout} newsletter + Should Contain ${result.stdout} transactional + +Multiclass Configuration Migration Test + # Test that old binary config can be automatically migrated + Set Test Variable ${binary_config} ${RSPAMD_TESTDIR}/configs/stats.conf + + # Start with binary config + ${result} = Run Rspamc --config ${binary_config} stat + Check Rspamc ${result} + + # Should show deprecation warning but work + Should Contain ${result.stderr} deprecated ignore_case=True + +Multiclass Performance Test + [Arguments] ${num_messages}=100 + # Test classification performance with multiple classes + ${start_time} = Get Time epoch + + FOR ${i} IN RANGE ${num_messages} + Scan File ${MESSAGE_SPAM} + Scan File ${MESSAGE_HAM} + Scan File ${MESSAGE_NEWSLETTER} + Scan File ${MESSAGE_TRANSACTIONAL} + END + + ${end_time} = Get Time epoch + ${duration} = Evaluate ${end_time} - ${start_time} + + # Should complete in reasonable time (adjust threshold as needed) + Should Be True ${duration} < 30 msg=Performance test took ${duration}s, expected < 30s + +Multiclass Memory Test + # Test that memory usage is reasonable for multiclass classification + ${result} = Run Rspamc -h ${RSPAMD_LOCAL_ADDR}:${RSPAMD_PORT_CONTROLLER} stat + Check Rspamc ${result} + + # Extract memory usage if available in stats + # This is a placeholder - actual implementation would parse memory stats \ No newline at end of file diff --git a/test/functional/configs/multiclass_bayes.conf b/test/functional/configs/multiclass_bayes.conf new file mode 100644 index 0000000000..3504cd16eb --- /dev/null +++ b/test/functional/configs/multiclass_bayes.conf @@ -0,0 +1,143 @@ +options = { + filters = ["spf", "dkim", "regexp"] + url_tld = "{= env.TESTDIR =}/../lua/unit/test_tld.dat" + pidfile = "{= env.TMPDIR =}/rspamd.pid" + dns { + retransmits = 10; + timeout = 2s; + fake_records = [{ + name = "example.net"; + type = txt; + replies = ["v=spf1 -all"]; + }] + } +} + +logging = { + type = "file", + level = "debug" + filename = "{= env.TMPDIR =}/rspamd.log" +} + +metric = { + name = "default", + actions = { + reject = 100500, + } + unknown_weight = 1 +} + +worker { + type = normal + bind_socket = "{= env.LOCAL_ADDR =}:{= env.PORT_NORMAL =}" + count = 1 + keypair { + pubkey = "{= env.KEY_PUB1 =}"; + privkey = "{= env.KEY_PVT1 =}"; + } + task_timeout = 60s; +} + +worker { + type = controller + bind_socket = "{= env.LOCAL_ADDR =}:{= env.PORT_CONTROLLER =}" + count = 1 + keypair { + pubkey = "{= env.KEY_PUB1 =}"; + privkey = "{= env.KEY_PVT1 =}"; + } + secure_ip = ["127.0.0.1", "::1"]; + stats_path = "{= env.TMPDIR =}/stats.ucl"; +} + +# Multi-class Bayes classifier configuration +classifier { + languages_enabled = true; + tokenizer { + name = "osb"; + hash = {= env.STATS_HASH =}; + key = {= env.STATS_KEY =}; + } + backend = "{= env.STATS_BACKEND =}"; + + # Multi-class statfiles + statfile { + class = "spam"; + symbol = BAYES_SPAM; + size = 1M; + server = {= env.REDIS_SERVER =} + } + statfile { + class = "ham"; + symbol = BAYES_HAM; + size = 1M; + server = {= env.REDIS_SERVER =} + } + statfile { + class = "newsletter"; + symbol = BAYES_NEWSLETTER; + size = 1M; + server = {= env.REDIS_SERVER =} + } + statfile { + class = "transactional"; + symbol = BAYES_TRANSACTIONAL; + size = 1M; + server = {= env.REDIS_SERVER =} + } + + # Backend class labels for Redis storage optimization + class_labels = { + "spam" = "S"; + "ham" = "H"; + "newsletter" = "N"; + "transactional" = "T"; + } + + cache { + server = {= env.REDIS_SERVER =} + } + + # Multi-class autolearn configuration + autolearn = { + classes = { + spam = { + threshold = 15.0; + verdict_mapping = { spam = true }; + }; + ham = { + threshold = -5.0; + verdict_mapping = { ham = true }; + }; + newsletter = { + symbols = ["NEWSLETTER_HEADER", "BULK_MAIL"]; + threshold = 8.0; + }; + transactional = { + symbols = ["TRANSACTIONAL_MAIL", "PASSWORD_RESET"]; + threshold = 5.0; + }; + }; + + check_balance = true; + max_class_ratio = 0.6; + skip_threshold = 0.95; + } + + # Standard configuration + min_learns = 10; + min_tokens = 11; + min_prob_strength = 0.05; + + {% if env.STATS_PER_USER ~= '' %} + per_user = < +MIME-Version: 1.0 +Content-Type: text/plain + +Dear Subscriber, + +This is our monthly newsletter with special offers and updates. + +Best regards, +Newsletter Team + +Unsubscribe: https://example.com/unsubscribe?id=123 \ No newline at end of file diff --git a/test/functional/messages/transactional.eml b/test/functional/messages/transactional.eml new file mode 100644 index 0000000000..e227aaa77d --- /dev/null +++ b/test/functional/messages/transactional.eml @@ -0,0 +1,18 @@ +From: noreply@example.com +To: user@example.org +Subject: Password Reset Request +Date: Thu, 21 Jul 2023 11:00:00 +0000 +Message-ID: +MIME-Version: 1.0 +Content-Type: text/plain + +Hello, + +You have requested a password reset for your account. + +Click here to reset your password: https://example.com/reset?token=abc123 + +This link expires in 24 hours. + +Best regards, +Security Team \ No newline at end of file