]> git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
[Test] Add multiclass tests
authorVsevolod Stakhov <vsevolod@rspamd.com>
Fri, 25 Jul 2025 07:48:48 +0000 (08:48 +0100)
committerVsevolod Stakhov <vsevolod@rspamd.com>
Fri, 25 Jul 2025 07:48:48 +0000 (08:48 +0100)
test/functional/cases/110_statistics/300-multiclass-redis.robot [new file with mode: 0644]
test/functional/cases/110_statistics/310-multiclass-migration.robot [new file with mode: 0644]
test/functional/cases/110_statistics/multiclass_lib.robot [new file with mode: 0644]
test/functional/configs/multiclass_bayes.conf [new file with mode: 0644]
test/functional/messages/newsletter.eml [new file with mode: 0644]
test/functional/messages/transactional.eml [new file with mode: 0644]

diff --git a/test/functional/cases/110_statistics/300-multiclass-redis.robot b/test/functional/cases/110_statistics/300-multiclass-redis.robot
new file mode 100644 (file)
index 0000000..1663a78
--- /dev/null
@@ -0,0 +1,60 @@
+*** Settings ***
+Documentation   Multiclass Bayes Classification Tests with Redis Backend
+Suite Setup     Rspamd Redis Setup
+Suite Teardown  Rspamd Redis Teardown
+Resource        multiclass_lib.robot
+
+*** Variables ***
+${RSPAMD_REDIS_SERVER}  ${RSPAMD_REDIS_ADDR}:${RSPAMD_REDIS_PORT}
+${RSPAMD_STATS_HASH}    siphash
+
+*** Test Cases ***
+Multiclass Basic Learning and Classification
+    [Documentation]    Test basic multiclass learning and classification
+    [Tags]             multiclass  basic  learning
+    # The keyword below sets ${RSPAMD_STATS_LEARNTEST}; "Multiclass Relearn" further down
+    # checks that flag, so this case has to run first.
+    Multiclass Basic Learn Test
+    
+Multiclass Legacy Compatibility
+    [Documentation]    Test that old learn_spam/learn_ham commands still work
+    [Tags]             multiclass  compatibility  legacy
+    Multiclass Legacy Compatibility Test
+
+Multiclass Relearn
+    [Documentation]    Test reclassifying messages to different classes
+    [Tags]             multiclass  relearn
+    # Fails immediately if ${RSPAMD_STATS_LEARNTEST} is 0 (basic learn test did not complete).
+    Multiclass Relearn Test
+
+Multiclass Cross-Class Learning
+    [Documentation]    Test learning message as different class than expected
+    [Tags]             multiclass  cross-learn
+    Multiclass Cross-Learn Test
+
+Multiclass Unlearn
+    [Documentation]    Test unlearning (learning message as different class)
+    [Tags]             multiclass  unlearn
+    Multiclass Unlearn Test
+
+Multiclass Statistics
+    [Documentation]    Test that statistics show all class information
+    [Tags]             multiclass  statistics
+    Multiclass Stats Test
+
+Multiclass Performance
+    [Documentation]    Test classification performance with multiple classes
+    [Tags]             multiclass  performance
+    # 50 is the loop count; the keyword scans four messages per iteration.
+    Multiclass Performance Test  50
+
+Per-User Multiclass Learning
+    [Documentation]    Test per-user multiclass classification
+    [Tags]             multiclass  per-user
+    # NOTE(review): the suite variable is switched to 1 here, but nothing in this test
+    # restarts Rspamd - confirm the already running instance really operates in
+    # per_user mode, otherwise this only repeats the basic test with a Deliver-To user.
+    [Setup]            Set Suite Variable  ${RSPAMD_STATS_PER_USER}  1
+    Multiclass Basic Learn Test  user@example.com
+    # Restore the default (empty) value so later suites/tests are not affected.
+    [Teardown]         Set Suite Variable  ${RSPAMD_STATS_PER_USER}  ${EMPTY}
+
+Multiclass Empty Part Test
+    [Documentation]    Test multiclass learning with empty parts
+    [Tags]             multiclass  empty-part
+    # Learn the empty-part sample as spam (no per-user recipient), then make sure a scan
+    # of the very same file yields the spam symbol.
+    Set Test Variable  ${MESSAGE}  ${RSPAMD_TESTDIR}/messages/empty_part.eml
+    Learn Multiclass  ${EMPTY}  spam  ${MESSAGE}
+    Scan File  ${MESSAGE}
+    Expect Symbol  BAYES_SPAM
\ No newline at end of file
diff --git a/test/functional/cases/110_statistics/310-multiclass-migration.robot b/test/functional/cases/110_statistics/310-multiclass-migration.robot
new file mode 100644 (file)
index 0000000..ef13880
--- /dev/null
@@ -0,0 +1,116 @@
+*** Settings ***
+Documentation   Multiclass Bayes Migration Tests
+Suite Setup     Rspamd Redis Setup
+Suite Teardown  Rspamd Redis Teardown
+Resource        multiclass_lib.robot
+Resource        lib.robot
+
+*** Variables ***
+${RSPAMD_REDIS_SERVER}    ${RSPAMD_REDIS_ADDR}:${RSPAMD_REDIS_PORT}
+${RSPAMD_STATS_HASH}      siphash
+${BINARY_CONFIG}          ${RSPAMD_TESTDIR}/configs/stats.conf
+${MULTICLASS_CONFIG}      ${RSPAMD_TESTDIR}/configs/multiclass_bayes.conf
+
+*** Test Cases ***
+Binary to Multiclass Migration
+    [Documentation]    Test migration from binary to multiclass configuration
+    [Tags]             migration  binary-to-multiclass
+    # Side effect: ${CONFIG} is left pointing at the multiclass config when this test ends.
+    
+    # First, start with binary configuration and learn some data
+    Set Suite Variable  ${CONFIG}  ${BINARY_CONFIG}
+    Rspamd Redis Teardown
+    Rspamd Redis Setup
+    
+    # Learn with binary system
+    # (Learn Test is not defined in this file - presumably provided by lib.robot; verify)
+    Learn Test
+    
+    # Now switch to multiclass configuration
+    # Only Rspamd is restarted here (not the Redis variant of the keywords), so the
+    # intent is for the data learned above to survive the switch.
+    Set Suite Variable  ${CONFIG}  ${MULTICLASS_CONFIG}
+    Rspamd Teardown
+    Rspamd Setup
+    
+    # Should still work with existing data
+    Scan File  ${MESSAGE_SPAM}
+    Expect Symbol  BAYES_SPAM
+    Scan File  ${MESSAGE_HAM}
+    Expect Symbol  BAYES_HAM
+    
+    # Should be able to add new classes
+    Learn Multiclass  ${EMPTY}  newsletter  ${MESSAGE_NEWSLETTER}
+    Scan File  ${MESSAGE_NEWSLETTER}
+    Expect Symbol  BAYES_NEWSLETTER
+
+Configuration Validation
+    [Documentation]    Test multiclass configuration validation
+    [Tags]             configuration  validation
+    
+    # Test that configuration loads without errors
+    # NOTE(review): this runs `rspamd` as found on PATH against the templated config
+    # file; confirm the env.* values referenced by that config are visible to this process.
+    ${result} =  Run Process  rspamd  -t  -c  ${MULTICLASS_CONFIG}
+    # Exit code 0 is treated as "config is valid"; stderr is included in the failure message.
+    Should Be Equal As Integers  ${result.rc}  0  msg=Configuration validation failed: ${result.stderr}
+
+Redis Data Format Migration
+    [Documentation]    Test that Redis data format is properly migrated
+    [Tags]             migration  redis  data-format
+    
+    # Start with binary data
+    Set Suite Variable  ${CONFIG}  ${BINARY_CONFIG}
+    Rspamd Redis Teardown
+    Rspamd Redis Setup
+    Learn Test
+    
+    # Check binary format in Redis
+    # Address the same host:port pair the suite uses for ${RSPAMD_REDIS_SERVER}, and fail
+    # with redis-cli's own error text if the command itself did not succeed.
+    ${redis_result} =  Run Process  redis-cli  -h  ${RSPAMD_REDIS_ADDR}  -p  ${RSPAMD_REDIS_PORT}  KEYS  *_learns
+    Should Be Equal As Integers  ${redis_result.rc}  0  msg=redis-cli failed: ${redis_result.stderr}
+    Should Contain  ${redis_result.stdout}  _learns
+    
+    # Switch to multiclass (only Rspamd is restarted, Redis keeps its data)
+    Set Suite Variable  ${CONFIG}  ${MULTICLASS_CONFIG}
+    Rspamd Teardown
+    Rspamd Setup
+    
+    # Data should still be accessible
+    Scan File  ${MESSAGE_SPAM}
+    Expect Symbol  BAYES_SPAM
+
+Backward Compatibility
+    [Documentation]    Test that multiclass system maintains backward compatibility
+    [Tags]             compatibility  backward
+    
+    # Use multiclass config but test old commands
+    # This test does not set ${CONFIG} itself; it relies on the value left by earlier tests.
+    # (Learn is not defined in this file - presumably the binary learn keyword from lib.robot; verify)
+    Learn  ${EMPTY}  spam  ${MESSAGE_SPAM}
+    Learn  ${EMPTY}  ham  ${MESSAGE_HAM}
+    
+    # Should work the same as before
+    Scan File  ${MESSAGE_SPAM}
+    Expect Symbol  BAYES_SPAM
+    Scan File  ${MESSAGE_HAM}
+    Expect Symbol  BAYES_HAM
+
+Class Label Validation
+    [Documentation]    Test class label validation and error handling
+    [Tags]             validation  class-labels
+    
+    # This would test invalid class names, duplicate labels, etc.
+    # Implementation depends on how validation errors are exposed
+    # Only one case is covered so far: learning with a class label that the test treats
+    # as invalid must make rspamc exit with a non-zero return code.
+    ${result} =  Run Rspamc  -h  ${RSPAMD_LOCAL_ADDR}:${RSPAMD_PORT_CONTROLLER}  learn_class:invalid-class-name  ${MESSAGE_SPAM}
+    Should Not Be Equal As Integers  ${result.rc}  0  msg=Should reject invalid class name
+
+Multiclass Stats Format
+    [Documentation]    Test that stats output shows multiclass information
+    [Tags]             statistics  multiclass-format
+
+    # Teach one sample to each of three classes (label / message pairs)
+    FOR  ${label}  ${sample}  IN
+    ...  spam  ${MESSAGE_SPAM}
+    ...  ham  ${MESSAGE_HAM}
+    ...  newsletter  ${MESSAGE_NEWSLETTER}
+        Learn Multiclass  ${EMPTY}  ${label}  ${sample}
+    END
+
+    # Fetch the controller statistics
+    ${stat_reply} =  Run Rspamc  -h  ${RSPAMD_LOCAL_ADDR}:${RSPAMD_PORT_CONTROLLER}  stat
+    Check Rspamc  ${stat_reply}
+
+    # Every class name that was just learned has to be mentioned in the output
+    FOR  ${label}  IN  spam  ham  newsletter
+        Should Contain  ${stat_reply.stdout}  ${label}
+    END
+
+    # A learn counter (the word "learned" followed by digits) has to be present as well
+    Should Match Regexp  ${stat_reply.stdout}  learned.*\\d+
\ No newline at end of file
diff --git a/test/functional/cases/110_statistics/multiclass_lib.robot b/test/functional/cases/110_statistics/multiclass_lib.robot
new file mode 100644 (file)
index 0000000..e6e1788
--- /dev/null
@@ -0,0 +1,196 @@
+*** Settings ***
+Library         ${RSPAMD_TESTDIR}/lib/rspamd.py
+Resource        ${RSPAMD_TESTDIR}/lib/rspamd.robot
+Variables       ${RSPAMD_TESTDIR}/lib/vars.py
+
+*** Variables ***
+${CONFIG}                      ${RSPAMD_TESTDIR}/configs/multiclass_bayes.conf
+${MESSAGE_HAM}                 ${RSPAMD_TESTDIR}/messages/ham.eml
+${MESSAGE_SPAM}                ${RSPAMD_TESTDIR}/messages/spam_message.eml
+${MESSAGE_NEWSLETTER}          ${RSPAMD_TESTDIR}/messages/newsletter.eml
+${MESSAGE_TRANSACTIONAL}       ${RSPAMD_TESTDIR}/messages/transactional.eml
+${REDIS_SCOPE}                 Suite
+${RSPAMD_REDIS_SERVER}         null
+${RSPAMD_SCOPE}                Suite
+${RSPAMD_STATS_BACKEND}        redis
+${RSPAMD_STATS_HASH}           null
+${RSPAMD_STATS_KEY}            null
+${RSPAMD_STATS_PER_USER}       ${EMPTY}
+
+*** Keywords ***
+Learn Multiclass
+    [Documentation]    Learn ${message} as ${class} through the controller using the
+    ...                learn_class:<class> command; passes -d ${user} when a user is given.
+    [Arguments]  ${user}  ${class}  ${message}
+    # Optional leading "-d <user>" argument pair, empty when no user was supplied
+    IF  "${user}"
+        @{recipient_opt} =  Create List  -d  ${user}
+    ELSE
+        @{recipient_opt} =  Create List
+    END
+    ${reply} =  Run Rspamc  @{recipient_opt}  -h  ${RSPAMD_LOCAL_ADDR}:${RSPAMD_PORT_CONTROLLER}  learn_class:${class}  ${message}
+    Check Rspamc  ${reply}
+
+Learn Multiclass Legacy
+    [Documentation]    Learn ${message} with the old style learn_<class> command
+    ...                (e.g. learn_spam / learn_ham) to exercise backward compatibility;
+    ...                passes -d ${user} when a user is given.
+    [Arguments]  ${user}  ${class}  ${message}
+    # Optional leading "-d <user>" argument pair, empty when no user was supplied
+    IF  "${user}"
+        @{recipient_opt} =  Create List  -d  ${user}
+    ELSE
+        @{recipient_opt} =  Create List
+    END
+    ${reply} =  Run Rspamc  @{recipient_opt}  -h  ${RSPAMD_LOCAL_ADDR}:${RSPAMD_PORT_CONTROLLER}  learn_${class}  ${message}
+    Check Rspamc  ${reply}
+
+Multiclass Basic Learn Test
+    [Documentation]    Learn one sample per class, then verify each sample is tagged with
+    ...                the symbol of its own class. ${RSPAMD_STATS_LEARNTEST} is 0 while
+    ...                the keyword runs and becomes 1 only after every check has passed.
+    [Arguments]  ${user}=${EMPTY}
+    Set Suite Variable  ${RSPAMD_STATS_LEARNTEST}  0
+    Set Test Variable  ${kwargs}  &{EMPTY}
+    IF  "${user}"
+        Set To Dictionary  ${kwargs}  Deliver-To=${user}
+    END
+
+    # Training pass: class label / sample message pairs
+    FOR  ${label}  ${sample}  IN
+    ...  spam  ${MESSAGE_SPAM}
+    ...  ham  ${MESSAGE_HAM}
+    ...  newsletter  ${MESSAGE_NEWSLETTER}
+    ...  transactional  ${MESSAGE_TRANSACTIONAL}
+        Learn Multiclass  ${user}  ${label}  ${sample}
+    END
+
+    # Verification pass: sample message / expected symbol pairs
+    FOR  ${sample}  ${symbol}  IN
+    ...  ${MESSAGE_SPAM}  BAYES_SPAM
+    ...  ${MESSAGE_HAM}  BAYES_HAM
+    ...  ${MESSAGE_NEWSLETTER}  BAYES_NEWSLETTER
+    ...  ${MESSAGE_TRANSACTIONAL}  BAYES_TRANSACTIONAL
+        Scan File  ${sample}  &{kwargs}
+        Expect Symbol  ${symbol}
+    END
+
+    Set Suite Variable  ${RSPAMD_STATS_LEARNTEST}  1
+
+Multiclass Legacy Compatibility Test
+    # Learns the spam and ham samples through the legacy learn_<class> commands and
+    # checks that scanning them still yields BAYES_SPAM / BAYES_HAM.
+    # ${user} is optional; when given it is used for learning (-d) and as Deliver-To on scans.
+    [Arguments]  ${user}=${EMPTY}
+    Set Test Variable  ${kwargs}  &{EMPTY}
+    IF  "${user}"
+        Set To Dictionary  ${kwargs}  Deliver-To=${user}
+    END
+    
+    # Test legacy learn_spam and learn_ham commands still work
+    Learn Multiclass Legacy  ${user}  spam  ${MESSAGE_SPAM}
+    Learn Multiclass Legacy  ${user}  ham  ${MESSAGE_HAM}
+    
+    # Should still classify correctly
+    Scan File  ${MESSAGE_SPAM}  &{kwargs}
+    Expect Symbol  BAYES_SPAM
+    
+    Scan File  ${MESSAGE_HAM}  &{kwargs}
+    Expect Symbol  BAYES_HAM
+
+Multiclass Relearn Test
+    [Documentation]    Relearn the spam sample as ham and verify it is no longer
+    ...                classified as spam (BAYES_HAM is accepted as the ideal outcome).
+    ...                Requires Multiclass Basic Learn Test to have completed beforehand.
+    [Arguments]  ${user}=${EMPTY}
+    # Read the flag with a default, so a missing variable produces the explicit
+    # failure below instead of an undefined-variable error.
+    ${learn_done} =  Get Variable Value  $RSPAMD_STATS_LEARNTEST  0
+    IF  ${learn_done} == 0
+        Fail  "Learn test was not run"
+    END
+    
+    Set Test Variable  ${kwargs}  &{EMPTY}
+    IF  "${user}"
+        Set To Dictionary  ${kwargs}  Deliver-To=${user}
+    END
+    
+    # Relearn spam message as ham
+    Learn Multiclass  ${user}  ham  ${MESSAGE_SPAM}
+    
+    # Should now classify as ham or at least not spam.
+    # No Pass Execution here: that would end the whole calling test and silently skip
+    # any steps a caller places after this keyword.
+    Scan File  ${MESSAGE_SPAM}  &{kwargs}
+    ${pass} =  Run Keyword And Return Status  Expect Symbol  BAYES_HAM
+    IF  ${pass}
+        Log  Successfully reclassified spam as ham
+    ELSE
+        Do Not Expect Symbol  BAYES_SPAM
+    END
+
+Multiclass Cross-Learn Test
+    # Learns the newsletter sample under the "transactional" class and expects the scan
+    # result to carry BAYES_TRANSACTIONAL and not BAYES_NEWSLETTER.
+    # ${user} is optional; when given it is used for learning (-d) and as Deliver-To on scans.
+    [Arguments]  ${user}=${EMPTY}
+    Set Test Variable  ${kwargs}  &{EMPTY}
+    IF  "${user}"
+        Set To Dictionary  ${kwargs}  Deliver-To=${user}
+    END
+    
+    # Learn newsletter message as transactional
+    Learn Multiclass  ${user}  transactional  ${MESSAGE_NEWSLETTER}
+    
+    # Should classify as transactional, not newsletter
+    Scan File  ${MESSAGE_NEWSLETTER}  &{kwargs}
+    Expect Symbol  BAYES_TRANSACTIONAL
+    Do Not Expect Symbol  BAYES_NEWSLETTER
+
+Multiclass Unlearn Test
+    # "Unlearning" is exercised by learning the same message under another class:
+    # the spam sample is learned as spam, verified, then learned as ham, after which
+    # it must no longer get BAYES_SPAM.
+    # ${user} is optional; when given it is used for learning (-d) and as Deliver-To on scans.
+    [Arguments]  ${user}=${EMPTY}
+    Set Test Variable  ${kwargs}  &{EMPTY}
+    IF  "${user}"
+        Set To Dictionary  ${kwargs}  Deliver-To=${user}
+    END
+    
+    # First learn spam
+    Learn Multiclass  ${user}  spam  ${MESSAGE_SPAM}
+    Scan File  ${MESSAGE_SPAM}  &{kwargs}
+    Expect Symbol  BAYES_SPAM
+    
+    # Then unlearn spam (learn as ham)
+    Learn Multiclass  ${user}  ham  ${MESSAGE_SPAM}
+    
+    # Should no longer classify as spam
+    Scan File  ${MESSAGE_SPAM}  &{kwargs}
+    Do Not Expect Symbol  BAYES_SPAM
+
+Check Multiclass Results
+    [Documentation]    Assert that ${result.stdout} mentions the BAYES_<CLASS> symbol for
+    ...                ${expected_class} and that the symbol is followed by the
+    ...                bracketed class name, i.e. "[<class>]".
+    [Arguments]  ${result}  ${expected_class}
+    # Symbol name is the upper-cased class label with a BAYES_ prefix
+    ${symbol} =  Set Variable  BAYES_${expected_class.upper()}
+    Should Contain  ${result.stdout}  ${symbol}
+    # Multiclass result format: symbol ... [class_name]
+    Should Match Regexp  ${result.stdout}  ${symbol}.*\\[${expected_class}\\]
+
+Multiclass Stats Test
+    [Documentation]    Run `rspamc stat` against the controller and require that the
+    ...                output mentions each of the four configured class names.
+    ${stat_reply} =  Run Rspamc  -h  ${RSPAMD_LOCAL_ADDR}:${RSPAMD_PORT_CONTROLLER}  stat
+    Check Rspamc  ${stat_reply}
+
+    # One containment check per class, in the same order as before
+    FOR  ${class_name}  IN  spam  ham  newsletter  transactional
+        Should Contain  ${stat_reply.stdout}  ${class_name}
+    END
+
+Multiclass Configuration Migration Test
+    # Test that old binary config can be automatically migrated
+    # NOTE(review): no test case in the suites shown alongside this file calls this keyword.
+    Set Test Variable  ${binary_config}  ${RSPAMD_TESTDIR}/configs/stats.conf
+    
+    # Start with binary config
+    # NOTE(review): confirm that rspamc accepts a --config option; unlike the other
+    # rspamc calls in this file, no -h controller address is passed here.
+    ${result} =  Run Rspamc  --config  ${binary_config}  stat
+    Check Rspamc  ${result}
+    
+    # Should show deprecation warning but work
+    # (case-insensitive search for "deprecated" in stderr)
+    Should Contain  ${result.stderr}  deprecated  ignore_case=True
+
+Multiclass Performance Test
+    [Documentation]    Scan the four sample messages ${num_messages} times in a row and
+    ...                fail if the whole run takes 30 seconds or more (measured with
+    ...                whole-second epoch timestamps).
+    [Arguments]  ${num_messages}=100
+    @{samples} =  Create List  ${MESSAGE_SPAM}  ${MESSAGE_HAM}  ${MESSAGE_NEWSLETTER}  ${MESSAGE_TRANSACTIONAL}
+    ${started} =  Get Time  epoch
+
+    # Every round scans each sample once, in a fixed order
+    FOR  ${round}  IN RANGE  ${num_messages}
+        FOR  ${sample}  IN  @{samples}
+            Scan File  ${sample}
+        END
+    END
+
+    ${finished} =  Get Time  epoch
+    ${duration} =  Evaluate  ${finished} - ${started}
+
+    # Should complete in reasonable time (adjust threshold as needed)
+    Should Be True  ${duration} < 30  msg=Performance test took ${duration}s, expected < 30s
+
+Multiclass Memory Test
+    # Test that memory usage is reasonable for multiclass classification
+    # Currently this only verifies that `rspamc stat` succeeds; no memory figure is
+    # inspected yet.
+    ${result} =  Run Rspamc  -h  ${RSPAMD_LOCAL_ADDR}:${RSPAMD_PORT_CONTROLLER}  stat
+    Check Rspamc  ${result}
+    
+    # Extract memory usage if available in stats
+    # This is a placeholder - actual implementation would parse memory stats
\ No newline at end of file
diff --git a/test/functional/configs/multiclass_bayes.conf b/test/functional/configs/multiclass_bayes.conf
new file mode 100644 (file)
index 0000000..3504cd1
--- /dev/null
@@ -0,0 +1,143 @@
+options = {
+       filters = ["spf", "dkim", "regexp"]
+       url_tld = "{= env.TESTDIR =}/../lua/unit/test_tld.dat"
+       pidfile = "{= env.TMPDIR =}/rspamd.pid"
+       dns {
+               retransmits = 10;
+               timeout = 2s;
+               fake_records = [{
+                       name = "example.net";
+                       type = txt;
+                       replies = ["v=spf1 -all"];
+               }]
+       }
+}
+
+logging = {
+       type = "file",
+       level = "debug"
+       filename = "{= env.TMPDIR =}/rspamd.log"
+}
+
+metric = {
+       name = "default",
+       actions = {
+               reject = 100500,
+       }
+       unknown_weight = 1
+}
+
+worker {
+       type = normal
+       bind_socket = "{= env.LOCAL_ADDR =}:{= env.PORT_NORMAL =}"
+       count = 1
+       keypair {
+               pubkey = "{= env.KEY_PUB1 =}";
+               privkey = "{= env.KEY_PVT1 =}";
+       }
+       task_timeout = 60s;
+}
+
+worker {
+       type = controller
+       bind_socket = "{= env.LOCAL_ADDR =}:{= env.PORT_CONTROLLER =}"
+       count = 1
+       keypair {
+               pubkey = "{= env.KEY_PUB1 =}";
+               privkey = "{= env.KEY_PVT1 =}";
+       }
+       secure_ip = ["127.0.0.1", "::1"];
+       stats_path = "{= env.TMPDIR =}/stats.ucl";
+}
+
+# Multi-class Bayes classifier configuration
+classifier {
+       languages_enabled = true;
+       tokenizer {
+               name = "osb";
+               hash = {= env.STATS_HASH =};
+               key = {= env.STATS_KEY =};
+       }
+       backend = "{= env.STATS_BACKEND =}";
+
+       # Multi-class statfiles
+       statfile {
+               class = "spam";
+               symbol = BAYES_SPAM;
+               size = 1M;
+               server = {= env.REDIS_SERVER =}
+       }
+       statfile {
+               class = "ham";
+               symbol = BAYES_HAM;
+               size = 1M;
+               server = {= env.REDIS_SERVER =}
+       }
+       statfile {
+               class = "newsletter";
+               symbol = BAYES_NEWSLETTER;
+               size = 1M;
+               server = {= env.REDIS_SERVER =}
+       }
+       statfile {
+               class = "transactional";
+               symbol = BAYES_TRANSACTIONAL;
+               size = 1M;
+               server = {= env.REDIS_SERVER =}
+       }
+
+       # Backend class labels for Redis storage optimization
+       class_labels = {
+               "spam" = "S";
+               "ham" = "H";
+               "newsletter" = "N";
+               "transactional" = "T";
+       }
+
+       cache {
+               server = {= env.REDIS_SERVER =}
+       }
+
+       # Multi-class autolearn configuration
+       autolearn = {
+               classes = {
+                       spam = {
+                               threshold = 15.0;
+                               verdict_mapping = { spam = true };
+                       };
+                       ham = {
+                               threshold = -5.0;
+                               verdict_mapping = { ham = true };
+                       };
+                       newsletter = {
+                               symbols = ["NEWSLETTER_HEADER", "BULK_MAIL"];
+                               threshold = 8.0;
+                       };
+                       transactional = {
+                               symbols = ["TRANSACTIONAL_MAIL", "PASSWORD_RESET"];
+                               threshold = 5.0;
+                       };
+               };
+
+               check_balance = true;
+               max_class_ratio = 0.6;
+               skip_threshold = 0.95;
+       }
+
+       # Standard configuration
+       min_learns = 10;
+       min_tokens = 11;
+       min_prob_strength = 0.05;
+
+       {% if env.STATS_PER_USER ~= '' %}
+       per_user = <<EOD
+return function(task)
+  return task:get_principal_recipient()
+end
+EOD;
+       {% endif %}
+}
+
+lua = "{= env.TESTDIR =}/lua/test_coverage.lua";
+
+settings {}
diff --git a/test/functional/messages/newsletter.eml b/test/functional/messages/newsletter.eml
new file mode 100644 (file)
index 0000000..52e8988
--- /dev/null
@@ -0,0 +1,16 @@
+From: newsletter@example.com
+To: user@example.org
+Subject: Monthly Newsletter - Special Offers Inside
+Date: Fri, 21 Jul 2023 10:00:00 +0000
+Message-ID: <newsletter-123@example.com>
+MIME-Version: 1.0
+Content-Type: text/plain
+
+Dear Subscriber,
+
+This is our monthly newsletter with special offers and updates.
+
+Best regards,
+Newsletter Team
+
+Unsubscribe: https://example.com/unsubscribe?id=123
\ No newline at end of file
diff --git a/test/functional/messages/transactional.eml b/test/functional/messages/transactional.eml
new file mode 100644 (file)
index 0000000..e227aaa
--- /dev/null
@@ -0,0 +1,18 @@
+From: noreply@example.com
+To: user@example.org
+Subject: Password Reset Request
+Date: Fri, 21 Jul 2023 11:00:00 +0000
+Message-ID: <pwd-reset-456@example.com>
+MIME-Version: 1.0
+Content-Type: text/plain
+
+Hello,
+
+You have requested a password reset for your account.
+
+Click here to reset your password: https://example.com/reset?token=abc123
+
+This link expires in 24 hours.
+
+Best regards,
+Security Team
\ No newline at end of file