--- /dev/null
+# Generated keys
+.env.keys
+
+# Runtime data
+data/
+
+# Docker volumes
+rspamd-db/
--- /dev/null
+# Rspamd Integration Test Makefile
+#
+# Thin task runner around Docker Compose and the helper scripts in ./scripts.
+# Every target here is a command, not a file, so all of them are phony.
+
+# Parallelism forwarded to the test script by `test-parallel`
+# (override with `make test-parallel PARALLEL=N`). The default mirrors the
+# fallback used inside scripts/integration-test.sh.
+PARALLEL ?= 10
+
+# The test script is fed on stdin to a shell inside the rspamd container.
+TEST_SCRIPT := ./scripts/integration-test.sh
+
+# Scratch data left behind by a test run. The ASan logs are included so that
+# `check-asan` never analyses reports from an earlier run.
+RUN_DIRS := data/fuzzy_train data/bayes_spam data/bayes_ham data/test_corpus
+RUN_FILES := data/*.json data/*.log data/*.txt data/asan.log*
+
+.PHONY: help keys build up down test test-proxy test-parallel clean logs check-asan restart all
+
+# `restart` and `all` rely on their prerequisites running in the listed order,
+# which `make -j` would not guarantee.
+.NOTPARALLEL:
+
+help:
+	@echo "Rspamd Integration Test"
+	@echo ""
+	@echo "Available targets:"
+	@echo "  keys           - Generate fuzzy encryption keys"
+	@echo "  build          - Build Docker containers"
+	@echo "  up             - Start Docker Compose services"
+	@echo "  down           - Stop Docker Compose services"
+	@echo "  restart        - Stop and start Docker Compose services"
+	@echo "  test           - Run integration test"
+	@echo "  test-proxy     - Run integration test including proxy"
+	@echo "  test-parallel  - Run integration test with custom parallelism (PARALLEL=N)"
+	@echo "  check-asan     - Check AddressSanitizer logs for memory issues"
+	@echo "  clean          - Clean up data and logs"
+	@echo "  logs           - Show Docker logs"
+	@echo "  all            - keys, build, up, test and check-asan in sequence"
+	@echo ""
+	@echo "Quick start:"
+	@echo "  make keys build up test"
+
+keys:
+	@echo "Generating fuzzy encryption keys..."
+	@./scripts/generate-keys.sh
+
+build:
+	@echo "Building Docker containers..."
+	@docker compose build
+
+up:
+	@echo "Cleaning previous data..."
+	@rm -rf $(RUN_DIRS)
+	@rm -rf $(RUN_FILES)
+	@mkdir -p data
+	@echo "Starting Docker Compose services..."
+	@docker compose up -d --force-recreate
+	@echo "Waiting for services to be ready..."
+	@sleep 10
+	@docker compose ps
+
+down:
+	@echo "Stopping Docker Compose services..."
+	@docker compose down
+
+test:
+	@echo "Running integration test..."
+	@docker compose exec -T rspamd /bin/bash < $(TEST_SCRIPT)
+
+# Variables must be passed with `exec -e`: an assignment in front of
+# `docker compose` only affects the host-side client, not the shell that runs
+# the script inside the container.
+test-proxy:
+	@echo "Running integration test (including proxy)..."
+	@docker compose exec -T -e TEST_PROXY=true rspamd /bin/bash < $(TEST_SCRIPT)
+
+test-parallel:
+	@echo "Running integration test (parallel=$(PARALLEL))..."
+	@docker compose exec -T -e PARALLEL=$(PARALLEL) rspamd /bin/bash < $(TEST_SCRIPT)
+
+clean:
+	@echo "Cleaning up..."
+	@rm -rf $(RUN_DIRS)
+	@rm -rf $(RUN_FILES)
+	@docker compose down -v
+
+logs:
+	@docker compose logs -f
+
+check-asan:
+	@echo "Checking AddressSanitizer logs..."
+	@./scripts/check-asan-logs.sh
+
+restart: down up
+
+all: keys build up test check-asan
--- /dev/null
+# Rspamd Integration and Load Testing
+
+Comprehensive integration and load testing for Rspamd using Docker Compose.
+
+## Description
+
+This test creates a complete Rspamd environment with:
+
+- Scanner workers for processing emails (with encryption)
+- Controller worker for management
+- Proxy worker for proxying requests (with encryption)
+- Fuzzy storage with encryption
+- Redis for data storage
+- Bayes classifier
+
+The test performs the following steps:
+
+1. Downloads email corpus from a given URL (or uses local test emails)
+2. Trains Fuzzy storage on 10% of emails
+3. Trains Bayes classifier on 10% of emails (spam and ham)
+4. Scans the entire corpus
+5. Validates that detection works correctly (~10% detection rate)
+
+## Requirements
+
+- Docker and Docker Compose
+- Python 3.8+
+- rspamadm (for key generation)
+
+## Features
+
+This test uses **AddressSanitizer (ASan)** to detect:
+
+- Memory leaks
+- Buffer overflows
+- Use-after-free errors
+- Other memory issues
+
+Docker image: `rspamd/rspamd:asan-nightly` (the tag used in `docker-compose.yml`)
+
+## Quick Start
+
+### 1. Generate encryption keys
+
+```bash
+cd test/integration
+./scripts/generate-keys.sh
+```
+
+### 2. Start environment
+
+```bash
+docker compose up -d
+```
+
+### 3. Check readiness
+
+```bash
+docker compose ps
+docker compose logs rspamd
+```
+
+### 4. Run test
+
+```bash
+# With local corpus (uses test/functional/messages)
+./scripts/integration-test.py
+
+# With remote corpus
+./scripts/integration-test.py --corpus-url https://example.com/emails.zip
+
+# With local directory
+./scripts/integration-test.py --corpus-dir /path/to/emails
+```
+
+### 5. Check for memory leaks
+
+```bash
+make check-asan
+```
+
+This script analyzes AddressSanitizer logs and reports any detected memory leaks.
+
+### 6. Stop
+
+```bash
+docker compose down
+```
+
+## Test Parameters
+
+```bash
+./scripts/integration-test.py --help
+
+Options:
+ --corpus-url URL URL to download email corpus from
+ --corpus-dir DIR Directory containing email corpus
+ --rspamd-host HOST Rspamd host (default: localhost)
+ --rspamd-port PORT Controller port (default: 50002)
+ --proxy-port PORT Proxy port (default: 50004)
+ --password PASS Password (default: q1)
+ --train-ratio RATIO Training ratio (default: 0.1 = 10%)
+ --output FILE Output file for results (default: results.json)
+ --test-proxy Also test via proxy worker
+```
+
+## Project Structure
+
+```
+test/integration/
+├── docker-compose.yml # Docker Compose configuration
+├── configs/ # Rspamd configurations
+│ ├── worker-normal.inc # Scanner worker
+│ ├── worker-controller.inc # Controller worker
+│ ├── worker-proxy.inc # Proxy worker
+│ ├── worker-fuzzy.inc # Fuzzy storage worker
+│ ├── fuzzy_check.conf # fuzzy_check module
+│ ├── redis.conf # Redis settings
+│ ├── statistic.conf # Bayes classifier
+│ ├── lsan.supp # LeakSanitizer suppressions
+│ └── fuzzy-keys.conf # Encryption keys (generated)
+├── scripts/
+│ ├── generate-keys.sh # Key generation
+│ ├── integration-test.py # Test script
+│ └── check-asan-logs.sh # ASan log checker
+├── data/ # Data (corpus, results)
+└── README.md
+```
+
+## Configuration
+
+### Ports
+
+- `50001` - Normal worker (scanning)
+- `50002` - Controller (API)
+- `50003` - Fuzzy storage
+- `50004` - Proxy worker
+
+### Environment Variables
+
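+In `docker-compose.yml` you can configure:
+
+- `RSPAMD_REDIS_ADDR` - Redis address (referenced in configs as `{= env.REDIS_ADDR =}`)
+- `RSPAMD_REDIS_PORT` - Redis port (referenced in configs as `{= env.REDIS_PORT =}`)
+- `ASAN_OPTIONS` - AddressSanitizer options
+- `LSAN_OPTIONS` - LeakSanitizer options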
+
+### Encryption
+
+Fuzzy storage uses encryption. Keys are generated automatically when running `generate-keys.sh`.
+
+## Results
+
+Results are saved in `data/results.json` in the following format:
+
+```json
+[
+ {
+ "file": "message1.eml",
+ "score": 5.2,
+ "symbols": {
+ "FUZZY_SPAM": 2.5,
+ "BAYES_SPAM": 3.0
+ }
+ },
+ ...
+]
+```
+
+## Debugging
+
+### Check logs
+
+```bash
+# All logs
+docker compose logs
+
+# Only Rspamd
+docker compose logs rspamd
+
+# Follow logs
+docker compose logs -f rspamd
+```
+
+### Connect to container
+
+```bash
+docker compose exec rspamd /bin/sh
+```
+
+### Check Rspamd operation
+
+```bash
+# Ping (Controller)
+curl http://localhost:50002/ping
+
+# Ping (Proxy)
+curl http://localhost:50004/ping
+
+# Statistics
+curl -H "Password: q1" http://localhost:50002/stat
+
+# Scan test email (via Controller)
+curl -H "Password: q1" --data-binary @test.eml http://localhost:50002/checkv2
+
+# Scan via Proxy
+curl -H "Password: q1" --data-binary @test.eml http://localhost:50004/checkv2
+```
+
+### Check Fuzzy storage
+
+```bash
+# Fuzzy statistics
+curl -H "Password: q1" http://localhost:50002/fuzzystats
+```
+
+### Test via Proxy
+
+```bash
+# Run test with proxy check
+./scripts/integration-test.py --test-proxy
+
+# Results will be saved in:
+# - data/results.json (via controller)
+# - data/proxy_results.json (via proxy)
+```
+
+## CI/CD
+
+See `.github/workflows/integration-test.yml` for automated runs in GitHub Actions.
+
+## AddressSanitizer
+
+### View ASan logs
+
+```bash
+# Logs are saved in data/asan.log*
+cat data/asan.log*
+
+# Automatic check
+make check-asan
+```
+
+### ASan Configuration
+
+In `docker-compose.yml` the following options are configured:
+
+```
+ASAN_OPTIONS=detect_leaks=1:halt_on_error=0:abort_on_error=0:print_stats=1:log_path=/data/asan.log
+```
+
+- `detect_leaks=1` - detect memory leaks
+- `halt_on_error=0` - don't stop on first error
+- `abort_on_error=0` - don't call abort()
+- `print_stats=1` - print statistics
+- `log_path=/data/asan.log` - log file path
+
+### Suppress False Positives
+
+Edit `configs/lsan.supp`:
+
+```
+leak:function_name_to_suppress
+```
+
+## Troubleshooting
+
+### Rspamd doesn't start
+
+1. Check that keys are generated: `ls .env.keys` (written by `./scripts/generate-keys.sh`)
+2. Check logs: `docker compose logs rspamd`
+3. Check ASan logs: `cat data/asan.log*`
+
+### Redis unavailable
+
+```bash
+docker compose exec redis redis-cli ping
+```
+
+### Low detection rate
+
+- Increase corpus size
+- Verify training completed successfully
+- Check Rspamd logs
+
+## Performance
+
+For load testing you can:
+
+- Increase number of scanner workers in `configs/worker-normal.inc`
+- Increase corpus size
+- Run multiple parallel test instances
--- /dev/null
+# Rspamd Integration Test - Summary
+
+## Overview
+
+Complete integration and load testing infrastructure for Rspamd with Docker Compose.
+
+## Features
+
+### 1. Complete Rspamd Environment
+- **Scanner workers** (2x) with encryption
+- **Controller worker** for API access
+- **Proxy worker** for request proxying with encryption
+- **Fuzzy storage** with encrypted connections
+- **Redis** backend for data storage
+- **Bayes classifier** for spam detection
+
+### 2. AddressSanitizer Integration
+- **Image**: `rspamd/rspamd:asan-nightly` (the tag used in `docker-compose.yml`)
+- Detects memory leaks, buffer overflows, use-after-free
+- Automatic log analysis with `check-asan-logs.sh`
+- Configurable suppressions via `lsan.supp`
+
+### 3. Comprehensive Testing
+- Downloads email corpus from URL or uses local files
+- Trains Fuzzy storage (10% of corpus)
+- Trains Bayes classifier (10% spam + 10% ham)
+- Scans entire corpus
+- Validates detection rates (~10% expected)
+- Tests both controller and proxy workers
+
+### 4. High Ports Configuration
+All services use ports 50000+ to avoid conflicts:
+- 50001: Scanner workers
+- 50002: Controller API
+- 50003: Fuzzy storage
+- 50004: Proxy worker
+
+### 5. Full Encryption
+- Fuzzy storage: encrypted-only mode
+- Scanner workers: keypair encryption
+- Proxy worker: keypair encryption
+- All keys auto-generated via `generate-keys.sh`
+
+## Quick Start
+
+```bash
+cd test/integration
+make keys # Generate encryption keys
+make up # Start Docker environment
+make test # Run integration test
+make check-asan # Check for memory issues
+make down # Stop environment
+```
+
+## Files Created
+
+### Configuration
+- `configs/rspamd.conf` - Main Rspamd configuration
+- `configs/worker-*.inc` - Worker configurations
+- `configs/fuzzy_check.conf` - Fuzzy module settings
+- `configs/redis.conf` - Redis backend
+- `configs/statistic.conf` - Bayes classifier
+- `configs/lsan.supp` - LeakSanitizer suppressions
+- `configs/fuzzy-keys.conf` - Generated encryption keys
+
+### Scripts
+- `scripts/generate-keys.sh` - Generate encryption keys for all workers
+- `scripts/integration-test.py` - Main test script with training and validation
+- `scripts/check-asan-logs.sh` - Analyze AddressSanitizer logs
+
+### Infrastructure
+- `docker-compose.yml` - Docker Compose setup with ASan
+- `Makefile` - Convenient commands
+- `README.md` - Complete documentation
+- `.gitignore` - Ignore temporary files
+
+## GitHub Actions Workflow
+
+`.github/workflows/integration-test.yml` provides:
+- Automated testing on push/PR
+- Daily scheduled runs
+- Manual runs with custom corpus URL
+- ASan log analysis
+- Artifact uploads (results, logs)
+
+## Test Parameters
+
+```bash
+./scripts/integration-test.py \
+ --corpus-url https://example.com/emails.zip \
+ --rspamd-host localhost \
+ --rspamd-port 50002 \
+ --proxy-port 50004 \
+ --train-ratio 0.1 \
+ --test-proxy \
+ --output results.json
+```
+
+## Results
+
+Test outputs:
+- `data/results.json` - Controller scan results
+- `data/proxy_results.json` - Proxy scan results (if --test-proxy)
+- `data/asan.log*` - AddressSanitizer logs
+
+## Validation
+
+The test validates:
+- Fuzzy detection rate ~10% (±5% tolerance)
+- Bayes SPAM detection rate of at least 5%
+- No critical memory issues (via ASan)
+- Proxy worker functionality
+
+## Performance Testing
+
+To increase load:
+1. Increase worker count in `configs/worker-normal.inc`
+2. Use larger email corpus
+3. Run multiple test instances in parallel
+4. Adjust timeout and task limits
+
+## Memory Safety
+
+ASan configuration:
+```
+ASAN_OPTIONS=detect_leaks=1:halt_on_error=0:abort_on_error=0:print_stats=1:log_path=/data/asan.log
+LSAN_OPTIONS=suppressions=/etc/rspamd/lsan.supp:print_suppressions=0
+```
+
+Use `make check-asan` to analyze logs and detect:
+- Memory leaks
+- Heap-use-after-free
+- Heap-buffer-overflow
+- Double-free
+- Use-after-return
+
+## Next Steps
+
+1. **Local Testing**: Run `make all` to test locally
+2. **Custom Corpus**: Provide your own email corpus via `--corpus-url`
+3. **CI/CD Integration**: Push to trigger GitHub Actions workflow
+4. **Tune Parameters**: Adjust training ratios, worker counts, timeouts
+5. **Monitor ASan**: Check logs regularly for memory issues
--- /dev/null
+# Auto-generated Rspamd encryption keys
+# Generated at Thu 16 Oct 2025 14:29:54 BST
+#
+# NOTE(review): this file stores private keys in plain text. The worker and
+# fuzzy_check configs added alongside it read their keys from `env.*`
+# variables instead of these names - confirm this file is still needed and
+# whether it should be committed at all.
+
+# Fuzzy worker keypair
+fuzzy_worker_privkey = "ypwwasni4ckdyoz1u3t1rnqhiyj5n9fpyueb3gif7q6yxqabsh4y";
+fuzzy_worker_pubkey = "rho67hihijq8xxrkjjrutx5w8uj7ycrpmjyw7rjaifndytwx9hiy";
+
+# Fuzzy check encryption key
+fuzzy_encryption_key = "4on5cafx3c9aaffpbmb9c43dxprt35fmntppn43ckzefkxdrtggy";
+
+# Normal worker keypair (for encrypted inter-worker communication)
+rspamd_worker_privkey = "okc9mxs3nzyp37bdxypep775hcypftnckxyy1bw7ahjanoii671y";
+rspamd_worker_pubkey = "t78dndogx3bao8sz7ouaczzcsy7irba63tfmkigct43tgnjhoixy";
+
+# Proxy worker keypair
+rspamd_proxy_privkey = "ajmh5hodpqrpfx8w53sr13fuakcjnyaj1ota6qrxiw9ygdwke7zy";
+rspamd_proxy_pubkey = "usu8tjxyc4kauz3ym5q31yab3iba4kexgy364yw3bozodphc5pey";
--- /dev/null
+# Fuzzy check module configuration
+#
+# Defines a single rule, "rspamd-integration", with two flags:
+# FUZZY_SPAM (flag 1) and FUZZY_HAM (flag 2).
+
+# NOTE(review): min_bytes is set to 100 here and to 0 inside the rule below;
+# presumably the rule-level value wins for that rule - confirm which is intended.
+min_bytes = 100;
+timeout = 5s;
+retransmits = 3;
+
+rule "rspamd-integration" {
+ algorithm = "mumhash";
+ # Port 50003 is the one the fuzzy storage worker binds to in worker-fuzzy.inc.
+ servers = "rspamd:50003";
+
+ # Encryption settings
+ # The value comes from the environment; generate-keys.sh sets it to the
+ # fuzzy worker's public key.
+ encryption_key = "{= env.FUZZY_ENCRYPTION_KEY =}";
+
+ # Fuzzy flags
+ fuzzy_map = {
+ FUZZY_SPAM {
+ max_score = 10.0;
+ flag = 1;
+ }
+ FUZZY_HAM {
+ max_score = 5.0;
+ flag = 2;
+ }
+ }
+
+ min_length = 0;
+ min_bytes = 0;
+ # read_only = false allows this rule to be used for learning (fuzzy_add).
+ read_only = false;
+ skip_unknown = true;
+}
--- /dev/null
+# LeakSanitizer suppressions file for Rspamd integration tests
+# This file lists known memory leaks that should be suppressed during testing
+
+# Example suppressions (adjust based on actual leaks found):
+# leak:some_known_leak_function
--- /dev/null
+# Redis configuration
+#
+# Address and port are taken from the environment. docker-compose.yml sets
+# them as RSPAMD_REDIS_ADDR / RSPAMD_REDIS_PORT; configs reference the names
+# without the RSPAMD_ prefix.
+
+servers = "{= env.REDIS_ADDR =}:{= env.REDIS_PORT =}";
--- /dev/null
+# Bayes statistic configuration
+#
+# A single Redis-backed "bayes" classifier with one spam and one ham statfile.
+
+classifier "bayes" {
+    tokenizer {
+        name = "osb";
+    }
+
+    cache {
+        backend = "redis";
+    }
+
+    min_tokens = 11;
+    # A single learned message is enough for the classifier to start producing
+    # results; the integration test trains on only a small share of the corpus.
+    min_learns = 1;
+
+    statfile {
+        symbol = "BAYES_SPAM";
+        spam = true;
+    }
+
+    statfile {
+        symbol = "BAYES_HAM";
+        spam = false;
+    }
+
+    # Accept every learn request unconditionally.
+    learn_condition = "return function(task, is_spam, is_unlearn) return true end";
+    autolearn = false;
+    # Terminated with ';' like every other assignment in this file.
+    backend = "redis";
+}
--- /dev/null
+# Controller worker configuration
+#
+# NOTE(review): secure_ip covers every IPv4 and IPv6 address and both
+# passwords are the fixed value "q1" - presumably acceptable only because this
+# is a throwaway test environment; do not reuse these settings elsewhere.
+
+bind_socket = "*:50002";
+count = 1;
+secure_ip = ["0.0.0.0/0", "::/0"];
+password = "q1";
+enable_password = "q1";
--- /dev/null
+# Fuzzy storage worker configuration
+
+bind_socket = "*:50003";
+count = 1;
+backend = "redis";
+# NOTE(review): hashfile is presumably unused while backend is "redis" - confirm.
+hashfile = "/var/lib/rspamd/fuzzy.db";
+expire = 90d;
+# Updates are accepted from any IPv4/IPv6 address.
+allow_update = ["0.0.0.0/0", "::/0"];
+encrypted_only = true;
+
+# Keypair is supplied through the environment (written to .env.keys by
+# scripts/generate-keys.sh).
+keypair
+{
+ privkey = "{= env.FUZZY_WORKER_PRIVKEY =}";
+ pubkey = "{= env.FUZZY_WORKER_PUBKEY =}";
+}
--- /dev/null
+# Normal worker (scanner) configuration
+
+bind_socket = "*:50001";
+count = 2;
+task_timeout = 60s;
+max_tasks = 1000;
+
+# Enable encryption for inter-worker communication.
+# Keypair is supplied through the environment (written to .env.keys by
+# scripts/generate-keys.sh).
+keypair
+{
+ pubkey = "{= env.WORKER_PUBKEY =}";
+ privkey = "{= env.WORKER_PRIVKEY =}";
+}
--- /dev/null
+# Proxy worker configuration
+
+bind_socket = "*:50004";
+count = 1;
+timeout = 120s;
+# Single default upstream named "local" with self_scan enabled.
+upstream "local"
+{
+ default = yes;
+ self_scan = yes;
+}
+
+# Enable milter protocol
+milter = yes;
+
+# Enable encryption for proxy connections.
+# Keypair is supplied through the environment (written to .env.keys by
+# scripts/generate-keys.sh).
+keypair
+{
+ pubkey = "{= env.PROXY_PUBKEY =}";
+ privkey = "{= env.PROXY_PRIVKEY =}";
+}
--- /dev/null
+# Compose stack for the Rspamd integration test: one Redis container and one
+# ASan-instrumented Rspamd container sharing a bridge network.
+services:
+  redis:
+    image: redis:7-alpine
+    container_name: rspamd-redis
+    networks:
+      - rspamd-net
+    healthcheck:
+      test:
+        - CMD
+        - redis-cli
+        - ping
+      interval: 5s
+      timeout: 3s
+      retries: 5
+
+  rspamd:
+    image: rspamd/rspamd:asan-nightly
+    container_name: rspamd-main
+    # Started only once the redis healthcheck reports healthy.
+    depends_on:
+      redis:
+        condition: service_healthy
+    networks:
+      - rspamd-net
+    ports:
+      - "50001:50001" # Normal worker
+      - "50002:50002" # Controller
+      - "50003:50003" # Fuzzy worker
+      - "50004:50004" # Proxy worker
+    volumes:
+      # Test-specific overrides, mounted read-only into local.d
+      - ./configs/worker-normal.inc:/etc/rspamd/local.d/worker-normal.inc:ro
+      - ./configs/worker-controller.inc:/etc/rspamd/local.d/worker-controller.inc:ro
+      - ./configs/worker-fuzzy.inc:/etc/rspamd/local.d/worker-fuzzy.inc:ro
+      - ./configs/worker-proxy.inc:/etc/rspamd/local.d/worker-proxy.inc:ro
+      - ./configs/fuzzy_check.conf:/etc/rspamd/local.d/fuzzy_check.conf:ro
+      - ./configs/redis.conf:/etc/rspamd/local.d/redis.conf:ro
+      - ./configs/statistic.conf:/etc/rspamd/local.d/statistic.conf:ro
+      - ./configs/lsan.supp:/etc/rspamd/lsan.supp:ro
+      # Writable scratch space (results, ASan logs) and the read-only corpus
+      - ./data:/data
+      - ../functional/messages:/corpus:ro
+      - rspamd-db:/var/lib/rspamd
+    # Keys written by scripts/generate-keys.sh
+    env_file:
+      - .env.keys
+    environment:
+      RSPAMD_REDIS_ADDR: "redis"
+      RSPAMD_REDIS_PORT: "6379"
+      # AddressSanitizer configuration
+      ASAN_OPTIONS: "detect_leaks=1:halt_on_error=0:abort_on_error=0:print_stats=1:log_path=/data/asan.log"
+      LSAN_OPTIONS: "suppressions=/etc/rspamd/lsan.supp:print_suppressions=0"
+    healthcheck:
+      test:
+        - CMD-SHELL
+        - "pidof rspamd > /dev/null || exit 1"
+      interval: 10s
+      timeout: 5s
+      retries: 5
+      start_period: 15s
+
+networks:
+  rspamd-net:
+    driver: bridge
+
+volumes:
+  rspamd-db:
--- /dev/null
+#!/bin/bash
+# Check AddressSanitizer logs for memory leaks and errors
+
+set -e
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+DATA_DIR="$SCRIPT_DIR/../data"
+
+echo "=== Checking AddressSanitizer logs ==="
+echo ""
+
+# Find all ASAN log files
+ASAN_LOGS=$(find "$DATA_DIR" -name "asan.log*" 2>/dev/null)
+
+if [ -z "$ASAN_LOGS" ]; then
+ echo "No ASAN logs found in $DATA_DIR"
+ exit 0
+fi
+
+TOTAL_LEAKS=0
+TOTAL_ERRORS=0
+
+for log_file in $ASAN_LOGS; do
+ echo "Analyzing: $log_file"
+ echo "----------------------------------------"
+
+ # Count memory leaks
+ LEAKS=$(grep -c "LeakSanitizer" "$log_file" 2>/dev/null || echo "0")
+ if [ "$LEAKS" -gt 0 ]; then
+ echo " Memory leaks detected: $LEAKS"
+ TOTAL_LEAKS=$((TOTAL_LEAKS + LEAKS))
+
+ # Show leak summary
+ grep -A 10 "LeakSanitizer" "$log_file" | head -20
+ fi
+
+ # Count other errors
+ ERRORS=$(grep -c "ERROR: AddressSanitizer" "$log_file" 2>/dev/null || echo "0")
+ if [ "$ERRORS" -gt 0 ]; then
+ echo " AddressSanitizer errors: $ERRORS"
+ TOTAL_ERRORS=$((TOTAL_ERRORS + ERRORS))
+
+ # Show error summary
+ grep -A 10 "ERROR: AddressSanitizer" "$log_file" | head -20
+ fi
+
+ # Check for heap-use-after-free
+ UAF=$(grep -c "heap-use-after-free" "$log_file" 2>/dev/null || echo "0")
+ if [ "$UAF" -gt 0 ]; then
+ echo " Heap-use-after-free: $UAF"
+ fi
+
+ # Check for heap-buffer-overflow
+ OVERFLOW=$(grep -c "heap-buffer-overflow" "$log_file" 2>/dev/null || echo "0")
+ if [ "$OVERFLOW" -gt 0 ]; then
+ echo " Heap-buffer-overflow: $OVERFLOW"
+ fi
+
+ echo ""
+done
+
+echo "========================================"
+echo "SUMMARY"
+echo "========================================"
+echo "Total memory leaks: $TOTAL_LEAKS"
+echo "Total ASan errors: $TOTAL_ERRORS"
+echo ""
+
+if [ "$TOTAL_LEAKS" -gt 0 ] || [ "$TOTAL_ERRORS" -gt 0 ]; then
+ echo "RESULT: FAILED - Memory issues detected"
+ echo ""
+ echo "Full logs available in:"
+ for log_file in $ASAN_LOGS; do
+ echo " - $log_file"
+ done
+ exit 1
+else
+ echo "RESULT: PASSED - No memory issues detected"
+ exit 0
+fi
--- /dev/null
+#!/bin/bash
+# Generate encryption keys for Rspamd workers.
+#
+# Runs `rspamadm keypair -u` once per worker (fuzzy, normal, proxy) and writes
+# the resulting keys to ../.env.keys as RSPAMD_* variables, which
+# docker-compose.yml loads through `env_file`.
+
+set -e
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+ENV_FILE="$SCRIPT_DIR/../.env.keys"
+
+if ! command -v rspamadm > /dev/null 2>&1; then
+    echo "ERROR: rspamadm not found in PATH (required for key generation)" >&2
+    exit 1
+fi
+
+# generate_keypair LABEL
+# Generates one keypair and stores it in KP_PRIVKEY / KP_PUBKEY. The rspamadm
+# output is held in a shell variable only, so no private key is ever written
+# to a temporary file that a failed run could leave behind. LABEL is used only
+# in the error message.
+generate_keypair() {
+    local label=$1 raw
+    raw=$(rspamadm keypair -u)
+    KP_PRIVKEY=$(printf '%s\n' "$raw" | grep "privkey" | cut -d'"' -f2)
+    KP_PUBKEY=$(printf '%s\n' "$raw" | grep "pubkey" | cut -d'"' -f2)
+    # The pipelines above report cut's status, so a parse failure would
+    # otherwise go unnoticed and produce empty keys.
+    if [ -z "$KP_PRIVKEY" ] || [ -z "$KP_PUBKEY" ]; then
+        echo "ERROR: could not parse $label keypair from rspamadm output" >&2
+        exit 1
+    fi
+}
+
+echo "=== Generating Rspamd encryption keys ==="
+echo ""
+
+# Generate keypair for fuzzy worker (encryption)
+echo "1. Fuzzy worker keypair (encryption)..."
+generate_keypair "fuzzy worker"
+FUZZY_PRIVKEY=$KP_PRIVKEY
+FUZZY_PUBKEY=$KP_PUBKEY
+
+# Generate keypair for normal worker
+echo "2. Normal worker keypair..."
+generate_keypair "normal worker"
+WORKER_PRIVKEY=$KP_PRIVKEY
+WORKER_PUBKEY=$KP_PUBKEY
+
+# Generate keypair for proxy worker
+echo "3. Proxy worker keypair..."
+generate_keypair "proxy worker"
+PROXY_PRIVKEY=$KP_PRIVKEY
+PROXY_PUBKEY=$KP_PUBKEY
+
+echo ""
+echo "Keys generated successfully!"
+echo ""
+
+# Create .env.keys file for docker-compose. The file holds private keys, so a
+# newly created file is readable by its owner only.
+umask 077
+cat > "$ENV_FILE" <<EOF
+# Rspamd integration test keys
+# Generated at $(date)
+
+# Fuzzy worker keypair
+RSPAMD_FUZZY_WORKER_PRIVKEY=$FUZZY_PRIVKEY
+RSPAMD_FUZZY_WORKER_PUBKEY=$FUZZY_PUBKEY
+
+# Fuzzy check encryption key (same as fuzzy worker pubkey)
+RSPAMD_FUZZY_ENCRYPTION_KEY=$FUZZY_PUBKEY
+
+# Normal worker keypair (for encrypted inter-worker communication)
+RSPAMD_WORKER_PRIVKEY=$WORKER_PRIVKEY
+RSPAMD_WORKER_PUBKEY=$WORKER_PUBKEY
+
+# Proxy worker keypair
+RSPAMD_PROXY_PRIVKEY=$PROXY_PRIVKEY
+RSPAMD_PROXY_PUBKEY=$PROXY_PUBKEY
+EOF
+
+echo "Environment variables saved to $ENV_FILE"
+echo ""
+echo "Summary:"
+echo " - Fuzzy worker: encrypted (pubkey used for client encryption)"
+echo " - Normal worker: encrypted"
+echo " - Proxy worker: encrypted"
+echo ""
+echo "Use these in configs with: {= env.VARIABLE_NAME =}"
+echo "(without the RSPAMD_ prefix)"
--- /dev/null
+#!/bin/bash
+# Rspamd Integration Test using rspamc
+# This script tests fuzzy storage, Bayes learning, and scanning via rspamc.
+#
+# Steps: split the corpus, train fuzzy storage and Bayes on a share of it,
+# scan the whole corpus through the controller, then validate detection rates.
+# Exit status: 0 when every validation passes, 1 on any error or failed check.
+
+set -e
+
+# Configuration (every value can be overridden through the environment)
+RSPAMD_HOST=${RSPAMD_HOST:-localhost}
+CONTROLLER_PORT=${CONTROLLER_PORT:-50002}
+PROXY_PORT=${PROXY_PORT:-50004}
+PASSWORD=${PASSWORD:-q1}
+PARALLEL=${PARALLEL:-10}
+TRAIN_RATIO=${TRAIN_RATIO:-0.1}
+TEST_PROXY=${TEST_PROXY:-false}
+
+# Directories
+# When running inside container via stdin, BASH_SOURCE won't work properly
+if [ -d "/corpus" ]; then
+    # Running inside container
+    CORPUS_DIR="${CORPUS_DIR:-/corpus}"
+    DATA_DIR="${DATA_DIR:-/data}"
+else
+    # Running on host; DATA_DIR is overridable here just as in the container
+    SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+    DATA_DIR="${DATA_DIR:-$SCRIPT_DIR/../data}"
+    CORPUS_DIR="${CORPUS_DIR:-$SCRIPT_DIR/../../functional/messages}"
+fi
+
+# copy_list LIST_FILE DEST_DIR
+# Copies every path listed (one per line) in LIST_FILE into DEST_DIR.
+copy_list() {
+    local list=$1 dest=$2 path
+    while IFS= read -r path; do
+        cp "$path" "$dest/"
+    done < "$list"
+}
+
+# split_size TOTAL RATIO
+# Prints TOTAL * RATIO rounded to an integer (computed with awk).
+split_size() {
+    awk -v total="$1" -v ratio="$2" 'BEGIN {printf "%.0f", total * ratio}'
+}
+
+# percent PART TOTAL
+# Prints PART / TOTAL as a percentage with one decimal place.
+percent() {
+    awk -v part="$1" -v total="$2" 'BEGIN {printf "%.1f", (part / total) * 100}'
+}
+
+# count_matching_lines PATTERN FILE
+# Prints how many lines of FILE contain PATTERN. `grep -c` prints "0" itself
+# when nothing matches (and exits 1), so only its status is discarded.
+count_matching_lines() {
+    local hits
+    hits=$(grep -c -- "$1" "$2" 2>/dev/null || true)
+    echo "${hits:-0}"
+}
+
+echo "=== Rspamd Integration Test ==="
+echo ""
+echo "Configuration:"
+echo " Host: $RSPAMD_HOST"
+echo " Controller port: $CONTROLLER_PORT"
+echo " Proxy port: $PROXY_PORT"
+echo " Parallelism: $PARALLEL"
+echo " Corpus: $CORPUS_DIR"
+echo ""
+
+# Check if rspamc is available; otherwise re-run this script inside the
+# container. The tunables are forwarded explicitly because the container does
+# not inherit the host environment.
+if ! command -v rspamc &> /dev/null; then
+    echo "NOTE: rspamc not found. Running inside docker container..."
+    exec docker compose exec -T \
+        -e PASSWORD="$PASSWORD" -e PARALLEL="$PARALLEL" \
+        -e TRAIN_RATIO="$TRAIN_RATIO" -e TEST_PROXY="$TEST_PROXY" \
+        rspamd bash -s < "$0"
+fi
+
+# Create working directories, starting from an empty state so that files from
+# a previous run do not inflate the training and test sets.
+for work_dir in fuzzy_train bayes_spam bayes_ham test_corpus; do
+    rm -rf "${DATA_DIR:?}/$work_dir"
+    mkdir -p "$DATA_DIR/$work_dir"
+done
+
+# Check if Rspamd is running
+echo "Checking Rspamd status..."
+if ! rspamc -h "$RSPAMD_HOST:$CONTROLLER_PORT" -P "$PASSWORD" stat &> /dev/null; then
+    echo "ERROR: Cannot connect to Rspamd at $RSPAMD_HOST:$CONTROLLER_PORT"
+    exit 1
+fi
+echo "✓ Rspamd is running"
+echo ""
+
+# Find all email files (read line by line so paths with spaces stay intact)
+echo "Finding email files in $CORPUS_DIR..."
+EMAIL_FILES=()
+while IFS= read -r path; do
+    EMAIL_FILES+=("$path")
+done < <(find "$CORPUS_DIR" -type f \( -name "*.eml" -o -name "*.msg" -o -name "*.txt" \))
+TOTAL_EMAILS=${#EMAIL_FILES[@]}
+
+if [ "$TOTAL_EMAILS" -eq 0 ]; then
+    echo "ERROR: No email files found in $CORPUS_DIR"
+    exit 1
+fi
+
+echo "Found $TOTAL_EMAILS email files"
+echo ""
+
+# Calculate split sizes (awk handles the fractional ratio)
+FUZZY_SIZE=$(split_size "$TOTAL_EMAILS" "$TRAIN_RATIO")
+BAYES_SIZE=$(split_size "$TOTAL_EMAILS" "$TRAIN_RATIO")
+
+# Split corpus
+echo "Splitting corpus..."
+# Feed the list on stdin: passing every path as an argument to `shuf -e`
+# could exceed the argument-length limit on a large corpus.
+printf '%s\n' "${EMAIL_FILES[@]}" | shuf > "$DATA_DIR/shuffled_files.txt"
+
+# Fuzzy training set: the first FUZZY_SIZE shuffled files
+head -n "$FUZZY_SIZE" "$DATA_DIR/shuffled_files.txt" > "$DATA_DIR/fuzzy_train_list.txt"
+copy_list "$DATA_DIR/fuzzy_train_list.txt" "$DATA_DIR/fuzzy_train"
+
+# Bayes training set (spam): the next BAYES_SIZE files
+tail -n +$((FUZZY_SIZE + 1)) "$DATA_DIR/shuffled_files.txt" | head -n "$BAYES_SIZE" > "$DATA_DIR/bayes_spam_list.txt"
+copy_list "$DATA_DIR/bayes_spam_list.txt" "$DATA_DIR/bayes_spam"
+
+# Bayes training set (ham): the BAYES_SIZE files after that
+tail -n +$((FUZZY_SIZE + BAYES_SIZE + 1)) "$DATA_DIR/shuffled_files.txt" | head -n "$BAYES_SIZE" > "$DATA_DIR/bayes_ham_list.txt"
+copy_list "$DATA_DIR/bayes_ham_list.txt" "$DATA_DIR/bayes_ham"
+
+# Test corpus (copy all for scanning, training files included)
+copy_list "$DATA_DIR/shuffled_files.txt" "$DATA_DIR/test_corpus"
+
+FUZZY_COUNT=$(ls -1 "$DATA_DIR/fuzzy_train" | wc -l)
+SPAM_COUNT=$(ls -1 "$DATA_DIR/bayes_spam" | wc -l)
+HAM_COUNT=$(ls -1 "$DATA_DIR/bayes_ham" | wc -l)
+
+echo "Corpus split:"
+echo " Fuzzy training: $FUZZY_COUNT emails"
+echo " Bayes SPAM training: $SPAM_COUNT emails"
+echo " Bayes HAM training: $HAM_COUNT emails"
+echo " Test set: $TOTAL_EMAILS emails"
+echo ""
+
+# Training phase
+echo "============================================================"
+echo "TRAINING PHASE"
+echo "============================================================"
+echo ""
+
+# Train fuzzy storage
+echo "Training Fuzzy storage ($FUZZY_COUNT emails, flag=1)..."
+if [ "$FUZZY_COUNT" -gt 0 ]; then
+    rspamc -h "$RSPAMD_HOST:$CONTROLLER_PORT" -P "$PASSWORD" -n "$PARALLEL" \
+        fuzzy_add:"$DATA_DIR/fuzzy_train" -f 1 -w 10 2>&1 | tee "$DATA_DIR/fuzzy_train.log"
+    echo "✓ Fuzzy training complete"
+else
+    echo "⚠ No files to train"
+fi
+echo ""
+
+# Train Bayes spam
+echo "Training Bayes SPAM ($SPAM_COUNT emails)..."
+if [ "$SPAM_COUNT" -gt 0 ]; then
+    rspamc -h "$RSPAMD_HOST:$CONTROLLER_PORT" -P "$PASSWORD" -n "$PARALLEL" \
+        learn_spam "$DATA_DIR/bayes_spam" 2>&1 | tee "$DATA_DIR/bayes_spam.log"
+    echo "✓ Bayes SPAM training complete"
+else
+    echo "⚠ No files to train"
+fi
+echo ""
+
+# Train Bayes ham
+echo "Training Bayes HAM ($HAM_COUNT emails)..."
+if [ "$HAM_COUNT" -gt 0 ]; then
+    rspamc -h "$RSPAMD_HOST:$CONTROLLER_PORT" -P "$PASSWORD" -n "$PARALLEL" \
+        learn_ham "$DATA_DIR/bayes_ham" 2>&1 | tee "$DATA_DIR/bayes_ham.log"
+    echo "✓ Bayes HAM training complete"
+else
+    echo "⚠ No files to train"
+fi
+echo ""
+
+# Wait for training to settle
+echo "Waiting for training to settle..."
+sleep 5
+echo ""
+
+# Scanning phase
+echo "============================================================"
+echo "SCANNING PHASE (via controller)"
+echo "============================================================"
+echo ""
+
+echo "Scanning $TOTAL_EMAILS emails (parallelism: $PARALLEL)..."
+# stderr goes to its own file so that diagnostics cannot corrupt the JSON
+# document parsed below.
+rspamc -h "$RSPAMD_HOST:$CONTROLLER_PORT" -P "$PASSWORD" -n "$PARALLEL" \
+    -j "$DATA_DIR/test_corpus" > "$DATA_DIR/scan_results.json" 2> "$DATA_DIR/scan_errors.log"
+
+echo "✓ Scanning complete"
+echo ""
+
+# Analyze results
+echo "============================================================"
+echo "ANALYSIS"
+echo "============================================================"
+echo ""
+
+# Count detections using jq (or grep if jq is not available)
+if command -v jq &> /dev/null; then
+    # Use jq for JSON parsing
+    # NOTE(review): these filters assume scan_results.json is one JSON array of
+    # per-message objects - confirm against the actual `rspamc -j` output.
+    TOTAL=$(jq 'length' "$DATA_DIR/scan_results.json")
+    # any(...) counts each message once even when it carries several FUZZY_*
+    # symbols; `// {}` tolerates results without a symbols object.
+    FUZZY_COUNT=$(jq '[.[] | select(any((.symbols // {}) | keys[]; startswith("FUZZY_")))] | length' "$DATA_DIR/scan_results.json")
+    BAYES_SPAM_COUNT=$(jq '[.[] | select(.symbols.BAYES_SPAM)] | length' "$DATA_DIR/scan_results.json")
+    BAYES_HAM_COUNT=$(jq '[.[] | select(.symbols.BAYES_HAM)] | length' "$DATA_DIR/scan_results.json")
+else
+    # Fallback to grep (counts matching lines)
+    TOTAL=$(count_matching_lines '"symbols"' "$DATA_DIR/scan_results.json")
+    FUZZY_COUNT=$(count_matching_lines '"FUZZY_' "$DATA_DIR/scan_results.json")
+    BAYES_SPAM_COUNT=$(count_matching_lines '"BAYES_SPAM"' "$DATA_DIR/scan_results.json")
+    BAYES_HAM_COUNT=$(count_matching_lines '"BAYES_HAM"' "$DATA_DIR/scan_results.json")
+fi
+
+if [ "$TOTAL" -eq 0 ]; then
+    echo "ERROR: No valid results"
+    exit 1
+fi
+
+# Calculate percentages using awk
+FUZZY_RATE=$(percent "$FUZZY_COUNT" "$TOTAL")
+BAYES_SPAM_RATE=$(percent "$BAYES_SPAM_COUNT" "$TOTAL")
+BAYES_HAM_RATE=$(percent "$BAYES_HAM_COUNT" "$TOTAL")
+
+echo "Total scanned: $TOTAL"
+echo "Fuzzy detections: $FUZZY_COUNT ($FUZZY_RATE%)"
+echo "Bayes SPAM: $BAYES_SPAM_COUNT ($BAYES_SPAM_RATE%)"
+echo "Bayes HAM: $BAYES_HAM_COUNT ($BAYES_HAM_RATE%)"
+echo ""
+
+# Validation (fuzzy should detect ~10% since we trained on 10%)
+echo "Validation:"
+FUZZY_RATE_INT=$(echo "$FUZZY_RATE" | cut -d. -f1)
+BAYES_SPAM_RATE_INT=$(echo "$BAYES_SPAM_RATE" | cut -d. -f1)
+
+# Remember failures so the script can report them through its exit status
+# after the remaining steps have run.
+VALIDATION_FAILED=0
+
+if [ "$FUZZY_RATE_INT" -ge 5 ] && [ "$FUZZY_RATE_INT" -le 15 ]; then
+    echo " ✓ Fuzzy detection rate: PASS"
+else
+    echo " ✗ Fuzzy detection rate: FAIL (expected ~10%, got $FUZZY_RATE%)"
+    VALIDATION_FAILED=1
+fi
+
+if [ "$BAYES_SPAM_RATE_INT" -ge 5 ]; then
+    echo " ✓ Bayes detection: PASS"
+else
+    echo " ✗ Bayes detection: FAIL (got $BAYES_SPAM_RATE%)"
+    VALIDATION_FAILED=1
+fi
+
+# Test via proxy if requested
+if [ "$TEST_PROXY" = "true" ]; then
+    echo ""
+    echo "============================================================"
+    echo "PROXY TEST"
+    echo "============================================================"
+    echo ""
+
+    echo "Testing via proxy worker ($PROXY_PORT)..."
+    rspamc -h "$RSPAMD_HOST:$PROXY_PORT" -n "$PARALLEL" \
+        "$DATA_DIR/test_corpus" > "$DATA_DIR/proxy_results.json" 2>&1
+    echo "✓ Proxy test complete"
+    echo "Results saved to $DATA_DIR/proxy_results.json"
+fi
+
+echo ""
+echo "============================================================"
+echo "TEST COMPLETE"
+echo "============================================================"
+echo ""
+echo "Results saved to:"
+echo " - $DATA_DIR/scan_results.json"
+echo " - $DATA_DIR/scan_errors.log"
+echo " - $DATA_DIR/fuzzy_train.log"
+echo " - $DATA_DIR/bayes_spam.log"
+echo " - $DATA_DIR/bayes_ham.log"
+
+if [ "$VALIDATION_FAILED" -ne 0 ]; then
+    echo ""
+    echo "RESULT: FAILED - detection rates outside the expected range"
+    exit 1
+fi