From: Vsevolod Stakhov Date: Fri, 17 Oct 2025 11:40:49 +0000 (+0100) Subject: [Test] Update integration tests to use rspamd-test-corpus X-Git-Tag: 3.14.0~67^2~32 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=145557296d8f7db3eb1874260e6d1df1d3e986c4;p=thirdparty%2Frspamd.git [Test] Update integration tests to use rspamd-test-corpus - Fix integration-test.py -> integration-test.sh references - Add rspamd-test-corpus repository integration - Update workflow to download corpus from GitHub releases - Update README with corpus usage instructions The corpus repository provides: - 1000 base email messages (SpamAssassin) - Structure for regression tests - Automated corpus management Corpus: https://github.com/rspamd/rspamd-test-corpus --- diff --git a/.github/workflows/integration-test.yml b/.github/workflows/integration-test.yml new file mode 100644 index 0000000000..419bc32fbf --- /dev/null +++ b/.github/workflows/integration-test.yml @@ -0,0 +1,249 @@ +name: Integration Test + +on: + push: + branches: [master] + pull_request: + branches: [master] + schedule: + # Run daily at 2 AM UTC + - cron: '0 2 * * *' + workflow_dispatch: + inputs: + corpus_url: + description: 'URL to email corpus (zip file)' + required: false + default: 'https://github.com/rspamd/rspamd-test-corpus/releases/download/v1.0/rspamd-test-corpus.zip' + +jobs: + integration-test: + name: Integration & Load Test + runs-on: ubuntu-latest + timeout-minutes: 30 + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Install dependencies + run: | + pip install requests + + - name: Build Rspamd + working-directory: . + run: | + mkdir -p build install + cd build + cmake -DCMAKE_INSTALL_PREFIX=../install \ + -DENABLE_COVERAGE=OFF \ + -GNinja .. 
+ ninja + ninja install + + - name: Generate Fuzzy encryption keys + working-directory: test/integration + run: | + # Use rspamadm from build + export PATH="${GITHUB_WORKSPACE}/install/bin:$PATH" + + # Generate keypair for fuzzy worker + KEYPAIR=$(rspamadm keypair -u) + PRIVKEY=$(echo "$KEYPAIR" | grep privkey | cut -d'"' -f2) + PUBKEY=$(echo "$KEYPAIR" | grep pubkey | cut -d'"' -f2) + + # Generate encryption key (use public key from another keypair) + ENCRYPTION_KEYPAIR=$(rspamadm keypair -u) + ENCRYPTION_KEY=$(echo "$ENCRYPTION_KEYPAIR" | grep pubkey | cut -d'"' -f2) + + # Generate worker and proxy keypairs + WORKER_KEYPAIR=$(rspamadm keypair -u) + WORKER_PRIVKEY=$(echo "$WORKER_KEYPAIR" | grep privkey | cut -d'"' -f2) + WORKER_PUBKEY=$(echo "$WORKER_KEYPAIR" | grep pubkey | cut -d'"' -f2) + + PROXY_KEYPAIR=$(rspamadm keypair -u) + PROXY_PRIVKEY=$(echo "$PROXY_KEYPAIR" | grep privkey | cut -d'"' -f2) + PROXY_PUBKEY=$(echo "$PROXY_KEYPAIR" | grep pubkey | cut -d'"' -f2) + + # Create config + cat > configs/fuzzy-keys.conf < Dockerfile.local <<'EOF' + FROM ubuntu:24.04 + + RUN apt-get update && apt-get install -y \ + redis-tools \ + curl \ + ca-certificates \ + libluajit-5.1-2 \ + libglib2.0-0 \ + libssl3 \ + libicu74 \ + libsodium23 \ + libhyperscan5 \ + && rm -rf /var/lib/apt/lists/* + + COPY install /usr + + RUN mkdir -p /var/lib/rspamd /var/log/rspamd /var/run/rspamd + + EXPOSE 11333 11334 11335 + + CMD ["/usr/bin/rspamd", "-f", "-c", "/etc/rspamd/rspamd.conf"] + EOF + + # Update docker-compose to use local build + sed -i 's|image: ghcr.io/rspamd/rspamd:latest|build:\n context: ../..\n dockerfile: test/integration/Dockerfile.local|g' docker-compose.yml + + - name: Start Docker Compose + working-directory: test/integration + run: | + docker compose up -d + + # Wait for services to be ready + echo "Waiting for services to start..." 
+ sleep 10 + + # Check services + docker compose ps + docker compose logs + + - name: Wait for Rspamd to be ready + working-directory: test/integration + run: | + for i in {1..30}; do + if curl -sf http://localhost:50002/ping > /dev/null 2>&1; then + echo "Rspamd Controller is ready!" + + # Also check proxy (-f: an HTTP error status counts as not ready) + if curl -sf http://localhost:50004/ping > /dev/null 2>&1; then + echo "Rspamd Proxy is ready!" + else + echo "WARNING: Proxy not responding, but continuing..." + fi + + exit 0 + fi + echo "Waiting for Rspamd... ($i/30)" + sleep 2 + done + + echo "Rspamd failed to start!" + docker compose logs rspamd + exit 1 + + - name: Download email corpus + working-directory: test/integration + run: | + # Use provided URL or default corpus from rspamd-test-corpus + CORPUS_URL="${{ github.event.inputs.corpus_url }}" + if [ -z "$CORPUS_URL" ]; then + # Default: use latest release from rspamd-test-corpus + CORPUS_URL="https://github.com/rspamd/rspamd-test-corpus/releases/latest/download/rspamd-test-corpus.zip" + fi + + echo "Downloading corpus from: $CORPUS_URL" + mkdir -p data + curl -fL "$CORPUS_URL" -o data/corpus.zip # -f: fail on HTTP errors instead of saving the error page as corpus.zip + + # Extract corpus + unzip data/corpus.zip -d data/ + + # The archive contains a 'corpus' directory, so we should have data/corpus/ now + ls -lh data/corpus/ + + - name: Run integration test + working-directory: test/integration + run: | + export RSPAMD_HOST=localhost + export CONTROLLER_PORT=50002 + export PROXY_PORT=50004 + export PASSWORD=q1 + export TEST_PROXY=true + export CORPUS_DIR=data/corpus/corpus + + # Verify corpus exists + if [ ! -d "$CORPUS_DIR" ]; then + echo "ERROR: Corpus directory not found at $CORPUS_DIR" + ls -la data/corpus/ + exit 1 + fi + + echo "Using corpus: $CORPUS_DIR" + ls -lh "$CORPUS_DIR" + + ./scripts/integration-test.sh + + - name: Check AddressSanitizer logs + if: always() + working-directory: test/integration + run: | + ./scripts/check-asan-logs.sh || echo "Memory issues detected, but continuing..."
+ + - name: Upload results + if: always() + uses: actions/upload-artifact@v4 + with: + name: integration-test-results + path: | + test/integration/data/results.json + test/integration/data/proxy_results.json + test/integration/data/asan.log* + test/integration/data/*.log + retention-days: 7 + + - name: Collect Docker logs # must run before the upload step so the log file exists + if: always() + working-directory: test/integration + run: | + docker compose logs > docker-compose-logs.txt + + - name: Upload Docker logs + if: always() + uses: actions/upload-artifact@v4 + with: + name: docker-logs + path: | + test/integration/docker-compose-logs.txt + retention-days: 7 + continue-on-error: true + + - name: Stop Docker Compose + if: always() + working-directory: test/integration + run: | + docker compose down -v + + - name: Test summary + if: always() + run: | + echo "## Integration Test Results" >> "$GITHUB_STEP_SUMMARY" + echo "" >> "$GITHUB_STEP_SUMMARY" + + if [ -f "test/integration/data/results.json" ]; then + TOTAL=$(jq '. | length' test/integration/data/results.json) + echo "- Total emails scanned: $TOTAL" >> "$GITHUB_STEP_SUMMARY" + echo "- Results saved to artifacts" >> "$GITHUB_STEP_SUMMARY" + else + echo "- ❌ No results file generated" >> "$GITHUB_STEP_SUMMARY" + fi diff --git a/test/integration/README.md b/test/integration/README.md index b4308eff15..30ae4f9603 100644 --- a/test/integration/README.md +++ b/test/integration/README.md @@ -63,14 +63,19 @@ docker compose logs rspamd ### 4.
Run test ```bash -# With local corpus (uses test/functional/messages) -./scripts/integration-test.py +# With local corpus (uses test/functional/messages by default) +./scripts/integration-test.sh -# With remote corpus -./scripts/integration-test.py --corpus-url https://example.com/emails.zip +# With rspamd-test-corpus (recommended) +# Download and extract the corpus +curl -L https://github.com/rspamd/rspamd-test-corpus/releases/latest/download/rspamd-test-corpus.zip -o corpus.zip +unzip corpus.zip +export CORPUS_DIR=$(pwd)/corpus/corpus +./scripts/integration-test.sh # With local directory -./scripts/integration-test.py --corpus-dir /path/to/emails +export CORPUS_DIR=/path/to/emails +./scripts/integration-test.sh ``` ### 5. Check for memory leaks @@ -89,19 +94,20 @@ docker compose down ## Test Parameters +The test script uses environment variables for configuration: + ```bash -./scripts/integration-test.py --help - -Options: - --corpus-url URL URL to download email corpus from - --corpus-dir DIR Directory containing email corpus - --rspamd-host HOST Rspamd host (default: localhost) - --rspamd-port PORT Controller port (default: 50002) - --proxy-port PORT Proxy port (default: 50004) - --password PASS Password (default: q1) - --train-ratio RATIO Training ratio (default: 0.1 = 10%) - --output FILE Output file for results (default: results.json) - --test-proxy Also test via proxy worker +# Configuration via environment variables +export RSPAMD_HOST=localhost # Rspamd host +export CONTROLLER_PORT=50002 # Controller port +export PROXY_PORT=50004 # Proxy port +export PASSWORD=q1 # API password +export PARALLEL=10 # Parallel requests +export TRAIN_RATIO=0.1 # Training ratio (10%) +export TEST_PROXY=true # Test via proxy worker +export CORPUS_DIR=/path/to/emails # Corpus directory + +./scripts/integration-test.sh ``` ## Project Structure @@ -121,7 +127,7 @@ test/integration/ │ └── fuzzy-keys.conf # Encryption keys (generated) ├── scripts/ │ ├── generate-keys.sh # Key 
generation -│ ├── integration-test.py # Test script +│ ├── integration-test.sh # Test script │ └── check-asan-logs.sh # ASan log checker ├── data/ # Data (corpus, results) └── README.md @@ -218,17 +224,51 @@ curl -H "Password: q1" http://localhost:50002/fuzzystats ```bash # Run test with proxy check -./scripts/integration-test.py --test-proxy +export TEST_PROXY=true +./scripts/integration-test.sh # Results will be saved in: -# - data/results.json (via controller) +# - data/scan_results.json (via controller) # - data/proxy_results.json (via proxy) ``` +## Email Corpus + +### Using rspamd-test-corpus + +The recommended way to run integration tests is with the [rspamd-test-corpus](https://github.com/rspamd/rspamd-test-corpus) repository. + +This corpus contains: +- **~1000 base messages** from the SpamAssassin public corpus +- **Regression tests** from real bug reports +- **Edge cases** for corner-case testing + +Download the latest release: + +```bash +curl -L https://github.com/rspamd/rspamd-test-corpus/releases/latest/download/rspamd-test-corpus.zip -o corpus.zip +unzip corpus.zip +export CORPUS_DIR=$(pwd)/corpus/corpus +``` + +### Using Local Messages + +By default, the test uses the `test/functional/messages` directory. However, these messages are often too small or synthetic for realistic testing. + +### Adding Regression Tests + +If you find a problematic email that causes a bug: + +1. Report the issue on GitHub +2. Add the email to [rspamd-test-corpus](https://github.com/rspamd/rspamd-test-corpus) +3. The corpus will be used automatically in CI tests + ## CI/CD See `.github/workflows/integration-test.yml` for automated runs in GitHub Actions. +The CI automatically downloads the latest corpus from the rspamd-test-corpus repository. + ## AddressSanitizer ### View ASan logs