- Scan the same shuffled files that were used for training to get an accurate fuzzy detection rate
- Build with AddressSanitizer enabled (-DENABLE_SANITIZER=address)
- Add libasan8 and the missing runtime libraries to the Docker container
cd build
cmake -DCMAKE_INSTALL_PREFIX=../install \
-DENABLE_COVERAGE=OFF \
+ -DENABLE_SANITIZER=address \
+ -DSANITIZER=address \
-GNinja ..
ninja
ninja install
libicu74 \
libsodium23 \
libhyperscan5 \
+ libpcre2-8-0 \
+ libjemalloc2 \
+ libmagic1 \
+ libarchive13 \
+ libzstd1 \
+ libbrotli1 \
+ libfann2 \
+ libstemmer0d \
+ libasan8 \
&& rm -rf /var/lib/apt/lists/*
COPY install /usr
echo ""
echo "Scanning $TOTAL_EMAILS emails (parallelism: $PARALLEL)..."
-# rspamc can scan directories recursively
-rspamc -h "$RSPAMD_HOST:$CONTROLLER_PORT" -P "$PASSWORD" -n "$PARALLEL" -j \
- "$CORPUS_DIR" > "$DATA_DIR/scan_results.json" 2>&1
+# Scan the same files we used for training (from the shuffled list).
+# xargs -a reads the paths from the list file instead of the command line,
+# which avoids "argument list too long"; for a very long list xargs may run
+# rspamc more than once, with all output appended to the same results file.
+# -d '\n' makes every line exactly one argument, so blanks, quotes and
+# backslashes in file names are passed through untouched
+# (assumes the list holds one path per line - TODO confirm against its writer).
+xargs -d '\n' -a "$DATA_DIR/shuffled_files.txt" rspamc -h "$RSPAMD_HOST:$CONTROLLER_PORT" \
+ -P "$PASSWORD" -n "$PARALLEL" -j > "$DATA_DIR/scan_results.json" 2>&1
echo "✓ Scanning complete"
echo ""