w = (fw * total_count) / (1.0 + fw * total_count);
- /* Apply multinomial model for each class */
- for (j = 0; j < cl->num_classes; j++) {
- /* Skip classes with insufficient learns */
- if (ctx->cfg->min_learns > 0 && cl->class_learns[j] < ctx->cfg->min_learns) {
- continue;
+ if (cl->num_classes == 2) {
+ /* Binary-compatible path: normalize per-token probabilities across the two classes */
+ double f0 = (double) class_counts[0] / MAX(1.0, (double) cl->class_learns[0]);
+ double f1 = (double) class_counts[1] / MAX(1.0, (double) cl->class_learns[1]);
+ double denom = f0 + f1;
+
+ if (denom > 0.0) {
+ double p0 = f0 / denom;
+ double p1 = f1 / denom;
+ double bp0 = PROB_COMBINE(p0, total_count, w, 0.5);
+ double bp1 = PROB_COMBINE(p1, total_count, w, 0.5);
+
+ /* Bound and apply min strength (relative to 0.5 for binary) */
+ bp0 = MAX(0.0, MIN(1.0, bp0));
+ bp1 = MAX(0.0, MIN(1.0, bp1));
+
+ if (fabs(bp0 - 0.5) >= ctx->cfg->min_prob_strength) {
+ cl->class_log_probs[0] += log(bp0);
+ }
+ if (fabs(bp1 - 0.5) >= ctx->cfg->min_prob_strength) {
+ cl->class_log_probs[1] += log(bp1);
+ }
}
+ }
+ else {
+ /* General multinomial model for N>2 classes */
+ for (j = 0; j < cl->num_classes; j++) {
+ /* Skip classes with insufficient learns */
+ if (ctx->cfg->min_learns > 0 && cl->class_learns[j] < ctx->cfg->min_learns) {
+ continue;
+ }
- double class_freq = (double) class_counts[j] / MAX(1.0, (double) cl->class_learns[j]);
- double class_prob = PROB_COMBINE(class_freq, total_count, w, 1.0 / cl->num_classes);
+ double class_freq = (double) class_counts[j] / MAX(1.0, (double) cl->class_learns[j]);
+ double class_prob = PROB_COMBINE(class_freq, total_count, w, 1.0 / cl->num_classes);
- /* Ensure probability is properly bounded [0, 1] */
- class_prob = MAX(0.0, MIN(1.0, class_prob));
+ /* Ensure probability is properly bounded [0, 1] */
+ class_prob = MAX(0.0, MIN(1.0, class_prob));
- /* Skip probabilities too close to uniform (1/num_classes) */
- double uniform_prior = 1.0 / cl->num_classes;
- if (fabs(class_prob - uniform_prior) < ctx->cfg->min_prob_strength) {
- continue;
- }
+ /* Skip probabilities too close to uniform (1/num_classes) */
+ double uniform_prior = 1.0 / cl->num_classes;
+ if (fabs(class_prob - uniform_prior) < ctx->cfg->min_prob_strength) {
+ continue;
+ }
- cl->class_log_probs[j] += log(class_prob);
+ cl->class_log_probs[j] += log(class_prob);
+ }
}
cl->processed_tokens++;
if (!(tok->flags & RSPAMD_STAT_TOKEN_FLAG_META)) {
cl->text_tokens++;
}
-
- /* Per-token debug logging removed to reduce verbosity */
}
}