From: Vsevolod Stakhov <vsevolod@rspamd.com>
Date: Sat, 27 Sep 2025 09:56:47 +0000 (+0100)
Subject: [Fix] Another learn checks fix
X-Git-Tag: 3.13.1~9^2~3
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=71fa644b871f47c5abab8b1c006d43d1077560f7;p=thirdparty%2Frspamd.git

[Fix] Another learn checks fix
---

diff --git a/src/libstat/stat_process.c b/src/libstat/stat_process.c
index 9ee7865ca4..8be8ce5d65 100644
--- a/src/libstat/stat_process.c
+++ b/src/libstat/stat_process.c
@@ -613,6 +613,8 @@ rspamd_stat_cache_check(struct rspamd_stat_ctx *st_ctx,
 	struct rspamd_classifier *cl, *sel = NULL;
 	gpointer rt;
 	unsigned int i;
+	gboolean any_considered = FALSE;
+	gboolean any_available = FALSE;
 
 	/* Check whether we have learned that file */
 	for (i = 0; i < st_ctx->classifiers->len; i++) {
@@ -625,6 +627,29 @@ rspamd_stat_cache_check(struct rspamd_stat_ctx *st_ctx,
 		}
 
 		sel = cl;
+		any_considered = TRUE;
+
+		/* If classifier was skipped by learn conditions in preprocess, skip cache */
+		gboolean cl_skipped = TRUE;
+		if (task->stat_runtimes != NULL) {
+			for (int j = 0; j < cl->statfiles_ids->len; j++) {
+				int id = g_array_index(cl->statfiles_ids, int, j);
+				if (g_ptr_array_index(task->stat_runtimes, id) != NULL) {
+					cl_skipped = FALSE;
+					break;
+				}
+			}
+		}
+		else {
+			/* No runtimes prepared means not skipped */
+			cl_skipped = FALSE;
+		}
+
+		if (cl_skipped) {
+			continue;
+		}
+
+		any_available = TRUE;
 
 		if (sel->cache && sel->cachecf) {
 			rt = cl->cache->runtime(task, sel->cachecf, FALSE);
@@ -683,6 +708,15 @@ rspamd_stat_cache_check(struct rspamd_stat_ctx *st_ctx,
 		}
 	}
 
+	/* If we considered classifiers but all were skipped by conditions, stop early */
+	if (any_considered && !any_available) {
+		g_set_error(err, rspamd_stat_quark(), 204, "all learn conditions "
+												   "denied learning %s in %s",
+					spam ? "spam" : "ham",
+					classifier ? classifier : "default classifier");
+		return FALSE;
+	}
+
 	if (sel == NULL) {
 		if (classifier) {
 			g_set_error(err, rspamd_stat_quark(), 404, "cannot find classifier "
@@ -710,13 +744,14 @@ rspamd_stat_classifiers_learn(struct rspamd_stat_ctx *st_ctx,
 	unsigned int i;
 	gboolean learned = FALSE, too_small = FALSE, too_large = FALSE;
 
-	if ((task->flags & RSPAMD_TASK_FLAG_ALREADY_LEARNED) && err != NULL &&
-		*err == NULL) {
+	if (task->flags & RSPAMD_TASK_FLAG_ALREADY_LEARNED) {
 		/* Do not learn twice */
-		g_set_error(err, rspamd_stat_quark(), 208, "<%s> has been already "
-												   "learned as %s, ignore it",
-					MESSAGE_FIELD(task, message_id),
-					spam ? "spam" : "ham");
+		if (err && *err == NULL) {
+			g_set_error(err, rspamd_stat_quark(), 208, "<%s> has been already "
+													   "learned as %s, ignore it",
+						MESSAGE_FIELD(task, message_id),
+						spam ? "spam" : "ham");
+		}
 
 		return FALSE;
 	}
@@ -952,6 +987,9 @@ rspamd_stat_backends_learn(struct rspamd_stat_ctx *st_ctx,
 				}
 
 				res = TRUE;
+				/* Mark that at least one backend has actually learned */
+				rspamd_mempool_set_variable(task->task_pool, "stat_learn_performed",
+											GINT_TO_POINTER(1), NULL);
 			}
 		}
 	}
@@ -1041,27 +1079,33 @@ rspamd_stat_backends_post_learn(struct rspamd_stat_ctx *st_ctx,
 		if (cl->cache) {
 			cache_run = cl->cache->runtime(task, cl->cachecf, TRUE);
 
-			/* For multi-class learning, determine spam boolean from class name if available */
-			gboolean cache_spam = spam; /* Default to original spam parameter */
-			const char *autolearn_class = rspamd_task_get_autolearn_class(task);
-			if (autolearn_class) {
-				if (strcmp(autolearn_class, "spam") == 0 || strcmp(autolearn_class, "S") == 0) {
-					cache_spam = TRUE;
-				}
-				else if (strcmp(autolearn_class, "ham") == 0 || strcmp(autolearn_class, "H") == 0) {
-					cache_spam = FALSE;
-				}
-				else {
-					/* For other classes, use a heuristic or default to spam for cache purposes */
-					cache_spam = TRUE; /* Non-ham classes are treated as spam for cache */
+			/* Update cache only if some backend actually learned */
+			if (rspamd_mempool_get_variable(task->task_pool, "stat_learn_performed")) {
+				/* For multi-class learning, determine spam boolean from class name if available */
+				gboolean cache_spam = spam; /* Default to original spam parameter */
+				const char *autolearn_class = rspamd_task_get_autolearn_class(task);
+				if (autolearn_class) {
+					if (strcmp(autolearn_class, "spam") == 0 || strcmp(autolearn_class, "S") == 0) {
+						cache_spam = TRUE;
+					}
+					else if (strcmp(autolearn_class, "ham") == 0 || strcmp(autolearn_class, "H") == 0) {
+						cache_spam = FALSE;
+					}
+					else {
+						/* For other classes, use a heuristic or default to spam for cache purposes */
+						cache_spam = TRUE; /* Non-ham classes are treated as spam for cache */
+					}
 				}
-			}
 
-			cl->cache->learn(task, cache_spam, cache_run);
+				cl->cache->learn(task, cache_spam, cache_run);
+			}
 		}
 	}
 
-	g_atomic_int_add(&task->worker->srv->stat->messages_learned, 1);
+	/* Increment learned counter only if any backend actually learned */
+	if (rspamd_mempool_get_variable(task->task_pool, "stat_learn_performed")) {
+		g_atomic_int_add(&task->worker->srv->stat->messages_learned, 1);
+	}
 
 	return res;
 }
@@ -1077,13 +1121,14 @@ rspamd_stat_classifiers_learn_class(struct rspamd_stat_ctx *st_ctx,
 	unsigned int i;
 	gboolean learned = FALSE, too_small = FALSE, too_large = FALSE;
 
-	if ((task->flags & RSPAMD_TASK_FLAG_ALREADY_LEARNED) && err != NULL &&
-		*err == NULL) {
+	if (task->flags & RSPAMD_TASK_FLAG_ALREADY_LEARNED) {
 		/* Do not learn twice */
-		g_set_error(err, rspamd_stat_quark(), 208, "<%s> has been already "
-												   "learned as %s, ignore it",
-					MESSAGE_FIELD(task, message_id),
-					class_name);
+		if (err && *err == NULL) {
+			g_set_error(err, rspamd_stat_quark(), 208, "<%s> has been already "
+													   "learned as %s, ignore it",
+						MESSAGE_FIELD(task, message_id),
+						class_name);
+		}
 
 		return FALSE;
 	}