From: Vsevolod Stakhov <vsevolod@rspamd.com>
Date: Sun, 18 Jan 2026 13:19:50 +0000 (+0000)
Subject: [Feature] Add reply_trim_mode for LLM input
X-Git-Tag: 4.0.0~185^2
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=d4d5193f32ff3ca5f5df8be3bbd54f8164faa984;p=thirdparty%2Frspamd.git

[Feature] Add reply_trim_mode for LLM input
---

diff --git a/conf/modules.d/gpt.conf b/conf/modules.d/gpt.conf
index ce1ae9e648..375c7b0943 100644
--- a/conf/modules.d/gpt.conf
+++ b/conf/modules.d/gpt.conf
@@ -36,6 +36,8 @@ gpt {
   timeout = 10s;
   # Prompt for the model (use default if not set)
   #prompt = "xxx";
+  # Reply trimming for LLM input: "always", "none", or "replies" (trim only if In-Reply-To/References is present)
+  #reply_trim_mode = "replies";
   # Custom condition (lua function)
   #condition = "xxx";
   # Autolearn if gpt classified
diff --git a/lualib/llm_common.lua b/lualib/llm_common.lua
index 0d35628f83..75f0acde5b 100644
--- a/lualib/llm_common.lua
+++ b/lualib/llm_common.lua
@@ -41,11 +41,22 @@ function M.build_llm_input(task, opts)
   -- Rspamd uses bytes for limit.
   -- Let's stick with what we had but using extract_text_limited
 
+  local reply_trim_mode = opts.reply_trim_mode or 'replies'
+  local trim_replies = false
+  if reply_trim_mode == 'always' then
+    trim_replies = true
+  elseif reply_trim_mode == 'none' then
+    trim_replies = false
+  else
+    trim_replies = task:has_header('In-Reply-To') or task:has_header('References')
+  end
+
   local extraction_opts = {
     max_bytes = max_tokens * 6, -- Rough estimate
     max_words = max_tokens, -- Better estimate if available
-    strip_quotes = true, -- Default cleanup for LLM
-    smart_trim = true, -- Enable heuristics
+    strip_quotes = trim_replies,
+    strip_reply_headers = trim_replies,
+    smart_trim = trim_replies,
   }
 
   local res = lua_mime.extract_text_limited(task, extraction_opts)
diff --git a/lualib/llm_context.lua b/lualib/llm_context.lua
index 276a3d8340..3c1872a8b8 100644
--- a/lualib/llm_context.lua
+++ b/lualib/llm_context.lua
@@ -226,10 +226,16 @@ local function build_message_summary(task, sel_part, opts)
   local model_cfg = { max_tokens = 256 }
   local content_tbl
   if sel_part then
-    local itbl = llm_common.build_llm_input(task, { max_tokens = model_cfg.max_tokens })
+    local itbl = llm_common.build_llm_input(task, {
+      max_tokens = model_cfg.max_tokens,
+      reply_trim_mode = opts.reply_trim_mode,
+    })
     content_tbl = itbl
   else
-    content_tbl = llm_common.build_llm_input(task, { max_tokens = model_cfg.max_tokens })
+    content_tbl = llm_common.build_llm_input(task, {
+      max_tokens = model_cfg.max_tokens,
+      reply_trim_mode = opts.reply_trim_mode,
+    })
   end
   if type(content_tbl) ~= 'table' then
     return nil
diff --git a/lualib/plugins/neural/providers/llm.lua b/lualib/plugins/neural/providers/llm.lua
index 17fc0c9f3e..ef79bbe73f 100644
--- a/lualib/plugins/neural/providers/llm.lua
+++ b/lualib/plugins/neural/providers/llm.lua
@@ -13,9 +13,8 @@ local llm_common = require "llm_common"
 
 local N = "neural.llm"
 
-local function select_text(task)
-  local input_tbl = llm_common.build_llm_input(task)
-  return input_tbl
+local function select_text(task, opts)
+  return llm_common.build_llm_input(task, opts)
 end
 
 local function compose_llm_settings(pcfg)
@@ -50,6 +49,7 @@ local function compose_llm_settings(pcfg)
     ssl_timeout = pcfg.ssl_timeout or gpt_settings.ssl_timeout,
     write_timeout = pcfg.write_timeout or gpt_settings.write_timeout,
     read_timeout = pcfg.read_timeout or gpt_settings.read_timeout,
+    reply_trim_mode = pcfg.reply_trim_mode or gpt_settings.reply_trim_mode,
   }
 end
 
@@ -89,7 +89,7 @@ neural_common.register_provider('llm', {
       end
     end
 
-    local input_tbl = select_text(task)
+    local input_tbl = select_text(task, { reply_trim_mode = llm.reply_trim_mode })
     if not input_tbl then
       rspamd_logger.debugm(N, task, 'llm provider has no content to embed; skip')
       cont(nil)
diff --git a/src/plugins/lua/gpt.lua b/src/plugins/lua/gpt.lua
index 66ee20a7c8..68c31e4d6b 100644
--- a/src/plugins/lua/gpt.lua
+++ b/src/plugins/lua/gpt.lua
@@ -171,6 +171,7 @@ local settings = {
   type = 'openai',
   api_key = nil,
   model = 'gpt-5-mini', -- or parallel model requests: [ 'gpt-5-mini', 'gpt-4o-mini' ],
+  reply_trim_mode = 'replies',
   model_parameters = {
     ["gpt-5-mini"] = {
       max_completion_tokens = 1000,
@@ -397,7 +398,10 @@ local function default_condition(task)
   -- Unified LLM input building (subject/from/urls/body one-line)
   local model_cfg = settings.model_parameters[settings.model] or {}
   local max_tokens = model_cfg.max_completion_tokens or model_cfg.max_tokens or 1000
-  local input_tbl, sel_part = llm_common.build_llm_input(task, { max_tokens = max_tokens })
+  local input_tbl, sel_part = llm_common.build_llm_input(task, {
+    max_tokens = max_tokens,
+    reply_trim_mode = settings.reply_trim_mode,
+  })
   if not sel_part then
     return false, 'no text part found'
   end
diff --git a/test/functional/configs/neural_llm.conf b/test/functional/configs/neural_llm.conf
index b6745adee3..48c5e9fa08 100644
--- a/test/functional/configs/neural_llm.conf
+++ b/test/functional/configs/neural_llm.conf
@@ -52,7 +52,9 @@ neural {
       symbol_ham = "NEURAL_HAM";
       ann_expire = 86400;
       watch_interval = 0.5;
-      providers = [{ type = "llm"; model = "dummy-embed"; url = "http://127.0.0.1:18080"; weight = 1.0; }];
+      providers = [{ type = "llm"; model = "dummy-embed"; url = "http://127.0.0.1:18080"; weight = 1.0;
+        #reply_trim_mode = "replies"; # always|none|replies
+      }];
       fusion { normalization = "none"; }
       roc_enabled = false;
     }