From: Vsevolod Stakhov
Date: Thu, 28 Aug 2025 11:30:54 +0000 (+0100)
Subject: [Fix] GPT: Fix occasional damage
X-Git-Tag: 3.13.0~22^2~4
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=e6fdc3b42fcfb53e758dc7c57225b1ee2a10f1bc;p=thirdparty%2Frspamd.git

[Fix] GPT: Fix occasional damage
---

diff --git a/lualib/llm_common.lua b/lualib/llm_common.lua
index 92d9a70d55..a89aafa438 100644
--- a/lualib/llm_common.lua
+++ b/lualib/llm_common.lua
@@ -25,7 +25,8 @@ local function get_meta_llm_content(task)
   return url_content, from_content
 end
 
--- Build a single text payload suitable for LLM embeddings
+-- Build structured payload suitable for LLM embeddings and chat
+-- Returns: table { subject = <string>, from = <string>, url_domains = <string>, text = <string|rspamd_text> }, part
 function M.build_llm_input(task, opts)
   opts = opts or {}
   local subject = task:get_subject() or ''
@@ -42,26 +43,25 @@ function M.build_llm_input(task, opts)
   end
 
   local max_tokens = tonumber(opts.max_tokens) or 1024
-  local text_line
+  local text
   if nwords > max_tokens then
     local words = sel_part:get_words('norm') or {}
     if #words > max_tokens then
-      text_line = table.concat(words, ' ', 1, max_tokens)
+      text = table.concat(words, ' ', 1, max_tokens)
     else
-      text_line = table.concat(words, ' ')
+      text = table.concat(words, ' ')
     end
   else
-    text_line = sel_part:get_content_oneline() or ''
+    -- Keep rspamd_text (userdata) intact; consumers (http/ucl) can use it directly
+    text = sel_part:get_content_oneline() or ''
   end
 
-  local content = table.concat({
-    'Subject: ' .. subject,
-    from_content,
-    url_content,
-    text_line,
-  }, '\n')
-
-  return content, sel_part
+  return {
+    subject = subject,
+    from = from_content,
+    url_domains = url_content,
+    text = text,
+  }, sel_part
 end
 
 -- Backwards-compat alias
diff --git a/src/plugins/lua/gpt.lua b/src/plugins/lua/gpt.lua
index 8c533ec647..1790e5e8d9 100644
--- a/src/plugins/lua/gpt.lua
+++ b/src/plugins/lua/gpt.lua
@@ -211,18 +211,18 @@ local function default_condition(task)
   end
 
   -- Unified LLM input building (subject/from/urls/body one-line)
-  local content, sel_part = llm_common.build_llm_input(task, { max_tokens = settings.max_tokens })
+  local input_tbl, sel_part = llm_common.build_llm_input(task, { max_tokens = settings.max_tokens })
   if not sel_part then
     return false, 'no text part found'
   end
-  if not content or #content == 0 then
+  if not input_tbl then
     local nwords = sel_part:get_words_count() or 0
     if nwords < 5 then
       return false, 'less than 5 words'
     end
     return false, 'no content to send'
   end
-  return true, content, sel_part
+  return true, input_tbl, sel_part
 end
 
 local function maybe_extract_json(str)
@@ -638,12 +638,11 @@ local function openai_check(task, content, sel_part)
   lua_util.debugm(N, task, "sending content to gpt: %s", content)
 
   local upstream
-
   local results = {}
 
-  local function gen_reply_closure(model, idx)
+  local function gen_reply_closure(model, i)
     return function(err, code, body)
-      results[idx].checked = true
+      results[i].checked = true
       if err then
         rspamd_logger.errx(task, '%s: request failed: %s', model, err)
         upstream:fail()
@@ -658,34 +657,46 @@
         return
       end
 
-      local reply, reason, categories = settings.reply_conversion(task, body)
+      local reply, reason = settings.reply_conversion(task, body)
 
-      results[idx].model = model
+      results[i].model = model
       if reply then
-        results[idx].success = true
-        results[idx].probability = reply
-        results[idx].reason = reason
-
-        if categories then
-          results[idx].categories = categories
-        end
+        results[i].success = true
+        results[i].probability = reply
+        results[i].reason = reason
       end
 
       check_consensus_and_insert_results(task, results, sel_part)
     end
   end
 
+  -- Build messages exactly as in the original code if a structured table is provided
+  local user_messages
+  if type(content) == 'table' then
+    local subject_line = 'Subject: ' .. (content.subject or '')
+    user_messages = {
+      { role = 'user', content = subject_line },
+      { role = 'user', content = content.from or '' },
+      { role = 'user', content = content.url_domains or '' },
+      { role = 'user', content = content.text or '' },
+    }
+  else
+    user_messages = {
+      { role = 'user', content = content }
+    }
+  end
+
   local body_base = {
+    stream = false,
+    max_tokens = settings.max_tokens,
+    temperature = settings.temperature,
     messages = {
       { role = 'system', content = settings.prompt },
-      {
-        role = 'user',
-        content = content
-      }
+      lua_util.unpack(user_messages)
     }
   }
@@ -776,6 +787,21 @@ local function ollama_check(task, content, sel_part)
     end
   end
 
+  local user_messages
+  if type(content) == 'table' then
+    local subject_line = 'Subject: ' .. (content.subject or '')
+    user_messages = {
+      { role = 'user', content = subject_line },
+      { role = 'user', content = content.from or '' },
+      { role = 'user', content = content.url_domains or '' },
+      { role = 'user', content = content.text or '' },
+    }
+  else
+    user_messages = {
+      { role = 'user', content = content }
+    }
+  end
+
   if type(settings.model) == 'string' then
     settings.model = { settings.model }
   end
@@ -790,10 +816,7 @@
       {
         role = 'system',
         content = settings.prompt
       },
-      {
-        role = 'user',
-        content = content
-      }
+      table.unpack(user_messages)
     }
   }
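
Note on the root cause, inferred from the comment the commit adds in llm_common.lua rather than stated outright: the old code funnelled everything through table.concat to build one string, but sel_part:get_content_oneline() returns an rspamd_text userdata, and standard Lua's table.concat accepts only strings and numbers, so that value had to be stringified or rejected on the way. The structured return lets the http/ucl layer consume the rspamd_text directly instead. A stand-in userdata illustrates the failure mode; io.stdout is used here only because it is a readily available userdata, not because file handles are involved:

-- Plain Lua 5.2+ illustration, not part of the commit.
-- table.concat() refuses non-string, non-number elements outright.
local ok, err = pcall(table.concat, { 'Subject: test', io.stdout }, '\n')
print(ok, err)
--> false   invalid value (at index 2) in table for 'concat'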
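
For reference, a minimal sketch of how the structured table is expanded into chat messages; build_messages and its arguments are hypothetical names, not part of the commit. The placement of the unpack call matters: in a Lua table constructor, a multi-value expression is truncated to a single value unless it is the last element, which is why both hunks splice user_messages in as the final entry of messages (lua_util.unpack being rspamd's portability shim over table.unpack/unpack across Lua versions):

-- Runnable sketch in plain Lua 5.2+ (assumed helper, not committed code).
local function build_messages(prompt, input_tbl)
  local user_messages = {
    { role = 'user', content = 'Subject: ' .. (input_tbl.subject or '') },
    { role = 'user', content = input_tbl.from or '' },
    { role = 'user', content = input_tbl.url_domains or '' },
    { role = 'user', content = input_tbl.text or '' },
  }
  return {
    { role = 'system', content = prompt },
    table.unpack(user_messages), -- must be the final constructor element
  }
end

-- Usage: one system message plus four user messages.
local msgs = build_messages('You are a spam filter', {
  subject = 'Hello',
  from = 'From: sender example',
  url_domains = 'example.com',
  text = 'body words',
})
assert(#msgs == 5)

Keeping the four fields as separate user messages also means the body text can stay an rspamd_text all the way to serialization, which is the point of the fix.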