reason_header = "X-GPT-Reason";
# Use JSON format for response
json = false;
+ # Optional: pass the request timeout to the server (in seconds)
+ # WARNING: not all API implementations support this parameter (e.g., the standard OpenAI API does not)
+ # Only enable this if your API endpoint or proxy specifically supports the max_completion_time parameter
+ # If not set, this parameter will not be sent to the server
+ # Note: the actual value sent to the server is multiplied by 0.95 to account for
+ # connection setup, SSL handshake, and data transfer overhead
+ # request_timeout = 8;
# Optional user/domain context in Redis
context = {
description = 'GPT model detected malware content',
category = 'malware',
},
+ GPT_UNCERTAIN = {
+ score = 0.0,
+ description = 'GPT model was uncertain about classification',
+ category = 'uncertain',
+ },
}
-- Should be filled from extra symbols
json = false,
extra_symbols = nil,
cache_prefix = REDIS_PREFIX,
+ request_timeout = nil, -- Optional: pass the request timeout to the server (in seconds)
-- user/domain context options (nested table forwarded to llm_context)
context = {
enabled = false,
elseif reply.probability == "low" then
spam_score = 0.1
else
- rspamd_logger.infox(task, "cannot convert to spam probability: %s", reply.probability)
+ lua_util.debugm(N, task, "cannot convert to spam probability: %s", reply.probability)
end
end
if type(reply.usage) == 'table' then
- rspamd_logger.infox(task, 'usage: %s tokens', reply.usage.total_tokens)
+ lua_util.debugm(N, task, 'usage: %s tokens', reply.usage.total_tokens)
end
return spam_score, reply.reason, {}
end
local first_message = reply.choices[1].message.content
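+ -- finish_reason explains why generation stopped: 'stop' for a normal completion, 'length' when the token limit was hit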
+ local finish_reason = reply.choices[1].finish_reason or 'unknown'
if not first_message or first_message == "" then
- rspamd_logger.errx(task, 'no content in the first message')
+ if finish_reason == 'length' then
+ -- Token limit exceeded: log an actionable error message
+ local usage = reply.usage or {}
+ local completion_tokens = usage.completion_tokens or 0
+ local reasoning_tokens = usage.completion_tokens_details and usage.completion_tokens_details.reasoning_tokens or 0
+ rspamd_logger.errx(task, 'LLM response truncated: token limit exceeded. ' ..
+ 'Used %s completion tokens (including %s reasoning tokens). ' ..
+ 'Increase max_completion_tokens in model_parameters config for this model.',
+ completion_tokens, reasoning_tokens)
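+ -- A minimal sketch of the configuration hinted at above (e.g. in local.d/gpt.conf), assuming
+ -- model_parameters is a per-model table; the model name below is hypothetical:
+ --   model_parameters {
+ --     "gpt-4o-mini" {
+ --       max_completion_tokens = 2000;
+ --     }
+ --   }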
+ else
+ rspamd_logger.errx(task, 'no content in the first message (finish_reason: %s, usage: %s)',
+ finish_reason, reply.usage and ucl.to_format(reply.usage, 'json-compact') or 'none')
+ end
return
end
local categories = lua_util.str_split(clean_reply_line(lines[3]), ',')
if type(reply.usage) == 'table' then
- rspamd_logger.infox(task, 'usage: %s tokens', reply.usage.total_tokens)
+ lua_util.debugm(N, task, 'usage: %s tokens', reply.usage.total_tokens)
end
if spam_score then
elseif reply.probability == "low" then
spam_score = 0.1
else
- rspamd_logger.infox(task, "cannot convert to spam probability: %s", reply.probability)
+ lua_util.debugm(N, task, "cannot convert to spam probability: %s", reply.probability)
end
end
if type(reply.usage) == 'table' then
- rspamd_logger.infox(task, 'usage: %s tokens', reply.usage.total_tokens)
+ lua_util.debugm(N, task, 'usage: %s tokens', reply.usage.total_tokens)
end
return spam_score, reply.reason, {}
if result.categories then
process_categories(task, result.categories)
end
- else
+ elseif result.probability < 0.5 then
task:insert_result('GPT_HAM', (0.5 - result.probability) * 2, tostring(result.probability))
if settings.autolearn then
task:set_flag("learn_ham")
if result.categories then
process_categories(task, result.categories)
end
+ else
+ -- probability is exactly 0.5: uncertain result, so set neither GPT_SPAM nor GPT_HAM
+ if result.categories then
+ process_categories(task, result.categories)
+ end
end
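+ -- Optionally expose the model's reasoning in a message header (name taken from the reason_header setting, e.g. "X-GPT-Reason")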
if result.reason and settings.reason_header then
- local v = lua_util.fold_header_with_encoding(task, settings.reason_header,
- tostring(result.reason), { encode = 'auto' })
- lua_mime.modify_headers(task,
- { add = { [settings.reason_header] = { value = v, order = 1 } } })
+ if type(settings.reason_header) == 'string' and #result.reason > 0 then
+ local ok, v = pcall(lua_util.fold_header_with_encoding, task, settings.reason_header,
+ result.reason, { encode = false, structured = false })
+ if ok and v then
+ lua_mime.modify_headers(task,
+ { add = { [settings.reason_header] = { value = v, order = 1 } } })
+ else
+ rspamd_logger.warnx(task, 'cannot fold header %s: %s; using raw value', settings.reason_header,
+ v)
+ -- Fallback: use raw value without encoding
+ lua_mime.modify_headers(task,
+ { add = { [settings.reason_header] = { value = result.reason, order = 1 } } })
+ end
+ end
end
if cache_context then
-- Update long-term user/domain context after classification
if redis_params and settings.context then
- llm_context.update_after_classification(task, redis_params, settings.context, result, sel_part)
+ llm_context.update_after_classification(task, redis_params, settings.context, result, sel_part, N)
end
end
end
local nspam, nham = 0, 0
- local max_spam_prob, max_ham_prob = 0, 0
+ local max_spam_prob, max_ham_prob = 0, 1.0
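+ -- max_ham_prob starts at 1.0: math.min() below keeps the lowest probability seen, i.e. the strongest ham vote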
local reasons = {}
for _, result in ipairs(results) do
- if result.success then
+ if result.success and result.probability then
if result.probability > 0.5 then
nspam = nspam + 1
max_spam_prob = math.max(max_spam_prob, result.probability)
lua_util.debugm(N, task, "model: %s; spam: %s; reason: '%s'",
- result.model, result.probability, result.reason)
+ result.model or 'unknown', result.probability, result.reason or 'no reason')
else
nham = nham + 1
max_ham_prob = math.min(max_ham_prob, result.probability)
lua_util.debugm(N, task, "model: %s; ham: %s; reason: '%s'",
- result.model, result.probability, result.reason)
+ result.model or 'unknown', result.probability, result.reason or 'no reason')
end
if result.reason then
},
sel_part)
else
- -- No consensus
- lua_util.debugm(N, task, "no consensus")
+ -- No consensus: still cache the result and set the uncertain symbol to avoid re-querying the LLM
+ lua_util.debugm(N, task, "no consensus: nspam=%s, nham=%s, max_spam_prob=%s, max_ham_prob=%s",
+ nspam, nham, max_spam_prob, max_ham_prob)
+ -- Use a neutral probability of 0.5 with an 'uncertain' marker
+ local uncertain_reason = reason_text or string.format(
+ "Uncertain classification: spam votes=%d (max %.2f), ham votes=%d (min %.2f)",
+ nspam, max_spam_prob, nham, max_ham_prob)
+ insert_results(task, {
+ probability = 0.5,
+ reason = uncertain_reason,
+ categories = { 'uncertain' },
+ },
+ sel_part)
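+ -- Informational symbol (defined with a zero score) so uncertain verdicts stay visible without affecting the total score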
+ task:insert_result('GPT_UNCERTAIN', 1.0)
end
end
end
if data then
- rspamd_logger.infox(task, 'found cached response %s', cache_key)
+ lua_util.debugm(N, task, 'found cached response %s', cache_key)
insert_results(task, data, sel_part)
else
check_llm_uncached(task, content, sel_part, context_snippet)
local function openai_check(task, content, sel_part, context_snippet)
lua_util.debugm(N, task, "sending content to gpt: %s", content)
+ if context_snippet then
+ lua_util.debugm(N, task, "with context snippet (%s chars): %s", #context_snippet, context_snippet)
+ else
+ lua_util.debugm(N, task, "no context snippet")
+ end
local upstream
local results = {}
body.response_format = { type = "json_object" }
end
+ -- Optionally add a request timeout for server-side timeout control
+ -- Only pass it if explicitly configured (not all API implementations support this)
+ -- Multiply by 0.95 to account for connection setup, SSL handshake, and data transfer time
+ if settings.request_timeout then
+ body.max_completion_time = settings.request_timeout * 0.95
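+ -- e.g. request_timeout = 8 sends max_completion_time = 7.6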
+ end
+
body.model = model
upstream = settings.upstreams:get_upstream_round_robin()
local function ollama_check(task, content, sel_part, context_snippet)
lua_util.debugm(N, task, "sending content to gpt: %s", content)
+ if context_snippet then
+ lua_util.debugm(N, task, "with context snippet (%s chars): %s", #context_snippet, context_snippet)
+ else
+ lua_util.debugm(N, task, "no context snippet")
+ end
local upstream
local results = {}
body.response_format = { type = "json_object" }
end
+ -- Optionally add a request timeout for server-side timeout control
+ -- Only pass it if explicitly configured (not all API implementations support this)
+ -- Multiply by 0.95 to account for connection setup, SSL handshake, and data transfer time
+ if settings.request_timeout then
+ body.max_completion_time = settings.request_timeout * 0.95
+ end
+
body.model = model
upstream = settings.upstreams:get_upstream_round_robin()
inferred_result = { probability = 0.1, reason = 'ham by filters', categories = {} }
end
end
- llm_context.update_after_classification(task, redis_params, settings.context, inferred_result, sel_part)
+ llm_context.update_after_classification(task, redis_params, settings.context, inferred_result, sel_part, N)
end
- rspamd_logger.info(task, "skip checking gpt as the condition is not met: %s; context updated", content)
+ lua_util.debugm(N, task, "skip checking gpt as the condition is not met: %s; context updated", content)
return
end
if not ret then
- rspamd_logger.info(task, "skip checking gpt as the condition is not met: %s", content)
+ lua_util.debugm(N, task, "skip checking gpt as the condition is not met: %s", content)
return
end
if context_enabled then
llm_context.fetch(task, redis_params, settings.context, function(_, _, snippet)
proceed(snippet)
- end)
+ end, N)
else
proceed(nil)
end