From: Vsevolod Stakhov Date: Mon, 13 Apr 2026 14:30:06 +0000 (+0100) Subject: [Feature] selectors: add fuzzy_digest, fuzzy_shingles, authenticated, received_count... X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=58bdc9f5385b5de5c79091dbf6b2ffc55d6fed7e;p=thirdparty%2Frspamd.git [Feature] selectors: add fuzzy_digest, fuzzy_shingles, authenticated, received_count (#5981) * [Feature] selectors: add fuzzy_digest, fuzzy_shingles, authenticated, received_count * [Minor] selectors: deduplicate helpers and reuse cheaper task APIs Extract the "find largest text part" loop into common.largest_text_part shared by fuzzy_digest and fuzzy_shingles. Drop the pcall wrappers around get_fuzzy_hashes since the C API does not throw. Use get_header_count('Received') instead of allocating the full received headers table just to take its length. Add selector smoke tests for authenticated and received_count. * [Minor] selectors: use lua_mime.get_displayed_text_part Switch fuzzy_digest and fuzzy_shingles to lua_mime.get_displayed_text_part, which already handles the right MUA-display semantics: prefer non-attachment HTML, fall back to plain text, then to attachment HTML/text within size limits, with a minimum-words threshold. Drops the largest_text_part helper added in the previous commit since it picked the wrong part (e.g. a verbose text/plain alternative or a .txt attachment). --- diff --git a/lualib/lua_selectors/common.lua b/lualib/lua_selectors/common.lua index c375dd34e3..2caa77a326 100644 --- a/lualib/lua_selectors/common.lua +++ b/lualib/lua_selectors/common.lua @@ -94,4 +94,4 @@ exports.create_raw_digest = create_raw_digest exports.get_cached_or_raw_digest = get_cached_or_raw_digest exports.encode_digest = encode_digest -return exports \ No newline at end of file +return exports diff --git a/lualib/lua_selectors/extractors.lua b/lualib/lua_selectors/extractors.lua index efd37558c6..14810a8568 100644 --- a/lualib/lua_selectors/extractors.lua +++ b/lualib/lua_selectors/extractors.lua @@ -17,6 +17,7 @@ limitations under the License. local fun = require 'fun' local meta_functions = require "lua_meta" local lua_util = require "lua_util" +local lua_mime = require "lua_mime" local rspamd_util = require "rspamd_util" local rspamd_url = require "rspamd_url" local common = require "lua_selectors/common" @@ -593,6 +594,63 @@ The first argument must be header name.]], end, ['description'] = 'Get hostname of the filter server', }, + -- Get strong fuzzy digest of the displayed text part + ['fuzzy_digest'] = { + ['get_value'] = function(task) + local best = lua_mime.get_displayed_text_part(task) + if not best then + return nil + end + local digest = best:get_fuzzy_hashes(task:get_mempool()) + if not digest then + return nil + end + return digest, 'string' + end, + ['description'] = [[Get strong fuzzy digest (hex string) of the part an MUA would display +(see lua_mime.get_displayed_text_part). Returns nil if the message has no usable text part.]], + }, + -- Get fuzzy shingles of the displayed text part + ['fuzzy_shingles'] = { + ['get_value'] = function(task) + local best = lua_mime.get_displayed_text_part(task) + if not best then + return {}, 'string_list' + end + local _, shingles = best:get_fuzzy_hashes(task:get_mempool()) + if type(shingles) ~= 'table' then + return {}, 'string_list' + end + local res = {} + for _, s in ipairs(shingles) do + if type(s) == 'table' and s[1] then + table.insert(res, tostring(s[1])) + end + end + return res, 'string_list' + end, + ['description'] = [[Get list of fuzzy shingle hashes (as strings) for the part an MUA would display +(see lua_mime.get_displayed_text_part). Returns an empty list if no usable text part exists +or shingles cannot be computed.]], + }, + -- Check if the message was submitted by an authenticated user + ['authenticated'] = { + ['get_value'] = function(task) + if task:get_user() then + return 'true', 'string' + end + return 'false', 'string' + end, + ['description'] = [[Returns the string 'true' if the task has an authenticated user, +otherwise 'false'. Useful as a cheap proxy for outbound/submission traffic.]], + }, + -- Number of Received headers (hop count) + ['received_count'] = { + ['get_value'] = function(task) + return tostring(task:get_header_count('Received')), 'string' + end, + ['description'] = [[Get the number of Received headers as a string (hop count for the message).]], + }, } return extractors diff --git a/test/lua/unit/selectors.lua b/test/lua/unit/selectors.lua index 437014fe2f..72876339bc 100644 --- a/test/lua/unit/selectors.lua +++ b/test/lua/unit/selectors.lua @@ -384,6 +384,15 @@ context("Selectors test", function() selector = "task_cache('cachevar2')", expect = {{"hello", "world"}} }, + + ["authenticated true"] = { + selector = "authenticated", + expect = {"true"} + }, + ["received_count"] = { + selector = "received_count", + expect = {"2"} + }, } for case_name, case in lua_util.spairs(cases_plain) do