From ae485764cb9f564e250e06a700dcc13f88bdec93 Mon Sep 17 00:00:00 2001 From: Alexander Moisseev Date: Thu, 21 May 2026 11:34:30 +0300 Subject: [PATCH] [Feature] autolearnstats: add --sort-by and --group options Add --sort-by to sort rows by a chosen column (verdict, score, ts, tid, ip, from, rcpts) with timestamp as a tiebreaker. Score is compared numerically; all other columns lexicographically. Add --group flag to insert a blank separator line between consecutive rows where the --sort-by key changes. Add unit tests for sort key extraction functions. --- lualib/rspamadm/autolearnstats.lua | 57 ++++++++++++++-- test/lua/unit/autolearnstats.lua | 105 ++++++++++++++++++++++++++++- 2 files changed, 155 insertions(+), 7 deletions(-) diff --git a/lualib/rspamadm/autolearnstats.lua b/lualib/rspamadm/autolearnstats.lua index d61914dce0..0d05080cb0 100644 --- a/lualib/rspamadm/autolearnstats.lua +++ b/lualib/rspamadm/autolearnstats.lua @@ -45,6 +45,14 @@ parser:option "-x --exclude-logs" :argname "" :default "0" :convert(tonumber) +parser:option "--sort-by" + :description("Sort rows by column value, then by timestamp. " .. + "Sorting is lexicographic except for 'score' (numeric).") + :argname "" + :choices {"verdict", "score", "ts", "tid", "ip", "from", "rcpts"} +parser:flag "--group" + :description("Insert a blank separator line between groups of rows " .. + "with the same value in the --sort-by column") -- Lua-side "can autolearn" log line (lua_bayes_learn.lua) -- Module is always "lua" regardless of worker type. 
@@ -217,6 +225,18 @@ local function process_logs(log_file, start_time, end_time, candidates, learned, end end +local function make_sort_key_fns(ips) + return { + verdict = function(e) return e.c.verdict end, + score = function(e) return tonumber(e.c.score) or 0 end, + ts = function(e) return e.c.ts end, + tid = function(e) return (e.req_id:gsub('[<>]', '')) end, -- parentheses keep only the string; gsub also returns a substitution count + ip = function(e) return ips[e.req_id] or '-' end, + from = function(e) return e.c.from end, + rcpts = function(e) return e.c.rcpts end, + } +end + local function handler(args) local res = parser:parse(args) @@ -236,11 +256,31 @@ local function handler(args) } ) + local sort_by = res['sort_by'] + local do_group = res['group'] + local effective_sort = sort_by or 'ts' + local sort_key_fns = make_sort_key_fns(ips) + local key_fn = sort_key_fns[effective_sort] + local sorted = {} for req_id, c in pairs(candidates) do table.insert(sorted, { req_id = req_id, c = c }) end - table.sort(sorted, function(a, b) return a.c.ts < b.c.ts end) + + -- Pre-compute sort keys: O(n) instead of O(n log n) calls inside comparator, + -- and reused by the group separator logic during rendering.
+ for _, entry in ipairs(sorted) do + entry.sort_key = key_fn(entry) + end + + if effective_sort ~= 'ts' then + table.sort(sorted, function(a, b) + if a.sort_key ~= b.sort_key then return a.sort_key < b.sort_key end + return a.c.ts < b.c.ts + end) + else + table.sort(sorted, function(a, b) return a.sort_key < b.sort_key end) + end -- Compute column widths from actual data (plain values, no ANSI codes) local col = { @@ -283,8 +323,16 @@ local function handler(args) local n_learned = 0 local class_stats = {} + local prev_group_key = nil for _, entry in ipairs(sorted) do + if do_group then + if prev_group_key ~= nil and entry.sort_key ~= prev_group_key then + io.write('\n') + end + prev_group_key = entry.sort_key + end + local req_id = entry.req_id local c = entry.c local tid = req_id:gsub('[<>]', '') @@ -344,9 +392,10 @@ local exports = { handler = handler, description = parser._description, name = 'autolearnstats', - _pad = pad, - _cell = cell, - _MAX_COL = MAX_COL, + _pad = pad, + _cell = cell, + _MAX_COL = MAX_COL, + _make_sort_key_fns = make_sort_key_fns, } return exports diff --git a/test/lua/unit/autolearnstats.lua b/test/lua/unit/autolearnstats.lua index 46cf4a66f0..26cec301b6 100644 --- a/test/lua/unit/autolearnstats.lua +++ b/test/lua/unit/autolearnstats.lua @@ -1,7 +1,8 @@ local m = require 'rspamadm.autolearnstats' -local pad = m._pad -local cell = m._cell -local MAX_COL = m._MAX_COL +local pad = m._pad +local cell = m._cell +local MAX_COL = m._MAX_COL +local make_sort_key_fns = m._make_sort_key_fns context("autolearnstats - pad", function() test("pads short string to given width", function() @@ -80,3 +81,101 @@ context("autolearnstats - cell", function() assert_equal(60, MAX_COL) end) end) + +context("autolearnstats - sort_key_fns", function() + local function make_entry(req_id, verdict, score, from, rcpts, ts) + return { + req_id = req_id, + c = { verdict = verdict, score = score, from = from, rcpts = rcpts, ts = ts }, + } + end + + local ips = { ['<abc123>'] = 
'1.2.3.4', ['<def456>'] = '10.0.0.1' } + local fns = make_sort_key_fns(ips) + + test("verdict key returns verdict string", function() + local e = make_entry('', 'spam', '8.5', 'a@b.c', 'x@y.z', '2026-01-01 00:00:00') + assert_equal('spam', fns.verdict(e)) + end) + + test("score key returns number for positive score", function() + local e = make_entry('', 'spam', '8.5', 'a@b.c', 'x@y.z', '2026-01-01 00:00:00') + assert_equal(8.5, fns.score(e)) + end) + + test("score key returns number for negative score", function() + local e = make_entry('', 'ham', '-4.0', 'a@b.c', 'x@y.z', '2026-01-01 00:00:00') + assert_equal(-4.0, fns.score(e)) + end) + + test("score numeric order is correct (not lexicographic)", function() + local e10 = make_entry('', 'spam', '10.0', '', '', '2026-01-01 00:00:00') + local e9 = make_entry('', 'spam', '9.0', '', '', '2026-01-01 00:00:01') + assert_true(fns.score(e9) < fns.score(e10)) + end) + + test("ts key returns timestamp string", function() + local e = make_entry('', 'spam', '8.5', 'a@b.c', 'x@y.z', '2026-05-21 12:34:56') + assert_equal('2026-05-21 12:34:56', fns.ts(e)) + end) + + test("tid key strips angle brackets", function() + local e = make_entry('<abc123>', 'spam', '1.0', '', '', '') + assert_equal('abc123', fns.tid(e)) + end) + + test("ip key returns IP from ips table", function() + local e = make_entry('<abc123>', 'spam', '1.0', '', '', '') + assert_equal('1.2.3.4', fns.ip(e)) + end) + + test("ip key returns '-' for unknown req_id", function() + local e = make_entry('<unknown>', 'spam', '1.0', '', '', '') + assert_equal('-', fns.ip(e)) + end) + + test("from key returns from field", function() + local e = make_entry('', 'spam', '1.0', 'sender@example.com', '', '') + assert_equal('sender@example.com', fns.from(e)) + end) + + test("sort by verdict then ts preserves time order within group", function() + local entries = { + make_entry('<c>', 'spam', '8.0', '', '', '2026-01-01 00:00:03'), + make_entry('<a>', 'ham', '8.0', '', '', '2026-01-01 00:00:01'), + make_entry('<b>', 'spam', 
'8.0', '', '', '2026-01-01 00:00:02'), + make_entry('<d>', 'ham', '8.0', '', '', '2026-01-01 00:00:04'), + } + -- Simulate pre-computed sort_key as handler does + local key_fn = fns.verdict + for _, e in ipairs(entries) do e.sort_key = key_fn(e) end + table.sort(entries, function(a, b) + if a.sort_key ~= b.sort_key then return a.sort_key < b.sort_key end + return a.c.ts < b.c.ts + end) + -- 'ham' < 'spam' lexicographically; within ham: a(01) before d(04) + assert_equal('ham', entries[1].c.verdict) + assert_equal('<a>', entries[1].req_id) + assert_equal('ham', entries[2].c.verdict) + assert_equal('<d>', entries[2].req_id) + assert_equal('spam', entries[3].c.verdict) + assert_equal('<b>', entries[3].req_id) + assert_equal('spam', entries[4].c.verdict) + assert_equal('<c>', entries[4].req_id) + end) + + test("ts key used as default when no sort-by specified (--group without --sort-by)", function() + local entries = { + make_entry('<b>', 'spam', '8.0', '', '', '2026-01-01 00:00:02'), + make_entry('<a>', 'ham', '8.0', '', '', '2026-01-01 00:00:01'), + make_entry('<c>', 'spam', '8.0', '', '', '2026-01-01 00:00:02'), + } + local key_fn = fns.ts -- effective_sort = 'ts' when sort_by is nil + for _, e in ipairs(entries) do e.sort_key = key_fn(e) end + table.sort(entries, function(a, b) return a.sort_key < b.sort_key end) + assert_equal('<a>', entries[1].req_id) + -- b and c share the same ts: sort_key equal, group separator fires between + -- a unique ts and the repeated one + assert_equal(entries[2].sort_key, entries[3].sort_key) + end) +end) -- 2.47.3