--- /dev/null
+--[[
+Copyright (c) 2026, Vsevolod Stakhov <vsevolod@rspamd.com>
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+]]--
+
+local rspamd_regexp = require "rspamd_regexp"
+local rspamd_util = require "rspamd_util"
+
+-- Public API table returned at the end of this module
+local exports = {}
+
+-- File extension -> shell command that writes the decompressed file to stdout
+local decompressor = {
+  ['bz2'] = 'bzip2 -cd',
+  ['gz'] = 'gzip -cd',
+  ['xz'] = 'xz -cd',
+  ['zst'] = 'zstd -cd',
+}
+
+-- Month abbreviation -> zero-based month number (Jan = 0 ... Dec = 11)
+local month_map = {}
+for idx, abbr in ipairs({
+  'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
+  'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec',
+}) do
+  month_map[abbr] = idx - 1
+end
+
+-- Frames cycled by exports.spinner() and the os.time() value of its last redraw
+local spinner_chars = { '/', '-', '\\', '|' }
+local spinner_update_time = 0
+
+--- Draw one frame of a progress spinner on stderr.
+-- The frame is chosen from the current os.time() value and a redraw happens
+-- only when at least one second has passed since the previous one.
+function exports.spinner()
+  local current = os.time()
+  if (current - spinner_update_time) >= 1 then
+    spinner_update_time = current
+    local frame = spinner_chars[(current % #spinner_chars) + 1]
+    io.stderr:write(string.format("%s\r", frame))
+    io.stderr:flush()
+  end
+end
+
+--- Forget the time of the last redraw so the next exports.spinner() call
+-- is not throttled.
+exports.reset_spinner = function()
+  spinner_update_time = 0
+end
+
+-- Regular expressions used by exports.detect_log_format().
+-- The classic syslog timestamp and the journalctl timestamp occur in several
+-- expressions, so they are spelled once and concatenated.
+local syslog_ts_prefix = '^\\w{3} \\s?\\d\\d? \\d\\d:\\d\\d:\\d\\d '
+local journal_ts = '\\w{3} \\d{4}-\\d\\d-\\d\\d \\d\\d:\\d\\d:\\d\\d [A-Z]{3}'
+
+-- Native rspamd log line: "YYYY-MM-DD HH:MM:SS[.fraction] #pid("
+local re_rspamd_fmt = rspamd_regexp.create(
+  '^\\d{4}-\\d\\d-\\d\\d \\d\\d:\\d\\d:\\d\\d(?:\\.\\d{3,5})? #\\d+\\(')
+-- Syslog-style timestamp followed directly by "#pid("
+local re_syslog_fmt1 = rspamd_regexp.create(syslog_ts_prefix .. '#\\d+\\(')
+-- Syslog-style timestamp followed by a host token and "rspamd[pid]"
+local re_syslog_fmt2 = rspamd_regexp.create(syslog_ts_prefix .. '\\S+ rspamd\\[\\d+\\]')
+-- ISO 8601 timestamp with zone, a host token and "rspamd[pid]"
+local re_syslog5424_fmt = rspamd_regexp.create(
+  '\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}(?:\\.\\d{1,6})?(?:Z|[-+]\\d{2}:\\d{2}) \\S+ rspamd\\[\\d+\\]')
+-- newsyslog rotation notice
+local re_newsyslog = rspamd_regexp.create(
+  syslog_ts_prefix .. '\\S+ newsyslog\\[\\d+\\]: logfile turned over$')
+-- journalctl "-- Logs begin at ..., end at .... --" banner
+local re_journalctl = rspamd_regexp.create(
+  '^-- Logs begin at ' .. journal_ts .. ',' .. ' end at ' .. journal_ts .. '\\. --$')
+
+--- Classify a log line by its timestamp/prefix layout.
+-- @param line a single log line
+-- @return 'rspamd', 'syslog' or 'syslog5424' for a recognised layout;
+--   nil for a rotation/journal banner line that should be skipped;
+--   false when the line matches none of the known layouts
+function exports.detect_log_format(line)
+  if re_rspamd_fmt:match(line) then
+    return 'rspamd'
+  end
+  if re_syslog_fmt1:match(line) or re_syslog_fmt2:match(line) then
+    return 'syslog'
+  end
+  if re_syslog5424_fmt:match(line) then
+    return 'syslog5424'
+  end
+  if re_newsyslog:match(line) or re_journalctl:match(line) then
+    -- banner line: caller should skip it
+    return nil
+  end
+  -- unknown layout
+  return false
+end
+
+--- Convert a year-less syslog timestamp ("Mon D HH:MM:SS") into
+-- "YYYY-MM-DD HH:MM:SS".
+-- The current year is assumed; when that would place the timestamp after
+-- the current time, the previous year is used instead.
+-- @param ts_str timestamp text starting with month name, day and time
+-- @return formatted timestamp, or nil when the text does not parse or the
+--   month abbreviation is not in month_map
+function exports.syslog2iso(ts_str)
+  local mon_name, d, h, m, s = ts_str:match('^(%a+)%s+(%d+)%s+(%d+):(%d+):(%d+)')
+  local mon = mon_name and month_map[mon_name]
+  if not mon then
+    return nil
+  end
+
+  -- month_map is zero-based, os.time()/the output use 1..12
+  local month = mon + 1
+  d, h, m, s = tonumber(d), tonumber(h), tonumber(m), tonumber(s)
+
+  local now = os.time()
+  local year = os.date('*t', now).year
+  local candidate = os.time({
+    year = year, month = month, day = d,
+    hour = h, min = m, sec = s
+  })
+  if candidate > now then
+    year = year - 1
+  end
+
+  return string.format('%04d-%02d-%02d %02d:%02d:%02d', year, month, d, h, m, s)
+end
+
+--- Pull the leading timestamp out of a log line as "YYYY-MM-DD HH:MM:SS".
+-- @param line log line
+-- @param ts_format 'syslog', 'syslog5424', or anything else for the
+--   "date<space>time" layout
+-- @return the timestamp string, or nil when the line does not start with one
+function exports.extract_timestamp(line, ts_format)
+  if ts_format == 'syslog' then
+    local stamp = line:match('^(%a+%s+%d+%s+%d+:%d+:%d+)')
+    if not stamp then
+      return nil
+    end
+    return exports.syslog2iso(stamp)
+  end
+
+  -- Both remaining layouts differ only in the date/time separator
+  local pattern
+  if ts_format == 'syslog5424' then
+    pattern = '^(%d%d%d%d%-%d%d%-%d%d)T(%d%d:%d%d:%d%d)'
+  else
+    pattern = '^(%d%d%d%d%-%d%d%-%d%d)%s+(%d%d:%d%d:%d%d)'
+  end
+
+  local date_part, time_part = line:match(pattern)
+  if date_part and time_part then
+    return date_part .. ' ' .. time_part
+  end
+  return nil
+end
+
+--- Normalise a user-supplied time bound.
+-- @param s time string; may be nil or empty
+-- @return '' for nil/empty input; today's date followed by `s` when `s`
+--   consists only of digits and colons (starting with two digits);
+--   otherwise `s` unchanged
+function exports.normalized_time(s)
+  if not s or s == '' then
+    return ''
+  end
+  if not s:match('^%d%d[:%d]*$') then
+    return s
+  end
+  local today = os.date('*t')
+  return string.format('%04d-%02d-%02d %s', today.year, today.month, today.day, s)
+end
+
+--- Open a log file for reading, transparently decompressing it when its
+-- extension has an entry in `decompressor`.
+-- @param path file system path of the log file
+-- @return a readable handle (from io.popen for compressed files, io.open
+--   otherwise); on failure the values returned by those functions
+function exports.open_log_file(path)
+  local ext = path:match('%.([^%.]+)$')
+  local dc = ext and decompressor[ext]
+  if not dc then
+    return io.open(path, 'r')
+  end
+
+  -- The command line is interpreted by a shell: wrap the path in single
+  -- quotes (escaping embedded ones) so that spaces and metacharacters in
+  -- file names are passed through literally, and put `--` in front so a
+  -- name starting with '-' is not taken for an option.
+  local escaped = path:gsub("'", "'\\''")
+  return io.popen(dc .. " -- '" .. escaped .. "'", 'r')
+end
+
+local re_numbered_log = rspamd_regexp.create([[\.\d+(?:\.(?:bz2|gz|xz|zst))?$]]) -- name ends in ".<digits>", optionally followed by a compression suffix
+
+--- Extract the rotation index from a log file name.
+-- The index is the trailing ".<digits>" component, or the ".<digits>"
+-- component directly before a final extension (e.g. ".gz"). Looking from the
+-- end of the name keeps dotted numbers earlier in the name (host names, IP
+-- addresses, dates) from being mistaken for the index.
+-- @param fname file name without directory
+-- @return the index as a number, or 0 when the name carries none
+local function numeric_index(fname)
+  local idx = fname:match('%.(%d+)$') or fname:match('%.(%d+)%.[^%.]+$')
+  return tonumber(idx) or 0
+end
+
+--- Build the list of log files in a directory, ordered for processing.
+-- Regular files are split into those without a rotation suffix and those
+-- matching re_numbered_log; the latter are sorted by ascending rotation
+-- index and placed after the former. From that list `exclude_logs` leading
+-- entries are dropped, at most `num_logs` entries are kept, and the
+-- remainder is returned in reverse order. The chosen names are also
+-- printed to stderr.
+-- @param dir directory to scan
+-- @param num_logs maximum number of files to return (nil for no limit)
+-- @param exclude_logs number of leading files to drop (default 0)
+-- @return array of file names (without directory); empty when nothing is found
+function exports.get_logfiles_list(dir, num_logs, exclude_logs)
+  local skip = exclude_logs or 0
+
+  local found = rspamd_util.glob(dir .. '/*')
+  if not found or #found == 0 then
+    io.stderr:write(string.format("No files found in directory: %s\n", dir))
+    return {}
+  end
+
+  local current, rotated = {}, {}
+  for _, full_path in ipairs(found) do
+    local err, st = rspamd_util.stat(full_path)
+    if not err and st and st.type == 'regular' then
+      local fname = full_path:match('[^/]+$')
+      local bucket = re_numbered_log:match(fname) and rotated or current
+      bucket[#bucket + 1] = fname
+    end
+  end
+
+  table.sort(rotated, function(lhs, rhs)
+    return numeric_index(lhs) < numeric_index(rhs)
+  end)
+
+  -- Unrotated files first, then rotated ones by ascending index
+  local ordered = {}
+  for _, bucket in ipairs({ current, rotated }) do
+    for _, fname in ipairs(bucket) do
+      ordered[#ordered + 1] = fname
+    end
+  end
+
+  -- Window [first, last] after applying exclude_logs and num_logs
+  local first = skip + 1
+  local last = #ordered
+  if num_logs then
+    last = math.min(first + num_logs - 1, #ordered)
+  end
+
+  -- Walk the window backwards so the result comes out reversed
+  local picked = {}
+  for i = last, first, -1 do
+    picked[#picked + 1] = ordered[i]
+  end
+
+  io.stderr:write("\nLog files to process:\n")
+  for _, fname in ipairs(picked) do
+    io.stderr:write(string.format(" %s\n", fname))
+  end
+  io.stderr:write("\n")
+
+  return picked
+end
+
+local re_task_log = rspamd_regexp.create([[rspamd_task_write_log]]) -- selects the lines iterate_log() parses
+local re_log_line = rspamd_regexp.create( -- groups: parenthesised text before ": [", first number (or NaN), second number, bracketed symbols list, "time: <n>ms" value
+ '/\\(([^()]+)\\): \\[(NaN|-?\\d+(?:\\.\\d+)?)\\/(-?\\d+(?:\\.\\d+)?)\\]\\s+\\[([^]]+)\\].*time: (\\d+\\.\\d+)ms/')
+
+--- Read a log stream line by line and invoke `callback` for every task
+-- summary line that falls inside the requested time window.
+-- The log layout is detected from the first line that is not a
+-- rotation/journal banner; an unrecognised layout stops processing with a
+-- message on stderr. When `opts.search_pattern` is set, lines are ignored
+-- until one matches it.
+-- @param handle object providing :lines() (file, pipe or io.stdin)
+-- @param start_time lower bound "YYYY-MM-DD HH:MM:SS"; nil or '' disables it
+-- @param end_time upper bound in the same form; nil or '' disables it
+-- @param callback function(ts, action, score, symbols_str, scan_time, line)
+-- @param opts optional table; `search_pattern` is the only field read here
+function exports.iterate_log(handle, start_time, end_time, callback, opts)
+  opts = opts or {}
+  local search_pattern = opts.search_pattern
+  local search_re
+  if search_pattern and search_pattern ~= '' then
+    search_re = rspamd_regexp.create(search_pattern)
+  end
+
+  -- Both bounds are optional. A nil start_time must be treated like '',
+  -- exactly as end_time is, otherwise `ts < start_time` raises on nil.
+  local has_start = start_time and start_time ~= ''
+  local has_end = end_time and end_time ~= ''
+
+  local ts_format = nil
+  local enabled = (not search_re)
+
+  for line in handle:lines() do
+    if not ts_format then
+      local fmt = exports.detect_log_format(line)
+      if fmt == false then
+        io.stderr:write("Unknown log format\n")
+        return
+      elseif fmt == nil then
+        -- banner line: keep looking for the first real log line
+        goto continue
+      else
+        ts_format = fmt
+      end
+    end
+
+    if not enabled then
+      if search_re and search_re:match(line) then
+        enabled = true
+      else
+        goto continue
+      end
+    end
+
+    if re_task_log:match(line) then
+      exports.spinner()
+
+      local ts = exports.extract_timestamp(line, ts_format)
+      if not ts then
+        goto continue
+      end
+
+      -- Timestamps share one fixed-width layout, so string comparison
+      -- orders them chronologically
+      if has_start and ts < start_time then
+        goto continue
+      end
+      if has_end and ts > end_time then
+        goto continue
+      end
+
+      local results = re_log_line:search(line, false, true)
+      if not results or #results == 0 then
+        goto continue
+      end
+
+      -- captures[1] is the whole match, groups follow from index 2
+      local captures = results[1]
+      if not captures or #captures < 6 then
+        goto continue
+      end
+
+      local act = tostring(captures[2])
+      local score_str = tostring(captures[3])
+      local symbols_str = tostring(captures[5])
+      local scan_time_str = tostring(captures[6])
+
+      -- "NaN" (accepted by re_log_line) does not convert and becomes 0
+      local score = tonumber(score_str) or 0
+      local scan_time = tonumber(scan_time_str) or 0
+
+      callback(ts, act, score, symbols_str, scan_time, line)
+    end
+
+    ::continue::
+  end
+end
+
+--- Run exports.iterate_log over stdin, a single log file or every selected
+-- log file of a directory.
+-- @param log_file '-' or '' for stdin, otherwise a file or directory path
+-- @param start_time lower time bound (normalised with normalized_time)
+-- @param end_time upper time bound (normalised; empty means unbounded)
+-- @param callback forwarded to exports.iterate_log
+-- @param opts optional table; `num_logs` and `exclude_logs` select files in
+--   directory mode, the whole table is forwarded to exports.iterate_log
+-- Exits the process with status 1 when `log_file` cannot be stat'ed or,
+-- in single-file mode, cannot be opened.
+function exports.process_logs(log_file, start_time, end_time, callback, opts)
+  opts = opts or {}
+
+  start_time = exports.normalized_time(start_time or '')
+  end_time = exports.normalized_time(end_time or '')
+  if end_time == '' then
+    end_time = nil
+  end
+
+  -- Standard input
+  if log_file == '-' or log_file == '' then
+    exports.iterate_log(io.stdin, start_time, end_time, callback, opts)
+    return
+  end
+
+  local err, st = rspamd_util.stat(log_file)
+  if err then
+    io.stderr:write(string.format("Cannot stat %s: %s\n", log_file, err))
+    os.exit(1)
+  end
+
+  -- Single file
+  if st.type ~= 'directory' then
+    local fh, open_err = exports.open_log_file(log_file)
+    if not fh then
+      io.stderr:write(string.format("Cannot open %s: %s\n", log_file, open_err or 'unknown error'))
+      os.exit(1)
+    end
+    exports.reset_spinner()
+    exports.spinner()
+    exports.iterate_log(fh, start_time, end_time, callback, opts)
+    fh:close()
+    return
+  end
+
+  -- Directory: an unreadable file is reported and skipped
+  local logs = exports.get_logfiles_list(log_file, opts.num_logs, opts.exclude_logs or 0)
+  local count = #logs
+  for idx, fname in ipairs(logs) do
+    local path = log_file .. '/' .. fname
+    local fh, open_err = exports.open_log_file(path)
+    if fh then
+      io.stderr:write(string.format("\027[J Parsing log files: [%d/%d] %s\027[G",
+        idx, count, fname))
+      exports.reset_spinner()
+      exports.spinner()
+      exports.iterate_log(fh, start_time, end_time, callback, opts)
+      fh:close()
+    else
+      io.stderr:write(string.format("Cannot open %s: %s\n", path, open_err or 'unknown error'))
+    end
+  end
+  io.stderr:write("\027[J\027[G")
+end
+
+return exports -- module table holding the log helper functions defined above
--- /dev/null
+--[[
+Copyright (c) 2026, Vsevolod Stakhov <vsevolod@rspamd.com>
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+]]--
+
+local argparse = require "argparse"
+local rspamd_regexp = require "rspamd_regexp"
+local ucl = require "ucl"
+local log_utils = require "lua_log_utils"
+
+-- Command-line definition for `rspamadm logstats`
+local parser = argparse()
+  :name("rspamadm logstats")
+  :description("Analyze Rspamd rules by parsing log files")
+  :help_description_margin(32)
+
+-- Input selection and score thresholds
+parser:option("-l --log")
+  :description("Log file or directory to read (stdin by default)")
+  :argname("<file>")
+  :default("")
+parser:option("-r --reject-score")
+  :description("Reject threshold")
+  :argname("<score>")
+  :default("15.0")
+  :convert(tonumber)
+parser:option("-j --junk-score")
+  :description("Junk score threshold")
+  :argname("<score>")
+  :default("6.0")
+  :convert(tonumber)
+
+-- Symbol selection; each of these may be given several times
+parser:option("-s --symbol")
+  :description("Check specified symbol (regexp, '.*' by default)")
+  :argname("<sym>")
+  :count("*")
+parser:option("-S --symbol-bidir")
+  :description("Bidirectional symbol (splits into SYM_SPAM/SYM_HAM)")
+  :argname("<sym>")
+  :count("*")
+parser:option("-X --exclude")
+  :description("Exclude log lines if symbol fires")
+  :argname("<sym>")
+  :count("*")
+parser:option("--ignore")
+  :description("Ignore symbol in correlations")
+  :argname("<sym>")
+  :count("*")
+parser:option("-g --group")
+  :description("Group symbols (comma-separated)")
+  :argname("<syms>")
+  :count("*")
+parser:option("--mult")
+  :description("Multiply symbol score (sym=number)")
+  :argname("<sym=num>")
+  :count("*")
+parser:option("-a --alpha-score")
+  :description("Ignore score threshold")
+  :argname("<score>")
+  :default("0.1")
+  :convert(tonumber)
+
+-- Correlation reporting
+parser:flag("-c --correlations")
+  :description("Enable correlations report")
+parser:option("--nrelated")
+  :description("Number of related symbols to show")
+  :argname("<n>")
+  :default("10")
+  :convert(tonumber)
+
+-- Restricting which part of the logs is processed
+parser:option("--search-pattern")
+  :description("Do not process input until pattern is found")
+  :argname("<pattern>")
+  :default("")
+parser:option("--start")
+  :description("Starting time for log parsing")
+  :argname("<time>")
+  :default("")
+parser:option("--end")
+  :description("Ending time for log parsing")
+  :argname("<time>")
+parser:option("-n --num-logs")
+  :description("Number of recent logfiles to analyze")
+  :argname("<n>")
+  :convert(tonumber)
+parser:option("-x --exclude-logs")
+  :description("Number of latest logs to exclude")
+  :argname("<n>")
+  :default("0")
+  :convert(tonumber)
+
+-- Output format
+parser:flag("--json")
+  :description("Print JSON output")
+
+-- Compiled "^<pattern>$" expressions keyed by pattern text; `false` records
+-- a pattern that failed to compile so it is not retried on every call.
+local ignored_re_cache = {}
+
+--- Tell whether a symbol name fully matches any pattern of the ignore list.
+-- The function is invoked for every symbol of every processed log line, so
+-- each pattern is compiled only once and reused afterwards.
+-- @param sym symbol name to test
+-- @param ignored_list array of regexp sources (anchored here with ^...$)
+-- @return true when some pattern matches, false otherwise
+local function is_ignored(sym, ignored_list)
+  for _, ex in ipairs(ignored_list) do
+    local re = ignored_re_cache[ex]
+    if re == nil then
+      re = rspamd_regexp.create('^' .. ex .. '$') or false
+      ignored_re_cache[ex] = re
+    end
+    if re and re:match(sym) then
+      return true
+    end
+  end
+  return false
+end
+
+--- Pick the most frequent co-occurring symbols from a counter table.
+-- @param htb table mapping symbol name -> hit count
+-- @param target_sym symbol to leave out of the result
+-- @param nrelated maximum number of entries to return
+-- @return array of { name, count } pairs sorted by descending count
+local function gen_related(htb, target_sym, nrelated)
+  local ranked = {}
+  for name, hits in pairs(htb) do
+    if name ~= target_sym then
+      ranked[#ranked + 1] = { name, hits }
+    end
+  end
+
+  table.sort(ranked, function(lhs, rhs)
+    return lhs[2] > rhs[2]
+  end)
+
+  local top = {}
+  local limit = math.min(#ranked, nrelated)
+  for pos = 1, limit do
+    top[pos] = ranked[pos]
+  end
+  return top
+end
+
+--- Render { name, count } pairs as tab-indented lines of the form
+-- "name(count: percent%)", where percent is count relative to `total`.
+-- @param ar array of { name, count } pairs
+-- @param total value that represents 100%
+-- @return the lines joined with newlines ('' for an empty array)
+local function stringify_related(ar, total)
+  local lines = {}
+  for pos = 1, #ar do
+    local name, hits = ar[pos][1], ar[pos][2]
+    lines[pos] = string.format("\t%s(%d: %.1f%%)",
+      name, hits, hits / (total * 1.0) * 100.0)
+  end
+  return table.concat(lines, "\n")
+end
+
+--- Turn "sym=number" strings into a symbol -> multiplier table.
+-- Entries without '=' are skipped; a value that is not a number yields 1.0.
+-- @param mult_list array of "sym=number" strings
+-- @return table mapping symbol name to its multiplier
+local function parse_mult_options(mult_list)
+  local multipliers = {}
+  for pos = 1, #mult_list do
+    local name, factor = string.match(mult_list[pos], '^([^=]+)=(.+)$')
+    if name and factor then
+      multipliers[name] = tonumber(factor) or 1.0
+    end
+  end
+  return multipliers
+end
+
+local re_sym_parse = rspamd_regexp.create('/^([^(]+)(\\(([^)]+)\\))?/') -- splits "NAME(value)": group 1 is the name, group 3 the text inside the optional parentheses
+
+--- Count the symbols that fired together with `source` into `target`.
+-- For every entry of `symbols` the name is parsed, mapped to its group name
+-- when one is configured, and skipped when it equals `source` or matches
+-- the ignore list. When the entry carries a score, the score is multiplied
+-- by the symbol's multiplier, entries whose absolute score is below
+-- `diff_alpha` are skipped, and bidirectional symbols are renamed to their
+-- spam/ham variant according to the sign of the score.
+-- @param symbols array of raw "NAME(score){opts}" strings
+-- @param target counter table updated in place (name -> hits)
+-- @param source name of the symbol the counters belong to
+-- @param groups symbol name -> group name
+-- @param symbols_ignored array of ignore patterns (see is_ignored)
+-- @param symbols_mult symbol name -> score multiplier
+-- @param diff_alpha minimum absolute score for a symbol to be counted
+-- @param bidir_match symbol name -> { spam = ..., ham = ... }
+local function process_related(symbols, target, source, groups, symbols_ignored,
+                               symbols_mult, diff_alpha, bidir_match)
+  for _, raw in ipairs(symbols) do
+    local results = re_sym_parse:search(raw, false, true)
+    local caps = results and results[1]
+
+    if caps and #caps >= 2 then
+      local name = tostring(caps[2])
+      name = groups[name] or name
+
+      if source ~= name and not is_ignored(name, symbols_ignored) then
+        local counted = true
+
+        if caps[4] then
+          local weight = (tonumber(tostring(caps[4])) or 0) * (symbols_mult[name] or 1.0)
+          if math.abs(weight) < diff_alpha then
+            counted = false
+          else
+            local bm = bidir_match[name]
+            if bm then
+              name = (weight >= 0) and bm.spam or bm.ham
+            end
+          end
+        end
+
+        if counted then
+          target[name] = (target[name] or 0) + 1
+        end
+      end
+    end
+  end
+end
+
+--- Entry point of `rspamadm logstats`.
+-- Parses the command line, lets log_utils.process_logs feed every task
+-- summary line into a callback that accumulates per-symbol statistics, and
+-- finally prints the report either as JSON or as human-readable text.
+-- @param args command-line arguments handed to argparse
+local function handler(args)
+  local res = parser:parse(args)
+
+  local reject_score = res['reject_score']
+  local junk_score = res['junk_score']
+  local symbols_search = res['symbol'] or {}
+  local symbols_bidir = res['symbol_bidir'] or {}
+  local symbols_exclude = res['exclude'] or {}
+  local symbols_ignored = res['ignore'] or {}
+  local symbols_groups = res['group'] or {}
+  local symbols_mult = parse_mult_options(res['mult'] or {})
+  local diff_alpha = res['alpha_score']
+  local correlations = res['correlations']
+  local nrelated = res['nrelated']
+  local json_output = res['json']
+
+  -- Append `value` to `list` unless an equal entry is already present
+  local function add_unique(list, value)
+    for _, existing in ipairs(list) do
+      if existing == value then
+        return
+      end
+    end
+    table.insert(list, value)
+  end
+
+  -- Bidirectional symbols are reported as <SYM>_SPAM / <SYM>_HAM and are
+  -- implicitly added to the search list
+  local bidir_match = {}
+  for _, s in ipairs(symbols_bidir) do
+    bidir_match[s] = {
+      spam = s .. '_SPAM',
+      ham = s .. '_HAM',
+    }
+    add_unique(symbols_search, s)
+  end
+
+  -- Every --group option becomes "group<N>"; its members are searched for
+  -- and reported under that name
+  local groups = {}
+  local group_id = 0
+  for _, g in ipairs(symbols_groups) do
+    local group_name = 'group' .. group_id
+    group_id = group_id + 1
+    for sym in g:gmatch('[^,]+') do
+      groups[sym] = group_name
+      add_unique(symbols_search, sym)
+    end
+  end
+
+  if #symbols_search == 0 then
+    symbols_search = { '.*' }
+  end
+
+  -- Compile search patterns. `anchored_re` ('^' .. pattern) is used below to
+  -- check the bare symbol name; it is compiled once here instead of once
+  -- per matching symbol of every log line.
+  local search_res = {}
+  for _, s in ipairs(symbols_search) do
+    local re = rspamd_regexp.create(s)
+    if re then
+      table.insert(search_res, {
+        pattern = s,
+        re = re,
+        anchored_re = rspamd_regexp.create('^' .. s),
+      })
+    end
+  end
+
+  -- Compile exclude patterns
+  local exclude_res = {}
+  for _, ex in ipairs(symbols_exclude) do
+    local re = rspamd_regexp.create('^' .. ex)
+    if re then
+      table.insert(exclude_res, re)
+    end
+  end
+
+  local total = 0
+  local total_spam = 0
+  local total_junk = 0
+  local sym_res = {}
+  local actions = {}
+  local timeStamp = {}
+  local scanTime = { max = 0, total = 0 }
+
+  -- Called once per task summary line
+  local function process_callback(ts, act, score, symbols_str, scan_time)
+    -- Split symbols: split on , but accounting for {options} blocks
+    local symbols_raw = {}
+    local tmp = symbols_str
+    -- Split handling {}-enclosed options
+    while tmp and #tmp > 0 do
+      local sym_part, rest = tmp:match('^([^,{]+%b{})(.*)')
+      if not sym_part then
+        sym_part, rest = tmp:match('^([^,]+)(.*)')
+      end
+      if sym_part then
+        table.insert(symbols_raw, sym_part)
+        if rest and rest:sub(1, 1) == ',' then
+          rest = rest:sub(2)
+        end
+        tmp = rest
+      else
+        break
+      end
+    end
+
+    -- Check excludes: one hit drops the whole message from the statistics
+    for _, sym in ipairs(symbols_raw) do
+      for _, ex_re in ipairs(exclude_res) do
+        if ex_re:match(sym) then
+          return
+        end
+      end
+    end
+
+    -- Update timestamps
+    if not timeStamp['end'] or ts > timeStamp['end'] then
+      timeStamp['end'] = ts
+    end
+    if not timeStamp['start'] or ts < timeStamp['start'] then
+      timeStamp['start'] = ts
+    end
+
+    -- Update scan times
+    if not scanTime['min'] or scan_time < scanTime['min'] then
+      scanTime['min'] = scan_time
+    end
+    if scan_time > scanTime['max'] then
+      scanTime['max'] = scan_time
+    end
+    scanTime['total'] = scanTime['total'] + scan_time
+
+    actions[act] = (actions[act] or 0) + 1
+    total = total + 1
+
+    -- Classify the message by its score
+    local is_spam = false
+    local is_junk = false
+    if score >= reject_score then
+      total_spam = total_spam + 1
+      is_spam = true
+    elseif score >= junk_score then
+      total_junk = total_junk + 1
+      is_junk = true
+    end
+
+    -- Names (after bidir/group renaming) accounted for this message
+    local sym_names = {}
+
+    for _, sr in ipairs(search_res) do
+      for _, sym in ipairs(symbols_raw) do
+        if sr.re:match(sym) then
+          local results = re_sym_parse:search(sym, false, true)
+          if not results or #results == 0 then
+            goto continue_sym
+          end
+          local caps = results[1]
+          if not caps or #caps < 2 then
+            goto continue_sym
+          end
+
+          local sym_name = tostring(caps[2])
+          local sym_score = 0
+          local orig_name = sym_name
+
+          if caps[4] then
+            sym_score = (tonumber(tostring(caps[4])) or 0) * (symbols_mult[sym_name] or 1.0)
+            if math.abs(sym_score) < diff_alpha then
+              goto continue_sym
+            end
+            local bm = bidir_match[sym_name]
+            if bm then
+              if sym_score >= 0 then
+                sym_name = bm.spam
+              else
+                sym_name = bm.ham
+              end
+            end
+          end
+
+          -- Check that original name matches the search pattern
+          if sr.anchored_re and not sr.anchored_re:match(orig_name) then
+            goto continue_sym
+          end
+
+          if groups[sr.pattern] then
+            sym_name = groups[sr.pattern]
+          end
+
+          table.insert(sym_names, sym_name)
+
+          if not sym_res[sym_name] then
+            sym_res[sym_name] = {
+              hits = 0,
+              spam_hits = 0,
+              junk_hits = 0,
+              spam_change = 0,
+              junk_change = 0,
+              weight = 0,
+              corr = {},
+              symbols_met_spam = {},
+              symbols_met_ham = {},
+              symbols_met_junk = {},
+            }
+          end
+
+          local r = sym_res[sym_name]
+          r.hits = r.hits + 1
+          r.weight = r.weight + sym_score
+
+          if is_spam then
+            r.spam_hits = r.spam_hits + 1
+            if correlations then
+              process_related(symbols_raw, r.symbols_met_spam, sym_name,
+                groups, symbols_ignored, symbols_mult, diff_alpha, bidir_match)
+            end
+          elseif is_junk then
+            r.junk_hits = r.junk_hits + 1
+            if correlations then
+              process_related(symbols_raw, r.symbols_met_junk, sym_name,
+                groups, symbols_ignored, symbols_mult, diff_alpha, bidir_match)
+            end
+          else
+            if correlations then
+              process_related(symbols_raw, r.symbols_met_ham, sym_name,
+                groups, symbols_ignored, symbols_mult, diff_alpha, bidir_match)
+            end
+          end
+
+          -- Would the classification differ without this symbol's score?
+          if sym_score ~= 0 then
+            local score_without = score - sym_score
+            if sym_score > 0 then
+              if is_spam and score_without < reject_score then
+                r.spam_change = r.spam_change + 1
+              end
+              if is_junk and score_without < junk_score then
+                r.junk_change = r.junk_change + 1
+              end
+            else
+              if not is_spam and score_without >= reject_score then
+                r.spam_change = r.spam_change + 1
+              end
+              if not is_junk and score_without >= junk_score then
+                r.junk_change = r.junk_change + 1
+              end
+            end
+          end
+          ::continue_sym::
+        end
+      end
+    end
+
+    -- Pairwise co-occurrence counters between the accounted names
+    if correlations then
+      for _, sym in ipairs(sym_names) do
+        if not is_ignored(sym, symbols_ignored) then
+          local r = sym_res[sym]
+          for _, corr_sym in ipairs(sym_names) do
+            if corr_sym ~= sym then
+              r.corr[corr_sym] = (r.corr[corr_sym] or 0) + 1
+            end
+          end
+        end
+      end
+    end
+  end
+
+  log_utils.process_logs(res['log'], res['start'] or '', res['end'], process_callback, {
+    search_pattern = res['search_pattern'],
+    num_logs = res['num_logs'],
+    exclude_logs = res['exclude_logs'],
+  })
+
+  local total_ham = total - (total_spam + total_junk)
+
+  if json_output then
+    local result = {}
+    result.total = total
+    if timeStamp['start'] then
+      result.start = timeStamp['start']
+    end
+    if timeStamp['end'] then
+      result['end'] = timeStamp['end']
+    end
+    result.actions = actions
+    result.symbols = {}
+
+    if total > 0 then
+      for s, r in pairs(sym_res) do
+        if r.hits > 0 then
+          local th = r.hits
+          local sh = r.spam_hits
+          local jh = r.junk_hits
+          local hh = th - sh - jh
+          local htp = (total_ham ~= 0) and (hh * 100.0 / total_ham) or 0
+          local stp = (total_spam ~= 0) and (sh * 100.0 / total_spam) or 0
+          local jtp = (total_junk ~= 0) and (jh * 100.0 / total_junk) or 0
+
+          local sym_data = {
+            avg_weight = r.weight / th,
+            hits = th,
+            hits_percentage = th / total,
+            spam_hits = sh,
+            spam_to_total = sh / th,
+            spam_percentage = stp / 100.0,
+            ham_hits = hh,
+            ham_to_total = hh / th,
+            ham_percentage = htp / 100.0,
+            junk_hits = jh,
+            junk_to_total = jh / th,
+            junk_percentage = jtp / 100.0,
+          }
+
+          if r.weight ~= 0 then
+            sym_data.spam_change = r.spam_change
+            sym_data.junk_change = r.junk_change
+          end
+
+          if correlations then
+            local corr_data = {}
+            for cs, hits in pairs(r.corr) do
+              local corr_prob = hits / total
+              local sym_prob = r.hits / total
+              corr_data[cs] = corr_prob / sym_prob
+            end
+            -- The key spelling is part of the emitted JSON; kept as is
+            sym_data.correllations = corr_data
+          end
+
+          result.symbols[s] = sym_data
+        end
+      end
+    end
+
+    io.write(ucl.to_format(result, 'json'))
+    io.write('\n')
+  else
+    -- Human-readable output
+    if total > 0 then
+      for s, r in pairs(sym_res) do
+        if r.hits > 0 then
+          local th = r.hits
+          local sh = r.spam_hits
+          local jh = r.junk_hits
+          local hh = th - sh - jh
+          local htp = (total_ham ~= 0) and (hh * 100.0 / total_ham) or 0
+          local stp = (total_spam ~= 0) and (sh * 100.0 / total_spam) or 0
+          local jtp = (total_junk ~= 0) and (jh * 100.0 / total_junk) or 0
+
+          io.write(string.format(
+            "%s avg. weight %.3f, hits %d(%.3f%%):\n" ..
+            " Ham %7.3f%%, %6d/%-6d (%7.3f%%)\n" ..
+            " Spam %7.3f%%, %6d/%-6d (%7.3f%%)\n" ..
+            " Junk %7.3f%%, %6d/%-6d (%7.3f%%)\n",
+            s, r.weight / th, th, (th / total * 100),
+            (hh / th * 100), hh, total_ham, htp,
+            (sh / th * 100), sh, total_spam, stp,
+            (jh / th * 100), jh, total_junk, jtp))
+
+          local schp = (total_spam > 0) and (r.spam_change / total_spam * 100.0) or 0
+          local jchp = (total_junk > 0) and (r.junk_change / total_junk * 100.0) or 0
+
+          if r.weight ~= 0 then
+            if r.weight > 0 then
+              io.write(string.format(
+                "\nSpam changes (ham/junk -> spam): %6d/%-6d (%7.3f%%)\n" ..
+                "Spam changes / total spam hits: %6d/%-6d (%7.3f%%)\n" ..
+                "Junk changes (ham -> junk): %6d/%-6d (%7.3f%%)\n" ..
+                "Junk changes / total junk hits: %6d/%-6d (%7.3f%%)\n",
+                r.spam_change, th, (r.spam_change / th * 100),
+                r.spam_change, total_spam, schp,
+                r.junk_change, th, (r.junk_change / th * 100),
+                r.junk_change, total_junk, jchp))
+            else
+              io.write(string.format(
+                "\nSpam changes (spam -> junk/ham): %6d/%-6d (%7.3f%%)\n" ..
+                "Spam changes / total spam hits : %6d/%-6d (%7.3f%%)\n" ..
+                "Junk changes (junk -> ham) : %6d/%-6d (%7.3f%%)\n" ..
+                "Junk changes / total junk hits : %6d/%-6d (%7.3f%%)\n",
+                r.spam_change, th, (r.spam_change / th * 100),
+                r.spam_change, total_spam, schp,
+                r.junk_change, th, (r.junk_change / th * 100),
+                r.junk_change, total_junk, jchp))
+            end
+          end
+
+          if correlations then
+            io.write("Correlations report:\n")
+            for cs, _ in pairs(r.corr) do
+              local corr_prob = r.hits / total
+              local merged_hits = 0
+              if r.symbols_met_spam[cs] then
+                merged_hits = merged_hits + r.symbols_met_spam[cs]
+              end
+              if r.symbols_met_junk[cs] then
+                merged_hits = merged_hits + r.symbols_met_junk[cs]
+              end
+              if r.symbols_met_ham[cs] then
+                merged_hits = merged_hits + r.symbols_met_ham[cs]
+              end
+              if merged_hits > 0 then
+                io.write(string.format("Probability of %s when %s fires: %.3f\n",
+                  cs, s, ((merged_hits / total) / corr_prob)))
+              end
+            end
+
+            local spam_related = gen_related(r.symbols_met_spam, s, nrelated)
+            local junk_related = gen_related(r.symbols_met_junk, s, nrelated)
+            local ham_related = gen_related(r.symbols_met_ham, s, nrelated)
+
+            io.write("Related symbols report:\n")
+            io.write(string.format("Top related in spam:\n %s\n",
+              stringify_related(spam_related, r.spam_hits)))
+            io.write(string.format("Top related in junk:\n %s\n",
+              stringify_related(junk_related, r.junk_hits)))
+            io.write(string.format("Top related in ham:\n %s\n",
+              stringify_related(ham_related, r.hits - r.spam_hits - r.junk_hits)))
+          end
+        else
+          io.write(string.format("Symbol %s has not been met\n", s))
+        end
+
+        io.write(string.rep('-', 80) .. '\n')
+      end
+    end
+
+    io.write(string.format("\n=== Summary %s\nMessages scanned: %d",
+      string.rep('=', 68), total))
+    if timeStamp['start'] then
+      io.write(string.format(" [ %s / %s ]\n", timeStamp['start'], timeStamp['end']))
+    else
+      io.write('\n')
+    end
+    io.write('\n')
+    -- Actions are printed in alphabetical order for a stable report
+    local sorted_actions = {}
+    for a, _ in pairs(actions) do
+      table.insert(sorted_actions, a)
+    end
+    table.sort(sorted_actions)
+    for _, a in ipairs(sorted_actions) do
+      io.write(string.format("%11s: %6.2f%%, %d\n", a, 100 * actions[a] / total, actions[a]))
+    end
+    io.write('\n')
+    -- scanTime.min is only set once a message has been processed; the
+    -- collected values are divided by 1000 for the "s" figures printed here
+    if scanTime['min'] then
+      io.write(string.format("scan time min/avg/max = %.2f/%.2f/%.2f s\n",
+        scanTime['min'] / 1000,
+        (total > 0) and (scanTime['total'] / total / 1000) or 0,
+        scanTime['max'] / 1000))
+    end
+    io.write(string.rep('=', 80) .. '\n')
+  end
+end
+
+-- Command descriptor: name, help text taken from the parser, and entry point
+return {
+  name = 'logstats',
+  description = parser._description,
+  handler = handler,
+}
--- /dev/null
+--[[
+Copyright (c) 2026, Vsevolod Stakhov <vsevolod@rspamd.com>
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+]]--
+
+local argparse = require "argparse"
+local rspamd_regexp = require "rspamd_regexp"
+local rspamd_ip = require "rspamd_ip"
+local log_utils = require "lua_log_utils"
+
+-- Command-line definition for `rspamadm mapstats`
+local parser = argparse()
+  :name("rspamadm mapstats")
+  :description("Count Rspamd multimap matches by parsing log files")
+  :help_description_margin(32)
+
+-- Input log and configuration file
+parser:option("-l --log")
+  :description("Log file or directory to read (stdin by default)")
+  :argname("<file>")
+  :default("")
+parser:option("-c --config")
+  :description("Path to config file")
+  :argname("<file>")
+  :default(rspamd_paths and rspamd_paths["CONFDIR"] and
+    (rspamd_paths["CONFDIR"] .. "/" .. "rspamd.conf") or
+    "/etc/rspamd/rspamd.conf")
+
+-- Restricting which part of the logs is processed
+parser:option("--start")
+  :description("Starting time for log parsing")
+  :argname("<time>")
+  :default("")
+parser:option("--end")
+  :description("Ending time for log parsing")
+  :argname("<time>")
+parser:option("-n --num-logs")
+  :description("Number of recent logfiles to analyze")
+  :argname("<n>")
+  :convert(tonumber)
+parser:option("-x --exclude-logs")
+  :description("Number of latest logs to exclude")
+  :argname("<n>")
+  :default("0")
+  :convert(tonumber)
+
+local re_non_file_url = rspamd_regexp.create('/^.*(?<!file):\\/\\//') -- "://" not directly preceded by "file"
+local re_regexp_line = rspamd_regexp.create('/^\\/(.+)\\/(\\S?)(?:\\s+(\\d+\\.?\\d*))?(?:\\s+#\\s*(.*))?$/') -- "/pattern/flag [number] [# comment]"; NOTE(review): the flag group (\S?) takes at most one character while validate_regex_flags handles a set of flags - confirm multi-flag lines are not expected
+local re_plain_line = rspamd_regexp.create('/^(\\S+)(?:\\s+(\\d+\\.?\\d*))?(?:\\s+#\\s*(.*))?$/') -- "key [number] [# comment]"
+local re_comment_line = rspamd_regexp.create('/^#|^\\s*$/') -- line starting with '#', or empty/whitespace-only line
+local re_sym_with_opts = rspamd_regexp.create('/([^(]+)\\([.0-9]+\\)\\{([^;]+);\\}/') -- "NAME(number){option;}": captures NAME and option
+
+--- Load the rspamd configuration and return its `multimap` section.
+-- Any failure (config cannot be loaded or parsed, no multimap section) is
+-- reported on stderr and ends the process with status 1.
+-- @param config_path path of the configuration file
+-- @return the table returned by rspamd_config:get_all_opt('multimap')
+local function get_multimap_config(config_path)
+  -- Print a formatted message to stderr and terminate
+  local function die(fmt, ...)
+    io.stderr:write(string.format(fmt, ...))
+    os.exit(1)
+  end
+
+  local ok, err = rspamd_config:load_ucl(config_path)
+  if not ok then
+    die("Cannot load config %s: %s\n", config_path, err)
+  end
+
+  ok, err = rspamd_config:parse_rcl({ 'logging', 'worker' })
+  if not ok then
+    die("Cannot parse config %s: %s\n", config_path, err)
+  end
+
+  local multimap_opts = rspamd_config:get_all_opt('multimap')
+  if not multimap_opts then
+    io.stderr:write("No multimap configuration found.\n")
+    os.exit(1)
+  end
+
+  return multimap_opts
+end
+
+--- Check that a regexp flag string contains only the characters
+-- i, m, s, x, u, r, O and L.
+-- @param flags flag string; nil or '' is accepted
+-- @param map_file map file name, used in the error message
+-- @param line_num line number, used in the error message
+-- @return true when the flags are acceptable; false (after a message on
+--   stderr) otherwise
+local function validate_regex_flags(flags, map_file, line_num)
+  if not flags or #flags == 0 then
+    return true
+  end
+  if not flags:match('[^imsxurOL]') then
+    return true
+  end
+  io.stderr:write(string.format(
+    "Invalid regex flag in %s at line %d: '%s' (supported: imsxurOL)\n",
+    map_file, line_num, flags))
+  return false
+end
+
+--- Read a map file into an array of entries.
+-- Comment/blank lines become { line_num, is_comment = true, content };
+-- other lines become { line_num, pattern, result, comment, count = 0 },
+-- with `flag` and `compiled` added when `symbol_cfg.regexp` is set.
+-- @param symbol_cfg multimap rule configuration; only `regexp` is read
+-- @param map_file path of the map file
+-- @return array of entries; an empty table when the file cannot be opened
+--   or any line fails to parse, validate or compile (a message is written
+--   to stderr in those cases)
+local function get_map(symbol_cfg, map_file)
+  local fh, err = io.open(map_file, 'r')
+  if not fh then
+    io.stderr:write(string.format("Cannot open map file %s: %s\n", map_file, err or 'unknown'))
+    return {}
+  end
+
+  local entries = {}
+  local is_regexp = symbol_cfg.regexp and true or false
+  local line_re = is_regexp and re_regexp_line or re_plain_line
+
+  -- Report a problem for `at_line`, close the file and give the empty result
+  local function bail(fmt, at_line)
+    io.stderr:write(string.format(fmt, map_file, at_line))
+    fh:close()
+    return {}
+  end
+
+  local line_num = 0
+  for line in fh:lines() do
+    line_num = line_num + 1
+
+    if re_comment_line:match(line) then
+      entries[#entries + 1] = {
+        line_num = line_num,
+        is_comment = true,
+        content = line,
+      }
+    else
+      local results = line_re:search(line, false, true)
+      -- caps[1] is the whole match, groups follow from index 2
+      local caps = results and results[1]
+      if not caps or #caps < 2 then
+        return bail("Syntax error in %s at line %d\n", line_num)
+      end
+
+      local entry = {
+        line_num = line_num,
+        pattern = tostring(caps[2]),
+        count = 0,
+      }
+
+      if is_regexp then
+        local flags = caps[3] and tostring(caps[3]) or ''
+        entry.result = caps[4] and tostring(caps[4]) or nil
+        entry.comment = caps[5] and tostring(caps[5]) or nil
+
+        if not validate_regex_flags(flags, map_file, line_num) then
+          fh:close()
+          return {}
+        end
+
+        -- Compile with rspamd_regexp (handles all rspamd flags natively)
+        local compiled = rspamd_regexp.create('/' .. entry.pattern .. '/' .. flags)
+        if not compiled then
+          return bail("Invalid regex in %s at line %d\n", line_num)
+        end
+
+        entry.flag = flags
+        entry.compiled = compiled
+      else
+        entry.result = caps[3] and tostring(caps[3]) or nil
+        entry.comment = caps[4] and tostring(caps[4]) or nil
+      end
+
+      entries[#entries + 1] = entry
+    end
+  end
+
+  fh:close()
+  return entries
+end
+
+--- Check whether an address falls inside a map entry (single IP or CIDR).
+-- @param ip_obj rspamd_ip object to test
+-- @param cidr_str map entry, either `addr` or `addr/prefix_len`
+-- @return true when the (masked) addresses have equal string forms,
+--   false otherwise or when the entry cannot be parsed
+local function ip_within(ip_obj, cidr_str)
+  -- Split off the prefix length first so that only the bare address is
+  -- handed to the address parser.
+  local addr_str, mask_str = cidr_str:match('^(.+)/(%d+)$')
+  local cidr_ip = rspamd_ip.from_string(addr_str or cidr_str)
+  if not cidr_ip or not cidr_ip:is_valid() then
+    return false
+  end
+
+  -- Plain address entry: exact comparison
+  if not mask_str then
+    return tostring(ip_obj) == tostring(cidr_ip)
+  end
+
+  local mask = tonumber(mask_str)
+
+  -- Per the rspamd_ip API, apply_mask() returns the masked address as a new
+  -- object, so its result has to be used. The private copy is kept as the
+  -- fallback for the case when nothing is returned.
+  local ip_copy = ip_obj:copy()
+  local cidr_copy = cidr_ip:copy()
+  local ip_masked = ip_copy:apply_mask(mask) or ip_copy
+  local cidr_masked = cidr_copy:apply_mask(mask) or cidr_copy
+
+  return tostring(ip_masked) == tostring(cidr_masked)
+end
+
+--- Find the first map entry of a symbol that matches a logged option.
+-- Maps and entries are scanned in their stored order; comment entries are
+-- skipped.
+-- @param sym_map per-symbol record with `type`, `is_regexp` and `maps`
+-- @param sym_opt option string captured from the logged symbol
+-- @param ip_obj parsed address for `ip` maps (nil for other map types)
+-- @return the matching entry table, or nil when nothing matches
+local function find_map_entry(sym_map, sym_opt, ip_obj)
+  for _, map_entry in ipairs(sym_map.maps) do
+    for _, entry in ipairs(map_entry.entries) do
+      if not entry.is_comment then
+        local hit
+        if sym_map.type == 'ip' then
+          hit = ip_obj and ip_within(ip_obj, entry.pattern)
+        elseif sym_map.is_regexp then
+          hit = entry.compiled:match(sym_opt)
+        else
+          hit = (sym_opt == entry.pattern)
+        end
+        if hit then
+          return entry
+        end
+      end
+    end
+  end
+  return nil
+end
+
+--- Print the per-entry hit counts of every analysed symbol to stdout.
+-- @param symbols_search array of symbol names to report
+-- @param map table keyed by symbol name holding `type`, `is_regexp`, `maps`
+local function print_map_report(symbols_search, map)
+  for _, symbol in ipairs(symbols_search) do
+    local sym_map = map[symbol]
+    io.write(string.format("%s:\n", symbol))
+    io.write(string.format(" type=%s\n", sym_map.type))
+
+    for _, map_entry in ipairs(sym_map.maps) do
+      io.write(string.format("\nMap: %s\n", map_entry.source))
+      io.write("Pattern\t\t\tMatches\t\tComment\n")
+      io.write(string.rep('-', 80) .. '\n')
+
+      for _, entry in ipairs(map_entry.entries) do
+        if entry.is_comment then
+          -- Comment lines of the map file are echoed verbatim
+          io.write(entry.content .. '\n')
+        else
+          local shown = entry.pattern
+          if sym_map.is_regexp then
+            shown = '/' .. entry.pattern .. '/' .. entry.flag
+          end
+          io.write(string.format("%-23s", shown))
+
+          if entry.count and entry.count > 0 then
+            io.write(string.format("\t%d", entry.count))
+          else
+            io.write("\t-")
+          end
+
+          if entry.comment then
+            io.write(string.format("\t\t# %s", entry.comment))
+          end
+
+          io.write('\n')
+        end
+      end
+    end
+
+    io.write(string.rep('=', 80) .. '\n')
+  end
+end
+
+--- Print the most frequent unmatched values, grouped by symbol name.
+-- Nothing is printed when the table is empty.
+-- @param unmatched table mapping a raw logged symbol string to its count
+local function print_unmatched_report(unmatched)
+  if not next(unmatched) then
+    return
+  end
+
+  local max_shown = 5
+
+  io.write("\nSymbols with unmatched values:\n")
+  io.write(string.rep('-', 80) .. '\n')
+
+  local grouped = {}
+  for key, count in pairs(unmatched) do
+    -- Lua's %w does not include '_', so it is listed explicitly; otherwise
+    -- every symbol with an underscore in its name would be left out.
+    local sym_name = key:match('^([%w_]+)%(')
+    if sym_name then
+      if not grouped[sym_name] then
+        grouped[sym_name] = {}
+      end
+      table.insert(grouped[sym_name], { full = key, count = count })
+    end
+  end
+
+  local sorted_groups = {}
+  for sym_name in pairs(grouped) do
+    table.insert(sorted_groups, sym_name)
+  end
+  table.sort(sorted_groups)
+
+  for _, symbol in ipairs(sorted_groups) do
+    local entries = grouped[symbol]
+    -- Most frequent first; equal counts are ordered by text so the output
+    -- does not depend on table traversal order.
+    table.sort(entries, function(a, b)
+      if a.count ~= b.count then
+        return a.count > b.count
+      end
+      return a.full < b.full
+    end)
+
+    io.write(string.format("\n%s: %d unmatched value(s)\n", symbol, #entries))
+    local limit = math.min(#entries, max_shown)
+    for i = 1, limit do
+      io.write(string.format(" %dx: %s\n", entries[i].count, entries[i].full))
+    end
+    if #entries > max_shown then
+      io.write(" ...\n")
+    end
+  end
+end
+
+--- Entry point of the `mapstats` command.
+-- Loads the file-based maps referenced by the multimap configuration,
+-- counts how often each map entry matches the symbol options found in the
+-- processed logs, then prints the per-entry counters followed by a summary
+-- of the values that matched no entry. Exits with status 1 when no map
+-- could be loaded.
+-- @param args command-line arguments handed to the argument parser
+local function handler(args)
+  local res = parser:parse(args)
+
+  local multimap = get_multimap_config(res['config'])
+
+  local map = {}            -- symbol -> { type, is_regexp, maps }
+  local symbols_search = {} -- symbols with at least one loaded map
+  local unmatched = {}      -- raw logged symbol string -> occurrence count
+
+  for symbol, cfg in pairs(multimap) do
+    if type(cfg) ~= 'table' then
+      goto continue_sym
+    end
+
+    local maps_list = cfg['map']
+    if not maps_list then
+      goto continue_sym
+    end
+
+    -- Normalise to an array: a bare value, or a single map object (a table
+    -- without an array part), becomes a one-element list.
+    if type(maps_list) ~= 'table' or maps_list[1] == nil then
+      maps_list = { maps_list }
+    end
+
+    map[symbol] = {
+      type = cfg['type'] or 'string',
+      is_regexp = cfg['regexp'] and true or false,
+      maps = {},
+    }
+
+    local has_valid_maps = false
+    for _, map_source in ipairs(maps_list) do
+      if type(map_source) == 'table' then
+        map_source = map_source['url'] or map_source['name'] or ''
+      end
+      if type(map_source) ~= 'string' then
+        goto continue_map
+      end
+
+      -- Skip non-file maps
+      if re_non_file_url:match(map_source) then
+        io.write(string.format("%s: %s [SKIPPED]\n", symbol, map_source))
+        goto continue_map
+      end
+
+      -- Strip the fallback+ and file:// prefixes
+      local file_path = map_source:gsub('^fallback%+', ''):gsub('^file://', '')
+
+      local entries = get_map(cfg, file_path)
+      if #entries == 0 then
+        io.write(string.format("%s: %s [FAILED]\n", symbol, map_source))
+        goto continue_map
+      end
+
+      -- Comment lines are kept in `entries` for the report but not counted
+      local entry_count = 0
+      for _, e in ipairs(entries) do
+        if not e.is_comment then
+          entry_count = entry_count + 1
+        end
+      end
+      io.write(string.format("%s: %s [OK] - %d entries\n", symbol, map_source, entry_count))
+
+      table.insert(map[symbol].maps, {
+        source = map_source,
+        entries = entries,
+      })
+      has_valid_maps = true
+
+      ::continue_map::
+    end
+
+    if has_valid_maps then
+      table.insert(symbols_search, symbol)
+    end
+
+    ::continue_sym::
+  end
+
+  if #symbols_search == 0 then
+    io.stderr:write("No file-based multimap symbols found. Nothing to analyze.\n")
+    os.exit(1)
+  end
+
+  io.write("====== maps added =====\n")
+
+  -- Log callback: only the symbols string is used, the other arguments are
+  -- accepted to match the signature process_logs calls it with.
+  local function process_callback(ts, act, score, symbols_str, scan_time)
+    if not symbols_str or symbols_str == '' then
+      return
+    end
+
+    local symbols_raw = {}
+    for sym in symbols_str:gmatch('[^,]+') do
+      symbols_raw[#symbols_raw + 1] = sym
+    end
+
+    for _, s in ipairs(symbols_search) do
+      local sym_map = map[s]
+
+      for _, sym in ipairs(symbols_raw) do
+        -- Cheap plain-text prefilter before running the regexp
+        if not sym:find(s, 1, true) then
+          goto continue_inner
+        end
+
+        local results = re_sym_with_opts:search(sym, false, true)
+        local caps = results and results[1]
+        if not caps or #caps < 3 then
+          unmatched[sym] = (unmatched[sym] or 0) + 1
+          goto continue_inner
+        end
+
+        local sym_name = tostring(caps[2])
+        local sym_opt = tostring(caps[3])
+
+        -- The prefilter is a substring test; require the exact name here
+        if sym_name ~= s then
+          goto continue_inner
+        end
+
+        local ip_obj
+        if sym_map.type == 'ip' then
+          ip_obj = rspamd_ip.from_string(sym_opt)
+          if not ip_obj or not ip_obj:is_valid() then
+            io.stderr:write(string.format("Invalid IP address in symbol %s: %s\n", sym_name, sym_opt))
+            goto continue_inner
+          end
+        end
+
+        -- Only the first matching entry is credited with the hit
+        local entry = find_map_entry(sym_map, sym_opt, ip_obj)
+        if entry then
+          entry.count = entry.count + 1
+        else
+          unmatched[sym] = (unmatched[sym] or 0) + 1
+        end
+
+        ::continue_inner::
+      end
+    end
+  end
+
+  log_utils.process_logs(res['log'], res['start'] or '', res['end'], process_callback, {
+    num_logs = res['num_logs'],
+    exclude_logs = res['exclude_logs'],
+  })
+
+  -- Output results
+  print_map_report(symbols_search, map)
+
+  -- Unmatched report
+  print_unmatched_report(unmatched)
+end
+
+-- Command descriptor: the command name, the description stored on the
+-- argument parser, and the function implementing the command.
+return {
+  name = 'mapstats',
+  description = parser._description,
+  handler = handler,
+}