From: Vsevolod Stakhov Date: Tue, 25 Nov 2025 10:43:20 +0000 (+0000) Subject: [Fix] Harden PDF and Lua utility functions X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=refs%2Fpull%2F5762%2Fhead;p=thirdparty%2Frspamd.git [Fix] Harden PDF and Lua utility functions - lua_util_to_utf8: prevent integer overflow in buffer calculation, check for negative length from ucnv_convert - lua_task_inject_part: validate all table entries before processing - pdf.lua: refactor recursive flatten to iterative with depth limit, add input validation in apply_pdf_filter, reduce timeout to 2s, skip text extraction on timeout as results would be incomplete --- diff --git a/lualib/lua_content/pdf.lua b/lualib/lua_content/pdf.lua index 2625f79d02..f647dfb981 100644 --- a/lualib/lua_content/pdf.lua +++ b/lualib/lua_content/pdf.lua @@ -104,7 +104,10 @@ local config = { max_pdf_objects = 10000, -- Maximum number of objects to be considered max_pdf_trailer = 10 * 1024 * 1024, -- Maximum trailer size (to avoid abuse) max_pdf_trailer_lines = 100, -- Maximum number of lines in pdf trailer - pdf_process_timeout = 10.0, -- Timeout in seconds for processing + -- Timeout for PDF processing in seconds. If exceeded, text extraction is skipped + -- as partial results would be incorrect. Can be overridden via + -- pdf.pdf_process_timeout in configuration. + pdf_process_timeout = 2.0, } -- Used to process patterns found in PDF @@ -562,6 +565,11 @@ end -- Apply PDF stream filter local function apply_pdf_filter(input, filt) + -- Validate input before processing + if not input or (type(input) == 'string' and #input == 0) then + return nil + end + if filt == 'FlateDecode' or filt == 'Fl' then return rspamd_util.inflate(input, config.max_extraction_size) elseif filt == 'ASCIIHexDecode' or filt == 'AHx' then @@ -571,6 +579,9 @@ local function apply_pdf_filter(input, filt) if to_decode:sub(-1) == '>' then to_decode = to_decode:sub(1, -2) end + if #to_decode == 0 then + return nil + end return lua_util.unhex(to_decode) end @@ -1332,8 +1343,14 @@ local function search_text(task, pdf, mpart) end bl.data = tobj.uncompressed:span(bl.start, bl.len) - lua_util.debugm(N, task, 'extracted text from object %s:%s: %s', - tobj.major, tobj.minor, bl.data) + -- Only log preview of extracted text to avoid verbose logs + if bl.len <= 256 then + lua_util.debugm(N, task, 'extracted text from object %s:%s: %s', + tobj.major, tobj.minor, bl.data) + else + lua_util.debugm(N, task, 'extracted text from object %s:%s (%d bytes)', + tobj.major, tobj.minor, bl.len) + end if bl.len < config.max_processing_size then local ret, obj_or_err = pcall(pdf_text_grammar.match, pdf_text_grammar, @@ -1367,18 +1384,34 @@ local function search_text(task, pdf, mpart) if type(chunk) == 'userdata' then text[i] = tostring(chunk) elseif type(chunk) == 'table' then - -- Nested table? + -- Iterative flatten to avoid stack overflow with deeply nested tables local function flatten(t) local res = {} - for _, v in ipairs(t) do - if type(v) == 'userdata' then - res[#res + 1] = tostring(v) - elseif type(v) == 'table' then - res[#res + 1] = flatten(v) + local stack = { { tbl = t, idx = 1 } } + local max_depth = 100 -- Limit depth to prevent infinite loops + + while #stack > 0 and #stack <= max_depth do + local frame = stack[#stack] + local tbl, idx = frame.tbl, frame.idx + + if idx > #tbl then + -- Done with this table, pop frame + stack[#stack] = nil else - res[#res + 1] = v + local v = tbl[idx] + frame.idx = idx + 1 + + if type(v) == 'userdata' then + res[#res + 1] = tostring(v) + elseif type(v) == 'table' then + -- Push new frame for nested table + stack[#stack + 1] = { tbl = v, idx = 1 } + elseif v ~= nil then + res[#res + 1] = tostring(v) + end end end + return table.concat(res, '') end text[i] = flatten(chunk) @@ -1503,7 +1536,8 @@ local function process_pdf(input, mpart, task) -- Postprocess objects postprocess_pdf_objects(task, input, pdf_object) pdf_output.objects = pdf_object.objects - if config.text_extraction then + -- Skip text extraction if timeout occurred - partial results would be incorrect + if config.text_extraction and not pdf_object.timeout_processing then search_text(task, pdf_object, mpart) end if config.url_extraction then diff --git a/src/lua/lua_task.c b/src/lua/lua_task.c index 09c947c7a7..61246b4a90 100644 --- a/src/lua/lua_task.c +++ b/src/lua/lua_task.c @@ -2796,15 +2796,26 @@ lua_task_inject_part(lua_State *L) /* Accept string, rspamd_text, or table of texts */ if (lua_type(L, 3) == LUA_TTABLE) { is_table = TRUE; - /* Calculate total length first */ + /* Calculate total length first and validate all entries */ + int table_len = lua_objlen(L, 3); + if (table_len <= 0) { + return luaL_error(L, "empty table provided"); + } + lua_pushnil(L); while (lua_next(L, 3) != 0) { struct rspamd_lua_text *t = lua_check_text_or_string(L, -1); - if (t) { - content_len += t->len; + if (!t) { + lua_pop(L, 2); /* pop value and key */ + return luaL_error(L, "invalid entry in table (expected string or text)"); } + content_len += t->len; lua_pop(L, 1); } + + if (content_len == 0) { + return luaL_error(L, "all table entries are empty"); + } } else { content_text = lua_check_text_or_string(L, 3); diff --git a/src/lua/lua_util.c b/src/lua/lua_util.c index c3d0cb9b08..5da774aef9 100644 --- a/src/lua/lua_util.c +++ b/src/lua/lua_util.c @@ -1737,17 +1737,37 @@ lua_util_to_utf8(lua_State *L) return luaL_error(L, "invalid arguments"); } - dest_cap = t->len * 1.5 + 16; + /* Prevent integer overflow in buffer size calculation */ + if (t->len > (G_MAXINT32 / 2 - 16)) { + lua_pushnil(L); + return 1; + } + + dest_cap = t->len + (t->len / 2) + 16; dest = g_malloc(dest_cap); dest_len = ucnv_convert("UTF-8", charset, dest, dest_cap, t->start, t->len, &err); + /* Check for negative length (indicates error) or buffer overflow */ + if (dest_len < 0) { + g_free(dest); + lua_pushnil(L); + return 1; + } + if (err == U_BUFFER_OVERFLOW_ERROR) { g_free(dest); err = U_ZERO_ERROR; dest_cap = dest_len + 1; dest = g_malloc(dest_cap); dest_len = ucnv_convert("UTF-8", charset, dest, dest_cap, t->start, t->len, &err); + + /* Check again after retry */ + if (dest_len < 0) { + g_free(dest); + lua_pushnil(L); + return 1; + } } if (U_FAILURE(err)) {