local total_odd = math.ceil(total_checked / 2)
local total_even = math.floor(total_checked / 2)
- -- Correction for disjoint ranges if len > 32
if len > 32 then
total_odd = 16
total_even = 16
if charset and rspamd_util.to_utf8 then
local conv = rspamd_util.to_utf8(s, charset)
if conv then
- -- Check for control characters to avoid garbage
local garbage_limit = 0
local clen = #conv
for i = 1, clen do
end
if garbage_limit > 0 then
- -- Treat as garbage
return ''
end
for _, chunk in ipairs(t) do
if type(chunk) == 'string' then
table.insert(tres, chunk)
- elseif type(chunk) == 'number' then
- -- Heuristic: if number is < -200, append space
- if chunk < -200 then
- table.insert(tres, ' ')
- end
+ elseif type(chunk) == 'number' and chunk < -200 then
+ table.insert(tres, ' ')
end
end
res = table.concat(tres)
local function nary_op_handler(...)
local args = { ... }
local op = args[#args]
- -- local t = args[#args - 1] -- The table of numbers
if op == 'Tm' then
return '\n'
local function ternary_op_handler(...)
local args = { ... }
local op = args[#args]
- local a2 = args[#args - 2] -- Second to last argument (ty)
+ local a2 = args[#args - 2]
if (op == 'Td' or op == 'TD') and type(a2) == 'number' and a2 ~= 0 then
return '\n'
if type(filt) == 'string' then
filts = { filt }
elseif type(filt) == 'table' then
- -- Array of filters
filts = filt
end
if type(decode_params) == 'table' then
if decode_params.Predictor then
- -- We can handle Predictor 1 (No prediction) or maybe others in future
local predictor = tonumber(decode_params.Predictor) or 1
if predictor > 1 then
- -- For now, we just log debug and fail, or maybe try to continue if it's simple PNG prediction
- -- But without implementation, better to return nil to avoid garbage
return nil, 'predictor exists: ' .. tostring(predictor)
end
end
if next_uncompressed then
uncompressed = next_uncompressed
else
- -- If chain fails, we stop
return nil, 'filter failed: ' .. tostring(f)
end
end
return nil
end
local dict = obj.dict
-
- -- Determine length: try /Length first, fallback to parsed length
local len = obj.stream.len
local decl_len = maybe_dereference_object(dict.Length, pdf, task)
end
bl.data = tobj.uncompressed:span(bl.start, bl.len)
- -- Only log preview of extracted text to avoid verbose logs
if bl.len <= 256 then
lua_util.debugm(N, task, 'extracted text from object %s:%s: %s',
tobj.major, tobj.minor, bl.data)
for _, chunk in ipairs(obj_or_err) do
text[#text + 1] = chunk
end
- -- Add newline after each block to separate lines
text[#text + 1] = '\n'
- -- text[#text + 1] = obj_or_err
lua_util.debugm(N, task, 'attached %s from content object %s:%s to %s:%s',
obj_or_err, tobj.major, tobj.minor, obj.major, obj.minor)
else
if type(chunk) == 'userdata' then
text[i] = tostring(chunk)
elseif type(chunk) == 'table' then
- -- Iterative flatten to avoid stack overflow with deeply nested tables
local function flatten(t)
local res = {}
local stack = { { tbl = t, idx = 1 } }
- local max_depth = 100 -- Limit depth to prevent infinite loops
+ local max_depth = 100
while #stack > 0 and #stack <= max_depth do
local frame = stack[#stack]
local tbl, idx = frame.tbl, frame.idx
if idx > #tbl then
- -- Done with this table, pop frame
stack[#stack] = nil
else
local v = tbl[idx]
if type(v) == 'userdata' then
res[#res + 1] = tostring(v)
elseif type(v) == 'table' then
- -- Push new frame for nested table
stack[#stack + 1] = { tbl = v, idx = 1 }
elseif v ~= nil then
res[#res + 1] = tostring(v)
end
end
end
- -- Aggregate and inject once
+
if task.inject_part then
local all_text = {}
for _, obj in ipairs(pdf.objects) do
if obj.text and obj.text:len() > 0 then
- -- Keep as rspamd_text, don't convert to string
table.insert(all_text, obj.text)
end
end
if #all_text > 0 then
- -- Pass table of rspamd_text directly - will be efficiently merged in C
task:inject_part('text', all_text, mpart)
end
end
if (lua_type(L, 3) == LUA_TTABLE) {
is_table = TRUE;
/* Calculate total length first and validate all entries */
- int table_len = lua_objlen(L, 3);
+ int table_len = rspamd_lua_table_size(L, 3);
if (table_len <= 0) {
return luaL_error(L, "empty table provided");
}
part->part_type = RSPAMD_MIME_PART_TEXT;
part->flags |= RSPAMD_MIME_PART_COMPUTED;
- /* Set parent part if provided */
if (original_part) {
part->parent_part = original_part;
}
- /* Basic headers setup */
part->ct = rspamd_mempool_alloc0(task->task_pool, sizeof(*part->ct));
part->ct->type.begin = "text";
part->ct->charset.begin = "utf-8";
part->ct->charset.len = 5;
- /* Content setup - merge table or copy single content */
part->parsed_data.begin = rspamd_mempool_alloc(task->task_pool, content_len + 1);
part->parsed_data.len = content_len;
if (is_table) {
- /* Efficiently merge all text chunks */
char *dst = (char *) part->parsed_data.begin;
lua_pushnil(L);
while (lua_next(L, 3) != 0) {
*dst = '\0';
}
else {
- /* Single content */
memcpy((char *) part->parsed_data.begin, content, content_len);
((char *) part->parsed_data.begin)[content_len] = '\0';
}
part->raw_data = part->parsed_data;
- /* Text part specific setup */
txt_part = rspamd_mempool_alloc0(task->task_pool, sizeof(*txt_part));
txt_part->mime_part = part;
txt_part->raw.begin = part->parsed_data.begin;
txt_part->utf_stripped_text = (UText) UTEXT_INITIALIZER;
txt_part->real_charset = "utf-8";
- /* Add to message */
part->specific.txt = txt_part;
g_ptr_array_add(task->message->parts, part);
g_ptr_array_add(task->message->text_parts, txt_part);
- /* Process injected text part fully (URLs, words, normalization) */
if (task->cfg && task->message) {
- /* Use high order number to ensure injected URLs are after original ones */
uint16_t cur_url_order = 10000;
rspamd_message_process_injected_text_part(task, txt_part, &cur_url_order);
}