git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
[Project] Further content module work
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Sun, 24 Nov 2019 09:35:18 +0000 (09:35 +0000)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Mon, 25 Nov 2019 12:35:06 +0000 (12:35 +0000)
lualib/lua_content/ical.lua
lualib/lua_content/init.lua
src/libmime/message.c

index 4f6b61919fbc075b79cc319243cbe792e2c46ea4..c19723614fd21576d85c31ed458b27a104faedd2 100644 (file)
@@ -15,6 +15,7 @@ limitations under the License.
 ]]--
 
 local l = require 'lpeg'
+local rspamd_text = require "rspamd_text"
 
 local wsp = l.P" "
 local crlf = l.P"\r"^-1 * l.P"\n"
@@ -25,7 +26,7 @@ local elt = name * ":" * wsp^0 * value * eol
 
 local exports = {}
 
-local function ical_txt_values(input)
+local function process_ical(input, _, _)
   local control={n='\n', r='\r'}
   local escaper = l.Ct((elt / function(_,b) return (b:gsub("\\(.)", control)) end)^1)
 
@@ -35,13 +36,13 @@ local function ical_txt_values(input)
     return nil
   end
 
-  return table.concat(values, "\n")
+  return rspamd_text.fromtable(values, "\n")
 end
 
 --[[[
--- @function lua_ical.ical_txt_values(input)
+-- @function lua_ical.process(input)
 -- Returns all values from ical as a plain text. Names are completely ignored.
 --]]
-exports.ical_txt_values = ical_txt_values
+exports.process = process_ical
 
 return exports
\ No newline at end of file
index a968521393544227320d4af824dd19bbcce81ee7..994d613f47ff71d16dad915d800257995bd0b55b 100644 (file)
@@ -17,4 +17,75 @@ limitations under the License.
 --[[[
 -- @module lua_content
 -- This module contains content processing logic
---]]
\ No newline at end of file
+--]]
+
+
+local exports = {}
+local N = "lua_content"
+local lua_util = require "lua_util"
+
+local content_modules = { -- registry of content processors, keyed by module name
+  ical = {
+    mime_type = "text/calendar", -- compared with the part's "type/subtype" string
+    module = require "lua_content/ical", -- must expose process(); invoked as process(content, part, log_obj)
+    extensions = {'ical'}, -- compared with part:get_detected_ext() when the MIME type has no match
+    output = "text" -- NOTE(review): not read by any code visible here; presumably the kind of data process() yields - confirm
+  },
+}
+
+local modules_by_mime_type -- nil until init() runs; then maps mime type -> {name, descriptor}
+local modules_by_extension -- nil until init() runs; then maps extension -> {name, descriptor}
+
+local function init() -- (re)builds both lookup tables from `content_modules`; takes and returns nothing
+  modules_by_mime_type = {}
+  modules_by_extension = {}
+  for k,v in pairs(content_modules) do -- k: module name, v: its descriptor table
+    if v.mime_type then
+      modules_by_mime_type[v.mime_type] = {k, v} -- stored as a {name, descriptor} pair
+    end
+    if v.extensions then
+      for _,ext in ipairs(v.extensions) do
+        modules_by_extension[ext] = {k, v} -- same pair shape, keyed by each listed extension
+      end
+    end
+  end
+end
+
+exports.maybe_process_mime_part = function(part, log_obj) -- runs the matching content module (if any) on `part`; returns nothing
+  if not modules_by_mime_type then -- lookup tables are built lazily, on the first call
+    init()
+  end
+
+  local ctype, csubtype = part:get_type()
+  local mt = string.format("%s/%s", ctype or 'application', -- a nil type/subtype defaults to application/octet-stream
+      csubtype or 'octet-stream')
+  local pair = modules_by_mime_type[mt] -- {name, descriptor} or nil
+
+  if not pair then -- nothing registered for this MIME type: fall back to the detected extension
+    local ext = part:get_detected_ext()
+
+    if ext then
+      pair = modules_by_extension[ext]
+    end
+  end
+
+  if pair then
+    lua_util.debugm(N, log_obj, "found known content of type %s: %s", -- `mt` is the declared type even if the match came from the extension
+        mt, pair[1])
+
+    local data = pair[2].module.process(part:get_content(), part, log_obj) -- no pcall here: an error raised by the module propagates to the caller
+
+    if data then -- only a truthy result is attached to the part
+      lua_util.debugm(N, log_obj, "extracted content from %s: %s type",
+          pair[1], type(data))
+      part:set_specific(data)
+    else
+      lua_util.debugm(N, log_obj, "failed to extract anything from %s",
+          pair[1])
+    end
+  end
+
+end
+
+
+return exports
\ No newline at end of file
index 7d2d81a7fda417d040397b171a44b8d7131e942f..53c3cce277d3278d042c599e42ed845c86b35716 100644 (file)
@@ -694,71 +694,8 @@ rspamd_message_process_plain_text_part (struct rspamd_task *task,
        rspamd_mime_text_part_maybe_convert (task, text_part);
 
        if (text_part->utf_raw_content != NULL) {
-               /* Check for ical */
-               rspamd_ftok_t cal_ct;
-
-               /*
-                * TODO: If we want to process more than that, we need
-                * to create some generic framework that accepts a part
-                * and returns a processed data
-                */
-               RSPAMD_FTOK_ASSIGN (&cal_ct, "calendar");
-
-               if (rspamd_ftok_casecmp (&cal_ct, &text_part->mime_part->ct->subtype) == 0) {
-                       lua_State *L = task->cfg->lua_state;
-                       gint err_idx;
-
-                       lua_pushcfunction (L, &rspamd_lua_traceback);
-                       err_idx = lua_gettop (L);
-
-                       /* Obtain function */
-                       if (!rspamd_lua_require_function (L, "lua_ical", "ical_txt_values")) {
-                               msg_err_task ("cannot require lua_ical.ical_txt_values");
-                               lua_settop (L, err_idx - 1);
-
-                               return FALSE;
-                       }
-
-                       lua_pushlstring (L, text_part->utf_raw_content->data,
-                                       text_part->utf_raw_content->len);
-
-                       if (lua_pcall (L, 1, 1, err_idx) != 0) {
-                               msg_err_task ("cannot call lua lua_ical.ical_txt_values: %s",
-                                               lua_tostring (L, -1));
-                               lua_settop (L, err_idx - 1);
-
-                               return FALSE;
-                       }
-
-                       if (lua_type (L, -1) == LUA_TSTRING) {
-                               const char *ndata;
-                               gsize nsize;
-
-                               ndata = lua_tolstring (L, -1, &nsize);
-                               text_part->utf_content = g_byte_array_sized_new (nsize);
-                               g_byte_array_append (text_part->utf_content, ndata, nsize);
-                               rspamd_mempool_add_destructor (task->task_pool,
-                                               (rspamd_mempool_destruct_t) free_byte_array_callback,
-                                               text_part->utf_content);
-                       }
-                       else if (lua_type (L, -1) == LUA_TNIL) {
-                               msg_info_task ("cannot convert text/calendar to plain text");
-                               text_part->utf_content = text_part->utf_raw_content;
-                       }
-                       else {
-                               msg_err_task ("invalid return type when calling lua_ical.ical_txt_values: %s",
-                                               lua_typename (L, lua_type (L, -1)));
-                               lua_settop (L, err_idx - 1);
-
-                               return FALSE;
-                       }
-
-                       lua_settop (L, err_idx - 1);
-               }
-               else {
-                       /* Just have the same content */
-                       text_part->utf_content = text_part->utf_raw_content;
-               }
+               /* Just have the same content */
+               text_part->utf_content = text_part->utf_raw_content;
        }
        else {
                /*
@@ -1378,7 +1315,7 @@ rspamd_message_process (struct rspamd_task *task)
        guint tw, *ptw, dw;
        struct rspamd_mime_part *part;
        lua_State *L = NULL;
-       gint func_pos = -1;
+       gint magic_func_pos = -1, content_func_pos = -1, old_top = -1;
 
        if (task->cfg) {
                L = task->cfg->lua_state;
@@ -1386,20 +1323,32 @@ rspamd_message_process (struct rspamd_task *task)
 
        rspamd_archives_process (task);
 
+       if (L) {
+               old_top = lua_gettop (L);
+       }
+
        if (L && rspamd_lua_require_function (L,
                        "lua_magic", "detect_mime_part")) {
-               func_pos = lua_gettop (L);
+               magic_func_pos = lua_gettop (L);
        }
        else {
                msg_err_task ("cannot require lua_magic.detect_mime_part");
        }
 
+       if (L && rspamd_lua_require_function (L,
+                       "lua_content", "maybe_process_mime_part")) {
+               content_func_pos = lua_gettop (L);
+       }
+       else {
+               msg_err_task ("cannot require lua_content.maybe_process_mime_part");
+       }
+
        PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, parts), i, part) {
-               if (func_pos != -1 && part->parsed_data.len > 0) {
+               if (magic_func_pos != -1 && part->parsed_data.len > 0) {
                        struct rspamd_mime_part **pmime;
                        struct rspamd_task **ptask;
 
-                       lua_pushvalue (L, func_pos);
+                       lua_pushvalue (L, magic_func_pos);
                        pmime = lua_newuserdata (L, sizeof (struct rspamd_mime_part *));
                        rspamd_lua_setclass (L, "rspamd{mimepart}", -1);
                        *pmime = part;
@@ -1447,7 +1396,27 @@ rspamd_message_process (struct rspamd_task *task)
                                }
                        }
 
-                       lua_settop (L, func_pos);
+                       lua_settop (L, magic_func_pos);
+               }
+
+               /* Now detect content */
+               if (content_func_pos != -1 && part->parsed_data.len > 0) {
+                       struct rspamd_mime_part **pmime;
+                       struct rspamd_task **ptask;
+
+                       lua_pushvalue (L, content_func_pos);
+                       pmime = lua_newuserdata (L, sizeof (struct rspamd_mime_part *));
+                       rspamd_lua_setclass (L, "rspamd{mimepart}", -1);
+                       *pmime = part;
+                       ptask = lua_newuserdata (L, sizeof (struct rspamd_task *));
+                       rspamd_lua_setclass (L, "rspamd{task}", -1);
+                       *ptask = task;
+
+                       if (lua_pcall (L, 2, 2, 0) != 0) {
+                               msg_err_task ("cannot detect content: %s", lua_tostring (L, -1));
+                       }
+
+                       lua_settop (L, magic_func_pos);
                }
 
                if (part->part_type == RSPAMD_MIME_PART_UNDEFINED) {
@@ -1455,8 +1424,8 @@ rspamd_message_process (struct rspamd_task *task)
                }
        }
 
-       if (func_pos != -1) {
-               lua_settop (L, func_pos - 1);
+       if (old_top != -1) {
+               lua_settop (L, old_top);
        }
 
        /* Calculate average words length and number of short words */