[Feature] Enhance text_part:get_content method

author Vsevolod Stakhov <vsevolod@highsecure.ru>

Mon, 6 Mar 2017 12:36:45 +0000 (12:36 +0000)

committer Vsevolod Stakhov <vsevolod@highsecure.ru>

Mon, 6 Mar 2017 12:38:28 +0000 (12:38 +0000)
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Mon, 6 Mar 2017 12:36:45 +0000 (12:36 +0000)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Mon, 6 Mar 2017 12:38:28 +0000 (12:38 +0000)
diff --git a/src/libmime/message.c b/src/libmime/message.c

index a4f3be5ca765b902c032bfe77cc7f6b9453eff8a..40769037b5c801deb57a29e2e4bf17cb3fcf4bbf 100644 (file)
--- a/src/libmime/message.c
+++ b/src/libmime/message.c
@@ -497,6 +497,7 @@ rspamd_message_process_text_part (struct rspamd_task *task,
                                 &text_part->exceptions,
                                 task->urls,
                                 task->emails);
+               text_part->utf_raw_content = part_content;
  
                 if (text_part->content->len == 0) {
                         text_part->flags |= RSPAMD_MIME_TEXT_PART_FLAG_EMPTY;
@@ -526,6 +527,7 @@ rspamd_message_process_text_part (struct rspamd_task *task,
  
                 text_part->content = rspamd_mime_text_part_maybe_convert (task,
                                 text_part);
+               text_part->utf_raw_content = text_part->content;
  
                 if (text_part->content != NULL) {
                         /*
diff --git a/src/libmime/message.h b/src/libmime/message.h

index 8c0f919eab679b49d5bd31a3f9d2e88ae4d6779c..15fcfcccc1ffadcb801a6bfc61bda97ecdec8f89 100644 (file)
--- a/src/libmime/message.h
+++ b/src/libmime/message.h
@@ -82,6 +82,7 @@ struct rspamd_mime_text_part {
         rspamd_ftok_t raw;
         rspamd_ftok_t parsed;
         GByteArray *content;
+       GByteArray *utf_raw_content;
         GByteArray *stripped_content;
         GPtrArray *newlines;    /**< positions of newlines in text                                      */
         struct html_content *html;
diff --git a/src/lua/lua_mimepart.c b/src/lua/lua_mimepart.c

index 6d17c3a661e03c08c0a762dafc86cb37e89eaa96..1ff3dbd582064a2ffc3b5dbfeb18d2d43403e65d 100644 (file)
--- a/src/lua/lua_mimepart.c
+++ b/src/lua/lua_mimepart.c
@@ -46,8 +46,13 @@ end
   */
  LUA_FUNCTION_DEF (textpart, is_utf);
  /***
- * @method text_part:get_content()
- * Get the text of the part (html tags stripped)
+ * @method text_part:get_content([type])
+ * Get the text of the part (html tags stripped by default). The optional `type` argument selects which representation of the content is returned:
+ * - `content` (default): utf8 content with HTML tags stripped and newlines preserved
+ * - `content_oneline`: utf8 content with HTML tags and newlines stripped
+ * - `raw`: raw content, not mime decoded nor utf8 converted
+ * - `raw_parsed`: raw content, mime decoded, not utf8 converted
+ * - `raw_utf`: raw content, mime decoded, utf8 converted (but with HTML tags and newlines)
   * @return {text} `UTF8` encoded content of the part (zero-copy if not converted to a lua string)
   */
  LUA_FUNCTION_DEF (textpart, get_content);
@@ -354,16 +359,51 @@ lua_textpart_get_content (lua_State * L)
  {
         struct rspamd_mime_text_part *part = lua_check_textpart (L);
         struct rspamd_lua_text *t;
+       gsize len;
+       const gchar *start, *type = NULL;
  
         if (part == NULL || IS_PART_EMPTY (part)) {
                 lua_pushnil (L);
                 return 1;
         }
  
+       if (lua_type (L, 2) == LUA_TSTRING) {
+               type = lua_tostring (L, 2);
+       }
+
         t = lua_newuserdata (L, sizeof (*t));
         rspamd_lua_setclass (L, "rspamd{text}", -1);
-       t->start = part->content->data;
-       t->len = part->content->len;
+
+       if (!type) {
+               start = part->content->data;
+               len = part->content->len;
+       }
+       else if (strcmp (type, "content") == 0) {
+               start = part->content->data;
+               len = part->content->len;
+       }
+       else if (strcmp (type, "content_oneline") == 0) {
+               start = part->stripped_content->data;
+               len = part->stripped_content->len;
+       }
+       else if (strcmp (type, "raw_parsed") == 0) {
+               start = part->parsed.begin;
+               len = part->parsed.len;
+       }
+       else if (strcmp (type, "raw_utf") == 0) {
+               start = part->utf_raw_content->data;
+               len = part->utf_raw_content->len;
+       }
+       else if (strcmp (type, "raw") == 0) {
+               start = part->raw.begin;
+               len = part->raw.len;
+       }
+       else {
+               return luaL_error (L, "invalid content type: %s", type);
+       }
+
+       t->start = start;
+       t->len = len;
         t->flags = 0;
  
         return 1;
author	Vsevolod Stakhov <vsevolod@highsecure.ru>
	Mon, 6 Mar 2017 12:36:45 +0000 (12:36 +0000)
committer	Vsevolod Stakhov <vsevolod@highsecure.ru>
	Mon, 6 Mar 2017 12:38:28 +0000 (12:38 +0000)
src/libmime/message.c		patch \| blob \| blame \| history
src/libmime/message.h		patch \| blob \| blame \| history
src/lua/lua_mimepart.c		patch \| blob \| blame \| history