]> git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
[Feature] Add Lua binding for HTML URL rewriting (task:rewrite_html_urls)
author: Vsevolod Stakhov <vsevolod@rspamd.com>
Sat, 11 Oct 2025 14:08:13 +0000 (15:08 +0100)
committer: Vsevolod Stakhov <vsevolod@rspamd.com>
Sat, 11 Oct 2025 14:08:13 +0000 (15:08 +0100)
src/libserver/CMakeLists.txt
src/libserver/html/html_url_rewrite_c.cxx [new file with mode: 0644]
src/libserver/html/html_url_rewrite_c.h [new file with mode: 0644]
src/lua/lua_task.c

index 73b04856d7a1223d66609d14f988ba6439d99f64..721e09a65c47bd54630847b95bbe848c5118e993 100644 (file)
@@ -43,6 +43,7 @@ SET(LIBRSPAMDSERVERSRC
         ${CMAKE_CURRENT_SOURCE_DIR}/html/html_url.cxx
         ${CMAKE_CURRENT_SOURCE_DIR}/html/html.cxx
         ${CMAKE_CURRENT_SOURCE_DIR}/html/html_url_rewrite.cxx
+        ${CMAKE_CURRENT_SOURCE_DIR}/html/html_url_rewrite_c.cxx
         ${CMAKE_CURRENT_SOURCE_DIR}/html/html_tests.cxx
         ${CMAKE_CURRENT_SOURCE_DIR}/hyperscan_tools.cxx
         ${CMAKE_CURRENT_SOURCE_DIR}/backtrace.cxx
diff --git a/src/libserver/html/html_url_rewrite_c.cxx b/src/libserver/html/html_url_rewrite_c.cxx
new file mode 100644 (file)
index 0000000..1a3606f
--- /dev/null
@@ -0,0 +1,55 @@
+/*
+ * Copyright 2025 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "html_url_rewrite_c.h"
+#include "html_url_rewrite.hxx"
+#include "html.hxx"
+#include "libserver/task.h"
+
+extern "C" {
+
+int rspamd_html_url_rewrite(struct rspamd_task *task,
+                                                       void *html_content,
+                                                       const char *func_name,
+                                                       int part_id,
+                                                       const char *original_html,
+                                                       gsize html_len,
+                                                       char **output_html,
+                                                       gsize *output_len)
+{
+       if (!task || !html_content || !func_name || !original_html) {
+               return -1;
+       }
+
+       auto *hc = static_cast<const rspamd::html::html_content *>(html_content);
+       std::string_view original{original_html, html_len};
+
+       auto result = rspamd::html::process_html_url_rewrite(
+               task, hc, func_name, part_id, original);
+
+       if (!result) {
+               return -1;
+       }
+
+       /* Allocate from task pool */
+       *output_html = (char *) rspamd_mempool_alloc(task->task_pool, result->size());
+       memcpy(*output_html, result->data(), result->size());
+       *output_len = result->size();
+
+       return 0;
+}
+
+}// extern "C"
diff --git a/src/libserver/html/html_url_rewrite_c.h b/src/libserver/html/html_url_rewrite_c.h
new file mode 100644 (file)
index 0000000..371b503
--- /dev/null
@@ -0,0 +1,53 @@
+/*
+ * Copyright 2025 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef RSPAMD_HTML_URL_REWRITE_C_H
+#define RSPAMD_HTML_URL_REWRITE_C_H
+
+#include "config.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct rspamd_task;
+
+/**
+ * C wrapper for HTML URL rewriting, callable from C code
+ * @param task Rspamd task (-1 is returned if NULL)
+ * @param html_content HTML content pointer (void* cast of html_content*; -1 is returned if NULL)
+ * @param func_name Lua function name for rewriting (-1 is returned if NULL)
+ * @param part_id MIME part ID
+ * @param original_html Original HTML content (-1 is returned if NULL)
+ * @param html_len Length of original HTML
+ * @param output_html Output pointer for rewritten HTML (allocated from task pool if successful);
+ *        callers must pass a valid pointer; the returned buffer holds exactly
+ *        *output_len bytes and is not NUL-terminated
+ * @param output_len Output length; callers must pass a valid pointer
+ * @return 0 on success, -1 on error/no rewrite
+ */
+int rspamd_html_url_rewrite(struct rspamd_task *task,
+                                                       void *html_content,
+                                                       const char *func_name,
+                                                       int part_id,
+                                                       const char *original_html,
+                                                       gsize html_len,
+                                                       char **output_html,
+                                                       gsize *output_len);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif//RSPAMD_HTML_URL_REWRITE_C_H
index fd0780c9a154c51058c12078bffaac2b4b4b8eb1..23b890f9237fb8e8180d4a0ea2482133ec8ddefb 100644 (file)
@@ -32,6 +32,7 @@
 
 #include <math.h>
 #include "libmime/received.h"
+#include "libserver/html/html_url_rewrite_c.h"
 
 /***
  * @module rspamd_task
@@ -1275,6 +1276,15 @@ LUA_FUNCTION_DEF(task, get_dns_req);
  */
 LUA_FUNCTION_DEF(task, add_timer);
 
+/***
+ * @method task:rewrite_html_urls(func_name)
+ * Rewrites URLs in HTML parts using the specified Lua callback function.
+ * The callback receives (task, url) and should return the replacement URL or nil.
+ * Only text parts that are HTML and have parsed HTML content are processed.
+ * @param {string} func_name name of Lua function to call for each URL
+ * @return {table|nil} table mapping MIME part number to the rewritten HTML (as rspamd_text),
+ * or nil if the task has no text parts or no part was rewritten
+ */
+LUA_FUNCTION_DEF(task, rewrite_html_urls);
+
 static const struct luaL_reg tasklib_f[] = {
        LUA_INTERFACE_DEF(task, create),
        LUA_INTERFACE_DEF(task, load_from_file),
@@ -1405,6 +1415,7 @@ static const struct luaL_reg tasklib_m[] = {
        LUA_INTERFACE_DEF(task, get_all_named_results),
        LUA_INTERFACE_DEF(task, topointer),
        LUA_INTERFACE_DEF(task, add_timer),
+       LUA_INTERFACE_DEF(task, rewrite_html_urls),
        {"__tostring", rspamd_lua_class_tostring},
        {NULL, NULL}};
 
@@ -7783,6 +7794,75 @@ lua_task_add_timer(lua_State *L)
        return 0;
 }
 
+/*
+ * Implementation of task:rewrite_html_urls(func_name).
+ *
+ * Walks the text parts of the task, runs rspamd_html_url_rewrite() on every
+ * HTML part and collects the rewritten bodies into a Lua table keyed by the
+ * MIME part number; each value is an rspamd_text wrapping the rewritten buffer.
+ * Pushes nil instead when the task is invalid, has no text parts, or when no
+ * part was rewritten. Always returns exactly one Lua value.
+ */
+static int
+lua_task_rewrite_html_urls(lua_State *L)
+{
+       struct rspamd_task *task = lua_check_task(L, 1);
+       /*
+        * luaL_checkstring() raises a Lua error by itself when argument 2 is
+        * not a string, so it never returns NULL and needs no extra check.
+        */
+       const char *func_name = luaL_checkstring(L, 2);
+
+       if (!task || !MESSAGE_FIELD_CHECK(task, text_parts)) {
+               lua_pushnil(L);
+               return 1;
+       }
+
+       /* Create result table; it is replaced by nil below if it stays empty */
+       lua_newtable(L);
+       int results = 0;
+       unsigned int i;
+       void *part;
+
+       /* Iterate through text parts */
+       PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, text_parts), i, part)
+       {
+               struct rspamd_mime_text_part *text_part = (struct rspamd_mime_text_part *) part;
+
+               /* Only process HTML parts that carry parsed HTML content */
+               if (!IS_TEXT_PART_HTML(text_part) || !text_part->html) {
+                       continue;
+               }
+
+               /* The part number is the result key, so a part without a MIME part cannot be reported */
+               if (!text_part->mime_part) {
+                       continue;
+               }
+
+               char *output_html = NULL;
+               gsize output_len = 0;
+
+               /* Process URL rewriting using C wrapper */
+               int ret = rspamd_html_url_rewrite(
+                       task,
+                       text_part->html,
+                       func_name,
+                       text_part->mime_part->part_number,
+                       (const char *) text_part->parsed.begin,
+                       text_part->parsed.len,
+                       &output_html,
+                       &output_len);
+
+               if (ret == 0 && output_html) {
+                       /* Store result in table: table[part_number] = rewritten_html */
+                       lua_pushinteger(L, text_part->mime_part->part_number);
+
+                       /*
+                        * Create rspamd_text userdata for the rewritten content.
+                        * NOTE(review): flags == 0 presumably means the text does not own
+                        * its buffer, so the data would only be valid while the task
+                        * pool is alive - confirm against the rspamd_lua_text flags.
+                        */
+                       struct rspamd_lua_text *t = (struct rspamd_lua_text *) lua_newuserdata(L, sizeof(struct rspamd_lua_text));
+                       rspamd_lua_setclass(L, rspamd_text_classname, -1);
+                       t->flags = 0;
+                       t->start = output_html;
+                       t->len = output_len;
+
+                       lua_settable(L, -3);
+                       results++;
+               }
+       }
+
+       if (results == 0) {
+               /* Nothing was rewritten: drop the empty table and return nil instead */
+               lua_pop(L, 1);
+               lua_pushnil(L);
+       }
+
+       return 1;
+}
+
 /* Init part */
 
 static int