From: Vsevolod Stakhov Date: Sat, 11 Oct 2025 14:08:13 +0000 (+0100) Subject: [Feature] Add Lua binding for HTML URL rewriting (task:rewrite_html_urls) X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=519930b0259f637b9efed280bdba1431dea8701f;p=thirdparty%2Frspamd.git [Feature] Add Lua binding for HTML URL rewriting (task:rewrite_html_urls) --- diff --git a/src/libserver/CMakeLists.txt b/src/libserver/CMakeLists.txt index 73b04856d7..721e09a65c 100644 --- a/src/libserver/CMakeLists.txt +++ b/src/libserver/CMakeLists.txt @@ -43,6 +43,7 @@ SET(LIBRSPAMDSERVERSRC ${CMAKE_CURRENT_SOURCE_DIR}/html/html_url.cxx ${CMAKE_CURRENT_SOURCE_DIR}/html/html.cxx ${CMAKE_CURRENT_SOURCE_DIR}/html/html_url_rewrite.cxx + ${CMAKE_CURRENT_SOURCE_DIR}/html/html_url_rewrite_c.cxx ${CMAKE_CURRENT_SOURCE_DIR}/html/html_tests.cxx ${CMAKE_CURRENT_SOURCE_DIR}/hyperscan_tools.cxx ${CMAKE_CURRENT_SOURCE_DIR}/backtrace.cxx diff --git a/src/libserver/html/html_url_rewrite_c.cxx b/src/libserver/html/html_url_rewrite_c.cxx new file mode 100644 index 0000000000..1a3606f59b --- /dev/null +++ b/src/libserver/html/html_url_rewrite_c.cxx @@ -0,0 +1,55 @@ +/* + * Copyright 2025 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "html_url_rewrite_c.h" +#include "html_url_rewrite.hxx" +#include "html.hxx" +#include "libserver/task.h" + +extern "C" { + +int rspamd_html_url_rewrite(struct rspamd_task *task, + void *html_content, + const char *func_name, + int part_id, + const char *original_html, + gsize html_len, + char **output_html, + gsize *output_len) +{ + if (!task || !html_content || !func_name || !original_html) { + return -1; + } + + auto *hc = static_cast(html_content); + std::string_view original{original_html, html_len}; + + auto result = rspamd::html::process_html_url_rewrite( + task, hc, func_name, part_id, original); + + if (!result) { + return -1; + } + + /* Allocate from task pool */ + *output_html = (char *) rspamd_mempool_alloc(task->task_pool, result->size()); + memcpy(*output_html, result->data(), result->size()); + *output_len = result->size(); + + return 0; +} + +}// extern "C" diff --git a/src/libserver/html/html_url_rewrite_c.h b/src/libserver/html/html_url_rewrite_c.h new file mode 100644 index 0000000000..371b50346a --- /dev/null +++ b/src/libserver/html/html_url_rewrite_c.h @@ -0,0 +1,53 @@ +/* + * Copyright 2025 Vsevolod Stakhov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef RSPAMD_HTML_URL_REWRITE_C_H +#define RSPAMD_HTML_URL_REWRITE_C_H + +#include "config.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct rspamd_task; + +/** + * C-linkage wrapper around rspamd::html::process_html_url_rewrite() + * @param task Rspamd task; its task_pool provides the output buffer (must not be NULL) + * @param html_content HTML content pointer (void* cast of html_content*; must not be NULL) + * @param func_name Lua function name for rewriting (must not be NULL) + * @param part_id MIME part ID, passed through to the rewriter + * @param original_html Original HTML content (must not be NULL; read by length, need not be NUL-terminated) + * @param html_len Length of original_html in bytes + * @param output_html Receives the rewritten HTML on success: output_len bytes allocated from the task pool, not NUL-terminated; not written on failure + * @param output_len Receives the byte length of the rewritten HTML on success; not written on failure + * @return 0 on success, -1 on a NULL input or when the rewriter produces no result + */ +int rspamd_html_url_rewrite(struct rspamd_task *task, + void *html_content, + const char *func_name, + int part_id, + const char *original_html, + gsize html_len, + char **output_html, + gsize *output_len); + +#ifdef __cplusplus +} +#endif + +#endif//RSPAMD_HTML_URL_REWRITE_C_H diff --git a/src/lua/lua_task.c b/src/lua/lua_task.c index fd0780c9a1..23b890f923 100644 --- a/src/lua/lua_task.c +++ b/src/lua/lua_task.c @@ -32,6 +32,7 @@ #include #include "libmime/received.h" +#include "libserver/html/html_url_rewrite_c.h" /*** * @module rspamd_task @@ -1275,6 +1276,15 @@ LUA_FUNCTION_DEF(task, get_dns_req); */ LUA_FUNCTION_DEF(task, add_timer); +/*** + * @method task:rewrite_html_urls(func_name) + * Rewrites URLs in HTML parts using the specified Lua callback function. + * The callback receives (task, url) and should return the replacement URL or nil. 
+ * @param {string} func_name name of Lua function to call for each URL + * @return {table|nil} table of rewritten HTML parts indexed by part number, or nil on error + */ +LUA_FUNCTION_DEF(task, rewrite_html_urls); + static const struct luaL_reg tasklib_f[] = { LUA_INTERFACE_DEF(task, create), LUA_INTERFACE_DEF(task, load_from_file), @@ -1405,6 +1415,7 @@ static const struct luaL_reg tasklib_m[] = { LUA_INTERFACE_DEF(task, get_all_named_results), LUA_INTERFACE_DEF(task, topointer), LUA_INTERFACE_DEF(task, add_timer), + LUA_INTERFACE_DEF(task, rewrite_html_urls), {"__tostring", rspamd_lua_class_tostring}, {NULL, NULL}}; @@ -7783,6 +7794,75 @@ lua_task_add_timer(lua_State *L) return 0; } +static int +lua_task_rewrite_html_urls(lua_State *L) +{ + struct rspamd_task *task = lua_check_task(L, 1); + const char *func_name = luaL_checkstring(L, 2); + + if (!func_name) { + return luaL_error(L, "invalid arguments: function name expected"); + } + + if (!task || !MESSAGE_FIELD_CHECK(task, text_parts)) { + lua_pushnil(L); + return 1; + } + + /* Create result table */ + lua_newtable(L); + int results = 0; + unsigned int i; + void *part; + + /* Iterate through text parts */ + PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, text_parts), i, part) + { + struct rspamd_mime_text_part *text_part = (struct rspamd_mime_text_part *) part; + + /* Only process HTML parts */ + if (!IS_TEXT_PART_HTML(text_part) || !text_part->html) { + continue; + } + + char *output_html = NULL; + gsize output_len = 0; + + /* Process URL rewriting using C wrapper */ + int ret = rspamd_html_url_rewrite( + task, + text_part->html, + func_name, + text_part->mime_part->part_number, + (const char *) text_part->parsed.begin, + text_part->parsed.len, + &output_html, + &output_len); + + if (ret == 0 && output_html) { + /* Store result in table: table[part_number] = rewritten_html */ + lua_pushinteger(L, text_part->mime_part->part_number); + + /* Create rspamd_text userdata for the rewritten content */ + struct rspamd_lua_text 
*t = (struct rspamd_lua_text *) lua_newuserdata(L, sizeof(struct rspamd_lua_text)); + rspamd_lua_setclass(L, rspamd_text_classname, -1); + t->flags = 0; + t->start = output_html; + t->len = output_len; + + lua_settable(L, -3); + results++; + } + } + + if (results == 0) { + lua_pop(L, 1); + lua_pushnil(L); + } + + return 1; +} + /* Init part */ static int