extern "C" {
-int rspamd_html_enumerate_urls(struct rspamd_task *task,
- void *html_content,
- int part_id,
- struct rspamd_html_url_candidate **candidates,
- gsize *n_candidates)
-{
- if (!task || !html_content || !candidates || !n_candidates) {
- return -1;
- }
-
- auto *hc = static_cast<const rspamd::html::html_content *>(html_content);
-
- // Enumerate candidates using C++ function
- auto cpp_candidates = rspamd::html::enumerate_rewrite_candidates(hc, task, part_id);
-
- if (cpp_candidates.empty()) {
- *candidates = nullptr;
- *n_candidates = 0;
- return 0;
- }
-
- // Allocate C-style array from task pool
- *n_candidates = cpp_candidates.size();
- *candidates = (struct rspamd_html_url_candidate *) rspamd_mempool_alloc(
- task->task_pool,
- sizeof(struct rspamd_html_url_candidate) * cpp_candidates.size());
-
- // Convert C++ candidates to C candidates
- for (size_t i = 0; i < cpp_candidates.size(); i++) {
- const auto &cpp_cand = cpp_candidates[i];
-
- // Allocate strings from task pool
- char *url_str = (char *) rspamd_mempool_alloc(
- task->task_pool,
- cpp_cand.absolute_url.size() + 1);
- memcpy(url_str, cpp_cand.absolute_url.data(), cpp_cand.absolute_url.size());
- url_str[cpp_cand.absolute_url.size()] = '\0';
-
- char *attr_str = (char *) rspamd_mempool_alloc(
- task->task_pool,
- cpp_cand.attr_name.size() + 1);
- memcpy(attr_str, cpp_cand.attr_name.data(), cpp_cand.attr_name.size());
- attr_str[cpp_cand.attr_name.size()] = '\0';
-
- // Get tag name
- const char *tag_name = "unknown";
- gsize tag_len = 7;
- if (cpp_cand.tag) {
- // Use rspamd_html_tag_by_id which returns const char*
- extern const char *rspamd_html_tag_by_id(int id);
- tag_name = rspamd_html_tag_by_id(cpp_cand.tag->id);
- if (tag_name) {
- tag_len = strlen(tag_name);
- }
- else {
- tag_name = "unknown";
- tag_len = 7;
- }
- }
-
- (*candidates)[i].url = url_str;
- (*candidates)[i].url_len = cpp_cand.absolute_url.size();
- (*candidates)[i].attr = attr_str;
- (*candidates)[i].attr_len = cpp_cand.attr_name.size();
- (*candidates)[i].tag = tag_name;
- (*candidates)[i].tag_len = tag_len;
- }
-
- return 0;
-}
-
int rspamd_html_url_rewrite(struct rspamd_task *task,
struct lua_State *L,
void *html_content,
struct lua_State;
-/**
- * URL candidate info for C interface
- */
-struct rspamd_html_url_candidate {
- const char *url; // Absolute URL string (NUL-terminated)
- const char *attr;// Attribute name: "href" or "src" (NUL-terminated)
- const char *tag; // Tag name (NUL-terminated)
- gsize url_len; // Length of URL string
- gsize attr_len; // Length of attr string
- gsize tag_len; // Length of tag string
-};
-
-/**
- * C wrapper for enumerating HTML URL rewrite candidates
- * @param task Rspamd task
- * @param html_content HTML content pointer (void* cast of html_content*)
- * @param part_id MIME part ID
- * @param candidates Output array of candidates (allocated from task pool if successful)
- * @param n_candidates Output count of candidates
- * @return 0 on success, -1 on error
- */
-int rspamd_html_enumerate_urls(struct rspamd_task *task,
- void *html_content,
- int part_id,
- struct rspamd_html_url_candidate **candidates,
- gsize *n_candidates);
-
/**
* C wrapper for HTML URL rewriting
* @param task Rspamd task
${CMAKE_CURRENT_SOURCE_DIR}/lua_util.c
${CMAKE_CURRENT_SOURCE_DIR}/lua_tcp.c
${CMAKE_CURRENT_SOURCE_DIR}/lua_html.cxx
+ ${CMAKE_CURRENT_SOURCE_DIR}/lua_html_url_rewrite.cxx
${CMAKE_CURRENT_SOURCE_DIR}/lua_sqlite3.c
${CMAKE_CURRENT_SOURCE_DIR}/lua_cryptobox.c
${CMAKE_CURRENT_SOURCE_DIR}/lua_map.c
--- /dev/null
+/*
+ * Copyright 2025 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "lua_html_url_rewrite.h"
+#include "libserver/html/html_url_rewrite.hxx"
+#include "libserver/html/html.h"
+#include "libserver/html/html.hxx"
+#include "libserver/task.h"
+#include "message.h"
+#include "lua_common.h"
+
+extern "C" {
+
+/*
+ * Lua binding for task:get_html_urls().
+ *
+ * Expects the task object at stack index 1 and always leaves exactly one
+ * value on the Lua stack:
+ *  - nil when the task is invalid, has no text parts, or no HTML part
+ *    produced any rewrite candidates;
+ *  - otherwise a table keyed by MIME part number whose values are arrays
+ *    of { url = ..., attr = ..., tag = ... } tables.
+ */
+int lua_task_get_html_urls(lua_State *L)
+{
+	struct rspamd_task *task = lua_check_task(L, 1);
+
+	if (task == nullptr || !MESSAGE_FIELD_CHECK(task, text_parts)) {
+		lua_pushnil(L);
+		return 1;
+	}
+
+	/* Top-level result: [part_number] -> array of URL descriptors */
+	lua_newtable(L);
+	int n_parts_added = 0;
+
+	unsigned int idx;
+	void *elt;
+	PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, text_parts), idx, elt)
+	{
+		auto *tp = static_cast<rspamd_mime_text_part *>(elt);
+
+		/* Non-HTML parts (or HTML parts without parsed content) are ignored */
+		if (!(IS_TEXT_PART_HTML(tp) && tp->html)) {
+			continue;
+		}
+
+		/* Parts without UTF-8 raw content are ignored as well */
+		if (!(tp->utf_raw_content && tp->utf_raw_content->len != 0)) {
+			continue;
+		}
+
+		const auto part_no = tp->mime_part->part_number;
+		const auto *hc = static_cast<const rspamd::html::html_content *>(tp->html);
+
+		/* Candidates are consumed directly from the C++ container */
+		auto candidates = rspamd::html::enumerate_rewrite_candidates(hc, task, part_no);
+
+		if (candidates.empty()) {
+			continue;
+		}
+
+		const auto n_cand = candidates.size();
+
+		/* Key first, then the per-part array that becomes its value */
+		lua_pushinteger(L, part_no);
+		lua_createtable(L, static_cast<int>(n_cand), 0);
+
+		for (size_t k = 0; k < n_cand; ++k) {
+			const auto &cand = candidates[k];
+
+			/* Descriptor table with three string fields: url, attr, tag */
+			lua_createtable(L, 0, 3);
+
+			/* Length-delimited pushes: the views need not be NUL-terminated */
+			lua_pushlstring(L, cand.absolute_url.data(), cand.absolute_url.size());
+			lua_setfield(L, -2, "url");
+
+			lua_pushlstring(L, cand.attr_name.data(), cand.attr_name.size());
+			lua_setfield(L, -2, "attr");
+
+			/* Missing tag or unresolvable tag id both fall back to "unknown" */
+			const char *tag_name = cand.tag ? rspamd_html_tag_by_id(cand.tag->id) : nullptr;
+			lua_pushstring(L, tag_name ? tag_name : "unknown");
+			lua_setfield(L, -2, "tag");
+
+			/* Store the descriptor at its 1-based position in the array */
+			lua_rawseti(L, -2, static_cast<int>(k + 1));
+		}
+
+		/* result[part_no] = urls array */
+		lua_settable(L, -3);
+		++n_parts_added;
+	}
+
+	if (n_parts_added == 0) {
+		/* Nothing was collected: drop the empty table and return nil instead */
+		lua_pop(L, 1);
+		lua_pushnil(L);
+	}
+
+	return 1;
+}
+
+}// extern "C"
--- /dev/null
+/*
+ * Copyright 2025 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef RSPAMD_LUA_HTML_URL_REWRITE_H
+#define RSPAMD_LUA_HTML_URL_REWRITE_H
+
+#include "config.h"
+
+struct rspamd_task;
+struct lua_State;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Lua binding for task:get_html_urls(), implemented in C++
+ * (lua_html_url_rewrite.cxx).
+ * Extracts URLs from HTML parts without an intermediate C candidate array.
+ *
+ * The prototype spells the parameter as `struct lua_State *` so that this
+ * header is usable from C translation units even when the `lua_State`
+ * typedef from lua.h is not yet visible; only the struct tag is
+ * forward-declared above.
+ *
+ * @param L Lua state; the task object is expected at stack index 1
+ * @return number of return values on Lua stack (always 1: a table indexed
+ *         by part number, or nil when nothing was found)
+ */
+int lua_task_get_html_urls(struct lua_State *L);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif//RSPAMD_LUA_HTML_URL_REWRITE_H
#include <math.h>
#include "libmime/received.h"
#include "libserver/html/html_url_rewrite_c.h"
+#include "lua_html_url_rewrite.h"
/***
* @module rspamd_task
* Useful for async URL checking workflows where URLs need to be batched.
* @return {table|nil} table indexed by part number, each containing an array of URL info tables with keys: url, attr, tag
*/
-LUA_FUNCTION_DEF(task, get_html_urls);
+/*
+ * Implemented in lua_html_url_rewrite.cxx as C++ binding.
+ * The same prototype is also declared in lua_html_url_rewrite.h, which is
+ * included by this file, so this declaration is a repetition of it.
+ */
+extern int lua_task_get_html_urls(lua_State *L);
static const struct luaL_reg tasklib_f[] = {
LUA_INTERFACE_DEF(task, create),
return 1;
}
-static int
-lua_task_get_html_urls(lua_State *L)
-{
- struct rspamd_task *task = lua_check_task(L, 1);
-
- if (!task || !MESSAGE_FIELD_CHECK(task, text_parts)) {
- lua_pushnil(L);
- return 1;
- }
-
- /* Create result table */
- lua_newtable(L);
- int results = 0;
- unsigned int i;
- void *part;
-
- /* Iterate through text parts */
- PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, text_parts), i, part)
- {
- struct rspamd_mime_text_part *text_part = (struct rspamd_mime_text_part *) part;
-
- /* Only process HTML parts */
- if (!IS_TEXT_PART_HTML(text_part) || !text_part->html) {
- continue;
- }
-
- /* Skip if no UTF-8 content available */
- if (!text_part->utf_raw_content || text_part->utf_raw_content->len == 0) {
- continue;
- }
-
- struct rspamd_html_url_candidate *candidates = NULL;
- gsize n_candidates = 0;
-
- /* Enumerate URLs using C wrapper */
- int ret = rspamd_html_enumerate_urls(
- task,
- text_part->html,
- text_part->mime_part->part_number,
- &candidates,
- &n_candidates);
-
- if (ret == 0 && candidates && n_candidates > 0) {
- /* Create array for this part: table[part_number] = {url_info_1, url_info_2, ...} */
- lua_pushinteger(L, text_part->mime_part->part_number);
- lua_newtable(L); /* URLs array for this part */
-
- for (gsize j = 0; j < n_candidates; j++) {
- lua_pushinteger(L, j + 1); /* 1-indexed array */
- lua_newtable(L); /* URL info table */
-
- /* url field */
- lua_pushstring(L, "url");
- lua_pushstring(L, candidates[j].url);
- lua_settable(L, -3);
-
- /* attr field */
- lua_pushstring(L, "attr");
- lua_pushstring(L, candidates[j].attr);
- lua_settable(L, -3);
-
- /* tag field */
- lua_pushstring(L, "tag");
- lua_pushstring(L, candidates[j].tag);
- lua_settable(L, -3);
-
- lua_settable(L, -3); /* Add url info to URLs array */
- }
-
- lua_settable(L, -3); /* Add part to main table */
- results++;
- }
- }
-
- if (results == 0) {
- lua_pop(L, 1);
- lua_pushnil(L);
- }
-
- return 1;
-}
+/* lua_task_get_html_urls is implemented in lua_html_url_rewrite.cxx */
/* Init part */