local res = {}
local nres = 0
-- cta_priority_map maps tostring(url) to a numeric priority score for every
-- URL that an HTML text part reports through get_cta_urls(). It stays nil when
-- params.task has no get_text_parts, when that call returns nothing, or when
-- no entry was collected, so later code can guard on it with a plain `if`.
+ local cta_priority_map
+
+ if params.task and params.task.get_text_parts then
+ local text_parts = params.task:get_text_parts()
+ if text_parts then
+ cta_priority_map = {}
+ for _, part in ipairs(text_parts) do
-- Only HTML parts that actually expose get_cta_urls are inspected.
+ if part.is_html and part:is_html() and part.get_cta_urls then
-- Options-table call; each returned entry is read below as a table with a
-- `url` field and an optional `weight` field.
+ local entries = part:get_cta_urls({original = true, with_weights = true})
+ if type(entries) == 'table' then
+ for _, entry in ipairs(entries) do
+ if entry and entry.url then
+ local url = entry.url
+ local str = tostring(url)
-- A missing weight counts as 0.
+ local weight = entry.weight or 0
-- weight * 10 rounded to the nearest integer, added to a base of 6.
-- NOTE(review): the base 6 presumably ranks CTA URLs relative to the other
-- priority values used by the enclosing function — confirm.
+ local score = 6 + math.floor(weight * 10 + 0.5)
-- Keep the highest score seen for a given URL string.
+ if not cta_priority_map[str] or score > cta_priority_map[str] then
+ cta_priority_map[str] = score
+ end
-- The URL returned by url:get_redirected(), when present, is registered under
-- the same score (again keeping the maximum per key).
+ local redir = url:get_redirected()
+ if redir then
+ local rstr = tostring(redir)
+ if not cta_priority_map[rstr] or score > cta_priority_map[rstr] then
+ cta_priority_map[rstr] = score
+ end
+ end
+ end
+ end
+ end
+ end
+ end
+
-- Collapse an empty map back to nil so "no CTA data" has a single representation.
+ if next(cta_priority_map) == nil then
+ cta_priority_map = nil
+ end
+ end
+ end
+
local function insert_url(str, u)
if not res[str] then
res[str] = u
local esld = u:get_tld()
local str_hash = tostring(u)
+ if cta_priority_map then
+ local cta_pr = cta_priority_map[str_hash]
+ if not cta_pr and flags.redirected then
+ local redir_url = u:get_redirected()
+ if redir_url then
+ cta_pr = cta_priority_map[tostring(redir_url)]
+ end
+ end
+
+ if cta_pr then
+ priority = math.max(priority, cta_pr)
+ end
+ end
+
if esld then
-- Special cases
if (u:get_protocol() ~= 'mailto') and (not flags.html_displayed) then
struct rspamd_mime_text_part *part = lua_check_textpart(L);
unsigned int max_urls = 0;
unsigned int nret = 0;
+ gboolean return_original = FALSE;
+ gboolean include_weights = FALSE;
if (part == NULL) {
return luaL_error(L, "invalid arguments");
}
- /* Get optional max_urls parameter */
- if (lua_gettop(L) >= 2 && lua_isnumber(L, 2)) {
- max_urls = lua_tointeger(L, 2);
+ int top = lua_gettop(L);
+
+ if (top >= 2) {
+ if (lua_istable(L, 2)) {
+ lua_getfield(L, 2, "max");
+ if (lua_isnumber(L, -1)) {
+ max_urls = lua_tointeger(L, -1);
+ }
+ lua_pop(L, 1);
+ lua_getfield(L, 2, "original");
+ if (lua_isboolean(L, -1)) {
+ return_original = lua_toboolean(L, -1);
+ }
+ lua_pop(L, 1);
+ lua_getfield(L, 2, "with_weights");
+ if (lua_isboolean(L, -1)) {
+ include_weights = lua_toboolean(L, -1);
+ }
+ lua_pop(L, 1);
+ }
+ else if (lua_isnumber(L, 2)) {
+ max_urls = lua_tointeger(L, 2);
+ if (top >= 3 && lua_isboolean(L, 3)) {
+ return_original = lua_toboolean(L, 3);
+ }
+ if (top >= 4 && lua_isboolean(L, 4)) {
+ include_weights = lua_toboolean(L, 4);
+ }
+ }
+ else if (lua_isboolean(L, 2)) {
+ return_original = lua_toboolean(L, 2);
+ if (top >= 3 && lua_isnumber(L, 3)) {
+ max_urls = lua_tointeger(L, 3);
+ }
+ if (top >= 4 && lua_isboolean(L, 4)) {
+ include_weights = lua_toboolean(L, 4);
+ }
+ }
}
/* Check if this HTML part has CTA URLs */
/* Heap is already top-K, but in min-heap order - need to reverse for descending */
unsigned int result_size = max_urls > 0 ? MIN(max_urls, heap->n) : heap->n;
- lua_createtable(L, result_size, 0);
+ lua_createtable(L, result_size, include_weights ? 0 : 0);
+
+ GHashTable *seen = g_hash_table_new(g_direct_hash, g_direct_equal);
/* Iterate heap from end to start for descending order */
for (int i = (int) heap->n - 1; i >= 0 && nret < result_size; i--) {
struct rspamd_html_cta_entry *entry = &heap->a[i];
if (entry && entry->url) {
- struct rspamd_lua_url *lua_url;
+ struct rspamd_url *chosen = entry->url;
+
+ if (!return_original && chosen->ext && chosen->ext->linked_url &&
+ chosen->ext->linked_url != chosen) {
+ chosen = chosen->ext->linked_url;
+ }
+
+ if (g_hash_table_lookup(seen, chosen)) {
+ continue;
+ }
- lua_url = lua_newuserdata(L, sizeof(struct rspamd_lua_url));
- rspamd_lua_setclass(L, rspamd_url_classname, -1);
- lua_url->url = entry->url;
- lua_rawseti(L, -2, ++nret);
+ g_hash_table_insert(seen, chosen, chosen);
+
+ if (include_weights) {
+ lua_createtable(L, 0, 2);
+ struct rspamd_lua_url *lua_url = lua_newuserdata(L, sizeof(struct rspamd_lua_url));
+ rspamd_lua_setclass(L, rspamd_url_classname, -1);
+ lua_url->url = chosen;
+ lua_setfield(L, -2, "url");
+ lua_pushnumber(L, entry->weight);
+ lua_setfield(L, -2, "weight");
+ lua_rawseti(L, -2, ++nret);
+ }
+ else {
+ struct rspamd_lua_url *lua_url = lua_newuserdata(L, sizeof(struct rspamd_lua_url));
+ rspamd_lua_setclass(L, rspamd_url_classname, -1);
+ lua_url->url = chosen;
+ lua_rawseti(L, -2, ++nret);
+ }
}
}
+ g_hash_table_unref(seen);
+
return 1;
}
end
-- Picks at most settings.max_urls URLs whose host is listed in
-- settings.redirector_hosts_map and passes each one to
-- url_redirector_process_url(task, u).
-- Lines marked `-` are the previous implementation (a single
-- lua_util.extract_specific_urls call). Lines marked `+` first collect
-- matching URLs returned by get_cta_urls() of HTML text parts and only then
-- fill the remaining slots through lua_util.extract_specific_urls.
local function url_redirector_handler(task)
- local sp_urls = lua_util.extract_specific_urls({
- task = task,
- limit = settings.max_urls,
- filter = function(url)
- local host = url:get_host()
- if settings.redirector_hosts_map:get_key(host) then
- lua_util.debugm(N, task, 'check url %s', tostring(url))
- return true
+ local selected = {} -- URLs to process, in selection order
+ local seen = {} -- tostring(url) -> true; de-duplicates across both passes
+
+ local text_parts = task:get_text_parts()
+ if text_parts then
+ for _, part in ipairs(text_parts) do
+ if part:is_html() and part.get_cta_urls then
-- Positional call: numeric limit first, then a boolean flag.
-- NOTE(review): the flag presumably requests the original (not the linked)
-- URL objects — confirm against the C binding's argument parsing.
+ local cta_urls = part:get_cta_urls(settings.max_urls, true)
+ if cta_urls then
+ for _, url in ipairs(cta_urls) do
+ local host = url:get_host()
+ if host and settings.redirector_hosts_map:get_key(host) then
+ local key = tostring(url)
+ if not seen[key] then
+ lua_util.debugm(N, task, 'prefer CTA url %s for redirector', key)
+ table.insert(selected, url)
+ seen[key] = true
+ if #selected >= settings.max_urls then
+ break
+ end
+ end
+ end
+ end
+ end
+ end
+
-- The break above only leaves the per-part URL loop; this one stops scanning
-- further parts once the limit is reached.
+ if #selected >= settings.max_urls then
+ break
+ end
+ end
+ end
+
+ local remaining = settings.max_urls - #selected -- slots left for the fallback pass
+
+ if remaining > 0 then
+ local sp_urls = lua_util.extract_specific_urls({
+ task = task,
+ limit = remaining,
+ filter = function(url)
+ local host = url:get_host()
-- Accept only redirector hosts that were not already picked from CTA URLs.
+ if host and settings.redirector_hosts_map:get_key(host) then
+ local key = tostring(url)
+ if not seen[key] then
+ lua_util.debugm(N, task, 'consider redirector url %s', key)
+ return true
+ end
+ end
+ return false
+ end,
+ no_cache = true,
+ need_content = true,
+ })
+
+ if sp_urls then
+ for _, u in ipairs(sp_urls) do
+ local key = tostring(u)
+ if not seen[key] then -- results are de-duplicated again before appending
+ table.insert(selected, u)
+ seen[key] = true
+ if #selected >= settings.max_urls then
+ break
+ end
+ end
+ end
end
- end,
- no_cache = true,
- need_content = true,
- })
-
- if sp_urls then
- for _, u in ipairs(sp_urls) do
- url_redirector_process_url(task, u)
end
end
+
+ for _, u in ipairs(selected) do -- process CTA picks first, then fallback picks
+ url_redirector_process_url(task, u)
+ end
end
local opts = rspamd_config:get_all_opt('url_redirector')