From: Vsevolod Stakhov Date: Fri, 2 Nov 2018 19:09:43 +0000 (+0000) Subject: [Fix] Fix urls insertion in Clickhouse module X-Git-Tag: 1.8.2~88 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=d264b1dc674d09026135e7421f136447f44e14ca;p=thirdparty%2Frspamd.git [Fix] Fix urls insertion in Clickhouse module --- diff --git a/src/plugins/lua/clickhouse.lua b/src/plugins/lua/clickhouse.lua index 87ae8dff64..68a435c6c8 100644 --- a/src/plugins/lua/clickhouse.lua +++ b/src/plugins/lua/clickhouse.lua @@ -489,26 +489,26 @@ local function clickhouse_collect(task) table.insert(row, {}) end - local flatten_urls = function(...) - return fun.totable(fun.map(function(k,_) return k end, ...)) + local flatten_urls = function(f, ...) + return fun.totable(fun.map(function(k,v) return f(k,v) end, ...)) end -- Urls step - local urls_tlds = {} local urls_urls = {} if task:has_urls(false) then for _,u in ipairs(task:get_urls(false)) do - urls_tlds[u:get_tld()] = true if settings['full_urls'] then - urls_urls[u:get_text()] = true + urls_urls[u:get_text()] = u else - urls_urls[u:get_host()] = true + urls_urls[u:get_host()] = u end end - table.insert(row, flatten_urls(urls_tlds)) - table.insert(row, flatten_urls(urls_urls)) + -- Get tlds + table.insert(row, flatten_urls(function(_,u) return u:get_tld() end, urls_urls)) + -- Get hosts/full urls + table.insert(row, flatten_urls(function(k, _) return k end, urls_urls)) else table.insert(row, {}) table.insert(row, {}) @@ -516,9 +516,10 @@ local function clickhouse_collect(task) -- Emails step if task:has_urls(true) then - table.insert(row, flatten_urls(fun.map(function(u) - return string.format('%s@%s', u:get_user(), u:get_host()),true - end, task:get_emails()))) + table.insert(row, flatten_urls(function(k, _) return k end, + fun.map(function(u) + return string.format('%s@%s', u:get_user(), u:get_host()),true + end, task:get_emails()))) else table.insert(row, {}) end