From: Vsevolod Stakhov Date: Sat, 13 Oct 2018 13:57:56 +0000 (+0100) Subject: [Feature] Add only unique elements to Clickhouse url arrays X-Git-Tag: 1.8.1~27 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=1fa77c23ac5d47b5cc1df15a6901ddedcbd2ec41;p=thirdparty%2Frspamd.git [Feature] Add only unique elements to Clickhouse url arrays --- diff --git a/src/plugins/lua/clickhouse.lua b/src/plugins/lua/clickhouse.lua index 346ea2e979..d95325b24c 100644 --- a/src/plugins/lua/clickhouse.lua +++ b/src/plugins/lua/clickhouse.lua @@ -493,18 +493,22 @@ local function clickhouse_collect(task) local urls_urls = {} if task:has_urls(false) then for _,u in ipairs(task:get_urls(false)) do - table.insert(urls_tlds, u:get_tld()) + urls_tlds[u:get_tld()] = true if settings['full_urls'] then - table.insert(urls_urls, u:get_text()) + urls_urls[u:get_text()] = true else - table.insert(urls_urls, u:get_host()) + urls_urls[u:get_host()] = true end end end + local flatten_urls = function(...) + return fun.totable(fun.map(function(k,_) return k end, ...)) + end + if #urls_tlds > 0 then - table.insert(row, urls_tlds) - table.insert(row, urls_urls) + table.insert(row, flatten_urls(urls_tlds)) + table.insert(row, flatten_urls(urls_urls)) else table.insert(row, {}) table.insert(row, {}) @@ -512,8 +516,8 @@ local function clickhouse_collect(task) -- Emails step if task:has_urls(true) then - table.insert(row, fun.totable(fun.map(function(u) - return string.format('%s@%s', u:get_user(), u:get_host()) + table.insert(row, flatten_urls(fun.map(function(u) + return string.format('%s@%s', u:get_user(), u:get_host()),true end, task:get_emails()))) else table.insert(row, {})