git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
[Fix] Fix urls insertion in Clickhouse module
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Fri, 2 Nov 2018 19:09:43 +0000 (19:09 +0000)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Fri, 2 Nov 2018 19:09:43 +0000 (19:09 +0000)
src/plugins/lua/clickhouse.lua

index 87ae8dff64b6cbaaeae367b245bfbc96bebcd26d..68a435c6c868bd0c0d29dbbfbb972141b668c633 100644 (file)
@@ -489,26 +489,26 @@ local function clickhouse_collect(task)
     table.insert(row, {})
   end
 
-  local flatten_urls = function(...)
-    return fun.totable(fun.map(function(k,_) return k end, ...))
+  local flatten_urls = function(f, ...)
+    return fun.totable(fun.map(function(k,v) return f(k,v) end, ...))
   end
 
   -- Urls step
-  local urls_tlds = {}
   local urls_urls = {}
   if task:has_urls(false) then
 
     for _,u in ipairs(task:get_urls(false)) do
-      urls_tlds[u:get_tld()] = true
       if settings['full_urls'] then
-        urls_urls[u:get_text()] = true
+        urls_urls[u:get_text()] = u
       else
-        urls_urls[u:get_host()] = true
+        urls_urls[u:get_host()] = u
       end
     end
 
-    table.insert(row, flatten_urls(urls_tlds))
-    table.insert(row, flatten_urls(urls_urls))
+    -- Get tlds
+    table.insert(row, flatten_urls(function(_,u) return u:get_tld() end, urls_urls))
+    -- Get hosts/full urls
+    table.insert(row, flatten_urls(function(k, _) return k end, urls_urls))
   else
     table.insert(row, {})
     table.insert(row, {})
@@ -516,9 +516,10 @@ local function clickhouse_collect(task)
 
   -- Emails step
   if task:has_urls(true) then
-    table.insert(row, flatten_urls(fun.map(function(u)
-      return string.format('%s@%s', u:get_user(), u:get_host()),true
-    end, task:get_emails())))
+    table.insert(row, flatten_urls(function(k, _) return k end,
+        fun.map(function(u)
+          return string.format('%s@%s', u:get_user(), u:get_host()),true
+        end, task:get_emails())))
   else
     table.insert(row, {})
   end