From: Vsevolod Stakhov Date: Thu, 23 Jul 2015 15:11:49 +0000 (+0100) Subject: Start work on new HTML rules. X-Git-Tag: 1.0.0~299 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=34bed7350efbdd0a3b135b4b7dbf508bdbae9c1a;p=thirdparty%2Frspamd.git Start work on new HTML rules. --- diff --git a/conf/lua/html.lua b/conf/lua/html.lua new file mode 100644 index 0000000000..bd7abd987a --- /dev/null +++ b/conf/lua/html.lua @@ -0,0 +1,53 @@ +-- Licensed to the Apache Software Foundation (ASF) under one or more +-- contributor license agreements. See the NOTICE file distributed with +-- this work for additional information regarding copyright ownership. +-- The ASF licenses this file to you under the Apache License, Version 2.0 +-- (the "License"); you may not use this file except in compliance with +-- the License. You may obtain a copy of the License at: +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. + +local reconf = config['regexp'] +local rspamd_regexp = require "rspamd_regexp" + +-- Messages that have only HTML part +reconf['MIME_HTML_ONLY'] = 'has_only_html_part()' + +local function check_html_image(task, min, max) + local tp = task:get_text_parts() + + for _,p in ipairs(tp) do + if p:is_html() then + local hc = p:get_html() + local len = p:get_raw_length() + + if len >= min and len < max then + local images = hc:get_images() + + if images then + for _,i in ipairs(images) do + if i['embedded'] then + return true + end + end + end + end + end + end +end + +rspamd_config.HTML_SHORT_LINK_IMG_1 = function(task) + return check_html_image(task, 0, 1024) +end +rspamd_config.HTML_SHORT_LINK_IMG_2 = function(task) + return check_html_image(task, 1024, 1536) +end +rspamd_config.HTML_SHORT_LINK_IMG_3 = function(task) + return check_html_image(task, 1536, 2048) +end \ No newline at end of file diff --git a/conf/lua/regexp/headers.lua b/conf/lua/regexp/headers.lua index e6f079e860..e8bc7af448 100644 --- a/conf/lua/regexp/headers.lua +++ b/conf/lua/regexp/headers.lua @@ -70,10 +70,6 @@ reconf['R_MISSING_CHARSET']= string.format('content_type_is_type(text) & !conten -- Subject seems to be spam reconf['R_SAJDING'] = 'Subject=/\\bsajding(?:om|a)?\\b/iH' --- Messages that have only HTML part -reconf['MIME_HTML_ONLY'] = 'has_only_html_part()' - - -- Find forged Outlook MUA -- Yahoo groups messages local yahoo_bulk = 'Received=/from \\[\\S+\\] by \\S+\\.(?:groups|scd|dcn)\\.yahoo\\.com with NNFMP/H' diff --git a/conf/lua/rspamd.lua b/conf/lua/rspamd.lua index df480a72ce..bb1709c692 100644 --- a/conf/lua/rspamd.lua +++ b/conf/lua/rspamd.lua @@ -32,16 +32,13 @@ dofile('regexp/headers.lua') dofile('regexp/lotto.lua') dofile('regexp/fraud.lua') dofile('regexp/drugs.lua') +dofile('html.lua') local reconf = config['regexp'] local util = require "rspamd_util" -- Uncategorized rules -local html_length_1024_1536 = 'has_content_part_len(\'text\', \'html\', 1024, 1536)' -local html_link_image = '/