static guint
rspamd_re_cache_process_pcre (struct rspamd_re_runtime *rt,
rspamd_regexp_t *re, const guchar *in, gsize len,
- gboolean is_raw, gboolean is_multiple)
+ gboolean is_raw)
{
guint r = 0;
const gchar *start = NULL, *end = NULL;
+ guint max_hits = rspamd_regexp_get_maxhits (re);
if (len == 0) {
len = strlen (in);
NULL)) {
r++;
- if (!is_multiple || r >= 0xFF) {
+ if (max_hits > 0 && r > max_hits) {
break;
}
}
struct rspamd_re_hyperscan_cbdata *cbdata = ud;
struct rspamd_re_runtime *rt;
struct rspamd_re_cache_elt *pcre_elt;
- guint ret;
+ guint ret, maxhits;
rt = cbdata->rt;
+ pcre_elt = g_ptr_array_index (rt->cache->re, id);
if (flags & HS_FLAG_PREFILTER) {
if (!isset (rt->checked, id)) {
/* We need to match the corresponding pcre first */
- pcre_elt = g_ptr_array_index (rt->cache->re, id);
ret = rspamd_re_cache_process_pcre (rt,
pcre_elt->re,
cbdata->in + from,
to - from,
- FALSE,
- TRUE);
+ FALSE);
setbit (rt->checked, id);
rt->results[id] = ret;
}
}
else {
+ maxhits = rspamd_regexp_get_maxhits (pcre_elt->re);
setbit (rt->checked, id);
- rt->results[id] ++;
+
+ if (maxhits == 0 || rt->results[id] < maxhits) {
+ rt->results[id]++;
+ }
}
return 0;
rspamd_re_cache_process_regexp_data (struct rspamd_re_runtime *rt,
rspamd_regexp_t *re,
const guchar *in, gsize len,
- gboolean is_raw, gboolean is_multiple)
+ gboolean is_raw)
{
struct rspamd_re_cache_elt *elt;
struct rspamd_re_class *re_class;
re_class = rspamd_regexp_get_class (re);
#ifndef WITH_HYPERSCAN
- ret = rspamd_re_cache_process_pcre (rt, re, in, len, is_raw, is_multiple);
+ ret = rspamd_re_cache_process_pcre (rt, re, in, len, is_raw);
setbit (rt->checked, re_id);
rt->results[re_id] = ret;
#else
struct rspamd_re_hyperscan_cbdata cbdata;
if (elt->match_type == RSPAMD_RE_CACHE_PCRE) {
- ret = rspamd_re_cache_process_pcre (rt, re, in, len, is_raw, is_multiple);
+ ret = rspamd_re_cache_process_pcre (rt, re, in, len, is_raw);
setbit (rt->checked, re_id);
rt->results[re_id] = ret;
}
struct rspamd_re_runtime *rt,
rspamd_regexp_t *re,
struct rspamd_re_class *re_class,
- gboolean is_strong,
- gboolean is_multiple)
+ gboolean is_strong)
{
guint ret = 0, i;
GList *cur, *headerlist;
/* Match re */
if (in) {
ret += rspamd_re_cache_process_regexp_data (rt, re, in,
- strlen (in), raw, is_multiple);
+ strlen (in), raw);
debug_task ("checking header %s regexp: %s -> %d",
re_class->type_data,
rspamd_regexp_get_pattern (re), ret);
in = task->raw_headers_content.begin;
len = task->raw_headers_content.len;
ret = rspamd_re_cache_process_regexp_data (rt, re, in,
- len, raw, is_multiple);
+ len, raw);
debug_task ("checking allheader regexp: %s -> %d",
rspamd_regexp_get_pattern (re), ret);
break;
if (len > 0) {
ret += rspamd_re_cache_process_regexp_data (rt, re, in,
- len, raw, is_multiple);
+ len, raw);
debug_task ("checking mime regexp: %s -> %d",
rspamd_regexp_get_pattern (re), ret);
}
raw = FALSE;
ret += rspamd_re_cache_process_regexp_data (rt, re, in,
- len, raw, is_multiple);
+ len, raw);
}
g_hash_table_iter_init (&it, task->emails);
raw = FALSE;
ret += rspamd_re_cache_process_regexp_data (rt, re, in,
- len, raw, is_multiple);
+ len, raw);
}
debug_task ("checking url regexp: %s -> %d",
len = task->msg.len;
ret = rspamd_re_cache_process_regexp_data (rt, re, in,
- len, raw, is_multiple);
+ len, raw);
debug_task ("checking rawbody regexp: %s -> %d",
rspamd_regexp_get_pattern (re), ret);
break;
enum rspamd_re_type type,
gpointer type_data,
gsize datalen,
- gboolean is_strong,
- gboolean is_multiple)
+ gboolean is_strong)
{
guint64 re_id;
struct rspamd_re_class *re_class;
if (isset (rt->checked, re_id)) {
/* Fast path */
- if (is_multiple) {
- return rt->results[re_id];
- }
- else {
- return rt->results[re_id] ? 1 : 0;
- }
+ return rt->results[re_id];
}
else {
/* Slow path */
}
return rspamd_re_cache_exec_re (task, rt, re, re_class,
- is_strong, is_multiple);
+ is_strong);
}
return 0;
hs_flags[i] = 0;
pcre_flags = rspamd_regexp_get_pcre_flags (re);
+
if (pcre_flags & PCRE_UTF8) {
hs_flags[i] |= HS_FLAG_UTF8;
}
if (pcre_flags & PCRE_CASELESS) {
hs_flags[i] |= HS_FLAG_CASELESS;
}
+ if (pcre_flags & PCRE_MULTILINE) {
+ hs_flags[i] |= HS_FLAG_MULTILINE;
+ }
+ if (rspamd_regexp_get_maxhits (re) == 1) {
+ hs_flags[i] |= HS_FLAG_SINGLEMATCH;
+ }
if (hs_compile (rspamd_regexp_get_pattern (re),
hs_flags[i],
* + `url`: url regexp
* - `header`: for header and rawheader regexp means the name of header
* - `strong`: case sensitive match for headers
- * - `multiple`: allow multiple matches
* @return {number} number of regexp occurrences in the task (limited by 255 so far)
*/
LUA_FUNCTION_DEF (task, process_regexp);
{
struct rspamd_task *task = lua_check_task (L, 1);
struct rspamd_lua_regexp *re = NULL;
- gboolean strong = FALSE, multiple = FALSE;
+ gboolean strong = FALSE;
const gchar *type_str = NULL, *header_str = NULL;
gsize header_len = 0;
GError *err = NULL;
* + `url`: url regexp
* - `header`: for header and rawheader regexp means the name of header
* - `strong`: case sensitive match for headers
- * - `multiple`: allow multiple matches
*/
if (task != NULL) {
if (!rspamd_lua_parse_table_arguments (L, 2, &err,
- "*re=U{regexp};*type=S;header=V;strong=B;multiple=B",
+ "*re=U{regexp};*type=S;header=V;strong=B",
&re, &type_str, &header_len, &header_str,
- &strong, &multiple)) {
+ &strong)) {
msg_err_task ("cannot get parameters list: %e", err);
if (err) {
}
else {
ret = rspamd_re_cache_process (task, task->re_rt, re->re, type,
- (gpointer) header_str, header_len, strong, multiple);
+ (gpointer) header_str, header_len, strong);
}
}
}