+/*
+ * Copyright 2023 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
/**
* @file scan_result.h
* Scan result holder
gssize opts_len; /**< total size of all options (negative if truncated option is added) */
guint nshots;
int flags;
- struct rspamd_symbol_result *next;
+ struct rspamd_symbol_result *next; /**< for shadow results */
};
gchar st;
} url_match_t;
-#define URL_FLAG_NOHTML (1u << 0u)
-#define URL_FLAG_TLD_MATCH (1u << 1u)
-#define URL_FLAG_STAR_MATCH (1u << 2u)
-#define URL_FLAG_REGEXP (1u << 3u)
+#define URL_MATCHER_FLAG_NOHTML (1u << 0u) /* matcher is not tried when cb->how == RSPAMD_URL_FIND_STRICT (non-html like urls in html texts) */
+#define URL_MATCHER_FLAG_TLD_MATCH (1u << 1u) /* rspamd_url_trie_is_match checks pos for valid chars for such matchers */
+#define URL_MATCHER_FLAG_STAR_MATCH (1u << 2u) /* source line started with '*'; one more tld component is skipped on match */
+#define URL_MATCHER_FLAG_REGEXP (1u << 3u) /* tested when static_matchers patterns are added to the search tries; NOTE(review): presumably marks the pattern as a regexp - confirm */
struct url_callback_data;
0},
{"sip:", "", url_web_start, url_web_end,
0},
- {"www.", "http://", url_web_start, url_web_end,
- 0},
+ {"www\\.[0-9a-z]", "http://", url_web_start, url_web_end,
+ URL_MATCHER_FLAG_REGEXP},
{"ftp.", "ftp://", url_web_start, url_web_end,
0},
/* Likely emails */
continue;
}
- flags = URL_FLAG_NOHTML | URL_FLAG_TLD_MATCH;
+ flags = URL_MATCHER_FLAG_NOHTML | URL_MATCHER_FLAG_TLD_MATCH;
if (linebuf[0] == '*') {
- flags |= URL_FLAG_STAR_MATCH;
+ flags |= URL_MATCHER_FLAG_STAR_MATCH;
p = strchr(linebuf, '.');
if (p == NULL) {
gint n = G_N_ELEMENTS(static_matchers), i;
for (i = 0; i < n; i++) {
- if (static_matchers[i].flags & URL_FLAG_REGEXP) {
+ if (static_matchers[i].flags & URL_MATCHER_FLAG_REGEXP) {
rspamd_multipattern_add_pattern(url_scanner->search_trie_strict,
static_matchers[i].pattern,
RSPAMD_MULTIPATTERN_ICASE | RSPAMD_MULTIPATTERN_UTF8 |
if (sc->matchers_full) {
for (i = 0; i < n; i++) {
- if (static_matchers[i].flags & URL_FLAG_REGEXP) {
+ if (static_matchers[i].flags & URL_MATCHER_FLAG_REGEXP) {
rspamd_multipattern_add_pattern(url_scanner->search_trie_full,
static_matchers[i].pattern,
RSPAMD_MULTIPATTERN_ICASE | RSPAMD_MULTIPATTERN_UTF8 |
strnum);
ndots = 1;
- if (matcher->flags & URL_FLAG_STAR_MATCH) {
+ if (matcher->flags & URL_MATCHER_FLAG_STAR_MATCH) {
/* Skip one more tld component */
ndots++;
}
matcher = &g_array_index(url_scanner->matchers_full, struct url_matcher,
strnum);
- if (matcher->flags & URL_FLAG_STAR_MATCH) {
+ if (matcher->flags & URL_MATCHER_FLAG_STAR_MATCH) {
/* Skip one more tld component */
ndots = 2;
}
rspamd_url_trie_is_match(struct url_matcher *matcher, const gchar *pos,
const gchar *end, const gchar *newline_pos)
{
- if (matcher->flags & URL_FLAG_TLD_MATCH) {
+ if (matcher->flags & URL_MATCHER_FLAG_TLD_MATCH) {
/* Immediately check pos for valid chars */
if (pos < end) {
if (pos != newline_pos && !g_ascii_isspace(*pos) && *pos != '/' && *pos != '?' &&
matcher = &g_array_index(cb->matchers, struct url_matcher,
strnum);
- if ((matcher->flags & URL_FLAG_NOHTML) && cb->how == RSPAMD_URL_FIND_STRICT) {
+ if ((matcher->flags & URL_MATCHER_FLAG_NOHTML) && cb->how == RSPAMD_URL_FIND_STRICT) {
/* Do not try to match non-html like urls in html texts */
return 0;
}
strnum);
pool = cb->pool;
- if ((matcher->flags & URL_FLAG_NOHTML) && cb->how == RSPAMD_URL_FIND_STRICT) {
+ if ((matcher->flags & URL_MATCHER_FLAG_NOHTML) && cb->how == RSPAMD_URL_FIND_STRICT) {
/* Do not try to match non-html like urls in html texts, continue matching */
return 0;
}