if (end > url_text + 4 &&
rspamd_url_find (pool, url_text, end - url_text, &url_str, FALSE,
- &url_pos) &&
+ &url_pos, NULL) &&
url_str != NULL) {
if (url_pos > 0) {
/*
struct rspamd_url *query_url, *existing;
gchar *url_str;
gint rc;
+ gboolean prefix_added;
if (url->flags & RSPAMD_URL_FLAG_UNNORMALISED) {
url->flags |= RSPAMD_URL_FLAG_OBSCURED;
if (url->querylen > 0) {
if (rspamd_url_find (pool, url->query, url->querylen, &url_str, TRUE,
- NULL)) {
+ NULL, &prefix_added)) {
query_url = rspamd_mempool_alloc0 (pool,
sizeof (struct rspamd_url));
target_tbl = tbl_urls;
}
+ if (prefix_added) {
+ query_url->flags |= RSPAMD_URL_FLAG_SCHEMALESS;
+ }
+
if (query_url->flags
& (RSPAMD_URL_FLAG_UNNORMALISED|RSPAMD_URL_FLAG_OBSCURED|
RSPAMD_URL_FLAG_NUMERIC)) {
rspamd_mempool_t *pool;
gint len;
gboolean is_html;
+ gboolean prefix_added;
guint newline_idx;
GPtrArray *newlines;
const gchar *start;
m.prefix,
(gint)m.m_len,
m.m_begin);
+ cb->prefix_added = TRUE;
}
else {
cb->url_str = rspamd_mempool_alloc (cb->pool, m.m_len + 1);
gboolean
rspamd_url_find (rspamd_mempool_t *pool, const gchar *begin, gsize len,
- gchar **url_str, gboolean is_html, goffset *url_pos)
+ gchar **url_str, gboolean is_html, goffset *url_pos,
+ gboolean *prefix_added)
{
struct url_callback_data cb;
gint ret;
*url_pos = cb.start - begin;
}
+ if (prefix_added) {
+ *prefix_added = cb.prefix_added;
+ }
+
return TRUE;
}
m.prefix,
(gint)m.m_len,
m.m_begin);
+ cb->prefix_added = TRUE;
}
else {
cb->url_str = rspamd_mempool_alloc (cb->pool, m.m_len + 1);
rc = rspamd_url_parse (url, cb->url_str, strlen (cb->url_str), pool);
if (rc == URI_ERRNO_OK && url->hostlen > 0) {
- if (m.add_prefix) {
+ if (cb->prefix_added) {
url->flags |= RSPAMD_URL_FLAG_SCHEMALESS;
+ cb->prefix_added = FALSE;
}
if (cb->func) {
struct rspamd_url *query_url, *existing;
GHashTable *target_tbl = NULL;
gint rc;
+ gboolean prefix_added;
task = cbd->task;
ex = rspamd_mempool_alloc0 (task->task_pool, sizeof (struct rspamd_process_exception));
/* We also search the query for additional url inside */
if (url->querylen > 0) {
if (rspamd_url_find (task->task_pool, url->query, url->querylen,
- &url_str, IS_PART_HTML (cbd->part), NULL)) {
+ &url_str, IS_PART_HTML (cbd->part), NULL, &prefix_added)) {
query_url = rspamd_mempool_alloc0 (task->task_pool,
sizeof (struct rspamd_url));
msg_debug_task ("found url %s in query of url"
" %*s", url_str, url->querylen, url->query);
+ if (prefix_added) {
+ query_url->flags |= RSPAMD_URL_FLAG_SCHEMALESS;
+ }
+
if (query_url->protocol == PROTOCOL_MAILTO) {
if (query_url->userlen > 0) {
target_tbl = task->emails;
gchar *url_str = NULL;
struct rspamd_url *query_url, *existing;
gint rc;
+ gboolean prefix_added;
/* It is just a displayed URL, we should not check it for certain things */
url->flags |= RSPAMD_URL_FLAG_HTML_DISPLAYED|RSPAMD_URL_FLAG_SUBJECT;
/* We also search the query for additional url inside */
if (url->querylen > 0) {
if (rspamd_url_find (task->task_pool, url->query, url->querylen,
- &url_str, FALSE, NULL)) {
+ &url_str, FALSE, NULL, &prefix_added)) {
query_url = rspamd_mempool_alloc0 (task->task_pool,
sizeof (struct rspamd_url));
msg_debug_task ("found url %s in query of url"
" %*s", url_str, url->querylen, url->query);
+ if (prefix_added) {
+ query_url->flags |= RSPAMD_URL_FLAG_SCHEMALESS;
+ }
+
if ((existing = g_hash_table_lookup (task->urls,
query_url)) == NULL) {
g_hash_table_insert (task->urls,