git.ipfire.org Git - thirdparty/rspamd.git/commitdiff
[Minor] Strip visible parts of urls using utf rules
author Vsevolod Stakhov <vsevolod@highsecure.ru>
Fri, 14 May 2021 15:59:30 +0000 (16:59 +0100)
committer Vsevolod Stakhov <vsevolod@highsecure.ru>
Fri, 14 May 2021 15:59:30 +0000 (16:59 +0100)
src/libserver/html.c

index 326c8faccd64a871d7a2d2d7ac766a809b9c2205..30c2c022bf27cb13418deceadf3f0bef529acf42 100644 (file)
@@ -2617,8 +2617,43 @@ rspamd_html_check_displayed_url (rspamd_mempool_t *pool,
        rspamd_strlcpy (url->visible_part, dest->data + href_offset,
                        dest->len - href_offset + 1);
        dlen = dest->len - href_offset;
-       url->visible_part =
-                       (gchar *)rspamd_string_len_strip (url->visible_part, &dlen, " \t\v\r\n");
+
+       /* Strip unicode spaces from the start and the end */
+       gchar *p = url->visible_part, *end = url->visible_part + dlen;
+       gint i = 0;
+
+       while (i < dlen) {
+               UChar32 uc;
+               gint prev_i = i;
+
+               U8_NEXT(p, i, dlen, uc);
+
+               if (!u_isspace (uc)) {
+                       i = prev_i;
+                       break;
+               }
+       }
+
+       p += i;
+       dlen -= i;
+       url->visible_part = p;
+       i = end - url->visible_part - 1;
+
+       if (i > 0) {
+               gint32 dl = dlen;
+
+               while (i > 0) {
+                       UChar32 uc;
+
+                       U8_PREV(p, i, dl, uc);
+
+                       if (!u_isspace (uc)) {
+                               break;
+                       }
+               }
+
+               dlen = i;
+       }
 
 
        rspamd_html_url_is_phished (pool, url,