From: Vsevolod Stakhov Date: Tue, 29 Dec 2015 09:47:34 +0000 (+0000) Subject: Add caseless version of rabin-karp substring search X-Git-Tag: 1.1.0~185 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=7b71ab56637b83470fc0bbcfab9f96422dc83c40;p=thirdparty%2Frspamd.git Add caseless version of rabin-karp substring search --- diff --git a/src/libutil/str_util.c b/src/libutil/str_util.c index 8487e8ef2d..62d71dd2c9 100644 --- a/src/libutil/str_util.c +++ b/src/libutil/str_util.c @@ -1118,6 +1118,46 @@ rspamd_substring_search (const gchar *in, gsize inlen, return -1; } +goffset +rspamd_substring_search_caseless (const gchar *in, gsize inlen, + const gchar *srch, gsize srchlen) +{ + gint d, hash_srch, hash_in; + gsize i, j; + gchar c1, c2; + + if (inlen < srchlen) { + return -1; + } + + /* Preprocessing */ + for (d = i = 1; i < srchlen; ++i) { + /* computes d = 2^(m-1) with the left-shift operator */ + d = (d << 1); + } + + for (hash_in = hash_srch = i = 0; i < srchlen; ++i) { + hash_srch = ((hash_srch << 1) + g_ascii_tolower (srch[i])); + hash_in = ((hash_in << 1) + g_ascii_tolower (in[i])); + } + + /* Searching */ + j = 0; + while (j <= inlen - srchlen) { + + if (hash_srch == hash_in && g_ascii_strncasecmp (srch, in + j, srchlen) == 0) { + return (goffset) j; + } + + c1 = g_ascii_tolower (in[j]); + c2 = g_ascii_tolower (in[j + srchlen]); + hash_in = RKHASH (c1, c2, hash_in); + ++j; + } + + return -1; +} + goffset rspamd_string_find_eoh (GString *input) { diff --git a/src/libutil/str_util.h b/src/libutil/str_util.h index 8ea5e3fced..da794bc16b 100644 --- a/src/libutil/str_util.h +++ b/src/libutil/str_util.h @@ -189,6 +189,17 @@ GString *rspamd_header_value_fold (const gchar *name, goffset rspamd_substring_search (const gchar *in, gsize inlen, const gchar *srch, gsize srchlen); +/** + * Search for a substring `srch` in the text `in` using Karp-Rabin algorithm in caseless manner (ASCII only) + * @param in input + * @param inlen input len + * @param srch 
search string + * @param srchlen length of the search string + * @return position of the first substring match or (-1) if not found + */ +goffset rspamd_substring_search_caseless (const gchar *in, gsize inlen, + const gchar *srch, gsize srchlen); + /** * Search for end-of-headers mark in the input string. Returns position just after