From: Vsevolod Stakhov
Date: Thu, 21 May 2015 09:23:41 +0000 (+0100)
Subject: More fixes to tokenization.
X-Git-Tag: 0.9.4~8
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=c44ddedf8c4950cc679073bb809e8d27b0186951;p=thirdparty%2Frspamd.git

More fixes to tokenization.
---

diff --git a/src/libstat/tokenizers/tokenizers.c b/src/libstat/tokenizers/tokenizers.c
index 086b1719b8..586b3079b0 100644
--- a/src/libstat/tokenizers/tokenizers.c
+++ b/src/libstat/tokenizers/tokenizers.c
@@ -223,6 +223,7 @@ rspamd_tokenizer_get_word (rspamd_fstring_t * buf,
 			if (ex != NULL && p - buf->begin == (gint)ex->pos) {
 				token->begin = "exception";
 				token->len = sizeof ("exception") - 1;
+				processed = token->len;
 				state = skip_exception;
 				continue;
 			}
@@ -257,9 +258,11 @@ set_token:
 		*rl = processed;
 	}
 
-	token->len = p - token->begin;
-	g_assert (token->len > 0);
-	*cur = p;
+	if (token->len == 0) {
+		token->len = p - token->begin;
+		g_assert (token->len > 0);
+		*cur = p;
+	}
 
 	return TRUE;
 }
@@ -275,7 +278,7 @@ rspamd_tokenize_text (gchar *text, gsize len, gboolean is_utf,
 	GList *cur = exceptions;
 	token_get_function func;
 
-	if (len == 0 || text == NULL) {
+	if (text == NULL) {
 		return NULL;
 	}
 