const unsigned char *data;
size_t len = tok->token->used;
- if (len > 0) {
+ if (len > 0 && tok->untruncated_length <= tok->max_length) {
/* Remove the trailing apostrophe - it was made
into U+0027 earlier. There can be only a single such
apostrophe, because otherwise the token would have already
*token_r = len == 0 ? "" :
fts_uni_strndup(tok->token->data, len);
buffer_set_used_size(tok->token, 0);
+ tok->untruncated_length = 0;
tok->prev_letter = LETTER_TYPE_NONE;
return (*token_r)[0] != '\0';
}
tok->prev_letter = LETTER_TYPE_NONE;
tok->prev_prev_letter = LETTER_TYPE_NONE;
+ tok->untruncated_length = 0;
buffer_set_used_size(tok->token, 0);
}
{
buffer_append(tok->token, data,
I_MIN(size, tok->max_length - tok->token->used));
+ tok->untruncated_length += size;
}
static int
const unsigned char *data = tok->token->data;
ssize_t len = tok->token->used;
- if (is_one_past_end(tok)) {
+ if (is_one_past_end(tok) &&
+ tok->untruncated_length <= tok->max_length) {
/* delete the last character */
while ((data[len-1] & 0x80) != 0)
len--;
*token_r = fts_uni_strndup(data, len);
buffer_set_used_size(tok->token, 0);
+ tok->untruncated_length = 0;
}
struct letter_fn {
"' ' '' ''' 'quoted text' 'word' 'hlo words' you're bad'''word '''pre post'''",
"'1234567890123456789012345678ä,"
+ "123456789012345678901234567x'ä,"
+ "1234567890123456789012345678x're,"
+ "1234567890123456789012345678x',"
+ "1234567890123456789012345678x'',"
+ "12345678901234567890123456789x',"
+ "12345678901234567890123456789x'',"
+ "123456789012345678901234567890x',"
+ "123456789012345678901234567890x'',"
/* whitespace: with Unicode (UTF-8) U+FF01 (ef bc 81), U+2000 (e2 80 80),
U+205A (e2 81 9a) and U+205F (e2 81 9f) */
"word", "pre", "post", NULL,
"1234567890123456789012345678ä",
+ "123456789012345678901234567x'",
+ "1234567890123456789012345678x'",
+ "1234567890123456789012345678x",
+ "1234567890123456789012345678x",
+ "12345678901234567890123456789x",
+ "12345678901234567890123456789x",
+ "123456789012345678901234567890",
+ "123456789012345678901234567890",
"hello", "world", "And",
"there", "was", "text", "galore",
"word", "pre", "post", NULL,
"1234567890123456789012345678ä",
+ "123456789012345678901234567x'",
+ "1234567890123456789012345678x'",
+ "1234567890123456789012345678x",
+ "1234567890123456789012345678x",
+ "12345678901234567890123456789x",
+ "12345678901234567890123456789x",
+ "123456789012345678901234567890",
+ "123456789012345678901234567890",
"hello", "world", "And",
"there", "was", "text", "galore",