From d1623103c73ed7dd8b15b2060ad656fddbed7b46 Mon Sep 17 00:00:00 2001 From: Teemu Huovila Date: Mon, 17 Aug 2015 13:14:44 +0300 Subject: [PATCH] lib-fts: Update comment on tr29 rules. --- src/lib-fts/fts-tokenizer-generic.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/lib-fts/fts-tokenizer-generic.c b/src/lib-fts/fts-tokenizer-generic.c index e30a9b1cda..835413f0ae 100644 --- a/src/lib-fts/fts-tokenizer-generic.c +++ b/src/lib-fts/fts-tokenizer-generic.c @@ -594,6 +594,10 @@ static struct letter_fn letter_fns[] = { #29, but tailored for FTS purposes. http://www.unicode.org/reports/tr29/ + Note: The text of tr29 is a living standard, so it keeps + changing. In newer specs some character classes are combined, like AHLetter + (ALetter | Hebrew_Letter) and MidNumLetQ (MidNumLet | Single_Quote). + Adaptions: * No word boundary at Start-Of-Text or End-of-Text (Wb1 and WB2). * Break just once, not before and after. -- 2.47.3