set->whitespace_chars = p_strdup(user->pool, *tmp + 17);
} else if (strcmp(*tmp, "normalize") == 0) {
set->normalize = TRUE;
+ } else if (strcmp(*tmp, "no_snowball") == 0) {
+ set->no_snowball = TRUE;
} else {
i_error("fts_lucene: Invalid setting: %s", *tmp);
return -1;
"but Dovecot built without stemmer support");
return -1;
}
- if (set->normalize) {
- i_error("fts_lucene: normalize not currently supported "
- "without stemmer support");
- return -1;
- }
#else
if (set->default_language == NULL)
set->default_language = "english";
crc = crc32_str_more(crc, set->whitespace_chars);
if (set->normalize)
crc = crc32_str_more(crc, "n");
+ if (set->no_snowball)
+ crc = crc32_str_more(crc, "s");
return crc;
}
IndexWriter *writer;
IndexSearcher *searcher;
+ buffer_t *normalizer_buf;
Analyzer *default_analyzer, *cur_analyzer;
ARRAY(struct lucene_analyzer) analyzers;
index->set.default_language = "";
}
#ifdef HAVE_LUCENE_STEMMER
- index->default_analyzer =
- _CLNEW snowball::SnowballAnalyzer(index->normalizer,
- index->set.default_language);
-#else
- index->default_analyzer = _CLNEW standard::StandardAnalyzer();
- i_assert(index->normalizer == NULL);
+ if (!set->no_snowball) {
+ index->default_analyzer =
+ _CLNEW snowball::SnowballAnalyzer(index->normalizer,
+ index->set.default_language);
+ }
#endif
+ else {
+ index->default_analyzer = _CLNEW standard::StandardAnalyzer();
+ if (index->normalizer != NULL) {
+ index->normalizer_buf =
+ buffer_create_dynamic(default_pool, 1024);
+ }
+ }
+
i_array_init(&index->analyzers, 32);
textcat_refcount++;
textcat = NULL;
}
_CLDELETE(index->default_analyzer);
+ if (index->normalizer_buf != NULL)
+ buffer_free(&index->normalizer_buf);
i_free(index->path);
i_free(index);
}
index->doc->add(*_CLNEW Field(_T("box"), index->mailbox_guid, Field::STORE_YES | Field::INDEX_UNTOKENIZED));
}
+ if (index->normalizer_buf != NULL) {
+ buffer_set_used_size(index->normalizer_buf, 0);
+ index->normalizer(data, size, index->normalizer_buf);
+ data = (const unsigned char *)index->normalizer_buf->data;
+ size = index->normalizer_buf->used;
+ }
+
datasize = uni_utf8_strlen_n(data, size) + 1;
wchar_t dest[datasize];
lucene_utf8_n_to_tchar(data, size, dest, datasize);
lucene_get_query_str(struct lucene_index *index,
const TCHAR *key, const char *str, bool fuzzy)
{
- const TCHAR *wvalue = t_lucene_utf8_to_tchar(index, str, TRUE);
- Analyzer *analyzer = guess_analyzer(index, str, strlen(str));
+ const TCHAR *wvalue;
+ Analyzer *analyzer;
+
+ if (index->normalizer_buf != NULL) {
+ buffer_set_used_size(index->normalizer_buf, 0);
+ index->normalizer(str, strlen(str), index->normalizer_buf);
+ buffer_append_c(index->normalizer_buf, '\0');
+ str = (const char *)index->normalizer_buf->data;
+ }
+
+ wvalue = t_lucene_utf8_to_tchar(index, str, TRUE);
+ analyzer = guess_analyzer(index, str, strlen(str));
if (analyzer == NULL)
analyzer = index->default_analyzer;