gpointer data; /**< opaque data */
};
+struct rspamd_tokenizer_config {
+ const ucl_object_t *opts; /**< UCL object the tokenizer was configured from; left unset when only a name string was given */
+ const gchar *name; /**< name of tokenizer; when NULL the runtime lookup falls back to RSPAMD_DEFAULT_TOKENIZER */
+};
+
/**
* Classifier config definition
*/
GHashTable *labels; /**< statfiles with labels */
gchar *metric; /**< metric of this classifier */
gchar *classifier; /**< classifier interface */
- gchar *tokenizer; /**< tokenizer used for classifier */
+ struct rspamd_tokenizer_config *tokenizer; /**< tokenizer used for classifier */
ucl_object_t *opts; /**< other options */
GList *pre_callbacks; /**< list of callbacks that are called before classification */
GList *post_callbacks; /**< list of callbacks that are called after classification */
struct rspamd_classifier_config *ccf;
gboolean res = TRUE;
struct rspamd_rcl_section *stat_section;
+ struct rspamd_tokenizer_config *tkcf = NULL;
ccf = rspamd_config_new_classifier (cfg, NULL);
}
}
}
+ else if (g_ascii_strcasecmp (key, "tokenizer") == 0) { /* "tokenizer" key: build the tokenizer config later attached to the classifier */
+ tkcf = rspamd_mempool_alloc0 (cfg->cfg_pool, sizeof (*tkcf)); /* allocated from the config pool; presumably zero-filled (alloc0), so name/opts start as NULL -- TODO confirm */
+ if (ucl_object_type (val) == UCL_STRING) { /* short form: the value itself is the tokenizer name */
+ tkcf->name = ucl_object_tostring (val);
+ }
+ else if (ucl_object_type (val) == UCL_OBJECT) { /* long form: an object that carries a "name" key */
+ cur = ucl_object_find_key (val, "name");
+ if (cur != NULL) { /* without a "name" key neither field is assigned */
+ tkcf->name = ucl_object_tostring (cur);
+ tkcf->opts = val; /* keep the whole object (including "name") as the tokenizer options */
+ }
+ }
+ }
}
}
}
}
ccf->opts = (ucl_object_t *)obj;
+ ccf->tokenizer = tkcf;
cfg->classifiers = g_list_prepend (cfg->classifiers, ccf);
rspamd_rcl_parse_struct_string,
G_STRUCT_OFFSET (struct rspamd_classifier_config, classifier),
0);
- rspamd_rcl_add_default_handler (sub,
- "tokenizer",
- rspamd_rcl_parse_struct_string,
- G_STRUCT_OFFSET (struct rspamd_classifier_config, tokenizer),
- 0);
rspamd_rcl_add_default_handler (sub,
"min_tokens",
rspamd_rcl_parse_struct_integer,
};
static struct rspamd_tokenizer_runtime *
-rspamd_stat_get_tokenizer_runtime (const gchar *name, rspamd_mempool_t *pool,
+rspamd_stat_get_tokenizer_runtime (struct rspamd_tokenizer_config *cf,
+ rspamd_mempool_t *pool,
struct rspamd_tokenizer_runtime **ls)
{
struct rspamd_tokenizer_runtime *tok = NULL, *cur;
+ const gchar *name;
+
+ if (cf == NULL || cf->name == NULL) {
+ name = RSPAMD_DEFAULT_TOKENIZER;
+ }
+ else {
+ name = cf->name;
+ }
LL_FOREACH (*ls, cur) {
if (strcmp (cur->name, name) == 0) {
* Tokenize task using the tokenizer specified
*/
static void
-rspamd_stat_process_tokenize (struct rspamd_stat_ctx *st_ctx,
+rspamd_stat_process_tokenize (struct rspamd_tokenizer_config *cf,
+ struct rspamd_stat_ctx *st_ctx,
struct rspamd_task *task, struct rspamd_tokenizer_runtime *tok)
{
struct mime_text_part *part;
/*
* XXX: Use normalized words if needed here
*/
- tok->tokenizer->tokenize_func (tok->tokenizer, task->task_pool,
+ tok->tokenizer->tokenize_func (cf, task->task_pool,
part->words, tok->tokens, part->is_utf);
}
if (sub != NULL) {
words = rspamd_tokenize_text (sub, strlen (sub), TRUE, 0, NULL);
if (words != NULL) {
- tok->tokenizer->tokenize_func (tok->tokenizer,
+ tok->tokenizer->tokenize_func (cf,
task->task_pool,
words,
tok->tokens,
if (tok == NULL) {
g_set_error (err, rspamd_stat_quark (), 500, "type %s is not defined"
- "for tokenizers", clcf->tokenizer);
+ /* The leading space stops the adjacent literals fusing into "definedfor"; the name is checked too because a tokenizer config may exist with no name set. */
+ " for tokenizers",
+ (clcf->tokenizer != NULL && clcf->tokenizer->name != NULL) ?
+ clcf->tokenizer->name : "unknown");
return RSPAMD_STAT_PROCESS_ERROR;
}
- rspamd_stat_process_tokenize (st_ctx, task, tok);
+ rspamd_stat_process_tokenize (clcf->tokenizer, st_ctx, task, tok);
cur = g_list_next (cur);
}
if (tok == NULL) {
g_set_error (err, rspamd_stat_quark (), 500, "type %s is not defined"
- "for tokenizers", clcf->tokenizer);
+ /* The leading space stops the adjacent literals fusing into "definedfor"; the name is checked too because a tokenizer config may exist with no name set. */
+ " for tokenizers",
+ (clcf->tokenizer != NULL && clcf->tokenizer->name != NULL) ?
+ clcf->tokenizer->name : "unknown");
return RSPAMD_STAT_PROCESS_ERROR;
}
- rspamd_stat_process_tokenize (st_ctx, task, tok);
+ rspamd_stat_process_tokenize (clcf->tokenizer, st_ctx, task, tok);
cur = g_list_next (cur);
}