return;
}
- session->learn_symbol = *cmd_args;
+ session->learn_symbol = memory_pool_strdup (session->session_pool, *cmd_args);
cl = g_hash_table_lookup (session->cfg->classifiers_symbols, *cmd_args);
if (cl == NULL) {
r = snprintf (out_buf, sizeof (out_buf), "statfile %s is not defined" CRLF, *cmd_args);
{
struct controller_session *session = (struct controller_session *)arg;
struct classifier_ctx *cls_ctx;
- int len, i;
+ int len, i, r;
char *s, **params, *cmd, out_buf[128];
+ struct worker_task *task;
+ struct mime_text_part *part;
GList *comp_list, *cur = NULL;
GTree *tokens = NULL;
- GByteArray *content = NULL;
- struct mime_part *p;
f_str_t c;
switch (session->state) {
break;
case STATE_LEARN:
session->learn_buf = in;
- process_learn (session);
- while ((content = get_next_text_part (session->session_pool, session->parts, &cur)) != NULL) {
- c.begin = content->data;
- c.len = content->len;
+ task = construct_task (session->worker);
+
+ task->msg = memory_pool_alloc (task->task_pool, sizeof (f_str_t));
+ task->msg->begin = in->begin;
+ task->msg->len = in->len;
+
+ r = process_message (task);
+ if (r == -1) {
+ msg_warn ("read_socket: processing of message failed");
+ free_task (task, FALSE);
+ session->state = STATE_REPLY;
+ r = snprintf (out_buf, sizeof (out_buf), "cannot process message" CRLF);
+ rspamd_dispatcher_write (session->dispatcher, out_buf, r, FALSE, FALSE);
+ return FALSE;
+ }
+ cur = g_list_first (task->text_parts);
+ while (cur) {
+ part = cur->data;
+ if (part->is_empty) {
+ cur = g_list_next (cur);
+ continue;
+ }
+ c.begin = part->content->data;
+ c.len = part->content->len;
+
if (!session->learn_classifier->tokenizer->tokenize_func (session->learn_classifier->tokenizer,
session->session_pool, &c, &tokens)) {
i = snprintf (out_buf, sizeof (out_buf), "learn fail, tokenizer error" CRLF);
+ free_task (task, FALSE);
if (!rspamd_dispatcher_write (session->dispatcher, out_buf, i, FALSE, FALSE)) {
return FALSE;
}
session->state = STATE_REPLY;
return TRUE;
}
+ cur = g_list_next (cur);
}
cls_ctx = session->learn_classifier->classifier->init_func (session->session_pool, session->learn_classifier);
session->learn_classifier->classifier->learn_func (cls_ctx, session->worker->srv->statfile_pool,
session->learn_symbol, tokens, session->in_class);
session->worker->srv->stat->messages_learned ++;
- /* Clean learned parts */
- while ((cur = g_list_first (session->parts))) {
- session->parts = g_list_remove_link (session->parts, cur);
- p = (struct mime_part *)cur->data;
- g_byte_array_free (p->content, FALSE);
- g_list_free_1 (cur);
- }
+ free_task (task, FALSE);
i = snprintf (out_buf, sizeof (out_buf), "learn ok" CRLF);
if (!rspamd_dispatcher_write (session->dispatcher, out_buf, i, FALSE, FALSE)) {
return FALSE;
return 0;
}
-#ifdef GMIME24
-static void
-mime_learn_foreach_callback (GMimeObject *parent, GMimeObject *part, gpointer user_data)
-#else
-static void
-mime_learn_foreach_callback (GMimeObject *part, gpointer user_data)
-#endif
-{
- struct controller_session *session = (struct controller_session *)user_data;
- struct mime_part *mime_part;
- GMimeContentType *type;
- GMimeDataWrapper *wrapper;
- GMimeStream *part_stream;
- GByteArray *part_content;
-
- /* 'part' points to the current part node that g_mime_message_foreach_part() is iterating over */
-
- /* find out what class 'part' is... */
- if (GMIME_IS_MESSAGE_PART (part)) {
- /* message/rfc822 or message/news */
- GMimeMessage *message;
-
- /* g_mime_message_foreach_part() won't descend into
- child message parts, so if we want to count any
- subparts of this child message, we'll have to call
- g_mime_message_foreach_part() again here. */
- message = g_mime_message_part_get_message ((GMimeMessagePart *) part);
-#ifdef GMIME24
- g_mime_message_foreach (message, mime_learn_foreach_callback, session);
-#else
- g_mime_message_foreach_part (message, mime_learn_foreach_callback, session);
-#endif
- g_object_unref (message);
- } else if (GMIME_IS_MESSAGE_PARTIAL (part)) {
- /* message/partial */
-
- /* this is an incomplete message part, probably a
- large message that the sender has broken into
- smaller parts and is sending us bit by bit. we
- could save some info about it so that we could
- piece this back together again once we get all the
- parts? */
- } else if (GMIME_IS_MULTIPART (part)) {
- /* multipart/mixed, multipart/alternative, multipart/related, multipart/signed, multipart/encrypted, etc... */
-
- /* we'll get to finding out if this is a signed/encrypted multipart later... */
- } else if (GMIME_IS_PART (part)) {
- /* a normal leaf part, could be text/plain or image/jpeg etc */
- wrapper = g_mime_part_get_content_object (GMIME_PART (part));
- if (wrapper != NULL) {
- part_stream = g_mime_stream_mem_new ();
- if (g_mime_data_wrapper_write_to_stream (wrapper, part_stream) != -1) {
- g_mime_stream_mem_set_owner (GMIME_STREAM_MEM (part_stream), FALSE);
- part_content = g_mime_stream_mem_get_byte_array (GMIME_STREAM_MEM (part_stream));
- g_object_unref (part_stream);
-#ifdef GMIME24
- type = (GMimeContentType *)g_mime_object_get_content_type (GMIME_OBJECT (part));
-#else
- type = (GMimeContentType *)g_mime_part_get_content_type (GMIME_PART (part));
-#endif
- mime_part = memory_pool_alloc (session->session_pool, sizeof (struct mime_part));
- mime_part->type = type;
- mime_part->content = part_content;
- session->parts = g_list_prepend (session->parts, mime_part);
- }
- g_object_unref (wrapper);
- }
- } else {
- g_assert_not_reached ();
- }
-}
-
-int
-process_learn (struct controller_session *session)
-{
- GMimeMessage *message;
- GMimeParser *parser;
- GMimeStream *stream;
- GByteArray *tmp;
-
- tmp = memory_pool_alloc (session->session_pool, sizeof (GByteArray));
- tmp->data = session->learn_buf->begin;
- tmp->len = session->learn_buf->len;
- stream = g_mime_stream_mem_new_with_byte_array (tmp);
- /*
- * This causes g_mime_stream not to free memory by itself as it is memory allocated by
- * pool allocator
- */
- g_mime_stream_mem_set_owner (GMIME_STREAM_MEM (stream), FALSE);
-
- /* create a new parser object to parse the stream */
- parser = g_mime_parser_new_with_stream (stream);
-
- /* unref the stream (parser owns a ref, so this object does not actually get free'd until we destroy the parser) */
- g_object_unref (stream);
-
- /* parse the message from the stream */
- message = g_mime_parser_construct_message (parser);
-
- memory_pool_add_destructor (session->session_pool, (pool_destruct_func)g_object_unref, message);
-
-#ifdef GMIME24
- g_mime_message_foreach (message, mime_learn_foreach_callback, session);
-#else
- g_mime_message_foreach_part (message, mime_learn_foreach_callback, session);
-#endif
-
- /* free the parser (and the stream) */
- g_object_unref (parser);
-
- return 0;
-}
-
-/*
- * XXX: remove this function for learning
- */
-GByteArray*
-get_next_text_part (memory_pool_t *pool, GList *parts, GList **cur)
-{
- struct mime_part *p;
-
- if (*cur == NULL) {
- *cur = g_list_first (parts);
- }
- else {
- *cur = g_list_next (*cur);
- }
-
- while (*cur) {
- p = (*cur)->data;
- /* For text/plain just return bytes */
- if (g_mime_content_type_is_type (p->type, "text", "plain")) {
- msg_debug ("get_next_text_part: text/plain part");
- return p->content;
- }
-#if 0
- else if (g_mime_content_type_is_type (p->type, "text", "html")) {
- msg_debug ("get_next_text_part: try to strip html tags");
- ret = strip_html_tags (p->content, NULL);
- memory_pool_add_destructor (pool, (pool_destruct_func)free_byte_array_callback, ret);
- return ret;
- }
- else if (g_mime_content_type_is_type (p->type, "text", "xhtml")) {
- msg_debug ("get_next_text_part: try to strip html tags");
- ret = strip_html_tags (p->content, NULL);
- memory_pool_add_destructor (pool, (pool_destruct_func)free_byte_array_callback, ret);
- return ret;
- }
-#endif
- *cur = g_list_next (*cur);
- }
-
- return NULL;
-}
-
struct raw_header {
struct raw_header *next;
char *name;