From: Vsevolod Stakhov <vsevolod@rambler-co.ru>
Date: Tue, 17 Mar 2009 09:25:23 +0000 (+0300)
Subject: * Add ability to match raw headers
X-Git-Tag: 0.2.7~252
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=8cc22288806b9ff7115cb8f30349029714e2d89a;p=thirdparty%2Frspamd.git

* Add ability to match raw headers
* Update documentation
---

diff --git a/README.utf8.txt b/README.utf8.txt
index c49da2476c..c5bc293c0f 100644
--- a/README.utf8.txt
+++ b/README.utf8.txt
@@ -89,6 +89,10 @@ Queue-ID - идентификатор очереди
 .module 'regexp' {
 	SYMBOL = "regexp_expression";
 };
+header_filters = "regexp";
+
+ÐÐ±ÑÐ°ÑÐ¸ÑÐµ Ð²Ð½Ð¸Ð¼Ð°Ð½Ð¸Ðµ, ÑÑÐ¾ Ð¼Ð¾Ð´ÑÐ»Ñ regexp Ð½Ð°Ð´Ð¾ ÑÐµÐ³Ð¸ÑÑÑÐ¸ÑÐ¾Ð²Ð°ÑÑ ÐºÐ°Ðº header filter, ÑÐ°Ðº ÐºÐ°Ðº Ð¸Ð½Ð°ÑÐµ Ð¾Ð½ Ð½Ðµ Ð±ÑÐ´ÐµÑ ÑÐ°Ð±Ð¾ÑÐ°ÑÑ.
+Ð­ÑÑ Ð¿ÑÐ¾Ð±Ð»ÐµÐ¼Ñ Ð½Ð°Ð´Ð¾ Ð¸ÑÐ¿ÑÐ°Ð²Ð»ÑÑÑ, Ð½Ð¾ ÑÑÐ¾ Ð½Ðµ Ð¿ÐµÑÐ²Ð¾Ð¾ÑÐµÑÐµÐ´Ð½Ð°Ñ Ð·Ð°Ð´Ð°ÑÐ°.
 
 Ð¤Ð¾ÑÐ¼Ð°Ñ ÑÐµÐ³ÑÐºÑÐ¿Ð¾Ð² ÑÐ°ÐºÐ¾Ð¹:
 /pattern/flags
@@ -98,14 +102,19 @@ headername=/pattern/flags
 Флаги регэскпов:
 i, m, s, x, u, o - такие же, как у perl/pcre
 H - ищет по заголовкам
-M - ищет по всему сообщению
+M - ищет по всему сообщению (в "сыром" виде)
 P - ищет по всем mime частям
 U - ищет по url
+X - ищет по "сырым" хедерам (тут нужно учитывать фолдинг и ставить, где надо, /m для multiline матчинга)
+
 ÐÑÑÐ°Ð¶ÐµÐ½Ð¸Ðµ ÑÐµÐ³ÑÐºÑÐ¿Ð¾Ð² Ð¼Ð¾Ð¶ÐµÑ ÑÐ¾Ð´ÐµÑÐ¶Ð°ÑÑ ÑÐ»Ð¾Ð¶Ð½ÑÐµ Ð²ÑÑÐ°Ð¶ÐµÐ½Ð¸Ñ Ð¸Ð· Ð½ÐµÑÐºÐ¾Ð»ÑÐºÐ¸Ñ ÑÐµÐ³ÑÐºÑÐ¿Ð¾Ð², Ð¾Ð¿ÐµÑÐ°ÑÐ¾ÑÐ¾Ð² Ð»Ð¾Ð³Ð¸ÐºÐ¸ Ð¸ ÑÐºÐ¾Ð±Ð¾Ðº:
 SOME_SYMBOL = "To=/blah@blah/H & !(From=/blah@blah/H | Subject=/blah/H)"
+
 Также можно использовать переменные:
 $to_blah = "To=/blah@blah/H";
 $from_blah = "From=/blah@blah/H";
 $subject_blah = "Subject=/blah/H";
+
 ÑÐ¾Ð³Ð´Ð° Ð¿ÑÐµÐ´ÑÐ´ÑÑÐµÐµ Ð²ÑÑÐ°Ð¶ÐµÐ½Ð¸Ðµ Ð±ÑÐ´ÐµÑ ÑÐ°ÐºÐ¸Ð¼
+
 SOME_SYMBOL = "${to_blah} & !(${from_blah} | ${subject_blah})"
diff --git a/src/cfg_file.h b/src/cfg_file.h
index b484d5b0d4..cc31f7a0cf 100644
--- a/src/cfg_file.h
+++ b/src/cfg_file.h
@@ -50,7 +50,7 @@ enum rspamd_cred_type {
 };
 
 /**
- * Regexp type: /H - header, /M - mime, /U - url
+ * Regexp type: /H - header, /M - message, /P - mime, /U - url, /X - raw header
  */
 enum rspamd_regexp_type {
 	REGEXP_NONE = 0,
@@ -58,6 +58,7 @@ enum rspamd_regexp_type {
 	REGEXP_MIME,
 	REGEXP_MESSAGE,
 	REGEXP_URL,
+	REGEXP_RAW_HEADER,
 };
 
 /**
diff --git a/src/cfg_utils.c b/src/cfg_utils.c
index 3d2ce611c3..204ed65f66 100644
--- a/src/cfg_utils.c
+++ b/src/cfg_utils.c
@@ -656,6 +656,12 @@ parse_regexp (memory_pool_t *pool, char *line)
 				}
 				p ++;
 				break;
+			case 'X':
+				if (result->type == REGEXP_NONE || result->type == REGEXP_HEADER) {
+					result->type = REGEXP_RAW_HEADER;
+				}
+				p ++;
+				break;
 			/* Stop flags parsing */
 			default:
 				p = NULL;
diff --git a/src/main.h b/src/main.h
index 7b7c221ca7..a138666570 100644
--- a/src/main.h
+++ b/src/main.h
@@ -185,6 +185,7 @@ struct worker_task {
 	int parts_count;											/**< mime parts count								*/
 	GMimeMessage *message;										/**< message, parsed with GMime						*/
 	GList *parts;												/**< list of parsed parts							*/
+	char *raw_headers;											/**< list of raw headers							*/
 	TAILQ_HEAD (uriq, uri) urls;								/**< list of parsed urls							*/
 	GHashTable *results;										/**< hash table of metric_result indexed by 
 																 *    metric's name									*/
diff --git a/src/message.c b/src/message.c
index 4780a11ffb..76743f7de8 100644
--- a/src/message.c
+++ b/src/message.c
@@ -371,6 +371,16 @@ process_message (struct worker_task *task)
 		task->message_id = "undef";
 	}
 
+#ifdef GMIME24
+	task->raw_headers = g_mime_object_get_headers (GMIME_OBJECT (task->message));
+#else
+	task->raw_headers = g_mime_message_get_headers (task->message);
+#endif
+
+	if (task->raw_headers) {
+		memory_pool_add_destructor (task->task_pool, (pool_destruct_func)g_free, task->raw_headers);
+	}
+
 	task->worker->srv->stat->messages_scanned ++;
 
 	/* free the parser (and the stream) */
diff --git a/src/plugins/regexp.c b/src/plugins/regexp.c
index 06da6932fe..9ef4e1ce38 100644
--- a/src/plugins/regexp.c
+++ b/src/plugins/regexp.c
@@ -138,7 +138,7 @@ regexp_module_reconfig (struct config_file *cfg)
 static gsize
 process_regexp (struct rspamd_regexp *re, struct worker_task *task)
 {
-	char *headerv;
+	char *headerv, *c, t;
 	struct mime_part *part;
 	GList *cur;
 	struct uri *url;
@@ -190,19 +190,60 @@ process_regexp (struct rspamd_regexp *re, struct worker_task *task)
 			}
 			return 0;
 		case REGEXP_MESSAGE:
-			msg_debug ("process_message: checking message regexp: /%s/", re->regexp_text);
+			msg_debug ("process_regexp: checking message regexp: /%s/", re->regexp_text);
 			if (g_regex_match_full (re->regexp, task->msg->begin, task->msg->len, 0, 0, NULL, NULL) == TRUE) {
 				return 1;
 			}
 			return 0;
 		case REGEXP_URL:
-			msg_debug ("process_url: checking url regexp: /%s/", re->regexp_text);
+			msg_debug ("process_regexp: checking url regexp: /%s/", re->regexp_text);
 			TAILQ_FOREACH (url, &task->urls, next) {
 				if (g_regex_match (re->regexp, struri (url), 0, NULL) == TRUE) {
 					return 1;
 				}
 			}
 			return 0;
+		case REGEXP_RAW_HEADER:
+			msg_debug ("process_regexp: checking for raw header: %s with regexp: /%s/", re->header, re->regexp_text);
+			if (task->raw_headers == NULL) {
+				msg_debug ("process_regexp: cannot check for raw header in message, no headers found");
+				return 0;
+			}
+			if ((headerv = strstr (task->raw_headers, re->header)) == NULL) {
+				/* No header was found */
+				return 0;
+			}
+			/* Skip header name and start matching after regexp */
+			headerv += strlen (re->header) + 1;
+			/* Now the main problem is to find position of end of raw header */
+			c = headerv;
+			while (*c) {
+				/* We need to handle all types of line end */
+				if ((*c == '\r' && *(c + 1) == '\n')) {
+					c ++;
+					/* Check for folding */
+					if (!g_ascii_isspace (*(c + 1))) {
+						c ++;
+						break;
+					}
+				} 
+				else if (*c == '\r' || *c == '\n') {
+					if (!g_ascii_isspace (*(c + 1))) {
+						c ++;
+						break;
+					}
+				}
+				c ++;
+			}
+			/* Temporary null terminate this part of string */
+			t = *c;
+			*c = '\0';
+			if (g_regex_match (re->regexp, headerv, 0, NULL) == TRUE) {
+				*c = t;
+				return 1;
+			}
+			*c = t;
+			return 0;
 	}
 
 	/* Not reached */