From: Josef 'Jeff' Sipek Date: Fri, 29 May 2020 17:09:22 +0000 (-0400) Subject: lib: Implement a new event filter language X-Git-Tag: 2.3.13~490 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=789484d40c7101aac2b140dba32e5a8f8e4f3b24;p=thirdparty%2Fdovecot%2Fcore.git lib: Implement a new event filter language Currently, it is not used by anything. In general, the new syntax is very SQL-like. It is a boolean expression made up of key-value comparisons, parentheses, and boolean connectors AND, OR, and NOT. The key-value comparisons are of the form: <key> <operator> <value> Where the key is one of: (1) "event" (2) "category" (3) "source_location" (4) a field name The operator is one of: (1) = (2) > (3) < (4) >= (5) <= And the value is either: (1) a single word token, or (2) a quoted string For example, to match events with the event name "abc", we would use one of the following expressions. Note that white space is not significant between tokens, and therefore the following are all equivalent. event=abc event="abc" event = abc event = "abc" To match events with the name "abc" that include the "imap" category, we'd use: event=abc AND category=imap To match events with the name "abc" that either include the "imap" or "pop3" categories, we'd use: event=abc AND (category=imap OR category=pop3) Field names don't have any special prefix. 
Therefore, to match events that have the field bytes_out equal to 10, we'd use: bytes_out=10 To match events with bytes_out greater than or equal to 10, we'd use: bytes_out>=10 --- diff --git a/.gitignore b/.gitignore index f405d0133c..825101ee8c 100644 --- a/.gitignore +++ b/.gitignore @@ -125,6 +125,9 @@ src/imap/imap src/indexer/indexer src/indexer/indexer-worker src/ipc/ipc +src/lib/event-filter-parser-lexer.c +src/lib/event-filter-parser-parser.c +src/lib/event-filter-parser-parser.h src/lib/unicodemap.c src/lib/UnicodeData.txt src/lib-compression/bench-compression diff --git a/configure.ac b/configure.ac index dfb59021d7..7703d0130b 100644 --- a/configure.ac +++ b/configure.ac @@ -297,6 +297,8 @@ AC_ISC_POSIX AC_PROG_CC AC_PROG_CPP AC_PROG_CXX # lucene plugin needs this +AC_CHECK_TOOL([FLEX],[flex],AC_ERROR(flex is required)) +AC_CHECK_TOOL([BISON],[bison],AC_ERROR(bison is required)) AC_HEADER_STDC AC_C_INLINE AC_PROG_LIBTOOL diff --git a/src/lib/Makefile.am b/src/lib/Makefile.am index 584f502c06..be675f1a24 100644 --- a/src/lib/Makefile.am +++ b/src/lib/Makefile.am @@ -3,10 +3,33 @@ AM_CPPFLAGS = \ noinst_LTLIBRARIES = liblib.la -BUILT_SOURCES = $(srcdir)/unicodemap.c +BUILT_SOURCES = $(srcdir)/unicodemap.c \ + event-filter-parser-lexer.c \ + event-filter-parser-parser.c \ + event-filter-parser-parser.h EXTRA_DIST = unicodemap.c unicodemap.pl UnicodeData.txt +# Squelch autoconf error about using .[ly] sources but not defining $(LEX) +# and $(YACC). Using false here avoids accidental use. +LEX=/bin/false +YACC=/bin/false + +# We use custom rules here because we want to use flex and bison instead +# of lex and yacc (or bison in yacc-compatibility mode). Both flex and +# bison can handle properly naming the generated files, and it is simpler +# and cleaner to make this rule ourselves instead of working around ylwrap +# and yywrap's antiquated notion of what is happening. 
+.l.c: + $(AM_V_GEN)$(FLEX) -o $@ $< + +.y.c: + $(AM_V_GEN)$(BISON) -o $@ $< + +# Bison generates both a header and a .c file. Without the following +# dependency, anything including the header will race the bison process. +event-filter-parser-parser.h: event-filter-parser-parser.c + $(srcdir)/UnicodeData.txt: test -f $@ || wget -O $@ https://dovecot.org/res/UnicodeData.txt @@ -33,6 +56,8 @@ liblib_la_SOURCES = \ env-util.c \ event-filter.c \ event-filter-parser.c \ + event-filter-parser-lexer.l \ + event-filter-parser-parser.y \ event-log.c \ execv-const.c \ failures.c \ @@ -192,6 +217,7 @@ headers = \ eacces-error.h \ env-util.h \ event-filter.h \ + event-filter-parser-parser.h \ event-filter-private.h \ event-log.h \ execv-const.h \ diff --git a/src/lib/event-filter-parser-lexer.l b/src/lib/event-filter-parser-lexer.l new file mode 100644 index 0000000000..bfcd5929b3 --- /dev/null +++ b/src/lib/event-filter-parser-lexer.l @@ -0,0 +1,91 @@ +/* Copyright (c) 2020 Dovecot authors, see the included COPYING file */ + +%option nounput +%option noinput +%option noyywrap +%option reentrant +%option bison-bridge +%option never-interactive +%option prefix="event_filter_parser_" + +%{ +#include "lib.h" +#include "str.h" +#include "event-filter-private.h" +#include "event-filter-parser-parser.h" + +/* mimic renaming done by bison's api.prefix %define */ +#define YYSTYPE EVENT_FILTER_PARSER_STYPE + +#define YY_INPUT(buf, result, max_size) \ + result = event_filter_parser_input_proc(buf, max_size, yyscanner) +static size_t event_filter_parser_input_proc(char *buf, size_t size, yyscan_t scanner); + +#ifdef __clang__ +#pragma clang diagnostic push +/* ignore "unknown warning" warning if we're using unpatched clang */ +#pragma clang diagnostic ignored "-Wunknown-warning-option" +/* ignore strict bool warnings in generated code */ +#pragma clang diagnostic ignored "-Wstrict-bool" +#endif +%} + +%x string + +%% + string_t *str_buf = NULL; + +\" { + BEGIN(string); + + str_buf = 
t_str_new(128); + } +<string>\" { + yylval->str = str_c(str_buf); + BEGIN(INITIAL); + return STRING; + } + /* Note: these have to match the event_filter_append_escaped() behavior */ +<string>[^\\"]+ { str_append(str_buf, yytext); } +<string>\\\\ { str_append_c(str_buf, '\\'); } +<string>\\\" { str_append_c(str_buf, '"'); } +<string>\\. { str_append(str_buf, yytext); } + +[Aa][Nn][Dd] { return AND; } +[Oo][Rr] { return OR; } +[Nn][Oo][Tt] { return NOT; } +[<>=()] { return *yytext; } +[A-Za-z0-9:.*_-]+ { yylval->str = t_strdup(yytext); return TOKEN; } +[ \t\n\r] { /* ignore */ } +. { + char msg[160]; + + i_snprintf(msg, sizeof(msg), + "syntax error, unexpected character '%c'", + yytext[0]); + + event_filter_parser_error(yyextra, msg); + } +%% +#ifdef __clang__ +#pragma clang diagnostic pop +#endif + +static size_t event_filter_parser_input_proc(char *buf, size_t size, yyscan_t scanner) +{ + struct event_filter_parser_state *state; + size_t num_bytes; + + state = event_filter_parser_get_extra(scanner); + + if (state->len == state->pos) + return 0; + + i_assert(state->len > state->pos); + + num_bytes = I_MIN(state->len - state->pos, size); + memcpy(buf, state->input + state->pos, num_bytes); + state->pos += num_bytes; + + return num_bytes; +} diff --git a/src/lib/event-filter-parser-parser.y b/src/lib/event-filter-parser-parser.y new file mode 100644 index 0000000000..6658d577cb --- /dev/null +++ b/src/lib/event-filter-parser-parser.y @@ -0,0 +1,172 @@ +/* Copyright (c) 2020 Dovecot authors, see the included COPYING file */ + +%define api.pure +%define api.prefix {event_filter_parser_} +%lex-param {void *scanner} +%parse-param {struct event_filter_parser_state *state} + +%defines + +%{ +#include "lib.h" +#include "lib-event-private.h" +#include "event-filter-private.h" + +#define scanner state->scanner + +#define YYERROR_VERBOSE + +extern int event_filter_parser_lex(void *, void *); + +void event_filter_parser_error(void *scan, const char *e) +{ + struct event_filter_parser_state *state = scan; + + 
state->error = t_strdup_printf("event filter: %s", e); +} + +static struct event_filter_node *key_value(struct event_filter_parser_state *state, + const char *a, const char *b, + enum event_filter_node_op op) +{ + struct event_filter_node *node; + enum event_filter_node_type type; + + if (strcmp(a, "event") == 0) + type = EVENT_FILTER_NODE_TYPE_EVENT_NAME; + else if (strcmp(a, "category") == 0) + type = EVENT_FILTER_NODE_TYPE_EVENT_CATEGORY; + else if (strcmp(a, "source_location") == 0) + type = EVENT_FILTER_NODE_TYPE_EVENT_SOURCE_LOCATION; + else + type = EVENT_FILTER_NODE_TYPE_EVENT_FIELD; + + node = p_new(state->pool, struct event_filter_node, 1); + node->type = type; + node->op = op; + + switch (type) { + case EVENT_FILTER_NODE_TYPE_LOGIC: + i_unreached(); + case EVENT_FILTER_NODE_TYPE_EVENT_NAME: + node->str = p_strdup(state->pool, b); + state->has_event_name = TRUE; + break; + case EVENT_FILTER_NODE_TYPE_EVENT_SOURCE_LOCATION: { + const char *colon = strrchr(b, ':'); + const char *file; + uintmax_t line; + + /* split "filename:line-number", but also handle "filename" */ + if (colon != NULL) { + if (str_to_uintmax(colon + 1, &line) < 0) { + file = p_strdup(state->pool, b); + line = 0; + } else { + file = p_strdup_until(state->pool, b, colon); + } + } else { + file = p_strdup_empty(state->pool, b); + line = 0; + } + + node->str = file; + node->intmax = line; + break; + } + case EVENT_FILTER_NODE_TYPE_EVENT_CATEGORY: + if (!event_filter_category_to_log_type(b, &node->category.log_type)) { + node->category.name = p_strdup(state->pool, b); + node->category.ptr = event_category_find_registered(b); + } + break; + case EVENT_FILTER_NODE_TYPE_EVENT_FIELD: + node->field.key = p_strdup(state->pool, a); + node->field.value.str = p_strdup(state->pool, b); + + /* Filter currently supports only comparing strings + and numbers. 
*/ + if (str_to_intmax(b, &node->field.value.intmax) < 0) { + /* not a number - no problem + Either we have a string, or a number with wildcards */ + node->field.value.intmax = INT_MIN; + } + break; + } + + return node; +} + +static struct event_filter_node *logic(struct event_filter_parser_state *state, + struct event_filter_node *a, + struct event_filter_node *b, + enum event_filter_node_op op) +{ + struct event_filter_node *node; + + node = p_new(state->pool, struct event_filter_node, 1); + node->type = EVENT_FILTER_NODE_TYPE_LOGIC; + node->op = op; + node->children[0] = a; + node->children[1] = b; + + return node; +} + +#ifdef __clang__ +/* ignore "unknown warning" warning if we're using unpatched clang */ +#pragma clang diagnostic ignored "-Wunknown-warning-option" +/* ignore strict bool warnings in generated code */ +#pragma clang diagnostic ignored "-Wstrict-bool" +#endif +%} + +%union { + const char *str; + enum event_filter_node_op op; + struct event_filter_node *node; +}; + +%token <str> TOKEN STRING +%token AND OR NOT + +%type <str> key value +%type <op> op +%type <node> expr key_value + +%precedence NOT +%left AND OR + +%% +filter : expr { state->output = $1; } + | %empty { state->output = NULL; } + ; + +expr : expr AND expr { $$ = logic(state, $1, $3, EVENT_FILTER_OP_AND); } + | expr OR expr { $$ = logic(state, $1, $3, EVENT_FILTER_OP_OR); } + | NOT expr { $$ = logic(state, $2, NULL, EVENT_FILTER_OP_NOT); } + | '(' expr ')' { $$ = $2; } + | key_value { $$ = $1; } + ; + +key_value : key op value { $$ = key_value(state, $1, $3, $2); } + ; + +key : TOKEN { $$ = $1; } + | STRING { $$ = $1; } + ; + +value : TOKEN { $$ = $1; } + | STRING { $$ = $1; } + | AND { $$ = "and"; } + | OR { $$ = "or"; } + | NOT { $$ = "not"; } + ; + +op : '=' { $$ = EVENT_FILTER_OP_CMP_EQ; } + | '>' { $$ = EVENT_FILTER_OP_CMP_GT; } + | '<' { $$ = EVENT_FILTER_OP_CMP_LT; } + | '>' '=' { $$ = EVENT_FILTER_OP_CMP_GE; } + | '<' '=' { $$ = EVENT_FILTER_OP_CMP_LE; } + ; +%% +diff --git 
a/src/lib/event-filter-private.h b/src/lib/event-filter-private.h index 28292266df..6e25bbc766 100644 --- a/src/lib/event-filter-private.h +++ b/src/lib/event-filter-private.h @@ -73,4 +73,23 @@ struct event_filter_node { bool event_filter_category_to_log_type(const char *name, enum event_filter_log_type *log_type_r); +/* lexer & parser state */ +struct event_filter_parser_state { + void *scanner; + const char *input; + size_t len; + size_t pos; + + pool_t pool; + struct event_filter_node *output; + const char *error; + bool has_event_name:1; +}; + +int event_filter_parser_lex_init(void **scanner); +int event_filter_parser_lex_destroy(void *yyscanner); +int event_filter_parser_parse(struct event_filter_parser_state *state); +void event_filter_parser_set_extra(void *user, void *yyscanner); +void event_filter_parser_error(void *scan, const char *e); + #endif diff --git a/src/lib/event-filter.c b/src/lib/event-filter.c index b5ab0a6255..b1ba2c827b 100644 --- a/src/lib/event-filter.c +++ b/src/lib/event-filter.c @@ -10,6 +10,10 @@ #include "event-filter.h" #include "event-filter-private.h" +/* Note: this has to match the regexp behavior in the event filter lexer file */ +#define event_filter_append_escaped(dst, str) \ + str_append_escaped((dst), (str), strlen(str)) + enum event_filter_code { EVENT_FILTER_CODE_NAME = 'n', EVENT_FILTER_CODE_SOURCE = 's', @@ -114,6 +118,18 @@ bool event_filter_category_to_log_type(const char *name, return FALSE; } +static const char * +event_filter_category_from_log_type(enum event_filter_log_type log_type) +{ + unsigned int i; + + for (i = 0; i < N_ELEMENTS(event_filter_log_type_map); i++) { + if (event_filter_log_type_map[i].log_type == log_type) + return event_filter_log_type_map[i].name; + } + i_unreached(); +} + static void add_node(pool_t pool, struct event_filter_node **root, struct event_filter_node *new) { @@ -252,6 +268,7 @@ clone_expr(pool_t pool, struct event_filter_node *old) new->children[1] = clone_expr(pool, 
old->children[1]); new->str = p_strdup_empty(pool, old->str); new->intmax = old->intmax; + new->category.log_type = old->category.log_type; new->category.name = p_strdup_empty(pool, old->category.name); new->category.ptr = old->category.ptr; new->field.key = p_strdup_empty(pool, old->field.key); @@ -333,6 +350,99 @@ event_filter_export_query_expr(const struct event_filter_query_internal *query, } } +static const char * +event_filter_export_query_expr_op(enum event_filter_node_op op) +{ + switch (op) { + case EVENT_FILTER_OP_AND: + case EVENT_FILTER_OP_OR: + case EVENT_FILTER_OP_NOT: + i_unreached(); + case EVENT_FILTER_OP_CMP_EQ: + return "="; + case EVENT_FILTER_OP_CMP_GT: + return ">"; + case EVENT_FILTER_OP_CMP_LT: + return "<"; + case EVENT_FILTER_OP_CMP_GE: + return ">="; + case EVENT_FILTER_OP_CMP_LE: + return "<="; + } + + i_unreached(); +} + +static void +event_filter_export_query_expr_new(const struct event_filter_query_internal *query, + struct event_filter_node *node, + string_t *dest) +{ + switch (node->type) { + case EVENT_FILTER_NODE_TYPE_LOGIC: + str_append_c(dest, '('); + switch (node->op) { + case EVENT_FILTER_OP_AND: + event_filter_export_query_expr_new(query, node->children[0], dest); + str_append(dest, " AND "); + event_filter_export_query_expr_new(query, node->children[1], dest); + break; + case EVENT_FILTER_OP_OR: + event_filter_export_query_expr_new(query, node->children[0], dest); + str_append(dest, " OR "); + event_filter_export_query_expr_new(query, node->children[1], dest); + break; + case EVENT_FILTER_OP_NOT: + str_append(dest, "NOT "); + event_filter_export_query_expr_new(query, node->children[0], dest); + break; + case EVENT_FILTER_OP_CMP_EQ: + case EVENT_FILTER_OP_CMP_GT: + case EVENT_FILTER_OP_CMP_LT: + case EVENT_FILTER_OP_CMP_GE: + case EVENT_FILTER_OP_CMP_LE: + i_unreached(); + } + str_append_c(dest, ')'); + break; + case EVENT_FILTER_NODE_TYPE_EVENT_NAME: + str_append(dest, "event"); + str_append(dest, 
event_filter_export_query_expr_op(node->op)); + str_append_c(dest, '"'); + event_filter_append_escaped(dest, node->str); + str_append_c(dest, '"'); + break; + case EVENT_FILTER_NODE_TYPE_EVENT_SOURCE_LOCATION: + str_append(dest, "source_location"); + str_append(dest, event_filter_export_query_expr_op(node->op)); + str_append_c(dest, '"'); + event_filter_append_escaped(dest, node->str); + if (node->intmax != 0) + str_printfa(dest, ":%ju", node->intmax); + str_append_c(dest, '"'); + break; + case EVENT_FILTER_NODE_TYPE_EVENT_CATEGORY: + str_append(dest, "category"); + str_append(dest, event_filter_export_query_expr_op(node->op)); + if (node->category.name != NULL) { + str_append_c(dest, '"'); + event_filter_append_escaped(dest, node->category.name); + str_append_c(dest, '"'); + } else + str_append(dest, event_filter_category_from_log_type(node->category.log_type)); + break; + case EVENT_FILTER_NODE_TYPE_EVENT_FIELD: + str_append_c(dest, '"'); + event_filter_append_escaped(dest, node->field.key); + str_append_c(dest, '"'); + str_append(dest, event_filter_export_query_expr_op(node->op)); + str_append_c(dest, '"'); + event_filter_append_escaped(dest, node->field.value.str); + str_append_c(dest, '"'); + break; + } +} + static void event_filter_export_query(const struct event_filter_query_internal *query, string_t *dest) @@ -352,6 +462,15 @@ event_filter_export_query(const struct event_filter_query_internal *query, } } +static void +event_filter_export_query_new(const struct event_filter_query_internal *query, + string_t *dest) +{ + str_append_c(dest, '('); + event_filter_export_query_expr_new(query, query->expr, dest); + str_append_c(dest, ')'); +} + void event_filter_export(struct event_filter *filter, string_t *dest) { const struct event_filter_query_internal *query; @@ -365,6 +484,19 @@ void event_filter_export(struct event_filter *filter, string_t *dest) } } +void event_filter_export_new(struct event_filter *filter, string_t *dest) +{ + const struct 
event_filter_query_internal *query; + bool first = TRUE; + + array_foreach(&filter->queries, query) { + if (!first) + str_append(dest, " OR "); + first = FALSE; + event_filter_export_query_new(query, dest); + } +} + bool event_filter_import(struct event_filter *filter, const char *str, const char **error_r) { @@ -755,6 +887,9 @@ event_filter_query_update_category(struct event_filter_query_internal *query, case EVENT_FILTER_NODE_TYPE_EVENT_FIELD: break; case EVENT_FILTER_NODE_TYPE_EVENT_CATEGORY: + if (node->category.name == NULL) + break; /* log type */ + if (add) { if (node->category.ptr != NULL) break;