From 8a3f52fc2e60f6c274af0f781d54d0d9b91e349c Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Thu, 20 Dec 2012 21:23:42 +0100 Subject: [PATCH] MEDIUM: log-format: make the format parser more robust and more extensible The log-format parser reached a limit making it hard to add new features. It also suffers from a weak handling of certain incorrect corner cases, for example "%{foo}" is emitted as a litteral while syntactically it's an argument to no variable. Also the argument parser had to redo some of the job with some cases causing minor memory leaks (eg: ignored args). This work aims at improving the situation so that slightly better reporting is possible and that it becomes possible to extend the log format. The code has a few more states but looks significantly simpler. The parser is now capable of reporting ignored arguments and truncated lines. --- include/proto/log.h | 4 +- include/types/log.h | 17 ++-- src/log.c | 242 +++++++++++++++++++++----------------------- 3 files changed, 128 insertions(+), 135 deletions(-) diff --git a/include/proto/log.h b/include/proto/log.h index a6c59ffc48..5d1215afb9 100644 --- a/include/proto/log.h +++ b/include/proto/log.h @@ -55,10 +55,10 @@ void sess_log(struct session *s); int parse_logformat_var_args(char *args, struct logformat_node *node); /* - * Parse a variable '%varname' or '%{args}varname' in logformat + * Parse a variable '%varname' or '%{args}varname' in log-format * */ -int parse_logformat_var(char *str, size_t len, struct proxy *curproxy, struct list *list_format, int *defoptions); +int parse_logformat_var(char *arg, int arg_len, char *var, int var_len, struct proxy *curproxy, struct list *list_format, int *defoptions); /* * add to the logformat linked list diff --git a/include/types/log.h b/include/types/log.h index c81ab0de79..0ed79d4142 100644 --- a/include/types/log.h +++ b/include/types/log.h @@ -97,15 +97,16 @@ enum { LOG_FMT_SSL_VERSION, }; -/* enum for parse_logformat */ +/* enum for parse_logformat_string */ enum { - LF_TEXT = 0, - LF_SEPARATOR, - LF_VAR, // after % - - LF_STARTVAR, // % - LF_STARG, // { and within { } - LF_EDARG, // end arg } + LF_INIT = 0, // before first character + LF_TEXT, // normal text + LF_SEPARATOR, // a single separator + LF_VAR, // variable name, after '%' or '%{..}' + LF_STARTVAR, // % in text + LF_STARG, // after '%{' and berore '}' + LF_EDARG, // '}' after '%{' + LF_END, // \0 found }; diff --git a/src/log.c b/src/log.c index 0d63ba6aba..a5860024e6 100644 --- a/src/log.c +++ b/src/log.c @@ -10,6 +10,7 @@ * */ +#include #include #include #include @@ -221,69 +222,54 @@ int parse_logformat_var_args(char *args, struct logformat_node *node) } /* - * Parse a variable '%varname' or '%{args}varname' in logformat - * + * Parse a variable '%varname' or '%{args}varname' in log-format. The caller + * must pass the args part in the pointer with its length in , + * and varname with its length in and respectively. is + * ignored when arg_len is 0. Neither nor may be null. */ -int parse_logformat_var(char *str, size_t len, struct proxy *curproxy, struct list *list_format, int *defoptions) +int parse_logformat_var(char *arg, int arg_len, char *var, int var_len, struct proxy *curproxy, struct list *list_format, int *defoptions) { - int i, j; - char *arg = NULL; // arguments - int fparam = 0; - char *name = NULL; - struct logformat_node *node = NULL; - char varname[255] = { 0 }; // variable name - - for (i = 1; i < len; i++) { // escape first char % - if (!arg && str[i] == '{') { - arg = str + i; - fparam = 1; - } else if (arg && str[i] == '}') { - char *tmp = arg; - arg = calloc(str + i - tmp, 1); // without {} - strncpy(arg, tmp + 1, str + i - tmp - 1); // copy without { and } - arg[str + i - tmp - 1] = '\0'; - fparam = 0; - } else if (!name && !fparam) { - strncpy(varname, str + i, len - i + 1); - varname[len - i] = '\0'; - for (j = 0; logformat_keywords[j].name; j++) { // search a log type - if (strcmp(varname, logformat_keywords[j].name) == 0) { - if (!((logformat_keywords[j].mode == PR_MODE_HTTP) && (curproxy->mode == PR_MODE_TCP))) { - node = calloc(1, sizeof(struct logformat_node)); - node->type = logformat_keywords[j].type; - node->options = *defoptions; - node->arg = arg; - parse_logformat_var_args(node->arg, node); - if (node->type == LOG_FMT_GLOBAL) { - *defoptions = node->options; - free(node); - } else { - if (logformat_keywords[j].config_callback != NULL) { - if (logformat_keywords[j].config_callback(node, curproxy) != 0) { - return -1; - } - } - curproxy->to_log |= logformat_keywords[j].lw; - LIST_ADDQ(list_format, &node->list); - } - if (logformat_keywords[j].replace_by) - Warning("Warning: deprecated variable '%s' in log-format, please replace it with '%s'\n", - varname, logformat_keywords[j].replace_by); - return 0; - } else { - Warning("Warning: No such variable name '%s' in this log mode\n", varname); - if (arg) - free(arg); + int j; + struct logformat_node *node; + + for (j = 0; logformat_keywords[j].name; j++) { // search a log type + if (strlen(logformat_keywords[j].name) == var_len && + strncmp(var, logformat_keywords[j].name, var_len) == 0) { + if (logformat_keywords[j].mode != PR_MODE_HTTP || curproxy->mode == PR_MODE_HTTP) { + node = calloc(1, sizeof(struct logformat_node)); + node->type = logformat_keywords[j].type; + node->options = *defoptions; + if (arg_len) { + node->arg = my_strndup(arg, arg_len); + parse_logformat_var_args(node->arg, node); + } + if (node->type == LOG_FMT_GLOBAL) { + *defoptions = node->options; + free(node->arg); + free(node); + } else { + if (logformat_keywords[j].config_callback && + logformat_keywords[j].config_callback(node, curproxy) != 0) { return -1; } + curproxy->to_log |= logformat_keywords[j].lw; + LIST_ADDQ(list_format, &node->list); } + if (logformat_keywords[j].replace_by) + Warning("Warning: deprecated variable '%s' in log-format, please replace it with '%s'\n", + logformat_keywords[j].name, logformat_keywords[j].replace_by); + return 0; + } else { + Warning("Warning: log-format variable name '%s' is not suited to HTTP mode\n", logformat_keywords[j].name); + return -1; } - Warning("Warning: No such variable name '%s' in log-format\n", varname); - if (arg) - free(arg); - return -1; } } + + j = var[var_len]; + var[var_len] = 0; + Warning("Warning: no such variable name '%s' in log-format\n", var); + var[var_len] = j; return -1; } @@ -329,9 +315,13 @@ void add_to_logformat_list(char *start, char *end, int type, struct list *list_f */ void parse_logformat_string(char *str, struct proxy *curproxy, struct list *list_format, int capabilities) { - char *sp = str; /* start pointer */ - int cformat = -1; /* current token format : LOG_TEXT, LOG_SEPARATOR, LOG_VARIABLE */ - int pformat = -1; /* previous token format */ + char *sp = str; /* start pointer for text parts */ + char *arg = NULL; /* start pointer for args */ + char *var = NULL; /* start pointer for vars */ + int arg_len = 0; + int var_len = 0; + int cformat; /* current token format */ + int pformat; /* previous token format */ struct logformat_node *tmplf, *back; int options = 0; @@ -343,87 +333,89 @@ void parse_logformat_string(char *str, struct proxy *curproxy, struct list *list free(tmplf); } - while (1) { - - // push the variable only if formats are different, not - // within a variable, and not the first iteration - if ((cformat != pformat && cformat != -1 && pformat != -1) || *str == '\0') { - if (((pformat != LF_STARTVAR && cformat != LF_VAR) && - (pformat != LF_STARTVAR && cformat != LF_STARG) && - (pformat != LF_STARG && cformat != LF_VAR)) || *str == '\0') { - if (pformat > LF_VAR) // unfinished string - pformat = LF_TEXT; - if (pformat == LF_VAR) - parse_logformat_var(sp, str - sp, curproxy, list_format, &options); - else - add_to_logformat_list(sp, str, pformat, list_format); - sp = str; - if (*str == '\0') - break; - } - } - - if (cformat != -1) - str++; // consume the string, except on the first tour - + for (cformat = LF_INIT; cformat != LF_END; str++) { pformat = cformat; - if (*str == '\0') { - cformat = LF_STARTVAR; // for breaking in all cases - continue; - } + if (!*str) + cformat = LF_END; // preset it to save all states from doing this - if (pformat == LF_STARTVAR) { // after a % - if ( (*str >= 'a' && *str <= 'z') || // parse varname - (*str >= 'A' && *str <= 'Z') || - (*str >= '0' && *str <= '9')) { - cformat = LF_VAR; // varname - continue; - } else if (*str == '{') { - cformat = LF_STARG; // variable arguments - continue; - } else { // another unexpected token - pformat = LF_TEXT; // redefine the format of the previous token to TEXT - cformat = LF_TEXT; - continue; + /* The prinicple of the two-step state machine below is to first detect a change, and + * second have all common paths processed at one place. The common paths are the ones + * encountered in text areas (LF_INIT, LF_TEXT, LF_SEPARATOR) and at the end (LF_END). + * We use the common LF_INIT state to dispatch to the different final states. + */ + switch (pformat) { + case LF_STARTVAR: // text immediately following a '%' + arg = NULL; + arg_len = var_len = 0; + if (*str == '{') { // optional argument + cformat = LF_STARG; + arg = str + 1; } - - } else if (pformat == LF_VAR) { // after a varname - if ( (*str >= 'a' && *str <= 'z') || // parse varname - (*str >= 'A' && *str <= 'Z') || - (*str >= '0' && *str <= '9')) { + else if (isalnum((int)*str)) { // variable name cformat = LF_VAR; - continue; + var = str; } - } else if (pformat == LF_STARG) { // inside variable arguments - if (*str == '}') { // end of varname + else if (*str == '%') + cformat = LF_TEXT; // convert this character to a litteral (useful for '%') + else + cformat = LF_INIT; // handle other cases of litterals + break; + + case LF_STARG: // text immediately following '%{' + if (*str == '}') { // end of arg cformat = LF_EDARG; - continue; - } else { // all tokens are acceptable within { } - cformat = LF_STARG; - continue; + arg_len = str - arg; + *str = 0; // used for reporting errors } - } else if (pformat == LF_EDARG) { // after arguments - if ( (*str >= 'a' && *str <= 'z') || // parse a varname - (*str >= 'A' && *str <= 'Z') || - (*str >= '0' && *str <= '9')) { + break; + + case LF_EDARG: // text immediately following '%{arg}' + if (isalnum((int)*str)) { // variable name cformat = LF_VAR; - continue; - } else { // if no varname after arguments, transform in TEXT - pformat = LF_TEXT; - cformat = LF_TEXT; + var = str; + break; } + + Warning("Skipping isolated argument in log-format line : '%%{%s}'\n", arg); + cformat = LF_INIT; + break; + + case LF_VAR: // text part of a variable name + var_len = str - var; + if (!isalnum((int)*str)) + cformat = LF_INIT; // not variable name anymore + break; + + default: // LF_INIT, LF_TEXT, LF_SEPARATOR, LF_END + cformat = LF_INIT; } - // others tokens that don't match previous conditions - if (*str == '%') { - cformat = LF_STARTVAR; - } else if (*str == ' ') { - cformat = LF_SEPARATOR; - } else { - cformat = LF_TEXT; + if (cformat == LF_INIT) { /* resynchronize state to text/sep/startvar */ + switch (*str) { + case '%': cformat = LF_STARTVAR; break; + case ' ': cformat = LF_SEPARATOR; break; + case 0 : cformat = LF_END; break; + default : cformat = LF_TEXT; break; + } + } + + if (cformat != pformat || pformat == LF_SEPARATOR) { + switch (pformat) { + case LF_VAR: + parse_logformat_var(arg, arg_len, var, var_len, curproxy, list_format, &options); + break; + case LF_TEXT: + case LF_SEPARATOR: + add_to_logformat_list(sp, str, pformat, list_format); + break; + } + sp = str; /* new start of text at every state switch and at every separator */ } } + + if (pformat == LF_STARTVAR || pformat == LF_STARG) + Warning("Ignoring end of truncated log-format line after '%s'\n", arg ? arg : "%"); } /* -- 2.47.3