From: Willy Tarreau Date: Sun, 10 Jul 2011 17:47:48 +0000 (+0200) Subject: [OPTIM] halog: check once for correct line format and reuse the pointer X-Git-Tag: v1.5-dev8~193 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=26deaf51d9784183537f1436dd10b20e8cfdb941;p=thirdparty%2Fhaproxy.git [OPTIM] halog: check once for correct line format and reuse the pointer Almost all filters first check the line format, which takes a lot of code and requires parsing back and forth. By centralizing this test, we can save about 15-20 more percent of performance for all filters. Also, the test was wrong, it was checking that the source IP address was starting with a digit, which is not always true with local IPv6 addresses. Instead, we now check that the next field (accept field) starts with an opening bracket and is followed by a digit between 0 and 3 (day of the month). Doing this has contributed a 2% speedup because all other field calculations were relative to a closer field. --- diff --git a/contrib/halog/halog.c b/contrib/halog/halog.c index 67952fb5ba..0fbbbe7be5 100644 --- a/contrib/halog/halog.c +++ b/contrib/halog/halog.c @@ -385,7 +385,7 @@ void truncated_line(int linenum, const char *line) int main(int argc, char **argv) { - const char *b, *e, *p, *time_field; + const char *b, *e, *p, *time_field, *accept_field; const char *output_file = NULL; int f, tot, last, linenum, err, parse_err; struct timer *t = NULL, *t2; @@ -494,17 +494,33 @@ int main(int argc, char **argv) while ((line = fgets2(stdin)) != NULL) { linenum++; - time_field = NULL; + time_field = NULL; accept_field = NULL; test = 1; + + /* for any line we process, we first ensure that there is a field + * looking like the accept date field (beginning with a '['). + */ + accept_field = field_start(line, ACCEPT_FIELD + skip_fields); + if (*accept_field != '[') { + parse_err++; + continue; + } + + /* the day of month field is begin 01 and 31 */ + if (unlikely(accept_field[1] < '0' || accept_field[1] > '3')) { + parse_err++; + continue; + } + if (unlikely(filter & FILT_HTTP_ONLY)) { /* only report lines with at least 4 timers */ - - if (!time_field) - time_field = field_start(line, TIME_FIELD + skip_fields); - if (!*time_field) { - truncated_line(linenum, line); - continue; + if (unlikely(!time_field)) { + time_field = field_start(accept_field, TIME_FIELD - ACCEPT_FIELD + 1); + if (!*time_field) { + truncated_line(linenum, line); + continue; + } } e = field_stop(time_field + 1); @@ -523,11 +539,12 @@ int main(int argc, char **argv) int tps; /* only report lines with response times larger than filter_time_resp */ - if (!time_field) - time_field = field_start(line, TIME_FIELD + skip_fields); - if (!*time_field) { - truncated_line(linenum, line); - continue; + if (unlikely(!time_field)) { + time_field = field_start(accept_field, TIME_FIELD - ACCEPT_FIELD + 1); + if (!*time_field) { + truncated_line(linenum, line); + continue; + } } e = field_stop(time_field + 1); @@ -557,13 +574,11 @@ int main(int argc, char **argv) if (unlikely(filter & FILT_ERRORS_ONLY)) { /* only report erroneous status codes */ - if (!time_field) - time_field = field_start(line, TIME_FIELD + skip_fields); - if (!*time_field) { - truncated_line(linenum, line); - continue; - } - b = field_start(time_field, STATUS_FIELD - TIME_FIELD + 1); + if (time_field) + b = field_start(time_field, STATUS_FIELD - TIME_FIELD + 1); + else + b = field_start(accept_field, STATUS_FIELD - ACCEPT_FIELD + 1); + if (!*b) { truncated_line(linenum, line); continue; @@ -581,12 +596,7 @@ int main(int argc, char **argv) continue; if (unlikely(filter & (FILT_ACC_COUNT|FILT_ACC_DELAY))) { - b = field_start(line, ACCEPT_FIELD + skip_fields); - if (!*b) { - truncated_line(linenum, line); - continue; - } - + b = accept_field; tot++; val = convert_date(b); //printf("date=%s => %d\n", b, val); @@ -603,15 +613,12 @@ int main(int argc, char **argv) if (unlikely(filter & (FILT_GRAPH_TIMERS|FILT_PERCENTILE))) { int f; - if (!time_field) - time_field = field_start(line, TIME_FIELD + skip_fields); - if (!*time_field) { - truncated_line(linenum, line); - continue; - } - if (!*time_field) { - truncated_line(linenum, line); - continue; + if (unlikely(!time_field)) { + time_field = field_start(accept_field, TIME_FIELD - ACCEPT_FIELD + 1); + if (!*time_field) { + truncated_line(linenum, line); + continue; + } } e = field_stop(time_field + 1); @@ -689,19 +696,11 @@ int main(int argc, char **argv) } if (unlikely(filter & FILT_COUNT_STATUS)) { - /* first, let's ensure that the line is a traffic line (beginning - * with an IP address) - */ - b = field_start(line, SOURCE_FIELD + skip_fields); - if (*b < '0' || *b > '9') { - parse_err++; - continue; - } - if (time_field) b = field_start(time_field, STATUS_FIELD - TIME_FIELD + 1); else - b = field_start(b, STATUS_FIELD - SOURCE_FIELD + 1); + b = field_start(accept_field, STATUS_FIELD - ACCEPT_FIELD + 1); + if (!*b) { truncated_line(linenum, line); continue; @@ -714,19 +713,11 @@ int main(int argc, char **argv) } if (unlikely(filter & FILT_COUNT_TERM_CODES)) { - /* first, let's ensure that the line is a traffic line (beginning - * with an IP address) - */ - b = field_start(line, SOURCE_FIELD + skip_fields); - if (*b < '0' || *b > '9') { - parse_err++; - continue; - } - if (time_field) b = field_start(time_field, TERM_CODES_FIELD - TIME_FIELD + 1); else - b = field_start(b, TERM_CODES_FIELD - SOURCE_FIELD + 1); + b = field_start(accept_field, TERM_CODES_FIELD - ACCEPT_FIELD + 1); + if (!*b) { truncated_line(linenum, line); continue; @@ -743,19 +734,10 @@ int main(int argc, char **argv) struct ebmb_node *srv_node; struct srv_st *srv; - /* first, let's ensure that the line is a traffic line (beginning - * with an IP address) - */ - b = field_start(line, SOURCE_FIELD + skip_fields); - if (*b < '0' || *b > '9') { - parse_err++; - continue; - } - /* the server field is before the status field, so let's * parse them in the proper order. */ - b = field_start(b, SERVER_FIELD - SOURCE_FIELD + 1); + b = field_start(accept_field, SERVER_FIELD - ACCEPT_FIELD + 1); if (!*b) { truncated_line(linenum, line); continue; @@ -840,22 +822,13 @@ int main(int argc, char **argv) } if (unlikely(filter & FILT_COUNT_URL_ANY)) { - /* first, let's ensure that the line is a traffic line (beginning - * with an IP address, or already having a time field) - */ - /* let's collect the response time */ - if (!time_field) { - b = field_start(line, SOURCE_FIELD + skip_fields); // avg 95 ns per line - if (*b < '0' || *b > '9') { - parse_err++; + if (unlikely(!time_field)) { + time_field = field_start(accept_field, TIME_FIELD - ACCEPT_FIELD + 1); // avg 115 ns per line + if (!*time_field) { + truncated_line(linenum, line); continue; } - time_field = field_start(field_stop(b + 1), TIME_FIELD - SOURCE_FIELD); // avg 115 ns per line - } - if (!*time_field) { - truncated_line(linenum, line); - continue; } /* we have the field TIME_FIELD starting at . We'll