From: Willy Tarreau Date: Thu, 22 Jan 2009 17:56:50 +0000 (+0100) Subject: [CONTRIB] halog: fast log parser for haproxy X-Git-Tag: v1.3.16-rc1~2 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=72c285345a2212733938cc410ba8bff98c814c44;p=thirdparty%2Fhaproxy.git [CONTRIB] halog: fast log parser for haproxy halog can search errors, count lines, sort by accept date, look for traffic holes and large connection counts at output graph plots of timers. --- diff --git a/contrib/halog/Makefile b/contrib/halog/Makefile new file mode 100644 index 0000000000..911cae6355 --- /dev/null +++ b/contrib/halog/Makefile @@ -0,0 +1,11 @@ +INCLUDE = -I../../include +OPTIMIZE = -O3 -mtune=pentium-m +CC = gcc + +OBJS = halog + +halog: halog.c + $(CC) $(OPTIMIZE) -o $@ $(INCLUDE) ../../src/ebtree.c ../../src/eb32tree.c $^ + +clean: + rm -vf $(OBJS) diff --git a/contrib/halog/halog.c b/contrib/halog/halog.c new file mode 100644 index 0000000000..6bfc76afdd --- /dev/null +++ b/contrib/halog/halog.c @@ -0,0 +1,599 @@ +/* + * haproxy log time reporter + * + * Copyright 2000-2009 Willy Tarreau + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +/* + * gcc -O2 -o halog halog.c -Iinclude src/ebtree.c src/eb32tree.c + * + * Usage: + * $0 [ min_delay [ min_count [ field_shift ]]] < haproxy.log + * Note: if min_delay < 0, it only outputs lines with status codes 5xx. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define ACCEPT_FIELD 6 +#define TIME_FIELD 9 +#define STATUS_FIELD 10 +#define CONN_FIELD 15 +#define MAXLINE 16384 +#define QBITS 4 + +#define SKIP_CHAR(p,c) do { while (1) if (!*p) break; else if (*(p++) == c) break; } while (0) + +/* [0] = err/date, [1] = req, [2] = conn, [3] = resp, [4] = data */ +static struct eb_root timers[5] = { + EB_ROOT_UNIQUE, EB_ROOT_UNIQUE, EB_ROOT_UNIQUE, + EB_ROOT_UNIQUE, EB_ROOT_UNIQUE, +}; + +struct timer { + struct eb32_node node; + unsigned int count; +}; + + +#define FILT_COUNT_ONLY 0x01 +#define FILT_INVERT 0x02 +#define FILT_QUIET 0x04 +#define FILT_ERRORS_ONLY 0x08 +#define FILT_ACC_DELAY 0x10 +#define FILT_ACC_COUNT 0x20 +#define FILT_GRAPH_TIMERS 0x40 + +unsigned int filter = 0; +unsigned int filter_invert = 0; +char line[MAXLINE]; + + +void die(const char *msg) +{ + fprintf(stderr, + "%s" + "Usage: halog [-c] [-v] [-gt] [-s ] [-e] [-ad ] [-ac ] < file.log\n" + "\n", + msg ? msg : "" + ); + exit(1); +} + + +/* return pointer to first char not part of current field starting at

. */ +const char *field_stop(const char *p) +{ + unsigned char c; + + while (1) { + c = *(p++); + if (c > ' ') + continue; + if (c == ' ' || c == '\t' || c == 0) + break; + } + return p - 1; +} + +/* return field (starting from 1) in string

. Only consider + * contiguous spaces (or tabs) as one delimiter. May return pointer to + * last char if field is not found. Equivalent to awk '{print $field}'. + */ +const char *field_start(const char *p, int field) +{ + unsigned char c; + while (1) { + /* skip spaces */ + while (1) { + c = *p; + if (c > ' ') + break; + if (c == ' ' || c == '\t') + goto next; + if (!c) /* end of line */ + return p; + /* other char => new field */ + break; + next: + p++; + } + + /* start of field */ + field--; + if (!field) + return p; + + /* skip this field */ + while (1) { + c = *(p++); + if (c > ' ') + continue; + if (c == ' ' || c == '\t') + break; + if (c == '\0') + return p; + } + } +} + +/* keep only the higher bits of */ +static inline unsigned int quantify_u32(unsigned int i, int bits) +{ + int high; + + if (!bits) + return 0; + + if (i) + high = fls_auto(i); // 1 to 32 + else + high = 0; + + if (high <= bits) + return i; + + return i & ~((1 << (high - bits)) - 1); +} + +/* keep only the higher bits of the absolute value of , as well as + * its sign. */ +static inline int quantify(int i, int bits) +{ + if (i >= 0) + return quantify_u32(i, bits); + else + return -quantify_u32(-i, bits); +} + +/* Insert timer value into tree . A pre-allocated node must be passed + * in . It may be NULL, in which case the function will allocate it + * itself. It will be reset to NULL once consumed. The caller is responsible + * for freeing the node once not used anymore. The node where the value was + * inserted is returned. + */ +struct timer *insert_timer(struct eb_root *r, struct timer **alloc, int v) +{ + struct timer *t = *alloc; + struct eb32_node *n; + + if (!t) { + t = calloc(sizeof(*t), 1); + if (unlikely(!t)) { + fprintf(stderr, "%s: not enough memory\n", __FUNCTION__); + exit(1); + } + } + t->node.key = quantify(v, QBITS); // keep only the higher QBITS bits + + n = eb32i_insert(r, &t->node); + if (n == &t->node) + t = NULL; /* node inserted, will malloc next time */ + + *alloc = t; + return container_of(n, struct timer, node); +} + +/* Insert value value into tree . A pre-allocated node must be passed + * in . It may be NULL, in which case the function will allocate it + * itself. It will be reset to NULL once consumed. The caller is responsible + * for freeing the node once not used anymore. The node where the value was + * inserted is returned. + */ +struct timer *insert_value(struct eb_root *r, struct timer **alloc, int v) +{ + struct timer *t = *alloc; + struct eb32_node *n; + + if (!t) { + t = calloc(sizeof(*t), 1); + if (unlikely(!t)) { + fprintf(stderr, "%s: not enough memory\n", __FUNCTION__); + exit(1); + } + } + t->node.key = v; + + n = eb32i_insert(r, &t->node); + if (n == &t->node) + t = NULL; /* node inserted, will malloc next time */ + + *alloc = t; + return container_of(n, struct timer, node); +} + +int str2ic(const char *s) +{ + int i = 0; + int j, k; + + if (*s != '-') { + /* positive number */ + while (1) { + j = (*s++) - '0'; + k = i * 10; + if ((unsigned)j > 9) + break; + i = k + j; + } + } else { + /* negative number */ + s++; + while (1) { + j = (*s++) - '0'; + k = i * 10; + if ((unsigned)j > 9) + break; + i = k - j; + } + } + + return i; +} + + +/* Equivalent to strtoul with a length. */ +static inline unsigned int __strl2ui(const char *s, int len) +{ + unsigned int i = 0; + while (len-- > 0) { + i = i * 10 - '0'; + i += (unsigned char)*s++; + } + return i; +} + +unsigned int strl2ui(const char *s, int len) +{ + return __strl2ui(s, len); +} + +/* Convert "[04/Dec/2008:09:49:40.555]" to an integer equivalent to the time of + * the day in milliseconds. It returns -1 for all unparsable values. The parser + * looks ugly but gcc emits far better code that way. + */ +int convert_date(const char *field) +{ + unsigned int h, m, s, ms; + unsigned char c; + const char *b, *e; + + h = m = s = ms = 0; + e = field; + + /* skip the date */ + while (1) { + c = *(e++); + if (c == ':') + break; + if (!c) + goto out_err; + } + + /* hour + ':' */ + b = e; + while (1) { + c = *(e++) - '0'; + if (c > 9) + break; + h = h * 10 + c; + } + if (c == (unsigned char)(0 - '0')) + goto out_err; + + /* minute + ':' */ + b = e; + while (1) { + c = *(e++) - '0'; + if (c > 9) + break; + m = m * 10 + c; + } + if (c == (unsigned char)(0 - '0')) + goto out_err; + + /* second + '.' or ']' */ + b = e; + while (1) { + c = *(e++) - '0'; + if (c > 9) + break; + s = s * 10 + c; + } + if (c == (unsigned char)(0 - '0')) + goto out_err; + + /* if there's a '.', we have milliseconds */ + if (c == (unsigned char)('.' - '0')) { + /* millisecond second + ']' */ + b = e; + while (1) { + c = *(e++) - '0'; + if (c > 9) + break; + ms = ms * 10 + c; + } + if (c == (unsigned char)(0 - '0')) + goto out_err; + } + return (((h * 60) + m) * 60 + s) * 1000 + ms; + out_err: + return -1; +} + +void truncated_line(int linenum, const char *line) +{ + if (!(filter & FILT_QUIET)) + fprintf(stderr, "Truncated line %d: %s\n", linenum, line); +} + +int main(int argc, char **argv) +{ + const char *b, *e, *p; + const char *output_file = NULL; + int f, tot, last, linenum, err, parse_err; + struct timer *t = NULL, *t2; + struct eb32_node *n; + int val, test; + int array[5]; + int filter_acc_delay = 0, filter_acc_count = 0; + int skip_fields = 1; + + argc--; argv++; + while (argc > 0) { + if (*argv[0] != '-') + break; + + if (strcmp(argv[0], "-ad") == 0) { + if (argc < 2) die("missing option for -ad"); + argc--; argv++; + filter |= FILT_ACC_DELAY; + filter_acc_delay = atol(*argv); + } + else if (strcmp(argv[0], "-ac") == 0) { + if (argc < 2) die("missing option for -ac"); + argc--; argv++; + filter |= FILT_ACC_COUNT; + filter_acc_count = atol(*argv); + } + else if (strcmp(argv[0], "-s") == 0) { + if (argc < 2) die("missing option for -s"); + argc--; argv++; + skip_fields = atol(*argv); + } + else if (strcmp(argv[0], "-e") == 0) + filter |= FILT_ERRORS_ONLY; + else if (strcmp(argv[0], "-c") == 0) + filter |= FILT_COUNT_ONLY; + else if (strcmp(argv[0], "-q") == 0) + filter |= FILT_QUIET; + else if (strcmp(argv[0], "-v") == 0) + filter_invert = !filter_invert; + else if (strcmp(argv[0], "-gt") == 0) + filter |= FILT_GRAPH_TIMERS; + else if (strcmp(argv[0], "-o") == 0) { + if (output_file) + die("Fatal: output file name already specified.\n"); + if (argc < 2) + die("Fatal: missing output file name.\n"); + output_file = argv[1]; + } + argc--; + argv++; + } + + if (!filter) + die("No action specified.\n"); + + if (filter & FILT_ACC_COUNT && !filter_acc_count) + filter_acc_count=1; + + if (filter & FILT_ACC_DELAY && !filter_acc_delay) + filter_acc_delay = 1; + + linenum = 0; + tot = 0; + parse_err = 0; + + while (fgets(line, MAXLINE, stdin) != NULL) { + linenum++; + + if (filter & FILT_ERRORS_ONLY) { + /* only report erroneous status codes */ + b = field_start(line, STATUS_FIELD + skip_fields); + if (!*b) { + truncated_line(linenum, line); + continue; + } + if (*b == '-') { + test = 1; + } else { + val = strl2ui(b, 3); + test = (val >= 500 && val <= 599); + } + test ^= filter_invert; + if (test) { + tot++; + if (!(filter & FILT_COUNT_ONLY)) + printf("%s", line); + } + continue; + } + + if (filter & (FILT_ACC_COUNT|FILT_ACC_DELAY)) { + b = field_start(line, ACCEPT_FIELD + skip_fields); + if (!*b) { + truncated_line(linenum, line); + continue; + } + + val = convert_date(b); + //printf("date=%s => %d\n", b, val); + if (val < 0) { + parse_err++; + continue; + } + + t2 = insert_value(&timers[0], &t, val); + t2->count++; + continue; + } + + if (filter & FILT_GRAPH_TIMERS) { + int f; + + b = field_start(line, TIME_FIELD + skip_fields); + if (!*b) { + truncated_line(linenum, line); + continue; + } + + e = field_stop(b + 1); + /* we have field TIME_FIELD in [b]..[e-1] */ + + p = b; + err = 0; + for (f = 0; f < 5 && *p; f++) { + array[f] = str2ic(p); + if (array[f] < 0) { + array[f] = -1; + err = 1; + } + + SKIP_CHAR(p, '/'); + } + + if (f < 5) { + parse_err++; + continue; + } + + /* if we find at least one negative time, we count one error + * with a time equal to the total session time. This will + * emphasize quantum timing effects associated to known + * timeouts. Note that on some buggy machines, it is possible + * that the total time is negative, hence the reason to reset + * it. + */ + if (err) { + if (array[4] < 0) + array[4] = -1; + t2 = insert_timer(&timers[0], &t, array[4]); // total time + t2->count++; + } else { + int v; + + t2 = insert_timer(&timers[1], &t, array[0]); t2->count++; // req + t2 = insert_timer(&timers[2], &t, array[2]); t2->count++; // conn + t2 = insert_timer(&timers[3], &t, array[3]); t2->count++; // resp + + v = array[4] - array[0] - array[1] - array[2] - array[3]; // data time + if (v < 0 && !(filter & FILT_QUIET)) + fprintf(stderr, "ERR: %s (%d %d %d %d %d => %d)\n", + line, array[0], array[1], array[2], array[3], array[4], v); + t2 = insert_timer(&timers[4], &t, v); t2->count++; + } + continue; + } + + /* all other cases mean we just want to count lines */ + tot++; + } + + if (t) + free(t); + + if (filter & FILT_COUNT_ONLY) { + printf("%d\n", tot); + exit(0); + } + + if (filter & FILT_ERRORS_ONLY) + exit(0); + + if (filter & (FILT_ACC_COUNT|FILT_ACC_DELAY)) { + /* sort and count all timers. Output will look like this : + * + */ + n = eb32_first(&timers[0]); + + if (n) + last = n->key; + while (n) { + unsigned int d, h, m, s, ms; + + t = container_of(n, struct timer, node); + h = n->key; + d = h - last; + last = h; + + if (d >= filter_acc_delay && t->count >= filter_acc_count) { + ms = h % 1000; h = h / 1000; + s = h % 60; h = h / 60; + m = h % 60; h = h / 60; + tot++; + printf("%02d:%02d:%02d.%03d %d %d %d\n", h, m, s, ms, last, d, t->count); + } + n = eb32_next(n); + } + } + else if (filter & FILT_GRAPH_TIMERS) { + /* sort all timers */ + for (f = 0; f < 5; f++) { + struct eb32_node *n; + int val; + + val = 0; + n = eb32_first(&timers[f]); + while (n) { + int i; + double d; + + t = container_of(n, struct timer, node); + last = n->key; + val = t->count; + + i = (last < 0) ? -last : last; + i = fls_auto(i) - QBITS; + + if (i > 0) + d = val / (double)(1 << i); + else + d = val; + + if (d > 0.0) { + printf("%d %d %f\n", f, last, d+1.0); + tot++; + } + + n = eb32_next(n); + } + } + } + + if (!(filter & FILT_QUIET)) + fprintf(stderr, "%d lines in, %d lines out, %d parsing errors\n", + linenum, tot, parse_err); + exit(0); +} + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */