From: Willy Tarreau Date: Fri, 20 Feb 2009 10:02:32 +0000 (+0100) Subject: [CONTRIB] halog: faster fgets() and add support for percentile reporting X-Git-Tag: v1.3.16-rc1~1 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=214c203c0044beaffed648bbeb015245e74cc0bd;p=thirdparty%2Fhaproxy.git [CONTRIB] halog: faster fgets() and add support for percentile reporting A new fgets implementation saves about 25-50% of the time spent parsing the logs. Percentile calculation has been added for timers using -pct. --- diff --git a/contrib/halog/Makefile b/contrib/halog/Makefile index 911cae6355..2215e03a9a 100644 --- a/contrib/halog/Makefile +++ b/contrib/halog/Makefile @@ -1,10 +1,12 @@ INCLUDE = -I../../include OPTIMIZE = -O3 -mtune=pentium-m -CC = gcc -OBJS = halog +OBJS = halog halog64 -halog: halog.c +halog: halog.c fgets2.c + $(CC) $(OPTIMIZE) -o $@ $(INCLUDE) ../../src/ebtree.c ../../src/eb32tree.c $^ + +halog64: halog.c fgets2-64.c $(CC) $(OPTIMIZE) -o $@ $(INCLUDE) ../../src/ebtree.c ../../src/eb32tree.c $^ clean: diff --git a/contrib/halog/fgets2-64.c b/contrib/halog/fgets2-64.c new file mode 100644 index 0000000000..409d6026a3 --- /dev/null +++ b/contrib/halog/fgets2-64.c @@ -0,0 +1,147 @@ +/* + * fast fgets() replacement for log parsing + * + * Copyright 2000-2009 Willy Tarreau + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * This function manages its own buffer and returns a pointer to that buffer + * in order to avoid expensive memory copies. It also checks for line breaks + * 32 bits at a time. It could be improved a lot using mmap() but we would + * not be allowed to replace trailing \n with zeroes and we would be limited + * to small log files on 32-bit machines. + * + */ + +#include +#include +#include +#include + +// return 1 if the integer contains at least one zero byte +static inline unsigned int has_zero(unsigned int x) +{ + if (!(x & 0xFF000000U) || + !(x & 0xFF0000U) || + !(x & 0xFF00U) || + !(x & 0xFFU)) + return 1; + return 0; +} + +static inline unsigned int has_zero64(unsigned long long x) +{ + unsigned long long x2; + + x2 = x & (x >> 8); + /* no need to split it further */ + if ((x2 & 0x00FF) && (x2 & 0x00FF0000) && (x2 & 0x00FF00000000ULL) && (x2 & 0x00FF000000000000ULL)) + return 0; // approx 11/12 return here + + if (!(x & 0xff00000000000000ULL) || + !(x & 0xff000000000000ULL) || + !(x & 0xff0000000000ULL) || + !(x & 0xff00000000ULL) || + !(x & 0xff000000UL) || + !(x & 0xff0000UL) || + !(x & 0xff00UL) || + !(x & 0xffUL)) + return 1; // approx 1/3 of the remaining return here + + return 0; +} + +#define FGETS2_BUFSIZE (256*1024) +const char *fgets2(FILE *stream) +{ + static char buffer[FGETS2_BUFSIZE + 5]; + static char *end = buffer; + static char *line = buffer; + + char *next; + int ret; + + next = line; + + while (1) { + /* this is a speed-up, we read 32 bits at once and check for an + * LF character there. We stop if found then continue one at a + * time. + */ + while (next < end && (((unsigned long)next) & 7) && *next != '\n') + next++; + + /* now next is multiple of 4 or equal to end */ + while (next <= (end-32)) { + if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL)) + break; + next += 8; + if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL)) + break; + next += 8; + if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL)) + break; + next += 8; + if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL)) + break; + next += 8; + } + + /* we finish if needed. Note that next might be slightly higher + * than end here because we might have gone past it above. + */ + while (next < end) { + if (*next == '\n') { + const char *start = line; + + *next = '\0'; + line = next + 1; + return start; + } + next++; + } + + /* we found an incomplete line. First, let's move the + * remaining part of the buffer to the beginning, then + * try to complete the buffer with a new read. + */ + if (line > buffer) { + if (end != line) + memmove(buffer, line, end - line); + end = buffer + (end - line); + next = end; + line = buffer; + } else { + if (end == buffer + FGETS2_BUFSIZE) + return NULL; + } + + ret = read(fileno(stream), end, buffer + FGETS2_BUFSIZE - end); + + if (ret <= 0) { + if (end == line) + return NULL; + + *end = '\0'; + return line; + } + + end += ret; + /* search for '\n' again */ + } +} + +#ifdef BENCHMARK +int main() { + const char *p; + unsigned int lines = 0; + + while ((p=fgets2(stdin))) + lines++; + printf("lines=%d\n", lines); + return 0; +} +#endif diff --git a/contrib/halog/fgets2.c b/contrib/halog/fgets2.c new file mode 100644 index 0000000000..13918a73f5 --- /dev/null +++ b/contrib/halog/fgets2.c @@ -0,0 +1,138 @@ +/* + * fast fgets() replacement for log parsing + * + * Copyright 2000-2009 Willy Tarreau + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * This function manages its own buffer and returns a pointer to that buffer + * in order to avoid expensive memory copies. It also checks for line breaks + * 32 bits at a time. It could be improved a lot using mmap() but we would + * not be allowed to replace trailing \n with zeroes and we would be limited + * to small log files on 32-bit machines. + * + */ + +#include +#include +#include +#include + +// return 1 if the integer contains at least one zero byte +static inline unsigned int has_zero(unsigned int x) +{ + if (!(x & 0xFF000000U) || + !(x & 0xFF0000U) || + !(x & 0xFF00U) || + !(x & 0xFFU)) + return 1; + return 0; +} + + +#define FGETS2_BUFSIZE (256*1024) +const char *fgets2(FILE *stream) +{ + static char buffer[FGETS2_BUFSIZE + 5]; + static char *end = buffer; + static char *line = buffer; + + char *next; + int ret; + + next = line; + + while (1) { + /* this is a speed-up, we read 32 bits at once and check for an + * LF character there. We stop if found then continue one at a + * time. + */ + while (next < end && (((unsigned long)next) & 3) && *next != '\n') + next++; + + /* now next is multiple of 4 or equal to end */ + while (next <= (end-32)) { + if (has_zero(*(unsigned int *)next ^ 0x0A0A0A0A)) + break; + next += 4; + if (has_zero(*(unsigned int *)next ^ 0x0A0A0A0A)) + break; + next += 4; + if (has_zero(*(unsigned int *)next ^ 0x0A0A0A0A)) + break; + next += 4; + if (has_zero(*(unsigned int *)next ^ 0x0A0A0A0A)) + break; + next += 4; + if (has_zero(*(unsigned int *)next ^ 0x0A0A0A0A)) + break; + next += 4; + if (has_zero(*(unsigned int *)next ^ 0x0A0A0A0A)) + break; + next += 4; + if (has_zero(*(unsigned int *)next ^ 0x0A0A0A0A)) + break; + next += 4; + if (has_zero(*(unsigned int *)next ^ 0x0A0A0A0A)) + break; + next += 4; + } + + /* we finish if needed. Note that next might be slightly higher + * than end here because we might have gone past it above. + */ + while (next < end) { + if (*next == '\n') { + const char *start = line; + + *next = '\0'; + line = next + 1; + return start; + } + next++; + } + + /* we found an incomplete line. First, let's move the + * remaining part of the buffer to the beginning, then + * try to complete the buffer with a new read. + */ + if (line > buffer) { + if (end != line) + memmove(buffer, line, end - line); + end = buffer + (end - line); + next = end; + line = buffer; + } else { + if (end == buffer + FGETS2_BUFSIZE) + return NULL; + } + + ret = read(fileno(stream), end, buffer + FGETS2_BUFSIZE - end); + + if (ret <= 0) { + if (end == line) + return NULL; + + *end = '\0'; + return line; + } + + end += ret; + /* search for '\n' again */ + } +} + +#ifdef BENCHMARK +int main() { + const char *p; + unsigned int lines = 0; + + while ((p=fgets2(stdin))) + lines++; + printf("lines=%d\n", lines); + return 0; +} +#endif diff --git a/contrib/halog/halog.c b/contrib/halog/halog.c index 6bfc76afdd..32b4a7fb33 100644 --- a/contrib/halog/halog.c +++ b/contrib/halog/halog.c @@ -11,7 +11,7 @@ */ /* - * gcc -O2 -o halog halog.c -Iinclude src/ebtree.c src/eb32tree.c + * gcc -O2 -o halog2 halog2.c -Iinclude src/ebtree.c src/eb32tree.c fgets2.c * * Usage: * $0 [ min_delay [ min_count [ field_shift ]]] < haproxy.log @@ -57,17 +57,19 @@ struct timer { #define FILT_ACC_DELAY 0x10 #define FILT_ACC_COUNT 0x20 #define FILT_GRAPH_TIMERS 0x40 +#define FILT_PERCENTILE 0x80 unsigned int filter = 0; unsigned int filter_invert = 0; -char line[MAXLINE]; +const char *line; +const char *fgets2(FILE *stream); void die(const char *msg) { fprintf(stderr, "%s" - "Usage: halog [-c] [-v] [-gt] [-s ] [-e] [-ad ] [-ac ] < file.log\n" + "Usage: halog [-c] [-v] [-gt] [-pct] [-s ] [-e] [-ad ] [-ac ] < file.log\n" "\n", msg ? msg : "" ); @@ -385,6 +387,8 @@ int main(int argc, char **argv) filter_invert = !filter_invert; else if (strcmp(argv[0], "-gt") == 0) filter |= FILT_GRAPH_TIMERS; + else if (strcmp(argv[0], "-pct") == 0) + filter |= FILT_PERCENTILE; else if (strcmp(argv[0], "-o") == 0) { if (output_file) die("Fatal: output file name already specified.\n"); @@ -409,7 +413,7 @@ int main(int argc, char **argv) tot = 0; parse_err = 0; - while (fgets(line, MAXLINE, stdin) != NULL) { + while ((line = fgets2(stdin)) != NULL) { linenum++; if (filter & FILT_ERRORS_ONLY) { @@ -429,7 +433,7 @@ int main(int argc, char **argv) if (test) { tot++; if (!(filter & FILT_COUNT_ONLY)) - printf("%s", line); + puts(line); } continue; } @@ -441,6 +445,7 @@ int main(int argc, char **argv) continue; } + tot++; val = convert_date(b); //printf("date=%s => %d\n", b, val); if (val < 0) { @@ -453,7 +458,7 @@ int main(int argc, char **argv) continue; } - if (filter & FILT_GRAPH_TIMERS) { + if (filter & (FILT_GRAPH_TIMERS|FILT_PERCENTILE)) { int f; b = field_start(line, TIME_FIELD + skip_fields); @@ -489,23 +494,47 @@ int main(int argc, char **argv) * that the total time is negative, hence the reason to reset * it. */ - if (err) { - if (array[4] < 0) - array[4] = -1; - t2 = insert_timer(&timers[0], &t, array[4]); // total time - t2->count++; - } else { - int v; - - t2 = insert_timer(&timers[1], &t, array[0]); t2->count++; // req - t2 = insert_timer(&timers[2], &t, array[2]); t2->count++; // conn - t2 = insert_timer(&timers[3], &t, array[3]); t2->count++; // resp - v = array[4] - array[0] - array[1] - array[2] - array[3]; // data time - if (v < 0 && !(filter & FILT_QUIET)) - fprintf(stderr, "ERR: %s (%d %d %d %d %d => %d)\n", - line, array[0], array[1], array[2], array[3], array[4], v); - t2 = insert_timer(&timers[4], &t, v); t2->count++; + if (filter & FILT_GRAPH_TIMERS) { + if (err) { + if (array[4] < 0) + array[4] = -1; + t2 = insert_timer(&timers[0], &t, array[4]); // total time + t2->count++; + } else { + int v; + + t2 = insert_timer(&timers[1], &t, array[0]); t2->count++; // req + t2 = insert_timer(&timers[2], &t, array[2]); t2->count++; // conn + t2 = insert_timer(&timers[3], &t, array[3]); t2->count++; // resp + + v = array[4] - array[0] - array[1] - array[2] - array[3]; // data time + if (v < 0 && !(filter & FILT_QUIET)) + fprintf(stderr, "ERR: %s (%d %d %d %d %d => %d)\n", + line, array[0], array[1], array[2], array[3], array[4], v); + t2 = insert_timer(&timers[4], &t, v); t2->count++; + tot++; + } + } else { /* percentile */ + if (err) { + if (array[4] < 0) + array[4] = -1; + t2 = insert_value(&timers[0], &t, array[4]); // total time + t2->count++; + } else { + int v; + + t2 = insert_value(&timers[1], &t, array[0]); t2->count++; // req + t2 = insert_value(&timers[2], &t, array[2]); t2->count++; // conn + t2 = insert_value(&timers[3], &t, array[3]); t2->count++; // resp + + v = array[4] - array[0] - array[1] - array[2] - array[3]; // data time + if (v < 0 && !(filter & FILT_QUIET)) + fprintf(stderr, "ERR: %s (%d %d %d %d %d => %d)\n", + line, array[0], array[1], array[2], array[3], array[4], v); + t2 = insert_value(&timers[4], &t, v); t2->count++; + tot++; + } } continue; } @@ -584,6 +613,47 @@ int main(int argc, char **argv) } } } + else if (filter & FILT_PERCENTILE) { + /* report timers by percentile : + * + * We don't count errs. + */ + struct eb32_node *n[5]; + unsigned long cum[5]; + double step; + + for (f = 1; f < 5; f++) { + n[f] = eb32_first(&timers[f]); + cum[f] = container_of(n[f], struct timer, node)->count; + } + + for (step = 1; step <= 1000;) { + unsigned int thres = tot * (step / 1000.0); + + printf("%3.1f %d ", step/10.0, thres); + for (f = 1; f < 5; f++) { + struct eb32_node *next; + while (cum[f] < thres) { + /* need to find other keys */ + next = eb32_next(n[f]); + if (!next) + break; + n[f] = next; + cum[f] += container_of(next, struct timer, node)->count; + } + + /* value still within $step % of total */ + printf("%d ", n[f]->key); + } + putchar('\n'); + if (step >= 100 && step < 900) + step += 50; // jump 5% by 5% between those steps. + else if (step >= 20 && step < 980) + step += 10; + else + step += 1; + } + } if (!(filter & FILT_QUIET)) fprintf(stderr, "%d lines in, %d lines out, %d parsing errors\n",