]> git.ipfire.org Git - thirdparty/haproxy.git/commitdiff
[CONTRIB] halog: faster fgets() and add support for percentile reporting
authorWilly Tarreau <w@1wt.eu>
Fri, 20 Feb 2009 10:02:32 +0000 (11:02 +0100)
committerWilly Tarreau <w@1wt.eu>
Sun, 8 Mar 2009 23:42:39 +0000 (00:42 +0100)
A new fgets implementation saves about 25-50% of the time spent parsing
the logs.

Percentile calculation has been added for timers using -pct.

contrib/halog/Makefile
contrib/halog/fgets2-64.c [new file with mode: 0644]
contrib/halog/fgets2.c [new file with mode: 0644]
contrib/halog/halog.c

index 911cae63559994138bfcf6efa4aa4443f53b8341..2215e03a9a2161c6d0ad0e17db35841ae2020128 100644 (file)
@@ -1,10 +1,12 @@
 INCLUDE  = -I../../include
 OPTIMIZE = -O3 -mtune=pentium-m
-CC       = gcc
 
-OBJS     = halog
+OBJS     = halog halog64
 
-halog: halog.c
+halog: halog.c fgets2.c
+       $(CC) $(OPTIMIZE) -o $@ $(INCLUDE) ../../src/ebtree.c ../../src/eb32tree.c $^
+
+halog64: halog.c fgets2-64.c
        $(CC) $(OPTIMIZE) -o $@ $(INCLUDE) ../../src/ebtree.c ../../src/eb32tree.c $^
 
 clean:
diff --git a/contrib/halog/fgets2-64.c b/contrib/halog/fgets2-64.c
new file mode 100644 (file)
index 0000000..409d602
--- /dev/null
@@ -0,0 +1,147 @@
+/*
+ * fast fgets() replacement for log parsing
+ *
+ * Copyright 2000-2009 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This function manages its own buffer and returns a pointer to that buffer
+ * in order to avoid expensive memory copies. It also checks for line breaks
+ * 32 bits at a time. It could be improved a lot using mmap() but we would
+ * not be allowed to replace trailing \n with zeroes and we would be limited
+ * to small log files on 32-bit machines.
+ *
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <unistd.h>
+
+// return 1 if the integer contains at least one zero byte
+static inline unsigned int has_zero(unsigned int x)
+{
+       if (!(x & 0xFF000000U) ||
+           !(x & 0xFF0000U) ||
+           !(x & 0xFF00U) ||
+           !(x & 0xFFU))
+               return 1;
+       return 0;
+}
+
+static inline unsigned int has_zero64(unsigned long long x)
+{
+       unsigned long long x2;
+
+       x2 = x & (x >> 8);
+       /* no need to split it further */
+       if ((x2 & 0x00FF) && (x2 & 0x00FF0000) && (x2 & 0x00FF00000000ULL) && (x2 & 0x00FF000000000000ULL))
+                       return 0; // approx 11/12 return here
+
+       if (!(x & 0xff00000000000000ULL) ||
+           !(x & 0xff000000000000ULL) ||
+           !(x & 0xff0000000000ULL) ||
+           !(x & 0xff00000000ULL) ||
+           !(x & 0xff000000UL) ||
+           !(x & 0xff0000UL) ||
+           !(x & 0xff00UL) ||
+           !(x & 0xffUL))
+               return 1; // approx 1/3 of the remaining return here
+
+       return 0;
+}
+
+#define FGETS2_BUFSIZE         (256*1024)
+const char *fgets2(FILE *stream)
+{
+       static char buffer[FGETS2_BUFSIZE + 5];
+       static char *end = buffer;
+       static char *line = buffer;
+
+       char *next;
+       int ret;
+
+       next = line;
+
+       while (1) {
+               /* this is a speed-up, we read 32 bits at once and check for an
+                * LF character there. We stop if found then continue one at a
+                * time.
+                */
+               while (next < end && (((unsigned long)next) & 7) && *next != '\n')
+                       next++;
+
+               /* now next is multiple of 4 or equal to end */
+               while (next <= (end-32)) {
+                       if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL))
+                               break;
+                       next += 8;
+                       if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL))
+                               break;
+                       next += 8;
+                       if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL))
+                               break;
+                       next += 8;
+                       if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL))
+                               break;
+                       next += 8;
+               }
+
+               /* we finish if needed. Note that next might be slightly higher
+                * than end here because we might have gone past it above.
+                */
+               while (next < end) {
+                       if (*next == '\n') {
+                               const char *start = line;
+
+                               *next = '\0';
+                               line = next + 1;
+                               return start;
+                       }
+                       next++;
+               }
+
+               /* we found an incomplete line. First, let's move the
+                * remaining part of the buffer to the beginning, then
+                * try to complete the buffer with a new read.
+                */
+               if (line > buffer) {
+                       if (end != line)
+                               memmove(buffer, line, end - line);
+                       end = buffer + (end - line);
+                       next = end;
+                       line = buffer;
+               } else {
+                       if (end == buffer + FGETS2_BUFSIZE)
+                               return NULL;
+               }
+
+               ret = read(fileno(stream), end, buffer + FGETS2_BUFSIZE - end);
+
+               if (ret <= 0) {
+                       if (end == line)
+                               return NULL;
+
+                       *end = '\0';
+                       return line;
+               }
+
+               end += ret;
+               /* search for '\n' again */
+       }
+}
+
+#ifdef BENCHMARK
+int main() {
+       const char *p;
+       unsigned int lines = 0;
+
+       while ((p=fgets2(stdin)))
+               lines++;
+       printf("lines=%d\n", lines);
+       return 0;
+}
+#endif
diff --git a/contrib/halog/fgets2.c b/contrib/halog/fgets2.c
new file mode 100644 (file)
index 0000000..13918a7
--- /dev/null
@@ -0,0 +1,138 @@
+/*
+ * fast fgets() replacement for log parsing
+ *
+ * Copyright 2000-2009 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This function manages its own buffer and returns a pointer to that buffer
+ * in order to avoid expensive memory copies. It also checks for line breaks
+ * 32 bits at a time. It could be improved a lot using mmap() but we would
+ * not be allowed to replace trailing \n with zeroes and we would be limited
+ * to small log files on 32-bit machines.
+ *
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <unistd.h>
+
+// return 1 if the integer contains at least one zero byte
+static inline unsigned int has_zero(unsigned int x)
+{
+       if (!(x & 0xFF000000U) ||
+           !(x & 0xFF0000U) ||
+           !(x & 0xFF00U) ||
+           !(x & 0xFFU))
+               return 1;
+       return 0;
+}
+
+
+#define FGETS2_BUFSIZE         (256*1024)
+const char *fgets2(FILE *stream)
+{
+       static char buffer[FGETS2_BUFSIZE + 5];
+       static char *end = buffer;
+       static char *line = buffer;
+
+       char *next;
+       int ret;
+
+       next = line;
+
+       while (1) {
+               /* this is a speed-up, we read 32 bits at once and check for an
+                * LF character there. We stop if found then continue one at a
+                * time.
+                */
+               while (next < end && (((unsigned long)next) & 3) && *next != '\n')
+                       next++;
+
+               /* now next is multiple of 4 or equal to end */
+               while (next <= (end-32)) {
+                       if (has_zero(*(unsigned int *)next ^ 0x0A0A0A0A))
+                               break;
+                       next += 4;
+                       if (has_zero(*(unsigned int *)next ^ 0x0A0A0A0A))
+                               break;
+                       next += 4;
+                       if (has_zero(*(unsigned int *)next ^ 0x0A0A0A0A))
+                               break;
+                       next += 4;
+                       if (has_zero(*(unsigned int *)next ^ 0x0A0A0A0A))
+                               break;
+                       next += 4;
+                       if (has_zero(*(unsigned int *)next ^ 0x0A0A0A0A))
+                               break;
+                       next += 4;
+                       if (has_zero(*(unsigned int *)next ^ 0x0A0A0A0A))
+                               break;
+                       next += 4;
+                       if (has_zero(*(unsigned int *)next ^ 0x0A0A0A0A))
+                               break;
+                       next += 4;
+                       if (has_zero(*(unsigned int *)next ^ 0x0A0A0A0A))
+                               break;
+                       next += 4;
+               }
+
+               /* we finish if needed. Note that next might be slightly higher
+                * than end here because we might have gone past it above.
+                */
+               while (next < end) {
+                       if (*next == '\n') {
+                               const char *start = line;
+
+                               *next = '\0';
+                               line = next + 1;
+                               return start;
+                       }
+                       next++;
+               }
+
+               /* we found an incomplete line. First, let's move the
+                * remaining part of the buffer to the beginning, then
+                * try to complete the buffer with a new read.
+                */
+               if (line > buffer) {
+                       if (end != line)
+                               memmove(buffer, line, end - line);
+                       end = buffer + (end - line);
+                       next = end;
+                       line = buffer;
+               } else {
+                       if (end == buffer + FGETS2_BUFSIZE)
+                               return NULL;
+               }
+
+               ret = read(fileno(stream), end, buffer + FGETS2_BUFSIZE - end);
+
+               if (ret <= 0) {
+                       if (end == line)
+                               return NULL;
+
+                       *end = '\0';
+                       return line;
+               }
+
+               end += ret;
+               /* search for '\n' again */
+       }
+}
+
+#ifdef BENCHMARK
+int main() {
+       const char *p;
+       unsigned int lines = 0;
+
+       while ((p=fgets2(stdin)))
+               lines++;
+       printf("lines=%d\n", lines);
+       return 0;
+}
+#endif
index 6bfc76afdd5cbcb5c0b019b738daf78c38f05311..32b4a7fb33225a64c265d57970678743c58e0378 100644 (file)
@@ -11,7 +11,7 @@
  */
 
 /*
- * gcc -O2 -o halog halog.c -Iinclude src/ebtree.c src/eb32tree.c
+ * gcc -O2 -o halog2 halog2.c -Iinclude src/ebtree.c src/eb32tree.c fgets2.c
  *
  * Usage:
  *    $0 [ min_delay [ min_count [ field_shift ]]] < haproxy.log
@@ -57,17 +57,19 @@ struct timer {
 #define FILT_ACC_DELAY         0x10
 #define FILT_ACC_COUNT         0x20
 #define FILT_GRAPH_TIMERS      0x40
+#define FILT_PERCENTILE                0x80
 
 unsigned int filter = 0;
 unsigned int filter_invert = 0;
-char line[MAXLINE];
+const char *line;
 
+const char *fgets2(FILE *stream);
 
 void die(const char *msg)
 {
        fprintf(stderr,
                "%s"
-               "Usage: halog [-c] [-v] [-gt] [-s <skip>] [-e] [-ad <delay>] [-ac <count>] < file.log\n"
+               "Usage: halog [-c] [-v] [-gt] [-pct] [-s <skip>] [-e] [-ad <delay>] [-ac <count>] < file.log\n"
                "\n",
                msg ? msg : ""
                );
@@ -385,6 +387,8 @@ int main(int argc, char **argv)
                        filter_invert = !filter_invert;
                else if (strcmp(argv[0], "-gt") == 0)
                        filter |= FILT_GRAPH_TIMERS;
+               else if (strcmp(argv[0], "-pct") == 0)
+                       filter |= FILT_PERCENTILE;
                else if (strcmp(argv[0], "-o") == 0) {
                        if (output_file)
                                die("Fatal: output file name already specified.\n");
@@ -409,7 +413,7 @@ int main(int argc, char **argv)
        tot = 0;
        parse_err = 0;
 
-       while (fgets(line, MAXLINE, stdin) != NULL) {
+       while ((line = fgets2(stdin)) != NULL) {
                linenum++;
 
                if (filter & FILT_ERRORS_ONLY) {
@@ -429,7 +433,7 @@ int main(int argc, char **argv)
                        if (test) {
                                tot++;
                                if (!(filter & FILT_COUNT_ONLY))
-                                       printf("%s", line);
+                                       puts(line);
                        }
                        continue;
                }
@@ -441,6 +445,7 @@ int main(int argc, char **argv)
                                continue;
                        }
 
+                       tot++;
                        val = convert_date(b);
                        //printf("date=%s => %d\n", b, val);
                        if (val < 0) {
@@ -453,7 +458,7 @@ int main(int argc, char **argv)
                        continue;
                }
 
-               if (filter & FILT_GRAPH_TIMERS) {
+               if (filter & (FILT_GRAPH_TIMERS|FILT_PERCENTILE)) {
                        int f;
 
                        b = field_start(line, TIME_FIELD + skip_fields);
@@ -489,23 +494,47 @@ int main(int argc, char **argv)
                         * that the total time is negative, hence the reason to reset
                         * it.
                         */
-                       if (err) {
-                               if (array[4] < 0)
-                                       array[4] = -1;
-                               t2 = insert_timer(&timers[0], &t, array[4]);  // total time
-                               t2->count++;
-                       } else {
-                               int v;
-
-                               t2 = insert_timer(&timers[1], &t, array[0]); t2->count++;  // req
-                               t2 = insert_timer(&timers[2], &t, array[2]); t2->count++;  // conn
-                               t2 = insert_timer(&timers[3], &t, array[3]); t2->count++;  // resp
 
-                               v = array[4] - array[0] - array[1] - array[2] - array[3]; // data time
-                               if (v < 0 && !(filter & FILT_QUIET))
-                                       fprintf(stderr, "ERR: %s (%d %d %d %d %d => %d)\n",
-                                               line, array[0], array[1], array[2], array[3], array[4], v);
-                               t2 = insert_timer(&timers[4], &t, v); t2->count++;
+                       if (filter & FILT_GRAPH_TIMERS) {
+                               if (err) {
+                                       if (array[4] < 0)
+                                               array[4] = -1;
+                                       t2 = insert_timer(&timers[0], &t, array[4]);  // total time
+                                       t2->count++;
+                               } else {
+                                       int v;
+
+                                       t2 = insert_timer(&timers[1], &t, array[0]); t2->count++;  // req
+                                       t2 = insert_timer(&timers[2], &t, array[2]); t2->count++;  // conn
+                                       t2 = insert_timer(&timers[3], &t, array[3]); t2->count++;  // resp
+
+                                       v = array[4] - array[0] - array[1] - array[2] - array[3]; // data time
+                                       if (v < 0 && !(filter & FILT_QUIET))
+                                               fprintf(stderr, "ERR: %s (%d %d %d %d %d => %d)\n",
+                                                       line, array[0], array[1], array[2], array[3], array[4], v);
+                                       t2 = insert_timer(&timers[4], &t, v); t2->count++;
+                                       tot++;
+                               }
+                       } else { /* percentile */
+                               if (err) {
+                                       if (array[4] < 0)
+                                               array[4] = -1;
+                                       t2 = insert_value(&timers[0], &t, array[4]);  // total time
+                                       t2->count++;
+                               } else {
+                                       int v;
+
+                                       t2 = insert_value(&timers[1], &t, array[0]); t2->count++;  // req
+                                       t2 = insert_value(&timers[2], &t, array[2]); t2->count++;  // conn
+                                       t2 = insert_value(&timers[3], &t, array[3]); t2->count++;  // resp
+
+                                       v = array[4] - array[0] - array[1] - array[2] - array[3]; // data time
+                                       if (v < 0 && !(filter & FILT_QUIET))
+                                               fprintf(stderr, "ERR: %s (%d %d %d %d %d => %d)\n",
+                                                       line, array[0], array[1], array[2], array[3], array[4], v);
+                                       t2 = insert_value(&timers[4], &t, v); t2->count++;
+                                       tot++;
+                               }
                        }
                        continue;
                }
@@ -584,6 +613,47 @@ int main(int argc, char **argv)
                        }
                }
        }
+       else if (filter & FILT_PERCENTILE) {
+               /* report timers by percentile :
+                *    <percent> <total> <max_req_time> <max_conn_time> <max_resp_time> <max_data_time>
+                * We don't count errs.
+                */
+               struct eb32_node *n[5];
+               unsigned long cum[5];
+               double step;
+
+               for (f = 1; f < 5; f++) {
+                       n[f] = eb32_first(&timers[f]);
+                       cum[f] = container_of(n[f], struct timer, node)->count;
+               }
+
+               for (step = 1; step <= 1000;) {
+                       unsigned int thres = tot * (step / 1000.0);
+
+                       printf("%3.1f %d ", step/10.0, thres);
+                       for (f = 1; f < 5; f++) {
+                               struct eb32_node *next;
+                               while (cum[f] < thres) {
+                                       /* need to find other keys */
+                                       next = eb32_next(n[f]);
+                                       if (!next)
+                                               break;
+                                       n[f] = next;
+                                       cum[f] += container_of(next, struct timer, node)->count;
+                               }
+
+                               /* value still within $step % of total */
+                               printf("%d ", n[f]->key);
+                       }
+                       putchar('\n');
+                       if (step >= 100 && step < 900)
+                               step += 50;  // jump 5% by 5% between those steps.
+                       else if (step >= 20 && step < 980)
+                               step += 10;
+                       else
+                               step += 1;
+               }
+       }
 
        if (!(filter & FILT_QUIET))
                fprintf(stderr, "%d lines in, %d lines out, %d parsing errors\n",