From: Willy Tarreau Date: Tue, 3 Jan 2012 08:23:03 +0000 (+0100) Subject: OPTIM: halog: keep a fast path for the lines-count only X-Git-Tag: v1.5-dev8~48 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=e1a908c369ab988448c6672066ad1d09b6919d02;p=thirdparty%2Fhaproxy.git OPTIM: halog: keep a fast path for the lines-count only Using "halog -c" is still something quite common to perform on logs, but unfortunately since the recently added controls, it was noticeably slowed down due to the parsing of the accept date field. Now we use a specific loop for the case where nothing is needed from the input, and this sped up the line counting by 2.5x. A 2.4 GHz Xeon now counts lines at a rate of 2 GB of logs per second. --- diff --git a/contrib/halog/halog.c b/contrib/halog/halog.c index df224b1b24..40852aec0c 100644 --- a/contrib/halog/halog.c +++ b/contrib/halog/halog.c @@ -631,6 +631,16 @@ int main(int argc, char **argv) else if (filter & FILT_COUNT_ONLY) line_filter = NULL; + if (!line_filter && + !(filter & (FILT_HTTP_ONLY|FILT_TIME_RESP|FILT_ERRORS_ONLY|FILT_HTTP_STATUS|FILT_QUEUE_ONLY|FILT_QUEUE_SRV_ONLY|FILT_TERM_CODE_NAME))) { + /* read the whole file at once first */ + if (!filter_invert) + while (fgets2(stdin) != NULL) + lines_out++; + + goto skip_filters; + } + while ((line = fgets2(stdin)) != NULL) { linenum++; time_field = NULL; accept_field = NULL; @@ -788,7 +798,7 @@ int main(int argc, char **argv) lines_out++; /* we're just counting lines */ } - + skip_filters: /***************************************************** * Here we've finished reading all input. Depending on the * filters, we may still have some analysis to run on the