From: Willy Tarreau Date: Wed, 5 May 2010 10:22:08 +0000 (+0200) Subject: [OPTIM] halog: speed up fgets2-64 by about 10% X-Git-Tag: v1.4.5~12 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=d2c142c7ee7829e724e611883da387bef3735bb8;p=thirdparty%2Fhaproxy.git [OPTIM] halog: speed up fgets2-64 by about 10% This version uses more 64-bit lookups and two 32-bit lookups to converge faster. This saves about 10% performance. --- diff --git a/contrib/halog/fgets2-64.c b/contrib/halog/fgets2-64.c index 9209c087cd..236b970dd2 100644 --- a/contrib/halog/fgets2-64.c +++ b/contrib/halog/fgets2-64.c @@ -62,7 +62,7 @@ static inline unsigned int has_zero64(unsigned long long x) #define FGETS2_BUFSIZE (256*1024) const char *fgets2(FILE *stream) { - static char buffer[FGETS2_BUFSIZE + 5]; + static char buffer[FGETS2_BUFSIZE + 9]; // +9 to have zeroes past the end static char *end = buffer; static char *line = buffer; @@ -72,15 +72,35 @@ const char *fgets2(FILE *stream) next = line; while (1) { - /* this is a speed-up, we read 32 bits at once and check for an + /* this is a speed-up, we read 64 bits at once and check for an * LF character there. We stop if found then continue one at a * time. */ - while (next < end && (((unsigned long)next) & 7) && *next != '\n') - next++; - /* now next is multiple of 4 or equal to end */ - while (next <= (end-32)) { + if (next <= (end-12)) { + /* max 3 bytes tested here */ + while ((((unsigned long)next) & 3) && *next != '\n') + next++; + + /* maybe we have can skip 4 more bytes */ + if ((((unsigned long)next) & 4) && !has_zero(*(unsigned int *)next ^ 0x0A0A0A0AU)) + next += 4; + } + + /* now next is multiple of 8 or equal to end */ + while (next <= (end-68)) { + if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL)) + break; + next += 8; + if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL)) + break; + next += 8; + if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL)) + break; + next += 8; + if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL)) + break; + next += 8; if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL)) break; next += 8; @@ -95,6 +115,10 @@ const char *fgets2(FILE *stream) next += 8; } + /* maybe we can skip 4 more bytes */ + if (!has_zero(*(unsigned int *)next ^ 0x0A0A0A0AU)) + next += 4; + /* we finish if needed. Note that next might be slightly higher * than end here because we might have gone past it above. */