From: Paul Eggert
Date: Mon, 9 Aug 2010 06:14:38 +0000 (-0700)
Subject: sort: speed up -R with long lines in hard locales
X-Git-Tag: v8.6~75
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=ad31a59a370e29f38864a747045da10c82d6c912;p=thirdparty%2Fcoreutils.git

sort: speed up -R with long lines in hard locales

* src/sort.c (compare_random): Guess that the output will be 3X the
input. This avoids the overhead of calling strxfrm twice on typical
implementations. Suggested by Bruno Haible.
---

diff --git a/src/sort.c b/src/sort.c
index dcfd24f335..148ed3ee7a 100644
--- a/src/sort.c
+++ b/src/sort.c
@@ -2024,6 +2024,7 @@ compare_random (char *restrict texta, size_t lena,
   char stackbuf[4000];
   char *buf = stackbuf;
   size_t bufsize = sizeof stackbuf;
+  void *allocated = NULL;
   uint32_t dig[2][MD5_DIGEST_SIZE / sizeof (uint32_t)];
   struct md5_ctx s[2];
   s[0] = s[1] = random_md5_state;
@@ -2047,6 +2048,16 @@ compare_random (char *restrict texta, size_t lena,
 
           /* Store the transformed data into a big-enough buffer. */
 
+          /* A 3X size guess avoids the overhead of calling strxfrm
+             twice on typical implementations. Don't worry about
+             size_t overflow, as the guess need not be correct. */
+          size_t guess_bufsize = 3 * (lena + lenb) + 2;
+          if (bufsize < guess_bufsize)
+            {
+              bufsize = MAX (guess_bufsize, bufsize * 3 / 2);
+              buf = allocated = xrealloc (allocated, bufsize);
+            }
+
           size_t sizea =
             (texta < lima ? xstrxfrm (buf, texta, bufsize) + 1 : 0);
           bool a_fits = sizea <= bufsize;
@@ -2062,9 +2073,7 @@ compare_random (char *restrict texta, size_t lena,
               bufsize = sizea + sizeb;
               if (bufsize < SIZE_MAX / 3)
                 bufsize = bufsize * 3 / 2;
-              buf = (buf == stackbuf
-                     ? xmalloc (bufsize)
-                     : xrealloc (buf, bufsize));
+              buf = allocated = xrealloc (allocated, bufsize);
               if (texta < lima)
                 strxfrm (buf, texta, sizea);
               if (textb < limb)
@@ -2119,8 +2128,7 @@ compare_random (char *restrict texta, size_t lena,
       diff = xfrm_diff;
     }
 
-  if (buf != stackbuf)
-    free (buf);
+  free (allocated);
 
   return diff;
 }