From: Pádraig Brady Date: Thu, 9 May 2024 13:03:38 +0000 (+0100) Subject: wc: increase I/O size from 16 KiB to 256KiB X-Git-Tag: v9.6~237 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=ccf47cad93bc0b85da0401b0a9d4b652e4c930e4;p=thirdparty%2Fcoreutils.git wc: increase I/O size from 16 KiB to 256KiB This is similar to commit v9.4-143-gfcfba90d0, and was enabled for AVX by commit v9.5-25-g0e4450103. This was seen to improve AVX performance by about 10% on an AMD 7800X3D (Ryzen 7 (2023)) CPU, while being performance-neutral with AVX on an Intel i7-5600U (Broadwell-U (2015)) CPU. With AVX not enabled, this gives about a 3% performance boost on an Intel i7-5600U. * src/wc.c: Use the centrally configured optimum buffer size. * src/wc_avx2.c: Likewise. * NEWS: Mention the change in performance. --- diff --git a/NEWS b/NEWS index 7e8ccb34f7..febb9ac684 100644 --- a/NEWS +++ b/NEWS @@ -31,6 +31,10 @@ GNU coreutils NEWS -*- outline -*- sort operates more efficiently when used on pseudo files with an apparent size of 0, like those in /proc. + wc now reads a minimum of 256KiB at a time. + This was previously 16KiB and increasing to 256KiB was seen to increase + wc -l performance by about 10% when reading cached files on modern systems. + * Noteworthy changes in release 9.5 (2024-03-28) [stable] diff --git a/src/wc.c b/src/wc.c index d70ad39363..21ffa74d9f 100644 --- a/src/wc.c +++ b/src/wc.c @@ -34,6 +34,7 @@ #include #include "system.h" +#include "ioblksize.h" #include "wc.h" /* The official name of this program (e.g., no 'g' prefix). */ @@ -43,9 +44,6 @@ proper_name ("Paul Rubin"), \ proper_name ("David MacKenzie") -/* Size of atomic reads. 
*/ -#define BUFFER_SIZE (16 * 1024) - static bool wc_isprint[UCHAR_MAX + 1]; static bool wc_isspace[UCHAR_MAX + 1]; @@ -262,8 +260,8 @@ wc_lines (int fd) while (true) { - char buf[BUFFER_SIZE + 1]; - ssize_t bytes_read = read (fd, buf, BUFFER_SIZE); + char buf[IO_BUFSIZE + 1]; + ssize_t bytes_read = read (fd, buf, IO_BUFSIZE); if (bytes_read <= 0) return (struct wc_lines) { bytes_read == 0 ? 0 : errno, lines, bytes }; @@ -304,7 +302,7 @@ static bool wc (int fd, char const *file_x, struct fstatus *fstatus, off_t current_pos) { int err = 0; - char buf[BUFFER_SIZE + 1]; + char buf[IO_BUFSIZE + 1]; intmax_t lines, words, chars, bytes, linelength; bool count_bytes, count_chars, count_complicated; char const *file = file_x ? file_x : _("standard input"); @@ -331,7 +329,7 @@ wc (int fd, char const *file_x, struct fstatus *fstatus, off_t current_pos) /* When counting only bytes, save some line- and word-counting overhead. If FD is a 'regular' Unix file, using lseek is enough - to get its 'size' in bytes. Otherwise, read blocks of BUFFER_SIZE + to get its 'size' in bytes. Otherwise, read blocks of IO_BUFSIZE bytes at a time until EOF. Note that the 'size' (number of bytes) that wc reports is smaller than stats.st_size when the file is not positioned at its beginning. That's why the lseek calls below are @@ -386,7 +384,7 @@ wc (int fd, char const *file_x, struct fstatus *fstatus, off_t current_pos) { fdadvise (fd, 0, 0, FADVISE_SEQUENTIAL); for (ssize_t bytes_read; - (bytes_read = read (fd, buf, BUFFER_SIZE)); + (bytes_read = read (fd, buf, IO_BUFSIZE)); bytes += bytes_read) if (bytes_read < 0) { @@ -413,7 +411,7 @@ wc (int fd, char const *file_x, struct fstatus *fstatus, off_t current_pos) idx_t prev = 0; /* Number of bytes carried over from previous round. 
*/ for (ssize_t bytes_read; - ((bytes_read = read (fd, buf + prev, BUFFER_SIZE - prev)) + ((bytes_read = read (fd, buf + prev, IO_BUFSIZE - prev)) || prev); ) { @@ -448,7 +446,7 @@ wc (int fd, char const *file_x, struct fstatus *fstatus, off_t current_pos) if (scanbytes < n) { - if (n == (size_t) -2 && plim - p < BUFFER_SIZE + if (n == (size_t) -2 && plim - p < IO_BUFSIZE && bytes_read) { /* An incomplete character that is not ridiculously @@ -553,7 +551,7 @@ wc (int fd, char const *file_x, struct fstatus *fstatus, off_t current_pos) bool in_word = false; intmax_t linepos = 0; - for (ssize_t bytes_read; (bytes_read = read (fd, buf, BUFFER_SIZE)); ) + for (ssize_t bytes_read; (bytes_read = read (fd, buf, IO_BUFSIZE)); ) { if (bytes_read < 0) { diff --git a/src/wc_avx2.c b/src/wc_avx2.c index 5ec7147596..c3f76a625a 100644 --- a/src/wc_avx2.c +++ b/src/wc_avx2.c @@ -17,13 +17,11 @@ #include #include "wc.h" - #include "system.h" +#include "ioblksize.h" #include -#define BUFSIZE 16384 - /* Read FD and return a summary. */ extern struct wc_lines wc_lines_avx2 (int fd) @@ -35,7 +33,7 @@ wc_lines_avx2 (int fd) while (true) { - __m256i avx_buf[BUFSIZE / sizeof (__m256i)]; + __m256i avx_buf[IO_BUFSIZE / sizeof (__m256i)]; ssize_t bytes_read = read (fd, avx_buf, sizeof avx_buf); if (bytes_read <= 0) return (struct wc_lines) { bytes_read == 0 ? 0 : errno, lines, bytes };