From: Pádraig Brady
Date: Thu, 9 May 2024 13:03:38 +0000 (+0100)
Subject: wc: increase I/O size from 16 KiB to 256 KiB
X-Git-Tag: v9.6~237
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=ccf47cad93bc0b85da0401b0a9d4b652e4c930e4;p=thirdparty%2Fcoreutils.git
wc: increase I/O size from 16 KiB to 256 KiB
This is similar to commit v9.4-143-gfcfba90d0,
and is enabled for AVX by commit v9.5-25-g0e4450103.
This was seen to improve AVX performance by about 10%
on an AMD 7800X3D (Ryzen 7 (2023)) CPU,
while having a neutral effect on AVX performance
on an Intel i7-5600U (Broadwell-U (2015)) CPU.
With AVX not enabled, this gives about a 3% performance boost
on an Intel i7-5600U.
* src/wc.c: Use the centrally configured optimum buffer size.
* src/wc_avx2.c: Likewise.
* NEWS: Mention the change in performance.
---
diff --git a/NEWS b/NEWS
index 7e8ccb34f7..febb9ac684 100644
--- a/NEWS
+++ b/NEWS
@@ -31,6 +31,10 @@ GNU coreutils NEWS -*- outline -*-
sort operates more efficiently when used on pseudo files with
an apparent size of 0, like those in /proc.
+ wc now reads a minimum of 256KiB at a time.
+ This was previously 16KiB and increasing to 256KiB was seen to increase
+ wc -l performance by about 10% when reading cached files on modern systems.
+
* Noteworthy changes in release 9.5 (2024-03-28) [stable]
diff --git a/src/wc.c b/src/wc.c
index d70ad39363..21ffa74d9f 100644
--- a/src/wc.c
+++ b/src/wc.c
@@ -34,6 +34,7 @@
#include
#include "system.h"
+#include "ioblksize.h"
#include "wc.h"
/* The official name of this program (e.g., no 'g' prefix). */
@@ -43,9 +44,6 @@
proper_name ("Paul Rubin"), \
proper_name ("David MacKenzie")
-/* Size of atomic reads. */
-#define BUFFER_SIZE (16 * 1024)
-
static bool wc_isprint[UCHAR_MAX + 1];
static bool wc_isspace[UCHAR_MAX + 1];
@@ -262,8 +260,8 @@ wc_lines (int fd)
while (true)
{
- char buf[BUFFER_SIZE + 1];
- ssize_t bytes_read = read (fd, buf, BUFFER_SIZE);
+ char buf[IO_BUFSIZE + 1];
+ ssize_t bytes_read = read (fd, buf, IO_BUFSIZE);
if (bytes_read <= 0)
return (struct wc_lines) { bytes_read == 0 ? 0 : errno, lines, bytes };
@@ -304,7 +302,7 @@ static bool
wc (int fd, char const *file_x, struct fstatus *fstatus, off_t current_pos)
{
int err = 0;
- char buf[BUFFER_SIZE + 1];
+ char buf[IO_BUFSIZE + 1];
intmax_t lines, words, chars, bytes, linelength;
bool count_bytes, count_chars, count_complicated;
char const *file = file_x ? file_x : _("standard input");
@@ -331,7 +329,7 @@ wc (int fd, char const *file_x, struct fstatus *fstatus, off_t current_pos)
/* When counting only bytes, save some line- and word-counting
overhead. If FD is a 'regular' Unix file, using lseek is enough
- to get its 'size' in bytes. Otherwise, read blocks of BUFFER_SIZE
+ to get its 'size' in bytes. Otherwise, read blocks of IO_BUFSIZE
bytes at a time until EOF. Note that the 'size' (number of bytes)
that wc reports is smaller than stats.st_size when the file is not
positioned at its beginning. That's why the lseek calls below are
@@ -386,7 +384,7 @@ wc (int fd, char const *file_x, struct fstatus *fstatus, off_t current_pos)
{
fdadvise (fd, 0, 0, FADVISE_SEQUENTIAL);
for (ssize_t bytes_read;
- (bytes_read = read (fd, buf, BUFFER_SIZE));
+ (bytes_read = read (fd, buf, IO_BUFSIZE));
bytes += bytes_read)
if (bytes_read < 0)
{
@@ -413,7 +411,7 @@ wc (int fd, char const *file_x, struct fstatus *fstatus, off_t current_pos)
idx_t prev = 0; /* Number of bytes carried over from previous round. */
for (ssize_t bytes_read;
- ((bytes_read = read (fd, buf + prev, BUFFER_SIZE - prev))
+ ((bytes_read = read (fd, buf + prev, IO_BUFSIZE - prev))
|| prev);
)
{
@@ -448,7 +446,7 @@ wc (int fd, char const *file_x, struct fstatus *fstatus, off_t current_pos)
if (scanbytes < n)
{
- if (n == (size_t) -2 && plim - p < BUFFER_SIZE
+ if (n == (size_t) -2 && plim - p < IO_BUFSIZE
&& bytes_read)
{
/* An incomplete character that is not ridiculously
@@ -553,7 +551,7 @@ wc (int fd, char const *file_x, struct fstatus *fstatus, off_t current_pos)
bool in_word = false;
intmax_t linepos = 0;
- for (ssize_t bytes_read; (bytes_read = read (fd, buf, BUFFER_SIZE)); )
+ for (ssize_t bytes_read; (bytes_read = read (fd, buf, IO_BUFSIZE)); )
{
if (bytes_read < 0)
{
diff --git a/src/wc_avx2.c b/src/wc_avx2.c
index 5ec7147596..c3f76a625a 100644
--- a/src/wc_avx2.c
+++ b/src/wc_avx2.c
@@ -17,13 +17,11 @@
#include <config.h>
#include "wc.h"
-
#include "system.h"
+#include "ioblksize.h"
#include <x86intrin.h>
-#define BUFSIZE 16384
-
/* Read FD and return a summary. */
extern struct wc_lines
wc_lines_avx2 (int fd)
@@ -35,7 +33,7 @@ wc_lines_avx2 (int fd)
while (true)
{
- __m256i avx_buf[BUFSIZE / sizeof (__m256i)];
+ __m256i avx_buf[IO_BUFSIZE / sizeof (__m256i)];
ssize_t bytes_read = read (fd, avx_buf, sizeof avx_buf);
if (bytes_read <= 0)
return (struct wc_lines) { bytes_read == 0 ? 0 : errno, lines, bytes };