From 47076e3c7c22fc7557f388ad3d47228b922da71e Mon Sep 17 00:00:00 2001
From: =?utf8?q?P=C3=A1draig=20Brady?=
Date: Tue, 20 Jul 2010 18:51:01 +0100
Subject: [PATCH] provide POSIX_FADV_SEQUENTIAL hint to appropriate utils
Following on from commit dae35bac, 01-03-2010,
"sort: inform the system about our input access pattern",
apply the same hint to all appropriate utils.
This currently gives around a 5% speedup for reading
large files from fast flash devices on GNU/Linux.
* src/base64.c: Call fadvise (..., FADVISE_SEQUENTIAL).
* src/cat.c: Likewise.
* src/cksum.c: Likewise.
* src/comm.c: Likewise.
* src/cut.c: Likewise.
* src/expand.c: Likewise.
* src/fmt.c: Likewise.
* src/fold.c: Likewise.
* src/join.c: Likewise.
* src/md5sum.c: Likewise.
* src/nl.c: Likewise.
* src/paste.c: Likewise.
* src/pr.c: Likewise.
* src/ptx.c: Likewise.
* src/shuf.c: Likewise.
* src/sum.c: Likewise.
* src/tee.c: Likewise.
* src/tr.c: Likewise.
* src/tsort.c: Likewise.
* src/unexpand.c: Likewise.
* src/uniq.c: Likewise.
* src/wc.c: Likewise, unless we don't actually read().
---
src/base64.c | 3 +++
src/cat.c | 3 +++
src/cksum.c | 3 +++
src/comm.c | 3 +++
src/cut.c | 3 +++
src/expand.c | 8 +++++---
src/fmt.c | 2 ++
src/fold.c | 3 +++
src/join.c | 4 ++++
src/md5sum.c | 3 +++
src/nl.c | 3 +++
src/paste.c | 3 +++
src/pr.c | 2 ++
src/ptx.c | 3 +++
src/shuf.c | 3 +++
src/sum.c | 3 +++
src/tee.c | 3 +++
src/tr.c | 3 +++
src/tsort.c | 3 +++
src/unexpand.c | 8 +++++---
src/uniq.c | 3 +++
src/wc.c | 6 ++++++
22 files changed, 72 insertions(+), 6 deletions(-)
diff --git a/src/base64.c b/src/base64.c
index fddb61c9dd..1a36c9183b 100644
--- a/src/base64.c
+++ b/src/base64.c
@@ -26,6 +26,7 @@
#include "system.h"
#include "error.h"
+#include "fadvise.h"
#include "xstrtol.h"
#include "quote.h"
#include "quotearg.h"
@@ -302,6 +303,8 @@ main (int argc, char **argv)
error (EXIT_FAILURE, errno, "%s", infile);
}
+ fadvise (input_fh, FADVISE_SEQUENTIAL);
+
if (decode)
do_decode (input_fh, stdout, ignore_garbage);
else
diff --git a/src/cat.c b/src/cat.c
index c4a2a9e3ca..47b5053525 100644
--- a/src/cat.c
+++ b/src/cat.c
@@ -34,6 +34,7 @@
#include "system.h"
#include "error.h"
+#include "fadvise.h"
#include "full-write.h"
#include "quote.h"
#include "safe-read.h"
@@ -700,6 +701,8 @@ main (int argc, char **argv)
}
insize = io_blksize (stat_buf);
+ fdadvise (input_desc, 0, 0, FADVISE_SEQUENTIAL);
+
/* Compare the device and i-node numbers of this input file with
the corresponding values of the (output file associated with)
stdout, and skip this input file if they coincide. Input
diff --git a/src/cksum.c b/src/cksum.c
index d240eb3862..282c777aa6 100644
--- a/src/cksum.c
+++ b/src/cksum.c
@@ -43,6 +43,7 @@
#include
#include
#include "system.h"
+#include "fadvise.h"
#include "xfreopen.h"
#ifdef CRCTAB
@@ -205,6 +206,8 @@ cksum (const char *file, bool print_name)
}
}
+ fadvise (fp, FADVISE_SEQUENTIAL);
+
while ((bytes_read = fread (buf, 1, BUFLEN, fp)) > 0)
{
unsigned char *cp = buf;
diff --git a/src/comm.c b/src/comm.c
index ff42802cb7..06b80b0713 100644
--- a/src/comm.c
+++ b/src/comm.c
@@ -24,6 +24,7 @@
#include "system.h"
#include "linebuffer.h"
#include "error.h"
+#include "fadvise.h"
#include "hard-locale.h"
#include "quote.h"
#include "stdio--.h"
@@ -273,6 +274,8 @@ compare_files (char **infiles)
if (!streams[i])
error (EXIT_FAILURE, errno, "%s", infiles[i]);
+ fadvise (streams[i], FADVISE_SEQUENTIAL);
+
thisline[i] = readlinebuffer (all_line[i][alt[i][0]], streams[i]);
if (ferror (streams[i]))
error (EXIT_FAILURE, errno, "%s", infiles[i]);
diff --git a/src/cut.c b/src/cut.c
index 5fcf0743a7..58776d991f 100644
--- a/src/cut.c
+++ b/src/cut.c
@@ -31,6 +31,7 @@
#include "system.h"
#include "error.h"
+#include "fadvise.h"
#include "getndelim2.h"
#include "hash.h"
#include "quote.h"
@@ -733,6 +734,8 @@ cut_file (char const *file)
}
}
+ fadvise (stream, FADVISE_SEQUENTIAL);
+
cut_stream (stream);
if (ferror (stream))
diff --git a/src/expand.c b/src/expand.c
index be50063b6e..249255d15b 100644
--- a/src/expand.c
+++ b/src/expand.c
@@ -40,6 +40,7 @@
#include
#include "system.h"
#include "error.h"
+#include "fadvise.h"
#include "quote.h"
#include "xstrndup.h"
@@ -243,13 +244,14 @@ next_file (FILE *fp)
if (STREQ (file, "-"))
{
have_read_stdin = true;
- prev_file = file;
- return stdin;
+ fp = stdin;
}
- fp = fopen (file, "r");
+ else
+ fp = fopen (file, "r");
if (fp)
{
prev_file = file;
+ fadvise (fp, FADVISE_SEQUENTIAL);
return fp;
}
error (0, errno, "%s", file);
diff --git a/src/fmt.c b/src/fmt.c
index 1a268eeb43..8a5d8bd8c6 100644
--- a/src/fmt.c
+++ b/src/fmt.c
@@ -27,6 +27,7 @@
#include "system.h"
#include "error.h"
+#include "fadvise.h"
#include "quote.h"
#include "xstrtol.h"
@@ -463,6 +464,7 @@ set_prefix (char *p)
static void
fmt (FILE *f)
{
+ fadvise (f, FADVISE_SEQUENTIAL);
tabs = false;
other_indent = 0;
next_char = get_prefix (f);
diff --git a/src/fold.c b/src/fold.c
index 9364a03f94..d5858568ca 100644
--- a/src/fold.c
+++ b/src/fold.c
@@ -24,6 +24,7 @@
#include "system.h"
#include "error.h"
+#include "fadvise.h"
#include "quote.h"
#include "xstrtol.h"
@@ -142,6 +143,8 @@ fold_file (char const *filename, size_t width)
return false;
}
+ fadvise (stdin, FADVISE_SEQUENTIAL);
+
while ((c = getc (istream)) != EOF)
{
if (offset_out + 1 >= allocated_out)
diff --git a/src/join.c b/src/join.c
index c977116ab8..fa18c9d065 100644
--- a/src/join.c
+++ b/src/join.c
@@ -24,6 +24,7 @@
#include "system.h"
#include "error.h"
+#include "fadvise.h"
#include "hard-locale.h"
#include "linebuffer.h"
#include "memcasecmp.h"
@@ -617,6 +618,9 @@ join (FILE *fp1, FILE *fp2)
int diff;
bool eof1, eof2;
+ fadvise (fp1, FADVISE_SEQUENTIAL);
+ fadvise (fp2, FADVISE_SEQUENTIAL);
+
/* Read the first line of each file. */
initseq (&seq1);
getseq (fp1, &seq1, 1);
diff --git a/src/md5sum.c b/src/md5sum.c
index cbf198645a..10d4fa26c7 100644
--- a/src/md5sum.c
+++ b/src/md5sum.c
@@ -36,6 +36,7 @@
# include "sha512.h"
#endif
#include "error.h"
+#include "fadvise.h"
#include "stdio--.h"
#include "xfreopen.h"
@@ -406,6 +407,8 @@ digest_file (const char *filename, int *binary, unsigned char *bin_result)
}
}
+ fadvise (fp, FADVISE_SEQUENTIAL);
+
err = DIGEST_STREAM (fp, bin_result);
if (err)
{
diff --git a/src/nl.c b/src/nl.c
index a9446f4445..634a975c63 100644
--- a/src/nl.c
+++ b/src/nl.c
@@ -28,6 +28,7 @@
#include
#include "error.h"
+#include "fadvise.h"
#include "linebuffer.h"
#include "quote.h"
#include "xstrtol.h"
@@ -439,6 +440,8 @@ nl_file (char const *file)
}
}
+ fadvise (stream, FADVISE_SEQUENTIAL);
+
process_file (stream);
if (ferror (stream))
diff --git a/src/paste.c b/src/paste.c
index 36e1cfbc0d..dbbf52df5d 100644
--- a/src/paste.c
+++ b/src/paste.c
@@ -42,6 +42,7 @@
#include
#include "system.h"
#include "error.h"
+#include "fadvise.h"
#include "quotearg.h"
/* The official name of this program (e.g., no `g' prefix). */
@@ -211,6 +212,7 @@ paste_parallel (size_t nfiles, char **fnamptr)
error (EXIT_FAILURE, errno, "%s", fnamptr[files_open]);
else if (fileno (fileptr[files_open]) == STDIN_FILENO)
opened_stdin = true;
+ fadvise (fileptr[files_open], FADVISE_SEQUENTIAL);
}
}
@@ -367,6 +369,7 @@ paste_serial (size_t nfiles, char **fnamptr)
ok = false;
continue;
}
+ fadvise (fileptr, FADVISE_SEQUENTIAL);
}
delimptr = delims; /* Set up for delimiter string. */
diff --git a/src/pr.c b/src/pr.c
index f942151a78..9c2d10c10a 100644
--- a/src/pr.c
+++ b/src/pr.c
@@ -314,6 +314,7 @@
#include
#include "system.h"
#include "error.h"
+#include "fadvise.h"
#include "hard-locale.h"
#include "mbswidth.h"
#include "quote.h"
@@ -1507,6 +1508,7 @@ open_file (char *name, COLUMN *p)
error (0, errno, "%s", name);
return false;
}
+ fadvise (p->fp, FADVISE_SEQUENTIAL);
p->status = OPEN;
p->full_page_printed = false;
++total_files;
diff --git a/src/ptx.c b/src/ptx.c
index c6efb04c0f..ba5aec5192 100644
--- a/src/ptx.c
+++ b/src/ptx.c
@@ -25,6 +25,7 @@
#include "argmatch.h"
#include "diacrit.h"
#include "error.h"
+#include "fadvise.h"
#include "quote.h"
#include "quotearg.h"
#include "regex.h"
@@ -538,6 +539,8 @@ swallow_file_in_memory (const char *file_name, BLOCK *block)
{
size_t in_memory_size;
+ fdadvise (file_handle, 0, 0, FADVISE_SEQUENTIAL);
+
block->start = xmalloc ((size_t) stat_block.st_size);
if ((in_memory_size = read (file_handle,
diff --git a/src/shuf.c b/src/shuf.c
index 67b19af15a..e8ae645d25 100644
--- a/src/shuf.c
+++ b/src/shuf.c
@@ -23,6 +23,7 @@
#include "system.h"
#include "error.h"
+#include "fadvise.h"
#include "getopt.h"
#include "quote.h"
#include "quotearg.h"
@@ -378,6 +379,8 @@ main (int argc, char **argv)
usage (EXIT_FAILURE);
}
+ fadvise (stdin, FADVISE_SEQUENTIAL);
+
n_lines = read_input (stdin, eolbyte, &input_lines);
line = input_lines;
}
diff --git a/src/sum.c b/src/sum.c
index d0d160f7b9..4b98e59358 100644
--- a/src/sum.c
+++ b/src/sum.c
@@ -26,6 +26,7 @@
#include
#include "system.h"
#include "error.h"
+#include "fadvise.h"
#include "human.h"
#include "safe-read.h"
#include "xfreopen.h"
@@ -110,6 +111,8 @@ bsd_sum_file (const char *file, int print_name)
}
}
+ fadvise (fp, FADVISE_SEQUENTIAL);
+
while ((ch = getc (fp)) != EOF)
{
total_bytes++;
diff --git a/src/tee.c b/src/tee.c
index 39d989274d..a5b0369fd2 100644
--- a/src/tee.c
+++ b/src/tee.c
@@ -23,6 +23,7 @@
#include "system.h"
#include "error.h"
+#include "fadvise.h"
#include "stdio--.h"
#include "xfreopen.h"
@@ -157,6 +158,8 @@ tee_files (int nfiles, const char **files)
if (O_BINARY && ! isatty (STDOUT_FILENO))
xfreopen (NULL, "wb", stdout);
+ fadvise (stdin, FADVISE_SEQUENTIAL);
+
/* In the array of NFILES + 1 descriptors, make
the first one correspond to standard output. */
descriptors[0] = stdout;
diff --git a/src/tr.c b/src/tr.c
index 3722b4d84b..a5b68106be 100644
--- a/src/tr.c
+++ b/src/tr.c
@@ -25,6 +25,7 @@
#include "system.h"
#include "error.h"
+#include "fadvise.h"
#include "quote.h"
#include "safe-read.h"
#include "xfreopen.h"
@@ -1754,6 +1755,8 @@ main (int argc, char **argv)
if (O_BINARY && ! isatty (STDOUT_FILENO))
xfreopen (NULL, "wb", stdout);
+ fadvise (stdin, FADVISE_SEQUENTIAL);
+
if (squeeze_repeats && non_option_args == 1)
{
set_initialize (s1, complement, in_squeeze_set);
diff --git a/src/tsort.c b/src/tsort.c
index b5c187b30b..4f51f3074b 100644
--- a/src/tsort.c
+++ b/src/tsort.c
@@ -29,6 +29,7 @@
#include "system.h"
#include "long-options.h"
#include "error.h"
+#include "fadvise.h"
#include "quote.h"
#include "readtokens.h"
#include "stdio--.h"
@@ -444,6 +445,8 @@ tsort (const char *file)
if (!is_stdin && ! freopen (file, "r", stdin))
error (EXIT_FAILURE, errno, "%s", file);
+ fadvise (stdin, FADVISE_SEQUENTIAL);
+
init_tokenbuffer (&tokenbuffer);
while (1)
diff --git a/src/unexpand.c b/src/unexpand.c
index e9e5c6acfb..14b8df08a4 100644
--- a/src/unexpand.c
+++ b/src/unexpand.c
@@ -41,6 +41,7 @@
#include
#include "system.h"
#include "error.h"
+#include "fadvise.h"
#include "quote.h"
#include "xstrndup.h"
@@ -262,13 +263,14 @@ next_file (FILE *fp)
if (STREQ (file, "-"))
{
have_read_stdin = true;
- prev_file = file;
- return stdin;
+ fp = stdin;
}
- fp = fopen (file, "r");
+ else
+ fp = fopen (file, "r");
if (fp)
{
prev_file = file;
+ fadvise (fp, FADVISE_SEQUENTIAL);
return fp;
}
error (0, errno, "%s", file);
diff --git a/src/uniq.c b/src/uniq.c
index df59b12346..86ca8c9579 100644
--- a/src/uniq.c
+++ b/src/uniq.c
@@ -25,6 +25,7 @@
#include "argmatch.h"
#include "linebuffer.h"
#include "error.h"
+#include "fadvise.h"
#include "hard-locale.h"
#include "posixver.h"
#include "quote.h"
@@ -286,6 +287,8 @@ check_file (const char *infile, const char *outfile, char delimiter)
if (! (STREQ (outfile, "-") || freopen (outfile, "w", stdout)))
error (EXIT_FAILURE, errno, "%s", outfile);
+ fadvise (stdin, FADVISE_SEQUENTIAL);
+
thisline = &lb1;
prevline = &lb2;
diff --git a/src/wc.c b/src/wc.c
index 6df7feddfc..a1922baf9f 100644
--- a/src/wc.c
+++ b/src/wc.c
@@ -29,6 +29,7 @@
#include "system.h"
#include "argv-iter.h"
#include "error.h"
+#include "fadvise.h"
#include "mbchar.h"
#include "physmem.h"
#include "quote.h"
@@ -211,6 +212,10 @@ wc (int fd, char const *file_x, struct fstatus *fstatus)
}
count_complicated = print_words || print_linelength;
+ /* Advise the kernel of our access pattern only if we will read(). */
+ if (!count_bytes || count_chars || print_lines || count_complicated)
+ fdadvise (fd, 0, 0, FADVISE_SEQUENTIAL);
+
/* When counting only bytes, save some line- and word-counting
overhead. If FD is a `regular' Unix file, using lseek is enough
to get its `size' in bytes. Otherwise, read blocks of BUFFER_SIZE
@@ -238,6 +243,7 @@ wc (int fd, char const *file_x, struct fstatus *fstatus)
}
else
{
+ fdadvise (fd, 0, 0, FADVISE_SEQUENTIAL);
while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
{
if (bytes_read == SAFE_READ_ERROR)
--
2.47.2