From 47076e3c7c22fc7557f388ad3d47228b922da71e Mon Sep 17 00:00:00 2001 From: =?utf8?q?P=C3=A1draig=20Brady?= Date: Tue, 20 Jul 2010 18:51:01 +0100 Subject: [PATCH] provide POSIX_FADV_SEQUENTIAL hint to appropriate utils Following on from commit dae35bac, 01-03-2010, "sort: inform the system about our input access pattern" apply the same hint to all appropriate utils. This currently gives around a 5% speedup for reading large files from fast flash devices on GNU/Linux. * src/base64.c: Call fadvise (..., FADVISE_SEQUENTIAL); * src/cat.c: Likewise. * src/cksum.c: Likewise. * src/comm.c: Likewise. * src/cut.c: Likewise. * src/expand.c: Likewise. * src/fmt.c: Likewise. * src/fold.c: Likewise. * src/join.c: Likewise. * src/md5sum.c: Likewise. * src/nl.c: Likewise. * src/paste.c: Likewise. * src/pr.c: Likewise. * src/ptx.c: Likewise. * src/shuf.c: Likewise. * src/sum.c: Likewise. * src/tee.c: Likewise. * src/tr.c: Likewise. * src/tsort.c: Likewise. * src/unexpand.c: Likewise. * src/uniq.c: Likewise. * src/wc.c: Likewise, unless we don't actually read(). 
--- src/base64.c | 3 +++ src/cat.c | 3 +++ src/cksum.c | 3 +++ src/comm.c | 3 +++ src/cut.c | 3 +++ src/expand.c | 8 +++++--- src/fmt.c | 2 ++ src/fold.c | 3 +++ src/join.c | 4 ++++ src/md5sum.c | 3 +++ src/nl.c | 3 +++ src/paste.c | 3 +++ src/pr.c | 2 ++ src/ptx.c | 3 +++ src/shuf.c | 3 +++ src/sum.c | 3 +++ src/tee.c | 3 +++ src/tr.c | 3 +++ src/tsort.c | 3 +++ src/unexpand.c | 8 +++++--- src/uniq.c | 3 +++ src/wc.c | 6 ++++++ 22 files changed, 72 insertions(+), 6 deletions(-) diff --git a/src/base64.c b/src/base64.c index fddb61c9dd..1a36c9183b 100644 --- a/src/base64.c +++ b/src/base64.c @@ -26,6 +26,7 @@ #include "system.h" #include "error.h" +#include "fadvise.h" #include "xstrtol.h" #include "quote.h" #include "quotearg.h" @@ -302,6 +303,8 @@ main (int argc, char **argv) error (EXIT_FAILURE, errno, "%s", infile); } + fadvise (input_fh, FADVISE_SEQUENTIAL); + if (decode) do_decode (input_fh, stdout, ignore_garbage); else diff --git a/src/cat.c b/src/cat.c index c4a2a9e3ca..47b5053525 100644 --- a/src/cat.c +++ b/src/cat.c @@ -34,6 +34,7 @@ #include "system.h" #include "error.h" +#include "fadvise.h" #include "full-write.h" #include "quote.h" #include "safe-read.h" @@ -700,6 +701,8 @@ main (int argc, char **argv) } insize = io_blksize (stat_buf); + fdadvise (input_desc, 0, 0, FADVISE_SEQUENTIAL); + /* Compare the device and i-node numbers of this input file with the corresponding values of the (output file associated with) stdout, and skip this input file if they coincide. 
Input diff --git a/src/cksum.c b/src/cksum.c index d240eb3862..282c777aa6 100644 --- a/src/cksum.c +++ b/src/cksum.c @@ -43,6 +43,7 @@ #include #include #include "system.h" +#include "fadvise.h" #include "xfreopen.h" #ifdef CRCTAB @@ -205,6 +206,8 @@ cksum (const char *file, bool print_name) } } + fadvise (fp, FADVISE_SEQUENTIAL); + while ((bytes_read = fread (buf, 1, BUFLEN, fp)) > 0) { unsigned char *cp = buf; diff --git a/src/comm.c b/src/comm.c index ff42802cb7..06b80b0713 100644 --- a/src/comm.c +++ b/src/comm.c @@ -24,6 +24,7 @@ #include "system.h" #include "linebuffer.h" #include "error.h" +#include "fadvise.h" #include "hard-locale.h" #include "quote.h" #include "stdio--.h" @@ -273,6 +274,8 @@ compare_files (char **infiles) if (!streams[i]) error (EXIT_FAILURE, errno, "%s", infiles[i]); + fadvise (streams[i], FADVISE_SEQUENTIAL); + thisline[i] = readlinebuffer (all_line[i][alt[i][0]], streams[i]); if (ferror (streams[i])) error (EXIT_FAILURE, errno, "%s", infiles[i]); diff --git a/src/cut.c b/src/cut.c index 5fcf0743a7..58776d991f 100644 --- a/src/cut.c +++ b/src/cut.c @@ -31,6 +31,7 @@ #include "system.h" #include "error.h" +#include "fadvise.h" #include "getndelim2.h" #include "hash.h" #include "quote.h" @@ -733,6 +734,8 @@ cut_file (char const *file) } } + fadvise (stream, FADVISE_SEQUENTIAL); + cut_stream (stream); if (ferror (stream)) diff --git a/src/expand.c b/src/expand.c index be50063b6e..249255d15b 100644 --- a/src/expand.c +++ b/src/expand.c @@ -40,6 +40,7 @@ #include #include "system.h" #include "error.h" +#include "fadvise.h" #include "quote.h" #include "xstrndup.h" @@ -243,13 +244,14 @@ next_file (FILE *fp) if (STREQ (file, "-")) { have_read_stdin = true; - prev_file = file; - return stdin; + fp = stdin; } - fp = fopen (file, "r"); + else + fp = fopen (file, "r"); if (fp) { prev_file = file; + fadvise (fp, FADVISE_SEQUENTIAL); return fp; } error (0, errno, "%s", file); diff --git a/src/fmt.c b/src/fmt.c index 1a268eeb43..8a5d8bd8c6 100644 --- 
a/src/fmt.c +++ b/src/fmt.c @@ -27,6 +27,7 @@ #include "system.h" #include "error.h" +#include "fadvise.h" #include "quote.h" #include "xstrtol.h" @@ -463,6 +464,7 @@ set_prefix (char *p) static void fmt (FILE *f) { + fadvise (f, FADVISE_SEQUENTIAL); tabs = false; other_indent = 0; next_char = get_prefix (f); diff --git a/src/fold.c b/src/fold.c index 9364a03f94..d5858568ca 100644 --- a/src/fold.c +++ b/src/fold.c @@ -24,6 +24,7 @@ #include "system.h" #include "error.h" +#include "fadvise.h" #include "quote.h" #include "xstrtol.h" @@ -142,6 +143,8 @@ fold_file (char const *filename, size_t width) return false; } + fadvise (istream, FADVISE_SEQUENTIAL); + while ((c = getc (istream)) != EOF) { if (offset_out + 1 >= allocated_out) diff --git a/src/join.c b/src/join.c index c977116ab8..fa18c9d065 100644 --- a/src/join.c +++ b/src/join.c @@ -24,6 +24,7 @@ #include "system.h" #include "error.h" +#include "fadvise.h" #include "hard-locale.h" #include "linebuffer.h" #include "memcasecmp.h" @@ -617,6 +618,9 @@ join (FILE *fp1, FILE *fp2) int diff; bool eof1, eof2; + fadvise (fp1, FADVISE_SEQUENTIAL); + fadvise (fp2, FADVISE_SEQUENTIAL); + /* Read the first line of each file. 
*/ initseq (&seq1); getseq (fp1, &seq1, 1); diff --git a/src/md5sum.c b/src/md5sum.c index cbf198645a..10d4fa26c7 100644 --- a/src/md5sum.c +++ b/src/md5sum.c @@ -36,6 +36,7 @@ # include "sha512.h" #endif #include "error.h" +#include "fadvise.h" #include "stdio--.h" #include "xfreopen.h" @@ -406,6 +407,8 @@ digest_file (const char *filename, int *binary, unsigned char *bin_result) } } + fadvise (fp, FADVISE_SEQUENTIAL); + err = DIGEST_STREAM (fp, bin_result); if (err) { diff --git a/src/nl.c b/src/nl.c index a9446f4445..634a975c63 100644 --- a/src/nl.c +++ b/src/nl.c @@ -28,6 +28,7 @@ #include #include "error.h" +#include "fadvise.h" #include "linebuffer.h" #include "quote.h" #include "xstrtol.h" @@ -439,6 +440,8 @@ nl_file (char const *file) } } + fadvise (stream, FADVISE_SEQUENTIAL); + process_file (stream); if (ferror (stream)) diff --git a/src/paste.c b/src/paste.c index 36e1cfbc0d..dbbf52df5d 100644 --- a/src/paste.c +++ b/src/paste.c @@ -42,6 +42,7 @@ #include #include "system.h" #include "error.h" +#include "fadvise.h" #include "quotearg.h" /* The official name of this program (e.g., no `g' prefix). */ @@ -211,6 +212,7 @@ paste_parallel (size_t nfiles, char **fnamptr) error (EXIT_FAILURE, errno, "%s", fnamptr[files_open]); else if (fileno (fileptr[files_open]) == STDIN_FILENO) opened_stdin = true; + fadvise (fileptr[files_open], FADVISE_SEQUENTIAL); } } @@ -367,6 +369,7 @@ paste_serial (size_t nfiles, char **fnamptr) ok = false; continue; } + fadvise (fileptr, FADVISE_SEQUENTIAL); } delimptr = delims; /* Set up for delimiter string. 
*/ diff --git a/src/pr.c b/src/pr.c index f942151a78..9c2d10c10a 100644 --- a/src/pr.c +++ b/src/pr.c @@ -314,6 +314,7 @@ #include #include "system.h" #include "error.h" +#include "fadvise.h" #include "hard-locale.h" #include "mbswidth.h" #include "quote.h" @@ -1507,6 +1508,7 @@ open_file (char *name, COLUMN *p) error (0, errno, "%s", name); return false; } + fadvise (p->fp, FADVISE_SEQUENTIAL); p->status = OPEN; p->full_page_printed = false; ++total_files; diff --git a/src/ptx.c b/src/ptx.c index c6efb04c0f..ba5aec5192 100644 --- a/src/ptx.c +++ b/src/ptx.c @@ -25,6 +25,7 @@ #include "argmatch.h" #include "diacrit.h" #include "error.h" +#include "fadvise.h" #include "quote.h" #include "quotearg.h" #include "regex.h" @@ -538,6 +539,8 @@ swallow_file_in_memory (const char *file_name, BLOCK *block) { size_t in_memory_size; + fdadvise (file_handle, 0, 0, FADVISE_SEQUENTIAL); + block->start = xmalloc ((size_t) stat_block.st_size); if ((in_memory_size = read (file_handle, diff --git a/src/shuf.c b/src/shuf.c index 67b19af15a..e8ae645d25 100644 --- a/src/shuf.c +++ b/src/shuf.c @@ -23,6 +23,7 @@ #include "system.h" #include "error.h" +#include "fadvise.h" #include "getopt.h" #include "quote.h" #include "quotearg.h" @@ -378,6 +379,8 @@ main (int argc, char **argv) usage (EXIT_FAILURE); } + fadvise (stdin, FADVISE_SEQUENTIAL); + n_lines = read_input (stdin, eolbyte, &input_lines); line = input_lines; } diff --git a/src/sum.c b/src/sum.c index d0d160f7b9..4b98e59358 100644 --- a/src/sum.c +++ b/src/sum.c @@ -26,6 +26,7 @@ #include #include "system.h" #include "error.h" +#include "fadvise.h" #include "human.h" #include "safe-read.h" #include "xfreopen.h" @@ -110,6 +111,8 @@ bsd_sum_file (const char *file, int print_name) } } + fadvise (fp, FADVISE_SEQUENTIAL); + while ((ch = getc (fp)) != EOF) { total_bytes++; diff --git a/src/tee.c b/src/tee.c index 39d989274d..a5b0369fd2 100644 --- a/src/tee.c +++ b/src/tee.c @@ -23,6 +23,7 @@ #include "system.h" #include "error.h" 
+#include "fadvise.h" #include "stdio--.h" #include "xfreopen.h" @@ -157,6 +158,8 @@ tee_files (int nfiles, const char **files) if (O_BINARY && ! isatty (STDOUT_FILENO)) xfreopen (NULL, "wb", stdout); + fadvise (stdin, FADVISE_SEQUENTIAL); + /* In the array of NFILES + 1 descriptors, make the first one correspond to standard output. */ descriptors[0] = stdout; diff --git a/src/tr.c b/src/tr.c index 3722b4d84b..a5b68106be 100644 --- a/src/tr.c +++ b/src/tr.c @@ -25,6 +25,7 @@ #include "system.h" #include "error.h" +#include "fadvise.h" #include "quote.h" #include "safe-read.h" #include "xfreopen.h" @@ -1754,6 +1755,8 @@ main (int argc, char **argv) if (O_BINARY && ! isatty (STDOUT_FILENO)) xfreopen (NULL, "wb", stdout); + fadvise (stdin, FADVISE_SEQUENTIAL); + if (squeeze_repeats && non_option_args == 1) { set_initialize (s1, complement, in_squeeze_set); diff --git a/src/tsort.c b/src/tsort.c index b5c187b30b..4f51f3074b 100644 --- a/src/tsort.c +++ b/src/tsort.c @@ -29,6 +29,7 @@ #include "system.h" #include "long-options.h" #include "error.h" +#include "fadvise.h" #include "quote.h" #include "readtokens.h" #include "stdio--.h" @@ -444,6 +445,8 @@ tsort (const char *file) if (!is_stdin && ! 
freopen (file, "r", stdin)) error (EXIT_FAILURE, errno, "%s", file); + fadvise (stdin, FADVISE_SEQUENTIAL); + init_tokenbuffer (&tokenbuffer); while (1) diff --git a/src/unexpand.c b/src/unexpand.c index e9e5c6acfb..14b8df08a4 100644 --- a/src/unexpand.c +++ b/src/unexpand.c @@ -41,6 +41,7 @@ #include #include "system.h" #include "error.h" +#include "fadvise.h" #include "quote.h" #include "xstrndup.h" @@ -262,13 +263,14 @@ next_file (FILE *fp) if (STREQ (file, "-")) { have_read_stdin = true; - prev_file = file; - return stdin; + fp = stdin; } - fp = fopen (file, "r"); + else + fp = fopen (file, "r"); if (fp) { prev_file = file; + fadvise (fp, FADVISE_SEQUENTIAL); return fp; } error (0, errno, "%s", file); diff --git a/src/uniq.c b/src/uniq.c index df59b12346..86ca8c9579 100644 --- a/src/uniq.c +++ b/src/uniq.c @@ -25,6 +25,7 @@ #include "argmatch.h" #include "linebuffer.h" #include "error.h" +#include "fadvise.h" #include "hard-locale.h" #include "posixver.h" #include "quote.h" @@ -286,6 +287,8 @@ check_file (const char *infile, const char *outfile, char delimiter) if (! (STREQ (outfile, "-") || freopen (outfile, "w", stdout))) error (EXIT_FAILURE, errno, "%s", outfile); + fadvise (stdin, FADVISE_SEQUENTIAL); + thisline = &lb1; prevline = &lb2; diff --git a/src/wc.c b/src/wc.c index 6df7feddfc..a1922baf9f 100644 --- a/src/wc.c +++ b/src/wc.c @@ -29,6 +29,7 @@ #include "system.h" #include "argv-iter.h" #include "error.h" +#include "fadvise.h" #include "mbchar.h" #include "physmem.h" #include "quote.h" @@ -211,6 +212,10 @@ wc (int fd, char const *file_x, struct fstatus *fstatus) } count_complicated = print_words || print_linelength; + /* Advise the kernel of our access pattern only if we will read(). */ + if (!count_bytes || count_chars || print_lines || count_complicated) + fdadvise (fd, 0, 0, FADVISE_SEQUENTIAL); + /* When counting only bytes, save some line- and word-counting overhead. 
If FD is a `regular' Unix file, using lseek is enough to get its `size' in bytes. Otherwise, read blocks of BUFFER_SIZE @@ -238,6 +243,7 @@ wc (int fd, char const *file_x, struct fstatus *fstatus) } else { + fdadvise (fd, 0, 0, FADVISE_SEQUENTIAL); while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0) { if (bytes_read == SAFE_READ_ERROR) -- 2.47.2