From: Pádraig Brady Date: Thu, 20 Mar 2014 10:00:13 +0000 (+0000) Subject: split: avoid unnecessary input buffering X-Git-Tag: v8.23~111 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=5f9a5b3f0346f4fb6b758953d4eaeff3d6a419d6;p=thirdparty%2Fcoreutils.git split: avoid unnecessary input buffering Input buffering is best avoided because it introduces delayed processing of output for intermittent input, especially when the output size is less than that of the input buffer. This is significant when output is being further processed which could happen if split is writing to precreated fifos, or through --filter. If input is arriving quickly from a pipe then this will already be buffered before we read it, so fast arriving input shouldn't be a performance issue. * src/split.c (lines_split, line_bytes_split, bytes_split, lines_chunk_split, bytes_chunk_extract): s/full_read/safe_read/. * THANKS.in: Mention the reporter. * NEWS: Mention the improvement. --- diff --git a/NEWS b/NEWS index c2caa427ed..f68ab4f7c1 100644 --- a/NEWS +++ b/NEWS @@ -45,6 +45,9 @@ GNU coreutils NEWS -*- outline -*- causing name look-up errors. Also look-ups are first done outside the chroot, in case the look-up within the chroot fails due to library conflicts etc. + split avoids unnecessary input buffering, immediately writing input to output + which is significant with --filter or when writing to fifos or stdout etc. + stat and tail work better with HFS+ and HFSX. stat -f --format=%T now reports the file system type, and tail -f now uses inotify for files, rather than the default of issuing a warning and reverting to polling. 
diff --git a/THANKS.in b/THANKS.in index 20654fe5f2..7f14a6993a 100644 --- a/THANKS.in +++ b/THANKS.in @@ -184,6 +184,7 @@ Egmont Koblinger egmont@uhulinux.hu Eirik Fuller eirik@hackrat.com Eivind eivindt@multinet.no Elbert Pol elbert.pol@gmail.com +Eldon Stegall eldon@eldondev.com Eli Zaretskii eliz@is.elta.co.il Emile LeBlanc leblanc@math.toronto.edu Emmanuel Lacour elacour@home-dn.net diff --git a/src/split.c b/src/split.c index 29d3dbfe42..dacacaa881 100644 --- a/src/split.c +++ b/src/split.c @@ -33,7 +33,6 @@ #include "error.h" #include "fd-reopen.h" #include "fcntl--.h" -#include "full-read.h" #include "full-write.h" #include "ioblksize.h" #include "quote.h" @@ -526,8 +525,8 @@ bytes_split (uintmax_t n_bytes, char *buf, size_t bufsize, uintmax_t max_files) do { - n_read = full_read (STDIN_FILENO, buf, bufsize); - if (n_read < bufsize && errno) + n_read = safe_read (STDIN_FILENO, buf, bufsize); + if (n_read == SAFE_READ_ERROR) error (EXIT_FAILURE, errno, "%s", infile); bp_out = buf; to_read = n_read; @@ -562,7 +561,7 @@ bytes_split (uintmax_t n_bytes, char *buf, size_t bufsize, uintmax_t max_files) } } } - while (n_read == bufsize); + while (n_read); /* Ensure NUMBER files are created, which truncates any existing files or notifies any consumers on fifos. 
@@ -584,8 +583,8 @@ lines_split (uintmax_t n_lines, char *buf, size_t bufsize) do { - n_read = full_read (STDIN_FILENO, buf, bufsize); - if (n_read < bufsize && errno) + n_read = safe_read (STDIN_FILENO, buf, bufsize); + if (n_read == SAFE_READ_ERROR) error (EXIT_FAILURE, errno, "%s", infile); bp = bp_out = buf; eob = bp + n_read; @@ -614,7 +613,7 @@ lines_split (uintmax_t n_lines, char *buf, size_t bufsize) } } } - while (n_read == bufsize); + while (n_read); } /* Split into pieces that are as large as possible while still not more @@ -633,8 +632,8 @@ line_bytes_split (uintmax_t n_bytes, char *buf, size_t bufsize) do { - n_read = full_read (STDIN_FILENO, buf, bufsize); - if (n_read < bufsize && errno) + n_read = safe_read (STDIN_FILENO, buf, bufsize); + if (n_read == SAFE_READ_ERROR) error (EXIT_FAILURE, errno, "%s", infile); size_t n_left = n_read; char *sob = buf; @@ -718,7 +717,7 @@ line_bytes_split (uintmax_t n_bytes, char *buf, size_t bufsize) } } } - while (n_read == bufsize); + while (n_read); /* Handle no eol at end of file. */ if (n_hold) @@ -762,8 +761,8 @@ lines_chunk_split (uintmax_t k, uintmax_t n, char *buf, size_t bufsize, while (n_written < file_size) { char *bp = buf, *eob; - size_t n_read = full_read (STDIN_FILENO, buf, bufsize); - if (n_read < bufsize && errno) + size_t n_read = safe_read (STDIN_FILENO, buf, bufsize); + if (n_read == SAFE_READ_ERROR) error (EXIT_FAILURE, errno, "%s", infile); else if (n_read == 0) break; /* eof. */ @@ -857,8 +856,8 @@ bytes_chunk_extract (uintmax_t k, uintmax_t n, char *buf, size_t bufsize, while (start < end) { - size_t n_read = full_read (STDIN_FILENO, buf, bufsize); - if (n_read < bufsize && errno) + size_t n_read = safe_read (STDIN_FILENO, buf, bufsize); + if (n_read == SAFE_READ_ERROR) error (EXIT_FAILURE, errno, "%s", infile); else if (n_read == 0) break; /* eof. 
*/ @@ -998,8 +997,6 @@ lines_rr (uintmax_t k, uintmax_t n, char *buf, size_t bufsize) while (true) { char *bp = buf, *eob; - /* Use safe_read() rather than full_read() here - so that we process available data immediately. */ size_t n_read = safe_read (STDIN_FILENO, buf, bufsize); if (n_read == SAFE_READ_ERROR) error (EXIT_FAILURE, errno, "%s", infile);