From 0839e6d8d9371b7301133c6a2799bab1c906d2e1 Mon Sep 17 00:00:00 2001
From: =?utf8?q?P=C3=A1draig=20Brady?=
Date: Sun, 19 Mar 2017 17:22:34 -0700
Subject: [PATCH] split: process more efficiently when filters exit early

* src/split.c (bytes_split): Don't write to an existing filter
if it has exited.  When filters exit early, skip input data if
possible.  Refactor out 2 redundant variables.
* tests/split/filter.sh: Improve test coverage given the new
more efficient processing.  Also use a 10TB file to expand
the file systems tested on.
---
 src/split.c           | 37 ++++++++++++++++++++++---------------
 tests/split/filter.sh | 33 +++++++++++++++++++++++----------
 2 files changed, 45 insertions(+), 25 deletions(-)

diff --git a/src/split.c b/src/split.c
index 85bc052a86..01f97afadc 100644
--- a/src/split.c
+++ b/src/split.c
@@ -623,6 +623,7 @@ bytes_split (uintmax_t n_bytes, char *buf, size_t bufsize, size_t initial_read,
 {
   size_t n_read;
   bool new_file_flag = true;
+  bool filter_ok = true;
   uintmax_t to_write = n_bytes;
   uintmax_t opened = 0;
   bool eof;
@@ -637,42 +638,48 @@
         }
       else
         {
+          if (! filter_ok
+              && lseek (STDIN_FILENO, to_write, SEEK_CUR) != -1)
+            {
+              to_write = n_bytes;
+              new_file_flag = true;
+            }
+
           n_read = safe_read (STDIN_FILENO, buf, bufsize);
           if (n_read == SAFE_READ_ERROR)
             die (EXIT_FAILURE, errno, "%s", quotef (infile));
           eof = n_read == 0;
         }
       char *bp_out = buf;
-      size_t to_read = n_read;
-      while (to_write <= to_read)
+      while (to_write <= n_read)
         {
-          size_t w = to_write;
-          bool cwrite_ok = cwrite (new_file_flag, bp_out, w);
+          if (filter_ok || new_file_flag)
+            filter_ok = cwrite (new_file_flag, bp_out, to_write);
           opened += new_file_flag;
           new_file_flag = !max_files || (opened < max_files);
-          if (!new_file_flag && !cwrite_ok)
+          if (! filter_ok && ! new_file_flag)
             {
-              /* If filter no longer accepting input, stop reading.  */
-              n_read = to_read = 0;
+              /* If filters no longer accepting input, stop reading.  */
+              n_read = 0;
               eof = true;
               break;
             }
-          bp_out += w;
-          to_read -= w;
+          bp_out += to_write;
+          n_read -= to_write;
           to_write = n_bytes;
         }
-      if (to_read != 0)
+      if (n_read != 0)
         {
-          bool cwrite_ok = cwrite (new_file_flag, bp_out, to_read);
+          if (filter_ok || new_file_flag)
+            filter_ok = cwrite (new_file_flag, bp_out, n_read);
           opened += new_file_flag;
-          to_write -= to_read;
           new_file_flag = false;
-          if (!cwrite_ok && opened == max_files)
+          if (! filter_ok && opened == max_files)
             {
-              /* If filter no longer accepting input, stop reading.  */
-              n_read = 0;
+              /* If filters no longer accepting input, stop reading.  */
               break;
             }
+          to_write -= n_read;
         }
     }
   while (! eof);
diff --git a/tests/split/filter.sh b/tests/split/filter.sh
index a85093c764..a703b3bd04 100755
--- a/tests/split/filter.sh
+++ b/tests/split/filter.sh
@@ -18,8 +18,7 @@
 
 . "${srcdir=.}/tests/init.sh"; path_prepend_ ./src
 print_ver_ split
-require_sparse_support_ # for 'truncate --size=$OFF_T_MAX'
-eval $(getlimits) # for OFF_T limits
+require_sparse_support_ # for 'truncate --size=$LARGE'
 xz --version || skip_ "xz (better than gzip/bzip2) required"
 
 for total_n_lines in 5 3000 20000; do
@@ -52,15 +51,29 @@ returns_ 1 split -n 1/2 --filter='true' /dev/null 2>&1 || fail=1
 # where they would result in a non zero exit from split.
 yes | head -n200K | split -b1G --filter='head -c1 >/dev/null' || fail=1
 
-# Do not use a size of OFF_T_MAX, since split.c applies a GNU/Hurd
-# /dev/zero workaround for files of that size.  Use one less:
-N=$(expr $OFF_T_MAX - 1)
-
 # Ensure that "endless" input is ignored when all filters finish
-timeout 10 sh -c 'yes | split --filter="head -c1 >/dev/null" -n r/1' || fail=1
-if truncate -s$N zero.in; then
-  timeout 10 sh -c 'split --filter="head -c1 >/dev/null" -n 1 zero.in' || fail=1
-fi
+for mode in '' 'r/'; do
+  FILE='-'
+  if test "$mode" = ''; then
+    FILE='zero.in'
+    truncate -s10T "$FILE" || continue
+  fi
+  for N in 1 2; do
+    rm -f x??.n || framework_failure_
+    timeout 10 sh -c \
+      "yes | split --filter='head -c1 >\$FILE.n' -n $mode$N $FILE" || fail=1
+    # Also ensure we get appropriate output from each filter
+    seq 1 $N | tr '0-9' 1 > stat.exp
+    stat -c%s x??.n > stat.out || framework_failure_
+    compare stat.exp stat.out || fail=1
+  done
+done
+
+# Ensure that "endless" input _is_ processed for unbounded number of filters
+for buf in 1000 1000000; do
+  returns_ 124 timeout .5 sh -c \
+    "yes | split --filter='head -c1 >/dev/null' -b $buf" || fail=1
+done
 
 # Ensure that "endless" input _is_ processed for unbounded number of filters
 for buf in 1000 1000000; do
-- 
2.47.2