From: Paul Eggert
Date: Sat, 4 Mar 2023 19:42:16 +0000 (-0800)
Subject: split: port ‘split -n N /dev/null’ better to macOS
X-Git-Tag: v9.2~33
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=aa266f1b3dc4e12acdc46cc0f562adc03c2c0b8f;p=thirdparty%2Fcoreutils.git

split: port ‘split -n N /dev/null’ better to macOS

* src/split.c (input_file_size): Do not bother with lseek if the
initial read probe reaches EOF, since the file size is known then.
This works better on macOS, which doesn’t allow lseek on /dev/null.
Do not special-case size-zero files, as the issue can occur with
any size file (though /proc files are the most common). If the
current position is past end of file, treat this as size zero
regardless of whether the file has a usable st_size. Pass through
lseek -1 return values rather than using ‘return -1’; this makes
the code a bit easier to analyze (and a bit faster). Avoid
undefined behavior if the size calculation overflows.
(lines_chunk_split): Do not bother with lseek if it would have no
effect if successful. This works better on macOS, which doesn’t
allow lseek on /dev/null.
* tests/split/l-chunk.sh: Adjust to match fixed behavior.
---

diff --git a/NEWS b/NEWS
index 31b3e30657..849f174de4 100644
--- a/NEWS
+++ b/NEWS
@@ -54,6 +54,10 @@ GNU coreutils NEWS -*- outline -*-
   long been documented to be platform-dependent.
   [bug introduced 1999-05-02 and only partly fixed in coreutils-8.14]
 
+  split with -l or -n no longer misbehaves on small piped input, on
+  small GNU/Linux /proc files, or on macOS /dev/null.
+  [bug introduced in coreutils-8.8]
+
   stty ispeed and ospeed options no longer accept and silently ignore
   invalid speed arguments, or give false warnings for valid speeds.
   Now they're validated against both the general accepted set,
diff --git a/src/split.c b/src/split.c
index c66bc69a26..424ca9fe0b 100644
--- a/src/split.c
+++ b/src/split.c
@@ -283,14 +283,6 @@ CHUNKS may be:\n\
 static off_t
 input_file_size (int fd, struct stat const *st, char *buf, size_t bufsize)
 {
-  off_t cur = lseek (fd, 0, SEEK_CUR);
-  if (cur < 0)
-    {
-      if (errno == ESPIPE)
-        errno = 0; /* Suppress confusing seek error. */
-      return -1;
-    }
-
   off_t size = 0;
   do
     {
@@ -303,45 +295,49 @@ input_file_size (int fd, struct stat const *st, char *buf, size_t bufsize)
     }
   while (size < bufsize);
 
-  /* Note we check st_size _after_ the read() above
-     because /proc files on GNU/Linux are seekable
-     but have st_size == 0. */
-  if (st->st_size == 0)
+  off_t cur = lseek (fd, 0, SEEK_CUR);
+  if (cur < 0)
     {
-      /* We've filled the buffer, from a seekable file,
-         which has an st_size==0, E.g., /dev/zero on GNU/Linux.
-         Assume there is no limit to file size. */
-      errno = EOVERFLOW;
-      return -1;
+      if (errno == ESPIPE)
+        errno = 0; /* Suppress confusing seek error. */
+      return cur;
     }
 
-  cur += size;
   off_t end;
-  if (usable_st_size (st) && cur <= st->st_size)
+  if (usable_st_size (st))
     end = st->st_size;
   else
     {
       end = lseek (fd, 0, SEEK_END);
       if (end < 0)
-        return -1;
-      if (end != cur)
+        return end;
+      if (end == OFF_T_MAX)
+        goto overflow; /* E.g., /dev/zero on GNU/Hurd. */
+      if (cur < end)
         {
-          if (lseek (fd, cur, SEEK_SET) < 0)
-            return -1;
-          if (end < cur)
-            end = cur;
+          off_t cur1 = lseek (fd, cur, SEEK_SET);
+          if (cur1 < 0)
+            return cur1;
         }
     }
 
-  size += end - cur;
-  if (size == OFF_T_MAX)
-    {
-      /* E.g., /dev/zero on GNU/Hurd. */
-      errno = EOVERFLOW;
-      return -1;
-    }
+  /* Report overflow if we filled the buffer from a file with more
+     bytes than stat or lseek reports. This can happen with mutating
+     (e.g., /proc) files that are larger than the input block size.
+     FIXME: Handle this properly, e.g., by copying the growing file's
+     data into the first output file, and then splitting that output
+     file (which should not grow) into the other output files. */
+  if (end < size)
+    goto overflow;
+
+  if (cur < end && INT_ADD_WRAPV (size, end - cur, &size))
+    goto overflow;
 
   return size;
+
+ overflow:
+  errno = EOVERFLOW;
+  return -1;
 }
 
 /* Compute the next sequential output file name and store it into the
@@ -886,7 +882,8 @@ lines_chunk_split (uintmax_t k, uintmax_t n, char *buf, size_t bufsize,
         }
       else
         {
-          if (lseek (STDIN_FILENO, start - initial_read, SEEK_CUR) < 0)
+          if (initial_read < start
+              && lseek (STDIN_FILENO, start - initial_read, SEEK_CUR) < 0)
             die (EXIT_FAILURE, errno, "%s", quotef (infile));
           initial_read = SIZE_MAX;
         }
@@ -1005,7 +1002,8 @@ bytes_chunk_extract (uintmax_t k, uintmax_t n, char *buf, size_t bufsize,
     }
   else
     {
-      if (lseek (STDIN_FILENO, start - initial_read, SEEK_CUR) < 0)
+      if (initial_read < start
+          && lseek (STDIN_FILENO, start - initial_read, SEEK_CUR) < 0)
         die (EXIT_FAILURE, errno, "%s", quotef (infile));
       initial_read = SIZE_MAX;
     }
diff --git a/tests/split/l-chunk.sh b/tests/split/l-chunk.sh
index cdb201746d..c94380e87c 100755
--- a/tests/split/l-chunk.sh
+++ b/tests/split/l-chunk.sh
@@ -24,9 +24,10 @@ echo "split: invalid number of chunks: '1o'" > exp
 returns_ 1 split -n l/1o 2>err || fail=1
 compare exp err || fail=1
 
-echo "split: -: cannot determine file size" > exp
-: | returns_ 1 split -n l/1 2>err || fail=1
-compare exp err || fail=1
+rm -f x* || fail=1
+: | split -n l/1 || fail=1
+compare /dev/null xaa || fail=1
+test ! -f xab || fail=1
 
 # N can be greater than the file size
 # in which case no data is extracted, or empty files are written