From: Pádraig Brady Date: Wed, 1 Apr 2026 18:37:10 +0000 (+0100) Subject: cut: ensure responsive input processing X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=d757d32f86bfa1b95d571a9ee566eec9d25b9d51;p=thirdparty%2Fcoreutils.git cut: ensure responsive input processing * gl/lib/mbbuf.h (mbbuf_fill): Switch from fread() to read() as the former retries read() internally to fill the buffer. * src/cut.c: Adjust accordingly, and avoid getc() interface entirely. * bootstrap.conf: Depend explicitly on fseterr. This is already depended on transitively, so should not introduce new build portability issues. --- diff --git a/bootstrap.conf b/bootstrap.conf index d0e3b8842b..bc31a58623 100644 --- a/bootstrap.conf +++ b/bootstrap.conf @@ -119,6 +119,7 @@ gnulib_modules=" freopen freopen-safer fseeko + fseterr fstatat fsusage fsync diff --git a/gl/lib/mbbuf.h b/gl/lib/mbbuf.h index 84e1fdecf5..ed02a83d54 100644 --- a/gl/lib/mbbuf.h +++ b/gl/lib/mbbuf.h @@ -25,7 +25,9 @@ #include #include +#include +#include "fseterr.h" #include "mcel.h" #include "idx.h" @@ -47,6 +49,7 @@ typedef struct idx_t size; /* Number of bytes allocated for BUFFER. */ idx_t length; /* Number of bytes with data in BUFFER. */ idx_t offset; /* Current position in BUFFER. */ + bool eof; /* Whether at End Of File. */ } mbbuf_t; MBBUF_INLINE idx_t @@ -68,16 +71,19 @@ mbbuf_init (mbbuf_t *mbbuf, char *buffer, idx_t size, FILE *fp) mbbuf->size = size; mbbuf->length = 0; mbbuf->offset = 0; + mbbuf->eof = false; } /* Fill the input buffer with at least MCEL_LEN_MAX bytes if possible. - Return the number of bytes available from the current offset. */ + Return the number of bytes available from the current offset. + At end of file, MBBUF.EOF is set, and zero will eventually be returned. + Note feof() will _NOT_ be set on the MBBUF.FP. */ MBBUF_INLINE idx_t mbbuf_fill (mbbuf_t *mbbuf) { idx_t available = mbbuf_avail (mbbuf); - if (available < MCEL_LEN_MAX && ! 
feof (mbbuf->fp)) + if (available < MCEL_LEN_MAX && ! mbbuf->eof) { idx_t start; if (!(0 < available)) @@ -87,8 +93,20 @@ mbbuf_fill (mbbuf_t *mbbuf) memmove (mbbuf->buffer, mbbuf->buffer + mbbuf->offset, available); start = available; } - mbbuf->length = fread (mbbuf->buffer + start, 1, mbbuf->size - start, - mbbuf->fp) + start; + ssize_t read_ret = read (fileno (mbbuf->fp), mbbuf->buffer + start, + mbbuf->size - start); + if (read_ret < 0) + { + fseterr (mbbuf->fp); + mbbuf->eof = true; /* Avoid any more reads(). */ + mbbuf->length = start; + } + else + { + mbbuf->eof = read_ret == 0; + mbbuf->length = read_ret + start; + } + mbbuf->offset = 0; available = mbbuf_avail (mbbuf); } diff --git a/src/cut.c b/src/cut.c index 3a941dadd7..3ea85c4661 100644 --- a/src/cut.c +++ b/src/cut.c @@ -727,17 +727,22 @@ reset_field_line (uintmax_t *field_idx, bool *found_any_selected_field, callers to hand off mid-line. */ static void -cut_bytes_buffered (FILE *stream, uintmax_t *byte_idx, bool *print_delimiter) +cut_bytes (FILE *stream) { static char bytes_in[IO_BUFSIZE]; + uintmax_t byte_idx = 0; + bool print_delimiter = false; + + current_rp = frp; while (true) { - idx_t available = fread (bytes_in, sizeof *bytes_in, sizeof bytes_in, - stream); - if (available == 0) + idx_t available = read (fileno (stream), bytes_in, sizeof bytes_in); + if (available <= 0) { - write_pending_line_delim (*byte_idx); + if (available < 0) + fseterr (stream); + write_pending_line_delim (byte_idx); break; } @@ -754,22 +759,22 @@ cut_bytes_buffered (FILE *stream, uintmax_t *byte_idx, bool *print_delimiter) while (p < end) { - sync_byte_selection (*byte_idx); + sync_byte_selection (byte_idx); - if (*byte_idx + 1 < current_rp->lo) + if (byte_idx + 1 < current_rp->lo) { - idx_t skip = MIN (end - p, current_rp->lo - (*byte_idx + 1)); + idx_t skip = MIN (end - p, current_rp->lo - (byte_idx + 1)); p += skip; - *byte_idx += skip; + byte_idx += skip; } else { - idx_t n = MIN (end - p, current_rp->hi - 
*byte_idx); - write_selected_item (print_delimiter, - is_range_start_index (*byte_idx + 1), + idx_t n = MIN (end - p, current_rp->hi - byte_idx); + write_selected_item (&print_delimiter, + is_range_start_index (byte_idx + 1), p, n); p += n; - *byte_idx += n; + byte_idx += n; } } @@ -777,49 +782,7 @@ cut_bytes_buffered (FILE *stream, uintmax_t *byte_idx, bool *print_delimiter) if (line_end) { processed++; - reset_item_line (byte_idx, print_delimiter); - } - } - } -} - -/* Read from stream STREAM, printing to standard output any selected bytes. - This avoids data copies and function calls for short lines, - and will defer to cut_bytes_buffered() once a longer line is encountered. */ - -static void -cut_bytes (FILE *stream) -{ - uintmax_t byte_idx = 0; - bool print_delimiter = false; - - current_rp = frp; - - while (true) - { - int c = getc (stream); - - if (c == line_delim) - reset_item_line (&byte_idx, &print_delimiter); - else if (c == EOF) - { - write_pending_line_delim (byte_idx); - break; - } - else - { - next_item (&byte_idx); - if (print_kth (byte_idx)) - { - char ch = c; - write_selected_item (&print_delimiter, - is_range_start_index (byte_idx), &ch, 1); - } - - if (SMALL_BYTE_THRESHOLD < byte_idx) - { - cut_bytes_buffered (stream, &byte_idx, &print_delimiter); - break; + reset_item_line (&byte_idx, &print_delimiter); } } } @@ -1023,7 +986,7 @@ cut_fields_bytesearch (FILE *stream) idx_t safe = mbbuf_fill (&mbbuf); if (safe == 0) break; - search.at_eof = feof (mbbuf.fp); + search.at_eof = mbbuf.eof; search.line_end_known = false; char *chunk = mbbuf.buffer + mbbuf.offset;