From: Pádraig Brady Date: Tue, 24 Mar 2026 15:35:24 +0000 (+0000) Subject: cut: optimize -b for short lines X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=e73dbeb5ab8d77e0a4956525dd826fa06dd85fa0;p=thirdparty%2Fcoreutils.git cut: optimize -b for short lines For a 40% performance increase it's worth reinstating the simple original cut_bytes() which avoids data copying and function calls. Once a longer line is encountered we defer to the buffered variant. $ time src/cut.before -b2 sl.in >/dev/null real 0m0.101s $ time src/cut.after -b2 sl.in >/dev/null real 0m0.060s --- diff --git a/src/cut.c b/src/cut.c index 86cb37f7d6..3024941577 100644 --- a/src/cut.c +++ b/src/cut.c @@ -56,6 +56,9 @@ } \ while (0) +/* Above this function call overhead becomes less of a concern. + At this and below we avoid fwrite(), memchr() etc. */ +#define SMALL_BYTE_THRESHOLD 3 /* Pointer inside RP. When checking if a -b,-c,-f is selected by a finite range, we check if it is between CURRENT_RP.LO @@ -415,7 +418,7 @@ write_bytes (char const *buf, size_t n_bytes) { /* Avoid a function call for smaller amounts, using instead the macro to directly interact with the stdio buffer. */ - if (n_bytes <= 4) + if (n_bytes <= SMALL_BYTE_THRESHOLD) { for (size_t i = 0; i < n_bytes; i++) if (putchar (buf[i]) < 0) @@ -709,24 +712,22 @@ reset_field_line (uintmax_t *field_idx, bool *found_any_selected_field, parser->at_line_start = true; } -/* Read from stream STREAM, printing to standard output any selected bytes. */ +/* Read from STREAM using buffered block reads, printing selected bytes. + BYTE_IDX and PRINT_DELIMITER track the current line state and allow + callers to hand off mid-line. */ static void -cut_bytes (FILE *stream) +cut_bytes_buffered (FILE *stream, uintmax_t *byte_idx, bool *print_delimiter) { - uintmax_t byte_idx = 0; - bool print_delimiter = false; static char line_in[IO_BUFSIZE]; - current_rp = frp; - while (true) { idx_t available = fread (line_in, sizeof *line_in, sizeof line_in, stream); if (available == 0) { - write_pending_line_delim (byte_idx); + write_pending_line_delim (*byte_idx); break; } @@ -737,27 +738,28 @@ cut_bytes (FILE *stream) char *line = line_in + processed; char *line_end = memchr ((void *) line, line_delim, available - processed); - char *end = line + (line_end ? line_end - line : available - processed); + char *end = line + (line_end ? line_end - line + : available - processed); char *p = line; while (p < end) { - sync_byte_selection (byte_idx); + sync_byte_selection (*byte_idx); - if (byte_idx + 1 < current_rp->lo) + if (*byte_idx + 1 < current_rp->lo) { - idx_t skip = MIN (end - p, current_rp->lo - (byte_idx + 1)); + idx_t skip = MIN (end - p, current_rp->lo - (*byte_idx + 1)); p += skip; - byte_idx += skip; + *byte_idx += skip; } else { - idx_t n = MIN (end - p, current_rp->hi - byte_idx); - write_selected_item (&print_delimiter, - is_range_start_index (byte_idx + 1), + idx_t n = MIN (end - p, current_rp->hi - *byte_idx); + write_selected_item (print_delimiter, + is_range_start_index (*byte_idx + 1), p, n); p += n; - byte_idx += n; + *byte_idx += n; } } @@ -765,7 +767,49 @@ cut_bytes (FILE *stream) if (line_end) { processed++; - reset_item_line (&byte_idx, &print_delimiter); + reset_item_line (byte_idx, print_delimiter); + } + } + } +} + +/* Read from stream STREAM, printing to standard output any selected bytes. + This avoids data copies and function calls for short lines, + and will defer to cut_bytes_buffered() once a longer line is encountered. */ + +static void +cut_bytes (FILE *stream) +{ + uintmax_t byte_idx = 0; + bool print_delimiter = false; + + current_rp = frp; + + while (true) + { + int c = getc (stream); + + if (c == line_delim) + reset_item_line (&byte_idx, &print_delimiter); + else if (c == EOF) + { + write_pending_line_delim (byte_idx); + break; + } + else + { + next_item (&byte_idx); + if (print_kth (byte_idx)) + { + char ch = c; + write_selected_item (&print_delimiter, + is_range_start_index (byte_idx), &ch, 1); + } + + if (SMALL_BYTE_THRESHOLD < byte_idx) + { + cut_bytes_buffered (stream, &byte_idx, &print_delimiter); + break; } } }