From: Pádraig Brady
Date: Sat, 21 Mar 2026 14:15:48 +0000 (+0000) Subject: cut: optimize when no delimiter in input X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=ea6a7ba547ea18302d255b5a4013d0ea2d5d2c88;p=thirdparty%2Fcoreutils.git cut: optimize when no delimiter in input This is about 20x faster. Note we only do the delimiter search once per chunk, and it's usually quick as delimiters wouldn't be too far into a chunk if present, so we don't bother to cache the found delimiter. --- diff --git a/src/cut.c b/src/cut.c index 15f2c0fa9d..208fa6e4a6 100644 --- a/src/cut.c +++ b/src/cut.c @@ -936,6 +936,31 @@ cut_fields_bytesearch (FILE *stream) .blank_delimited = whitespace_delimited }; + /* Shortcut the case where there is no delimiter in input, + as directly outputting without parsing is 20x faster. */ + if (field_idx == 1 + && !suppress_non_delimited && !whitespace_delimited + && !field_delim_is_line_delim () + && !have_pending_line + && field_1_n_bytes == 0 + && !skip_line_remainder + && !find_bytesearch_field_delim (chunk, safe)) + { + char *last_line_delim = feof (mbbuf.fp) ? 
chunk + safe - 1 + : memrchr ((void *) chunk, line_delim, safe); + if (last_line_delim) + { + idx_t n = last_line_delim - chunk + 1; + write_bytes (chunk, n); + if (feof (mbbuf.fp) && chunk[n - 1] != line_delim) + write_line_delim (); + mbbuf_advance (&mbbuf, n); + if (feof (mbbuf.fp)) + return; + continue; + } + } + while (processed < safe) { char *terminator = NULL; diff --git a/tests/cut/cut.pl b/tests/cut/cut.pl index 8c3c06653e..bfccdbe85f 100755 --- a/tests/cut/cut.pl +++ b/tests/cut/cut.pl @@ -170,6 +170,7 @@ my @Tests = ['newline-4', '-d:', '-f1', {IN=>"a:1\nb:2"}, {OUT=>"a\nb\n"}], ['newline-5', '-d:', '-f2', {IN=>"a:1\nb:2\n"}, {OUT=>"1\n2\n"}], ['newline-6', '-d:', '-f2', {IN=>"a:1\nb:2"}, {OUT=>"1\n2\n"}], + ['newline-6a', '-d:', '-f2', {IN=>"a\nb"}, {OUT=>"a\nb\n"}], ['newline-7', '-s', '-d:', '-f1', {IN=>"a:1\nb:2"}, {OUT=>"a\nb\n"}], ['newline-8', '-s', '-d:', '-f1', {IN=>"a:1\nb:2\n"}, {OUT=>"a\nb\n"}], ['newline-9', '-s', '-d:', '-f1', {IN=>"a1\nb2"}, {OUT=>""}],