From: Pádraig Brady
Date: Wed, 10 Jun 2026 06:02:40 +0000 (+0530) Subject: cut: fix NUL termination of multi-byte field delimiter X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=0a1a7f902204b62c24a4eebaeeedd5754559b691;p=thirdparty%2Fcoreutils.git cut: fix NUL termination of multi-byte field delimiter * src/cut.c (delim_bytes): Add a spare byte so a maximum-length multi-byte delimiter stays NUL terminated for the strstr needle in find_field_delim. Note in 9.11 this buffer was larger (16 bytes) and so did not have a buffer over-read issue. (main): Ensure DELIM_BYTES is NUL terminated after each --delimiter. Also explicitly writing the NUL allows for better testing by triggering overflow issues in default builds, if the buffer is not sized correctly. * NEWS: Mention the bug fix. * tests/cut/cut.pl: Add test cases. Reported at https://github.com/coreutils/coreutils/pull/286 --- diff --git a/NEWS b/NEWS index b822ae46ae..ac6a0d5ca1 100644 --- a/NEWS +++ b/NEWS @@ -8,6 +8,10 @@ GNU coreutils NEWS -*- outline -*- mistakenly exit with a nonzero status. [This bug was present in "the beginning".] + 'cut -d' with multiple multi-byte delimiter options specified + will correctly match the last delimiter specified. + [bug introduced with multi-byte support in coreutils-9.11] + 'head' and 'tail' now quote names in file headers when needed. [This bug was present in "the beginning".] diff --git a/src/cut.c b/src/cut.c index 3ec406586e..83f24244da 100644 --- a/src/cut.c +++ b/src/cut.c @@ -89,8 +89,9 @@ static bool complement; /* The delimiter character for multibyte field mode. */ static mcel_t delim_mcel; -/* The delimiter bytes. */ -static char delim_bytes[MCEL_LEN_MAX]; +/* The delimiter bytes + (to be NUL terminated for strstr() optimization). */ +static char delim_bytes[MCEL_LEN_MAX + 1]; /* The delimiter for each line/record. 
*/ static unsigned char line_delim = '\n'; @@ -1323,7 +1324,8 @@ main (int argc, char **argv) mcel_t g = delim_mcel = mcel_scanz (optarg); if (optarg[0] && optarg[g.len]) FATAL_ERROR (_("the delimiter must be a single character")); - copy_bytes (delim_bytes, optarg, g.len); + /* Note NUL is copied for use with strstr. */ + copy_bytes (delim_bytes, optarg, g.len + 1); delim_specified = true; break; } diff --git a/tests/cut/cut.pl b/tests/cut/cut.pl index 33d6388b9b..6f589a9b59 100755 --- a/tests/cut/cut.pl +++ b/tests/cut/cut.pl @@ -360,6 +360,9 @@ if ($mb_locale ne 'C') ['mb-delim-10', '-s', '-d', "\xc3\xa9", '-f2', {IN=>"a\0b\0"}, {OUT=>""}, {ENV => "LC_ALL=$mb_locale"}], + ['mb-delim-11', '-f1', '-d', "\xF0\x9F\x98\x80", # MCEL_LEN_MAX + {IN=>"a\xF0\x9F\x98\x80b\n"}, {OUT=>"a\n"}, + {ENV => "LC_ALL=$mb_locale"}], ['mb-w-delim-1', '-w', '-f2', {IN=>"a\xe2\x80\x83b\n"}, {OUT=>"b\n"}, {ENV => "LC_ALL=$mb_locale"}], ['mb-w-delim-2', '-sw', '-f2', {IN=>"a\xc2\xa0b\n"}, {OUT=>""}, @@ -438,6 +441,11 @@ if ($mb_locale ne 'C') # -F in multi-byte locale ['mb-F-1', '-F', '2', {IN=>"\xc3\xa9\t\xc3\xbc\n"}, {OUT=>"\xc3\xbc\n"}, + {ENV => "LC_ALL=$mb_locale"}], + + # In coreutils-9.11 this corrupted the delimiter "string" + ['mb-mismatch', '-f1', '-d', "\xF0\x9F\x98\x80", '-d', "\xEF\xBC\x8C", + {IN=>"a\xEF\xBC\x8Cb\n"}, {OUT=>"a\n"}, {ENV => "LC_ALL=$mb_locale"}]; }