mistakenly exit with a nonzero status.
[This bug was present in "the beginning".]
+ 'cut' given multiple -d options with multi-byte delimiters
+ now correctly matches the last delimiter specified.
+ [bug introduced with multi-byte support in coreutils-9.11]
+
'head' and 'tail' now quote names in file headers when needed.
[This bug was present in "the beginning".]
/* The delimiter character for multibyte field mode. */
static mcel_t delim_mcel;
-/* The delimiter bytes. */
-static char delim_bytes[MCEL_LEN_MAX];
+/* The delimiter bytes
+ (to be NUL terminated for strstr() optimization). */
+static char delim_bytes[MCEL_LEN_MAX + 1];
/* The delimiter for each line/record. */
static unsigned char line_delim = '\n';
mcel_t g = delim_mcel = mcel_scanz (optarg);
if (optarg[0] && optarg[g.len])
FATAL_ERROR (_("the delimiter must be a single character"));
- copy_bytes (delim_bytes, optarg, g.len);
+ /* Note NUL is copied for use with strstr. */
+ copy_bytes (delim_bytes, optarg, g.len + 1);
delim_specified = true;
break;
}
['mb-delim-10', '-s', '-d', "\xc3\xa9", '-f2',
{IN=>"a\0b\0"}, {OUT=>""},
{ENV => "LC_ALL=$mb_locale"}],
+ ['mb-delim-11', '-f1', '-d', "\xF0\x9F\x98\x80", # MCEL_LEN_MAX
+ {IN=>"a\xF0\x9F\x98\x80b\n"}, {OUT=>"a\n"},
+ {ENV => "LC_ALL=$mb_locale"}],
['mb-w-delim-1', '-w', '-f2', {IN=>"a\xe2\x80\x83b\n"}, {OUT=>"b\n"},
{ENV => "LC_ALL=$mb_locale"}],
['mb-w-delim-2', '-sw', '-f2', {IN=>"a\xc2\xa0b\n"}, {OUT=>""},
# -F in multi-byte locale
['mb-F-1', '-F', '2', {IN=>"\xc3\xa9\t\xc3\xbc\n"},
{OUT=>"\xc3\xbc\n"},
+ {ENV => "LC_ALL=$mb_locale"}],
+
+ # In coreutils-9.11 this corrupted the delimiter "string"
+ ['mb-mismatch', '-f1', '-d', "\xF0\x9F\x98\x80", '-d', "\xEF\xBC\x8C",
+ {IN=>"a\xEF\xBC\x8Cb\n"}, {OUT=>"a\n"},
{ENV => "LC_ALL=$mb_locale"}];
}