From: Pádraig Brady Date: Wed, 11 Mar 2026 22:06:43 +0000 (+0000) Subject: cut: support single byte -d with GB18030 input X-Git-Tag: v9.11~123 X-Git-Url: http://git.ipfire.org/gitweb/index.cgi?a=commitdiff_plain;h=32f1de5b4fccc5adf81827d26dd7348f07c5a19c;p=thirdparty%2Fcoreutils.git cut: support single byte -d with GB18030 input * src/cut.c * tests/cut/mb-non-utf8.sh * tests/local.mk --- diff --git a/src/cut.c b/src/cut.c index 39626fd907..82e9065b69 100644 --- a/src/cut.c +++ b/src/cut.c @@ -249,7 +249,8 @@ is_range_start_index (uintmax_t k) static inline bool single_byte_field_delim_ok (void) { - return delim_length == 1 && (MB_CUR_MAX <= 1 || mcel_isbasic (delim_bytes[0])); + return delim_length == 1 + && (MB_CUR_MAX <= 1 || to_uchar (delim_bytes[0]) < 0x30); } static inline bool diff --git a/tests/cut/mb-non-utf8.sh b/tests/cut/mb-non-utf8.sh new file mode 100755 index 0000000000..00d9d0467c --- /dev/null +++ b/tests/cut/mb-non-utf8.sh @@ -0,0 +1,33 @@ +#!/bin/sh +# Test cut with non-UTF-8 multibyte locales. + +# Copyright (C) 2026 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +. 
"${srcdir=.}/tests/init.sh"; path_prepend_ ./src +print_ver_ cut printf + +export LC_ALL=zh_CN.gb18030 + +test "$(locale charmap 2>/dev/null | sed 's/gb/GB/')" = GB18030 || + skip_ 'GB18030 charset support not detected' + +for delim in ',' ':' "$(env printf '\xa2\xe3')" "$(env printf '\xff')"; do + num_out=$(printf "1${delim}2${delim}3\n" \ + | cut -d "$delim" -f2,3 --output-delimiter=_) + test "$num_out" = "2_3" || fail=1 +done + +Exit $fail diff --git a/tests/local.mk b/tests/local.mk index 590978297a..c92c7e3df8 100644 --- a/tests/local.mk +++ b/tests/local.mk @@ -294,6 +294,7 @@ all_tests = \ tests/chcon/chcon-fail.sh \ tests/misc/coreutils.sh \ tests/cut/cut.pl \ + tests/cut/mb-non-utf8.sh \ tests/cut/bounded-memory.sh \ tests/cut/cut-huge-range.sh \ tests/wc/wc.pl \