git.ipfire.org Git - thirdparty/coreutils.git/commitdiff
cut: support single byte -d with GB18030 input
author Pádraig Brady <P@draigBrady.com>
Wed, 11 Mar 2026 22:06:43 +0000 (22:06 +0000)
committer Pádraig Brady <P@draigBrady.com>
Sun, 5 Apr 2026 12:15:55 +0000 (13:15 +0100)
* src/cut.c
* tests/cut/mb-non-utf8.sh
* tests/local.mk

src/cut.c
tests/cut/mb-non-utf8.sh [new file with mode: 0755]
tests/local.mk

diff --git a/src/cut.c b/src/cut.c
index 39626fd9077fe93457ba9a91c90ce9976db6f1b6..82e9065b692d68d127d15eef7e38e1e065bf52e4 100644
--- a/src/cut.c
+++ b/src/cut.c
@@ -249,7 +249,8 @@ is_range_start_index (uintmax_t k)
 static inline bool
 single_byte_field_delim_ok (void)
 {
-  return delim_length == 1 && (MB_CUR_MAX <= 1 || mcel_isbasic (delim_bytes[0]));
+  return delim_length == 1
+         && (MB_CUR_MAX <= 1 || to_uchar (delim_bytes[0]) < 0x30);
 }
 
 static inline bool
diff --git a/tests/cut/mb-non-utf8.sh b/tests/cut/mb-non-utf8.sh
new file mode 100755
index 0000000..00d9d04
--- /dev/null
+++ b/tests/cut/mb-non-utf8.sh
@@ -0,0 +1,33 @@
+#!/bin/sh
+# Test cut with non-UTF-8 multibyte locales.
+
+# Copyright (C) 2026 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src
+print_ver_ cut printf
+
+export LC_ALL=zh_CN.gb18030
+
+test "$(locale charmap 2>/dev/null | sed 's/gb/GB/')" = GB18030 ||
+  skip_ 'GB18030 charset support not detected'
+
+for delim in ',' ':' "$(env printf '\xa2\xe3')" "$(env printf '\xff')"; do
+  num_out=$(printf "1${delim}2${delim}3\n" \
+            | cut -d "$delim" -f2,3 --output-delimiter=_)
+  test "$num_out" = "2_3" || fail=1
+done
+
+Exit $fail
diff --git a/tests/local.mk b/tests/local.mk
index 590978297a21acd86cf196853039c7a7ad0a2862..c92c7e3df8bbb5867c47aa69bea00b543b08badf 100644
--- a/tests/local.mk
+++ b/tests/local.mk
@@ -294,6 +294,7 @@ all_tests =                                 \
   tests/chcon/chcon-fail.sh                    \
   tests/misc/coreutils.sh                      \
   tests/cut/cut.pl                             \
+  tests/cut/mb-non-utf8.sh                     \
   tests/cut/bounded-memory.sh                  \
   tests/cut/cut-huge-range.sh                  \
   tests/wc/wc.pl                               \