From: Sylvestre Ledru <sylvestre@debian.org>
Date: Fri, 3 Apr 2026 10:23:52 +0000 (+0200)
Subject: tests: split: verify non-UTF-8 bytes are preserved in filenames
X-Git-Tag: v9.11~56
X-Git-Url: http://git.ipfire.org/gitweb/?a=commitdiff_plain;h=829593317db7768153eee864d46fcb4509360f88;p=thirdparty%2Fcoreutils.git

tests: split: verify non-UTF-8 bytes are preserved in filenames

* tests/split/non-utf8.sh: New test to ensure that non-UTF-8 bytes
in the prefix and --additional-suffix are preserved as-is in output
filenames, rather than being replaced by UTF-8 replacement characters.
* tests/local.mk: Register new test.
https://github.com/uutils/coreutils/pull/11397
https://github.com/coreutils/coreutils/pull/239
---

diff --git a/tests/local.mk b/tests/local.mk
index 1e1f0b3fbb..884743259d 100644
--- a/tests/local.mk
+++ b/tests/local.mk
@@ -460,6 +460,7 @@ all_tests =					\
   tests/split/record-sep.sh			\
   tests/split/numeric.sh			\
   tests/split/guard-input.sh			\
+  tests/split/non-utf8.sh			\
   tests/split/split-io-err.sh			\
   tests/stat/stat-birthtime.sh			\
   tests/stat/stat-fmt.sh			\
diff --git a/tests/split/non-utf8.sh b/tests/split/non-utf8.sh
new file mode 100755
index 0000000000..224b7a9b5f
--- /dev/null
+++ b/tests/split/non-utf8.sh
@@ -0,0 +1,38 @@
+#!/bin/sh
+# Verify that split preserves non-UTF-8 bytes in prefix and suffix.
+
+# Copyright (C) 2026 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src
+print_ver_ split
+
+# Probe: skip unless this shell and file system accept the odd file name.
+echo a > "$(bad_unicode)" \
+  || skip_ 'bad unicode not supported in shell or file system'
+
+# Prefix bytes must be kept verbatim, not turned into U+FFFD (0xEF 0xBF 0xBD).
+prefix="$(bad_unicode)"
+printf 'AB' | split -b1 - "$prefix" || fail=1
+test -f "$(printf '%saa' "$prefix")" || fail=1
+test -f "$(printf '%sab' "$prefix")" || fail=1
+
+# Bytes given via --additional-suffix must reach the file names unchanged too.
+suffix=$(bad_unicode)
+printf 'AB' | split -b1 --additional-suffix="$suffix" - q || fail=1
+test -f "$(printf 'qaa%s' "$suffix")" \
+  && test -f "$(printf 'qab%s' "$suffix")" || fail=1
+
+Exit $fail