tests: split: verify non-UTF-8 bytes are preserved in filenames

author Sylvestre Ledru <sylvestre@debian.org>

Fri, 3 Apr 2026 10:23:52 +0000 (12:23 +0200)

committer Pádraig Brady <P@draigBrady.com>

Mon, 6 Apr 2026 16:53:45 +0000 (17:53 +0100)
author Sylvestre Ledru <sylvestre@debian.org>
Fri, 3 Apr 2026 10:23:52 +0000 (12:23 +0200)
committer Pádraig Brady <P@draigBrady.com>
Mon, 6 Apr 2026 16:53:45 +0000 (17:53 +0100)
diff --git a/tests/local.mk b/tests/local.mk

index 1e1f0b3fbbe22277c4d521ad8cb28d5f43dd90e0..884743259dcb6a32e8747908620d4e071a1efa2b 100644 (file)
--- a/tests/local.mk
+++ b/tests/local.mk
@@ -460,6 +460,7 @@ all_tests =                                 \
    tests/split/record-sep.sh                    \
    tests/split/numeric.sh                       \
    tests/split/guard-input.sh                   \
+  tests/split/non-utf8.sh                      \
    tests/split/split-io-err.sh                  \
    tests/stat/stat-birthtime.sh                 \
    tests/stat/stat-fmt.sh                       \
diff --git a/tests/split/non-utf8.sh b/tests/split/non-utf8.sh

new file mode 100755 (executable)

index 0000000..224b7a9
--- /dev/null
+++ b/tests/split/non-utf8.sh
@@ -0,0 +1,38 @@
+#!/bin/sh
+# Verify that split preserves non-UTF-8 bytes in prefix and suffix.
+
+# Copyright (C) 2026 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src
+print_ver_ split
+
+echo a > "$(bad_unicode)" \
+  || skip_ 'bad unicode not supported in shell or file system'
+
+# Non-UTF-8 bytes in prefix should be preserved, not replaced
+# by UTF-8 replacement characters (0xEF 0xBF 0xBD).
+prefix="$(bad_unicode)"
+printf 'AB' | split -b1 - "$prefix" || fail=1
+test -f "$(printf '%saa' "$prefix")" || fail=1
+test -f "$(printf '%sab' "$prefix")" || fail=1
+
+# Non-UTF-8 bytes in --additional-suffix should also be preserved.
+suffix="$(bad_unicode)"
+printf 'AB' | split -b1 --additional-suffix="$suffix" - q || fail=1
+test -f "$(printf 'qaa%s' "$suffix")" || fail=1
+test -f "$(printf 'qab%s' "$suffix")" || fail=1
+
+Exit $fail
author	Sylvestre Ledru <sylvestre@debian.org>
	Fri, 3 Apr 2026 10:23:52 +0000 (12:23 +0200)
committer	Pádraig Brady <P@draigBrady.com>
	Mon, 6 Apr 2026 16:53:45 +0000 (17:53 +0100)
tests/local.mk		patch \| blob \| blame \| history
tests/split/non-utf8.sh	[new file with mode: 0755]	patch \| blob