From: Sylvestre Ledru Date: Fri, 3 Apr 2026 10:23:52 +0000 (+0200) Subject: tests: split: verify non-UTF-8 bytes are preserved in filenames X-Git-Tag: v9.11~56 X-Git-Url: http://git.ipfire.org/gitweb/?a=commitdiff_plain;h=829593317db7768153eee864d46fcb4509360f88;p=thirdparty%2Fcoreutils.git tests: split: verify non-UTF-8 bytes are preserved in filenames * tests/split/non-utf8.sh: New test to ensure that non-UTF-8 bytes in the prefix and --additional-suffix are preserved as-is in output filenames, rather than being replaced by UTF-8 replacement characters. * tests/local.mk: Register new test. https://github.com/uutils/coreutils/pull/11397 https://github.com/coreutils/coreutils/pull/239 --- diff --git a/tests/local.mk b/tests/local.mk index 1e1f0b3fbb..884743259d 100644 --- a/tests/local.mk +++ b/tests/local.mk @@ -460,6 +460,7 @@ all_tests = \ tests/split/record-sep.sh \ tests/split/numeric.sh \ tests/split/guard-input.sh \ + tests/split/non-utf8.sh \ tests/split/split-io-err.sh \ tests/stat/stat-birthtime.sh \ tests/stat/stat-fmt.sh \ diff --git a/tests/split/non-utf8.sh b/tests/split/non-utf8.sh new file mode 100755 index 0000000000..224b7a9b5f --- /dev/null +++ b/tests/split/non-utf8.sh @@ -0,0 +1,38 @@ +#!/bin/sh +# Verify that split preserves non-UTF-8 bytes in prefix and suffix. + +# Copyright (C) 2026 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +. 
"${srcdir=.}/tests/init.sh"; path_prepend_ ./src
print_ver_ split

# Probe the environment first: if a file named with the bad_unicode
# bytes cannot even be created, skip the test rather than fail it.
echo a > "$(bad_unicode)" \
  || skip_ 'bad unicode not supported in shell or file system'

# Non-UTF-8 bytes in prefix should be preserved, not replaced
# by UTF-8 replacement characters (0xEF 0xBF 0xBD).
# The two input bytes are split one per file, and the checks below
# expect the resulting files to be named "<prefix>aa" and "<prefix>ab".
prefix="$(bad_unicode)"
printf 'AB' | split -b1 - "$prefix" || fail=1
# Keep "$prefix" quoted so the shell cannot field-split or glob-expand
# the raw bytes; the name tested must be byte-identical to the prefix
# handed to split above (this also matches the suffix checks below).
test -f "$(printf '%saa' "$prefix")" || fail=1
test -f "$(printf '%sab' "$prefix")" || fail=1

# Non-UTF-8 bytes in --additional-suffix should also be preserved.
# Here the prefix is the plain "q", so the expected names are
# "qaa<suffix>" and "qab<suffix>".
suffix="$(bad_unicode)"
printf 'AB' | split -b1 --additional-suffix="$suffix" - q || fail=1
test -f "$(printf 'qaa%s' "$suffix")" || fail=1
test -f "$(printf 'qab%s' "$suffix")" || fail=1

Exit $fail