From: Sylvestre Ledru
Date: Mon, 6 Apr 2026 13:49:29 +0000 (+0200)
Subject: tests: ls: add quoting-utf8 test for Unicode quotes in UTF-8 locales
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=3a9901daadb2476969de8efce5948121f2006d2b;p=thirdparty%2Fcoreutils.git

tests: ls: add quoting-utf8 test for Unicode quotes in UTF-8 locales

* tests/ls/quoting-utf8.sh: New test verifying that
--quoting-style=locale and --quoting-style=clocale use Unicode
left/right single quotation marks in UTF-8 locales, and that
embedded apostrophes and double quotes are not escaped when the
delimiters are different characters.  Also check C locale
fallback to ASCII quotes.
* tests/local.mk: Reference the new test.
https://github.com/coreutils/coreutils/pull/243
---

diff --git a/tests/local.mk b/tests/local.mk
index c92c7e3df8..beb84eaf40 100644
--- a/tests/local.mk
+++ b/tests/local.mk
@@ -708,6 +708,7 @@ all_tests = \
   tests/ls/non-utf8-hidden.sh \
   tests/ls/selinux-segfault.sh \
   tests/ls/quote-align.sh \
+  tests/ls/quoting-utf8.sh \
   tests/ls/size-align.sh \
   tests/ls/readdir-mountpoint-inode.sh \
   tests/ls/recursive.sh \
diff --git a/tests/ls/quoting-utf8.sh b/tests/ls/quoting-utf8.sh
new file mode 100755
index 0000000000..a89c0db317
--- /dev/null
+++ b/tests/ls/quoting-utf8.sh
@@ -0,0 +1,75 @@
+#!/bin/sh
+# Ensure --quoting-style=locale/clocale uses Unicode quotes in UTF-8 locales
+
+# Copyright (C) 2026 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+. "${srcdir=.}/tests/init.sh";
+print_ver_ ls
+
+# Skip unless the en_US.UTF-8 locale reports UTF-8 as its charmap
+test "$(LC_ALL=en_US.UTF-8 locale charmap 2>/dev/null)" = UTF-8 ||
+  skip_ 'en_US.UTF-8 locale not available'
+
+# Create the names to list.  The tab is produced by printf so that the name
+# is guaranteed to contain a real control character for the C-escape check.
+touch 'hello world' "it's" 'say "hi"' "$(printf 'tab\there')" \
+  || framework_failure_
+
+# Note we use en_US as there are no translations provided,
+# and so locale quoting for UTF-8 is hardcoded to these quoting characters:
+#   U+2018 = \xe2\x80\x98 (LEFT SINGLE QUOTATION MARK)
+#   U+2019 = \xe2\x80\x99 (RIGHT SINGLE QUOTATION MARK)
+lq=$(printf '\342\200\230')
+rq=$(printf '\342\200\231')
+
+# In UTF-8 locales, both locale and clocale should use Unicode quotes
+for style in locale clocale; do
+  LC_ALL=en_US.UTF-8 ls --quoting-style=$style > out_${style}_utf8 || fail=1
+
+  # Verify Unicode left/right quotes are present
+  grep "$lq" out_${style}_utf8 > /dev/null 2>&1 \
+    || { echo "$style UTF-8: missing Unicode left quote"; fail=1; }
+  grep "$rq" out_${style}_utf8 > /dev/null 2>&1 \
+    || { echo "$style UTF-8: missing Unicode right quote"; fail=1; }
+
+  # Verify 'hello world' is quoted with Unicode quotes
+  grep "^${lq}hello world${rq}\$" out_${style}_utf8 > /dev/null 2>&1 \
+    || { echo "$style UTF-8: 'hello world' not properly quoted"; fail=1; }
+
+  # Embedded apostrophe should NOT be escaped (delimiters are different chars)
+  grep "^${lq}it's${rq}\$" out_${style}_utf8 > /dev/null 2>&1 || \
+    { echo "$style UTF-8: embedded apostrophe shouldn't be escaped"; fail=1; }
+
+  # Embedded double quote should NOT be escaped
+  grep "^${lq}say \"hi\"${rq}\$" out_${style}_utf8 > /dev/null 2>&1 || \
+    { echo "$style UTF-8: embedded double quote shouldn't be escaped"; fail=1; }
+
+  # Control characters should still be C-escaped
+  # (grep sees tab\\there, i.e. "tab", a literal backslash, then "there")
+  grep "tab\\\\there" out_${style}_utf8 > /dev/null 2>&1 \
+    || { echo "$style UTF-8: tab should be escaped as \\t"; fail=1; }
+done
+
+# In C locale, locale uses ASCII single quotes, clocale uses ASCII double quotes
+LC_ALL=C ls --quoting-style=locale > out_locale_c || fail=1
+grep "^'hello world'\$" out_locale_c > /dev/null 2>&1 \
+  || { echo "locale C: expected ASCII single quotes"; fail=1; }
+
+LC_ALL=C ls --quoting-style=clocale > out_clocale_c || fail=1
+grep '^"hello world"$' out_clocale_c > /dev/null 2>&1 \
+  || { echo "clocale C: expected ASCII double quotes"; fail=1; }
+
+Exit $fail