From: Viktor Szakats Date: Wed, 16 Jul 2025 02:25:08 +0000 (+0200) Subject: tests: fix UTF-8 detection, per-test `LC_*` settings, CI coverage X-Git-Url: http://git.ipfire.org/gitweb/gitweb.cgi?a=commitdiff_plain;h=1cc8a5235f76e744433cbf28ec98ecb972158387;p=thirdparty%2Fcurl.git tests: fix UTF-8 detection, per-test `LC_*` settings, CI coverage - runtests: fix `codeset-utf8` feature detection. Before this patch it detected if the calling environment had UTF-8 enabled. If not, UTF-8 tests were all skipped. After this patch, it detects if UTF-8 is supported by the calling environment regardless of what's currently enabled. Follow-up to 0b70b23ef4d007031bc2ae4fc63d5ed9136bc2b5 #15039 - GHA/linux: sync `codeset-test` to also reset `LC_CTYPE` and `LC_NUMERIC`. To give it more spin. Follow-up to c221c0ee5935497168c52686a9d8cc87b45bbca9 #17938 - GHA/macos: fix to actually enable `codeset-test`. Also set `LC_ALL`, which seems necessary to trigger issues. Follow-up to c221c0ee5935497168c52686a9d8cc87b45bbca9 #17938 - tests/data: replace `LC_CTYPE` env with `LC_ALL` in all tests requiring a locale. Also to avoid potential issues with a blank or unset `LC_ALL`, as seen earlier. And to ensure that the override works on all platforms (as tested in CI.) Slight downside is that this now resets the language/culture to `C`. Ref: b4c9982382469398115cc0e3e0747e79db083455 #4743 Ref: 23208e330ac0c2164d59971baf79e87c45da1840 #4738 - replace `en_US.UTF-8` with `C.UTF-8` to be language/culture-agnostic. - TEST-SUITE.md: drop `UTF-8` as a requirement for tests. Tests shall work (or at least be skipped) without UTF-8 support. 
Tests requiring UTF-8 locale: 165, 962, 963, 964, 965, 966, 967, 1448, 1560, 2046, 2047 Tests requiring UTF-8 locale, but passing without one anyway: 955, 956, 957, 958, 959, 960, 961, 968, 1034, 1035 Spec 1997: https://pubs.opengroup.org/onlinepubs/7908799/xbd/envvar.html Spec 2008: https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap08.html Ref: c221c0ee5935497168c52686a9d8cc87b45bbca9 #17938 Ref: 7cf8414fabc3063cc3d2121eacec4a6daa4164a8 Ref: 4c140a56283703161e5f26ae022bad694a481603 Ref: 28faaacee287b019bcf2961da3bf2f91d331bcbd #2436 Ref: ecd1d020abdae3c3ce3643ddab3106501e62e7c0 Closes #17988 --- diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index dbb4d5d2d3..973516abde 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -685,7 +685,12 @@ jobs: fi fi [ -x ~/venv/bin/activate ] && source ~/venv/bin/activate - [[ "${MATRIX_INSTALL_STEPS}" = *'codeset-test'* ]] && export LC_ALL=C + if [[ "${MATRIX_INSTALL_STEPS}" = *'codeset-test'* ]]; then + locale || true + export LC_ALL=C + export LC_CTYPE=C + export LC_NUMERIC=fr_FR.UTF-8 + fi if [ "${MATRIX_BUILD}" = 'cmake' ]; then cmake --build bld --verbose --target "${TEST_TARGET}" else diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml index bb1b376b27..b35c9a08c4 100644 --- a/.github/workflows/macos.yml +++ b/.github/workflows/macos.yml @@ -214,6 +214,7 @@ jobs: MATRIX_BUILD: ${{ matrix.build.generate && 'cmake' || 'autotools' }} MATRIX_COMPILER: '${{ matrix.compiler }}' MATRIX_INSTALL: '${{ matrix.build.install }}' + MATRIX_INSTALL_STEPS: '${{ matrix.build.install_steps }}' MATRIX_MACOS_VERSION_MIN: '${{ matrix.build.macos-version-min }}' strategy: fail-fast: false @@ -381,7 +382,6 @@ jobs: MATRIX_CHKPREFILL: '${{ matrix.build.chkprefill }}' MATRIX_CONFIGURE: '${{ matrix.build.configure }}' MATRIX_GENERATE: '${{ matrix.build.generate }}' - MATRIX_INSTALL_STEPS: '${{ matrix.build.install_steps }}' run: | if [[ "${MATRIX_COMPILER}" = 'gcc'* 
]]; then sysroot="$("${CC}" --print-sysroot)" # Must match the SDK gcc was built for @@ -481,6 +481,8 @@ jobs: TFLAGS="-j20 ${TFLAGS}" source ~/venv/bin/activate if [[ "${MATRIX_INSTALL_STEPS}" = *'codeset-test'* ]]; then + locale || true + export LC_ALL=C export LC_CTYPE=C export LC_NUMERIC=fr_FR.UTF-8 fi diff --git a/docs/tests/TEST-SUITE.md b/docs/tests/TEST-SUITE.md index 1e387898f7..2110cfb362 100644 --- a/docs/tests/TEST-SUITE.md +++ b/docs/tests/TEST-SUITE.md @@ -53,7 +53,6 @@ SPDX-License-Identifier: curl - `openssl` (the command line tool, for generating test server certificates) - `openssh` or `SunSSH` (for SCP and SFTP tests) - `nghttpx` (for HTTP/2 and HTTP/3 tests) - - An available `en_US.UTF-8` locale ### Installation of impacket diff --git a/tests/data/test1034 b/tests/data/test1034 index ef671dfe2e..eda13bb23e 100644 --- a/tests/data/test1034 +++ b/tests/data/test1034 @@ -29,8 +29,7 @@ proxy codeset-utf8 -LC_ALL= -LC_CTYPE=en_US.UTF-8 +LC_ALL=C.UTF-8 HTTP over proxy with malformatted IDN host name diff --git a/tests/data/test1035 b/tests/data/test1035 index b6f30ae56b..c8eb282aff 100644 --- a/tests/data/test1035 +++ b/tests/data/test1035 @@ -27,8 +27,7 @@ proxy codeset-utf8 -LC_ALL= -LC_CTYPE=en_US.UTF-8 +LC_ALL=C.UTF-8 HTTP over proxy with too long IDN host name diff --git a/tests/data/test1448 b/tests/data/test1448 index 96d64b85a5..f7ff09f952 100644 --- a/tests/data/test1448 +++ b/tests/data/test1448 @@ -43,8 +43,7 @@ IDN codeset-utf8 -LC_ALL=en_US.UTF-8 -LC_CTYPE=en_US.UTF-8 +LC_ALL=C.UTF-8 Redirect following to UTF-8 IDN host name diff --git a/tests/data/test1560 b/tests/data/test1560 index 976cc98ec2..ce8c7c42a7 100644 --- a/tests/data/test1560 +++ b/tests/data/test1560 @@ -13,7 +13,7 @@ urlapi none -LC_ALL=en_US.UTF-8 +LC_ALL=C.UTF-8 file diff --git a/tests/data/test165 b/tests/data/test165 index 0b5cfd2346..4996a3564f 100644 --- a/tests/data/test165 +++ b/tests/data/test165 @@ -33,8 +33,7 @@ proxy codeset-utf8 -LC_ALL=en_US.UTF-8 
-LC_CTYPE=en_US.UTF-8 +LC_ALL=C.UTF-8 HTTP over proxy with IDN host name diff --git a/tests/data/test2046 b/tests/data/test2046 index cf25d0c57b..555515aa4a 100644 --- a/tests/data/test2046 +++ b/tests/data/test2046 @@ -43,8 +43,7 @@ IDN codeset-utf8 -LC_ALL=en_US.UTF-8 -LC_CTYPE=en_US.UTF-8 +LC_ALL=C.UTF-8 Connection reuse with IDN host name diff --git a/tests/data/test2047 b/tests/data/test2047 index c32e6aabc7..9c671b1785 100644 --- a/tests/data/test2047 +++ b/tests/data/test2047 @@ -44,8 +44,7 @@ proxy codeset-utf8 -LC_ALL=en_US.UTF-8 -LC_CTYPE=en_US.UTF-8 +LC_ALL=C.UTF-8 Connection reuse with IDN host name over HTTP proxy diff --git a/tests/data/test955 b/tests/data/test955 index 817b47a67e..633c2e369d 100644 --- a/tests/data/test955 +++ b/tests/data/test955 @@ -24,8 +24,7 @@ smtp codeset-utf8 -LC_ALL=en_US.UTF-8 -LC_CTYPE=en_US.UTF-8 +LC_ALL=C.UTF-8 SMTP without SMTPUTF8 support - UTF-8 based sender (local part only) diff --git a/tests/data/test956 b/tests/data/test956 index 4546f2ee61..9472e96e15 100644 --- a/tests/data/test956 +++ b/tests/data/test956 @@ -21,8 +21,7 @@ smtp codeset-utf8 -LC_ALL=en_US.UTF-8 -LC_CTYPE=en_US.UTF-8 +LC_ALL=C.UTF-8 SMTP without SMTPUTF8 support - UTF-8 based recipient (local part only) diff --git a/tests/data/test957 b/tests/data/test957 index 9548627b98..671d73bbf2 100644 --- a/tests/data/test957 +++ b/tests/data/test957 @@ -22,8 +22,7 @@ smtp codeset-utf8 -LC_ALL=en_US.UTF-8 -LC_CTYPE=en_US.UTF-8 +LC_ALL=C.UTF-8 SMTP VRFY without SMTPUTF8 support - UTF-8 recipient (local part only) diff --git a/tests/data/test958 b/tests/data/test958 index f1006a0a66..203254bfc2 100644 --- a/tests/data/test958 +++ b/tests/data/test958 @@ -22,8 +22,7 @@ smtp codeset-utf8 -LC_ALL=en_US.UTF-8 -LC_CTYPE=en_US.UTF-8 +LC_ALL=C.UTF-8 SMTP external VRFY without SMTPUTF8 - UTF-8 recipient (local part only) diff --git a/tests/data/test959 b/tests/data/test959 index 2154600da2..5693e34944 100644 --- a/tests/data/test959 +++ b/tests/data/test959 @@ -25,8 
+25,7 @@ smtp codeset-utf8 -LC_ALL=en_US.UTF-8 -LC_CTYPE=en_US.UTF-8 +LC_ALL=C.UTF-8 SMTP without SMTPUTF8 support - UTF-8 based sender (host part only) diff --git a/tests/data/test960 b/tests/data/test960 index fb50a326b2..ced2ba0b54 100644 --- a/tests/data/test960 +++ b/tests/data/test960 @@ -22,8 +22,7 @@ smtp codeset-utf8 -LC_ALL=en_US.UTF-8 -LC_CTYPE=en_US.UTF-8 +LC_ALL=C.UTF-8 SMTP without SMTPUTF8 support - UTF-8 based recipient (host part only) diff --git a/tests/data/test961 b/tests/data/test961 index 2068d8fc84..c52d5df344 100644 --- a/tests/data/test961 +++ b/tests/data/test961 @@ -23,8 +23,7 @@ smtp codeset-utf8 -LC_ALL=en_US.UTF-8 -LC_CTYPE=en_US.UTF-8 +LC_ALL=C.UTF-8 SMTP external VRFY without SMTPUTF8 - UTF-8 recipient (host part only) diff --git a/tests/data/test962 b/tests/data/test962 index 80292862ba..3f737b65ea 100644 --- a/tests/data/test962 +++ b/tests/data/test962 @@ -23,8 +23,7 @@ IDN codeset-utf8 -LC_ALL=en_US.UTF-8 -LC_CTYPE=en_US.UTF-8 +LC_ALL=C.UTF-8 SMTP without SMTPUTF8 support - UTF-8 based sender (host part only) diff --git a/tests/data/test963 b/tests/data/test963 index 5d3bae7b56..bce626c5e3 100644 --- a/tests/data/test963 +++ b/tests/data/test963 @@ -23,8 +23,7 @@ IDN codeset-utf8 -LC_ALL=en_US.UTF-8 -LC_CTYPE=en_US.UTF-8 +LC_ALL=C.UTF-8 SMTP without SMTPUTF8 support (IDN) - UTF-8 recipient (host part only) diff --git a/tests/data/test964 b/tests/data/test964 index 12c312eccd..04e5b1c152 100644 --- a/tests/data/test964 +++ b/tests/data/test964 @@ -24,8 +24,7 @@ IDN codeset-utf8 -LC_ALL=en_US.UTF-8 -LC_CTYPE=en_US.UTF-8 +LC_ALL=C.UTF-8 SMTP external VRFY without SMTPUTF8 (IDN) - UTF-8 recipient (host part) diff --git a/tests/data/test965 b/tests/data/test965 index 0d29c09849..601db62fe0 100644 --- a/tests/data/test965 +++ b/tests/data/test965 @@ -26,8 +26,7 @@ IDN codeset-utf8 -LC_ALL=en_US.UTF-8 -LC_CTYPE=en_US.UTF-8 +LC_ALL=C.UTF-8 SMTP with SMTPUTF8 support - UTF-8 based sender diff --git a/tests/data/test966 
b/tests/data/test966 index b642e19a9d..646e807589 100644 --- a/tests/data/test966 +++ b/tests/data/test966 @@ -26,8 +26,7 @@ IDN codeset-utf8 -LC_ALL=en_US.UTF-8 -LC_CTYPE=en_US.UTF-8 +LC_ALL=C.UTF-8 SMTP with SMTPUTF8 support - UTF-8 based recipient diff --git a/tests/data/test967 b/tests/data/test967 index f6e2e32382..7cacbcbdd2 100644 --- a/tests/data/test967 +++ b/tests/data/test967 @@ -30,8 +30,7 @@ IDN codeset-utf8 -LC_ALL=en_US.UTF-8 -LC_CTYPE=en_US.UTF-8 +LC_ALL=C.UTF-8 SMTP external VRFY with SMTPUTF8 support diff --git a/tests/data/test968 b/tests/data/test968 index 8532c35e7d..4df600483b 100644 --- a/tests/data/test968 +++ b/tests/data/test968 @@ -27,8 +27,7 @@ IDN codeset-utf8 -LC_ALL=en_US.UTF-8 -LC_CTYPE=en_US.UTF-8 +LC_ALL=C.UTF-8 SMTP VRFY with SMTPUTF8 support diff --git a/tests/runtests.pl b/tests/runtests.pl index 23736f2ac5..43718fa58b 100755 --- a/tests/runtests.pl +++ b/tests/runtests.pl @@ -83,6 +83,7 @@ BEGIN { use Digest::MD5 qw(md5); use List::Util 'sum'; use I18N::Langinfo qw(langinfo CODESET); +use POSIX qw(setlocale LC_ALL); use serverhelp qw( server_exe @@ -484,6 +485,25 @@ sub parseprotocols { push @protocols, 'none'; } +####################################################################### +# Check if the operating environment supports UTF-8. +sub is_utf8_supported { + my $result; + my $old_LC_ALL; + my $was_defined = defined $ENV{'LC_ALL'}; + if($was_defined) { + $old_LC_ALL = $ENV{'LC_ALL'}; + } + setlocale(LC_ALL, $ENV{'LC_ALL'} = "C.UTF-8"); + $result = lc(langinfo(CODESET())) eq "utf-8"; + if($was_defined) { + $ENV{'LC_ALL'} = $old_LC_ALL; + } + else { + delete $ENV{'LC_ALL'}; + } + return $result; +} ####################################################################### # Check & display information about curl and the host the test suite runs on. 
@@ -808,7 +828,7 @@ sub checksystemfeatures { # Use this as a proxy for any cryptographic authentication $feature{"crypto"} = $feature{"NTLM"} || $feature{"Kerberos"} || $feature{"SPNEGO"}; $feature{"local-http"} = servers::localhttp(); - $feature{"codeset-utf8"} = lc(langinfo(CODESET())) eq "utf-8"; + $feature{"codeset-utf8"} = is_utf8_supported(); if($feature{"codeset-utf8"}) { $ENV{'CURL_TEST_HAVE_CODESET_UTF8'} = 1; }