+2026-05-25 Bruno Haible <bruno@clisp.org>
+
+ mbscasecmp tests: Enhance tests.
+ * tests/test-mbscasecmp.c (test_ascii): New function, extracted from
+ main.
+ (test_utf_8): Likewise. Add test cases with incomplete characters.
+ (main): Invoke them. Accept a numeric argument.
+ * tests/test-mbscasecmp-4.sh: Renamed from tests/test-mbscasecmp.sh.
+ * tests/test-mbscasecmp-3.sh: New file, based on
+ tests/test-mbmemcasecmp-3.sh.
+ * modules/mbscasecmp-tests (Files): Update after rename. Add
+ locale-en.m4, locale-fr.m4.
+ (configure.ac): Invoke gt_LOCALE_EN_UTF8, gt_LOCALE_FR_UTF8.
+ (Makefile.am): Arrange to run test-mbscasecmp-3.sh,
+ test-mbscasecmp-4.sh, instead of test-mbscasecmp.sh.
+
2026-05-25 Bruno Haible <bruno@clisp.org>
mbs_endswith tests: Enhance tests.
Files:
-tests/test-mbscasecmp.sh
+tests/test-mbscasecmp-3.sh
+tests/test-mbscasecmp-4.sh
tests/test-mbscasecmp.c
tests/macros.h
+m4/locale-en.m4
+m4/locale-fr.m4
m4/locale-tr.m4
m4/codeset.m4
setlocale
configure.ac:
+gt_LOCALE_EN_UTF8
+gt_LOCALE_FR_UTF8
gt_LOCALE_TR_UTF8
Makefile.am:
-TESTS += test-mbscasecmp.sh
-TESTS_ENVIRONMENT += LOCALE_TR_UTF8='@LOCALE_TR_UTF8@'
+TESTS += test-mbscasecmp-3.sh test-mbscasecmp-4.sh
+TESTS_ENVIRONMENT += \
+ LOCALE_EN_UTF8='@LOCALE_EN_UTF8@' \
+ LOCALE_FR_UTF8='@LOCALE_FR_UTF8@' \
+ LOCALE_TR_UTF8='@LOCALE_TR_UTF8@'
check_PROGRAMS += test-mbscasecmp
test_mbscasecmp_LDADD = $(LDADD) $(LIBUNISTRING) $(SETLOCALE_LIB) $(MBRTOWC_LIB) $(LIBC32CONV)
--- /dev/null
+#!/bin/sh
+
+# Test whether a specific UTF-8 locale is installed.
+: "${LOCALE_EN_UTF8=en_US.UTF-8}"
+: "${LOCALE_FR_UTF8=fr_FR.UTF-8}"
+if test "$LOCALE_EN_UTF8" = none && test $LOCALE_FR_UTF8 = none; then
+ if test -f /usr/bin/localedef; then
+ echo "Skipping test: no english or french Unicode locale is installed"
+ else
+ echo "Skipping test: no english or french Unicode locale is supported"
+ fi
+ exit 77
+fi
+
+# It's sufficient to test in one of the two locales.
+if test $LOCALE_FR_UTF8 != none; then
+ testlocale=$LOCALE_FR_UTF8
+else
+ testlocale="$LOCALE_EN_UTF8"
+fi
+
+LC_ALL="$testlocale" \
+${CHECKER} ./test-mbscasecmp${EXEEXT} 3
#include "macros.h"
-int
-main ()
+static void
+test_ascii (void)
{
- /* configure should already have checked that the locale is supported. */
- if (setlocale (LC_ALL, "") == NULL)
- return 1;
-
ASSERT (mbscasecmp ("paragraph", "Paragraph") == 0);
ASSERT (mbscasecmp ("paragrapH", "parAgRaph") == 0);
ASSERT (mbscasecmp ("para", "paragraph") < 0);
ASSERT (mbscasecmp ("paragraph", "para") > 0);
+}
+static void
+test_utf_8 (bool turkish)
+{
/* The following tests show how mbscasecmp() is different from
strcasecmp(). */
- ASSERT (mbscasecmp ("\303\266zg\303\274r", "\303\226ZG\303\234R") == 0); /* özgür */
- ASSERT (mbscasecmp ("\303\226ZG\303\234R", "\303\266zg\303\274r") == 0); /* özgür */
+ if (turkish)
+ {
+ ASSERT (mbscasecmp ("\303\266zg\303\274r", "\303\226ZG\303\234R") == 0); /* özgür */
+ ASSERT (mbscasecmp ("\303\226ZG\303\234R", "\303\266zg\303\274r") == 0); /* özgür */
+
+ /* This test shows how strings of different size can compare equal. */
+ ASSERT (mbscasecmp ("turkish", "TURK\304\260SH") == 0);
+ ASSERT (mbscasecmp ("TURK\304\260SH", "turkish") == 0);
+ }
+
+ /* Incomplete characters. See
+ https://www.unicode.org/versions/Unicode15.0.0/ch03.pdf
+ page 128 table 3-11. */
+
+ /* "\341\200\240" = 0xE1 0x80 0xA0 = U+1020. */
+ ASSERT (mbscasecmp ("\341\200", "\341\200") == 0);
+ ASSERT (mbscasecmp ("\341\200X", "\341\200x") == 0);
+ ASSERT (mbscasecmp ("\341", "\341") == 0);
+ ASSERT (mbscasecmp ("\341X", "\341x") == 0);
+ /* "\360\221\222\240" = 0xF0 0x91 0x92 0xA0 = U+114A0. */
+ ASSERT (mbscasecmp ("\360\221\222", "\360\221\222") == 0);
+ ASSERT (mbscasecmp ("\360\221\222X", "\360\221\222x") == 0);
+ ASSERT (mbscasecmp ("\360\221", "\360\221") == 0);
+ ASSERT (mbscasecmp ("\360\221X", "\360\221x") == 0);
+ ASSERT (mbscasecmp ("\360", "\360") == 0);
+ ASSERT (mbscasecmp ("\360X", "\360x") == 0);
+}
+
+int
+main (int argc, char *argv[])
+{
+ /* configure should already have checked that the locale is supported. */
+ if (setlocale (LC_ALL, "") == NULL)
+ return 1;
+
+ test_ascii ();
+
+ if (argc > 1)
+ switch (argv[1][0])
+ {
+ case '3':
+ /* Locale encoding is UTF-8, locale is not Turkish. */
+ test_utf_8 (false);
+ return test_exit_status;
- /* This test shows how strings of different size can compare equal. */
- ASSERT (mbscasecmp ("turkish", "TURK\304\260SH") == 0);
- ASSERT (mbscasecmp ("TURK\304\260SH", "turkish") == 0);
+ case '4':
+ /* Locale encoding is UTF-8, locale is Turkish. */
+ test_utf_8 (true);
+ return test_exit_status;
+ }
- return test_exit_status;
+ return 1;
}