From: Paul Eggert Date: Fri, 24 Apr 2026 23:00:34 +0000 (-0700) Subject: uchar-h: etc. namespace cleanup X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=b6d5b2f729697c1f3a5c99e249eabc01921e0716;p=thirdparty%2Fgnulib.git uchar-h: etc. namespace cleanup This is only a partial cleanup; to be cleaner we’d need to move declarations of Gnulib extensions like c32isalpha into a separate .h file. However, if no Gnulib modules that extend <uchar.h> are used, Gnulib is now pretty clean on recent GNUish platforms. * lib/uchar.in.h: On GNUish platforms, include <stdint.h>, <wchar.h>, <wctype.h> only if needed. Do not include <string.h>, as we never need it directly: even if we use <string.h>’s memset via mbszero, <wchar.h> should include <string.h> if needed, as <wchar.h> defines mbszero. Move a static_assert from here to tests, as the static_assert uses a symbol that is no longer guaranteed to be visible. * modules/uchar-h (Depends-on): Do not depend on assert-h. * tests/test-uchar-h.c: Move a static_assert here from lib/uchar.in.h, and include so that wchar_t is guaranteed to be visible. --- diff --git a/ChangeLog b/ChangeLog index b364f67a61..6a07acdad5 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,22 @@ +2026-04-24 Paul Eggert + + uchar-h: etc. namespace cleanup + This is only a partial cleanup; to be cleaner we’d need to + move declarations of Gnulib extensions like c32isalpha + into a separate .h file. However, if no Gnulib modules that + extend <uchar.h> are used, Gnulib is now pretty clean on + recent GNUish platforms. + * lib/uchar.in.h: On GNUish platforms, include <stdint.h>, + <wchar.h>, <wctype.h> only if needed. Do not include <string.h>, + as we never need it directly: even if we use <string.h>’s memset + via mbszero, <wchar.h> should include <string.h> if needed, as + <wchar.h> defines mbszero. Move a static_assert from here to tests, + as the static_assert uses a symbol that is no longer guaranteed + to be visible. + * modules/uchar-h (Depends-on): Do not depend on assert-h. + * tests/test-uchar-h.c: Move a static_assert here from lib/uchar.in.h, + and include so that wchar_t is guaranteed to be visible. 
+ 2026-04-24 Bruno Haible version-etc: Optionally emit another line for the man page. diff --git a/doc/posix-headers/uchar.texi b/doc/posix-headers/uchar.texi index db5c893fa6..228759763c 100644 --- a/doc/posix-headers/uchar.texi +++ b/doc/posix-headers/uchar.texi @@ -5,7 +5,8 @@ ISO C23 specification:@* @url{https://www.open-std.org/jtc1/sc22/wg14/www/docs/n POSIX specification:@* @url{https://pubs.opengroup.org/onlinepubs/9799919799/basedefs/uchar.h.html} -Defines the types @code{char16_t}, @code{char32_t} and declares the +Defines the types @code{char16_t}, @code{char32_t}, +@code{mbstate_t} and @code{size_t}, and declares the functions @code{mbrtoc16}, @code{c16rtomb}, @code{mbrtoc32}, @code{c32rtomb}. @@ -35,6 +36,28 @@ This is the case in ISO C 11 compliant but not ISO C 23 compliant implementations. @end itemize +Portability problems fixed by Gnulib modules +@code{btoc32}, +@code{c32_apply_mapping}, @code{c32_apply_type_test}, +@code{c32_get_mapping}, @code{c32_get_type_test}, +@code{c32isalnum}, @code{c32isalpha}, @code{c32isblank}, +@code{c32iscntrl}, @code{c32isdigit}, @code{c32isgraph}, +@code{c32islower}, @code{c32isprint}, @code{c32ispunct}, +@code{c32isspace}, @code{c32isupper}, @code{c32isxdigit}, +@code{c32rtomb}, @code{c32snrtombs}, @code{c32srtombs}, +@code{c32stombs}, @code{c32swidth}, @code{c32tob}, +@code{c32tolower}, @code{c32toupper}, @code{c32width}: +@itemize +@item +The @code{char32_t} type of ISO C 23 and POSIX @code{<uchar.h>} lacks +counterparts of several standard @code{wchar_t} functions declared in +@code{<wchar.h>} and @code{<wctype.h>}. +This shortcoming is ameliorated by these Gnulib modules, +each of which arranges for @code{<uchar.h>} to declare a +Gnulib-specific function and to make visible the symbols @code{wint_t} +and @code{WEOF} if the symbols are part of that function's API. 
+@end itemize + Portability problems not fixed by Gnulib: @itemize @item diff --git a/lib/uchar.in.h b/lib/uchar.in.h index 300f7b5fc1..585dc9809c 100644 --- a/lib/uchar.in.h +++ b/lib/uchar.in.h @@ -53,15 +53,48 @@ #error "Please include config.h first." #endif -/* Get uint_least16_t, uint_least32_t. */ -#include <stdint.h> - -/* Get mbstate_t, size_t. */ -#include <wchar.h> - -/* For the inline functions. */ -#include <string.h> -#include <wctype.h> +/* If needed, get uint_least16_t, uint_least32_t. + Although POSIX allows <uchar.h> to make all symbols visible from <stdint.h>, + our includers should not rely on this. */ +#if (! (defined __cplusplus \ + ? @CXX_HAVE_UCHAR_H@ || @CXX_HAS_UCHAR_TYPES@ \ + : @HAVE_UCHAR_H@) \ + || @GNULIBHEADERS_OVERRIDE_CHAR16_T@ || @GNULIBHEADERS_OVERRIDE_CHAR32_T@) +# include <stdint.h> +#endif + +/* If needed, get btowc, mbstate_t, mbszero, size_t, wctob, wint_t, WEOF. + Although POSIX allows <uchar.h> to make all symbols visible from <wchar.h>, + our includers should not rely on this, except they can rely on wint_t and WEOF + when part of the API of a Gnulib module extending <uchar.h> that needs + these two symbols. */ +#if (! (/* The underlying <uchar.h> defines mbstate_t, size_t. */ \ + defined __cplusplus ? @CXX_HAVE_UCHAR_H@ : @HAVE_UCHAR_H@) \ + || (/* These need wint_t and maybe WEOF and a function. */ \ + @GNULIB_BTOC32@ || @GNULIB_C32TOB@) \ + || (/* These need mbszero. */ \ + (@GNULIB_C32STOMBS@ || @GNULIB_MBSTOC32S@))) +# include <wchar.h> +#endif + +/* If needed, get iswalnum, iswalpha, iswblank, iswcntrl, iswctype, + iswdigit, iswgraph, iswlower, iswprint, iswpunct, iswspace, + iswupper, iswxdigit, towctrans, towlower, towupper, wctrans, + wctrans_t, wctype, wctype_t, wint_t, WEOF. + Our includers should not rely on this, except they can rely on wint_t and WEOF + when part of the API of a Gnulib module extending <uchar.h> that needs + these two symbols. */ +#if (/* These need wint_t and maybe wctrans_t, wctype_t, WEOF, \ + and a function. 
*/ 0 \ + || @GNULIB_C32ISALNUM@ || @GNULIB_C32ISALPHA@ || @GNULIB_C32ISBLANK@ \ + || @GNULIB_C32ISCNTRL@ || @GNULIB_C32ISDIGIT@ || @GNULIB_C32ISGRAPH@ \ + || @GNULIB_C32ISLOWER@ || @GNULIB_C32ISPRINT@ || @GNULIB_C32ISPUNCT@ \ + || @GNULIB_C32ISSPACE@ || @GNULIB_C32ISUPPER@ || @GNULIB_C32ISXDIGIT@ \ + || @GNULIB_C32TOLOWER@ || @GNULIB_C32TOUPPER@ \ + || @GNULIB_C32_APPLY_MAPPING@ || @GNULIB_C32_APPLY_TYPE_TEST@ \ + || @GNULIB_C32_GET_MAPPING@ || @GNULIB_C32_GET_TYPE_TEST@) +# include <wctype.h> +#endif /* The __attribute__ feature is available in gcc versions 2.5 and later. The attribute __pure__ was added in gcc 2.96. */ @@ -86,8 +119,7 @@ _GL_INLINE_HEADER_BEGIN #if !(defined __cplusplus ? @CXX_HAVE_UCHAR_H@ || @CXX_HAS_CHAR8_TYPE@ : @HAVE_UCHAR_H@) /* An 8-bit variant of wchar_t. - Note: This type is only mandated by ISO C 23 or newer, and it does - denote UTF-8 units. */ + Note: This type is mandated by ISO C 23 or newer, and denotes UTF-8 units. */ typedef unsigned char char8_t; #elif @GNULIBHEADERS_OVERRIDE_CHAR8_T@ @@ -100,9 +132,9 @@ typedef unsigned char gl_char8_t; #if !(defined __cplusplus ? @CXX_HAVE_UCHAR_H@ || @CXX_HAS_UCHAR_TYPES@ : @HAVE_UCHAR_H@) /* A 16-bit variant of wchar_t. - Note: This type is only mandated by ISO C 11 or newer. In ISO C 23 + Note: This type is mandated by ISO C 11 or newer. In ISO C 23 and newer, it denotes UTF-16 units; in older versions of ISO C it did - so only on platforms on which __STDC_UTF_16__ was defined. */ + so on platforms on which __STDC_UTF_16__ was defined. */ typedef uint_least16_t char16_t; #elif @GNULIBHEADERS_OVERRIDE_CHAR16_T@ @@ -115,9 +147,9 @@ typedef uint_least16_t gl_char16_t; #if !(defined __cplusplus ? @CXX_HAVE_UCHAR_H@ || @CXX_HAS_UCHAR_TYPES@ : @HAVE_UCHAR_H@) /* A 32-bit variant of wchar_t. - Note: This type is only mandated by ISO C 11 or newer. In ISO C 23 + Note: This type is mandated by ISO C 11 or newer. 
In ISO C 23 and newer, it denotes UTF-32 code points; in older versions of ISO C - it did so only on platforms on which __STDC_UTF_32__ was defined. + it did so on platforms on which __STDC_UTF_32__ was defined. In gnulib, we guarantee that it denotes UTF-32 code points if and only if the module 'uchar-h-c23' is in use. */ typedef uint_least32_t char32_t; @@ -145,17 +177,15 @@ typedef uint_least32_t gl_char32_t; - because GCC >= 4.9 defines these macros on all platforms, even on FreeBSD and Solaris. We should better not use __STD_UTF_16__, __STD_UTF_32__ either, because - these macros are misspellings, only defined by Android's <uchar.h>. */ + these macros are misspellings, defined only by Android's <uchar.h>. */ #if defined __STDC_ISO_10646__ && !_GL_SMALL_WCHAR_T /* glibc, musl libc */ # define _GL_WCHAR_T_IS_UCS4 1 #endif -#if _GL_WCHAR_T_IS_UCS4 -static_assert (sizeof (char32_t) == sizeof (wchar_t)); -#endif -/* Convert a single-byte character to a 32-bit wide character. */ +/* Convert a single-byte character C to a 32-bit wide character, + or to WEOF if C is invalid. */ #if @GNULIB_BTOC32@ # if _GL_WCHAR_T_IS_UCS4 && !defined IN_BTOC32 _GL_BEGIN_C_LINKAGE @@ -471,7 +501,7 @@ _GL_CXXALIASWARN (c32width); #endif -/* Converts a 32-bit wide character to a multibyte character. */ +/* Convert a 32-bit wide character to a multibyte character. */ #if @GNULIB_C32RTOMB@ # if @REPLACE_C32RTOMB@ # if !(defined __cplusplus && defined GNULIB_NAMESPACE) @@ -599,8 +629,8 @@ _GL_CXXALIASWARN (c32swidth); #endif -/* Converts a 32-bit wide character to unibyte character. - Returns the single-byte representation of WC if it exists, +/* Convert a 32-bit wide character to unibyte character. + Return the single-byte representation of WC if it exists, or EOF otherwise. */ #if @GNULIB_C32TOB@ # if _GL_WCHAR_T_IS_UCS4 && !defined IN_C32TOB @@ -623,7 +653,7 @@ _GL_CXXALIASWARN (c32tob); #endif -/* Converts a multibyte character to a 32-bit wide character. 
*/ +/* Convert a multibyte character to a 32-bit wide character. */ #if @GNULIB_MBRTOC32@ # if @REPLACE_MBRTOC32@ # if !(defined __cplusplus && defined GNULIB_NAMESPACE) @@ -653,7 +683,7 @@ _GL_WARN_ON_USE (mbrtoc32, "mbrtoc32 is not portable - " #endif -/* Converts a multibyte character and returns the next 16-bit wide +/* Convert a multibyte character and return the next 16-bit wide character. */ #if @GNULIB_MBRTOC16@ # if @REPLACE_MBRTOC16@ diff --git a/modules/uchar-h b/modules/uchar-h index 6360859857..f5eb3241e4 100644 --- a/modules/uchar-h +++ b/modules/uchar-h @@ -13,7 +13,6 @@ snippet/arg-nonnull snippet/c++defs snippet/warn-on-use extern-inline -assert-h stdint-h wchar-h wctype-h diff --git a/tests/test-uchar-h.c b/tests/test-uchar-h.c index e59bfdb85f..4b0667df58 100644 --- a/tests/test-uchar-h.c +++ b/tests/test-uchar-h.c @@ -20,6 +20,12 @@ #include <uchar.h> +#include + +#if _GL_WCHAR_T_IS_UCS4 +static_assert (sizeof (char32_t) == sizeof (wchar_t)); +#endif + /* Check that the types are defined. */ mbstate_t a = { 0 }; size_t b = 5;