quotearg: escape C1 codes in C locale

author Paul Eggert <eggert@cs.ucla.edu>

Sun, 31 May 2026 04:57:03 +0000 (21:57 -0700)

committer Paul Eggert <eggert@cs.ucla.edu>

Sun, 31 May 2026 05:11:43 +0000 (22:11 -0700)
author Paul Eggert <eggert@cs.ucla.edu>
Sun, 31 May 2026 04:57:03 +0000 (21:57 -0700)
committer Paul Eggert <eggert@cs.ucla.edu>
Sun, 31 May 2026 05:11:43 +0000 (22:11 -0700)
diff --git a/ChangeLog b/ChangeLog

index e6ce36d261191991cddeab8045ada0d540155c51..70e2eaffea278f10513ac0216f5229d7dbe6d947 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,18 @@
+2026-05-30  Paul Eggert  <eggert@cs.ucla.edu>
+
+       quotearg: escape C1 codes in C locale
+       Problem reported by Lasse Collin <https://bugs.gnu.org/81135#29>.
+       For lib/quotearg.c, m4/quotearg.m4 and modules/quotearg,
+       go back to the old way of doing things, without the recently-added
+       USE_C_LOCALE stuff.  Then make the following changes instead:
+       * lib/quotearg.c [_QUOTEARG_AVOID_UCHAR_H]: Don’t include <uchar.h>.
+       (mbrto2c, mbstate_t) [_QUOTEARG_AVOID_UCHAR_H && defined __UCLIBC__
+       && !defined __UCLIBC_HAS_WCHAR__]: New macros.
+       Also, don’t include <wchar.h> or <wctype.h>.
+       (char32_t, c32isprint, mbrtoc32, GNULIB_MBRTOC32_REGULAR)
+       [_QUOTEARG_AVOID_UCHAR_H]: New macros.
+       (mbszero) [!GNULIB_defined_mbszero]: New macro.
+
  2026-05-29  Paul Eggert  <eggert@cs.ucla.edu>
  
         quotearg: improve USE_C_LOCALE + UTF-8
diff --git a/lib/quotearg.c b/lib/quotearg.c

index ea6fde1acac9e69fc4699eb228931caa0c0a98ce..d0a516a60d36fe4c89540632d60d8d229a26752c 100644 (file)
--- a/lib/quotearg.c
+++ b/lib/quotearg.c
@@ -33,106 +33,44 @@
  #include "minmax.h"
  #include "xalloc.h"
  
+#include <ctype.h>
  #include <errno.h>
  #include <limits.h>
  #include <stdint.h>
  #include <stdlib.h>
  #include <string.h>
  
-/* If USE_C_LOCALE is nonzero, this file defines functions that
-   use the "C" locale, regardless of the current locale.
-   The functions also treat unassigned characters as printable.
-   Applications defining this macro might avoid the need for Gnulib's
-   c32isprint, gettext-h, mbrtoc32, mbsinit, and uchar-h modules,
-   but they also need the c-ctype module.  */
-#ifndef USE_C_LOCALE
-# define USE_C_LOCALE 0
-#endif
-
-/* On several platforms, the default locale uses UTF-8, contrary to POSIX:
-   - musl libc has no unibyte locales; the "C" locale uses UTF-8.
-   - On macOS, all modern locales use the UTF-8 encoding.
-   - BeOS and Haiku have a single locale, and it has UTF-8 encoding.
-   - On Android ≥ 5.0, the default locale is the "C.UTF-8" locale, not the
-     "C" locale.  Furthermore, when you attempt to set the "C" or "POSIX"
-     locale via setlocale(), what you get is a "C" locale with UTF-8 encoding,
-     that is, effectively the "C.UTF-8" locale.  */
-#if (defined MUSL_LIBC || (defined __APPLE__ && defined __MACH__) \
-     || defined __BEOS__ || defined __HAIKU__ || defined __ANDROID__)
-# define C_LOCALE_MIGHT_BE_MULTIBYTE true
-#else
-# define C_LOCALE_MIGHT_BE_MULTIBYTE false
-#endif
+/* Use Gnulib <uchar.h> if not avoided by the app.  Applications
+   defining _QUOTEARG_AVOID_UCHAR_H might avoid the need for Gnulib's
+   c32isprint, mbrtoc32, mbsinit, mbszero, and uchar-h modules.  */
  
-#if USE_C_LOCALE && !C_LOCALE_MIGHT_BE_MULTIBYTE
-# include <c-ctype.h>
-typedef unsigned char wch;
-typedef struct incomplete_mbstate *mbstate;
-# ifndef GNULIB_MBRTOC32_REGULAR
-#  define GNULIB_MBRTOC32_REGULAR 1
-# endif
-#else
+#ifndef _QUOTEARG_AVOID_UCHAR_H
+# include <uchar.h>
  # include <wchar.h>
-typedef mbstate_t mbstate;
-# if USE_C_LOCALE
-#  include <c-ctype.h>
+#else
+# if !defined __UCLIBC__ || defined __UCLIBC_HAS_WCHAR__
+#  include <wchar.h>
  #  include <wctype.h>
-typedef wchar_t wch;
+#  define char32_t wchar_t
+#  define c32isprint iswprint
  # else
-#  include <ctype.h>
-#  include <uchar.h>
-typedef char32_t wch;
+#  define mbrtowc(pwc, s, n, ps) ((size_t) ((*(pwc) = *(s)) ? 1 : 0))
+#  define char32_t unsigned char
+#  define mbstate_t signed char /* Any complete type will do.  */
+#  define c32isprint isprint
+# endif
+# define mbrtoc32 mbrtowc
+# ifndef GNULIB_MBRTOC32_REGULAR
+#  define GNULIB_MBRTOC32_REGULAR 1
  # endif
  #endif
  
-static void
-mbs_clear (MAYBE_UNUSED mbstate *ps)
-{
-#if !USE_C_LOCALE || C_LOCALE_MIGHT_BE_MULTIBYTE
-  mbszero (ps);
-#endif
-}
-
-static size_t
-mbrtowch (wch *pwc, char const *s, size_t n, MAYBE_UNUSED mbstate *ps)
-{
-#if !USE_C_LOCALE
-  return mbrtoc32 (pwc, s, n, ps);
-#elif C_LOCALE_MIGHT_BE_MULTIBYTE
-  return mbrtowc (pwc, s, n, ps);
-#else
-  return n && (*pwc = *s);
-#endif
-}
-
-static bool
-chisprint (unsigned char c)
-{
-#if USE_C_LOCALE
-  return !c_iscntrl (c);
-#else
-  return isprint (c) != 0;
-#endif
-}
-
-static bool
-wchisprint (wch w)
-{
-#if !USE_C_LOCALE
-  return c32isprint (w);
-#elif C_LOCALE_MIGHT_BE_MULTIBYTE
-  return iswcntrl (w) == 0;
-#else
-  return chisprint (w);
+#if !GNULIB_defined_mbszero
+# define mbszero(ps) (*(mbstate_t *) {(ps)} = (mbstate_t) {0})
  #endif
-}
  
-#if USE_C_LOCALE
-# define _(msgid) msgid
-#else
-# include "gettext.h"
-# define _(msgid) dgettext (GNULIB_TEXT_DOMAIN, msgid)
-#endif
+#include "gettext.h"
+#define _(msgid) dgettext (GNULIB_TEXT_DOMAIN, msgid)
  #define N_(msgid) msgid
  
  #ifndef SIZE_MAX
@@ -314,9 +252,9 @@ gettext_quote (char const *msgid, enum quoting_style s)
       and means we need not use a function like locale_charset that
       has other dependencies.  */
    static char const quote[][4] = { "\xe2\x80\x98", "\xe2\x80\x99" };
-  wch w;
-  mbstate mbs; mbs_clear (&mbs);
-  if (mbrtowch (&w, quote[0], 3, &mbs) == 3 && w == 0x2018)
+  char32_t w;
+  mbstate_t mbs; mbszero (&mbs);
+  if (mbrtoc32 (&w, quote[0], 3, &mbs) == 3 && w == 0x2018)
      return quote[msgid[0] == '\''];
  
    return (s == clocale_quoting_style ? "\"" : "'");
@@ -343,8 +281,7 @@ quotearg_buffer_restyled (char *buffer, size_t buffersize,
                            char const *left_quote,
                            char const *right_quote)
  {
-  bool unibyte_locale = ((USE_C_LOCALE && !C_LOCALE_MIGHT_BE_MULTIBYTE)
-                         || MB_CUR_MAX == 1);
+  bool unibyte_locale = MB_CUR_MAX == 1;
  
    size_t len = 0;
    size_t orig_buffersize = 0;
@@ -691,11 +628,11 @@ quotearg_buffer_restyled (char *buffer, size_t buffersize,
              if (unibyte_locale)
                {
                  m = 1;
-                printable = chisprint (c);
+                printable = isprint (c) != 0;
                }
              else
                {
-                mbstate mbs; mbs_clear (&mbs);
+                mbstate_t mbs; mbszero (&mbs);
  
                  m = 0;
                  printable = true;
@@ -704,8 +641,8 @@ quotearg_buffer_restyled (char *buffer, size_t buffersize,
  
                  for (;;)
                    {
-                    wch w;
-                    size_t bytes = mbrtowch (&w, &arg[i + m],
+                    char32_t w;
+                    size_t bytes = mbrtoc32 (&w, &arg[i + m],
                                               argsize - (i + m), &mbs);
                      if (bytes == 0)
                        break;
@@ -743,7 +680,7 @@ quotearg_buffer_restyled (char *buffer, size_t buffersize,
                                  }
                            }
  
-                        if (! wchisprint (w))
+                        if (! c32isprint (w))
                            printable = false;
                          m += bytes;
                        }
diff --git a/m4/quotearg.m4 b/m4/quotearg.m4

index ceca5925adf50de8379a1675157351eecd8545d7..330743b3bf3c818ede10e0625d55e49ae316a1b4 100644 (file)
--- a/m4/quotearg.m4
+++ b/m4/quotearg.m4
@@ -1,5 +1,5 @@
  # quotearg.m4
-# serial 11
+# serial 10
  dnl Copyright (C) 2002, 2004-2026 Free Software Foundation, Inc.
  dnl This file is free software; the Free Software Foundation
  dnl gives unlimited permission to copy and/or distribute it,
@@ -9,6 +9,5 @@ dnl This file is offered as-is, without any warranty.
  AC_DEFUN([gl_QUOTEARG],
  [
    AC_REQUIRE([AC_C_RESTRICT])
-  AC_REQUIRE([gl_MUSL_LIBC])
    :
  ])
diff --git a/modules/quotearg b/modules/quotearg

index 02e54d989df697cd902313707550498c45423c72..17a7c1314c08ff1155387cbf7177c942789f5529 100644 (file)
--- a/modules/quotearg
+++ b/modules/quotearg
@@ -6,11 +6,9 @@ lib/quotearg.h
  lib/quotearg.c
  lib/quote.h
  m4/quotearg.m4
-m4/musl.m4
  
  Depends-on:
  attribute
-c-ctype
  c32isprint
  extensions
  gettext-h
author	Paul Eggert <eggert@cs.ucla.edu>
	Sun, 31 May 2026 04:57:03 +0000 (21:57 -0700)
committer	Paul Eggert <eggert@cs.ucla.edu>
	Sun, 31 May 2026 05:11:43 +0000 (22:11 -0700)
ChangeLog		patch \| blob \| blame \| history
lib/quotearg.c		patch \| blob \| blame \| history
m4/quotearg.m4		patch \| blob \| blame \| history
modules/quotearg		patch \| blob \| blame \| history