git.ipfire.org Git - thirdparty/coreutils.git/commitdiff
maint: prefer char32_t to wchar_t
author Paul Eggert <eggert@cs.ucla.edu>
Fri, 22 Sep 2023 16:45:12 +0000 (09:45 -0700)
committer Paul Eggert <eggert@cs.ucla.edu>
Sat, 23 Sep 2023 07:28:27 +0000 (00:28 -0700)
This should work better on non-glibc platforms that don’t
use Unicode for wchar_t.  However, POSIX appears to prohibit
this for printf.c so leave that alone.
* bootstrap.conf (gnulib_modules): Add btoc32, c32iscntrl,
c32isprint, c32isspace, c32width, mbrtoc32.  Remove btowc, wcwidth.
* src/df.c, src/ls.c, src/wc.c:
Include uchar.h instead of wchar.h and wctype.h.
* src/df.c (replace_invalid_chars):
* src/ls.c (quote_name_buf):
* src/wc.c (isnbspace, wc):
Use char32_t instead of wchar_t.

bootstrap.conf
src/df.c
src/ls.c
src/printf.c
src/wc.c

index 0b1ed95f632c65c656d076285d03d00201b2679e..fdb059ccea98b8d5735da24190adeafecf6c8bf8 100644 (file)
--- a/bootstrap.conf
+++ b/bootstrap.conf
@@ -45,10 +45,14 @@ gnulib_modules="
   backupfile
   base32
   base64
-  btowc
+  btoc32
   buffer-lcm
   byteswap
   c-strcase
+  c32iscntrl
+  c32isprint
+  c32isspace
+  c32width
   canon-host
   canonicalize
   chmodat
@@ -161,6 +165,7 @@ gnulib_modules="
   malloc-gnu
   manywarnings
   mbrlen
+  mbrtoc32
   mbrtowc
   mbschr
   mbslen
@@ -282,7 +287,6 @@ gnulib_modules="
   verify
   verror
   version-etc-fsf
-  wcwidth
   winsz-ioctl
   winsz-termios
   write-any-file
index 1f3665a4af0e6d2b68ffcd260dd4b4bbf3b33865..5a41ad38ace5658d88fd55e8291e6ad54b2c037e 100644 (file)
--- a/src/df.c
+++ b/src/df.c
@@ -23,8 +23,7 @@
 #include <sys/types.h>
 #include <getopt.h>
 #include <c-ctype.h>
-#include <wchar.h>
-#include <wctype.h>
+#include <uchar.h>
 
 #include "system.h"
 #include "assure.h"
@@ -324,13 +323,13 @@ replace_invalid_chars (char *cell)
 
   for (char *src = cell; src != srcend; src += n)
     {
-      wchar_t wc;
+      char32_t wc;
       size_t srcbytes = srcend - src;
-      n = mbrtowc (&wc, src, srcbytes, &mbstate);
+      n = mbrtoc32 (&wc, src, srcbytes, &mbstate);
       bool ok = n <= srcbytes;
 
       if (ok)
-        ok = !iswcntrl (wc);
+        ok = !c32iscntrl (wc);
       else
         n = 1;
 
index eb74f16785e6935868280bb932482a6dee102357..769ae85a75b10f089011c703701b49f1301acd15 100644 (file)
--- a/src/ls.c
+++ b/src/ls.c
@@ -55,7 +55,7 @@
 #include <getopt.h>
 #include <signal.h>
 #include <selinux/selinux.h>
-#include <wchar.h>
+#include <uchar.h>
 
 #if HAVE_LANGINFO_CODESET
 # include <langinfo.h>
@@ -4612,11 +4612,11 @@ quote_name_buf (char **inbuf, size_t bufsize, char *name,
                     mbstate_t mbstate; mbszero (&mbstate);
                     do
                       {
-                        wchar_t wc;
+                        char32_t wc;
                         size_t bytes;
                         int w;
 
-                        bytes = mbrtowc (&wc, p, plimit - p, &mbstate);
+                        bytes = mbrtoc32 (&wc, p, plimit - p, &mbstate);
 
                         if (bytes == (size_t) -1)
                           {
@@ -4644,7 +4644,7 @@ quote_name_buf (char **inbuf, size_t bufsize, char *name,
                           /* A null wide character was encountered.  */
                           bytes = 1;
 
-                        w = wcwidth (wc);
+                        w = c32width (wc);
                         if (w >= 0)
                           {
                             /* A printable multibyte character.
index 16ea1c17f1955f9b55375a28832a6f0af935eb84..f36b455190cc119a7f66f48e8a24522ad439b0b7 100644 (file)
--- a/src/printf.c
+++ b/src/printf.c
@@ -176,6 +176,7 @@ FUNC_NAME (char const *s)                                            \
           wchar_t wc;                                                   \
           size_t slen = strlen (s);                                     \
           ssize_t bytes;                                                \
+          /* Use mbrtowc not mbrtoc32, as per POSIX.  */                \
           bytes = mbrtowc (&wc, s, slen, &mbstate);                     \
           if (0 < bytes)                                                \
             {                                                           \
index b0f92c6b73ff979f6182d36c6b53416efd105c33..c0b37b557698930fd3cbf9635364348e9a9fc67e 100644 (file)
--- a/src/wc.c
+++ b/src/wc.c
@@ -23,8 +23,7 @@
 #include <stdio.h>
 #include <getopt.h>
 #include <sys/types.h>
-#include <wchar.h>
-#include <wctype.h>
+#include <uchar.h>
 
 #include "system.h"
 #include "assure.h"
@@ -218,7 +217,7 @@ iswnbspace (wint_t wc)
 static int
 isnbspace (int c)
 {
-  return iswnbspace (btowc (c));
+  return iswnbspace (btoc32 (c));
 }
 
 /* FILE is the name of the file (or null for standard input)
@@ -483,7 +482,7 @@ wc (int fd, char const *file_x, struct fstatus *fstatus, off_t current_pos)
           bytes_read += prev;
           do
             {
-              wchar_t wide_char;
+              char32_t wide_char;
               size_t n;
               bool wide = true;
 
@@ -501,7 +500,7 @@ wc (int fd, char const *file_x, struct fstatus *fstatus, off_t current_pos)
 #if SUPPORT_OLD_MBRTOWC
                   backup_state = state;
 #endif
-                  n = mbrtowc (&wide_char, p, bytes_read, &state);
+                  n = mbrtoc32 (&wide_char, p, bytes_read, &state);
                   if (n == (size_t) -2)
                     {
 #if SUPPORT_OLD_MBRTOWC
@@ -553,17 +552,17 @@ wc (int fd, char const *file_x, struct fstatus *fstatus, off_t current_pos)
                   in_word = false;
                   break;
                 default:
-                  if (wide && iswprint (wide_char))
+                  if (wide && c32isprint (wide_char))
                     {
-                      /* wcwidth can be expensive on OSX for example,
+                      /* c32width can be expensive on OSX for example,
                          so avoid if not needed.  */
                       if (print_linelength)
                         {
-                          int width = wcwidth (wide_char);
+                          int width = c32width (wide_char);
                           if (width > 0)
                             linepos += width;
                         }
-                      if (iswspace (wide_char) || iswnbspace (wide_char))
+                      if (c32isspace (wide_char) || iswnbspace (wide_char))
                         goto mb_word_separator;
                       in_word = true;
                     }