git.ipfire.org Git - thirdparty/coreutils.git/commitdiff
printf: support printing the numeric value of multi-byte chars
author Pádraig Brady <P@draigBrady.com>
Fri, 18 Mar 2022 14:52:36 +0000 (14:52 +0000)
committer Pádraig Brady <P@draigBrady.com>
Sat, 19 Mar 2022 16:57:07 +0000 (16:57 +0000)
* src/printf.c (STRTOX): Update to support multi-byte chars.
* tests/misc/printf-mb.sh: Add a new test.
* tests/local.mk: Reference the new test.
* NEWS: Mention the improvement.
Fixes https://bugs.gnu.org/54388

NEWS
src/printf.c
tests/local.mk
tests/misc/printf-mb.sh [new file with mode: 0755]

diff --git a/NEWS b/NEWS
index ce60bad4a9736f76869a51b26850f301b8871c0e..6d6f204eee18005de57634be4f9b96f6dd94420e 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -108,6 +108,8 @@ GNU coreutils NEWS                                    -*- outline -*-
   any extra final progress just before synchronizing output data,
   since synchronizing can take a long time.
 
+  printf now supports printing the numeric value of multi-byte characters.
+
   sort --debug now diagnoses issues with --field-separator characters
   that conflict with characters possibly used in numbers.
 
diff --git a/src/printf.c b/src/printf.c
index 5f84475fdcc6b55ee5e7184d9c0b07b9b26425e3..68c3883419b93a051a6ff3469f8d8d7089343270 100644 (file)
--- a/src/printf.c
+++ b/src/printf.c
@@ -53,6 +53,7 @@
 #include <config.h>
 #include <stdio.h>
 #include <sys/types.h>
+#include <wchar.h>
 
 #include "system.h"
 #include "cl-strtod.h"
@@ -170,6 +171,21 @@ FUNC_NAME (char const *s)                                           \
     {                                                                   \
       unsigned char ch = *++s;                                          \
       val = ch;                                                                 \
+                                                                         \
+      if (MB_CUR_MAX > 1 && *(s + 1))                                   \
+        {                                                               \
+          mbstate_t mbstate = { 0, };                                   \
+          wchar_t wc;                                                   \
+          size_t slen = strlen (s);                                     \
+          ssize_t bytes;                                                \
+          bytes = mbrtowc (&wc, s, slen, &mbstate);                     \
+          if (0 < bytes)                                                \
+            {                                                           \
+              val = wc;                                                         \
+              s += bytes - 1;                                           \
+            }                                                           \
+        }                                                               \
+                                                                         \
       /* If POSIXLY_CORRECT is not set, then give a warning that there  \
          are characters following the character constant and that GNU   \
          printf is ignoring those characters.  If POSIXLY_CORRECT *is*  \
diff --git a/tests/local.mk b/tests/local.mk
index f97ddcb98a7687e3d5dd016a93138ebb3dda78be..0f77786192b764872a2ff4f3b961cbc69cf2b052 100644 (file)
--- a/tests/local.mk
+++ b/tests/local.mk
@@ -344,6 +344,7 @@ all_tests =                                 \
   tests/misc/printf.sh                         \
   tests/misc/printf-cov.pl                     \
   tests/misc/printf-hex.sh                     \
+  tests/misc/printf-mb.sh                      \
   tests/misc/printf-surprise.sh                        \
   tests/misc/printf-quote.sh                   \
   tests/misc/pwd-long.sh                       \
diff --git a/tests/misc/printf-mb.sh b/tests/misc/printf-mb.sh
new file mode 100755 (executable)
index 0000000..ad21dbe
--- /dev/null
+++ b/tests/misc/printf-mb.sh
@@ -0,0 +1,52 @@
+#!/bin/sh
+# Test printing the numeric values of multi-byte characters
+
+# Copyright (C) 2022 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src
+print_ver_ printf
+
+prog='env printf'
+
+unset LC_ALL
+f=$LOCALE_FR_UTF8
+: ${LOCALE_FR_UTF8=none}
+if test "$LOCALE_FR_UTF8" != "none"; then
+  (
+   #valid multi-byte
+   LC_ALL=$f $prog '%04x\n' '"á' >>out 2>>err
+   #invalid multi-byte (a lone 0xE1 byte): the byte value is printed
+   LC_ALL=$f $prog '%04x\n' "'$($prog '\xe1')" >>out 2>>err
+   #uni-byte (C locale): the byte value is printed
+   LC_ALL=C $prog '%04x\n' "'$($prog '\xe1')" >>out 2>>err
+   #valid multi-byte, with a trailing character (which triggers a warning)
+   LC_ALL=$f $prog '%04x\n' '"á"' >>out 2>>err
+  )
+  cat <<\EOF > exp || framework_failure_
+00e1
+00e1
+00e1
+00e1
+EOF
+  compare exp out || fail=1
+
+  cat <<EOF > exp_err
+printf: warning: ": character(s) following character constant have been ignored
+EOF
+  compare exp_err err || fail=1
+fi
+
+Exit $fail