]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
libstdc++: Fix handling of field width for wide strings and characters [PR119593]
authorTomasz Kamiński <tkaminsk@redhat.com>
Thu, 3 Apr 2025 08:23:45 +0000 (10:23 +0200)
committerTomasz Kamiński <tkaminsk@redhat.com>
Thu, 3 Apr 2025 11:46:20 +0000 (13:46 +0200)
This patch corrects the handling of UTF-32LE and UTF-32BE in
__unicode::__literal_encoding_is_unicode<_CharT>, so that they are
recognized as Unicode and the function produces the correct result for wchar_t.

Use `__unicode::__field_width` to compute the estimated width
of the character for Unicode wide encodings.

PR libstdc++/119593

libstdc++-v3/ChangeLog:

* include/bits/unicode.h
(__unicode::__literal_encoding_is_unicode<_CharT>):
Corrected handling for UTF-16 and UTF-32 with "LE" or "BE" suffix.
* include/std/format (__formatter_str::_S_character_width):
Define.
(__formatter_str::_M_format_character): Pass the estimated character
width instead of 1.
* testsuite/std/format/functions/format.cc: Test for wchar_t.

Reviewed-by: Jonathan Wakely <jwakely@redhat.com>
Signed-off-by: Tomasz Kamiński <tkaminsk@redhat.com>
libstdc++-v3/include/bits/unicode.h
libstdc++-v3/include/std/format
libstdc++-v3/testsuite/std/format/functions/format.cc

index 24b1ac3d53d67c1b3ba1e74eb8daeeb0c40407de..99d972eccff8e21a7bc1df23ac0cad4fc7a7a6ee 100644 (file)
@@ -1039,6 +1039,8 @@ inline namespace __v16_0_0
              string_view __s(__enc);
              if (__s.ends_with("//"))
                __s.remove_suffix(2);
+             if (__s.ends_with("LE") || __s.ends_with("BE"))
+               __s.remove_suffix(2);
              return __s == "16" || __s == "32";
            }
        }
index c3327e1d38411464f62b910f0b51d3c9d20b4937..9ef719edcf03603c1245efd53c376afc9b9ab11d 100644 (file)
@@ -1277,12 +1277,26 @@ namespace __format
                                                  _M_spec);
        }
 
+      [[__gnu__::__always_inline__]]
+      static size_t
+      _S_character_width(_CharT __c)
+      {
+       // N.B. a single-byte code unit cannot encode a character wider than 1
+       if constexpr (sizeof(_CharT) > 1u && 
+                       __unicode::__literal_encoding_is_unicode<_CharT>())
+         return __unicode::__field_width(__c);
+       else
+         return 1u;
+      }
+
       template<typename _Out>
        typename basic_format_context<_Out, _CharT>::iterator
        _M_format_character(_CharT __c,
                      basic_format_context<_Out, _CharT>& __fc) const
        {
-         return __format::__write_padded_as_spec({&__c, 1u}, 1, __fc, _M_spec);
+         return __format::__write_padded_as_spec({&__c, 1u},
+                                                 _S_character_width(__c),
+                                                 __fc, _M_spec);
        }
 
       template<typename _Int>
index 7fc420170458de178ef987f416da5687981ad1f2..d8dbf4634133faff297f75bab601c7681e138c0a 100644 (file)
@@ -501,9 +501,14 @@ test_unicode()
 {
   // Similar to sC example in test_std_examples, but not from the standard.
   // Verify that the character "🤡" has estimated field width 2,
-  // rather than estimated field width equal to strlen("🤡"), which would be 4.
+  // rather than estimated field width equal to strlen("🤡"), which would be 4,
+  // or just width 1 for single character.
   std::string sC = std::format("{:*<3}", "🤡");
   VERIFY( sC == "🤡*" );
+  std::wstring wsC = std::format(L"{:*<3}", L"🤡");
+  VERIFY( wsC == L"🤡*" );
+  wsC = std::format(L"{:*<3}", L'🤡');
+  VERIFY( wsC == L"🤡*" );
 
   // Verify that "£" has estimated field width 1, not strlen("£") == 2.
   std::string sL = std::format("{:*<3}", "£");
@@ -517,7 +522,7 @@ test_unicode()
   std::string sP = std::format("{:1.1} {:*<1.1}", "£", "🤡");
   VERIFY( sP == "£ *" );
   sP = std::format("{:*<2.1} {:*<2.1}", "£", "🤡");
   VERIFY( sP == "£* **" );
 
   // Verify field width handling for extended grapheme clusters,
   // and that a cluster gets output as a single item, not truncated.