c++: Implement C++26 P1854R4 - Making non-encodable string literals ill-formed [PR110341]

author Jakub Jelinek <jakub@redhat.com>
Tue, 14 Nov 2023 17:28:34 +0000 (18:28 +0100)
committer Jakub Jelinek <jakub@redhat.com>
Tue, 14 Nov 2023 17:28:34 +0000 (18:28 +0100)
diff --git a/gcc/testsuite/g++.dg/cpp1z/utf8-neg.C b/gcc/testsuite/g++.dg/cpp1z/utf8-neg.C

index 7bca12489555e1ad6ffd1e3704bf847a68b13d8b..d9e982f5c011006b32835dd7be7155d0ced22cbc 100644 (file)
--- a/gcc/testsuite/g++.dg/cpp1z/utf8-neg.C
+++ b/gcc/testsuite/g++.dg/cpp1z/utf8-neg.C
@@ -1,6 +1,6 @@
  /* { dg-do compile { target c++17 } } */
  
  const static char c0 = u8'';           // { dg-error "empty character" }
-const static char c1 = u8'ab';         // { dg-error "character constant too long for its type" }
-const static char c2 = u8'\u0124';     // { dg-error "character constant too long for its type" }
-const static char c3 = u8'\U00064321';  // { dg-error "character constant too long for its type" }
+const static char c1 = u8'ab';         // { dg-error "multi-character literal cannot have an encoding prefix" }
+const static char c2 = u8'\u0124';     // { dg-error "character not encodable in a single code unit" }
+const static char c3 = u8'\U00064321';  // { dg-error "character not encodable in a single code unit" }
diff --git a/gcc/testsuite/g++.dg/cpp23/wchar-multi1.C b/gcc/testsuite/g++.dg/cpp23/wchar-multi1.C

index 77fa3a606dc5448d0a841326c80d292c44562b2f..d86dfc917906077e3da4686f397450eded2c6a87 100644 (file)
--- a/gcc/testsuite/g++.dg/cpp23/wchar-multi1.C
+++ b/gcc/testsuite/g++.dg/cpp23/wchar-multi1.C
@@ -4,18 +4,19 @@
  
  char a = 'a';
  int b = 'ab';                  // { dg-warning "multi-character character constant" }
-int c = '\u05D9';              // { dg-warning "multi-character character constant" }
+int c = '\u05D9';              // { dg-error "character not encodable in a single execution character code unit" }
  #if __SIZEOF_INT__ > 2
-int d = '\U0001F525';          // { dg-warning "multi-character character constant" "" { target int32 } }
+int d = '\U0001F525';          // { dg-error "character not encodable in a single execution character code unit" "" { target int32 } }
  #endif
-int e = 'abcd';                        // { dg-warning "multi-character character constant" }
+int e = 'abcd';                        // { dg-warning "multi-character character constant" "" { target int32plus } }
+                               // { dg-warning "multi-character literal with \[0-9]+ characters exceeds 'int' size of \[0-9]+ bytes" "" { target { ! int32plus } } .-1 }
  wchar_t f = L'f';
-wchar_t g = L'gh';             // { dg-error "character constant too long for its type" "" { target c++23 } }
-                               // { dg-warning "character constant too long for its type" "" { target c++20_down } .-1 }
-wchar_t h = L'ijkl';           // { dg-error "character constant too long for its type" "" { target c++23 } }
-                               // { dg-warning "character constant too long for its type" "" { target c++20_down } .-1 }
-wchar_t i = L'\U0001F525';     // { dg-error "character constant too long for its type" "" { target { c++23 && { ! 4byte_wchar_t } } } }
-                               // { dg-warning "character constant too long for its type" "" { target { c++20_down && { ! 4byte_wchar_t } } } .-1 }
+wchar_t g = L'gh';             // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++23 } }
+                               // { dg-warning "multi-character literal cannot have an encoding prefix" "" { target c++20_down } .-1 }
+wchar_t h = L'ijkl';           // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++23 } }
+                               // { dg-warning "multi-character literal cannot have an encoding prefix" "" { target c++20_down } .-1 }
+wchar_t i = L'\U0001F525';     // { dg-error "multi-character literal cannot have an encoding prefix" "" { target { c++23 && { ! 4byte_wchar_t } } } }
+                               // { dg-warning "multi-character literal cannot have an encoding prefix" "" { target { c++20_down && { ! 4byte_wchar_t } } } .-1 }
  #ifdef __cpp_char8_t
  typedef char8_t u8;
  #else
@@ -23,20 +24,20 @@ typedef char u8;
  #endif
  #if __cpp_unicode_characters >= 201411 
  u8 j = u8'j';
-u8 k = u8'kl';                 // { dg-error "character constant too long for its type" "" { target c++17 } }
-u8 l = u8'\U0001F525';         // { dg-error "character constant too long for its type" "" { target c++17 }  }
+u8 k = u8'kl';                 // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++17 } }
+u8 l = u8'\U0001F525';         // { dg-error "character not encodable in a single code unit" "" { target c++17 }  }
  #endif
  #if __cpp_unicode_characters >= 200704
  char16_t m = u'm';
-char16_t n = u'no';            // { dg-error "character constant too long for its type" "" { target c++11 } }
+char16_t n = u'no';            // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++11 } }
  char16_t o = u'\u05D9';
-char16_t p = u'\U0001F525';    // { dg-error "character constant too long for its type" "" { target c++11 } }
+char16_t p = u'\U0001F525';    // { dg-error "character not encodable in a single code unit" "" { target c++11 } }
  char32_t q = U'm';
-char32_t r = U'no';            // { dg-error "character constant too long for its type" "" { target c++11 } }
+char32_t r = U'no';            // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++11 } }
  char32_t s = U'\u05D9';
  char32_t t = U'\U0001F525';
  #endif
-wchar_t u = L'\u0065\u0301';           // { dg-error "character constant too long for its type" "" { target c++23 } }
-                               // { dg-warning "character constant too long for its type" "" { target c++20_down } .-1 }
-wchar_t v = L'é';            // { dg-error "character constant too long for its type" "" { target c++23 } }
-                               // { dg-warning "character constant too long for its type" "" { target c++20_down } .-1 }
+wchar_t u = L'\u0065\u0301';           // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++23 } }
+                               // { dg-warning "multi-character literal cannot have an encoding prefix" "" { target c++20_down } .-1 }
+wchar_t v = L'é';            // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++23 } }
+                               // { dg-warning "multi-character literal cannot have an encoding prefix" "" { target c++20_down } .-1 }
diff --git a/gcc/testsuite/g++.dg/cpp23/wchar-multi2.C b/gcc/testsuite/g++.dg/cpp23/wchar-multi2.C

index a63e0fffe6014b94fde25a3985f79822db2ec846..270de65988cb6150c6dccc6878756900cc3eb427 100644 (file)
--- a/gcc/testsuite/g++.dg/cpp23/wchar-multi2.C
+++ b/gcc/testsuite/g++.dg/cpp23/wchar-multi2.C
@@ -11,12 +11,12 @@ int d = '\U0001F525';               // { dg-warning "multi-character character constant" "" {
  #endif
  int e = 'abcd';                        // { dg-warning "multi-character character constant" }
  wchar_t f = L'f';
-wchar_t g = L'gh';             // { dg-error "character constant too long for its type" "" { target c++23 } }
-                               // { dg-warning "character constant too long for its type" "" { target c++20_down } .-1 }
-wchar_t h = L'ijkl';           // { dg-error "character constant too long for its type" "" { target c++23 } }
-                               // { dg-warning "character constant too long for its type" "" { target c++20_down } .-1 }
-wchar_t i = L'\U0001F525';     // { dg-error "character constant too long for its type" "" { target { c++23 } } }
-                               // { dg-warning "character constant too long for its type" "" { target { c++20_down } } .-1 }
+wchar_t g = L'gh';             // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++23 } }
+                               // { dg-warning "multi-character literal cannot have an encoding prefix" "" { target c++20_down } .-1 }
+wchar_t h = L'ijkl';           // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++23 } }
+                               // { dg-warning "multi-character literal cannot have an encoding prefix" "" { target c++20_down } .-1 }
+wchar_t i = L'\U0001F525';     // { dg-error "character not encodable in a single code unit" "" { target { c++23 } } }
+                               // { dg-warning "character not encodable in a single code unit" "" { target { c++20_down } } .-1 }
  #ifdef __cpp_char8_t
  typedef char8_t u8;
  #else
@@ -24,20 +24,20 @@ typedef char u8;
  #endif
  #if __cpp_unicode_characters >= 201411 
  u8 j = u8'j';
-u8 k = u8'kl';                 // { dg-error "character constant too long for its type" "" { target c++17 } }
-u8 l = u8'\U0001F525';         // { dg-error "character constant too long for its type" "" { target c++17 }  }
+u8 k = u8'kl';                 // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++17 } }
+u8 l = u8'\U0001F525';         // { dg-error "character not encodable in a single code unit" "" { target c++17 }  }
  #endif
  #if __cpp_unicode_characters >= 200704
  char16_t m = u'm';
-char16_t n = u'no';            // { dg-error "character constant too long for its type" "" { target c++11 } }
+char16_t n = u'no';            // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++11 } }
  char16_t o = u'\u05D9';
-char16_t p = u'\U0001F525';    // { dg-error "character constant too long for its type" "" { target c++11 } }
+char16_t p = u'\U0001F525';    // { dg-error "character not encodable in a single code unit" "" { target c++11 } }
  char32_t q = U'm';
-char32_t r = U'no';            // { dg-error "character constant too long for its type" "" { target c++11 } }
+char32_t r = U'no';            // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++11 } }
  char32_t s = U'\u05D9';
  char32_t t = U'\U0001F525';
  #endif
-wchar_t u = L'\u0065\u0301';           // { dg-error "character constant too long for its type" "" { target c++23 } }
-                               // { dg-warning "character constant too long for its type" "" { target c++20_down } .-1 }
-wchar_t v = L'é';            // { dg-error "character constant too long for its type" "" { target c++23 } }
-                               // { dg-warning "character constant too long for its type" "" { target c++20_down } .-1 }
+wchar_t u = L'\u0065\u0301';           // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++23 } }
+                               // { dg-warning "multi-character literal cannot have an encoding prefix" "" { target c++20_down } .-1 }
+wchar_t v = L'é';            // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++23 } }
+                               // { dg-warning "multi-character literal cannot have an encoding prefix" "" { target c++20_down } .-1 }
diff --git a/gcc/testsuite/g++.dg/cpp26/literals1.C b/gcc/testsuite/g++.dg/cpp26/literals1.C

new file mode 100644 (file)

index 0000000..d51f2f8
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp26/literals1.C
@@ -0,0 +1,66 @@
+// C++26 P1854R4 - Making non-encodable string literals ill-formed
+// { dg-do compile { target c++11 } }
+// { dg-require-effective-target int32 }
+// { dg-options "-pedantic-errors -finput-charset=UTF-8 -fexec-charset=UTF-8" }
+
+int a = 'abcd';                                                // { dg-warning "multi-character character constant" }
+int b = '\x61\x62\x63\x64';                            // { dg-warning "multi-character character constant" }
+int c = 'á';                                          // { dg-error "character not encodable in a single execution character code unit" }
+int d = '😁';                                                // { dg-error "character not encodable in a single execution character code unit" }
+int e = '\N{FACE WITH TEARS OF JOY}';                  // { dg-error "character not encodable in a single execution character code unit" }
+                                                       // { dg-error "named universal character escapes are only valid in" "" { target c++20_down } .-1 }
+int f = '\U0001F602';                                  // { dg-error "character not encodable in a single execution character code unit" }
+wchar_t g = L'abcd';                                   // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++23 } }
+                                                       // { dg-warning "multi-character literal cannot have an encoding prefix" "" { target c++20_down } .-1 }
+wchar_t h = L'\x61\x62\x63\x64';                       // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++23 } }
+                                                       // { dg-warning "multi-character literal cannot have an encoding prefix" "" { target c++20_down } .-1 }
+wchar_t i = L'á';
+char16_t j = u'abcd';                                  // { dg-error "multi-character literal cannot have an encoding prefix" }
+char16_t k = u'\x61\x62\x63\x64';                      // { dg-error "multi-character literal cannot have an encoding prefix" }
+char16_t l = u'á';
+char16_t m = u'😁';                                  // { dg-error "character not encodable in a single code unit" }
+char16_t n = u'\N{FACE WITH TEARS OF JOY}';            // { dg-error "character not encodable in a single code unit" { target c++23 } }
+                                                       // { dg-error "named universal character escapes are only valid in" "" { target c++20_down } .-1 }
+char16_t o = u'\U0001F602';                            // { dg-error "character not encodable in a single code unit" }
+char32_t p = U'abcd';                                  // { dg-error "multi-character literal cannot have an encoding prefix" }
+char32_t q = U'\x61\x62\x63\x64';                      // { dg-error "multi-character literal cannot have an encoding prefix" }
+char32_t r = U'á';
+char32_t s = U'😁';
+char32_t t = U'\N{FACE WITH TEARS OF JOY}';            // { dg-error "named universal character escapes are only valid in" "" { target c++20_down } }
+char32_t u = U'\U0001F602';
+#if __cpp_unicode_characters >= 201411L
+auto v = u8'abcd';                                     // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++17 } }
+auto w = u8'\x61\x62\x63\x64';                         // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++17 } }
+auto x = u8'á';                                               // { dg-error "character not encodable in a single code unit" "" { target c++17 } }
+auto y = u8'😁';                                     // { dg-error "character not encodable in a single code unit" "" { target c++17 } }
+auto z = u8'\N{FACE WITH TEARS OF JOY}';               // { dg-error "character not encodable in a single code unit" "" { target c++17 } }
+                                                       // { dg-error "named universal character escapes are only valid in" "" { target { c++17 && c++20_down } } .-1 }
+auto aa = u8'\U0001F602';                              // { dg-error "character not encodable in a single code unit" "" { target c++17 } }
+#endif
+const char *ab = "😁";
+const char *ac = "\N{FACE WITH TEARS OF JOY}";         // { dg-error "named universal character escapes are only valid in" "" { target c++20_down } }
+const char *ad = "\U0001F602";
+const char16_t *ae = u"😁";
+const char16_t *af = u"\N{FACE WITH TEARS OF JOY}";    // { dg-error "named universal character escapes are only valid in" "" { target c++20_down } }
+const char16_t *ag = u"\U0001F602";
+const char32_t *ah = U"😁";
+const char32_t *ai = U"\N{FACE WITH TEARS OF JOY}";    // { dg-error "named universal character escapes are only valid in" "" { target c++20_down } }
+const char32_t *aj = U"\U0001F602";
+auto ak = u8"😁";
+auto al = u8"\N{FACE WITH TEARS OF JOY}";              // { dg-error "named universal character escapes are only valid in" "" { target c++20_down } }
+auto am = u8"\U0001F602";
+int an = '\x123456789';                                        // { dg-error "hex escape sequence out of range" }
+wchar_t ao = L'\x123456789abcdef0';                    // { dg-error "hex escape sequence out of range" }
+char16_t ap = u'\x12345678';                           // { dg-error "hex escape sequence out of range" }
+char32_t aq = U'\x123456789abcdef0';                   // { dg-error "hex escape sequence out of range" }
+#if __cpp_unicode_characters >= 201411L
+auto ar = u8'\x123456789abcdef0';                      // { dg-error "hex escape sequence out of range" "" { target c++17 } }
+#endif
+char as = '\xff';
+#if __SIZEOF_WCHAR_T__ * __CHAR_BIT__ == 32
+wchar_t at = L'\xffffffff';
+#elif __SIZEOF_WCHAR_T__ * __CHAR_BIT__ == 16
+wchar_t at = L'\xffff';
+#endif
+int au = '\x1234';                                     // { dg-error "hex escape sequence out of range" }
+int av = 'abcdefghijklmnop';                           // { dg-warning "multi-character literal with \[0-9]+ characters exceeds 'int' size of \[0-9]+ bytes" }
diff --git a/gcc/testsuite/g++.dg/cpp26/literals2.C b/gcc/testsuite/g++.dg/cpp26/literals2.C

new file mode 100644 (file)

index 0000000..11e4406
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp26/literals2.C
@@ -0,0 +1,68 @@
+// C++26 P1854R4 - Making non-encodable string literals ill-formed
+// { dg-do compile { target c++11 } }
+// { dg-require-effective-target int32 }
+// { dg-options "-pedantic-errors -finput-charset=UTF-8 -fexec-charset=ISO-8859-1" }
+/* { dg-require-iconv "ISO-8859-1" } */
+
+int a = 'abcd';                                                // { dg-warning "multi-character character constant" }
+int b = '\x61\x62\x63\x64';                            // { dg-warning "multi-character character constant" }
+int c = 'á';
+int d = '😁';                                                // { dg-error "converting to execution character set" }
+int e = '\N{FACE WITH TEARS OF JOY}';                  // { dg-error "converting UCN to execution character set" }
+                                                       // { dg-error "named universal character escapes are only valid in" "" { target c++20_down } .-1 }
+int f = '\U0001F602';                                  // { dg-error "converting UCN to execution character set" }
+wchar_t g = L'abcd';                                   // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++23 } }
+                                                       // { dg-warning "multi-character literal cannot have an encoding prefix" "" { target c++20_down } .-1 }
+wchar_t h = L'\x61\x62\x63\x64';                       // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++23 } }
+                                                       // { dg-warning "multi-character literal cannot have an encoding prefix" "" { target c++20_down } .-1 }
+wchar_t i = L'á';
+char16_t j = u'abcd';                                  // { dg-error "multi-character literal cannot have an encoding prefix" }
+char16_t k = u'\x61\x62\x63\x64';                      // { dg-error "multi-character literal cannot have an encoding prefix" }
+char16_t l = u'á';
+char16_t m = u'😁';                                  // { dg-error "character not encodable in a single code unit" }
+char16_t n = u'\N{FACE WITH TEARS OF JOY}';            // { dg-error "character not encodable in a single code unit" { target c++23 } }
+                                                       // { dg-error "named universal character escapes are only valid in" "" { target c++20_down } .-1 }
+char16_t o = u'\U0001F602';                            // { dg-error "character not encodable in a single code unit" }
+char32_t p = U'abcd';                                  // { dg-error "multi-character literal cannot have an encoding prefix" }
+char32_t q = U'\x61\x62\x63\x64';                      // { dg-error "multi-character literal cannot have an encoding prefix" }
+char32_t r = U'á';
+char32_t s = U'😁';
+char32_t t = U'\N{FACE WITH TEARS OF JOY}';            // { dg-error "named universal character escapes are only valid in" "" { target c++20_down } }
+char32_t u = U'\U0001F602';
+#if __cpp_unicode_characters >= 201411L
+auto v = u8'abcd';                                     // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++17 } }
+auto w = u8'\x61\x62\x63\x64';                         // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++17 } }
+auto x = u8'á';                                               // { dg-error "character not encodable in a single code unit" "" { target c++17 } }
+auto y = u8'😁';                                     // { dg-error "character not encodable in a single code unit" "" { target c++17 } }
+auto z = u8'\N{FACE WITH TEARS OF JOY}';               // { dg-error "character not encodable in a single code unit" "" { target c++17 } }
+                                                       // { dg-error "named universal character escapes are only valid in" "" { target { c++17 && c++20_down } } .-1 }
+auto aa = u8'\U0001F602';                              // { dg-error "character not encodable in a single code unit" "" { target c++17 } }
+#endif
+const char *ab = "😁";                                       // { dg-error "converting to execution character set" }
+const char *ac = "\N{FACE WITH TEARS OF JOY}";         // { dg-error "converting UCN to execution character set" }
+                                                       // { dg-error "named universal character escapes are only valid in" "" { target c++20_down } .-1 }
+const char *ad = "\U0001F602";                         // { dg-error "converting UCN to execution character set" }
+const char16_t *ae = u"😁";
+const char16_t *af = u"\N{FACE WITH TEARS OF JOY}";    // { dg-error "named universal character escapes are only valid in" "" { target c++20_down } }
+const char16_t *ag = u"\U0001F602";
+const char32_t *ah = U"😁";
+const char32_t *ai = U"\N{FACE WITH TEARS OF JOY}";    // { dg-error "named universal character escapes are only valid in" "" { target c++20_down } }
+const char32_t *aj = U"\U0001F602";
+auto ak = u8"😁";
+auto al = u8"\N{FACE WITH TEARS OF JOY}";              // { dg-error "named universal character escapes are only valid in" "" { target c++20_down } }
+auto am = u8"\U0001F602";
+int an = '\x123456789';                                        // { dg-error "hex escape sequence out of range" }
+wchar_t ao = L'\x123456789abcdef0';                    // { dg-error "hex escape sequence out of range" }
+char16_t ap = u'\x12345678';                           // { dg-error "hex escape sequence out of range" }
+char32_t aq = U'\x123456789abcdef0';                   // { dg-error "hex escape sequence out of range" }
+#if __cpp_unicode_characters >= 201411L
+auto ar = u8'\x123456789abcdef0';                      // { dg-error "hex escape sequence out of range" "" { target c++17 } }
+#endif
+char as = '\xff';
+#if __SIZEOF_WCHAR_T__ * __CHAR_BIT__ == 32
+wchar_t at = L'\xffffffff';
+#elif __SIZEOF_WCHAR_T__ * __CHAR_BIT__ == 16
+wchar_t at = L'\xffff';
+#endif
+int au = '\x1234';                                     // { dg-error "hex escape sequence out of range" }
+int av = 'abcdefghijklmnop';                           // { dg-warning "multi-character literal with \[0-9]+ characters exceeds 'int' size of \[0-9]+ bytes" }
diff --git a/gcc/testsuite/g++.dg/cpp2a/ucn2.C b/gcc/testsuite/g++.dg/cpp2a/ucn2.C

index ee7011b4a3b7163b0aff276101ef0f338d0bbff6..53ee06e897135ebd568234e4d341abf49549d3ba 100644 (file)
--- a/gcc/testsuite/g++.dg/cpp2a/ucn2.C
+++ b/gcc/testsuite/g++.dg/cpp2a/ucn2.C
@@ -12,18 +12,18 @@ const char32_t *f = U"\uD802";              // { dg-error "is not a valid universal characte
  const char32_t *g = U"\U0000DFF0";     // { dg-error "is not a valid universal character" }
  const char32_t *h = U"\U00110001";     // { dg-error "is outside the UCS codespace" "" { target c++20 } }
  #if __cpp_unicode_characters >= 201411
-const char8_t i = u8'\u00C0';          // { dg-error "character constant too long for its type" "" { target c++17 } }
+const char8_t i = u8'\u00C0';          // { dg-error "character not encodable in a single code unit" "" { target c++17 } }
  #endif
-const char16_t j = u'\U0001F914';      // { dg-error "character constant too long for its type" }
+const char16_t j = u'\U0001F914';      // { dg-error "character not encodable in a single code unit" }
  const char32_t k = U'\U0001F914';
  #if __cpp_unicode_characters >= 201411
-const char8_t l = u8'ab';              // { dg-error "character constant too long for its type" "" { target c++17 } }
+const char8_t l = u8'ab';              // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++17 } }
  #endif
-const char16_t m = u'ab';              // { dg-error "character constant too long for its type" }
-const char32_t n = U'ab';              // { dg-error "character constant too long for its type" }
+const char16_t m = u'ab';              // { dg-error "multi-character literal cannot have an encoding prefix" }
+const char32_t n = U'ab';              // { dg-error "multi-character literal cannot have an encoding prefix" }
  #if __cpp_unicode_characters >= 201411
  const char8_t o = u8'\U00110002';      // { dg-error "is outside the UCS codespace" "" { target c++20 } }
-                                       // { dg-error "character constant too long for its type" "" { target c++17 } .-1 }
+                                       // { dg-error "character not encodable in a single code unit" "" { target c++17 } .-1 }
  #endif
  const char16_t p = u'\U00110003';      // { dg-error "is outside the UCS codespace" "" { target c++20 } }
                                         // { dg-error "converting UCN to execution character set" "" { target *-*-* } .-1 }
diff --git a/gcc/testsuite/g++.dg/ext/utf16-4.C b/gcc/testsuite/g++.dg/ext/utf16-4.C

index 030e085a82e491e8f99c169aba243c75160e2155..feb81667b7b5986ca44cc954fede6911eb45c98e 100644 (file)
--- a/gcc/testsuite/g++.dg/ext/utf16-4.C
+++ b/gcc/testsuite/g++.dg/ext/utf16-4.C
@@ -4,8 +4,8 @@
  
  
  const static char16_t  c0 = u'';               /* { dg-error "empty character" } */
-const static char16_t  c1 = u'ab';             /* { dg-error "constant too long" } */
-const static char16_t  c2 = u'\U00064321';     /* { dg-error "constant too long" } */
+const static char16_t  c1 = u'ab';             /* { dg-error "multi-character literal cannot have an encoding prefix" } */
+const static char16_t  c2 = u'\U00064321';     /* { dg-error "character not encodable in a single code unit" } */
  
  const static char16_t  c3 = 'a';
  const static char16_t  c4 = U'a';
@@ -14,5 +14,6 @@ const static char16_t c6 = U'\U00064321';     /* { dg-warning "conversion from .char
  const static char16_t  c7 = L'a';
  const static char16_t  c8 = L'\u2029';
  const static char16_t  c9 = L'\U00064321';     /* { dg-warning "conversion from .wchar_t. to .char16_t. changes value from .410401. to .17185." "" { target { 4byte_wchar_t } } } */
-                                               /* { dg-warning "constant too long" "" { target { ! 4byte_wchar_t } } .-1 } */
+                                               /* { dg-warning "character not encodable in a single code unit" "" { target { { ! 4byte_wchar_t } && c++20_down } } .-1 } */
+                                               /* { dg-error "character not encodable in a single code unit" "" { target { { ! 4byte_wchar_t } && c++23 } } .-2 } */
  int main () {}
diff --git a/gcc/testsuite/g++.dg/ext/utf32-4.C b/gcc/testsuite/g++.dg/ext/utf32-4.C

index 96bf0bb2b4a4ec33fe089c6e62e1621bca9c26a9..8310bf4055c46d4f248ab9afa97a521345014a34 100644 (file)
--- a/gcc/testsuite/g++.dg/ext/utf32-4.C
+++ b/gcc/testsuite/g++.dg/ext/utf32-4.C
@@ -3,15 +3,16 @@
  /* { dg-do compile { target c++11 } } */
  
  const static char32_t  c0 = U'';               /* { dg-error "empty character" } */
-const static char32_t  c1 = U'ab';             /* { dg-error "constant too long" } */
+const static char32_t  c1 = U'ab';             /* { dg-error "multi-character literal cannot have an encoding prefix" } */
  const static char32_t  c2 = U'\U00064321';
  
  const static char32_t  c3 = 'a';
  const static char32_t  c4 = u'a';
  const static char32_t  c5 = u'\u2029';
-const static char32_t  c6 = u'\U00064321';     /* { dg-error "constant too long" } */
+const static char32_t  c6 = u'\U00064321';     /* { dg-error "character not encodable in a single code unit" } */
  const static char32_t  c7 = L'a';
  const static char32_t  c8 = L'\u2029';
-const static char32_t  c9 = L'\U00064321';     /* { dg-warning "constant too long" "" { target { ! 4byte_wchar_t } } } */  
+const static char32_t  c9 = L'\U00064321';     /* { dg-warning "character not encodable in a single code unit" "" { target { { ! 4byte_wchar_t } && c++20_down } } } */
+                                               /* { dg-error "character not encodable in a single code unit" "" { target { { ! 4byte_wchar_t } && c++23 } } .-1 } */
  
  int main () {}
diff --git a/gcc/testsuite/gcc.dg/c23-utf8char-3.c b/gcc/testsuite/gcc.dg/c23-utf8char-3.c

index e152edbed84522a8c7c17785ef6d9307c684cf0d..0d82af52f0513f28e13986a24088d1f52db626af 100644 (file)
--- a/gcc/testsuite/gcc.dg/c23-utf8char-3.c
+++ b/gcc/testsuite/gcc.dg/c23-utf8char-3.c
@@ -3,6 +3,6 @@
  /* { dg-options "-std=c23 -pedantic-errors" } */
  
  unsigned char a = u8''; /* { dg-error "empty character constant" } */
-unsigned char b = u8'ab'; /* { dg-error "character constant too long for its type" } */
-unsigned char c = u8'\u00ff'; /* { dg-error "character constant too long for its type" } */
+unsigned char b = u8'ab'; /* { dg-error "multi-character literal cannot have an encoding prefix" } */
+unsigned char c = u8'\u00ff'; /* { dg-error "character not encodable in a single code unit" } */
  unsigned char d = u8'\x100'; /* { dg-error "hex escape sequence out of range" } */
diff --git a/gcc/testsuite/gcc.dg/cpp/charconst-4.c b/gcc/testsuite/gcc.dg/cpp/charconst-4.c

index 9ea5e8ab89692489ffdb67fce16ecae22dcc5c6c..03706c5dac2e84ac76811b871f8be4f90db2e8a6 100644 (file)
--- a/gcc/testsuite/gcc.dg/cpp/charconst-4.c
+++ b/gcc/testsuite/gcc.dg/cpp/charconst-4.c
@@ -38,7 +38,7 @@ extern void abort (void);
  # error Charconst incorrectly sign-extended
  #endif
  
-#if LONG_CHARCONST != SHORT_CHARCONST /* { dg-warning "too long" } */
+#if LONG_CHARCONST != SHORT_CHARCONST /* { dg-warning "multi-character literal with \[0-9]+ characters exceeds 'int' size of \[0-9]+ bytes" } */
  # error Overly long charconst truncates wrongly for preprocessor
  #endif
  
@@ -46,7 +46,7 @@ int main ()
  {
    if (POS_CHARCONST < 0)
      abort ();
-  if (LONG_CHARCONST != SHORT_CHARCONST)  /* { dg-warning "too long" } */
+  if (LONG_CHARCONST != SHORT_CHARCONST)  /* { dg-warning "multi-character literal with \[0-9]+ characters exceeds 'int' size of \[0-9]+ bytes" } */
      abort ();
    return 0;
  }
diff --git a/gcc/testsuite/gcc.dg/cpp/charconst.c b/gcc/testsuite/gcc.dg/cpp/charconst.c

index 8934d6a6721db7a131b5367589c9d40cb788d4b3..a2a5717c663b63954f4910721dd349909b9c75bf 100644 (file)
--- a/gcc/testsuite/gcc.dg/cpp/charconst.c
+++ b/gcc/testsuite/gcc.dg/cpp/charconst.c
@@ -11,9 +11,9 @@
  #endif
  #if L''                        /* { dg-error "empty" "empty wide charconst" } */
  #endif
-#if 'very long'                /* { dg-warning "too long" "long charconst" } */
+#if 'very long'                /* { dg-warning "multi-character literal with \[0-9]+ characters exceeds 'int' size of \[0-9]+ bytes" "long charconst" } */
  #endif
-#if L'very long'       /* { dg-warning "too long" "long wide charconst" } */
+#if L'very long'       /* { dg-warning "multi-character literal cannot have an encoding prefix" "long wide charconst" } */
  #endif
  /* Don't do this test for L'ab'; it depends upon sizeof (wchar_t).  */
  #if 'ab'               /* { dg-warning "multi-char" "multi-character" } */
@@ -27,10 +27,10 @@ void foo ()
    c = '';              /* { dg-error "empty" "empty charconst" } */
    w = L'';             /* { dg-error "empty" "empty wide charconst" } */
  
-  c = 'very long';     /* { dg-warning "too long" "long charconst" } */
-  w = L'very long';    /* { dg-warning "too long" "long wide charconst" } */
+  c = 'very long';     /* { dg-warning "multi-character literal with \[0-9]+ characters exceeds 'int' size of \[0-9]+ bytes" "long charconst" } */
+  w = L'very long';    /* { dg-warning "multi-character literal cannot have an encoding prefix" "long wide charconst" } */
  
    c = 'ab';            /* { dg-warning "multi-char" "multi-char" } */
    /* Wide charconsts cannot contain more than one wide character.  */
-  w = L'ab';           /* { dg-warning "too long" "multi-char wide" } */
+  w = L'ab';           /* { dg-warning "multi-character literal cannot have an encoding prefix" "multi-char wide" } */
  }
diff --git a/gcc/testsuite/gcc.dg/cpp/if-2.c b/gcc/testsuite/gcc.dg/cpp/if-2.c

index dc136b4563446a5cd11411b0a09520f3d47b3f13..e969aa0b473bb89e1061c10c23993851e285cd97 100644 (file)
--- a/gcc/testsuite/gcc.dg/cpp/if-2.c
+++ b/gcc/testsuite/gcc.dg/cpp/if-2.c
@@ -21,7 +21,7 @@
  #if 'abcd' /* { dg-warning "(multi-character character constant)|(character constant (is )?too long)" "multi-character charconst" } */
  #endif
  
-#if 'abcdefghi' /* { dg-warning "character constant (is )?too long" "charconst too long" } */
+#if 'abcdefghi' /* { dg-warning "multi-character literal with \[0-9]+ characters exceeds 'int' size of \[0-9]+ bytes" } */
  #endif
  
  #if '' /* { dg-error "empty character constant" "empty charconst" } */
diff --git a/gcc/testsuite/gcc.dg/utf16-4.c b/gcc/testsuite/gcc.dg/utf16-4.c

index 60e00f60ee4d3f90e907233629351d46f0fff142..e108d00afdfd98ac1329191b4ac0a54167db7f00 100644 (file)
--- a/gcc/testsuite/gcc.dg/utf16-4.c
+++ b/gcc/testsuite/gcc.dg/utf16-4.c
@@ -6,8 +6,8 @@
  typedef __CHAR16_TYPE__ char16_t;
  
  char16_t       c0 = u'';               /* { dg-error "empty character" } */
-char16_t       c1 = u'ab';             /* { dg-warning "constant too long" } */
-char16_t       c2 = u'\U00064321';     /* { dg-warning "constant too long" } */
+char16_t       c1 = u'ab';             /* { dg-warning "multi-character literal cannot have an encoding prefix" } */
+char16_t       c2 = u'\U00064321';     /* { dg-warning "character not encodable in a single code unit" } */
  
  char16_t       c3 = 'a';
  char16_t       c4 = U'a';
@@ -16,6 +16,6 @@ char16_t      c6 = U'\U00064321';     /* { dg-warning "conversion from .(long )?unsigned
  char16_t       c7 = L'a';
  char16_t       c8 = L'\u2029';
  char16_t       c9 = L'\U00064321';     /* { dg-warning "conversion" "" { target { 4byte_wchar_t } } } */
-                                       /* { dg-warning "constant too long" "" { target { ! 4byte_wchar_t } } .-1 } */
+                                       /* { dg-warning "character not encodable in a single code unit" "" { target { ! 4byte_wchar_t } } .-1 } */
  
  int main () {}
diff --git a/gcc/testsuite/gcc.dg/utf32-4.c b/gcc/testsuite/gcc.dg/utf32-4.c

index aa7f66a36851761f15a7163c7566341f3261f1c1..72086bc7c6a111a85e1b0d14c600bb487c11d0a5 100644 (file)
--- a/gcc/testsuite/gcc.dg/utf32-4.c
+++ b/gcc/testsuite/gcc.dg/utf32-4.c
@@ -6,15 +6,15 @@
  typedef __CHAR32_TYPE__ char32_t;
  
  char32_t       c0 = U'';               /* { dg-error "empty character" } */
-char32_t       c1 = U'ab';             /* { dg-warning "constant too long" } */
+char32_t       c1 = U'ab';             /* { dg-warning "multi-character literal cannot have an encoding prefix" } */
  char32_t       c2 = U'\U00064321';
  
  char32_t       c3 = 'a';
  char32_t       c4 = u'a';
  char32_t       c5 = u'\u2029';
-char32_t       c6 = u'\U00064321';     /* { dg-warning "constant too long" } */
+char32_t       c6 = u'\U00064321';     /* { dg-warning "character not encodable in a single code unit" } */
  char32_t       c7 = L'a';
  char32_t       c8 = L'\u2029';
-char32_t       c9 = L'\U00064321';     /* { dg-warning "constant too long" "" { target { ! 4byte_wchar_t } } } */
+char32_t       c9 = L'\U00064321';     /* { dg-warning "character not encodable in a single code unit" "" { target { ! 4byte_wchar_t } } } */
  
  int main () {}
diff --git a/libcpp/charset.cc b/libcpp/charset.cc

index d5a027502cd6980831cb9a64a62b0b3c841c63ac..9a944d94360cadc636f1dadd55be766b117fffea 100644 (file)
--- a/libcpp/charset.cc
+++ b/libcpp/charset.cc
@@ -446,6 +446,73 @@ one_utf16_to_utf8 (iconv_t bigend, const uchar **inbufp, size_t *inbytesleftp,
    return 0;
  }
  
+
+/* Special routine which just counts the number of characters in the
+   string; what exactly is stored into the output doesn't matter
+   as long as it is one uchar per character.  */
+
+static inline int
+one_count_chars (iconv_t, const uchar **inbufp, size_t *inbytesleftp,
+                uchar **outbufp, size_t *outbytesleftp)
+{
+  cppchar_t s = 0;
+  int rval;
+
+  /* Check for space first, since we know exactly how much we need.  */
+  if (*outbytesleftp < 1)
+    return E2BIG;
+
+#if HOST_CHARSET == HOST_CHARSET_ASCII
+  rval = one_utf8_to_cppchar (inbufp, inbytesleftp, &s);
+  if (rval)
+    return rval;
+#else
+  if (*inbytesleftp < 1)
+    return EINVAL;
+  static const uchar utf_ebcdic_map[256] = {
+    /* See table 4 in http://unicode.org/reports/tr16/tr16-7.2.html  */
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    1, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 1, 1, 1, 1, 1,
+    1, 9, 9, 9, 9, 9, 9, 9, 9, 9, 1, 1, 1, 1, 1, 1,
+    1, 1, 9, 9, 9, 9, 9, 9, 9, 9, 9, 1, 1, 1, 1, 1,
+    9, 9, 9, 9, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,
+    2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2,
+    2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2,
+    2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 2, 2,
+    2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 1, 3, 3,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 4, 4, 4, 4,
+    1, 4, 1, 1, 1, 1, 1, 1, 1, 1, 4, 4, 4, 5, 5, 5,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, 6, 6, 7, 7, 0
+  };
+  rval = utf_ebcdic_map[**inbufp];
+  if (rval == 9)
+    return EILSEQ;
+  if (rval == 0)
+    rval = 1;
+  if (rval >= 2)
+    {
+      if (*inbytesleftp < rval)
+       return EINVAL;
+      for (int i = 1; i < rval; ++i)
+       if (utf_ebcdic_map[(*inbufp)[i]] != 9)
+         return EILSEQ;
+    }
+  *inbytesleftp -= rval;
+  *inbufp += rval;
+#endif
+
+  **outbufp = ' ';
+
+  *outbufp += 1;
+  *outbytesleftp -= 1;
+  return 0;
+}
+
+
  /* Helper routine for the next few functions.  The 'const' on
     one_conversion means that we promise not to modify what function is
     pointed to, which lets the inliner see through it.  */
@@ -529,6 +596,15 @@ convert_utf32_utf8 (iconv_t cd, const uchar *from, size_t flen,
    return conversion_loop (one_utf32_to_utf8, cd, from, flen, to);
  }
  
+/* Magic conversion which just counts characters from input, so
+   only to->len is significant.  */
+static bool
+convert_count_chars (iconv_t cd, const uchar *from,
+                    size_t flen, struct _cpp_strbuf *to)
+{
+  return conversion_loop (one_count_chars, cd, from, flen, to);
+}
+
  /* Identity conversion, used when we have no alternative.  */
  static bool
  convert_no_conversion (iconv_t cd ATTRIBUTE_UNUSED,
@@ -2574,21 +2650,49 @@ cpp_interpret_string_notranslate (cpp_reader *pfile, const cpp_string *from,
  }
  
  \f
+/* Return number of source characters in STR.  */
+static unsigned
+count_source_chars (cpp_reader *pfile, cpp_string str, cpp_ttype type)
+{
+  cpp_string str2 = { 0, 0 };
+  bool (*saved_diagnostic_handler) (cpp_reader *, enum cpp_diagnostic_level,
+                                   enum cpp_warning_reason, rich_location *,
+                                   const char *, va_list *)
+    ATTRIBUTE_FPTR_PRINTF(5,0);
+  saved_diagnostic_handler = pfile->cb.diagnostic;
+  pfile->cb.diagnostic = noop_diagnostic_cb;
+  convert_f save_func = pfile->narrow_cset_desc.func;
+  pfile->narrow_cset_desc.func = convert_count_chars;
+  bool ret = cpp_interpret_string (pfile, &str, 1, &str2, type);
+  pfile->narrow_cset_desc.func = save_func;
+  pfile->cb.diagnostic = saved_diagnostic_handler;
+  if (ret)
+    {
+      if (str2.text != str.text)
+       free ((void *)str2.text);
+      return str2.len;
+    }
+  else
+    return 0;
+}
+
  /* Subroutine of cpp_interpret_charconst which performs the conversion
     to a number, for narrow strings.  STR is the string structure returned
     by cpp_interpret_string.  PCHARS_SEEN and UNSIGNEDP are as for
-   cpp_interpret_charconst.  TYPE is the token type.  */
+   cpp_interpret_charconst.  TOKEN is the token.  */
  static cppchar_t
  narrow_str_to_charconst (cpp_reader *pfile, cpp_string str,
                          unsigned int *pchars_seen, int *unsignedp,
-                        enum cpp_ttype type)
+                        const cpp_token *token)
  {
+  enum cpp_ttype type = token->type;
    size_t width = CPP_OPTION (pfile, char_precision);
    size_t max_chars = CPP_OPTION (pfile, int_precision) / width;
    size_t mask = width_to_mask (width);
    size_t i;
    cppchar_t result, c;
    bool unsigned_p;
+  bool diagnosed = false;
  
    /* The value of a multi-character character constant, or a
       single-character character constant whose representation in the
@@ -2612,11 +2716,55 @@ narrow_str_to_charconst (cpp_reader *pfile, cpp_string str,
  
    if (type == CPP_UTF8CHAR)
      max_chars = 1;
-  if (i > max_chars)
+  else if (i > 1 && CPP_OPTION (pfile, cplusplus) && CPP_PEDANTIC (pfile))
      {
+      /* P1854R4 - Making non-encodable string literals ill-formed,
+        which applies to C++ as a DR, makes a multi-character narrow
+        character literal ill-formed if any of the characters in it
+        isn't encodable in char/unsigned char.  To detect that, we
+        need to count the number of c-chars and compare that to
+        str.len.  */
+      unsigned src_chars = count_source_chars (pfile, token->val.str, type);
+
+      if (src_chars)
+       {
+         if (str.len > src_chars)
+           {
+             if (src_chars <= 2)
+               diagnosed
+                 = cpp_error (pfile, CPP_DL_PEDWARN,
+                              "character not encodable in a single execution "
+                              "character code unit");
+             else
+               diagnosed
+                 = cpp_error (pfile, CPP_DL_PEDWARN,
+                              "at least one character in a multi-character "
+                              "literal not encodable in a single execution "
+                              "character code unit");
+             if (diagnosed && i > max_chars)
+               i = max_chars;
+           }
+       }
+    }
+  if (diagnosed)
+    /* Already diagnosed above.  */;
+  else if (i > max_chars)
+    {
+      unsigned src_chars
+       = count_source_chars (pfile, token->val.str,
+                             type == CPP_UTF8CHAR ? CPP_CHAR : type);
+
+      if (type != CPP_UTF8CHAR)
+       cpp_error (pfile, CPP_DL_WARNING,
+                  "multi-character literal with %ld characters exceeds "
+                  "'int' size of %ld bytes", (long) i, (long) max_chars);
+      else if (src_chars > 2)
+       cpp_error (pfile, CPP_DL_ERROR,
+                  "multi-character literal cannot have an encoding prefix");
+      else
+       cpp_error (pfile, CPP_DL_ERROR,
+                  "character not encodable in a single code unit");
        i = max_chars;
-      cpp_error (pfile, type == CPP_UTF8CHAR ? CPP_DL_ERROR : CPP_DL_WARNING,
-                "character constant too long for its type");
      }
    else if (i > 1 && CPP_OPTION (pfile, warn_multichar))
      cpp_warning (pfile, CPP_W_MULTICHAR, "multi-character character constant");
@@ -2651,12 +2799,13 @@ narrow_str_to_charconst (cpp_reader *pfile, cpp_string str,
  /* Subroutine of cpp_interpret_charconst which performs the conversion
     to a number, for wide strings.  STR is the string structure returned
     by cpp_interpret_string.  PCHARS_SEEN and UNSIGNEDP are as for
-   cpp_interpret_charconst.  TYPE is the token type.  */
+   cpp_interpret_charconst.  TOKEN is the token.  */
  static cppchar_t
  wide_str_to_charconst (cpp_reader *pfile, cpp_string str,
                        unsigned int *pchars_seen, int *unsignedp,
-                      enum cpp_ttype type)
+                      const cpp_token *token)
  {
+  enum cpp_ttype type = token->type;
    bool bigend = CPP_OPTION (pfile, bytes_big_endian);
    size_t width = converter_for_type (pfile, type).width;
    size_t cwidth = CPP_OPTION (pfile, char_precision);
@@ -2692,14 +2841,25 @@ wide_str_to_charconst (cpp_reader *pfile, cpp_string str,
       character exactly fills a wchar_t, so a multi-character wide
       character constant is guaranteed to overflow.  */
    if (str.len > nbwc * 2)
-    cpp_error (pfile, (CPP_OPTION (pfile, cplusplus)
-                      && (type == CPP_CHAR16
-                          || type == CPP_CHAR32
-                          /* In C++23 this is error even for L'ab'.  */
-                          || (type == CPP_WCHAR
-                              && CPP_OPTION (pfile, size_t_literals))))
-                     ? CPP_DL_ERROR : CPP_DL_WARNING,
-              "character constant too long for its type");
+    {
+      cpp_diagnostic_level level = CPP_DL_WARNING;
+      unsigned src_chars
+       = count_source_chars (pfile, token->val.str, CPP_CHAR);
+
+      if (CPP_OPTION (pfile, cplusplus)
+         && (type == CPP_CHAR16
+             || type == CPP_CHAR32
+             /* In C++23 this is error even for L'ab'.  */
+             || (type == CPP_WCHAR
+                 && CPP_OPTION (pfile, size_t_literals))))
+       level = CPP_DL_ERROR;
+      if (src_chars > 2)
+       cpp_error (pfile, level,
+                  "multi-character literal cannot have an encoding prefix");
+      else
+       cpp_error (pfile, level,
+                  "character not encodable in a single code unit");
+    }
  
    /* Truncate the constant to its natural width, and simultaneously
       sign- or zero-extend to the full width of cppchar_t.  */
@@ -2754,10 +2914,10 @@ cpp_interpret_charconst (cpp_reader *pfile, const cpp_token *token,
  
    if (wide)
      result = wide_str_to_charconst (pfile, str, pchars_seen, unsignedp,
-                                   token->type);
+                                   token);
    else
      result = narrow_str_to_charconst (pfile, str, pchars_seen, unsignedp,
-                                     token->type);
+                                     token);
  
    if (str.text != token->val.str.text)
      free ((void *)str.text);
author	Jakub Jelinek <jakub@redhat.com>
	Tue, 14 Nov 2023 17:28:34 +0000 (18:28 +0100)
committer	Jakub Jelinek <jakub@redhat.com>
	Tue, 14 Nov 2023 17:28:34 +0000 (18:28 +0100)
gcc/testsuite/g++.dg/cpp1z/utf8-neg.C		patch \| blob \| blame \| history
gcc/testsuite/g++.dg/cpp23/wchar-multi1.C		patch \| blob \| blame \| history
gcc/testsuite/g++.dg/cpp23/wchar-multi2.C		patch \| blob \| blame \| history
gcc/testsuite/g++.dg/cpp26/literals1.C	[new file with mode: 0644]	patch \| blob
gcc/testsuite/g++.dg/cpp26/literals2.C	[new file with mode: 0644]	patch \| blob
gcc/testsuite/g++.dg/cpp2a/ucn2.C		patch \| blob \| blame \| history
gcc/testsuite/g++.dg/ext/utf16-4.C		patch \| blob \| blame \| history
gcc/testsuite/g++.dg/ext/utf32-4.C		patch \| blob \| blame \| history
gcc/testsuite/gcc.dg/c23-utf8char-3.c		patch \| blob \| blame \| history
gcc/testsuite/gcc.dg/cpp/charconst-4.c		patch \| blob \| blame \| history
gcc/testsuite/gcc.dg/cpp/charconst.c		patch \| blob \| blame \| history
gcc/testsuite/gcc.dg/cpp/if-2.c		patch \| blob \| blame \| history
gcc/testsuite/gcc.dg/utf16-4.c		patch \| blob \| blame \| history
gcc/testsuite/gcc.dg/utf32-4.c		patch \| blob \| blame \| history
libcpp/charset.cc		patch \| blob \| blame \| history