git.ipfire.org Git - thirdparty/gcc.git/commitdiff
libcpp: reject codepoints above 0x10FFFF
author: Ben Boeckel <ben.boeckel@kitware.com>
Tue, 6 Jun 2023 20:50:22 +0000 (16:50 -0400)
committer: Jason Merrill <jason@redhat.com>
Mon, 19 Jun 2023 21:20:41 +0000 (17:20 -0400)
Unicode does not support such values because they are unrepresentable in
UTF-16.

libcpp/

* charset.cc (cpp_valid_utf8_p): Reject encodings of codepoints
above 0x10FFFF.  UTF-16 cannot represent such codepoints, so
Unicode as a whole treats them as invalid.

Signed-off-by: Ben Boeckel <ben.boeckel@kitware.com>
libcpp/charset.cc

index d7f323b2cd526f3a42b5007f2037432ebadef585..d4f573e365f09f80ed8f7884c5ee046d94b32ccc 100644 (file)
@@ -1886,6 +1886,13 @@ cpp_valid_utf8_p (const char *buffer, size_t num_bytes)
       int err = one_utf8_to_cppchar (&iter, &bytesleft, &cp);
       if (err)
        return false;
+
+      /* Additionally, Unicode declares that all codepoints above 0010FFFF are
+        invalid because they cannot be represented in UTF-16.
+
+        Reject such values.*/
+      if (cp >= UCS_LIMIT)
+       return false;
     }
   /* No problems encountered.  */
   return true;