From: Tom Tromey
Date: Wed, 20 Jun 2001 16:21:24 +0000 (+0000)
Subject: re PR java/2319 (invalid UTF-8 sequences should be rejected)
X-Git-Tag: prereleases/libstdc++-3.0.95~3686
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=c01b7cdf97e69255dd4a5dddda782ba29a32b3d1;p=thirdparty%2Fgcc.git

re PR java/2319 (invalid UTF-8 sequences should be rejected)

    * lex.c (java_read_char): Disallow invalid and overlong
    sequences. Fixes PR java/2319.

From-SVN: r43475
---

diff --git a/gcc/java/ChangeLog b/gcc/java/ChangeLog
index f55ad6cf1682..96e1ba5743c4 100644
--- a/gcc/java/ChangeLog
+++ b/gcc/java/ChangeLog
@@ -1,3 +1,8 @@
+2001-06-19 Tom Tromey
+
+    * lex.c (java_read_char): Disallow invalid and overlong
+    sequences. Fixes PR java/2319.
+
 2001-06-05 Jeff Sturm
 
     * decl.c (create_primitive_vtable): Don't call make_decl_rtl.
diff --git a/gcc/java/lex.c b/gcc/java/lex.c
index 28a73e3874b4..35cd31749ca2 100644
--- a/gcc/java/lex.c
+++ b/gcc/java/lex.c
@@ -454,15 +454,21 @@ java_read_char (lex)
       if (c == EOF)
         return UEOF;
       if (c < 128)
-        return (unicode_t)c;
+        return (unicode_t) c;
       else
         {
           if ((c & 0xe0) == 0xc0)
             {
               c1 = getc (lex->finput);
               if ((c1 & 0xc0) == 0x80)
-                return (unicode_t)(((c &0x1f) << 6) + (c1 & 0x3f));
-              c = c1;
+                {
+                  unicode_t r = (unicode_t)(((c & 0x1f) << 6) + (c1 & 0x3f));
+                  /* Check for valid 2-byte characters. We explicitly
+                     allow \0 because this encoding is common in the
+                     Java world. */
+                  if (r == 0 || (r >= 0x80 && r <= 0x7ff))
+                    return r;
+                }
             }
           else if ((c & 0xf0) == 0xe0)
             {
@@ -471,16 +477,23 @@ java_read_char (lex)
                 {
                   c2 = getc (lex->finput);
                   if ((c2 & 0xc0) == 0x80)
-                    return (unicode_t)(((c & 0xf) << 12) +
-                                       (( c1 & 0x3f) << 6) + (c2 & 0x3f));
-                  else
-                    c = c2;
+                    {
+                      unicode_t r = (unicode_t)(((c & 0xf) << 12) +
+                                                (( c1 & 0x3f) << 6)
+                                                + (c2 & 0x3f));
+                      /* Check for valid 3-byte characters.
+                         Don't allow surrogate, \ufffe or \uffff. */
+                      if (r >= 0x800 && r <= 0xffff
+                          && ! (r >= 0xd800 && r <= 0xdfff)
+                          && r != 0xfffe && r != 0xffff)
+                        return r;
+                    }
                 }
-              else
-                c = c1;
             }
 
-          /* We simply don't support invalid characters. */
+          /* We simply don't support invalid characters. We also
+             don't support 4-, 5-, or 6-byte UTF-8 sequences, as these
+             cannot be valid Java characters. */
           java_lex_error ("malformed UTF-8 character", 0);
         }
     }