git.ipfire.org Git - thirdparty/dovecot/core.git/commitdiff
json-parser: Fix unicode parsing
author: Aki Tuomi <aki.tuomi@dovecot.fi>
Fri, 20 Oct 2017 11:36:32 +0000 (14:36 +0300)
committer: Aki Tuomi <aki.tuomi@dovecot.fi>
Mon, 23 Oct 2017 12:04:02 +0000 (15:04 +0300)
Check for surrogates and validate ucs4 codepoint.

src/lib/json-parser.c

index bdefd6d8ec2f5e3434dfb8d03efe9166cf0de435..7a654d58f11dd057ff872489a6a013c6e199fe8c 100644 (file)
@@ -39,6 +39,7 @@ struct json_parser {
        enum json_state state;
        ARRAY(enum json_state) nesting;
        unsigned int nested_skip_count;
+
        bool skipping;
        bool seen_eof;
 };
@@ -202,9 +203,73 @@ static int json_skip_string(struct json_parser *parser)
        return 0;
 }
 
+/* Decode one JSON "\uXXXX" escape, or a "\uXXXX\uXXXX" UTF-16 surrogate
+   pair, and append the resulting character to parser->value as UTF-8.
+
+   On entry parser->data points at the 'u' that follows the backslash.
+   On success parser->data is left on the last hex digit that was consumed
+   (presumably the caller advances past it - verify against
+   json_parse_string()).
+
+   Returns 1 if a character was appended, 0 if the input buffer ends in
+   the middle of the escape (parser->data is moved to parser->end so that
+   more data can be waited for), or -1 on error with parser->error set. */
+static int json_parse_unicode_escape(struct json_parser *parser)
+{
+       unichar_t chr, hi_surg;
+
+       /* skip the 'u' */
+       parser->data++;
+       if (parser->end - parser->data < 4) {
+               /* wait for more data */
+               parser->data = parser->end;
+               return 0;
+       }
+       chr = hex2dec(parser->data, 4);
+       if (UTF16_VALID_HIGH_SURROGATE(chr)) {
+               /* possible surrogate pair */
+               hi_surg = chr;
+               chr = 0;
+               /* move past the four hex digits of the high surrogate */
+               parser->data += 4;
+               if (parser->data >= parser->end) {
+                       /* wait for more data */
+                       parser->data = parser->end;
+                       return 0;
+               }
+               if ((parser->end - parser->data) < 2) {
+                       if (parser->data[0] == '\\') {
+                               /* wait for more data */
+                               parser->data = parser->end;
+                               return 0;
+                       }
+                       /* error: reported by the check below */
+               }
+               if ((parser->end - parser->data) < 6) {
+                       if (parser->data[0] == '\\' &&
+                           parser->data[1] == 'u') {
+                               /* wait for more data */
+                               parser->data = parser->end;
+                               return 0;
+                       }
+                       /* error: reported by the check below */
+               } else {
+                       chr = hex2dec(&parser->data[2], 4);
+               }
+               /* A high surrogate must be followed by "\u" and a LOW
+                  surrogate. The data[0] test short-circuits, so data[1]
+                  is never read when only one byte is available. */
+               if (parser->data[0] != '\\' || parser->data[1] != 'u' ||
+                   !UTF16_VALID_LOW_SURROGATE(chr)) {
+                       parser->error =
+                               t_strdup_printf("High surrogate 0x%04x seen, "
+                                               "but not followed by low surrogate",
+                                               hi_surg);
+                       return -1;
+               }
+               chr = uni_join_surrogate(hi_surg, chr);
+               /* skip the "\u" of the low surrogate */
+               parser->data += 2;
+       }
+
+       if (!uni_is_valid_ucs4(chr)) {
+               parser->error =
+                       t_strdup_printf("Invalid unicode character U+%04x", chr);
+               return -1;
+       }
+       uni_ucs4_to_utf8_c(chr, parser->value);
+       /* leave parser->data on the last hex digit of the escape */
+       parser->data += 3;
+       return 1;
+}
+
+
 static int json_parse_string(struct json_parser *parser, bool allow_skip,
                             const char **value_r)
 {
+       int ret;
+
        if (*parser->data != '"')
                return -1;
        parser->data++;
@@ -248,15 +313,8 @@ static int json_parse_string(struct json_parser *parser, bool allow_skip,
                                str_append_c(parser->value, '\t');
                                break;
                        case 'u':
-                               parser->data++;
-                               if (parser->end - parser->data < 4) {
-                                       /* wait for more data */
-                                       parser->data = parser->end;
-                                       return 0;
-                               }
-                               uni_ucs4_to_utf8_c(hex2dec(parser->data, 4),
-                                                  parser->value);
-                               parser->data += 3;
+                               if ((ret=json_parse_unicode_escape(parser)) <= 0)
+                                       return ret;
                                break;
                        default:
                                return -1;
@@ -462,7 +520,8 @@ json_try_parse_next(struct json_parser *parser, enum json_type *type_r,
                        *type_r = JSON_TYPE_NULL;
                        *value_r = NULL;
                } else {
-                       parser->error = "Invalid data as value";
+                       if (parser->error == NULL)
+                               parser->error = "Invalid data as value";
                        return -1;
                }
                if (ret == 0) {