From: Aki Tuomi
Date: Fri, 20 Oct 2017 11:36:32 +0000 (+0300)
Subject: json-parser: Fix unicode parsing
X-Git-Tag: 2.3.0.rc1~784
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=2f40603562d4cf3fad0bc8ff1f10f652cedd77df;p=thirdparty%2Fdovecot%2Fcore.git

json-parser: Fix unicode parsing

Check for surrogates and validate ucs4 codepoint.
---

diff --git a/src/lib/json-parser.c b/src/lib/json-parser.c
index bdefd6d8ec..7a654d58f1 100644
--- a/src/lib/json-parser.c
+++ b/src/lib/json-parser.c
@@ -39,6 +39,7 @@ struct json_parser {
 	enum json_state state;
 	ARRAY(enum json_state) nesting;
 	unsigned int nested_skip_count;
+	bool skipping;
 	bool seen_eof;
 };
 
@@ -202,9 +203,81 @@ static int json_skip_string(struct json_parser *parser)
 	return 0;
 }
 
+/* Decode a "\uXXXX" escape and append the code point to parser->value as
+   UTF-8. Called with parser->data at the 'u'. A high surrogate must be
+   followed by a "\uXXXX" low surrogate; the pair is joined into a single
+   code point.
+
+   Returns 1 on success with parser->data on the last hex digit consumed,
+   0 if more input is needed (parser->data is moved to parser->end), or
+   -1 on error with parser->error set. */
+static int json_parse_unicode_escape(struct json_parser *parser)
+{
+	unichar_t chr, hi_surg;
+
+	parser->data++;
+	if (parser->end - parser->data < 4) {
+		/* wait for more data */
+		parser->data = parser->end;
+		return 0;
+	}
+	chr = hex2dec(parser->data, 4);
+	if (UTF16_VALID_HIGH_SURROGATE(chr)) {
+		/* possible surrogate pair */
+		hi_surg = chr;
+		chr = 0;
+		parser->data += 4;
+		if (parser->data >= parser->end) {
+			/* wait for more data */
+			parser->data = parser->end;
+			return 0;
+		}
+		if ((parser->end - parser->data) < 2) {
+			if (parser->data[0] == '\\') {
+				/* wait for more data */
+				parser->data = parser->end;
+				return 0;
+			}
+			/* error: reported by the check below */
+		}
+		if ((parser->end - parser->data) < 6) {
+			if (parser->data[0] == '\\' &&
+			    parser->data[1] == 'u') {
+				/* wait for more data */
+				parser->data = parser->end;
+				return 0;
+			}
+			/* error: reported by the check below */
+		} else {
+			chr = hex2dec(&parser->data[2], 4);
+		}
+		if (parser->data[0] != '\\' || parser->data[1] != 'u' ||
+		    !UTF16_VALID_LOW_SURROGATE(chr)) {
+			parser->error =
+				t_strdup_printf("High surrogate 0x%04x seen, "
+						"but not followed by low surrogate",
+						hi_surg);
+			return -1;
+		}
+		chr = uni_join_surrogate(hi_surg, chr);
+		parser->data += 2;
+	}
+
+	if (!uni_is_valid_ucs4(chr)) {
+		parser->error =
+			t_strdup_printf("Invalid unicode character U+%04x", chr);
+		return -1;
+	}
+	uni_ucs4_to_utf8_c(chr, parser->value);
+	parser->data += 3;
+	return 1;
+}
+
 static int json_parse_string(struct json_parser *parser, bool allow_skip,
 			     const char **value_r)
 {
+	int ret;
+
 	if (*parser->data != '"')
 		return -1;
 	parser->data++;
@@ -248,15 +321,8 @@ static int json_parse_string(struct json_parser *parser, bool allow_skip,
 				str_append_c(parser->value, '\t');
 				break;
 			case 'u':
-				parser->data++;
-				if (parser->end - parser->data < 4) {
-					/* wait for more data */
-					parser->data = parser->end;
-					return 0;
-				}
-				uni_ucs4_to_utf8_c(hex2dec(parser->data, 4),
-						   parser->value);
-				parser->data += 3;
+				if ((ret=json_parse_unicode_escape(parser)) <= 0)
+					return ret;
 				break;
 			default:
 				return -1;
@@ -462,7 +528,8 @@ json_try_parse_next(struct json_parser *parser, enum json_type *type_r,
 			*type_r = JSON_TYPE_NULL;
 			*value_r = NULL;
 		} else {
-			parser->error = "Invalid data as value";
+			if (parser->error == NULL)
+				parser->error = "Invalid data as value";
 			return -1;
 		}
 		if (ret == 0) {