From: Stephan Bosch Date: Wed, 7 Aug 2019 18:38:33 +0000 (+0200) Subject: lib-json: Implement low-level JSON parser X-Git-Tag: 2.4.0~2395 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=69200f0ba68144df865f134ba0e08e324ae63dcf;p=thirdparty%2Fdovecot%2Fcore.git lib-json: Implement low-level JSON parser --- diff --git a/src/lib-json/Makefile.am b/src/lib-json/Makefile.am index a1d011aa44..6ce494ea9f 100644 --- a/src/lib-json/Makefile.am +++ b/src/lib-json/Makefile.am @@ -6,17 +6,39 @@ AM_CPPFLAGS = \ libjson_la_SOURCES = \ json-syntax.c \ - json-types.c + json-types.c \ + json-parser.new.c libjson_la_LIBADD = -lm headers = \ json-syntax.h \ - json-types.h + json-types.h \ + json-parser.new.h -test_programs = +test_programs = \ + test-json-parser noinst_PROGRAMS = $(test_programs) +test_libs = \ + libjson.la \ + ../lib-test/libtest.la \ + ../lib-charset/libcharset.la \ + ../lib/liblib.la \ + $(MODULE_LIBS) +test_deps = \ + libjson.la \ + ../lib-test/libtest.la \ + ../lib-charset/libcharset.la \ + ../lib/liblib.la + +test_json_parser_SOURCE = \ + test-json-parser.c +test_json_parser_LDADD = \ + $(test_libs) +test_json_parser_DEPENDENCIES = \ + $(test_deps) + pkginc_libdir=$(pkgincludedir) pkginc_lib_HEADERS = $(headers) diff --git a/src/lib-json/json-parser.new.c b/src/lib-json/json-parser.new.c new file mode 100644 index 0000000000..d983da93f4 --- /dev/null +++ b/src/lib-json/json-parser.new.c @@ -0,0 +1,2205 @@ +/* Copyright (c) 2017-2023 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "net.h" +#include "str.h" +#include "strescape.h" +#include "array.h" +#include "istream-private.h" + +#include "json-syntax.h" +#include "json-parser.new.h" + +#include +#include + +/* From RFC 7159: + + JSON-text = ws value ws + + ; value + + value = false / null / true / object / array / number / string + + false = %x66.61.6c.73.65 ; false + null = %x6e.75.6c.6c ; null + true = %x74.72.75.65 ; true + + ; object + + object = 
begin-object [ member *( value-separator member ) ] + end-object + member = string name-separator value + + ; array + + array = begin-array [ value *( value-separator value ) ] end-array + + ; number + + number = [ minus ] int [ frac ] [ exp ] + int = zero / ( digit1-9 *DIGIT ) + + frac = decimal-point 1*DIGIT + decimal-point = %x2E ; . + + exp = e [ minus / plus ] 1*DIGIT + e = %x65 / %x45 ; e E + + digit1-9 = %x31-39 ; 1-9 + zero = %x30 ; 0 + minus = %x2D ; - + plus = %x2B ; + + + ; string + + string = quotation-mark *char quotation-mark + + char = unescaped / + escape ( + %x22 / ; " quotation mark U+0022 + %x5C / ; \ reverse solidus U+005C + %x2F / ; / solidus U+002F + %x62 / ; b backspace U+0008 + %x66 / ; f form feed U+000C + %x6E / ; n line feed U+000A + %x72 / ; r carriage return U+000D + %x74 / ; t tab U+0009 + %x75 4HEXDIG ) ; uXXXX U+XXXX + escape = %x5C ; \ + quotation-mark = %x22 ; " + unescaped = %x20-21 / %x23-5B / %x5D-10FFFF + + ; structural characters + + begin-array = ws %x5B ws ; [ left square bracket + begin-object = ws %x7B ws ; { left curly bracket + end-array = ws %x5D ws ; ] right square bracket + end-object = ws %x7D ws ; } right curly bracket + name-separator = ws %x3A ws ; : colon + value-separator = ws %x2C ws ; , comma + + ; white space + + ws = *( + %x20 / ; Space + %x09 / ; Horizontal tab + %x0A / ; Line feed or New line + %x0D ) ; Carriage return + */ + +/* + * JSON parser + */ + +/* As this parser is in many ways very similar to a normal recursive descent + parser, it (partly) uses the normal call stack. However, it will backtrack + once it reaches this level (just like it does when it is halted for more + data), to prevent a process stack overflow. The syntax stack keeps growing + though, meaning that it can parse arbitrary syntax nesting depths. 
*/ +#define JSON_PARSER_MAX_CALL_STACK_DEPTH 32 + +struct json_string_istream; + +enum json_parse_result { + /* Parsing interrupted (meaning is context-dependent) */ + JSON_PARSE_INTERRUPTED = -7, + /* Parsing reached expected boundary */ + JSON_PARSE_BOUNDARY = -6, + /* Buffer for current syntax element is full; element is too large */ + JSON_PARSE_OVERFLOW = -5, + /* Parsed to end of currently buffered data */ + JSON_PARSE_NO_DATA = -4, + /* Prevent call stack overflow + (to support arbitrarily deeply nested input) */ + JSON_PARSE_CALL_STACK_OVERFLOW = -3, + /* Encountered end of input inside an incomplete syntax element */ + JSON_PARSE_UNEXPECTED_EOF = -2, + /* Encountered invalid/unexpected syntax, or an error was set through json_parser_error() */ + JSON_PARSE_ERROR = -1, + /* Parsed OK, but no match */ + JSON_PARSE_OK = 0 +}; + +typedef int +(*json_parser_func_t)(struct json_parser *parser, + struct json_parser_state *state); + +struct json_parser_state { /* Per-level state handed to the parse function: state = position in that function's own state machine, context = function-private value kept between calls, param = argument given to json_parser_call() */ + unsigned int state; + void *context; + void *param; + unsigned int count; +}; + +struct json_parser_level { /* One entry of the syntax level stack: func = parse function for this level, result = status recorded for it by json_parser_run(), backtracked/finished = flags checked by json_parser_call() when the level is entered again */ + json_parser_func_t func; + struct json_parser_state state; + int result; + + bool backtracked:1; + bool finished:1; +}; + +struct json_parser { + enum json_parser_flags flags; + + struct json_limits limits; + + const struct json_parser_callbacks *callbacks; + void *context; + + /* State information */ + ARRAY(struct json_parser_level) level_stack; + unsigned int level_stack_pos; + unsigned int call_stack_depth; + + struct istream *input; + uoff_t input_offset; + + const unsigned char *begin, *cur, *end; + + unichar_t current_char; + int current_char_len; + + struct { + uoff_t line_number, value_line_number; + uoff_t column; + } loc; + + string_t *buffer; + string_t *object_member; + struct json_data content_data; + + char *error; + + bool parsed_nul_char:1; + bool parsed_control_char:1; + bool parsed_float:1; + bool callback_interrupted:1; + bool callback_running:1; + bool finished_level:1; + bool end_of_input:1; + bool started:1; + bool have_object_member:1; +}; +
+static inline bool json_parser_is_busy(struct json_parser *parser) +{ + return (parser->level_stack_pos > 0 || parser->cur < parser->end); +} + +struct json_parser * +json_parser_init(struct istream *input, const struct json_limits *limits, + enum json_parser_flags flags, + const struct json_parser_callbacks *callbacks, void *context) +{ + struct json_parser *parser; + + parser = i_new(struct json_parser, 1); + parser->flags = flags; + + i_array_init(&parser->level_stack, 8); + + parser->input = input; + i_stream_ref(input); + parser->input_offset = input->v_offset; + + if (limits != NULL) + parser->limits = *limits; + if (parser->limits.max_string_size == 0) + parser->limits.max_string_size = JSON_DEFAULT_MAX_STRING_SIZE; + if (parser->limits.max_name_size == 0) + parser->limits.max_name_size = JSON_DEFAULT_MAX_NAME_SIZE; + if (parser->limits.max_nesting == 0) + parser->limits.max_nesting = JSON_DEFAULT_MAX_NESTING; + if (parser->limits.max_list_items == 0) + parser->limits.max_list_items = JSON_DEFAULT_MAX_LIST_ITEMS; + + parser->callbacks = callbacks; + parser->context = context; + + parser->loc.line_number = 1; + + return parser; +} + +void json_parser_deinit(struct json_parser **_parser) +{ + struct json_parser *parser = *_parser; + + if (parser == NULL) + return; + *_parser = NULL; + + str_free(&parser->buffer); + str_free(&parser->object_member); + array_free(&parser->level_stack); + i_stream_unref(&parser->input); + i_free(parser->error); + i_free(parser); +} + +/* + * External error handling + */ + +void json_parser_error(struct json_parser *parser, const char *format, ...) 
+{ + va_list args; + + i_free(parser->error); + va_start(args, format); + parser->error = i_strdup_vprintf(format, args); + va_end(args); +} + +/* + * Callbacks + */ + +void json_parser_interrupt(struct json_parser *parser) +{ + i_assert(parser->callback_running); + parser->callback_interrupted = TRUE; +} + +static inline void json_parser_callback_init(struct json_parser *parser) +{ + i_free(parser->error); + i_assert(!parser->callback_running); + parser->callback_running = TRUE; + parser->callback_interrupted = FALSE; +} + +static inline int json_parser_callback_deinit(struct json_parser *parser) +{ + i_assert(parser->callback_running); + parser->callback_running = FALSE; + + if (parser->error != NULL) + return JSON_PARSE_ERROR; + if (parser->callback_interrupted) + return JSON_PARSE_INTERRUPTED; + return JSON_PARSE_OK; +} + +static int +json_parser_callback_parse_list_open(struct json_parser *parser, + void *parent_context, bool object, + void **list_context_r) +{ + const char *name; + + if (parser->callbacks == NULL || + parser->callbacks->parse_list_open == NULL) + return JSON_PARSE_OK; + + name = (parser->have_object_member ? 
+ str_c(parser->object_member) : NULL); + + json_parser_callback_init(parser); + parser->callbacks->parse_list_open(parser->context, parent_context, + name, object, list_context_r); + return json_parser_callback_deinit(parser); +} + +static int +json_parser_callback_parse_list_close(struct json_parser *parser, + void *list_context, bool object) +{ + if (parser->callbacks == NULL || + parser->callbacks->parse_list_close == NULL) + return JSON_PARSE_OK; + + json_parser_callback_init(parser); + parser->callbacks->parse_list_close(parser->context, list_context, + object); + return json_parser_callback_deinit(parser); +} + +static int +json_parser_callback_parse_object_member(struct json_parser *parser, + void *parent_context) +{ + const char *name; + + i_assert(parser->have_object_member); + + if (parser->callbacks == NULL || + parser->callbacks->parse_object_member == NULL) + return JSON_PARSE_OK; + + name = str_c(parser->object_member); + + json_parser_callback_init(parser); + parser->callbacks->parse_object_member(parser->context, parent_context, + name); + return json_parser_callback_deinit(parser); +} + +static int +json_parser_callback_parse_value(struct json_parser *parser, + void *parent_context, enum json_type type, + const struct json_value *value) +{ + const char *name; + + if (parser->callbacks == NULL || + parser->callbacks->parse_value == NULL) + return JSON_PARSE_OK; + + name = (parser->have_object_member ? 
+ str_c(parser->object_member) : NULL); + + json_parser_callback_init(parser); + parser->callbacks->parse_value(parser->context, parent_context, + name, type, value); + return json_parser_callback_deinit(parser); +} + +static void +json_parser_number_range_error(struct json_parser *parser, int dir) +{ + if (dir > 0) { + json_parser_error(parser, "Number overflow: " + "Positive number exceeds range"); + return; + } + if (dir < 0) { + json_parser_error(parser, "Number overflow: " + "Negative number exceeds range"); + return; + } + json_parser_error(parser, "Number underflow: " + "Required precision exceeds range"); +} + +/* Parses a signed integer from the string representation of a floating point + number (fraction is truncated) */ +static int str_float_to_intmax(const char *str, intmax_t *num_r) +{ + const char *p, *dp = NULL; + bool neg = FALSE, eneg = FALSE; + uintmax_t un = 0, e = 0; + + /* Skip over base */ + p = str; + if (*p == '+' || *p == '-') { + neg = (*p == '-'); + p++; + } + if (*p < '0' || *p > '9') + return -1; + for (; *p >= '0' && *p <= '9'; p++); + /* Fractional part */ + if (*p == '.') { + dp = p; + p++; + for (; *p >= '0' && *p <= '9'; p++); + } + /* Parse exponent */ + if (*p == 'e' || *p == 'E') { + if (dp == NULL) + dp = p; + p++; + if (*p == '+' || *p == '-') { + eneg = (*p == '-'); + p++; + } + for (; *p >= '0' && *p <= '9'; p++) { + if (e >= (UINTMAX_MAX / 10)) { + if (e > UINTMAX_MAX / 10) + return -1; + if ((uintmax_t)(*p - '0') > + (UINTMAX_MAX % 10)) + return -1; + } + e = e * 10 + (*p - '0'); + } + } + if (*p != '\0') + return -1; + if (dp == NULL) + dp = p; + /* Move back to integer part */ + p = (neg ? str+1 : str); + /* Apply negative exponent */ + if (eneg) { + if ((uintmax_t)(dp-p) <= e) { + /* Value is [-1 .. 
1] */ + *num_r = 0; + return 0; + } + dp -= e; + e = 0; + i_assert(dp > str); + } + /* Parse integer */ + while (*p >= '0' && *p <= '9') { + if (un >= (UINTMAX_MAX / 10)) { + if (un > UINTMAX_MAX / 10) + return -1; + if ((uintmax_t)(*p - '0') > (UINTMAX_MAX % 10)) + return -1; + } + un = un * 10 + (*p- '0'); + p++; + if (p == dp) { + /* Encountered (updated) decimal point position */ + if (eneg) { + /* Negative exponent applied; exit here */ + break; + } + if (*p != '.') { + /* No fraction; exit here */ + break; + } + /* Exponent is zero; exit here */ + if (e == 0) + break; + p++; + } else if (p > dp) { + /* Keep parsing fractional part until exponent is + exhausted */ + if (--e == 0) + break; + } + } + if (un > 0 && !eneg) { + /* Apply remainder of positive exponent */ + while (e > 0) { + e--; + if (un > UINTMAX_MAX / 10) + return -1; + un = un * 10; + } + } + /* Apply sign */ + if (!neg) { + if (un > (uintmax_t)INTMAX_MAX) + return -1; + *num_r = (intmax_t)un; + } else { + if (un > (uintmax_t)INTMAX_MAX + 1) + return -1; + *num_r = -(intmax_t)un; + } + return 0; +} + +static int +json_parser_callback_number_value(struct json_parser *parser, + void *list_context) +{ + struct json_value value; + const char *numstr = str_c(parser->buffer); + + i_zero(&value); + + if ((parser->flags & JSON_PARSER_FLAG_NUMBERS_AS_STRING) != 0) { + value.content_type = JSON_CONTENT_TYPE_STRING; + value.content.str = numstr; + } else { + if (str_float_to_intmax(numstr, &value.content.intnum) < 0) { + json_parser_number_range_error( + parser, (*numstr == '-' ? 
-1 : 1)); + return JSON_PARSE_ERROR; + } + value.content_type = JSON_CONTENT_TYPE_INTEGER; + } + + return json_parser_callback_parse_value(parser, list_context, + JSON_TYPE_NUMBER, &value); +} + +static int +json_parser_callback_string_value(struct json_parser *parser, + void *list_context) +{ + struct json_value value; + + i_zero(&value); + + if (parser->parsed_nul_char || + (parser->flags & JSON_PARSER_FLAG_STRINGS_AS_DATA) != 0) { + struct json_data *data = &parser->content_data; + + i_zero(data); + data->data = str_data(parser->buffer); + data->size = str_len(parser->buffer); + data->contains_nul = parser->parsed_nul_char; + data->contains_control = parser->parsed_control_char; + + value.content_type = JSON_CONTENT_TYPE_DATA; + value.content.data = data; + } else { + value.content_type = JSON_CONTENT_TYPE_STRING; + value.content.str = str_c(parser->buffer); + } + + return json_parser_callback_parse_value(parser, list_context, + JSON_TYPE_STRING, &value); +} + +static int +json_parser_callback_true_value(struct json_parser *parser, + void *list_context) +{ + struct json_value value; + + i_zero(&value); + return json_parser_callback_parse_value(parser, list_context, + JSON_TYPE_TRUE, &value); +} + +static int +json_parser_callback_false_value(struct json_parser *parser, + void *list_context) +{ + struct json_value value; + + i_zero(&value); + return json_parser_callback_parse_value(parser, list_context, + JSON_TYPE_FALSE, &value); +} + +static int +json_parser_callback_null_value(struct json_parser *parser, + void *list_context) +{ + struct json_value value; + + i_zero(&value); + return json_parser_callback_parse_value(parser, list_context, + JSON_TYPE_NULL, &value); +} + +/* + * Data handling + */ + +static inline bool json_parser_have_data(struct json_parser *parser) +{ + return (parser->current_char_len > 0 || + parser->cur < parser->end); +} + +static void +json_parser_set_data(struct json_parser *parser, + const unsigned char *data, size_t size) +{ + 
parser->begin = data; + parser->cur = data; + parser->end = data + size; +} + +static int json_parser_read(struct json_parser *parser) +{ + const unsigned char *data; + size_t size; + int ret; + + i_assert(parser->end >= parser->begin); + ret = i_stream_read_data(parser->input, &data, &size, + (size_t)(parser->end - parser->begin)); + if (ret <= 0) { + /* As long as the input stream buffer is large enough to hold a + single UTF-8 code point (4 bytes), the parser will always + clear enough of the buffer that it can never be full upon the + next read. */ + i_assert(ret != -2); + + if (parser->input->stream_errno == 0) { + /* Just make sure we're still looking at the correct + buffer */ + data = i_stream_get_data(parser->input, &size); + json_parser_set_data(parser, data, size); + } + return ret; + } + + json_parser_set_data(parser, data, size); + return size; +} + +/* + * Unicode character handling + */ + +static inline const char *json_parser_curchar_str(struct json_parser *parser) +{ + unichar_t ch = parser->current_char; + + i_assert(parser->current_char_len > 0); + if (ch >= 0x20 && ch < 0x7f) + return t_strdup_printf("'%c'", (char) ch); + switch (ch) { + case 0x00: + return ""; + case '\r': + return ""; + case '\n': + return ""; + case '\t': + return ""; + } + if (ch <= 0xffff) + return t_strdup_printf("U+%04lX", (unsigned long int) ch); + + return t_strdup_printf("U+%06lX", (unsigned long int) ch); +} + +static int json_parser_readchar(struct json_parser *parser) +{ + int ret; + + if (parser->cur >= parser->end) + return JSON_PARSE_NO_DATA; + + ret = uni_utf8_get_char_buf(parser->cur, (parser->end - parser->cur), + &parser->current_char); + if (ret <= 0) { + if (ret < 0) { + json_parser_error(parser, "Invalid UTF-8 character"); + return JSON_PARSE_ERROR; + } + if (parser->end_of_input) { + json_parser_error(parser, + "Incomplete UTF-8 character at end of input"); + return JSON_PARSE_UNEXPECTED_EOF; + } + return JSON_PARSE_NO_DATA; + } + + if 
(parser->current_char > 0x10ffff || + (parser->current_char & 0xfff800) == 0x00d800) { + /* Should be checked in unichar.h */ + json_parser_error(parser, "Invalid Unicode character U+%04lX", + (unsigned long int)parser->current_char); + return JSON_PARSE_ERROR; + } + + /* Update parser location */ + if (parser->current_char == '\n') + parser->loc.line_number++; + else + parser->loc.column++; + + parser->current_char_len = ret; + return JSON_PARSE_OK; +} + +static inline int +json_parser_curchar(struct json_parser *parser, unichar_t *ch_r) +{ + int ret; + + if (parser->current_char_len <= 0) { + ret = json_parser_readchar(parser); + if (ret < JSON_PARSE_OK) + return ret; + i_assert(parser->current_char_len > 0); + } + if (ch_r != NULL) + *ch_r = parser->current_char; + return JSON_PARSE_OK; +} + +static inline void json_parser_shift(struct json_parser *parser) +{ + i_assert(parser->current_char_len > 0); + parser->cur += parser->current_char_len; + i_assert(parser->cur <= parser->end); + parser->current_char_len = 0; + + if (parser->current_char == '\n') + parser->loc.column = 0; +} + +static inline size_t json_parser_available_size(struct json_parser *parser) +{ + i_assert(parser->cur <= parser->end); + return (parser->end - parser->cur); +} + +static inline size_t +json_parser_shifted_size(struct json_parser *parser, + const unsigned char *offset) +{ + i_assert(offset <= parser->cur); + return (parser->cur - offset); +} + +static inline size_t +json_parser_parsed_size(struct json_parser *parser, + const unsigned char *offset) +{ + return json_parser_shifted_size(parser, offset) + + parser->current_char_len; +} + +/* + * Parser core + */ + +static inline int +json_parser_call(struct json_parser *parser, + json_parser_func_t parse_func, void *param) +{ + struct json_parser_level *level; + struct json_parser_state state; + unsigned int level_stack_pos; + int status; + + if (!json_parser_have_data(parser) && !parser->end_of_input) + return JSON_PARSE_NO_DATA; + + if 
(parser->level_stack_pos > parser->limits.max_nesting) { + json_parser_error( + parser, "Data is nested too deep (max %u levels)", + parser->limits.max_nesting); + return JSON_PARSE_ERROR; + } + + /* Ascend syntax stack */ + parser->level_stack_pos++; + level_stack_pos = parser->level_stack_pos; + + level = array_idx_get_space(&parser->level_stack, level_stack_pos-1); + + if (level->result == JSON_PARSE_OVERFLOW) { + /* We're backtracking from an overflow */ + i_assert(level->func == parse_func); + --parser->level_stack_pos; + status = level->result; + level->result = 0; + level->backtracked = TRUE; + return status; + } + if (level->finished) { + /* This level is finished in json_parser_run(); + return result */ + i_assert(level->func == parse_func); + status = level->result; + --parser->level_stack_pos; + i_zero(level); + parser->finished_level = TRUE; + return status; + } + + if (level->backtracked) { + /* Continue in earlier backtracked level */ + level->backtracked = FALSE; + } else { + /* Start parsing at new syntax level */ + i_assert(level->func == NULL); + i_zero(level); + level->func = parse_func; + level->state.param = param; + } + + if (parser->call_stack_depth >= JSON_PARSER_MAX_CALL_STACK_DEPTH) { + /* Backtrack to clear the call stack */ + return JSON_PARSE_CALL_STACK_OVERFLOW; + } + + parser->call_stack_depth++; + + state = level->state; + status = parse_func(parser, &state); + level = array_idx_modifiable(&parser->level_stack, level_stack_pos-1); + level->state = state; + + i_assert(parser->call_stack_depth > 0); + parser->call_stack_depth--; + + switch (status) { + case JSON_PARSE_OVERFLOW: + level->backtracked = TRUE; + --parser->level_stack_pos; + return status; + case JSON_PARSE_CALL_STACK_OVERFLOW: + case JSON_PARSE_NO_DATA: + case JSON_PARSE_BOUNDARY: + case JSON_PARSE_INTERRUPTED: + /* Parsing halted at this position */ + return status; + default: + break; + } + + /* Level finished immediately */ + --parser->level_stack_pos; + level->func = 
NULL; + level->backtracked = FALSE; + return status; +} + +static int +json_parser_run(struct json_parser *parser, json_parser_func_t parse_func) +{ + struct json_parser_level *level; + struct json_parser_state state; + unsigned int overflow_stack_pos; + int ret; + + /* Exit early if there is no data */ + if (!json_parser_have_data(parser) && !parser->end_of_input) + return JSON_PARSE_NO_DATA; + + /* Make sure parse functions get no partial characters */ + if ((ret = json_parser_curchar(parser, NULL)) < JSON_PARSE_OK) { + if (ret != JSON_PARSE_NO_DATA || !parser->end_of_input) + return ret; + } + + if (parser->level_stack_pos == 0) { + /* Start parsing */ + parser->call_stack_depth = 0; + ret = json_parser_call(parser, parse_func, NULL); + i_assert(parser->call_stack_depth == 0); + if (ret != JSON_PARSE_CALL_STACK_OVERFLOW) + return ret; + } + + /* Continue parsing */ + level = NULL; + overflow_stack_pos = 0; + do { + unsigned int level_stack_pos; + bool first = TRUE; + + if (level != NULL) { + first = FALSE; + level->result = ret; + if (ret != JSON_PARSE_OVERFLOW) { + /* Mark previous level as finished; meaning that + json_parser_call() in the current level will + return level->result */ + level->finished = TRUE; + } + } + + level_stack_pos = parser->level_stack_pos; + + level = array_idx_get_space(&parser->level_stack, + level_stack_pos-1); + + parser->finished_level = FALSE; + + /* Call the level parse function */ + parser->call_stack_depth = 0; + i_assert(level->func != NULL); + state = level->state; + ret = level->func(parser, &state); + level = array_idx_modifiable(&parser->level_stack, + level_stack_pos-1); + level->state = state; + i_assert(parser->call_stack_depth == 0); + + switch (ret) { + case JSON_PARSE_OVERFLOW: + if (overflow_stack_pos == 0) + overflow_stack_pos = parser->level_stack_pos; + break; + case JSON_PARSE_OK: + break; + case JSON_PARSE_CALL_STACK_OVERFLOW: + /* Unwrapped call stack; continue */ + level = NULL; + continue; + default: + if 
(overflow_stack_pos > 0) + parser->level_stack_pos = overflow_stack_pos; + return ret; + } + + /* Descend the syntax stack */ + parser->level_stack_pos--; + + i_assert(first || parser->finished_level || + parser->end_of_input || ret == JSON_PARSE_OVERFLOW); + } while (parser->level_stack_pos > 0); + i_assert(level != NULL); + level->func = NULL; + + if (overflow_stack_pos > 0) + parser->level_stack_pos = overflow_stack_pos; + return ret; +} + +/* + * Buffers + */ + +static inline void json_parser_reset_buffer(struct json_parser *parser) +{ + if (parser->buffer == NULL) + parser->buffer = str_new(default_pool, 256); + else + str_truncate(parser->buffer, 0); +} + +static inline void +json_parser_append_buffer(struct json_parser *parser, + buffer_t *buffer, const unsigned char *offset) +{ + size_t size = json_parser_shifted_size(parser, offset); + + if (size == 0) + return; + str_append_data(buffer, offset, size); +} + +/* + * JSON syntax + */ + +/* ws */ + +static int json_parser_skip_ws(struct json_parser *parser) +{ + unichar_t ch; + int ret; + + /* ws = *( + %x20 / ; Space + %x09 / ; Horizontal tab + %x0A / ; Line feed or New line + %x0D ) ; Carriage return + */ + + while ((ret = json_parser_curchar(parser, &ch)) == JSON_PARSE_OK) { + if (!json_unichar_is_ws(ch)) + return JSON_PARSE_OK; + json_parser_shift(parser); + } + if (ret == JSON_PARSE_NO_DATA) { + return (parser->end_of_input ? 
+ JSON_PARSE_OK : JSON_PARSE_NO_DATA); + } + return ret; +} + +/* false, null, true */ + +static int +json_parser_do_parse_literal(struct json_parser *parser, + struct json_parser_state *state) +{ + enum { _LIT_START = 0, _LIT_NEXT, _LIT_END }; + const char *literal = (const char *)state->param; + const char *p = (const char *)state->context; + unichar_t ch; + int ret; + + while ((ret = json_parser_curchar(parser, &ch)) == JSON_PARSE_OK) { + switch (state->state) { + case _LIT_START: + p = literal; + i_assert(*p != '\0'); + i_assert((unichar_t)*p == ch); + p++; + if (*p == '\0') { + state->state = _LIT_END; + return JSON_PARSE_OK; + } + state->state = _LIT_NEXT; + json_parser_shift(parser); + continue; + case _LIT_NEXT: + if ((unichar_t)*p != ch) { + json_parser_error( + parser, "Expected value '%s', " + "but encounted '%s' + %s", + literal, t_strdup_until(literal, p), + json_parser_curchar_str(parser)); + return JSON_PARSE_ERROR; + } + p++; + if (*p == '\0') { + state->state = _LIT_END; + json_parser_shift(parser); + return JSON_PARSE_OK; + } + json_parser_shift(parser); + continue; + default: + i_unreached(); + } + } + state->context = (void *)p; + return ret; +} +static int +json_parser_parse_literal(struct json_parser *parser, const char *literal) +{ + return json_parser_call(parser, json_parser_do_parse_literal, + (void*)literal); +} + +/* number */ + +static int +json_parser_do_parse_number(struct json_parser *parser, + struct json_parser_state *state) +{ + enum { _NUM_START = 0, _NUM_INT, _NUM_ZERO, _NUM_NONZERO, + _NUM_DOT, _NUM_FRAC, _NUM_FRAC_NEXT, _NUM_E, _NUM_E_PM, + _NUM_EXP, _NUM_EXP_NEXT, _NUM_END }; + string_t *buf = parser->buffer; + const unsigned char *offset = parser->cur; + size_t max_size = parser->limits.max_string_size; + unichar_t ch; + int ret; + + /* number = [ minus ] int [ frac ] [ exp ] + int = zero / ( digit1-9 *DIGIT ) + + frac = decimal-point 1*DIGIT + decimal-point = %x2E ; . 
+ + exp = e [ minus / plus ] 1*DIGIT + e = %x65 / %x45 ; e E + + digit1-9 = %x31-39 ; 1-9 + zero = %x30 ; 0 + minus = %x2D ; - + plus = %x2B ; + + */ + + i_assert(max_size > 0); + i_assert(str_len(buf) <= max_size); + + while ((ret = json_parser_curchar(parser, &ch)) == JSON_PARSE_OK) { + if ((str_len(buf) + + json_parser_parsed_size(parser, offset)) > max_size) + return JSON_PARSE_OVERFLOW; + switch (state->state) { + case _NUM_START: + parser->parsed_float = FALSE; + state->state = _NUM_INT; + if (ch == '-') { + json_parser_shift(parser); + continue; + } + /* Fall through */ + case _NUM_INT: + if (ch == '0') { + state->state = _NUM_ZERO; + json_parser_shift(parser); + continue; + } + if (json_unichar_is_digit(ch)) { + state->state = _NUM_NONZERO; + json_parser_shift(parser); + continue; + } + json_parser_error(parser, + "Expected digit, but encountered %s", + json_parser_curchar_str(parser)); + return JSON_PARSE_ERROR; + case _NUM_ZERO: + if (json_unichar_is_digit(ch)) { + json_parser_error(parser, + "Numbers cannot have leading zeros"); + return JSON_PARSE_ERROR; + } + state->state = _NUM_DOT; + continue; + case _NUM_NONZERO: + if (!json_unichar_is_digit(ch)) { + state->state = _NUM_DOT; + continue; + } + json_parser_shift(parser); + continue; + case _NUM_DOT: + if (ch == 'e' || ch == 'E') { + parser->parsed_float = TRUE; + state->state = _NUM_E_PM; + json_parser_shift(parser); + continue; + } + if (ch == '.') { + parser->parsed_float = TRUE; + state->state = _NUM_FRAC; + json_parser_shift(parser); + continue; + } + json_parser_append_buffer(parser, buf, offset); + state->state = _NUM_END; + return JSON_PARSE_OK; + case _NUM_FRAC: + if (!json_unichar_is_digit(ch)) { + json_parser_error(parser, + "Expected digit in number fraction, " + "but encountered %s", + json_parser_curchar_str(parser)); + return JSON_PARSE_ERROR; + } + state->state = _NUM_FRAC_NEXT; + json_parser_shift(parser); + continue; + case _NUM_FRAC_NEXT: + if (!json_unichar_is_digit(ch)) { + 
state->state = _NUM_E; + continue; + } + json_parser_shift(parser); + continue; + case _NUM_E: + if (ch == 'e' || ch == 'E') { + state->state = _NUM_E_PM; + json_parser_shift(parser); + continue; + } + json_parser_append_buffer(parser, buf, offset); + state->state = _NUM_END; + return JSON_PARSE_OK; + case _NUM_E_PM: + state->state = _NUM_EXP; + if (ch == '-' || ch == '+') { + json_parser_shift(parser); + continue; + } + /* Fall through */ + case _NUM_EXP: + if (!json_unichar_is_digit(ch)) { + json_parser_error(parser, + "Expected digit in number exponent, " + "but encountered %s", + json_parser_curchar_str(parser)); + return JSON_PARSE_ERROR; + } + state->state = _NUM_EXP_NEXT; + json_parser_shift(parser); + continue; + case _NUM_EXP_NEXT: + if (json_unichar_is_digit(ch)) { + json_parser_shift(parser); + continue; + } + json_parser_append_buffer(parser, buf, offset); + state->state = _NUM_END; + return JSON_PARSE_OK; + default: + i_unreached(); + } + } + if (ret == JSON_PARSE_NO_DATA) { + if ((str_len(buf) + + json_parser_parsed_size(parser, offset)) > max_size) + return JSON_PARSE_OVERFLOW; + if (parser->end_of_input) { + switch (state->state) { + case _NUM_ZERO: + case _NUM_NONZERO: + case _NUM_DOT: + case _NUM_FRAC_NEXT: + case _NUM_E: + case _NUM_EXP_NEXT: + json_parser_append_buffer(parser, buf, offset); + return JSON_PARSE_OK; + default: + break; + } + json_parser_error(parser, + "Encountered end of input inside number"); + return JSON_PARSE_UNEXPECTED_EOF; + } + json_parser_append_buffer(parser, buf, offset); + } + return ret; +} +static int json_parser_parse_number(struct json_parser *parser) +{ + return json_parser_call(parser, json_parser_do_parse_number, NULL); +} + +/* string */ +/* Deeper parse level used by json_parser_parse_unicode_escape() when the + input ran out in the middle of the hex digits: once a character can be + read it is reported as an invalid digit and JSON_PARSE_ERROR is + returned; until then the status of json_parser_curchar() is passed on. */ +static int +json_parser_finish_bad_unicode_escape( + struct json_parser *parser, + struct json_parser_state *state ATTR_UNUSED) +{ + unichar_t ch; + int ret; + + ret = json_parser_curchar(parser, &ch); + if (ret == JSON_PARSE_OK) { + json_parser_error(parser, + "Invalid 
digit %s in Unicode escape sequence", + json_parser_curchar_str(parser)); + return JSON_PARSE_ERROR; + } + return ret; +} +/* Parse the 4HEXDIG part of a \uXXXX escape and append the decoded + character to the string buffer (state->param) as UTF-8. state->context + carries a value between calls: either a pending high surrogate + (0xD800-0xDBFF) that still needs its low surrogate, or a decoded + character that could not be appended because the buffer would exceed + max_size. Returns JSON_PARSE_NO_DATA when json_parser_available_size() + reports fewer than 4, JSON_PARSE_OVERFLOW when appending would exceed + max_size, JSON_PARSE_ERROR for an invalid digit, a disallowed NUL, a + lonely surrogate or an invalid code point, and JSON_PARSE_OK otherwise + (a high surrogate alone is only recorded, nothing is appended yet). */ +static int +json_parser_parse_unicode_escape(struct json_parser *parser, + struct json_parser_state *state, + size_t max_size) +{ + string_t *buf = (string_t *)state->param; + unichar_t hi_surg = (unichar_t)(uintptr_t)state->context; + unichar_t ch, ech; + int ret, i; + + if (hi_surg != 0x0000 && (hi_surg & 0xfffc00) != 0xd800) { + /* Already parsed, but string buffer was full: the context holds the + decoded character stored by the earlier overflow return. + NOTE(review): the context is not cleared after the append below, + while json_parser_parse_unicode_escape_close() asserts that a + non-zero context is a high surrogate - confirm that callers reset + it before string parsing resumes. */ + ech = hi_surg; + if ((str_len(buf) + uni_ucs4_to_utf8_len(ech)) > max_size) { + /* Buffer is more than full when the escaped + character is added; return overflow. */ + return JSON_PARSE_OVERFLOW; + } + uni_ucs4_to_utf8_c(ech, buf); + return JSON_PARSE_OK; + } + + /* No need to create a level on the parser stack, since we can just wait + until sufficient input is available. */ + if (json_parser_available_size(parser) < 4) + return JSON_PARSE_NO_DATA; + ech = 0; + i = 0; + while ((ret = json_parser_curchar(parser, &ch)) == JSON_PARSE_OK) { + switch (ch) { + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': + ech = (ech << 4) + (unichar_t)(ch - 'a' + 10); + break; + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': + ech = (ech << 4) + (unichar_t)(ch - 'A' + 10); + break; + case '0': case '1': case '2': case '3': case '4': case '5': + case '6': case '7': case '8': case '9': + ech = (ech << 4) + (unichar_t)(ch - '0'); + break; + default: + json_parser_error(parser, + "Invalid digit %s in Unicode escape sequence", + json_parser_curchar_str(parser)); + return JSON_PARSE_ERROR; + } + json_parser_shift(parser); + if (++i >= 4) + break; + } + if (ret == JSON_PARSE_NO_DATA) { + /* We already checked that 4 octets are available for the hex + digits. The only thing that could have happened is that we + encountered the beginning of a UTF-8 character and no more + input is available. 
Finish it at a deeper parse level that + always returns error once the UTF-8 character is complete. */ + return json_parser_call( + parser, json_parser_finish_bad_unicode_escape, NULL); + } + if (ech == 0x0000) { + if ((parser->flags & + JSON_PARSER_FLAG_STRINGS_ALLOW_NUL) == 0) { + json_parser_error(parser, + "String contains escaped NUL character"); + return JSON_PARSE_ERROR; + } + parser->parsed_nul_char = TRUE; + } + if (hi_surg != 0x0000) { + i_assert((hi_surg & 0xfffc00) == 0xd800); + if ((ech & 0xfffc00) != 0xdc00) { + json_parser_error(parser, + "String contains lonely Unicode high surrogate " + "'\\u%04lX'", (unsigned long int)hi_surg); + return JSON_PARSE_ERROR; + } + ech = (ech & 0x3ff) | ((hi_surg & 0x3ff) << 10); + ech += 0x10000; + hi_surg = 0x0000; + state->context = (void*)(uintptr_t)hi_surg; + } else if ((ech & 0xfffc00) == 0xd800) { + hi_surg = ech; + state->context = (void*)(uintptr_t)hi_surg; + } else if ((ech & 0xfffc00) == 0xdc00) { + json_parser_error(parser, + "String contains lonely Unicode low surrogate " + "'\\u%04lX'", (unsigned long int)ech); + return JSON_PARSE_ERROR; + } + if (hi_surg == 0x0000) { + if (!uni_is_valid_ucs4(ech)) { + json_parser_error(parser, + "String contains invalid escaped " + "Unicode character U+%04lX", + (unsigned long int)ech); + return JSON_PARSE_ERROR; + } + if (json_unichar_is_control(ech)) + parser->parsed_control_char = TRUE; + + if ((str_len(buf) + uni_ucs4_to_utf8_len(ech)) > max_size) { + /* Buffer is more than full when the escaped character + is added; return overflow. Store the parsed character + for the next call. 
*/ + state->context = (void*)(uintptr_t)ech; + return JSON_PARSE_OVERFLOW; + } + uni_ucs4_to_utf8_c(ech, buf); + } + return JSON_PARSE_OK; +} +/* Called when the string continues with something other than a \u escape: + a high surrogate still pending in state->context is then lonely and is + reported as JSON_PARSE_ERROR; otherwise the context is reset to zero + and JSON_PARSE_OK is returned. */ +static inline int +json_parser_parse_unicode_escape_close(struct json_parser *parser, + struct json_parser_state *state) +{ + unichar_t hi_surg = (unichar_t)(uintptr_t)state->context; + + if (hi_surg != 0x0000) { + i_assert((hi_surg & 0xfffc00) == 0xd800); + json_parser_error(parser, + "String contains lonely Unicode high surrogate " + "'\\u%04lX'", (unsigned long int)hi_surg); + return JSON_PARSE_ERROR; + } + + state->context = (void*)(uintptr_t)0x000; + return JSON_PARSE_OK; +} + +static int +json_parser_do_parse_string(struct json_parser *parser, + struct json_parser_state *state, size_t max_size) +{ + enum { _STR_START = 0, _STR_CHAR, _STR_ESCAPE, _STR_ESCAPE_U, + _STR_END }; + string_t *buf = (string_t *)state->param; + const unsigned char *offset = parser->cur; + unichar_t ch; + int ret; + + /* string = quotation-mark *char quotation-mark + + char = unescaped / + escape ( + %x22 / ; " quotation mark U+0022 + %x5C / ; \ reverse solidus U+005C + %x2F / ; / solidus U+002F + %x62 / ; b backspace U+0008 + %x66 / ; f form feed U+000C + %x6E / ; n line feed U+000A + %x72 / ; r carriage return U+000D + %x74 / ; t tab U+0009 + %x75 4HEXDIG ) ; uXXXX U+XXXX + escape = %x5C ; \ + quotation-mark = %x22 ; " + unescaped = %x20-21 / %x23-5B / %x5D-10FFFF + */ + + i_assert(str_len(buf) <= max_size); + + while ((ret = json_parser_curchar(parser, &ch)) == JSON_PARSE_OK) { + switch (state->state) { + /* quotation-mark */ + case _STR_START: + if (ch != '"') { + json_parser_error(parser, + "Expected string, but encountered %s", + json_parser_curchar_str(parser)); + return JSON_PARSE_ERROR; + } + json_parser_shift(parser); + offset = parser->cur; + parser->parsed_nul_char = FALSE; + parser->parsed_control_char = FALSE; + state->state = _STR_CHAR; + continue; + /* char */ + case _STR_CHAR: + /* escape */ + if (ch == '\\') { + 
i_assert((str_len(buf) + + json_parser_shifted_size(parser, offset)) + <= max_size); + json_parser_append_buffer(parser, buf, offset); + state->state = _STR_ESCAPE; + json_parser_shift(parser); + continue; + } + ret = json_parser_parse_unicode_escape_close( + parser, state); + if (ret < JSON_PARSE_OK) + return ret; + if (ch == '"') { + i_assert((str_len(buf) + + json_parser_shifted_size(parser, offset)) + <= max_size); + json_parser_append_buffer(parser, buf, offset); + state->state = _STR_END; + json_parser_shift(parser); + return JSON_PARSE_OK; + } + /* unescaped = %x20-21 / %x23-5B / %x5D-10FFFF */ + if (!json_unichar_is_uchar(ch)) { + json_parser_error(parser, + "String contains invalid character %s", + json_parser_curchar_str(parser)); + return JSON_PARSE_ERROR; + } + if ((str_len(buf) + + json_parser_parsed_size(parser, offset)) + > max_size) { + /* Buffer is more than full when current + character is added; just add the pending + characters we skipped so far and return + overflow. 
*/ + json_parser_append_buffer(parser, buf, offset); + return JSON_PARSE_OVERFLOW; + } + json_parser_shift(parser); + continue; + /* escape */ + case _STR_ESCAPE: + if (str_len(buf) >= max_size) + return JSON_PARSE_OVERFLOW; + state->state = _STR_CHAR; + switch (ch) { + /* %x22 / ; " quotation mark U+0022 */ + case '"': + str_append_c(buf, '"'); + break; + /* %x5C / ; \ reverse solidus U+005C */ + case '\\': + str_append_c(buf, '\\'); + break; + /* %x2F / ; / solidus U+002F */ + case '/': + str_append_c(buf, '/'); + break; + /* %x62 / ; b backspace U+0008 */ + case 'b': + parser->parsed_control_char = TRUE; + str_append_c(buf, 0x08); + break; + /* %x66 / ; f form feed U+000C */ + case 'f': + parser->parsed_control_char = TRUE; + str_append_c(buf, 0x0c); + break; + /* %x6E / ; n line feed U+000A */ + case 'n': + str_append_c(buf, '\n'); + break; + /* %x72 / ; r carriage return U+000D */ + case 'r': + str_append_c(buf, '\r'); + break; + /* %x74 / ; t tab U+0009 */ + case 't': + str_append_c(buf, '\t'); + break; + /* %x75 4HEXDIG ) ; uXXXX U+XXXX */ + case 'u': + state->state = _STR_ESCAPE_U; + json_parser_shift(parser); + continue; + default: + json_parser_error(parser, + "Invalid escape sequence '\\' + %s", + json_parser_curchar_str(parser)); + return JSON_PARSE_ERROR; + } + ret = json_parser_parse_unicode_escape_close( + parser, state); + if (ret < JSON_PARSE_OK) + return ret; + json_parser_shift(parser); + offset = parser->cur; + continue; + /* %x75 4HEXDIG */ + case _STR_ESCAPE_U: + ret = json_parser_parse_unicode_escape( + parser, state, max_size); + if (ret < JSON_PARSE_OK) + return ret; + offset = parser->cur; + state->state = _STR_CHAR; + continue; + default: + i_unreached(); + } + } + if (ret == JSON_PARSE_NO_DATA) { + if (parser->end_of_input) { + switch (state->state) { + case _STR_START: + json_parser_error(parser, + "Expected string, " + "but encountered end of input"); + return JSON_PARSE_UNEXPECTED_EOF; + case _STR_CHAR: + case _STR_ESCAPE: + case 
_STR_ESCAPE_U: + json_parser_error(parser, + "Encountered end of input inside string"); + return JSON_PARSE_UNEXPECTED_EOF; + default: + break; + } + i_unreached(); + } + if (state->state == _STR_CHAR) { + i_assert((str_len(buf) + + json_parser_shifted_size(parser, offset)) + <= max_size); + json_parser_append_buffer(parser, buf, offset); + } + } + return ret; +} + +static int +json_parser_do_parse_string_value(struct json_parser *parser, + struct json_parser_state *state) +{ + size_t max_size = parser->limits.max_string_size; + + return json_parser_do_parse_string(parser, state, max_size); +} + +static int +json_parser_parse_string_value(struct json_parser *parser, string_t *buf) +{ + return json_parser_call(parser, json_parser_do_parse_string_value, + (void *)buf); +} + +static int +json_parser_do_parse_object_member(struct json_parser *parser, + struct json_parser_state *state) +{ + return json_parser_do_parse_string(parser, state, + parser->limits.max_name_size); +} + +static int +json_parser_parse_object_member(struct json_parser *parser, string_t *buf) +{ + return json_parser_call(parser, json_parser_do_parse_object_member, + (void *)buf); +} + +/* value */ + +static int +json_parser_parse_value(struct json_parser *parser, void *context); +static int +json_parser_do_parse_value(struct json_parser *parser, + struct json_parser_state *state) +{ + enum { _VALUE_START = 0, _VALUE_ARRAY, _VALUE_ARRAY_EMPTY, + _VALUE_ARRAY_VALUE, _VALUE_ARRAY_COMMA, _VALUE_ARRAY_COMMA_WS, + _VALUE_OBJECT, _VALUE_OBJECT_EMPTY, _VALUE_OBJECT_MEMBER, + _VALUE_OBJECT_NAME_WS, _VALUE_OBJECT_COLON, + _VALUE_OBJECT_COLON_WS, _VALUE_OBJECT_VALUE, + _VALUE_OBJECT_COMMA, _VALUE_OBJECT_COMMA_WS, + _VALUE_NUMBER, _VALUE_STRING, _VALUE_FALSE, _VALUE_NULL, + _VALUE_TRUE, _VALUE_WS, _VALUE_END }; + void *parent_context = state->param; + unichar_t ch; + int ret; + + /* value = false / null / true / object / array / number / string + + false = %x66.61.6c.73.65 ; false + null = %x6e.75.6c.6c ; null 
+ true = %x74.72.75.65 ; true + + ; object + + object = begin-object [ member *( value-separator member ) ] + end-object + member = string name-separator value + + ; array + + array = begin-array [ value *( value-separator value ) ] end-array + + ; structural characters + + begin-array = ws %x5B ws ; [ left square bracket + begin-object = ws %x7B ws ; { left curly bracket + end-array = ws %x5D ws ; ] right square bracket + end-object = ws %x7D ws ; } right curly bracket + name-separator = ws %x3A ws ; : colon + value-separator = ws %x2C ws ; , comma + */ + + while ((ret = json_parser_curchar(parser, &ch)) == JSON_PARSE_OK) { + switch (state->state) { + case _VALUE_START: + switch (ch) { + /* array */ + case '[': + state->state = _VALUE_ARRAY; + state->count = 1; + json_parser_shift(parser); + ret = json_parser_callback_parse_list_open( + parser, parent_context, FALSE, + &state->context); + if (ret < JSON_PARSE_OK) + return ret; + continue; + /* object */ + case '{': + state->state = _VALUE_OBJECT; + state->count = 1; + json_parser_shift(parser); + ret = json_parser_callback_parse_list_open( + parser, parent_context, TRUE, + &state->context); + if (ret < JSON_PARSE_OK) + return ret; + continue; + /* number */ + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + json_parser_reset_buffer(parser); + state->state = _VALUE_NUMBER; + continue; + /* string */ + case '"': + json_parser_reset_buffer(parser); + state->state = _VALUE_STRING; + continue; + /* false */ + case 'f': + state->state = _VALUE_FALSE; + continue; + /* null */ + case 'n': + state->state = _VALUE_NULL; + continue; + /* true */ + case 't': + state->state = _VALUE_TRUE; + continue; + default: + break; + } + json_parser_error(parser, + "Expected value, but encountered %s", + json_parser_curchar_str(parser)); + return JSON_PARSE_ERROR; + /* "[" ws */ + case _VALUE_ARRAY: + ret = json_parser_skip_ws(parser); + if (ret < JSON_PARSE_OK) + return 
ret; + if (parser->object_member != NULL) + str_truncate(parser->object_member, 0); + parser->have_object_member = FALSE; + state->state = _VALUE_ARRAY_EMPTY; + continue; + /* "[" ws "]" */ + case _VALUE_ARRAY_EMPTY: + if (ch == ']') { + state->state = _VALUE_WS; + json_parser_shift(parser); + ret = json_parser_callback_parse_list_close( + parser, state->context, FALSE); + if (ret < JSON_PARSE_OK) + return ret; + continue; + } + state->state = _VALUE_ARRAY_VALUE; + continue; + /* value */ + case _VALUE_ARRAY_VALUE: + ret = json_parser_parse_value(parser, state->context); + if (ret < JSON_PARSE_OK) + return ret; + state->state = _VALUE_ARRAY_COMMA; + continue; + /* "," */ + case _VALUE_ARRAY_COMMA: + if (ch == ']') { + state->state = _VALUE_WS; + json_parser_shift(parser); + ret = json_parser_callback_parse_list_close( + parser, state->context, FALSE); + if (ret < JSON_PARSE_OK) + return ret; + continue; + } + if (ch != ',') { + json_parser_error(parser, + "Expected ',' or ']', " + "but encountered %s", + json_parser_curchar_str(parser)); + return JSON_PARSE_ERROR; + } + if (++state->count > parser->limits.max_list_items) { + json_parser_error(parser, + "Too many items in array"); + return JSON_PARSE_ERROR; + } + state->state = _VALUE_ARRAY_COMMA_WS; + json_parser_shift(parser); + continue; + /* "," ws */ + case _VALUE_ARRAY_COMMA_WS: + ret = json_parser_skip_ws(parser); + if (ret < JSON_PARSE_OK) + return ret; + state->state = _VALUE_ARRAY_VALUE; + continue; + /* "{" ws */ + case _VALUE_OBJECT: + ret = json_parser_skip_ws(parser); + if (ret < JSON_PARSE_OK) + return ret; + if (parser->object_member != NULL) + str_truncate(parser->object_member, 0); + parser->have_object_member = FALSE; + state->state = _VALUE_OBJECT_EMPTY; + continue; + /* "{" ws "}" */ + case _VALUE_OBJECT_EMPTY: + if (ch == '}') { + state->state = _VALUE_WS; + json_parser_shift(parser); + ret = json_parser_callback_parse_list_close( + parser, state->context, TRUE); + if (ret < JSON_PARSE_OK) + 
return ret; + continue; + } + if (parser->object_member == NULL) { + parser->object_member = + str_new(default_pool, 128); + } + state->state = _VALUE_OBJECT_MEMBER; + continue; + /* member */ + case _VALUE_OBJECT_MEMBER: + ret = json_parser_parse_object_member( + parser, parser->object_member); + if (ret < JSON_PARSE_OK) { + if (ret == JSON_PARSE_OVERFLOW) { + json_parser_error(parser, + "Excessive object member name size"); + return JSON_PARSE_ERROR; + } + return ret; + } + if (parser->parsed_nul_char) { + json_parser_error(parser, + "Encountered NUL character in object member name"); + return JSON_PARSE_ERROR; + } + parser->have_object_member = TRUE; + state->state = _VALUE_OBJECT_NAME_WS; + ret = json_parser_callback_parse_object_member( + parser, parent_context); + if (ret < JSON_PARSE_OK) + return ret; + continue; + /* string ws */ + case _VALUE_OBJECT_NAME_WS: + ret = json_parser_skip_ws(parser); + if (ret < JSON_PARSE_OK) + return ret; + state->state = _VALUE_OBJECT_COLON; + continue; + /* ":" */ + case _VALUE_OBJECT_COLON: + if (ch != ':') { + json_parser_error(parser, + "Expected ':', but encountered %s", + json_parser_curchar_str(parser)); + return JSON_PARSE_ERROR; + } + state->state = _VALUE_OBJECT_COLON_WS; + json_parser_shift(parser); + continue; + /* ":" ws */ + case _VALUE_OBJECT_COLON_WS: + ret = json_parser_skip_ws(parser); + if (ret < JSON_PARSE_OK) + return ret; + state->state = _VALUE_OBJECT_VALUE; + continue; + /* value */ + case _VALUE_OBJECT_VALUE: + ret = json_parser_parse_value(parser, state->context); + if (ret < JSON_PARSE_OK) + return ret; + if (parser->object_member != NULL) + str_truncate(parser->object_member, 0); + parser->have_object_member = FALSE; + state->state = _VALUE_OBJECT_COMMA; + continue; + /* "," */ + case _VALUE_OBJECT_COMMA: + if (ch == '}') { + state->state = _VALUE_WS; + json_parser_shift(parser); + ret = json_parser_callback_parse_list_close( + parser, state->context, TRUE); + if (ret < JSON_PARSE_OK) + return ret; 
+ continue; + } + if (ch != ',') { + json_parser_error(parser, + "Expected ',' or '}', " + "but encountered %s", + json_parser_curchar_str(parser)); + return JSON_PARSE_ERROR; + } + if (++state->count > parser->limits.max_list_items) { + json_parser_error(parser, + "Too many fields in object"); + return JSON_PARSE_ERROR; + } + state->state = _VALUE_OBJECT_COMMA_WS; + json_parser_shift(parser); + continue; + /* "," ws */ + case _VALUE_OBJECT_COMMA_WS: + ret = json_parser_skip_ws(parser); + if (ret < JSON_PARSE_OK) + return ret; + str_truncate(parser->object_member, 0); + parser->have_object_member = FALSE; + state->state = _VALUE_OBJECT_MEMBER; + continue; + /* number */ + case _VALUE_NUMBER: + ret = json_parser_parse_number(parser); + if (ret < JSON_PARSE_OK) { + if (ret == JSON_PARSE_OVERFLOW) { + json_parser_error(parser, + "Excessive number string size"); + return JSON_PARSE_ERROR; + } + return ret; + } + state->state = _VALUE_WS; + ret = json_parser_callback_number_value( + parser, parent_context); + if (ret < JSON_PARSE_OK) + return ret; + continue; + /* string */ + case _VALUE_STRING: + ret = json_parser_parse_string_value( + parser, parser->buffer); + if (ret < JSON_PARSE_OK) { + if (ret != JSON_PARSE_OVERFLOW) + return ret; + json_parser_error(parser, + "Excessive string size (> %zu)", + parser->limits.max_string_size); + return JSON_PARSE_ERROR; + } + state->state = _VALUE_WS; + ret = json_parser_callback_string_value( + parser, parent_context); + if (ret < JSON_PARSE_OK) + return ret; + continue; + /* false */ + case _VALUE_FALSE: + ret = json_parser_parse_literal(parser, "false"); + if (ret < JSON_PARSE_OK) + return ret; + state->state = _VALUE_WS; + ret = json_parser_callback_false_value( + parser, parent_context); + if (ret < JSON_PARSE_OK) + return ret; + continue; + /* null */ + case _VALUE_NULL: + ret = json_parser_parse_literal(parser, "null"); + if (ret < JSON_PARSE_OK) + return ret; + state->state = _VALUE_WS; + ret = 
json_parser_callback_null_value( + parser, parent_context); + if (ret < JSON_PARSE_OK) + return ret; + continue; + /* true */ + case _VALUE_TRUE: + ret = json_parser_parse_literal(parser, "true"); + if (ret < JSON_PARSE_OK) + return ret; + state->state = _VALUE_WS; + ret = json_parser_callback_true_value( + parser, parent_context); + if (ret < JSON_PARSE_OK) + return ret; + continue; + /* value ws */ + case _VALUE_WS: + ret = json_parser_skip_ws(parser); + if (ret < JSON_PARSE_OK) + return ret; + state->state = _VALUE_END; + return JSON_PARSE_OK; + default: + i_unreached(); + } + } + if (ret == JSON_PARSE_NO_DATA && parser->end_of_input) { + switch (state->state) { + case _VALUE_START: + json_parser_error(parser, + "Expected value, " + "but encountered end of input"); + return JSON_PARSE_UNEXPECTED_EOF; + case _VALUE_ARRAY: + case _VALUE_ARRAY_EMPTY: + case _VALUE_ARRAY_VALUE: + case _VALUE_ARRAY_COMMA: + case _VALUE_ARRAY_COMMA_WS: + json_parser_error(parser, + "Encountered end of input inside array"); + return JSON_PARSE_UNEXPECTED_EOF; + case _VALUE_OBJECT: + case _VALUE_OBJECT_EMPTY: + case _VALUE_OBJECT_MEMBER: + case _VALUE_OBJECT_NAME_WS: + case _VALUE_OBJECT_COLON: + case _VALUE_OBJECT_COLON_WS: + case _VALUE_OBJECT_VALUE: + case _VALUE_OBJECT_COMMA: + case _VALUE_OBJECT_COMMA_WS: + json_parser_error(parser, + "Encountered end of input inside object"); + return JSON_PARSE_UNEXPECTED_EOF; + case _VALUE_NUMBER: + return json_parser_callback_number_value(parser, + parent_context); + case _VALUE_STRING: + return json_parser_callback_string_value(parser, + parent_context); + case _VALUE_FALSE: + return json_parser_callback_false_value(parser, + parent_context); + case _VALUE_NULL: + return json_parser_callback_null_value(parser, + parent_context); + case _VALUE_TRUE: + return json_parser_callback_true_value(parser, + parent_context); + case _VALUE_WS: + break; + default: + i_unreached(); + } + return JSON_PARSE_OK; + } + return ret; +} +static int 
+json_parser_parse_value(struct json_parser *parser, void *context) +{ + return json_parser_call(parser, json_parser_do_parse_value, context); +} + +/* JSON-text */ + +static int +json_parser_parse_text(struct json_parser *parser, + struct json_parser_state *state) +{ + enum { _TEXT_START = 0, _TEXT_WS, _TEXT_VALUE, _TEXT_END }; + unichar_t ch; + int ret; + + /* JSON-text = ws value ws */ + + while ((ret = json_parser_curchar(parser, &ch)) == JSON_PARSE_OK) { + switch (state->state) { + /* BOM */ + case _TEXT_START: + state->state = _TEXT_WS; + if (ch == 0xFEFF) { + if ((parser->flags & + JSON_PARSER_FLAG_ALLOW_BOM) != 0) { + /* Ignore it */ + json_parser_shift(parser); + continue; + } + json_parser_error(parser, + "Encountered byte order mark at the beginning of input, " + "which is not allowed"); + return JSON_PARSE_ERROR; + } + /* Fall through */ + /* ws */ + case _TEXT_WS: + ret = json_parser_skip_ws(parser); + if (ret < JSON_PARSE_OK) + return ret; + state->state = _TEXT_VALUE; + continue; + /* value */ + case _TEXT_VALUE: + ret = json_parser_parse_value(parser, NULL); + if (ret < JSON_PARSE_OK) + return ret; + state->state = _TEXT_END; + return JSON_PARSE_OK; + default: + i_unreached(); + } + } + if (ret == JSON_PARSE_NO_DATA && parser->end_of_input) { + switch (state->state) { + case _TEXT_START: + case _TEXT_WS: + break; + case _TEXT_VALUE: + case _TEXT_END: + return JSON_PARSE_OK; + default: + i_unreached(); + } + json_parser_error(parser, "JSON text has no value"); + return JSON_PARSE_ERROR; + } + return ret; +} + +/* + * API + */ + +static int json_parser_continue(struct json_parser *parser) +{ + int status, ret; + + if (parser->error != NULL) + return JSON_PARSE_ERROR; + if (parser->started && + !json_parser_is_busy(parser)) { + return JSON_PARSE_OK; + } + + ret = 0; + do { + if (!json_parser_have_data(parser)) + continue; + status = json_parser_run(parser, + json_parser_parse_text); + parser->started = TRUE; + + switch (status) { + case 
JSON_PARSE_ERROR: + case JSON_PARSE_UNEXPECTED_EOF: + return status; + default: + break; + } + + i_stream_skip(parser->input, + (size_t)(parser->cur - parser->begin)); + parser->begin = parser->cur; + + switch (status) { + case JSON_PARSE_INTERRUPTED: + case JSON_PARSE_OVERFLOW: + case JSON_PARSE_BOUNDARY: + return status; + case JSON_PARSE_NO_DATA: + break; + case JSON_PARSE_OK: + if (parser->cur < parser->end) { + json_parser_error(parser, + "Spurious data at end of JSON text"); + return JSON_PARSE_ERROR; + } + status = JSON_PARSE_NO_DATA; + break; + case JSON_PARSE_ERROR: + case JSON_PARSE_UNEXPECTED_EOF: + i_unreached(); + } + i_assert(status == JSON_PARSE_NO_DATA); + } while ((ret = json_parser_read(parser)) > 0); + + if (ret < 0) { + if (parser->input->stream_errno != 0) { + json_parser_error(parser, "read(%s) failed: %s", + i_stream_get_name(parser->input), + i_stream_get_error(parser->input)); + return JSON_PARSE_ERROR; + } + + parser->end_of_input = TRUE; + + status = json_parser_run(parser, json_parser_parse_text); + switch (status) { + case JSON_PARSE_ERROR: + case JSON_PARSE_UNEXPECTED_EOF: + case JSON_PARSE_INTERRUPTED: + case JSON_PARSE_OK: + case JSON_PARSE_BOUNDARY: + return status; + case JSON_PARSE_NO_DATA: + break; + default: + i_unreached(); + } + json_parser_error(parser, "Premature end of input"); + return JSON_PARSE_UNEXPECTED_EOF; + } + return JSON_PARSE_NO_DATA; +} + +int json_parse_more(struct json_parser *parser, const char **error_r) +{ + int ret; + + *error_r = NULL; + + ret = json_parser_continue(parser); + switch (ret) { + case JSON_PARSE_ERROR: + case JSON_PARSE_UNEXPECTED_EOF: + *error_r = parser->error; + return -1; + case JSON_PARSE_OK: + break; + case JSON_PARSE_INTERRUPTED: + if (parser->end_of_input) + return 1; + return 0; + case JSON_PARSE_OVERFLOW: + case JSON_PARSE_NO_DATA: + return 0; + default: + i_unreached(); + } + + return 1; +} + +void json_parser_get_location(struct json_parser *parser, + struct json_parser_location 
*loc_r) +{ + i_zero(loc_r); + i_assert(parser->input->v_offset >= parser->input_offset); + loc_r->offset = parser->input->v_offset - parser->input_offset + + (parser->cur - parser->begin); + loc_r->line = parser->loc.line_number; + loc_r->value_line = parser->loc.value_line_number; + loc_r->column = parser->loc.column; +} diff --git a/src/lib-json/json-parser.new.h b/src/lib-json/json-parser.new.h new file mode 100644 index 0000000000..2079ece9c1 --- /dev/null +++ b/src/lib-json/json-parser.new.h @@ -0,0 +1,106 @@ +#ifndef JSON_PARSER_H +#define JSON_PARSER_H + +#include "json-types.h" + +#define json_parser_init json_parser_new_init +#define json_parser_deinit json_parser_new_deinit + +// FIXME: don't bother recording values if we're only validating. + +/* + * JSON parser + */ + +struct json_parser; +struct json_parser_state; + +enum json_parser_flags { + /* Strictly adhere to RFC 7159 */ + JSON_PARSER_FLAG_STRICT = BIT(0), + /* Allow the \0 character in string values */ + JSON_PARSER_FLAG_STRINGS_ALLOW_NUL = BIT(1), + /* Return all string values in a data buffer. Normally, this is + only done for strings containing \0 characters. */ + JSON_PARSER_FLAG_STRINGS_AS_DATA = BIT(2), + /* Return number values as a string (by default, numbers are truncated + to an integer). + */ + JSON_PARSER_FLAG_NUMBERS_AS_STRING = BIT(3), + /* Allow Byte Order Mark at beginning of input */ + JSON_PARSER_FLAG_ALLOW_BOM = BIT(4) +}; + +struct json_parser_callbacks { + /* The `context' parameter is always the context value that was + originally passed to json_parser_init(). The `parent_context' is + always the context of the array/object the parsed value is nested + within. The `name' parameter is the object member name for this field + if the surrounding syntax is an object. */ + + /* Called when the parser encounters the opening of an array or object + (as indicated by the `object' parameter. 
The list_context_r return + parameter can be used to set the context for this object/array, + making it available as `parent_context' to the contained values once + parsed. + */ + void (*parse_list_open)(void *context, void *parent_context, + const char *name, bool object, + void **list_context_r); + /* Called when the parser encounters the closing of an array or object + (as indicated by the `object' parameter. + */ + void (*parse_list_close)(void *context, void *parent_context, + bool object); + + /* (optional) Called when the parser parses an object member name. This + allows a preview on the member name, before its value is fully + parsed. + */ + void (*parse_object_member)(void *context, void *parent_context, + const char *name); + /* Called when the parser parses a value that is not an object or array. + The type and content of the value are provided. + */ + void (*parse_value)(void *context, void *parent_context, + const char *name, enum json_type type, + const struct json_value *value); +}; + +struct json_parser_location { + /* Octet offset in the input stream relative to the position at the + creation of the parser */ + uoff_t offset; + /* The current line number */ + uoff_t line; + /* The line number for the start of the current value */ + uoff_t value_line; + /* Unicode character (codepoint!) offset in the current line */ + uoff_t column; +}; + +struct json_parser * +json_parser_init(struct istream *input, const struct json_limits *limits, + enum json_parser_flags flags, + const struct json_parser_callbacks *callbacks, + void *context); +void json_parser_deinit(struct json_parser **_parser); + +/* Report a parse error (from within a callback). */ +void ATTR_FORMAT(2, 3) +json_parser_error(struct json_parser *parser, const char *format, ...); +/* Interrupt parser and return from json_parse_more(). This function can + only be called from a parse callback. 
Until json_parse_more() is called + again, any values (strings,buffers) passed in the callback remain valid. + */ +void json_parser_interrupt(struct json_parser *parser); + +/* Returns -1 on error, 0 if parser is interrupted or needs more data, + or 1 if the complete JSON text is parsed. */ +int json_parse_more(struct json_parser *parser, const char **error_r); + +/* Get the current location of the parser */ +void json_parser_get_location(struct json_parser *parser, + struct json_parser_location *loc_r); + +#endif diff --git a/src/lib-json/test-json-parser.c b/src/lib-json/test-json-parser.c new file mode 100644 index 0000000000..8c5ffbf451 --- /dev/null +++ b/src/lib-json/test-json-parser.c @@ -0,0 +1,2414 @@ +/* Copyright (c) 2017-2023 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "str.h" +#include "istream.h" +#include "ostream.h" +#include "istream-base64.h" +#include "test-common.h" + +#include "json-parser.new.h" + +#include + +static bool debug = FALSE; + +/* + * Test: valid json + */ + +struct json_valid_parse_test { + const char *input; + struct json_limits limits; + enum json_parser_flags flags; +}; + +static const struct json_valid_parse_test +valid_parse_tests[] = { + /* Test cases from https://github.com/nst/JSONTestSuite.git + Copyright (c) 2016 Nicolas Seriot + MIT License (see COPYING.MIT) + */ + { + // y_array_arraysWithSpaces.json + .input = "[[] ]", + }, + { + // y_array_empty.json + .input = "[]", + }, + { + // y_array_empty-string.json + .input = "[\"\"]", + }, + { + // y_array_ending_with_newline.json + .input = "[\"a\"]", + }, + { + // y_array_false.json + .input = "[false]", + }, + { + // y_array_heterogeneous.json + .input = "[null, 1, \"1\", {}]", + }, + { + // y_array_null.json + .input = "[null]", + }, + { + // y_array_with_1_and_newline.json + .input = "[1\n" + "]", + }, + { + // y_array_with_leading_space.json + .input = " [1]", + }, + { + // y_array_with_several_null.json + .input = 
"[1,null,null,null,2]", + }, + { + // y_array_with_trailing_space.json + .input = "[2] ", + }, + { + // y_number_0e+1.json + .input = "[0e+1]", + }, + { + // y_number_0e1.json + .input = "[0e1]", + }, + { + // y_number_after_space.json + .input = "[ 4]", + }, + { + // y_number_double_close_to_zero.json + .input = "[-0.000000000000000000000000000000000000" + "000000000000000000000000000000000000000001]\n", + }, + { + // y_number_int_with_exp.json + .input = "[20e1]", + }, + { + // y_number.json + .input = "[123e65]", + .flags = JSON_PARSER_FLAG_NUMBERS_AS_STRING, + }, + { + // y_number_minus_zero.json + .input = "[-0]", + }, + { + // y_number_negative_int.json + .input = "[-123]", + }, + { + // y_number_negative_one.json + .input = "[-1]", + }, + { + // y_number_negative_zero.json + .input = "[-0]", + }, + { + // y_number_real_capital_e.json + .input = "[1E22]", + .flags = JSON_PARSER_FLAG_NUMBERS_AS_STRING, + }, + { + // y_number_real_capital_e_neg_exp.json + .input = "[1E-2]", + }, + { + // y_number_real_capital_e_pos_exp.json + .input = "[1E+2]", + }, + { + // y_number_real_exponent.json + .input = "[123e45]", + .flags = JSON_PARSER_FLAG_NUMBERS_AS_STRING, + }, + { + // y_number_real_fraction_exponent.json + .input = "[123.456e78]", + .flags = JSON_PARSER_FLAG_NUMBERS_AS_STRING, + }, + { + // y_number_real_neg_exp.json + .input = "[1e-2]", + }, + { + // y_number_real_pos_exponent.json + .input = "[1e+2]", + }, + { + // y_number_simple_int.json + .input = "[123]", + }, + { + // y_number_simple_real.json + .input = "[123.456789]", + }, + { + // y_object_basic.json + .input = "{\"asd\":\"sdf\"}", + }, + { + // y_object_duplicated_key_and_value.json + .input = "{\"a\":\"b\",\"a\":\"b\"}", + }, + { + // y_object_duplicated_key.json + .input = "{\"a\":\"b\",\"a\":\"c\"}", + }, + { + // y_object_empty.json + .input = "{}", + }, + { + // y_object_empty_key.json + .input = "{\"\":0}", + }, + { + // y_object_extreme_numbers.json + .input = "{ \"min\": -1.0e+28, \"max\": 
1.0e+28 }", + .flags = JSON_PARSER_FLAG_NUMBERS_AS_STRING, + }, + { + // y_object.json + .input = "{\"asd\":\"sdf\", \"dfg\":\"fgh\"}", + }, + { + // y_object_long_strings.json + .input = "{\"x\":[{\"id\": " + "\"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\"}], " + "\"id\": " + "\"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\"}", + }, + { + // y_object_simple.json + .input = "{\"a\":[]}", + }, + { + // y_object_string_unicode.json + .input = "{\"title\":" + "\"\\u041f\\u043e\\u043b\\u0442\\u043e\\u0440\\u0430 " + "\\u0417\\u0435\\u043c\\u043b\\u0435\\u043a\\u043e" + "\\u043f\\u0430\" }", + }, + { + // y_object_with_newlines.json + .input = "{\n" + "\"a\": \"b\"\n" + "}", + }, + { + // y_string_1_2_3_bytes_UTF-8_sequences.json + .input = "[\"\\u0060\\u012a\\u12AB\"]", + }, + { + // y_string_accepted_surrogate_pair.json + .input = "[\"\\uD801\\udc37\"]", + }, + { + // y_string_accepted_surrogate_pairs.json + .input = "[\"\\ud83d\\ude39\\ud83d\\udc8d\"]", + }, + { + // y_string_allowed_escapes.json, + .input = "[\"\\\"\\\\\\/\\b\\f\\n\\r\\t\"]", + }, + { + // y_string_backslash_and_u_escaped_zero.json + .input = "[\"\\\\u0000\"]", + }, + { + // y_string_backslash_doublequotes.json + .input = "[\"\\\"\"]", + }, + { + // y_string_comments.json + .input = "[\"a/*b*/c/*d//e\"]", + }, + { + // y_string_double_escape_a.json + .input = "[\"\\\\a\"]", + }, + { + // y_string_double_escape_n.json + .input = "[\"\\\\n\"]", + }, + { + // y_string_escaped_control_character.json + .input = "[\"\\u0012\"]", + }, + { + // y_string_escaped_noncharacter.json + .input = "[\"\\uFFFF\"]", + }, + { + // y_string_in_array.json + .input = "[\"asd\"]", + }, + { + // y_string_in_array_with_leading_space.json + .input = "[ \"asd\"]", + }, + { + // y_string_last_surrogates_1_and_2.json + .input = "[\"\\uDBFF\\uDFFF\"]", + }, + { + // y_string_nbsp_uescaped.json + .input = "[\"new\\u00A0line\"]", + }, + { + // y_string_nonCharacterInUTF-8_U+10FFFF.json + .input = "[\"\xf4\x8f\xbf\xbf\"]", + }, + { + // 
y_string_nonCharacterInUTF-8_U+1FFFF.json + .input = "[\"\xf0\x9b\xbf\xbf\"]", + }, + { + // y_string_nonCharacterInUTF-8_U+FFFF.json + .input = "[\"\xef\xbf\xbf\"]", + }, + { + // y_string_null_escape.json + .input = "[\"\\u0000\"]", + .flags = JSON_PARSER_FLAG_STRINGS_ALLOW_NUL, + }, + { + // y_string_one-byte-utf-8.json + .input = "[\"\\u002c\"]", + }, + { + // y_string_pi.json + .input = "[\"\xcf\x80\"]", + }, + { + // y_string_simple_ascii.json + .input = "[\"asd \"]", + }, + { + // y_string_space.json + .input = "\" \"", + }, + { + // y_string_surrogates_U+1D11E_MUSICAL_SYMBOL_G_CLEF.json + .input = "[\"\\uD834\\uDd1e\"]", + }, + { + // y_string_three-byte-utf-8.json + .input = "[\"\\u0821\"]", + }, + { + // y_string_two-byte-utf-8.json + .input = "[\"\\u0123\"]", + }, + { + // y_string_u+2028_line_sep.json + .input = "[\"\xe2\x80\xa8\"]", + }, + { + // y_string_u+2029_par_sep.json + .input = "[\"\xe2\x80\xa9\"]", + }, + { + // y_string_uescaped_newline.json + .input = "[\"new\\u000Aline\"]", + }, + { + // y_string_uEscape.json + .input = "[\"\\u0061\\u30af\\u30EA\\u30b9\"]", + }, + { + // y_string_unescaped_char_delete.json + .input = "[\"\x7f\"]", + }, + { + // y_string_unicode_2.json + .input = "[\"\xe2\x8d\x82\xe3\x88\xb4\xe2\x8d\x82\"]", + }, + { + // y_string_unicodeEscapedBackslash.json + .input = "[\"\\u005C\"]", + }, + { + // y_string_unicode_escaped_double_quote.json + .input = "[\"\\u0022\"]", + }, + { + // y_string_unicode.json + .input = "[\"\\uA66D\"]", + }, + { + // y_string_unicode_U+10FFFE_nonchar.json + .input = "[\"\\uDBFF\\uDFFE\"]", + }, + { + // y_string_unicode_U+1FFFE_nonchar.json + .input = "[\"\\uD83F\\uDFFE\"]", + }, + { + // y_string_unicode_U+200B_ZERO_WIDTH_SPACE.json + .input = "[\"\\u200B\"]", + }, + { + // y_string_unicode_U+2064_invisible_plus.json + .input = "[\"\\u2064\"]", + }, + { + // y_string_unicode_U+FDD0_nonchar.json + .input = "[\"\\uFDD0\"]", + }, + { + // y_string_unicode_U+FFFE_nonchar.json + .input = 
"[\"\\uFFFE\"]", + }, + { + // y_string_utf8.json + .input = "[\"\xe2\x82\xac\xf0\x9d\x84\x9e\"]", + }, + { + // y_string_with_del_character.json + .input = "[\"a\x7f""a\"]", + }, + { + // y_structure_lonely_false.json + .input = "false", + }, + { + // y_structure_lonely_int.json + .input = "42", + }, + { + // y_structure_lonely_negative_real.json + .input = "-0.1", + }, + { + // y_structure_lonely_null.json + .input = "null", + }, + { + // y_structure_lonely_string.json + .input = "\"asd\"", + }, + { + // y_structure_lonely_true.json + .input = "true", + }, + { + // y_structure_string_empty.json + .input = "\"\"", + }, + { + // y_structure_trailing_newline.json + .input = "[\"a\"]\n", + }, + { + // y_structure_true_in_array.json + .input = "[true]", + }, + { + // y_structure_whitespace_array.json + .input = " [] ", + }, + { + // i_number_double_huge_neg_exp.json + .input = "[123.456e-789]", + }, + { + // i_number_huge_exp.json + .input = "[0.4e0066999999999999999999999999999999999" + "99999999999999999999999999999999999999999999999" + "99999999999999999999999999999999999969999999006]", + .flags = JSON_PARSER_FLAG_NUMBERS_AS_STRING, + }, + { + // i_number_neg_int_huge_exp.json + .input = "[-1e+9999]", + .flags = JSON_PARSER_FLAG_NUMBERS_AS_STRING, + }, + { + // i_number_pos_double_huge_exp.json + .input = "[1.5e+9999]", + .flags = JSON_PARSER_FLAG_NUMBERS_AS_STRING, + }, + { + // i_number_real_neg_overflow.json + .input = "[-123123e100000]", + .flags = JSON_PARSER_FLAG_NUMBERS_AS_STRING, + }, + { + // i_number_real_pos_overflow.json + .input = "[123123e100000]", + .flags = JSON_PARSER_FLAG_NUMBERS_AS_STRING, + }, + { + // i_number_real_underflow.json + .input = "[123e-10000000]", + .flags = JSON_PARSER_FLAG_NUMBERS_AS_STRING, + }, + { + // i_number_too_big_neg_int.json + .input = "[-123123123123123123123123123123]", + .flags = JSON_PARSER_FLAG_NUMBERS_AS_STRING + }, + { + // i_number_too_big_pos_int.json + .input = "[100000000000000000000]", + .flags = 
JSON_PARSER_FLAG_NUMBERS_AS_STRING, + }, + { + // i_number_very_big_negative_int.json + .input = "[-237462374673276894279832749832423479823246327846]", + .flags = JSON_PARSER_FLAG_NUMBERS_AS_STRING, + }, + { + // i_structure_500_nested_arrays.json + .input = + "[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[" + "[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[" + "[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[" + "[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[" + "[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[" + "[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[" + "[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[" + "[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[" + "[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[" + "[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[" + "]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]" + "]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]" + "]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]" + "]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]" + "]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]" + "]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]" + "]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]" + "]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]" + "]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]" + "]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]", + .limits = { .max_nesting = 500 }, + }, + /* From json.org */ + { + .input = "[\n" + " \"JSON Test Pattern pass1\",\n" + " {\"object with 1 member\":[\"array with 1 element\"]},\n" + " {},\n" + " [],\n" + " -42,\n" + " true,\n" + " false,\n" + " null,\n" + " {\n" + " \"integer\": 1234567890,\n" + " \"real\": -9876.543210,\n" + " \"e\": 0.123456789e-12,\n" + " \"E\": 1.234567890E+34,\n" + " \"\": 23456789012E66,\n" + " \"zero\": 0,\n" + " \"one\": 1,\n" + " \"space\": \" \",\n" + " \"quote\": \"\\\"\",\n" + " \"backslash\": \"\\\\\",\n" + " \"controls\": \"\\b\\f\\n\\r\\t\",\n" + " \"slash\": \"/ & \\/\",\n" + " \"alpha\": 
\"abcdefghijklmnopqrstuvwyz\",\n" + " \"ALPHA\": \"ABCDEFGHIJKLMNOPQRSTUVWYZ\",\n" + " \"digit\": \"0123456789\",\n" + " \"0123456789\": \"digit\",\n" + " \"special\": \"`1~!@#$%^&*()_+-={':[,]}|;.?\",\n" + " \"hex\": \"\\u0123\\u4567\\u89AB\\uCDEF\\uabcd\\uef4A\",\n" + " \"true\": true,\n" + " \"false\": false,\n" + " \"null\": null,\n" + " \"array\":[ ],\n" + " \"object\":{ },\n" + " \"address\": \"50 St. James Street\",\n" + " \"url\": \"http://www.JSON.org/\",\n" + " \"comment\": \"// /* */\": \" \",\n" + " \" s p a c e d \" :[1,2 , 3\n" + "\n" + ",\n" + "\n" + "4 , 5 , 6 ,7 ]," + "\"compact\":[1,2,3,4,5,6,7],\n" + " \"jsontext\": \"{\\\"object with 1 member\\\":" + "[\\\"array with 1 element\\\"]}\",\n" + " \"quotes\": \"" \\u0022 %22 0x22 034 "\",\n" + " \"\\/\\\\\\\"\\uCAFE\\uBABE\\uAB98\\uFCDE\\ubcda\\uef4A" + "\\b\\f\\n\\r\\t`1~!@#$%^&*()_+-=[]{}|;:',./<>?\"\n" + ": \"A key can be any string\"\n" + " },\n" + " 0.5 ,98.6\n" + ",\n" + "99.44\n" + ",\n" + "\n" + "1066,\n" + "1e1,\n" + "0.1e1,\n" + "1e-1,\n" + "1e00,2e+00,2e-00\n" + ",\"rosebud\"]", + .flags = JSON_PARSER_FLAG_NUMBERS_AS_STRING, + }, + { + .input = + "[[[[[[[[[[[[[[[[[[[\"Not too deep\"]]]]]]]]]]]]]]]]]]]", + }, + { + .input = + "{\n" + " \"JSON Test Pattern pass3\": {\n" + " \"The outermost value\": \"must be an object or array.\",\n" + " \"In this test\": \"It is an object.\"\n" + " }\n" + "}\n", + }, + /* Test cases from Jansson project (http://www.digip.org/jansson/) + Copyright (c) 2009-2020 Petri Lehtinen + MIT License (see COPYING.MIT) + */ + { + // valid/utf-surrogate-four-byte-encoding/input + .input = "[\"\\uD834\\uDD1E surrogate, four-byte UTF-8\"]\n", + }, + { + // valid/real-subnormal-number/input + .input = "[1.8011670033376514e-308]\n", + }, + { + // valid/empty-object-in-array/input + .input = "[{}]\n", + }, + { + // valid/one-byte-utf-8/input + .input = "[\"\\u002c one-byte UTF-8\"]\n", + }, + { + // valid/two-byte-utf-8/input + .input = "[\"\\u0123 two-byte UTF-8\"]\n", + }, 
+ { + // valid/real-positive-exponent/input + .input = "[1e+2]\n", + }, + { + // valid/negative-zero/input + .input = "[-0]\n", + }, + { + // valid/simple-int-1/input + .input = "[1]\n", + }, + { + // valid/escaped-utf-control-char/input + .input = "[\"\\u0012 escaped control character\"]\n", + }, + { + // valid/three-byte-utf-8/input + .input = "[\"\\u0821 three-byte UTF-8\"]\n", + }, + { + // valid/empty-object/input + .input = "{}\n", + }, + { + // valid/empty-string/input + .input = "[\"\"]\n", + }, + { + // valid/real-exponent/input + .input = "[123e45]\n", + .flags = JSON_PARSER_FLAG_NUMBERS_AS_STRING, + }, + { + // valid/string-escapes/input + .input = "[\"\\\"\\\\\\/\\b\\f\\n\\r\\t\"]\n", + }, + { + // valid/simple-ascii-string/input + .input = "[\"abcdefghijklmnopqrstuvwxyz1234567890 \"]\n", + }, + { + // valid/real-negative-exponent/input + .input = "[1e-2]\n", + }, + { + // valid/real-underflow/input + .input = "[123e-10000000]\n", + }, + { + // valid/null/input + .input = "[null]\n", + }, + { + // valid/real-fraction-exponent/input + .input = "[123.456e78]\n", + .flags = JSON_PARSER_FLAG_NUMBERS_AS_STRING, + }, + { + // valid/true/input + .input = "[true]\n", + }, + { + // valid/simple-object/input + .input = "{\"a\":[]}\n", + }, + { + // valid/real-capital-e-negative-exponent/input + .input = "[1E-2]\n", + }, + { + // valid/empty-array/input + .input = "[]\n", + }, + { + // valid/negative-one/input + .input = "[-1]\n", + }, + { + // valid/short-string/input + .input = "[\"a\"]\n", + }, + { + // valid/simple-int-123/input + .input = "[123]\n", + }, + { + // valid/false/input + .input = "[false]\n", + }, + { + // valid/simple-int-0/input + .input = "[0]\n", + }, + { + // valid/real-capital-e/input + .input = "[1E22]\n", + .flags = JSON_PARSER_FLAG_NUMBERS_AS_STRING, + }, + { + // valid/complex-array/input + .input = "[1,2,3,4,\n" + "\"a\", \"b\", \"c\",\n" + "{\"foo\": \"bar\", \"core\": \"dump\"},\n" + "true, false, true, true, null, false\n" + "]\n", + 
}, + { + // valid/real-capital-e-positive-exponent/input + .input = "[1E+2]\n", + }, + { + // valid/negative-int/input + .input = "[-123]\n", + }, + { + // valid/utf-8-string/input + .input = "[\"\xe2\x82\xac\xc3\xbe\xc4\xb1\xc5\x93\xc9" + "\x99\xc3\x9f\xc3\xb0 some utf-8 \xc4\xb8\xca\x92" + "\xc3\x97\xc5\x8b\xc2\xb5\xc3\xa5\xc3\xa4\xc3\xb6" + "\xf0\x9d\x84\x9e\"]\n", + }, + { + // valid/simple-real/input + .input = "[123.456789]\n", + /* Limits */ + }, + { + .input = + "[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[" + "]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]", + }, + { + .input = + "[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[" + "[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[" + "[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[" + "]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]" + "]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]" + "]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]", + .limits = { .max_nesting = 105 }, + }, + { + .input = + "[1,2,3,4,5,6,7,8,9,0,\n" + " 1,2,3,4,5,6,7,8,9,0,\n" + " 1,2,3,4,5,6,7,8,9,0,\n" + " 1,2,3,4,5,6,7,8,9,0,\n" + " 1,2,3,4,5,6,7,8,9,0]\n", + .limits = { .max_list_items = 50 }, + }, + { + .input = + "\"123456789012345678901234567890" + "123456789012345678901234567890" + "123456789012345678901234567890\"", + .limits = { .max_string_size = 90 }, + }, + { + .input = + "123456789012345678901234567890" + "123456789012345678901234567890" + "123456789012345678901234567890", + .flags = JSON_PARSER_FLAG_NUMBERS_AS_STRING, + .limits = { .max_string_size = 90 }, + }, + { + .input = + "{\"123456789012345678901234567890" + "123456789012345678901234567890" + "123456789012345678901234567890\": 90}", + .limits = { .max_name_size = 90 }, + }, + /* Problems found by fuzzer */ + { + .input = "0e11111111111111110", + .flags = JSON_PARSER_FLAG_STRICT, + }, +}; + +static const unsigned int valid_parse_test_count = + N_ELEMENTS(valid_parse_tests); + +static void test_json_parse_valid(void) +{ + unsigned int i; + + for (i = 0; i < valid_parse_test_count; i++) T_BEGIN { + const struct json_valid_parse_test *test; + struct istream *input; + 
struct json_parser *parser; + const char *text, *error = NULL; + unsigned int pos, text_len; + int ret = 0; + + test = &valid_parse_tests[i]; + + text = test->input; + text_len = strlen(text); + input = test_istream_create_data(text, text_len); + + test_begin(t_strdup_printf("json text valid [%d]", i)); + + parser = json_parser_init(input, + &test->limits, test->flags, NULL, NULL); + + for (pos = 0; pos <= text_len && ret == 0; pos++) { + test_istream_set_size(input, pos); + ret = json_parse_more(parser, &error); + if (ret < 0) { + if (debug) + i_debug("DATA: `%s'", text); + break; + } + } + test_out_reason_quiet("parse success (trickle)", + ret > 0, error); + + json_parser_deinit(&parser); + + i_stream_seek(input, 0); + parser = json_parser_init(input, + &test->limits, test->flags, NULL, NULL); + + test_istream_set_size(input, text_len); + ret = json_parse_more(parser, &error); + if (ret < 0) { + if (debug) + i_debug("DATA: `%s'", text); + } + test_out_reason_quiet("parse success (buffered)", + ret > 0, error); + json_parser_deinit(&parser); + + test_end(); + + i_stream_unref(&input); + + } T_END; +} + +/* + * Test: invalid json + */ + +struct json_invalid_parse_test { + const char *input; + size_t input_len; + struct json_limits limits; + enum json_parser_flags flags; + bool base64; +}; + +static const struct json_invalid_parse_test +invalid_parse_tests[] = { + /* Test cases from https://github.com/nst/JSONTestSuite.git + Copyright (c) 2016 Nicolas Seriot + MIT License (see COPYING.MIT) + */ + { + // n_array_1_true_without_comma.json + .input = "[1 true]", + }, + { + // n_array_a_invalid_utf8.json + .input = "[a\xe5]", + }, + { + // n_array_colon_instead_of_comma.json + .input = "[\"\": 1]", + }, + { + // n_array_comma_after_close.json + .input = "[\"\"],", + }, + { + // n_array_comma_and_number.json + .input = "[,1]", + }, + { + // n_array_double_comma.json + .input = "[1,,2]", + }, + { + // n_array_double_extra_comma.json + .input = "[\"x\",,]", + }, + { + // 
n_array_extra_close.json + .input = "[\"x\"]]", + }, + { + // n_array_extra_comma.json + .input = "[\"\",]", + }, + { + // n_array_incomplete_invalid_value.json + .input = "[x", + }, + { + // n_array_incomplete.json + .input = "[\"x\"", + }, + { + // n_array_inner_array_no_comma.json + .input = "[3[4]]", + }, + { + // n_array_invalid_utf8.json + .input = "[\xff]", + }, + { + // n_array_items_separated_by_semicolon.json + .input = "[1:2]", + }, + { + // n_array_just_comma.json + .input = "[,]", + }, + { + // n_array_just_minus.json + .input = "[-]", + }, + { + // n_array_missing_value.json + .input = "[ , \"\"]", + }, + { + // n_array_newlines_unclosed.json + .input = "[\"a\",\n" + "4\n" + ",1,", + }, + { + // n_array_number_and_comma.json + .input = "[1,]", + }, + { + // n_array_number_and_several_commas.json + .input = "[1,,]", + }, + { + // n_array_spaces_vertical_tab_formfeed.json + .input = "[\"\va\"\\f]", + }, + { + // n_array_star_inside.json + .input = "[*]", + }, + { + // n_array_unclosed.json + .input = "[\"\"", + }, + { + // n_array_unclosed_trailing_comma.json + .input = "[1,", + }, + { + // n_array_unclosed_with_new_lines.json + .input = "[1,\n" + "1\n" + ",1", + }, + { + // n_array_unclosed_with_object_inside.json + .input = "[{}", + }, + { + // n_incomplete_false.json + .input = "[fals]", + }, + { + // n_incomplete_null.json + .input = "[nul]", + }, + { + // n_incomplete_true.json + .input = "[tru]", + }, + { + // n_multidigit_number_then_00.json + .input = "123\x00", + .input_len = 4, + }, + { + // n_number_0.1.2.json + .input = "[0.1.2]", + }, + { + // n_number_-01.json + .input = "[-01]", + }, + { + // n_number_0.3e.json + .input = "[0.3e]", + }, + { + // n_number_0.3e+.json + .input = "[0.3e+]", + }, + { + // n_number_0_capital_E.json + .input = "[0E]", + }, + { + // n_number_0_capital_E+.json + .input = "[0E+]", + }, + { + // n_number_0.e1.json + .input = "[0.e1]", + }, + { + // n_number_0e.json + .input = "[0e]", + }, + { + // n_number_0e+.json 
+ .input = "[0e+]", + }, + { + // n_number_1_000.json + .input = "[1 000.0]", + }, + { + // n_number_1.0e-.json + .input = "[1.0e-]", + }, + { + // n_number_1.0e.json + .input = "[1.0e]", + }, + { + // n_number_1.0e+.json + .input = "[1.0e+]", + }, + { + // n_number_-1.0..json + .input = "[-1.0.]", + }, + { + // n_number_1eE2.json + .input = "[1eE2]", + }, + { + // n_number_.-1.json + .input = "[.-1]", + }, + { + // n_number_+1.json + .input = "[+1]", + }, + { + // n_number_.2e-3.json + .input = "[.2e-3]", + }, + { + // n_number_2.e-3.json + .input = "[2.e-3]", + }, + { + // n_number_2.e+3.json + .input = "[2.e+3]", + }, + { + // n_number_2.e3.json + .input = "[2.e3]", + }, + { + // n_number_-2..json + .input = "[-2.]", + }, + { + // n_number_9.e+.json + .input = "[9.e+]", + }, + { + // n_number_expression.json + .input = "[1+2]", + }, + { + // n_number_hex_1_digit.json + .input = "[0x1]", + }, + { + // n_number_hex_2_digits.json + .input = "[0x42]", + }, + { + // n_number_infinity.json + .input = "[Infinity]", + }, + { + // n_number_+Inf.json + .input = "[+Inf]", + }, + { + // n_number_Inf.json + .input = "[Inf]", + }, + { + // n_number_invalid+-.json + .input = "[0e+-1]", + }, + { + // n_number_invalid-negative-real.json + .input = "[-123.123foo]", + }, + { + // n_number_invalid-utf-8-in-bigger-int.json + .input = "[123\xe5]", + }, + { + // n_number_invalid-utf-8-in-exponent.json + .input = "[1e1\xe5]", + }, + { + // n_number_invalid-utf-8-in-int.json + .input = "[0\xe5]\n", + }, + { + // n_number_++.json + .input = "[++1234]", + }, + { + // n_number_minus_infinity.json + .input = "[-Infinity]", + }, + { + // n_number_minus_sign_with_trailing_garbage.json + .input = "[-foo]", + }, + { + // n_number_minus_space_1.json + .input = "[- 1]", + }, + { + // n_number_-NaN.json + .input = "[-NaN]", + }, + { + // n_number_NaN.json + .input = "[NaN]", + }, + { + // n_number_neg_int_starting_with_zero.json + .input = "[-012]", + }, + { + // 
n_number_neg_real_without_int_part.json + .input = "[-.123]", + }, + { + // n_number_neg_with_garbage_at_end.json + .input = "[-1x]", + }, + { + // n_number_real_garbage_after_e.json + .input = "[1ea]", + }, + { + // n_number_real_with_invalid_utf8_after_e.json + .input = "[1e\xe5]", + }, + { + // n_number_real_without_fractional_part.json + .input = "[1.]", + }, + { + // n_number_starting_with_dot.json + .input = "[.123]", + }, + { + // n_number_U+FF11_fullwidth_digit_one.json + .input = "[\xef\xbc\x91]", + }, + { + // n_number_with_alpha_char.json + .input = "[1.8011670033376514H-308]", + }, + { + // n_number_with_alpha.json + .input = "[1.2a-3]", + }, + { + // n_number_with_leading_zero.json + .input = "[012]", + }, + { + // n_object_bad_value.json + .input = "[\"x\", truth]", + }, + { + // n_object_bracket_key.json + .input = "{[: \"x\"}\n", + }, + { + // n_object_comma_instead_of_colon.json + .input = "{\"x\", null}", + }, + { + // n_object_double_colon.json + .input = "{\"x\"::\"b\"}", + }, + { + // n_object_emoji.json + .input = "{\xf0\x9f\x87\xa8\xf0\x9f\x87\xad}", + }, + { + // n_object_garbage_at_end.json + .input = "{\"a\":\"a\" 123}", + }, + { + // n_object_key_with_single_quotes.json + .input = "{key: 'value'}", + }, + { + // n_object_missing_colon.json + .input = "{\"a\" b}", + }, + { + // n_object_missing_key.json + .input = "{:\"b\"}", + }, + { + // n_object_missing_semicolon.json + .input = "{\"a\" \"b\"}", + }, + { + // n_object_missing_value.json + .input = "{\"a\":", + }, + { + // n_object_no-colon.json + .input = "{\"a\"", + }, + { + // n_object_non_string_key_but_huge_number_instead.json + .input = "{9999E9999:1}", + }, + { + // n_object_non_string_key.json + .input = "{1:1}", + }, + { + // n_object_pi_in_key_and_trailing_comma.json + .input = "{\"\xb9\":\"0\",}", + }, + { + // n_object_repeated_null_null.json + .input = "{null:null,null:null}", + }, + { + // n_object_several_trailing_commas.json + .input = "{\"id\":0,,,,,}", + }, + { + // 
n_object_single_quote.json + .input = "{'a':0}", + }, + { + // n_object_trailing_comma.json + .input = "{\"id\":0,}", + }, + { + // n_object_trailing_comment.json + .input = "{\"a\":\"b\"}/**/", + }, + { + // n_object_trailing_comment_open.json + .input = "{\"a\":\"b\"}/**//", + }, + { + // n_object_trailing_comment_slash_open_incomplete.json + .input = "{\"a\":\"b\"}/", + }, + { + // n_object_trailing_comment_slash_open.json + .input = "{\"a\":\"b\"}//", + }, + { + // n_object_two_commas_in_a_row.json + .input = "{\"a\":\"b\",,\"c\":\"d\"}", + }, + { + // n_object_unquoted_key.json + .input = "{a: \"b\"}", + }, + { + // n_object_unterminated-value.json + .input = "{\"a\":\"a", + }, + { + // n_object_with_single_string.json + .input = "{ \"foo\" : \"bar\", \"a\" }", + }, + { + // n_object_with_trailing_garbage.json + .input = "{\"a\":\"b\"}#", + }, + { + // n_single_space.json + .input = " ", + }, + { + // n_string_1_surrogate_then_escape.json + .input = "[\"\\uD800\\\"]", + }, + { + // n_string_1_surrogate_then_escape_u1.json + .input = "[\"\\uD800\\u1\"]", + }, + { + // n_string_1_surrogate_then_escape_u1x.json + .input = "[\"\\uD800\\u1x\"]", + }, + { + // n_string_1_surrogate_then_escape_u.json + .input = "[\"\\uD800\\u\"]", + }, + { + // n_string_accentuated_char_no_quotes.json + .input = "[\xc3\xa9]", + }, + { + // n_string_backslash_00.json + .input = "[\"\\\x00\"]", + }, + { + // n_string_escaped_backslash_bad.json + .input = "[\"\\\\\\\"]", + }, + { + // n_string_escaped_ctrl_char_tab.json + .input = "[\"\\\t\"]", + }, + { + // n_string_escaped_emoji.json + .input = "[\"\\\xf0\x9f\x8c\x80\"]", + }, + { + // n_string_escape_x.json + .input = "[\"\\x00\"]", + }, + { + // n_string_incomplete_escaped_character.json + .input = "[\"\\u00A\"]", + }, + { + // n_string_incomplete_escape.json + .input = "[\"\\\"]", + }, + { + // n_string_incomplete_surrogate_escape_invalid.json + .input = "[\"\\uD800\\uD800\\x\"]", + }, + { + // n_string_incomplete_surrogate.json + 
.input = "[\"\\uD834\\uDd\"]", + }, + { + // n_string_invalid_backslash_esc.json + .input = "[\"\\a\"]", + }, + { + // n_string_invalid_unicode_escape.json + .input = "[\"\\uqqqq\"]", + }, + { + // n_string_invalid_utf8_after_escape.json + .input = "[\"\\\xe5\"]", + }, + { + // n_string_invalid-utf-8-in-escape.json + .input = "[\"\\u\xe5\"]", + }, + { + // n_string_leading_uescaped_thinspace.json + .input = "[\\u0020\"asd\"]", + }, + { + // n_string_no_quotes_with_bad_escape.json + .input = "[\\n]", + }, + { + // n_string_single_doublequote.json + .input = "\"", + }, + { + // n_string_single_quote.json + .input = "['single quote']", + }, + { + // n_string_single_string_no_double_quotes.json + .input = "abc", + }, + { + // n_string_start_escape_unclosed.json + .input = "[\"\\", + }, + { + // n_string_unescaped_crtl_char.json + .input = "[\"a\x00a\"]", + }, + { + // n_string_unescaped_newline.json + .input = "[\"new\n" + "line\"]", + }, + { + // n_string_unescaped_tab.json + .input = "[\"\t\"]", + }, + { + // n_string_unicode_CapitalU.json + .input = "\"\\UA66D\"", + }, + { + // n_string_with_trailing_garbage.json + .input = "\"\"x", + }, + { + // n_structure_angle_bracket_..json + .input = "<.>", + }, + { + // n_structure_angle_bracket_null.json + .input = "[<null>]", + }, + { + // n_structure_array_trailing_garbage.json + .input = "[1]x", + }, + { + // n_structure_array_with_extra_array_close.json + .input = "[1]]", + }, + { + // n_structure_array_with_unclosed_string.json + .input = "[\"asd]", + }, + { + // n_structure_ascii-unicode-identifier.json + .input = "a\xc3\xa5", + }, + { + // n_structure_capitalized_True.json + .input = "[True]", + }, + { + // n_structure_close_unopened_array.json + .input = "1]", + }, + { + // n_structure_comma_instead_of_closing_brace.json + .input = "{\"x\": true,", + }, + { + // n_structure_double_array.json + .input = "[][]", + }, + { + // n_structure_end_array.json + .input = "]", + }, + { + // n_structure_incomplete_UTF8_BOM.json + 
.input = "\xef\xbb{}", + }, + { + // n_structure_lone-invalid-utf-8.json + .input = "\xe5", + }, + { + // n_structure_lone-open-bracket.json + .input = "[", + }, + { + // n_structure_no_data.json + .input = "", + }, + { + // n_structure_null-byte-outside-string.json + .input = "[\x00]", + }, + { + // n_structure_number_with_trailing_garbage.json + .input = "2@", + }, + { + // n_structure_object_followed_by_closing_object.json + .input = "{}}", + }, + { + // n_structure_object_unclosed_no_value.json + .input = "{\"\":", + }, + { + // n_structure_object_with_comment.json + .input = "{\"a\":/*comment*/\"b\"}", + }, + { + // n_structure_object_with_trailing_garbage.json + .input = "{\"a\": true} \"x\"", + }, + { + // n_structure_open_array_apostrophe.json + .input = "['", + }, + { + // n_structure_open_array_comma.json + .input = "[,", + }, + { + // n_structure_open_array_open_object.json + .input = "[{", + }, + { + // n_structure_open_array_open_string.json + .input = "[\"a", + }, + { + // n_structure_open_array_string.json + .input = "[\"a\"", + }, + { + // n_structure_open_object_close_array.json + .input = "{]", + }, + { + // n_structure_open_object_comma.json + .input = "{,", + }, + { + // n_structure_open_object.json + .input = "{", + }, + { + // n_structure_open_object_open_array.json + .input = "{[", + }, + { + // n_structure_open_object_open_string.json + .input = "{\"a", + }, + { + // n_structure_open_object_string_with_apostrophes.json + .input = "{'a'", + }, + { + // n_structure_open_open.json + .input = "[\"\\{[\"\\{[\"\\{[\"\\{", + }, + { + // n_structure_single_eacute.json + .input = "\xe9", + }, + { + // n_structure_single_star.json + .input = "*", + }, + { + // n_structure_trailing_#.json + .input = "{\"a\":\"b\"}#{}", + }, + { + // n_structure_U+2060_word_joined.json + .input = "[\xe2\x81\xa0]", + }, + { + // n_structure_uescaped_LF_before_string.json + .input = "[\\u000A\"\"]", + }, + { + // n_structure_unclosed_array.json + .input = "[1", + }, + { + 
// n_structure_unclosed_array_partial_null.json + .input = "[ false, nul", + }, + { + // n_structure_unclosed_array_unfinished_false.json + .input = "[ true, fals", + }, + { + // n_structure_unclosed_array_unfinished_true.json + .input = "[ false, tru", + }, + { + // n_structure_unclosed_object.json + .input = "{\"asd\":\"asd\"", + }, + { + // n_structure_unicode-identifier.json + .input = "\xc3\xa5", + }, + { + // n_structure_UTF8_BOM_no_data.json + .input = "\xef\xbb\xbf", + }, + { + // n_structure_whitespace_formfeed.json + .input = "[\f]", + }, + { + // n_structure_whitespace_U+2060_word_joiner.json + .input = "[\xe2\x81\xa0]", + }, + { + // i_number_huge_exp.json + .input = "[0.4e0066999999999999999999999999999999999" + "99999999999999999999999999999999999999999999999" + "99999999999999999999999999999999999969999999006]", + }, + { + // i_number_neg_int_huge_exp.json + .input = "[-1e+9999]", + }, + { + // i_number_pos_double_huge_exp.json + .input = "[1.5e+9999]", + }, + { + // i_number_real_neg_overflow.json + .input = "[-123123e100000]", + }, + { + // i_number_real_pos_overflow.json + .input = "[123123e100000]", +#if 0 // FIXME: check once float is implemented + }, + { + // i_number_real_underflow.json + .input = "[123e-10000000]", +#endif + }, + { + // i_number_too_big_neg_int.json + .input = "[-123123123123123123123123123123]", + }, + { + // i_number_too_big_pos_int.json + .input = "[100000000000000000000]", + }, + { + // i_number_very_big_negative_int.json + .input = "[-237462374673276894279832749832423479823246327846]", + }, + { + // i_object_key_lone_2nd_surrogate.json + .input = "{\"\\uDFAA\":0}", + }, + { + // i_string_1st_surrogate_but_2nd_missing.json + .input = "[\"\\uDADA\"]", + }, + { + // i_string_1st_valid_surrogate_2nd_invalid.json + .input = "[\"\\uD888\\u1234\"]", + }, + { + // i_string_incomplete_surrogate_and_escape_valid.json + .input = "[\"\\uD800\\n\"]", + }, + { + // i_string_incomplete_surrogate_pair.json + .input = "[\"\\uDd1ea\"]", + 
}, + { + // i_string_incomplete_surrogates_escape_valid.json + .input = "[\"\\uD800\\uD800\\n\"]", + }, + { + // i_string_invalid_lonely_surrogate.json + .input = "[\"\\ud800\"]", + }, + { + // i_string_invalid_surrogate.json + .input = "[\"\\ud800abc\"]", + }, + { + // i_string_invalid_utf-8.json + .input = "[\"\xff\"]", + }, + { + // i_string_inverted_surrogates_U+1D11E.json + .input = "[\"\\uDd1e\\uD834\"]", + }, + { + // i_string_iso_latin_1.json + .input = "[\"\xe9\"]", + }, + { + // i_string_lone_second_surrogate.json + .input = "[\"\\uDFAA\"]", + }, + { + // i_string_lone_utf8_continuation_byte.json + .input = "[\"\x81\"]", + }, + { + // i_string_not_in_unicode_range.json + .input = "[\"\xf4\xbf\xbf\xbf\"]", + }, + { + // i_string_overlong_sequence_2_bytes.json + .input = "[\"\xc0\xaf\"]", + }, + { + // i_string_overlong_sequence_6_bytes.json + .input = "[\"\xfc\x83\xbf\xbf\xbf\xbf\"]", + }, + { + // i_string_overlong_sequence_6_bytes_null.json + .input = "[\"\xfc\x80\x80\x80\x80\x80\"]", + }, + { + // i_string_truncated-utf-8.json + .input = "[\"\xe0\xff\"]", + }, + { + // i_string_utf16BE_no_BOM.json + .input = "\x00[\x00\"\x00\xe9\x00\"\x00]", + .input_len = 10 + }, + { + // i_string_utf16LE_no_BOM.json + .input = "[\x00\"\x00\xe9\x00\"\x00]\x00", + .input_len = 10 + }, + { + // i_string_UTF-16LE_with_BOM.json + .input = "\xff\xfe[\x00\"\x00\xe9\x00\"\x00]\x00", + .input_len = 12 + }, + { + // i_string_UTF-8_invalid_sequence.json + .input = "[\"\xe6\x97\xa5\xd1\x88\xfa\"]", + }, + { + // i_string_UTF8_surrogate_U+D800.json + .input = "[\"\xed\xa0\x80\"]", + }, + { + // i_structure_UTF-8_BOM_empty_object.json + .input = "\xef\xbb\xbf{}", + }, + /* From json.org */ + { + .input = "[\"Unclosed array\"", + }, + { + .input = "{unquoted_key: \"keys must be quoted\"}", + }, + { + .input = "[\"extra comma\",]", + }, + { + .input = "[\"double extra comma\",,]", + }, + { + .input = "[ , \"<-- missing value\"]", + }, + { + .input = "[\"Comma after the close\"],", + 
}, + { + .input = "[\"Extra close\"]]", + }, + { + .input = "{\"Extra comma\": true,}", + }, + { + .input = "{\"Extra value after close\": true} \"misplaced quoted value\"", + }, + { + .input = "{\"Illegal expression\": 1 + 2}", + }, + { + .input = "{\"Illegal invocation\": alert()}", + }, + { + .input = "{\"Numbers cannot have leading zeroes\": 013}", + }, + { + .input = "{\"Numbers cannot be hex\": 0x14}", + }, + { + .input = "[\"Illegal backslash escape: \\x15\"]", + }, + { + .input = "[\\naked]", + }, + { + .input = "[\"Illegal backslash escape: \\017\"]", + }, + { + .input = "{\"Missing colon\" null}", + }, + { + .input = "{\"Double colon\":: null}", + }, + { + .input = "{\"Comma instead of colon\", null}", + }, + { + .input = "[\"Colon instead of comma\": false]", + }, + { + .input = "[\"Bad value\", truth]", + }, + { + .input = "['single quote']", + }, + { + .input = "[\"\ttab\tcharacter\tin\tstring\t\"]", + }, + { + .input = "[\"tab\\ character\\ in\\ string\\ \"]", + }, + { + .input = "[\"line\n" + "break\"]", + }, + { + .input = "[\"line\\\n" + "break\"]", + }, + { + .input = "[0e]", + }, + { + .input = "[0e+]", + }, + { + .input = "[0e+-1]", + }, + { + .input = "{\"Comma instead if closing brace\": true,", + }, + { + .input = "[\"mismatch\"}", + }, + /* Test cases from Jansson project (http://www.digip.org/jansson/) + Copyright (c) 2009-2020 Petri Lehtinen + MIT License (see COPYING.MIT) + */ + { + // invalid/ascii-unicode-identifier/input + .input = "a\xc3\xa5\n", + }, + { + // invalid/brace-comma/input + .input = "{,\n", + }, + { + // invalid/extra-comma-in-multiline-array/input + .input = "[1,\n" + "2,\n" + "3,\n" + "4,\n" + "5,\n" + "]\n", + }, + { + // invalid/recursion-depth/input + .input = + "[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[" + "[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[" + "[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[" + "[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[" + 
"[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[" + "[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[" + "[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[" + "[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[" + "[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[" + "[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[" + "[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[" + "[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[" + "[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[" + "[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[" + "[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[" + "[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[" + "[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[" + "[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[" + "[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[" + "[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[" + "[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[" + "[[[[[[[[[[[[[[", // ... + }, + { + // invalid/real-truncated-at-e/input + .input = "[1e]\n", + }, + { + // invalid/object-in-unterminated-array/input + .input = "[{}\n", + }, + { + // invalid/too-big-negative-integer/input + .input = "[-123123123123123123123123123123]\n", + }, + { + // invalid/unterminated-string/input + .input = "[\"a\n", + }, + { + // invalid/unterminated-object-and-array/input + .input = "{[\n", + }, + { + // invalid/invalid-negative-integer/input + .input = "[-123foo]\n", + }, + { + // invalid/minus-sign-without-number/input + .input = "[-foo]\n", + }, + { + // invalid/invalid-second-surrogate/input + .input = "[\"\\uD888\\u3210 " + "(first surrogate and invalid second surrogate)\"]\n", + }, + { + // invalid/object-unterminated-value/input + .input = "{\"a\":\"a\n", + }, + { + // invalid/null-byte-outside-string/input + .input = "[\x00\n", + }, + { + // invalid/extra-comma-in-array/input + .input = "[1,]\n", + }, + { + // invalid/garbage-after-newline/input + .input = "[1,2,3]\n" + "foo\n", + }, + { + // 
invalid/real-negative-overflow/input + .input = "[-123123e100000]\n", + }, + { + // invalid/real-truncated-at-point/input + .input = "[1.]\n", + }, + { + // invalid/invalid-unicode-escape/input + .input = "[\"\\uqqqq <-- invalid unicode escape\"]\n", + }, + { + // invalid/object-apostrophes/input + .input = "{'a'\n", + }, + { + // invalid/lone-open-brace/input + .input = "{\n", + }, + { + // invalid/truncated-unicode-surrogate/input + .input = "[\"\\uDADA (first surrogate without the second)\"]\n", + }, + { + // invalid/bracket-comma/input + .input = "[,\n", + }, + { + // invalid/real-garbage-after-e/input + .input = "[1ea]\n", + }, + { + // invalid/empty/input + .input = "", + }, + { + // invalid/garbage-at-the-end/input + .input = "[1,2,3]foo\n", + }, + { + // invalid/object-no-colon/input + .input = "{\"a\"\n", + }, + { + // invalid/object-no-value/input + .input = "{\"a\":\n", + }, + { + // invalid/integer-starting-with-zero/input + .input = "[012]\n", + }, + { + // invalid/unterminated-empty-key/input + .input = "{\"\n", + }, + { + // invalid/invalid-escape/input + .input = "[\"\\a <-- invalid escape\"]\n", + }, + { + // invalid/lone-open-bracket/input + .input = "[\n", + }, + { + // invalid/unterminated-array-and-object/input + .input = "[{\n", + }, + { + // invalid/invalid-identifier/input + .input = "[troo\n", + }, + { + // invalid/too-big-positive-integer/input + .input = "[123123123123123123123123123123]\n", + }, + { + // invalid/unicode-identifier/input + .input = "\xc3\xa5\n", + }, + { + // invalid/null-escape-in-string/input + .input = "[\"null escape \\u0000 not allowed\"]\n", + }, + { + // invalid/bracket-one-comma/input + .input = "[1,\n", + }, + { + // invalid/unterminated-key/input + .input = "{\"a\n", + }, + { + // invalid/apostrophe/input + .input = "['\n", + }, + { + // invalid/invalid-negative-real/input + .input = "[-123.123foo]\n", + }, + { + // invalid/null-byte-in-string/input + .input = "[\"null byte \x00 not allowed\"]\n", + }, + { + // 
invalid/null-byte-in-object-key/input + .input = "{\"foo\\u0000bar\": 42}", + }, + { + // invalid/real-positive-overflow/input + .input = "[123123e100000]\n", + }, + { + // invalid/lone-second-surrogate/input + .input = "[\"\\uDFAA (second surrogate on it's own)\"]\n", + }, + { + // invalid/negative-integer-starting-with-zero/input + .input = "[-012]\n", + }, + { + // invalid/tab-character-in-string/input + .input = "[\"\t <-- tab character\"]\n", + }, + { + // invalid/object-garbage-at-end/input + .input = "{\"a\":\"a\" 123}\n", + }, + { + // invalid/unterminated-array/input + .input = "[\"a\"\n", + }, + { + // invalid-unicode/restricted-utf-8/input + .input = "[\"\xfd\"]\n", + }, + { + // invalid-unicode/encoded-surrogate-half/input + .input = "[\"\xed\xa2\xab <-- encoded surrogate half\"]\n", + }, + { + // invalid-unicode/overlong-3-byte-encoding/input + .input = "[\"\xe0\x80\xa2 <-- overlong encoding\"]\n", + }, + { + // invalid-unicode/invalid-utf-8-in-identifier/input + .input = "[a\xe5]\n", + }, + { + // invalid-unicode/lone-invalid-utf-8/input + .input = "\xe5\n", + }, + { + // invalid-unicode/invalid-utf-8-in-string/input + .input = "[\"\xe5 <-- invalid UTF-8\"]\n", + }, + { + // invalid-unicode/invalid-utf-8-in-real-after-e/input + .input = "[1e\xe5]\n", + }, + { + // invalid-unicode/truncated-utf-8/input + .input = "[\"\xe0\xff <-- truncated UTF-8\"]\n", + }, + { + // invalid-unicode/invalid-utf-8-after-backslash/input + .input = "[\"\\\xe5\"]\n", + }, + { + // invalid-unicode/overlong-ascii-encoding/input + .input = "[\"\xc1\"]\n", + }, + { + // invalid-unicode/invalid-utf-8-in-escape/input + .input = "[\"\\u\xe5\"]\n", + }, + { + // invalid-unicode/overlong-4-byte-encoding/input + .input = "[\"\xf0\x80\x80\xa2 <-- overlong encoding\"]\n", + }, + { + // invalid-unicode/invalid-utf-8-in-exponent/input + .input = "[1e1\xe5]\n", + }, + { + // invalid-unicode/lone-utf-8-continuation-byte/input + .input = "[\"\x81\"]\n", + }, + { + // 
invalid-unicode/invalid-utf-8-in-int/input + .input = "[0\xe5]\n", + }, + { + // invalid-unicode/invalid-utf-8-in-array/input + .input = "[\xe5]\n", + }, + { + // invalid-unicode/not-in-unicode-range/input + .input = "[\"\xf4\xbf\xbf\xbf\"]\n", + }, + { + // invalid-unicode/invalid-utf-8-in-bigger-int/input + .input = "[123\xe5]\n", + }, + /* Original Dovecot json-parser tests */ + { + .input = "{", + }, + { + .input = "{:}", + }, + { + .input = "{\"foo\":}", + }, + { + .input = "{\"foo\" []}", + }, + { + .input = "{\"foo\": [1}", + }, + { + .input = "{\"foo\": [1,]}", + }, + { + .input = "{\"foo\": 1,}", + }, + { + .input = "{\"foo\": 1.}}", + }, + { + .input = "{\"foo\": 1},{}", + }, + { + .input = "{\"foo\": \"\\ud808\"}", + }, + { + .input = "{\"foo\": \"\\udfff\"}", + }, + { + .input = "{\"foo\": \"\\uyyyy\"}", + }, + { + .input = "{\"a\":\"", + }, + { + .input = "{\"a\":nul", + }, + { + .input = "{\"a\":fals", + }, + { + .input = "{\"a\":tru", + }, + /* Limits */ + { + .input = + "[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[" + "]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]", + .limits = { .max_nesting = 31 }, + }, + { + .input = + "[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[" + "[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[" + "[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[" + "]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]" + "]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]" + "]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]", + .limits = { .max_nesting = 104 }, + }, + { + .input = + "[1,2,3,4,5,6,7,8,9,0,\n" + " 1,2,3,4,5,6,7,8,9,0,\n" + " 1,2,3,4,5,6,7,8,9,0,\n" + " 1,2,3,4,5,6,7,8,9,0,\n" + " 1,2,3,4,5,6,7,8,9,0]\n", + .limits = { .max_list_items = 49 }, + }, + { + .input = + "\"123456789012345678901234567890" + "123456789012345678901234567890" + "123456789012345678901234567890\"", + .limits = { .max_string_size = 89 }, + }, + { + .input = + "123456789012345678901234567890" + "123456789012345678901234567890" + "123456789012345678901234567890", + .flags = JSON_PARSER_FLAG_NUMBERS_AS_STRING, + .limits = { .max_string_size = 89 }, + }, + { + 
.input = + "{\"123456789012345678901234567890" + "123456789012345678901234567890" + "123456789012345678901234567890\": 90}", + .limits = { .max_name_size = 89 }, + }, + /* Additional tests */ + { + .input = "\"\\xFF\\xFF\\xFF\"", + + }, + /* Problems found by fuzzer */ + { + .input = "ICJ9XHU10QAAAPxlXQ==", + .flags = JSON_PARSER_FLAG_STRICT, + .base64 = TRUE, + }, + { + .input = "IiBcdTBEMNk=", + .flags = JSON_PARSER_FLAG_STRICT, + .base64 = TRUE, + }, + { + .input = + "Ilx1ZDgzZFx1ZGUzOVswLDMuNDZFMiw1ZTUsMCwzLDVlNSwzLjIs" + "Mjc4My42RTIsNWU1LDMuMjc4NUUwLDM2RTIsNSwzLjIsMiwyODUs" + "MzUsMy40NiwzLjQ2RTIsNWU1LDAsMy40NjYsMCwzLjQ2RTIsMy40" + "Miw1ZTUsMy4yLDI3ODVFMCwzLjQ4LDMuNDZFMCwzLjQ2RTIsNWU1" + "LDMuMjg1RTAsMy40ODVFMCwzLjQ2RTIsNWU1LDAsMy40Niw1ZTYs" + "My4wLDMuNkUyLDVlNSwzLjI3ODVFMCwzLjQ2LDAsMy41LDMuMiwy" + "Nzg1RTAsMy40NkUyLDVlNSwyLDUzLjI3ODVFMCwzLjYsMCwzLjUs" + "MCwzLjQsNTI1LDMuMjc4NUUwLDMuNDIsNWU1LDMuNCwzLjQ2NDZF" + "Miw1ZTUsMy41LDIsNWU1LDMuNDIsNWU1LDMuMiwyNzg1RTAsMy40" + "OCwzLjQ2RTAsMy40NkUyLDVlNSwzLjIsMjc4NUUwLDMuNDg1RTAs" + "My40NkUyLDVlNSwwLDMuNDYsNWU2LDMuMCwzLjZFMiw1ZTUsMy4y" + "Nzg1RTAsMy40NiwwLDMuNSwzLjIsMjc4NUUwLDMuNDZFMiw1ZTUs" + "Miw1My4yNzg1RTAsMy42LDAsMy41LDAsMy40LDUyNSwzLjI3ODVF" + "MCwzLjQ2RTIsNWU2LDVlNSwwLDMuNDY2LDAsMy40NkUxLDVlNSwz" + "LjUsMiw1ZTUsMy40Miw1ZTUsMy4yLDI3ODVFMCwzLjQ4LDMuNDZF" + "MCwzLjQ2RTIsNWU1LDMuMiwyNzg1RTAsMy40ODVFMCwzLjQ2RTIs" + "NWU1LDAsMy40Niw1ZTYsMy4wLDMuNkUyLDVlNSwzLjI3ODVFMCwz" + "LjQ2LDAsMy41LDMuMiwyNzg1RTAsMy40NkUyLDVlNSwyLDUzLjI3" + "ODVFMCwzLjYsMCwzLjUsMCwzLjQsNTI1LDMuMjc4NUUwLDMuNDZF" + "Miw1ZTYsMCwzLjU1NjgsMy40MCwzLjQ2RTIsNWU1LDMuNDYsMCwz" + "LjQ2RTIsNTMuNDZFMiwyNWU1LDUzLjI3ODVFMCwzLjYsMCwzLjUs" + "MCwzLjQsNTI1LDMuMjc4NUUwLDMuNDZFMiw1ZTYsMCwzRTIsNTU2" + "OCwzLjIsNWU1LDMuNSwyLDVlNSwzLjQyLDVlNSwzLjIsMjc4NUUw" + "LDMuMiwyNzg1RTAsMy40OCwzLjQ1RTAsMy4yLDI3ODVFMCw4LjQz" + "NUU1LDVlNSwwLDMuMjUsMy40NjQsMy40NjIsNTMuMjc4NUUwLDMu" + "NDZFMiwzNUUwMCwzLjQ2NiwwLDNlNSwzLjIsNiwwLDMuNDZFMiw1" + 
"ZTUsMy41LDIsNWU1XHVkODNkXHVkY2U5XHVkODNkXHVkZTM5XHVk" + "ODNkXHVkYzhkXHVkODNkXHVkZTM5XHVkODNkXHVkYzZlOVx1ZDgz" + "ZFx1ZGUzOFx1ZDgzZFx1ZGMzZFx1ZDgzZFx1ZGUzOVx1ZDgzZFx1" + "ZGM2ZTkMdWQ4M2RcdWRlMzlcdWQ4M2RcdWRjOGRcdWQ4M2RcdWRl" + "MzlcdWQ4M2RcdWRlMzlcdWQ4M2RcdWRjNmU5XHVkODNkXHVkZTM5" + "XHVkODNkXHVkYzhkXHVkODNkXHVkZTM5XHVkODNkXHVkYzY5XHVk" + "ODNkJXVkZTM4XHVkODNkXHVkYzZkXHVkLDMuNDIsNWU1LDMuMiw4" + "RTcsNTIwMy40OCwzLjQ2RTAsMy40NkUyLDVlNSwzLjIsMjc4NUUw" + "LDMuNDg1RTAsMy40NkUyLDVlNSwwLDMuNDYsNWU2LDMuMCwzLjZF" + "Miw1ZTUsMy4yNzg1RTAsMy40NiwwLDMuNSwzLjIsMjc4NUUwLDMu" + "NDZFMiw1ZTUsMiw1My4yNzg1RTAsMy42LDAsMy41LDAsMy40LDUy" + "NSwzLjI3ODVFMCwzLjQ2RTIsNWU2LDAsMzZFMiw1NTY4LDMuNDAs" + "My40NkUyLDVlNSwzLjQ2LDAsMy40NkUyLDUzLjQ0ODVFMCwzLjQ2" + "RTIsNWU1ODNkXHVkLDAsMy40Niw1ZTYsMy4wLDMuNkUyLDVlNSwz" + "LjI3ODVFMCwzLjQ2LDAsMy41LDMuMiwyNzg1RTAsMy40NkUyLDVl" + "NSwyLDUzLjI3ODVFMCwzLjYsMCwzLjUsMCwzLjQsNTI1LDMzLjQ2" + "MiwyLDI4NSwzNSwzLjQ2RTIsNWU1LDMuNCwzLjQ2RTIsMjc4NGUz" + "OVx1ZDgzZFx1ZGM2ZTlcdWQ4M2SuipuazMajimQ4M2RcdWRjOGRc" + "dWQ4M2RcdWRlMzlcdWQ4M2RcdWRjNmVcdWQ4M2RcdTZFMiwyNjYs" + "MCwzLjQ2NSwzLjQyLDVlNWRlMzhcdWQ4M2RcdWRjM2RcdWQ4M2Rc" + "dWRjNmU5XHVkODNkXHVkZTMsMy4yLDI3ODVFMCwzLjIsMjc4NUUw" + "LDMuNDgsMy40NUUwLDMuMiwyNzg1RTAsOC40MzVFNSw1ZTUsMCwz" + "LjIsMy40LDMuNDYyLDUzOFwuMjc4NUV1ZDgzZFx1MCwzLjQ2RTIs" + "MzVFMCwzLjQ2RTIsNWU1LDI1RTAsMy42RTIsNWVkNSwzLmMyMzc4" + "ZCgy", + .flags = JSON_PARSER_FLAG_STRICT, + .limits = { + .max_name_size = 1024U, + .max_string_size = 1024U, + .max_nesting = 10U, + .max_list_items = JSON_DEFAULT_MAX_LIST_ITEMS, + }, + .base64 = TRUE, + }, +}; + +static const unsigned int invalid_parse_test_count = + N_ELEMENTS(invalid_parse_tests); + +static void test_json_parse_invalid(void) +{ + unsigned int i; + + for (i = 0; i < invalid_parse_test_count; i++) T_BEGIN { + const struct json_invalid_parse_test *test; + struct istream *input; + struct json_parser *parser; + const char *text, *error = NULL; + unsigned int pos, text_len; + int ret = 0; + + test = 
&invalid_parse_tests[i]; + + text = test->input; + text_len = test->input_len; + if (text_len == 0) + text_len = strlen(text); + input = test_istream_create_data(text, text_len); + if (test->base64) { + struct istream *inputb64 = + i_stream_create_base64_decoder(input); + i_stream_unref(&input); + input = inputb64; + } + + test_begin(t_strdup_printf("json text invalid [%d]", i)); + + parser = json_parser_init(input, + &test->limits, test->flags, NULL, NULL); + + for (pos = 0; pos <= text_len && ret == 0; pos++) { + test_istream_set_size(input, pos); + ret = json_parse_more(parser, &error); + if (ret < 0) + break; + if (ret > 0) { + if (debug) + i_debug("DATA: `%s'", text); + } + } + test_out_reason_quiet("parse failure (trickle)", + ret < 0, error); + + json_parser_deinit(&parser); + + i_stream_seek(input, 0); + parser = json_parser_init(input, + &test->limits, test->flags, NULL, NULL); + + test_istream_set_size(input, text_len); + ret = json_parse_more(parser, &error); + if (ret > 0) { + if (debug) + i_debug("DATA: `%s'", text); + } + test_out_reason_quiet("parse failure (buffered)", + ret < 0, error); + json_parser_deinit(&parser); + + test_end(); + + i_stream_unref(&input); + } T_END; +} + +int main(int argc, char *argv[]) +{ + int c; + + static void (*test_functions[])(void) = { + test_json_parse_valid, + test_json_parse_invalid, + NULL + }; + + while ((c = getopt(argc, argv, "D")) > 0) { + switch (c) { + case 'D': + debug = TRUE; + break; + default: + i_fatal("Usage: %s [-D]", argv[0]); + } + } + + return test_run(test_functions); +}