From: Tobias Stoeckmann Date: Sun, 20 Mar 2022 16:22:07 +0000 (+0100) Subject: json_tokener_parse_ex: handle out of memory errors X-Git-Tag: json-c-0.17-20230812~16^2 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=refs%2Fpull%2F759%2Fhead;p=thirdparty%2Fjson-c.git json_tokener_parse_ex: handle out of memory errors Do not silently truncate values or skip entries if out of memory errors occur. Proof of Concept: - Create poc.c, a program which creates an eight-megabyte JSON object with key "A" and a long run of "B"s as its value, one of them written as the Unicode escape \u0042: ```c #include <err.h> #include <stdio.h> #include <string.h> #include "json.h" #define STR_LEN (8 * 1024 * 1024) #define STR_PREFIX "{ \"A\": \"" #define STR_SUFFIX "\\u0042\" }" int main(void) { char *str; struct json_tokener *tok; struct json_object *obj; if ((tok = json_tokener_new()) == NULL) errx(1, "json_tokener_new"); if ((str = malloc(STR_LEN)) == NULL) err(1, "malloc"); memset(str, 'B', STR_LEN); memcpy(str, STR_PREFIX, sizeof(STR_PREFIX) - 1); memcpy(str + STR_LEN - sizeof(STR_SUFFIX), STR_SUFFIX, sizeof(STR_SUFFIX)); obj = json_tokener_parse(str); free(str); printf("%p\n", obj); if (obj != NULL) { printf("%.*s\n", 50, json_object_to_json_string(obj)); json_object_put(obj); } json_tokener_free(tok); return 0; } ``` - Compile and run poc, assuming you have enough free heap space: ``` gcc $(pkg-config --cflags --libs json-c) -o poc poc.c ./poc 0x559421e15de0 { "A": "BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB ``` - Reduce available heap and run again, which leads to truncation: ``` ulimit -d 10000 ./poc 0x555a5b453de0 { "A": "B" } ``` - Compile json-c with this change and run with reduced heap again: ``` ulimit -d 10000 ./poc (nil) ``` The output is limited to 50 characters, i.e. json-c parses the 8 MB string correctly but the poc does not print all of it to the screen. The truncation occurs because the parser tries to append all characters up to the Unicode-escaped 'B' (\u0042) at once. 
Since memory is limited to 10 MB, there is not enough for this operation. The parser does not fail but continues normally. Another possibility is to create a JSON file close to 2 GB and run a program on a system with a limited amount of RAM, i.e. around 3 GB. But ulimit restrictions are much easier for proofs of concept. Treat memory errors correctly and abort operations. --- diff --git a/json_tokener.c b/json_tokener.c index 0c09b66e..af03a857 100644 --- a/json_tokener.c +++ b/json_tokener.c @@ -103,6 +103,7 @@ static const char *json_tokener_errors[] = { "success", "continue", "nesting too deep", + "out of memory", "unexpected end of data", "unexpected character", "null expected", @@ -284,11 +285,24 @@ struct json_object *json_tokener_parse_verbose(const char *str, enum json_tokene /* ADVANCE_CHAR() macro: * Increments str & tok->char_offset. - * For convenience of existing conditionals, returns the old value of c (0 on eof) + * For convenience of existing conditionals, returns the old value of c (0 on eof). * Implicit inputs: c var */ #define ADVANCE_CHAR(str, tok) (++(str), ((tok)->char_offset)++, c) +/* printbuf_memappend_checked(p, s, l) macro: + * Add string s of length l to printbuffer p. + * If operation fails abort parse operation with memory error. 
+ */ +#define printbuf_memappend_checked(p, s, l) \ + do { \ + if (printbuf_memappend((p), (s), (l)) < 0) \ + { \ + tok->err = json_tokener_error_memory; \ + goto out; \ + } \ + } while (0) + /* End optimization macro defs */ struct json_object *json_tokener_parse_ex(struct json_tokener *tok, const char *str, int len) @@ -336,7 +350,11 @@ struct json_object *json_tokener_parse_ex(struct json_tokener *tok, const char * char *tmplocale; tmplocale = setlocale(LC_NUMERIC, NULL); if (tmplocale) + { oldlocale = strdup(tmplocale); + if (oldlocale == NULL) + return NULL; + } setlocale(LC_NUMERIC, "C"); } #endif @@ -358,7 +376,7 @@ struct json_object *json_tokener_parse_ex(struct json_tokener *tok, const char * if (c == '/' && !(tok->flags & JSON_TOKENER_STRICT)) { printbuf_reset(tok->pb); - printbuf_memappend_fast(tok->pb, &c, 1); + printbuf_memappend_checked(tok->pb, &c, 1); state = json_tokener_state_comment_start; } else @@ -376,14 +394,20 @@ struct json_object *json_tokener_parse_ex(struct json_tokener *tok, const char * saved_state = json_tokener_state_object_field_start; current = json_object_new_object(); if (current == NULL) + { + tok->err = json_tokener_error_memory; goto out; + } break; case '[': state = json_tokener_state_eatws; saved_state = json_tokener_state_array; current = json_object_new_array(); if (current == NULL) + { + tok->err = json_tokener_error_memory; goto out; + } break; case 'I': case 'i': @@ -486,7 +510,10 @@ struct json_object *json_tokener_parse_ex(struct json_tokener *tok, const char * } current = json_object_new_double(is_negative ? 
-INFINITY : INFINITY); if (current == NULL) + { + tok->err = json_tokener_error_memory; goto out; + } saved_state = json_tokener_state_finish; state = json_tokener_state_eatws; goto redo_char; @@ -496,7 +523,7 @@ struct json_object *json_tokener_parse_ex(struct json_tokener *tok, const char * { int size; int size_nan; - printbuf_memappend_fast(tok->pb, &c, 1); + printbuf_memappend_checked(tok->pb, &c, 1); size = json_min(tok->st_pos + 1, json_null_str_len); size_nan = json_min(tok->st_pos + 1, json_nan_str_len); if ((!(tok->flags & JSON_TOKENER_STRICT) && @@ -519,7 +546,10 @@ struct json_object *json_tokener_parse_ex(struct json_tokener *tok, const char * { current = json_object_new_double(NAN); if (current == NULL) + { + tok->err = json_tokener_error_memory; goto out; + } saved_state = json_tokener_state_finish; state = json_tokener_state_eatws; goto redo_char; @@ -548,7 +578,7 @@ struct json_object *json_tokener_parse_ex(struct json_tokener *tok, const char * tok->err = json_tokener_error_parse_comment; goto out; } - printbuf_memappend_fast(tok->pb, &c, 1); + printbuf_memappend_checked(tok->pb, &c, 1); break; case json_tokener_state_comment: @@ -559,12 +589,12 @@ struct json_object *json_tokener_parse_ex(struct json_tokener *tok, const char * { if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) { - printbuf_memappend_fast(tok->pb, case_start, - str - case_start); + printbuf_memappend_checked(tok->pb, case_start, + str - case_start); goto out; } } - printbuf_memappend_fast(tok->pb, case_start, 1 + str - case_start); + printbuf_memappend_checked(tok->pb, case_start, 1 + str - case_start); state = json_tokener_state_comment_end; } break; @@ -577,19 +607,19 @@ struct json_object *json_tokener_parse_ex(struct json_tokener *tok, const char * { if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) { - printbuf_memappend_fast(tok->pb, case_start, - str - case_start); + printbuf_memappend_checked(tok->pb, case_start, + str - case_start); goto out; } } - 
printbuf_memappend_fast(tok->pb, case_start, str - case_start); + printbuf_memappend_checked(tok->pb, case_start, str - case_start); MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf); state = json_tokener_state_eatws; } break; case json_tokener_state_comment_end: - printbuf_memappend_fast(tok->pb, &c, 1); + printbuf_memappend_checked(tok->pb, &c, 1); if (c == '/') { MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf); @@ -609,28 +639,31 @@ struct json_object *json_tokener_parse_ex(struct json_tokener *tok, const char * { if (c == tok->quote_char) { - printbuf_memappend_fast(tok->pb, case_start, - str - case_start); + printbuf_memappend_checked(tok->pb, case_start, + str - case_start); current = json_object_new_string_len(tok->pb->buf, tok->pb->bpos); if (current == NULL) + { + tok->err = json_tokener_error_memory; goto out; + } saved_state = json_tokener_state_finish; state = json_tokener_state_eatws; break; } else if (c == '\\') { - printbuf_memappend_fast(tok->pb, case_start, - str - case_start); + printbuf_memappend_checked(tok->pb, case_start, + str - case_start); saved_state = json_tokener_state_string; state = json_tokener_state_string_escape; break; } if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) { - printbuf_memappend_fast(tok->pb, case_start, - str - case_start); + printbuf_memappend_checked(tok->pb, case_start, + str - case_start); goto out; } } @@ -643,7 +676,7 @@ struct json_object *json_tokener_parse_ex(struct json_tokener *tok, const char * case '"': case '\\': case '/': - printbuf_memappend_fast(tok->pb, &c, 1); + printbuf_memappend_checked(tok->pb, &c, 1); state = saved_state; break; case 'b': @@ -652,15 +685,15 @@ struct json_object *json_tokener_parse_ex(struct json_tokener *tok, const char * case 't': case 'f': if (c == 'b') - printbuf_memappend_fast(tok->pb, "\b", 1); + printbuf_memappend_checked(tok->pb, "\b", 1); else if (c == 'n') - printbuf_memappend_fast(tok->pb, "\n", 1); + printbuf_memappend_checked(tok->pb, "\n", 1); else if (c 
== 'r') - printbuf_memappend_fast(tok->pb, "\r", 1); + printbuf_memappend_checked(tok->pb, "\r", 1); else if (c == 't') - printbuf_memappend_fast(tok->pb, "\t", 1); + printbuf_memappend_checked(tok->pb, "\t", 1); else if (c == 'f') - printbuf_memappend_fast(tok->pb, "\f", 1); + printbuf_memappend_checked(tok->pb, "\f", 1); state = saved_state; break; case 'u': @@ -720,8 +753,8 @@ struct json_object *json_tokener_parse_ex(struct json_tokener *tok, const char * /* High surrogate was not followed by a low surrogate * Replace the high and process the rest normally */ - printbuf_memappend_fast(tok->pb, - (char *)utf8_replacement_char, 3); + printbuf_memappend_checked(tok->pb, + (char *)utf8_replacement_char, 3); } tok->high_surrogate = 0; } @@ -730,14 +763,14 @@ struct json_object *json_tokener_parse_ex(struct json_tokener *tok, const char * { unsigned char unescaped_utf[1]; unescaped_utf[0] = tok->ucs_char; - printbuf_memappend_fast(tok->pb, (char *)unescaped_utf, 1); + printbuf_memappend_checked(tok->pb, (char *)unescaped_utf, 1); } else if (tok->ucs_char < 0x800) { unsigned char unescaped_utf[2]; unescaped_utf[0] = 0xc0 | (tok->ucs_char >> 6); unescaped_utf[1] = 0x80 | (tok->ucs_char & 0x3f); - printbuf_memappend_fast(tok->pb, (char *)unescaped_utf, 2); + printbuf_memappend_checked(tok->pb, (char *)unescaped_utf, 2); } else if (IS_HIGH_SURROGATE(tok->ucs_char)) { @@ -763,7 +796,7 @@ struct json_object *json_tokener_parse_ex(struct json_tokener *tok, const char * else if (IS_LOW_SURROGATE(tok->ucs_char)) { /* Got a low surrogate not preceded by a high */ - printbuf_memappend_fast(tok->pb, (char *)utf8_replacement_char, 3); + printbuf_memappend_checked(tok->pb, (char *)utf8_replacement_char, 3); } else if (tok->ucs_char < 0x10000) { @@ -771,7 +804,7 @@ struct json_object *json_tokener_parse_ex(struct json_tokener *tok, const char * unescaped_utf[0] = 0xe0 | (tok->ucs_char >> 12); unescaped_utf[1] = 0x80 | ((tok->ucs_char >> 6) & 0x3f); unescaped_utf[2] = 0x80 | 
(tok->ucs_char & 0x3f); - printbuf_memappend_fast(tok->pb, (char *)unescaped_utf, 3); + printbuf_memappend_checked(tok->pb, (char *)unescaped_utf, 3); } else if (tok->ucs_char < 0x110000) { @@ -780,12 +813,12 @@ struct json_object *json_tokener_parse_ex(struct json_tokener *tok, const char * unescaped_utf[1] = 0x80 | ((tok->ucs_char >> 12) & 0x3f); unescaped_utf[2] = 0x80 | ((tok->ucs_char >> 6) & 0x3f); unescaped_utf[3] = 0x80 | (tok->ucs_char & 0x3f); - printbuf_memappend_fast(tok->pb, (char *)unescaped_utf, 4); + printbuf_memappend_checked(tok->pb, (char *)unescaped_utf, 4); } else { /* Don't know what we got--insert the replacement char */ - printbuf_memappend_fast(tok->pb, (char *)utf8_replacement_char, 3); + printbuf_memappend_checked(tok->pb, (char *)utf8_replacement_char, 3); } state = saved_state; // i.e. _state_string or _state_object_field } @@ -800,7 +833,7 @@ struct json_object *json_tokener_parse_ex(struct json_tokener *tok, const char * * it. Put a replacement char in for the high surrogate * and pop back up to _state_string or _state_object_field. */ - printbuf_memappend_fast(tok->pb, (char *)utf8_replacement_char, 3); + printbuf_memappend_checked(tok->pb, (char *)utf8_replacement_char, 3); tok->high_surrogate = 0; tok->ucs_char = 0; tok->st_pos = 0; @@ -819,7 +852,7 @@ struct json_object *json_tokener_parse_ex(struct json_tokener *tok, const char * * Put a replacement char in for the high surrogate * and handle the escape sequence normally. 
*/ - printbuf_memappend_fast(tok->pb, (char *)utf8_replacement_char, 3); + printbuf_memappend_checked(tok->pb, (char *)utf8_replacement_char, 3); tok->high_surrogate = 0; tok->ucs_char = 0; tok->st_pos = 0; @@ -834,7 +867,7 @@ struct json_object *json_tokener_parse_ex(struct json_tokener *tok, const char * case json_tokener_state_boolean: { int size1, size2; - printbuf_memappend_fast(tok->pb, &c, 1); + printbuf_memappend_checked(tok->pb, &c, 1); size1 = json_min(tok->st_pos + 1, json_true_str_len); size2 = json_min(tok->st_pos + 1, json_false_str_len); if ((!(tok->flags & JSON_TOKENER_STRICT) && @@ -845,7 +878,10 @@ struct json_object *json_tokener_parse_ex(struct json_tokener *tok, const char * { current = json_object_new_boolean(1); if (current == NULL) + { + tok->err = json_tokener_error_memory; goto out; + } saved_state = json_tokener_state_finish; state = json_tokener_state_eatws; goto redo_char; @@ -859,7 +895,10 @@ struct json_object *json_tokener_parse_ex(struct json_tokener *tok, const char * { current = json_object_new_boolean(0); if (current == NULL) + { + tok->err = json_tokener_error_memory; goto out; + } saved_state = json_tokener_state_finish; state = json_tokener_state_eatws; goto redo_char; @@ -939,7 +978,7 @@ struct json_object *json_tokener_parse_ex(struct json_tokener *tok, const char * if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) { - printbuf_memappend_fast(tok->pb, case_start, case_len); + printbuf_memappend_checked(tok->pb, case_start, case_len); goto out; } } @@ -948,7 +987,7 @@ struct json_object *json_tokener_parse_ex(struct json_tokener *tok, const char * it might have been intended to be, and return a potentially more understandable error right away. However, if we're at the top-level, use the number as-is - because c can be part of a new object to parse on the + because c can be part of a new object to parse on the next call to json_tokener_parse(). 
*/ if (tok->depth > 0 && c != ',' && c != ']' && c != '}' && c != '/' && @@ -958,7 +997,7 @@ struct json_object *json_tokener_parse_ex(struct json_tokener *tok, const char * goto out; } if (case_len > 0) - printbuf_memappend_fast(tok->pb, case_start, case_len); + printbuf_memappend_checked(tok->pb, case_start, case_len); // Check for -Infinity if (tok->pb->buf[0] == '-' && case_len <= 1 && (c == 'i' || c == 'I')) @@ -993,7 +1032,10 @@ struct json_object *json_tokener_parse_ex(struct json_tokener *tok, const char * { current = json_object_new_int64(num64); if (current == NULL) + { + tok->err = json_tokener_error_memory; goto out; + } } else if (!tok->is_double && tok->pb->buf[0] != '-' && json_parse_uint64(tok->pb->buf, &numuint64) == 0) @@ -1009,13 +1051,19 @@ struct json_object *json_tokener_parse_ex(struct json_tokener *tok, const char * num64 = (uint64_t)numuint64; current = json_object_new_int64(num64); if (current == NULL) + { + tok->err = json_tokener_error_memory; goto out; + } } else { current = json_object_new_uint64(numuint64); if (current == NULL) + { + tok->err = json_tokener_error_memory; goto out; + } } } else if (tok->is_double && @@ -1024,7 +1072,10 @@ struct json_object *json_tokener_parse_ex(struct json_tokener *tok, const char * { current = json_object_new_double_s(numd, tok->pb->buf); if (current == NULL) + { + tok->err = json_tokener_error_memory; goto out; + } } else { @@ -1069,7 +1120,10 @@ struct json_object *json_tokener_parse_ex(struct json_tokener *tok, const char * case json_tokener_state_array_add: if (json_object_array_add(current, obj) != 0) + { + tok->err = json_tokener_error_memory; goto out; + } saved_state = json_tokener_state_array_sep; state = json_tokener_state_eatws; goto redo_char; @@ -1129,25 +1183,30 @@ struct json_object *json_tokener_parse_ex(struct json_tokener *tok, const char * { if (c == tok->quote_char) { - printbuf_memappend_fast(tok->pb, case_start, - str - case_start); + printbuf_memappend_checked(tok->pb, 
case_start, + str - case_start); obj_field_name = strdup(tok->pb->buf); + if (obj_field_name == NULL) + { + tok->err = json_tokener_error_memory; + goto out; + } saved_state = json_tokener_state_object_field_end; state = json_tokener_state_eatws; break; } else if (c == '\\') { - printbuf_memappend_fast(tok->pb, case_start, - str - case_start); + printbuf_memappend_checked(tok->pb, case_start, + str - case_start); saved_state = json_tokener_state_object_field; state = json_tokener_state_string_escape; break; } if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok)) { - printbuf_memappend_fast(tok->pb, case_start, - str - case_start); + printbuf_memappend_checked(tok->pb, case_start, + str - case_start); goto out; } } diff --git a/json_tokener.h b/json_tokener.h index a07e12ce..c1502e82 100644 --- a/json_tokener.h +++ b/json_tokener.h @@ -28,6 +28,7 @@ enum json_tokener_error json_tokener_success, json_tokener_continue, json_tokener_error_depth, + json_tokener_error_memory, json_tokener_error_parse_eof, json_tokener_error_parse_unexpected, json_tokener_error_parse_null,