From ea9afe0064adbcc5ff7c8d44026e996f4c8e3e89 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Sun, 30 Aug 2020 19:52:10 +0200 Subject: [PATCH] shared/json: reject non-utf-8 strings MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit JSON strings must be utf-8-clean. We also verify this in json_parse_string() so we would reject a message with invalid utf-8 anyway. It would probably be slightly cheaper to detect non-conforming strings in serialization, but then we'd have to fail serialization. By doing this early, we give the caller a chance to handle the error nicely. The test is adjusted to contain a valid utf-8 string after decoding of the utf-32 encoding in json ("विवेकख्यातिरविप्लवा हानोपायः।", something about the cessation of ignorance). --- src/shared/json.c | 9 ++++++++- src/test/test-json.c | 2 +- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/shared/json.c b/src/shared/json.c index 11ad6091a42..e938e59ab60 100644 --- a/src/shared/json.c +++ b/src/shared/json.c @@ -405,6 +405,9 @@ int json_variant_new_stringn(JsonVariant **ret, const char *s, size_t n) { return 0; } + if (!utf8_is_valid_n(s, n)) /* JSON strings must be valid UTF-8 */ + return -EUCLEAN; + r = json_variant_new(&v, JSON_VARIANT_STRING, n + 1); if (r < 0) return r; @@ -636,8 +639,12 @@ int json_variant_new_array_strv(JsonVariant **ret, char **l) { return r; w->is_reference = true; - } else + } else { + if (!utf8_is_valid_n(l[v->n_elements], k)) /* JSON strings must be valid UTF-8 */ + return -EUCLEAN; + memcpy(w->string, l[v->n_elements], k+1); + } } v->normalized = true; diff --git a/src/test/test-json.c b/src/test/test-json.c index 032619a4252..3295287a677 100644 --- a/src/test/test-json.c +++ b/src/test/test-json.c @@ -543,7 +543,7 @@ int main(int argc, char *argv[]) { test_variant("{\"k\": \"v\", \"foo\": [1, 2, 3], \"bar\": {\"zap\": null}}", test_1); test_variant("{\"mutant\": [1, null, \"1\", 
{\"1\": [1, \"1\"]}], \"thisisaverylongproperty\": 1.27}", test_2); - test_variant("{\"foo\" : \"\\uDBFF\\uDFFF\\\"\\uD9FF\\uDFFFFFF\\\"\\uDBFF\\uDFFF\\\"\\uD9FF\\uDFFF\\uDBFF\\uDFFFF\\uDBFF\\uDFFF\\uDBFF\\uDFFF\\uDBFF\\uDFFF\\uDBFF\\uDFFF\\\"\\uD9FF\\uDFFFFF\\\"\\uDBFF\\uDFFF\\\"\\uD9FF\\uDFFF\\uDBFF\\uDFFF\"}", NULL); + test_variant("{\"foo\" : \"\\u0935\\u093f\\u0935\\u0947\\u0915\\u0916\\u094d\\u092f\\u093e\\u0924\\u093f\\u0930\\u0935\\u093f\\u092a\\u094d\\u0932\\u0935\\u093e\\u0020\\u0939\\u093e\\u0928\\u094b\\u092a\\u093e\\u092f\\u0903\\u0964\"}", NULL); test_variant("[ 0, -0, 0.0, -0.0, 0.000, -0.000, 0e0, -0e0, 0e+0, -0e-0, 0e-0, -0e000, 0e+000 ]", test_zeroes); -- 2.47.3