From: Tom Gundersen Date: Mon, 22 Dec 2014 13:53:40 +0000 (+0100) Subject: shared: json - support escaping utf16 surrogate pairs X-Git-Tag: v219~884 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=9bae67d49b861b1f142f1a1e27753fe08e63ade7;p=thirdparty%2Fsystemd.git shared: json - support escaping utf16 surrogate pairs We originally only supported escaping UCS-2 encoded characters (as \uxxxx). This only covers the Basic Multilingual Plane (BMP). Also support escaping UTF-16 surrogate pairs (of the form \uxxxx\uyyyy) to cover all of Unicode. --- diff --git a/src/shared/json.c b/src/shared/json.c index 47f801c8589..bb3d26f0e5e 100644 --- a/src/shared/json.c +++ b/src/shared/json.c @@ -53,6 +53,42 @@ static void inc_lines(unsigned *line, const char *s, size_t n) { } } +static int unhex_ucs2(const char *c, uint16_t *ret) { + int aa, bb, cc, dd; + uint16_t x; + + assert(c); + assert(ret); + + aa = unhexchar(c[0]); + if (aa < 0) + return -EINVAL; + + bb = unhexchar(c[1]); + if (bb < 0) + return -EINVAL; + + cc = unhexchar(c[2]); + if (cc < 0) + return -EINVAL; + + dd = unhexchar(c[3]); + if (dd < 0) + return -EINVAL; + + x = ((uint16_t) aa << 12) | + ((uint16_t) bb << 8) | + ((uint16_t) cc << 4) | + ((uint16_t) dd); + + if (x <= 0) + return -EINVAL; + + *ret = x; + + return 0; +} + static int json_parse_string(const char **p, char **ret) { _cleanup_free_ char *s = NULL; size_t n = 0, allocated = 0; @@ -119,39 +155,40 @@ static int json_parse_string(const char **p, char **ret) { else if (*c == 't') ch = '\t'; else if (*c == 'u') { - int aa, bb, cc, dd; uint16_t x; + int r; - aa = unhexchar(c[1]); - if (aa < 0) - return -EINVAL; + r = unhex_ucs2(c + 1, &x); + if (r < 0) + return r; - bb = unhexchar(c[2]); - if (bb < 0) - return -EINVAL; + c += 5; - cc = unhexchar(c[3]); - if (cc < 0) - return -EINVAL; + if (!GREEDY_REALLOC(s, allocated, n + 4)) + return -ENOMEM; - dd = unhexchar(c[4]); - if (dd < 0) + if (!utf16_is_surrogate(x)) + n += utf8_encode_unichar(s + n, x); + else if 
(utf16_is_trailing_surrogate(x)) return -EINVAL; + else { + uint16_t y; + if (c[0] != '\\' || c[1] != 'u') + return -EINVAL; - x = ((uint16_t) aa << 12) | - ((uint16_t) bb << 8) | - ((uint16_t) cc << 4) | - ((uint16_t) dd); + r = unhex_ucs2(c + 2, &y); + if (r < 0) + return r; - if (x <= 0) - return -EINVAL; + c += 6; - if (!GREEDY_REALLOC(s, allocated, n + 4)) - return -ENOMEM; + if (!utf16_is_trailing_surrogate(y)) + return -EINVAL; + + n += utf8_encode_unichar(s + n, utf16_surrogate_pair_to_unichar(x, y)); + } - n += utf8_encode_unichar(s + n, x); - c += 5; continue; } else return -EINVAL; diff --git a/src/test/test-json.c b/src/test/test-json.c index e53e8ed50f1..b09131891cf 100644 --- a/src/test/test-json.c +++ b/src/test/test-json.c @@ -99,6 +99,9 @@ int main(int argc, char *argv[]) { test_one("\"\xef\xbf\xbd\"", JSON_STRING, "\xef\xbf\xbd", JSON_END); test_one("\"\\ufffd\"", JSON_STRING, "\xef\xbf\xbd", JSON_END); test_one("\"\\uf\"", -EINVAL); + test_one("\"\\ud800a\"", -EINVAL); + test_one("\"\\udc00\\udc00\"", -EINVAL); + test_one("\"\\ud801\\udc37\"", JSON_STRING, "\xf0\x90\x90\xb7", JSON_END); return 0; }