git.ipfire.org Git - thirdparty/dovecot/core.git/commitdiff
lib: istream-jsonstr - Properly handle unicode input
author Aki Tuomi <aki.tuomi@dovecot.fi>
Fri, 3 Nov 2017 11:26:52 +0000 (13:26 +0200)
committer Timo Sirainen <tss@dovecot.fi>
Tue, 7 Nov 2017 17:40:09 +0000 (19:40 +0200)
Treat surrogates correctly, do not accept invalid codepoints

src/lib/istream-jsonstr.c
src/lib/json-parser.c
src/lib/test-istream-jsonstr.c

index e95cd6ba5567e1ea3779ca426027346baa4b4ffd..fceb50a53f6169e80036f137ce44f29a8fc0746b 100644 (file)
@@ -77,12 +77,34 @@ i_stream_json_unescape(const unsigned char *src, size_t len,
                *dest = '\t';
                break;
        case 'u': {
+               char chbuf[5] = {0};
+               unichar_t chr,chr2 = 0;
                buffer_t buf;
                if (len < 5)
                        return 5;
                buffer_create_from_data(&buf, dest, MAX_UTF8_LEN);
-               uni_ucs4_to_utf8_c(hex2dec(src+1, 4), &buf);
-               *src_size_r = 5;
+               memcpy(chbuf, src+1, 4);
+               if (str_to_uint32_hex(chbuf, &chr)<0)
+                       return -1;
+               if (UTF16_VALID_LOW_SURROGATE(chr))
+                       return -1;
+               /* if we encounter surrogate, we need another \\uxxxx */
+               if (UTF16_VALID_HIGH_SURROGATE(chr)) {
+                       if (len < 5+2+4)
+                               return 5+2+4;
+                       if (src[5] != '\\' && src[6] != 'u')
+                               return -1;
+                       memcpy(chbuf, src+7, 4);
+                       if (str_to_uint32_hex(chbuf, &chr2)<0)
+                               return -1;
+                       if (!UTF16_VALID_LOW_SURROGATE(chr2))
+                               return -1;
+                       chr = uni_join_surrogate(chr, chr2);
+               }
+               if (!uni_is_valid_ucs4(chr))
+                       return -1;
+               uni_ucs4_to_utf8_c(chr, &buf);
+               *src_size_r = 5 + (chr2>0?6:0);
                *dest_size_r = buf.used;
                return 0;
        }
@@ -146,8 +168,9 @@ static ssize_t i_stream_jsonstr_read(struct istream_private *stream)
                                stream->istream.stream_errno = EINVAL;
                                return -1;
                        } else if (ret2 > 0) {
-                               /* we need to get more bytes */
-                               i = 0;
+                               /* we need to get more bytes, do not consume
+                                  escape slash */
+                               i--;
                                extra = ret2;
                                break;
                        }
index acece1acb51102808f2b8435be92d9a9b78a5e0e..6d5a6c8d7efee1b6c2eb5c6158e8bc58d1dc67fc 100644 (file)
@@ -803,13 +803,9 @@ void json_append_escaped_data(string_t *dest, const unsigned char *src, size_t s
 
        for (i = 0; i < size;) {
                bytes = uni_utf8_get_char_n(src+i, size-i, &chr);
-               /* if it was valid unichar, encode + move forward by bytes */
-               if (bytes > 0) {
-                       json_append_escaped_ucs4(dest, chr);
-                       i += bytes;
-               /* encode as byte data */
-               } else {
-                       json_append_escaped_char(dest, src[i++]);
-               }
+               /* refuse to add invalid data */
+               i_assert(bytes > 0 && uni_is_valid_ucs4(chr));
+               json_append_escaped_ucs4(dest, chr);
+               i += bytes;
        }
 }
index 20d2626f7f0467689632d0fe1d84bf53fbafea14..c121a6a4e6d3af5b103bb92e2bed87cae5932d4f 100644 (file)
@@ -12,12 +12,19 @@ static const struct {
 } tests[] = {
        { "foo\\\\\\\"\\b\\f\\n\\r\\t\\u0001\\uffff\"",
          "foo\\\"\b\f\n\r\t\001\xEF\xBF\xBF", 0 },
+       { "\\ud801\\udc37\"", "\xf0\x90\x90\xb7", 0 }, /* valid codepoint */
        { "\"", "", 0 },
        { "foo\\?\"", "foo", EINVAL },
        { "foo\\?\"", "foo", EINVAL },
        { "", "", EPIPE },
        { "\\\"", "\"", EPIPE },
        { "foo", "foo", EPIPE },
+       { "\\ud801", "", EPIPE }, /* high surrogate alone */
+       { "\\udced\\udc37\"", "", EINVAL }, /* low surrogate before high */
+       { "\\ud8011\\udc37\"", "", EINVAL }, /* has extra 1 in middle */
+       { "hello \\udc37\"", "hello ", EINVAL }, /* low surrogate before high with valid prefix*/
+       { "hello \\ud801", "hello ", EPIPE }, /* high surrogate alone with valid prefix */
+       { "\\uabcg", "", EINVAL }, /* invalid hex value */
 };
 
 static void
@@ -77,6 +84,33 @@ static void test_istream_jsonstr_autoretry(void)
        test_end();
 }
 
+static void test_istream_jsonstr_partial(void)
+{
+       size_t len = 0;
+       const char *json_input = "hello\\u0060x\""; /* \u0060 is '`' */
+       const char *output = "hello`x";
+       const size_t json_input_len = strlen(json_input);
+       struct istream *input_data, *input;
+       /* a \uXXXX escape split across two reads must still be decoded */
+       test_begin("istream-jsonstr partial");
+
+       input_data = test_istream_create_data(json_input, json_input_len);
+       input = i_stream_create_jsonstr(input_data);
+       test_istream_set_size(input_data, 9); /* expose only "hello\u00": the escape is cut in half */
+       test_assert(i_stream_read(input) == 5); /* expect just "hello"; the partial escape is not output yet */
+       test_istream_set_size(input_data, json_input_len); /* now expose the whole input */
+       test_assert(i_stream_read(input) == 2); /* expect the decoded '`' plus 'x' */
+       test_assert(i_stream_read(input) == -1); /* nothing more after the closing quote (stream_errno is not checked here) */
+
+       test_assert(memcmp(i_stream_get_data(input, &len), output, I_MIN(len, strlen(output))) == 0 &&
+                   len == strlen(output)); /* I_MIN bounds the memcmp; the lengths must match too */
+
+       i_stream_unref(&input);
+       i_stream_unref(&input_data);
+
+       test_end();
+}
+
 void test_istream_jsonstr(void)
 {
        unsigned int i;
@@ -87,4 +121,5 @@ void test_istream_jsonstr(void)
                test_end();
        }
        test_istream_jsonstr_autoretry();
+       test_istream_jsonstr_partial();
 }