str_append(dest, "\\\\");
break;
default:
- if (src < 32)
+ if (src < 0x20 || src >= 0x80)
str_printfa(dest, "\\u%04x", src);
else
str_append_c(dest, src);
}
}
+/* Append one UCS4 character to an already opened JSON string in escaped
+   form. Values below 0x80 are handed to json_append_escaped_char(); values
+   below UTF16_SURROGATE_BASE become a single \uXXXX escape; anything at or
+   above it is split with uni_split_surrogate() and written as two \uXXXX
+   escapes. */
+void json_append_escaped_ucs4(string_t *dest, unichar_t chr)
+{
+	unichar_t hi, lo;
+
+	if (chr < 0x80) {
+		/* 7-bit value: reuse the per-byte escaper */
+		json_append_escaped_char(dest, (unsigned char)chr);
+		return;
+	}
+	if (chr < UTF16_SURROGATE_BASE) {
+		/* one escape is enough */
+		str_printfa(dest, "\\u%04x", chr);
+		return;
+	}
+	/* needs two escapes: high half first, then low half */
+	uni_split_surrogate(chr, &hi, &lo);
+	str_printfa(dest, "\\u%04x\\u%04x", hi, lo);
+}
+
void ostream_escaped_json_format(string_t *dest, unsigned char src)
{
json_append_escaped_char(dest, src);
void json_append_escaped(string_t *dest, const char *src)
{
- for (; *src != '\0'; src++)
- json_append_escaped_char(dest, *src);
+	/* NUL-terminated convenience wrapper: measure the string once and
+	   let json_append_escaped_data() do the actual escaping. */
+	size_t len = strlen(src);
+
+	json_append_escaped_data(dest, (const unsigned char *)src, len);
}
void json_append_escaped_data(string_t *dest, const unsigned char *src, size_t size)
{
size_t i;
-
- for (i = 0; i < size; i++)
- json_append_escaped_char(dest, src[i]);
+	unichar_t chr;
+	int bytes;
+
+	/* Escape exactly size bytes of src into dest. At each position try
+	   to decode a character with uni_utf8_get_char_n(); when that does
+	   not yield a positive result, escape the single byte instead.
+	   NOTE(review): this relies on uni_utf8_get_char_n() returning the
+	   number of bytes consumed on success - confirm against unichar.h. */
+	i = 0;
+	while (i < size) {
+		bytes = uni_utf8_get_char_n(src + i, size - i, &chr);
+		if (bytes <= 0) {
+			/* nothing decoded here: emit this byte on its own */
+			json_append_escaped_char(dest, src[i]);
+			i++;
+			continue;
+		}
+		/* decoded a character: escape it and skip past its bytes */
+		json_append_escaped_ucs4(dest, chr);
+		i += bytes;
+	}
}
#ifndef JSON_PARSER_H
#define JSON_PARSER_H
+#include "unichar.h"
+
enum json_type {
/* { key: */
JSON_TYPE_OBJECT_KEY,
int json_parse_next_stream(struct json_parser *parser,
struct istream **input_r);
+/* Append a single UCS4 character, in escaped form, to an already opened JSON string. */
+void json_append_escaped_ucs4(string_t *dest, unichar_t chr);
/* Append data to already opened JSON string. src should be valid UTF-8 data. */
void json_append_escaped(string_t *dest, const char *src);
/* Same as json_append_escaped(), but append non-\0 terminated input. */
string_t *str = t_str_new(32);
test_begin("json_append_escaped()");
- json_append_escaped(str, "\b\f\r\n\t\"\\\001\002-\xC3\xA4");
- test_assert(strcmp(str_c(str), "\\b\\f\\r\\n\\t\\\"\\\\\\u0001\\u0002-\xC3\xA4") == 0);
+ json_append_escaped(str, "\b\f\r\n\t\"\\\001\002-\xC3\xA4\xf0\x90\x90\xb7");
+ test_assert(strcmp(str_c(str), "\\b\\f\\r\\n\\t\\\"\\\\\\u0001\\u0002-\\u00e4\\ud801\\udc37") == 0);
test_end();
}
+/* Checks json_append_escaped_data() on a length-delimited buffer that holds
+   control bytes, an embedded NUL, a 2-byte UTF-8 sequence and a 4-byte UTF-8
+   sequence, comparing the result against the expected escaped text. */
static void test_json_append_escaped_data(void)
{
static const unsigned char test_input[] =
- "\b\f\r\n\t\"\\\000\001\002-\xC3\xA4";
+ "\b\f\r\n\t\"\\\000\001\002-\xC3\xA4\xf0\x90\x90\xb7";
string_t *str = t_str_new(32);
- test_begin("json_append_escaped()");
+ /* label the test after the function it actually exercises */
+ test_begin("json_append_escaped_data()");
json_append_escaped_data(str, test_input, sizeof(test_input)-1);
- test_assert(strcmp(str_c(str), "\\b\\f\\r\\n\\t\\\"\\\\\\u0000\\u0001\\u0002-\xC3\xA4") == 0);
+ test_assert(strcmp(str_c(str), "\\b\\f\\r\\n\\t\\\"\\\\\\u0000\\u0001\\u0002-\\u00e4\\ud801\\udc37") == 0);
test_end();
}