]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/shared/json.c
shared: json - support escaping utf16 surrogate pairs
[thirdparty/systemd.git] / src / shared / json.c
CommitLineData
e7eebcfc
LP
1/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3/***
4 This file is part of systemd.
5
6 Copyright 2014 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20***/
21
22#include <sys/types.h>
23#include <math.h>
24
25#include "macro.h"
26#include "log.h"
27#include "util.h"
28#include "utf8.h"
29#include "json.h"
30
/* Tokenizer states. json_tokenize() keeps the current state between calls in
 * the caller-provided opaque state pointer. */
enum {
        /* Initial state (0): json_tokenize() resets the line counter to 1 and
         * continues as if in STATE_VALUE. */
        STATE_NULL = 0,

        /* A value, or an opening/closing bracket or brace, is accepted next. */
        STATE_VALUE = 1,

        /* A value was just completed: ':', ',', '}' or ']' is accepted next. */
        STATE_VALUE_POST = 2,
};
36
/* Adds the number of '\n' bytes found in the first n bytes of s to *line.
 * If line is NULL nothing is counted and s is not inspected. */
static void inc_lines(unsigned *line, const char *s, size_t n) {
        const char *cursor, *end, *newline;

        if (!line)
                return;

        cursor = s;
        end = s + n;

        /* Jump from newline to newline; each search only covers the bytes
         * that remain between the cursor and the end of the range. */
        while ((newline = memchr(cursor, '\n', (size_t) (end - cursor)))) {
                (*line)++;
                cursor = newline + 1;
        }
}
55
9bae67d4
TG
/* Decodes the four hexadecimal digits c[0..3] into a 16 bit code unit.
 *
 * Returns 0 and stores the value in *ret on success. Returns -EINVAL if
 * unhexchar() rejects any of the four characters (checked left to right,
 * stopping at the first rejected one) or if the decoded value is zero. */
static int unhex_ucs2(const char *c, uint16_t *ret) {
        uint16_t value = 0;
        unsigned i;

        assert(c);
        assert(ret);

        for (i = 0; i < 4; i++) {
                int nibble;

                nibble = unhexchar(c[i]);
                if (nibble < 0)
                        return -EINVAL;

                /* Most significant digit comes first */
                value = (uint16_t) ((value << 4) | (uint16_t) nibble);
        }

        /* \u0000 is refused -- presumably because a NUL byte cannot be
         * represented inside the C string the caller is building. */
        if (value == 0)
                return -EINVAL;

        *ret = value;

        return 0;
}
91
e7eebcfc
LP
92static int json_parse_string(const char **p, char **ret) {
93 _cleanup_free_ char *s = NULL;
94 size_t n = 0, allocated = 0;
95 const char *c;
96
97 assert(p);
98 assert(*p);
99 assert(ret);
100
101 c = *p;
102
103 if (*c != '"')
104 return -EINVAL;
105
106 c++;
107
108 for (;;) {
109 int len;
110
111 /* Check for EOF */
112 if (*c == 0)
113 return -EINVAL;
114
115 /* Check for control characters 0x00..0x1f */
116 if (*c > 0 && *c < ' ')
117 return -EINVAL;
118
119 /* Check for control character 0x7f */
120 if (*c == 0x7f)
121 return -EINVAL;
122
123 if (*c == '"') {
124 if (!s) {
125 s = strdup("");
126 if (!s)
127 return -ENOMEM;
128 } else
129 s[n] = 0;
130
131 *p = c + 1;
132
133 *ret = s;
134 s = NULL;
135 return JSON_STRING;
136 }
137
138 if (*c == '\\') {
139 char ch = 0;
140 c++;
141
142 if (*c == 0)
143 return -EINVAL;
144
145 if (IN_SET(*c, '"', '\\', '/'))
146 ch = *c;
147 else if (*c == 'b')
148 ch = '\b';
149 else if (*c == 'f')
150 ch = '\f';
151 else if (*c == 'n')
152 ch = '\n';
153 else if (*c == 'r')
154 ch = '\r';
155 else if (*c == 't')
156 ch = '\t';
157 else if (*c == 'u') {
e7eebcfc 158 uint16_t x;
9bae67d4 159 int r;
e7eebcfc 160
9bae67d4
TG
161 r = unhex_ucs2(c + 1, &x);
162 if (r < 0)
163 return r;
e7eebcfc 164
9bae67d4 165 c += 5;
e7eebcfc 166
9bae67d4
TG
167 if (!GREEDY_REALLOC(s, allocated, n + 4))
168 return -ENOMEM;
e7eebcfc 169
9bae67d4
TG
170 if (!utf16_is_surrogate(x))
171 n += utf8_encode_unichar(s + n, x);
172 else if (utf16_is_trailing_surrogate(x))
e7eebcfc 173 return -EINVAL;
9bae67d4
TG
174 else {
175 uint16_t y;
e7eebcfc 176
9bae67d4
TG
177 if (c[0] != '\\' || c[1] != 'u')
178 return -EINVAL;
e7eebcfc 179
9bae67d4
TG
180 r = unhex_ucs2(c + 2, &y);
181 if (r < 0)
182 return r;
e7eebcfc 183
9bae67d4 184 c += 6;
e7eebcfc 185
9bae67d4
TG
186 if (!utf16_is_trailing_surrogate(y))
187 return -EINVAL;
188
189 n += utf8_encode_unichar(s + n, utf16_surrogate_pair_to_unichar(x, y));
190 }
e7eebcfc 191
e7eebcfc
LP
192 continue;
193 } else
194 return -EINVAL;
195
196 if (!GREEDY_REALLOC(s, allocated, n + 2))
197 return -ENOMEM;
198
199 s[n++] = ch;
200 c ++;
201 continue;
202 }
203
204 len = utf8_encoded_valid_unichar(c);
205 if (len < 0)
206 return len;
207
208 if (!GREEDY_REALLOC(s, allocated, n + len + 1))
209 return -ENOMEM;
210
211 memcpy(s + n, c, len);
212 n += len;
213 c += len;
214 }
215}
216
217static int json_parse_number(const char **p, union json_value *ret) {
218 bool negative = false, exponent_negative = false, is_double = false;
219 double x = 0.0, y = 0.0, exponent = 0.0, shift = 1.0;
220 intmax_t i = 0;
221 const char *c;
222
223 assert(p);
224 assert(*p);
225 assert(ret);
226
227 c = *p;
228
229 if (*c == '-') {
230 negative = true;
231 c++;
232 }
233
234 if (*c == '0')
235 c++;
236 else {
237 if (!strchr("123456789", *c) || *c == 0)
238 return -EINVAL;
239
240 do {
241 if (!is_double) {
242 int64_t t;
243
244 t = 10 * i + (*c - '0');
245 if (t < i) /* overflow */
246 is_double = false;
247 else
248 i = t;
249 }
250
251 x = 10.0 * x + (*c - '0');
252 c++;
253 } while (strchr("0123456789", *c) && *c != 0);
254 }
255
256 if (*c == '.') {
257 is_double = true;
258 c++;
259
260 if (!strchr("0123456789", *c) || *c == 0)
261 return -EINVAL;
262
263 do {
264 y = 10.0 * y + (*c - '0');
265 shift = 10.0 * shift;
266 c++;
267 } while (strchr("0123456789", *c) && *c != 0);
268 }
269
270 if (*c == 'e' || *c == 'E') {
271 is_double = true;
272 c++;
273
274 if (*c == '-') {
275 exponent_negative = true;
276 c++;
277 } else if (*c == '+')
278 c++;
279
280 if (!strchr("0123456789", *c) || *c == 0)
281 return -EINVAL;
282
283 do {
284 exponent = 10.0 * exponent + (*c - '0');
285 c++;
286 } while (strchr("0123456789", *c) && *c != 0);
287 }
288
289 if (*c != 0)
290 return -EINVAL;
291
292 *p = c;
293
294 if (is_double) {
295 ret->real = ((negative ? -1.0 : 1.0) * (x + (y / shift))) * exp10((exponent_negative ? -1.0 : 1.0) * exponent);
296 return JSON_REAL;
297 } else {
298 ret->integer = negative ? -i : i;
299 return JSON_INTEGER;
300 }
301}
302
303int json_tokenize(
304 const char **p,
305 char **ret_string,
306 union json_value *ret_value,
307 void **state,
308 unsigned *line) {
309
310 const char *c;
311 int t;
312 int r;
313
314 assert(p);
315 assert(*p);
316 assert(ret_string);
317 assert(ret_value);
318 assert(state);
319
320 t = PTR_TO_INT(*state);
321 c = *p;
322
323 if (t == STATE_NULL) {
324 if (line)
325 *line = 1;
326 t = STATE_VALUE;
327 }
328
329 for (;;) {
330 const char *b;
331
332 b = c + strspn(c, WHITESPACE);
333 if (*b == 0)
334 return JSON_END;
335
336 inc_lines(line, c, b - c);
337 c = b;
338
339 switch (t) {
340
341 case STATE_VALUE:
342
343 if (*c == '{') {
344 *ret_string = NULL;
345 *ret_value = JSON_VALUE_NULL;
346 *p = c + 1;
347 *state = INT_TO_PTR(STATE_VALUE);
348 return JSON_OBJECT_OPEN;
349
350 } else if (*c == '}') {
351 *ret_string = NULL;
352 *ret_value = JSON_VALUE_NULL;
353 *p = c + 1;
354 *state = INT_TO_PTR(STATE_VALUE_POST);
355 return JSON_OBJECT_CLOSE;
356
357 } else if (*c == '[') {
358 *ret_string = NULL;
359 *ret_value = JSON_VALUE_NULL;
360 *p = c + 1;
361 *state = INT_TO_PTR(STATE_VALUE);
362 return JSON_ARRAY_OPEN;
363
364 } else if (*c == ']') {
365 *ret_string = NULL;
366 *ret_value = JSON_VALUE_NULL;
367 *p = c + 1;
368 *state = INT_TO_PTR(STATE_VALUE_POST);
369 return JSON_ARRAY_CLOSE;
370
371 } else if (*c == '"') {
372 r = json_parse_string(&c, ret_string);
373 if (r < 0)
374 return r;
375
376 *ret_value = JSON_VALUE_NULL;
377 *p = c;
378 *state = INT_TO_PTR(STATE_VALUE_POST);
379 return r;
380
381 } else if (strchr("-0123456789", *c)) {
382 r = json_parse_number(&c, ret_value);
383 if (r < 0)
384 return r;
385
386 *ret_string = NULL;
387 *p = c;
388 *state = INT_TO_PTR(STATE_VALUE_POST);
389 return r;
390
391 } else if (startswith(c, "true")) {
392 *ret_string = NULL;
393 ret_value->boolean = true;
394 *p = c + 4;
395 *state = INT_TO_PTR(STATE_VALUE_POST);
396 return JSON_BOOLEAN;
397
398 } else if (startswith(c, "false")) {
399 *ret_string = NULL;
400 ret_value->boolean = false;
401 *p = c + 5;
402 *state = INT_TO_PTR(STATE_VALUE_POST);
403 return JSON_BOOLEAN;
404
405 } else if (startswith(c, "null")) {
406 *ret_string = NULL;
407 *ret_value = JSON_VALUE_NULL;
408 *p = c + 4;
409 *state = INT_TO_PTR(STATE_VALUE_POST);
410 return JSON_NULL;
411
412 } else
413 return -EINVAL;
414
415 case STATE_VALUE_POST:
416
417 if (*c == ':') {
418 *ret_string = NULL;
419 *ret_value = JSON_VALUE_NULL;
420 *p = c + 1;
421 *state = INT_TO_PTR(STATE_VALUE);
422 return JSON_COLON;
423 } else if (*c == ',') {
424 *ret_string = NULL;
425 *ret_value = JSON_VALUE_NULL;
426 *p = c + 1;
427 *state = INT_TO_PTR(STATE_VALUE);
428 return JSON_COMMA;
429 } else if (*c == '}') {
430 *ret_string = NULL;
431 *ret_value = JSON_VALUE_NULL;
432 *p = c + 1;
433 *state = INT_TO_PTR(STATE_VALUE_POST);
434 return JSON_OBJECT_CLOSE;
435 } else if (*c == ']') {
436 *ret_string = NULL;
437 *ret_value = JSON_VALUE_NULL;
438 *p = c + 1;
439 *state = INT_TO_PTR(STATE_VALUE_POST);
440 return JSON_ARRAY_CLOSE;
441 } else
442 return -EINVAL;
443 }
444
445 }
446}