]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/basic/escape.c
util-lib: split string parsing related calls from util.[ch] into parse-util.[ch]
[thirdparty/systemd.git] / src / basic / escape.c
CommitLineData
4f5dd394
LP
1/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3/***
4 This file is part of systemd.
5
6 Copyright 2010 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20***/
21
22#include "utf8.h"
23#include "util.h"
24
25#include "escape.h"
26
size_t cescape_char(char c, char *buf) {
        char *out = buf;
        char mnemonic = 0;

        /* Writes the C-style escaped representation of c to buf and
         * returns the number of bytes written. Nothing is
         * NUL-terminated here; up to four bytes are stored. */

        /* Characters that have a dedicated two-character escape. */
        switch (c) {
        case '\a': mnemonic = 'a';  break;
        case '\b': mnemonic = 'b';  break;
        case '\f': mnemonic = 'f';  break;
        case '\n': mnemonic = 'n';  break;
        case '\r': mnemonic = 'r';  break;
        case '\t': mnemonic = 't';  break;
        case '\v': mnemonic = 'v';  break;
        case '\\': mnemonic = '\\'; break;
        case '"':  mnemonic = '"';  break;
        case '\'': mnemonic = '\''; break;
        default:
                break;
        }

        if (mnemonic != 0) {
                out[0] = '\\';
                out[1] = mnemonic;
                out += 2;
        } else if (c < ' ' || c >= 127) {
                /* Anything else that is not plain printable ASCII is
                 * written as a three digit octal sequence. Octal is
                 * preferred over hexadecimal because glib's
                 * g_strescape() does the same. */
                unsigned char u = (unsigned char) c;

                out[0] = '\\';
                out[1] = octchar(u >> 6);
                out[2] = octchar(u >> 3);
                out[3] = octchar(u);
                out += 4;
        } else {
                /* Printable and harmless, pass through unchanged. */
                out[0] = c;
                out += 1;
        }

        return out - buf;
}
89
90char *cescape(const char *s) {
91 char *r, *t;
92 const char *f;
93
94 assert(s);
95
96 /* Does C style string escaping. May be reversed with
97 * cunescape(). */
98
99 r = new(char, strlen(s)*4 + 1);
100 if (!r)
101 return NULL;
102
103 for (f = s, t = r; *f; f++)
104 t += cescape_char(*f, t);
105
106 *t = 0;
107
108 return r;
109}
110
/* Returns non-zero if an escape sequence of "need" characters fits into
 * "length". A length of (size_t) -1 disables the bound check. */
static int cunescape_fits(size_t length, size_t need) {
        return length == (size_t) -1 || length >= need;
}

/* Parses exactly n hexadecimal digits starting at p, most significant
 * digit first, and stores the value in *ret. Parsing stops at the first
 * character unhexchar() rejects, so later characters are not read in
 * that case. Returns 0 on success, -EINVAL otherwise. */
static int cunescape_hex_digits(const char *p, size_t n, uint32_t *ret) {
        uint32_t v = 0;
        size_t i;

        for (i = 0; i < n; i++) {
                int d;

                d = unhexchar(p[i]);
                if (d < 0)
                        return -EINVAL;

                v = (v << 4U) | (uint32_t) d;
        }

        *ret = v;
        return 0;
}

/* Hands a non-zero code point back to the caller: values below 128 are
 * stored in *ret directly, everything else goes to *ret_unicode with
 * *ret set to 0 as marker. Fails with -EINVAL if a non-ASCII code
 * point was parsed but the caller passed no ret_unicode. */
static int cunescape_store_code_point(uint32_t c, char *ret, uint32_t *ret_unicode) {
        if (c < 128) {
                *ret = c;
                return 0;
        }

        if (!ret_unicode)
                return -EINVAL;

        *ret = 0;
        *ret_unicode = c;
        return 0;
}

int cunescape_one(const char *p, size_t length, char *ret, uint32_t *ret_unicode) {
        uint32_t c;

        assert(p);
        assert(ret);

        /* Unescapes a single C style escape sequence. p points to the
         * character following the backslash, length is the number of
         * characters available at p, or (size_t) -1 to skip the
         * bound checks.
         *
         * The unescaped character is returned in ret, unless a \u or
         * \U sequence with a value of 128 or above was encountered,
         * in which case ret is set to 0 and the code point is
         * returned in ret_unicode instead (which may be NULL if the
         * caller does not accept those).
         *
         * Returns the number of characters of p that were consumed
         * (1, 3, 5 or 9), or -EINVAL if the sequence is unknown,
         * truncated, malformed or would yield a NUL byte. */

        /* Validate the length before looking at p[0], so that a
         * zero-length input is never dereferenced. */
        if (!cunescape_fits(length, 1))
                return -EINVAL;

        assert(*p);

        switch (p[0]) {

        /* Single character escapes */
        case 'a':
                *ret = '\a';
                return 1;
        case 'b':
                *ret = '\b';
                return 1;
        case 'f':
                *ret = '\f';
                return 1;
        case 'n':
                *ret = '\n';
                return 1;
        case 'r':
                *ret = '\r';
                return 1;
        case 't':
                *ret = '\t';
                return 1;
        case 'v':
                *ret = '\v';
                return 1;
        case '\\':
                *ret = '\\';
                return 1;
        case '"':
                *ret = '"';
                return 1;
        case '\'':
                *ret = '\'';
                return 1;

        case 's':
                /* This is an extension of the XDG syntax files */
                *ret = ' ';
                return 1;

        case 'x':
                /* Hexadecimal encoding: \xHH */
                if (!cunescape_fits(length, 3))
                        return -EINVAL;

                if (cunescape_hex_digits(p + 1, 2, &c) < 0)
                        return -EINVAL;

                /* Don't allow NUL bytes */
                if (c == 0)
                        return -EINVAL;

                *ret = (char) c;
                return 3;

        case 'u':
                /* C++11 style 16bit unicode: \uHHHH */
                if (!cunescape_fits(length, 5))
                        return -EINVAL;

                if (cunescape_hex_digits(p + 1, 4, &c) < 0)
                        return -EINVAL;

                /* Don't allow 0 chars */
                if (c == 0)
                        return -EINVAL;

                if (cunescape_store_code_point(c, ret, ret_unicode) < 0)
                        return -EINVAL;

                return 5;

        case 'U':
                /* C++11 style 32bit unicode: \UHHHHHHHH */
                if (!cunescape_fits(length, 9))
                        return -EINVAL;

                if (cunescape_hex_digits(p + 1, 8, &c) < 0)
                        return -EINVAL;

                /* Don't allow 0 chars */
                if (c == 0)
                        return -EINVAL;

                /* Don't allow invalid code points */
                if (!unichar_is_valid(c))
                        return -EINVAL;

                if (cunescape_store_code_point(c, ret, ret_unicode) < 0)
                        return -EINVAL;

                return 9;

        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7': {
                /* Octal encoding: \OOO, the first digit is p[0] itself */
                uint32_t m = 0;
                size_t i;

                if (!cunescape_fits(length, 3))
                        return -EINVAL;

                for (i = 0; i < 3; i++) {
                        int d;

                        d = unoctchar(p[i]);
                        if (d < 0)
                                return -EINVAL;

                        m = (m << 3U) | (uint32_t) d;
                }

                /* Don't allow NUL bytes */
                if (m == 0)
                        return -EINVAL;

                /* Don't allow bytes above 255 */
                if (m > 255)
                        return -EINVAL;

                *ret = m;
                return 3;
        }

        default:
                return -EINVAL;
        }
}
311
312int cunescape_length_with_prefix(const char *s, size_t length, const char *prefix, UnescapeFlags flags, char **ret) {
313 char *r, *t;
314 const char *f;
315 size_t pl;
316
317 assert(s);
318 assert(ret);
319
320 /* Undoes C style string escaping, and optionally prefixes it. */
321
322 pl = prefix ? strlen(prefix) : 0;
323
324 r = new(char, pl+length+1);
325 if (!r)
326 return -ENOMEM;
327
328 if (prefix)
329 memcpy(r, prefix, pl);
330
331 for (f = s, t = r + pl; f < s + length; f++) {
332 size_t remaining;
333 uint32_t u;
334 char c;
335 int k;
336
337 remaining = s + length - f;
338 assert(remaining > 0);
339
340 if (*f != '\\') {
341 /* A literal literal, copy verbatim */
342 *(t++) = *f;
343 continue;
344 }
345
346 if (remaining == 1) {
347 if (flags & UNESCAPE_RELAX) {
348 /* A trailing backslash, copy verbatim */
349 *(t++) = *f;
350 continue;
351 }
352
353 free(r);
354 return -EINVAL;
355 }
356
357 k = cunescape_one(f + 1, remaining - 1, &c, &u);
358 if (k < 0) {
359 if (flags & UNESCAPE_RELAX) {
360 /* Invalid escape code, let's take it literal then */
361 *(t++) = '\\';
362 continue;
363 }
364
365 free(r);
366 return k;
367 }
368
369 if (c != 0)
370 /* Non-Unicode? Let's encode this directly */
371 *(t++) = c;
372 else
373 /* Unicode? Then let's encode this in UTF-8 */
374 t += utf8_encode_unichar(t, u);
375
376 f += k;
377 }
378
379 *t = 0;
380
381 *ret = r;
382 return t - r;
383}
384
385int cunescape_length(const char *s, size_t length, UnescapeFlags flags, char **ret) {
386 return cunescape_length_with_prefix(s, length, NULL, flags, ret);
387}
388
389int cunescape(const char *s, UnescapeFlags flags, char **ret) {
390 return cunescape_length(s, strlen(s), flags, ret);
391}
392
393char *xescape(const char *s, const char *bad) {
394 char *r, *t;
395 const char *f;
396
397 /* Escapes all chars in bad, in addition to \ and all special
398 * chars, in \xFF style escaping. May be reversed with
399 * cunescape(). */
400
401 r = new(char, strlen(s) * 4 + 1);
402 if (!r)
403 return NULL;
404
405 for (f = s, t = r; *f; f++) {
406
407 if ((*f < ' ') || (*f >= 127) ||
408 (*f == '\\') || strchr(bad, *f)) {
409 *(t++) = '\\';
410 *(t++) = 'x';
411 *(t++) = hexchar(*f >> 4);
412 *(t++) = hexchar(*f);
413 } else
414 *(t++) = *f;
415 }
416
417 *t = 0;
418
419 return r;
420}
421
/* Copies s to t, putting a backslash in front of every backslash and
 * every character that appears in bad. The output is not
 * NUL-terminated; the returned pointer points just past the last byte
 * written. */
static char *strcpy_backslash_escaped(char *t, const char *s, const char *bad) {
        const char *in = s;
        char *out = t;

        assert(bad);

        while (*in != '\0') {
                char ch = *in++;

                if (ch == '\\' || strchr(bad, ch) != NULL)
                        *(out++) = '\\';

                *(out++) = ch;
        }

        return out;
}
434
435char *shell_escape(const char *s, const char *bad) {
436 char *r, *t;
437
438 r = new(char, strlen(s)*2+1);
439 if (!r)
440 return NULL;
441
442 t = strcpy_backslash_escaped(r, s, bad);
443 *t = 0;
444
445 return r;
446}
447
448char *shell_maybe_quote(const char *s) {
449 const char *p;
450 char *r, *t;
451
452 assert(s);
453
454 /* Encloses a string in double quotes if necessary to make it
455 * OK as shell string. */
456
457 for (p = s; *p; p++)
458 if (*p <= ' ' ||
459 *p >= 127 ||
460 strchr(SHELL_NEED_QUOTES, *p))
461 break;
462
463 if (!*p)
464 return strdup(s);
465
466 r = new(char, 1+strlen(s)*2+1+1);
467 if (!r)
468 return NULL;
469
470 t = r;
471 *(t++) = '"';
472 t = mempcpy(t, s, p - s);
473
474 t = strcpy_backslash_escaped(t, p, SHELL_NEED_ESCAPE);
475
476 *(t++)= '"';
477 *t = 0;
478
479 return r;
480}