]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/escape.c
Merge pull request #7388 from keszybz/doc-tweak
[thirdparty/systemd.git] / src / basic / escape.c
1 /***
2 This file is part of systemd.
3
4 Copyright 2010 Lennart Poettering
5
6 systemd is free software; you can redistribute it and/or modify it
7 under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version.
10
11 systemd is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public License
17 along with systemd; If not, see <http://www.gnu.org/licenses/>.
18 ***/
19
20 #include <errno.h>
21 #include <stdlib.h>
22 #include <string.h>
23
24 #include "alloc-util.h"
25 #include "escape.h"
26 #include "hexdecoct.h"
27 #include "macro.h"
28 #include "utf8.h"
29
/* Writes the C-style escaped form of the single character c to buf and
 * returns the number of bytes written (1, 2 or 4). No NUL terminator is
 * appended; buf must have room for at least four bytes. */
size_t cescape_char(char c, char *buf) {
        char *out = buf;
        char letter = 0;

        /* Characters that have a dedicated two-character escape sequence. */
        switch (c) {
        case '\a':
                letter = 'a';
                break;
        case '\b':
                letter = 'b';
                break;
        case '\f':
                letter = 'f';
                break;
        case '\n':
                letter = 'n';
                break;
        case '\r':
                letter = 'r';
                break;
        case '\t':
                letter = 't';
                break;
        case '\v':
                letter = 'v';
                break;
        case '\\':
        case '"':
        case '\'':
                /* These escape to a backslash followed by themselves. */
                letter = c;
                break;
        default:
                break;
        }

        if (letter != 0) {
                *(out++) = '\\';
                *(out++) = letter;
        } else if ((c < ' ') || (c >= 127)) {
                /* For special chars we prefer octal over hexadecimal
                 * encoding, simply because glib's g_strescape() does
                 * the same */
                const unsigned char u = (unsigned char) c;

                *(out++) = '\\';
                *(out++) = octchar(u >> 6);
                *(out++) = octchar(u >> 3);
                *(out++) = octchar(u);
        } else
                *(out++) = c;

        return out - buf;
}
92
93 char *cescape_length(const char *s, size_t n) {
94 const char *f;
95 char *r, *t;
96
97 assert(s || n == 0);
98
99 /* Does C style string escaping. May be reversed with
100 * cunescape(). */
101
102 r = new(char, n*4 + 1);
103 if (!r)
104 return NULL;
105
106 for (f = s, t = r; f < s + n; f++)
107 t += cescape_char(*f, t);
108
109 *t = 0;
110
111 return r;
112 }
113
/* C-style escapes the whole NUL-terminated string s; see cescape_length(). */
char *cescape(const char *s) {
        size_t n;

        assert(s);

        n = strlen(s);
        return cescape_length(s, n);
}
119
/* Maps the character of a single-character escape sequence (the one that
 * follows the backslash) to the character it stands for. Returns 0 if c does
 * not introduce such a sequence. */
static char unescape_simple(char c) {
        switch (c) {
        case 'a':
                return '\a';
        case 'b':
                return '\b';
        case 'f':
                return '\f';
        case 'n':
                return '\n';
        case 'r':
                return '\r';
        case 't':
                return '\t';
        case 'v':
                return '\v';
        case '\\':
                return '\\';
        case '"':
                return '"';
        case '\'':
                return '\'';
        case 's':
                /* This is an extension of the XDG syntax files */
                return ' ';
        default:
                return 0;
        }
}

/* Parses exactly n hexadecimal digits starting at p, most significant digit
 * first, and stores the value in *ret. Stops at the first character that
 * unhexchar() rejects and returns its (negative) result; returns 0 on
 * success. */
static int unhex_digits(const char *p, unsigned n, uint32_t *ret) {
        uint32_t v = 0;
        unsigned i;

        for (i = 0; i < n; i++) {
                int d;

                d = unhexchar(p[i]);
                if (d < 0)
                        return d;

                v = (v << 4U) | (uint32_t) d;
        }

        *ret = v;
        return 0;
}

/* Unescapes one C style escape sequence. p points at the character right
 * after the backslash, and length is the number of characters available at
 * p, or (size_t) -1 to skip all length checks.
 *
 * Returns the unescaped character in ret. Sets *eight_bit to true if the
 * escaped sequence either fits in one byte in UTF-8 or is a non-unicode
 * literal byte and should instead be copied directly; *eight_bit is left
 * untouched otherwise.
 *
 * Returns the number of characters consumed from p (1, 3, 5 or 9) on
 * success, or a negative errno-style value if the sequence is unknown,
 * truncated, malformed, or would decode to a NUL character. */
int cunescape_one(const char *p, size_t length, char32_t *ret, bool *eight_bit) {
        const bool bounded = length != (size_t) -1;
        uint32_t v;
        unsigned i;
        char simple;
        int k;

        assert(p);
        assert(*p);
        assert(ret);

        if (bounded && length < 1)
                return -EINVAL;

        simple = unescape_simple(p[0]);
        if (simple != 0) {
                *ret = simple;
                return 1;
        }

        switch (p[0]) {

        case 'x':
                /* hexadecimal encoding: exactly two digits */
                if (bounded && length < 3)
                        return -EINVAL;

                if (unhex_digits(p + 1, 2, &v) < 0)
                        return -EINVAL;

                /* Don't allow NUL bytes */
                if (v == 0)
                        return -EINVAL;

                *ret = v;
                *eight_bit = true;
                return 3;

        case 'u':
                /* C++11 style 16bit unicode: exactly four digits */
                if (bounded && length < 5)
                        return -EINVAL;

                k = unhex_digits(p + 1, 4, &v);
                if (k < 0)
                        return k;

                /* Don't allow 0 chars */
                if (v == 0)
                        return -EINVAL;

                *ret = v;
                return 5;

        case 'U':
                /* C++11 style 32bit unicode: exactly eight digits */
                if (bounded && length < 9)
                        return -EINVAL;

                k = unhex_digits(p + 1, 8, &v);
                if (k < 0)
                        return k;

                /* Don't allow 0 chars */
                if (v == 0)
                        return -EINVAL;

                /* Don't allow invalid code points */
                if (!unichar_is_valid((char32_t) v))
                        return -EINVAL;

                *ret = v;
                return 9;

        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
                /* octal encoding: exactly three digits, the first being p[0] */
                if (bounded && length < 3)
                        return -EINVAL;

                v = 0;
                for (i = 0; i < 3; i++) {
                        int d;

                        d = unoctchar(p[i]);
                        if (d < 0)
                                return -EINVAL;

                        v = (v << 3U) | (uint32_t) d;
                }

                /* Don't allow NUL bytes, and don't allow bytes above 255 */
                if (v == 0 || v > 255)
                        return -EINVAL;

                *ret = v;
                *eight_bit = true;
                return 3;

        default:
                return -EINVAL;
        }
}
306
307 int cunescape_length_with_prefix(const char *s, size_t length, const char *prefix, UnescapeFlags flags, char **ret) {
308 char *r, *t;
309 const char *f;
310 size_t pl;
311
312 assert(s);
313 assert(ret);
314
315 /* Undoes C style string escaping, and optionally prefixes it. */
316
317 pl = strlen_ptr(prefix);
318
319 r = new(char, pl+length+1);
320 if (!r)
321 return -ENOMEM;
322
323 if (prefix)
324 memcpy(r, prefix, pl);
325
326 for (f = s, t = r + pl; f < s + length; f++) {
327 size_t remaining;
328 bool eight_bit = false;
329 char32_t u;
330 int k;
331
332 remaining = s + length - f;
333 assert(remaining > 0);
334
335 if (*f != '\\') {
336 /* A literal, copy verbatim */
337 *(t++) = *f;
338 continue;
339 }
340
341 if (remaining == 1) {
342 if (flags & UNESCAPE_RELAX) {
343 /* A trailing backslash, copy verbatim */
344 *(t++) = *f;
345 continue;
346 }
347
348 free(r);
349 return -EINVAL;
350 }
351
352 k = cunescape_one(f + 1, remaining - 1, &u, &eight_bit);
353 if (k < 0) {
354 if (flags & UNESCAPE_RELAX) {
355 /* Invalid escape code, let's take it literal then */
356 *(t++) = '\\';
357 continue;
358 }
359
360 free(r);
361 return k;
362 }
363
364 f += k;
365 if (eight_bit)
366 /* One byte? Set directly as specified */
367 *(t++) = u;
368 else
369 /* Otherwise encode as multi-byte UTF-8 */
370 t += utf8_encode_unichar(t, u);
371 }
372
373 *t = 0;
374
375 *ret = r;
376 return t - r;
377 }
378
379 int cunescape_length(const char *s, size_t length, UnescapeFlags flags, char **ret) {
380 return cunescape_length_with_prefix(s, length, NULL, flags, ret);
381 }
382
383 int cunescape(const char *s, UnescapeFlags flags, char **ret) {
384 return cunescape_length(s, strlen(s), flags, ret);
385 }
386
387 char *xescape(const char *s, const char *bad) {
388 char *r, *t;
389 const char *f;
390
391 /* Escapes all chars in bad, in addition to \ and all special
392 * chars, in \xFF style escaping. May be reversed with
393 * cunescape(). */
394
395 r = new(char, strlen(s) * 4 + 1);
396 if (!r)
397 return NULL;
398
399 for (f = s, t = r; *f; f++) {
400
401 if ((*f < ' ') || (*f >= 127) ||
402 (*f == '\\') || strchr(bad, *f)) {
403 *(t++) = '\\';
404 *(t++) = 'x';
405 *(t++) = hexchar(*f >> 4);
406 *(t++) = hexchar(*f);
407 } else
408 *(t++) = *f;
409 }
410
411 *t = 0;
412
413 return r;
414 }
415
/* Escapes a byte buffer in \nnn (three digit octal) style.
 *
 * Every byte below ' ' or at/above 127, as well as the \ and " chars, is
 * replaced by a backslash followed by exactly three octal digits; all other
 * bytes are copied verbatim. Exactly len bytes of s are consumed, hence
 * embedded NUL bytes are fine (they are emitted as "\000").
 *
 * Returns a newly allocated NUL-terminated string the caller has to free(),
 * or NULL if memory could not be allocated (which includes the case where
 * the worst-case output size does not fit into a size_t). */
char *octescape(const char *s, size_t len) {
        char *r, *t;
        size_t i;

        /* In the worst case every input byte turns into four output bytes,
         * plus one for the trailing NUL. Refuse if that would overflow. */
        if (len > ((size_t) -1 - 1) / 4)
                return NULL;

        r = malloc(len * 4 + 1);
        if (!r)
                return NULL;

        t = r;
        for (i = 0; i < len; i++) {
                /* Operate on the unsigned value: where char is signed, bytes
                 * >= 0x80 are negative and shifting them right does not
                 * yield the top two bits. */
                const unsigned char u = (unsigned char) s[i];

                if (u < ' ' || u >= 127 || u == '\\' || u == '"') {
                        *(t++) = '\\';
                        *(t++) = (char) ('0' + (u >> 6));
                        /* An octal digit is three bits wide, hence mask with 7 */
                        *(t++) = (char) ('0' + ((u >> 3) & 7));
                        *(t++) = (char) ('0' + (u & 7));
                } else
                        *(t++) = (char) u;
        }

        *t = 0;

        return r;
}
443
/* Copies the NUL-terminated string s to t, putting a backslash in front of
 * every backslash and every character listed in bad. If escape_tab_nl is
 * true, newline and tab are written as the two-character sequences \n and \t
 * instead. No NUL terminator is written; the return value points right
 * behind the last byte written. t needs room for up to twice the length
 * of s. */
static char *strcpy_backslash_escaped(char *t, const char *s, const char *bad, bool escape_tab_nl) {
        const char *in = s;
        char *out = t;

        assert(bad);

        while (*in) {
                const char c = *(in++);

                if (escape_tab_nl && (c == '\n' || c == '\t')) {
                        *(out++) = '\\';
                        if (c == '\n')
                                *(out++) = 'n';
                        else
                                *(out++) = 't';
                } else {
                        if (c == '\\' || strchr(bad, c))
                                *(out++) = '\\';

                        *(out++) = c;
                }
        }

        return out;
}
462
463 char *shell_escape(const char *s, const char *bad) {
464 char *r, *t;
465
466 r = new(char, strlen(s)*2+1);
467 if (!r)
468 return NULL;
469
470 t = strcpy_backslash_escaped(r, s, bad, false);
471 *t = 0;
472
473 return r;
474 }
475
476 char* shell_maybe_quote(const char *s, EscapeStyle style) {
477 const char *p;
478 char *r, *t;
479
480 assert(s);
481
482 /* Encloses a string in quotes if necessary to make it OK as a shell
483 * string. Note that we treat benign UTF-8 characters as needing
484 * escaping too, but that should be OK. */
485
486 for (p = s; *p; p++)
487 if (*p <= ' ' ||
488 *p >= 127 ||
489 strchr(SHELL_NEED_QUOTES, *p))
490 break;
491
492 if (!*p)
493 return strdup(s);
494
495 r = new(char, (style == ESCAPE_POSIX) + 1 + strlen(s)*2 + 1 + 1);
496 if (!r)
497 return NULL;
498
499 t = r;
500 if (style == ESCAPE_BACKSLASH)
501 *(t++) = '"';
502 else if (style == ESCAPE_POSIX) {
503 *(t++) = '$';
504 *(t++) = '\'';
505 } else
506 assert_not_reached("Bad EscapeStyle");
507
508 t = mempcpy(t, s, p - s);
509
510 if (style == ESCAPE_BACKSLASH)
511 t = strcpy_backslash_escaped(t, p, SHELL_NEED_ESCAPE, false);
512 else
513 t = strcpy_backslash_escaped(t, p, SHELL_NEED_ESCAPE_POSIX, true);
514
515 if (style == ESCAPE_BACKSLASH)
516 *(t++) = '"';
517 else
518 *(t++) = '\'';
519 *t = 0;
520
521 return r;
522 }