]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/basic/escape.c
Merge pull request #8575 from keszybz/non-absolute-paths
[thirdparty/systemd.git] / src / basic / escape.c
CommitLineData
53e1b683 1/* SPDX-License-Identifier: LGPL-2.1+ */
4f5dd394
LP
2/***
3 This file is part of systemd.
4
5 Copyright 2010 Lennart Poettering
4f5dd394
LP
6***/
7
11c3a366
TA
8#include <errno.h>
9#include <stdlib.h>
10#include <string.h>
11
b5efdb8a 12#include "alloc-util.h"
e4e73a63
LP
13#include "escape.h"
14#include "hexdecoct.h"
11c3a366 15#include "macro.h"
4f5dd394 16#include "utf8.h"
4f5dd394 17
4f5dd394
LP
size_t cescape_char(char c, char *buf) {
        char letter = 0;
        unsigned char u;

        /* Writes the C-style escaped form of a single character to buf and
         * returns the number of bytes written (1, 2 or 4). No terminating NUL
         * is appended, hence buf needs room for up to four bytes. */

        /* Characters that have a dedicated two-byte escape: pick the letter
         * that follows the backslash. */
        switch (c) {

        case '\a':
                letter = 'a';
                break;
        case '\b':
                letter = 'b';
                break;
        case '\f':
                letter = 'f';
                break;
        case '\n':
                letter = 'n';
                break;
        case '\r':
                letter = 'r';
                break;
        case '\t':
                letter = 't';
                break;
        case '\v':
                letter = 'v';
                break;
        case '\\':
        case '"':
        case '\'':
                /* These escape to a backslash followed by themselves */
                letter = c;
                break;

        default:
                break;
        }

        if (letter != 0) {
                buf[0] = '\\';
                buf[1] = letter;
                return 2;
        }

        /* Plain printable ASCII is passed through untouched */
        if (c >= ' ' && c < 127) {
                buf[0] = c;
                return 1;
        }

        /* Everything else becomes a three digit octal escape. Octal is
         * preferred over hexadecimal simply because glib's g_strescape()
         * does the same. */
        u = (unsigned char) c;
        buf[0] = '\\';
        buf[1] = octchar(u >> 6);
        buf[2] = octchar(u >> 3);
        buf[3] = octchar(u);
        return 4;
}
80
a5ef3638 81char *cescape_length(const char *s, size_t n) {
4f5dd394 82 const char *f;
a5ef3638 83 char *r, *t;
4f5dd394 84
a5ef3638 85 assert(s || n == 0);
4f5dd394
LP
86
87 /* Does C style string escaping. May be reversed with
88 * cunescape(). */
89
a5ef3638 90 r = new(char, n*4 + 1);
4f5dd394
LP
91 if (!r)
92 return NULL;
93
a5ef3638 94 for (f = s, t = r; f < s + n; f++)
4f5dd394
LP
95 t += cescape_char(*f, t);
96
97 *t = 0;
98
99 return r;
100}
101
a5ef3638
LP
char *cescape(const char *s) {
        size_t len;

        /* Convenience wrapper around cescape_length() for NUL-terminated
         * strings. */

        assert(s);

        len = strlen(s);
        return cescape_length(s, len);
}
107
int cunescape_one(const char *p, size_t length, char32_t *ret, bool *eight_bit) {
        /* A length of (size_t) -1 means "no explicit bound" */
        const bool bounded = length != (size_t) -1;
        unsigned i;

        assert(p);
        assert(*p);
        assert(ret);

        /* Unescapes a single C style escape sequence. p points at the
         * character following the backslash. The decoded character is stored
         * in *ret. *eight_bit is set to true if the sequence (\xNN or octal)
         * denotes a literal byte that should be copied directly rather than
         * encoded as UTF-8; it is left untouched otherwise.
         *
         * Returns the number of bytes consumed from p (1, 3, 5 or 9), or a
         * negative errno-style value if the sequence is not valid. */

        if (bounded && length < 1)
                return -EINVAL;

        switch (p[0]) {

        /* Simple one-letter escapes */
        case 'a':
                *ret = '\a';
                return 1;
        case 'b':
                *ret = '\b';
                return 1;
        case 'f':
                *ret = '\f';
                return 1;
        case 'n':
                *ret = '\n';
                return 1;
        case 'r':
                *ret = '\r';
                return 1;
        case 't':
                *ret = '\t';
                return 1;
        case 'v':
                *ret = '\v';
                return 1;
        case '\\':
                *ret = '\\';
                return 1;
        case '"':
                *ret = '"';
                return 1;
        case '\'':
                *ret = '\'';
                return 1;

        case 's':
                /* This is an extension of the XDG syntax files */
                *ret = ' ';
                return 1;

        case 'x': {
                /* Hexadecimal encoding: exactly two hex digits */
                int hi, lo;

                if (bounded && length < 3)
                        return -EINVAL;

                hi = unhexchar(p[1]);
                if (hi < 0)
                        return -EINVAL;

                lo = unhexchar(p[2]);
                if (lo < 0)
                        return -EINVAL;

                /* Don't allow NUL bytes */
                if (hi == 0 && lo == 0)
                        return -EINVAL;

                *ret = (hi << 4U) | lo;
                *eight_bit = true;
                return 3;
        }

        case 'u': {
                /* C++11 style 16bit unicode: exactly four hex digits */
                uint32_t c = 0;

                if (bounded && length < 5)
                        return -EINVAL;

                for (i = 1; i <= 4; i++) {
                        int digit;

                        digit = unhexchar(p[i]);
                        if (digit < 0)
                                return digit; /* propagate the helper's error */

                        c = (c << 4U) | (uint32_t) digit;
                }

                /* Don't allow 0 chars */
                if (c == 0)
                        return -EINVAL;

                *ret = c;
                return 5;
        }

        case 'U': {
                /* C++11 style 32bit unicode: exactly eight hex digits */
                uint32_t acc = 0;
                char32_t c;

                if (bounded && length < 9)
                        return -EINVAL;

                for (i = 1; i <= 8; i++) {
                        int digit;

                        digit = unhexchar(p[i]);
                        if (digit < 0)
                                return digit; /* propagate the helper's error */

                        acc = (acc << 4U) | (uint32_t) digit;
                }

                c = acc;

                /* Don't allow 0 chars */
                if (c == 0)
                        return -EINVAL;

                /* Don't allow invalid code points */
                if (!unichar_is_valid(c))
                        return -EINVAL;

                *ret = c;
                return 9;
        }

        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7': {
                /* Octal encoding: exactly three octal digits, starting with
                 * the character we switched on */
                uint32_t acc = 0;
                char32_t m;

                if (bounded && length < 3)
                        return -EINVAL;

                for (i = 0; i < 3; i++) {
                        int digit;

                        digit = unoctchar(p[i]);
                        if (digit < 0)
                                return -EINVAL;

                        acc = (acc << 3U) | (uint32_t) digit;
                }

                m = acc;

                /* Don't allow NUL bytes */
                if (m == 0)
                        return -EINVAL;

                /* Don't allow bytes above 255 */
                if (m > 255)
                        return -EINVAL;

                *ret = m;
                *eight_bit = true;
                return 3;
        }

        default:
                return -EINVAL;
        }
}
294
295int cunescape_length_with_prefix(const char *s, size_t length, const char *prefix, UnescapeFlags flags, char **ret) {
296 char *r, *t;
297 const char *f;
298 size_t pl;
299
300 assert(s);
301 assert(ret);
302
303 /* Undoes C style string escaping, and optionally prefixes it. */
304
7bf7ce28 305 pl = strlen_ptr(prefix);
4f5dd394
LP
306
307 r = new(char, pl+length+1);
308 if (!r)
309 return -ENOMEM;
310
311 if (prefix)
312 memcpy(r, prefix, pl);
313
314 for (f = s, t = r + pl; f < s + length; f++) {
315 size_t remaining;
3565e095 316 bool eight_bit = false;
c932fb71 317 char32_t u;
4f5dd394
LP
318 int k;
319
320 remaining = s + length - f;
321 assert(remaining > 0);
322
323 if (*f != '\\') {
629ff674 324 /* A literal, copy verbatim */
4f5dd394
LP
325 *(t++) = *f;
326 continue;
327 }
328
329 if (remaining == 1) {
330 if (flags & UNESCAPE_RELAX) {
331 /* A trailing backslash, copy verbatim */
332 *(t++) = *f;
333 continue;
334 }
335
336 free(r);
337 return -EINVAL;
338 }
339
3565e095 340 k = cunescape_one(f + 1, remaining - 1, &u, &eight_bit);
4f5dd394
LP
341 if (k < 0) {
342 if (flags & UNESCAPE_RELAX) {
343 /* Invalid escape code, let's take it literal then */
344 *(t++) = '\\';
345 continue;
346 }
347
348 free(r);
349 return k;
350 }
351
3565e095
ZJS
352 f += k;
353 if (eight_bit)
354 /* One byte? Set directly as specified */
355 *(t++) = u;
4f5dd394 356 else
3565e095 357 /* Otherwise encode as multi-byte UTF-8 */
4f5dd394 358 t += utf8_encode_unichar(t, u);
4f5dd394
LP
359 }
360
361 *t = 0;
362
363 *ret = r;
364 return t - r;
365}
366
367int cunescape_length(const char *s, size_t length, UnescapeFlags flags, char **ret) {
368 return cunescape_length_with_prefix(s, length, NULL, flags, ret);
369}
370
371int cunescape(const char *s, UnescapeFlags flags, char **ret) {
372 return cunescape_length(s, strlen(s), flags, ret);
373}
374
375char *xescape(const char *s, const char *bad) {
376 char *r, *t;
377 const char *f;
378
379 /* Escapes all chars in bad, in addition to \ and all special
380 * chars, in \xFF style escaping. May be reversed with
381 * cunescape(). */
382
383 r = new(char, strlen(s) * 4 + 1);
384 if (!r)
385 return NULL;
386
387 for (f = s, t = r; *f; f++) {
388
389 if ((*f < ' ') || (*f >= 127) ||
390 (*f == '\\') || strchr(bad, *f)) {
391 *(t++) = '\\';
392 *(t++) = 'x';
393 *(t++) = hexchar(*f >> 4);
394 *(t++) = hexchar(*f);
395 } else
396 *(t++) = *f;
397 }
398
399 *t = 0;
400
401 return r;
402}
403
95052df3
ZJS
404char *octescape(const char *s, size_t len) {
405 char *r, *t;
406 const char *f;
407
408 /* Escapes all chars in bad, in addition to \ and " chars,
409 * in \nnn style escaping. */
410
411 r = new(char, len * 4 + 1);
412 if (!r)
413 return NULL;
414
415 for (f = s, t = r; f < s + len; f++) {
416
4c701096 417 if (*f < ' ' || *f >= 127 || IN_SET(*f, '\\', '"')) {
95052df3
ZJS
418 *(t++) = '\\';
419 *(t++) = '0' + (*f >> 6);
420 *(t++) = '0' + ((*f >> 3) & 8);
421 *(t++) = '0' + (*f & 8);
422 } else
423 *(t++) = *f;
424 }
425
426 *t = 0;
427
428 return r;
429
430}
431
804ee07c 432static char *strcpy_backslash_escaped(char *t, const char *s, const char *bad, bool escape_tab_nl) {
4f5dd394
LP
433 assert(bad);
434
435 for (; *s; s++) {
804ee07c
ZJS
436 if (escape_tab_nl && IN_SET(*s, '\n', '\t')) {
437 *(t++) = '\\';
438 *(t++) = *s == '\n' ? 'n' : 't';
439 continue;
440 }
441
4f5dd394
LP
442 if (*s == '\\' || strchr(bad, *s))
443 *(t++) = '\\';
444
445 *(t++) = *s;
446 }
447
448 return t;
449}
450
451char *shell_escape(const char *s, const char *bad) {
452 char *r, *t;
453
454 r = new(char, strlen(s)*2+1);
455 if (!r)
456 return NULL;
457
804ee07c 458 t = strcpy_backslash_escaped(r, s, bad, false);
4f5dd394
LP
459 *t = 0;
460
461 return r;
462}
463
804ee07c 464char* shell_maybe_quote(const char *s, EscapeStyle style) {
4f5dd394
LP
465 const char *p;
466 char *r, *t;
467
468 assert(s);
469
804ee07c
ZJS
470 /* Encloses a string in quotes if necessary to make it OK as a shell
471 * string. Note that we treat benign UTF-8 characters as needing
472 * escaping too, but that should be OK. */
4f5dd394
LP
473
474 for (p = s; *p; p++)
475 if (*p <= ' ' ||
476 *p >= 127 ||
477 strchr(SHELL_NEED_QUOTES, *p))
478 break;
479
480 if (!*p)
481 return strdup(s);
482
804ee07c 483 r = new(char, (style == ESCAPE_POSIX) + 1 + strlen(s)*2 + 1 + 1);
4f5dd394
LP
484 if (!r)
485 return NULL;
486
487 t = r;
804ee07c
ZJS
488 if (style == ESCAPE_BACKSLASH)
489 *(t++) = '"';
490 else if (style == ESCAPE_POSIX) {
491 *(t++) = '$';
492 *(t++) = '\'';
493 } else
494 assert_not_reached("Bad EscapeStyle");
495
4f5dd394
LP
496 t = mempcpy(t, s, p - s);
497
804ee07c
ZJS
498 if (style == ESCAPE_BACKSLASH)
499 t = strcpy_backslash_escaped(t, p, SHELL_NEED_ESCAPE, false);
500 else
501 t = strcpy_backslash_escaped(t, p, SHELL_NEED_ESCAPE_POSIX, true);
4f5dd394 502
804ee07c
ZJS
503 if (style == ESCAPE_BACKSLASH)
504 *(t++) = '"';
505 else
506 *(t++) = '\'';
4f5dd394
LP
507 *t = 0;
508
509 return r;
510}