]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/escape.c
79123536c034cb856b18a4432a30d5de20d07b28
[thirdparty/systemd.git] / src / basic / escape.c
1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
2
3 #include <stdlib.h>
4 #include <string.h>
5
6 #include "alloc-util.h"
7 #include "escape.h"
8 #include "hexdecoct.h"
9 #include "string-util.h"
10 #include "strv.h"
11 #include "utf8.h"
12
int cescape_char(char c, char *buf) {
        char letter = 0;

        /* Writes the C-style escaped representation of c to buf and returns the number of bytes written
         * (1, 2 or 4). No NUL terminator is appended. The caller has to provide room for 4 bytes. */

        /* First, see whether the character has a two-byte backslash escape of its own. */
        switch (c) {
        case '\a':
                letter = 'a';
                break;
        case '\b':
                letter = 'b';
                break;
        case '\f':
                letter = 'f';
                break;
        case '\n':
                letter = 'n';
                break;
        case '\r':
                letter = 'r';
                break;
        case '\t':
                letter = 't';
                break;
        case '\v':
                letter = 'v';
                break;
        case '\\':
                letter = '\\';
                break;
        case '"':
                letter = '"';
                break;
        case '\'':
                letter = '\'';
                break;
        default:
                break;
        }

        if (letter != 0) {
                buf[0] = '\\';
                buf[1] = letter;
                return 2;
        }

        /* For the remaining special chars we prefer octal over hexadecimal encoding, simply because
         * glib's g_strescape() does the same. */
        if (c < ' ' || c >= 127) {
                const unsigned char u = (unsigned char) c;

                buf[0] = '\\';
                buf[1] = octchar(u >> 6);
                buf[2] = octchar(u >> 3);
                buf[3] = octchar(u);
                return 4;
        }

        /* Everything else is copied through unchanged. */
        buf[0] = c;
        return 1;
}
77
78 char* cescape_length(const char *s, size_t n) {
79 const char *f;
80 char *r, *t;
81
82 /* Does C style string escaping. May be reversed with cunescape(). */
83
84 assert(s || n == 0);
85
86 if (n == SIZE_MAX)
87 n = strlen(s);
88
89 if (n > (SIZE_MAX - 1) / 4)
90 return NULL;
91
92 r = new(char, n*4 + 1);
93 if (!r)
94 return NULL;
95
96 for (f = s, t = r; f < s + n; f++)
97 t += cescape_char(*f, t);
98
99 *t = 0;
100
101 return r;
102 }
103
int cunescape_one(const char *p, size_t length, char32_t *ret, bool *eight_bit, bool accept_nul) {
        assert(p);
        assert(ret);

        /* Unescapes a single C style escape sequence. p points to the character following the backslash,
         * and length is the number of bytes available there (SIZE_MAX disables the length checks).
         *
         * On success the decoded character is stored in *ret and the number of bytes consumed from p is
         * returned: 1 for the single-letter escapes, 3 for \xHH and three-digit octal, 5 for \uHHHH and
         * 9 for \UHHHHHHHH. On failure a negative errno-style value is returned.
         *
         * *eight_bit is set to true for the \xHH and octal forms, i.e. when the result is a literal byte
         * that should be copied directly instead of being encoded as UTF-8. It is never set to false
         * here, and it is only dereferenced for those two forms. */

        const bool bounded = length != SIZE_MAX;

        if (bounded && length < 1)
                return -EINVAL;

        switch (p[0]) {

        case 'a':
                *ret = '\a';
                return 1;
        case 'b':
                *ret = '\b';
                return 1;
        case 'f':
                *ret = '\f';
                return 1;
        case 'n':
                *ret = '\n';
                return 1;
        case 'r':
                *ret = '\r';
                return 1;
        case 't':
                *ret = '\t';
                return 1;
        case 'v':
                *ret = '\v';
                return 1;
        case '\\':
                *ret = '\\';
                return 1;
        case '"':
                *ret = '"';
                return 1;
        case '\'':
                *ret = '\'';
                return 1;

        case 's':
                /* This is an extension of the XDG syntax files */
                *ret = ' ';
                return 1;

        case 'x': {
                /* Hexadecimal encoding: exactly two hex digits */
                if (bounded && length < 3)
                        return -EINVAL;

                int hi = unhexchar(p[1]);
                if (hi < 0)
                        return -EINVAL;

                int lo = unhexchar(p[2]);
                if (lo < 0)
                        return -EINVAL;

                /* Don't allow NUL bytes */
                if (hi == 0 && lo == 0 && !accept_nul)
                        return -EINVAL;

                *ret = (hi << 4U) | lo;
                *eight_bit = true;
                return 3;
        }

        case 'u': {
                /* C++11 style 16-bit unicode: exactly four hex digits */
                if (bounded && length < 5)
                        return -EINVAL;

                uint32_t cp = 0;
                for (size_t i = 1; i <= 4; i++) {
                        int digit = unhexchar(p[i]);
                        if (digit < 0)
                                return digit;

                        cp = (cp << 4U) | (uint32_t) digit;
                }

                /* Don't allow 0 chars */
                if (cp == 0 && !accept_nul)
                        return -EINVAL;

                *ret = cp;
                return 5;
        }

        case 'U': {
                /* C++11 style 32-bit unicode: exactly eight hex digits */
                if (bounded && length < 9)
                        return -EINVAL;

                char32_t cp = 0;
                for (size_t i = 1; i <= 8; i++) {
                        int digit = unhexchar(p[i]);
                        if (digit < 0)
                                return digit;

                        cp = (cp << 4U) | (uint32_t) digit;
                }

                /* Don't allow 0 chars */
                if (cp == 0 && !accept_nul)
                        return -EINVAL;

                /* Don't allow invalid code points */
                if (!unichar_is_valid(cp))
                        return -EINVAL;

                *ret = cp;
                return 9;
        }

        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7': {
                /* Octal encoding: exactly three octal digits, the first of which is p[0] itself */
                if (bounded && length < 3)
                        return -EINVAL;

                int d0 = unoctchar(p[0]);
                if (d0 < 0)
                        return -EINVAL;

                int d1 = unoctchar(p[1]);
                if (d1 < 0)
                        return -EINVAL;

                int d2 = unoctchar(p[2]);
                if (d2 < 0)
                        return -EINVAL;

                /* Don't allow NUL bytes */
                if (d0 == 0 && d1 == 0 && d2 == 0 && !accept_nul)
                        return -EINVAL;

                /* Don't allow bytes above 255 */
                char32_t byte = ((uint32_t) d0 << 6U) | ((uint32_t) d1 << 3U) | (uint32_t) d2;
                if (byte > 255)
                        return -EINVAL;

                *ret = byte;
                *eight_bit = true;
                return 3;
        }

        default:
                return -EINVAL;
        }
}
289
290 ssize_t cunescape_length_with_prefix(const char *s, size_t length, const char *prefix, UnescapeFlags flags, char **ret) {
291 _cleanup_free_ char *ans = NULL;
292 char *t;
293 const char *f;
294 size_t pl;
295 int r;
296
297 assert(s);
298 assert(ret);
299
300 /* Undoes C style string escaping, and optionally prefixes it. */
301
302 if (length == SIZE_MAX)
303 length = strlen(s);
304
305 pl = strlen_ptr(prefix);
306
307 ans = new(char, pl+length+1);
308 if (!ans)
309 return -ENOMEM;
310
311 if (prefix)
312 memcpy(ans, prefix, pl);
313
314 for (f = s, t = ans + pl; f < s + length; f++) {
315 size_t remaining;
316 bool eight_bit = false;
317 char32_t u;
318
319 remaining = s + length - f;
320 assert(remaining > 0);
321
322 if (*f != '\\') {
323 /* A literal, copy verbatim */
324 *(t++) = *f;
325 continue;
326 }
327
328 if (remaining == 1) {
329 if (flags & UNESCAPE_RELAX) {
330 /* A trailing backslash, copy verbatim */
331 *(t++) = *f;
332 continue;
333 }
334
335 return -EINVAL;
336 }
337
338 r = cunescape_one(f + 1, remaining - 1, &u, &eight_bit, flags & UNESCAPE_ACCEPT_NUL);
339 if (r < 0) {
340 if (flags & UNESCAPE_RELAX) {
341 /* Invalid escape code, let's take it literal then */
342 *(t++) = '\\';
343 continue;
344 }
345
346 return r;
347 }
348
349 f += r;
350 if (eight_bit)
351 /* One byte? Set directly as specified */
352 *(t++) = u;
353 else
354 /* Otherwise encode as multi-byte UTF-8 */
355 t += utf8_encode_unichar(t, u);
356 }
357
358 *t = 0;
359
360 assert(t >= ans); /* Let static analyzers know that the answer is non-negative. */
361 *ret = TAKE_PTR(ans);
362 return t - *ret;
363 }
364
365 char* xescape_full(const char *s, const char *bad, size_t console_width, XEscapeFlags flags) {
366 char *ans, *t, *prev, *prev2;
367 const char *f;
368
369 assert(s);
370
371 /* Escapes all chars in bad, in addition to \ and all special chars, in \xFF style escaping. May be
372 * reversed with cunescape(). If XESCAPE_8_BIT is specified, characters >= 127 are let through
373 * unchanged. This corresponds to non-ASCII printable characters in pre-unicode encodings.
374 *
375 * If console_width is reached, or XESCAPE_FORCE_ELLIPSIS is set, output is truncated and "..." is
376 * appended. */
377
378 if (console_width == 0)
379 return strdup("");
380
381 ans = new(char, MIN(strlen(s), console_width) * 4 + 1);
382 if (!ans)
383 return NULL;
384
385 memset(ans, '_', MIN(strlen(s), console_width) * 4);
386 ans[MIN(strlen(s), console_width) * 4] = 0;
387
388 bool force_ellipsis = FLAGS_SET(flags, XESCAPE_FORCE_ELLIPSIS);
389
390 for (f = s, t = prev = prev2 = ans; ; f++) {
391 char *tmp_t = t;
392
393 if (!*f) {
394 if (force_ellipsis)
395 break;
396
397 *t = 0;
398 return ans;
399 }
400
401 if ((unsigned char) *f < ' ' ||
402 (!FLAGS_SET(flags, XESCAPE_8_BIT) && (unsigned char) *f >= 127) ||
403 *f == '\\' || (bad && strchr(bad, *f))) {
404 if ((size_t) (t - ans) + 4 + 3 * force_ellipsis > console_width)
405 break;
406
407 *(t++) = '\\';
408 *(t++) = 'x';
409 *(t++) = hexchar(*f >> 4);
410 *(t++) = hexchar(*f);
411 } else {
412 if ((size_t) (t - ans) + 1 + 3 * force_ellipsis > console_width)
413 break;
414
415 *(t++) = *f;
416 }
417
418 /* We might need to go back two cycles to fit three dots, so remember two positions */
419 prev2 = prev;
420 prev = tmp_t;
421 }
422
423 /* We can just write where we want, since chars are one-byte */
424 size_t c = MIN(console_width, 3u); /* If the console is too narrow, write fewer dots */
425 size_t off;
426 if (console_width - c >= (size_t) (t - ans))
427 off = (size_t) (t - ans);
428 else if (console_width - c >= (size_t) (prev - ans))
429 off = (size_t) (prev - ans);
430 else if (console_width - c >= (size_t) (prev2 - ans))
431 off = (size_t) (prev2 - ans);
432 else
433 off = console_width - c;
434 assert(off <= (size_t) (t - ans));
435
436 memcpy(ans + off, "...", c);
437 ans[off + c] = '\0';
438 return ans;
439 }
440
441 char* escape_non_printable_full(const char *str, size_t console_width, XEscapeFlags flags) {
442 if (FLAGS_SET(flags, XESCAPE_8_BIT))
443 return xescape_full(str, /* bad= */ NULL, console_width, flags);
444 else
445 return utf8_escape_non_printable_full(str,
446 console_width,
447 FLAGS_SET(flags, XESCAPE_FORCE_ELLIPSIS));
448 }
449
450 char* octescape(const char *s, size_t len) {
451 char *buf, *t;
452
453 /* Escapes \ and " chars, in \nnn style escaping. */
454
455 assert(s || len == 0);
456
457 if (len == SIZE_MAX)
458 len = strlen(s);
459
460 if (len > (SIZE_MAX - 1) / 4)
461 return NULL;
462
463 t = buf = new(char, len * 4 + 1);
464 if (!buf)
465 return NULL;
466
467 for (size_t i = 0; i < len; i++) {
468 uint8_t u = (uint8_t) s[i];
469
470 if (u < ' ' || u >= 127 || IN_SET(u, '\\', '"')) {
471 *(t++) = '\\';
472 *(t++) = '0' + (u >> 6);
473 *(t++) = '0' + ((u >> 3) & 7);
474 *(t++) = '0' + (u & 7);
475 } else
476 *(t++) = u;
477 }
478
479 *t = 0;
480 return buf;
481 }
482
483 char* decescape(const char *s, size_t len, const char *bad) {
484 char *buf, *t;
485
486 /* Escapes all chars in bad, in addition to \ and " chars, in \nnn decimal style escaping. */
487
488 assert(s || len == 0);
489
490 if (len == SIZE_MAX)
491 len = strlen(s);
492
493 if (len > (SIZE_MAX - 1) / 4)
494 return NULL;
495
496 t = buf = new(char, len * 4 + 1);
497 if (!buf)
498 return NULL;
499
500 for (size_t i = 0; i < len; i++) {
501 uint8_t u = (uint8_t) s[i];
502
503 if (u < ' ' || u >= 127 || IN_SET(u, '\\', '"') || strchr(bad, u)) {
504 *(t++) = '\\';
505 *(t++) = '0' + (u / 100);
506 *(t++) = '0' + ((u / 10) % 10);
507 *(t++) = '0' + (u % 10);
508 } else
509 *(t++) = u;
510 }
511
512 *t = 0;
513 return buf;
514 }
515
static char* strcpy_backslash_escaped(char *t, const char *s, const char *bad) {
        assert(bad);
        assert(t);
        assert(s);

        /* Appends an escaped copy of s at t and returns the position right after the last byte written.
         * No NUL terminator is written. Bytes for which char_is_cc() is true, or at which
         * utf8_encoded_valid_unichar() reports an error, go through cescape_char(). Single-byte
         * characters that are a backslash or listed in bad get a backslash prepended. Longer sequences
         * are copied as they are. */

        for (const char *p = s; *p; ) {
                int l = utf8_encoded_valid_unichar(p, SIZE_MAX);

                if (char_is_cc(*p) || l < 0) {
                        t += cescape_char(*p, t);
                        p++;
                        continue;
                }

                if (l != 1) {
                        /* A multi-byte sequence, take it over in one go */
                        t = mempcpy(t, p, l);
                        p += l;
                        continue;
                }

                if (*p == '\\' || strchr(bad, *p))
                        *(t++) = '\\';

                *(t++) = *(p++);
        }

        return t;
}
538
539 char* shell_escape(const char *s, const char *bad) {
540 char *buf, *t;
541
542 buf = new(char, strlen(s)*4+1);
543 if (!buf)
544 return NULL;
545
546 t = strcpy_backslash_escaped(buf, s, bad);
547 *t = 0;
548
549 return buf;
550 }
551
552 char* shell_maybe_quote(const char *s, ShellEscapeFlags flags) {
553 const char *p;
554 char *buf, *t;
555
556 assert(s);
557
558 /* Encloses a string in quotes if necessary to make it OK as a shell string. */
559
560 if (FLAGS_SET(flags, SHELL_ESCAPE_EMPTY) && isempty(s))
561 return strdup("\"\""); /* We don't use $'' here in the POSIX mode. "" is fine too. */
562
563 for (p = s; *p; ) {
564 int l = utf8_encoded_valid_unichar(p, SIZE_MAX);
565
566 if (char_is_cc(*p) || l < 0 ||
567 strchr(WHITESPACE SHELL_NEED_QUOTES, *p))
568 break;
569
570 p += l;
571 }
572
573 if (!*p)
574 return strdup(s);
575
576 buf = new(char, FLAGS_SET(flags, SHELL_ESCAPE_POSIX) + 1 + strlen(s)*4 + 1 + 1);
577 if (!buf)
578 return NULL;
579
580 t = buf;
581 if (FLAGS_SET(flags, SHELL_ESCAPE_POSIX)) {
582 *(t++) = '$';
583 *(t++) = '\'';
584 } else
585 *(t++) = '"';
586
587 t = mempcpy(t, s, p - s);
588
589 t = strcpy_backslash_escaped(t, p,
590 FLAGS_SET(flags, SHELL_ESCAPE_POSIX) ? SHELL_NEED_ESCAPE_POSIX : SHELL_NEED_ESCAPE);
591
592 if (FLAGS_SET(flags, SHELL_ESCAPE_POSIX))
593 *(t++) = '\'';
594 else
595 *(t++) = '"';
596 *t = 0;
597
598 return str_realloc(buf);
599 }
600
601 char* quote_command_line(char **argv, ShellEscapeFlags flags) {
602 _cleanup_free_ char *result = NULL;
603
604 assert(argv);
605
606 STRV_FOREACH(a, argv) {
607 _cleanup_free_ char *t = NULL;
608
609 t = shell_maybe_quote(*a, flags);
610 if (!t)
611 return NULL;
612
613 if (!strextend_with_separator(&result, " ", t))
614 return NULL;
615 }
616
617 return str_realloc(TAKE_PTR(result));
618 }