]>
Commit | Line | Data |
---|---|---|
1 | /* SPDX-License-Identifier: LGPL-2.1-or-later */ | |
2 | ||
3 | #include <stdlib.h> | |
4 | #include <string.h> | |
5 | ||
6 | #include "alloc-util.h" | |
7 | #include "escape.h" | |
8 | #include "hexdecoct.h" | |
9 | #include "string-util.h" | |
10 | #include "strv.h" | |
11 | #include "utf8.h" | |
12 | ||
int cescape_char(char c, char *buf) {
        char *out = buf;
        char letter = 0;

        /* Writes the C-style escaped representation of c into buf and returns the number of bytes
         * written (1, 2 or 4). No NUL terminator is appended. The caller must provide room for
         * at least 4 bytes. */

        /* First figure out whether c has a dedicated two-character escape sequence. */
        switch (c) {
        case '\a':
                letter = 'a';
                break;
        case '\b':
                letter = 'b';
                break;
        case '\f':
                letter = 'f';
                break;
        case '\n':
                letter = 'n';
                break;
        case '\r':
                letter = 'r';
                break;
        case '\t':
                letter = 't';
                break;
        case '\v':
                letter = 'v';
                break;
        case '\\':
                letter = '\\';
                break;
        case '"':
                letter = '"';
                break;
        case '\'':
                letter = '\'';
                break;
        default:
                break;
        }

        if (letter != 0) {
                *(out++) = '\\';
                *(out++) = letter;
        } else if ((c < ' ') || (c >= 127)) {
                /* For the remaining special chars we prefer octal over hexadecimal encoding,
                 * simply because glib's g_strescape() does the same. */
                unsigned char u = (unsigned char) c;

                *(out++) = '\\';
                *(out++) = octchar(u >> 6);
                *(out++) = octchar(u >> 3);
                *(out++) = octchar(u);
        } else
                /* Plain printable character, pass through unchanged. */
                *(out++) = c;

        return out - buf;
}
77 | ||
78 | char* cescape_length(const char *s, size_t n) { | |
79 | const char *f; | |
80 | char *r, *t; | |
81 | ||
82 | /* Does C style string escaping. May be reversed with cunescape(). */ | |
83 | ||
84 | assert(s || n == 0); | |
85 | ||
86 | if (n == SIZE_MAX) | |
87 | n = strlen(s); | |
88 | ||
89 | if (n > (SIZE_MAX - 1) / 4) | |
90 | return NULL; | |
91 | ||
92 | r = new(char, n*4 + 1); | |
93 | if (!r) | |
94 | return NULL; | |
95 | ||
96 | for (f = s, t = r; f < s + n; f++) | |
97 | t += cescape_char(*f, t); | |
98 | ||
99 | *t = 0; | |
100 | ||
101 | return r; | |
102 | } | |
103 | ||
int cunescape_one(const char *p, size_t length, char32_t *ret, bool *eight_bit, bool accept_nul) {
        assert(p);
        assert(ret);

        /* Unescapes a single C style escape sequence. p points at the character following the
         * backslash; length is the number of bytes available at p, or SIZE_MAX if unknown.
         *
         * On success the unescaped character is stored in *ret and the number of bytes of p that
         * belong to the sequence is returned (1, 3, 5 or 9). For "\xHH" and octal sequences
         * *eight_bit is additionally set to true, telling the caller to copy the value as a single
         * literal byte instead of encoding it as UTF-8; for all other sequences *eight_bit is left
         * untouched.
         *
         * Returns a negative errno-style value if the sequence is unknown, truncated, malformed, or
         * denotes NUL while accept_nul is false. */

        if (length != SIZE_MAX && length < 1)
                return -EINVAL;

        switch (p[0]) {

        /* Single-character escapes */
        case 'a':
                *ret = '\a';
                return 1;
        case 'b':
                *ret = '\b';
                return 1;
        case 'f':
                *ret = '\f';
                return 1;
        case 'n':
                *ret = '\n';
                return 1;
        case 'r':
                *ret = '\r';
                return 1;
        case 't':
                *ret = '\t';
                return 1;
        case 'v':
                *ret = '\v';
                return 1;
        case '\\':
                *ret = '\\';
                return 1;
        case '"':
                *ret = '"';
                return 1;
        case '\'':
                *ret = '\'';
                return 1;

        case 's':
                /* This is an extension of the XDG syntax files */
                *ret = ' ';
                return 1;

        case 'x': {
                /* Hexadecimal encoding: exactly two hex digits */
                int hi, lo;

                if (length != SIZE_MAX && length < 3)
                        return -EINVAL;

                /* Validate digit by digit, so that we never look past a terminating NUL. */
                hi = unhexchar(p[1]);
                if (hi < 0)
                        return -EINVAL;

                lo = unhexchar(p[2]);
                if (lo < 0)
                        return -EINVAL;

                /* Don't allow NUL bytes */
                if (!accept_nul && hi == 0 && lo == 0)
                        return -EINVAL;

                *ret = (hi << 4U) | lo;
                *eight_bit = true;
                return 3;
        }

        case 'u': {
                /* C++11 style 16-bit unicode: exactly four hex digits */
                uint32_t cp = 0;

                if (length != SIZE_MAX && length < 5)
                        return -EINVAL;

                for (size_t i = 1; i <= 4; i++) {
                        int digit = unhexchar(p[i]);
                        if (digit < 0)
                                return digit; /* propagate the error reported by unhexchar() */

                        cp = (cp << 4U) | (uint32_t) digit;
                }

                /* Don't allow 0 chars */
                if (cp == 0 && !accept_nul)
                        return -EINVAL;

                *ret = cp;
                return 5;
        }

        case 'U': {
                /* C++11 style 32-bit unicode: exactly eight hex digits */
                uint32_t acc = 0;
                char32_t cp;

                if (length != SIZE_MAX && length < 9)
                        return -EINVAL;

                for (size_t i = 1; i <= 8; i++) {
                        int digit = unhexchar(p[i]);
                        if (digit < 0)
                                return digit; /* propagate the error reported by unhexchar() */

                        acc = (acc << 4U) | (uint32_t) digit;
                }

                cp = acc;

                /* Don't allow 0 chars */
                if (cp == 0 && !accept_nul)
                        return -EINVAL;

                /* Don't allow invalid code points */
                if (!unichar_is_valid(cp))
                        return -EINVAL;

                *ret = cp;
                return 9;
        }

        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7': {
                /* Octal encoding: exactly three octal digits, the first of which is p[0] itself */
                int d0, d1, d2;
                char32_t value;

                if (length != SIZE_MAX && length < 3)
                        return -EINVAL;

                d0 = unoctchar(p[0]);
                if (d0 < 0)
                        return -EINVAL;

                d1 = unoctchar(p[1]);
                if (d1 < 0)
                        return -EINVAL;

                d2 = unoctchar(p[2]);
                if (d2 < 0)
                        return -EINVAL;

                /* Don't allow NUL bytes */
                if (!accept_nul && d0 == 0 && d1 == 0 && d2 == 0)
                        return -EINVAL;

                /* Don't allow bytes above 255 */
                value = ((uint32_t) d0 << 6U) | ((uint32_t) d1 << 3U) | (uint32_t) d2;
                if (value > 255)
                        return -EINVAL;

                *ret = value;
                *eight_bit = true;
                return 3;
        }

        default:
                /* Unknown escape sequence */
                return -EINVAL;
        }
}
289 | ||
290 | ssize_t cunescape_length_with_prefix(const char *s, size_t length, const char *prefix, UnescapeFlags flags, char **ret) { | |
291 | _cleanup_free_ char *ans = NULL; | |
292 | char *t; | |
293 | const char *f; | |
294 | size_t pl; | |
295 | int r; | |
296 | ||
297 | assert(s); | |
298 | assert(ret); | |
299 | ||
300 | /* Undoes C style string escaping, and optionally prefixes it. */ | |
301 | ||
302 | if (length == SIZE_MAX) | |
303 | length = strlen(s); | |
304 | ||
305 | pl = strlen_ptr(prefix); | |
306 | ||
307 | ans = new(char, pl+length+1); | |
308 | if (!ans) | |
309 | return -ENOMEM; | |
310 | ||
311 | if (prefix) | |
312 | memcpy(ans, prefix, pl); | |
313 | ||
314 | for (f = s, t = ans + pl; f < s + length; f++) { | |
315 | size_t remaining; | |
316 | bool eight_bit = false; | |
317 | char32_t u; | |
318 | ||
319 | remaining = s + length - f; | |
320 | assert(remaining > 0); | |
321 | ||
322 | if (*f != '\\') { | |
323 | /* A literal, copy verbatim */ | |
324 | *(t++) = *f; | |
325 | continue; | |
326 | } | |
327 | ||
328 | if (remaining == 1) { | |
329 | if (flags & UNESCAPE_RELAX) { | |
330 | /* A trailing backslash, copy verbatim */ | |
331 | *(t++) = *f; | |
332 | continue; | |
333 | } | |
334 | ||
335 | return -EINVAL; | |
336 | } | |
337 | ||
338 | r = cunescape_one(f + 1, remaining - 1, &u, &eight_bit, flags & UNESCAPE_ACCEPT_NUL); | |
339 | if (r < 0) { | |
340 | if (flags & UNESCAPE_RELAX) { | |
341 | /* Invalid escape code, let's take it literal then */ | |
342 | *(t++) = '\\'; | |
343 | continue; | |
344 | } | |
345 | ||
346 | return r; | |
347 | } | |
348 | ||
349 | f += r; | |
350 | if (eight_bit) | |
351 | /* One byte? Set directly as specified */ | |
352 | *(t++) = u; | |
353 | else | |
354 | /* Otherwise encode as multi-byte UTF-8 */ | |
355 | t += utf8_encode_unichar(t, u); | |
356 | } | |
357 | ||
358 | *t = 0; | |
359 | ||
360 | assert(t >= ans); /* Let static analyzers know that the answer is non-negative. */ | |
361 | *ret = TAKE_PTR(ans); | |
362 | return t - *ret; | |
363 | } | |
364 | ||
365 | char* xescape_full(const char *s, const char *bad, size_t console_width, XEscapeFlags flags) { | |
366 | char *ans, *t, *prev, *prev2; | |
367 | const char *f; | |
368 | ||
369 | assert(s); | |
370 | ||
371 | /* Escapes all chars in bad, in addition to \ and all special chars, in \xFF style escaping. May be | |
372 | * reversed with cunescape(). If XESCAPE_8_BIT is specified, characters >= 127 are let through | |
373 | * unchanged. This corresponds to non-ASCII printable characters in pre-unicode encodings. | |
374 | * | |
375 | * If console_width is reached, or XESCAPE_FORCE_ELLIPSIS is set, output is truncated and "..." is | |
376 | * appended. */ | |
377 | ||
378 | if (console_width == 0) | |
379 | return strdup(""); | |
380 | ||
381 | ans = new(char, MIN(strlen(s), console_width) * 4 + 1); | |
382 | if (!ans) | |
383 | return NULL; | |
384 | ||
385 | memset(ans, '_', MIN(strlen(s), console_width) * 4); | |
386 | ans[MIN(strlen(s), console_width) * 4] = 0; | |
387 | ||
388 | bool force_ellipsis = FLAGS_SET(flags, XESCAPE_FORCE_ELLIPSIS); | |
389 | ||
390 | for (f = s, t = prev = prev2 = ans; ; f++) { | |
391 | char *tmp_t = t; | |
392 | ||
393 | if (!*f) { | |
394 | if (force_ellipsis) | |
395 | break; | |
396 | ||
397 | *t = 0; | |
398 | return ans; | |
399 | } | |
400 | ||
401 | if ((unsigned char) *f < ' ' || | |
402 | (!FLAGS_SET(flags, XESCAPE_8_BIT) && (unsigned char) *f >= 127) || | |
403 | *f == '\\' || (bad && strchr(bad, *f))) { | |
404 | if ((size_t) (t - ans) + 4 + 3 * force_ellipsis > console_width) | |
405 | break; | |
406 | ||
407 | *(t++) = '\\'; | |
408 | *(t++) = 'x'; | |
409 | *(t++) = hexchar(*f >> 4); | |
410 | *(t++) = hexchar(*f); | |
411 | } else { | |
412 | if ((size_t) (t - ans) + 1 + 3 * force_ellipsis > console_width) | |
413 | break; | |
414 | ||
415 | *(t++) = *f; | |
416 | } | |
417 | ||
418 | /* We might need to go back two cycles to fit three dots, so remember two positions */ | |
419 | prev2 = prev; | |
420 | prev = tmp_t; | |
421 | } | |
422 | ||
423 | /* We can just write where we want, since chars are one-byte */ | |
424 | size_t c = MIN(console_width, 3u); /* If the console is too narrow, write fewer dots */ | |
425 | size_t off; | |
426 | if (console_width - c >= (size_t) (t - ans)) | |
427 | off = (size_t) (t - ans); | |
428 | else if (console_width - c >= (size_t) (prev - ans)) | |
429 | off = (size_t) (prev - ans); | |
430 | else if (console_width - c >= (size_t) (prev2 - ans)) | |
431 | off = (size_t) (prev2 - ans); | |
432 | else | |
433 | off = console_width - c; | |
434 | assert(off <= (size_t) (t - ans)); | |
435 | ||
436 | memcpy(ans + off, "...", c); | |
437 | ans[off + c] = '\0'; | |
438 | return ans; | |
439 | } | |
440 | ||
441 | char* escape_non_printable_full(const char *str, size_t console_width, XEscapeFlags flags) { | |
442 | if (FLAGS_SET(flags, XESCAPE_8_BIT)) | |
443 | return xescape_full(str, /* bad= */ NULL, console_width, flags); | |
444 | else | |
445 | return utf8_escape_non_printable_full(str, | |
446 | console_width, | |
447 | FLAGS_SET(flags, XESCAPE_FORCE_ELLIPSIS)); | |
448 | } | |
449 | ||
450 | char* octescape(const char *s, size_t len) { | |
451 | char *buf, *t; | |
452 | ||
453 | /* Escapes \ and " chars, in \nnn style escaping. */ | |
454 | ||
455 | assert(s || len == 0); | |
456 | ||
457 | if (len == SIZE_MAX) | |
458 | len = strlen(s); | |
459 | ||
460 | if (len > (SIZE_MAX - 1) / 4) | |
461 | return NULL; | |
462 | ||
463 | t = buf = new(char, len * 4 + 1); | |
464 | if (!buf) | |
465 | return NULL; | |
466 | ||
467 | for (size_t i = 0; i < len; i++) { | |
468 | uint8_t u = (uint8_t) s[i]; | |
469 | ||
470 | if (u < ' ' || u >= 127 || IN_SET(u, '\\', '"')) { | |
471 | *(t++) = '\\'; | |
472 | *(t++) = '0' + (u >> 6); | |
473 | *(t++) = '0' + ((u >> 3) & 7); | |
474 | *(t++) = '0' + (u & 7); | |
475 | } else | |
476 | *(t++) = u; | |
477 | } | |
478 | ||
479 | *t = 0; | |
480 | return buf; | |
481 | } | |
482 | ||
483 | char* decescape(const char *s, size_t len, const char *bad) { | |
484 | char *buf, *t; | |
485 | ||
486 | /* Escapes all chars in bad, in addition to \ and " chars, in \nnn decimal style escaping. */ | |
487 | ||
488 | assert(s || len == 0); | |
489 | ||
490 | if (len == SIZE_MAX) | |
491 | len = strlen(s); | |
492 | ||
493 | if (len > (SIZE_MAX - 1) / 4) | |
494 | return NULL; | |
495 | ||
496 | t = buf = new(char, len * 4 + 1); | |
497 | if (!buf) | |
498 | return NULL; | |
499 | ||
500 | for (size_t i = 0; i < len; i++) { | |
501 | uint8_t u = (uint8_t) s[i]; | |
502 | ||
503 | if (u < ' ' || u >= 127 || IN_SET(u, '\\', '"') || strchr(bad, u)) { | |
504 | *(t++) = '\\'; | |
505 | *(t++) = '0' + (u / 100); | |
506 | *(t++) = '0' + ((u / 10) % 10); | |
507 | *(t++) = '0' + (u % 10); | |
508 | } else | |
509 | *(t++) = u; | |
510 | } | |
511 | ||
512 | *t = 0; | |
513 | return buf; | |
514 | } | |
515 | ||
static char* strcpy_backslash_escaped(char *t, const char *s, const char *bad) {
        assert(bad);
        assert(t);
        assert(s);

        /* Copies s to t, escaping on the way: control characters and bytes that do not start a valid
         * UTF-8 sequence go through cescape_char(), single-byte characters that are a backslash or
         * listed in bad get a backslash prepended, and valid multi-byte UTF-8 sequences are copied
         * unchanged. No NUL terminator is written. Returns the position right after the last byte
         * written. */

        for (const char *in = s; *in; ) {
                int l = utf8_encoded_valid_unichar(in, SIZE_MAX);

                if (char_is_cc(*in) || l < 0) {
                        /* Escape this one byte and move on */
                        t += cescape_char(*in, t);
                        in++;
                        continue;
                }

                if (l != 1) {
                        /* Valid multi-byte sequence, take it as a whole */
                        t = mempcpy(t, in, l);
                        in += l;
                        continue;
                }

                if (*in == '\\' || strchr(bad, *in))
                        *(t++) = '\\';

                *(t++) = *in;
                in++;
        }

        return t;
}
538 | ||
539 | char* shell_escape(const char *s, const char *bad) { | |
540 | char *buf, *t; | |
541 | ||
542 | buf = new(char, strlen(s)*4+1); | |
543 | if (!buf) | |
544 | return NULL; | |
545 | ||
546 | t = strcpy_backslash_escaped(buf, s, bad); | |
547 | *t = 0; | |
548 | ||
549 | return buf; | |
550 | } | |
551 | ||
552 | char* shell_maybe_quote(const char *s, ShellEscapeFlags flags) { | |
553 | const char *p; | |
554 | char *buf, *t; | |
555 | ||
556 | assert(s); | |
557 | ||
558 | /* Encloses a string in quotes if necessary to make it OK as a shell string. */ | |
559 | ||
560 | if (FLAGS_SET(flags, SHELL_ESCAPE_EMPTY) && isempty(s)) | |
561 | return strdup("\"\""); /* We don't use $'' here in the POSIX mode. "" is fine too. */ | |
562 | ||
563 | for (p = s; *p; ) { | |
564 | int l = utf8_encoded_valid_unichar(p, SIZE_MAX); | |
565 | ||
566 | if (char_is_cc(*p) || l < 0 || | |
567 | strchr(WHITESPACE SHELL_NEED_QUOTES, *p)) | |
568 | break; | |
569 | ||
570 | p += l; | |
571 | } | |
572 | ||
573 | if (!*p) | |
574 | return strdup(s); | |
575 | ||
576 | buf = new(char, FLAGS_SET(flags, SHELL_ESCAPE_POSIX) + 1 + strlen(s)*4 + 1 + 1); | |
577 | if (!buf) | |
578 | return NULL; | |
579 | ||
580 | t = buf; | |
581 | if (FLAGS_SET(flags, SHELL_ESCAPE_POSIX)) { | |
582 | *(t++) = '$'; | |
583 | *(t++) = '\''; | |
584 | } else | |
585 | *(t++) = '"'; | |
586 | ||
587 | t = mempcpy(t, s, p - s); | |
588 | ||
589 | t = strcpy_backslash_escaped(t, p, | |
590 | FLAGS_SET(flags, SHELL_ESCAPE_POSIX) ? SHELL_NEED_ESCAPE_POSIX : SHELL_NEED_ESCAPE); | |
591 | ||
592 | if (FLAGS_SET(flags, SHELL_ESCAPE_POSIX)) | |
593 | *(t++) = '\''; | |
594 | else | |
595 | *(t++) = '"'; | |
596 | *t = 0; | |
597 | ||
598 | return str_realloc(buf); | |
599 | } | |
600 | ||
601 | char* quote_command_line(char **argv, ShellEscapeFlags flags) { | |
602 | _cleanup_free_ char *result = NULL; | |
603 | ||
604 | assert(argv); | |
605 | ||
606 | STRV_FOREACH(a, argv) { | |
607 | _cleanup_free_ char *t = NULL; | |
608 | ||
609 | t = shell_maybe_quote(*a, flags); | |
610 | if (!t) | |
611 | return NULL; | |
612 | ||
613 | if (!strextend_with_separator(&result, " ", t)) | |
614 | return NULL; | |
615 | } | |
616 | ||
617 | return str_realloc(TAKE_PTR(result)); | |
618 | } |