]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/basic/escape.c
Add SPDX license identifiers to source files under the LGPL
[thirdparty/systemd.git] / src / basic / escape.c
CommitLineData
53e1b683 1/* SPDX-License-Identifier: LGPL-2.1+ */
4f5dd394
LP
2/***
3 This file is part of systemd.
4
5 Copyright 2010 Lennart Poettering
6
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
11
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
16
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
19***/
20
11c3a366
TA
21#include <errno.h>
22#include <stdlib.h>
23#include <string.h>
24
b5efdb8a 25#include "alloc-util.h"
e4e73a63
LP
26#include "escape.h"
27#include "hexdecoct.h"
11c3a366 28#include "macro.h"
4f5dd394 29#include "utf8.h"
4f5dd394 30
4f5dd394
LP
size_t cescape_char(char c, char *buf) {
        char *out = buf;
        char letter = 0;

        /* Writes the C-style escaped form of the single character c to buf
         * and returns the number of bytes written (1, 2 or 4). No trailing
         * NUL is written; the caller must provide room for up to four
         * bytes. */

        /* Characters that have a dedicated two-character escape sequence */
        switch (c) {
        case '\a': letter = 'a';  break;
        case '\b': letter = 'b';  break;
        case '\f': letter = 'f';  break;
        case '\n': letter = 'n';  break;
        case '\r': letter = 'r';  break;
        case '\t': letter = 't';  break;
        case '\v': letter = 'v';  break;
        case '\\': letter = '\\'; break;
        case '"':  letter = '"';  break;
        case '\'': letter = '\''; break;
        default:
                break;
        }

        if (letter != 0) {
                *(out++) = '\\';
                *(out++) = letter;
        } else if ((c < ' ') || (c >= 127)) {
                /* Everything else outside the printable ASCII range is
                 * written as a three-digit octal escape. Octal is preferred
                 * over hexadecimal simply because glib's g_strescape() does
                 * the same. */
                *(out++) = '\\';
                *(out++) = octchar((unsigned char) c >> 6);
                *(out++) = octchar((unsigned char) c >> 3);
                *(out++) = octchar((unsigned char) c);
        } else
                /* Plain printable character, copied as-is */
                *(out++) = c;

        return out - buf;
}
93
a5ef3638 94char *cescape_length(const char *s, size_t n) {
4f5dd394 95 const char *f;
a5ef3638 96 char *r, *t;
4f5dd394 97
a5ef3638 98 assert(s || n == 0);
4f5dd394
LP
99
100 /* Does C style string escaping. May be reversed with
101 * cunescape(). */
102
a5ef3638 103 r = new(char, n*4 + 1);
4f5dd394
LP
104 if (!r)
105 return NULL;
106
a5ef3638 107 for (f = s, t = r; f < s + n; f++)
4f5dd394
LP
108 t += cescape_char(*f, t);
109
110 *t = 0;
111
112 return r;
113}
114
a5ef3638
LP
char *cescape(const char *s) {
        size_t n;

        /* C-style escapes the whole NUL-terminated string s; simply
         * measures it and hands off to cescape_length(). */

        assert(s);

        n = strlen(s);
        return cescape_length(s, n);
}
120
int cunescape_one(const char *p, size_t length, char32_t *ret, bool *eight_bit) {
        /* A length of (size_t) -1 means "no explicit limit" */
        const bool bounded = length != (size_t) -1;
        unsigned i;

        assert(p);
        assert(*p);
        assert(ret);

        /* Unescapes a single C style escape sequence. p points at the
         * character right after the backslash. The unescaped character is
         * returned in *ret, and the return value is the number of
         * characters of p that were consumed (1, 3, 5 or 9), or a negative
         * errno-style value if the sequence is not valid.
         *
         * *eight_bit is set to true if the escaped sequence either fits in
         * one byte in UTF-8 or is a non-unicode literal byte and should
         * instead be copied directly (i.e. for \xNN and \NNN). It is never
         * reset to false here, hence the caller has to initialize it. */

        if (bounded && length < 1)
                return -EINVAL;

        switch (p[0]) {

        /* Simple single-letter escapes, one character consumed */
        case 'a':
                *ret = '\a';
                return 1;
        case 'b':
                *ret = '\b';
                return 1;
        case 'f':
                *ret = '\f';
                return 1;
        case 'n':
                *ret = '\n';
                return 1;
        case 'r':
                *ret = '\r';
                return 1;
        case 't':
                *ret = '\t';
                return 1;
        case 'v':
                *ret = '\v';
                return 1;
        case '\\':
                *ret = '\\';
                return 1;
        case '"':
                *ret = '"';
                return 1;
        case '\'':
                *ret = '\'';
                return 1;

        case 's':
                /* This is an extension of the XDG syntax files */
                *ret = ' ';
                return 1;

        case 'x': {
                /* Hexadecimal encoding: \xNN */
                int hi, lo;

                if (bounded && length < 3)
                        return -EINVAL;

                hi = unhexchar(p[1]);
                if (hi < 0)
                        return -EINVAL;

                lo = unhexchar(p[2]);
                if (lo < 0)
                        return -EINVAL;

                /* Don't allow NUL bytes */
                if (hi == 0 && lo == 0)
                        return -EINVAL;

                *ret = (hi << 4U) | lo;
                *eight_bit = true;
                return 3;
        }

        case 'u': {
                /* C++11 style 16bit unicode: \uNNNN */
                uint32_t c = 0;

                if (bounded && length < 5)
                        return -EINVAL;

                /* Digits are read strictly left to right, bailing out on
                 * the first bad one with unhexchar()'s own error code. */
                for (i = 1; i <= 4; i++) {
                        int d = unhexchar(p[i]);

                        if (d < 0)
                                return d;

                        c = (c << 4U) | (uint32_t) d;
                }

                /* Don't allow 0 chars */
                if (c == 0)
                        return -EINVAL;

                *ret = c;
                return 5;
        }

        case 'U': {
                /* C++11 style 32bit unicode: \UNNNNNNNN */
                char32_t c = 0;

                if (bounded && length < 9)
                        return -EINVAL;

                for (i = 1; i <= 8; i++) {
                        int d = unhexchar(p[i]);

                        if (d < 0)
                                return d;

                        c = (c << 4U) | (uint32_t) d;
                }

                /* Don't allow 0 chars */
                if (c == 0)
                        return -EINVAL;

                /* Don't allow invalid code points */
                if (!unichar_is_valid(c))
                        return -EINVAL;

                *ret = c;
                return 9;
        }

        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7': {
                /* Octal encoding: \NNN, always exactly three digits */
                int d0, d1, d2;
                char32_t m;

                if (bounded && length < 3)
                        return -EINVAL;

                d0 = unoctchar(p[0]);
                if (d0 < 0)
                        return -EINVAL;

                d1 = unoctchar(p[1]);
                if (d1 < 0)
                        return -EINVAL;

                d2 = unoctchar(p[2]);
                if (d2 < 0)
                        return -EINVAL;

                /* Don't allow NUL bytes */
                if (d0 == 0 && d1 == 0 && d2 == 0)
                        return -EINVAL;

                /* Don't allow bytes above 255 */
                m = ((uint32_t) d0 << 6U) | ((uint32_t) d1 << 3U) | (uint32_t) d2;
                if (m > 255)
                        return -EINVAL;

                *ret = m;
                *eight_bit = true;
                return 3;
        }

        default:
                /* Unknown escape character */
                return -EINVAL;
        }
}
307
308int cunescape_length_with_prefix(const char *s, size_t length, const char *prefix, UnescapeFlags flags, char **ret) {
309 char *r, *t;
310 const char *f;
311 size_t pl;
312
313 assert(s);
314 assert(ret);
315
316 /* Undoes C style string escaping, and optionally prefixes it. */
317
7bf7ce28 318 pl = strlen_ptr(prefix);
4f5dd394
LP
319
320 r = new(char, pl+length+1);
321 if (!r)
322 return -ENOMEM;
323
324 if (prefix)
325 memcpy(r, prefix, pl);
326
327 for (f = s, t = r + pl; f < s + length; f++) {
328 size_t remaining;
3565e095 329 bool eight_bit = false;
c932fb71 330 char32_t u;
4f5dd394
LP
331 int k;
332
333 remaining = s + length - f;
334 assert(remaining > 0);
335
336 if (*f != '\\') {
629ff674 337 /* A literal, copy verbatim */
4f5dd394
LP
338 *(t++) = *f;
339 continue;
340 }
341
342 if (remaining == 1) {
343 if (flags & UNESCAPE_RELAX) {
344 /* A trailing backslash, copy verbatim */
345 *(t++) = *f;
346 continue;
347 }
348
349 free(r);
350 return -EINVAL;
351 }
352
3565e095 353 k = cunescape_one(f + 1, remaining - 1, &u, &eight_bit);
4f5dd394
LP
354 if (k < 0) {
355 if (flags & UNESCAPE_RELAX) {
356 /* Invalid escape code, let's take it literal then */
357 *(t++) = '\\';
358 continue;
359 }
360
361 free(r);
362 return k;
363 }
364
3565e095
ZJS
365 f += k;
366 if (eight_bit)
367 /* One byte? Set directly as specified */
368 *(t++) = u;
4f5dd394 369 else
3565e095 370 /* Otherwise encode as multi-byte UTF-8 */
4f5dd394 371 t += utf8_encode_unichar(t, u);
4f5dd394
LP
372 }
373
374 *t = 0;
375
376 *ret = r;
377 return t - r;
378}
379
380int cunescape_length(const char *s, size_t length, UnescapeFlags flags, char **ret) {
381 return cunescape_length_with_prefix(s, length, NULL, flags, ret);
382}
383
384int cunescape(const char *s, UnescapeFlags flags, char **ret) {
385 return cunescape_length(s, strlen(s), flags, ret);
386}
387
388char *xescape(const char *s, const char *bad) {
389 char *r, *t;
390 const char *f;
391
392 /* Escapes all chars in bad, in addition to \ and all special
393 * chars, in \xFF style escaping. May be reversed with
394 * cunescape(). */
395
396 r = new(char, strlen(s) * 4 + 1);
397 if (!r)
398 return NULL;
399
400 for (f = s, t = r; *f; f++) {
401
402 if ((*f < ' ') || (*f >= 127) ||
403 (*f == '\\') || strchr(bad, *f)) {
404 *(t++) = '\\';
405 *(t++) = 'x';
406 *(t++) = hexchar(*f >> 4);
407 *(t++) = hexchar(*f);
408 } else
409 *(t++) = *f;
410 }
411
412 *t = 0;
413
414 return r;
415}
416
95052df3
ZJS
char *octescape(const char *s, size_t len) {
        char *r, *t;
        size_t i;

        /* Escapes the first len bytes of s in \nnn (three digit octal)
         * style: all bytes below ' ' or from 127 on, plus the \ and "
         * characters, are escaped; everything else is copied verbatim.
         * Returns a newly allocated NUL-terminated string which the caller
         * has to free(), or NULL if it cannot be allocated. */

        /* Each byte may grow to four characters. Refuse lengths for which
         * len*4 + 1 would wrap around and result in a too small buffer. */
        if (len > (((size_t) -1) - 1) / 4)
                return NULL;

        r = malloc(len * 4 + 1);
        if (!r)
                return NULL;

        for (i = 0, t = r; i < len; i++) {
                /* Work on the unsigned value, so that bytes >= 0x80 do not
                 * turn into negative numbers when shifted on platforms
                 * where char is signed. */
                unsigned char u = (unsigned char) s[i];

                if (u < ' ' || u >= 127 || u == '\\' || u == '"') {
                        *(t++) = '\\';
                        *(t++) = (char) ('0' + (u >> 6));
                        /* Each octal digit is three bits wide, hence the
                         * mask is 7 (and not 8, which would only ever keep
                         * a single, wrong bit). */
                        *(t++) = (char) ('0' + ((u >> 3) & 7));
                        *(t++) = (char) ('0' + (u & 7));
                } else
                        *(t++) = (char) u;
        }

        *t = 0;

        return r;
}
444
static char *strcpy_backslash_escaped(char *t, const char *s, const char *bad, bool escape_tab_nl) {
        const char *in;
        char *out = t;

        /* Copies s to t, putting a backslash in front of every backslash
         * and every character listed in bad. If escape_tab_nl is true,
         * newline and tab are written as \n and \t instead. No trailing NUL
         * is written; the position right after the last written character
         * is returned. */

        assert(bad);

        for (in = s; *in; in++) {
                char c = *in;

                if (escape_tab_nl && IN_SET(c, '\n', '\t')) {
                        *(out++) = '\\';
                        if (c == '\n')
                                *(out++) = 'n';
                        else
                                *(out++) = 't';
                        continue;
                }

                if (c == '\\' || strchr(bad, c))
                        *(out++) = '\\';

                *(out++) = c;
        }

        return out;
}
463
464char *shell_escape(const char *s, const char *bad) {
465 char *r, *t;
466
467 r = new(char, strlen(s)*2+1);
468 if (!r)
469 return NULL;
470
804ee07c 471 t = strcpy_backslash_escaped(r, s, bad, false);
4f5dd394
LP
472 *t = 0;
473
474 return r;
475}
476
804ee07c 477char* shell_maybe_quote(const char *s, EscapeStyle style) {
4f5dd394
LP
478 const char *p;
479 char *r, *t;
480
481 assert(s);
482
804ee07c
ZJS
483 /* Encloses a string in quotes if necessary to make it OK as a shell
484 * string. Note that we treat benign UTF-8 characters as needing
485 * escaping too, but that should be OK. */
4f5dd394
LP
486
487 for (p = s; *p; p++)
488 if (*p <= ' ' ||
489 *p >= 127 ||
490 strchr(SHELL_NEED_QUOTES, *p))
491 break;
492
493 if (!*p)
494 return strdup(s);
495
804ee07c 496 r = new(char, (style == ESCAPE_POSIX) + 1 + strlen(s)*2 + 1 + 1);
4f5dd394
LP
497 if (!r)
498 return NULL;
499
500 t = r;
804ee07c
ZJS
501 if (style == ESCAPE_BACKSLASH)
502 *(t++) = '"';
503 else if (style == ESCAPE_POSIX) {
504 *(t++) = '$';
505 *(t++) = '\'';
506 } else
507 assert_not_reached("Bad EscapeStyle");
508
4f5dd394
LP
509 t = mempcpy(t, s, p - s);
510
804ee07c
ZJS
511 if (style == ESCAPE_BACKSLASH)
512 t = strcpy_backslash_escaped(t, p, SHELL_NEED_ESCAPE, false);
513 else
514 t = strcpy_backslash_escaped(t, p, SHELL_NEED_ESCAPE_POSIX, true);
4f5dd394 515
804ee07c
ZJS
516 if (style == ESCAPE_BACKSLASH)
517 *(t++) = '"';
518 else
519 *(t++) = '\'';
4f5dd394
LP
520 *t = 0;
521
522 return r;
523}