]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/basic/escape.c
Merge pull request #2440 from poettering/journal-fix
[thirdparty/systemd.git] / src / basic / escape.c
CommitLineData
4f5dd394
LP
1/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3/***
4 This file is part of systemd.
5
6 Copyright 2010 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20***/
21
11c3a366
TA
22#include <errno.h>
23#include <stdlib.h>
24#include <string.h>
25
b5efdb8a 26#include "alloc-util.h"
e4e73a63
LP
27#include "escape.h"
28#include "hexdecoct.h"
11c3a366 29#include "macro.h"
4f5dd394 30#include "utf8.h"
4f5dd394 31
4f5dd394
LP
size_t cescape_char(char c, char *buf) {
        /* Writes the C-style escaped form of the single character c to buf and
         * returns the number of bytes written (between 1 and 4). No trailing NUL
         * is written; the caller must supply a buffer of at least 4 bytes. */

        char *out = buf;
        char letter = 0;

        /* Characters with a dedicated two-character escape: figure out which
         * character follows the backslash. */
        switch (c) {

        case '\a': letter = 'a'; break;
        case '\b': letter = 'b'; break;
        case '\f': letter = 'f'; break;
        case '\n': letter = 'n'; break;
        case '\r': letter = 'r'; break;
        case '\t': letter = 't'; break;
        case '\v': letter = 'v'; break;

        case '\\':
        case '"':
        case '\'':
                /* These escape to themselves, prefixed by a backslash */
                letter = c;
                break;

        default:
                break;
        }

        if (letter != 0) {
                *(out++) = '\\';
                *(out++) = letter;
        } else if (c < ' ' || c >= 127) {
                /* For special chars we prefer octal over hexadecimal
                 * encoding, simply because glib's g_strescape() does
                 * the same */
                unsigned char u = (unsigned char) c;

                *(out++) = '\\';
                *(out++) = octchar(u >> 6);
                *(out++) = octchar(u >> 3);
                *(out++) = octchar(u);
        } else
                /* Plain printable character, copy as is */
                *(out++) = c;

        return out - buf;
}
94
a5ef3638 95char *cescape_length(const char *s, size_t n) {
4f5dd394 96 const char *f;
a5ef3638 97 char *r, *t;
4f5dd394 98
a5ef3638 99 assert(s || n == 0);
4f5dd394
LP
100
101 /* Does C style string escaping. May be reversed with
102 * cunescape(). */
103
a5ef3638 104 r = new(char, n*4 + 1);
4f5dd394
LP
105 if (!r)
106 return NULL;
107
a5ef3638 108 for (f = s, t = r; f < s + n; f++)
4f5dd394
LP
109 t += cescape_char(*f, t);
110
111 *t = 0;
112
113 return r;
114}
115
a5ef3638
LP
char *cescape(const char *s) {
        size_t len;

        /* Escapes the whole NUL-terminated string s, see cescape_length(). */

        assert(s);

        len = strlen(s);
        return cescape_length(s, len);
}
121
int cunescape_one(const char *p, size_t length, uint32_t *ret, bool *eight_bit) {
        /* The length checks below are skipped when length is (size_t) -1 */
        const bool bounded = length != (size_t) -1;
        int consumed = 1;

        assert(p);
        assert(*p);
        assert(ret);

        /* Unescapes C style. p points to the character right after the
         * backslash. Returns the unescaped character in ret.
         * Sets *eight_bit to true if the escaped sequence either fits in
         * one byte in UTF-8 or is a non-unicode literal byte and should
         * instead be copied directly.
         *
         * On success the return value is the number of characters of p that
         * made up the sequence (1, 3, 5 or 9), on failure a negative
         * errno-style error.
         */

        if (bounded && length < 1)
                return -EINVAL;

        switch (p[0]) {

        /* Single character escapes */
        case 'a':  *ret = '\a'; break;
        case 'b':  *ret = '\b'; break;
        case 'f':  *ret = '\f'; break;
        case 'n':  *ret = '\n'; break;
        case 'r':  *ret = '\r'; break;
        case 't':  *ret = '\t'; break;
        case 'v':  *ret = '\v'; break;
        case '\\': *ret = '\\'; break;
        case '"':  *ret = '"';  break;
        case '\'': *ret = '\''; break;

        case 's':
                /* This is an extension of the XDG syntax files */
                *ret = ' ';
                break;

        case 'x': {
                /* hexadecimal encoding: exactly two hex digits */
                int hi, lo;

                if (bounded && length < 3)
                        return -EINVAL;

                /* Validate the first digit before touching the second one, so
                 * that we never read beyond a terminating NUL */
                hi = unhexchar(p[1]);
                if (hi < 0)
                        return -EINVAL;

                lo = unhexchar(p[2]);
                if (lo < 0)
                        return -EINVAL;

                /* Don't allow NUL bytes */
                if (hi == 0 && lo == 0)
                        return -EINVAL;

                *ret = (hi << 4U) | lo;
                *eight_bit = true;
                consumed = 3;
                break;
        }

        case 'u':
        case 'U': {
                /* C++11 style unicode: \u takes 4 hex digits (16bit),
                 * \U takes 8 hex digits (32bit) */
                const unsigned digits = p[0] == 'u' ? 4 : 8;
                uint32_t c = 0;
                unsigned i;

                if (bounded && length < digits + 1)
                        return -EINVAL;

                /* Accumulate one nibble at a time, most significant first.
                 * Stop at the first invalid digit and pass on the error. */
                for (i = 1; i <= digits; i++) {
                        int d;

                        d = unhexchar(p[i]);
                        if (d < 0)
                                return d;

                        c = (c << 4U) | (uint32_t) d;
                }

                /* Don't allow 0 chars */
                if (c == 0)
                        return -EINVAL;

                /* Don't allow invalid code points (checked for the 32bit form only) */
                if (p[0] == 'U' && !unichar_is_valid(c))
                        return -EINVAL;

                *ret = c;
                consumed = (int) (digits + 1);
                break;
        }

        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7': {
                /* octal encoding: exactly three octal digits */
                int d0, d1, d2;
                uint32_t m;

                if (bounded && length < 3)
                        return -EINVAL;

                d0 = unoctchar(p[0]);
                if (d0 < 0)
                        return -EINVAL;

                d1 = unoctchar(p[1]);
                if (d1 < 0)
                        return -EINVAL;

                d2 = unoctchar(p[2]);
                if (d2 < 0)
                        return -EINVAL;

                m = ((uint32_t) d0 << 6U) | ((uint32_t) d1 << 3U) | (uint32_t) d2;

                /* Don't allow NUL bytes, and don't allow bytes above 255 */
                if (m == 0 || m > 255)
                        return -EINVAL;

                *ret = m;
                *eight_bit = true;
                consumed = 3;
                break;
        }

        default:
                /* Unknown escape character */
                return -EINVAL;
        }

        return consumed;
}
308
309int cunescape_length_with_prefix(const char *s, size_t length, const char *prefix, UnescapeFlags flags, char **ret) {
310 char *r, *t;
311 const char *f;
312 size_t pl;
313
314 assert(s);
315 assert(ret);
316
317 /* Undoes C style string escaping, and optionally prefixes it. */
318
319 pl = prefix ? strlen(prefix) : 0;
320
321 r = new(char, pl+length+1);
322 if (!r)
323 return -ENOMEM;
324
325 if (prefix)
326 memcpy(r, prefix, pl);
327
328 for (f = s, t = r + pl; f < s + length; f++) {
329 size_t remaining;
dcd12626 330 uint32_t u;
3565e095 331 bool eight_bit = false;
4f5dd394
LP
332 int k;
333
334 remaining = s + length - f;
335 assert(remaining > 0);
336
337 if (*f != '\\') {
338 /* A literal literal, copy verbatim */
339 *(t++) = *f;
340 continue;
341 }
342
343 if (remaining == 1) {
344 if (flags & UNESCAPE_RELAX) {
345 /* A trailing backslash, copy verbatim */
346 *(t++) = *f;
347 continue;
348 }
349
350 free(r);
351 return -EINVAL;
352 }
353
3565e095 354 k = cunescape_one(f + 1, remaining - 1, &u, &eight_bit);
4f5dd394
LP
355 if (k < 0) {
356 if (flags & UNESCAPE_RELAX) {
357 /* Invalid escape code, let's take it literal then */
358 *(t++) = '\\';
359 continue;
360 }
361
362 free(r);
363 return k;
364 }
365
3565e095
ZJS
366 f += k;
367 if (eight_bit)
368 /* One byte? Set directly as specified */
369 *(t++) = u;
4f5dd394 370 else
3565e095 371 /* Otherwise encode as multi-byte UTF-8 */
4f5dd394 372 t += utf8_encode_unichar(t, u);
4f5dd394
LP
373 }
374
375 *t = 0;
376
377 *ret = r;
378 return t - r;
379}
380
381int cunescape_length(const char *s, size_t length, UnescapeFlags flags, char **ret) {
382 return cunescape_length_with_prefix(s, length, NULL, flags, ret);
383}
384
385int cunescape(const char *s, UnescapeFlags flags, char **ret) {
386 return cunescape_length(s, strlen(s), flags, ret);
387}
388
389char *xescape(const char *s, const char *bad) {
390 char *r, *t;
391 const char *f;
392
393 /* Escapes all chars in bad, in addition to \ and all special
394 * chars, in \xFF style escaping. May be reversed with
395 * cunescape(). */
396
397 r = new(char, strlen(s) * 4 + 1);
398 if (!r)
399 return NULL;
400
401 for (f = s, t = r; *f; f++) {
402
403 if ((*f < ' ') || (*f >= 127) ||
404 (*f == '\\') || strchr(bad, *f)) {
405 *(t++) = '\\';
406 *(t++) = 'x';
407 *(t++) = hexchar(*f >> 4);
408 *(t++) = hexchar(*f);
409 } else
410 *(t++) = *f;
411 }
412
413 *t = 0;
414
415 return r;
416}
417
static char *strcpy_backslash_escaped(char *t, const char *s, const char *bad) {
        const char *src = s;
        char *dst = t;

        /* Copies s to t, putting a backslash in front of every backslash and
         * every character listed in bad. Does not write a trailing NUL.
         * Returns a pointer just past the last byte written. */

        assert(bad);

        while (*src) {
                char c = *src++;

                if (c == '\\' || strchr(bad, c))
                        *dst++ = '\\';

                *dst++ = c;
        }

        return dst;
}
430
431char *shell_escape(const char *s, const char *bad) {
432 char *r, *t;
433
434 r = new(char, strlen(s)*2+1);
435 if (!r)
436 return NULL;
437
438 t = strcpy_backslash_escaped(r, s, bad);
439 *t = 0;
440
441 return r;
442}
443
444char *shell_maybe_quote(const char *s) {
445 const char *p;
446 char *r, *t;
447
448 assert(s);
449
450 /* Encloses a string in double quotes if necessary to make it
451 * OK as shell string. */
452
453 for (p = s; *p; p++)
454 if (*p <= ' ' ||
455 *p >= 127 ||
456 strchr(SHELL_NEED_QUOTES, *p))
457 break;
458
459 if (!*p)
460 return strdup(s);
461
462 r = new(char, 1+strlen(s)*2+1+1);
463 if (!r)
464 return NULL;
465
466 t = r;
467 *(t++) = '"';
468 t = mempcpy(t, s, p - s);
469
470 t = strcpy_backslash_escaped(t, p, SHELL_NEED_ESCAPE);
471
472 *(t++)= '"';
473 *t = 0;
474
475 return r;
476}