]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/escape.c
Merge pull request #1750 from systemd/revert-1740-master
[thirdparty/systemd.git] / src / basic / escape.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright 2010 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include "alloc-util.h"
23 #include "escape.h"
24 #include "hexdecoct.h"
25 #include "utf8.h"
26 #include "util.h"
27
size_t cescape_char(char c, char *buf) {
        char *out = buf;
        char letter = 0;

        /* Writes the C-style escaped form of the single character c to buf
         * and returns the number of bytes written: 1 for a character copied
         * as-is, 2 for a backslash-plus-letter sequence, 4 for a backslash
         * followed by three octal digits. No trailing NUL is written, hence
         * the caller has to provide room for at least 4 bytes. */

        /* First, pick the characters that have a dedicated two-character
         * escape sequence. */
        switch (c) {

        case '\a':
                letter = 'a';
                break;
        case '\b':
                letter = 'b';
                break;
        case '\f':
                letter = 'f';
                break;
        case '\n':
                letter = 'n';
                break;
        case '\r':
                letter = 'r';
                break;
        case '\t':
                letter = 't';
                break;
        case '\v':
                letter = 'v';
                break;
        case '\\':
        case '"':
        case '\'':
                /* These three escape to a backslash followed by themselves */
                letter = c;
                break;

        default:
                break;
        }

        if (letter != 0) {
                *(out++) = '\\';
                *(out++) = letter;
        } else if ((c < ' ') || (c >= 127)) {
                unsigned char u = (unsigned char) c;

                /* For everything else outside of the printable ASCII range
                 * we prefer octal over hexadecimal encoding, simply because
                 * glib's g_strescape() does the same. */
                *(out++) = '\\';
                *(out++) = octchar(u >> 6);
                *(out++) = octchar(u >> 3);
                *(out++) = octchar(u);
        } else
                *(out++) = c;

        return out - buf;
}
90
91 char *cescape(const char *s) {
92 char *r, *t;
93 const char *f;
94
95 assert(s);
96
97 /* Does C style string escaping. May be reversed with
98 * cunescape(). */
99
100 r = new(char, strlen(s)*4 + 1);
101 if (!r)
102 return NULL;
103
104 for (f = s, t = r; *f; f++)
105 t += cescape_char(*f, t);
106
107 *t = 0;
108
109 return r;
110 }
111
int cunescape_one(const char *p, size_t length, char *ret, uint32_t *ret_unicode) {
        uint32_t code = 0;
        unsigned n_digits, i;
        char simple;
        int digit;

        assert(p);
        assert(*p);
        assert(ret);

        /* Unescapes one C style escape sequence. p points to the character
         * right after the backslash, length is the number of bytes that may
         * be read from p. Passing (size_t) -1 as length turns the length
         * checks off; as that is the largest possible size_t value the plain
         * "length < n" comparisons below never trigger for it, no special
         * casing is needed.
         *
         * The unescaped character is returned in *ret, unless a unicode
         * sequence yielded a code point of 128 or above, in which case *ret
         * is set to 0 and the code point is returned in *ret_unicode instead
         * (if ret_unicode is NULL this fails with -EINVAL).
         *
         * Returns the number of bytes of p that were consumed (1, 3, 5 or 9)
         * on success, or a negative error code if the sequence is unknown,
         * truncated, malformed or would decode to a NUL character. */

        if (length < 1)
                return -EINVAL;

        switch (p[0]) {

        case 'a':
                simple = '\a';
                break;
        case 'b':
                simple = '\b';
                break;
        case 'f':
                simple = '\f';
                break;
        case 'n':
                simple = '\n';
                break;
        case 'r':
                simple = '\r';
                break;
        case 't':
                simple = '\t';
                break;
        case 'v':
                simple = '\v';
                break;
        case '\\':
                simple = '\\';
                break;
        case '"':
                simple = '"';
                break;
        case '\'':
                simple = '\'';
                break;

        case 's':
                /* This is an extension of the XDG syntax files */
                simple = ' ';
                break;

        case 'x':
                /* Hexadecimal encoding: exactly two hex digits */

                if (length < 3)
                        return -EINVAL;

                for (i = 1; i <= 2; i++) {
                        digit = unhexchar(p[i]);
                        if (digit < 0)
                                return -EINVAL;

                        code = (code << 4U) | (uint32_t) digit;
                }

                /* Don't allow NUL bytes */
                if (code == 0)
                        return -EINVAL;

                *ret = (char) code;
                return 3;

        case 'u':
        case 'U':
                /* C++11 style unicode: lowercase u takes four hex digits
                 * (16bit), uppercase U takes eight (32bit). */

                n_digits = p[0] == 'u' ? 4 : 8;

                if (length < 1 + n_digits)
                        return -EINVAL;

                for (i = 1; i <= n_digits; i++) {
                        digit = unhexchar(p[i]);
                        if (digit < 0)
                                return digit; /* pass on the error of unhexchar() as is */

                        code = (code << 4U) | (uint32_t) digit;
                }

                /* Don't allow 0 chars */
                if (code == 0)
                        return -EINVAL;

                /* Don't allow invalid code points. Note that this is only
                 * enforced for the 32bit variant. */
                if (p[0] == 'U' && !unichar_is_valid(code))
                        return -EINVAL;

                if (code < 128)
                        *ret = code;
                else {
                        if (!ret_unicode)
                                return -EINVAL;

                        /* A zero in *ret tells the caller to look at *ret_unicode */
                        *ret = 0;
                        *ret_unicode = code;
                }

                return (int) (1 + n_digits);

        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
                /* Octal encoding: exactly three octal digits, and the first
                 * one is the character we switched on. */

                if (length < 3)
                        return -EINVAL;

                for (i = 0; i < 3; i++) {
                        digit = unoctchar(p[i]);
                        if (digit < 0)
                                return -EINVAL;

                        code = (code << 3U) | (uint32_t) digit;
                }

                /* Don't allow NUL bytes */
                if (code == 0)
                        return -EINVAL;

                /* Don't allow bytes above 255 */
                if (code > 255)
                        return -EINVAL;

                *ret = code;
                return 3;

        default:
                return -EINVAL;
        }

        /* Only the single character escapes get here */
        *ret = simple;
        return 1;
}
312
313 int cunescape_length_with_prefix(const char *s, size_t length, const char *prefix, UnescapeFlags flags, char **ret) {
314 char *r, *t;
315 const char *f;
316 size_t pl;
317
318 assert(s);
319 assert(ret);
320
321 /* Undoes C style string escaping, and optionally prefixes it. */
322
323 pl = prefix ? strlen(prefix) : 0;
324
325 r = new(char, pl+length+1);
326 if (!r)
327 return -ENOMEM;
328
329 if (prefix)
330 memcpy(r, prefix, pl);
331
332 for (f = s, t = r + pl; f < s + length; f++) {
333 size_t remaining;
334 uint32_t u;
335 char c;
336 int k;
337
338 remaining = s + length - f;
339 assert(remaining > 0);
340
341 if (*f != '\\') {
342 /* A literal literal, copy verbatim */
343 *(t++) = *f;
344 continue;
345 }
346
347 if (remaining == 1) {
348 if (flags & UNESCAPE_RELAX) {
349 /* A trailing backslash, copy verbatim */
350 *(t++) = *f;
351 continue;
352 }
353
354 free(r);
355 return -EINVAL;
356 }
357
358 k = cunescape_one(f + 1, remaining - 1, &c, &u);
359 if (k < 0) {
360 if (flags & UNESCAPE_RELAX) {
361 /* Invalid escape code, let's take it literal then */
362 *(t++) = '\\';
363 continue;
364 }
365
366 free(r);
367 return k;
368 }
369
370 if (c != 0)
371 /* Non-Unicode? Let's encode this directly */
372 *(t++) = c;
373 else
374 /* Unicode? Then let's encode this in UTF-8 */
375 t += utf8_encode_unichar(t, u);
376
377 f += k;
378 }
379
380 *t = 0;
381
382 *ret = r;
383 return t - r;
384 }
385
386 int cunescape_length(const char *s, size_t length, UnescapeFlags flags, char **ret) {
387 return cunescape_length_with_prefix(s, length, NULL, flags, ret);
388 }
389
390 int cunescape(const char *s, UnescapeFlags flags, char **ret) {
391 return cunescape_length(s, strlen(s), flags, ret);
392 }
393
394 char *xescape(const char *s, const char *bad) {
395 char *r, *t;
396 const char *f;
397
398 /* Escapes all chars in bad, in addition to \ and all special
399 * chars, in \xFF style escaping. May be reversed with
400 * cunescape(). */
401
402 r = new(char, strlen(s) * 4 + 1);
403 if (!r)
404 return NULL;
405
406 for (f = s, t = r; *f; f++) {
407
408 if ((*f < ' ') || (*f >= 127) ||
409 (*f == '\\') || strchr(bad, *f)) {
410 *(t++) = '\\';
411 *(t++) = 'x';
412 *(t++) = hexchar(*f >> 4);
413 *(t++) = hexchar(*f);
414 } else
415 *(t++) = *f;
416 }
417
418 *t = 0;
419
420 return r;
421 }
422
static char *strcpy_backslash_escaped(char *t, const char *s, const char *bad) {
        char *out = t;

        assert(bad);

        /* Copies s to t, placing a backslash in front of every backslash and
         * in front of every character that is listed in bad. No trailing NUL
         * is written. Returns a pointer to the byte right after the last one
         * written, so that the caller can continue appending there. */

        while (*s) {
                char c = *(s++);

                if (c == '\\' || strchr(bad, c))
                        *(out++) = '\\';

                *(out++) = c;
        }

        return out;
}
435
436 char *shell_escape(const char *s, const char *bad) {
437 char *r, *t;
438
439 r = new(char, strlen(s)*2+1);
440 if (!r)
441 return NULL;
442
443 t = strcpy_backslash_escaped(r, s, bad);
444 *t = 0;
445
446 return r;
447 }
448
449 char *shell_maybe_quote(const char *s) {
450 const char *p;
451 char *r, *t;
452
453 assert(s);
454
455 /* Encloses a string in double quotes if necessary to make it
456 * OK as shell string. */
457
458 for (p = s; *p; p++)
459 if (*p <= ' ' ||
460 *p >= 127 ||
461 strchr(SHELL_NEED_QUOTES, *p))
462 break;
463
464 if (!*p)
465 return strdup(s);
466
467 r = new(char, 1+strlen(s)*2+1+1);
468 if (!r)
469 return NULL;
470
471 t = r;
472 *(t++) = '"';
473 t = mempcpy(t, s, p - s);
474
475 t = strcpy_backslash_escaped(t, p, SHELL_NEED_ESCAPE);
476
477 *(t++)= '"';
478 *t = 0;
479
480 return r;
481 }