[thirdparty/systemd.git] / src / basic / escape.c

/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/

/***
  This file is part of systemd.

  Copyright 2010 Lennart Poettering

  systemd is free software; you can redistribute it and/or modify it
  under the terms of the GNU Lesser General Public License as published by
  the Free Software Foundation; either version 2.1 of the License, or
  (at your option) any later version.

  systemd is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public License
  along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/

#include "utf8.h"
#include "util.h"

#include "escape.h"

/* Writes the C-style escaped form of the single character c to buf and
 * returns the number of bytes written: 2 for characters with a dedicated
 * backslash escape, 4 for the octal form "\ooo", 1 for a character that is
 * copied through unchanged. No NUL terminator is written, hence buf must
 * provide room for up to 4 bytes. */
size_t cescape_char(char c, char *buf) {
        /* Parallel tables: raw[i] is escaped as a backslash followed by letter[i] */
        static const char raw[]    = "\a\b\f\n\r\t\v\\\"'";
        static const char letter[] = "abfnrtv\\\"'";
        size_t i;

        /* Stop before the tables' own terminating NUL, so that a NUL
         * input character never matches and ends up in the octal path */
        for (i = 0; i < sizeof(raw) - 1; i++)
                if (raw[i] == c) {
                        buf[0] = '\\';
                        buf[1] = letter[i];
                        return 2;
                }

        /* Everything else that is printable ASCII is passed through as-is */
        if (c >= ' ' && c < 127) {
                buf[0] = c;
                return 1;
        }

        /* For special chars we prefer octal over hexadecimal encoding,
         * simply because glib's g_strescape() does the same */
        buf[0] = '\\';
        buf[1] = octchar((unsigned char) c >> 6);
        buf[2] = octchar((unsigned char) c >> 3);
        buf[3] = octchar((unsigned char) c);
        return 4;
}

/* Does C style string escaping of the NUL-terminated string s, one
 * character at a time via cescape_char(). May be reversed with
 * cunescape(). Returns a newly allocated string, or NULL if the
 * allocation failed. */
char *cescape(const char *s) {
        const char *in;
        char *escaped, *out;

        assert(s);

        /* Worst case each input character turns into a four character
         * octal escape; one more byte for the trailing NUL */
        escaped = new(char, strlen(s)*4 + 1);
        if (!escaped)
                return NULL;

        out = escaped;
        in = s;
        while (*in) {
                out += cescape_char(*in, out);
                in++;
        }

        *out = 0;

        return escaped;
}

/* Parses exactly n hexadecimal digits starting at p and stores the combined
 * value in *ret. The digits are looked at strictly left to right and parsing
 * stops at the first character unhexchar() rejects, i.e. nothing behind that
 * character is read. Returns 0 on success, otherwise the negative value
 * unhexchar() reported. */
static int cunescape_parse_hex(const char *p, unsigned n, uint32_t *ret) {
        uint32_t value = 0;
        unsigned i;

        for (i = 0; i < n; i++) {
                int digit;

                digit = unhexchar(p[i]);
                if (digit < 0)
                        return digit;

                value = (value << 4U) | (uint32_t) digit;
        }

        *ret = value;
        return 0;
}

/* Passes a parsed, non-zero code point on to the caller of cunescape_one():
 * values below 128 are stored in *ret directly, everything else goes to
 * *ret_unicode while *ret is set to 0 as marker. Returns 0 on success and
 * -EINVAL if the code point needs ret_unicode but none was supplied. */
static int cunescape_store_unichar(uint32_t c, char *ret, uint32_t *ret_unicode) {
        if (c < 128) {
                *ret = c;
                return 0;
        }

        if (!ret_unicode)
                return -EINVAL;

        *ret = 0;
        *ret_unicode = c;
        return 0;
}

/* Unescapes one C style escape sequence. p points to the character right
 * after the backslash; length is the number of characters available at p,
 * or (size_t) -1 if no explicit limit is given.
 *
 * On success the unescaped character is stored in *ret and the number of
 * characters consumed from p is returned (1, 3, 5 or 9). For \u and \U
 * sequences denoting a code point of 128 or above, *ret is set to 0 and the
 * code point is stored in *ret_unicode instead. Sequences that would yield a
 * NUL byte/character, that are truncated or that are unknown result in a
 * negative errno-style return value. */
int cunescape_one(const char *p, size_t length, char *ret, uint32_t *ret_unicode) {
        /* Parallel tables of the one-letter escapes and what they stand for.
         * The trailing 's' -> ' ' pair is an extension of the XDG syntax files */
        static const char letter[] = "abfnrtv\\\"'s";
        static const char plain[]  = "\a\b\f\n\r\t\v\\\"' ";
        uint32_t v;
        size_t i;
        int k, hi, mid, lo;

        assert(p);
        assert(*p);
        assert(ret);

        if (length != (size_t) -1 && length < 1)
                return -EINVAL;

        for (i = 0; i < sizeof(letter) - 1; i++)
                if (p[0] == letter[i]) {
                        *ret = plain[i];
                        return 1;
                }

        switch (p[0]) {

        case 'x':
                /* hexadecimal encoding: two digits */
                if (length != (size_t) -1 && length < 3)
                        return -EINVAL;

                if (cunescape_parse_hex(p + 1, 2, &v) < 0)
                        return -EINVAL;

                /* Don't allow NUL bytes */
                if (v == 0)
                        return -EINVAL;

                *ret = (char) v;
                return 3;

        case 'u':
                /* C++11 style 16bit unicode: four digits */
                if (length != (size_t) -1 && length < 5)
                        return -EINVAL;

                k = cunescape_parse_hex(p + 1, 4, &v);
                if (k < 0)
                        return k;

                /* Don't allow 0 chars */
                if (v == 0)
                        return -EINVAL;

                if (cunescape_store_unichar(v, ret, ret_unicode) < 0)
                        return -EINVAL;

                return 5;

        case 'U':
                /* C++11 style 32bit unicode: eight digits */
                if (length != (size_t) -1 && length < 9)
                        return -EINVAL;

                k = cunescape_parse_hex(p + 1, 8, &v);
                if (k < 0)
                        return k;

                /* Don't allow 0 chars */
                if (v == 0)
                        return -EINVAL;

                /* Don't allow invalid code points */
                if (!unichar_is_valid(v))
                        return -EINVAL;

                if (cunescape_store_unichar(v, ret, ret_unicode) < 0)
                        return -EINVAL;

                return 9;

        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
                /* octal encoding: three digits, the first one being p[0] itself */
                if (length != (size_t) -1 && length < 3)
                        return -EINVAL;

                hi = unoctchar(p[0]);
                if (hi < 0)
                        return -EINVAL;

                mid = unoctchar(p[1]);
                if (mid < 0)
                        return -EINVAL;

                lo = unoctchar(p[2]);
                if (lo < 0)
                        return -EINVAL;

                /* don't allow NUL bytes */
                if (hi == 0 && mid == 0 && lo == 0)
                        return -EINVAL;

                /* Three octal digits may encode up to 511, but don't allow bytes above 255 */
                v = ((uint32_t) hi << 6U) | ((uint32_t) mid << 3U) | (uint32_t) lo;
                if (v > 255)
                        return -EINVAL;

                *ret = v;
                return 3;

        default:
                return -EINVAL;
        }
}

/* Undoes C style string escaping for the first length bytes of s, and
 * optionally puts prefix in front of the result.
 *
 * On success the newly allocated, NUL-terminated result is stored in *ret
 * and its length (prefix included, NUL excluded) is returned. Returns
 * -ENOMEM if the allocation fails. A trailing lone backslash yields -EINVAL
 * and an invalid escape sequence yields the error from cunescape_one(),
 * unless UNESCAPE_RELAX is set in flags, in which case the offending
 * backslash is copied literally. *ret is not touched on failure. */
int cunescape_length_with_prefix(const char *s, size_t length, const char *prefix, UnescapeFlags flags, char **ret) {
        const char *in, *end;
        char *buf, *out;
        size_t prefix_len;

        assert(s);
        assert(ret);

        prefix_len = prefix ? strlen(prefix) : 0;

        /* Room for the prefix, the input (sized as if nothing were
         * unescaped) and the trailing NUL */
        buf = new(char, prefix_len+length+1);
        if (!buf)
                return -ENOMEM;

        if (prefix)
                memcpy(buf, prefix, prefix_len);

        end = s + length;
        in = s;
        out = buf + prefix_len;

        while (in < end) {
                size_t left;
                uint32_t u;
                char c;
                int k;

                left = end - in;
                assert(left > 0);

                if (*in != '\\') {
                        /* An ordinary character, copy verbatim */
                        *(out++) = *(in++);
                        continue;
                }

                if (left == 1) {
                        /* A backslash with nothing following it */
                        if (!(flags & UNESCAPE_RELAX)) {
                                free(buf);
                                return -EINVAL;
                        }

                        *(out++) = *(in++);
                        continue;
                }

                k = cunescape_one(in + 1, left - 1, &c, &u);
                if (k < 0) {
                        if (!(flags & UNESCAPE_RELAX)) {
                                free(buf);
                                return k;
                        }

                        /* Invalid escape code: emit just the backslash, what
                         * follows it is processed as regular input */
                        *(out++) = '\\';
                        in++;
                        continue;
                }

                if (c == 0)
                        /* Unicode? Then let's encode this in UTF-8 */
                        out += utf8_encode_unichar(out, u);
                else
                        /* Non-Unicode? Let's encode this directly */
                        *(out++) = c;

                /* Skip the backslash plus the k characters cunescape_one() consumed */
                in += k + 1;
        }

        *out = 0;

        *ret = buf;
        return out - buf;
}

/* Undoes C style string escaping for the first length bytes of s. Simply
 * forwards to cunescape_length_with_prefix() with a NULL prefix and passes
 * its return value through unmodified. */
int cunescape_length(const char *s, size_t length, UnescapeFlags flags, char **ret) {
        return cunescape_length_with_prefix(s, length, NULL, flags, ret);
}

/* Undoes C style string escaping for the whole NUL-terminated string s:
 * forwards to cunescape_length() with strlen(s) as length and passes its
 * return value through. Note that strlen(s) is evaluated right here,
 * before any callee gets to check its arguments, hence s must not be
 * NULL. */
int cunescape(const char *s, UnescapeFlags flags, char **ret) {
        return cunescape_length(s, strlen(s), flags, ret);
}

/* Escapes all chars in bad, in addition to \ and all special chars (i.e.
 * everything below ' ' or at/above 127), in \xFF style escaping. May be
 * reversed with cunescape().
 *
 * s is the NUL-terminated string to escape, bad a NUL-terminated list of
 * additional characters to escape (may be empty, but not NULL). Returns a
 * newly allocated string, or NULL if the allocation failed. */
char *xescape(const char *s, const char *bad) {
        char *r, *t;
        const char *f;

        /* Both strings are dereferenced unconditionally below (strlen() and
         * strchr()), so catch NULL here like the other functions in this
         * file do, instead of running into undefined behaviour */
        assert(s);
        assert(bad);

        /* Worst case each input character turns into the four characters
         * "\xHH"; one more byte for the trailing NUL */
        r = new(char, strlen(s) * 4 + 1);
        if (!r)
                return NULL;

        for (f = s, t = r; *f; f++) {
                /* Look at the byte as unsigned value: this keeps the range
                 * checks independent of the signedness of char, and avoids
                 * right-shifting a negative value, which is implementation
                 * defined. cescape_char() does the same cast. */
                unsigned char b = (unsigned char) *f;

                if ((b < ' ') || (b >= 127) ||
                    (b == '\\') || strchr(bad, *f)) {
                        *(t++) = '\\';
                        *(t++) = 'x';
                        /* hexchar() is defined elsewhere; presumably it maps
                         * 0..15 to the matching hex digit, so hand it one
                         * nibble at a time */
                        *(t++) = hexchar(b >> 4);
                        *(t++) = hexchar(b & 15);
                } else
                        *(t++) = *f;
        }

        *t = 0;

        return r;
}

/* Copies the NUL-terminated string s to t, putting a backslash in front of
 * every backslash and every character listed in bad. No terminating NUL is
 * written. Returns a pointer to the position right behind the last byte
 * written, so the caller can continue appending there. t must provide room
 * for up to twice the length of s. */
static char *strcpy_backslash_escaped(char *t, const char *s, const char *bad) {
        const char *in = s;
        char *out = t;

        assert(bad);

        while (*in != 0) {
                char ch = *in++;

                if (ch == '\\' || strchr(bad, ch) != NULL)
                        *out++ = '\\';

                *out++ = ch;
        }

        return out;
}

/* Returns a newly allocated copy of the NUL-terminated string s in which
 * every backslash and every character listed in bad is preceded by a
 * backslash. Returns NULL if the allocation failed. Neither s nor bad may
 * be NULL. */
char *shell_escape(const char *s, const char *bad) {
        char *r, *t;

        /* Check the arguments before touching them: strlen() below would
         * otherwise dereference a NULL s, and a NULL bad would only be
         * noticed by strcpy_backslash_escaped() after we allocated */
        assert(s);
        assert(bad);

        /* Worst case every character gets a backslash put in front; one more
         * byte for the trailing NUL */
        r = new(char, strlen(s)*2+1);
        if (!r)
                return NULL;

        t = strcpy_backslash_escaped(r, s, bad);
        *t = 0;

        return r;
}

/* Encloses a string in double quotes if necessary to make it OK as shell
 * string. If s contains no character at or below ' ', none at or above 127
 * and none listed in SHELL_NEED_QUOTES, a plain copy of s is returned.
 * Otherwise the result is s wrapped in double quotes, with backslashes and
 * the characters in SHELL_NEED_ESCAPE backslash-escaped from the first
 * offending character on. Returns a newly allocated string, or NULL if the
 * allocation failed. */
char *shell_maybe_quote(const char *s) {
        const char *special;
        char *quoted, *out;

        assert(s);

        /* Find the first character that makes quoting necessary */
        special = s;
        while (*special) {
                if (*special <= ' ' ||
                    *special >= 127 ||
                    strchr(SHELL_NEED_QUOTES, *special))
                        break;

                special++;
        }

        /* Reached the end without finding any? Then a copy will do */
        if (*special == 0)
                return strdup(s);

        /* Opening quote, every character possibly escaped, closing quote, NUL */
        quoted = new(char, 1+strlen(s)*2+1+1);
        if (!quoted)
                return NULL;

        out = quoted;
        *(out++) = '"';

        /* Everything in front of the first special character is copied as-is ... */
        out = mempcpy(out, s, special - s);

        /* ... and the rest gets escaped where needed */
        out = strcpy_backslash_escaped(out, special, SHELL_NEED_ESCAPE);

        *(out++) = '"';
        *out = 0;

        return quoted;
}
Commit	Line	Data
4f5dd394 LP	1	/-- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil --/
	2
	3	/***
	4	This file is part of systemd.
	5
	6	Copyright 2010 Lennart Poettering
	7
	8	systemd is free software; you can redistribute it and/or modify it
	9	under the terms of the GNU Lesser General Public License as published by
	10	the Free Software Foundation; either version 2.1 of the License, or
	11	(at your option) any later version.
	12
	13	systemd is distributed in the hope that it will be useful, but
	14	WITHOUT ANY WARRANTY; without even the implied warranty of
	15	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	16	Lesser General Public License for more details.
	17
	18	You should have received a copy of the GNU Lesser General Public License
	19	along with systemd; If not, see <http://www.gnu.org/licenses/>.
	20	***/
	21
	22	#include "utf8.h"
	23	#include "util.h"
	24
	25	#include "escape.h"
	26
	27	size_t cescape_char(char c, char *buf) {
	28	char * buf_old = buf;
	29
	30	switch (c) {
	31
	32	case '\a':
	33	*(buf++) = '\\';
	34	*(buf++) = 'a';
	35	break;
	36	case '\b':
	37	*(buf++) = '\\';
	38	*(buf++) = 'b';
	39	break;
	40	case '\f':
	41	*(buf++) = '\\';
	42	*(buf++) = 'f';
	43	break;
	44	case '\n':
	45	*(buf++) = '\\';
	46	*(buf++) = 'n';
	47	break;
	48	case '\r':
	49	*(buf++) = '\\';
	50	*(buf++) = 'r';
	51	break;
	52	case '\t':
	53	*(buf++) = '\\';
	54	*(buf++) = 't';
	55	break;
	56	case '\v':
	57	*(buf++) = '\\';
	58	*(buf++) = 'v';
	59	break;
	60	case '\\':
	61	*(buf++) = '\\';
	62	*(buf++) = '\\';
	63	break;
	64	case '"':
65	*(buf++) = '\\';
66	*(buf++) = '"';
67	break;
68	case '\'':
69	*(buf++) = '\\';
70	*(buf++) = '\'';
71	break;
72
73	default:
74	/* For special chars we prefer octal over
75	* hexadecimal encoding, simply because glib's
76	* g_strescape() does the same */
77	if ((c < ' ') \|\| (c >= 127)) {
78	*(buf++) = '\\';
79	*(buf++) = octchar((unsigned char) c >> 6);
80	*(buf++) = octchar((unsigned char) c >> 3);
81	*(buf++) = octchar((unsigned char) c);
82	} else
83	*(buf++) = c;
84	break;
85	}
86
87	return buf - buf_old;
88	}
89
90	char cescape(const char s) {
91	char r, t;
92	const char *f;
93
94	assert(s);
95
96	/* Does C style string escaping. May be reversed with
97	* cunescape(). */
98
99	r = new(char, strlen(s)*4 + 1);
100	if (!r)
101	return NULL;
102
103	for (f = s, t = r; *f; f++)
104	t += cescape_char(*f, t);
105
106	*t = 0;
107
108	return r;
109	}
110
111	int cunescape_one(const char p, size_t length, char ret, uint32_t *ret_unicode) {
112	int r = 1;
113
114	assert(p);
115	assert(*p);
116	assert(ret);
117
118	/* Unescapes C style. Returns the unescaped character in ret,
119	* unless we encountered a \u sequence in which case the full
120	* unicode character is returned in ret_unicode, instead. */
121
122	if (length != (size_t) -1 && length < 1)
123	return -EINVAL;
124
125	switch (p[0]) {
126
127	case 'a':
128	*ret = '\a';
129	break;
130	case 'b':
131	*ret = '\b';
132	break;
133	case 'f':
134	*ret = '\f';
135	break;
136	case 'n':
137	*ret = '\n';
138	break;
139	case 'r':
140	*ret = '\r';
141	break;
142	case 't':
143	*ret = '\t';
144	break;
145	case 'v':
146	*ret = '\v';
147	break;
148	case '\\':
149	*ret = '\\';
150	break;
151	case '"':
152	*ret = '"';
153	break;
154	case '\'':
155	*ret = '\'';
156	break;
157
158	case 's':
159	/* This is an extension of the XDG syntax files */
160	*ret = ' ';
161	break;
162
163	case 'x': {
164	/* hexadecimal encoding */
165	int a, b;
166
167	if (length != (size_t) -1 && length < 3)
168	return -EINVAL;
169
170	a = unhexchar(p[1]);
171	if (a < 0)
172	return -EINVAL;
173
174	b = unhexchar(p[2]);
175	if (b < 0)
176	return -EINVAL;
177
178	/* Don't allow NUL bytes */
179	if (a == 0 && b == 0)
180	return -EINVAL;
181
182	*ret = (char) ((a << 4U) \| b);
183	r = 3;
184	break;
185	}
186
187	case 'u': {
188	/* C++11 style 16bit unicode */
189
190	int a[4];
191	unsigned i;
192	uint32_t c;
193
194	if (length != (size_t) -1 && length < 5)
195	return -EINVAL;
196
197	for (i = 0; i < 4; i++) {
198	a[i] = unhexchar(p[1 + i]);
199	if (a[i] < 0)
200	return a[i];
201	}
202
203	c = ((uint32_t) a[0] << 12U) \| ((uint32_t) a[1] << 8U) \| ((uint32_t) a[2] << 4U) \| (uint32_t) a[3];
204
205	/* Don't allow 0 chars */
206	if (c == 0)
207	return -EINVAL;
208
209	if (c < 128)
210	*ret = c;
211	else {
212	if (!ret_unicode)
213	return -EINVAL;
214
215	*ret = 0;
216	*ret_unicode = c;
217	}
218
219	r = 5;
220	break;
221	}
222
223	case 'U': {
224	/* C++11 style 32bit unicode */
225
226	int a[8];
227	unsigned i;
228	uint32_t c;
229
230	if (length != (size_t) -1 && length < 9)
231	return -EINVAL;
232
233	for (i = 0; i < 8; i++) {
234	a[i] = unhexchar(p[1 + i]);
235	if (a[i] < 0)
236	return a[i];
237	}
238
239	c = ((uint32_t) a[0] << 28U) \| ((uint32_t) a[1] << 24U) \| ((uint32_t) a[2] << 20U) \| ((uint32_t) a[3] << 16U) \|
240	((uint32_t) a[4] << 12U) \| ((uint32_t) a[5] << 8U) \| ((uint32_t) a[6] << 4U) \| (uint32_t) a[7];
241
242	/* Don't allow 0 chars */
243	if (c == 0)
244	return -EINVAL;
245
246	/* Don't allow invalid code points */
247	if (!unichar_is_valid(c))
248	return -EINVAL;
249
250	if (c < 128)
251	*ret = c;
252	else {
253	if (!ret_unicode)
254	return -EINVAL;
255
256	*ret = 0;
257	*ret_unicode = c;
258	}
259
260	r = 9;
261	break;
262	}
263
264	case '0':
265	case '1':
266	case '2':
267	case '3':
268	case '4':
269	case '5':
270	case '6':
271	case '7': {
272	/* octal encoding */
273	int a, b, c;
274	uint32_t m;
275
276	if (length != (size_t) -1 && length < 3)
277	return -EINVAL;
278
279	a = unoctchar(p[0]);
280	if (a < 0)
281	return -EINVAL;
282
283	b = unoctchar(p[1]);
284	if (b < 0)
285	return -EINVAL;
286
287	c = unoctchar(p[2]);
288	if (c < 0)
289	return -EINVAL;
290
291	/* don't allow NUL bytes */
292	if (a == 0 && b == 0 && c == 0)
293	return -EINVAL;
294
295	/* Don't allow bytes above 255 */
296	m = ((uint32_t) a << 6U) \| ((uint32_t) b << 3U) \| (uint32_t) c;
297	if (m > 255)
298	return -EINVAL;
299
300	*ret = m;
301	r = 3;
302	break;
303	}
304
305	default:
306	return -EINVAL;
307	}
308
309	return r;
310	}
311
312	int cunescape_length_with_prefix(const char s, size_t length, const char prefix, UnescapeFlags flags, char **ret) {
313	char r, t;
314	const char *f;
315	size_t pl;
316
317	assert(s);
318	assert(ret);
319
320	/* Undoes C style string escaping, and optionally prefixes it. */
321
322	pl = prefix ? strlen(prefix) : 0;
323
324	r = new(char, pl+length+1);
325	if (!r)
326	return -ENOMEM;
327
328	if (prefix)
329	memcpy(r, prefix, pl);
330
331	for (f = s, t = r + pl; f < s + length; f++) {
332	size_t remaining;
333	uint32_t u;
334	char c;
335	int k;
336
337	remaining = s + length - f;
338	assert(remaining > 0);
339
340	if (*f != '\\') {
341	/* A literal literal, copy verbatim */
342	(t++) = f;
343	continue;
344	}
345
346	if (remaining == 1) {
347	if (flags & UNESCAPE_RELAX) {
348	/* A trailing backslash, copy verbatim */
349	(t++) = f;
350	continue;
351	}
352
353	free(r);
354	return -EINVAL;
355	}
356
357	k = cunescape_one(f + 1, remaining - 1, &c, &u);
358	if (k < 0) {
359	if (flags & UNESCAPE_RELAX) {
360	/* Invalid escape code, let's take it literal then */
361	*(t++) = '\\';
362	continue;
363	}
364
365	free(r);
366	return k;
367	}
368
369	if (c != 0)
370	/* Non-Unicode? Let's encode this directly */
371	*(t++) = c;
372	else
373	/* Unicode? Then let's encode this in UTF-8 */
374	t += utf8_encode_unichar(t, u);
375
376	f += k;
377	}
378
379	*t = 0;
380
381	*ret = r;
382	return t - r;
383	}
384
385	int cunescape_length(const char s, size_t length, UnescapeFlags flags, char *ret) {
386	return cunescape_length_with_prefix(s, length, NULL, flags, ret);
387	}
388
389	int cunescape(const char s, UnescapeFlags flags, char *ret) {
390	return cunescape_length(s, strlen(s), flags, ret);
391	}
392
393	char xescape(const char s, const char *bad) {
394	char r, t;
395	const char *f;
396
397	/* Escapes all chars in bad, in addition to \ and all special
398	* chars, in \xFF style escaping. May be reversed with
399	* cunescape(). */
400
401	r = new(char, strlen(s) * 4 + 1);
402	if (!r)
403	return NULL;
404
405	for (f = s, t = r; *f; f++) {
406
407	if ((f < ' ') \|\| (f >= 127) \|\|
408	(f == '\\') \|\| strchr(bad, f)) {
409	*(t++) = '\\';
410	*(t++) = 'x';
411	(t++) = hexchar(f >> 4);
412	(t++) = hexchar(f);
413	} else
414	(t++) = f;
415	}
416
417	*t = 0;
418
419	return r;
420	}
421
422	static char strcpy_backslash_escaped(char t, const char s, const char bad) {
423	assert(bad);
424
425	for (; *s; s++) {
426	if (s == '\\' \|\| strchr(bad, s))
427	*(t++) = '\\';
428
429	(t++) = s;
430	}
431
432	return t;
433	}
434
435	char shell_escape(const char s, const char *bad) {
436	char r, t;
437
438	r = new(char, strlen(s)*2+1);
439	if (!r)
440	return NULL;
441
442	t = strcpy_backslash_escaped(r, s, bad);
443	*t = 0;
444
445	return r;
446	}
447
448	char shell_maybe_quote(const char s) {
449	const char *p;
450	char r, t;
451
452	assert(s);
453
454	/* Encloses a string in double quotes if necessary to make it
455	* OK as shell string. */
456
457	for (p = s; *p; p++)
458	if (*p <= ' ' \|\|
459	*p >= 127 \|\|
460	strchr(SHELL_NEED_QUOTES, *p))
461	break;
462
463	if (!*p)
464	return strdup(s);
465
466	r = new(char, 1+strlen(s)*2+1+1);
467	if (!r)
468	return NULL;
469
470	t = r;
471	*(t++) = '"';
472	t = mempcpy(t, s, p - s);
473
474	t = strcpy_backslash_escaped(t, p, SHELL_NEED_ESCAPE);
475
476	*(t++)= '"';
477	*t = 0;
478
479	return r;
480	}