[thirdparty/systemd.git] / src / basic / escape.c

/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/

/***
  This file is part of systemd.

  Copyright 2010 Lennart Poettering

  systemd is free software; you can redistribute it and/or modify it
  under the terms of the GNU Lesser General Public License as published by
  the Free Software Foundation; either version 2.1 of the License, or
  (at your option) any later version.

  systemd is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public License
  along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/

#include "alloc-util.h"
#include "escape.h"
#include "hexdecoct.h"
#include "string-util.h"
#include "utf8.h"
#include "util.h"

size_t cescape_char(char c, char *buf) {
        char *out = buf;
        char mnemonic = 0;

        /* Writes the C-style escaped form of the single byte c to buf
         * and returns the number of bytes written: 1 for a plain
         * character, 2 for a mnemonic escape such as \n, 4 for an
         * octal escape. No terminating NUL is written, hence buf must
         * provide room for at least 4 bytes. */

        /* First figure out whether c has a two-character mnemonic. */
        switch (c) {
        case '\a': mnemonic = 'a';  break;
        case '\b': mnemonic = 'b';  break;
        case '\f': mnemonic = 'f';  break;
        case '\n': mnemonic = 'n';  break;
        case '\r': mnemonic = 'r';  break;
        case '\t': mnemonic = 't';  break;
        case '\v': mnemonic = 'v';  break;
        case '\\': mnemonic = '\\'; break;
        case '"':  mnemonic = '"';  break;
        case '\'': mnemonic = '\''; break;
        default:
                break;
        }

        if (mnemonic != 0) {
                *(out++) = '\\';
                *(out++) = mnemonic;
        } else if (c < ' ' || c >= 127) {
                /* Everything else outside of the printable ASCII range
                 * is written as three octal digits. Octal is preferred
                 * over hexadecimal simply because glib's
                 * g_strescape() does the same. The byte is taken as
                 * unsigned so that the shifts are well-defined
                 * regardless of the signedness of char. */
                unsigned char byte = (unsigned char) c;

                *(out++) = '\\';
                *(out++) = octchar(byte >> 6);
                *(out++) = octchar(byte >> 3);
                *(out++) = octchar(byte);
        } else
                /* Printable and harmless, copy as-is */
                *(out++) = c;

        return out - buf;
}

char *cescape(const char *s) {
        const char *in;
        char *escaped, *out;

        assert(s);

        /* Returns a newly allocated, C-style escaped copy of the
         * NUL-terminated string s, or NULL if the allocation fails.
         * The result may be turned back into the original with
         * cunescape(). */

        /* cescape_char() emits at most 4 bytes per input byte, plus
         * one byte for the trailing NUL. */
        escaped = new(char, strlen(s)*4 + 1);
        if (!escaped)
                return NULL;

        out = escaped;
        for (in = s; *in != 0; in++) {
                size_t n;

                n = cescape_char(*in, out);
                out += n;
        }

        *out = 0;

        return escaped;
}

int cunescape_one(const char *p, size_t length, char *ret, uint32_t *ret_unicode) {
        int consumed = 1;

        assert(p);
        assert(*p);
        assert(ret);

        /* Decodes a single C-style escape sequence. p points to the
         * character right after the backslash; length is the number
         * of bytes available at p, or (size_t) -1 if no limit shall be
         * enforced (the digit parsers then stop at the first
         * non-digit, which includes a terminating NUL).
         *
         * On success the number of bytes consumed from p is returned
         * and the decoded byte is stored in *ret. If a \u or \U
         * sequence decodes to a code point >= 128, *ret is set to 0
         * and the code point is stored in *ret_unicode instead; if
         * ret_unicode is NULL such sequences are refused. On failure a
         * negative errno-style value is returned. Sequences that
         * would decode to NUL are always refused. */

        if (length != (size_t) -1 && length < 1)
                return -EINVAL;

        switch (p[0]) {

        /* Single-character mnemonics */
        case 'a':  *ret = '\a'; break;
        case 'b':  *ret = '\b'; break;
        case 'f':  *ret = '\f'; break;
        case 'n':  *ret = '\n'; break;
        case 'r':  *ret = '\r'; break;
        case 't':  *ret = '\t'; break;
        case 'v':  *ret = '\v'; break;
        case '\\': *ret = '\\'; break;
        case '"':  *ret = '"';  break;
        case '\'': *ret = '\''; break;

        case 's':
                /* This is an extension of the XDG syntax files */
                *ret = ' ';
                break;

        case 'x': {
                /* Hexadecimal encoding: \xHH */
                int hi, lo;

                if (length != (size_t) -1 && length < 3)
                        return -EINVAL;

                /* Validate the first digit before even looking at
                 * the second one, so that we never read beyond a
                 * terminating NUL. */
                hi = unhexchar(p[1]);
                if (hi < 0)
                        return -EINVAL;

                lo = unhexchar(p[2]);
                if (lo < 0)
                        return -EINVAL;

                /* Don't allow NUL bytes */
                if (hi == 0 && lo == 0)
                        return -EINVAL;

                *ret = (char) ((hi << 4U) | lo);
                consumed = 3;
                break;
        }

        case 'u':
        case 'U': {
                /* C++11 style unicode: \uHHHH (16bit) or \UHHHHHHHH (32bit) */
                const unsigned n_digits = p[0] == 'u' ? 4 : 8;
                uint32_t code = 0;
                unsigned i;

                if (length != (size_t) -1 && length < n_digits + 1)
                        return -EINVAL;

                /* Accumulate the digits, most significant nibble
                 * first. A bad digit propagates unhexchar()'s own
                 * error code. */
                for (i = 0; i < n_digits; i++) {
                        int digit;

                        digit = unhexchar(p[1 + i]);
                        if (digit < 0)
                                return digit;

                        code = (code << 4U) | (uint32_t) digit;
                }

                /* Don't allow 0 chars */
                if (code == 0)
                        return -EINVAL;

                /* Don't allow invalid code points; this is only
                 * verified for the 32bit form. */
                if (p[0] == 'U' && !unichar_is_valid(code))
                        return -EINVAL;

                if (code < 128)
                        /* Plain ASCII, fits into a single byte */
                        *ret = code;
                else {
                        if (!ret_unicode)
                                return -EINVAL;

                        *ret = 0;
                        *ret_unicode = code;
                }

                consumed = n_digits + 1;
                break;
        }

        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7': {
                /* Octal encoding: exactly three digits, the first of
                 * which is p[0] itself */
                int digit[3];
                uint32_t value;
                unsigned i;

                if (length != (size_t) -1 && length < 3)
                        return -EINVAL;

                for (i = 0; i < 3; i++) {
                        digit[i] = unoctchar(p[i]);
                        if (digit[i] < 0)
                                return -EINVAL;
                }

                /* Don't allow NUL bytes */
                if (digit[0] == 0 && digit[1] == 0 && digit[2] == 0)
                        return -EINVAL;

                /* Don't allow bytes above 255 */
                value = ((uint32_t) digit[0] << 6U) | ((uint32_t) digit[1] << 3U) | (uint32_t) digit[2];
                if (value > 255)
                        return -EINVAL;

                *ret = value;
                consumed = 3;
                break;
        }

        default:
                /* Unknown escape character */
                return -EINVAL;
        }

        return consumed;
}

int cunescape_length_with_prefix(const char *s, size_t length, const char *prefix, UnescapeFlags flags, char **ret) {
        const char *in, *end;
        char *buf, *out;
        size_t prefix_len;

        assert(s);
        assert(ret);

        /* Undoes C style string escaping on the first length bytes of
         * s and stores the result, optionally preceded by prefix, as
         * a newly allocated NUL-terminated string in *ret.
         *
         * Returns the length of the resulting string (prefix included,
         * trailing NUL not included) on success. On failure a negative
         * errno-style value is returned and *ret is left untouched.
         * With UNESCAPE_RELAX set, a trailing backslash and invalid
         * escape sequences are copied literally instead of being
         * treated as errors. */

        prefix_len = prefix ? strlen(prefix) : 0;

        /* The buffer is sized for prefix + input + NUL, i.e. this
         * relies on unescaping never producing more bytes than it
         * consumes. */
        buf = new(char, prefix_len+length+1);
        if (!buf)
                return -ENOMEM;

        if (prefix)
                memcpy(buf, prefix, prefix_len);

        out = buf + prefix_len;
        end = s + length;

        for (in = s; in < end; in++) {
                size_t left;
                uint32_t u;
                char c;
                int k;

                left = end - in;
                assert(left > 0);

                if (*in != '\\') {
                        /* A literal character, copy verbatim */
                        *(out++) = *in;
                        continue;
                }

                if (left == 1) {
                        /* A trailing backslash with nothing after it */
                        if (!(flags & UNESCAPE_RELAX)) {
                                free(buf);
                                return -EINVAL;
                        }

                        *(out++) = *in;
                        continue;
                }

                k = cunescape_one(in + 1, left - 1, &c, &u);
                if (k < 0) {
                        /* Invalid escape code */
                        if (!(flags & UNESCAPE_RELAX)) {
                                free(buf);
                                return k;
                        }

                        /* Take the backslash literally, whatever
                         * follows is processed by the next iterations */
                        *(out++) = '\\';
                        continue;
                }

                if (c == 0)
                        /* Unicode? Then let's encode this in UTF-8 */
                        out += utf8_encode_unichar(out, u);
                else
                        /* Non-Unicode? Let's encode this directly */
                        *(out++) = c;

                /* Skip what cunescape_one() consumed; the backslash
                 * itself is skipped by the loop increment. */
                in += k;
        }

        *out = 0;

        *ret = buf;
        return out - buf;
}

int cunescape_length(const char *s, size_t length, UnescapeFlags flags, char **ret) {
        /* Same as cunescape_length_with_prefix(), but without a
         * prefix: s, length, flags and ret are passed through
         * unchanged and its return value is returned as-is. */
        return cunescape_length_with_prefix(s, length, NULL, flags, ret);
}

int cunescape(const char *s, UnescapeFlags flags, char **ret) {
        /* Unescapes the entire NUL-terminated string s by handing it
         * together with its strlen() to cunescape_length(). Since
         * strlen() is applied unconditionally s must not be NULL. */
        return cunescape_length(s, strlen(s), flags, ret);
}

char *xescape(const char *s, const char *bad) {
        char *r, *t;
        const char *f;

        assert(s);

        /* Escapes all chars in bad, in addition to \ and all special
         * chars (everything below ' ' or at/above 127), in \xFF style
         * escaping. May be reversed with cunescape().
         *
         * Returns a newly allocated NUL-terminated string, or NULL if
         * the allocation fails. bad must be a valid string (possibly
         * empty), as it is passed to strchr(). */

        /* Each escaped byte takes 4 bytes (\xHH), plus the trailing NUL */
        r = new(char, strlen(s) * 4 + 1);
        if (!r)
                return NULL;

        for (f = s, t = r; *f; f++) {
                /* Look at the byte as unsigned, like cescape_char()
                 * does: this makes the range checks independent of
                 * the signedness of char and avoids right-shifting a
                 * negative value, whose result is
                 * implementation-defined. */
                unsigned char u = (unsigned char) *f;

                if ((u < ' ') || (u >= 127) ||
                    (u == '\\') || strchr(bad, *f)) {
                        *(t++) = '\\';
                        *(t++) = 'x';
                        *(t++) = hexchar(u >> 4);
                        *(t++) = hexchar(u);
                } else
                        *(t++) = *f;
        }

        *t = 0;

        return r;
}

static char *strcpy_backslash_escaped(char *t, const char *s, const char *bad) {
        const char *in;
        char *out = t;

        /* Copies the NUL-terminated string s to t, putting a backslash
         * in front of every backslash and every character listed in
         * bad. Returns a pointer just past the last byte written. No
         * trailing NUL is written, that is left to the caller. Each
         * input byte yields at most two output bytes. */

        assert(bad);

        for (in = s; *in != 0; in++) {
                const char ch = *in;

                if (ch == '\\' || strchr(bad, ch) != NULL)
                        *(out++) = '\\';

                *(out++) = ch;
        }

        return out;
}

char *shell_escape(const char *s, const char *bad) {
        char *escaped;

        /* Returns a newly allocated copy of s in which every
         * backslash and every character listed in bad is preceded by
         * a backslash, or NULL if the allocation fails. */

        /* Worst case every byte gets a backslash, plus trailing NUL */
        escaped = new(char, strlen(s)*2+1);
        if (escaped) {
                char *end;

                end = strcpy_backslash_escaped(escaped, s, bad);
                *end = 0;
        }

        return escaped;
}

char *shell_maybe_quote(const char *s) {
        char *quoted, *out;
        size_t clean;

        assert(s);

        /* Encloses a string in double quotes if necessary to make it
         * OK as shell string. Returns a newly allocated string, or
         * NULL if the allocation fails. */

        /* Determine the length of the leading part that contains
         * nothing requiring quotes: no whitespace or control chars,
         * nothing at/above 127, nothing from SHELL_NEED_QUOTES. */
        for (clean = 0; s[clean] != 0; clean++) {
                const char ch = s[clean];

                if (ch <= ' ' || ch >= 127 || strchr(SHELL_NEED_QUOTES, ch))
                        break;
        }

        /* Made it to the end? Then a plain copy is all that's needed. */
        if (s[clean] == 0)
                return strdup(s);

        /* Opening quote + at most two bytes per char + closing quote + NUL */
        quoted = new(char, 1+strlen(s)*2+1+1);
        if (!quoted)
                return NULL;

        out = quoted;
        *(out++) = '"';

        /* The clean leading part goes in verbatim, the remainder is
         * backslash-escaped where SHELL_NEED_ESCAPE demands it. */
        out = mempcpy(out, s, clean);
        out = strcpy_backslash_escaped(out, s + clean, SHELL_NEED_ESCAPE);

        *(out++) = '"';
        *out = 0;

        return quoted;
}
Commit	Line	Data
4f5dd394 LP	1	/-- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil --/
	2
	3	/***
	4	This file is part of systemd.
	5
	6	Copyright 2010 Lennart Poettering
	7
	8	systemd is free software; you can redistribute it and/or modify it
	9	under the terms of the GNU Lesser General Public License as published by
	10	the Free Software Foundation; either version 2.1 of the License, or
	11	(at your option) any later version.
	12
	13	systemd is distributed in the hope that it will be useful, but
	14	WITHOUT ANY WARRANTY; without even the implied warranty of
	15	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	16	Lesser General Public License for more details.
	17
	18	You should have received a copy of the GNU Lesser General Public License
	19	along with systemd; If not, see <http://www.gnu.org/licenses/>.
	20	***/
	21
b5efdb8a	22	#include "alloc-util.h"
e4e73a63 LP	23	#include "escape.h"
e4e73a63 LP	24	#include "hexdecoct.h"
b11d6a7b	25	#include "string-util.h"
4f5dd394 LP	26	#include "utf8.h"
	27	#include "util.h"
	28
4f5dd394 LP	29	size_t cescape_char(char c, char *buf) {
	30	char * buf_old = buf;
	31
	32	switch (c) {
	33
	34	case '\a':
	35	*(buf++) = '\\';
	36	*(buf++) = 'a';
	37	break;
	38	case '\b':
	39	*(buf++) = '\\';
	40	*(buf++) = 'b';
	41	break;
	42	case '\f':
	43	*(buf++) = '\\';
	44	*(buf++) = 'f';
	45	break;
	46	case '\n':
	47	*(buf++) = '\\';
	48	*(buf++) = 'n';
	49	break;
	50	case '\r':
	51	*(buf++) = '\\';
	52	*(buf++) = 'r';
	53	break;
	54	case '\t':
	55	*(buf++) = '\\';
	56	*(buf++) = 't';
	57	break;
	58	case '\v':
	59	*(buf++) = '\\';
	60	*(buf++) = 'v';
	61	break;
	62	case '\\':
	63	*(buf++) = '\\';
	64	*(buf++) = '\\';
	65	break;
	66	case '"':
	67	*(buf++) = '\\';
	68	*(buf++) = '"';
	69	break;
	70	case '\'':
	71	*(buf++) = '\\';
	72	*(buf++) = '\'';
	73	break;
	74
	75	default:
	76	/* For special chars we prefer octal over
	77	* hexadecimal encoding, simply because glib's
	78	* g_strescape() does the same */
	79	if ((c < ' ') \|\| (c >= 127)) {
	80	*(buf++) = '\\';
	81	*(buf++) = octchar((unsigned char) c >> 6);
	82	*(buf++) = octchar((unsigned char) c >> 3);
	83	*(buf++) = octchar((unsigned char) c);
	84	} else
	85	*(buf++) = c;
	86	break;
	87	}
	88
	89	return buf - buf_old;
	90	}
	91
	92	char cescape(const char s) {
93	char r, t;
94	const char *f;
95
96	assert(s);
97
98	/* Does C style string escaping. May be reversed with
99	* cunescape(). */
100
101	r = new(char, strlen(s)*4 + 1);
102	if (!r)
103	return NULL;
104
105	for (f = s, t = r; *f; f++)
106	t += cescape_char(*f, t);
107
108	*t = 0;
109
110	return r;
111	}
112
113	int cunescape_one(const char p, size_t length, char ret, uint32_t *ret_unicode) {
114	int r = 1;
115
116	assert(p);
117	assert(*p);
118	assert(ret);
119
120	/* Unescapes C style. Returns the unescaped character in ret,
121	* unless we encountered a \u sequence in which case the full
122	* unicode character is returned in ret_unicode, instead. */
123
124	if (length != (size_t) -1 && length < 1)
125	return -EINVAL;
126
127	switch (p[0]) {
128
129	case 'a':
130	*ret = '\a';
131	break;
132	case 'b':
133	*ret = '\b';
134	break;
135	case 'f':
136	*ret = '\f';
137	break;
138	case 'n':
139	*ret = '\n';
140	break;
141	case 'r':
142	*ret = '\r';
143	break;
144	case 't':
145	*ret = '\t';
146	break;
147	case 'v':
148	*ret = '\v';
149	break;
150	case '\\':
151	*ret = '\\';
152	break;
153	case '"':
154	*ret = '"';
155	break;
156	case '\'':
157	*ret = '\'';
158	break;
159
160	case 's':
161	/* This is an extension of the XDG syntax files */
162	*ret = ' ';
163	break;
164
165	case 'x': {
166	/* hexadecimal encoding */
167	int a, b;
168
169	if (length != (size_t) -1 && length < 3)
170	return -EINVAL;
171
172	a = unhexchar(p[1]);
173	if (a < 0)
174	return -EINVAL;
175
176	b = unhexchar(p[2]);
177	if (b < 0)
178	return -EINVAL;
179
180	/* Don't allow NUL bytes */
181	if (a == 0 && b == 0)
182	return -EINVAL;
183
184	*ret = (char) ((a << 4U) \| b);
185	r = 3;
186	break;
187	}
188
189	case 'u': {
190	/* C++11 style 16bit unicode */
191
192	int a[4];
193	unsigned i;
194	uint32_t c;
195
196	if (length != (size_t) -1 && length < 5)
197	return -EINVAL;
198
199	for (i = 0; i < 4; i++) {
200	a[i] = unhexchar(p[1 + i]);
201	if (a[i] < 0)
202	return a[i];
203	}
204
205	c = ((uint32_t) a[0] << 12U) \| ((uint32_t) a[1] << 8U) \| ((uint32_t) a[2] << 4U) \| (uint32_t) a[3];
206
207	/* Don't allow 0 chars */
208	if (c == 0)
209	return -EINVAL;
210
211	if (c < 128)
212	*ret = c;
213	else {
214	if (!ret_unicode)
215	return -EINVAL;
216
217	*ret = 0;
218	*ret_unicode = c;
219	}
220
221	r = 5;
222	break;
223	}
224
225	case 'U': {
226	/* C++11 style 32bit unicode */
227
228	int a[8];
229	unsigned i;
dcd12626	230	uint32_t c;
4f5dd394 LP	231
	232	if (length != (size_t) -1 && length < 9)
	233	return -EINVAL;
	234
	235	for (i = 0; i < 8; i++) {
	236	a[i] = unhexchar(p[1 + i]);
	237	if (a[i] < 0)
	238	return a[i];
	239	}
	240
dcd12626 LP	241	c = ((uint32_t) a[0] << 28U) \| ((uint32_t) a[1] << 24U) \| ((uint32_t) a[2] << 20U) \| ((uint32_t) a[3] << 16U) \|
dcd12626 LP	242	((uint32_t) a[4] << 12U) \| ((uint32_t) a[5] << 8U) \| ((uint32_t) a[6] << 4U) \| (uint32_t) a[7];
4f5dd394 LP	243
	244	/* Don't allow 0 chars */
	245	if (c == 0)
	246	return -EINVAL;
	247
	248	/* Don't allow invalid code points */
	249	if (!unichar_is_valid(c))
	250	return -EINVAL;
	251
	252	if (c < 128)
	253	*ret = c;
	254	else {
	255	if (!ret_unicode)
	256	return -EINVAL;
	257
	258	*ret = 0;
	259	*ret_unicode = c;
	260	}
	261
	262	r = 9;
	263	break;
	264	}
	265
	266	case '0':
	267	case '1':
	268	case '2':
	269	case '3':
	270	case '4':
	271	case '5':
	272	case '6':
	273	case '7': {
	274	/* octal encoding */
	275	int a, b, c;
dcd12626	276	uint32_t m;
4f5dd394 LP	277
	278	if (length != (size_t) -1 && length < 3)
	279	return -EINVAL;
	280
	281	a = unoctchar(p[0]);
	282	if (a < 0)
	283	return -EINVAL;
	284
	285	b = unoctchar(p[1]);
	286	if (b < 0)
	287	return -EINVAL;
	288
	289	c = unoctchar(p[2]);
	290	if (c < 0)
	291	return -EINVAL;
	292
	293	/* don't allow NUL bytes */
	294	if (a == 0 && b == 0 && c == 0)
	295	return -EINVAL;
	296
	297	/* Don't allow bytes above 255 */
dcd12626	298	m = ((uint32_t) a << 6U) \| ((uint32_t) b << 3U) \| (uint32_t) c;
4f5dd394 LP	299	if (m > 255)
	300	return -EINVAL;
	301
	302	*ret = m;
	303	r = 3;
	304	break;
	305	}
	306
	307	default:
	308	return -EINVAL;
	309	}
	310
	311	return r;
	312	}
	313
	314	int cunescape_length_with_prefix(const char s, size_t length, const char prefix, UnescapeFlags flags, char **ret) {
	315	char r, t;
	316	const char *f;
	317	size_t pl;
	318
	319	assert(s);
	320	assert(ret);
	321
	322	/* Undoes C style string escaping, and optionally prefixes it. */
	323
	324	pl = prefix ? strlen(prefix) : 0;
	325
	326	r = new(char, pl+length+1);
	327	if (!r)
	328	return -ENOMEM;
	329
	330	if (prefix)
	331	memcpy(r, prefix, pl);
	332
	333	for (f = s, t = r + pl; f < s + length; f++) {
	334	size_t remaining;
dcd12626	335	uint32_t u;
4f5dd394 LP	336	char c;
	337	int k;
	338
	339	remaining = s + length - f;
	340	assert(remaining > 0);
	341
	342	if (*f != '\\') {
	343	/* A literal literal, copy verbatim */
	344	(t++) = f;
	345	continue;
	346	}
	347
	348	if (remaining == 1) {
	349	if (flags & UNESCAPE_RELAX) {
	350	/* A trailing backslash, copy verbatim */
	351	(t++) = f;
	352	continue;
	353	}
	354
	355	free(r);
	356	return -EINVAL;
	357	}
	358
	359	k = cunescape_one(f + 1, remaining - 1, &c, &u);
	360	if (k < 0) {
	361	if (flags & UNESCAPE_RELAX) {
	362	/* Invalid escape code, let's take it literal then */
	363	*(t++) = '\\';
	364	continue;
	365	}
	366
	367	free(r);
	368	return k;
	369	}
	370
	371	if (c != 0)
	372	/* Non-Unicode? Let's encode this directly */
	373	*(t++) = c;
	374	else
	375	/* Unicode? Then let's encode this in UTF-8 */
	376	t += utf8_encode_unichar(t, u);
	377
	378	f += k;
	379	}
	380
	381	*t = 0;
	382
	383	*ret = r;
	384	return t - r;
	385	}
	386
	387	int cunescape_length(const char s, size_t length, UnescapeFlags flags, char *ret) {
	388	return cunescape_length_with_prefix(s, length, NULL, flags, ret);
	389	}
	390
	391	int cunescape(const char s, UnescapeFlags flags, char *ret) {
	392	return cunescape_length(s, strlen(s), flags, ret);
	393	}
	394
	395	char xescape(const char s, const char *bad) {
	396	char r, t;
	397	const char *f;
	398
	399	/* Escapes all chars in bad, in addition to \ and all special
400	* chars, in \xFF style escaping. May be reversed with
401	* cunescape(). */
402
403	r = new(char, strlen(s) * 4 + 1);
404	if (!r)
405	return NULL;
406
407	for (f = s, t = r; *f; f++) {
408
409	if ((f < ' ') \|\| (f >= 127) \|\|
410	(f == '\\') \|\| strchr(bad, f)) {
411	*(t++) = '\\';
412	*(t++) = 'x';
413	(t++) = hexchar(f >> 4);
414	(t++) = hexchar(f);
415	} else
416	(t++) = f;
417	}
418
419	*t = 0;
420
421	return r;
422	}
423
424	static char strcpy_backslash_escaped(char t, const char s, const char bad) {
425	assert(bad);
426
427	for (; *s; s++) {
428	if (s == '\\' \|\| strchr(bad, s))
429	*(t++) = '\\';
430
431	(t++) = s;
432	}
433
434	return t;
435	}
436
437	char shell_escape(const char s, const char *bad) {
438	char r, t;
439
440	r = new(char, strlen(s)*2+1);
441	if (!r)
442	return NULL;
443
444	t = strcpy_backslash_escaped(r, s, bad);
445	*t = 0;
446
447	return r;
448	}
449
450	char shell_maybe_quote(const char s) {
451	const char *p;
452	char r, t;
453
454	assert(s);
455
456	/* Encloses a string in double quotes if necessary to make it
457	* OK as shell string. */
458
459	for (p = s; *p; p++)
460	if (*p <= ' ' \|\|
461	*p >= 127 \|\|
462	strchr(SHELL_NEED_QUOTES, *p))
463	break;
464
465	if (!*p)
466	return strdup(s);
467
468	r = new(char, 1+strlen(s)*2+1+1);
469	if (!r)
470	return NULL;
471
472	t = r;
473	*(t++) = '"';
474	t = mempcpy(t, s, p - s);
475
476	t = strcpy_backslash_escaped(t, p, SHELL_NEED_ESCAPE);
477
478	*(t++)= '"';
479	*t = 0;
480
481	return r;
482	}