[thirdparty/systemd.git] / src / basic / escape.c

/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/

/***
  This file is part of systemd.

  Copyright 2010 Lennart Poettering

  systemd is free software; you can redistribute it and/or modify it
  under the terms of the GNU Lesser General Public License as published by
  the Free Software Foundation; either version 2.1 of the License, or
  (at your option) any later version.

  systemd is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public License
  along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/

#include "alloc-util.h"
#include "escape.h"
#include "hexdecoct.h"
#include "utf8.h"
#include "util.h"

size_t cescape_char(char c, char *buf) {
        char *out = buf;
        char letter = 0;

        /* Writes the C-style escaped form of the single character c to buf
         * (no NUL terminator is appended) and returns the number of bytes
         * written: 1 for a plain character, 2 for a named escape, 4 for an
         * octal escape. */

        /* First, see whether c has a dedicated two-character escape. */
        switch (c) {
        case '\a':
                letter = 'a';
                break;
        case '\b':
                letter = 'b';
                break;
        case '\f':
                letter = 'f';
                break;
        case '\n':
                letter = 'n';
                break;
        case '\r':
                letter = 'r';
                break;
        case '\t':
                letter = 't';
                break;
        case '\v':
                letter = 'v';
                break;
        case '\\':
        case '"':
        case '\'':
                /* These escape to a backslash followed by themselves */
                letter = c;
                break;
        default:
                break;
        }

        if (letter != 0) {
                *(out++) = '\\';
                *(out++) = letter;
        } else if (c < ' ' || c >= 127) {
                /* Everything else outside the printable range is written as
                 * a three digit octal escape (octal rather than hex, to
                 * match what glib's g_strescape() emits). */
                unsigned char u = (unsigned char) c;

                *(out++) = '\\';
                *(out++) = octchar(u >> 6);
                *(out++) = octchar(u >> 3);
                *(out++) = octchar(u);
        } else
                *(out++) = c;

        return out - buf;
}

char *cescape(const char *s) {
        const char *in;
        char *escaped, *out;

        assert(s);

        /* Returns a newly allocated, NUL-terminated copy of s with C style
         * escaping applied, or NULL if the allocation fails. The inverse
         * operation is cunescape(). */

        /* Each input byte is given room for four output bytes, plus one for
         * the trailing NUL. */
        escaped = new(char, strlen(s)*4 + 1);
        if (!escaped)
                return NULL;

        out = escaped;
        in = s;
        while (*in) {
                out += cescape_char(*in, out);
                in++;
        }

        *out = 0;

        return escaped;
}

int cunescape_one(const char *p, size_t length, char *ret, uint32_t *ret_unicode) {
        int r = 1;

        assert(p);
        assert(*p);
        assert(ret);

        /* Unescapes a single C style escape sequence. p points at the
         * character following the backslash; length is the number of
         * characters available at p, or (size_t) -1 if no length limit
         * is to be enforced.
         *
         * On success the number of characters consumed from p is
         * returned (1, 3, 5 or 9) and the unescaped character is stored
         * in *ret, unless we encountered a \u or \U sequence encoding a
         * value of 128 or above, in which case *ret is set to 0 and the
         * full unicode character is stored in *ret_unicode instead
         * (ret_unicode must be non-NULL for that, otherwise this is an
         * error).
         *
         * On failure a negative errno-style value is returned. Sequences
         * that would decode to a NUL character are always refused. */

        if (length != (size_t) -1 && length < 1)
                return -EINVAL;

        switch (p[0]) {

        case 'a':
                *ret = '\a';
                break;
        case 'b':
                *ret = '\b';
                break;
        case 'f':
                *ret = '\f';
                break;
        case 'n':
                *ret = '\n';
                break;
        case 'r':
                *ret = '\r';
                break;
        case 't':
                *ret = '\t';
                break;
        case 'v':
                *ret = '\v';
                break;
        case '\\':
                *ret = '\\';
                break;
        case '"':
                *ret = '"';
                break;
        case '\'':
                *ret = '\'';
                break;

        case 's':
                /* This is an extension of the XDG syntax files */
                *ret = ' ';
                break;

        case 'x': {
                /* hexadecimal encoding: exactly two hex digits */
                int a, b;

                if (length != (size_t) -1 && length < 3)
                        return -EINVAL;

                a = unhexchar(p[1]);
                if (a < 0)
                        return -EINVAL;

                b = unhexchar(p[2]);
                if (b < 0)
                        return -EINVAL;

                /* Don't allow NUL bytes */
                if (a == 0 && b == 0)
                        return -EINVAL;

                *ret = (char) ((a << 4U) | b);
                r = 3;
                break;
        }

        case 'u': {
                /* C++11 style 16bit unicode: exactly four hex digits */

                int a[4];
                unsigned i;
                uint32_t c;

                if (length != (size_t) -1 && length < 5)
                        return -EINVAL;

                for (i = 0; i < 4; i++) {
                        a[i] = unhexchar(p[1 + i]);
                        if (a[i] < 0)
                                return a[i];
                }

                c = ((uint32_t) a[0] << 12U) | ((uint32_t) a[1] << 8U) | ((uint32_t) a[2] << 4U) | (uint32_t) a[3];

                /* Don't allow 0 chars */
                if (c == 0)
                        return -EINVAL;

                if (c < 128)
                        *ret = c;
                else {
                        if (!ret_unicode)
                                return -EINVAL;

                        *ret = 0;
                        *ret_unicode = c;
                }

                r = 5;
                break;
        }

        case 'U': {
                /* C++11 style 32bit unicode: exactly eight hex digits */

                int a[8];
                unsigned i;
                char32_t c;

                if (length != (size_t) -1 && length < 9)
                        return -EINVAL;

                for (i = 0; i < 8; i++) {
                        a[i] = unhexchar(p[1 + i]);
                        if (a[i] < 0)
                                return a[i];
                }

                /* Convert each digit to an unsigned 32bit value before
                 * shifting: shifting the plain int by 28 would overflow
                 * the signed type whenever the first digit is 8 or
                 * larger, which is undefined behaviour. */
                c = ((uint32_t) a[0] << 28U) | ((uint32_t) a[1] << 24U) |
                    ((uint32_t) a[2] << 20U) | ((uint32_t) a[3] << 16U) |
                    ((uint32_t) a[4] << 12U) | ((uint32_t) a[5] <<  8U) |
                    ((uint32_t) a[6] <<  4U) |  (uint32_t) a[7];

                /* Don't allow 0 chars */
                if (c == 0)
                        return -EINVAL;

                /* Don't allow invalid code points */
                if (!unichar_is_valid(c))
                        return -EINVAL;

                if (c < 128)
                        *ret = c;
                else {
                        if (!ret_unicode)
                                return -EINVAL;

                        *ret = 0;
                        *ret_unicode = c;
                }

                r = 9;
                break;
        }

        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7': {
                /* octal encoding: exactly three octal digits, the first
                 * of which is p[0] itself */
                int a, b, c;
                char32_t m;

                if (length != (size_t) -1 && length < 3)
                        return -EINVAL;

                a = unoctchar(p[0]);
                if (a < 0)
                        return -EINVAL;

                b = unoctchar(p[1]);
                if (b < 0)
                        return -EINVAL;

                c = unoctchar(p[2]);
                if (c < 0)
                        return -EINVAL;

                /* don't allow NUL bytes */
                if (a == 0 && b == 0 && c == 0)
                        return -EINVAL;

                /* Don't allow bytes above 255 */
                m = ((char32_t) a << 6U) | ((char32_t) b << 3U) | (char32_t) c;
                if (m > 255)
                        return -EINVAL;

                *ret = m;
                r = 3;
                break;
        }

        default:
                return -EINVAL;
        }

        return r;
}

int cunescape_length_with_prefix(const char *s, size_t length, const char *prefix, UnescapeFlags flags, char **ret) {
        const char *in, *end;
        char *buf, *out;
        size_t prefix_len;

        assert(s);
        assert(ret);

        /* Reverses C style escaping on the first length characters of s.
         * If prefix is non-NULL it is copied verbatim in front of the
         * result. On success the newly allocated, NUL-terminated string is
         * stored in *ret and its length (prefix included, NUL excluded) is
         * returned. On failure a negative errno-style value is returned and
         * *ret is left untouched.
         *
         * With UNESCAPE_RELAX set, a trailing backslash or an unparsable
         * escape sequence is not an error: the backslash is simply kept. */

        prefix_len = prefix ? strlen(prefix) : 0;

        buf = new(char, prefix_len+length+1);
        if (!buf)
                return -ENOMEM;

        if (prefix)
                memcpy(buf, prefix, prefix_len);

        out = buf + prefix_len;
        end = s + length;

        for (in = s; in < end; in++) {
                size_t left = end - in;
                char32_t wide;
                char byte;
                int consumed;

                assert(left > 0);

                if (*in != '\\') {
                        /* An ordinary character, copy verbatim */
                        *(out++) = *in;
                        continue;
                }

                if (left == 1) {
                        /* A backslash with nothing after it */
                        if (!(flags & UNESCAPE_RELAX)) {
                                free(buf);
                                return -EINVAL;
                        }

                        *(out++) = *in;
                        continue;
                }

                consumed = cunescape_one(in + 1, left - 1, &byte, &wide);
                if (consumed < 0) {
                        /* Not a valid escape sequence */
                        if (!(flags & UNESCAPE_RELAX)) {
                                free(buf);
                                return consumed;
                        }

                        /* Keep the backslash; whatever follows it is then
                         * processed by the next iterations. */
                        *(out++) = '\\';
                        continue;
                }

                if (byte == 0)
                        /* A zero byte signals that the result is in wide,
                         * emit it as UTF-8 */
                        out += utf8_encode_unichar(out, wide);
                else
                        *(out++) = byte;

                /* Skip what the escape sequence consumed; the loop
                 * increment accounts for the backslash itself. */
                in += consumed;
        }

        *out = 0;

        *ret = buf;
        return out - buf;
}

int cunescape_length(const char *s, size_t length, UnescapeFlags flags, char **ret) {
        int k;

        /* Same as cunescape_length_with_prefix(), but without any prefix. */
        k = cunescape_length_with_prefix(s, length, NULL, flags, ret);

        return k;
}

int cunescape(const char *s, UnescapeFlags flags, char **ret) {
        size_t n;

        /* Unescapes the whole NUL-terminated string s, see
         * cunescape_length(). */
        n = strlen(s);

        return cunescape_length(s, n, flags, ret);
}

char *xescape(const char *s, const char *bad) {
        const char *in;
        char *escaped, *out;

        /* Returns a newly allocated copy of s in which every character
         * below ' ', every character at or above 127, the backslash and
         * every character listed in bad is replaced by a \xFF style
         * escape sequence. Returns NULL if the allocation fails. The
         * inverse operation is cunescape(). */

        /* Room for four output bytes per input byte, plus the NUL */
        escaped = new(char, strlen(s) * 4 + 1);
        if (!escaped)
                return NULL;

        out = escaped;

        for (in = s; *in; in++) {

                if (*in >= ' ' && *in < 127 &&
                    *in != '\\' && !strchr(bad, *in)) {
                        /* Nothing special about this one, keep as is */
                        *(out++) = *in;
                        continue;
                }

                *(out++) = '\\';
                *(out++) = 'x';
                *(out++) = hexchar(*in >> 4);
                *(out++) = hexchar(*in);
        }

        *out = 0;

        return escaped;
}

/* Copies s to t, putting a backslash in front of every backslash and every
 * character that appears in bad. No NUL terminator is written; the return
 * value points just past the last byte written, so the caller can continue
 * appending or terminate the string there. */
static char *strcpy_backslash_escaped(char *t, const char *s, const char *bad) {
        const char *src = s;
        char *dst = t;

        assert(bad);

        while (*src) {
                char ch = *src;

                if (ch == '\\' || strchr(bad, ch))
                        *(dst++) = '\\';

                *(dst++) = ch;
                src++;
        }

        return dst;
}

char *shell_escape(const char *s, const char *bad) {
        char *escaped, *end;

        /* Returns a newly allocated copy of s in which every backslash and
         * every character listed in bad is preceded by a backslash, or NULL
         * if the allocation fails. */

        /* Worst case every character gets a backslash in front, plus NUL */
        escaped = new(char, strlen(s)*2+1);
        if (!escaped)
                return NULL;

        end = strcpy_backslash_escaped(escaped, s, bad);
        *end = 0;

        return escaped;
}

char *shell_maybe_quote(const char *s) {
        const char *special;
        char *quoted, *out;

        assert(s);

        /* Returns a newly allocated copy of s, wrapped in double quotes if
         * that is needed to make it usable as a shell string, or NULL if
         * the allocation fails. */

        /* Look for the first character that makes quoting necessary */
        special = s;
        while (*special) {
                if (*special <= ' ' ||
                    *special >= 127 ||
                    strchr(SHELL_NEED_QUOTES, *special))
                        break;

                special++;
        }

        /* Reached the end without finding any? Then a plain copy will do */
        if (!*special)
                return strdup(s);

        /* Opening quote, up to two bytes per character, closing quote, NUL */
        quoted = new(char, 1+strlen(s)*2+1+1);
        if (!quoted)
                return NULL;

        out = quoted;
        *(out++) = '"';

        /* Everything before the first special character is known to be
         * harmless and can be copied as is ... */
        out = mempcpy(out, s, special - s);

        /* ... while the remainder needs backslash escaping. */
        out = strcpy_backslash_escaped(out, special, SHELL_NEED_ESCAPE);

        *(out++) = '"';
        *out = 0;

        return quoted;
}
Commit	Line	Data
4f5dd394 LP	1	/-- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil --/
	2
	3	/***
	4	This file is part of systemd.
	5
	6	Copyright 2010 Lennart Poettering
	7
	8	systemd is free software; you can redistribute it and/or modify it
	9	under the terms of the GNU Lesser General Public License as published by
	10	the Free Software Foundation; either version 2.1 of the License, or
	11	(at your option) any later version.
	12
	13	systemd is distributed in the hope that it will be useful, but
	14	WITHOUT ANY WARRANTY; without even the implied warranty of
	15	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	16	Lesser General Public License for more details.
	17
	18	You should have received a copy of the GNU Lesser General Public License
	19	along with systemd; If not, see <http://www.gnu.org/licenses/>.
	20	***/
	21
b5efdb8a	22	#include "alloc-util.h"
e4e73a63 LP	23	#include "escape.h"
e4e73a63 LP	24	#include "hexdecoct.h"
4f5dd394 LP	25	#include "utf8.h"
	26	#include "util.h"
	27
4f5dd394 LP	28	size_t cescape_char(char c, char *buf) {
	29	char * buf_old = buf;
	30
	31	switch (c) {
	32
	33	case '\a':
	34	*(buf++) = '\\';
	35	*(buf++) = 'a';
	36	break;
	37	case '\b':
	38	*(buf++) = '\\';
	39	*(buf++) = 'b';
	40	break;
	41	case '\f':
	42	*(buf++) = '\\';
	43	*(buf++) = 'f';
	44	break;
	45	case '\n':
	46	*(buf++) = '\\';
	47	*(buf++) = 'n';
	48	break;
	49	case '\r':
	50	*(buf++) = '\\';
	51	*(buf++) = 'r';
	52	break;
	53	case '\t':
	54	*(buf++) = '\\';
	55	*(buf++) = 't';
	56	break;
	57	case '\v':
	58	*(buf++) = '\\';
	59	*(buf++) = 'v';
	60	break;
	61	case '\\':
	62	*(buf++) = '\\';
	63	*(buf++) = '\\';
	64	break;
	65	case '"':
	66	*(buf++) = '\\';
	67	*(buf++) = '"';
	68	break;
	69	case '\'':
	70	*(buf++) = '\\';
	71	*(buf++) = '\'';
	72	break;
	73
	74	default:
	75	/* For special chars we prefer octal over
	76	* hexadecimal encoding, simply because glib's
	77	* g_strescape() does the same */
	78	if ((c < ' ') \|\| (c >= 127)) {
	79	*(buf++) = '\\';
	80	*(buf++) = octchar((unsigned char) c >> 6);
	81	*(buf++) = octchar((unsigned char) c >> 3);
	82	*(buf++) = octchar((unsigned char) c);
	83	} else
	84	*(buf++) = c;
	85	break;
	86	}
	87
	88	return buf - buf_old;
	89	}
	90
	91	char cescape(const char s) {
92	char r, t;
93	const char *f;
94
95	assert(s);
96
97	/* Does C style string escaping. May be reversed with
98	* cunescape(). */
99
100	r = new(char, strlen(s)*4 + 1);
101	if (!r)
102	return NULL;
103
104	for (f = s, t = r; *f; f++)
105	t += cescape_char(*f, t);
106
107	*t = 0;
108
109	return r;
110	}
111
112	int cunescape_one(const char p, size_t length, char ret, uint32_t *ret_unicode) {
113	int r = 1;
114
115	assert(p);
116	assert(*p);
117	assert(ret);
118
119	/* Unescapes C style. Returns the unescaped character in ret,
120	* unless we encountered a \u sequence in which case the full
121	* unicode character is returned in ret_unicode, instead. */
122
123	if (length != (size_t) -1 && length < 1)
124	return -EINVAL;
125
126	switch (p[0]) {
127
128	case 'a':
129	*ret = '\a';
130	break;
131	case 'b':
132	*ret = '\b';
133	break;
134	case 'f':
135	*ret = '\f';
136	break;
137	case 'n':
138	*ret = '\n';
139	break;
140	case 'r':
141	*ret = '\r';
142	break;
143	case 't':
144	*ret = '\t';
145	break;
146	case 'v':
147	*ret = '\v';
148	break;
149	case '\\':
150	*ret = '\\';
151	break;
152	case '"':
153	*ret = '"';
154	break;
155	case '\'':
156	*ret = '\'';
157	break;
158
159	case 's':
160	/* This is an extension of the XDG syntax files */
161	*ret = ' ';
162	break;
163
164	case 'x': {
165	/* hexadecimal encoding */
166	int a, b;
167
168	if (length != (size_t) -1 && length < 3)
169	return -EINVAL;
170
171	a = unhexchar(p[1]);
172	if (a < 0)
173	return -EINVAL;
174
175	b = unhexchar(p[2]);
176	if (b < 0)
177	return -EINVAL;
178
179	/* Don't allow NUL bytes */
180	if (a == 0 && b == 0)
181	return -EINVAL;
182
183	*ret = (char) ((a << 4U) \| b);
184	r = 3;
185	break;
186	}
187
188	case 'u': {
189	/* C++11 style 16bit unicode */
190
191	int a[4];
192	unsigned i;
193	uint32_t c;
194
195	if (length != (size_t) -1 && length < 5)
196	return -EINVAL;
197
198	for (i = 0; i < 4; i++) {
199	a[i] = unhexchar(p[1 + i]);
200	if (a[i] < 0)
201	return a[i];
202	}
203
204	c = ((uint32_t) a[0] << 12U) \| ((uint32_t) a[1] << 8U) \| ((uint32_t) a[2] << 4U) \| (uint32_t) a[3];
205
206	/* Don't allow 0 chars */
207	if (c == 0)
208	return -EINVAL;
209
210	if (c < 128)
211	*ret = c;
212	else {
213	if (!ret_unicode)
214	return -EINVAL;
215
216	*ret = 0;
217	*ret_unicode = c;
218	}
219
220	r = 5;
221	break;
222	}
223
224	case 'U': {
225	/* C++11 style 32bit unicode */
226
227	int a[8];
228	unsigned i;
025b4c41	229	char32_t c;
4f5dd394 LP	230
	231	if (length != (size_t) -1 && length < 9)
	232	return -EINVAL;
	233
	234	for (i = 0; i < 8; i++) {
	235	a[i] = unhexchar(p[1 + i]);
	236	if (a[i] < 0)
	237	return a[i];
	238	}
	239
025b4c41 SL	240	c = (a[0] << 28U) \| (a[1] << 24U) \| (a[2] << 20U) \| (a[3] << 16U) \|
025b4c41 SL	241	(a[4] << 12U) \| (a[5] << 8U) \| (a[6] << 4U) \| (a[7] << 0U);
4f5dd394 LP	242
	243	/* Don't allow 0 chars */
	244	if (c == 0)
	245	return -EINVAL;
	246
	247	/* Don't allow invalid code points */
	248	if (!unichar_is_valid(c))
	249	return -EINVAL;
	250
	251	if (c < 128)
	252	*ret = c;
	253	else {
	254	if (!ret_unicode)
	255	return -EINVAL;
	256
	257	*ret = 0;
	258	*ret_unicode = c;
	259	}
	260
	261	r = 9;
	262	break;
	263	}
	264
	265	case '0':
	266	case '1':
	267	case '2':
	268	case '3':
	269	case '4':
	270	case '5':
	271	case '6':
	272	case '7': {
	273	/* octal encoding */
	274	int a, b, c;
025b4c41	275	char32_t m;
4f5dd394 LP	276
	277	if (length != (size_t) -1 && length < 3)
	278	return -EINVAL;
	279
	280	a = unoctchar(p[0]);
	281	if (a < 0)
	282	return -EINVAL;
	283
	284	b = unoctchar(p[1]);
	285	if (b < 0)
	286	return -EINVAL;
	287
	288	c = unoctchar(p[2]);
	289	if (c < 0)
	290	return -EINVAL;
	291
	292	/* don't allow NUL bytes */
	293	if (a == 0 && b == 0 && c == 0)
	294	return -EINVAL;
	295
	296	/* Don't allow bytes above 255 */
025b4c41	297	m = (a << 6U) \| (b << 3U) \| (char32_t) c;
4f5dd394 LP	298	if (m > 255)
	299	return -EINVAL;
	300
	301	*ret = m;
	302	r = 3;
	303	break;
	304	}
	305
	306	default:
	307	return -EINVAL;
	308	}
	309
	310	return r;
	311	}
	312
	313	int cunescape_length_with_prefix(const char s, size_t length, const char prefix, UnescapeFlags flags, char **ret) {
	314	char r, t;
	315	const char *f;
	316	size_t pl;
	317
	318	assert(s);
	319	assert(ret);
	320
	321	/* Undoes C style string escaping, and optionally prefixes it. */
	322
	323	pl = prefix ? strlen(prefix) : 0;
	324
	325	r = new(char, pl+length+1);
	326	if (!r)
	327	return -ENOMEM;
	328
	329	if (prefix)
	330	memcpy(r, prefix, pl);
	331
	332	for (f = s, t = r + pl; f < s + length; f++) {
	333	size_t remaining;
025b4c41	334	char32_t u;
4f5dd394 LP	335	char c;
	336	int k;
	337
	338	remaining = s + length - f;
	339	assert(remaining > 0);
	340
	341	if (*f != '\\') {
	342	/* A literal literal, copy verbatim */
	343	(t++) = f;
	344	continue;
	345	}
	346
	347	if (remaining == 1) {
	348	if (flags & UNESCAPE_RELAX) {
	349	/* A trailing backslash, copy verbatim */
	350	(t++) = f;
	351	continue;
	352	}
	353
	354	free(r);
	355	return -EINVAL;
	356	}
	357
	358	k = cunescape_one(f + 1, remaining - 1, &c, &u);
	359	if (k < 0) {
	360	if (flags & UNESCAPE_RELAX) {
	361	/* Invalid escape code, let's take it literal then */
	362	*(t++) = '\\';
	363	continue;
	364	}
	365
	366	free(r);
	367	return k;
	368	}
	369
	370	if (c != 0)
	371	/* Non-Unicode? Let's encode this directly */
	372	*(t++) = c;
	373	else
	374	/* Unicode? Then let's encode this in UTF-8 */
	375	t += utf8_encode_unichar(t, u);
	376
	377	f += k;
	378	}
	379
	380	*t = 0;
	381
	382	*ret = r;
	383	return t - r;
	384	}
	385
	386	int cunescape_length(const char s, size_t length, UnescapeFlags flags, char *ret) {
	387	return cunescape_length_with_prefix(s, length, NULL, flags, ret);
	388	}
	389
	390	int cunescape(const char s, UnescapeFlags flags, char *ret) {
	391	return cunescape_length(s, strlen(s), flags, ret);
	392	}
	393
	394	char xescape(const char s, const char *bad) {
	395	char r, t;
	396	const char *f;
	397
	398	/* Escapes all chars in bad, in addition to \ and all special
399	* chars, in \xFF style escaping. May be reversed with
400	* cunescape(). */
401
402	r = new(char, strlen(s) * 4 + 1);
403	if (!r)
404	return NULL;
405
406	for (f = s, t = r; *f; f++) {
407
408	if ((f < ' ') \|\| (f >= 127) \|\|
409	(f == '\\') \|\| strchr(bad, f)) {
410	*(t++) = '\\';
411	*(t++) = 'x';
412	(t++) = hexchar(f >> 4);
413	(t++) = hexchar(f);
414	} else
415	(t++) = f;
416	}
417
418	*t = 0;
419
420	return r;
421	}
422
423	static char strcpy_backslash_escaped(char t, const char s, const char bad) {
424	assert(bad);
425
426	for (; *s; s++) {
427	if (s == '\\' \|\| strchr(bad, s))
428	*(t++) = '\\';
429
430	(t++) = s;
431	}
432
433	return t;
434	}
435
436	char shell_escape(const char s, const char *bad) {
437	char r, t;
438
439	r = new(char, strlen(s)*2+1);
440	if (!r)
441	return NULL;
442
443	t = strcpy_backslash_escaped(r, s, bad);
444	*t = 0;
445
446	return r;
447	}
448
449	char shell_maybe_quote(const char s) {
450	const char *p;
451	char r, t;
452
453	assert(s);
454
455	/* Encloses a string in double quotes if necessary to make it
456	* OK as shell string. */
457
458	for (p = s; *p; p++)
459	if (*p <= ' ' \|\|
460	*p >= 127 \|\|
461	strchr(SHELL_NEED_QUOTES, *p))
462	break;
463
464	if (!*p)
465	return strdup(s);
466
467	r = new(char, 1+strlen(s)*2+1+1);
468	if (!r)
469	return NULL;
470
471	t = r;
472	*(t++) = '"';
473	t = mempcpy(t, s, p - s);
474
475	t = strcpy_backslash_escaped(t, p, SHELL_NEED_ESCAPE);
476
477	*(t++)= '"';
478	*t = 0;
479
480	return r;
481	}