[thirdparty/systemd.git] / src / basic / extract-word.c

/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/

/***
  This file is part of systemd.

  Copyright 2010 Lennart Poettering

  systemd is free software; you can redistribute it and/or modify it
  under the terms of the GNU Lesser General Public License as published by
  the Free Software Foundation; either version 2.1 of the License, or
  (at your option) any later version.

  systemd is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public License
  along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/

#include "alloc-util.h"
#include "escape.h"
#include "extract-word.h"
#include "string-util.h"
#include "utf8.h"
#include "util.h"

/* Extracts the first word from the string *p and hands it back as a newly allocated
 * string in *ret, advancing *p past what was consumed.
 *
 * p          - in/out cursor into the input; may point to NULL, which is treated as
 *              "no more input". Set to NULL once the end of the input is reached.
 * ret        - receives the extracted word (return 1) or NULL (return 0); it is not
 *              written on the error returns.
 * separators - characters that delimit words; NULL selects WHITESPACE.
 * flags      - EXTRACT_* bits tested below:
 *                EXTRACT_QUOTES                    ' and " open and close quoted sections
 *                EXTRACT_CUNESCAPE                 decode the sequence following a
 *                                                  backslash with cunescape_one()
 *                EXTRACT_CUNESCAPE_RELAX           keep undecodable escapes and an unquoted
 *                                                  trailing backslash verbatim
 *                EXTRACT_RELAX                     accept input that ends inside quotes or
 *                                                  right after a backslash
 *                EXTRACT_DONT_COALESCE_SEPARATORS  every single separator ends a word, so
 *                                                  empty words are returned
 *
 * Returns 1 if a word was stored in *ret, 0 if there was nothing left to extract
 * (*ret and *p are then both NULL), -ENOMEM if growing the buffer failed, and -EINVAL
 * on an unterminated quote, a dangling backslash or an undecodable escape sequence
 * that the flags do not permit. */
int extract_first_word(const char **p, char **ret, const char *separators, ExtractFlags flags) {
        /* Output buffer; NULL until we know a word (possibly empty) will be returned. */
        _cleanup_free_ char *s = NULL;
        /* allocated: bookkeeping for GREEDY_REALLOC(); sz: number of bytes written to s so far */
        size_t allocated = 0, sz = 0;
        int r;

        char quote = 0;                 /* 0 or ' or " */
        bool backslash = false;         /* whether we've just seen a backslash */
        bool separator = false;         /* whether we've just seen a separator */
        bool start = true;              /* false means we're looking at a value */

        assert(p);
        assert(ret);

        if (!separators)
                separators = WHITESPACE;

        /* Bail early if called after last value or with no input */
        if (!*p)
                goto finish_force_terminate;

        /* Parses the first word of a string, and returns it in
         * *ret. Removes all quotes in the process. When parsing fails
         * (because of an uneven number of quotes or similar), leaves
         * the pointer *p at the first invalid character. */

        /* One input character is examined per iteration. Exactly one of the states
         * below handles it, checked in this order: start, backslash, quote,
         * separator, plain text. */
        for (;;) {
                char c = **p;

                if (start) {
                        /* In non-coalescing mode even an empty word is returned, hence
                         * make sure s is non-NULL before any of the exits below. */
                        if (flags & EXTRACT_DONT_COALESCE_SEPARATORS)
                                if (!GREEDY_REALLOC(s, allocated, sz+1))
                                        return -ENOMEM;

                        if (c == 0)
                                goto finish_force_terminate;
                        else if (strchr(separators, c)) {
                                /* Leading separator: skip it. In non-coalescing mode it
                                 * terminates an empty word right away. */
                                (*p) ++;
                                if (flags & EXTRACT_DONT_COALESCE_SEPARATORS)
                                        goto finish_force_next;
                                continue;
                        }

                        /* We found a non-blank character, so we will always
                         * want to return a string (even if it is empty),
                         * allocate it here. */
                        if (!GREEDY_REALLOC(s, allocated, sz+1))
                                return -ENOMEM;

                        /* No "continue" here: the same character is now processed by
                         * one of the states below. */
                        start = false;
                }

                if (backslash) {
                        /* Reserve room up front for whatever this escape may append
                         * (a backslash plus a character, or an encoded unicode
                         * character) and the terminating NUL. */
                        if (!GREEDY_REALLOC(s, allocated, sz+7))
                                return -ENOMEM;

                        if (c == 0) {
                                if ((flags & EXTRACT_CUNESCAPE_RELAX) &&
                                    (!quote || flags & EXTRACT_RELAX)) {
                                        /* If we find an unquoted trailing backslash and we're in
                                         * EXTRACT_CUNESCAPE_RELAX mode, keep it verbatim in the
                                         * output.
                                         *
                                         * Unbalanced quotes will only be allowed in EXTRACT_RELAX
                                         * mode, EXTRACT_CUNESCAPE_RELAX mode does not allow them.
                                         */
                                        s[sz++] = '\\';
                                        goto finish_force_terminate;
                                }
                                /* With EXTRACT_RELAX alone the dangling backslash is
                                 * silently dropped. */
                                if (flags & EXTRACT_RELAX)
                                        goto finish_force_terminate;
                                return -EINVAL;
                        }

                        if (flags & EXTRACT_CUNESCAPE) {
                                uint32_t u;

                                /* *p points at the character following the backslash. */
                                r = cunescape_one(*p, (size_t) -1, &c, &u);
                                if (r < 0) {
                                        if (flags & EXTRACT_CUNESCAPE_RELAX) {
                                                /* Undecodable escape: emit the backslash and
                                                 * the character as they are. */
                                                s[sz++] = '\\';
                                                s[sz++] = c;
                                                goto end_escape;
                                        }
                                        return -EINVAL;
                                }

                                /* r is used as the length of the decoded sequence; skip
                                 * all but its last character here, the increment at the
                                 * bottom of the loop steps over that one. */
                                (*p) += r - 1;

                                if (c != 0)
                                        s[sz++] = c; /* normal explicit char */
                                else
                                        sz += utf8_encode_unichar(s + sz, u); /* unicode chars we'll encode as utf8 */
                        } else
                                /* No unescaping requested: drop the backslash and take
                                 * the following character literally. */
                                s[sz++] = c;

end_escape:
                        backslash = false;

                } else if (quote) {     /* inside either single or double quotes */
                        if (c == 0) {
                                /* Input ended before the closing quote. */
                                if (flags & EXTRACT_RELAX)
                                        goto finish_force_terminate;
                                return -EINVAL;
                        } else if (c == quote)          /* found the end quote */
                                quote = 0;
                        else if (c == '\\')
                                backslash = true;
                        else {
                                /* One byte for c plus one for the terminating NUL. */
                                if (!GREEDY_REALLOC(s, allocated, sz+2))
                                        return -ENOMEM;

                                s[sz++] = c;
                        }

                } else if (separator) {
                        /* The word is complete; swallow the run of separators after it. */
                        if (c == 0)
                                goto finish_force_terminate;
                        /* First character of the next word: stop with *p pointing at it. */
                        if (!strchr(separators, c))
                                goto finish;

                } else {
                        /* Plain, unquoted text of the word. */
                        if (c == 0)
                                goto finish_force_terminate;
                        else if ((c == '\'' || c == '"') && (flags & EXTRACT_QUOTES))
                                quote = c;
                        else if (c == '\\')
                                backslash = true;
                        else if (strchr(separators, c)) {
                                if (flags & EXTRACT_DONT_COALESCE_SEPARATORS) {
                                        /* Consume exactly this one separator and return. */
                                        (*p) ++;
                                        goto finish_force_next;
                                }
                                separator = true;
                        } else {
                                /* One byte for c plus one for the terminating NUL. */
                                if (!GREEDY_REALLOC(s, allocated, sz+2))
                                        return -ENOMEM;

                                s[sz++] = c;
                        }
                }

                (*p) ++;
        }

        /* End of input reached: tell the caller there is nothing more to parse. */
finish_force_terminate:
        *p = NULL;
        /* No buffer was ever allocated, i.e. no word was found. */
finish:
        if (!s) {
                *p = NULL;
                *ret = NULL;
                return 0;
        }

        /* A word (possibly empty) is available: terminate it and hand it out.
         * s is reset to NULL afterwards, presumably so that the _cleanup_free_
         * handling does not release the buffer now referenced by *ret. */
finish_force_next:
        s[sz] = 0;
        *ret = s;
        s = NULL;

        return 1;
}

int extract_first_word_and_warn(
                const char **p,
                char **ret,
                const char *separators,
                ExtractFlags flags,
                const char *unit,
                const char *filename,
                unsigned line,
                const char *rvalue) {

        /* Logging front-end for extract_first_word(): the word is first parsed with
         * the caller's flags. Should that fail with -EINVAL, the cursor is rewound
         * and parsing is attempted once more with EXTRACT_CUNESCAPE_RELAX added, so
         * that invalid escape sequences are kept verbatim instead of being rejected.
         *
         * unit, filename, line and rvalue are only passed on to log_syntax().
         *
         * Returns the result of extract_first_word() on success (also when only the
         * relaxed attempt succeeded, in which case a warning is logged), otherwise
         * whatever log_syntax() yields for the logged error. */

        const char *rewind_to = *p;
        bool may_retry;
        int k;

        k = extract_first_word(p, ret, separators, flags);
        if (k >= 0)
                return k;

        /* A relaxed retry only makes sense if the caller did not ask for relaxed
         * parsing to begin with. */
        may_retry = !(flags & EXTRACT_CUNESCAPE_RELAX);

        if (may_retry && k == -EINVAL) {

                /* Start over from where we began, this time tolerating bad escapes. */
                *p = rewind_to;
                k = extract_first_word(p, ret, separators, flags|EXTRACT_CUNESCAPE_RELAX);

                /* Still -EINVAL although escapes are tolerated now: the quoting must
                 * be what is broken. */
                if (k == -EINVAL)
                        return log_syntax(unit, LOG_ERR, filename, line, k, "Unbalanced quoting, ignoring: \"%s\"", rvalue);

                /* The relaxed attempt got through, so the first one must have tripped
                 * over an escape sequence we were able to paper over. */
                if (k >= 0) {
                        log_syntax(unit, LOG_WARNING, filename, line, EINVAL, "Invalid escape sequences in line, correcting: \"%s\"", rvalue);
                        return k;
                }
        }

        /* Anything else is reported generically. */
        return log_syntax(unit, LOG_ERR, filename, line, k, "Unable to decode word \"%s\", ignoring: %m", rvalue);
}

int extract_many_words(const char **p, const char *separators, ExtractFlags flags, ...) {
        va_list args;
        char **words;
        int expected = 0, parsed, slot, k;

        /* Extracts several words from *p in one go. The variable arguments are a
         * NULL-terminated list of char** locations, one per expected word; each word
         * is parsed with extract_first_word() using the given separators and flags.
         *
         * Returns the number of words actually extracted (0 if no locations were
         * passed), or the negative error of the failing extract_first_word() call.
         * On error none of the passed locations is written. */

        assert(p);

        /* First pass over the arguments: find out how many locations we were given */
        va_start(args, flags);
        while (va_arg(args, char **))
                expected++;
        va_end(args);

        if (expected <= 0)
                return 0;

        /* Collect the words in a scratch array first, so that a failure halfway
         * through leaves the caller's locations alone. newa0() presumably provides a
         * zero-initialized temporary array (project macro, confirm there). */
        words = newa0(char*, expected);
        for (parsed = 0; parsed < expected; parsed++) {

                k = extract_first_word(p, &words[parsed], separators, flags);
                if (k > 0)
                        continue;

                /* Input exhausted before all locations could be served */
                if (k == 0)
                        break;

                /* Parse error: release what was extracted so far and give up */
                for (slot = 0; slot < parsed; slot++)
                        free(words[slot]);

                return k;
        }

        /* Second pass over the arguments: hand every entry of the scratch array,
         * extracted or not, to the corresponding location */
        va_start(args, flags);
        for (slot = 0; slot < expected; slot++) {
                char **target = va_arg(args, char **);

                assert(target);
                *target = words[slot];
        }
        va_end(args);

        return parsed;
}
Commit	Line	Data
84ac7bea LP	1	/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
	2
	3	/***
	4	This file is part of systemd.
	5
	6	Copyright 2010 Lennart Poettering
	7
	8	systemd is free software; you can redistribute it and/or modify it
	9	under the terms of the GNU Lesser General Public License as published by
	10	the Free Software Foundation; either version 2.1 of the License, or
	11	(at your option) any later version.
	12
	13	systemd is distributed in the hope that it will be useful, but
	14	WITHOUT ANY WARRANTY; without even the implied warranty of
	15	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	16	Lesser General Public License for more details.
	17
	18	You should have received a copy of the GNU Lesser General Public License
	19	along with systemd; If not, see <http://www.gnu.org/licenses/>.
	20	***/
	21
b5efdb8a	22	#include "alloc-util.h"
4f5dd394	23	#include "escape.h"
b11d6a7b LP	24	#include "extract-word.h"
b11d6a7b LP	25	#include "string-util.h"
84ac7bea LP	26	#include "utf8.h"
84ac7bea LP	27	#include "util.h"
84ac7bea LP	28
	29	int extract_first_word(const char **p, char **ret, const char *separators, ExtractFlags flags) {
	30	_cleanup_free_ char *s = NULL;
	31	size_t allocated = 0, sz = 0;
	32	int r;
	33
	34	char quote = 0; /* 0 or ' or " */
	35	bool backslash = false; /* whether we've just seen a backslash */
	36	bool separator = false; /* whether we've just seen a separator */
	37	bool start = true; /* false means we're looking at a value */
	38
	39	assert(p);
	40	assert(ret);
	41
	42	if (!separators)
	43	separators = WHITESPACE;
	44
	45	/* Bail early if called after last value or with no input */
	46	if (!*p)
	47	goto finish_force_terminate;
	48
	49	/* Parses the first word of a string, and returns it in
	50	* *ret. Removes all quotes in the process. When parsing fails
	51	* (because of an uneven number of quotes or similar), leaves
	52	* the pointer *p at the first invalid character. */
	53
	54	for (;;) {
	55	char c = **p;
	56
	57	if (start) {
	58	if (flags & EXTRACT_DONT_COALESCE_SEPARATORS)
	59	if (!GREEDY_REALLOC(s, allocated, sz+1))
	60	return -ENOMEM;
	61
	62	if (c == 0)
	63	goto finish_force_terminate;
	64	else if (strchr(separators, c)) {
	65	(*p) ++;
	66	if (flags & EXTRACT_DONT_COALESCE_SEPARATORS)
	67	goto finish_force_next;
	68	continue;
	69	}
	70
	71	/* We found a non-blank character, so we will always
	72	* want to return a string (even if it is empty),
	73	* allocate it here. */
	74	if (!GREEDY_REALLOC(s, allocated, sz+1))
	75	return -ENOMEM;
	76
	77	start = false;
	78	}
	79
	80	if (backslash) {
	81	if (!GREEDY_REALLOC(s, allocated, sz+7))
	82	return -ENOMEM;
	83
	84	if (c == 0) {
	85	if ((flags & EXTRACT_CUNESCAPE_RELAX) &&
	86	(!quote || flags & EXTRACT_RELAX)) {
	87	/* If we find an unquoted trailing backslash and we're in
	88	* EXTRACT_CUNESCAPE_RELAX mode, keep it verbatim in the
	89	* output.
	90	*
	91	* Unbalanced quotes will only be allowed in EXTRACT_RELAX
92	* mode, EXTRACT_CUNESCAPE_RELAX mode does not allow them.
93	*/
94	s[sz++] = '\\';
95	goto finish_force_terminate;
96	}
97	if (flags & EXTRACT_RELAX)
98	goto finish_force_terminate;
99	return -EINVAL;
100	}
101
102	if (flags & EXTRACT_CUNESCAPE) {
dcd12626	103	uint32_t u;
84ac7bea LP	104
	105	r = cunescape_one(*p, (size_t) -1, &c, &u);
	106	if (r < 0) {
	107	if (flags & EXTRACT_CUNESCAPE_RELAX) {
	108	s[sz++] = '\\';
	109	s[sz++] = c;
	110	goto end_escape;
	111	}
	112	return -EINVAL;
	113	}
	114
	115	(*p) += r - 1;
	116
	117	if (c != 0)
	118	s[sz++] = c; /* normal explicit char */
	119	else
	120	sz += utf8_encode_unichar(s + sz, u); /* unicode chars we'll encode as utf8 */
	121	} else
	122	s[sz++] = c;
	123
	124	end_escape:
	125	backslash = false;
	126
	127	} else if (quote) { /* inside either single or double quotes */
	128	if (c == 0) {
	129	if (flags & EXTRACT_RELAX)
	130	goto finish_force_terminate;
	131	return -EINVAL;
	132	} else if (c == quote) /* found the end quote */
	133	quote = 0;
	134	else if (c == '\\')
	135	backslash = true;
	136	else {
	137	if (!GREEDY_REALLOC(s, allocated, sz+2))
	138	return -ENOMEM;
	139
	140	s[sz++] = c;
	141	}
	142
	143	} else if (separator) {
	144	if (c == 0)
	145	goto finish_force_terminate;
	146	if (!strchr(separators, c))
	147	goto finish;
	148
	149	} else {
	150	if (c == 0)
	151	goto finish_force_terminate;
	152	else if ((c == '\'' || c == '"') && (flags & EXTRACT_QUOTES))
	153	quote = c;
	154	else if (c == '\\')
	155	backslash = true;
	156	else if (strchr(separators, c)) {
	157	if (flags & EXTRACT_DONT_COALESCE_SEPARATORS) {
	158	(*p) ++;
	159	goto finish_force_next;
	160	}
	161	separator = true;
	162	} else {
	163	if (!GREEDY_REALLOC(s, allocated, sz+2))
	164	return -ENOMEM;
	165
	166	s[sz++] = c;
	167	}
168	}
169
170	(*p) ++;
171	}
172
173	finish_force_terminate:
174	*p = NULL;
175	finish:
176	if (!s) {
177	*p = NULL;
178	*ret = NULL;
179	return 0;
180	}
181
182	finish_force_next:
183	s[sz] = 0;
184	*ret = s;
185	s = NULL;
186
187	return 1;
188	}
189
190	int extract_first_word_and_warn(
191	const char **p,
192	char **ret,
193	const char *separators,
194	ExtractFlags flags,
195	const char *unit,
196	const char *filename,
197	unsigned line,
198	const char *rvalue) {
199
dea7b6b0 LP	200	/* Try to unquote it, if it fails, warn about it and try again
	201	* but this time using EXTRACT_CUNESCAPE_RELAX to keep the
	202	* backslashes verbatim in invalid escape sequences. */
	203
84ac7bea LP	204	const char *save;
	205	int r;
	206
	207	save = *p;
	208	r = extract_first_word(p, ret, separators, flags);
dea7b6b0 LP	209	if (r >= 0)
	210	return r;
	211
	212	if (r == -EINVAL && !(flags & EXTRACT_CUNESCAPE_RELAX)) {
84ac7bea LP	213
	214	/* Retry it with EXTRACT_CUNESCAPE_RELAX. */
	215	*p = save;
	216	r = extract_first_word(p, ret, separators, flags|EXTRACT_CUNESCAPE_RELAX);
dea7b6b0 LP	217	if (r >= 0) {
	218	/* It worked this time, hence it must have been an invalid escape sequence we could correct. */
	219	log_syntax(unit, LOG_WARNING, filename, line, EINVAL, "Invalid escape sequences in line, correcting: \"%s\"", rvalue);
	220	return r;
	221	}
	222
	223	/* If it's still EINVAL; then it must be unbalanced quoting, report this. */
	224	if (r == -EINVAL)
	225	return log_syntax(unit, LOG_ERR, filename, line, r, "Unbalanced quoting, ignoring: \"%s\"", rvalue);
84ac7bea LP	226	}
84ac7bea LP	227
dea7b6b0 LP	228	/* Can be any error, report it */
dea7b6b0 LP	229	return log_syntax(unit, LOG_ERR, filename, line, r, "Unable to decode word \"%s\", ignoring: %m", rvalue);
84ac7bea LP	230	}
	231
	232	int extract_many_words(const char **p, const char *separators, ExtractFlags flags, ...) {
	233	va_list ap;
	234	char **l;
	235	int n = 0, i, c, r;
	236
	237	/* Parses a number of words from a string, stripping any
	238	* quotes if necessary. */
	239
	240	assert(p);
	241
	242	/* Count how many words are expected */
	243	va_start(ap, flags);
	244	for (;;) {
	245	if (!va_arg(ap, char **))
	246	break;
	247	n++;
	248	}
	249	va_end(ap);
	250
	251	if (n <= 0)
	252	return 0;
	253
	254	/* Read all words into a temporary array */
	255	l = newa0(char*, n);
	256	for (c = 0; c < n; c++) {
	257
	258	r = extract_first_word(p, &l[c], separators, flags);
	259	if (r < 0) {
	260	int j;
	261
	262	for (j = 0; j < c; j++)
	263	free(l[j]);
	264
	265	return r;
	266	}
	267
	268	if (r == 0)
	269	break;
	270	}
	271
	272	/* If we managed to parse all words, return them in the passed
	273	* in parameters */
	274	va_start(ap, flags);
	275	for (i = 0; i < n; i++) {
	276	char **v;
	277
	278	v = va_arg(ap, char **);
	279	assert(v);
	280
	281	*v = l[i];
	282	}
	283	va_end(ap);
	284
	285	return c;
	286	}