]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/extract-word.c
extract-word: Check for early bail out before inspecting separators
[thirdparty/systemd.git] / src / basic / extract-word.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright 2010 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include "alloc-util.h"
23 #include "escape.h"
24 #include "extract-word.h"
25 #include "string-util.h"
26 #include "utf8.h"
27 #include "util.h"
28
29 int extract_first_word(const char **p, char **ret, const char *separators, ExtractFlags flags) {
30 _cleanup_free_ char *s = NULL;
31 size_t allocated = 0, sz = 0;
32 char c;
33 int r;
34
35 char quote = 0; /* 0 or ' or " */
36 bool backslash = false; /* whether we've just seen a backslash */
37 bool separator = false; /* whether we've just seen a separator */
38
39 assert(p);
40 assert(ret);
41
42 /* Bail early if called after last value or with no input */
43 if (!*p)
44 goto finish_force_terminate;
45
46 if (!separators)
47 separators = WHITESPACE;
48
49 /* Parses the first word of a string, and returns it in
50 * *ret. Removes all quotes in the process. When parsing fails
51 * (because of an uneven number of quotes or similar), leaves
52 * the pointer *p at the first invalid character. */
53
54 if (flags & EXTRACT_DONT_COALESCE_SEPARATORS)
55 if (!GREEDY_REALLOC(s, allocated, sz+1))
56 return -ENOMEM;
57
58 for (;;) {
59 c = **p;
60 if (c == 0)
61 goto finish_force_terminate;
62 else if (strchr(separators, c)) {
63 (*p) ++;
64 if (flags & EXTRACT_DONT_COALESCE_SEPARATORS)
65 goto finish_force_next;
66 } else {
67 /* We found a non-blank character, so we will always
68 * want to return a string (even if it is empty),
69 * allocate it here. */
70 if (!GREEDY_REALLOC(s, allocated, sz+1))
71 return -ENOMEM;
72 break;
73 }
74 }
75
76 for (;;) {
77 c = **p;
78
79 if (backslash) {
80 if (!GREEDY_REALLOC(s, allocated, sz+7))
81 return -ENOMEM;
82
83 if (c == 0) {
84 if ((flags & EXTRACT_CUNESCAPE_RELAX) &&
85 (!quote || flags & EXTRACT_RELAX)) {
86 /* If we find an unquoted trailing backslash and we're in
87 * EXTRACT_CUNESCAPE_RELAX mode, keep it verbatim in the
88 * output.
89 *
90 * Unbalanced quotes will only be allowed in EXTRACT_RELAX
91 * mode, EXTRACT_CUNESCAPE_RELAX mode does not allow them.
92 */
93 s[sz++] = '\\';
94 goto finish_force_terminate;
95 }
96 if (flags & EXTRACT_RELAX)
97 goto finish_force_terminate;
98 return -EINVAL;
99 }
100
101 if (flags & EXTRACT_CUNESCAPE) {
102 uint32_t u;
103
104 r = cunescape_one(*p, (size_t) -1, &c, &u);
105 if (r < 0) {
106 if (flags & EXTRACT_CUNESCAPE_RELAX) {
107 s[sz++] = '\\';
108 s[sz++] = c;
109 } else
110 return -EINVAL;
111 } else {
112 (*p) += r - 1;
113
114 if (c != 0)
115 s[sz++] = c; /* normal explicit char */
116 else
117 sz += utf8_encode_unichar(s + sz, u); /* unicode chars we'll encode as utf8 */
118 }
119 } else
120 s[sz++] = c;
121
122 backslash = false;
123
124 } else if (quote) { /* inside either single or double quotes */
125 if (c == 0) {
126 if (flags & EXTRACT_RELAX)
127 goto finish_force_terminate;
128 return -EINVAL;
129 } else if (c == quote) /* found the end quote */
130 quote = 0;
131 else if (c == '\\')
132 backslash = true;
133 else {
134 if (!GREEDY_REALLOC(s, allocated, sz+2))
135 return -ENOMEM;
136
137 s[sz++] = c;
138 }
139
140 } else if (separator) {
141 if (c == 0)
142 goto finish_force_terminate;
143 if (!strchr(separators, c))
144 goto finish;
145
146 } else {
147 if (c == 0)
148 goto finish_force_terminate;
149 else if ((c == '\'' || c == '"') && (flags & EXTRACT_QUOTES))
150 quote = c;
151 else if (c == '\\')
152 backslash = true;
153 else if (strchr(separators, c)) {
154 if (flags & EXTRACT_DONT_COALESCE_SEPARATORS) {
155 (*p) ++;
156 goto finish_force_next;
157 }
158 separator = true;
159 } else {
160 if (!GREEDY_REALLOC(s, allocated, sz+2))
161 return -ENOMEM;
162
163 s[sz++] = c;
164 }
165 }
166
167 (*p) ++;
168 }
169
170 finish_force_terminate:
171 *p = NULL;
172 finish:
173 if (!s) {
174 *p = NULL;
175 *ret = NULL;
176 return 0;
177 }
178
179 finish_force_next:
180 s[sz] = 0;
181 *ret = s;
182 s = NULL;
183
184 return 1;
185 }
186
187 int extract_first_word_and_warn(
188 const char **p,
189 char **ret,
190 const char *separators,
191 ExtractFlags flags,
192 const char *unit,
193 const char *filename,
194 unsigned line,
195 const char *rvalue) {
196
197 /* Try to unquote it, if it fails, warn about it and try again
198 * but this time using EXTRACT_CUNESCAPE_RELAX to keep the
199 * backslashes verbatim in invalid escape sequences. */
200
201 const char *save;
202 int r;
203
204 save = *p;
205 r = extract_first_word(p, ret, separators, flags);
206 if (r >= 0)
207 return r;
208
209 if (r == -EINVAL && !(flags & EXTRACT_CUNESCAPE_RELAX)) {
210
211 /* Retry it with EXTRACT_CUNESCAPE_RELAX. */
212 *p = save;
213 r = extract_first_word(p, ret, separators, flags|EXTRACT_CUNESCAPE_RELAX);
214 if (r >= 0) {
215 /* It worked this time, hence it must have been an invalid escape sequence we could correct. */
216 log_syntax(unit, LOG_WARNING, filename, line, EINVAL, "Invalid escape sequences in line, correcting: \"%s\"", rvalue);
217 return r;
218 }
219
220 /* If it's still EINVAL; then it must be unbalanced quoting, report this. */
221 if (r == -EINVAL)
222 return log_syntax(unit, LOG_ERR, filename, line, r, "Unbalanced quoting, ignoring: \"%s\"", rvalue);
223 }
224
225 /* Can be any error, report it */
226 return log_syntax(unit, LOG_ERR, filename, line, r, "Unable to decode word \"%s\", ignoring: %m", rvalue);
227 }
228
229 int extract_many_words(const char **p, const char *separators, ExtractFlags flags, ...) {
230 va_list ap;
231 char **l;
232 int n = 0, i, c, r;
233
234 /* Parses a number of words from a string, stripping any
235 * quotes if necessary. */
236
237 assert(p);
238
239 /* Count how many words are expected */
240 va_start(ap, flags);
241 for (;;) {
242 if (!va_arg(ap, char **))
243 break;
244 n++;
245 }
246 va_end(ap);
247
248 if (n <= 0)
249 return 0;
250
251 /* Read all words into a temporary array */
252 l = newa0(char*, n);
253 for (c = 0; c < n; c++) {
254
255 r = extract_first_word(p, &l[c], separators, flags);
256 if (r < 0) {
257 int j;
258
259 for (j = 0; j < c; j++)
260 free(l[j]);
261
262 return r;
263 }
264
265 if (r == 0)
266 break;
267 }
268
269 /* If we managed to parse all words, return them in the passed
270 * in parameters */
271 va_start(ap, flags);
272 for (i = 0; i < n; i++) {
273 char **v;
274
275 v = va_arg(ap, char **);
276 assert(v);
277
278 *v = l[i];
279 }
280 va_end(ap);
281
282 return c;
283 }