git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/extract-word.c
extract-word: Skip coalesced separators in place
[thirdparty/systemd.git] / src / basic / extract-word.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright 2010 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include "alloc-util.h"
23 #include "escape.h"
24 #include "extract-word.h"
25 #include "string-util.h"
26 #include "utf8.h"
27 #include "util.h"
28
29 int extract_first_word(const char **p, char **ret, const char *separators, ExtractFlags flags) {
30 _cleanup_free_ char *s = NULL;
31 size_t allocated = 0, sz = 0;
32 char c;
33 int r;
34
35 char quote = 0; /* 0 or ' or " */
36 bool backslash = false; /* whether we've just seen a backslash */
37
38 assert(p);
39 assert(ret);
40
41 /* Bail early if called after last value or with no input */
42 if (!*p)
43 goto finish_force_terminate;
44 c = **p;
45
46 if (!separators)
47 separators = WHITESPACE;
48
49 /* Parses the first word of a string, and returns it in
50 * *ret. Removes all quotes in the process. When parsing fails
51 * (because of an uneven number of quotes or similar), leaves
52 * the pointer *p at the first invalid character. */
53
54 if (flags & EXTRACT_DONT_COALESCE_SEPARATORS)
55 if (!GREEDY_REALLOC(s, allocated, sz+1))
56 return -ENOMEM;
57
58 for (;; (*p) ++, c = **p) {
59 if (c == 0)
60 goto finish_force_terminate;
61 else if (strchr(separators, c)) {
62 if (flags & EXTRACT_DONT_COALESCE_SEPARATORS) {
63 (*p) ++;
64 goto finish_force_next;
65 }
66 } else {
67 /* We found a non-blank character, so we will always
68 * want to return a string (even if it is empty),
69 * allocate it here. */
70 if (!GREEDY_REALLOC(s, allocated, sz+1))
71 return -ENOMEM;
72 break;
73 }
74 }
75
76 for (;; (*p) ++, c = **p) {
77 if (backslash) {
78 if (!GREEDY_REALLOC(s, allocated, sz+7))
79 return -ENOMEM;
80
81 if (c == 0) {
82 if ((flags & EXTRACT_CUNESCAPE_RELAX) &&
83 (!quote || flags & EXTRACT_RELAX)) {
84 /* If we find an unquoted trailing backslash and we're in
85 * EXTRACT_CUNESCAPE_RELAX mode, keep it verbatim in the
86 * output.
87 *
88 * Unbalanced quotes will only be allowed in EXTRACT_RELAX
89 * mode, EXTRACT_CUNESCAPE_RELAX mode does not allow them.
90 */
91 s[sz++] = '\\';
92 goto finish_force_terminate;
93 }
94 if (flags & EXTRACT_RELAX)
95 goto finish_force_terminate;
96 return -EINVAL;
97 }
98
99 if (flags & EXTRACT_CUNESCAPE) {
100 uint32_t u;
101
102 r = cunescape_one(*p, (size_t) -1, &c, &u);
103 if (r < 0) {
104 if (flags & EXTRACT_CUNESCAPE_RELAX) {
105 s[sz++] = '\\';
106 s[sz++] = c;
107 } else
108 return -EINVAL;
109 } else {
110 (*p) += r - 1;
111
112 if (c != 0)
113 s[sz++] = c; /* normal explicit char */
114 else
115 sz += utf8_encode_unichar(s + sz, u); /* unicode chars we'll encode as utf8 */
116 }
117 } else
118 s[sz++] = c;
119
120 backslash = false;
121
122 } else if (quote) { /* inside either single or double quotes */
123 for (;; (*p) ++, c = **p) {
124 if (c == 0) {
125 if (flags & EXTRACT_RELAX)
126 goto finish_force_terminate;
127 return -EINVAL;
128 } else if (c == quote) { /* found the end quote */
129 quote = 0;
130 break;
131 } else if (c == '\\') {
132 backslash = true;
133 break;
134 } else {
135 if (!GREEDY_REALLOC(s, allocated, sz+2))
136 return -ENOMEM;
137
138 s[sz++] = c;
139 }
140 }
141
142 } else {
143 for (;; (*p) ++, c = **p) {
144 if (c == 0)
145 goto finish_force_terminate;
146 else if ((c == '\'' || c == '"') && (flags & EXTRACT_QUOTES)) {
147 quote = c;
148 break;
149 } else if (c == '\\') {
150 backslash = true;
151 break;
152 } else if (strchr(separators, c)) {
153 if (flags & EXTRACT_DONT_COALESCE_SEPARATORS) {
154 (*p) ++;
155 goto finish_force_next;
156 }
157 /* Skip additional coalesced separators. */
158 for (;; (*p) ++, c = **p) {
159 if (c == 0)
160 goto finish_force_terminate;
161 if (!strchr(separators, c))
162 break;
163 }
164 goto finish;
165
166 } else {
167 if (!GREEDY_REALLOC(s, allocated, sz+2))
168 return -ENOMEM;
169
170 s[sz++] = c;
171 }
172 }
173 }
174 }
175
176 finish_force_terminate:
177 *p = NULL;
178 finish:
179 if (!s) {
180 *p = NULL;
181 *ret = NULL;
182 return 0;
183 }
184
185 finish_force_next:
186 s[sz] = 0;
187 *ret = s;
188 s = NULL;
189
190 return 1;
191 }
192
193 int extract_first_word_and_warn(
194 const char **p,
195 char **ret,
196 const char *separators,
197 ExtractFlags flags,
198 const char *unit,
199 const char *filename,
200 unsigned line,
201 const char *rvalue) {
202
203 /* Try to unquote it, if it fails, warn about it and try again
204 * but this time using EXTRACT_CUNESCAPE_RELAX to keep the
205 * backslashes verbatim in invalid escape sequences. */
206
207 const char *save;
208 int r;
209
210 save = *p;
211 r = extract_first_word(p, ret, separators, flags);
212 if (r >= 0)
213 return r;
214
215 if (r == -EINVAL && !(flags & EXTRACT_CUNESCAPE_RELAX)) {
216
217 /* Retry it with EXTRACT_CUNESCAPE_RELAX. */
218 *p = save;
219 r = extract_first_word(p, ret, separators, flags|EXTRACT_CUNESCAPE_RELAX);
220 if (r >= 0) {
221 /* It worked this time, hence it must have been an invalid escape sequence we could correct. */
222 log_syntax(unit, LOG_WARNING, filename, line, EINVAL, "Invalid escape sequences in line, correcting: \"%s\"", rvalue);
223 return r;
224 }
225
226 /* If it's still EINVAL; then it must be unbalanced quoting, report this. */
227 if (r == -EINVAL)
228 return log_syntax(unit, LOG_ERR, filename, line, r, "Unbalanced quoting, ignoring: \"%s\"", rvalue);
229 }
230
231 /* Can be any error, report it */
232 return log_syntax(unit, LOG_ERR, filename, line, r, "Unable to decode word \"%s\", ignoring: %m", rvalue);
233 }
234
235 int extract_many_words(const char **p, const char *separators, ExtractFlags flags, ...) {
236 va_list ap;
237 char **l;
238 int n = 0, i, c, r;
239
240 /* Parses a number of words from a string, stripping any
241 * quotes if necessary. */
242
243 assert(p);
244
245 /* Count how many words are expected */
246 va_start(ap, flags);
247 for (;;) {
248 if (!va_arg(ap, char **))
249 break;
250 n++;
251 }
252 va_end(ap);
253
254 if (n <= 0)
255 return 0;
256
257 /* Read all words into a temporary array */
258 l = newa0(char*, n);
259 for (c = 0; c < n; c++) {
260
261 r = extract_first_word(p, &l[c], separators, flags);
262 if (r < 0) {
263 int j;
264
265 for (j = 0; j < c; j++)
266 free(l[j]);
267
268 return r;
269 }
270
271 if (r == 0)
272 break;
273 }
274
275 /* If we managed to parse all words, return them in the passed
276 * in parameters */
277 va_start(ap, flags);
278 for (i = 0; i < n; i++) {
279 char **v;
280
281 v = va_arg(ap, char **);
282 assert(v);
283
284 *v = l[i];
285 }
286 va_end(ap);
287
288 return c;
289 }