git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/extract-word.c
basic: include only what we use
[thirdparty/systemd.git] / src / basic / extract-word.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright 2010 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <errno.h>
23 #include <stdarg.h>
24 #include <stdbool.h>
25 #include <stddef.h>
26 #include <stdint.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <syslog.h>
30
31 #include "alloc-util.h"
32 #include "escape.h"
33 #include "extract-word.h"
34 #include "log.h"
35 #include "macro.h"
36 #include "string-util.h"
37 #include "utf8.h"
38
/* Extracts the first word of the string at *p and returns it in *ret as a separately
 * allocated, NUL-terminated buffer.
 *
 * p          - in/out cursor into the input. It is advanced while parsing and is set to
 *              NULL once the end of the input has been reached. If *p is already NULL on
 *              entry, nothing is parsed and 0 is returned.
 * ret        - receives the word when 1 is returned and NULL when 0 is returned. It is
 *              not written on the error returns.
 * separators - characters that delimit words; NULL selects WHITESPACE.
 * flags      - EXTRACT_* bits, as tested in the body:
 *                EXTRACT_DONT_COALESCE_SEPARATORS  every single separator ends a word, so
 *                                                  empty words are returned too
 *                EXTRACT_QUOTES                    single and double quotes open quoted sections
 *                EXTRACT_RETAIN_ESCAPE             a backslash is not an escape introducer
 *                EXTRACT_CUNESCAPE                 escapes are decoded with cunescape_one()
 *                EXTRACT_CUNESCAPE_RELAX           undecodable escapes are kept verbatim
 *                EXTRACT_RELAX                     an unterminated quote or a trailing
 *                                                  backslash at end of input is tolerated
 *
 * Returns 1 if a word was stored in *ret, 0 if no word was found, -ENOMEM if
 * GREEDY_REALLOC() failed, and -EINVAL for a trailing backslash, an undecodable escape
 * sequence or an unterminated quote that the flags do not allow. */
39 int extract_first_word(const char **p, char **ret, const char *separators, ExtractFlags flags) {
/* s is the output buffer and sz the number of bytes stored in it so far; allocated is the
 * bookkeeping variable handed to GREEDY_REALLOC() (presumably the current capacity). */
40 _cleanup_free_ char *s = NULL;
41 size_t allocated = 0, sz = 0;
/* c caches **p; every loop below refreshes it whenever *p is advanced. */
42 char c;
43 int r;
44
45 char quote = 0; /* 0 or ' or " */
46 bool backslash = false; /* whether we've just seen a backslash */
47
48 assert(p);
49 assert(ret);
50
51 /* Bail early if called after last value or with no input */
52 if (!*p)
53 goto finish_force_terminate;
54 c = **p;
55
56 if (!separators)
57 separators = WHITESPACE;
58
59 /* Parses the first word of a string, and returns it in
60 * *ret. Removes all quotes in the process. When parsing fails
61 * (because of an uneven number of quotes or similar), leaves
62 * the pointer *p at the first invalid character. */
63
/* When separators are not coalesced an empty word is a valid result. A NULL s means
 * no word at the finish label, so make s non-NULL before any scanning happens. */
64 if (flags & EXTRACT_DONT_COALESCE_SEPARATORS)
65 if (!GREEDY_REALLOC(s, allocated, sz+1))
66 return -ENOMEM;
67
/* Phase 1: skip leading separators. The loop ends at NUL (end of input), at the first
 * separator when not coalescing (an empty word is returned), or at the first
 * non-separator character, where phase 2 takes over. */
68 for (;; (*p) ++, c = **p) {
69 if (c == 0)
70 goto finish_force_terminate;
71 else if (strchr(separators, c)) {
72 if (flags & EXTRACT_DONT_COALESCE_SEPARATORS) {
/* Step over the separator so the next call starts right behind it. */
73 (*p) ++;
74 goto finish_force_next;
75 }
76 } else {
77 /* We found a non-blank character, so we will always
78 * want to return a string (even if it is empty),
79 * allocate it here. */
80 if (!GREEDY_REALLOC(s, allocated, sz+1))
81 return -ENOMEM;
82 break;
83 }
84 }
85
/* Phase 2: copy the word. Each pass of this outer loop handles one state: right after a
 * backslash, inside quotes, or unquoted. The inner loops break out to switch state, and
 * the increment of this outer loop then steps over the character that caused the switch
 * (the quote, the backslash, or the escaped character). */
86 for (;; (*p) ++, c = **p) {
87 if (backslash) {
/* Reserve room for everything this branch may append. NOTE(review): 7 presumably covers
 * the longest utf8_encode_unichar() output plus the terminating NUL - confirm. */
88 if (!GREEDY_REALLOC(s, allocated, sz+7))
89 return -ENOMEM;
90
/* The backslash was the last character of the input. */
91 if (c == 0) {
92 if ((flags & EXTRACT_CUNESCAPE_RELAX) &&
93 (!quote || flags & EXTRACT_RELAX)) {
94 /* If we find an unquoted trailing backslash and we're in
95 * EXTRACT_CUNESCAPE_RELAX mode, keep it verbatim in the
96 * output.
97 *
98 * Unbalanced quotes will only be allowed in EXTRACT_RELAX
99 * mode, EXTRACT_CUNESCAPE_RELAX mode does not allow them.
100 */
101 s[sz++] = '\\';
102 goto finish_force_terminate;
103 }
/* With EXTRACT_RELAX alone the dangling backslash is dropped silently. */
104 if (flags & EXTRACT_RELAX)
105 goto finish_force_terminate;
106 return -EINVAL;
107 }
108
109 if (flags & EXTRACT_CUNESCAPE) {
110 uint32_t u;
111
/* cunescape_one() is defined elsewhere; from its use here it reports failure as a
 * negative value and otherwise stores the decoded result in c or in u. */
112 r = cunescape_one(*p, (size_t) -1, &c, &u);
113 if (r < 0) {
/* Undecodable escape: either keep backslash and character verbatim, or fail. */
114 if (flags & EXTRACT_CUNESCAPE_RELAX) {
115 s[sz++] = '\\';
116 s[sz++] = c;
117 } else
118 return -EINVAL;
119 } else {
/* r is treated as the number of input characters the escape occupied; the last of
 * them is skipped by the increment of the enclosing loop, hence the - 1. */
120 (*p) += r - 1;
121
122 if (c != 0)
123 s[sz++] = c; /* normal explicit char */
124 else
125 sz += utf8_encode_unichar(s + sz, u); /* unicode chars we'll encode as utf8 */
126 }
127 } else
/* Without EXTRACT_CUNESCAPE the escaped character is copied as is and the backslash
 * itself is dropped. */
128 s[sz++] = c;
129
130 backslash = false;
131
132 } else if (quote) { /* inside either single or double quotes */
133 for (;; (*p) ++, c = **p) {
/* End of input while the quote is still open. */
134 if (c == 0) {
135 if (flags & EXTRACT_RELAX)
136 goto finish_force_terminate;
137 return -EINVAL;
138 } else if (c == quote) { /* found the end quote */
139 quote = 0;
140 break;
141 } else if (c == '\\' && !(flags & EXTRACT_RETAIN_ESCAPE)) {
142 backslash = true;
143 break;
144 } else {
/* Room for this character plus the NUL written at finish_force_next. Separators are
 * not checked in this state, so they are copied like any other character. */
145 if (!GREEDY_REALLOC(s, allocated, sz+2))
146 return -ENOMEM;
147
148 s[sz++] = c;
149 }
150 }
151
152 } else {
/* Unquoted text. */
153 for (;; (*p) ++, c = **p) {
154 if (c == 0)
155 goto finish_force_terminate;
156 else if ((c == '\'' || c == '"') && (flags & EXTRACT_QUOTES)) {
157 quote = c;
158 break;
159 } else if (c == '\\' && !(flags & EXTRACT_RETAIN_ESCAPE)) {
160 backslash = true;
161 break;
162 } else if (strchr(separators, c)) {
/* A separator ends the word. */
163 if (flags & EXTRACT_DONT_COALESCE_SEPARATORS) {
164 (*p) ++;
165 goto finish_force_next;
166 }
167 /* Skip additional coalesced separators. */
168 for (;; (*p) ++, c = **p) {
169 if (c == 0)
170 goto finish_force_terminate;
171 if (!strchr(separators, c))
172 break;
173 }
/* *p now points at the first character of the following word. */
174 goto finish;
175
176 } else {
/* Ordinary character: room for it plus the terminating NUL. */
177 if (!GREEDY_REALLOC(s, allocated, sz+2))
178 return -ENOMEM;
179
180 s[sz++] = c;
181 }
182 }
183 }
184 }
185
/* End of input was reached: tell the caller by clearing the cursor. */
186 finish_force_terminate:
187 *p = NULL;
188 finish:
/* s was never allocated, hence no word was seen. */
189 if (!s) {
190 *p = NULL;
191 *ret = NULL;
192 return 0;
193 }
194
195 finish_force_next:
/* Terminate the buffer and hand it to the caller. s is reset to NULL so that the
 * _cleanup_free_ annotation (defined elsewhere, presumably freeing s when it goes out
 * of scope) does not release the buffer just returned. */
196 s[sz] = 0;
197 *ret = s;
198 s = NULL;
199
200 return 1;
201 }
202
203 int extract_first_word_and_warn(
204 const char **p,
205 char **ret,
206 const char *separators,
207 ExtractFlags flags,
208 const char *unit,
209 const char *filename,
210 unsigned line,
211 const char *rvalue) {
212
213 /* Try to unquote it, if it fails, warn about it and try again
214 * but this time using EXTRACT_CUNESCAPE_RELAX to keep the
215 * backslashes verbatim in invalid escape sequences. */
216
217 const char *save;
218 int r;
219
220 save = *p;
221 r = extract_first_word(p, ret, separators, flags);
222 if (r >= 0)
223 return r;
224
225 if (r == -EINVAL && !(flags & EXTRACT_CUNESCAPE_RELAX)) {
226
227 /* Retry it with EXTRACT_CUNESCAPE_RELAX. */
228 *p = save;
229 r = extract_first_word(p, ret, separators, flags|EXTRACT_CUNESCAPE_RELAX);
230 if (r >= 0) {
231 /* It worked this time, hence it must have been an invalid escape sequence we could correct. */
232 log_syntax(unit, LOG_WARNING, filename, line, EINVAL, "Invalid escape sequences in line, correcting: \"%s\"", rvalue);
233 return r;
234 }
235
236 /* If it's still EINVAL; then it must be unbalanced quoting, report this. */
237 if (r == -EINVAL)
238 return log_syntax(unit, LOG_ERR, filename, line, r, "Unbalanced quoting, ignoring: \"%s\"", rvalue);
239 }
240
241 /* Can be any error, report it */
242 return log_syntax(unit, LOG_ERR, filename, line, r, "Unable to decode word \"%s\", ignoring: %m", rvalue);
243 }
244
245 int extract_many_words(const char **p, const char *separators, ExtractFlags flags, ...) {
246 va_list ap;
247 char **l;
248 int n = 0, i, c, r;
249
250 /* Parses a number of words from a string, stripping any
251 * quotes if necessary. */
252
253 assert(p);
254
255 /* Count how many words are expected */
256 va_start(ap, flags);
257 for (;;) {
258 if (!va_arg(ap, char **))
259 break;
260 n++;
261 }
262 va_end(ap);
263
264 if (n <= 0)
265 return 0;
266
267 /* Read all words into a temporary array */
268 l = newa0(char*, n);
269 for (c = 0; c < n; c++) {
270
271 r = extract_first_word(p, &l[c], separators, flags);
272 if (r < 0) {
273 int j;
274
275 for (j = 0; j < c; j++)
276 free(l[j]);
277
278 return r;
279 }
280
281 if (r == 0)
282 break;
283 }
284
285 /* If we managed to parse all words, return them in the passed
286 * in parameters */
287 va_start(ap, flags);
288 for (i = 0; i < n; i++) {
289 char **v;
290
291 v = va_arg(ap, char **);
292 assert(v);
293
294 *v = l[i];
295 }
296 va_end(ap);
297
298 return c;
299 }