]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/extract-word.c
utf8.[ch] et al: use char32_t and char16_t instead of int, int32_t, int16_t
[thirdparty/systemd.git] / src / basic / extract-word.c
/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/

/***
  This file is part of systemd.

  Copyright 2010 Lennart Poettering

  systemd is free software; you can redistribute it and/or modify it
  under the terms of the GNU Lesser General Public License as published by
  the Free Software Foundation; either version 2.1 of the License, or
  (at your option) any later version.

  systemd is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public License
  along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
21
22 #include <errno.h>
23 #include <stdarg.h>
24 #include <stdbool.h>
25 #include <stddef.h>
26 #include <stdint.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <syslog.h>
30
31 #include "alloc-util.h"
32 #include "escape.h"
33 #include "extract-word.h"
34 #include "log.h"
35 #include "macro.h"
36 #include "string-util.h"
37 #include "utf8.h"
38
39 int extract_first_word(const char **p, char **ret, const char *separators, ExtractFlags flags) {
40 _cleanup_free_ char *s = NULL;
41 size_t allocated = 0, sz = 0;
42 char c;
43 int r;
44
45 char quote = 0; /* 0 or ' or " */
46 bool backslash = false; /* whether we've just seen a backslash */
47
48 assert(p);
49 assert(ret);
50
51 /* Bail early if called after last value or with no input */
52 if (!*p)
53 goto finish_force_terminate;
54 c = **p;
55
56 if (!separators)
57 separators = WHITESPACE;
58
59 /* Parses the first word of a string, and returns it in
60 * *ret. Removes all quotes in the process. When parsing fails
61 * (because of an uneven number of quotes or similar), leaves
62 * the pointer *p at the first invalid character. */
63
64 if (flags & EXTRACT_DONT_COALESCE_SEPARATORS)
65 if (!GREEDY_REALLOC(s, allocated, sz+1))
66 return -ENOMEM;
67
68 for (;; (*p) ++, c = **p) {
69 if (c == 0)
70 goto finish_force_terminate;
71 else if (strchr(separators, c)) {
72 if (flags & EXTRACT_DONT_COALESCE_SEPARATORS) {
73 (*p) ++;
74 goto finish_force_next;
75 }
76 } else {
77 /* We found a non-blank character, so we will always
78 * want to return a string (even if it is empty),
79 * allocate it here. */
80 if (!GREEDY_REALLOC(s, allocated, sz+1))
81 return -ENOMEM;
82 break;
83 }
84 }
85
86 for (;; (*p) ++, c = **p) {
87 if (backslash) {
88 if (!GREEDY_REALLOC(s, allocated, sz+7))
89 return -ENOMEM;
90
91 if (c == 0) {
92 if ((flags & EXTRACT_CUNESCAPE_RELAX) &&
93 (!quote || flags & EXTRACT_RELAX)) {
94 /* If we find an unquoted trailing backslash and we're in
95 * EXTRACT_CUNESCAPE_RELAX mode, keep it verbatim in the
96 * output.
97 *
98 * Unbalanced quotes will only be allowed in EXTRACT_RELAX
99 * mode, EXTRACT_CUNESCAPE_RELAX mode does not allow them.
100 */
101 s[sz++] = '\\';
102 goto finish_force_terminate;
103 }
104 if (flags & EXTRACT_RELAX)
105 goto finish_force_terminate;
106 return -EINVAL;
107 }
108
109 if (flags & EXTRACT_CUNESCAPE) {
110 bool eight_bit = false;
111 char32_t u;
112
113 r = cunescape_one(*p, (size_t) -1, &u, &eight_bit);
114 if (r < 0) {
115 if (flags & EXTRACT_CUNESCAPE_RELAX) {
116 s[sz++] = '\\';
117 s[sz++] = c;
118 } else
119 return -EINVAL;
120 } else {
121 (*p) += r - 1;
122
123 if (eight_bit)
124 s[sz++] = u;
125 else
126 sz += utf8_encode_unichar(s + sz, u);
127 }
128 } else
129 s[sz++] = c;
130
131 backslash = false;
132
133 } else if (quote) { /* inside either single or double quotes */
134 for (;; (*p) ++, c = **p) {
135 if (c == 0) {
136 if (flags & EXTRACT_RELAX)
137 goto finish_force_terminate;
138 return -EINVAL;
139 } else if (c == quote) { /* found the end quote */
140 quote = 0;
141 break;
142 } else if (c == '\\' && !(flags & EXTRACT_RETAIN_ESCAPE)) {
143 backslash = true;
144 break;
145 } else {
146 if (!GREEDY_REALLOC(s, allocated, sz+2))
147 return -ENOMEM;
148
149 s[sz++] = c;
150 }
151 }
152
153 } else {
154 for (;; (*p) ++, c = **p) {
155 if (c == 0)
156 goto finish_force_terminate;
157 else if ((c == '\'' || c == '"') && (flags & EXTRACT_QUOTES)) {
158 quote = c;
159 break;
160 } else if (c == '\\' && !(flags & EXTRACT_RETAIN_ESCAPE)) {
161 backslash = true;
162 break;
163 } else if (strchr(separators, c)) {
164 if (flags & EXTRACT_DONT_COALESCE_SEPARATORS) {
165 (*p) ++;
166 goto finish_force_next;
167 }
168 /* Skip additional coalesced separators. */
169 for (;; (*p) ++, c = **p) {
170 if (c == 0)
171 goto finish_force_terminate;
172 if (!strchr(separators, c))
173 break;
174 }
175 goto finish;
176
177 } else {
178 if (!GREEDY_REALLOC(s, allocated, sz+2))
179 return -ENOMEM;
180
181 s[sz++] = c;
182 }
183 }
184 }
185 }
186
187 finish_force_terminate:
188 *p = NULL;
189 finish:
190 if (!s) {
191 *p = NULL;
192 *ret = NULL;
193 return 0;
194 }
195
196 finish_force_next:
197 s[sz] = 0;
198 *ret = s;
199 s = NULL;
200
201 return 1;
202 }
203
204 int extract_first_word_and_warn(
205 const char **p,
206 char **ret,
207 const char *separators,
208 ExtractFlags flags,
209 const char *unit,
210 const char *filename,
211 unsigned line,
212 const char *rvalue) {
213
214 /* Try to unquote it, if it fails, warn about it and try again
215 * but this time using EXTRACT_CUNESCAPE_RELAX to keep the
216 * backslashes verbatim in invalid escape sequences. */
217
218 const char *save;
219 int r;
220
221 save = *p;
222 r = extract_first_word(p, ret, separators, flags);
223 if (r >= 0)
224 return r;
225
226 if (r == -EINVAL && !(flags & EXTRACT_CUNESCAPE_RELAX)) {
227
228 /* Retry it with EXTRACT_CUNESCAPE_RELAX. */
229 *p = save;
230 r = extract_first_word(p, ret, separators, flags|EXTRACT_CUNESCAPE_RELAX);
231 if (r >= 0) {
232 /* It worked this time, hence it must have been an invalid escape sequence we could correct. */
233 log_syntax(unit, LOG_WARNING, filename, line, EINVAL, "Invalid escape sequences in line, correcting: \"%s\"", rvalue);
234 return r;
235 }
236
237 /* If it's still EINVAL; then it must be unbalanced quoting, report this. */
238 if (r == -EINVAL)
239 return log_syntax(unit, LOG_ERR, filename, line, r, "Unbalanced quoting, ignoring: \"%s\"", rvalue);
240 }
241
242 /* Can be any error, report it */
243 return log_syntax(unit, LOG_ERR, filename, line, r, "Unable to decode word \"%s\", ignoring: %m", rvalue);
244 }
245
246 int extract_many_words(const char **p, const char *separators, ExtractFlags flags, ...) {
247 va_list ap;
248 char **l;
249 int n = 0, i, c, r;
250
251 /* Parses a number of words from a string, stripping any
252 * quotes if necessary. */
253
254 assert(p);
255
256 /* Count how many words are expected */
257 va_start(ap, flags);
258 for (;;) {
259 if (!va_arg(ap, char **))
260 break;
261 n++;
262 }
263 va_end(ap);
264
265 if (n <= 0)
266 return 0;
267
268 /* Read all words into a temporary array */
269 l = newa0(char*, n);
270 for (c = 0; c < n; c++) {
271
272 r = extract_first_word(p, &l[c], separators, flags);
273 if (r < 0) {
274 int j;
275
276 for (j = 0; j < c; j++)
277 free(l[j]);
278
279 return r;
280 }
281
282 if (r == 0)
283 break;
284 }
285
286 /* If we managed to parse all words, return them in the passed
287 * in parameters */
288 va_start(ap, flags);
289 for (i = 0; i < n; i++) {
290 char **v;
291
292 v = va_arg(ap, char **);
293 assert(v);
294
295 *v = l[i];
296 }
297 va_end(ap);
298
299 return c;
300 }