]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/extract-word.c
Revert "utf8.[ch]: use char32_t and char16_t instead of int, int32_t, int16_t"
[thirdparty/systemd.git] / src / basic / extract-word.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright 2010 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include "alloc-util.h"
23 #include "escape.h"
24 #include "utf8.h"
25 #include "util.h"
26 #include "extract-word.h"
27
28 int extract_first_word(const char **p, char **ret, const char *separators, ExtractFlags flags) {
29 _cleanup_free_ char *s = NULL;
30 size_t allocated = 0, sz = 0;
31 int r;
32
33 char quote = 0; /* 0 or ' or " */
34 bool backslash = false; /* whether we've just seen a backslash */
35 bool separator = false; /* whether we've just seen a separator */
36 bool start = true; /* false means we're looking at a value */
37
38 assert(p);
39 assert(ret);
40
41 if (!separators)
42 separators = WHITESPACE;
43
44 /* Bail early if called after last value or with no input */
45 if (!*p)
46 goto finish_force_terminate;
47
48 /* Parses the first word of a string, and returns it in
49 * *ret. Removes all quotes in the process. When parsing fails
50 * (because of an uneven number of quotes or similar), leaves
51 * the pointer *p at the first invalid character. */
52
53 for (;;) {
54 char c = **p;
55
56 if (start) {
57 if (flags & EXTRACT_DONT_COALESCE_SEPARATORS)
58 if (!GREEDY_REALLOC(s, allocated, sz+1))
59 return -ENOMEM;
60
61 if (c == 0)
62 goto finish_force_terminate;
63 else if (strchr(separators, c)) {
64 (*p) ++;
65 if (flags & EXTRACT_DONT_COALESCE_SEPARATORS)
66 goto finish_force_next;
67 continue;
68 }
69
70 /* We found a non-blank character, so we will always
71 * want to return a string (even if it is empty),
72 * allocate it here. */
73 if (!GREEDY_REALLOC(s, allocated, sz+1))
74 return -ENOMEM;
75
76 start = false;
77 }
78
79 if (backslash) {
80 if (!GREEDY_REALLOC(s, allocated, sz+7))
81 return -ENOMEM;
82
83 if (c == 0) {
84 if ((flags & EXTRACT_CUNESCAPE_RELAX) &&
85 (!quote || flags & EXTRACT_RELAX)) {
86 /* If we find an unquoted trailing backslash and we're in
87 * EXTRACT_CUNESCAPE_RELAX mode, keep it verbatim in the
88 * output.
89 *
90 * Unbalanced quotes will only be allowed in EXTRACT_RELAX
91 * mode, EXTRACT_CUNESCAPE_RELAX mode does not allow them.
92 */
93 s[sz++] = '\\';
94 goto finish_force_terminate;
95 }
96 if (flags & EXTRACT_RELAX)
97 goto finish_force_terminate;
98 return -EINVAL;
99 }
100
101 if (flags & EXTRACT_CUNESCAPE) {
102 uint32_t u;
103
104 r = cunescape_one(*p, (size_t) -1, &c, &u);
105 if (r < 0) {
106 if (flags & EXTRACT_CUNESCAPE_RELAX) {
107 s[sz++] = '\\';
108 s[sz++] = c;
109 goto end_escape;
110 }
111 return -EINVAL;
112 }
113
114 (*p) += r - 1;
115
116 if (c != 0)
117 s[sz++] = c; /* normal explicit char */
118 else
119 sz += utf8_encode_unichar(s + sz, u); /* unicode chars we'll encode as utf8 */
120 } else
121 s[sz++] = c;
122
123 end_escape:
124 backslash = false;
125
126 } else if (quote) { /* inside either single or double quotes */
127 if (c == 0) {
128 if (flags & EXTRACT_RELAX)
129 goto finish_force_terminate;
130 return -EINVAL;
131 } else if (c == quote) /* found the end quote */
132 quote = 0;
133 else if (c == '\\')
134 backslash = true;
135 else {
136 if (!GREEDY_REALLOC(s, allocated, sz+2))
137 return -ENOMEM;
138
139 s[sz++] = c;
140 }
141
142 } else if (separator) {
143 if (c == 0)
144 goto finish_force_terminate;
145 if (!strchr(separators, c))
146 goto finish;
147
148 } else {
149 if (c == 0)
150 goto finish_force_terminate;
151 else if ((c == '\'' || c == '"') && (flags & EXTRACT_QUOTES))
152 quote = c;
153 else if (c == '\\')
154 backslash = true;
155 else if (strchr(separators, c)) {
156 if (flags & EXTRACT_DONT_COALESCE_SEPARATORS) {
157 (*p) ++;
158 goto finish_force_next;
159 }
160 separator = true;
161 } else {
162 if (!GREEDY_REALLOC(s, allocated, sz+2))
163 return -ENOMEM;
164
165 s[sz++] = c;
166 }
167 }
168
169 (*p) ++;
170 }
171
172 finish_force_terminate:
173 *p = NULL;
174 finish:
175 if (!s) {
176 *p = NULL;
177 *ret = NULL;
178 return 0;
179 }
180
181 finish_force_next:
182 s[sz] = 0;
183 *ret = s;
184 s = NULL;
185
186 return 1;
187 }
188
189 int extract_first_word_and_warn(
190 const char **p,
191 char **ret,
192 const char *separators,
193 ExtractFlags flags,
194 const char *unit,
195 const char *filename,
196 unsigned line,
197 const char *rvalue) {
198
199 /* Try to unquote it, if it fails, warn about it and try again
200 * but this time using EXTRACT_CUNESCAPE_RELAX to keep the
201 * backslashes verbatim in invalid escape sequences. */
202
203 const char *save;
204 int r;
205
206 save = *p;
207 r = extract_first_word(p, ret, separators, flags);
208 if (r >= 0)
209 return r;
210
211 if (r == -EINVAL && !(flags & EXTRACT_CUNESCAPE_RELAX)) {
212
213 /* Retry it with EXTRACT_CUNESCAPE_RELAX. */
214 *p = save;
215 r = extract_first_word(p, ret, separators, flags|EXTRACT_CUNESCAPE_RELAX);
216 if (r >= 0) {
217 /* It worked this time, hence it must have been an invalid escape sequence we could correct. */
218 log_syntax(unit, LOG_WARNING, filename, line, EINVAL, "Invalid escape sequences in line, correcting: \"%s\"", rvalue);
219 return r;
220 }
221
222 /* If it's still EINVAL; then it must be unbalanced quoting, report this. */
223 if (r == -EINVAL)
224 return log_syntax(unit, LOG_ERR, filename, line, r, "Unbalanced quoting, ignoring: \"%s\"", rvalue);
225 }
226
227 /* Can be any error, report it */
228 return log_syntax(unit, LOG_ERR, filename, line, r, "Unable to decode word \"%s\", ignoring: %m", rvalue);
229 }
230
231 int extract_many_words(const char **p, const char *separators, ExtractFlags flags, ...) {
232 va_list ap;
233 char **l;
234 int n = 0, i, c, r;
235
236 /* Parses a number of words from a string, stripping any
237 * quotes if necessary. */
238
239 assert(p);
240
241 /* Count how many words are expected */
242 va_start(ap, flags);
243 for (;;) {
244 if (!va_arg(ap, char **))
245 break;
246 n++;
247 }
248 va_end(ap);
249
250 if (n <= 0)
251 return 0;
252
253 /* Read all words into a temporary array */
254 l = newa0(char*, n);
255 for (c = 0; c < n; c++) {
256
257 r = extract_first_word(p, &l[c], separators, flags);
258 if (r < 0) {
259 int j;
260
261 for (j = 0; j < c; j++)
262 free(l[j]);
263
264 return r;
265 }
266
267 if (r == 0)
268 break;
269 }
270
271 /* If we managed to parse all words, return them in the passed
272 * in parameters */
273 va_start(ap, flags);
274 for (i = 0; i < n; i++) {
275 char **v;
276
277 v = va_arg(ap, char **);
278 assert(v);
279
280 *v = l[i];
281 }
282 va_end(ap);
283
284 return c;
285 }