git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/extract-word.c
hwdb: ieee1394-unit-function: add Tascam IF-FW/DM mkII
[thirdparty/systemd.git] / src / basic / extract-word.c
1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
2
3 #include <errno.h>
4 #include <stdarg.h>
5 #include <stdbool.h>
6 #include <stddef.h>
7 #include <stdint.h>
8 #include <stdlib.h>
9 #include <syslog.h>
10
11 #include "alloc-util.h"
12 #include "escape.h"
13 #include "extract-word.h"
14 #include "log.h"
15 #include "macro.h"
16 #include "string-util.h"
17 #include "strv.h"
18 #include "utf8.h"
19
/* Extracts the first word from the string *p and hands it back in *ret.
 *
 * Parameters:
 *   p          - in/out cursor into the input. May point to NULL, in which case no word is
 *                returned. On a successful return it either points just behind the consumed
 *                input or has been set to NULL because the end of the string was reached.
 *   ret        - receives the extracted word. The buffer is grown with GREEDY_REALLOC() and
 *                ownership is passed out via TAKE_PTR(). Set to NULL when no word was found.
 *                Not written at all on the error returns.
 *   separators - set of separator characters, WHITESPACE is used if NULL.
 *   flags      - EXTRACT_* flags steering quote, escape and separator handling.
 *
 * Returns:
 *   1        a word (possibly empty) was stored in *ret
 *   0        no word was found, *ret and *p are both NULL
 *   -ENOMEM  growing the output buffer failed
 *   -EINVAL  the input ended right after a backslash or inside a quoted section and the
 *            relax flags do not permit that, or an escape sequence was not accepted and
 *            EXTRACT_UNESCAPE_RELAX is not set
 *
 * Local state: s is the output buffer, sz the number of bytes stored in it so far, c always
 * mirrors **p. */
20 int extract_first_word(const char **p, char **ret, const char *separators, ExtractFlags flags) {
21 _cleanup_free_ char *s = NULL;
22 size_t sz = 0;
23 char quote = 0; /* 0 or ' or " */
24 bool backslash = false; /* whether we've just seen a backslash */
25 char c;
26 int r;
27
28 assert(p);
29 assert(ret);
/* NOTE(review): presumably FLAGS_SET() is true only if all given bits are set, i.e. this
 * rejects combining EXTRACT_KEEP_QUOTE with EXTRACT_UNQUOTE - confirm against macro.h. */
30 assert(!FLAGS_SET(flags, EXTRACT_KEEP_QUOTE | EXTRACT_UNQUOTE));
31
32 /* Bail early if called after last value or with no input */
33 if (!*p)
34 goto finish;
35 c = **p;
36
37 if (!separators)
38 separators = WHITESPACE;
39
40 /* Parses the first word of a string, and returns it in
41 * *ret. Removes all quotes in the process. When parsing fails
42 * (because of an uneven number of quotes or similar), leaves
43 * the pointer *p at the first invalid character. */
44
/* Without separator coalescing a word is always returned (even an empty one), hence the
 * buffer is allocated up front so that the s == NULL check at the finish label is not hit. */
45 if (flags & EXTRACT_DONT_COALESCE_SEPARATORS)
46 if (!GREEDY_REALLOC(s, sz+1))
47 return -ENOMEM;
48
/* Phase 1: deal with leading separators. In coalescing mode they are skipped. In
 * non-coalescing mode the first one immediately ends an empty word. */
49 for (;; (*p)++, c = **p) {
50 if (c == 0)
51 goto finish_force_terminate;
52 else if (strchr(separators, c)) {
53 if (flags & EXTRACT_DONT_COALESCE_SEPARATORS) {
/* Step over the separator unless the caller wants it left in the input. */
54 if (!(flags & EXTRACT_RETAIN_SEPARATORS))
55 (*p)++;
56 goto finish_force_next;
57 }
58 } else {
59 /* We found a non-blank character, so we will always
60 * want to return a string (even if it is empty),
61 * allocate it here. */
62 if (!GREEDY_REALLOC(s, sz+1))
63 return -ENOMEM;
64 break;
65 }
66 }
67
/* Phase 2: the actual parser. Every iteration of this outer loop handles one of three
 * states: directly after a backslash, inside a quoted section, or unquoted text. The two
 * inner loops copy characters and break out whenever the state changes, so that the outer
 * loop advances *p by one and dispatches again. */
68 for (;; (*p)++, c = **p) {
69 if (backslash) {
/* NOTE(review): sz+7 presumably covers the longest output utf8_encode_unichar() can
 * produce plus the trailing NUL - confirm against utf8.h. */
70 if (!GREEDY_REALLOC(s, sz+7))
71 return -ENOMEM;
72
/* Input ends right after the backslash. */
73 if (c == 0) {
74 if ((flags & EXTRACT_UNESCAPE_RELAX) &&
75 (quote == 0 || flags & EXTRACT_RELAX)) {
76 /* If we find an unquoted trailing backslash and we're in
77 * EXTRACT_UNESCAPE_RELAX mode, keep it verbatim in the
78 * output.
79 *
80 * Unbalanced quotes will only be allowed in EXTRACT_RELAX
81 * mode, EXTRACT_UNESCAPE_RELAX mode does not allow them.
82 */
83 s[sz++] = '\\';
84 goto finish_force_terminate;
85 }
/* With EXTRACT_RELAX alone the dangling backslash is dropped silently. */
86 if (flags & EXTRACT_RELAX)
87 goto finish_force_terminate;
88 return -EINVAL;
89 }
90
91 if (flags & (EXTRACT_CUNESCAPE|EXTRACT_UNESCAPE_SEPARATORS)) {
92 bool eight_bit = false;
93 char32_t u;
94
95 if ((flags & EXTRACT_CUNESCAPE) &&
96 (r = cunescape_one(*p, SIZE_MAX, &u, &eight_bit, false)) >= 0) {
97 /* A valid escaped sequence */
98 assert(r >= 1);
99
/* r is treated as the number of input characters the sequence occupies. The last of
 * them is stepped over by the (*p)++ of the enclosing loop, hence only r - 1 here. */
100 (*p) += r - 1;
101
/* eight_bit results are stored as one raw byte, everything else is UTF-8 encoded. */
102 if (eight_bit)
103 s[sz++] = u;
104 else
105 sz += utf8_encode_unichar(s + sz, u);
106 } else if ((flags & EXTRACT_UNESCAPE_SEPARATORS) &&
107 (strchr(separators, **p) || **p == '\\'))
108 /* An escaped separator char or the escape char itself */
109 s[sz++] = c;
110 else if (flags & EXTRACT_UNESCAPE_RELAX) {
/* Sequence not accepted above: keep backslash and character verbatim. */
111 s[sz++] = '\\';
112 s[sz++] = c;
113 } else
114 return -EINVAL;
115 } else
/* No unescaping requested: drop the backslash and copy the following character as is. */
116 s[sz++] = c;
117
118 backslash = false;
119
120 } else if (quote != 0) { /* inside either single or double quotes */
121 for (;; (*p)++, c = **p) {
/* End of input while the quote is still open. */
122 if (c == 0) {
123 if (flags & EXTRACT_RELAX)
124 goto finish_force_terminate;
125 return -EINVAL;
126 } else if (c == quote) { /* found the end quote */
127 quote = 0;
/* With EXTRACT_UNQUOTE the quote character is not copied. Otherwise execution falls
 * through to the copy below and leaves the loop via the quote == 0 check. */
128 if (flags & EXTRACT_UNQUOTE)
129 break;
130 } else if (c == '\\' && !(flags & EXTRACT_RETAIN_ESCAPE)) {
131 backslash = true;
132 break;
133 }
134
/* Room for this character plus the trailing NUL. */
135 if (!GREEDY_REALLOC(s, sz+2))
136 return -ENOMEM;
137
138 s[sz++] = c;
139
140 if (quote == 0)
141 break;
142 }
143
144 } else {
/* Unquoted text. */
145 for (;; (*p)++, c = **p) {
146 if (c == 0)
147 goto finish_force_terminate;
148 else if (IN_SET(c, '\'', '"') && (flags & (EXTRACT_KEEP_QUOTE | EXTRACT_UNQUOTE))) {
/* Quotes only have a meaning if one of the two quote flags is set. As for the closing
 * quote, the character is copied only when EXTRACT_UNQUOTE is not set. */
149 quote = c;
150 if (flags & EXTRACT_UNQUOTE)
151 break;
152 } else if (c == '\\' && !(flags & EXTRACT_RETAIN_ESCAPE)) {
153 backslash = true;
154 break;
155 } else if (strchr(separators, c)) {
/* A separator ends the word. */
156 if (flags & EXTRACT_DONT_COALESCE_SEPARATORS) {
157 if (!(flags & EXTRACT_RETAIN_SEPARATORS))
158 (*p)++;
159 goto finish_force_next;
160 }
161 if (!(flags & EXTRACT_RETAIN_SEPARATORS))
162 /* Skip additional coalesced separators. */
163 for (;; (*p)++, c = **p) {
164 if (c == 0)
165 goto finish_force_terminate;
166 if (!strchr(separators, c))
167 break;
168 }
169 goto finish;
170
171 }
172
173 if (!GREEDY_REALLOC(s, sz+2))
174 return -ENOMEM;
175
176 s[sz++] = c;
177
/* An opening quote was just copied (keep-quote case): switch to the quoted state. */
178 if (quote != 0)
179 break;
180 }
181 }
182 }
183
/* End of input reached: signal that to the caller by resetting the cursor to NULL. */
184 finish_force_terminate:
185 *p = NULL;
/* Nothing was allocated, i.e. no word was found. */
186 finish:
187 if (!s) {
188 *p = NULL;
189 *ret = NULL;
190 return 0;
191 }
192
/* A word is available: NUL-terminate it and pass ownership to the caller. *p is left
 * wherever the code jumping here put it. */
193 finish_force_next:
194 s[sz] = 0;
195 *ret = TAKE_PTR(s);
196
197 return 1;
198 }
199
200 int extract_first_word_and_warn(
201 const char **p,
202 char **ret,
203 const char *separators,
204 ExtractFlags flags,
205 const char *unit,
206 const char *filename,
207 unsigned line,
208 const char *rvalue) {
209
210 /* Try to unquote it, if it fails, warn about it and try again
211 * but this time using EXTRACT_UNESCAPE_RELAX to keep the
212 * backslashes verbatim in invalid escape sequences. */
213
214 const char *save;
215 int r;
216
217 save = *p;
218 r = extract_first_word(p, ret, separators, flags);
219 if (r >= 0)
220 return r;
221
222 if (r == -EINVAL && !(flags & EXTRACT_UNESCAPE_RELAX)) {
223
224 /* Retry it with EXTRACT_UNESCAPE_RELAX. */
225 *p = save;
226 r = extract_first_word(p, ret, separators, flags|EXTRACT_UNESCAPE_RELAX);
227 if (r >= 0) {
228 /* It worked this time, hence it must have been an invalid escape sequence. */
229 log_syntax(unit, LOG_WARNING, filename, line, EINVAL, "Ignoring unknown escape sequences: \"%s\"", *ret);
230 return r;
231 }
232
233 /* If it's still EINVAL; then it must be unbalanced quoting, report this. */
234 if (r == -EINVAL)
235 return log_syntax(unit, LOG_ERR, filename, line, r, "Unbalanced quoting, ignoring: \"%s\"", rvalue);
236 }
237
238 /* Can be any error, report it */
239 return log_syntax(unit, LOG_ERR, filename, line, r, "Unable to decode word \"%s\", ignoring: %m", rvalue);
240 }
241
242 /* We pass ExtractFlags as unsigned int (to avoid undefined behaviour when passing
243 * an object that undergoes default argument promotion as an argument to va_start).
244 * Let's make sure that ExtractFlags fits into an unsigned int. */
245 assert_cc(sizeof(enum ExtractFlags) <= sizeof(unsigned));
246
247 int extract_many_words_internal(const char **p, const char *separators, unsigned flags, ...) {
248 va_list ap;
249 unsigned n = 0;
250 int r;
251
252 /* Parses a number of words from a string, stripping any quotes if necessary. */
253
254 assert(p);
255
256 /* Count how many words are expected */
257 va_start(ap, flags);
258 while (va_arg(ap, char**))
259 n++;
260 va_end(ap);
261
262 if (n == 0)
263 return 0;
264
265 /* Read all words into a temporary array */
266 char **l = newa0(char*, n);
267 unsigned c;
268
269 for (c = 0; c < n; c++) {
270 r = extract_first_word(p, &l[c], separators, flags);
271 if (r < 0) {
272 free_many_charp(l, c);
273 return r;
274 }
275 if (r == 0)
276 break;
277 }
278
279 /* If we managed to parse all words, return them in the passed in parameters */
280 va_start(ap, flags);
281 FOREACH_ARRAY(i, l, n) {
282 char **v = ASSERT_PTR(va_arg(ap, char**));
283 *v = *i;
284 }
285 va_end(ap);
286
287 return c;
288 }