]>
Commit | Line | Data |
---|---|---|
db9ecf05 | 1 | /* SPDX-License-Identifier: LGPL-2.1-or-later */ |
84ac7bea | 2 | |
11c3a366 TA |
3 | #include <errno.h> |
4 | #include <stdarg.h> | |
5 | #include <stdbool.h> | |
6 | #include <stddef.h> | |
7 | #include <stdint.h> | |
8 | #include <stdlib.h> | |
11c3a366 TA |
9 | #include <syslog.h> |
10 | ||
b5efdb8a | 11 | #include "alloc-util.h" |
4f5dd394 | 12 | #include "escape.h" |
b11d6a7b | 13 | #include "extract-word.h" |
11c3a366 TA |
14 | #include "log.h" |
15 | #include "macro.h" | |
b11d6a7b | 16 | #include "string-util.h" |
46bf625a | 17 | #include "strv.h" |
84ac7bea | 18 | #include "utf8.h" |
84ac7bea LP |
19 | |
20 | int extract_first_word(const char **p, char **ret, const char *separators, ExtractFlags flags) { | |
21 | _cleanup_free_ char *s = NULL; | |
319a4f4b | 22 | size_t sz = 0; |
84ac7bea LP |
23 | char quote = 0; /* 0 or ' or " */ |
24 | bool backslash = false; /* whether we've just seen a backslash */ | |
fa67d9c0 LP |
25 | char c; |
26 | int r; | |
84ac7bea LP |
27 | |
28 | assert(p); | |
29 | assert(ret); | |
1104d114 | 30 | assert(!FLAGS_SET(flags, EXTRACT_KEEP_QUOTE | EXTRACT_UNQUOTE)); |
84ac7bea | 31 | |
84ac7bea LP |
32 | /* Bail early if called after last value or with no input */ |
33 | if (!*p) | |
c58bd76a | 34 | goto finish; |
93de9eb7 | 35 | c = **p; |
84ac7bea | 36 | |
8372da44 FB |
37 | if (!separators) |
38 | separators = WHITESPACE; | |
39 | ||
84ac7bea LP |
40 | /* Parses the first word of a string, and returns it in |
41 | * *ret. Removes all quotes in the process. When parsing fails | |
42 | * (because of an uneven number of quotes or similar), leaves | |
43 | * the pointer *p at the first invalid character. */ | |
44 | ||
b85e1c25 | 45 | if (flags & EXTRACT_DONT_COALESCE_SEPARATORS) |
319a4f4b | 46 | if (!GREEDY_REALLOC(s, sz+1)) |
b85e1c25 | 47 | return -ENOMEM; |
84ac7bea | 48 | |
313cefa1 | 49 | for (;; (*p)++, c = **p) { |
b85e1c25 FB |
50 | if (c == 0) |
51 | goto finish_force_terminate; | |
52 | else if (strchr(separators, c)) { | |
93de9eb7 | 53 | if (flags & EXTRACT_DONT_COALESCE_SEPARATORS) { |
70b6ee61 LB |
54 | if (!(flags & EXTRACT_RETAIN_SEPARATORS)) |
55 | (*p)++; | |
b85e1c25 | 56 | goto finish_force_next; |
93de9eb7 | 57 | } |
b85e1c25 | 58 | } else { |
84ac7bea LP |
59 | /* We found a non-blank character, so we will always |
60 | * want to return a string (even if it is empty), | |
61 | * allocate it here. */ | |
319a4f4b | 62 | if (!GREEDY_REALLOC(s, sz+1)) |
84ac7bea | 63 | return -ENOMEM; |
b85e1c25 | 64 | break; |
84ac7bea | 65 | } |
b85e1c25 FB |
66 | } |
67 | ||
313cefa1 | 68 | for (;; (*p)++, c = **p) { |
84ac7bea | 69 | if (backslash) { |
319a4f4b | 70 | if (!GREEDY_REALLOC(s, sz+7)) |
84ac7bea LP |
71 | return -ENOMEM; |
72 | ||
73 | if (c == 0) { | |
3141089f | 74 | if ((flags & EXTRACT_UNESCAPE_RELAX) && |
fa67d9c0 | 75 | (quote == 0 || flags & EXTRACT_RELAX)) { |
84ac7bea | 76 | /* If we find an unquoted trailing backslash and we're in |
3141089f | 77 | * EXTRACT_UNESCAPE_RELAX mode, keep it verbatim in the |
84ac7bea LP |
78 | * output. |
79 | * | |
80 | * Unbalanced quotes will only be allowed in EXTRACT_RELAX | |
3141089f | 81 | * mode, EXTRACT_UNESCAPE_RELAX mode does not allow them. |
84ac7bea LP |
82 | */ |
83 | s[sz++] = '\\'; | |
84 | goto finish_force_terminate; | |
85 | } | |
86 | if (flags & EXTRACT_RELAX) | |
87 | goto finish_force_terminate; | |
88 | return -EINVAL; | |
89 | } | |
90 | ||
1e198efc | 91 | if (flags & (EXTRACT_CUNESCAPE|EXTRACT_UNESCAPE_SEPARATORS)) { |
3565e095 | 92 | bool eight_bit = false; |
c932fb71 | 93 | char32_t u; |
84ac7bea | 94 | |
1e198efc | 95 | if ((flags & EXTRACT_CUNESCAPE) && |
f5fbe71d | 96 | (r = cunescape_one(*p, SIZE_MAX, &u, &eight_bit, false)) >= 0) { |
1e198efc ZJS |
97 | /* A valid escaped sequence */ |
98 | assert(r >= 1); | |
99 | ||
3ff13c29 FB |
100 | (*p) += r - 1; |
101 | ||
3565e095 ZJS |
102 | if (eight_bit) |
103 | s[sz++] = u; | |
3ff13c29 | 104 | else |
3565e095 | 105 | sz += utf8_encode_unichar(s + sz, u); |
1e198efc | 106 | } else if ((flags & EXTRACT_UNESCAPE_SEPARATORS) && |
76c4e48e ZJS |
107 | (strchr(separators, **p) || **p == '\\')) |
108 | /* An escaped separator char or the escape char itself */ | |
1e198efc | 109 | s[sz++] = c; |
3141089f | 110 | else if (flags & EXTRACT_UNESCAPE_RELAX) { |
1e198efc ZJS |
111 | s[sz++] = '\\'; |
112 | s[sz++] = c; | |
113 | } else | |
114 | return -EINVAL; | |
84ac7bea LP |
115 | } else |
116 | s[sz++] = c; | |
117 | ||
84ac7bea LP |
118 | backslash = false; |
119 | ||
fa67d9c0 | 120 | } else if (quote != 0) { /* inside either single or double quotes */ |
313cefa1 | 121 | for (;; (*p)++, c = **p) { |
27fc921b FB |
122 | if (c == 0) { |
123 | if (flags & EXTRACT_RELAX) | |
124 | goto finish_force_terminate; | |
125 | return -EINVAL; | |
126 | } else if (c == quote) { /* found the end quote */ | |
127 | quote = 0; | |
1104d114 YW |
128 | if (flags & EXTRACT_UNQUOTE) |
129 | break; | |
c89f52ac | 130 | } else if (c == '\\' && !(flags & EXTRACT_RETAIN_ESCAPE)) { |
27fc921b FB |
131 | backslash = true; |
132 | break; | |
27fc921b | 133 | } |
1104d114 YW |
134 | |
135 | if (!GREEDY_REALLOC(s, sz+2)) | |
136 | return -ENOMEM; | |
137 | ||
138 | s[sz++] = c; | |
139 | ||
140 | if (quote == 0) | |
141 | break; | |
84ac7bea LP |
142 | } |
143 | ||
84ac7bea | 144 | } else { |
313cefa1 | 145 | for (;; (*p)++, c = **p) { |
27fc921b FB |
146 | if (c == 0) |
147 | goto finish_force_terminate; | |
1104d114 | 148 | else if (IN_SET(c, '\'', '"') && (flags & (EXTRACT_KEEP_QUOTE | EXTRACT_UNQUOTE))) { |
27fc921b | 149 | quote = c; |
1104d114 YW |
150 | if (flags & EXTRACT_UNQUOTE) |
151 | break; | |
c89f52ac | 152 | } else if (c == '\\' && !(flags & EXTRACT_RETAIN_ESCAPE)) { |
27fc921b FB |
153 | backslash = true; |
154 | break; | |
155 | } else if (strchr(separators, c)) { | |
156 | if (flags & EXTRACT_DONT_COALESCE_SEPARATORS) { | |
70b6ee61 LB |
157 | if (!(flags & EXTRACT_RETAIN_SEPARATORS)) |
158 | (*p)++; | |
27fc921b FB |
159 | goto finish_force_next; |
160 | } | |
70b6ee61 LB |
161 | if (!(flags & EXTRACT_RETAIN_SEPARATORS)) |
162 | /* Skip additional coalesced separators. */ | |
163 | for (;; (*p)++, c = **p) { | |
164 | if (c == 0) | |
165 | goto finish_force_terminate; | |
166 | if (!strchr(separators, c)) | |
167 | break; | |
168 | } | |
0247447e FB |
169 | goto finish; |
170 | ||
27fc921b | 171 | } |
1104d114 YW |
172 | |
173 | if (!GREEDY_REALLOC(s, sz+2)) | |
174 | return -ENOMEM; | |
175 | ||
176 | s[sz++] = c; | |
177 | ||
178 | if (quote != 0) | |
179 | break; | |
84ac7bea LP |
180 | } |
181 | } | |
84ac7bea LP |
182 | } |
183 | ||
184 | finish_force_terminate: | |
185 | *p = NULL; | |
186 | finish: | |
187 | if (!s) { | |
188 | *p = NULL; | |
189 | *ret = NULL; | |
190 | return 0; | |
191 | } | |
192 | ||
193 | finish_force_next: | |
194 | s[sz] = 0; | |
ae2a15bc | 195 | *ret = TAKE_PTR(s); |
84ac7bea LP |
196 | |
197 | return 1; | |
198 | } | |
199 | ||
200 | int extract_first_word_and_warn( | |
201 | const char **p, | |
202 | char **ret, | |
203 | const char *separators, | |
204 | ExtractFlags flags, | |
205 | const char *unit, | |
206 | const char *filename, | |
207 | unsigned line, | |
208 | const char *rvalue) { | |
209 | ||
dea7b6b0 | 210 | /* Try to unquote it, if it fails, warn about it and try again |
3141089f | 211 | * but this time using EXTRACT_UNESCAPE_RELAX to keep the |
dea7b6b0 LP |
212 | * backslashes verbatim in invalid escape sequences. */ |
213 | ||
84ac7bea LP |
214 | const char *save; |
215 | int r; | |
216 | ||
217 | save = *p; | |
218 | r = extract_first_word(p, ret, separators, flags); | |
dea7b6b0 LP |
219 | if (r >= 0) |
220 | return r; | |
221 | ||
3141089f | 222 | if (r == -EINVAL && !(flags & EXTRACT_UNESCAPE_RELAX)) { |
84ac7bea | 223 | |
3141089f | 224 | /* Retry it with EXTRACT_UNESCAPE_RELAX. */ |
84ac7bea | 225 | *p = save; |
3141089f | 226 | r = extract_first_word(p, ret, separators, flags|EXTRACT_UNESCAPE_RELAX); |
dea7b6b0 | 227 | if (r >= 0) { |
330785f5 ZJS |
228 | /* It worked this time, hence it must have been an invalid escape sequence. */ |
229 | log_syntax(unit, LOG_WARNING, filename, line, EINVAL, "Ignoring unknown escape sequences: \"%s\"", *ret); | |
dea7b6b0 LP |
230 | return r; |
231 | } | |
232 | ||
233 | /* If it's still EINVAL; then it must be unbalanced quoting, report this. */ | |
234 | if (r == -EINVAL) | |
235 | return log_syntax(unit, LOG_ERR, filename, line, r, "Unbalanced quoting, ignoring: \"%s\"", rvalue); | |
84ac7bea LP |
236 | } |
237 | ||
dea7b6b0 LP |
238 | /* Can be any error, report it */ |
239 | return log_syntax(unit, LOG_ERR, filename, line, r, "Unable to decode word \"%s\", ignoring: %m", rvalue); | |
84ac7bea LP |
240 | } |
241 | ||
e4d85dbb ZJS |
242 | /* We pass ExtractFlags as unsigned int (to avoid undefined behaviour when passing |
243 | * an object that undergoes default argument promotion as an argument to va_start). | |
244 | * Let's make sure that ExtractFlags fits into an unsigned int. */ | |
245 | assert_cc(sizeof(enum ExtractFlags) <= sizeof(unsigned)); | |
246 | ||
247 | int extract_many_words(const char **p, const char *separators, unsigned flags, ...) { | |
84ac7bea LP |
248 | va_list ap; |
249 | char **l; | |
250 | int n = 0, i, c, r; | |
251 | ||
252 | /* Parses a number of words from a string, stripping any | |
253 | * quotes if necessary. */ | |
254 | ||
255 | assert(p); | |
256 | ||
257 | /* Count how many words are expected */ | |
258 | va_start(ap, flags); | |
259 | for (;;) { | |
260 | if (!va_arg(ap, char **)) | |
261 | break; | |
262 | n++; | |
263 | } | |
264 | va_end(ap); | |
265 | ||
266 | if (n <= 0) | |
267 | return 0; | |
268 | ||
269 | /* Read all words into a temporary array */ | |
270 | l = newa0(char*, n); | |
271 | for (c = 0; c < n; c++) { | |
272 | ||
273 | r = extract_first_word(p, &l[c], separators, flags); | |
274 | if (r < 0) { | |
275 | int j; | |
276 | ||
277 | for (j = 0; j < c; j++) | |
278 | free(l[j]); | |
279 | ||
280 | return r; | |
281 | } | |
282 | ||
283 | if (r == 0) | |
284 | break; | |
285 | } | |
286 | ||
287 | /* If we managed to parse all words, return them in the passed | |
288 | * in parameters */ | |
289 | va_start(ap, flags); | |
290 | for (i = 0; i < n; i++) { | |
291 | char **v; | |
292 | ||
293 | v = va_arg(ap, char **); | |
294 | assert(v); | |
295 | ||
296 | *v = l[i]; | |
297 | } | |
298 | va_end(ap); | |
299 | ||
300 | return c; | |
301 | } |