]>
Commit | Line | Data |
---|---|---|
53e1b683 | 1 | /* SPDX-License-Identifier: LGPL-2.1+ */ |
84ac7bea LP |
2 | /*** |
3 | This file is part of systemd. | |
4 | ||
5 | Copyright 2010 Lennart Poettering | |
6 | ||
7 | systemd is free software; you can redistribute it and/or modify it | |
8 | under the terms of the GNU Lesser General Public License as published by | |
9 | the Free Software Foundation; either version 2.1 of the License, or | |
10 | (at your option) any later version. | |
11 | ||
12 | systemd is distributed in the hope that it will be useful, but | |
13 | WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 | Lesser General Public License for more details. | |
16 | ||
17 | You should have received a copy of the GNU Lesser General Public License | |
18 | along with systemd; If not, see <http://www.gnu.org/licenses/>. | |
19 | ***/ | |
20 | ||
11c3a366 TA |
21 | #include <errno.h> |
22 | #include <stdarg.h> | |
23 | #include <stdbool.h> | |
24 | #include <stddef.h> | |
25 | #include <stdint.h> | |
26 | #include <stdlib.h> | |
27 | #include <string.h> | |
28 | #include <syslog.h> | |
29 | ||
b5efdb8a | 30 | #include "alloc-util.h" |
4f5dd394 | 31 | #include "escape.h" |
b11d6a7b | 32 | #include "extract-word.h" |
11c3a366 TA |
33 | #include "log.h" |
34 | #include "macro.h" | |
b11d6a7b | 35 | #include "string-util.h" |
84ac7bea | 36 | #include "utf8.h" |
84ac7bea LP |
37 | |
38 | int extract_first_word(const char **p, char **ret, const char *separators, ExtractFlags flags) { | |
39 | _cleanup_free_ char *s = NULL; | |
40 | size_t allocated = 0, sz = 0; | |
b85e1c25 | 41 | char c; |
84ac7bea LP |
42 | int r; |
43 | ||
44 | char quote = 0; /* 0 or ' or " */ | |
45 | bool backslash = false; /* whether we've just seen a backslash */ | |
84ac7bea LP |
46 | |
47 | assert(p); | |
48 | assert(ret); | |
49 | ||
84ac7bea LP |
50 | /* Bail early if called after last value or with no input */ |
51 | if (!*p) | |
c58bd76a | 52 | goto finish; |
93de9eb7 | 53 | c = **p; |
84ac7bea | 54 | |
8372da44 FB |
55 | if (!separators) |
56 | separators = WHITESPACE; | |
57 | ||
84ac7bea LP |
58 | /* Parses the first word of a string, and returns it in |
59 | * *ret. Removes all quotes in the process. When parsing fails | |
60 | * (because of an uneven number of quotes or similar), leaves | |
61 | * the pointer *p at the first invalid character. */ | |
62 | ||
b85e1c25 FB |
63 | if (flags & EXTRACT_DONT_COALESCE_SEPARATORS) |
64 | if (!GREEDY_REALLOC(s, allocated, sz+1)) | |
65 | return -ENOMEM; | |
84ac7bea | 66 | |
313cefa1 | 67 | for (;; (*p)++, c = **p) { |
b85e1c25 FB |
68 | if (c == 0) |
69 | goto finish_force_terminate; | |
70 | else if (strchr(separators, c)) { | |
93de9eb7 | 71 | if (flags & EXTRACT_DONT_COALESCE_SEPARATORS) { |
313cefa1 | 72 | (*p)++; |
b85e1c25 | 73 | goto finish_force_next; |
93de9eb7 | 74 | } |
b85e1c25 | 75 | } else { |
84ac7bea LP |
76 | /* We found a non-blank character, so we will always |
77 | * want to return a string (even if it is empty), | |
78 | * allocate it here. */ | |
79 | if (!GREEDY_REALLOC(s, allocated, sz+1)) | |
80 | return -ENOMEM; | |
b85e1c25 | 81 | break; |
84ac7bea | 82 | } |
b85e1c25 FB |
83 | } |
84 | ||
313cefa1 | 85 | for (;; (*p)++, c = **p) { |
84ac7bea LP |
86 | if (backslash) { |
87 | if (!GREEDY_REALLOC(s, allocated, sz+7)) | |
88 | return -ENOMEM; | |
89 | ||
90 | if (c == 0) { | |
91 | if ((flags & EXTRACT_CUNESCAPE_RELAX) && | |
92 | (!quote || flags & EXTRACT_RELAX)) { | |
93 | /* If we find an unquoted trailing backslash and we're in | |
94 | * EXTRACT_CUNESCAPE_RELAX mode, keep it verbatim in the | |
95 | * output. | |
96 | * | |
97 | * Unbalanced quotes will only be allowed in EXTRACT_RELAX | |
98 | * mode, EXTRACT_CUNESCAPE_RELAX mode does not allow them. | |
99 | */ | |
100 | s[sz++] = '\\'; | |
101 | goto finish_force_terminate; | |
102 | } | |
103 | if (flags & EXTRACT_RELAX) | |
104 | goto finish_force_terminate; | |
105 | return -EINVAL; | |
106 | } | |
107 | ||
108 | if (flags & EXTRACT_CUNESCAPE) { | |
3565e095 | 109 | bool eight_bit = false; |
c932fb71 | 110 | char32_t u; |
84ac7bea | 111 | |
3565e095 | 112 | r = cunescape_one(*p, (size_t) -1, &u, &eight_bit); |
84ac7bea LP |
113 | if (r < 0) { |
114 | if (flags & EXTRACT_CUNESCAPE_RELAX) { | |
115 | s[sz++] = '\\'; | |
116 | s[sz++] = c; | |
3ff13c29 FB |
117 | } else |
118 | return -EINVAL; | |
119 | } else { | |
120 | (*p) += r - 1; | |
121 | ||
3565e095 ZJS |
122 | if (eight_bit) |
123 | s[sz++] = u; | |
3ff13c29 | 124 | else |
3565e095 | 125 | sz += utf8_encode_unichar(s + sz, u); |
84ac7bea | 126 | } |
84ac7bea LP |
127 | } else |
128 | s[sz++] = c; | |
129 | ||
84ac7bea LP |
130 | backslash = false; |
131 | ||
132 | } else if (quote) { /* inside either single or double quotes */ | |
313cefa1 | 133 | for (;; (*p)++, c = **p) { |
27fc921b FB |
134 | if (c == 0) { |
135 | if (flags & EXTRACT_RELAX) | |
136 | goto finish_force_terminate; | |
137 | return -EINVAL; | |
138 | } else if (c == quote) { /* found the end quote */ | |
139 | quote = 0; | |
140 | break; | |
c89f52ac | 141 | } else if (c == '\\' && !(flags & EXTRACT_RETAIN_ESCAPE)) { |
27fc921b FB |
142 | backslash = true; |
143 | break; | |
144 | } else { | |
145 | if (!GREEDY_REALLOC(s, allocated, sz+2)) | |
146 | return -ENOMEM; | |
84ac7bea | 147 | |
27fc921b FB |
148 | s[sz++] = c; |
149 | } | |
84ac7bea LP |
150 | } |
151 | ||
84ac7bea | 152 | } else { |
313cefa1 | 153 | for (;; (*p)++, c = **p) { |
27fc921b FB |
154 | if (c == 0) |
155 | goto finish_force_terminate; | |
4c701096 | 156 | else if (IN_SET(c, '\'', '"') && (flags & EXTRACT_QUOTES)) { |
27fc921b FB |
157 | quote = c; |
158 | break; | |
c89f52ac | 159 | } else if (c == '\\' && !(flags & EXTRACT_RETAIN_ESCAPE)) { |
27fc921b FB |
160 | backslash = true; |
161 | break; | |
162 | } else if (strchr(separators, c)) { | |
163 | if (flags & EXTRACT_DONT_COALESCE_SEPARATORS) { | |
313cefa1 | 164 | (*p)++; |
27fc921b FB |
165 | goto finish_force_next; |
166 | } | |
0247447e | 167 | /* Skip additional coalesced separators. */ |
313cefa1 | 168 | for (;; (*p)++, c = **p) { |
0247447e FB |
169 | if (c == 0) |
170 | goto finish_force_terminate; | |
171 | if (!strchr(separators, c)) | |
172 | break; | |
173 | } | |
174 | goto finish; | |
175 | ||
27fc921b FB |
176 | } else { |
177 | if (!GREEDY_REALLOC(s, allocated, sz+2)) | |
178 | return -ENOMEM; | |
84ac7bea | 179 | |
27fc921b FB |
180 | s[sz++] = c; |
181 | } | |
84ac7bea LP |
182 | } |
183 | } | |
84ac7bea LP |
184 | } |
185 | ||
186 | finish_force_terminate: | |
187 | *p = NULL; | |
188 | finish: | |
189 | if (!s) { | |
190 | *p = NULL; | |
191 | *ret = NULL; | |
192 | return 0; | |
193 | } | |
194 | ||
195 | finish_force_next: | |
196 | s[sz] = 0; | |
197 | *ret = s; | |
198 | s = NULL; | |
199 | ||
200 | return 1; | |
201 | } | |
202 | ||
203 | int extract_first_word_and_warn( | |
204 | const char **p, | |
205 | char **ret, | |
206 | const char *separators, | |
207 | ExtractFlags flags, | |
208 | const char *unit, | |
209 | const char *filename, | |
210 | unsigned line, | |
211 | const char *rvalue) { | |
212 | ||
dea7b6b0 LP |
213 | /* Try to unquote it, if it fails, warn about it and try again |
214 | * but this time using EXTRACT_CUNESCAPE_RELAX to keep the | |
215 | * backslashes verbatim in invalid escape sequences. */ | |
216 | ||
84ac7bea LP |
217 | const char *save; |
218 | int r; | |
219 | ||
220 | save = *p; | |
221 | r = extract_first_word(p, ret, separators, flags); | |
dea7b6b0 LP |
222 | if (r >= 0) |
223 | return r; | |
224 | ||
225 | if (r == -EINVAL && !(flags & EXTRACT_CUNESCAPE_RELAX)) { | |
84ac7bea LP |
226 | |
227 | /* Retry it with EXTRACT_CUNESCAPE_RELAX. */ | |
228 | *p = save; | |
229 | r = extract_first_word(p, ret, separators, flags|EXTRACT_CUNESCAPE_RELAX); | |
dea7b6b0 | 230 | if (r >= 0) { |
330785f5 ZJS |
231 | /* It worked this time, hence it must have been an invalid escape sequence. */ |
232 | log_syntax(unit, LOG_WARNING, filename, line, EINVAL, "Ignoring unknown escape sequences: \"%s\"", *ret); | |
dea7b6b0 LP |
233 | return r; |
234 | } | |
235 | ||
236 | /* If it's still EINVAL; then it must be unbalanced quoting, report this. */ | |
237 | if (r == -EINVAL) | |
238 | return log_syntax(unit, LOG_ERR, filename, line, r, "Unbalanced quoting, ignoring: \"%s\"", rvalue); | |
84ac7bea LP |
239 | } |
240 | ||
dea7b6b0 LP |
241 | /* Can be any error, report it */ |
242 | return log_syntax(unit, LOG_ERR, filename, line, r, "Unable to decode word \"%s\", ignoring: %m", rvalue); | |
84ac7bea LP |
243 | } |
244 | ||
e4d85dbb ZJS |
245 | /* We pass ExtractFlags as unsigned int (to avoid undefined behaviour when passing |
246 | * an object that undergoes default argument promotion as an argument to va_start). | |
247 | * Let's make sure that ExtractFlags fits into an unsigned int. */ | |
248 | assert_cc(sizeof(enum ExtractFlags) <= sizeof(unsigned)); | |
249 | ||
250 | int extract_many_words(const char **p, const char *separators, unsigned flags, ...) { | |
84ac7bea LP |
251 | va_list ap; |
252 | char **l; | |
253 | int n = 0, i, c, r; | |
254 | ||
255 | /* Parses a number of words from a string, stripping any | |
256 | * quotes if necessary. */ | |
257 | ||
258 | assert(p); | |
259 | ||
260 | /* Count how many words are expected */ | |
261 | va_start(ap, flags); | |
262 | for (;;) { | |
263 | if (!va_arg(ap, char **)) | |
264 | break; | |
265 | n++; | |
266 | } | |
267 | va_end(ap); | |
268 | ||
269 | if (n <= 0) | |
270 | return 0; | |
271 | ||
272 | /* Read all words into a temporary array */ | |
273 | l = newa0(char*, n); | |
274 | for (c = 0; c < n; c++) { | |
275 | ||
276 | r = extract_first_word(p, &l[c], separators, flags); | |
277 | if (r < 0) { | |
278 | int j; | |
279 | ||
280 | for (j = 0; j < c; j++) | |
281 | free(l[j]); | |
282 | ||
283 | return r; | |
284 | } | |
285 | ||
286 | if (r == 0) | |
287 | break; | |
288 | } | |
289 | ||
290 | /* If we managed to parse all words, return them in the passed | |
291 | * in parameters */ | |
292 | va_start(ap, flags); | |
293 | for (i = 0; i < n; i++) { | |
294 | char **v; | |
295 | ||
296 | v = va_arg(ap, char **); | |
297 | assert(v); | |
298 | ||
299 | *v = l[i]; | |
300 | } | |
301 | va_end(ap); | |
302 | ||
303 | return c; | |
304 | } |