]>
Commit | Line | Data |
---|---|---|
53e1b683 | 1 | /* SPDX-License-Identifier: LGPL-2.1+ */ |
84ac7bea LP |
2 | /*** |
3 | This file is part of systemd. | |
4 | ||
5 | Copyright 2010 Lennart Poettering | |
84ac7bea LP |
6 | ***/ |
7 | ||
11c3a366 TA |
8 | #include <errno.h> |
9 | #include <stdarg.h> | |
10 | #include <stdbool.h> | |
11 | #include <stddef.h> | |
12 | #include <stdint.h> | |
13 | #include <stdlib.h> | |
14 | #include <string.h> | |
15 | #include <syslog.h> | |
16 | ||
b5efdb8a | 17 | #include "alloc-util.h" |
4f5dd394 | 18 | #include "escape.h" |
b11d6a7b | 19 | #include "extract-word.h" |
11c3a366 TA |
20 | #include "log.h" |
21 | #include "macro.h" | |
b11d6a7b | 22 | #include "string-util.h" |
84ac7bea | 23 | #include "utf8.h" |
84ac7bea LP |
24 | |
/* Extracts the first word from the string *p and stores it, as a newly allocated string, in *ret.
 *
 * Words are delimited by any character listed in 'separators'; if 'separators' is NULL, WHITESPACE is used.
 * 'flags' adjusts the parsing:
 *
 *   EXTRACT_QUOTES                    ' and " open a quoted section; the quote characters themselves are not
 *                                     copied to the output and separators inside the quotes do not end the word.
 *   EXTRACT_RETAIN_ESCAPE             Backslashes are copied like any other character instead of starting an
 *                                     escape sequence.
 *   EXTRACT_CUNESCAPE                 The sequence following a backslash is decoded with cunescape_one().
 *                                     Without this flag the character after the backslash is copied as is.
 *   EXTRACT_CUNESCAPE_RELAX           Escape sequences cunescape_one() rejects, and an unquoted trailing
 *                                     backslash, are kept verbatim instead of failing.
 *   EXTRACT_RELAX                     An unterminated quote or a trailing backslash ends the word instead of
 *                                     failing.
 *   EXTRACT_DONT_COALESCE_SEPARATORS  Every single separator ends a word, hence empty words are returned.
 *
 * Returns:
 *   1        a word was extracted; *ret points to it (callers in this file release it with free()). *p is
 *            advanced past the word and the separator(s) following it, or set to NULL if the end of the
 *            input was reached.
 *   0        there is no (further) word; both *ret and *p are set to NULL.
 *   -ENOMEM  growing the output buffer failed.
 *   -EINVAL  trailing backslash, unterminated quote, or invalid escape sequence that the flags do not
 *            permit. *p is left where parsing stopped and *ret is not written.
 */
int extract_first_word(const char **p, char **ret, const char *separators, ExtractFlags flags) {
        _cleanup_free_ char *s = NULL;
        size_t allocated = 0, sz = 0;
        char c;
        int r;

        char quote = 0;                 /* 0 or ' or " */
        bool backslash = false;         /* whether we've just seen a backslash */

        assert(p);
        assert(ret);

        /* Bail early if called after last value or with no input */
        if (!*p)
                goto finish;
        c = **p;

        if (!separators)
                separators = WHITESPACE;

        /* Parses the first word of a string, and returns it in
         * *ret. Removes all quotes in the process. When parsing fails
         * (because of an uneven number of quotes or similar), leaves
         * the pointer *p at the first invalid character. */

        /* When separators are not coalesced, allocate the buffer right away: from here on s is non-NULL, so
         * even empty input produces an empty word (return value 1) rather than "no word" (return value 0). */
        if (flags & EXTRACT_DONT_COALESCE_SEPARATORS)
                if (!GREEDY_REALLOC(s, allocated, sz+1))
                        return -ENOMEM;

        /* Phase 1: skip leading separators. Note that 'break' leaves the loop without executing the
         * increment expression, i.e. *p and c still refer to the first character of the word afterwards. */
        for (;; (*p)++, c = **p) {
                if (c == 0)
                        goto finish_force_terminate;
                else if (strchr(separators, c)) {
                        if (flags & EXTRACT_DONT_COALESCE_SEPARATORS) {
                                /* A leading separator delimits an empty word: step over it and return. */
                                (*p)++;
                                goto finish_force_next;
                        }
                } else {
                        /* We found a non-blank character, so we will always
                         * want to return a string (even if it is empty),
                         * allocate it here. */
                        if (!GREEDY_REALLOC(s, allocated, sz+1))
                                return -ENOMEM;
                        break;
                }
        }

        /* Phase 2: copy the word. Each iteration of this outer loop handles one parser state (after a
         * backslash / inside quotes / unquoted). The inner loops 'break' out on a state change while *p
         * still points at the character that caused it (quote or backslash); the increment expression of
         * the outer loop then steps over that character. */
        for (;; (*p)++, c = **p) {
                if (backslash) {
                        /* Make room up front for everything that may be appended in this branch, plus
                         * the trailing NUL. */
                        if (!GREEDY_REALLOC(s, allocated, sz+7))
                                return -ENOMEM;

                        if (c == 0) {
                                if ((flags & EXTRACT_CUNESCAPE_RELAX) &&
                                    (!quote || flags & EXTRACT_RELAX)) {
                                        /* If we find an unquoted trailing backslash and we're in
                                         * EXTRACT_CUNESCAPE_RELAX mode, keep it verbatim in the
                                         * output.
                                         *
                                         * Unbalanced quotes will only be allowed in EXTRACT_RELAX
                                         * mode, EXTRACT_CUNESCAPE_RELAX mode does not allow them.
                                         */
                                        s[sz++] = '\\';
                                        goto finish_force_terminate;
                                }
                                /* In plain EXTRACT_RELAX mode the dangling backslash is dropped. */
                                if (flags & EXTRACT_RELAX)
                                        goto finish_force_terminate;
                                return -EINVAL;
                        }

                        if (flags & EXTRACT_CUNESCAPE) {
                                bool eight_bit = false;
                                char32_t u;

                                r = cunescape_one(*p, (size_t) -1, &u, &eight_bit);
                                if (r < 0) {
                                        /* Not a valid escape sequence: either keep backslash and
                                         * character verbatim, or fail. */
                                        if (flags & EXTRACT_CUNESCAPE_RELAX) {
                                                s[sz++] = '\\';
                                                s[sz++] = c;
                                        } else
                                                return -EINVAL;
                                } else {
                                        /* r is presumably the number of input characters the sequence
                                         * occupies (confirm against cunescape_one()); skip all but the
                                         * last one, which the loop increment steps over. */
                                        (*p) += r - 1;

                                        /* Store the result either as a single byte or UTF-8 encoded,
                                         * depending on what cunescape_one() reported. */
                                        if (eight_bit)
                                                s[sz++] = u;
                                        else
                                                sz += utf8_encode_unichar(s + sz, u);
                                }
                        } else
                                /* No unescaping requested: the backslash merely quotes the next character. */
                                s[sz++] = c;

                        backslash = false;

                } else if (quote) {     /* inside either single or double quotes */
                        for (;; (*p)++, c = **p) {
                                if (c == 0) {
                                        /* Input ended before the closing quote was seen. */
                                        if (flags & EXTRACT_RELAX)
                                                goto finish_force_terminate;
                                        return -EINVAL;
                                } else if (c == quote) {        /* found the end quote */
                                        quote = 0;
                                        break;
                                } else if (c == '\\' && !(flags & EXTRACT_RETAIN_ESCAPE)) {
                                        backslash = true;
                                        break;
                                } else {
                                        /* One more character plus the trailing NUL. Separators are
                                         * ordinary characters while inside quotes. */
                                        if (!GREEDY_REALLOC(s, allocated, sz+2))
                                                return -ENOMEM;

                                        s[sz++] = c;
                                }
                        }

                } else {                /* unquoted */
                        for (;; (*p)++, c = **p) {
                                if (c == 0)
                                        goto finish_force_terminate;
                                else if (IN_SET(c, '\'', '"') && (flags & EXTRACT_QUOTES)) {
                                        /* Remember which quote character has to close the section. */
                                        quote = c;
                                        break;
                                } else if (c == '\\' && !(flags & EXTRACT_RETAIN_ESCAPE)) {
                                        backslash = true;
                                        break;
                                } else if (strchr(separators, c)) {
                                        /* The word ends here. */
                                        if (flags & EXTRACT_DONT_COALESCE_SEPARATORS) {
                                                /* Consume exactly this one separator. */
                                                (*p)++;
                                                goto finish_force_next;
                                        }
                                        /* Skip additional coalesced separators. */
                                        for (;; (*p)++, c = **p) {
                                                if (c == 0)
                                                        goto finish_force_terminate;
                                                if (!strchr(separators, c))
                                                        break;
                                        }
                                        goto finish;

                                } else {
                                        /* One more character plus the trailing NUL. */
                                        if (!GREEDY_REALLOC(s, allocated, sz+2))
                                                return -ENOMEM;

                                        s[sz++] = c;
                                }
                        }
                }
        }

finish_force_terminate:
        /* The end of the input was reached: tell the caller there is nothing left to parse. */
        *p = NULL;
finish:
        /* No buffer was ever allocated, hence no word was found. */
        if (!s) {
                *p = NULL;
                *ret = NULL;
                return 0;
        }

finish_force_next:
        /* NUL-terminate the word and pass the buffer on to the caller. */
        s[sz] = 0;
        *ret = TAKE_PTR(s);

        return 1;
}
188 | ||
189 | int extract_first_word_and_warn( | |
190 | const char **p, | |
191 | char **ret, | |
192 | const char *separators, | |
193 | ExtractFlags flags, | |
194 | const char *unit, | |
195 | const char *filename, | |
196 | unsigned line, | |
197 | const char *rvalue) { | |
198 | ||
dea7b6b0 LP |
199 | /* Try to unquote it, if it fails, warn about it and try again |
200 | * but this time using EXTRACT_CUNESCAPE_RELAX to keep the | |
201 | * backslashes verbatim in invalid escape sequences. */ | |
202 | ||
84ac7bea LP |
203 | const char *save; |
204 | int r; | |
205 | ||
206 | save = *p; | |
207 | r = extract_first_word(p, ret, separators, flags); | |
dea7b6b0 LP |
208 | if (r >= 0) |
209 | return r; | |
210 | ||
211 | if (r == -EINVAL && !(flags & EXTRACT_CUNESCAPE_RELAX)) { | |
84ac7bea LP |
212 | |
213 | /* Retry it with EXTRACT_CUNESCAPE_RELAX. */ | |
214 | *p = save; | |
215 | r = extract_first_word(p, ret, separators, flags|EXTRACT_CUNESCAPE_RELAX); | |
dea7b6b0 | 216 | if (r >= 0) { |
330785f5 ZJS |
217 | /* It worked this time, hence it must have been an invalid escape sequence. */ |
218 | log_syntax(unit, LOG_WARNING, filename, line, EINVAL, "Ignoring unknown escape sequences: \"%s\"", *ret); | |
dea7b6b0 LP |
219 | return r; |
220 | } | |
221 | ||
222 | /* If it's still EINVAL; then it must be unbalanced quoting, report this. */ | |
223 | if (r == -EINVAL) | |
224 | return log_syntax(unit, LOG_ERR, filename, line, r, "Unbalanced quoting, ignoring: \"%s\"", rvalue); | |
84ac7bea LP |
225 | } |
226 | ||
dea7b6b0 LP |
227 | /* Can be any error, report it */ |
228 | return log_syntax(unit, LOG_ERR, filename, line, r, "Unable to decode word \"%s\", ignoring: %m", rvalue); | |
84ac7bea LP |
229 | } |
230 | ||
e4d85dbb ZJS |
/* extract_many_words() takes its flags as a plain 'unsigned' instead of as ExtractFlags: that parameter
 * is the one handed to va_start(), and passing va_start() a parameter whose type is subject to default
 * argument promotion is undefined behaviour. Make sure ExtractFlags is not wider than an unsigned int,
 * so that no flag bits get lost on the way. */
assert_cc(sizeof(enum ExtractFlags) <= sizeof(unsigned));
235 | ||
236 | int extract_many_words(const char **p, const char *separators, unsigned flags, ...) { | |
84ac7bea LP |
237 | va_list ap; |
238 | char **l; | |
239 | int n = 0, i, c, r; | |
240 | ||
241 | /* Parses a number of words from a string, stripping any | |
242 | * quotes if necessary. */ | |
243 | ||
244 | assert(p); | |
245 | ||
246 | /* Count how many words are expected */ | |
247 | va_start(ap, flags); | |
248 | for (;;) { | |
249 | if (!va_arg(ap, char **)) | |
250 | break; | |
251 | n++; | |
252 | } | |
253 | va_end(ap); | |
254 | ||
255 | if (n <= 0) | |
256 | return 0; | |
257 | ||
258 | /* Read all words into a temporary array */ | |
259 | l = newa0(char*, n); | |
260 | for (c = 0; c < n; c++) { | |
261 | ||
262 | r = extract_first_word(p, &l[c], separators, flags); | |
263 | if (r < 0) { | |
264 | int j; | |
265 | ||
266 | for (j = 0; j < c; j++) | |
267 | free(l[j]); | |
268 | ||
269 | return r; | |
270 | } | |
271 | ||
272 | if (r == 0) | |
273 | break; | |
274 | } | |
275 | ||
276 | /* If we managed to parse all words, return them in the passed | |
277 | * in parameters */ | |
278 | va_start(ap, flags); | |
279 | for (i = 0; i < n; i++) { | |
280 | char **v; | |
281 | ||
282 | v = va_arg(ap, char **); | |
283 | assert(v); | |
284 | ||
285 | *v = l[i]; | |
286 | } | |
287 | va_end(ap); | |
288 | ||
289 | return c; | |
290 | } |