]>
Commit | Line | Data |
---|---|---|
1 | /* SPDX-License-Identifier: LGPL-2.1-or-later */ | |
2 | ||
3 | #include "alloc-util.h" | |
4 | #include "escape.h" | |
5 | #include "extract-word.h" | |
6 | #include "log.h" | |
7 | #include "string-util.h" | |
8 | #include "utf8.h" | |
9 | ||
10 | int extract_first_word(const char **p, char **ret, const char *separators, ExtractFlags flags) { | |
11 | _cleanup_free_ char *s = NULL; | |
12 | size_t sz = 0; | |
13 | char quote = 0; /* 0 or ' or " */ | |
14 | bool backslash = false; /* whether we've just seen a backslash */ | |
15 | char c; | |
16 | int r; | |
17 | ||
18 | assert(p); | |
19 | assert(ret); | |
20 | assert(!FLAGS_SET(flags, EXTRACT_KEEP_QUOTE | EXTRACT_UNQUOTE)); | |
21 | ||
22 | /* Bail early if called after last value or with no input */ | |
23 | if (!*p) | |
24 | goto finish; | |
25 | c = **p; | |
26 | ||
27 | if (!separators) | |
28 | separators = WHITESPACE; | |
29 | ||
30 | /* Parses the first word of a string, and returns it in | |
31 | * *ret. Removes all quotes in the process. When parsing fails | |
32 | * (because of an uneven number of quotes or similar), leaves | |
33 | * the pointer *p at the first invalid character. */ | |
34 | ||
35 | if (flags & EXTRACT_DONT_COALESCE_SEPARATORS) | |
36 | if (!GREEDY_REALLOC(s, sz+1)) | |
37 | return -ENOMEM; | |
38 | ||
39 | for (;; (*p)++, c = **p) { | |
40 | if (c == 0) | |
41 | goto finish_force_terminate; | |
42 | else if (strchr(separators, c)) { | |
43 | if (flags & EXTRACT_DONT_COALESCE_SEPARATORS) { | |
44 | if (!(flags & EXTRACT_RETAIN_SEPARATORS)) | |
45 | (*p)++; | |
46 | goto finish_force_next; | |
47 | } | |
48 | } else { | |
49 | /* We found a non-blank character, so we will always | |
50 | * want to return a string (even if it is empty), | |
51 | * allocate it here. */ | |
52 | if (!GREEDY_REALLOC(s, sz+1)) | |
53 | return -ENOMEM; | |
54 | break; | |
55 | } | |
56 | } | |
57 | ||
58 | for (;; (*p)++, c = **p) { | |
59 | if (backslash) { | |
60 | if (!GREEDY_REALLOC(s, sz+7)) | |
61 | return -ENOMEM; | |
62 | ||
63 | if (c == 0) { | |
64 | if ((flags & EXTRACT_UNESCAPE_RELAX) && | |
65 | (quote == 0 || flags & EXTRACT_RELAX)) { | |
66 | /* If we find an unquoted trailing backslash and we're in | |
67 | * EXTRACT_UNESCAPE_RELAX mode, keep it verbatim in the | |
68 | * output. | |
69 | * | |
70 | * Unbalanced quotes will only be allowed in EXTRACT_RELAX | |
71 | * mode, EXTRACT_UNESCAPE_RELAX mode does not allow them. | |
72 | */ | |
73 | s[sz++] = '\\'; | |
74 | goto finish_force_terminate; | |
75 | } | |
76 | if (flags & EXTRACT_RELAX) | |
77 | goto finish_force_terminate; | |
78 | return -EINVAL; | |
79 | } | |
80 | ||
81 | if (flags & (EXTRACT_CUNESCAPE|EXTRACT_UNESCAPE_SEPARATORS)) { | |
82 | bool eight_bit = false; | |
83 | char32_t u; | |
84 | ||
85 | if ((flags & EXTRACT_CUNESCAPE) && | |
86 | (r = cunescape_one(*p, SIZE_MAX, &u, &eight_bit, false)) >= 0) { | |
87 | /* A valid escaped sequence */ | |
88 | assert(r >= 1); | |
89 | ||
90 | (*p) += r - 1; | |
91 | ||
92 | if (eight_bit) | |
93 | s[sz++] = u; | |
94 | else | |
95 | sz += utf8_encode_unichar(s + sz, u); | |
96 | } else if ((flags & EXTRACT_UNESCAPE_SEPARATORS) && | |
97 | (strchr(separators, **p) || **p == '\\')) | |
98 | /* An escaped separator char or the escape char itself */ | |
99 | s[sz++] = c; | |
100 | else if (flags & EXTRACT_UNESCAPE_RELAX) { | |
101 | s[sz++] = '\\'; | |
102 | s[sz++] = c; | |
103 | } else | |
104 | return -EINVAL; | |
105 | } else | |
106 | s[sz++] = c; | |
107 | ||
108 | backslash = false; | |
109 | ||
110 | } else if (quote != 0) { /* inside either single or double quotes */ | |
111 | for (;; (*p)++, c = **p) { | |
112 | if (c == 0) { | |
113 | if (flags & EXTRACT_RELAX) | |
114 | goto finish_force_terminate; | |
115 | return -EINVAL; | |
116 | } else if (c == quote) { /* found the end quote */ | |
117 | quote = 0; | |
118 | if (flags & EXTRACT_UNQUOTE) | |
119 | break; | |
120 | } else if (c == '\\' && !(flags & EXTRACT_RETAIN_ESCAPE)) { | |
121 | backslash = true; | |
122 | break; | |
123 | } | |
124 | ||
125 | if (!GREEDY_REALLOC(s, sz+2)) | |
126 | return -ENOMEM; | |
127 | ||
128 | s[sz++] = c; | |
129 | ||
130 | if (quote == 0) | |
131 | break; | |
132 | } | |
133 | ||
134 | } else { | |
135 | for (;; (*p)++, c = **p) { | |
136 | if (c == 0) | |
137 | goto finish_force_terminate; | |
138 | else if (IN_SET(c, '\'', '"') && (flags & (EXTRACT_KEEP_QUOTE | EXTRACT_UNQUOTE))) { | |
139 | quote = c; | |
140 | if (flags & EXTRACT_UNQUOTE) | |
141 | break; | |
142 | } else if (c == '\\' && !(flags & EXTRACT_RETAIN_ESCAPE)) { | |
143 | backslash = true; | |
144 | break; | |
145 | } else if (strchr(separators, c)) { | |
146 | if (flags & EXTRACT_DONT_COALESCE_SEPARATORS) { | |
147 | if (!(flags & EXTRACT_RETAIN_SEPARATORS)) | |
148 | (*p)++; | |
149 | goto finish_force_next; | |
150 | } | |
151 | if (!(flags & EXTRACT_RETAIN_SEPARATORS)) | |
152 | /* Skip additional coalesced separators. */ | |
153 | for (;; (*p)++, c = **p) { | |
154 | if (c == 0) | |
155 | goto finish_force_terminate; | |
156 | if (!strchr(separators, c)) | |
157 | break; | |
158 | } | |
159 | goto finish; | |
160 | ||
161 | } | |
162 | ||
163 | if (!GREEDY_REALLOC(s, sz+2)) | |
164 | return -ENOMEM; | |
165 | ||
166 | s[sz++] = c; | |
167 | ||
168 | if (quote != 0) | |
169 | break; | |
170 | } | |
171 | } | |
172 | } | |
173 | ||
174 | finish_force_terminate: | |
175 | *p = NULL; | |
176 | finish: | |
177 | if (!s) { | |
178 | *p = NULL; | |
179 | *ret = NULL; | |
180 | return 0; | |
181 | } | |
182 | ||
183 | finish_force_next: | |
184 | s[sz] = 0; | |
185 | *ret = TAKE_PTR(s); | |
186 | ||
187 | return 1; | |
188 | } | |
189 | ||
190 | int extract_first_word_and_warn( | |
191 | const char **p, | |
192 | char **ret, | |
193 | const char *separators, | |
194 | ExtractFlags flags, | |
195 | const char *unit, | |
196 | const char *filename, | |
197 | unsigned line, | |
198 | const char *rvalue) { | |
199 | ||
200 | /* Try to unquote it, if it fails, warn about it and try again | |
201 | * but this time using EXTRACT_UNESCAPE_RELAX to keep the | |
202 | * backslashes verbatim in invalid escape sequences. */ | |
203 | ||
204 | const char *save; | |
205 | int r; | |
206 | ||
207 | save = *p; | |
208 | r = extract_first_word(p, ret, separators, flags); | |
209 | if (r >= 0) | |
210 | return r; | |
211 | ||
212 | if (r == -EINVAL && !(flags & EXTRACT_UNESCAPE_RELAX)) { | |
213 | ||
214 | /* Retry it with EXTRACT_UNESCAPE_RELAX. */ | |
215 | *p = save; | |
216 | r = extract_first_word(p, ret, separators, flags|EXTRACT_UNESCAPE_RELAX); | |
217 | if (r >= 0) { | |
218 | /* It worked this time, hence it must have been an invalid escape sequence. */ | |
219 | log_syntax(unit, LOG_WARNING, filename, line, EINVAL, "Ignoring unknown escape sequences: \"%s\"", *ret); | |
220 | return r; | |
221 | } | |
222 | ||
223 | /* If it's still EINVAL; then it must be unbalanced quoting, report this. */ | |
224 | if (r == -EINVAL) | |
225 | return log_syntax(unit, LOG_ERR, filename, line, r, "Unbalanced quoting, ignoring: \"%s\"", rvalue); | |
226 | } | |
227 | ||
228 | /* Can be any error, report it */ | |
229 | return log_syntax(unit, LOG_ERR, filename, line, r, "Unable to decode word \"%s\", ignoring: %m", rvalue); | |
230 | } | |
231 | ||
232 | /* We pass ExtractFlags as unsigned int (to avoid undefined behaviour when passing | |
233 | * an object that undergoes default argument promotion as an argument to va_start). | |
234 | * Let's make sure that ExtractFlags fits into an unsigned int. */ | |
235 | assert_cc(sizeof(enum ExtractFlags) <= sizeof(unsigned)); | |
236 | ||
237 | int extract_many_words_internal(const char **p, const char *separators, unsigned flags, ...) { | |
238 | va_list ap; | |
239 | unsigned n = 0; | |
240 | int r; | |
241 | ||
242 | /* Parses a number of words from a string, stripping any quotes if necessary. */ | |
243 | ||
244 | assert(p); | |
245 | ||
246 | /* Count how many words are expected */ | |
247 | va_start(ap, flags); | |
248 | while (va_arg(ap, char**)) | |
249 | n++; | |
250 | va_end(ap); | |
251 | ||
252 | if (n == 0) | |
253 | return 0; | |
254 | ||
255 | /* Read all words into a temporary array */ | |
256 | char **l = newa0(char*, n); | |
257 | unsigned c; | |
258 | ||
259 | for (c = 0; c < n; c++) { | |
260 | r = extract_first_word(p, &l[c], separators, flags); | |
261 | if (r < 0) { | |
262 | free_many_charp(l, c); | |
263 | return r; | |
264 | } | |
265 | if (r == 0) | |
266 | break; | |
267 | } | |
268 | ||
269 | /* If we managed to parse all words, return them in the passed in parameters */ | |
270 | va_start(ap, flags); | |
271 | FOREACH_ARRAY(i, l, n) { | |
272 | char **v = ASSERT_PTR(va_arg(ap, char**)); | |
273 | *v = *i; | |
274 | } | |
275 | va_end(ap); | |
276 | ||
277 | return c; | |
278 | } |