]> git.ipfire.org Git - thirdparty/systemd.git/blame_incremental - src/basic/extract-word.c
man/systemd-sysext: list ephemeral/ephemeral-import in the list of options
[thirdparty/systemd.git] / src / basic / extract-word.c
... / ...
CommitLineData
1/* SPDX-License-Identifier: LGPL-2.1-or-later */
2
3#include "alloc-util.h"
4#include "escape.h"
5#include "extract-word.h"
6#include "log.h"
7#include "string-util.h"
8#include "utf8.h"
9
10int extract_first_word(const char **p, char **ret, const char *separators, ExtractFlags flags) {
11 _cleanup_free_ char *s = NULL;
12 size_t sz = 0;
13 char quote = 0; /* 0 or ' or " */
14 bool backslash = false; /* whether we've just seen a backslash */
15 char c;
16 int r;
17
18 assert(p);
19 assert(ret);
20 assert(!FLAGS_SET(flags, EXTRACT_KEEP_QUOTE | EXTRACT_UNQUOTE));
21
22 /* Bail early if called after last value or with no input */
23 if (!*p)
24 goto finish;
25 c = **p;
26
27 if (!separators)
28 separators = WHITESPACE;
29
30 /* Parses the first word of a string, and returns it in
31 * *ret. Removes all quotes in the process. When parsing fails
32 * (because of an uneven number of quotes or similar), leaves
33 * the pointer *p at the first invalid character. */
34
35 if (flags & EXTRACT_DONT_COALESCE_SEPARATORS)
36 if (!GREEDY_REALLOC(s, sz+1))
37 return -ENOMEM;
38
39 for (;; (*p)++, c = **p) {
40 if (c == 0)
41 goto finish_force_terminate;
42 else if (strchr(separators, c)) {
43 if (flags & EXTRACT_DONT_COALESCE_SEPARATORS) {
44 if (!(flags & EXTRACT_RETAIN_SEPARATORS))
45 (*p)++;
46 goto finish_force_next;
47 }
48 } else {
49 /* We found a non-blank character, so we will always
50 * want to return a string (even if it is empty),
51 * allocate it here. */
52 if (!GREEDY_REALLOC(s, sz+1))
53 return -ENOMEM;
54 break;
55 }
56 }
57
58 for (;; (*p)++, c = **p) {
59 if (backslash) {
60 if (!GREEDY_REALLOC(s, sz+7))
61 return -ENOMEM;
62
63 if (c == 0) {
64 if ((flags & EXTRACT_UNESCAPE_RELAX) &&
65 (quote == 0 || flags & EXTRACT_RELAX)) {
66 /* If we find an unquoted trailing backslash and we're in
67 * EXTRACT_UNESCAPE_RELAX mode, keep it verbatim in the
68 * output.
69 *
70 * Unbalanced quotes will only be allowed in EXTRACT_RELAX
71 * mode, EXTRACT_UNESCAPE_RELAX mode does not allow them.
72 */
73 s[sz++] = '\\';
74 goto finish_force_terminate;
75 }
76 if (flags & EXTRACT_RELAX)
77 goto finish_force_terminate;
78 return -EINVAL;
79 }
80
81 if (flags & (EXTRACT_CUNESCAPE|EXTRACT_UNESCAPE_SEPARATORS)) {
82 bool eight_bit = false;
83 char32_t u;
84
85 if ((flags & EXTRACT_CUNESCAPE) &&
86 (r = cunescape_one(*p, SIZE_MAX, &u, &eight_bit, false)) >= 0) {
87 /* A valid escaped sequence */
88 assert(r >= 1);
89
90 (*p) += r - 1;
91
92 if (eight_bit)
93 s[sz++] = u;
94 else
95 sz += utf8_encode_unichar(s + sz, u);
96 } else if ((flags & EXTRACT_UNESCAPE_SEPARATORS) &&
97 (strchr(separators, **p) || **p == '\\'))
98 /* An escaped separator char or the escape char itself */
99 s[sz++] = c;
100 else if (flags & EXTRACT_UNESCAPE_RELAX) {
101 s[sz++] = '\\';
102 s[sz++] = c;
103 } else
104 return -EINVAL;
105 } else
106 s[sz++] = c;
107
108 backslash = false;
109
110 } else if (quote != 0) { /* inside either single or double quotes */
111 for (;; (*p)++, c = **p) {
112 if (c == 0) {
113 if (flags & EXTRACT_RELAX)
114 goto finish_force_terminate;
115 return -EINVAL;
116 } else if (c == quote) { /* found the end quote */
117 quote = 0;
118 if (flags & EXTRACT_UNQUOTE)
119 break;
120 } else if (c == '\\' && !(flags & EXTRACT_RETAIN_ESCAPE)) {
121 backslash = true;
122 break;
123 }
124
125 if (!GREEDY_REALLOC(s, sz+2))
126 return -ENOMEM;
127
128 s[sz++] = c;
129
130 if (quote == 0)
131 break;
132 }
133
134 } else {
135 for (;; (*p)++, c = **p) {
136 if (c == 0)
137 goto finish_force_terminate;
138 else if (IN_SET(c, '\'', '"') && (flags & (EXTRACT_KEEP_QUOTE | EXTRACT_UNQUOTE))) {
139 quote = c;
140 if (flags & EXTRACT_UNQUOTE)
141 break;
142 } else if (c == '\\' && !(flags & EXTRACT_RETAIN_ESCAPE)) {
143 backslash = true;
144 break;
145 } else if (strchr(separators, c)) {
146 if (flags & EXTRACT_DONT_COALESCE_SEPARATORS) {
147 if (!(flags & EXTRACT_RETAIN_SEPARATORS))
148 (*p)++;
149 goto finish_force_next;
150 }
151 if (!(flags & EXTRACT_RETAIN_SEPARATORS))
152 /* Skip additional coalesced separators. */
153 for (;; (*p)++, c = **p) {
154 if (c == 0)
155 goto finish_force_terminate;
156 if (!strchr(separators, c))
157 break;
158 }
159 goto finish;
160
161 }
162
163 if (!GREEDY_REALLOC(s, sz+2))
164 return -ENOMEM;
165
166 s[sz++] = c;
167
168 if (quote != 0)
169 break;
170 }
171 }
172 }
173
174finish_force_terminate:
175 *p = NULL;
176finish:
177 if (!s) {
178 *p = NULL;
179 *ret = NULL;
180 return 0;
181 }
182
183finish_force_next:
184 s[sz] = 0;
185 *ret = TAKE_PTR(s);
186
187 return 1;
188}
189
190int extract_first_word_and_warn(
191 const char **p,
192 char **ret,
193 const char *separators,
194 ExtractFlags flags,
195 const char *unit,
196 const char *filename,
197 unsigned line,
198 const char *rvalue) {
199
200 /* Try to unquote it, if it fails, warn about it and try again
201 * but this time using EXTRACT_UNESCAPE_RELAX to keep the
202 * backslashes verbatim in invalid escape sequences. */
203
204 const char *save;
205 int r;
206
207 save = *p;
208 r = extract_first_word(p, ret, separators, flags);
209 if (r >= 0)
210 return r;
211
212 if (r == -EINVAL && !(flags & EXTRACT_UNESCAPE_RELAX)) {
213
214 /* Retry it with EXTRACT_UNESCAPE_RELAX. */
215 *p = save;
216 r = extract_first_word(p, ret, separators, flags|EXTRACT_UNESCAPE_RELAX);
217 if (r >= 0) {
218 /* It worked this time, hence it must have been an invalid escape sequence. */
219 log_syntax(unit, LOG_WARNING, filename, line, EINVAL, "Ignoring unknown escape sequences: \"%s\"", *ret);
220 return r;
221 }
222
223 /* If it's still EINVAL; then it must be unbalanced quoting, report this. */
224 if (r == -EINVAL)
225 return log_syntax(unit, LOG_ERR, filename, line, r, "Unbalanced quoting, ignoring: \"%s\"", rvalue);
226 }
227
228 /* Can be any error, report it */
229 return log_syntax(unit, LOG_ERR, filename, line, r, "Unable to decode word \"%s\", ignoring: %m", rvalue);
230}
231
232/* We pass ExtractFlags as unsigned int (to avoid undefined behaviour when passing
233 * an object that undergoes default argument promotion as an argument to va_start).
234 * Let's make sure that ExtractFlags fits into an unsigned int. */
235assert_cc(sizeof(enum ExtractFlags) <= sizeof(unsigned));
236
237int extract_many_words_internal(const char **p, const char *separators, unsigned flags, ...) {
238 va_list ap;
239 unsigned n = 0;
240 int r;
241
242 /* Parses a number of words from a string, stripping any quotes if necessary. */
243
244 assert(p);
245
246 /* Count how many words are expected */
247 va_start(ap, flags);
248 while (va_arg(ap, char**))
249 n++;
250 va_end(ap);
251
252 if (n == 0)
253 return 0;
254
255 /* Read all words into a temporary array */
256 char **l = newa0(char*, n);
257 unsigned c;
258
259 for (c = 0; c < n; c++) {
260 r = extract_first_word(p, &l[c], separators, flags);
261 if (r < 0) {
262 free_many_charp(l, c);
263 return r;
264 }
265 if (r == 0)
266 break;
267 }
268
269 /* If we managed to parse all words, return them in the passed in parameters */
270 va_start(ap, flags);
271 FOREACH_ARRAY(i, l, n) {
272 char **v = ASSERT_PTR(va_arg(ap, char**));
273 *v = *i;
274 }
275 va_end(ap);
276
277 return c;
278}