]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/basic/extract-word.c
man/systemd-sysext: list ephemeral/ephemeral-import in the list of options
[thirdparty/systemd.git] / src / basic / extract-word.c
CommitLineData
db9ecf05 1/* SPDX-License-Identifier: LGPL-2.1-or-later */
84ac7bea 2
b5efdb8a 3#include "alloc-util.h"
4f5dd394 4#include "escape.h"
b11d6a7b 5#include "extract-word.h"
11c3a366 6#include "log.h"
b11d6a7b 7#include "string-util.h"
84ac7bea 8#include "utf8.h"
84ac7bea
LP
9
10int extract_first_word(const char **p, char **ret, const char *separators, ExtractFlags flags) {
11 _cleanup_free_ char *s = NULL;
319a4f4b 12 size_t sz = 0;
84ac7bea
LP
13 char quote = 0; /* 0 or ' or " */
14 bool backslash = false; /* whether we've just seen a backslash */
fa67d9c0
LP
15 char c;
16 int r;
84ac7bea
LP
17
18 assert(p);
19 assert(ret);
1104d114 20 assert(!FLAGS_SET(flags, EXTRACT_KEEP_QUOTE | EXTRACT_UNQUOTE));
84ac7bea 21
84ac7bea
LP
22 /* Bail early if called after last value or with no input */
23 if (!*p)
c58bd76a 24 goto finish;
93de9eb7 25 c = **p;
84ac7bea 26
8372da44
FB
27 if (!separators)
28 separators = WHITESPACE;
29
84ac7bea
LP
30 /* Parses the first word of a string, and returns it in
31 * *ret. Removes all quotes in the process. When parsing fails
32 * (because of an uneven number of quotes or similar), leaves
33 * the pointer *p at the first invalid character. */
34
b85e1c25 35 if (flags & EXTRACT_DONT_COALESCE_SEPARATORS)
319a4f4b 36 if (!GREEDY_REALLOC(s, sz+1))
b85e1c25 37 return -ENOMEM;
84ac7bea 38
313cefa1 39 for (;; (*p)++, c = **p) {
b85e1c25
FB
40 if (c == 0)
41 goto finish_force_terminate;
42 else if (strchr(separators, c)) {
93de9eb7 43 if (flags & EXTRACT_DONT_COALESCE_SEPARATORS) {
70b6ee61
LB
44 if (!(flags & EXTRACT_RETAIN_SEPARATORS))
45 (*p)++;
b85e1c25 46 goto finish_force_next;
93de9eb7 47 }
b85e1c25 48 } else {
84ac7bea
LP
49 /* We found a non-blank character, so we will always
50 * want to return a string (even if it is empty),
51 * allocate it here. */
319a4f4b 52 if (!GREEDY_REALLOC(s, sz+1))
84ac7bea 53 return -ENOMEM;
b85e1c25 54 break;
84ac7bea 55 }
b85e1c25
FB
56 }
57
313cefa1 58 for (;; (*p)++, c = **p) {
84ac7bea 59 if (backslash) {
319a4f4b 60 if (!GREEDY_REALLOC(s, sz+7))
84ac7bea
LP
61 return -ENOMEM;
62
63 if (c == 0) {
3141089f 64 if ((flags & EXTRACT_UNESCAPE_RELAX) &&
fa67d9c0 65 (quote == 0 || flags & EXTRACT_RELAX)) {
84ac7bea 66 /* If we find an unquoted trailing backslash and we're in
3141089f 67 * EXTRACT_UNESCAPE_RELAX mode, keep it verbatim in the
84ac7bea
LP
68 * output.
69 *
70 * Unbalanced quotes will only be allowed in EXTRACT_RELAX
3141089f 71 * mode, EXTRACT_UNESCAPE_RELAX mode does not allow them.
84ac7bea
LP
72 */
73 s[sz++] = '\\';
74 goto finish_force_terminate;
75 }
76 if (flags & EXTRACT_RELAX)
77 goto finish_force_terminate;
78 return -EINVAL;
79 }
80
1e198efc 81 if (flags & (EXTRACT_CUNESCAPE|EXTRACT_UNESCAPE_SEPARATORS)) {
3565e095 82 bool eight_bit = false;
c932fb71 83 char32_t u;
84ac7bea 84
1e198efc 85 if ((flags & EXTRACT_CUNESCAPE) &&
f5fbe71d 86 (r = cunescape_one(*p, SIZE_MAX, &u, &eight_bit, false)) >= 0) {
1e198efc
ZJS
87 /* A valid escaped sequence */
88 assert(r >= 1);
89
3ff13c29
FB
90 (*p) += r - 1;
91
3565e095
ZJS
92 if (eight_bit)
93 s[sz++] = u;
3ff13c29 94 else
3565e095 95 sz += utf8_encode_unichar(s + sz, u);
1e198efc 96 } else if ((flags & EXTRACT_UNESCAPE_SEPARATORS) &&
76c4e48e
ZJS
97 (strchr(separators, **p) || **p == '\\'))
98 /* An escaped separator char or the escape char itself */
1e198efc 99 s[sz++] = c;
3141089f 100 else if (flags & EXTRACT_UNESCAPE_RELAX) {
1e198efc
ZJS
101 s[sz++] = '\\';
102 s[sz++] = c;
103 } else
104 return -EINVAL;
84ac7bea
LP
105 } else
106 s[sz++] = c;
107
84ac7bea
LP
108 backslash = false;
109
fa67d9c0 110 } else if (quote != 0) { /* inside either single or double quotes */
313cefa1 111 for (;; (*p)++, c = **p) {
27fc921b
FB
112 if (c == 0) {
113 if (flags & EXTRACT_RELAX)
114 goto finish_force_terminate;
115 return -EINVAL;
116 } else if (c == quote) { /* found the end quote */
117 quote = 0;
1104d114
YW
118 if (flags & EXTRACT_UNQUOTE)
119 break;
c89f52ac 120 } else if (c == '\\' && !(flags & EXTRACT_RETAIN_ESCAPE)) {
27fc921b
FB
121 backslash = true;
122 break;
27fc921b 123 }
1104d114
YW
124
125 if (!GREEDY_REALLOC(s, sz+2))
126 return -ENOMEM;
127
128 s[sz++] = c;
129
130 if (quote == 0)
131 break;
84ac7bea
LP
132 }
133
84ac7bea 134 } else {
313cefa1 135 for (;; (*p)++, c = **p) {
27fc921b
FB
136 if (c == 0)
137 goto finish_force_terminate;
1104d114 138 else if (IN_SET(c, '\'', '"') && (flags & (EXTRACT_KEEP_QUOTE | EXTRACT_UNQUOTE))) {
27fc921b 139 quote = c;
1104d114
YW
140 if (flags & EXTRACT_UNQUOTE)
141 break;
c89f52ac 142 } else if (c == '\\' && !(flags & EXTRACT_RETAIN_ESCAPE)) {
27fc921b
FB
143 backslash = true;
144 break;
145 } else if (strchr(separators, c)) {
146 if (flags & EXTRACT_DONT_COALESCE_SEPARATORS) {
70b6ee61
LB
147 if (!(flags & EXTRACT_RETAIN_SEPARATORS))
148 (*p)++;
27fc921b
FB
149 goto finish_force_next;
150 }
70b6ee61
LB
151 if (!(flags & EXTRACT_RETAIN_SEPARATORS))
152 /* Skip additional coalesced separators. */
153 for (;; (*p)++, c = **p) {
154 if (c == 0)
155 goto finish_force_terminate;
156 if (!strchr(separators, c))
157 break;
158 }
0247447e
FB
159 goto finish;
160
27fc921b 161 }
1104d114
YW
162
163 if (!GREEDY_REALLOC(s, sz+2))
164 return -ENOMEM;
165
166 s[sz++] = c;
167
168 if (quote != 0)
169 break;
84ac7bea
LP
170 }
171 }
84ac7bea
LP
172 }
173
174finish_force_terminate:
175 *p = NULL;
176finish:
177 if (!s) {
178 *p = NULL;
179 *ret = NULL;
180 return 0;
181 }
182
183finish_force_next:
184 s[sz] = 0;
ae2a15bc 185 *ret = TAKE_PTR(s);
84ac7bea
LP
186
187 return 1;
188}
189
190int extract_first_word_and_warn(
191 const char **p,
192 char **ret,
193 const char *separators,
194 ExtractFlags flags,
195 const char *unit,
196 const char *filename,
197 unsigned line,
198 const char *rvalue) {
199
dea7b6b0 200 /* Try to unquote it, if it fails, warn about it and try again
3141089f 201 * but this time using EXTRACT_UNESCAPE_RELAX to keep the
dea7b6b0
LP
202 * backslashes verbatim in invalid escape sequences. */
203
84ac7bea
LP
204 const char *save;
205 int r;
206
207 save = *p;
208 r = extract_first_word(p, ret, separators, flags);
dea7b6b0
LP
209 if (r >= 0)
210 return r;
211
3141089f 212 if (r == -EINVAL && !(flags & EXTRACT_UNESCAPE_RELAX)) {
84ac7bea 213
3141089f 214 /* Retry it with EXTRACT_UNESCAPE_RELAX. */
84ac7bea 215 *p = save;
3141089f 216 r = extract_first_word(p, ret, separators, flags|EXTRACT_UNESCAPE_RELAX);
dea7b6b0 217 if (r >= 0) {
330785f5
ZJS
218 /* It worked this time, hence it must have been an invalid escape sequence. */
219 log_syntax(unit, LOG_WARNING, filename, line, EINVAL, "Ignoring unknown escape sequences: \"%s\"", *ret);
dea7b6b0
LP
220 return r;
221 }
222
223 /* If it's still EINVAL; then it must be unbalanced quoting, report this. */
224 if (r == -EINVAL)
225 return log_syntax(unit, LOG_ERR, filename, line, r, "Unbalanced quoting, ignoring: \"%s\"", rvalue);
84ac7bea
LP
226 }
227
dea7b6b0
LP
228 /* Can be any error, report it */
229 return log_syntax(unit, LOG_ERR, filename, line, r, "Unable to decode word \"%s\", ignoring: %m", rvalue);
84ac7bea
LP
230}
231
e4d85dbb
ZJS
232/* We pass ExtractFlags as unsigned int (to avoid undefined behaviour when passing
233 * an object that undergoes default argument promotion as an argument to va_start).
234 * Let's make sure that ExtractFlags fits into an unsigned int. */
235assert_cc(sizeof(enum ExtractFlags) <= sizeof(unsigned));
236
4f495126 237int extract_many_words_internal(const char **p, const char *separators, unsigned flags, ...) {
84ac7bea 238 va_list ap;
4f495126
MY
239 unsigned n = 0;
240 int r;
84ac7bea 241
4f495126 242 /* Parses a number of words from a string, stripping any quotes if necessary. */
84ac7bea
LP
243
244 assert(p);
245
246 /* Count how many words are expected */
247 va_start(ap, flags);
4f495126 248 while (va_arg(ap, char**))
84ac7bea 249 n++;
84ac7bea
LP
250 va_end(ap);
251
4f495126 252 if (n == 0)
84ac7bea
LP
253 return 0;
254
255 /* Read all words into a temporary array */
4f495126
MY
256 char **l = newa0(char*, n);
257 unsigned c;
84ac7bea 258
4f495126 259 for (c = 0; c < n; c++) {
84ac7bea
LP
260 r = extract_first_word(p, &l[c], separators, flags);
261 if (r < 0) {
24ae45cb 262 free_many_charp(l, c);
84ac7bea
LP
263 return r;
264 }
84ac7bea
LP
265 if (r == 0)
266 break;
267 }
268
4f495126 269 /* If we managed to parse all words, return them in the passed in parameters */
84ac7bea 270 va_start(ap, flags);
4f495126
MY
271 FOREACH_ARRAY(i, l, n) {
272 char **v = ASSERT_PTR(va_arg(ap, char**));
273 *v = *i;
84ac7bea
LP
274 }
275 va_end(ap);
276
277 return c;
278}