]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/basic/extract-word.c
Add SPDX license identifiers to source files under the LGPL
[thirdparty/systemd.git] / src / basic / extract-word.c
CommitLineData
53e1b683 1/* SPDX-License-Identifier: LGPL-2.1+ */
84ac7bea
LP
2/***
3 This file is part of systemd.
4
5 Copyright 2010 Lennart Poettering
6
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
11
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
16
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
19***/
20
11c3a366
TA
21#include <errno.h>
22#include <stdarg.h>
23#include <stdbool.h>
24#include <stddef.h>
25#include <stdint.h>
26#include <stdlib.h>
27#include <string.h>
28#include <syslog.h>
29
b5efdb8a 30#include "alloc-util.h"
4f5dd394 31#include "escape.h"
b11d6a7b 32#include "extract-word.h"
11c3a366
TA
33#include "log.h"
34#include "macro.h"
b11d6a7b 35#include "string-util.h"
84ac7bea 36#include "utf8.h"
84ac7bea
LP
37
38int extract_first_word(const char **p, char **ret, const char *separators, ExtractFlags flags) {
39 _cleanup_free_ char *s = NULL;
40 size_t allocated = 0, sz = 0;
b85e1c25 41 char c;
84ac7bea
LP
42 int r;
43
44 char quote = 0; /* 0 or ' or " */
45 bool backslash = false; /* whether we've just seen a backslash */
84ac7bea
LP
46
47 assert(p);
48 assert(ret);
49
84ac7bea
LP
50 /* Bail early if called after last value or with no input */
51 if (!*p)
c58bd76a 52 goto finish;
93de9eb7 53 c = **p;
84ac7bea 54
8372da44
FB
55 if (!separators)
56 separators = WHITESPACE;
57
84ac7bea
LP
58 /* Parses the first word of a string, and returns it in
59 * *ret. Removes all quotes in the process. When parsing fails
60 * (because of an uneven number of quotes or similar), leaves
61 * the pointer *p at the first invalid character. */
62
b85e1c25
FB
63 if (flags & EXTRACT_DONT_COALESCE_SEPARATORS)
64 if (!GREEDY_REALLOC(s, allocated, sz+1))
65 return -ENOMEM;
84ac7bea 66
313cefa1 67 for (;; (*p)++, c = **p) {
b85e1c25
FB
68 if (c == 0)
69 goto finish_force_terminate;
70 else if (strchr(separators, c)) {
93de9eb7 71 if (flags & EXTRACT_DONT_COALESCE_SEPARATORS) {
313cefa1 72 (*p)++;
b85e1c25 73 goto finish_force_next;
93de9eb7 74 }
b85e1c25 75 } else {
84ac7bea
LP
76 /* We found a non-blank character, so we will always
77 * want to return a string (even if it is empty),
78 * allocate it here. */
79 if (!GREEDY_REALLOC(s, allocated, sz+1))
80 return -ENOMEM;
b85e1c25 81 break;
84ac7bea 82 }
b85e1c25
FB
83 }
84
313cefa1 85 for (;; (*p)++, c = **p) {
84ac7bea
LP
86 if (backslash) {
87 if (!GREEDY_REALLOC(s, allocated, sz+7))
88 return -ENOMEM;
89
90 if (c == 0) {
91 if ((flags & EXTRACT_CUNESCAPE_RELAX) &&
92 (!quote || flags & EXTRACT_RELAX)) {
93 /* If we find an unquoted trailing backslash and we're in
94 * EXTRACT_CUNESCAPE_RELAX mode, keep it verbatim in the
95 * output.
96 *
97 * Unbalanced quotes will only be allowed in EXTRACT_RELAX
98 * mode, EXTRACT_CUNESCAPE_RELAX mode does not allow them.
99 */
100 s[sz++] = '\\';
101 goto finish_force_terminate;
102 }
103 if (flags & EXTRACT_RELAX)
104 goto finish_force_terminate;
105 return -EINVAL;
106 }
107
108 if (flags & EXTRACT_CUNESCAPE) {
3565e095 109 bool eight_bit = false;
c932fb71 110 char32_t u;
84ac7bea 111
3565e095 112 r = cunescape_one(*p, (size_t) -1, &u, &eight_bit);
84ac7bea
LP
113 if (r < 0) {
114 if (flags & EXTRACT_CUNESCAPE_RELAX) {
115 s[sz++] = '\\';
116 s[sz++] = c;
3ff13c29
FB
117 } else
118 return -EINVAL;
119 } else {
120 (*p) += r - 1;
121
3565e095
ZJS
122 if (eight_bit)
123 s[sz++] = u;
3ff13c29 124 else
3565e095 125 sz += utf8_encode_unichar(s + sz, u);
84ac7bea 126 }
84ac7bea
LP
127 } else
128 s[sz++] = c;
129
84ac7bea
LP
130 backslash = false;
131
132 } else if (quote) { /* inside either single or double quotes */
313cefa1 133 for (;; (*p)++, c = **p) {
27fc921b
FB
134 if (c == 0) {
135 if (flags & EXTRACT_RELAX)
136 goto finish_force_terminate;
137 return -EINVAL;
138 } else if (c == quote) { /* found the end quote */
139 quote = 0;
140 break;
c89f52ac 141 } else if (c == '\\' && !(flags & EXTRACT_RETAIN_ESCAPE)) {
27fc921b
FB
142 backslash = true;
143 break;
144 } else {
145 if (!GREEDY_REALLOC(s, allocated, sz+2))
146 return -ENOMEM;
84ac7bea 147
27fc921b
FB
148 s[sz++] = c;
149 }
84ac7bea
LP
150 }
151
84ac7bea 152 } else {
313cefa1 153 for (;; (*p)++, c = **p) {
27fc921b
FB
154 if (c == 0)
155 goto finish_force_terminate;
4c701096 156 else if (IN_SET(c, '\'', '"') && (flags & EXTRACT_QUOTES)) {
27fc921b
FB
157 quote = c;
158 break;
c89f52ac 159 } else if (c == '\\' && !(flags & EXTRACT_RETAIN_ESCAPE)) {
27fc921b
FB
160 backslash = true;
161 break;
162 } else if (strchr(separators, c)) {
163 if (flags & EXTRACT_DONT_COALESCE_SEPARATORS) {
313cefa1 164 (*p)++;
27fc921b
FB
165 goto finish_force_next;
166 }
0247447e 167 /* Skip additional coalesced separators. */
313cefa1 168 for (;; (*p)++, c = **p) {
0247447e
FB
169 if (c == 0)
170 goto finish_force_terminate;
171 if (!strchr(separators, c))
172 break;
173 }
174 goto finish;
175
27fc921b
FB
176 } else {
177 if (!GREEDY_REALLOC(s, allocated, sz+2))
178 return -ENOMEM;
84ac7bea 179
27fc921b
FB
180 s[sz++] = c;
181 }
84ac7bea
LP
182 }
183 }
84ac7bea
LP
184 }
185
186finish_force_terminate:
187 *p = NULL;
188finish:
189 if (!s) {
190 *p = NULL;
191 *ret = NULL;
192 return 0;
193 }
194
195finish_force_next:
196 s[sz] = 0;
197 *ret = s;
198 s = NULL;
199
200 return 1;
201}
202
203int extract_first_word_and_warn(
204 const char **p,
205 char **ret,
206 const char *separators,
207 ExtractFlags flags,
208 const char *unit,
209 const char *filename,
210 unsigned line,
211 const char *rvalue) {
212
dea7b6b0
LP
213 /* Try to unquote it, if it fails, warn about it and try again
214 * but this time using EXTRACT_CUNESCAPE_RELAX to keep the
215 * backslashes verbatim in invalid escape sequences. */
216
84ac7bea
LP
217 const char *save;
218 int r;
219
220 save = *p;
221 r = extract_first_word(p, ret, separators, flags);
dea7b6b0
LP
222 if (r >= 0)
223 return r;
224
225 if (r == -EINVAL && !(flags & EXTRACT_CUNESCAPE_RELAX)) {
84ac7bea
LP
226
227 /* Retry it with EXTRACT_CUNESCAPE_RELAX. */
228 *p = save;
229 r = extract_first_word(p, ret, separators, flags|EXTRACT_CUNESCAPE_RELAX);
dea7b6b0 230 if (r >= 0) {
330785f5
ZJS
231 /* It worked this time, hence it must have been an invalid escape sequence. */
232 log_syntax(unit, LOG_WARNING, filename, line, EINVAL, "Ignoring unknown escape sequences: \"%s\"", *ret);
dea7b6b0
LP
233 return r;
234 }
235
236 /* If it's still EINVAL; then it must be unbalanced quoting, report this. */
237 if (r == -EINVAL)
238 return log_syntax(unit, LOG_ERR, filename, line, r, "Unbalanced quoting, ignoring: \"%s\"", rvalue);
84ac7bea
LP
239 }
240
dea7b6b0
LP
241 /* Can be any error, report it */
242 return log_syntax(unit, LOG_ERR, filename, line, r, "Unable to decode word \"%s\", ignoring: %m", rvalue);
84ac7bea
LP
243}
244
e4d85dbb
ZJS
245/* We pass ExtractFlags as unsigned int (to avoid undefined behaviour when passing
246 * an object that undergoes default argument promotion as an argument to va_start).
247 * Let's make sure that ExtractFlags fits into an unsigned int. */
248assert_cc(sizeof(enum ExtractFlags) <= sizeof(unsigned));
249
250int extract_many_words(const char **p, const char *separators, unsigned flags, ...) {
84ac7bea
LP
251 va_list ap;
252 char **l;
253 int n = 0, i, c, r;
254
255 /* Parses a number of words from a string, stripping any
256 * quotes if necessary. */
257
258 assert(p);
259
260 /* Count how many words are expected */
261 va_start(ap, flags);
262 for (;;) {
263 if (!va_arg(ap, char **))
264 break;
265 n++;
266 }
267 va_end(ap);
268
269 if (n <= 0)
270 return 0;
271
272 /* Read all words into a temporary array */
273 l = newa0(char*, n);
274 for (c = 0; c < n; c++) {
275
276 r = extract_first_word(p, &l[c], separators, flags);
277 if (r < 0) {
278 int j;
279
280 for (j = 0; j < c; j++)
281 free(l[j]);
282
283 return r;
284 }
285
286 if (r == 0)
287 break;
288 }
289
290 /* If we managed to parse all words, return them in the passed
291 * in parameters */
292 va_start(ap, flags);
293 for (i = 0; i < n; i++) {
294 char **v;
295
296 v = va_arg(ap, char **);
297 assert(v);
298
299 *v = l[i];
300 }
301 va_end(ap);
302
303 return c;
304}