1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
11 #include "alloc-util.h"
13 #include "extract-word.h"
16 #include "string-util.h"
/* Extracts the first word of the string *p into *ret.
 *
 * p          - in/out cursor into the input; the scanning loops below advance *p one character at a
 *              time and reload the current character c from it.
 * ret        - receives the extracted word.
 * separators - set of characters that delimit words (tested with strchr()); WHITESPACE is assigned
 *              as the fallback set.
 * flags      - ExtractFlags bitmask. The flags consulted here select quote handling (EXTRACT_UNQUOTE,
 *              EXTRACT_KEEP_QUOTE), escape handling (EXTRACT_CUNESCAPE, EXTRACT_UNESCAPE_SEPARATORS,
 *              EXTRACT_UNESCAPE_RELAX, EXTRACT_RETAIN_ESCAPE), separator handling
 *              (EXTRACT_DONT_COALESCE_SEPARATORS, EXTRACT_RETAIN_SEPARATORS) and leniency
 *              (EXTRACT_RELAX).
 *
 * NOTE(review): extract_first_word_and_warn() in this file treats -EINVAL as a parse failure and
 * any other failure as a generic error; confirm the full return contract against the header. */
20 int extract_first_word(const char **p
, char **ret
, const char *separators
, ExtractFlags flags
) {
/* Buffer the word is accumulated in; grown on demand with GREEDY_REALLOC() further down. */
21 _cleanup_free_
char *s
= NULL
;
23 char quote
= 0; /* 0 or ' or " */
24 bool backslash
= false; /* whether we've just seen a backslash */
/* Keeping quotes and stripping quotes are contradictory requests. Presumably FLAGS_SET() is true
 * only when all of the given bits are set, so this rejects callers passing both - TODO confirm. */
30 assert(!FLAGS_SET(flags
, EXTRACT_KEEP_QUOTE
| EXTRACT_UNQUOTE
));
32 /* Bail early if called after last value or with no input */
/* Use whitespace as the separator set. NOTE(review): presumably applied only when the caller
 * passed no separators - confirm against the guarding condition. */
38 separators
= WHITESPACE
;
40 /* Parses the first word of a string, and returns it in
41 * *ret. Removes all quotes in the process. When parsing fails
42 * (because of an uneven number of quotes or similar), leaves
43 * the pointer *p at the first invalid character. */
45 if (flags
& EXTRACT_DONT_COALESCE_SEPARATORS
)
46 if (!GREEDY_REALLOC(s
, sz
+1))
/* Scan loop: every iteration advances *p and reloads c from the new position. */
49 for (;; (*p
)++, c
= **p
) {
51 goto finish_force_terminate
;
52 else if (strchr(separators
, c
)) {
53 if (flags
& EXTRACT_DONT_COALESCE_SEPARATORS
) {
54 if (!(flags
& EXTRACT_RETAIN_SEPARATORS
))
56 goto finish_force_next
;
59 /* We found a non-blank character, so we will always
60 * want to return a string (even if it is empty),
61 * allocate it here. */
62 if (!GREEDY_REALLOC(s
, sz
+1))
68 for (;; (*p
)++, c
= **p
) {
70 if (!GREEDY_REALLOC(s
, sz
+7))
74 if ((flags
& EXTRACT_UNESCAPE_RELAX
) &&
75 (quote
== 0 || flags
& EXTRACT_RELAX
)) {
76 /* If we find an unquoted trailing backslash and we're in
77 * EXTRACT_UNESCAPE_RELAX mode, keep it verbatim in the
80 * Unbalanced quotes will only be allowed in EXTRACT_RELAX
81 * mode, EXTRACT_UNESCAPE_RELAX mode does not allow them.
84 goto finish_force_terminate
;
86 if (flags
& EXTRACT_RELAX
)
87 goto finish_force_terminate
;
91 if (flags
& (EXTRACT_CUNESCAPE
|EXTRACT_UNESCAPE_SEPARATORS
)) {
92 bool eight_bit
= false;
95 if ((flags
& EXTRACT_CUNESCAPE
) &&
96 (r
= cunescape_one(*p
, SIZE_MAX
, &u
, &eight_bit
, false)) >= 0) {
97 /* A valid escaped sequence */
105 sz
+= utf8_encode_unichar(s
+ sz
, u
);
106 } else if ((flags
& EXTRACT_UNESCAPE_SEPARATORS
) &&
107 (strchr(separators
, **p
) || **p
== '\\'))
108 /* An escaped separator char or the escape char itself */
110 else if (flags
& EXTRACT_UNESCAPE_RELAX
) {
120 } else if (quote
!= 0) { /* inside either single or double quotes */
121 for (;; (*p
)++, c
= **p
) {
123 if (flags
& EXTRACT_RELAX
)
124 goto finish_force_terminate
;
126 } else if (c
== quote
) { /* found the end quote */
128 if (flags
& EXTRACT_UNQUOTE
)
130 } else if (c
== '\\' && !(flags
& EXTRACT_RETAIN_ESCAPE
)) {
135 if (!GREEDY_REALLOC(s
, sz
+2))
145 for (;; (*p
)++, c
= **p
) {
147 goto finish_force_terminate
;
148 else if (IN_SET(c
, '\'', '"') && (flags
& (EXTRACT_KEEP_QUOTE
| EXTRACT_UNQUOTE
))) {
150 if (flags
& EXTRACT_UNQUOTE
)
152 } else if (c
== '\\' && !(flags
& EXTRACT_RETAIN_ESCAPE
)) {
155 } else if (strchr(separators
, c
)) {
156 if (flags
& EXTRACT_DONT_COALESCE_SEPARATORS
) {
157 if (!(flags
& EXTRACT_RETAIN_SEPARATORS
))
159 goto finish_force_next
;
161 if (!(flags
& EXTRACT_RETAIN_SEPARATORS
))
162 /* Skip additional coalesced separators. */
163 for (;; (*p
)++, c
= **p
) {
165 goto finish_force_terminate
;
166 if (!strchr(separators
, c
))
173 if (!GREEDY_REALLOC(s
, sz
+2))
/* Exit label shared by the goto finish_force_terminate statements above. */
184 finish_force_terminate
:
/* Wrapper around extract_first_word() that reports parse problems through log_syntax().
 *
 * The first attempt uses the flags as given. If it fails with -EINVAL and EXTRACT_UNESCAPE_RELAX
 * was not already requested, the call is repeated with that flag added; per the comments below, a
 * retry that works leads to a LOG_WARNING about unknown escape sequences. A remaining -EINVAL is
 * reported at LOG_ERR as unbalanced quoting, and any other failure is reported at LOG_ERR as an
 * undecodable word; in both of these cases the log_syntax() result is returned.
 *
 * separators - passed through unchanged to extract_first_word().
 * filename   - handed to log_syntax() together with unit and line.
 * rvalue     - the original text, included in the LOG_ERR messages. */
200 int extract_first_word_and_warn(
203 const char *separators
,
206 const char *filename
,
208 const char *rvalue
) {
210 /* Try to unquote it, if it fails, warn about it and try again
211 * but this time using EXTRACT_UNESCAPE_RELAX to keep the
212 * backslashes verbatim in invalid escape sequences. */
218 r
= extract_first_word(p
, ret
, separators
, flags
);
/* Retrying only makes sense if the relaxed flag was not set already: otherwise the second call
 * would receive exactly the same flags as the first one. */
222 if (r
== -EINVAL
&& !(flags
& EXTRACT_UNESCAPE_RELAX
)) {
224 /* Retry it with EXTRACT_UNESCAPE_RELAX. */
226 r
= extract_first_word(p
, ret
, separators
, flags
|EXTRACT_UNESCAPE_RELAX
);
228 /* It worked this time, hence it must have been an invalid escape sequence. */
229 log_syntax(unit
, LOG_WARNING
, filename
, line
, EINVAL
, "Ignoring unknown escape sequences: \"%s\"", *ret
);
233 /* If it's still EINVAL, then it must be unbalanced quoting; report this. */
235 return log_syntax(unit
, LOG_ERR
, filename
, line
, r
, "Unbalanced quoting, ignoring: \"%s\"", rvalue
);
238 /* Can be any error, report it */
239 return log_syntax(unit
, LOG_ERR
, filename
, line
, r
, "Unable to decode word \"%s\", ignoring: %m", rvalue
);
/* The last named parameter of the variadic extract_many_words_internal() is flags, declared as
 * plain unsigned rather than ExtractFlags; the size check below guards that choice. */
242 /* We pass ExtractFlags as unsigned int (to avoid undefined behaviour when passing
243 * an object that undergoes default argument promotion as an argument to va_start).
244 * Let's make sure that ExtractFlags fits into an unsigned int. */
245 assert_cc(sizeof(enum ExtractFlags
) <= sizeof(unsigned));
247 int extract_many_words_internal(const char **p
, const char *separators
, unsigned flags
, ...) {
252 /* Parses a number of words from a string, stripping any quotes if necessary. */
256 /* Count how many words are expected */
258 while (va_arg(ap
, char**))
265 /* Read all words into a temporary array */
266 char **l
= newa0(char*, n
);
269 for (c
= 0; c
< n
; c
++) {
270 r
= extract_first_word(p
, &l
[c
], separators
, flags
);
272 free_many_charp(l
, c
);
279 /* If we managed to parse all words, return them in the passed in parameters */
281 FOREACH_ARRAY(i
, l
, n
) {
282 char **v
= ASSERT_PTR(va_arg(ap
, char**));