1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
3 #include "alloc-util.h"
5 #include "extract-word.h"
7 #include "string-util.h"
10 int extract_first_word(const char **p
, char **ret
, const char *separators
, ExtractFlags flags
) {
11 _cleanup_free_
char *s
= NULL
;
13 char quote
= 0; /* 0 or ' or " */
14 bool backslash
= false; /* whether we've just seen a backslash */
20 assert(!FLAGS_SET(flags
, EXTRACT_KEEP_QUOTE
| EXTRACT_UNQUOTE
));
22 /* Bail early if called after last value or with no input */
28 separators
= WHITESPACE
;
30 /* Parses the first word of a string, and returns it in
31 * *ret. Removes all quotes in the process. When parsing fails
32 * (because of an uneven number of quotes or similar), leaves
33 * the pointer *p at the first invalid character. */
35 if (flags
& EXTRACT_DONT_COALESCE_SEPARATORS
)
36 if (!GREEDY_REALLOC(s
, sz
+1))
39 for (;; (*p
)++, c
= **p
) {
41 goto finish_force_terminate
;
42 else if (strchr(separators
, c
)) {
43 if (flags
& EXTRACT_DONT_COALESCE_SEPARATORS
) {
44 if (!(flags
& EXTRACT_RETAIN_SEPARATORS
))
46 goto finish_force_next
;
49 /* We found a non-separator character, so we will always
50 * want to return a string (even if it is empty); hence
51 * allocate it here. */
52 if (!GREEDY_REALLOC(s
, sz
+1))
58 for (;; (*p
)++, c
= **p
) {
60 if (!GREEDY_REALLOC(s
, sz
+7))
64 if ((flags
& EXTRACT_UNESCAPE_RELAX
) &&
65 (quote
== 0 || flags
& EXTRACT_RELAX
)) {
66 /* If we find an unquoted trailing backslash and we're in
67 * EXTRACT_UNESCAPE_RELAX mode, keep it verbatim in the output.
70 * Unbalanced quotes will only be allowed in EXTRACT_RELAX
71 * mode; EXTRACT_UNESCAPE_RELAX mode does not allow them. */
74 goto finish_force_terminate
;
76 if (flags
& EXTRACT_RELAX
)
77 goto finish_force_terminate
;
81 if (flags
& (EXTRACT_CUNESCAPE
|EXTRACT_UNESCAPE_SEPARATORS
)) {
82 bool eight_bit
= false;
85 if ((flags
& EXTRACT_CUNESCAPE
) &&
86 (r
= cunescape_one(*p
, SIZE_MAX
, &u
, &eight_bit
, false)) >= 0) {
87 /* A valid escaped sequence */
95 sz
+= utf8_encode_unichar(s
+ sz
, u
);
96 } else if ((flags
& EXTRACT_UNESCAPE_SEPARATORS
) &&
97 (strchr(separators
, **p
) || **p
== '\\'))
98 /* An escaped separator char or the escape char itself */
100 else if (flags
& EXTRACT_UNESCAPE_RELAX
) {
110 } else if (quote
!= 0) { /* inside either single or double quotes */
111 for (;; (*p
)++, c
= **p
) {
113 if (flags
& EXTRACT_RELAX
)
114 goto finish_force_terminate
;
116 } else if (c
== quote
) { /* found the end quote */
118 if (flags
& EXTRACT_UNQUOTE
)
120 } else if (c
== '\\' && !(flags
& EXTRACT_RETAIN_ESCAPE
)) {
125 if (!GREEDY_REALLOC(s
, sz
+2))
135 for (;; (*p
)++, c
= **p
) {
137 goto finish_force_terminate
;
138 else if (IN_SET(c
, '\'', '"') && (flags
& (EXTRACT_KEEP_QUOTE
| EXTRACT_UNQUOTE
))) {
140 if (flags
& EXTRACT_UNQUOTE
)
142 } else if (c
== '\\' && !(flags
& EXTRACT_RETAIN_ESCAPE
)) {
145 } else if (strchr(separators
, c
)) {
146 if (flags
& EXTRACT_DONT_COALESCE_SEPARATORS
) {
147 if (!(flags
& EXTRACT_RETAIN_SEPARATORS
))
149 goto finish_force_next
;
151 if (!(flags
& EXTRACT_RETAIN_SEPARATORS
))
152 /* Skip additional coalesced separators. */
153 for (;; (*p
)++, c
= **p
) {
155 goto finish_force_terminate
;
156 if (!strchr(separators
, c
))
163 if (!GREEDY_REALLOC(s
, sz
+2))
174 finish_force_terminate
:
190 int extract_first_word_and_warn(
193 const char *separators
,
196 const char *filename
,
198 const char *rvalue
) {
200 /* Try to unquote it. If that fails, warn about it and try again,
201 * this time using EXTRACT_UNESCAPE_RELAX to keep the
202 * backslashes verbatim in invalid escape sequences. */
208 r
= extract_first_word(p
, ret
, separators
, flags
);
212 if (r
== -EINVAL
&& !(flags
& EXTRACT_UNESCAPE_RELAX
)) {
214 /* Retry it with EXTRACT_UNESCAPE_RELAX. */
216 r
= extract_first_word(p
, ret
, separators
, flags
|EXTRACT_UNESCAPE_RELAX
);
218 /* It worked this time, hence it must have been an invalid escape sequence. */
219 log_syntax(unit
, LOG_WARNING
, filename
, line
, EINVAL
, "Ignoring unknown escape sequences: \"%s\"", *ret
);
223 /* If it's still EINVAL, then it must be unbalanced quoting; report this. */
225 return log_syntax(unit
, LOG_ERR
, filename
, line
, r
, "Unbalanced quoting, ignoring: \"%s\"", rvalue
);
228 /* Can be any error, report it */
229 return log_syntax(unit
, LOG_ERR
, filename
, line
, r
, "Unable to decode word \"%s\", ignoring: %m", rvalue
);
232 /* We pass ExtractFlags as unsigned int (to avoid undefined behaviour when passing
233 * an object that undergoes default argument promotion as an argument to va_start).
234 * Let's make sure that ExtractFlags fits into an unsigned int. */
235 assert_cc(sizeof(enum ExtractFlags
) <= sizeof(unsigned));
237 int extract_many_words_internal(const char **p
, const char *separators
, unsigned flags
, ...) {
242 /* Parses a number of words from a string, stripping any quotes if necessary. */
246 /* Count how many words are expected */
248 while (va_arg(ap
, char**))
255 /* Read all words into a temporary array */
256 char **l
= newa0(char*, n
);
259 for (c
= 0; c
< n
; c
++) {
260 r
= extract_first_word(p
, &l
[c
], separators
, flags
);
262 free_many_charp(l
, c
);
269 /* If we managed to parse all words, return them in the passed in parameters */
271 FOREACH_ARRAY(i
, l
, n
) {
272 char **v
= ASSERT_PTR(va_arg(ap
, char**));