1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
11 #include "alloc-util.h"
13 #include "extract-word.h"
16 #include "string-util.h"
/* Extracts the first word of the string *p and hands it back through *ret.
 *
 * p          - in/out cursor into the input; it is advanced one character at a time while scanning.
 * ret        - receives the extracted word.
 * separators - set of characters that delimit words; membership is tested with strchr().
 * flags      - ExtractFlags bits. The bits tested here are EXTRACT_DONT_COALESCE_SEPARATORS,
 *              EXTRACT_UNQUOTE, EXTRACT_RELAX, EXTRACT_UNESCAPE_RELAX, EXTRACT_CUNESCAPE,
 *              EXTRACT_UNESCAPE_SEPARATORS and EXTRACT_RETAIN_ESCAPE.
 *
 * The word is built up in the heap buffer s, which is grown with GREEDY_REALLOC() before bytes are
 * appended; sz counts the bytes stored so far. quote and backslash carry the parser state from one
 * character to the next.
 *
 * NOTE(review): callers in this file compare the result against -EINVAL, so failures are presumably
 * reported as negative errno-style values - confirm against the return statements. */
20 int extract_first_word(const char **p
, char **ret
, const char *separators
, ExtractFlags flags
) {
21 _cleanup_free_
char *s
= NULL
;
22 size_t allocated
= 0, sz
= 0;
23 char quote
= 0; /* 0 or ' or " */
24 bool backslash
= false; /* whether we've just seen a backslash */
31 /* Bail early if called after last value or with no input */
37 separators
= WHITESPACE
;
39 /* Parses the first word of a string, and returns it in
40 * *ret. Removes all quotes in the process. When parsing fails
41 * (because of an uneven number of quotes or similar), leaves
42 * the pointer *p at the first invalid character. */
/* With EXTRACT_DONT_COALESCE_SEPARATORS the buffer is allocated up front, before any character
 * has been looked at. NOTE(review): presumably so that an empty field still yields an empty
 * string rather than no string at all - confirm. */
44 if (flags
& EXTRACT_DONT_COALESCE_SEPARATORS
)
45 if (!GREEDY_REALLOC(s
, allocated
, sz
+1))
/* First scan: walk over separator characters in front of the word. When separators are not
 * coalesced, hitting one jumps straight to finish_force_next. */
48 for (;; (*p
)++, c
= **p
) {
50 goto finish_force_terminate
;
51 else if (strchr(separators
, c
)) {
52 if (flags
& EXTRACT_DONT_COALESCE_SEPARATORS
) {
54 goto finish_force_next
;
57 /* We found a non-blank character, so we will always
58 * want to return a string (even if it is empty),
59 * allocate it here. */
60 if (!GREEDY_REALLOC(s
, allocated
, sz
+1))
66 for (;; (*p
)++, c
= **p
) {
/* NOTE(review): 7 spare bytes are reserved here; presumably enough for one character written
 * by utf8_encode_unichar() below plus the trailing NUL - confirm. */
68 if (!GREEDY_REALLOC(s
, allocated
, sz
+7))
72 if ((flags
& EXTRACT_UNESCAPE_RELAX
) &&
73 (quote
== 0 || flags
& EXTRACT_RELAX
)) {
74 /* If we find an unquoted trailing backslash and we're in
75 * EXTRACT_UNESCAPE_RELAX mode, keep it verbatim in the
78 * Unbalanced quotes will only be allowed in EXTRACT_RELAX
79 * mode, EXTRACT_UNESCAPE_RELAX mode does not allow them.
82 goto finish_force_terminate
;
84 if (flags
& EXTRACT_RELAX
)
85 goto finish_force_terminate
;
89 if (flags
& (EXTRACT_CUNESCAPE
|EXTRACT_UNESCAPE_SEPARATORS
)) {
90 bool eight_bit
= false;
93 if ((flags
& EXTRACT_CUNESCAPE
) &&
94 (r
= cunescape_one(*p
, SIZE_MAX
, &u
, &eight_bit
, false)) >= 0) {
95 /* A valid escaped sequence */
103 sz
+= utf8_encode_unichar(s
+ sz
, u
);
104 } else if ((flags
& EXTRACT_UNESCAPE_SEPARATORS
) &&
105 (strchr(separators
, **p
) || **p
== '\\'))
106 /* An escaped separator char or the escape char itself */
108 else if (flags
& EXTRACT_UNESCAPE_RELAX
) {
118 } else if (quote
!= 0) { /* inside either single or double quotes */
119 for (;; (*p
)++, c
= **p
) {
121 if (flags
& EXTRACT_RELAX
)
122 goto finish_force_terminate
;
124 } else if (c
== quote
) { /* found the end quote */
127 } else if (c
== '\\' && !(flags
& EXTRACT_RETAIN_ESCAPE
)) {
131 if (!GREEDY_REALLOC(s
, allocated
, sz
+2))
/* Scan that tests, in this order: a quote character (honoured only with EXTRACT_UNQUOTE), a
 * backslash (honoured unless EXTRACT_RETAIN_ESCAPE is set), and a separator character.
 * NOTE(review): presumably the branch for text that is neither quoted nor escaped - confirm. */
139 for (;; (*p
)++, c
= **p
) {
141 goto finish_force_terminate
;
142 else if (IN_SET(c
, '\'', '"') && (flags
& EXTRACT_UNQUOTE
)) {
145 } else if (c
== '\\' && !(flags
& EXTRACT_RETAIN_ESCAPE
)) {
148 } else if (strchr(separators
, c
)) {
149 if (flags
& EXTRACT_DONT_COALESCE_SEPARATORS
) {
151 goto finish_force_next
;
153 /* Skip additional coalesced separators. */
154 for (;; (*p
)++, c
= **p
) {
156 goto finish_force_terminate
;
157 if (!strchr(separators
, c
))
163 if (!GREEDY_REALLOC(s
, allocated
, sz
+2))
172 finish_force_terminate
:
/* Wrapper around extract_first_word() that reports parse problems through log_syntax().
 *
 * p, ret, separators and flags are forwarded unchanged to extract_first_word(). unit, filename and
 * line are handed to log_syntax() to locate the message; rvalue is the text quoted in the error
 * messages.
 *
 * The word is first parsed with the flags as given. If that yields -EINVAL and
 * EXTRACT_UNESCAPE_RELAX was not already set, parsing is retried with that flag added. A retry
 * that succeeds is logged at LOG_WARNING level as unknown escape sequences being ignored.
 * Remaining failures are logged at LOG_ERR level, and the value returned by log_syntax() is passed
 * on to the caller. */
188 int extract_first_word_and_warn(
191 const char *separators
,
194 const char *filename
,
196 const char *rvalue
) {
198 /* Try to unquote it, if it fails, warn about it and try again
199 * but this time using EXTRACT_UNESCAPE_RELAX to keep the
200 * backslashes verbatim in invalid escape sequences. */
206 r
= extract_first_word(p
, ret
, separators
, flags
);
210 if (r
== -EINVAL
&& !(flags
& EXTRACT_UNESCAPE_RELAX
)) {
212 /* Retry it with EXTRACT_UNESCAPE_RELAX. */
214 r
= extract_first_word(p
, ret
, separators
, flags
|EXTRACT_UNESCAPE_RELAX
);
216 /* It worked this time, hence it must have been an invalid escape sequence. */
217 log_syntax(unit
, LOG_WARNING
, filename
, line
, EINVAL
, "Ignoring unknown escape sequences: \"%s\"", *ret
);
221 /* If it is still EINVAL, then it must be unbalanced quoting; report this. */
223 return log_syntax(unit
, LOG_ERR
, filename
, line
, r
, "Unbalanced quoting, ignoring: \"%s\"", rvalue
);
226 /* Can be any error, report it */
227 return log_syntax(unit
, LOG_ERR
, filename
, line
, r
, "Unable to decode word \"%s\", ignoring: %m", rvalue
);
230 /* extract_many_words() takes its flags as a plain unsigned int instead of ExtractFlags: flags is
231 * the last named parameter before the ellipsis, and passing va_start an object that undergoes
232 * default argument promotion is undefined behaviour. Check here that an ExtractFlags value
 * always fits into an unsigned int. */
233 assert_cc(sizeof(enum ExtractFlags
) <= sizeof(unsigned));
235 int extract_many_words(const char **p
, const char *separators
, unsigned flags
, ...) {
240 /* Parses a number of words from a string, stripping any
241 * quotes if necessary. */
245 /* Count how many words are expected */
248 if (!va_arg(ap
, char **))
257 /* Read all words into a temporary array */
259 for (c
= 0; c
< n
; c
++) {
261 r
= extract_first_word(p
, &l
[c
], separators
, flags
);
265 for (j
= 0; j
< c
; j
++)
275 /* If we managed to parse all words, return them in the passed
278 for (i
= 0; i
< n
; i
++) {
281 v
= va_arg(ap
, char **);