]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/basic/extract-word.c
basic/extract-word,man: clarify "correction" of invalid escapes
[thirdparty/systemd.git] / src / basic / extract-word.c
CommitLineData
84ac7bea
LP
1/***
2 This file is part of systemd.
3
4 Copyright 2010 Lennart Poettering
5
6 systemd is free software; you can redistribute it and/or modify it
7 under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version.
10
11 systemd is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public License
17 along with systemd; If not, see <http://www.gnu.org/licenses/>.
18***/
19
11c3a366
TA
20#include <errno.h>
21#include <stdarg.h>
22#include <stdbool.h>
23#include <stddef.h>
24#include <stdint.h>
25#include <stdlib.h>
26#include <string.h>
27#include <syslog.h>
28
b5efdb8a 29#include "alloc-util.h"
4f5dd394 30#include "escape.h"
b11d6a7b 31#include "extract-word.h"
11c3a366
TA
32#include "log.h"
33#include "macro.h"
b11d6a7b 34#include "string-util.h"
84ac7bea 35#include "utf8.h"
84ac7bea
LP
36
37int extract_first_word(const char **p, char **ret, const char *separators, ExtractFlags flags) {
38 _cleanup_free_ char *s = NULL;
39 size_t allocated = 0, sz = 0;
b85e1c25 40 char c;
84ac7bea
LP
41 int r;
42
43 char quote = 0; /* 0 or ' or " */
44 bool backslash = false; /* whether we've just seen a backslash */
84ac7bea
LP
45
46 assert(p);
47 assert(ret);
48
84ac7bea
LP
49 /* Bail early if called after last value or with no input */
50 if (!*p)
c58bd76a 51 goto finish;
93de9eb7 52 c = **p;
84ac7bea 53
8372da44
FB
54 if (!separators)
55 separators = WHITESPACE;
56
84ac7bea
LP
57 /* Parses the first word of a string, and returns it in
58 * *ret. Removes all quotes in the process. When parsing fails
59 * (because of an uneven number of quotes or similar), leaves
60 * the pointer *p at the first invalid character. */
61
b85e1c25
FB
62 if (flags & EXTRACT_DONT_COALESCE_SEPARATORS)
63 if (!GREEDY_REALLOC(s, allocated, sz+1))
64 return -ENOMEM;
84ac7bea 65
313cefa1 66 for (;; (*p)++, c = **p) {
b85e1c25
FB
67 if (c == 0)
68 goto finish_force_terminate;
69 else if (strchr(separators, c)) {
93de9eb7 70 if (flags & EXTRACT_DONT_COALESCE_SEPARATORS) {
313cefa1 71 (*p)++;
b85e1c25 72 goto finish_force_next;
93de9eb7 73 }
b85e1c25 74 } else {
84ac7bea
LP
75 /* We found a non-blank character, so we will always
76 * want to return a string (even if it is empty),
77 * allocate it here. */
78 if (!GREEDY_REALLOC(s, allocated, sz+1))
79 return -ENOMEM;
b85e1c25 80 break;
84ac7bea 81 }
b85e1c25
FB
82 }
83
313cefa1 84 for (;; (*p)++, c = **p) {
84ac7bea
LP
85 if (backslash) {
86 if (!GREEDY_REALLOC(s, allocated, sz+7))
87 return -ENOMEM;
88
89 if (c == 0) {
90 if ((flags & EXTRACT_CUNESCAPE_RELAX) &&
91 (!quote || flags & EXTRACT_RELAX)) {
92 /* If we find an unquoted trailing backslash and we're in
93 * EXTRACT_CUNESCAPE_RELAX mode, keep it verbatim in the
94 * output.
95 *
96 * Unbalanced quotes will only be allowed in EXTRACT_RELAX
97 * mode, EXTRACT_CUNESCAPE_RELAX mode does not allow them.
98 */
99 s[sz++] = '\\';
100 goto finish_force_terminate;
101 }
102 if (flags & EXTRACT_RELAX)
103 goto finish_force_terminate;
104 return -EINVAL;
105 }
106
107 if (flags & EXTRACT_CUNESCAPE) {
3565e095 108 bool eight_bit = false;
c932fb71 109 char32_t u;
84ac7bea 110
3565e095 111 r = cunescape_one(*p, (size_t) -1, &u, &eight_bit);
84ac7bea
LP
112 if (r < 0) {
113 if (flags & EXTRACT_CUNESCAPE_RELAX) {
114 s[sz++] = '\\';
115 s[sz++] = c;
3ff13c29
FB
116 } else
117 return -EINVAL;
118 } else {
119 (*p) += r - 1;
120
3565e095
ZJS
121 if (eight_bit)
122 s[sz++] = u;
3ff13c29 123 else
3565e095 124 sz += utf8_encode_unichar(s + sz, u);
84ac7bea 125 }
84ac7bea
LP
126 } else
127 s[sz++] = c;
128
84ac7bea
LP
129 backslash = false;
130
131 } else if (quote) { /* inside either single or double quotes */
313cefa1 132 for (;; (*p)++, c = **p) {
27fc921b
FB
133 if (c == 0) {
134 if (flags & EXTRACT_RELAX)
135 goto finish_force_terminate;
136 return -EINVAL;
137 } else if (c == quote) { /* found the end quote */
138 quote = 0;
139 break;
c89f52ac 140 } else if (c == '\\' && !(flags & EXTRACT_RETAIN_ESCAPE)) {
27fc921b
FB
141 backslash = true;
142 break;
143 } else {
144 if (!GREEDY_REALLOC(s, allocated, sz+2))
145 return -ENOMEM;
84ac7bea 146
27fc921b
FB
147 s[sz++] = c;
148 }
84ac7bea
LP
149 }
150
84ac7bea 151 } else {
313cefa1 152 for (;; (*p)++, c = **p) {
27fc921b
FB
153 if (c == 0)
154 goto finish_force_terminate;
155 else if ((c == '\'' || c == '"') && (flags & EXTRACT_QUOTES)) {
156 quote = c;
157 break;
c89f52ac 158 } else if (c == '\\' && !(flags & EXTRACT_RETAIN_ESCAPE)) {
27fc921b
FB
159 backslash = true;
160 break;
161 } else if (strchr(separators, c)) {
162 if (flags & EXTRACT_DONT_COALESCE_SEPARATORS) {
313cefa1 163 (*p)++;
27fc921b
FB
164 goto finish_force_next;
165 }
0247447e 166 /* Skip additional coalesced separators. */
313cefa1 167 for (;; (*p)++, c = **p) {
0247447e
FB
168 if (c == 0)
169 goto finish_force_terminate;
170 if (!strchr(separators, c))
171 break;
172 }
173 goto finish;
174
27fc921b
FB
175 } else {
176 if (!GREEDY_REALLOC(s, allocated, sz+2))
177 return -ENOMEM;
84ac7bea 178
27fc921b
FB
179 s[sz++] = c;
180 }
84ac7bea
LP
181 }
182 }
84ac7bea
LP
183 }
184
185finish_force_terminate:
186 *p = NULL;
187finish:
188 if (!s) {
189 *p = NULL;
190 *ret = NULL;
191 return 0;
192 }
193
194finish_force_next:
195 s[sz] = 0;
196 *ret = s;
197 s = NULL;
198
199 return 1;
200}
201
202int extract_first_word_and_warn(
203 const char **p,
204 char **ret,
205 const char *separators,
206 ExtractFlags flags,
207 const char *unit,
208 const char *filename,
209 unsigned line,
210 const char *rvalue) {
211
dea7b6b0
LP
212 /* Try to unquote it, if it fails, warn about it and try again
213 * but this time using EXTRACT_CUNESCAPE_RELAX to keep the
214 * backslashes verbatim in invalid escape sequences. */
215
84ac7bea
LP
216 const char *save;
217 int r;
218
219 save = *p;
220 r = extract_first_word(p, ret, separators, flags);
dea7b6b0
LP
221 if (r >= 0)
222 return r;
223
224 if (r == -EINVAL && !(flags & EXTRACT_CUNESCAPE_RELAX)) {
84ac7bea
LP
225
226 /* Retry it with EXTRACT_CUNESCAPE_RELAX. */
227 *p = save;
228 r = extract_first_word(p, ret, separators, flags|EXTRACT_CUNESCAPE_RELAX);
dea7b6b0 229 if (r >= 0) {
330785f5
ZJS
230 /* It worked this time, hence it must have been an invalid escape sequence. */
231 log_syntax(unit, LOG_WARNING, filename, line, EINVAL, "Ignoring unknown escape sequences: \"%s\"", *ret);
dea7b6b0
LP
232 return r;
233 }
234
235 /* If it's still EINVAL; then it must be unbalanced quoting, report this. */
236 if (r == -EINVAL)
237 return log_syntax(unit, LOG_ERR, filename, line, r, "Unbalanced quoting, ignoring: \"%s\"", rvalue);
84ac7bea
LP
238 }
239
dea7b6b0
LP
240 /* Can be any error, report it */
241 return log_syntax(unit, LOG_ERR, filename, line, r, "Unable to decode word \"%s\", ignoring: %m", rvalue);
84ac7bea
LP
242}
243
244int extract_many_words(const char **p, const char *separators, ExtractFlags flags, ...) {
245 va_list ap;
246 char **l;
247 int n = 0, i, c, r;
248
249 /* Parses a number of words from a string, stripping any
250 * quotes if necessary. */
251
252 assert(p);
253
254 /* Count how many words are expected */
255 va_start(ap, flags);
256 for (;;) {
257 if (!va_arg(ap, char **))
258 break;
259 n++;
260 }
261 va_end(ap);
262
263 if (n <= 0)
264 return 0;
265
266 /* Read all words into a temporary array */
267 l = newa0(char*, n);
268 for (c = 0; c < n; c++) {
269
270 r = extract_first_word(p, &l[c], separators, flags);
271 if (r < 0) {
272 int j;
273
274 for (j = 0; j < c; j++)
275 free(l[j]);
276
277 return r;
278 }
279
280 if (r == 0)
281 break;
282 }
283
284 /* If we managed to parse all words, return them in the passed
285 * in parameters */
286 va_start(ap, flags);
287 for (i = 0; i < n; i++) {
288 char **v;
289
290 v = va_arg(ap, char **);
291 assert(v);
292
293 *v = l[i];
294 }
295 va_end(ap);
296
297 return c;
298}