]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/basic/extract-word.c
Merge pull request #2440 from poettering/journal-fix
[thirdparty/systemd.git] / src / basic / extract-word.c
CommitLineData
84ac7bea
LP
1/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3/***
4 This file is part of systemd.
5
6 Copyright 2010 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20***/
21
11c3a366
TA
22#include <errno.h>
23#include <stdarg.h>
24#include <stdbool.h>
25#include <stddef.h>
26#include <stdint.h>
27#include <stdlib.h>
28#include <string.h>
29#include <syslog.h>
30
b5efdb8a 31#include "alloc-util.h"
4f5dd394 32#include "escape.h"
b11d6a7b 33#include "extract-word.h"
11c3a366
TA
34#include "log.h"
35#include "macro.h"
b11d6a7b 36#include "string-util.h"
84ac7bea 37#include "utf8.h"
84ac7bea
LP
38
39int extract_first_word(const char **p, char **ret, const char *separators, ExtractFlags flags) {
40 _cleanup_free_ char *s = NULL;
41 size_t allocated = 0, sz = 0;
b85e1c25 42 char c;
84ac7bea
LP
43 int r;
44
45 char quote = 0; /* 0 or ' or " */
46 bool backslash = false; /* whether we've just seen a backslash */
84ac7bea
LP
47
48 assert(p);
49 assert(ret);
50
84ac7bea
LP
51 /* Bail early if called after last value or with no input */
52 if (!*p)
53 goto finish_force_terminate;
93de9eb7 54 c = **p;
84ac7bea 55
8372da44
FB
56 if (!separators)
57 separators = WHITESPACE;
58
84ac7bea
LP
59 /* Parses the first word of a string, and returns it in
60 * *ret. Removes all quotes in the process. When parsing fails
61 * (because of an uneven number of quotes or similar), leaves
62 * the pointer *p at the first invalid character. */
63
b85e1c25
FB
64 if (flags & EXTRACT_DONT_COALESCE_SEPARATORS)
65 if (!GREEDY_REALLOC(s, allocated, sz+1))
66 return -ENOMEM;
84ac7bea 67
93de9eb7 68 for (;; (*p) ++, c = **p) {
b85e1c25
FB
69 if (c == 0)
70 goto finish_force_terminate;
71 else if (strchr(separators, c)) {
93de9eb7
FB
72 if (flags & EXTRACT_DONT_COALESCE_SEPARATORS) {
73 (*p) ++;
b85e1c25 74 goto finish_force_next;
93de9eb7 75 }
b85e1c25 76 } else {
84ac7bea
LP
77 /* We found a non-blank character, so we will always
78 * want to return a string (even if it is empty),
79 * allocate it here. */
80 if (!GREEDY_REALLOC(s, allocated, sz+1))
81 return -ENOMEM;
b85e1c25 82 break;
84ac7bea 83 }
b85e1c25
FB
84 }
85
93de9eb7 86 for (;; (*p) ++, c = **p) {
84ac7bea
LP
87 if (backslash) {
88 if (!GREEDY_REALLOC(s, allocated, sz+7))
89 return -ENOMEM;
90
91 if (c == 0) {
92 if ((flags & EXTRACT_CUNESCAPE_RELAX) &&
93 (!quote || flags & EXTRACT_RELAX)) {
94 /* If we find an unquoted trailing backslash and we're in
95 * EXTRACT_CUNESCAPE_RELAX mode, keep it verbatim in the
96 * output.
97 *
98 * Unbalanced quotes will only be allowed in EXTRACT_RELAX
99 * mode, EXTRACT_CUNESCAPE_RELAX mode does not allow them.
100 */
101 s[sz++] = '\\';
102 goto finish_force_terminate;
103 }
104 if (flags & EXTRACT_RELAX)
105 goto finish_force_terminate;
106 return -EINVAL;
107 }
108
109 if (flags & EXTRACT_CUNESCAPE) {
dcd12626 110 uint32_t u;
3565e095 111 bool eight_bit = false;
84ac7bea 112
3565e095 113 r = cunescape_one(*p, (size_t) -1, &u, &eight_bit);
84ac7bea
LP
114 if (r < 0) {
115 if (flags & EXTRACT_CUNESCAPE_RELAX) {
116 s[sz++] = '\\';
117 s[sz++] = c;
3ff13c29
FB
118 } else
119 return -EINVAL;
120 } else {
121 (*p) += r - 1;
122
3565e095
ZJS
123 if (eight_bit)
124 s[sz++] = u;
3ff13c29 125 else
3565e095 126 sz += utf8_encode_unichar(s + sz, u);
84ac7bea 127 }
84ac7bea
LP
128 } else
129 s[sz++] = c;
130
84ac7bea
LP
131 backslash = false;
132
133 } else if (quote) { /* inside either single or double quotes */
27fc921b
FB
134 for (;; (*p) ++, c = **p) {
135 if (c == 0) {
136 if (flags & EXTRACT_RELAX)
137 goto finish_force_terminate;
138 return -EINVAL;
139 } else if (c == quote) { /* found the end quote */
140 quote = 0;
141 break;
c89f52ac 142 } else if (c == '\\' && !(flags & EXTRACT_RETAIN_ESCAPE)) {
27fc921b
FB
143 backslash = true;
144 break;
145 } else {
146 if (!GREEDY_REALLOC(s, allocated, sz+2))
147 return -ENOMEM;
84ac7bea 148
27fc921b
FB
149 s[sz++] = c;
150 }
84ac7bea
LP
151 }
152
84ac7bea 153 } else {
27fc921b
FB
154 for (;; (*p) ++, c = **p) {
155 if (c == 0)
156 goto finish_force_terminate;
157 else if ((c == '\'' || c == '"') && (flags & EXTRACT_QUOTES)) {
158 quote = c;
159 break;
c89f52ac 160 } else if (c == '\\' && !(flags & EXTRACT_RETAIN_ESCAPE)) {
27fc921b
FB
161 backslash = true;
162 break;
163 } else if (strchr(separators, c)) {
164 if (flags & EXTRACT_DONT_COALESCE_SEPARATORS) {
165 (*p) ++;
166 goto finish_force_next;
167 }
0247447e
FB
168 /* Skip additional coalesced separators. */
169 for (;; (*p) ++, c = **p) {
170 if (c == 0)
171 goto finish_force_terminate;
172 if (!strchr(separators, c))
173 break;
174 }
175 goto finish;
176
27fc921b
FB
177 } else {
178 if (!GREEDY_REALLOC(s, allocated, sz+2))
179 return -ENOMEM;
84ac7bea 180
27fc921b
FB
181 s[sz++] = c;
182 }
84ac7bea
LP
183 }
184 }
84ac7bea
LP
185 }
186
187finish_force_terminate:
188 *p = NULL;
189finish:
190 if (!s) {
191 *p = NULL;
192 *ret = NULL;
193 return 0;
194 }
195
196finish_force_next:
197 s[sz] = 0;
198 *ret = s;
199 s = NULL;
200
201 return 1;
202}
203
204int extract_first_word_and_warn(
205 const char **p,
206 char **ret,
207 const char *separators,
208 ExtractFlags flags,
209 const char *unit,
210 const char *filename,
211 unsigned line,
212 const char *rvalue) {
213
dea7b6b0
LP
214 /* Try to unquote it, if it fails, warn about it and try again
215 * but this time using EXTRACT_CUNESCAPE_RELAX to keep the
216 * backslashes verbatim in invalid escape sequences. */
217
84ac7bea
LP
218 const char *save;
219 int r;
220
221 save = *p;
222 r = extract_first_word(p, ret, separators, flags);
dea7b6b0
LP
223 if (r >= 0)
224 return r;
225
226 if (r == -EINVAL && !(flags & EXTRACT_CUNESCAPE_RELAX)) {
84ac7bea
LP
227
228 /* Retry it with EXTRACT_CUNESCAPE_RELAX. */
229 *p = save;
230 r = extract_first_word(p, ret, separators, flags|EXTRACT_CUNESCAPE_RELAX);
dea7b6b0
LP
231 if (r >= 0) {
232 /* It worked this time, hence it must have been an invalid escape sequence we could correct. */
233 log_syntax(unit, LOG_WARNING, filename, line, EINVAL, "Invalid escape sequences in line, correcting: \"%s\"", rvalue);
234 return r;
235 }
236
237 /* If it's still EINVAL; then it must be unbalanced quoting, report this. */
238 if (r == -EINVAL)
239 return log_syntax(unit, LOG_ERR, filename, line, r, "Unbalanced quoting, ignoring: \"%s\"", rvalue);
84ac7bea
LP
240 }
241
dea7b6b0
LP
242 /* Can be any error, report it */
243 return log_syntax(unit, LOG_ERR, filename, line, r, "Unable to decode word \"%s\", ignoring: %m", rvalue);
84ac7bea
LP
244}
245
246int extract_many_words(const char **p, const char *separators, ExtractFlags flags, ...) {
247 va_list ap;
248 char **l;
249 int n = 0, i, c, r;
250
251 /* Parses a number of words from a string, stripping any
252 * quotes if necessary. */
253
254 assert(p);
255
256 /* Count how many words are expected */
257 va_start(ap, flags);
258 for (;;) {
259 if (!va_arg(ap, char **))
260 break;
261 n++;
262 }
263 va_end(ap);
264
265 if (n <= 0)
266 return 0;
267
268 /* Read all words into a temporary array */
269 l = newa0(char*, n);
270 for (c = 0; c < n; c++) {
271
272 r = extract_first_word(p, &l[c], separators, flags);
273 if (r < 0) {
274 int j;
275
276 for (j = 0; j < c; j++)
277 free(l[j]);
278
279 return r;
280 }
281
282 if (r == 0)
283 break;
284 }
285
286 /* If we managed to parse all words, return them in the passed
287 * in parameters */
288 va_start(ap, flags);
289 for (i = 0; i < n; i++) {
290 char **v;
291
292 v = va_arg(ap, char **);
293 assert(v);
294
295 *v = l[i];
296 }
297 va_end(ap);
298
299 return c;
300}