]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/basic/extract-word.c
extract-word: increment pointer p and keep c in sync in for loop
[thirdparty/systemd.git] / src / basic / extract-word.c
CommitLineData
84ac7bea
LP
1/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3/***
4 This file is part of systemd.
5
6 Copyright 2010 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20***/
21
b5efdb8a 22#include "alloc-util.h"
4f5dd394 23#include "escape.h"
b11d6a7b
LP
24#include "extract-word.h"
25#include "string-util.h"
84ac7bea
LP
26#include "utf8.h"
27#include "util.h"
84ac7bea
LP
28
29int extract_first_word(const char **p, char **ret, const char *separators, ExtractFlags flags) {
30 _cleanup_free_ char *s = NULL;
31 size_t allocated = 0, sz = 0;
b85e1c25 32 char c;
84ac7bea
LP
33 int r;
34
35 char quote = 0; /* 0 or ' or " */
36 bool backslash = false; /* whether we've just seen a backslash */
37 bool separator = false; /* whether we've just seen a separator */
84ac7bea
LP
38
39 assert(p);
40 assert(ret);
41
84ac7bea
LP
42 /* Bail early if called after last value or with no input */
43 if (!*p)
44 goto finish_force_terminate;
93de9eb7 45 c = **p;
84ac7bea 46
8372da44
FB
47 if (!separators)
48 separators = WHITESPACE;
49
84ac7bea
LP
50 /* Parses the first word of a string, and returns it in
51 * *ret. Removes all quotes in the process. When parsing fails
52 * (because of an uneven number of quotes or similar), leaves
53 * the pointer *p at the first invalid character. */
54
b85e1c25
FB
55 if (flags & EXTRACT_DONT_COALESCE_SEPARATORS)
56 if (!GREEDY_REALLOC(s, allocated, sz+1))
57 return -ENOMEM;
84ac7bea 58
93de9eb7 59 for (;; (*p) ++, c = **p) {
b85e1c25
FB
60 if (c == 0)
61 goto finish_force_terminate;
62 else if (strchr(separators, c)) {
93de9eb7
FB
63 if (flags & EXTRACT_DONT_COALESCE_SEPARATORS) {
64 (*p) ++;
b85e1c25 65 goto finish_force_next;
93de9eb7 66 }
b85e1c25 67 } else {
84ac7bea
LP
68 /* We found a non-blank character, so we will always
69 * want to return a string (even if it is empty),
70 * allocate it here. */
71 if (!GREEDY_REALLOC(s, allocated, sz+1))
72 return -ENOMEM;
b85e1c25 73 break;
84ac7bea 74 }
b85e1c25
FB
75 }
76
93de9eb7 77 for (;; (*p) ++, c = **p) {
84ac7bea
LP
78 if (backslash) {
79 if (!GREEDY_REALLOC(s, allocated, sz+7))
80 return -ENOMEM;
81
82 if (c == 0) {
83 if ((flags & EXTRACT_CUNESCAPE_RELAX) &&
84 (!quote || flags & EXTRACT_RELAX)) {
85 /* If we find an unquoted trailing backslash and we're in
86 * EXTRACT_CUNESCAPE_RELAX mode, keep it verbatim in the
87 * output.
88 *
89 * Unbalanced quotes will only be allowed in EXTRACT_RELAX
90 * mode, EXTRACT_CUNESCAPE_RELAX mode does not allow them.
91 */
92 s[sz++] = '\\';
93 goto finish_force_terminate;
94 }
95 if (flags & EXTRACT_RELAX)
96 goto finish_force_terminate;
97 return -EINVAL;
98 }
99
100 if (flags & EXTRACT_CUNESCAPE) {
dcd12626 101 uint32_t u;
84ac7bea
LP
102
103 r = cunescape_one(*p, (size_t) -1, &c, &u);
104 if (r < 0) {
105 if (flags & EXTRACT_CUNESCAPE_RELAX) {
106 s[sz++] = '\\';
107 s[sz++] = c;
3ff13c29
FB
108 } else
109 return -EINVAL;
110 } else {
111 (*p) += r - 1;
112
113 if (c != 0)
114 s[sz++] = c; /* normal explicit char */
115 else
116 sz += utf8_encode_unichar(s + sz, u); /* unicode chars we'll encode as utf8 */
84ac7bea 117 }
84ac7bea
LP
118 } else
119 s[sz++] = c;
120
84ac7bea
LP
121 backslash = false;
122
123 } else if (quote) { /* inside either single or double quotes */
124 if (c == 0) {
125 if (flags & EXTRACT_RELAX)
126 goto finish_force_terminate;
127 return -EINVAL;
128 } else if (c == quote) /* found the end quote */
129 quote = 0;
130 else if (c == '\\')
131 backslash = true;
132 else {
133 if (!GREEDY_REALLOC(s, allocated, sz+2))
134 return -ENOMEM;
135
136 s[sz++] = c;
137 }
138
139 } else if (separator) {
140 if (c == 0)
141 goto finish_force_terminate;
142 if (!strchr(separators, c))
143 goto finish;
144
145 } else {
146 if (c == 0)
147 goto finish_force_terminate;
148 else if ((c == '\'' || c == '"') && (flags & EXTRACT_QUOTES))
149 quote = c;
150 else if (c == '\\')
151 backslash = true;
152 else if (strchr(separators, c)) {
153 if (flags & EXTRACT_DONT_COALESCE_SEPARATORS) {
154 (*p) ++;
155 goto finish_force_next;
156 }
157 separator = true;
158 } else {
159 if (!GREEDY_REALLOC(s, allocated, sz+2))
160 return -ENOMEM;
161
162 s[sz++] = c;
163 }
164 }
84ac7bea
LP
165 }
166
167finish_force_terminate:
168 *p = NULL;
169finish:
170 if (!s) {
171 *p = NULL;
172 *ret = NULL;
173 return 0;
174 }
175
176finish_force_next:
177 s[sz] = 0;
178 *ret = s;
179 s = NULL;
180
181 return 1;
182}
183
184int extract_first_word_and_warn(
185 const char **p,
186 char **ret,
187 const char *separators,
188 ExtractFlags flags,
189 const char *unit,
190 const char *filename,
191 unsigned line,
192 const char *rvalue) {
193
dea7b6b0
LP
194 /* Try to unquote it, if it fails, warn about it and try again
195 * but this time using EXTRACT_CUNESCAPE_RELAX to keep the
196 * backslashes verbatim in invalid escape sequences. */
197
84ac7bea
LP
198 const char *save;
199 int r;
200
201 save = *p;
202 r = extract_first_word(p, ret, separators, flags);
dea7b6b0
LP
203 if (r >= 0)
204 return r;
205
206 if (r == -EINVAL && !(flags & EXTRACT_CUNESCAPE_RELAX)) {
84ac7bea
LP
207
208 /* Retry it with EXTRACT_CUNESCAPE_RELAX. */
209 *p = save;
210 r = extract_first_word(p, ret, separators, flags|EXTRACT_CUNESCAPE_RELAX);
dea7b6b0
LP
211 if (r >= 0) {
212 /* It worked this time, hence it must have been an invalid escape sequence we could correct. */
213 log_syntax(unit, LOG_WARNING, filename, line, EINVAL, "Invalid escape sequences in line, correcting: \"%s\"", rvalue);
214 return r;
215 }
216
217 /* If it's still EINVAL; then it must be unbalanced quoting, report this. */
218 if (r == -EINVAL)
219 return log_syntax(unit, LOG_ERR, filename, line, r, "Unbalanced quoting, ignoring: \"%s\"", rvalue);
84ac7bea
LP
220 }
221
dea7b6b0
LP
222 /* Can be any error, report it */
223 return log_syntax(unit, LOG_ERR, filename, line, r, "Unable to decode word \"%s\", ignoring: %m", rvalue);
84ac7bea
LP
224}
225
226int extract_many_words(const char **p, const char *separators, ExtractFlags flags, ...) {
227 va_list ap;
228 char **l;
229 int n = 0, i, c, r;
230
231 /* Parses a number of words from a string, stripping any
232 * quotes if necessary. */
233
234 assert(p);
235
236 /* Count how many words are expected */
237 va_start(ap, flags);
238 for (;;) {
239 if (!va_arg(ap, char **))
240 break;
241 n++;
242 }
243 va_end(ap);
244
245 if (n <= 0)
246 return 0;
247
248 /* Read all words into a temporary array */
249 l = newa0(char*, n);
250 for (c = 0; c < n; c++) {
251
252 r = extract_first_word(p, &l[c], separators, flags);
253 if (r < 0) {
254 int j;
255
256 for (j = 0; j < c; j++)
257 free(l[j]);
258
259 return r;
260 }
261
262 if (r == 0)
263 break;
264 }
265
266 /* If we managed to parse all words, return them in the passed
267 * in parameters */
268 va_start(ap, flags);
269 for (i = 0; i < n; i++) {
270 char **v;
271
272 v = va_arg(ap, char **);
273 assert(v);
274
275 *v = l[i];
276 }
277 va_end(ap);
278
279 return c;
280}