]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/basic/extract-word.c
man: document automatic dependencies
[thirdparty/systemd.git] / src / basic / extract-word.c
CommitLineData
84ac7bea
LP
/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/

/***
  This file is part of systemd.

  Copyright 2010 Lennart Poettering

  systemd is free software; you can redistribute it and/or modify it
  under the terms of the GNU Lesser General Public License as published by
  the Free Software Foundation; either version 2.1 of the License, or
  (at your option) any later version.

  systemd is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public License
  along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
21
b5efdb8a 22#include "alloc-util.h"
4f5dd394 23#include "escape.h"
b11d6a7b
LP
24#include "extract-word.h"
25#include "string-util.h"
84ac7bea
LP
26#include "utf8.h"
27#include "util.h"
84ac7bea
LP
28
29int extract_first_word(const char **p, char **ret, const char *separators, ExtractFlags flags) {
30 _cleanup_free_ char *s = NULL;
31 size_t allocated = 0, sz = 0;
b85e1c25 32 char c;
84ac7bea
LP
33 int r;
34
35 char quote = 0; /* 0 or ' or " */
36 bool backslash = false; /* whether we've just seen a backslash */
84ac7bea
LP
37
38 assert(p);
39 assert(ret);
40
84ac7bea
LP
41 /* Bail early if called after last value or with no input */
42 if (!*p)
43 goto finish_force_terminate;
93de9eb7 44 c = **p;
84ac7bea 45
8372da44
FB
46 if (!separators)
47 separators = WHITESPACE;
48
84ac7bea
LP
49 /* Parses the first word of a string, and returns it in
50 * *ret. Removes all quotes in the process. When parsing fails
51 * (because of an uneven number of quotes or similar), leaves
52 * the pointer *p at the first invalid character. */
53
b85e1c25
FB
54 if (flags & EXTRACT_DONT_COALESCE_SEPARATORS)
55 if (!GREEDY_REALLOC(s, allocated, sz+1))
56 return -ENOMEM;
84ac7bea 57
93de9eb7 58 for (;; (*p) ++, c = **p) {
b85e1c25
FB
59 if (c == 0)
60 goto finish_force_terminate;
61 else if (strchr(separators, c)) {
93de9eb7
FB
62 if (flags & EXTRACT_DONT_COALESCE_SEPARATORS) {
63 (*p) ++;
b85e1c25 64 goto finish_force_next;
93de9eb7 65 }
b85e1c25 66 } else {
84ac7bea
LP
67 /* We found a non-blank character, so we will always
68 * want to return a string (even if it is empty),
69 * allocate it here. */
70 if (!GREEDY_REALLOC(s, allocated, sz+1))
71 return -ENOMEM;
b85e1c25 72 break;
84ac7bea 73 }
b85e1c25
FB
74 }
75
93de9eb7 76 for (;; (*p) ++, c = **p) {
84ac7bea
LP
77 if (backslash) {
78 if (!GREEDY_REALLOC(s, allocated, sz+7))
79 return -ENOMEM;
80
81 if (c == 0) {
82 if ((flags & EXTRACT_CUNESCAPE_RELAX) &&
83 (!quote || flags & EXTRACT_RELAX)) {
84 /* If we find an unquoted trailing backslash and we're in
85 * EXTRACT_CUNESCAPE_RELAX mode, keep it verbatim in the
86 * output.
87 *
88 * Unbalanced quotes will only be allowed in EXTRACT_RELAX
89 * mode, EXTRACT_CUNESCAPE_RELAX mode does not allow them.
90 */
91 s[sz++] = '\\';
92 goto finish_force_terminate;
93 }
94 if (flags & EXTRACT_RELAX)
95 goto finish_force_terminate;
96 return -EINVAL;
97 }
98
99 if (flags & EXTRACT_CUNESCAPE) {
dcd12626 100 uint32_t u;
84ac7bea
LP
101
102 r = cunescape_one(*p, (size_t) -1, &c, &u);
103 if (r < 0) {
104 if (flags & EXTRACT_CUNESCAPE_RELAX) {
105 s[sz++] = '\\';
106 s[sz++] = c;
3ff13c29
FB
107 } else
108 return -EINVAL;
109 } else {
110 (*p) += r - 1;
111
112 if (c != 0)
113 s[sz++] = c; /* normal explicit char */
114 else
115 sz += utf8_encode_unichar(s + sz, u); /* unicode chars we'll encode as utf8 */
84ac7bea 116 }
84ac7bea
LP
117 } else
118 s[sz++] = c;
119
84ac7bea
LP
120 backslash = false;
121
122 } else if (quote) { /* inside either single or double quotes */
27fc921b
FB
123 for (;; (*p) ++, c = **p) {
124 if (c == 0) {
125 if (flags & EXTRACT_RELAX)
126 goto finish_force_terminate;
127 return -EINVAL;
128 } else if (c == quote) { /* found the end quote */
129 quote = 0;
130 break;
131 } else if (c == '\\') {
132 backslash = true;
133 break;
134 } else {
135 if (!GREEDY_REALLOC(s, allocated, sz+2))
136 return -ENOMEM;
84ac7bea 137
27fc921b
FB
138 s[sz++] = c;
139 }
84ac7bea
LP
140 }
141
84ac7bea 142 } else {
27fc921b
FB
143 for (;; (*p) ++, c = **p) {
144 if (c == 0)
145 goto finish_force_terminate;
146 else if ((c == '\'' || c == '"') && (flags & EXTRACT_QUOTES)) {
147 quote = c;
148 break;
149 } else if (c == '\\') {
150 backslash = true;
151 break;
152 } else if (strchr(separators, c)) {
153 if (flags & EXTRACT_DONT_COALESCE_SEPARATORS) {
154 (*p) ++;
155 goto finish_force_next;
156 }
0247447e
FB
157 /* Skip additional coalesced separators. */
158 for (;; (*p) ++, c = **p) {
159 if (c == 0)
160 goto finish_force_terminate;
161 if (!strchr(separators, c))
162 break;
163 }
164 goto finish;
165
27fc921b
FB
166 } else {
167 if (!GREEDY_REALLOC(s, allocated, sz+2))
168 return -ENOMEM;
84ac7bea 169
27fc921b
FB
170 s[sz++] = c;
171 }
84ac7bea
LP
172 }
173 }
84ac7bea
LP
174 }
175
176finish_force_terminate:
177 *p = NULL;
178finish:
179 if (!s) {
180 *p = NULL;
181 *ret = NULL;
182 return 0;
183 }
184
185finish_force_next:
186 s[sz] = 0;
187 *ret = s;
188 s = NULL;
189
190 return 1;
191}
192
193int extract_first_word_and_warn(
194 const char **p,
195 char **ret,
196 const char *separators,
197 ExtractFlags flags,
198 const char *unit,
199 const char *filename,
200 unsigned line,
201 const char *rvalue) {
202
dea7b6b0
LP
203 /* Try to unquote it, if it fails, warn about it and try again
204 * but this time using EXTRACT_CUNESCAPE_RELAX to keep the
205 * backslashes verbatim in invalid escape sequences. */
206
84ac7bea
LP
207 const char *save;
208 int r;
209
210 save = *p;
211 r = extract_first_word(p, ret, separators, flags);
dea7b6b0
LP
212 if (r >= 0)
213 return r;
214
215 if (r == -EINVAL && !(flags & EXTRACT_CUNESCAPE_RELAX)) {
84ac7bea
LP
216
217 /* Retry it with EXTRACT_CUNESCAPE_RELAX. */
218 *p = save;
219 r = extract_first_word(p, ret, separators, flags|EXTRACT_CUNESCAPE_RELAX);
dea7b6b0
LP
220 if (r >= 0) {
221 /* It worked this time, hence it must have been an invalid escape sequence we could correct. */
222 log_syntax(unit, LOG_WARNING, filename, line, EINVAL, "Invalid escape sequences in line, correcting: \"%s\"", rvalue);
223 return r;
224 }
225
226 /* If it's still EINVAL; then it must be unbalanced quoting, report this. */
227 if (r == -EINVAL)
228 return log_syntax(unit, LOG_ERR, filename, line, r, "Unbalanced quoting, ignoring: \"%s\"", rvalue);
84ac7bea
LP
229 }
230
dea7b6b0
LP
231 /* Can be any error, report it */
232 return log_syntax(unit, LOG_ERR, filename, line, r, "Unable to decode word \"%s\", ignoring: %m", rvalue);
84ac7bea
LP
233}
234
235int extract_many_words(const char **p, const char *separators, ExtractFlags flags, ...) {
236 va_list ap;
237 char **l;
238 int n = 0, i, c, r;
239
240 /* Parses a number of words from a string, stripping any
241 * quotes if necessary. */
242
243 assert(p);
244
245 /* Count how many words are expected */
246 va_start(ap, flags);
247 for (;;) {
248 if (!va_arg(ap, char **))
249 break;
250 n++;
251 }
252 va_end(ap);
253
254 if (n <= 0)
255 return 0;
256
257 /* Read all words into a temporary array */
258 l = newa0(char*, n);
259 for (c = 0; c < n; c++) {
260
261 r = extract_first_word(p, &l[c], separators, flags);
262 if (r < 0) {
263 int j;
264
265 for (j = 0; j < c; j++)
266 free(l[j]);
267
268 return r;
269 }
270
271 if (r == 0)
272 break;
273 }
274
275 /* If we managed to parse all words, return them in the passed
276 * in parameters */
277 va_start(ap, flags);
278 for (i = 0; i < n; i++) {
279 char **v;
280
281 v = va_arg(ap, char **);
282 assert(v);
283
284 *v = l[i];
285 }
286 va_end(ap);
287
288 return c;
289}