]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/xml.c
util-lib: split our string related calls from util.[ch] into its own file string...
[thirdparty/systemd.git] / src / basic / xml.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright 2013 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <string.h>
23
24 #include "string-util.h"
25 #include "util.h"
26 #include "xml.h"
27
/* Tokenizer position that xml_tokenize() stores in its opaque state pointer between calls. */
enum {
        /* No token returned yet; xml_tokenize() resets the line counter and continues as STATE_TEXT. */
        STATE_NULL = 0,

        /* Outside of any tag: character data, comments, processing instructions, DTDs or a new tag follow. */
        STATE_TEXT,

        /* Inside a tag, after its name or after an attribute value. */
        STATE_TAG,

        /* Inside a tag, right after an attribute name. */
        STATE_ATTRIBUTE,
};
34
/* Adds the number of '\n' bytes found within the first n bytes of s to *line.
 * A NULL line pointer is accepted and turns the call into a no-op. */
static void inc_lines(unsigned *line, const char *s, size_t n) {
        const char *end, *nl;

        if (!line)
                return;

        end = s + n;

        /* Jump from newline to newline until the remaining range contains none. */
        while ((nl = memchr(s, '\n', (size_t) (end - s)))) {
                (*line)++;
                s = nl + 1;
        }
}
53
/* We don't actually do real XML here. We only read a simplistic
 * subset that is a bit less strict than XML and lacks all the more
 * complex features, like entities or namespaces. However, we do
 * support some HTML5-like simplifications. */
58
59 int xml_tokenize(const char **p, char **name, void **state, unsigned *line) {
60 const char *c, *e, *b;
61 char *ret;
62 int t;
63
64 assert(p);
65 assert(*p);
66 assert(name);
67 assert(state);
68
69 t = PTR_TO_INT(*state);
70 c = *p;
71
72 if (t == STATE_NULL) {
73 if (line)
74 *line = 1;
75 t = STATE_TEXT;
76 }
77
78 for (;;) {
79 if (*c == 0)
80 return XML_END;
81
82 switch (t) {
83
84 case STATE_TEXT: {
85 int x;
86
87 e = strchrnul(c, '<');
88 if (e > c) {
89 /* More text... */
90 ret = strndup(c, e - c);
91 if (!ret)
92 return -ENOMEM;
93
94 inc_lines(line, c, e - c);
95
96 *name = ret;
97 *p = e;
98 *state = INT_TO_PTR(STATE_TEXT);
99
100 return XML_TEXT;
101 }
102
103 assert(*e == '<');
104 b = c + 1;
105
106 if (startswith(b, "!--")) {
107 /* A comment */
108 e = strstr(b + 3, "-->");
109 if (!e)
110 return -EINVAL;
111
112 inc_lines(line, b, e + 3 - b);
113
114 c = e + 3;
115 continue;
116 }
117
118 if (*b == '?') {
119 /* Processing instruction */
120
121 e = strstr(b + 1, "?>");
122 if (!e)
123 return -EINVAL;
124
125 inc_lines(line, b, e + 2 - b);
126
127 c = e + 2;
128 continue;
129 }
130
131 if (*b == '!') {
132 /* DTD */
133
134 e = strchr(b + 1, '>');
135 if (!e)
136 return -EINVAL;
137
138 inc_lines(line, b, e + 1 - b);
139
140 c = e + 1;
141 continue;
142 }
143
144 if (*b == '/') {
145 /* A closing tag */
146 x = XML_TAG_CLOSE;
147 b++;
148 } else
149 x = XML_TAG_OPEN;
150
151 e = strpbrk(b, WHITESPACE "/>");
152 if (!e)
153 return -EINVAL;
154
155 ret = strndup(b, e - b);
156 if (!ret)
157 return -ENOMEM;
158
159 *name = ret;
160 *p = e;
161 *state = INT_TO_PTR(STATE_TAG);
162
163 return x;
164 }
165
166 case STATE_TAG:
167
168 b = c + strspn(c, WHITESPACE);
169 if (*b == 0)
170 return -EINVAL;
171
172 inc_lines(line, c, b - c);
173
174 e = b + strcspn(b, WHITESPACE "=/>");
175 if (e > b) {
176 /* An attribute */
177
178 ret = strndup(b, e - b);
179 if (!ret)
180 return -ENOMEM;
181
182 *name = ret;
183 *p = e;
184 *state = INT_TO_PTR(STATE_ATTRIBUTE);
185
186 return XML_ATTRIBUTE_NAME;
187 }
188
189 if (startswith(b, "/>")) {
190 /* An empty tag */
191
192 *name = NULL; /* For empty tags we return a NULL name, the caller must be prepared for that */
193 *p = b + 2;
194 *state = INT_TO_PTR(STATE_TEXT);
195
196 return XML_TAG_CLOSE_EMPTY;
197 }
198
199 if (*b != '>')
200 return -EINVAL;
201
202 c = b + 1;
203 t = STATE_TEXT;
204 continue;
205
206 case STATE_ATTRIBUTE:
207
208 if (*c == '=') {
209 c++;
210
211 if (*c == '\'' || *c == '\"') {
212 /* Tag with a quoted value */
213
214 e = strchr(c+1, *c);
215 if (!e)
216 return -EINVAL;
217
218 inc_lines(line, c, e - c);
219
220 ret = strndup(c+1, e - c - 1);
221 if (!ret)
222 return -ENOMEM;
223
224 *name = ret;
225 *p = e + 1;
226 *state = INT_TO_PTR(STATE_TAG);
227
228 return XML_ATTRIBUTE_VALUE;
229
230 }
231
232 /* Tag with a value without quotes */
233
234 b = strpbrk(c, WHITESPACE ">");
235 if (!b)
236 b = c;
237
238 ret = strndup(c, b - c);
239 if (!ret)
240 return -ENOMEM;
241
242 *name = ret;
243 *p = b;
244 *state = INT_TO_PTR(STATE_TAG);
245 return XML_ATTRIBUTE_VALUE;
246 }
247
248 t = STATE_TAG;
249 continue;
250 }
251
252 }
253
254 assert_not_reached("Bad state");
255 }