]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/xml.c
15c629b1884ca172ac16e719ba3bd9eec5aba8a0
[thirdparty/systemd.git] / src / basic / xml.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright 2013 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <string.h>
23
24 #include "util.h"
25 #include "xml.h"
26
/* Tokenizer states. xml_tokenize() stores the current one in the caller's
 * opaque "state" pointer between calls (via INT_TO_PTR/PTR_TO_INT). */
enum {
        /* First call: the line counter is reset to 1 and parsing proceeds
         * as STATE_TEXT. NOTE(review): presumably this must stay 0 so that
         * a NULL state pointer maps to it -- confirm against PTR_TO_INT. */
        STATE_NULL = 0,

        /* Outside of any tag: text, comments, processing instructions,
         * DTD declarations or the start of a tag are expected. */
        STATE_TEXT = 1,

        /* Inside a tag, after its name or after an attribute value: an
         * attribute name, "/>" or ">" is expected. */
        STATE_TAG = 2,

        /* Right after an attribute name: an optional "=value" is expected. */
        STATE_ATTRIBUTE = 3,
};
33
/* Adds the number of '\n' bytes found within the first n bytes of s to
 * *line. A NULL line pointer turns the call into a no-op. */
static void inc_lines(unsigned *line, const char *s, size_t n) {
        const char *cursor = s;
        const char *nl;

        if (!line)
                return;

        while ((nl = memchr(cursor, '\n', n)) != NULL) {
                /* Skip everything up to and including the newline just
                 * found, shrinking the remaining window accordingly. */
                n -= (nl - cursor) + 1;
                cursor = nl + 1;
                ++*line;
        }
}
52
53 /* We don't actually do real XML here. We only read a simplistic
54 * subset, that is a bit less strict than XML and lacks all the more
55 * complex features, like entities, or namespaces. However, we do
56 * support some HTML5-like simplifications. */
57
58 int xml_tokenize(const char **p, char **name, void **state, unsigned *line) {
59 const char *c, *e, *b;
60 char *ret;
61 int t;
62
63 assert(p);
64 assert(*p);
65 assert(name);
66 assert(state);
67
68 t = PTR_TO_INT(*state);
69 c = *p;
70
71 if (t == STATE_NULL) {
72 if (line)
73 *line = 1;
74 t = STATE_TEXT;
75 }
76
77 for (;;) {
78 if (*c == 0)
79 return XML_END;
80
81 switch (t) {
82
83 case STATE_TEXT: {
84 int x;
85
86 e = strchrnul(c, '<');
87 if (e > c) {
88 /* More text... */
89 ret = strndup(c, e - c);
90 if (!ret)
91 return -ENOMEM;
92
93 inc_lines(line, c, e - c);
94
95 *name = ret;
96 *p = e;
97 *state = INT_TO_PTR(STATE_TEXT);
98
99 return XML_TEXT;
100 }
101
102 assert(*e == '<');
103 b = c + 1;
104
105 if (startswith(b, "!--")) {
106 /* A comment */
107 e = strstr(b + 3, "-->");
108 if (!e)
109 return -EINVAL;
110
111 inc_lines(line, b, e + 3 - b);
112
113 c = e + 3;
114 continue;
115 }
116
117 if (*b == '?') {
118 /* Processing instruction */
119
120 e = strstr(b + 1, "?>");
121 if (!e)
122 return -EINVAL;
123
124 inc_lines(line, b, e + 2 - b);
125
126 c = e + 2;
127 continue;
128 }
129
130 if (*b == '!') {
131 /* DTD */
132
133 e = strchr(b + 1, '>');
134 if (!e)
135 return -EINVAL;
136
137 inc_lines(line, b, e + 1 - b);
138
139 c = e + 1;
140 continue;
141 }
142
143 if (*b == '/') {
144 /* A closing tag */
145 x = XML_TAG_CLOSE;
146 b++;
147 } else
148 x = XML_TAG_OPEN;
149
150 e = strpbrk(b, WHITESPACE "/>");
151 if (!e)
152 return -EINVAL;
153
154 ret = strndup(b, e - b);
155 if (!ret)
156 return -ENOMEM;
157
158 *name = ret;
159 *p = e;
160 *state = INT_TO_PTR(STATE_TAG);
161
162 return x;
163 }
164
165 case STATE_TAG:
166
167 b = c + strspn(c, WHITESPACE);
168 if (*b == 0)
169 return -EINVAL;
170
171 inc_lines(line, c, b - c);
172
173 e = b + strcspn(b, WHITESPACE "=/>");
174 if (e > b) {
175 /* An attribute */
176
177 ret = strndup(b, e - b);
178 if (!ret)
179 return -ENOMEM;
180
181 *name = ret;
182 *p = e;
183 *state = INT_TO_PTR(STATE_ATTRIBUTE);
184
185 return XML_ATTRIBUTE_NAME;
186 }
187
188 if (startswith(b, "/>")) {
189 /* An empty tag */
190
191 *name = NULL; /* For empty tags we return a NULL name, the caller must be prepared for that */
192 *p = b + 2;
193 *state = INT_TO_PTR(STATE_TEXT);
194
195 return XML_TAG_CLOSE_EMPTY;
196 }
197
198 if (*b != '>')
199 return -EINVAL;
200
201 c = b + 1;
202 t = STATE_TEXT;
203 continue;
204
205 case STATE_ATTRIBUTE:
206
207 if (*c == '=') {
208 c++;
209
210 if (*c == '\'' || *c == '\"') {
211 /* Tag with a quoted value */
212
213 e = strchr(c+1, *c);
214 if (!e)
215 return -EINVAL;
216
217 inc_lines(line, c, e - c);
218
219 ret = strndup(c+1, e - c - 1);
220 if (!ret)
221 return -ENOMEM;
222
223 *name = ret;
224 *p = e + 1;
225 *state = INT_TO_PTR(STATE_TAG);
226
227 return XML_ATTRIBUTE_VALUE;
228
229 }
230
231 /* Tag with a value without quotes */
232
233 b = strpbrk(c, WHITESPACE ">");
234 if (!b)
235 b = c;
236
237 ret = strndup(c, b - c);
238 if (!ret)
239 return -ENOMEM;
240
241 *name = ret;
242 *p = b;
243 *state = INT_TO_PTR(STATE_TAG);
244 return XML_ATTRIBUTE_VALUE;
245 }
246
247 t = STATE_TAG;
248 continue;
249 }
250
251 }
252
253 assert_not_reached("Bad state");
254 }