]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/xml.c
Add SPDX license identifiers to source files under the LGPL
[thirdparty/systemd.git] / src / basic / xml.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2 /***
3 This file is part of systemd.
4
5 Copyright 2013 Lennart Poettering
6
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
11
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
16
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
19 ***/
20
21 #include <errno.h>
22 #include <stddef.h>
23 #include <string.h>
24
25 #include "macro.h"
26 #include "string-util.h"
27 #include "xml.h"
28
/* Tokenizer states. xml_tokenize() decodes one of these from its opaque
 * "state" pointer and encodes the next one back before returning a token. */
enum {
        STATE_NULL      = 0, /* initial state; presumably what a NULL state pointer decodes to */
        STATE_TEXT      = 1, /* outside of any tag: character data, comments, PIs, DTDs, tag starts */
        STATE_TAG       = 2, /* inside a tag, after its name or after an attribute value */
        STATE_ATTRIBUTE = 3, /* right after an attribute name, possibly before "=value" */
};
35
/* Adds the number of '\n' bytes found in the first n bytes of s to *line.
 * Does nothing when line is NULL, i.e. when line tracking is not wanted. */
static void inc_lines(unsigned *line, const char *s, size_t n) {
        const char *cursor = s, *newline;
        size_t remaining = n;

        if (!line)
                return;

        /* Hop from newline to newline, shrinking the window each time */
        while ((newline = memchr(cursor, '\n', remaining))) {
                (*line)++;
                remaining -= (size_t) (newline - cursor) + 1;
                cursor = newline + 1;
        }
}
54
/* We don't actually do real XML here. We only read a simplistic
 * subset that is a bit less strict than XML and lacks all the more
 * complex features, like entities or namespaces. However, we do
 * support some HTML5-like simplifications. */
59
60 int xml_tokenize(const char **p, char **name, void **state, unsigned *line) {
61 const char *c, *e, *b;
62 char *ret;
63 int t;
64
65 assert(p);
66 assert(*p);
67 assert(name);
68 assert(state);
69
70 t = PTR_TO_INT(*state);
71 c = *p;
72
73 if (t == STATE_NULL) {
74 if (line)
75 *line = 1;
76 t = STATE_TEXT;
77 }
78
79 for (;;) {
80 if (*c == 0)
81 return XML_END;
82
83 switch (t) {
84
85 case STATE_TEXT: {
86 int x;
87
88 e = strchrnul(c, '<');
89 if (e > c) {
90 /* More text... */
91 ret = strndup(c, e - c);
92 if (!ret)
93 return -ENOMEM;
94
95 inc_lines(line, c, e - c);
96
97 *name = ret;
98 *p = e;
99 *state = INT_TO_PTR(STATE_TEXT);
100
101 return XML_TEXT;
102 }
103
104 assert(*e == '<');
105 b = c + 1;
106
107 if (startswith(b, "!--")) {
108 /* A comment */
109 e = strstr(b + 3, "-->");
110 if (!e)
111 return -EINVAL;
112
113 inc_lines(line, b, e + 3 - b);
114
115 c = e + 3;
116 continue;
117 }
118
119 if (*b == '?') {
120 /* Processing instruction */
121
122 e = strstr(b + 1, "?>");
123 if (!e)
124 return -EINVAL;
125
126 inc_lines(line, b, e + 2 - b);
127
128 c = e + 2;
129 continue;
130 }
131
132 if (*b == '!') {
133 /* DTD */
134
135 e = strchr(b + 1, '>');
136 if (!e)
137 return -EINVAL;
138
139 inc_lines(line, b, e + 1 - b);
140
141 c = e + 1;
142 continue;
143 }
144
145 if (*b == '/') {
146 /* A closing tag */
147 x = XML_TAG_CLOSE;
148 b++;
149 } else
150 x = XML_TAG_OPEN;
151
152 e = strpbrk(b, WHITESPACE "/>");
153 if (!e)
154 return -EINVAL;
155
156 ret = strndup(b, e - b);
157 if (!ret)
158 return -ENOMEM;
159
160 *name = ret;
161 *p = e;
162 *state = INT_TO_PTR(STATE_TAG);
163
164 return x;
165 }
166
167 case STATE_TAG:
168
169 b = c + strspn(c, WHITESPACE);
170 if (*b == 0)
171 return -EINVAL;
172
173 inc_lines(line, c, b - c);
174
175 e = b + strcspn(b, WHITESPACE "=/>");
176 if (e > b) {
177 /* An attribute */
178
179 ret = strndup(b, e - b);
180 if (!ret)
181 return -ENOMEM;
182
183 *name = ret;
184 *p = e;
185 *state = INT_TO_PTR(STATE_ATTRIBUTE);
186
187 return XML_ATTRIBUTE_NAME;
188 }
189
190 if (startswith(b, "/>")) {
191 /* An empty tag */
192
193 *name = NULL; /* For empty tags we return a NULL name, the caller must be prepared for that */
194 *p = b + 2;
195 *state = INT_TO_PTR(STATE_TEXT);
196
197 return XML_TAG_CLOSE_EMPTY;
198 }
199
200 if (*b != '>')
201 return -EINVAL;
202
203 c = b + 1;
204 t = STATE_TEXT;
205 continue;
206
207 case STATE_ATTRIBUTE:
208
209 if (*c == '=') {
210 c++;
211
212 if (IN_SET(*c, '\'', '\"')) {
213 /* Tag with a quoted value */
214
215 e = strchr(c+1, *c);
216 if (!e)
217 return -EINVAL;
218
219 inc_lines(line, c, e - c);
220
221 ret = strndup(c+1, e - c - 1);
222 if (!ret)
223 return -ENOMEM;
224
225 *name = ret;
226 *p = e + 1;
227 *state = INT_TO_PTR(STATE_TAG);
228
229 return XML_ATTRIBUTE_VALUE;
230
231 }
232
233 /* Tag with a value without quotes */
234
235 b = strpbrk(c, WHITESPACE ">");
236 if (!b)
237 b = c;
238
239 ret = strndup(c, b - c);
240 if (!ret)
241 return -ENOMEM;
242
243 *name = ret;
244 *p = b;
245 *state = INT_TO_PTR(STATE_TAG);
246 return XML_ATTRIBUTE_VALUE;
247 }
248
249 t = STATE_TAG;
250 continue;
251 }
252
253 }
254
255 assert_not_reached("Bad state");
256 }