]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/xml.c
tree-wide: drop 'This file is part of systemd' blurb
[thirdparty/systemd.git] / src / basic / xml.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2 /***
3 Copyright 2013 Lennart Poettering
4 ***/
5
6 #include <errno.h>
7 #include <stddef.h>
8 #include <string.h>
9
10 #include "macro.h"
11 #include "string-util.h"
12 #include "xml.h"
13
/* Tokenizer states. xml_tokenize() stores the current one in the caller's opaque state pointer
 * between calls. */
enum {
        STATE_NULL      = 0, /* First call: xml_tokenize() resets the line counter and switches to STATE_TEXT */
        STATE_TEXT      = 1, /* Outside of any tag: expecting text, a comment, a PI, a DTD or a tag start */
        STATE_TAG       = 2, /* Inside a tag, after its name or after an attribute value */
        STATE_ATTRIBUTE = 3, /* Right after an attribute name: an optional "=value" may follow */
};
20
/* Adds the number of '\n' bytes found in the first n bytes of s to *line.
 * If line is NULL nothing is counted and the call is a no-op. */
static void inc_lines(unsigned *line, const char *s, size_t n) {
        const char *cursor = s, *newline;
        size_t remaining = n;

        if (!line)
                return;

        while ((newline = memchr(cursor, '\n', remaining)) != NULL) {
                /* Step over the newline just found and keep scanning the rest of the range */
                remaining -= (size_t) (newline - cursor) + 1;
                cursor = newline + 1;
                (*line)++;
        }
}
39
/* We don't actually do real XML here. We only read a simplistic
 * subset, that is a bit less strict than XML and lacks all the more
 * complex features, like entities, or namespaces. However, we do
 * support some HTML5-like simplifications */
44
45 int xml_tokenize(const char **p, char **name, void **state, unsigned *line) {
46 const char *c, *e, *b;
47 char *ret;
48 int t;
49
50 assert(p);
51 assert(*p);
52 assert(name);
53 assert(state);
54
55 t = PTR_TO_INT(*state);
56 c = *p;
57
58 if (t == STATE_NULL) {
59 if (line)
60 *line = 1;
61 t = STATE_TEXT;
62 }
63
64 for (;;) {
65 if (*c == 0)
66 return XML_END;
67
68 switch (t) {
69
70 case STATE_TEXT: {
71 int x;
72
73 e = strchrnul(c, '<');
74 if (e > c) {
75 /* More text... */
76 ret = strndup(c, e - c);
77 if (!ret)
78 return -ENOMEM;
79
80 inc_lines(line, c, e - c);
81
82 *name = ret;
83 *p = e;
84 *state = INT_TO_PTR(STATE_TEXT);
85
86 return XML_TEXT;
87 }
88
89 assert(*e == '<');
90 b = c + 1;
91
92 if (startswith(b, "!--")) {
93 /* A comment */
94 e = strstr(b + 3, "-->");
95 if (!e)
96 return -EINVAL;
97
98 inc_lines(line, b, e + 3 - b);
99
100 c = e + 3;
101 continue;
102 }
103
104 if (*b == '?') {
105 /* Processing instruction */
106
107 e = strstr(b + 1, "?>");
108 if (!e)
109 return -EINVAL;
110
111 inc_lines(line, b, e + 2 - b);
112
113 c = e + 2;
114 continue;
115 }
116
117 if (*b == '!') {
118 /* DTD */
119
120 e = strchr(b + 1, '>');
121 if (!e)
122 return -EINVAL;
123
124 inc_lines(line, b, e + 1 - b);
125
126 c = e + 1;
127 continue;
128 }
129
130 if (*b == '/') {
131 /* A closing tag */
132 x = XML_TAG_CLOSE;
133 b++;
134 } else
135 x = XML_TAG_OPEN;
136
137 e = strpbrk(b, WHITESPACE "/>");
138 if (!e)
139 return -EINVAL;
140
141 ret = strndup(b, e - b);
142 if (!ret)
143 return -ENOMEM;
144
145 *name = ret;
146 *p = e;
147 *state = INT_TO_PTR(STATE_TAG);
148
149 return x;
150 }
151
152 case STATE_TAG:
153
154 b = c + strspn(c, WHITESPACE);
155 if (*b == 0)
156 return -EINVAL;
157
158 inc_lines(line, c, b - c);
159
160 e = b + strcspn(b, WHITESPACE "=/>");
161 if (e > b) {
162 /* An attribute */
163
164 ret = strndup(b, e - b);
165 if (!ret)
166 return -ENOMEM;
167
168 *name = ret;
169 *p = e;
170 *state = INT_TO_PTR(STATE_ATTRIBUTE);
171
172 return XML_ATTRIBUTE_NAME;
173 }
174
175 if (startswith(b, "/>")) {
176 /* An empty tag */
177
178 *name = NULL; /* For empty tags we return a NULL name, the caller must be prepared for that */
179 *p = b + 2;
180 *state = INT_TO_PTR(STATE_TEXT);
181
182 return XML_TAG_CLOSE_EMPTY;
183 }
184
185 if (*b != '>')
186 return -EINVAL;
187
188 c = b + 1;
189 t = STATE_TEXT;
190 continue;
191
192 case STATE_ATTRIBUTE:
193
194 if (*c == '=') {
195 c++;
196
197 if (IN_SET(*c, '\'', '\"')) {
198 /* Tag with a quoted value */
199
200 e = strchr(c+1, *c);
201 if (!e)
202 return -EINVAL;
203
204 inc_lines(line, c, e - c);
205
206 ret = strndup(c+1, e - c - 1);
207 if (!ret)
208 return -ENOMEM;
209
210 *name = ret;
211 *p = e + 1;
212 *state = INT_TO_PTR(STATE_TAG);
213
214 return XML_ATTRIBUTE_VALUE;
215
216 }
217
218 /* Tag with a value without quotes */
219
220 b = strpbrk(c, WHITESPACE ">");
221 if (!b)
222 b = c;
223
224 ret = strndup(c, b - c);
225 if (!ret)
226 return -ENOMEM;
227
228 *name = ret;
229 *p = b;
230 *state = INT_TO_PTR(STATE_TAG);
231 return XML_ATTRIBUTE_VALUE;
232 }
233
234 t = STATE_TAG;
235 continue;
236 }
237
238 }
239
240 assert_not_reached("Bad state");
241 }