]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/xml.c
Merge pull request #7388 from keszybz/doc-tweak
[thirdparty/systemd.git] / src / basic / xml.c
1 /***
2 This file is part of systemd.
3
4 Copyright 2013 Lennart Poettering
5
6 systemd is free software; you can redistribute it and/or modify it
7 under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version.
10
11 systemd is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public License
17 along with systemd; If not, see <http://www.gnu.org/licenses/>.
18 ***/
19
20 #include <errno.h>
21 #include <stddef.h>
22 #include <string.h>
23
24 #include "macro.h"
25 #include "string-util.h"
26 #include "xml.h"
27
/* Tokenizer states. xml_tokenize() stores one of these in the caller's opaque
 * "state" pointer between calls. */
enum {
        STATE_NULL      = 0, /* fresh parse: xml_tokenize() resets the line counter and moves on to STATE_TEXT */
        STATE_TEXT      = 1, /* outside of any tag: scanning text, comments, processing instructions, DTDs, tag starts */
        STATE_TAG       = 2, /* inside a tag, after its name or after an attribute value */
        STATE_ATTRIBUTE = 3, /* right after an attribute name, where an optional "=value" may follow */
};
34
/* Adds the number of '\n' bytes found in the first n bytes at s to *line.
 * A NULL line pointer means the caller does not track lines; nothing is done then. */
static void inc_lines(unsigned *line, const char *s, size_t n) {
        const char *cursor, *newline;
        size_t remaining;

        if (!line)
                return;

        cursor = s;
        remaining = n;

        while ((newline = memchr(cursor, '\n', remaining)) != NULL) {
                (*line)++;

                /* Skip everything up to and including the newline just counted */
                remaining -= (size_t) (newline - cursor) + 1;
                cursor = newline + 1;
        }
}
53
/* We don't actually do real XML here. We only read a simplistic
 * subset, that is a bit less strict than XML and lacks all the more
 * complex features, like entities, or namespaces. However, we do
 * support some HTML5-like simplifications */
58
59 int xml_tokenize(const char **p, char **name, void **state, unsigned *line) {
60 const char *c, *e, *b;
61 char *ret;
62 int t;
63
64 assert(p);
65 assert(*p);
66 assert(name);
67 assert(state);
68
69 t = PTR_TO_INT(*state);
70 c = *p;
71
72 if (t == STATE_NULL) {
73 if (line)
74 *line = 1;
75 t = STATE_TEXT;
76 }
77
78 for (;;) {
79 if (*c == 0)
80 return XML_END;
81
82 switch (t) {
83
84 case STATE_TEXT: {
85 int x;
86
87 e = strchrnul(c, '<');
88 if (e > c) {
89 /* More text... */
90 ret = strndup(c, e - c);
91 if (!ret)
92 return -ENOMEM;
93
94 inc_lines(line, c, e - c);
95
96 *name = ret;
97 *p = e;
98 *state = INT_TO_PTR(STATE_TEXT);
99
100 return XML_TEXT;
101 }
102
103 assert(*e == '<');
104 b = c + 1;
105
106 if (startswith(b, "!--")) {
107 /* A comment */
108 e = strstr(b + 3, "-->");
109 if (!e)
110 return -EINVAL;
111
112 inc_lines(line, b, e + 3 - b);
113
114 c = e + 3;
115 continue;
116 }
117
118 if (*b == '?') {
119 /* Processing instruction */
120
121 e = strstr(b + 1, "?>");
122 if (!e)
123 return -EINVAL;
124
125 inc_lines(line, b, e + 2 - b);
126
127 c = e + 2;
128 continue;
129 }
130
131 if (*b == '!') {
132 /* DTD */
133
134 e = strchr(b + 1, '>');
135 if (!e)
136 return -EINVAL;
137
138 inc_lines(line, b, e + 1 - b);
139
140 c = e + 1;
141 continue;
142 }
143
144 if (*b == '/') {
145 /* A closing tag */
146 x = XML_TAG_CLOSE;
147 b++;
148 } else
149 x = XML_TAG_OPEN;
150
151 e = strpbrk(b, WHITESPACE "/>");
152 if (!e)
153 return -EINVAL;
154
155 ret = strndup(b, e - b);
156 if (!ret)
157 return -ENOMEM;
158
159 *name = ret;
160 *p = e;
161 *state = INT_TO_PTR(STATE_TAG);
162
163 return x;
164 }
165
166 case STATE_TAG:
167
168 b = c + strspn(c, WHITESPACE);
169 if (*b == 0)
170 return -EINVAL;
171
172 inc_lines(line, c, b - c);
173
174 e = b + strcspn(b, WHITESPACE "=/>");
175 if (e > b) {
176 /* An attribute */
177
178 ret = strndup(b, e - b);
179 if (!ret)
180 return -ENOMEM;
181
182 *name = ret;
183 *p = e;
184 *state = INT_TO_PTR(STATE_ATTRIBUTE);
185
186 return XML_ATTRIBUTE_NAME;
187 }
188
189 if (startswith(b, "/>")) {
190 /* An empty tag */
191
192 *name = NULL; /* For empty tags we return a NULL name, the caller must be prepared for that */
193 *p = b + 2;
194 *state = INT_TO_PTR(STATE_TEXT);
195
196 return XML_TAG_CLOSE_EMPTY;
197 }
198
199 if (*b != '>')
200 return -EINVAL;
201
202 c = b + 1;
203 t = STATE_TEXT;
204 continue;
205
206 case STATE_ATTRIBUTE:
207
208 if (*c == '=') {
209 c++;
210
211 if (IN_SET(*c, '\'', '\"')) {
212 /* Tag with a quoted value */
213
214 e = strchr(c+1, *c);
215 if (!e)
216 return -EINVAL;
217
218 inc_lines(line, c, e - c);
219
220 ret = strndup(c+1, e - c - 1);
221 if (!ret)
222 return -ENOMEM;
223
224 *name = ret;
225 *p = e + 1;
226 *state = INT_TO_PTR(STATE_TAG);
227
228 return XML_ATTRIBUTE_VALUE;
229
230 }
231
232 /* Tag with a value without quotes */
233
234 b = strpbrk(c, WHITESPACE ">");
235 if (!b)
236 b = c;
237
238 ret = strndup(c, b - c);
239 if (!ret)
240 return -ENOMEM;
241
242 *name = ret;
243 *p = b;
244 *state = INT_TO_PTR(STATE_TAG);
245 return XML_ATTRIBUTE_VALUE;
246 }
247
248 t = STATE_TAG;
249 continue;
250 }
251
252 }
253
254 assert_not_reached("Bad state");
255 }