]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/xml.c
Merge pull request #1607 from keszybz/lz4-remove-v1
[thirdparty/systemd.git] / src / basic / xml.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright 2013 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <errno.h>
23 #include <stddef.h>
24 #include <string.h>
25
26 #include "macro.h"
27 #include "string-util.h"
28 #include "xml.h"
29
/* Tokenizer states. xml_tokenize() stores the current one in the caller's
 * opaque state pointer (INT_TO_PTR) and reads it back (PTR_TO_INT) on the
 * next call. */
enum {
        /* Fresh state: xml_tokenize() resets the line counter to 1 and
         * carries on as if in STATE_TEXT. */
        STATE_NULL = 0,

        /* Outside of any tag: character data, comments, processing
         * instructions, DTD declarations, or the start of a tag. */
        STATE_TEXT = 1,

        /* Inside a tag, after its name or after an attribute value:
         * an attribute name, "/>" or ">" comes next. */
        STATE_TAG = 2,

        /* Right after an attribute name: an optional "=value" comes next. */
        STATE_ATTRIBUTE = 3,
};
36
/* Adds the number of '\n' bytes found in the n bytes starting at s to *line.
 * Does nothing if line is NULL. The scan is length-bounded (memchr), so it
 * does not stop at NUL bytes. */
static void inc_lines(unsigned *line, const char *s, size_t n) {
        const char *cursor, *end, *nl;

        if (!line)
                return;

        end = s + n;

        /* Hop from newline to newline until none is left in [cursor, end). */
        for (cursor = s; (nl = memchr(cursor, '\n', (size_t) (end - cursor))); cursor = nl + 1)
                (*line)++;
}
55
/* We don't actually do real XML here. We only read a simplistic
 * subset, that is a bit less strict than XML and lacks all the more
 * complex features, like entities, or namespaces. However, we do
 * support some HTML5-like simplifications. */
60
61 int xml_tokenize(const char **p, char **name, void **state, unsigned *line) {
62 const char *c, *e, *b;
63 char *ret;
64 int t;
65
66 assert(p);
67 assert(*p);
68 assert(name);
69 assert(state);
70
71 t = PTR_TO_INT(*state);
72 c = *p;
73
74 if (t == STATE_NULL) {
75 if (line)
76 *line = 1;
77 t = STATE_TEXT;
78 }
79
80 for (;;) {
81 if (*c == 0)
82 return XML_END;
83
84 switch (t) {
85
86 case STATE_TEXT: {
87 int x;
88
89 e = strchrnul(c, '<');
90 if (e > c) {
91 /* More text... */
92 ret = strndup(c, e - c);
93 if (!ret)
94 return -ENOMEM;
95
96 inc_lines(line, c, e - c);
97
98 *name = ret;
99 *p = e;
100 *state = INT_TO_PTR(STATE_TEXT);
101
102 return XML_TEXT;
103 }
104
105 assert(*e == '<');
106 b = c + 1;
107
108 if (startswith(b, "!--")) {
109 /* A comment */
110 e = strstr(b + 3, "-->");
111 if (!e)
112 return -EINVAL;
113
114 inc_lines(line, b, e + 3 - b);
115
116 c = e + 3;
117 continue;
118 }
119
120 if (*b == '?') {
121 /* Processing instruction */
122
123 e = strstr(b + 1, "?>");
124 if (!e)
125 return -EINVAL;
126
127 inc_lines(line, b, e + 2 - b);
128
129 c = e + 2;
130 continue;
131 }
132
133 if (*b == '!') {
134 /* DTD */
135
136 e = strchr(b + 1, '>');
137 if (!e)
138 return -EINVAL;
139
140 inc_lines(line, b, e + 1 - b);
141
142 c = e + 1;
143 continue;
144 }
145
146 if (*b == '/') {
147 /* A closing tag */
148 x = XML_TAG_CLOSE;
149 b++;
150 } else
151 x = XML_TAG_OPEN;
152
153 e = strpbrk(b, WHITESPACE "/>");
154 if (!e)
155 return -EINVAL;
156
157 ret = strndup(b, e - b);
158 if (!ret)
159 return -ENOMEM;
160
161 *name = ret;
162 *p = e;
163 *state = INT_TO_PTR(STATE_TAG);
164
165 return x;
166 }
167
168 case STATE_TAG:
169
170 b = c + strspn(c, WHITESPACE);
171 if (*b == 0)
172 return -EINVAL;
173
174 inc_lines(line, c, b - c);
175
176 e = b + strcspn(b, WHITESPACE "=/>");
177 if (e > b) {
178 /* An attribute */
179
180 ret = strndup(b, e - b);
181 if (!ret)
182 return -ENOMEM;
183
184 *name = ret;
185 *p = e;
186 *state = INT_TO_PTR(STATE_ATTRIBUTE);
187
188 return XML_ATTRIBUTE_NAME;
189 }
190
191 if (startswith(b, "/>")) {
192 /* An empty tag */
193
194 *name = NULL; /* For empty tags we return a NULL name, the caller must be prepared for that */
195 *p = b + 2;
196 *state = INT_TO_PTR(STATE_TEXT);
197
198 return XML_TAG_CLOSE_EMPTY;
199 }
200
201 if (*b != '>')
202 return -EINVAL;
203
204 c = b + 1;
205 t = STATE_TEXT;
206 continue;
207
208 case STATE_ATTRIBUTE:
209
210 if (*c == '=') {
211 c++;
212
213 if (*c == '\'' || *c == '\"') {
214 /* Tag with a quoted value */
215
216 e = strchr(c+1, *c);
217 if (!e)
218 return -EINVAL;
219
220 inc_lines(line, c, e - c);
221
222 ret = strndup(c+1, e - c - 1);
223 if (!ret)
224 return -ENOMEM;
225
226 *name = ret;
227 *p = e + 1;
228 *state = INT_TO_PTR(STATE_TAG);
229
230 return XML_ATTRIBUTE_VALUE;
231
232 }
233
234 /* Tag with a value without quotes */
235
236 b = strpbrk(c, WHITESPACE ">");
237 if (!b)
238 b = c;
239
240 ret = strndup(c, b - c);
241 if (!ret)
242 return -ENOMEM;
243
244 *name = ret;
245 *p = b;
246 *state = INT_TO_PTR(STATE_TAG);
247 return XML_ATTRIBUTE_VALUE;
248 }
249
250 t = STATE_TAG;
251 continue;
252 }
253
254 }
255
256 assert_not_reached("Bad state");
257 }