]>
Commit | Line | Data |
---|---|---|
53e1b683 | 1 | /* SPDX-License-Identifier: LGPL-2.1+ */ |
08bcebf3 LP |
2 | /*** |
3 | This file is part of systemd. | |
4 | ||
5 | Copyright 2013 Lennart Poettering | |
6 | ||
7 | systemd is free software; you can redistribute it and/or modify it | |
8 | under the terms of the GNU Lesser General Public License as published by | |
9 | the Free Software Foundation; either version 2.1 of the License, or | |
10 | (at your option) any later version. | |
11 | ||
12 | systemd is distributed in the hope that it will be useful, but | |
13 | WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 | Lesser General Public License for more details. | |
16 | ||
17 | You should have received a copy of the GNU Lesser General Public License | |
18 | along with systemd; If not, see <http://www.gnu.org/licenses/>. | |
19 | ***/ | |
20 | ||
11c3a366 TA |
21 | #include <errno.h> |
22 | #include <stddef.h> | |
08bcebf3 LP |
23 | #include <string.h> |
24 | ||
11c3a366 | 25 | #include "macro.h" |
07630cea | 26 | #include "string-util.h" |
08bcebf3 LP |
27 | #include "xml.h" |
28 | ||
/* Tokenizer states. xml_tokenize() keeps the current one in the caller-provided
 * opaque state pointer between calls. */
enum {
        STATE_NULL = 0,  /* nothing tokenized yet: the line counter is reset and parsing starts as text */
        STATE_TEXT,      /* outside of a tag: text, comments, processing instructions, DTDs, or a tag start */
        STATE_TAG,       /* inside a tag, after its name: attributes or the end of the tag follow */
        STATE_ATTRIBUTE, /* right after an attribute name: an optional "=value" follows */
};
35 | ||
bcf3295d LP |
/* Adds the number of '\n' bytes found in the first n bytes of s to *line.
 * If line is NULL, line counting is disabled and nothing is done. */
static void inc_lines(unsigned *line, const char *s, size_t n) {
        const char *cursor, *newline;
        size_t left;

        if (!line)
                return;

        cursor = s;
        left = n;

        /* Hop from newline to newline until the remaining window holds none */
        while ((newline = memchr(cursor, '\n', left)) != NULL) {
                (*line)++;

                /* Drop everything up to and including the newline just counted */
                left -= (size_t) (newline - cursor) + 1;
                cursor = newline + 1;
        }
}
54 | ||
08bcebf3 LP |
55 | /* We don't actually do real XML here. We only read a simplistic |
56 | * subset, that is a bit less strict than XML and lacks all the more | |
57 | * complex features, like entities, or namespaces. However, we do | |
58 | * support some HTML5-like simplifications */ | |
59 | ||
bcf3295d | 60 | int xml_tokenize(const char **p, char **name, void **state, unsigned *line) { |
08bcebf3 LP |
61 | const char *c, *e, *b; |
62 | char *ret; | |
63 | int t; | |
64 | ||
65 | assert(p); | |
66 | assert(*p); | |
67 | assert(name); | |
68 | assert(state); | |
69 | ||
70 | t = PTR_TO_INT(*state); | |
71 | c = *p; | |
72 | ||
bcf3295d LP |
73 | if (t == STATE_NULL) { |
74 | if (line) | |
75 | *line = 1; | |
76 | t = STATE_TEXT; | |
77 | } | |
78 | ||
08bcebf3 LP |
79 | for (;;) { |
80 | if (*c == 0) | |
81 | return XML_END; | |
82 | ||
83 | switch (t) { | |
84 | ||
85 | case STATE_TEXT: { | |
86 | int x; | |
87 | ||
88 | e = strchrnul(c, '<'); | |
89 | if (e > c) { | |
90 | /* More text... */ | |
91 | ret = strndup(c, e - c); | |
92 | if (!ret) | |
93 | return -ENOMEM; | |
94 | ||
bcf3295d LP |
95 | inc_lines(line, c, e - c); |
96 | ||
08bcebf3 LP |
97 | *name = ret; |
98 | *p = e; | |
99 | *state = INT_TO_PTR(STATE_TEXT); | |
100 | ||
101 | return XML_TEXT; | |
102 | } | |
103 | ||
104 | assert(*e == '<'); | |
105 | b = c + 1; | |
106 | ||
107 | if (startswith(b, "!--")) { | |
108 | /* A comment */ | |
109 | e = strstr(b + 3, "-->"); | |
110 | if (!e) | |
111 | return -EINVAL; | |
112 | ||
bcf3295d LP |
113 | inc_lines(line, b, e + 3 - b); |
114 | ||
08bcebf3 LP |
115 | c = e + 3; |
116 | continue; | |
117 | } | |
118 | ||
119 | if (*b == '?') { | |
120 | /* Processing instruction */ | |
121 | ||
122 | e = strstr(b + 1, "?>"); | |
123 | if (!e) | |
124 | return -EINVAL; | |
125 | ||
bcf3295d LP |
126 | inc_lines(line, b, e + 2 - b); |
127 | ||
08bcebf3 LP |
128 | c = e + 2; |
129 | continue; | |
130 | } | |
131 | ||
132 | if (*b == '!') { | |
133 | /* DTD */ | |
134 | ||
135 | e = strchr(b + 1, '>'); | |
136 | if (!e) | |
137 | return -EINVAL; | |
138 | ||
bcf3295d LP |
139 | inc_lines(line, b, e + 1 - b); |
140 | ||
08bcebf3 LP |
141 | c = e + 1; |
142 | continue; | |
143 | } | |
144 | ||
145 | if (*b == '/') { | |
146 | /* A closing tag */ | |
147 | x = XML_TAG_CLOSE; | |
148 | b++; | |
149 | } else | |
150 | x = XML_TAG_OPEN; | |
151 | ||
152 | e = strpbrk(b, WHITESPACE "/>"); | |
153 | if (!e) | |
154 | return -EINVAL; | |
155 | ||
156 | ret = strndup(b, e - b); | |
157 | if (!ret) | |
158 | return -ENOMEM; | |
159 | ||
160 | *name = ret; | |
161 | *p = e; | |
162 | *state = INT_TO_PTR(STATE_TAG); | |
163 | ||
164 | return x; | |
165 | } | |
166 | ||
167 | case STATE_TAG: | |
168 | ||
169 | b = c + strspn(c, WHITESPACE); | |
170 | if (*b == 0) | |
171 | return -EINVAL; | |
172 | ||
bcf3295d LP |
173 | inc_lines(line, c, b - c); |
174 | ||
08bcebf3 LP |
175 | e = b + strcspn(b, WHITESPACE "=/>"); |
176 | if (e > b) { | |
177 | /* An attribute */ | |
178 | ||
179 | ret = strndup(b, e - b); | |
180 | if (!ret) | |
181 | return -ENOMEM; | |
182 | ||
183 | *name = ret; | |
184 | *p = e; | |
185 | *state = INT_TO_PTR(STATE_ATTRIBUTE); | |
186 | ||
187 | return XML_ATTRIBUTE_NAME; | |
188 | } | |
189 | ||
190 | if (startswith(b, "/>")) { | |
191 | /* An empty tag */ | |
192 | ||
193 | *name = NULL; /* For empty tags we return a NULL name, the caller must be prepared for that */ | |
194 | *p = b + 2; | |
195 | *state = INT_TO_PTR(STATE_TEXT); | |
196 | ||
197 | return XML_TAG_CLOSE_EMPTY; | |
198 | } | |
199 | ||
200 | if (*b != '>') | |
201 | return -EINVAL; | |
202 | ||
203 | c = b + 1; | |
204 | t = STATE_TEXT; | |
205 | continue; | |
206 | ||
207 | case STATE_ATTRIBUTE: | |
208 | ||
209 | if (*c == '=') { | |
210 | c++; | |
211 | ||
4c701096 | 212 | if (IN_SET(*c, '\'', '\"')) { |
08bcebf3 LP |
213 | /* Tag with a quoted value */ |
214 | ||
215 | e = strchr(c+1, *c); | |
216 | if (!e) | |
217 | return -EINVAL; | |
218 | ||
bcf3295d LP |
219 | inc_lines(line, c, e - c); |
220 | ||
08bcebf3 LP |
221 | ret = strndup(c+1, e - c - 1); |
222 | if (!ret) | |
223 | return -ENOMEM; | |
224 | ||
225 | *name = ret; | |
226 | *p = e + 1; | |
227 | *state = INT_TO_PTR(STATE_TAG); | |
228 | ||
229 | return XML_ATTRIBUTE_VALUE; | |
230 | ||
231 | } | |
232 | ||
233 | /* Tag with a value without quotes */ | |
234 | ||
235 | b = strpbrk(c, WHITESPACE ">"); | |
236 | if (!b) | |
237 | b = c; | |
238 | ||
239 | ret = strndup(c, b - c); | |
240 | if (!ret) | |
241 | return -ENOMEM; | |
242 | ||
243 | *name = ret; | |
244 | *p = b; | |
245 | *state = INT_TO_PTR(STATE_TAG); | |
246 | return XML_ATTRIBUTE_VALUE; | |
247 | } | |
248 | ||
249 | t = STATE_TAG; | |
250 | continue; | |
251 | } | |
252 | ||
253 | } | |
254 | ||
255 | assert_not_reached("Bad state"); | |
256 | } |