]>
Commit | Line | Data |
---|---|---|
08bcebf3 LP |
1 | /*** |
2 | This file is part of systemd. | |
3 | ||
4 | Copyright 2013 Lennart Poettering | |
5 | ||
6 | systemd is free software; you can redistribute it and/or modify it | |
7 | under the terms of the GNU Lesser General Public License as published by | |
8 | the Free Software Foundation; either version 2.1 of the License, or | |
9 | (at your option) any later version. | |
10 | ||
11 | systemd is distributed in the hope that it will be useful, but | |
12 | WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | Lesser General Public License for more details. | |
15 | ||
16 | You should have received a copy of the GNU Lesser General Public License | |
17 | along with systemd; If not, see <http://www.gnu.org/licenses/>. | |
18 | ***/ | |
19 | ||
11c3a366 TA |
20 | #include <errno.h> |
21 | #include <stddef.h> | |
08bcebf3 LP |
22 | #include <string.h> |
23 | ||
11c3a366 | 24 | #include "macro.h" |
07630cea | 25 | #include "string-util.h" |
08bcebf3 LP |
26 | #include "xml.h" |
27 | ||
/* Tokenizer states. xml_tokenize() keeps the current one in the caller's
 * opaque state pointer between calls. */
enum {
        /* No token read yet; xml_tokenize() resets the line counter and
         * continues as STATE_TEXT. */
        STATE_NULL      = 0,

        /* Outside of any tag: character data, comments, processing
         * instructions, DTD declarations, or the start of a tag. */
        STATE_TEXT      = 1,

        /* Inside a tag, after its name or after a complete attribute. */
        STATE_TAG       = 2,

        /* Right after an attribute name; an optional "=value" may follow. */
        STATE_ATTRIBUTE = 3,
};
34 | ||
bcf3295d LP |
/* Adds the number of '\n' bytes found in the first n bytes of s to *line.
 *
 * line may be NULL, in which case nothing is counted. */
static void inc_lines(unsigned *line, const char *s, size_t n) {
        const char *cursor = s, *newline;

        if (line == NULL)
                return;

        /* Jump from newline to newline; n always holds the number of bytes
         * that are still unscanned starting at cursor. */
        while ((newline = memchr(cursor, '\n', n)) != NULL) {
                n -= (size_t) (newline - cursor) + 1;
                cursor = newline + 1;
                ++*line;
        }
}
53 | ||
08bcebf3 LP |
54 | /* We don't actually do real XML here. We only read a simplistic |
55 | * subset, that is a bit less strict than XML and lacks all the more | |
56 | * complex features, like entities, or namespaces. However, we do | |
57 | * support some HTML5-like simplifications */ | |
58 | ||
bcf3295d | 59 | int xml_tokenize(const char **p, char **name, void **state, unsigned *line) { |
08bcebf3 LP |
60 | const char *c, *e, *b; |
61 | char *ret; | |
62 | int t; | |
63 | ||
64 | assert(p); | |
65 | assert(*p); | |
66 | assert(name); | |
67 | assert(state); | |
68 | ||
69 | t = PTR_TO_INT(*state); | |
70 | c = *p; | |
71 | ||
bcf3295d LP |
72 | if (t == STATE_NULL) { |
73 | if (line) | |
74 | *line = 1; | |
75 | t = STATE_TEXT; | |
76 | } | |
77 | ||
08bcebf3 LP |
78 | for (;;) { |
79 | if (*c == 0) | |
80 | return XML_END; | |
81 | ||
82 | switch (t) { | |
83 | ||
84 | case STATE_TEXT: { | |
85 | int x; | |
86 | ||
87 | e = strchrnul(c, '<'); | |
88 | if (e > c) { | |
89 | /* More text... */ | |
90 | ret = strndup(c, e - c); | |
91 | if (!ret) | |
92 | return -ENOMEM; | |
93 | ||
bcf3295d LP |
94 | inc_lines(line, c, e - c); |
95 | ||
08bcebf3 LP |
96 | *name = ret; |
97 | *p = e; | |
98 | *state = INT_TO_PTR(STATE_TEXT); | |
99 | ||
100 | return XML_TEXT; | |
101 | } | |
102 | ||
103 | assert(*e == '<'); | |
104 | b = c + 1; | |
105 | ||
106 | if (startswith(b, "!--")) { | |
107 | /* A comment */ | |
108 | e = strstr(b + 3, "-->"); | |
109 | if (!e) | |
110 | return -EINVAL; | |
111 | ||
bcf3295d LP |
112 | inc_lines(line, b, e + 3 - b); |
113 | ||
08bcebf3 LP |
114 | c = e + 3; |
115 | continue; | |
116 | } | |
117 | ||
118 | if (*b == '?') { | |
119 | /* Processing instruction */ | |
120 | ||
121 | e = strstr(b + 1, "?>"); | |
122 | if (!e) | |
123 | return -EINVAL; | |
124 | ||
bcf3295d LP |
125 | inc_lines(line, b, e + 2 - b); |
126 | ||
08bcebf3 LP |
127 | c = e + 2; |
128 | continue; | |
129 | } | |
130 | ||
131 | if (*b == '!') { | |
132 | /* DTD */ | |
133 | ||
134 | e = strchr(b + 1, '>'); | |
135 | if (!e) | |
136 | return -EINVAL; | |
137 | ||
bcf3295d LP |
138 | inc_lines(line, b, e + 1 - b); |
139 | ||
08bcebf3 LP |
140 | c = e + 1; |
141 | continue; | |
142 | } | |
143 | ||
144 | if (*b == '/') { | |
145 | /* A closing tag */ | |
146 | x = XML_TAG_CLOSE; | |
147 | b++; | |
148 | } else | |
149 | x = XML_TAG_OPEN; | |
150 | ||
151 | e = strpbrk(b, WHITESPACE "/>"); | |
152 | if (!e) | |
153 | return -EINVAL; | |
154 | ||
155 | ret = strndup(b, e - b); | |
156 | if (!ret) | |
157 | return -ENOMEM; | |
158 | ||
159 | *name = ret; | |
160 | *p = e; | |
161 | *state = INT_TO_PTR(STATE_TAG); | |
162 | ||
163 | return x; | |
164 | } | |
165 | ||
166 | case STATE_TAG: | |
167 | ||
168 | b = c + strspn(c, WHITESPACE); | |
169 | if (*b == 0) | |
170 | return -EINVAL; | |
171 | ||
bcf3295d LP |
172 | inc_lines(line, c, b - c); |
173 | ||
08bcebf3 LP |
174 | e = b + strcspn(b, WHITESPACE "=/>"); |
175 | if (e > b) { | |
176 | /* An attribute */ | |
177 | ||
178 | ret = strndup(b, e - b); | |
179 | if (!ret) | |
180 | return -ENOMEM; | |
181 | ||
182 | *name = ret; | |
183 | *p = e; | |
184 | *state = INT_TO_PTR(STATE_ATTRIBUTE); | |
185 | ||
186 | return XML_ATTRIBUTE_NAME; | |
187 | } | |
188 | ||
189 | if (startswith(b, "/>")) { | |
190 | /* An empty tag */ | |
191 | ||
192 | *name = NULL; /* For empty tags we return a NULL name, the caller must be prepared for that */ | |
193 | *p = b + 2; | |
194 | *state = INT_TO_PTR(STATE_TEXT); | |
195 | ||
196 | return XML_TAG_CLOSE_EMPTY; | |
197 | } | |
198 | ||
199 | if (*b != '>') | |
200 | return -EINVAL; | |
201 | ||
202 | c = b + 1; | |
203 | t = STATE_TEXT; | |
204 | continue; | |
205 | ||
206 | case STATE_ATTRIBUTE: | |
207 | ||
208 | if (*c == '=') { | |
209 | c++; | |
210 | ||
211 | if (*c == '\'' || *c == '\"') { | |
212 | /* Tag with a quoted value */ | |
213 | ||
214 | e = strchr(c+1, *c); | |
215 | if (!e) | |
216 | return -EINVAL; | |
217 | ||
bcf3295d LP |
218 | inc_lines(line, c, e - c); |
219 | ||
08bcebf3 LP |
220 | ret = strndup(c+1, e - c - 1); |
221 | if (!ret) | |
222 | return -ENOMEM; | |
223 | ||
224 | *name = ret; | |
225 | *p = e + 1; | |
226 | *state = INT_TO_PTR(STATE_TAG); | |
227 | ||
228 | return XML_ATTRIBUTE_VALUE; | |
229 | ||
230 | } | |
231 | ||
232 | /* Tag with a value without quotes */ | |
233 | ||
234 | b = strpbrk(c, WHITESPACE ">"); | |
235 | if (!b) | |
236 | b = c; | |
237 | ||
238 | ret = strndup(c, b - c); | |
239 | if (!ret) | |
240 | return -ENOMEM; | |
241 | ||
242 | *name = ret; | |
243 | *p = b; | |
244 | *state = INT_TO_PTR(STATE_TAG); | |
245 | return XML_ATTRIBUTE_VALUE; | |
246 | } | |
247 | ||
248 | t = STATE_TAG; | |
249 | continue; | |
250 | } | |
251 | ||
252 | } | |
253 | ||
254 | assert_not_reached("Bad state"); | |
255 | } |