]>
Commit | Line | Data |
---|---|---|
08bcebf3 LP |
1 | /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/ |
2 | ||
3 | /*** | |
4 | This file is part of systemd. | |
5 | ||
6 | Copyright 2013 Lennart Poettering | |
7 | ||
8 | systemd is free software; you can redistribute it and/or modify it | |
9 | under the terms of the GNU Lesser General Public License as published by | |
10 | the Free Software Foundation; either version 2.1 of the License, or | |
11 | (at your option) any later version. | |
12 | ||
13 | systemd is distributed in the hope that it will be useful, but | |
14 | WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
16 | Lesser General Public License for more details. | |
17 | ||
18 | You should have received a copy of the GNU Lesser General Public License | |
19 | along with systemd; If not, see <http://www.gnu.org/licenses/>. | |
20 | ***/ | |
21 | ||
22 | #include <string.h> | |
23 | ||
24 | #include "util.h" | |
25 | #include "xml.h" | |
26 | ||
/* Tokenizer states; stored in the caller's opaque state pointer between
 * calls to xml_tokenize(). */
enum {
        STATE_NULL = 0,      /* no token read yet; presumably what a NULL state pointer decodes to -- confirm against PTR_TO_INT() */
        STATE_TEXT = 1,      /* outside of any tag */
        STATE_TAG = 2,       /* inside a tag, after its name or after an attribute value */
        STATE_ATTRIBUTE = 3, /* directly after an attribute name */
};
33 | ||
bcf3295d LP |
/* Adds the number of '\n' bytes found in the first n bytes of s to *line.
 * Does nothing if line is NULL. */
static void inc_lines(unsigned *line, const char *s, size_t n) {
        const char *end, *nl;

        if (!line)
                return;

        end = s + n;

        /* Hop from newline to newline until the remaining range has none left */
        while ((nl = memchr(s, '\n', (size_t) (end - s)))) {
                (*line)++;
                s = nl + 1;
        }
}
52 | ||
08bcebf3 LP |
53 | /* We don't actually do real XML here. We only read a simplistic |
54 | * subset, that is a bit less strict than XML and lacks all the more | |
55 | * complex features, like entities, or namespaces. However, we do | |
56 | * support some HTML5-like simplifications */ | |
57 | ||
bcf3295d | 58 | int xml_tokenize(const char **p, char **name, void **state, unsigned *line) { |
08bcebf3 LP |
59 | const char *c, *e, *b; |
60 | char *ret; | |
61 | int t; | |
62 | ||
63 | assert(p); | |
64 | assert(*p); | |
65 | assert(name); | |
66 | assert(state); | |
67 | ||
68 | t = PTR_TO_INT(*state); | |
69 | c = *p; | |
70 | ||
bcf3295d LP |
71 | if (t == STATE_NULL) { |
72 | if (line) | |
73 | *line = 1; | |
74 | t = STATE_TEXT; | |
75 | } | |
76 | ||
08bcebf3 LP |
77 | for (;;) { |
78 | if (*c == 0) | |
79 | return XML_END; | |
80 | ||
81 | switch (t) { | |
82 | ||
83 | case STATE_TEXT: { | |
84 | int x; | |
85 | ||
86 | e = strchrnul(c, '<'); | |
87 | if (e > c) { | |
88 | /* More text... */ | |
89 | ret = strndup(c, e - c); | |
90 | if (!ret) | |
91 | return -ENOMEM; | |
92 | ||
bcf3295d LP |
93 | inc_lines(line, c, e - c); |
94 | ||
08bcebf3 LP |
95 | *name = ret; |
96 | *p = e; | |
97 | *state = INT_TO_PTR(STATE_TEXT); | |
98 | ||
99 | return XML_TEXT; | |
100 | } | |
101 | ||
102 | assert(*e == '<'); | |
103 | b = c + 1; | |
104 | ||
105 | if (startswith(b, "!--")) { | |
106 | /* A comment */ | |
107 | e = strstr(b + 3, "-->"); | |
108 | if (!e) | |
109 | return -EINVAL; | |
110 | ||
bcf3295d LP |
111 | inc_lines(line, b, e + 3 - b); |
112 | ||
08bcebf3 LP |
113 | c = e + 3; |
114 | continue; | |
115 | } | |
116 | ||
117 | if (*b == '?') { | |
118 | /* Processing instruction */ | |
119 | ||
120 | e = strstr(b + 1, "?>"); | |
121 | if (!e) | |
122 | return -EINVAL; | |
123 | ||
bcf3295d LP |
124 | inc_lines(line, b, e + 2 - b); |
125 | ||
08bcebf3 LP |
126 | c = e + 2; |
127 | continue; | |
128 | } | |
129 | ||
130 | if (*b == '!') { | |
131 | /* DTD */ | |
132 | ||
133 | e = strchr(b + 1, '>'); | |
134 | if (!e) | |
135 | return -EINVAL; | |
136 | ||
bcf3295d LP |
137 | inc_lines(line, b, e + 1 - b); |
138 | ||
08bcebf3 LP |
139 | c = e + 1; |
140 | continue; | |
141 | } | |
142 | ||
143 | if (*b == '/') { | |
144 | /* A closing tag */ | |
145 | x = XML_TAG_CLOSE; | |
146 | b++; | |
147 | } else | |
148 | x = XML_TAG_OPEN; | |
149 | ||
150 | e = strpbrk(b, WHITESPACE "/>"); | |
151 | if (!e) | |
152 | return -EINVAL; | |
153 | ||
154 | ret = strndup(b, e - b); | |
155 | if (!ret) | |
156 | return -ENOMEM; | |
157 | ||
158 | *name = ret; | |
159 | *p = e; | |
160 | *state = INT_TO_PTR(STATE_TAG); | |
161 | ||
162 | return x; | |
163 | } | |
164 | ||
165 | case STATE_TAG: | |
166 | ||
167 | b = c + strspn(c, WHITESPACE); | |
168 | if (*b == 0) | |
169 | return -EINVAL; | |
170 | ||
bcf3295d LP |
171 | inc_lines(line, c, b - c); |
172 | ||
08bcebf3 LP |
173 | e = b + strcspn(b, WHITESPACE "=/>"); |
174 | if (e > b) { | |
175 | /* An attribute */ | |
176 | ||
177 | ret = strndup(b, e - b); | |
178 | if (!ret) | |
179 | return -ENOMEM; | |
180 | ||
181 | *name = ret; | |
182 | *p = e; | |
183 | *state = INT_TO_PTR(STATE_ATTRIBUTE); | |
184 | ||
185 | return XML_ATTRIBUTE_NAME; | |
186 | } | |
187 | ||
188 | if (startswith(b, "/>")) { | |
189 | /* An empty tag */ | |
190 | ||
191 | *name = NULL; /* For empty tags we return a NULL name, the caller must be prepared for that */ | |
192 | *p = b + 2; | |
193 | *state = INT_TO_PTR(STATE_TEXT); | |
194 | ||
195 | return XML_TAG_CLOSE_EMPTY; | |
196 | } | |
197 | ||
198 | if (*b != '>') | |
199 | return -EINVAL; | |
200 | ||
201 | c = b + 1; | |
202 | t = STATE_TEXT; | |
203 | continue; | |
204 | ||
205 | case STATE_ATTRIBUTE: | |
206 | ||
207 | if (*c == '=') { | |
208 | c++; | |
209 | ||
210 | if (*c == '\'' || *c == '\"') { | |
211 | /* Tag with a quoted value */ | |
212 | ||
213 | e = strchr(c+1, *c); | |
214 | if (!e) | |
215 | return -EINVAL; | |
216 | ||
bcf3295d LP |
217 | inc_lines(line, c, e - c); |
218 | ||
08bcebf3 LP |
219 | ret = strndup(c+1, e - c - 1); |
220 | if (!ret) | |
221 | return -ENOMEM; | |
222 | ||
223 | *name = ret; | |
224 | *p = e + 1; | |
225 | *state = INT_TO_PTR(STATE_TAG); | |
226 | ||
227 | return XML_ATTRIBUTE_VALUE; | |
228 | ||
229 | } | |
230 | ||
231 | /* Tag with a value without quotes */ | |
232 | ||
233 | b = strpbrk(c, WHITESPACE ">"); | |
234 | if (!b) | |
235 | b = c; | |
236 | ||
237 | ret = strndup(c, b - c); | |
238 | if (!ret) | |
239 | return -ENOMEM; | |
240 | ||
241 | *name = ret; | |
242 | *p = b; | |
243 | *state = INT_TO_PTR(STATE_TAG); | |
244 | return XML_ATTRIBUTE_VALUE; | |
245 | } | |
246 | ||
247 | t = STATE_TAG; | |
248 | continue; | |
249 | } | |
250 | ||
251 | } | |
252 | ||
253 | assert_not_reached("Bad state"); | |
254 | } |