]>
Commit | Line | Data |
---|---|---|
43ae1d95 | 1 | |
2 | /* | |
c2d4889f | 3 | * $Id: ESICustomParser.cc,v 1.6 2005/03/28 21:44:12 hno Exp $ |
43ae1d95 | 4 | * |
5 | * DEBUG: section 86 ESI processing | |
6 | * AUTHOR: Robert Collins | |
7 | * | |
8 | * SQUID Web Proxy Cache http://www.squid-cache.org/ | |
9 | * ---------------------------------------------------------- | |
10 | * | |
11 | * Squid is the result of efforts by numerous individuals from | |
12 | * the Internet community; see the CONTRIBUTORS file for full | |
13 | * details. Many organizations have provided support for Squid's | |
14 | * development; see the SPONSORS file for full details. Squid is | |
15 | * Copyrighted (C) 2001 by the Regents of the University of | |
16 | * California; see the COPYRIGHT file for full details. Squid | |
17 | * incorporates software developed and/or copyrighted by other | |
18 | * sources; see the CREDITS file for full details. | |
19 | * | |
20 | * This program is free software; you can redistribute it and/or modify | |
21 | * it under the terms of the GNU General Public License as published by | |
22 | * the Free Software Foundation; either version 2 of the License, or | |
23 | * (at your option) any later version. | |
24 | * | |
25 | * This program is distributed in the hope that it will be useful, | |
26 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
27 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
28 | * GNU General Public License for more details. | |
29 | * | |
30 | * You should have received a copy of the GNU General Public License | |
31 | * along with this program; if not, write to the Free Software | |
32 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA. | |
33 | * | |
34 | */ | |
35 | ||
36 | #include "squid.h" | |
37 | #include "ESICustomParser.h" | |
38 | #include "Trie.h" | |
924f73bc | 39 | #include "TrieCharTransform.h" |
43ae1d95 | 40 | #include "Array.h" |
41 | ||
42 | Trie *ESICustomParser::SearchTrie=NULL; | |
43ae1d95 | 43 | |
c2d4889f | 44 | RegisterESIParser("custom", ESICustomParser); |
45 | ||
43ae1d95 | 46 | Trie * |
47 | ESICustomParser::GetTrie() | |
48 | { | |
49 | if (SearchTrie) | |
50 | return SearchTrie; | |
51 | ||
924f73bc | 52 | SearchTrie = new Trie(new TrieCaseless); |
43ae1d95 | 53 | |
54 | assert (SearchTrie->add | |
55 | ("<esi:",5,(void *)ESITAG)); | |
56 | ||
57 | assert (SearchTrie->add | |
58 | ("</esi:",6,(void *)ESIENDTAG)); | |
59 | ||
60 | assert (SearchTrie->add | |
61 | ("<!--",4,(void *)ESICOMMENT)); | |
62 | ||
63 | return SearchTrie; | |
64 | } | |
65 | ||
43ae1d95 | 66 | ESICustomParser::ESICustomParser(ESIParserClient *aClient) : theClient (aClient) |
67 | {} | |
68 | ||
69 | ESICustomParser::~ESICustomParser() | |
70 | { | |
71 | theClient = NULL; | |
72 | } | |
73 | ||
74 | char const * | |
924f73bc | 75 | ESICustomParser::findTag(char const *buffer, size_t bufferLength) |
43ae1d95 | 76 | { |
77 | size_t myOffset (0); | |
5aeabf95 | 78 | void *resulttype = NULL; |
43ae1d95 | 79 | |
924f73bc | 80 | while (myOffset < bufferLength && |
81 | (resulttype =GetTrie()->findPrefix (buffer + myOffset, bufferLength - myOffset)) == NULL) | |
43ae1d95 | 82 | ++myOffset; |
83 | ||
924f73bc | 84 | if (myOffset == bufferLength) |
43ae1d95 | 85 | return NULL; |
86 | ||
87 | debug (86,9)("ESICustomParser::findTag: found %p\n", resulttype); | |
88 | ||
89 | /* Yuck! */ | |
90 | lastTag = static_cast<ESITAG_t>((int)resulttype); | |
91 | ||
924f73bc | 92 | return buffer + myOffset; |
43ae1d95 | 93 | } |
94 | ||
95 | bool | |
96 | ESICustomParser::parse(char const *dataToParse, size_t const lengthOfData, bool const endOfStream) | |
97 | { | |
98 | debug (86,9)("ESICustomParser::parse: Appending data to internal buffer\n"); | |
99 | content.append (dataToParse, lengthOfData); | |
100 | ||
101 | if (!endOfStream) { | |
102 | return true; | |
103 | } | |
104 | ||
105 | size_t openESITags (0); | |
106 | char const *currentPos = content.buf(); | |
107 | size_t remainingCount = content.size(); | |
5aeabf95 | 108 | char const *tag = NULL; |
43ae1d95 | 109 | |
110 | while ((tag = findTag(currentPos, remainingCount))) { | |
111 | if (tag - currentPos) | |
112 | theClient->parserDefault (currentPos,tag - currentPos); | |
113 | ||
114 | switch (lastTag) { | |
115 | ||
116 | case ESITAG: { | |
117 | ++openESITags; | |
411c6ea3 | 118 | char *tagEnd = strchr(const_cast<char *>(tag), '>'); |
43ae1d95 | 119 | |
120 | if (!tagEnd) { | |
121 | error = "Could not find end ('>') of tag"; | |
122 | return false; | |
123 | } | |
124 | ||
125 | if (tagEnd - tag > (ssize_t)remainingCount) { | |
126 | error = "Tag ends beyond the parse buffer."; | |
127 | return false; | |
128 | } | |
129 | ||
130 | if (*(tagEnd - 1) == '/') | |
131 | --openESITags; | |
132 | ||
411c6ea3 | 133 | char * endofName = strpbrk(const_cast<char *>(tag), w_space); |
43ae1d95 | 134 | |
135 | if (endofName > tagEnd) | |
136 | endofName = const_cast<char *>(tagEnd); | |
137 | ||
138 | *endofName = '\0'; | |
139 | ||
140 | *tagEnd = '\0'; | |
141 | ||
142 | Vector<char *>attributes; | |
143 | ||
411c6ea3 | 144 | char *attribute = const_cast<char *>(endofName + 1); |
43ae1d95 | 145 | |
146 | while (attribute > tag && attribute < tagEnd) { | |
147 | /* leading spaces */ | |
148 | ||
149 | while (attribute < tagEnd && (xisspace(*attribute) || (*attribute == '/'))) | |
150 | ++attribute; | |
151 | ||
152 | if (! (attribute < tagEnd)) | |
153 | break; | |
154 | ||
155 | /* attribute name */ | |
156 | attributes.push_back(attribute); | |
157 | ||
158 | char *nextSpace = strpbrk(attribute, w_space); | |
159 | ||
160 | char *equals = strchr(attribute, '='); | |
161 | ||
162 | if (!equals) { | |
163 | error = "Missing attribute value."; | |
164 | return false; | |
165 | } | |
166 | ||
167 | if (nextSpace && nextSpace < equals) | |
168 | *nextSpace = '\0'; | |
169 | else | |
170 | *equals = '\0'; | |
171 | ||
172 | ++equals; | |
173 | ||
174 | while (equals < tagEnd && xisspace(*equals)) | |
175 | ++equals; | |
176 | ||
177 | char sep = *equals; | |
178 | ||
179 | if (sep != '\'' && sep != '"') { | |
180 | error = "Unknown identifier ("; | |
181 | error.append (sep); | |
182 | error.append (")"); | |
183 | return false; | |
184 | } | |
185 | ||
186 | char *value = equals + 1; | |
187 | char *end = strchr (value, sep); | |
188 | attributes.push_back(value); | |
189 | *end = '\0'; | |
190 | attribute = end + 1; | |
191 | } | |
192 | ||
193 | theClient->start (tag + 1, (const char **)attributes.items, attributes.size() >> 1); | |
194 | /* TODO: attributes */ | |
195 | ||
196 | if (*(tagEnd - 1) == '/') | |
197 | theClient->end (tag + 1); | |
198 | ||
199 | remainingCount -= tagEnd - currentPos + 1; | |
200 | ||
201 | currentPos = tagEnd + 1; | |
202 | } | |
203 | ||
204 | break; | |
205 | ||
206 | case ESIENDTAG: { | |
207 | if (!openESITags) | |
208 | return false; | |
209 | ||
210 | char const *tagEnd = strchr(tag, '>'); | |
211 | ||
212 | if (!tagEnd) | |
213 | return false; | |
214 | ||
215 | if (tagEnd - tag > (ssize_t)remainingCount) | |
216 | return false; | |
217 | ||
411c6ea3 | 218 | char * endofName = strpbrk(const_cast<char *>(tag), w_space); |
43ae1d95 | 219 | |
220 | if (endofName > tagEnd) | |
221 | endofName = const_cast<char *>(tagEnd); | |
222 | ||
223 | *endofName = '\0'; | |
224 | ||
225 | theClient->end (tag + 2); | |
226 | ||
227 | --openESITags; | |
228 | ||
229 | remainingCount -= tagEnd - currentPos + 1; | |
230 | ||
231 | currentPos = tagEnd + 1; | |
232 | } | |
233 | ||
234 | break; | |
235 | ||
236 | case ESICOMMENT: { | |
237 | /* Further optimisation potential: | |
238 | * 1) recognize end comments for esi and don't callback on | |
239 | * comments. | |
240 | * 2) provide the comment length to the caller. | |
241 | */ | |
242 | /* Comments must not be nested, without CDATA | |
243 | * and we don't support CDATA | |
244 | */ | |
411c6ea3 | 245 | char *commentEnd = strstr (const_cast<char *>(tag), "-->"); |
43ae1d95 | 246 | |
247 | if (!commentEnd) { | |
248 | error = "missing end of comment"; | |
249 | return false; | |
250 | } | |
251 | ||
252 | if (commentEnd - tag > (ssize_t)remainingCount) { | |
253 | error = "comment ends beyond parse buffer"; | |
254 | return false; | |
255 | } | |
256 | ||
257 | *commentEnd = '\0'; | |
258 | theClient->parserComment (tag + 4); | |
259 | remainingCount -= commentEnd - currentPos + 3; | |
260 | currentPos = commentEnd + 3; | |
261 | } | |
262 | ||
263 | break; | |
264 | break; | |
265 | ||
266 | default: | |
267 | fatal ("unknown ESI tag type found"); | |
268 | }; | |
269 | ||
270 | /* | |
271 | * Find next esi tag (open or closing) or comment | |
272 | * send tag, or full comment text | |
273 | * rinse | |
274 | */ | |
275 | } | |
276 | ||
277 | if (remainingCount) | |
278 | theClient->parserDefault (currentPos,remainingCount); | |
279 | ||
280 | debug (86,5)("ESICustomParser::parse: Finished parsing, will return %d\n", !openESITags); | |
281 | ||
282 | if (openESITags) | |
283 | error = "ESI Tags still open"; | |
284 | ||
285 | return !openESITags; | |
286 | } | |
287 | ||
288 | size_t | |
289 | ESICustomParser::lineNumber() const | |
290 | { | |
291 | /* We don't track lines in the body */ | |
292 | return 0; | |
293 | } | |
294 | ||
295 | char const * | |
296 | ESICustomParser::errorString() const | |
297 | { | |
298 | if (error.size()) | |
299 | return error.buf(); | |
300 | else | |
301 | return "Parsing error strings not implemented"; | |
302 | } |