]> git.ipfire.org Git - thirdparty/squid.git/blob - src/esi/CustomParser.cc
SourceFormat Enforcement
[thirdparty/squid.git] / src / esi / CustomParser.cc
1
2 /*
3 * DEBUG: section 86 ESI processing
4 * AUTHOR: Robert Collins
5 *
6 * SQUID Web Proxy Cache http://www.squid-cache.org/
7 * ----------------------------------------------------------
8 *
9 * Squid is the result of efforts by numerous individuals from
10 * the Internet community; see the CONTRIBUTORS file for full
11 * details. Many organizations have provided support for Squid's
12 * development; see the SPONSORS file for full details. Squid is
13 * Copyrighted (C) 2001 by the Regents of the University of
14 * California; see the COPYRIGHT file for full details. Squid
15 * incorporates software developed and/or copyrighted by other
16 * sources; see the CREDITS file for full details.
17 *
18 * This program is free software; you can redistribute it and/or modify
19 * it under the terms of the GNU General Public License as published by
20 * the Free Software Foundation; either version 2 of the License, or
21 * (at your option) any later version.
22 *
23 * This program is distributed in the hope that it will be useful,
24 * but WITHOUT ANY WARRANTY; without even the implied warranty of
25 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26 * GNU General Public License for more details.
27 *
28 * You should have received a copy of the GNU General Public License
29 * along with this program; if not, write to the Free Software
30 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
31 *
32 */
33
34 #include "squid.h"
35 #include "base/Vector.h"
36 #include "Debug.h"
37 #include "esi/CustomParser.h"
38 #include "libTrie/Trie.h"
39 #include "libTrie/TrieCharTransform.h"
40
41 Trie *ESICustomParser::SearchTrie=NULL;
42
43 EsiParserDefinition(ESICustomParser);
44
45 Trie *
46 ESICustomParser::GetTrie()
47 {
48 if (SearchTrie)
49 return SearchTrie;
50
51 SearchTrie = new Trie(new TrieCaseless);
52
53 static const ESITAG_t ESITAG_value = ESITAG;
54
55 assert (SearchTrie->add
56 ("<esi:",5,(void *)&ESITAG_value));
57
58 static const ESITAG_t ESIENDTAG_value = ESIENDTAG;
59
60 assert (SearchTrie->add
61 ("</esi:",6,(void *)&ESIENDTAG_value));
62
63 static const ESITAG_t ESICOMMENT_value = ESICOMMENT;
64
65 assert (SearchTrie->add
66 ("<!--",4,(void *)&ESICOMMENT_value));
67
68 return SearchTrie;
69 }
70
71 ESICustomParser::ESICustomParser(ESIParserClient *aClient) :
72 theClient(aClient),
73 lastTag(ESITAG)
74 {}
75
76 ESICustomParser::~ESICustomParser()
77 {
78 theClient = NULL;
79 }
80
81 char const *
82 ESICustomParser::findTag(char const *buffer, size_t bufferLength)
83 {
84 size_t myOffset (0);
85 ESITAG_t *resulttype = NULL;
86
87 while (myOffset < bufferLength &&
88 (resulttype = static_cast<ESITAG_t *>(GetTrie()->findPrefix (buffer + myOffset, bufferLength - myOffset)))
89 == NULL)
90 ++myOffset;
91
92 if (myOffset == bufferLength)
93 return NULL;
94
95 debugs(86, 9, "ESICustomParser::findTag: found " << *resulttype);
96
97 lastTag = *resulttype;
98
99 return buffer + myOffset;
100 }
101
102 bool
103 ESICustomParser::parse(char const *dataToParse, size_t const lengthOfData, bool const endOfStream)
104 {
105 debugs(86, 9, "ESICustomParser::parse: Appending data to internal buffer");
106 content.append (dataToParse, lengthOfData);
107
108 if (!endOfStream) {
109 return true;
110 }
111
112 size_t openESITags (0);
113 //erring on the safe side. Probably rawBuf would be ok too
114 char const *currentPos = content.termedBuf();
115 size_t remainingCount = content.size();
116 char const *tag = NULL;
117
118 while ((tag = findTag(currentPos, remainingCount))) {
119 if (tag - currentPos)
120 theClient->parserDefault (currentPos,tag - currentPos);
121
122 switch (lastTag) {
123
124 case ESITAG: {
125 ++openESITags;
126 char *tagEnd = strchr(const_cast<char *>(tag), '>');
127
128 if (!tagEnd) {
129 error = "Could not find end ('>') of tag";
130 return false;
131 }
132
133 if (tagEnd - tag > (ssize_t)remainingCount) {
134 error = "Tag ends beyond the parse buffer.";
135 return false;
136 }
137
138 if (*(tagEnd - 1) == '/')
139 --openESITags;
140
141 char * endofName = strpbrk(const_cast<char *>(tag), w_space);
142
143 if (endofName > tagEnd)
144 endofName = const_cast<char *>(tagEnd);
145
146 *endofName = '\0';
147
148 *tagEnd = '\0';
149
150 Vector<char *>attributes;
151
152 char *attribute = const_cast<char *>(endofName + 1);
153
154 while (attribute > tag && attribute < tagEnd) {
155 /* leading spaces */
156
157 while (attribute < tagEnd && (xisspace(*attribute) || (*attribute == '/')))
158 ++attribute;
159
160 if (! (attribute < tagEnd))
161 break;
162
163 /* attribute name */
164 attributes.push_back(attribute);
165
166 char *nextSpace = strpbrk(attribute, w_space);
167
168 char *equals = strchr(attribute, '=');
169
170 if (!equals) {
171 error = "Missing attribute value.";
172 return false;
173 }
174
175 if (nextSpace && nextSpace < equals)
176 *nextSpace = '\0';
177 else
178 *equals = '\0';
179
180 ++equals;
181
182 while (equals < tagEnd && xisspace(*equals))
183 ++equals;
184
185 char sep = *equals;
186
187 if (sep != '\'' && sep != '"') {
188 error = "Unknown identifier (";
189 error.append (sep);
190 error.append (")");
191 return false;
192 }
193
194 char *value = equals + 1;
195 char *end = strchr(value, sep);
196
197 if (!end) {
198 error = "Missing attribute ending separator (";
199 error.append(sep);
200 error.append(")");
201 return false;
202 }
203 attributes.push_back(value);
204 *end = '\0';
205 attribute = end + 1;
206 }
207
208 theClient->start (tag + 1, (const char **)attributes.items, attributes.size() >> 1);
209 /* TODO: attributes */
210
211 if (*(tagEnd - 1) == '/')
212 theClient->end (tag + 1);
213
214 remainingCount -= tagEnd - currentPos + 1;
215
216 currentPos = tagEnd + 1;
217 }
218
219 break;
220
221 case ESIENDTAG: {
222 if (!openESITags)
223 return false;
224
225 char const *tagEnd = strchr(tag, '>');
226
227 if (!tagEnd)
228 return false;
229
230 if (tagEnd - tag > (ssize_t)remainingCount)
231 return false;
232
233 char * endofName = strpbrk(const_cast<char *>(tag), w_space);
234
235 if (endofName > tagEnd)
236 endofName = const_cast<char *>(tagEnd);
237
238 *endofName = '\0';
239
240 theClient->end (tag + 2);
241
242 --openESITags;
243
244 remainingCount -= tagEnd - currentPos + 1;
245
246 currentPos = tagEnd + 1;
247 }
248
249 break;
250
251 case ESICOMMENT: {
252 /* Further optimisation potential:
253 * 1) recognize end comments for esi and don't callback on
254 * comments.
255 * 2) provide the comment length to the caller.
256 */
257 /* Comments must not be nested, without CDATA
258 * and we don't support CDATA
259 */
260 char *commentEnd = strstr (const_cast<char *>(tag), "-->");
261
262 if (!commentEnd) {
263 error = "missing end of comment";
264 return false;
265 }
266
267 if (commentEnd - tag > (ssize_t)remainingCount) {
268 error = "comment ends beyond parse buffer";
269 return false;
270 }
271
272 *commentEnd = '\0';
273 theClient->parserComment (tag + 4);
274 remainingCount -= commentEnd - currentPos + 3;
275 currentPos = commentEnd + 3;
276 }
277
278 break;
279 break;
280
281 default:
282 fatal ("unknown ESI tag type found");
283 };
284
285 /*
286 * Find next esi tag (open or closing) or comment
287 * send tag, or full comment text
288 * rinse
289 */
290 }
291
292 if (remainingCount)
293 theClient->parserDefault (currentPos,remainingCount);
294
295 debugs(86, 5, "ESICustomParser::parse: Finished parsing, will return " << !openESITags);
296
297 if (openESITags)
298 error = "ESI Tags still open";
299
300 return !openESITags;
301 }
302
303 long int
304 ESICustomParser::lineNumber() const
305 {
306 /* We don't track lines in the body */
307 return 0;
308 }
309
310 char const *
311 ESICustomParser::errorString() const
312 {
313 if (error.size())
314 return error.termedBuf();
315 else
316 return "Parsing error strings not implemented";
317 }