]>
Commit | Line | Data |
---|---|---|
2246b732 | 1 | |
2 | /* | |
429f7150 | 3 | * $Id: HttpMsg.cc,v 1.17 2005/09/15 20:19:41 wessels Exp $ |
2246b732 | 4 | * |
5 | * DEBUG: section 74 HTTP Message | |
6 | * AUTHOR: Alex Rousskov | |
7 | * | |
2b6662ba | 8 | * SQUID Web Proxy Cache http://www.squid-cache.org/ |
e25c139f | 9 | * ---------------------------------------------------------- |
2246b732 | 10 | * |
2b6662ba | 11 | * Squid is the result of efforts by numerous individuals from |
12 | * the Internet community; see the CONTRIBUTORS file for full | |
13 | * details. Many organizations have provided support for Squid's | |
14 | * development; see the SPONSORS file for full details. Squid is | |
15 | * Copyrighted (C) 2001 by the Regents of the University of | |
16 | * California; see the COPYRIGHT file for full details. Squid | |
17 | * incorporates software developed and/or copyrighted by other | |
18 | * sources; see the CREDITS file for full details. | |
2246b732 | 19 | * |
20 | * This program is free software; you can redistribute it and/or modify | |
21 | * it under the terms of the GNU General Public License as published by | |
22 | * the Free Software Foundation; either version 2 of the License, or | |
23 | * (at your option) any later version. | |
24 | * | |
25 | * This program is distributed in the hope that it will be useful, | |
26 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
27 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
28 | * GNU General Public License for more details. | |
29 | * | |
30 | * You should have received a copy of the GNU General Public License | |
31 | * along with this program; if not, write to the Free Software | |
cbdec147 | 32 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA. |
e25c139f | 33 | * |
2246b732 | 34 | */ |
35 | ||
36 | #include "squid.h" | |
8596962e | 37 | #include "HttpMsg.h" |
38 | #include "HttpRequest.h" | |
39 | #include "HttpReply.h" | |
40 | ||
41 | HttpMsg::HttpMsg(http_hdr_owner_type owner): header(owner), | |
42 | cache_control(NULL), hdr_sz(0), content_length(0), protocol(PROTO_NONE), | |
43 | pstate(psReadyToParseStartLine) | |
44 | {} | |
45 | ||
46 | ||
47 | HttpMsgParseState &operator++ (HttpMsgParseState &aState) | |
48 | { | |
49 | int tmp = (int)aState; | |
50 | aState = (HttpMsgParseState)(++tmp); | |
51 | return aState; | |
52 | } | |
53 | ||
2246b732 | 54 | |
55 | /* find end of headers */ | |
56 | int | |
57 | httpMsgIsolateHeaders(const char **parse_start, const char **blk_start, const char **blk_end) | |
58 | { | |
bdb1a5d5 | 59 | /* |
60 | * parse_start points to the first line of HTTP message *headers*, | |
61 | * not including the request or status lines | |
62 | */ | |
63 | size_t l = strlen(*parse_start); | |
64 | size_t end = headersEnd(*parse_start, l); | |
65 | int nnl; | |
62e76326 | 66 | |
2246b732 | 67 | if (end) { |
62e76326 | 68 | *blk_start = *parse_start; |
69 | *blk_end = *parse_start + end - 1; | |
70 | /* | |
71 | * leave blk_end pointing to the first character after the | |
72 | * first newline which terminates the headers | |
73 | */ | |
74 | assert(**blk_end == '\n'); | |
75 | ||
76 | while (*(*blk_end - 1) == '\r') | |
77 | (*blk_end)--; | |
78 | ||
79 | assert(*(*blk_end - 1) == '\n'); | |
80 | ||
81 | *parse_start += end; | |
82 | ||
83 | return 1; | |
2246b732 | 84 | } |
62e76326 | 85 | |
bdb1a5d5 | 86 | /* |
87 | * If we didn't find the end of headers, and parse_start does | |
88 | * NOT point to a CR or NL character, then return failure | |
89 | */ | |
90 | if (**parse_start != '\r' && **parse_start != '\n') | |
62e76326 | 91 | return 0; /* failure */ |
92 | ||
bdb1a5d5 | 93 | /* |
94 | * If we didn't find the end of headers, and parse_start does point | |
95 | * to an empty line, then we have empty headers. Skip all CR and | |
96 | * NL characters up to the first NL. Leave parse_start pointing at | |
97 | * the first character after the first NL. | |
98 | */ | |
99 | *blk_start = *parse_start; | |
62e76326 | 100 | |
bdb1a5d5 | 101 | *blk_end = *blk_start; |
62e76326 | 102 | |
a4295415 | 103 | for (nnl = 0; nnl == 0; (*parse_start)++) { |
62e76326 | 104 | if (**parse_start == '\r') |
105 | (void) 0; | |
106 | else if (**parse_start == '\n') | |
107 | nnl++; | |
108 | else | |
109 | break; | |
2246b732 | 110 | } |
62e76326 | 111 | |
bdb1a5d5 | 112 | return 1; |
2246b732 | 113 | } |
114 | ||
/*
 * Isolate the first line of a 0-terminated buffer.
 *
 * Looks for the first CR or LF in *parse_start.  If there is none, returns 0
 * and leaves all three pointers untouched.  Otherwise returns 1 with
 *   *blk_start  = start of the line,
 *   *blk_end    = first character after the line terminator (any run of CRs
 *                 followed by at most one LF), so the block includes the
 *                 terminator,
 *   *parse_start = *blk_end.
 */
static int
httpMsgIsolateStart(const char **parse_start, const char **blk_start, const char **blk_end)
{
    const char *const line = *parse_start;

    /* keep the offset as a pointer: strcspn() returns size_t, not int */
    const char *eol = line + strcspn(line, "\r\n");

    if (*eol == '\0')		/* no CRLF found */
        return 0;

    while (*eol == '\r')	/* CR */
        ++eol;

    if (*eol == '\n')		/* LF */
        ++eol;

    *blk_start = line;
    *blk_end = eol;
    *parse_start = eol;

    return 1;
}
138 | ||
139 | // negative return is the negated HTTP_ error code | |
140 | // zero return means need more data | |
141 | // positive return is the size of parsed headers | |
142 | bool HttpMsg::parse(MemBuf *buf, bool eof, http_status *error) | |
143 | { | |
144 | assert(error); | |
145 | *error = HTTP_STATUS_NONE; | |
146 | ||
147 | // httpMsgParseStep() and debugging require 0-termination, unfortunately | |
148 | buf->terminate(); // does not affect content size | |
149 | ||
150 | // find the end of headers | |
151 | // TODO: Remove? httpReplyParseStep() should do similar checks | |
152 | const size_t hdr_len = headersEnd(buf->content(), buf->contentSize()); | |
153 | ||
154 | if (hdr_len <= 0) { | |
155 | debugs(58, 3, "HttpMsg::parse: failed to find end of headers " << | |
156 | "(eof: " << eof << ") in '" << buf->content() << "'"); | |
157 | ||
158 | if (eof) // iff we have seen the end, this is an error | |
159 | *error = HTTP_INVALID_HEADER; | |
160 | ||
161 | return false; | |
162 | } | |
163 | ||
164 | // TODO: move to httpReplyParseStep() | |
165 | if (hdr_len > Config.maxReplyHeaderSize) { | |
166 | debugs(58, 1, "HttpMsg::parse: Too large reply header (" << | |
167 | hdr_len << " > " << Config.maxReplyHeaderSize); | |
168 | *error = HTTP_HEADER_TOO_LARGE; | |
169 | return false; | |
170 | } | |
171 | ||
172 | if (!sanityCheckStartLine(buf, error)) // redundant; could be remvoed | |
173 | return false; | |
174 | ||
175 | const int res = httpMsgParseStep(buf->content(), eof); | |
176 | ||
177 | if (res < 0) { // error | |
178 | debugs(58, 3, "HttpMsg::parse: cannot parse isolated headers " << | |
179 | "in '" << buf->content() << "'"); | |
180 | *error = HTTP_INVALID_HEADER; | |
181 | return false; | |
182 | } | |
183 | ||
184 | if (res == 0) { | |
185 | debugs(58, 2, "HttpMsg::parse: strange, need more data near '" << | |
186 | buf->content() << "'"); | |
187 | return false; // but this should not happen due to headersEnd() above | |
188 | } | |
189 | ||
190 | assert(res > 0); | |
191 | debugs(58, 9, "HttpMsg::parse success (" << hdr_len << " bytes) " << | |
192 | "near '" << buf->content() << "'"); | |
193 | ||
194 | if (hdr_sz != (int)hdr_len) { | |
195 | debugs(58, 1, "internal HttpMsg::parse vs. headersEnd error: " << | |
196 | hdr_sz << " != " << hdr_len); | |
197 | hdr_sz = (int)hdr_len; // because old http.cc code used hdr_len | |
198 | } | |
199 | ||
200 | return true; | |
201 | } | |
202 | ||
203 | ||
204 | ||
205 | /* | |
206 | * parses a 0-terminating buffer into HttpMsg. | |
207 | * Returns: | |
208 | * 1 -- success | |
209 | * 0 -- need more data (partial parse) | |
210 | * -1 -- parse error | |
211 | */ | |
212 | int | |
213 | HttpMsg::httpMsgParseStep(const char *buf, int atEnd) | |
214 | { | |
215 | const char *parse_start = buf; | |
216 | const char *blk_start, *blk_end; | |
217 | const char **parse_end_ptr = &blk_end; | |
218 | assert(parse_start); | |
219 | assert(pstate < psParsed); | |
8596962e | 220 | |
221 | *parse_end_ptr = parse_start; | |
222 | ||
223 | if (pstate == psReadyToParseStartLine) { | |
224 | if (!httpMsgIsolateStart(&parse_start, &blk_start, &blk_end)) | |
225 | return 0; | |
226 | ||
429f7150 | 227 | if (!parseFirstLine(blk_start, blk_end)) |
228 | return httpMsgParseError(); | |
8596962e | 229 | |
230 | *parse_end_ptr = parse_start; | |
231 | ||
232 | hdr_sz = *parse_end_ptr - buf; | |
233 | ||
234 | ++pstate; | |
235 | } | |
236 | ||
237 | if (pstate == psReadyToParseHeaders) { | |
238 | if (!httpMsgIsolateHeaders(&parse_start, &blk_start, &blk_end)) { | |
239 | if (atEnd) | |
240 | blk_start = parse_start, blk_end = blk_start + strlen(blk_start); | |
241 | else | |
242 | return 0; | |
243 | } | |
244 | ||
245 | if (!httpHeaderParse(&header, blk_start, blk_end)) | |
246 | return httpMsgParseError(); | |
247 | ||
07947ad8 | 248 | hdrCacheInit(); |
8596962e | 249 | |
250 | *parse_end_ptr = parse_start; | |
251 | ||
252 | hdr_sz = *parse_end_ptr - buf; | |
253 | ||
254 | ++pstate; | |
255 | } | |
256 | ||
257 | return 1; | |
258 | } | |
259 | ||
260 | ||
261 | /* handy: resets and returns -1 */ | |
262 | int | |
263 | HttpMsg::httpMsgParseError() | |
264 | { | |
265 | reset(); | |
266 | /* indicate an error */ | |
267 | ||
268 | if (HttpReply *rep = dynamic_cast<HttpReply*>(this)) | |
269 | rep->sline.status = HTTP_INVALID_HEADER; | |
270 | ||
271 | return -1; | |
272 | } | |
273 | ||
274 | ||
275 | ||
62e76326 | 276 | /* returns true if connection should be "persistent" |
2246b732 | 277 | * after processing this message */ |
278 | int | |
450e0c10 | 279 | httpMsgIsPersistent(HttpVersion const &http_ver, const HttpHeader * hdr) |
2246b732 | 280 | { |
21b92762 | 281 | #if WHEN_SQUID_IS_NOT_HTTP1_1 |
282 | ||
bffee5af | 283 | if ((http_ver.major >= 1) && (http_ver.minor >= 1)) { |
62e76326 | 284 | /* |
285 | * for modern versions of HTTP: persistent unless there is | |
286 | * a "Connection: close" header. | |
287 | */ | |
288 | return !httpHeaderHasConnDir(hdr, "close"); | |
8596962e | 289 | } else |
21b92762 | 290 | #else |
291 | { | |
292 | #endif | |
62e76326 | 293 | /* |
294 | * Persistent connections in Netscape 3.x are allegedly broken, | |
295 | * return false if it is a browser connection. If there is a | |
296 | * VIA header, then we assume this is NOT a browser connection. | |
297 | */ | |
298 | const char *agent = httpHeaderGetStr(hdr, HDR_USER_AGENT); | |
299 | ||
8596962e | 300 | if (agent && !httpHeaderHas(hdr, HDR_VIA)) { |
301 | if (!strncasecmp(agent, "Mozilla/3.", 10)) | |
302 | return 0; | |
62e76326 | 303 | |
8596962e | 304 | if (!strncasecmp(agent, "Netscape/3.", 11)) |
305 | return 0; | |
2246b732 | 306 | } |
8596962e | 307 | |
308 | /* for old versions of HTTP: persistent if has "keep-alive" */ | |
309 | return httpHeaderHasConnDir(hdr, "keep-alive"); | |
310 | } | |
2246b732 | 311 | } |
8596962e | 312 | |
313 | void HttpMsg::packInto(Packer *p, bool full_uri) const | |
314 | { | |
315 | packFirstLineInto(p, full_uri); | |
316 | httpHeaderPackInto(&header, p); | |
317 | packerAppend(p, "\r\n", 2); | |
318 | } | |
319 | ||
07947ad8 | 320 | void HttpMsg::hdrCacheInit() |
321 | { | |
322 | content_length = httpHeaderGetInt(&header, HDR_CONTENT_LENGTH); | |
323 | assert(NULL == cache_control); | |
324 | cache_control = httpHeaderGetCc(&header); | |
325 | } |