5 * DEBUG: section 74 HTTP Message
6 * AUTHOR: Alex Rousskov
8 * SQUID Web Proxy Cache http://www.squid-cache.org/
9 * ----------------------------------------------------------
11 * Squid is the result of efforts by numerous individuals from
12 * the Internet community; see the CONTRIBUTORS file for full
13 * details. Many organizations have provided support for Squid's
14 * development; see the SPONSORS file for full details. Squid is
15 * Copyrighted (C) 2001 by the Regents of the University of
16 * California; see the COPYRIGHT file for full details. Squid
17 * incorporates software developed and/or copyrighted by other
18 * sources; see the CREDITS file for full details.
20 * This program is free software; you can redistribute it and/or modify
21 * it under the terms of the GNU General Public License as published by
22 * the Free Software Foundation; either version 2 of the License, or
23 * (at your option) any later version.
25 * This program is distributed in the hope that it will be useful,
26 * but WITHOUT ANY WARRANTY; without even the implied warranty of
27 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28 * GNU General Public License for more details.
30 * You should have received a copy of the GNU General Public License
31 * along with this program; if not, write to the Free Software
32 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
40 HttpMsg::HttpMsg(http_hdr_owner_type owner
): header(owner
),
41 cache_control(NULL
), hdr_sz(0), content_length(0), protocol(PROTO_NONE
),
42 pstate(psReadyToParseStartLine
), lock_count(0)
47 assert(lock_count
== 0);
51 HttpMsgParseState
&operator++ (HttpMsgParseState
&aState
)
53 int tmp
= (int)aState
;
54 aState
= (HttpMsgParseState
)(++tmp
);
// Locates the header block that begins at *parse_start.
//   parse_start  in/out cursor; advanced by the empty-header scan below
//   l            byte count handed to headersEnd()
//   blk_start    receives the start of the isolated header block
//   blk_end      receives the end of the isolated header block
// When headersEnd() reports a terminator, *blk_end is computed from
// *parse_start + end - 1 and is asserted to sit on a NL; CR bytes in front
// of it are then examined by the while loop. Otherwise the function returns
// 0 unless *parse_start sits on a CR or NL, in which case the empty-header
// path scans forward until one NL has been counted in nnl.
// Callers treat a zero result as failure (see httpMsgParseStep).
// NOTE(review): the loop bodies, the success return and the declaration of
// nnl are not visible in this listing - confirm against the complete file.
58 /* find end of headers */
60 httpMsgIsolateHeaders(const char **parse_start
, int l
, const char **blk_start
, const char **blk_end
)
63 * parse_start points to the first line of HTTP message *headers*,
64 * not including the request or status lines
66 size_t end
= headersEnd(*parse_start
, l
);
70 *blk_start
= *parse_start
;
71 *blk_end
= *parse_start
+ end
- 1;
73 * leave blk_end pointing to the first character after the
74 * first newline which terminates the headers
76 assert(**blk_end
== '\n');
78 while (*(*blk_end
- 1) == '\r')
81 assert(*(*blk_end
- 1) == '\n');
89 * If we didn't find the end of headers, and parse_start does
90 * NOT point to a CR or NL character, then return failure
92 if (**parse_start
!= '\r' && **parse_start
!= '\n')
93 return 0; /* failure */
96 * If we didn't find the end of headers, and parse_start does point
97 * to an empty line, then we have empty headers. Skip all CR and
98 * NL characters up to the first NL. Leave parse_start pointing at
99 * the first character after the first NL.
101 *blk_start
= *parse_start
;
103 *blk_end
= *blk_start
;
105 for (nnl
= 0; nnl
== 0; (*parse_start
)++) {
106 if (**parse_start
== '\r')
108 else if (**parse_start
== '\n')
/**
 * Isolate the first line (start line) of a 0-terminated message buffer.
 *
 * \param parse_start  in/out: points at the beginning of the message; on
 *                     success it is advanced to the first byte after the
 *                     line terminator.
 * \param blk_start    out: set to the beginning of the start line.
 * \param blk_end      out: set to the first byte after the terminator, i.e.
 *                     after any run of CR bytes and one optional LF.
 *
 * \retval 0  no CR or LF was found before the terminating NUL (more data is
 *            needed); none of the output pointers are modified.
 * \retval 1  the start line was isolated.
 */
int
httpMsgIsolateStart(const char **parse_start, const char **blk_start, const char **blk_end)
{
    /* strcspn() yields size_t; keep it unsigned rather than narrowing to int */
    const size_t slen = strcspn(*parse_start, "\r\n");

    if (!(*parse_start)[slen])  /* no CRLF found */
        return 0;

    *blk_start = *parse_start;
    *blk_end = *blk_start + slen;

    while (**blk_end == '\r')   /* CR: tolerate a run of carriage returns */
        ++(*blk_end);

    if (**blk_end == '\n')      /* LF */
        ++(*blk_end);

    *parse_start = *blk_end;

    return 1;
}
// Parses the message held in buf into this object.
//   buf    buffer with the raw message; it is 0-terminated in place first
//   eof    true when no more input will arrive, which turns incomplete
//          input into HTTP_INVALID_HEADER
//   error  out: reset to HTTP_STATUS_NONE on entry, then set to the status
//          describing the failure, if any
// NOTE(review): most branch returns are not visible in this listing.
141 // NOTE(review): parse() is declared bool, so the older int-style return codes
142 // (negated HTTP_ error, zero for more data, header size) no longer describe it.
143 // Failures are reported through *error instead.
144 bool HttpMsg::parse(MemBuf
*buf
, bool eof
, http_status
*error
)
147 *error
= HTTP_STATUS_NONE
;
149 // httpMsgParseStep() and debugging require 0-termination, unfortunately
150 buf
->terminate(); // does not affect content size
152 // find the end of headers
153 const size_t hdr_len
= headersEnd(buf
->content(), buf
->contentSize());
155 // sanity check the start line to see if this is in fact an HTTP message
156 if (!sanityCheckStartLine(buf
, hdr_len
, error
)) {
157 // NP: sanityCheck sets *error and sends debug warnings on syntax errors.
158 // if we have seen the connection close, this is an error too
159 if (eof
&& *error
==HTTP_STATUS_NONE
)
160 *error
= HTTP_INVALID_HEADER
;
165 // TODO: move to httpReplyParseStep()
// Reject when the located headers, or the whole buffer if no end of headers
// was found, exceed Config.maxReplyHeaderSize.
166 if (hdr_len
> Config
.maxReplyHeaderSize
|| (hdr_len
<= 0 && (size_t)buf
->contentSize() > Config
.maxReplyHeaderSize
)) {
167 debugs(58, 1, "HttpMsg::parse: Too large reply header (" << hdr_len
<< " > " << Config
.maxReplyHeaderSize
);
168 *error
= HTTP_HEADER_TOO_LARGE
;
173 debugs(58, 3, "HttpMsg::parse: failed to find end of headers (eof: " << eof
<< ") in '" << buf
->content() << "'");
175 if (eof
) // iff we have seen the end, this is an error
176 *error
= HTTP_INVALID_HEADER
;
// Headers are complete: run the actual start-line and header parser.
181 const int res
= httpMsgParseStep(buf
->content(), buf
->contentSize(), eof
);
183 if (res
< 0) { // error
184 debugs(58, 3, "HttpMsg::parse: cannot parse isolated headers in '" << buf
->content() << "'");
185 *error
= HTTP_INVALID_HEADER
;
190 debugs(58, 2, "HttpMsg::parse: strange, need more data near '" << buf
->content() << "'");
191 *error
= HTTP_INVALID_HEADER
;
192 return false; // but this should not happen due to headersEnd() above
196 debugs(58, 9, "HttpMsg::parse success (" << hdr_len
<< " bytes) near '" << buf
->content() << "'");
// Cross-check the size computed by httpMsgParseStep() against headersEnd()
// and prefer the headersEnd() value on disagreement.
198 if (hdr_sz
!= (int)hdr_len
) {
199 debugs(58, 1, "internal HttpMsg::parse vs. headersEnd error: " <<
200 hdr_sz
<< " != " << hdr_len
);
201 hdr_sz
= (int)hdr_len
; // because old http.cc code used hdr_len
// One-shot (non-incremental) parse of a header buffer that the caller cannot
// 0-terminate. The visible code hands mb.buf and mb.size to
// httpMsgParseStep() with atEnd == 0 and stores the result in success.
// NOTE(review): how mb is filled from buf and end, the state reset and the
// final return are not visible in this listing - confirm.
208 * parseCharBuf() takes character buffer of HTTP headers (buf),
209 * which may not be NULL-terminated, and fills in an HttpMsg
210 * structure. The parameter 'end' specifies the offset to
211 * the end of the reply headers. The caller may know where the
212 * end is, but is unable to NULL-terminate the buffer. This function
213 * returns true on success.
216 HttpMsg::parseCharBuf(const char *buf
, ssize_t end
)
220 /* reset current state, because we are not used in incremental fashion */
225 success
= httpMsgParseStep(mb
.buf
, mb
.size
, 0);
// Incremental parser driven by pstate.
//   buf    0-terminated input (strlen() is applied to it further down)
//   len    number of bytes in buf
//   atEnd  non-zero when no more input will follow
// Stage 1 (psReadyToParseStartLine): isolate the start line with
// httpMsgIsolateStart() and hand it to parseFirstLine().
// Stage 2 (psReadyToParseHeaders): isolate the header block with
// httpMsgIsolateHeaders() and hand it to header.parse().
// After each stage hdr_sz is set to the number of bytes consumed from buf.
// A failed parseFirstLine() or header.parse() returns httpMsgParseError().
// The profiler is stopped before each return that is visible here.
// NOTE(review): pstate transitions, the declaration of parse_len and the
// remaining returns are not visible in this listing - confirm.
231 * parses a 0-terminating buffer into HttpMsg.
234 * 0 -- need more data (partial parse)
238 HttpMsg::httpMsgParseStep(const char *buf
, int len
, int atEnd
)
240 const char *parse_start
= buf
;
242 const char *blk_start
, *blk_end
;
243 const char **parse_end_ptr
= &blk_end
;
245 assert(pstate
< psParsed
);
247 *parse_end_ptr
= parse_start
;
249 PROF_start(HttpMsg_httpMsgParseStep
);
251 if (pstate
== psReadyToParseStartLine
) {
252 if (!httpMsgIsolateStart(&parse_start
, &blk_start
, &blk_end
)) {
253 PROF_stop(HttpMsg_httpMsgParseStep
);
257 if (!parseFirstLine(blk_start
, blk_end
)) {
258 PROF_stop(HttpMsg_httpMsgParseStep
);
259 return httpMsgParseError();
262 *parse_end_ptr
= parse_start
;
264 hdr_sz
= *parse_end_ptr
- buf
;
265 parse_len
= parse_len
- hdr_sz
;
271 * XXX This code uses parse_start; but if we're incrementally parsing then
272 * this code might not actually be given parse_start at the right spot (just
273 * after headers.) Grr.
275 if (pstate
== psReadyToParseHeaders
) {
276 if (!httpMsgIsolateHeaders(&parse_start
, parse_len
, &blk_start
, &blk_end
)) {
278 blk_start
= parse_start
, blk_end
= blk_start
+ strlen(blk_start
);
280 PROF_stop(HttpMsg_httpMsgParseStep
);
285 if (!header
.parse(blk_start
, blk_end
)) {
286 PROF_stop(HttpMsg_httpMsgParseStep
);
287 return httpMsgParseError();
292 *parse_end_ptr
= parse_start
;
294 hdr_sz
= *parse_end_ptr
- buf
;
299 PROF_stop(HttpMsg_httpMsgParseStep
);
// httpMsgParseStep() returns this function's result when the start line or
// the header block fails to parse.
// NOTE(review): the body is not visible in this listing.
303 /* handy: resets and returns -1 */
305 HttpMsg::httpMsgParseError()
// Sets the Content-Length of this message to clen: any existing
// Content-Length header is removed, a fresh one carrying clen is added, and
// the cached content_length member is updated to match.
312 HttpMsg::setContentLength(int64_t clen
)
314 header
.delById(HDR_CONTENT_LENGTH
); // if any
315 header
.putInt64(HDR_CONTENT_LENGTH
, clen
);
316 content_length
= clen
;
// Reports whether the connection may be kept open after this message.
// For versions above HTTP/1.0 the answer is yes unless the headers carry the
// close connection directive; for HTTP/1.0 and older it is yes only when the
// keep-alive connection directive is present.
320 HttpMsg::persistent() const
322 if (http_ver
> HttpVersion(1, 0)) {
324 * for modern versions of HTTP: persistent unless there is
325 * a "Connection: close" header.
327 return !httpHeaderHasConnDir(&header
, "close");
329 /* for old versions of HTTP: persistent if has "keep-alive" */
330 return httpHeaderHasConnDir(&header
, "keep-alive");
// Serialises the message head into packer p: the first line is emitted via
// packFirstLineInto() (full_uri is passed straight through), and a bare CRLF
// is appended last.
// NOTE(review): one statement between the two calls is not visible in this
// listing - presumably it packs the header fields; confirm.
334 void HttpMsg::packInto(Packer
*p
, bool full_uri
) const
336 packFirstLineInto(p
, full_uri
);
338 packerAppend(p
, "\r\n", 2);
341 void HttpMsg::hdrCacheInit()
343 content_length
= header
.getInt64(HDR_CONTENT_LENGTH
);
344 assert(NULL
== cache_control
);
345 cache_control
= header
.getCc();
// Writes the first line of this message into mb: a packer p is attached to
// mb with packerToMemInit() and packFirstLineInto() is called with
// full_uri == true.
// NOTE(review): the declaration of p and any packer cleanup are not visible
// in this listing - confirm.
349 * useful for debugging
351 void HttpMsg::firstLineBuf(MemBuf
& mb
)
354 packerToMemInit(&p
, &mb
);
355 packFirstLineInto(&p
, true);
359 // use HTTPMSGLOCK() instead of calling this directly
367 // use HTTPMSGUNLOCK() instead of calling this directly
371 assert(lock_count
> 0);
// Prepares parser hdr for a new request held in buf (bufsiz bytes).
// The parse status is reset to HTTP_STATUS_NONE, every start/end offset pair
// (request, headers, method, URL, version) is set to -1 meaning "not found",
// and the version numbers are zeroed.
// NOTE(review): buf is streamed to debugs() as a C string, so it presumably
// has to be 0-terminated - confirm with callers. Assignments on lines that
// are not visible in this listing (for example of hdr->buf) are assumed.
380 HttpParserInit(HttpParser
*hdr
, const char *buf
, int bufsiz
)
383 hdr
->request_parse_status
= HTTP_STATUS_NONE
;
385 hdr
->bufsiz
= bufsiz
;
386 hdr
->req_start
= hdr
->req_end
= -1;
387 hdr
->hdr_start
= hdr
->hdr_end
= -1;
388 debugs(74, 5, "httpParseInit: Request buffer is " << buf
);
389 hdr
->m_start
= hdr
->m_end
= -1;
390 hdr
->u_start
= hdr
->u_end
= -1;
391 hdr
->v_start
= hdr
->v_end
= -1;
392 hdr
->v_maj
= hdr
->v_min
= 0;
// Returns the size in bytes of the request line, computed as
// req_end - req_start + 1 (both offsets are inclusive).
// Asserts that hp->state is 1 and that both offsets have been set.
396 /* XXX This should eventually turn into something inlined or #define'd */
398 HttpParserReqSz(HttpParser
*hp
)
400 assert(hp
->state
== 1);
401 assert(hp
->req_start
!= -1);
402 assert(hp
->req_end
!= -1);
403 return hp
->req_end
- hp
->req_start
+ 1;
// Returns the size in bytes of the header block, computed as
// hdr_end - hdr_start + 1 (both offsets are inclusive).
// Asserts that hp->state is 1 and that both offsets have been set.
408 * This +1 makes it 'right' but won't make any sense if
409 * there's a 0 byte header? This won't happen normally - a valid header
410 * is at -least- a blank line (\n, or \r\n.)
413 HttpParserHdrSz(HttpParser
*hp
)
415 assert(hp
->state
== 1);
416 assert(hp
->hdr_start
!= -1);
417 assert(hp
->hdr_end
!= -1);
418 return hp
->hdr_end
- hp
->hdr_start
+ 1;
// Returns a pointer to the first byte of the header block inside the parser
// buffer (hp->buf + hp->hdr_start).
// Asserts that hp->state is 1 and that both header offsets have been set.
422 HttpParserHdrBuf(HttpParser
*hp
)
424 assert(hp
->state
== 1);
425 assert(hp
->hdr_start
!= -1);
426 assert(hp
->hdr_end
!= -1);
427 return hp
->buf
+ hp
->hdr_start
;
// Returns the length in bytes from the start of the request line through the
// end of the header block (hdr_end - req_start + 1).
// Unlike the sibling accessors, no assertions guard the offsets here.
431 HttpParserRequestLen(HttpParser
*hp
)
433 return hp
->hdr_end
- hp
->req_start
+ 1;
// Parses the HTTP request line found in buf (bufsiz bytes) and records the
// inclusive offsets of the method (m_start, m_end), the URL (u_start, u_end)
// and the version token (v_start, v_end).
//
// Outline of the visible logic:
//  1. With Config.onoff.relaxed_header_parser set, leading SP bytes are
//     skipped by advancing req_start; a negative setting also logs a warning.
//  2. One pass over the buffer tracks the first and last SP, the first
//     non-blank byte after the first SP (second_word) and the line
//     terminator. In relaxed mode a run of CR bytes before the LF is
//     tolerated; otherwise a stray CR leads to HTTP_BAD_REQUEST.
//  3. Without a terminator the parser reports that more data is needed
//     (retval 0 in the debug output).
//  4. Field offsets are derived from the tracked positions; a missing method
//     or URI sets request_parse_status to HTTP_BAD_REQUEST.
//  5. No SP after the URI means HTTP/0.9 and the status becomes HTTP_OK.
//  6. Otherwise the version token must start with HTTP/ followed by
//     digits, a dot and digits, each number below 65536; violations set
//     HTTP_HTTP_VERSION_NOT_SUPPORTED. With USE_HTTP_VIOLATIONS an unknown
//     protocol token is treated as HTTP/0.9.
//
// Per the inline comments the result is 0 for "need more data", -1 for
// failure and 1 for success.
// NOTE(review): many short lines (braces, returns, assignments such as the
// digit accumulation multiplier) are not visible in this listing - confirm
// against the complete file before changing any logic.
438 HttpParser::parseRequestFirstLine()
440 int second_word
= -1; // track the suspected URI start
441 int first_whitespace
= -1, last_whitespace
= -1; // track the first and last SP byte
442 int line_end
= -1; // tracks the last byte BEFORE terminal \r\n or \n sequence
444 debugs(74, 5, HERE
<< "parsing possible request: " << buf
);
446 // Single-pass parse: (provided we have the whole line anyways)
449 if (Config
.onoff
.relaxed_header_parser
) {
450 if (Config
.onoff
.relaxed_header_parser
< 0 && buf
[req_start
] == ' ')
451 debugs(74, DBG_IMPORTANT
, "WARNING: Invalid HTTP Request: " <<
452 "Whitespace bytes received ahead of method. " <<
453 "Ignored due to relaxed_header_parser.");
454 // Be tolerant of prefix spaces (other bytes are valid method values)
455 for (; req_start
< bufsiz
&& buf
[req_start
] == ' '; req_start
++);
458 for (int i
= 0; i
< bufsiz
; i
++) {
459 // track first and last whitespace (SP only)
462 if (first_whitespace
< req_start
)
463 first_whitespace
= i
;
466 // track next non-SP/non-HT byte after first_whitespace
467 if (second_word
< first_whitespace
&& buf
[i
] != ' ' && buf
[i
] != '\t') {
471 // locate line terminator
472 if (buf
[i
] == '\n') {
477 if (i
< bufsiz
- 1 && buf
[i
] == '\r') {
478 if (Config
.onoff
.relaxed_header_parser
) {
479 if (Config
.onoff
.relaxed_header_parser
< 0 && buf
[i
+ 1] == '\r')
480 debugs(74, DBG_IMPORTANT
, "WARNING: Invalid HTTP Request: " <<
481 "Series of carriage-return bytes received prior to line terminator. " <<
482 "Ignored due to relaxed_header_parser.");
484 // Be tolerant of invalid multiple \r prior to terminal \n
485 if (buf
[i
+ 1] == '\n' || buf
[i
+ 1] == '\r')
487 while (i
< bufsiz
- 1 && buf
[i
+ 1] == '\r')
490 if (buf
[i
+ 1] == '\n') {
495 if (buf
[i
+ 1] == '\n') {
502 // RFC 2616 section 5.1
503 // "No CR or LF is allowed except in the final CRLF sequence"
504 request_parse_status
= HTTP_BAD_REQUEST
;
509 debugs(74, 5, "Parser: retval 0: from " << req_start
<<
510 "->" << req_end
<< ": needs more data to complete first line.");
514 // NP: we have now seen EOL, more-data (0) cannot occur.
515 // From here on any failure is -1, success is 1
520 // Process what we now know about the line structure into field offsets
521 // generating HTTP status for any aborts as we go.
523 // First non-whitespace = beginning of method
524 if (req_start
> line_end
) {
525 request_parse_status
= HTTP_BAD_REQUEST
;
530 // First whitespace = end of method
531 if (first_whitespace
> line_end
|| first_whitespace
< req_start
) {
532 request_parse_status
= HTTP_BAD_REQUEST
; // no method
535 m_end
= first_whitespace
- 1;
536 if (m_end
< m_start
) {
537 request_parse_status
= HTTP_BAD_REQUEST
; // missing URI?
541 // First non-whitespace after first SP = beginning of URL+Version
542 if (second_word
> line_end
|| second_word
< req_start
) {
543 request_parse_status
= HTTP_BAD_REQUEST
; // missing URI
546 u_start
= second_word
;
548 // RFC 1945: SP and version following URI are optional, marking version 0.9
549 // we identify this by the last whitespace being earlier than URI start
550 if (last_whitespace
< second_word
&& last_whitespace
>= req_start
) {
554 request_parse_status
= HTTP_OK
; // HTTP/0.9
557 // otherwise last whitespace is somewhere after end of URI.
558 u_end
= last_whitespace
;
559 // crop any trailing whitespace in the area we think of as URI
560 for (; u_end
>= u_start
&& xisspace(buf
[u_end
]); u_end
--);
562 if (u_end
< u_start
) {
563 request_parse_status
= HTTP_BAD_REQUEST
; // missing URI
567 // Last whitespace SP = before start of protocol/version
568 if (last_whitespace
>= line_end
) {
569 request_parse_status
= HTTP_BAD_REQUEST
; // missing version
572 v_start
= last_whitespace
+ 1;
575 // We only accept HTTP protocol requests right now.
576 // TODO: accept other protocols; RFC 2326 (RTSP protocol) etc
577 if ((v_end
- v_start
+1) < 5 || strncasecmp(&buf
[v_start
], "HTTP/", 5) != 0) {
578 #if USE_HTTP_VIOLATIONS
579 // being lax; old parser accepted strange versions
580 // there is a LOT of cases which are ambiguous, therefore we cannot use relaxed_header_parser here.
584 request_parse_status
= HTTP_OK
; // treat as HTTP/0.9
587 request_parse_status
= HTTP_HTTP_VERSION_NOT_SUPPORTED
; // protocol not supported / implemented.
592 int i
= v_start
+ sizeof("HTTP/") -1;
594 /* next should be 1 or more digits */
595 if (!isdigit(buf
[i
])) {
596 request_parse_status
= HTTP_HTTP_VERSION_NOT_SUPPORTED
;
600 for (; i
<= line_end
&& (isdigit(buf
[i
])) && maj
< 65536; i
++) {
602 maj
= maj
+ (buf
[i
]) - '0';
604 // catch too-big values or missing remainders
605 if (maj
>= 65536 || i
> line_end
) {
606 request_parse_status
= HTTP_HTTP_VERSION_NOT_SUPPORTED
;
611 /* next should be .; we -have- to have this as we have a whole line.. */
613 request_parse_status
= HTTP_HTTP_VERSION_NOT_SUPPORTED
;
616 // catch missing minor part
617 if (++i
> line_end
) {
618 request_parse_status
= HTTP_HTTP_VERSION_NOT_SUPPORTED
;
622 /* next should be one or more digits */
623 if (!isdigit(buf
[i
])) {
624 request_parse_status
= HTTP_HTTP_VERSION_NOT_SUPPORTED
;
628 for (; i
<= line_end
&& (isdigit(buf
[i
])) && min
< 65536; i
++) {
630 min
= min
+ (buf
[i
]) - '0';
632 // catch too-big values or trailing garbage
633 if (min
>= 65536 || i
< line_end
) {
634 request_parse_status
= HTTP_HTTP_VERSION_NOT_SUPPORTED
;
640 * Rightio - we have all the schtuff. Return true; we've got enough.
642 request_parse_status
= HTTP_OK
;
// Profiled wrapper around hmsg->parseRequestFirstLine(): runs the parser,
// logs the result code together with the request, method, URL and version
// offsets and the parsed version numbers at debug level 5, then stops the
// profiler.
// NOTE(review): the return statement is not visible in this listing -
// presumably retcode is returned; confirm.
647 HttpParserParseReqLine(HttpParser
*hmsg
)
649 PROF_start(HttpParserParseReqLine
);
650 int retcode
= hmsg
->parseRequestFirstLine();
651 debugs(74, 5, "Parser: retval " << retcode
<< ": from " << hmsg
->req_start
<<
652 "->" << hmsg
->req_end
<< ": method " << hmsg
->m_start
<< "->" <<
653 hmsg
->m_end
<< "; url " << hmsg
->u_start
<< "->" << hmsg
->u_end
<<
654 "; version " << hmsg
->v_start
<< "->" << hmsg
->v_end
<< " (" << hmsg
->v_maj
<<
655 "/" << hmsg
->v_min
<< ")");
656 PROF_stop(HttpParserParseReqLine
);