From: Amos Jeffries Date: Mon, 30 Dec 2013 21:22:56 +0000 (-0800) Subject: Make Http1Parser::parseRequestFirstLine() private and document X-Git-Tag: merge-candidate-3-v1~506^2~77 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=c11191e0f9af998fae4b454f403d0a62db7d1970;p=thirdparty%2Fsquid.git Make Http1Parser::parseRequestFirstLine() private and document Also shuffle the prefix garbage tolerance processing to a separate method skipGarbageLines() and document the intended operations (it is currently non-conformant with RFC 2616). --- diff --git a/src/http/Http1Parser.cc b/src/http/Http1Parser.cc index db289f1130..b7041fa0c9 100644 --- a/src/http/Http1Parser.cc +++ b/src/http/Http1Parser.cc @@ -34,6 +34,77 @@ Http::Http1Parser::reset(const char *aBuf, int len) debugs(74, DBG_DATA, "Request parse " << Raw("buf", buf, bufsiz)); } +/** + * Attempt to parse the first line of a new request message. + * + * Governed by RFC 2616 section 4.1 + * " + * In the interest of robustness, servers SHOULD ignore any empty + * line(s) received where a Request-Line is expected. In other words, if + * the server is reading the protocol stream at the beginning of a + * message and receives a CRLF first, it should ignore the CRLF. + * + * ... To restate what is explicitly forbidden by the + * BNF, an HTTP/1.1 client MUST NOT preface or follow a request with an + * extra CRLF. + * " + * + * Parsing state is stored between calls to avoid repeating buffer scans. + * \return true if garbage whitespace was found + */ +bool +Http::Http1Parser::skipGarbageLines() +{ + req.start = parseOffset_; // avoid re-parsing any portion we managed to complete + +#if WHEN_RFC_COMPLIANT // CRLF or bare-LF is what RFC 2616 tolerant parsers do ... + if (Config.onoff.relaxed_header_parser) { + if (Config.onoff.relaxed_header_parser < 0 && (buf[req.start] == '\r' || buf[req.start] == '\n')) + debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " << + "CRLF bytes received ahead of request-line. " << + "Ignored due to relaxed_header_parser."); + // Be tolerant of prefix empty lines + for (; req.start < bufsiz && (buf[req.start] == '\n' || ((buf[req.start] == '\r' && (buf[req.start+1] == '\n')); ++req.start); + parseOffset_ = req.start; + } +#endif + + /* XXX: this is a Squid-specific tolerance + * it appears never to have been relevant outside out unit-tests + * because the ConnStateData parser loop starts with consumeWhitespace() + * which absorbs any SP HTAB VTAB CR LF characters. + * But unit-tests called the HttpParser method directly without that pruning. + */ +#if USE_HTTP_VIOLATIONS + if (Config.onoff.relaxed_header_parser) { + if (Config.onoff.relaxed_header_parser < 0 && buf[req.start] == ' ') + debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " << + "Whitespace bytes received ahead of method. " << + "Ignored due to relaxed_header_parser."); + // Be tolerant of prefix spaces (other bytes are valid method values) + for (; req.start < bufsiz && buf[req.start] == ' '; ++req.start); + parseOffset_ = req.start; + } +#endif + + return (parseOffset_ > 0); +} + +/** + * Attempt to parse the first line of a new request message. + * + * Governed by: + * RFC 1945 section 5.1 + * RFC 2616 section 5.1 + * + * Parsing state is stored between calls. However the current implementation + * begins parsing from scratch on every call. + * The return value tells you whether the parsing state fields are valid or not. + * + * \retval -1 an error occurred. request_parse_status indicates HTTP status result. + * \retval 1 successful parse. member fields contain the request-line items + * \retval 0 more data is needed to complete the parse + */ int Http::Http1Parser::parseRequestFirstLine() { @@ -47,15 +118,6 @@ Http::Http1Parser::parseRequestFirstLine() // Single-pass parse: (provided we have the whole line anyways) req.start = parseOffset_; // avoid re-parsing any portion we managed to complete - if (Config.onoff.relaxed_header_parser) { - if (Config.onoff.relaxed_header_parser < 0 && buf[req.start] == ' ') - debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " << - "Whitespace bytes received ahead of method. " << - "Ignored due to relaxed_header_parser."); - // Be tolerant of prefix spaces (other bytes are valid method values) - for (; req.start < bufsiz && buf[req.start] == ' '; ++req.start); - parseOffset_ = req.start; - } req.end = -1; for (int i = 0; i < bufsiz; ++i) { // track first and last whitespace (SP only) @@ -256,6 +318,11 @@ bool Http::Http1Parser::parseRequest() { // stage 1: locate the request-line + if (completedState_ == HTTP_PARSE_NEW) { + if (skipGarbageLines() && (size_t)bufsiz < parseOffset_) + return 0; + } + // stage 2: parse the request-line if (completedState_ == HTTP_PARSE_NEW) { PROF_start(HttpParserParseReqLine); diff --git a/src/http/Http1Parser.h b/src/http/Http1Parser.h index c50685e159..b217edfdc7 100644 --- a/src/http/Http1Parser.h +++ b/src/http/Http1Parser.h @@ -72,26 +72,8 @@ public: * \return true if a valid request was parsed. * \note Use isDone() method to determine between incomplete parse and errors. */ - // TODO: parse more than just the request-line bool parseRequest(); - /** - * Attempt to parse the first line of a new request message. - * - * Governed by: - * RFC 1945 section 5.1 - * RFC 2616 section 5.1 - * - * Parsing state is stored between calls. However the current implementation - * begins parsing from scratch on every call. - * The return value tells you whether the parsing state fields are valid or not. - * - * \retval -1 an error occurred. request_parse_status indicates HTTP status result. - * \retval 1 successful parse. member fields contain the request-line items - * \retval 0 more data is needed to complete the parse - */ - int parseRequestFirstLine(); - public: const char *buf; int bufsiz; @@ -122,6 +104,9 @@ public: Http::StatusCode request_parse_status; private: + bool skipGarbageLines(); + int parseRequestFirstLine(); + /// byte offset for non-parsed region of the buffer size_t parseOffset_;