From: Amos Jeffries Date: Fri, 23 Jan 2015 07:43:39 +0000 (-0800) Subject: Review changes for HTTP ResponseParser upgrade X-Git-Tag: merge-candidate-3-v1~240^2~4 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=b8f86fd23f25acb646cb6a31fa181579392028f8;p=thirdparty%2Fsquid.git Review changes for HTTP ResponseParser upgrade * redesign parser logic after Tokenizer API changes * fix ICY protocol mime block detection * add RFC 7230 section 3.5 whitespace tolerant parse * add documentation to match request parser regarding RFC compliance --- diff --git a/src/http/one/Parser.cc b/src/http/one/Parser.cc index 964bde6d82..e072d79a2b 100644 --- a/src/http/one/Parser.cc +++ b/src/http/one/Parser.cc @@ -11,6 +11,7 @@ #include "http/one/Parser.h" #include "mime_header.h" #include "parser/Tokenizer.h" +#include "SquidConfig.h" /// RFC 7230 section 2.6 - 7 magic octets const SBuf Http::One::Parser::Http1magic("HTTP/1."); @@ -25,9 +26,26 @@ Http::One::Parser::clear() } bool -Http::One::Parser::findMimeBlock(const char *which, size_t limit) +Http::One::Parser::skipLineTerminator(::Parser::Tokenizer &tok) const { - if (msgProtocol_.major == 1) { + static const SBuf crlf("\r\n"); + if (tok.skip(crlf)) + return true; + + if (Config.onoff.relaxed_header_parser && tok.skipOne(CharacterSet::LF)) + return true; + + return false; +} + +bool +Http::One::Parser::findMimeBlock(const char *which, const size_t limit) +{ + // MIME headers block exist in (only) HTTP/1.x and ICY + const bool expectMime = (msgProtocol_.protocol == AnyP::PROTO_HTTP && msgProtocol_.major == 1) || + msgProtocol_.protocol == AnyP::PROTO_ICY; + + if (expectMime) { /* NOTE: HTTP/0.9 messages do not have a mime header block. * So the rest of the code will need to deal with '0'-byte headers * (ie, none, so don't try parsing em) @@ -43,6 +61,15 @@ Http::One::Parser::findMimeBlock(const char *which, size_t limit) debugs(33, 5, "Incomplete " << which << ", waiting for end of headers"); return false; } + + // Squid could handle these headers, but admin does not want to + if (messageHeaderSize() >= limit) { + debugs(33, 5, "Too large " << which); + parseStatusCode = Http::scHeaderTooLarge; + parsingStage_ = HTTP_PARSE_DONE; + return false; + } + mimeHeaderBlock_ = buf_.consume(mimeHeaderBytes); debugs(74, 5, "mime header (0-" << mimeHeaderBytes << ") {" << mimeHeaderBlock_ << "}"); @@ -52,13 +79,6 @@ Http::One::Parser::findMimeBlock(const char *which, size_t limit) // NP: we do not do any further stages here yet so go straight to DONE parsingStage_ = HTTP_PARSE_DONE; - // Squid could handle these headers, but admin does not want to - if (messageHeaderSize() >= limit) { - debugs(33, 5, "Too large " << which); - parseStatusCode = Http::scHeaderTooLarge; - return false; - } - return true; } diff --git a/src/http/one/Parser.h b/src/http/one/Parser.h index 0f457d2bad..734971c13d 100644 --- a/src/http/one/Parser.h +++ b/src/http/one/Parser.h @@ -14,6 +14,10 @@ #include "http/StatusCode.h" #include "SBuf.h" +namespace Parser { +class Tokenizer; +} + namespace Http { namespace One { @@ -99,8 +103,19 @@ public: Http::StatusCode parseStatusCode; protected: - /// parse scan to find the mime headers block for current message - bool findMimeBlock(const char *which, size_t limit); + /// detect and skip the CRLF or (if tolerant) LF line terminator + /// consume from the tokenizer and return true only if found + bool skipLineTerminator(::Parser::Tokenizer &tok) const; + + /** + * Parse scan to find the mime headers block for current message. + * + * \retval true if mime block (or a blocks non-existence) has been + * identified accurately within limit characters. + * mimeHeaderBlock_ has been updated and buf_ consumed. + * \retval false an error occured, or no MIME terminator found within limit. + */ + bool findMimeBlock(const char *which, const size_t limit); /// RFC 7230 section 2.6 - 7 magic octets static const SBuf Http1magic; diff --git a/src/http/one/ResponseParser.cc b/src/http/one/ResponseParser.cc index 5f04e0829e..53b80f6f99 100644 --- a/src/http/one/ResponseParser.cc +++ b/src/http/one/ResponseParser.cc @@ -25,7 +25,7 @@ Http::One::ResponseParser::firstLineSize() const return result; } // NP: the parser does not accept >2 DIGIT for version numbers - if (msgProtocol_.minor >10) + if (msgProtocol_.minor > 9) result += 2; else result += 1; @@ -39,42 +39,32 @@ Http::One::ResponseParser::firstLineSize() const // NP: we found the protocol version and consumed it already. // just need the status code and reason phrase const int -Http::One::ResponseParser::parseResponseStatusAndReason() +Http::One::ResponseParser::parseResponseStatusAndReason(::Parser::Tokenizer &tok, const CharacterSet &WspDelim) { - if (buf_.isEmpty()) - return 0; - - ::Parser::Tokenizer tok(buf_); - if (!completedStatus_) { debugs(74, 9, "seek status-code in: " << tok.remaining().substr(0,10) << "..."); - SBuf status; - // status code is 3 DIGIT octets - // NP: search space is >3 to get terminator character) - if(!tok.prefix(status, CharacterSet::DIGIT, 4)) - return -1; // invalid status - // NOTE: multiple SP or non-SP bytes between version and status code are invalid. - if (tok.atEnd()) - return 0; // need more to be sure we have it all - if(!tok.skip(' ')) - return -1; // invalid status, a single SP terminator required - // NOTE: any whitespace after the single SP is part of the reason phrase. + /* RFC 7230 section 3.1.2 - status code is 3 DIGIT octets. + * There is no limit on what those octets may be. + * 000 through 999 are all valid. + */ + int64_t statusValue; + if (tok.int64(statusValue, 10, false, 3) && tok.skipOne(WspDelim)) { - debugs(74, 6, "found string status-code=" << status); + debugs(74, 6, "found int64 status-code=" << statusValue); + statusCode_ = static_cast(statusValue); - // get the actual numeric value of the 0-3 digits we found - ::Parser::Tokenizer t2(status); - int64_t statusValue; - if (!t2.int64(statusValue)) - return -1; // ouch. digits not forming a valid number? - debugs(74, 6, "found int64 status-code=" << statusValue); - if (statusValue < 0 || statusValue > 999) - return -1; // ouch. digits not within valid status code range. + buf_ = tok.remaining(); // resume checkpoint + completedStatus_ = true; - statusCode_ = static_cast(statusValue); + } else if (tok.atEnd()) { + debugs(74, 6, "Parser needs more data"); + return 0; // need more to be sure we have it all - buf_ = tok.remaining(); // resume checkpoint - completedStatus_ = true; + } else { + debugs(74, 6, "invalid status-line. invalid code."); + return -1; // invalid status, a single SP terminator required + } + // NOTE: any whitespace after the single SP is part of the reason phrase. } if (tok.atEnd()) @@ -90,66 +80,79 @@ Http::One::ResponseParser::parseResponseStatusAndReason() // if we got here we are still looking for reason-phrase bytes static const CharacterSet phraseChars = CharacterSet::WSP + CharacterSet::VCHAR + CharacterSet::OBSTEXT; - tok.prefix(reasonPhrase_, phraseChars); // optional, no error if missing - tok.skip('\r'); // optional trailing CR + (void)tok.prefix(reasonPhrase_, phraseChars); // optional, no error if missing + if (skipLineTerminator(tok)) { + debugs(74, DBG_DATA, "parse remaining buf={length=" << tok.remaining().length() << ", data='" << tok.remaining() << "'}"); + buf_ = tok.remaining(); // resume checkpoint + return 1; + } + reasonPhrase_.clear(); if (tok.atEnd()) return 0; // need more to be sure we have it all - // LF existence matters - if (!tok.skip('\n')) { - reasonPhrase_.clear(); - return -1; // found invalid characters in the phrase - } - - debugs(74, DBG_DATA, "parse remaining buf={length=" << tok.remaining().length() << ", data='" << tok.remaining() << "'}"); - buf_ = tok.remaining(); // resume checkpoint - return 1; + debugs(74, 6, "invalid status-line. garbage in reason phrase."); + return -1; } +/** + * Attempt to parse the method field out of an HTTP message status-line. + * + * Governed by: + * RFC 1945 section 6.1 + * RFC 7230 section 2.6, 3.1 and 3.5 + * + * Parsing state is stored between calls. The current implementation uses + * checkpoints after each successful status-line field. + * The return value tells you whether the parsing is completed or not. + * + * \retval -1 an error occurred. + * \retval 1 successful parse. statusCode_ and maybe reasonPhrase_ are filled and buffer consumed including first delimiter. + * \retval 0 more data is needed to complete the parse + */ const int Http::One::ResponseParser::parseResponseFirstLine() { ::Parser::Tokenizer tok(buf_); + CharacterSet WspDelim = CharacterSet::SP; // strict parse only accepts SP + + if (Config.onoff.relaxed_header_parser) { + // RFC 7230 section 3.5 + // tolerant parser MAY accept any of SP, HTAB, VT (%x0B), FF (%x0C), or bare CR + // as whitespace between status-line fields + WspDelim += CharacterSet::HTAB + + CharacterSet("VT,FF","\x0B\x0C") + + CharacterSet::CR; + } + if (msgProtocol_.protocol != AnyP::PROTO_NONE) { debugs(74, 6, "continue incremental parse for " << msgProtocol_); debugs(74, DBG_DATA, "parse remaining buf={length=" << tok.remaining().length() << ", data='" << tok.remaining() << "'}"); // we already found the magic, but not the full line. keep going. - return parseResponseStatusAndReason(); + return parseResponseStatusAndReason(tok, WspDelim); } else if (tok.skip(Http1magic)) { debugs(74, 6, "found prefix magic " << Http1magic); // HTTP Response status-line parse - // magic contains major version, still need to find minor - SBuf verMinor; - // NP: we limit to 2-digits for speed, there really is no limit - // XXX: the protocols we accept dont have valid versions > 10 anyway - if (!tok.prefix(verMinor, CharacterSet::DIGIT, 2)) - return -1; // invalid version minor code - if (tok.atEnd()) - return 0; // need more to be sure we have it all - if(!tok.skip(' ')) - return -1; // invalid version, a single SP terminator required - - debugs(74, 6, "found string version-minor=" << verMinor); + // magic contains major version, still need to find minor DIGIT + int64_t verMinor; + if (tok.int64(verMinor, 10, false, 1) && tok.skipOne(WspDelim)) { + msgProtocol_.protocol = AnyP::PROTO_HTTP; + msgProtocol_.major = 1; + msgProtocol_.minor = static_cast(verMinor); - // get the actual numeric value of the 0-3 digits we found - ::Parser::Tokenizer t2(verMinor); - int64_t tvm = 0; - if (!t2.int64(tvm)) - return -1; // ouch. digits not forming a valid number? - msgProtocol_.minor = static_cast(tvm); + debugs(74, 6, "found version=" << msgProtocol_); - msgProtocol_.protocol = AnyP::PROTO_HTTP; - msgProtocol_.major = 1; + debugs(74, DBG_DATA, "parse remaining buf={length=" << tok.remaining().length() << ", data='" << tok.remaining() << "'}"); + buf_ = tok.remaining(); // resume checkpoint + return parseResponseStatusAndReason(tok, WspDelim); - debugs(74, 6, "found version=" << msgProtocol_); - - debugs(74, DBG_DATA, "parse remaining buf={length=" << tok.remaining().length() << ", data='" << tok.remaining() << "'}"); - buf_ = tok.remaining(); // resume checkpoint - return parseResponseStatusAndReason(); + } else if (tok.atEnd()) + return 0; // need more to be sure we have it all + else + return -1; // invalid version or delimiter, a single SP terminator required } else if (tok.skip(IcyMagic)) { debugs(74, 6, "found prefix magic " << IcyMagic); @@ -158,7 +161,7 @@ Http::One::ResponseParser::parseResponseFirstLine() // NP: ICY has no /major.minor details debugs(74, DBG_DATA, "parse remaining buf={length=" << tok.remaining().length() << ", data='" << tok.remaining() << "'}"); buf_ = tok.remaining(); // resume checkpoint - return parseResponseStatusAndReason(); + return parseResponseStatusAndReason(tok, WspDelim); } else if (buf_.length() > Http1magic.length() && buf_.length() > IcyMagic.length()) { debugs(74, 2, "unknown/missing prefix magic. Interpreting as HTTP/0.9"); diff --git a/src/http/one/ResponseParser.h b/src/http/one/ResponseParser.h index d81f1d4189..93f8fd0b8d 100644 --- a/src/http/one/ResponseParser.h +++ b/src/http/one/ResponseParser.h @@ -35,7 +35,7 @@ public: private: const int parseResponseFirstLine(); - const int parseResponseStatusAndReason(); + const int parseResponseStatusAndReason(::Parser::Tokenizer&, const CharacterSet &); /// magic prefix for identifying ICY response messages static const SBuf IcyMagic;