From: Amos Jeffries <squid3@treenet.co.nz>
Date: Fri, 23 Jan 2015 07:43:39 +0000 (-0800)
Subject: Review changes for HTTP ResponseParser upgrade
X-Git-Tag: merge-candidate-3-v1~240^2~4
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=b8f86fd23f25acb646cb6a31fa181579392028f8;p=thirdparty%2Fsquid.git

Review changes for HTTP ResponseParser upgrade

* redesign parser logic after Tokenizer API changes

* fix ICY protocol mime block detection

* add RFC 7230 section 3.5 whitespace tolerant parse

* add documentation to match request parser regarding RFC compliance
---

diff --git a/src/http/one/Parser.cc b/src/http/one/Parser.cc
index 964bde6d82..e072d79a2b 100644
--- a/src/http/one/Parser.cc
+++ b/src/http/one/Parser.cc
@@ -11,6 +11,7 @@
 #include "http/one/Parser.h"
 #include "mime_header.h"
 #include "parser/Tokenizer.h"
+#include "SquidConfig.h"
 
 /// RFC 7230 section 2.6 - 7 magic octets
 const SBuf Http::One::Parser::Http1magic("HTTP/1.");
@@ -25,9 +26,26 @@ Http::One::Parser::clear()
 }
 
 bool
-Http::One::Parser::findMimeBlock(const char *which, size_t limit)
+Http::One::Parser::skipLineTerminator(::Parser::Tokenizer &tok) const
 {
-    if (msgProtocol_.major == 1) {
+    static const SBuf crlf("\r\n");
+    if (tok.skip(crlf))
+        return true;
+
+    if (Config.onoff.relaxed_header_parser && tok.skipOne(CharacterSet::LF))
+        return true;
+
+    return false;
+}
+
+bool
+Http::One::Parser::findMimeBlock(const char *which, const size_t limit)
+{
+    // MIME headers block exist in (only) HTTP/1.x and ICY
+    const bool expectMime = (msgProtocol_.protocol == AnyP::PROTO_HTTP && msgProtocol_.major == 1) ||
+                            msgProtocol_.protocol == AnyP::PROTO_ICY;
+
+    if (expectMime) {
         /* NOTE: HTTP/0.9 messages do not have a mime header block.
          *       So the rest of the code will need to deal with '0'-byte headers
          *       (ie, none, so don't try parsing em)
@@ -43,6 +61,15 @@ Http::One::Parser::findMimeBlock(const char *which, size_t limit)
                 debugs(33, 5, "Incomplete " << which << ", waiting for end of headers");
             return false;
         }
+
+        // Squid could handle these headers, but admin does not want to
+        if (messageHeaderSize() >= limit) {
+            debugs(33, 5, "Too large " << which);
+            parseStatusCode = Http::scHeaderTooLarge;
+            parsingStage_ = HTTP_PARSE_DONE;
+            return false;
+        }
+
         mimeHeaderBlock_ = buf_.consume(mimeHeaderBytes);
         debugs(74, 5, "mime header (0-" << mimeHeaderBytes << ") {" << mimeHeaderBlock_ << "}");
 
@@ -52,13 +79,6 @@ Http::One::Parser::findMimeBlock(const char *which, size_t limit)
     // NP: we do not do any further stages here yet so go straight to DONE
     parsingStage_ = HTTP_PARSE_DONE;
 
-    // Squid could handle these headers, but admin does not want to
-    if (messageHeaderSize() >= limit) {
-        debugs(33, 5, "Too large " << which);
-        parseStatusCode = Http::scHeaderTooLarge;
-        return false;
-    }
-
     return true;
 }
 
diff --git a/src/http/one/Parser.h b/src/http/one/Parser.h
index 0f457d2bad..734971c13d 100644
--- a/src/http/one/Parser.h
+++ b/src/http/one/Parser.h
@@ -14,6 +14,10 @@
 #include "http/StatusCode.h"
 #include "SBuf.h"
 
+namespace Parser {
+class Tokenizer;
+}
+
 namespace Http {
 namespace One {
 
@@ -99,8 +103,19 @@ public:
     Http::StatusCode parseStatusCode;
 
 protected:
-    /// parse scan to find the mime headers block for current message
-    bool findMimeBlock(const char *which, size_t limit);
+    /// detect and skip the CRLF or (if tolerant) LF line terminator
+    /// consume from the tokenizer and return true only if found
+    bool skipLineTerminator(::Parser::Tokenizer &tok) const;
+
+    /**
+     * Parse scan to find the mime headers block for current message.
+     *
+     * \retval true if mime block (or a blocks non-existence) has been
+     *              identified accurately within limit characters.
+     *              mimeHeaderBlock_ has been updated and buf_ consumed.
+     * \retval false an error occured, or no MIME terminator found within limit.
+     */
+    bool findMimeBlock(const char *which, const size_t limit);
 
     /// RFC 7230 section 2.6 - 7 magic octets
     static const SBuf Http1magic;
diff --git a/src/http/one/ResponseParser.cc b/src/http/one/ResponseParser.cc
index 5f04e0829e..53b80f6f99 100644
--- a/src/http/one/ResponseParser.cc
+++ b/src/http/one/ResponseParser.cc
@@ -25,7 +25,7 @@ Http::One::ResponseParser::firstLineSize() const
         return result;
     }
     // NP: the parser does not accept >2 DIGIT for version numbers
-    if (msgProtocol_.minor >10)
+    if (msgProtocol_.minor > 9)
         result += 2;
     else
         result += 1;
@@ -39,42 +39,32 @@ Http::One::ResponseParser::firstLineSize() const
 // NP: we found the protocol version and consumed it already.
 // just need the status code and reason phrase
 const int
-Http::One::ResponseParser::parseResponseStatusAndReason()
+Http::One::ResponseParser::parseResponseStatusAndReason(::Parser::Tokenizer &tok, const CharacterSet &WspDelim)
 {
-    if (buf_.isEmpty())
-        return 0;
-
-    ::Parser::Tokenizer tok(buf_);
-
     if (!completedStatus_) {
         debugs(74, 9, "seek status-code in: " << tok.remaining().substr(0,10) << "...");
-        SBuf status;
-        // status code is 3 DIGIT octets
-        // NP: search space is >3 to get terminator character)
-        if(!tok.prefix(status, CharacterSet::DIGIT, 4))
-            return -1; // invalid status
-        // NOTE: multiple SP or non-SP bytes between version and status code are invalid.
-        if (tok.atEnd())
-            return 0; // need more to be sure we have it all
-        if(!tok.skip(' '))
-            return -1; // invalid status, a single SP terminator required
-        // NOTE: any whitespace after the single SP is part of the reason phrase.
+        /* RFC 7230 section 3.1.2 - status code is 3 DIGIT octets.
+         * There is no limit on what those octets may be.
+         * 000 through 999 are all valid.
+         */
+        int64_t statusValue;
+        if (tok.int64(statusValue, 10, false, 3) && tok.skipOne(WspDelim)) {
 
-        debugs(74, 6, "found string status-code=" << status);
+            debugs(74, 6, "found int64 status-code=" << statusValue);
+            statusCode_ = static_cast<Http::StatusCode>(statusValue);
 
-        // get the actual numeric value of the 0-3 digits we found
-        ::Parser::Tokenizer t2(status);
-        int64_t statusValue;
-        if (!t2.int64(statusValue))
-            return -1; // ouch. digits not forming a valid number?
-        debugs(74, 6, "found int64 status-code=" << statusValue);
-        if (statusValue < 0 || statusValue > 999)
-            return -1; // ouch. digits not within valid status code range.
+            buf_ = tok.remaining(); // resume checkpoint
+            completedStatus_ = true;
 
-        statusCode_ = static_cast<Http::StatusCode>(statusValue);
+        } else if (tok.atEnd()) {
+            debugs(74, 6, "Parser needs more data");
+            return 0; // need more to be sure we have it all
 
-        buf_ = tok.remaining(); // resume checkpoint
-        completedStatus_ = true;
+        } else {
+            debugs(74, 6, "invalid status-line. invalid code.");
+            return -1; // invalid status, a single SP terminator required
+        }
+        // NOTE: any whitespace after the single SP is part of the reason phrase.
     }
 
     if (tok.atEnd())
@@ -90,66 +80,79 @@ Http::One::ResponseParser::parseResponseStatusAndReason()
 
     // if we got here we are still looking for reason-phrase bytes
     static const CharacterSet phraseChars = CharacterSet::WSP + CharacterSet::VCHAR + CharacterSet::OBSTEXT;
-    tok.prefix(reasonPhrase_, phraseChars); // optional, no error if missing
-    tok.skip('\r'); // optional trailing CR
+    (void)tok.prefix(reasonPhrase_, phraseChars); // optional, no error if missing
+    if (skipLineTerminator(tok)) {
+        debugs(74, DBG_DATA, "parse remaining buf={length=" << tok.remaining().length() << ", data='" << tok.remaining() << "'}");
+        buf_ = tok.remaining(); // resume checkpoint
+        return 1;
+    }
+    reasonPhrase_.clear();
 
     if (tok.atEnd())
         return 0; // need more to be sure we have it all
 
-    // LF existence matters
-    if (!tok.skip('\n')) {
-        reasonPhrase_.clear();
-        return -1; // found invalid characters in the phrase
-    }
-
-    debugs(74, DBG_DATA, "parse remaining buf={length=" << tok.remaining().length() << ", data='" << tok.remaining() << "'}");
-    buf_ = tok.remaining(); // resume checkpoint
-    return 1;
+    debugs(74, 6, "invalid status-line. garbage in reason phrase.");
+    return -1;
 }
 
+/**
+ * Attempt to parse the method field out of an HTTP message status-line.
+ *
+ * Governed by:
+ *  RFC 1945 section 6.1
+ *  RFC 7230 section 2.6, 3.1 and 3.5
+ *
+ * Parsing state is stored between calls. The current implementation uses
+ * checkpoints after each successful status-line field.
+ * The return value tells you whether the parsing is completed or not.
+ *
+ * \retval -1  an error occurred.
+ * \retval  1  successful parse. statusCode_ and maybe reasonPhrase_ are filled and buffer consumed including first delimiter.
+ * \retval  0  more data is needed to complete the parse
+ */
 const int
 Http::One::ResponseParser::parseResponseFirstLine()
 {
     ::Parser::Tokenizer tok(buf_);
 
+    CharacterSet WspDelim = CharacterSet::SP; // strict parse only accepts SP
+
+    if (Config.onoff.relaxed_header_parser) {
+        // RFC 7230 section 3.5
+        // tolerant parser MAY accept any of SP, HTAB, VT (%x0B), FF (%x0C), or bare CR
+        // as whitespace between status-line fields
+        WspDelim += CharacterSet::HTAB
+                  + CharacterSet("VT,FF","\x0B\x0C")
+                  + CharacterSet::CR;
+    }
+
     if (msgProtocol_.protocol != AnyP::PROTO_NONE) {
         debugs(74, 6, "continue incremental parse for " << msgProtocol_);
         debugs(74, DBG_DATA, "parse remaining buf={length=" << tok.remaining().length() << ", data='" << tok.remaining() << "'}");
         // we already found the magic, but not the full line. keep going.
-        return parseResponseStatusAndReason();
+        return parseResponseStatusAndReason(tok, WspDelim);
 
     } else if (tok.skip(Http1magic)) {
         debugs(74, 6, "found prefix magic " << Http1magic);
         // HTTP Response status-line parse
 
-        // magic contains major version, still need to find minor
-        SBuf verMinor;
-        // NP: we limit to 2-digits for speed, there really is no limit
-        // XXX: the protocols we accept dont have valid versions > 10 anyway
-        if (!tok.prefix(verMinor, CharacterSet::DIGIT, 2))
-            return -1; // invalid version minor code
-        if (tok.atEnd())
-            return 0; // need more to be sure we have it all
-        if(!tok.skip(' '))
-            return -1; // invalid version, a single SP terminator required
-
-        debugs(74, 6, "found string version-minor=" << verMinor);
+        // magic contains major version, still need to find minor DIGIT
+        int64_t verMinor;
+        if (tok.int64(verMinor, 10, false, 1) && tok.skipOne(WspDelim)) {
+            msgProtocol_.protocol = AnyP::PROTO_HTTP;
+            msgProtocol_.major = 1;
+            msgProtocol_.minor = static_cast<unsigned int>(verMinor);
 
-        // get the actual numeric value of the 0-3 digits we found
-        ::Parser::Tokenizer t2(verMinor);
-        int64_t tvm = 0;
-        if (!t2.int64(tvm))
-            return -1; // ouch. digits not forming a valid number?
-        msgProtocol_.minor = static_cast<unsigned int>(tvm);
+            debugs(74, 6, "found version=" << msgProtocol_);
 
-        msgProtocol_.protocol = AnyP::PROTO_HTTP;
-        msgProtocol_.major = 1;
+            debugs(74, DBG_DATA, "parse remaining buf={length=" << tok.remaining().length() << ", data='" << tok.remaining() << "'}");
+            buf_ = tok.remaining(); // resume checkpoint
+            return parseResponseStatusAndReason(tok, WspDelim);
 
-        debugs(74, 6, "found version=" << msgProtocol_);
-
-        debugs(74, DBG_DATA, "parse remaining buf={length=" << tok.remaining().length() << ", data='" << tok.remaining() << "'}");
-        buf_ = tok.remaining(); // resume checkpoint
-        return parseResponseStatusAndReason();
+        } else if (tok.atEnd())
+            return 0; // need more to be sure we have it all
+        else
+            return -1; // invalid version or delimiter, a single SP terminator required
 
     } else if (tok.skip(IcyMagic)) {
         debugs(74, 6, "found prefix magic " << IcyMagic);
@@ -158,7 +161,7 @@ Http::One::ResponseParser::parseResponseFirstLine()
         // NP: ICY has no /major.minor details
         debugs(74, DBG_DATA, "parse remaining buf={length=" << tok.remaining().length() << ", data='" << tok.remaining() << "'}");
         buf_ = tok.remaining(); // resume checkpoint
-        return parseResponseStatusAndReason();
+        return parseResponseStatusAndReason(tok, WspDelim);
 
     } else if (buf_.length() > Http1magic.length() && buf_.length() > IcyMagic.length()) {
         debugs(74, 2, "unknown/missing prefix magic. Interpreting as HTTP/0.9");
diff --git a/src/http/one/ResponseParser.h b/src/http/one/ResponseParser.h
index d81f1d4189..93f8fd0b8d 100644
--- a/src/http/one/ResponseParser.h
+++ b/src/http/one/ResponseParser.h
@@ -35,7 +35,7 @@ public:
 
 private:
     const int parseResponseFirstLine();
-    const int parseResponseStatusAndReason();
+    const int parseResponseStatusAndReason(::Parser::Tokenizer&, const CharacterSet &);
 
     /// magic prefix for identifying ICY response messages
     static const SBuf IcyMagic;