Review changes for HTTP ResponseParser upgrade

author Amos Jeffries <squid3@treenet.co.nz>

Fri, 23 Jan 2015 07:43:39 +0000 (23:43 -0800)

committer Amos Jeffries <squid3@treenet.co.nz>

Fri, 23 Jan 2015 07:43:39 +0000 (23:43 -0800)
author Amos Jeffries <squid3@treenet.co.nz>
Fri, 23 Jan 2015 07:43:39 +0000 (23:43 -0800)
committer Amos Jeffries <squid3@treenet.co.nz>
Fri, 23 Jan 2015 07:43:39 +0000 (23:43 -0800)
diff --git a/src/http/one/Parser.cc b/src/http/one/Parser.cc

index 964bde6d82e33b4e1336021bf8c37d16e7373243..e072d79a2bbd9437c5b8d397fbd43bc989fa790e 100644 (file)
--- a/src/http/one/Parser.cc
+++ b/src/http/one/Parser.cc
@@ -11,6 +11,7 @@
  #include "http/one/Parser.h"
  #include "mime_header.h"
  #include "parser/Tokenizer.h"
+#include "SquidConfig.h"
  
  /// RFC 7230 section 2.6 - 7 magic octets
  const SBuf Http::One::Parser::Http1magic("HTTP/1.");
@@ -25,9 +26,26 @@ Http::One::Parser::clear()
  }
  
  bool
-Http::One::Parser::findMimeBlock(const char *which, size_t limit)
+Http::One::Parser::skipLineTerminator(::Parser::Tokenizer &tok) const
  {
-    if (msgProtocol_.major == 1) {
+    static const SBuf crlf("\r\n");
+    if (tok.skip(crlf))
+        return true;
+
+    if (Config.onoff.relaxed_header_parser && tok.skipOne(CharacterSet::LF))
+        return true;
+
+    return false;
+}
+
+bool
+Http::One::Parser::findMimeBlock(const char *which, const size_t limit)
+{
+    // MIME headers block exist in (only) HTTP/1.x and ICY
+    const bool expectMime = (msgProtocol_.protocol == AnyP::PROTO_HTTP && msgProtocol_.major == 1) ||
+                            msgProtocol_.protocol == AnyP::PROTO_ICY;
+
+    if (expectMime) {
          /* NOTE: HTTP/0.9 messages do not have a mime header block.
           *       So the rest of the code will need to deal with '0'-byte headers
           *       (ie, none, so don't try parsing em)
@@ -43,6 +61,15 @@ Http::One::Parser::findMimeBlock(const char *which, size_t limit)
                  debugs(33, 5, "Incomplete " << which << ", waiting for end of headers");
              return false;
          }
+
+        // Squid could handle these headers, but admin does not want to
+        if (messageHeaderSize() >= limit) {
+            debugs(33, 5, "Too large " << which);
+            parseStatusCode = Http::scHeaderTooLarge;
+            parsingStage_ = HTTP_PARSE_DONE;
+            return false;
+        }
+
          mimeHeaderBlock_ = buf_.consume(mimeHeaderBytes);
          debugs(74, 5, "mime header (0-" << mimeHeaderBytes << ") {" << mimeHeaderBlock_ << "}");
  
@@ -52,13 +79,6 @@ Http::One::Parser::findMimeBlock(const char *which, size_t limit)
      // NP: we do not do any further stages here yet so go straight to DONE
      parsingStage_ = HTTP_PARSE_DONE;
  
-    // Squid could handle these headers, but admin does not want to
-    if (messageHeaderSize() >= limit) {
-        debugs(33, 5, "Too large " << which);
-        parseStatusCode = Http::scHeaderTooLarge;
-        return false;
-    }
-
      return true;
  }
  
diff --git a/src/http/one/Parser.h b/src/http/one/Parser.h

index 0f457d2bad12fbf9d2a6f7b990ed3ac5180a83d1..734971c13de42175df7fdf4d37038107aa9c0d5d 100644 (file)
--- a/src/http/one/Parser.h
+++ b/src/http/one/Parser.h
@@ -14,6 +14,10 @@
  #include "http/StatusCode.h"
  #include "SBuf.h"
  
+namespace Parser {
+class Tokenizer;
+}
+
  namespace Http {
  namespace One {
  
@@ -99,8 +103,19 @@ public:
      Http::StatusCode parseStatusCode;
  
  protected:
-    /// parse scan to find the mime headers block for current message
-    bool findMimeBlock(const char *which, size_t limit);
+    /// detect and skip the CRLF or (if tolerant) LF line terminator
+    /// consume from the tokenizer and return true only if found
+    bool skipLineTerminator(::Parser::Tokenizer &tok) const;
+
+    /**
+     * Parse scan to find the mime headers block for current message.
+     *
+     * \retval true if mime block (or a blocks non-existence) has been
+     *              identified accurately within limit characters.
+     *              mimeHeaderBlock_ has been updated and buf_ consumed.
+     * \retval false an error occured, or no MIME terminator found within limit.
+     */
+    bool findMimeBlock(const char *which, const size_t limit);
  
      /// RFC 7230 section 2.6 - 7 magic octets
      static const SBuf Http1magic;
diff --git a/src/http/one/ResponseParser.cc b/src/http/one/ResponseParser.cc

index 5f04e0829e17e3dd7c48fa72f35543eaf5170d37..53b80f6f998c93b462aedb3c8c19c8e2a4826e66 100644 (file)
--- a/src/http/one/ResponseParser.cc
+++ b/src/http/one/ResponseParser.cc
@@ -25,7 +25,7 @@ Http::One::ResponseParser::firstLineSize() const
          return result;
      }
      // NP: the parser does not accept >2 DIGIT for version numbers
-    if (msgProtocol_.minor >10)
+    if (msgProtocol_.minor > 9)
          result += 2;
      else
          result += 1;
@@ -39,42 +39,32 @@ Http::One::ResponseParser::firstLineSize() const
  // NP: we found the protocol version and consumed it already.
  // just need the status code and reason phrase
  const int
-Http::One::ResponseParser::parseResponseStatusAndReason()
+Http::One::ResponseParser::parseResponseStatusAndReason(::Parser::Tokenizer &tok, const CharacterSet &WspDelim)
  {
-    if (buf_.isEmpty())
-        return 0;
-
-    ::Parser::Tokenizer tok(buf_);
-
      if (!completedStatus_) {
          debugs(74, 9, "seek status-code in: " << tok.remaining().substr(0,10) << "...");
-        SBuf status;
-        // status code is 3 DIGIT octets
-        // NP: search space is >3 to get terminator character)
-        if(!tok.prefix(status, CharacterSet::DIGIT, 4))
-            return -1; // invalid status
-        // NOTE: multiple SP or non-SP bytes between version and status code are invalid.
-        if (tok.atEnd())
-            return 0; // need more to be sure we have it all
-        if(!tok.skip(' '))
-            return -1; // invalid status, a single SP terminator required
-        // NOTE: any whitespace after the single SP is part of the reason phrase.
+        /* RFC 7230 section 3.1.2 - status code is 3 DIGIT octets.
+         * There is no limit on what those octets may be.
+         * 000 through 999 are all valid.
+         */
+        int64_t statusValue;
+        if (tok.int64(statusValue, 10, false, 3) && tok.skipOne(WspDelim)) {
  
-        debugs(74, 6, "found string status-code=" << status);
+            debugs(74, 6, "found int64 status-code=" << statusValue);
+            statusCode_ = static_cast<Http::StatusCode>(statusValue);
  
-        // get the actual numeric value of the 0-3 digits we found
-        ::Parser::Tokenizer t2(status);
-        int64_t statusValue;
-        if (!t2.int64(statusValue))
-            return -1; // ouch. digits not forming a valid number?
-        debugs(74, 6, "found int64 status-code=" << statusValue);
-        if (statusValue < 0 || statusValue > 999)
-            return -1; // ouch. digits not within valid status code range.
+            buf_ = tok.remaining(); // resume checkpoint
+            completedStatus_ = true;
  
-        statusCode_ = static_cast<Http::StatusCode>(statusValue);
+        } else if (tok.atEnd()) {
+            debugs(74, 6, "Parser needs more data");
+            return 0; // need more to be sure we have it all
  
-        buf_ = tok.remaining(); // resume checkpoint
-        completedStatus_ = true;
+        } else {
+            debugs(74, 6, "invalid status-line. invalid code.");
+            return -1; // invalid status, a single SP terminator required
+        }
+        // NOTE: any whitespace after the single SP is part of the reason phrase.
      }
  
      if (tok.atEnd())
@@ -90,66 +80,79 @@ Http::One::ResponseParser::parseResponseStatusAndReason()
  
      // if we got here we are still looking for reason-phrase bytes
      static const CharacterSet phraseChars = CharacterSet::WSP + CharacterSet::VCHAR + CharacterSet::OBSTEXT;
-    tok.prefix(reasonPhrase_, phraseChars); // optional, no error if missing
-    tok.skip('\r'); // optional trailing CR
+    (void)tok.prefix(reasonPhrase_, phraseChars); // optional, no error if missing
+    if (skipLineTerminator(tok)) {
+        debugs(74, DBG_DATA, "parse remaining buf={length=" << tok.remaining().length() << ", data='" << tok.remaining() << "'}");
+        buf_ = tok.remaining(); // resume checkpoint
+        return 1;
+    }
+    reasonPhrase_.clear();
  
      if (tok.atEnd())
          return 0; // need more to be sure we have it all
  
-    // LF existence matters
-    if (!tok.skip('\n')) {
-        reasonPhrase_.clear();
-        return -1; // found invalid characters in the phrase
-    }
-
-    debugs(74, DBG_DATA, "parse remaining buf={length=" << tok.remaining().length() << ", data='" << tok.remaining() << "'}");
-    buf_ = tok.remaining(); // resume checkpoint
-    return 1;
+    debugs(74, 6, "invalid status-line. garbage in reason phrase.");
+    return -1;
  }
  
+/**
+ * Attempt to parse the method field out of an HTTP message status-line.
+ *
+ * Governed by:
+ *  RFC 1945 section 6.1
+ *  RFC 7230 section 2.6, 3.1 and 3.5
+ *
+ * Parsing state is stored between calls. The current implementation uses
+ * checkpoints after each successful status-line field.
+ * The return value tells you whether the parsing is completed or not.
+ *
+ * \retval -1  an error occurred.
+ * \retval  1  successful parse. statusCode_ and maybe reasonPhrase_ are filled and buffer consumed including first delimiter.
+ * \retval  0  more data is needed to complete the parse
+ */
  const int
  Http::One::ResponseParser::parseResponseFirstLine()
  {
      ::Parser::Tokenizer tok(buf_);
  
+    CharacterSet WspDelim = CharacterSet::SP; // strict parse only accepts SP
+
+    if (Config.onoff.relaxed_header_parser) {
+        // RFC 7230 section 3.5
+        // tolerant parser MAY accept any of SP, HTAB, VT (%x0B), FF (%x0C), or bare CR
+        // as whitespace between status-line fields
+        WspDelim += CharacterSet::HTAB
+                  + CharacterSet("VT,FF","\x0B\x0C")
+                  + CharacterSet::CR;
+    }
+
      if (msgProtocol_.protocol != AnyP::PROTO_NONE) {
          debugs(74, 6, "continue incremental parse for " << msgProtocol_);
          debugs(74, DBG_DATA, "parse remaining buf={length=" << tok.remaining().length() << ", data='" << tok.remaining() << "'}");
          // we already found the magic, but not the full line. keep going.
-        return parseResponseStatusAndReason();
+        return parseResponseStatusAndReason(tok, WspDelim);
  
      } else if (tok.skip(Http1magic)) {
          debugs(74, 6, "found prefix magic " << Http1magic);
          // HTTP Response status-line parse
  
-        // magic contains major version, still need to find minor
-        SBuf verMinor;
-        // NP: we limit to 2-digits for speed, there really is no limit
-        // XXX: the protocols we accept dont have valid versions > 10 anyway
-        if (!tok.prefix(verMinor, CharacterSet::DIGIT, 2))
-            return -1; // invalid version minor code
-        if (tok.atEnd())
-            return 0; // need more to be sure we have it all
-        if(!tok.skip(' '))
-            return -1; // invalid version, a single SP terminator required
-
-        debugs(74, 6, "found string version-minor=" << verMinor);
+        // magic contains major version, still need to find minor DIGIT
+        int64_t verMinor;
+        if (tok.int64(verMinor, 10, false, 1) && tok.skipOne(WspDelim)) {
+            msgProtocol_.protocol = AnyP::PROTO_HTTP;
+            msgProtocol_.major = 1;
+            msgProtocol_.minor = static_cast<unsigned int>(verMinor);
  
-        // get the actual numeric value of the 0-3 digits we found
-        ::Parser::Tokenizer t2(verMinor);
-        int64_t tvm = 0;
-        if (!t2.int64(tvm))
-            return -1; // ouch. digits not forming a valid number?
-        msgProtocol_.minor = static_cast<unsigned int>(tvm);
+            debugs(74, 6, "found version=" << msgProtocol_);
  
-        msgProtocol_.protocol = AnyP::PROTO_HTTP;
-        msgProtocol_.major = 1;
+            debugs(74, DBG_DATA, "parse remaining buf={length=" << tok.remaining().length() << ", data='" << tok.remaining() << "'}");
+            buf_ = tok.remaining(); // resume checkpoint
+            return parseResponseStatusAndReason(tok, WspDelim);
  
-        debugs(74, 6, "found version=" << msgProtocol_);
-
-        debugs(74, DBG_DATA, "parse remaining buf={length=" << tok.remaining().length() << ", data='" << tok.remaining() << "'}");
-        buf_ = tok.remaining(); // resume checkpoint
-        return parseResponseStatusAndReason();
+        } else if (tok.atEnd())
+            return 0; // need more to be sure we have it all
+        else
+            return -1; // invalid version or delimiter, a single SP terminator required
  
      } else if (tok.skip(IcyMagic)) {
          debugs(74, 6, "found prefix magic " << IcyMagic);
@@ -158,7 +161,7 @@ Http::One::ResponseParser::parseResponseFirstLine()
          // NP: ICY has no /major.minor details
          debugs(74, DBG_DATA, "parse remaining buf={length=" << tok.remaining().length() << ", data='" << tok.remaining() << "'}");
          buf_ = tok.remaining(); // resume checkpoint
-        return parseResponseStatusAndReason();
+        return parseResponseStatusAndReason(tok, WspDelim);
  
      } else if (buf_.length() > Http1magic.length() && buf_.length() > IcyMagic.length()) {
          debugs(74, 2, "unknown/missing prefix magic. Interpreting as HTTP/0.9");
diff --git a/src/http/one/ResponseParser.h b/src/http/one/ResponseParser.h

index d81f1d4189704e2f49999fa141822e66bd1b098c..93f8fd0b8d36c8f7e613b67baa846130d37c6770 100644 (file)
--- a/src/http/one/ResponseParser.h
+++ b/src/http/one/ResponseParser.h
@@ -35,7 +35,7 @@ public:
  
  private:
      const int parseResponseFirstLine();
-    const int parseResponseStatusAndReason();
+    const int parseResponseStatusAndReason(::Parser::Tokenizer&, const CharacterSet &);
  
      /// magic prefix for identifying ICY response messages
      static const SBuf IcyMagic;
author	Amos Jeffries <squid3@treenet.co.nz>
	Fri, 23 Jan 2015 07:43:39 +0000 (23:43 -0800)
committer	Amos Jeffries <squid3@treenet.co.nz>
	Fri, 23 Jan 2015 07:43:39 +0000 (23:43 -0800)
src/http/one/Parser.cc		patch \| blob \| blame \| history
src/http/one/Parser.h		patch \| blob \| blame \| history
src/http/one/ResponseParser.cc		patch \| blob \| blame \| history
src/http/one/ResponseParser.h		patch \| blob \| blame \| history