Bug 4368: A simpler and more robust HTTP request line parser.

author Alex Rousskov <rousskov@measurement-factory.com>

Wed, 18 Nov 2015 23:56:16 +0000 (15:56 -0800)

committer Amos Jeffries <squid3@treenet.co.nz>

Wed, 18 Nov 2015 23:56:16 +0000 (15:56 -0800)
author Alex Rousskov <rousskov@measurement-factory.com>
Wed, 18 Nov 2015 23:56:16 +0000 (15:56 -0800)
committer Amos Jeffries <squid3@treenet.co.nz>
Wed, 18 Nov 2015 23:56:16 +0000 (15:56 -0800)
diff --git a/src/http/one/RequestParser.cc b/src/http/one/RequestParser.cc

index 6da9f151066be89752eb6e0866bec9958c32e09b..e560e66beedc9736a0186435a3832b007cb06fb1 100644 (file)
--- a/src/http/one/RequestParser.cc
+++ b/src/http/one/RequestParser.cc
@@ -14,9 +14,14 @@
  #include "profiler/Profiler.h"
  #include "SquidConfig.h"
  
+// the right debugs() level for parsing errors
+inline static int
+ErrorLevel() {
+    return Config.onoff.relaxed_header_parser < 0 ? DBG_IMPORTANT : 5;
+}
+
  Http::One::RequestParser::RequestParser() :
-    Parser(),
-    firstLineGarbage_(0)
+    Parser()
  {}
  
  Http1::Parser::size_type
@@ -62,82 +67,111 @@ Http::One::RequestParser::skipGarbageLines()
   * Governed by:
   *  RFC 1945 section 5.1
   *  RFC 7230 section 2.6, 3.1 and 3.5
- *
- * Parsing state is stored between calls. The current implementation uses
- * checkpoints after each successful request-line field.
- * The return value tells you whether the parsing is completed or not.
- *
- * \retval -1  an error occurred. parseStatusCode indicates HTTP status result.
- * \retval  1  successful parse. method_ is filled and buffer consumed including first delimiter.
- * \retval  0  more data is needed to complete the parse
   */
-int
-Http::One::RequestParser::parseMethodField(Http1::Tokenizer &tok, const CharacterSet &WspDelim)
+bool
+Http::One::RequestParser::parseMethodField(Http1::Tokenizer &tok)
  {
-    // scan for up to 16 valid method characters.
-    static const size_t maxMethodLength = 16; // TODO: make this configurable?
-
      // method field is a sequence of TCHAR.
-    SBuf methodFound;
-    if (tok.prefix(methodFound, CharacterSet::TCHAR, maxMethodLength) && tok.skipOne(WspDelim)) {
-
-        method_ = HttpRequestMethod(methodFound);
-        buf_ = tok.remaining(); // incremental parse checkpoint
-        return 1;
-
-    } else if (tok.atEnd()) {
-        debugs(74, 5, "Parser needs more data to find method");
-        return 0;
-
-    } // else error(s)
+    // Limit to 32 characters to prevent overly long sequences of non-HTTP
+    // being sucked in before mismatch is detected. 32 is itself annoyingly
+    // big but there are methods registered by IANA that reach 17 bytes:
+    //  http://www.iana.org/assignments/http-methods
+    static const size_t maxMethodLength = 32; // TODO: make this configurable?
  
-    // non-delimiter found after accepted method bytes means ...
-    if (methodFound.length() == maxMethodLength) {
-        // method longer than acceptible.
-        // RFC 7230 section 3.1.1 mandatory (SHOULD) 501 response
-        parseStatusCode = Http::scNotImplemented;
-        debugs(33, 5, "invalid request-line. method too long");
-    } else {
-        // invalid character in the URL
-        // RFC 7230 section 3.1.1 required (SHOULD) 400 response
+    SBuf methodFound;
+    if (!tok.prefix(methodFound, CharacterSet::TCHAR, maxMethodLength)) {
+        debugs(33, ErrorLevel(), "invalid request-line: missing or malformed method");
          parseStatusCode = Http::scBadRequest;
-        debugs(33, 5, "invalid request-line. missing method delimiter");
+        return false;
      }
-    return -1;
+    method_ = HttpRequestMethod(methodFound);
+    return true;
  }
  
-static CharacterSet
-uriValidCharacters()
+/// the characters which truly are valid within URI
+static const CharacterSet &
+UriValidCharacters()
  {
-    CharacterSet UriChars("URI-Chars","");
-
      /* RFC 3986 section 2:
       * "
       *   A URI is composed from a limited set of characters consisting of
       *   digits, letters, and a few graphic symbols.
       * "
       */
-    // RFC 3986 section 2.1 - percent encoding "%" HEXDIG
-    UriChars.add('%');
-    UriChars += CharacterSet::HEXDIG;
-    // RFC 3986 section 2.2 - reserved characters
-    UriChars += CharacterSet("gen-delims", ":/?#[]@");
-    UriChars += CharacterSet("sub-delims", "!$&'()*+,;=");
-    // RFC 3986 section 2.3 - unreserved characters
-    UriChars += CharacterSet::ALPHA;
-    UriChars += CharacterSet::DIGIT;
-    UriChars += CharacterSet("unreserved", "-._~");
+    static const CharacterSet UriChars =
+        CharacterSet("URI-Chars","") +
+        // RFC 3986 section 2.2 - reserved characters
+        CharacterSet("gen-delims", ":/?#[]@") +
+        CharacterSet("sub-delims", "!$&'()*+,;=") +
+        // RFC 3986 section 2.3 - unreserved characters
+        CharacterSet::ALPHA +
+        CharacterSet::DIGIT +
+        CharacterSet("unreserved", "-._~") +
+        // RFC 3986 section 2.1 - percent encoding "%" HEXDIG
+        CharacterSet("pct-encoded", "%") +
+        CharacterSet::HEXDIG;
  
      return UriChars;
  }
  
-int
-Http::One::RequestParser::parseUriField(Http1::Tokenizer &tok)
+/// characters HTTP permits tolerant parsers to accept as delimiters
+static const CharacterSet &
+RelaxedDelimiterCharacters()
+{
+    // RFC 7230 section 3.5
+    // tolerant parser MAY accept any of SP, HTAB, VT (%x0B), FF (%x0C),
+    // or bare CR as whitespace between request-line fields
+    static const CharacterSet RelaxedDels =
+        CharacterSet::SP +
+        CharacterSet::HTAB +
+        CharacterSet("VT,FF","\x0B\x0C") +
+        CharacterSet::CR;
+
+    return RelaxedDels;
+}
+
+/// characters used to separate HTTP fields
+const CharacterSet &
+Http::One::RequestParser::DelimiterCharacters()
+{
+    return Config.onoff.relaxed_header_parser ?
+           RelaxedDelimiterCharacters() : CharacterSet::SP;
+}
+
+/// characters which Squid will accept in the HTTP request-target (URI)
+const CharacterSet &
+Http::One::RequestParser::RequestTargetCharacters()
  {
-    // URI field is a sequence of ... what? segments all have different valid charset
-    // go with non-whitespace non-binary characters for now
-    static CharacterSet UriChars = uriValidCharacters();
+    if (Config.onoff.relaxed_header_parser) {
+#if USE_HTTP_VIOLATIONS
+        static const CharacterSet RelaxedExtended =
+            UriValidCharacters() +
+            // accept whitespace (extended), it will be dealt with later
+            DelimiterCharacters() +
+            // RFC 2396 unwise character set which must never be transmitted
+            // in un-escaped form. But many web services do anyway.
+            CharacterSet("RFC2396-unwise","\"\\|^<>`{}") +
+            // UTF-8 because we want to be future-proof
+            CharacterSet("UTF-8", 128, 255);
+
+        return RelaxedExtended;
+#else
+        static const CharacterSet RelaxedCompliant =
+            UriValidCharacters() +
+            // accept whitespace (extended), it will be dealt with later.
+            DelimiterCharacters();
+
+        return RelaxedCompliant;
+#endif
+    }
+
+    // strict parse only accepts what the RFC say we can
+    return UriValidCharacters();
+}
  
+bool
+Http::One::RequestParser::parseUriField(Http1::Tokenizer &tok)
+{
      /* Arbitrary 64KB URI upper length limit.
       *
       * Not quite as arbitrary as it seems though. Old SquidString objects
@@ -147,85 +181,93 @@ Http::One::RequestParser::parseUriField(Http1::Tokenizer &tok)
       * Not that it matters but RFC 7230 section 3.1.1 requires (RECOMMENDED)
       * at least 8000 octets for the whole line, including method and version.
       */
-    const size_t maxUriLength = min(static_cast<size_t>(Config.maxRequestHeaderSize) - firstLineSize(),
-                                    static_cast<size_t>((64*1024)-1));
+    const size_t maxUriLength = static_cast<size_t>((64*1024)-1);
  
      SBuf uriFound;
-
-    // RFC 7230 HTTP/1.x URI are followed by at least one whitespace delimiter
-    if (tok.prefix(uriFound, UriChars, maxUriLength) && tok.skipOne(CharacterSet::SP)) {
-        uri_ = uriFound;
-        buf_ = tok.remaining(); // incremental parse checkpoint
-        return 1;
-
-        // RFC 1945 for GET the line terminator may follow URL instead of a delimiter
-    } else if (method_ == Http::METHOD_GET && skipLineTerminator(tok)) {
-        debugs(33, 5, "HTTP/0.9 syntax request-line detected");
-        msgProtocol_ = Http::ProtocolVersion(0,9);
-        uri_ = uriFound; // found by successful prefix() call earlier.
-        parseStatusCode = Http::scOkay;
-        buf_ = tok.remaining(); // incremental parse checkpoint
-        return 1;
-
-    } else if (tok.atEnd()) {
-        debugs(74, 5, "Parser needs more data to find URI");
-        return 0;
+    if (!tok.prefix(uriFound, RequestTargetCharacters())) {
+        parseStatusCode = Http::scBadRequest;
+        debugs(33, ErrorLevel(), "invalid request-line: missing or malformed URI");
+        return false;
      }
  
-    // else errors...
-
-    if (uriFound.length() == maxUriLength) {
+    if (uriFound.length() > maxUriLength) {
          // RFC 7230 section 3.1.1 mandatory (MUST) 414 response
          parseStatusCode = Http::scUriTooLong;
-        debugs(33, 5, "invalid request-line. URI longer than " << maxUriLength << " bytes");
-    } else {
-        // RFC 7230 section 3.1.1 required (SHOULD) 400 response
-        parseStatusCode = Http::scBadRequest;
-        debugs(33, 5, "invalid request-line. missing URI delimiter");
+        debugs(33, ErrorLevel(), "invalid request-line: " << uriFound.length() <<
+               "-byte URI exceeds " << maxUriLength << "-byte limit");
+        return false;
      }
-    return -1;
+
+    uri_ = uriFound;
+    return true;
  }
  
-int
+bool
  Http::One::RequestParser::parseHttpVersionField(Http1::Tokenizer &tok)
  {
-    // partial match of HTTP/1 magic prefix
-    if (tok.remaining().length() < Http1magic.length() && Http1magic.startsWith(tok.remaining())) {
-        debugs(74, 5, "Parser needs more data to find version");
-        return 0;
-    }
+    const auto savedTok = tok;
  
-    if (!tok.skip(Http1magic)) {
-        debugs(74, 5, "invalid request-line. not HTTP/1 protocol");
-        parseStatusCode = Http::scHttpVersionNotSupported;
-        return -1;
+    SBuf digit;
+    // Searching for Http1magic precludes detecting HTTP/2+ versions.
+    // Rewrite if we ever _need_ to return 505 (Version Not Supported) errors.
+    if (tok.suffix(digit, CharacterSet::DIGIT) && tok.skipSuffix(Http1magic)) {
+        msgProtocol_ = Http::ProtocolVersion(1, (*digit.rawContent() - '0'));
+        return true;
      }
  
-    if (tok.atEnd()) {
-        debugs(74, 5, "Parser needs more data to find version");
-        return 0;
+    // A GET request might use HTTP/0.9 syntax
+    if (method_ == Http::METHOD_GET) {
+        // RFC 1945 - no HTTP version field at all
+        tok = savedTok; // in case the URI ends with a digit
+        // report this assumption as an error if configured to triage parsing
+        debugs(33, ErrorLevel(), "assuming HTTP/0.9 request-line");
+        msgProtocol_ = Http::ProtocolVersion(0,9);
+        return true;
      }
  
-    // get the version minor DIGIT
-    SBuf digit;
-    if (tok.prefix(digit, CharacterSet::DIGIT, 1) && skipLineTerminator(tok)) {
+    debugs(33, ErrorLevel(), "invalid request-line: not HTTP");
+    parseStatusCode = Http::scBadRequest;
+    return false;
+}
  
-        // found version fully AND terminator
-        msgProtocol_ = Http::ProtocolVersion(1, (*digit.rawContent() - '0'));
-        parseStatusCode = Http::scOkay;
-        buf_ = tok.remaining(); // incremental parse checkpoint
-        return 1;
+/**
+ * Skip characters separating request-line fields.
+ * To handle bidirectional parsing, the caller does the actual skipping and
+ * we just check how many character the caller has skipped.
+ */
+bool
+Http::One::RequestParser::skipDelimiter(const size_t count)
+{
+    if (count <= 0) {
+        debugs(33, ErrorLevel(), "invalid request-line: missing delimiter");
+        parseStatusCode = Http::scBadRequest;
+        return false;
+    }
  
-    } else if (tok.atEnd() || (tok.skip('\r') && tok.atEnd())) {
-        debugs(74, 5, "Parser needs more data to find version");
-        return 0;
+    // tolerant parser allows multiple whitespace characters between request-line fields
+    if (count > 1 && !Config.onoff.relaxed_header_parser) {
+        debugs(33, ErrorLevel(), "invalid request-line: too many delimiters");
+        parseStatusCode = Http::scBadRequest;
+        return false;
+    }
  
-    } // else error ...
+    return true;
+}
  
-    // non-DIGIT. invalid version number.
-    parseStatusCode = Http::scHttpVersionNotSupported;
-    debugs(33, 5, "invalid request-line. garbage before line terminator");
-    return -1;
+/// Parse CRs at the end of request-line, just before the terminating LF.
+bool
+Http::One::RequestParser::skipTrailingCrs(Http1::Tokenizer &tok)
+{
+    if (Config.onoff.relaxed_header_parser) {
+        (void)tok.skipAllTrailing(CharacterSet::CR); // optional; multiple OK
+    } else {
+        if (!tok.skipOneTrailing(CharacterSet::CR)) {
+            debugs(33, ErrorLevel(), "invalid request-line: missing CR before LF");
+            parseStatusCode = Http::scBadRequest;
+            return false;
+        }
+    }
+    return true;
  }
  
  /**
@@ -235,10 +277,6 @@ Http::One::RequestParser::parseHttpVersionField(Http1::Tokenizer &tok)
   *  RFC 1945 section 5.1
   *  RFC 7230 section 2.6, 3.1 and 3.5
   *
- * Parsing state is stored between calls. The current implementation uses
- * checkpoints after each successful request-line field.
- * The return value tells you whether the parsing is completed or not.
- *
   * \retval -1  an error occurred. parseStatusCode indicates HTTP status result.
   * \retval  1  successful parse. member fields contain the request-line items
   * \retval  0  more data is needed to complete the parse
@@ -246,186 +284,53 @@ Http::One::RequestParser::parseHttpVersionField(Http1::Tokenizer &tok)
  int
  Http::One::RequestParser::parseRequestFirstLine()
  {
-    Http1::Tokenizer tok(buf_);
-
      debugs(74, 5, "parsing possible request: buf.length=" << buf_.length());
      debugs(74, DBG_DATA, buf_);
  
-    // NP: would be static, except it need to change with reconfigure
-    CharacterSet WspDelim = CharacterSet::SP; // strict parse only accepts SP
+    SBuf line;
  
-    if (Config.onoff.relaxed_header_parser) {
-        // RFC 7230 section 3.5
-        // tolerant parser MAY accept any of SP, HTAB, VT (%x0B), FF (%x0C), or bare CR
-        // as whitespace between request-line fields
-        WspDelim += CharacterSet::HTAB
-                    + CharacterSet("VT,FF","\x0B\x0C")
-                    + CharacterSet::CR;
-        debugs(74, 5, "using Parser relaxed WSP characters");
-    }
-
-    // only search for method if we have not yet found one
-    if (method_ == Http::METHOD_NONE) {
-        const int res = parseMethodField(tok, WspDelim);
-        if (res < 1)
-            return res;
-        // else keep going...
-    }
-
-    // tolerant parser allows multiple whitespace characters between request-line fields
-    if (Config.onoff.relaxed_header_parser) {
-        const size_t garbage = tok.skipAll(WspDelim);
-        if (garbage > 0) {
-            firstLineGarbage_ += garbage;
-            buf_ = tok.remaining(); // re-checkpoint after garbage
-        }
-    }
-    if (tok.atEnd()) {
+    // Earlier, skipGarbageLines() took care of any leading LFs (if allowed).
+    // Now, the request line has to end at the first LF.
+    static const CharacterSet lineChars = CharacterSet::LF.complement("notLF");
+    ::Parser::Tokenizer lineTok(buf_);
+    if (!lineTok.prefix(line, lineChars) || !lineTok.skip('\n')) {
          debugs(74, 5, "Parser needs more data");
          return 0;
      }
  
-    // from here on, we have two possible parse paths: whitespace tolerant, and strict
-    if (Config.onoff.relaxed_header_parser) {
-        // whitespace tolerant
-
-        int warnOnError = (Config.onoff.relaxed_header_parser <= 0 ? DBG_IMPORTANT : 2);
-
-        // NOTES:
-        // * this would be static, except WspDelim changes with reconfigure
-        // * HTTP-version charset is included by uriValidCharacters()
-        // * terminal CR is included by WspDelim here in relaxed parsing
-        CharacterSet LfDelim = uriValidCharacters() + WspDelim;
-
-        // seek the LF character, then tokenize the line in reverse
-        SBuf line;
-        if (tok.prefix(line, LfDelim) && tok.skip('\n')) {
-            Http1::Tokenizer rTok(line);
-            SBuf nil;
-            (void)rTok.suffix(nil,CharacterSet::CR); // optional CR in terminator
-            SBuf digit;
-            if (rTok.suffix(digit,CharacterSet::DIGIT) && rTok.skipSuffix(Http1magic) && rTok.suffix(nil,WspDelim)) {
-                uri_ = rTok.remaining();
-                msgProtocol_ = Http::ProtocolVersion(1, (*digit.rawContent() - '0'));
-                if (uri_.isEmpty()) {
-                    debugs(33, warnOnError, "invalid request-line. missing URL");
-                    parseStatusCode = Http::scBadRequest;
-                    return -1;
-                }
-
-                parseStatusCode = Http::scOkay;
-                buf_ = tok.remaining(); // incremental parse checkpoint
-                return 1;
-
-            } else if (method_ == Http::METHOD_GET) {
-                // RFC 1945 - for GET the line terminator may follow URL instead of a delimiter
-                debugs(33, 5, "HTTP/0.9 syntax request-line detected");
-                msgProtocol_ = Http::ProtocolVersion(0,9);
-                static const SBuf cr("\r",1);
-                uri_ = line.trim(cr,false,true);
-                parseStatusCode = Http::scOkay;
-                buf_ = tok.remaining(); // incremental parse checkpoint
-                return 1;
-            }
-
-            debugs(33, warnOnError, "invalid request-line. not HTTP");
-            parseStatusCode = Http::scBadRequest;
-            return -1;
-        }
+    Http1::Tokenizer tok(line);
+    const CharacterSet &delimiters = DelimiterCharacters();
  
-        if (!tok.atEnd()) {
+    if (!parseMethodField(tok))
+        return -1;
  
-#if USE_HTTP_VIOLATIONS
-            /*
-             * RFC 3986 explicitly lists the characters permitted in URI.
-             * A non-permitted character was found somewhere in the request-line.
-             * However, as long as we can find the LF, accept the characters
-             * which we know are invalid in any URI but actively used.
-             */
-            LfDelim.add('\0'); // Java
-            LfDelim.add(' ');  // IIS
-            LfDelim.add('\"'); // Bing
-            LfDelim.add('\\'); // MSIE, Firefox
-            LfDelim.add('|');  // Amazon
-            LfDelim.add('^');  // Microsoft News
-
-            // other ASCII characters for which RFC 2396 has explicitly disallowed use
-            // since 1998 and which were not later permitted by RFC 3986 in 2005.
-            LfDelim.add('<');  // HTML embedded in URL
-            LfDelim.add('>');  // HTML embedded in URL
-            LfDelim.add('`');  // Shell Script embedded in URL
-            LfDelim.add('{');  // JSON or Javascript embedded in URL
-            LfDelim.add('}');  // JSON or Javascript embedded in URL
-
-            // reset the tokenizer from anything the above did, then seek the LF character.
-            tok.reset(buf_);
-
-            if (tok.prefix(line, LfDelim) && tok.skip('\n')) {
-
-                Http1::Tokenizer rTok(line);
-
-                // strip terminating CR (if any)
-                SBuf nil;
-                (void)rTok.suffix(nil,CharacterSet::CR); // optional CR in terminator
-                line = rTok.remaining();
-
-                // strip terminating 'WSP HTTP-version' (if any)
-                if (rTok.suffix(nil,CharacterSet::DIGIT) && rTok.skipSuffix(Http1magic) && rTok.suffix(nil,WspDelim)) {
-                    hackExpectsMime_ = true; // client thinks its speaking HTTP, probably sent a mime block.
-                    uri_ = rTok.remaining();
-                } else
-                    uri_ = line; // no HTTP/1.x label found. Use the whole line.
-
-                if (uri_.isEmpty()) {
-                    debugs(33, warnOnError, "invalid request-line. missing URL");
-                    parseStatusCode = Http::scBadRequest;
-                    return -1;
-                }
-
-                debugs(33, warnOnError, "invalid request-line. treating as HTTP/0.9" << (hackExpectsMime_?" (with mime)":""));
-                msgProtocol_ = Http::ProtocolVersion(0,9);
-                parseStatusCode = Http::scOkay;
-                buf_ = tok.remaining(); // incremental parse checkpoint
-                return 1;
-
-            } else if (tok.atEnd()) {
-                debugs(74, 5, "Parser needs more data");
-                return 0;
-            }
-            // else, drop back to invalid request-line handling
-#endif
-            const SBuf t = tok.remaining();
-            debugs(33, warnOnError, "invalid request-line characters." << Raw("data", t.rawContent(), t.length()));
-            parseStatusCode = Http::scBadRequest;
-            return -1;
-        }
-        debugs(74, 5, "Parser needs more data");
-        return 0;
-    }
-    // else strict non-whitespace tolerant parse
-
-    // only search for request-target (URL) if we have not yet found one
-    if (uri_.isEmpty()) {
-        const int res = parseUriField(tok);
-        if (res < 1 || msgProtocol_.protocol == AnyP::PROTO_HTTP)
-            return res;
-        // else keep going...
-    }
+    if (!skipDelimiter(tok.skipAll(delimiters)))
+        return -1;
  
-    if (tok.atEnd()) {
-        debugs(74, 5, "Parser needs more data");
-        return 0;
-    }
+    /* now parse backwards, to leave just the URI */
+    if (!skipTrailingCrs(tok))
+        return -1;
+
+    if (!parseHttpVersionField(tok))
+        return -1;
  
-    // HTTP/1 version suffix (protocol magic) followed by CR*LF
-    if (msgProtocol_.protocol == AnyP::PROTO_NONE) {
-        return parseHttpVersionField(tok);
+    if (!http0() && !skipDelimiter(tok.skipAllTrailing(delimiters)))
+        return -1;
+
+    /* parsed everything before and after the URI */
+
+    if (!parseUriField(tok))
+        return -1;
+
+    if (!tok.atEnd()) {
+        debugs(33, ErrorLevel(), "invalid request-line: garbage after URI");
+        parseStatusCode = Http::scBadRequest;
+        return -1;
      }
  
-    // If we got here this method has been called too many times
-    parseStatusCode = Http::scInternalServerError;
-    debugs(33, 5, "ERROR: Parser already processed request-line");
-    return -1;
+    parseStatusCode = Http::scOkay;
+    buf_ = lineTok.remaining(); // incremental parse checkpoint
+    return 1;
  }
  
  bool
diff --git a/src/http/one/RequestParser.h b/src/http/one/RequestParser.h

index c48ad5ed57ae0b04f5ea0479d9eb81b9a3e929d2..deb1b0d15c85f66df73814df09fb4d8950165a8d 100644 (file)
--- a/src/http/one/RequestParser.h
+++ b/src/http/one/RequestParser.h
@@ -47,19 +47,23 @@ public:
  private:
      void skipGarbageLines();
      int parseRequestFirstLine();
-    int parseMethodField(Http1::Tokenizer &, const CharacterSet &);
-    int parseUriField(Http1::Tokenizer &);
-    int parseHttpVersionField(Http1::Tokenizer &);
+
+    /* all these return false and set parseStatusCode on parsing failures */
+    bool parseMethodField(Http1::Tokenizer &);
+    bool parseUriField(Http1::Tokenizer &);
+    bool parseHttpVersionField(Http1::Tokenizer &);
+    bool skipDelimiter(const size_t count);
+    bool skipTrailingCrs(Http1::Tokenizer &tok);
+
+    bool http0() const {return !msgProtocol_.major;}
+    static const CharacterSet &DelimiterCharacters();
+    static const CharacterSet &RequestTargetCharacters();
  
      /// what request method has been found on the first line
      HttpRequestMethod method_;
  
      /// raw copy of the original client request-line URI field
      SBuf uri_;
-
-    /// amount of garbage bytes tolerantly skipped inside the request-line
-    /// may be -1 if sender only omitted CR on terminator
-    int64_t firstLineGarbage_;
  };
  
  } // namespace One
diff --git a/src/tests/testHttp1Parser.cc b/src/tests/testHttp1Parser.cc

index 82077c8bfb6748e0aa83b07da3f56e2b6662f6b2..e66f478c41970aeae46006c736f5c882bf179833 100644 (file)
--- a/src/tests/testHttp1Parser.cc
+++ b/src/tests/testHttp1Parser.cc
@@ -53,21 +53,65 @@ struct resultSet {
      AnyP::ProtocolVersion version;
  };
  
+// define SQUID_DEBUG_TESTS to see exactly which test sub-cases fail and where
+#ifdef SQUID_DEBUG_TESTS
+// not optimized for runtime use
+static void
+Replace(SBuf &where, const SBuf &what, const SBuf &with)
+{
+    // prevent infinite loops
+    if (!what.length() || with.find(what) != SBuf::npos)
+        return;
+
+    SBuf::size_type pos = 0;
+    while ((pos = where.find(what, pos)) != SBuf::npos) {
+        SBuf buf = where.substr(0, pos);
+        buf.append(with);
+        buf.append(where.substr(pos+what.length()));
+        where = buf;
+        pos += with.length();
+    }
+}
+
+static SBuf Pretty(SBuf raw)
+{
+    Replace(raw, SBuf("\r"), SBuf("\\r"));
+    Replace(raw, SBuf("\n"), SBuf("\\n"));
+    return raw;
+}
+#endif
+
  static void
  testResults(int line, const SBuf &input, Http1::RequestParser &output, struct resultSet &expect)
  {
-#if WHEN_TEST_DEBUG_IS_NEEDED
-    printf("TEST @%d, in=%u: " SQUIDSBUFPH "\n", line, input.length(), SQUIDSBUFPRINT(input));
+#ifdef SQUID_DEBUG_TESTS
+    std::cerr << "TEST @" << line << ", in=" << Pretty(input) << "\n";
+#endif
+
+    const bool parsed = output.parse(input);
+
+#ifdef SQUID_DEBUG_TESTS
+    if (expect.parsed != parsed)
+        std::cerr << "\tparse-FAILED: " << expect.parsed << "!=" << parsed << "\n";
+    else if (parsed && expect.method != output.method_)
+        std::cerr << "\tmethod-FAILED: " << expect.method << "!=" << output.method_ << "\n";
+    if (expect.status != output.parseStatusCode)
+        std::cerr << "\tscode-FAILED: " << expect.status << "!=" << output.parseStatusCode << "\n";
+    if (expect.suffixSz != output.buf_.length())
+        std::cerr << "\tsuffixSz-FAILED: " << expect.suffixSz << "!=" << output.buf_.length() << "\n";
  #endif
  
      // runs the parse
-    CPPUNIT_ASSERT_EQUAL(expect.parsed, output.parse(input));
+    CPPUNIT_ASSERT_EQUAL(expect.parsed, parsed);
+
+    // if parsing was successful, check easily visible field outputs
+    if (parsed) {
+        CPPUNIT_ASSERT_EQUAL(expect.method, output.method_);
+        if (expect.uri != NULL)
+            CPPUNIT_ASSERT_EQUAL(0, output.uri_.cmp(expect.uri));
+        CPPUNIT_ASSERT_EQUAL(expect.version, output.msgProtocol_);
+    }
  
-    // check easily visible field outputs
-    CPPUNIT_ASSERT_EQUAL(expect.method, output.method_);
-    if (expect.uri != NULL)
-        CPPUNIT_ASSERT_EQUAL(0, output.uri_.cmp(expect.uri));
-    CPPUNIT_ASSERT_EQUAL(expect.version, output.msgProtocol_);
      CPPUNIT_ASSERT_EQUAL(expect.status, output.parseStatusCode);
  
      // check more obscure states
@@ -146,7 +190,7 @@ testHttp1Parser::testParseRequestLineProtocols()
              .needsMore = false,
              .parserState = Http1::HTTP_PARSE_DONE,
              .status = Http::scBadRequest,
-            .suffixSz = 3,
+            .suffixSz = input.length(),
              .method = HttpRequestMethod(Http::METHOD_POST),
              .uri = NULL,
              .version = AnyP::ProtocolVersion()
@@ -217,9 +261,9 @@ testHttp1Parser::testParseRequestLineProtocols()
              .parsed = false,
              .needsMore = false,
              .parserState = Http1::HTTP_PARSE_MIME,
-            .status = Http::scHttpVersionNotSupported,
-            .suffixSz = input.length()-6,
-            .method = HttpRequestMethod(Http::METHOD_GET),
+            .status = Http::scBadRequest,
+            .suffixSz = input.length(),
+            .method = HttpRequestMethod(),
              .uri = "/",
              .version = AnyP::ProtocolVersion()
          };
@@ -232,8 +276,8 @@ testHttp1Parser::testParseRequestLineProtocols()
              .parsed = false,
              .needsMore = false,
              .parserState = Http1::HTTP_PARSE_MIME,
-            .status = Http::scHttpVersionNotSupported,
-            .suffixSz = input.length()-6,
+            .status = Http::scBadRequest,
+            .suffixSz = input.length(),
              .method = HttpRequestMethod(Http::METHOD_GET),
              .uri = "/",
              .version = AnyP::ProtocolVersion()
@@ -250,8 +294,8 @@ testHttp1Parser::testParseRequestLineProtocols()
              .parsed = false,
              .needsMore = false,
              .parserState = Http1::HTTP_PARSE_DONE,
-            .status = Http::scHttpVersionNotSupported,
-            .suffixSz = input.length()-6,
+            .status = Http::scBadRequest,
+            .suffixSz = input.length(),
              .method = HttpRequestMethod(Http::METHOD_GET),
              .uri = "/",
              .version = AnyP::ProtocolVersion()
@@ -268,8 +312,8 @@ testHttp1Parser::testParseRequestLineProtocols()
              .parsed = false,
              .needsMore = false,
              .parserState = Http1::HTTP_PARSE_DONE,
-            .status = Http::scHttpVersionNotSupported,
-            .suffixSz = input.length()-6,
+            .status = Http::scBadRequest,
+            .suffixSz = input.length(),
              .method = HttpRequestMethod(Http::METHOD_GET),
              .uri = "/",
              .version = AnyP::ProtocolVersion()
@@ -286,8 +330,8 @@ testHttp1Parser::testParseRequestLineProtocols()
              .parsed = false,
              .needsMore = false,
              .parserState = Http1::HTTP_PARSE_DONE,
-            .status = Http::scHttpVersionNotSupported,
-            .suffixSz = input.length()-6,
+            .status = Http::scBadRequest,
+            .suffixSz = input.length(),
              .method = HttpRequestMethod(Http::METHOD_GET),
              .uri = "/",
              .version = AnyP::ProtocolVersion()
@@ -304,8 +348,8 @@ testHttp1Parser::testParseRequestLineProtocols()
              .parsed = false,
              .needsMore = false,
              .parserState = Http1::HTTP_PARSE_DONE,
-            .status = Http::scHttpVersionNotSupported,
-            .suffixSz = input.length()-6,
+            .status = Http::scBadRequest,
+            .suffixSz = input.length(),
              .method = HttpRequestMethod(Http::METHOD_GET),
              .uri = "/",
              .version = AnyP::ProtocolVersion()
@@ -322,8 +366,8 @@ testHttp1Parser::testParseRequestLineProtocols()
              .parsed = false,
              .needsMore = false,
              .parserState = Http1::HTTP_PARSE_DONE,
-            .status = Http::scHttpVersionNotSupported,
-            .suffixSz = input.length()-6,
+            .status = Http::scBadRequest,
+            .suffixSz = input.length(),
              .method = HttpRequestMethod(Http::METHOD_GET),
              .uri = "/",
              .version = AnyP::ProtocolVersion()
@@ -340,8 +384,8 @@ testHttp1Parser::testParseRequestLineProtocols()
              .parsed = false,
              .needsMore = false,
              .parserState = Http1::HTTP_PARSE_DONE,
-            .status = Http::scHttpVersionNotSupported,
-            .suffixSz = input.length()-6,
+            .status = Http::scBadRequest,
+            .suffixSz = input.length(),
              .method = HttpRequestMethod(Http::METHOD_GET),
              .uri = "/",
              .version = AnyP::ProtocolVersion()
@@ -358,8 +402,8 @@ testHttp1Parser::testParseRequestLineProtocols()
              .parsed = false,
              .needsMore = false,
              .parserState = Http1::HTTP_PARSE_DONE,
-            .status = Http::scHttpVersionNotSupported,
-            .suffixSz = input.length()-6,
+            .status = Http::scBadRequest,
+            .suffixSz = input.length(),
              .method = HttpRequestMethod(Http::METHOD_GET),
              .uri = "/",
              .version = AnyP::ProtocolVersion()
@@ -403,8 +447,8 @@ testHttp1Parser::testParseRequestLineStrange()
              .needsMore = false,
              .parserState = Http1::HTTP_PARSE_DONE,
              .status = Http::scBadRequest,
-            .suffixSz = input.length()-4,
-            .method = HttpRequestMethod(Http::METHOD_GET),
+            .suffixSz = input.length(),
+            .method = HttpRequestMethod(),
              .uri = NULL,
              .version = AnyP::ProtocolVersion()
          };
@@ -435,9 +479,9 @@ testHttp1Parser::testParseRequestLineStrange()
              .parsed = false,
              .needsMore = false,
              .parserState = Http1::HTTP_PARSE_DONE,
-            .status = Http::scHttpVersionNotSupported, // version being "o/ HTTP/1.1"
-            .suffixSz = 13,
-            .method = HttpRequestMethod(Http::METHOD_GET),
+            .status = Http::scBadRequest,
+            .suffixSz = input.length(),
+            .method = HttpRequestMethod(),
              .uri = NULL,
              .version = AnyP::ProtocolVersion()
          };
@@ -498,10 +542,10 @@ testHttp1Parser::testParseRequestLineTerminators()
              .parsed = false,
              .needsMore = false,
              .parserState = Http1::HTTP_PARSE_DONE,
-            .status = Http::scHttpVersionNotSupported,
-            .suffixSz = 9,
-            .method = HttpRequestMethod(Http::METHOD_GET),
-            .uri = "/",
+            .status = Http::scBadRequest,
+            .suffixSz = input.length(),
+            .method = HttpRequestMethod(),
+            .uri = NULL,
              .version = AnyP::ProtocolVersion()
          };
          output.clear();
@@ -533,10 +577,10 @@ testHttp1Parser::testParseRequestLineTerminators()
              .parsed = false,
              .needsMore = false,
              .parserState = Http1::HTTP_PARSE_DONE,
-            .status = Http::scHttpVersionNotSupported,
-            .suffixSz = 10,
-            .method = HttpRequestMethod(Http::METHOD_GET),
-            .uri = "/",
+            .status = Http::scBadRequest,
+            .suffixSz = input.length(),
+            .method = HttpRequestMethod(),
+            .uri = NULL,
              .version = AnyP::ProtocolVersion()
          };
          output.clear();
@@ -552,10 +596,10 @@ testHttp1Parser::testParseRequestLineTerminators()
              .parsed = false,
              .needsMore = false,
              .parserState = Http1::HTTP_PARSE_DONE,
-            .status = Http::scHttpVersionNotSupported,
-            .suffixSz = input.length()-6,
-            .method = HttpRequestMethod(Http::METHOD_GET),
-            .uri = "/",
+            .status = Http::scBadRequest,
+            .suffixSz = input.length(),
+            .method = HttpRequestMethod(),
+            .uri = NULL,
              .version = AnyP::ProtocolVersion()
          };
          output.clear();
@@ -645,6 +689,7 @@ testHttp1Parser::testParseRequestLineMethods()
          input.clear();
      }
  
+#if 0
      // too-long method (over 16 bytes)
      {
          input.append("HELLOSTRANGEWORLD / HTTP/1.1\r\n", 31);
@@ -662,6 +707,7 @@ testHttp1Parser::testParseRequestLineMethods()
          testResults(__LINE__, input, output, expect);
          input.clear();
      }
+#endif
  
      // method-only
      {
@@ -882,9 +928,9 @@ testHttp1Parser::testParseRequestLineInvalid()
          input.clear();
      }
  
-    // binary code in method (invalid)
+    // binary code after method (invalid)
      {
-        input.append("GET\x0A / HTTP/1.1\r\n", 17);
+        input.append("GET\x16 / HTTP/1.1\r\n", 17);
          struct resultSet expect = {
              .parsed = false,
              .needsMore = false,
@@ -900,7 +946,7 @@ testHttp1Parser::testParseRequestLineInvalid()
          input.clear();
      }
  
-    // binary code NUL! in method (always invalid)
+    // binary code NUL! after method (always invalid)
      {
          input.append("GET\0 / HTTP/1.1\r\n", 17);
          struct resultSet expect = {
@@ -918,20 +964,20 @@ testHttp1Parser::testParseRequestLineInvalid()
          input.clear();
      }
  
-    // no URL (grammer invalid, ambiguous with RFC 1945 HTTP/0.9 simple-request)
+    // Either an RFC 1945 HTTP/0.9 simple-request for an "HTTP/1.1" URI or
+    // an invalid (no URI) HTTP/1.1 request. We treat this as latter, naturally.
      {
          input.append("GET  HTTP/1.1\r\n", 15);
-        // RFC 7230 tolerance allows sequence of SP to make this ambiguous
          Config.onoff.relaxed_header_parser = 1;
          struct resultSet expect = {
-            .parsed = true,
+            .parsed = false,
              .needsMore = false,
              .parserState = Http1::HTTP_PARSE_DONE,
-            .status = Http::scOkay,
-            .suffixSz = 0,
-            .method = HttpRequestMethod(Http::METHOD_GET),
-            .uri = "HTTP/1.1",
-            .version = AnyP::ProtocolVersion(AnyP::PROTO_HTTP,0,9)
+            .status = Http::scBadRequest,
+            .suffixSz = input.length(),
+            .method = HttpRequestMethod(),
+            .uri = NULL,
+            .version = AnyP::ProtocolVersion()
          };
          output.clear();
          testResults(__LINE__, input, output, expect);
@@ -942,8 +988,8 @@ testHttp1Parser::testParseRequestLineInvalid()
              .needsMore = false,
              .parserState = Http1::HTTP_PARSE_DONE,
              .status = Http::scBadRequest,
-            .suffixSz = 11,
-            .method = HttpRequestMethod(Http::METHOD_GET),
+            .suffixSz = input.length(),
+            .method = HttpRequestMethod(),
              .uri = NULL,
              .version = AnyP::ProtocolVersion()
          };
@@ -952,18 +998,19 @@ testHttp1Parser::testParseRequestLineInvalid()
          input.clear();
      }
  
-    // no URL (grammer invalid, ambiguous with RFC 1945 HTTP/0.9 simple-request)
+    // Either an RFC 1945 HTTP/0.9 simple-request for an "HTTP/1.1" URI or
+    // an invalid (no URI) HTTP/1.1 request. We treat this as latter, naturally.
      {
          input.append("GET HTTP/1.1\r\n", 14);
          struct resultSet expect = {
-            .parsed = true,
+            .parsed = false,
              .needsMore = false,
              .parserState = Http1::HTTP_PARSE_DONE,
-            .status = Http::scOkay,
-            .suffixSz = 0,
-            .method = HttpRequestMethod(Http::METHOD_GET),
-            .uri = "HTTP/1.1",
-            .version = AnyP::ProtocolVersion(AnyP::PROTO_HTTP,0,9)
+            .status = Http::scBadRequest,
+            .suffixSz = input.length(),
+            .method = HttpRequestMethod(),
+            .uri = NULL,
+            .version = AnyP::ProtocolVersion()
          };
          output.clear();
          testResults(__LINE__, input, output, expect);
@@ -1036,9 +1083,7 @@ testHttp1Parser::testDripFeed()
      data.append("\n\n\n\n\n\n\n\n\n\n\n\n", 12);
      SBuf::size_type garbageEnd = data.length();
      data.append("GET ", 4);
-    SBuf::size_type methodEnd = data.length()-1;
      data.append("http://example.com/ ", 20);
-    SBuf::size_type uriEnd = data.length()-1;
      data.append("HTTP/1.1\r\n", 10);
      SBuf::size_type reqLineEnd = data.length() - 1;
      data.append("Host: example.com\r\n\r\n", 21);
@@ -1091,19 +1136,6 @@ testHttp1Parser::testDripFeed()
              if (pos >= garbageEnd)
                  expect.suffixSz = ioBuf.length();
  
-            // at end of request line expect to see method details
-            if (pos == methodEnd) {
-                expect.suffixSz = 0; // and a checkpoint buffer reset
-                expect.method = HttpRequestMethod(Http::METHOD_GET);
-            }
-
-            // at end of URI strict expects to see method, URI details
-            // relaxed must wait to end of line for whitespace tolerance
-            if (pos == uriEnd && !Config.onoff.relaxed_header_parser) {
-                expect.suffixSz = 0; // and a checkpoint buffer reset
-                expect.uri = "http://example.com/";
-            }
-
              // at end of request line expect to see method, URI, version details
              // and switch to seeking Mime header section
              if (pos == reqLineEnd) {
author	Alex Rousskov <rousskov@measurement-factory.com>
	Wed, 18 Nov 2015 23:56:16 +0000 (15:56 -0800)
committer	Amos Jeffries <squid3@treenet.co.nz>
	Wed, 18 Nov 2015 23:56:16 +0000 (15:56 -0800)
src/http/one/RequestParser.cc		patch \| blob \| blame \| history
src/http/one/RequestParser.h		patch \| blob \| blame \| history
src/tests/testHttp1Parser.cc		patch \| blob \| blame \| history