Bug 4492: Chunk extension parser is too pedantic.

author Alex Rousskov <rousskov@measurement-factory.com>

Mon, 19 Jun 2017 10:07:40 +0000 (22:07 +1200)

committer Amos Jeffries <squid3@treenet.co.nz>

Mon, 19 Jun 2017 10:07:40 +0000 (22:07 +1200)
author Alex Rousskov <rousskov@measurement-factory.com>
Mon, 19 Jun 2017 10:07:40 +0000 (22:07 +1200)
committer Amos Jeffries <squid3@treenet.co.nz>
Mon, 19 Jun 2017 10:07:40 +0000 (22:07 +1200)
diff --git a/src/http/one/Parser.cc b/src/http/one/Parser.cc

index 01aa183498469b5f70d4052c47101ed80bbd879a..a87adc257202a7fa52775e2b40d8528ced84359a 100644 (file)
--- a/src/http/one/Parser.cc
+++ b/src/http/one/Parser.cc
@@ -47,7 +47,13 @@ RelaxedDelimiterCharacters()
      return RelaxedDels;
  }
  
-/// characters used to separate HTTP fields
+const CharacterSet &
+Http::One::Parser::WhitespaceCharacters()
+{
+    return Config.onoff.relaxed_header_parser ?
+           RelaxedDelimiterCharacters() : CharacterSet::WSP;
+}
+
  const CharacterSet &
  Http::One::Parser::DelimiterCharacters()
  {
@@ -259,11 +265,24 @@ Http::One::Parser::getHeaderField(const char *name)
      return NULL;
  }
  
-#if USE_HTTP_VIOLATIONS
  int
-Http::One::Parser::violationLevel() const
+Http::One::ErrorLevel()
  {
      return Config.onoff.relaxed_header_parser < 0 ? DBG_IMPORTANT : 5;
  }
-#endif
+
+// BWS = *( SP / HTAB ) ; WhitespaceCharacters() may relax this RFC 7230 rule
+bool
+Http::One::ParseBws(Tokenizer &tok)
+{
+    if (const auto count = tok.skipAll(Parser::WhitespaceCharacters())) {
+        // Generating BWS is a MUST-level violation so warn about it as needed.
+        debugs(33, ErrorLevel(), "found " << count << " BWS octets");
+        // RFC 7230 says we MUST parse BWS, so we fall through even if
+        // Config.onoff.relaxed_header_parser is off.
+    }
+    // else we successfully "parsed" an empty BWS sequence
+
+    return true;
+}
  
diff --git a/src/http/one/Parser.h b/src/http/one/Parser.h

index 560ac1f0b13958dbd5e14187663d9133d04b75b3..c65552e9b7285643d6551d523a8f48caba4f3cf7 100644 (file)
--- a/src/http/one/Parser.h
+++ b/src/http/one/Parser.h
@@ -91,11 +91,6 @@ public:
      /// the remaining unprocessed section of buffer
      const SBuf &remaining() const {return buf_;}
  
-#if USE_HTTP_VIOLATIONS
-    /// the right debugs() level for parsing HTTP violation messages
-    int violationLevel() const;
-#endif
-
      /**
       * HTTP status code resulting from the parse process.
       * to be used on the invalid message handling.
@@ -106,8 +101,16 @@ public:
       */
      Http::StatusCode parseStatusCode;
  
-    /// the characters which are to be considered valid whitespace
-    /// (WSP / BSP / OWS)
+    /// Whitespace between regular protocol elements.
+    /// Seen in RFCs as OWS, RWS, BWS, SP/HTAB but may be "relaxed" by us.
+    /// See also: DelimiterCharacters().
+    static const CharacterSet &WhitespaceCharacters();
+
+    /// Whitespace between protocol elements in restricted contexts like
+    /// request line, status line, asctime-date, and credentials
+    /// Seen in RFCs as SP but may be "relaxed" by us.
+    /// See also: WhitespaceCharacters().
+    /// XXX: Misnamed and overused.
      static const CharacterSet &DelimiterCharacters();
  
  protected:
@@ -155,6 +158,13 @@ private:
      void unfoldMime();
  };
  
+/// skips and, if needed, warns about RFC 7230 BWS ("bad" whitespace)
+/// \returns true (always; unlike all the skip*() functions)
+bool ParseBws(Tokenizer &tok);
+
+/// the right debugs() level for logging HTTP violation messages
+int ErrorLevel();
+
  } // namespace One
  } // namespace Http
  
diff --git a/src/http/one/RequestParser.cc b/src/http/one/RequestParser.cc

index 44482a913fd25f37a5d43ce9ac50c7e2b93c7ae9..12f2c036dcb7e9eba3d82fce222fe54af008676d 100644 (file)
--- a/src/http/one/RequestParser.cc
+++ b/src/http/one/RequestParser.cc
@@ -14,12 +14,6 @@
  #include "profiler/Profiler.h"
  #include "SquidConfig.h"
  
-// the right debugs() level for parsing errors
-inline static int
-ErrorLevel() {
-    return Config.onoff.relaxed_header_parser < 0 ? DBG_IMPORTANT : 5;
-}
-
  Http::One::RequestParser::RequestParser(bool preserveParsed) :
      Parser(),
      preserveParsed_(preserveParsed)
diff --git a/src/http/one/TeChunkedParser.cc b/src/http/one/TeChunkedParser.cc

index d730cdb8d230b18366bc33c2d43be46bf01fd6bf..42a090776c45b41fe11f703485ba16af4803c79f 100644 (file)
--- a/src/http/one/TeChunkedParser.cc
+++ b/src/http/one/TeChunkedParser.cc
@@ -105,10 +105,10 @@ Http::One::TeChunkedParser::parseChunkSize(Http1::Tokenizer &tok)
  }
  
  /**
- * Parses a set of RFC 7230 section 4.1.1 chunk-ext
- * http://tools.ietf.org/html/rfc7230#section-4.1.1
+ * Parses chunk metadata suffix, looking for interesting extensions and/or
+ * getting to the line terminator. RFC 7230 section 4.1.1 and its Errata #4667:
   *
- *   chunk-ext      = *( ";" chunk-ext-name [ "=" chunk-ext-val ] )
+ *   chunk-ext = *( BWS  ";" BWS chunk-ext-name [ BWS "=" BWS chunk-ext-val ] )
   *   chunk-ext-name = token
   *   chunk-ext-val  = token / quoted-string
   *
@@ -117,17 +117,16 @@ Http::One::TeChunkedParser::parseChunkSize(Http1::Tokenizer &tok)
  bool
  Http::One::TeChunkedParser::parseChunkExtension(Http1::Tokenizer &tok, bool skipKnown)
  {
-    // Bug 4492: IBM_HTTP_Server sends SP padding
-    if (auto n = tok.skipAll(CharacterSet::SP)) {
-        debugs(94, 3, "skipping " << n << " spurious whitespace at start of chunk extension");
-    }
-
      SBuf ext;
      SBuf value;
-    while (tok.skip(';') && tok.prefix(ext, CharacterSet::TCHAR)) {
+    while (
+        ParseBws(tok) && // Bug 4492: IBM_HTTP_Server sends SP after chunk-size
+        tok.skip(';') &&
+        ParseBws(tok) && // Bug 4492: ICAP servers send SP before chunk-ext-name
+        tok.prefix(ext, CharacterSet::TCHAR)) { // chunk-ext-name
  
          // whole value part is optional. if no '=' expect next chunk-ext
-        if (tok.skip('=')) {
+        if (ParseBws(tok) && tok.skip('=') && ParseBws(tok)) {
  
              if (!skipKnown) {
                  if (ext.cmp("use-original-body",17) == 0 && tok.int64(useOriginBody, 10)) {
author	Alex Rousskov <rousskov@measurement-factory.com>
	Mon, 19 Jun 2017 10:07:40 +0000 (22:07 +1200)
committer	Amos Jeffries <squid3@treenet.co.nz>
	Mon, 19 Jun 2017 10:07:40 +0000 (22:07 +1200)
src/http/one/Parser.cc		patch \| blob \| blame \| history
src/http/one/Parser.h		patch \| blob \| blame \| history
src/http/one/RequestParser.cc		patch \| blob \| blame \| history
src/http/one/TeChunkedParser.cc		patch \| blob \| blame \| history