HTTP/1.1: unfold mime header blocks

author Amos Jeffries <squid3@treenet.co.nz>

Fri, 20 May 2016 08:28:33 +0000 (20:28 +1200)

committer Amos Jeffries <squid3@treenet.co.nz>

Fri, 20 May 2016 08:28:33 +0000 (20:28 +1200)
author Amos Jeffries <squid3@treenet.co.nz>
Fri, 20 May 2016 08:28:33 +0000 (20:28 +1200)
committer Amos Jeffries <squid3@treenet.co.nz>
Fri, 20 May 2016 08:28:33 +0000 (20:28 +1200)
diff --git a/src/http/one/Parser.cc b/src/http/one/Parser.cc

index e762957af0a56dbb5b18afc56f503a4296cfc67e..1eb1ea46b882feb37edc055f06e6bbed4622764c 100644 (file)
--- a/src/http/one/Parser.cc
+++ b/src/http/one/Parser.cc
@@ -16,6 +16,12 @@
  /// RFC 7230 section 2.6 - 7 magic octets
  const SBuf Http::One::Parser::Http1magic("HTTP/1.");
  
+const SBuf &Http::One::CrLf()
+{
+    static const SBuf crlf("\r\n");
+    return crlf;
+}
+
  void
  Http::One::Parser::clear()
  {
@@ -25,11 +31,34 @@ Http::One::Parser::clear()
      mimeHeaderBlock_.clear();
  }
  
+/// characters HTTP permits tolerant parsers to accept as delimiters
+static const CharacterSet &
+RelaxedDelimiterCharacters()
+{
+    // RFC 7230 section 3.5
+    // tolerant parser MAY accept any of SP, HTAB, VT (%x0B), FF (%x0C),
+    // or bare CR as whitespace between request-line fields
+    static const CharacterSet RelaxedDels =
+        (CharacterSet::SP +
+         CharacterSet::HTAB +
+         CharacterSet("VT,FF","\x0B\x0C") +
+         CharacterSet::CR).rename("relaxed-WSP");
+
+    return RelaxedDels;
+}
+
+/// characters used to separate HTTP fields
+const CharacterSet &
+Http::One::Parser::DelimiterCharacters()
+{
+    return Config.onoff.relaxed_header_parser ?
+           RelaxedDelimiterCharacters() : CharacterSet::SP;
+}
+
  bool
  Http::One::Parser::skipLineTerminator(Http1::Tokenizer &tok) const
  {
-    static const SBuf crlf("\r\n");
-    if (tok.skip(crlf))
+    if (tok.skip(Http1::CrLf()))
          return true;
  
      if (Config.onoff.relaxed_header_parser && tok.skipOne(CharacterSet::LF))
@@ -38,6 +67,88 @@ Http::One::Parser::skipLineTerminator(Http1::Tokenizer &tok) const
      return false;
  }
  
+/// all characters except the LF line terminator
+static const CharacterSet &
+LineCharacters()
+{
+    static const CharacterSet line = CharacterSet::LF.complement("non-LF");
+    return line;
+}
+
+/**
+ * Remove invalid lines (if any) from the mime prefix
+ *
+ * RFC 7230 section 3:
+ * "A recipient that receives whitespace between the start-line and
+ * the first header field MUST ... consume each whitespace-preceded
+ * line without further processing of it."
+ *
+ * We need to always use the relaxed delimiters here to prevent
+ * line smuggling through strict parsers.
+ *
+ * Note that 'whitespace' in RFC 7230 includes CR. So that means
+ * sequences of CRLF will be pruned, but not sequences of bare-LF.
+ */
+void
+Http::One::Parser::cleanMimePrefix()
+{
+    Http1::Tokenizer tok(mimeHeaderBlock_);
+    while (tok.skipOne(RelaxedDelimiterCharacters())) {
+        (void)tok.skipAll(LineCharacters()); // optional line content
+        // LF terminator is required.
+        // trust headersEnd() to ensure that we have at least one LF
+        (void)tok.skipOne(CharacterSet::LF);
+    }
+
+    // If mimeHeaderBlock_ had just whitespace line(s) followed by CRLF,
+    // then we skipped everything, including that terminating LF.
+    // Restore the terminating CRLF if needed.
+    if (tok.atEnd())
+        mimeHeaderBlock_ = Http1::CrLf();
+    else
+        mimeHeaderBlock_ = tok.remaining();
+    // now mimeHeaderBlock_ has 0+ fields followed by the LF terminator
+}
+
+/**
+ * Replace each obs-fold with a single SP.
+ *
+ * RFC 7230 section 3.2.4
+ * "A server that receives an obs-fold in a request message that is not
+ *  within a message/http container MUST ... replace
+ *  each received obs-fold with one or more SP octets prior to
+ *  interpreting the field value or forwarding the message downstream."
+ *
+ * "A proxy or gateway that receives an obs-fold in a response message
+ *  that is not within a message/http container MUST ... replace each
+ *  received obs-fold with one or more SP octets prior to interpreting
+ *  the field value or forwarding the message downstream."
+ */
+void
+Http::One::Parser::unfoldMime()
+{
+    Http1::Tokenizer tok(mimeHeaderBlock_);
+    const auto szLimit = mimeHeaderBlock_.length();
+    mimeHeaderBlock_.clear();
+    // prevent the mime sender from forcing append() to realloc/grow multiple times.
+    mimeHeaderBlock_.reserveSpace(szLimit);
+
+    static const CharacterSet nonCRLF = (CharacterSet::CR + CharacterSet::LF).complement().rename("non-CRLF");
+
+    while (!tok.atEnd()) {
+        const SBuf all(tok.remaining());
+        const auto blobLen = tok.skipAll(nonCRLF); // may not be there
+        const auto crLen = tok.skipAll(CharacterSet::CR); // may not be there
+        const auto lfLen = tok.skipOne(CharacterSet::LF); // may not be there
+
+        if (lfLen && tok.skipAll(CharacterSet::WSP)) { // obs-fold!
+            mimeHeaderBlock_.append(all.substr(0, blobLen));
+            mimeHeaderBlock_.append(' '); // replace one obs-fold with one SP
+        } else
+            mimeHeaderBlock_.append(all.substr(0, blobLen + crLen + lfLen));
+    }
+}
+
  bool
  Http::One::Parser::grabMimeBlock(const char *which, const size_t limit)
  {
@@ -51,8 +162,8 @@ Http::One::Parser::grabMimeBlock(const char *which, const size_t limit)
           *       So the rest of the code will need to deal with '0'-byte headers
           *       (ie, none, so don't try parsing em)
           */
-        // XXX: c_str() reallocates. performance regression.
-        if (SBuf::size_type mimeHeaderBytes = headersEnd(buf_.c_str(), buf_.length())) {
+        bool containsObsFold;
+        if (SBuf::size_type mimeHeaderBytes = headersEnd(buf_, containsObsFold)) {
  
              // Squid could handle these headers, but admin does not want to
              if (firstLineSize() + mimeHeaderBytes >= limit) {
@@ -64,6 +175,10 @@ Http::One::Parser::grabMimeBlock(const char *which, const size_t limit)
              }
  
              mimeHeaderBlock_ = buf_.consume(mimeHeaderBytes);
+            cleanMimePrefix();
+            if (containsObsFold)
+                unfoldMime();
+
              debugs(74, 5, "mime header (0-" << mimeHeaderBytes << ") {" << mimeHeaderBlock_ << "}");
  
          } else { // headersEnd() == 0
@@ -102,12 +217,10 @@ Http::One::Parser::getHeaderField(const char *name)
      debugs(25, 5, "looking for " << name);
  
      // while we can find more LF in the SBuf
-    static CharacterSet iso8859Line = CharacterSet("non-LF",'\0','\n'-1) + CharacterSet(NULL, '\n'+1, (unsigned char)0xFF);
      Http1::Tokenizer tok(mimeHeaderBlock_);
      SBuf p;
-    static const SBuf crlf("\r\n");
  
-    while (tok.prefix(p, iso8859Line)) {
+    while (tok.prefix(p, LineCharacters())) {
          if (!tok.skipOne(CharacterSet::LF)) // move tokenizer past the LF
              break; // error. reached invalid octet or end of buffer insted of an LF ??
  
@@ -120,7 +233,7 @@ Http::One::Parser::getHeaderField(const char *name)
              continue;
  
          // drop any trailing *CR sequence
-        p.trim(crlf, false, true);
+        p.trim(Http1::CrLf(), false, true);
  
          debugs(25, 5, "checking " << p);
          p.consume(namelen + 1);
diff --git a/src/http/one/Parser.h b/src/http/one/Parser.h

index dc57583e6e3529824d0e8144509a870ebabc1801..d24c8c503ffece5bb292701e5ff03196b1752e75 100644 (file)
--- a/src/http/one/Parser.h
+++ b/src/http/one/Parser.h
@@ -111,6 +111,10 @@ protected:
      /// consume from the tokenizer and return true only if found
      bool skipLineTerminator(Http1::Tokenizer &tok) const;
  
+    /// the characters which are to be considered valid whitespace
+    /// (WSP / BSP / OWS)
+    static const CharacterSet &DelimiterCharacters();
+
      /**
       * Scan to find the mime headers block for current message.
       *
@@ -139,6 +143,10 @@ protected:
  
      /// Whether the invalid HTTP as HTTP/0.9 hack expects a mime header block
      bool hackExpectsMime_;
+
+private:
+    void cleanMimePrefix();
+    void unfoldMime();
  };
  
  } // namespace One
diff --git a/src/http/one/RequestParser.cc b/src/http/one/RequestParser.cc

index ed04e85ed25f252dfced6a1fd574b72381fb487c..a1c18942e258834682a84c80246cabbd7fe6cb6c 100644 (file)
--- a/src/http/one/RequestParser.cc
+++ b/src/http/one/RequestParser.cc
@@ -114,30 +114,6 @@ UriValidCharacters()
      return UriChars;
  }
  
-/// characters HTTP permits tolerant parsers to accept as delimiters
-static const CharacterSet &
-RelaxedDelimiterCharacters()
-{
-    // RFC 7230 section 3.5
-    // tolerant parser MAY accept any of SP, HTAB, VT (%x0B), FF (%x0C),
-    // or bare CR as whitespace between request-line fields
-    static const CharacterSet RelaxedDels =
-        CharacterSet::SP +
-        CharacterSet::HTAB +
-        CharacterSet("VT,FF","\x0B\x0C") +
-        CharacterSet::CR;
-
-    return RelaxedDels;
-}
-
-/// characters used to separate HTTP fields
-const CharacterSet &
-Http::One::RequestParser::DelimiterCharacters()
-{
-    return Config.onoff.relaxed_header_parser ?
-           RelaxedDelimiterCharacters() : CharacterSet::SP;
-}
-
  /// characters which Squid will accept in the HTTP request-target (URI)
  const CharacterSet &
  Http::One::RequestParser::RequestTargetCharacters()
diff --git a/src/http/one/RequestParser.h b/src/http/one/RequestParser.h

index d339e94b3c8a7c40863e28b8c1b0330550bd6eeb..107b8cf4f7630e6a6e2deffa49cc118953fa1620 100644 (file)
--- a/src/http/one/RequestParser.h
+++ b/src/http/one/RequestParser.h
@@ -56,7 +56,6 @@ private:
      bool skipTrailingCrs(Http1::Tokenizer &tok);
  
      bool http0() const {return !msgProtocol_.major;}
-    static const CharacterSet &DelimiterCharacters();
      static const CharacterSet &RequestTargetCharacters();
  
      /// what request method has been found on the first line
diff --git a/src/http/one/forward.h b/src/http/one/forward.h

index 93f39634c6e0409f254f8f988049a3ab0ed7d02c..791dc100e1e3f586a6572a5ff06ab44d7249effa 100644 (file)
--- a/src/http/one/forward.h
+++ b/src/http/one/forward.h
@@ -10,6 +10,7 @@
  #define SQUID_SRC_HTTP_ONE_FORWARD_H
  
  #include "base/RefCount.h"
+#include "sbuf/forward.h"
  
  namespace Http {
  namespace One {
@@ -27,6 +28,9 @@ typedef RefCount<Http::One::RequestParser> RequestParserPointer;
  class ResponseParser;
  typedef RefCount<Http::One::ResponseParser> ResponseParserPointer;
  
+/// CRLF textual representation
+const SBuf &CrLf();
+
  } // namespace One
  } // namespace Http
  
diff --git a/src/mime_header.cc b/src/mime_header.cc

index 746cd120eb37b2db747191f78385e572bb6210ae..f8f5354e978f51f166eb3e1d0b2fcdae043ed75b 100644 (file)
--- a/src/mime_header.cc
+++ b/src/mime_header.cc
@@ -13,10 +13,11 @@
  #include "profiler/Profiler.h"
  
  size_t
-headersEnd(const char *mime, size_t l)
+headersEnd(const char *mime, size_t l, bool &containsObsFold)
  {
      size_t e = 0;
      int state = 1;
+    containsObsFold = false;
  
      PROF_start(headersEnd);
  
@@ -35,7 +36,10 @@ headersEnd(const char *mime, size_t l)
                  state = 2;
              else if ('\n' == mime[e])
                  state = 3;
-            else
+            else if (' ' == mime[e] || '\t' == mime[e]) {
+                containsObsFold = true;
+                state = 0;
+            } else
                  state = 0;
  
              break;
diff --git a/src/mime_header.h b/src/mime_header.h

index b57cc0d225f06557ff8946cc027d6a37c69fd483..ca176f329c4994e59edf572d1cf97fc238a7e44a 100644 (file)
--- a/src/mime_header.h
+++ b/src/mime_header.h
@@ -11,7 +11,35 @@
  #ifndef SQUID_MIME_HEADER_H_
  #define SQUID_MIME_HEADER_H_
  
-size_t headersEnd(const char *, size_t);
+/**
+ * Scan for the end of mime header block.
+ *
+ * Which is one of the following octet patterns:
+ * - CRLF CRLF, or
+ * - CRLF LF, or
+ * - LF CRLF, or
+ * - LF LF
+ *
+ * Also detects whether an obs-fold pattern exists within the mime block
+ * - CR*LF (SP / HTAB)
+ *
+ * \param containsObsFold will be set to true if obs-fold pattern is found.
+ */
+size_t headersEnd(const char *, size_t, bool &containsObsFold);
+
+inline size_t
+headersEnd(const SBuf &buf, bool &containsObsFold)
+{
+    return headersEnd(buf.rawContent(), buf.length(), containsObsFold);
+}
+
+/// \deprecated caller needs to be fixed to handle obs-fold
+inline size_t
+headersEnd(const char *buf, size_t sz)
+{
+    bool ignored;
+    return headersEnd(buf, sz, ignored);
+}
  
  #endif /* SQUID_MIME_HEADER_H_ */
  
diff --git a/src/tests/stub_mime.cc b/src/tests/stub_mime.cc

index 42a9a5f53e9fea5126669e6667a10933c68cca0e..a284b5eb3accedd1fc1e776d953042ade99e82a6 100644 (file)
--- a/src/tests/stub_mime.cc
+++ b/src/tests/stub_mime.cc
@@ -11,5 +11,5 @@
  #define STUB_API "mime.cc"
  #include "tests/STUB.h"
  
-size_t headersEnd(const char *mime, size_t l) STUB_RETVAL(0)
+size_t headersEnd(const char *, size_t, bool &) STUB_RETVAL(0)
author	Amos Jeffries <squid3@treenet.co.nz>
	Fri, 20 May 2016 08:28:33 +0000 (20:28 +1200)
committer	Amos Jeffries <squid3@treenet.co.nz>
	Fri, 20 May 2016 08:28:33 +0000 (20:28 +1200)
src/http/one/Parser.cc		patch \| blob \| blame \| history
src/http/one/Parser.h		patch \| blob \| blame \| history
src/http/one/RequestParser.cc		patch \| blob \| blame \| history
src/http/one/RequestParser.h		patch \| blob \| blame \| history
src/http/one/forward.h		patch \| blob \| blame \| history
src/mime_header.cc		patch \| blob \| blame \| history
src/mime_header.h		patch \| blob \| blame \| history
src/tests/stub_mime.cc		patch \| blob \| blame \| history