git.ipfire.org Git - thirdparty/squid.git/commitdiff
RFC 9112: Improve HTTP chunked encoding compliance (#1498)
author: Amos Jeffries <yadij@users.noreply.github.com>
Fri, 13 Oct 2023 08:44:16 +0000 (08:44 +0000)
committer: Squid Anubis <squid-anubis@squid-cache.org>
Fri, 13 Oct 2023 09:08:02 +0000 (09:08 +0000)
src/http/one/Parser.cc
src/http/one/Parser.h
src/http/one/TeChunkedParser.cc
src/parser/Tokenizer.cc
src/parser/Tokenizer.h

index 964371b4e8a4d089da2102806e28299a4788f522..b1908316a0beaea3570ad63b6318868a9a1d96cb 100644 (file)
@@ -65,16 +65,10 @@ Http::One::Parser::DelimiterCharacters()
 void
 Http::One::Parser::skipLineTerminator(Tokenizer &tok) const
 {
-    if (tok.skip(Http1::CrLf()))
-        return;
-
     if (Config.onoff.relaxed_header_parser && tok.skipOne(CharacterSet::LF))
         return;
 
-    if (tok.atEnd() || (tok.remaining().length() == 1 && tok.remaining().at(0) == '\r'))
-        throw InsufficientInput();
-
-    throw TexcHere("garbage instead of CRLF line terminator");
+    tok.skipRequired("line-terminating CRLF", Http1::CrLf());
 }
 
 /// all characters except the LF line terminator
index 5892a7a59f1447b4b14391ab7667c7d7084d1292..503c61d3ff18631c83b35e7c1362fc68e886d375 100644 (file)
@@ -124,9 +124,7 @@ protected:
      * detect and skip the CRLF or (if tolerant) LF line terminator
      * consume from the tokenizer.
      *
-     * \throws exception on bad or InsuffientInput.
-     * \retval true only if line terminator found.
-     * \retval false incomplete or missing line terminator, need more data.
+     * \throws exception on bad or InsufficientInput
      */
     void skipLineTerminator(Tokenizer &) const;
 
index d9138fe9a208340c44dd353b7748567875e4762f..9cce10fdc916988f7c1238c7915196cfb8c7b1c9 100644 (file)
@@ -91,6 +91,11 @@ Http::One::TeChunkedParser::parseChunkSize(Tokenizer &tok)
 {
     Must(theChunkSize <= 0); // Should(), really
 
+    static const SBuf bannedHexPrefixLower("0x");
+    static const SBuf bannedHexPrefixUpper("0X");
+    if (tok.skip(bannedHexPrefixLower) || tok.skip(bannedHexPrefixUpper))
+        throw TextException("chunk starts with 0x", Here());
+
     int64_t size = -1;
     if (tok.int64(size, 16, false) && !tok.atEnd()) {
         if (size < 0)
@@ -121,7 +126,7 @@ Http::One::TeChunkedParser::parseChunkMetadataSuffix(Tokenizer &tok)
     // bad or insufficient input, like in the code below. TODO: Expand up.
     try {
         parseChunkExtensions(tok); // a possibly empty chunk-ext list
-        skipLineTerminator(tok);
+        tok.skipRequired("CRLF after [chunk-ext]", Http1::CrLf());
         buf_ = tok.remaining();
         parsingStage_ = theChunkSize ? Http1::HTTP_PARSE_CHUNK : Http1::HTTP_PARSE_MIME;
         return true;
@@ -132,12 +137,14 @@ Http::One::TeChunkedParser::parseChunkMetadataSuffix(Tokenizer &tok)
     // other exceptions bubble up to kill message parsing
 }
 
-/// Parses the chunk-ext list (RFC 7230 section 4.1.1 and its Errata #4667):
+/// Parses the chunk-ext list (RFC 9112 section 7.1.1):
 /// chunk-ext = *( BWS ";" BWS chunk-ext-name [ BWS "=" BWS chunk-ext-val ] )
 void
-Http::One::TeChunkedParser::parseChunkExtensions(Tokenizer &tok)
+Http::One::TeChunkedParser::parseChunkExtensions(Tokenizer &callerTok)
 {
     do {
+        auto tok = callerTok;
+
         ParseBws(tok); // Bug 4492: IBM_HTTP_Server sends SP after chunk-size
 
         if (!tok.skip(';'))
@@ -145,6 +152,7 @@ Http::One::TeChunkedParser::parseChunkExtensions(Tokenizer &tok)
 
         parseOneChunkExtension(tok);
         buf_ = tok.remaining(); // got one extension
+        callerTok = tok;
     } while (true);
 }
 
@@ -158,11 +166,14 @@ Http::One::ChunkExtensionValueParser::Ignore(Tokenizer &tok, const SBuf &extName
 /// Parses a single chunk-ext list element:
 /// chunk-ext = *( BWS ";" BWS chunk-ext-name [ BWS "=" BWS chunk-ext-val ] )
 void
-Http::One::TeChunkedParser::parseOneChunkExtension(Tokenizer &tok)
+Http::One::TeChunkedParser::parseOneChunkExtension(Tokenizer &callerTok)
 {
+    auto tok = callerTok;
+
     ParseBws(tok); // Bug 4492: ICAP servers send SP before chunk-ext-name
 
     const auto extName = tok.prefix("chunk-ext-name", CharacterSet::TCHAR);
+    callerTok = tok; // in case we determine that this is a valueless chunk-ext
 
     ParseBws(tok);
 
@@ -176,6 +187,8 @@ Http::One::TeChunkedParser::parseOneChunkExtension(Tokenizer &tok)
         customExtensionValueParser->parse(tok, extName);
     else
         ChunkExtensionValueParser::Ignore(tok, extName);
+
+    callerTok = tok;
 }
 
 bool
@@ -209,7 +222,7 @@ Http::One::TeChunkedParser::parseChunkEnd(Tokenizer &tok)
     Must(theLeftBodySize == 0); // Should(), really
 
     try {
-        skipLineTerminator(tok);
+        tok.skipRequired("chunk CRLF", Http1::CrLf());
         buf_ = tok.remaining(); // parse checkpoint
         theChunkSize = 0; // done with the current chunk
         parsingStage_ = Http1::HTTP_PARSE_CHUNK_SZ;
index 0bb0b855d435995432f8bacad3d9aed64da63d9e..51654bae90d9591b98f622af597e3e14667b7ee4 100644 (file)
@@ -145,6 +145,18 @@ Parser::Tokenizer::skipAll(const CharacterSet &tokenChars)
     return success(prefixLen);
 }
 
+void
+Parser::Tokenizer::skipRequired(const char *description, const SBuf &tokenToSkip)
+{
+    if (skip(tokenToSkip) || tokenToSkip.isEmpty())
+        return;
+
+    if (tokenToSkip.startsWith(buf_))
+        throw InsufficientInput();
+
+    throw TextException(ToSBuf("cannot skip ", description), Here());
+}
+
 bool
 Parser::Tokenizer::skipOne(const CharacterSet &chars)
 {
index 7bae1ccbb481d282cd035c2d8f16e6cba0c87e55..3cfa7dd6c0414aa5e5850a0d65258d490b8c3b1c 100644 (file)
@@ -115,6 +115,13 @@ public:
      */
     SBuf::size_type skipAll(const CharacterSet &discardables);
 
+    /** skips a given character sequence (string);
+     * does nothing if the sequence is empty
+     *
+     * \throws exception on mismatching prefix or InsufficientInput
+     */
+    void skipRequired(const char *description, const SBuf &tokenToSkip);
+
     /** Removes a single trailing character from the set.
      *
      * \return whether a character was removed