Source Format Enforcement (#1234)

[thirdparty/squid.git] / src / http / one / ResponseParser.cc
diff --git a/src/http/one/ResponseParser.cc b/src/http/one/ResponseParser.cc

index 5f04e0829e17e3dd7c48fa72f35543eaf5170d37..e9c092dc60e2d5dec9f07f7e303cccae4402cd72 100644 (file)
--- a/src/http/one/ResponseParser.cc
+++ b/src/http/one/ResponseParser.cc
@@ -1,9 +1,18 @@
+/*
+ * Copyright (C) 1996-2023 The Squid Software Foundation and contributors
+ *
+ * Squid software is distributed under GPLv2+ license and includes
+ * contributions from numerous individuals and organizations.
+ * Please see the COPYING and CONTRIBUTORS files for details.
+ */
+
  #include "squid.h"
-#include "Debug.h"
+#include "base/Raw.h"
+#include "debug/Stream.h"
  #include "http/one/ResponseParser.h"
  #include "http/ProtocolVersion.h"
  #include "parser/Tokenizer.h"
-#include "profiler/Profiler.h"
+#include "sbuf/Stream.h"
  #include "SquidConfig.h"
  
  const SBuf Http::One::ResponseParser::IcyMagic("ICY ");
@@ -25,7 +34,7 @@ Http::One::ResponseParser::firstLineSize() const
          return result;
      }
      // NP: the parser does not accept >2 DIGIT for version numbers
-    if (msgProtocol_.minor >10)
+    if (msgProtocol_.minor > 9)
          result += 2;
      else
          result += 1;
@@ -38,118 +47,113 @@ Http::One::ResponseParser::firstLineSize() const
  
  // NP: we found the protocol version and consumed it already.
  // just need the status code and reason phrase
-const int
-Http::One::ResponseParser::parseResponseStatusAndReason()
+int
+Http::One::ResponseParser::parseResponseStatusAndReason(Tokenizer &tok)
  {
-    if (buf_.isEmpty())
-        return 0;
-
-    ::Parser::Tokenizer tok(buf_);
-
-    if (!completedStatus_) {
-        debugs(74, 9, "seek status-code in: " << tok.remaining().substr(0,10) << "...");
-        SBuf status;
-        // status code is 3 DIGIT octets
-        // NP: search space is >3 to get terminator character)
-        if(!tok.prefix(status, CharacterSet::DIGIT, 4))
-            return -1; // invalid status
-        // NOTE: multiple SP or non-SP bytes between version and status code are invalid.
-        if (tok.atEnd())
-            return 0; // need more to be sure we have it all
-        if(!tok.skip(' '))
-            return -1; // invalid status, a single SP terminator required
+    try {
+        if (!completedStatus_) {
+            debugs(74, 9, "seek status-code in: " << tok.remaining().substr(0,10) << "...");
+            ParseResponseStatus(tok, statusCode_);
+            buf_ = tok.remaining(); // resume checkpoint
+            completedStatus_ = true;
+        }
          // NOTE: any whitespace after the single SP is part of the reason phrase.
  
-        debugs(74, 6, "found string status-code=" << status);
-
-        // get the actual numeric value of the 0-3 digits we found
-        ::Parser::Tokenizer t2(status);
-        int64_t statusValue;
-        if (!t2.int64(statusValue))
-            return -1; // ouch. digits not forming a valid number?
-        debugs(74, 6, "found int64 status-code=" << statusValue);
-        if (statusValue < 0 || statusValue > 999)
-            return -1; // ouch. digits not within valid status code range.
-
-        statusCode_ = static_cast<Http::StatusCode>(statusValue);
-
+        /* RFC 7230 says we SHOULD ignore the reason phrase content
+         * but it has a definite valid vs invalid character set.
+         * We interpret the SHOULD as ignoring absence and syntax, but
+         * producing an error if it contains an invalid octet.
+         */
+
+        debugs(74, 9, "seek reason-phrase in: " << tok.remaining().substr(0,50) << "...");
+        // if we got here we are still looking for reason-phrase bytes
+        static const CharacterSet phraseChars = CharacterSet::WSP + CharacterSet::VCHAR + CharacterSet::OBSTEXT;
+        (void)tok.prefix(reasonPhrase_, phraseChars); // optional, no error if missing
+        skipLineTerminator(tok);
          buf_ = tok.remaining(); // resume checkpoint
-        completedStatus_ = true;
-    }
-
-    if (tok.atEnd())
-        return 0; // need more to be sure we have it all
-
-    /* RFC 7230 says we SHOULD ignore the reason phrase content
-     * but it has a definite valid vs invalid character set.
-     * We interpret the SHOULD as ignoring absence and syntax, but
-     * producing an error if it contains an invalid octet.
-     */
-
-    debugs(74, 9, "seek reason-phrase in: " << tok.remaining().substr(0,50) << "...");
-
-    // if we got here we are still looking for reason-phrase bytes
-    static const CharacterSet phraseChars = CharacterSet::WSP + CharacterSet::VCHAR + CharacterSet::OBSTEXT;
-    tok.prefix(reasonPhrase_, phraseChars); // optional, no error if missing
-    tok.skip('\r'); // optional trailing CR
-
-    if (tok.atEnd())
-        return 0; // need more to be sure we have it all
-
-    // LF existence matters
-    if (!tok.skip('\n')) {
+        debugs(74, DBG_DATA, Raw("leftovers", buf_.rawContent(), buf_.length()));
+        return 1;
+    } catch (const InsufficientInput &) {
          reasonPhrase_.clear();
-        return -1; // found invalid characters in the phrase
+        return 0; // need more to be sure we have it all
+    } catch (const std::exception &ex) {
+        debugs(74, 6, "invalid status-line: " << ex.what());
      }
+    return -1;
+}
  
-    debugs(74, DBG_DATA, "parse remaining buf={length=" << tok.remaining().length() << ", data='" << tok.remaining() << "'}");
-    buf_ = tok.remaining(); // resume checkpoint
-    return 1;
+void
+Http::One::ResponseParser::ParseResponseStatus(Tokenizer &tok, StatusCode &code)
+{
+    int64_t statusValue;
+    if (tok.int64(statusValue, 10, false, 3) && tok.skipOne(Parser::DelimiterCharacters())) {
+        debugs(74, 6, "raw status-code=" << statusValue);
+        code = static_cast<StatusCode>(statusValue); // may be invalid
+
+        // RFC 7230 Section 3.1.2 says status-code is exactly three DIGITs
+        if (code <= 99)
+            throw TextException(ToSBuf("status-code too short: ", code), Here());
+
+        // Codes with a non-standard first digit (a.k.a. response class) are
+        // considered semantically invalid per the following HTTP WG discussion:
+        // https://lists.w3.org/Archives/Public/ietf-http-wg/2010AprJun/0354.html
+        if (code >= 600)
+            throw TextException(ToSBuf("status-code from an invalid response class: ", code), Here());
+    } else if (tok.atEnd()) {
+        throw InsufficientInput();
+    } else {
+        throw TextException("syntactically invalid status-code area", Here());
+    }
  }
  
-const int
+/**
+ * Attempt to parse the status-line (version, status-code, reason-phrase) of an HTTP response.
+ *
+ * Governed by:
+ *  RFC 1945 section 6.1
+ *  RFC 7230 section 2.6, 3.1 and 3.5
+ *
+ * Parsing state is stored between calls. The current implementation uses
+ * checkpoints after each successful status-line field.
+ * The return value tells you whether the parsing is completed or not.
+ *
+ * \retval -1  an error occurred.
+ * \retval  1  successful parse. statusCode_ and maybe reasonPhrase_ are filled and buffer consumed including first delimiter.
+ * \retval  0  more data is needed to complete the parse
+ */
+int
  Http::One::ResponseParser::parseResponseFirstLine()
  {
-    ::Parser::Tokenizer tok(buf_);
+    Tokenizer tok(buf_);
  
      if (msgProtocol_.protocol != AnyP::PROTO_NONE) {
          debugs(74, 6, "continue incremental parse for " << msgProtocol_);
          debugs(74, DBG_DATA, "parse remaining buf={length=" << tok.remaining().length() << ", data='" << tok.remaining() << "'}");
          // we already found the magic, but not the full line. keep going.
-        return parseResponseStatusAndReason();
+        return parseResponseStatusAndReason(tok);
  
      } else if (tok.skip(Http1magic)) {
          debugs(74, 6, "found prefix magic " << Http1magic);
          // HTTP Response status-line parse
  
-        // magic contains major version, still need to find minor
-        SBuf verMinor;
-        // NP: we limit to 2-digits for speed, there really is no limit
-        // XXX: the protocols we accept dont have valid versions > 10 anyway
-        if (!tok.prefix(verMinor, CharacterSet::DIGIT, 2))
-            return -1; // invalid version minor code
-        if (tok.atEnd())
-            return 0; // need more to be sure we have it all
-        if(!tok.skip(' '))
-            return -1; // invalid version, a single SP terminator required
-
-        debugs(74, 6, "found string version-minor=" << verMinor);
+        // magic contains major version, still need to find minor DIGIT
+        int64_t verMinor;
+        const auto &WspDelim = DelimiterCharacters();
+        if (tok.int64(verMinor, 10, false, 1) && tok.skipOne(WspDelim)) {
+            msgProtocol_.protocol = AnyP::PROTO_HTTP;
+            msgProtocol_.major = 1;
+            msgProtocol_.minor = static_cast<unsigned int>(verMinor);
  
-        // get the actual numeric value of the 0-3 digits we found
-        ::Parser::Tokenizer t2(verMinor);
-        int64_t tvm = 0;
-        if (!t2.int64(tvm))
-            return -1; // ouch. digits not forming a valid number?
-        msgProtocol_.minor = static_cast<unsigned int>(tvm);
+            debugs(74, 6, "found version=" << msgProtocol_);
  
-        msgProtocol_.protocol = AnyP::PROTO_HTTP;
-        msgProtocol_.major = 1;
+            debugs(74, DBG_DATA, "parse remaining buf={length=" << tok.remaining().length() << ", data='" << tok.remaining() << "'}");
+            buf_ = tok.remaining(); // resume checkpoint
+            return parseResponseStatusAndReason(tok);
  
-        debugs(74, 6, "found version=" << msgProtocol_);
-
-        debugs(74, DBG_DATA, "parse remaining buf={length=" << tok.remaining().length() << ", data='" << tok.remaining() << "'}");
-        buf_ = tok.remaining(); // resume checkpoint
-        return parseResponseStatusAndReason();
+        } else if (tok.atEnd())
+            return 0; // need more to be sure we have it all
+        else
+            return -1; // invalid version or delimiter, a single SP terminator required
  
      } else if (tok.skip(IcyMagic)) {
          debugs(74, 6, "found prefix magic " << IcyMagic);
@@ -158,9 +162,14 @@ Http::One::ResponseParser::parseResponseFirstLine()
          // NP: ICY has no /major.minor details
          debugs(74, DBG_DATA, "parse remaining buf={length=" << tok.remaining().length() << ", data='" << tok.remaining() << "'}");
          buf_ = tok.remaining(); // resume checkpoint
-        return parseResponseStatusAndReason();
-
-    } else if (buf_.length() > Http1magic.length() && buf_.length() > IcyMagic.length()) {
+        return parseResponseStatusAndReason(tok);
+    } else if (buf_.length() < Http1magic.length() && Http1magic.startsWith(buf_)) {
+        debugs(74, 7, Raw("valid HTTP/1 prefix", buf_.rawContent(), buf_.length()));
+        return 0;
+    } else if (buf_.length() < IcyMagic.length() && IcyMagic.startsWith(buf_)) {
+        debugs(74, 7, Raw("valid ICY prefix", buf_.rawContent(), buf_.length()));
+        return 0;
+    } else {
          debugs(74, 2, "unknown/missing prefix magic. Interpreting as HTTP/0.9");
          // found something that looks like an HTTP/0.9 response
          // Gateway/Transform it into HTTP/1.1
@@ -180,7 +189,9 @@ Http::One::ResponseParser::parseResponseFirstLine()
          return 1; // no more parsing
      }
  
-    return 0; // need more to parse anything.
+    // unreachable
+    assert(false);
+    return -1;
  }
  
  bool
@@ -203,33 +214,31 @@ Http::One::ResponseParser::parse(const SBuf &aBuf)
  
      // stage 2: parse the status-line
      if (parsingStage_ == HTTP_PARSE_FIRST) {
-        PROF_start(HttpParserParseReplyLine);
-
-        int retcode = parseResponseFirstLine();
+        const int retcode = parseResponseFirstLine();
  
          // first-line (or a look-alike) found successfully.
-        if (retcode > 0)
+        if (retcode > 0 && parsingStage_ == HTTP_PARSE_FIRST)
              parsingStage_ = HTTP_PARSE_MIME;
          debugs(74, 5, "status-line: retval " << retcode);
          debugs(74, 5, "status-line: proto " << msgProtocol_);
          debugs(74, 5, "status-line: status-code " << statusCode_);
          debugs(74, 5, "status-line: reason-phrase " << reasonPhrase_);
          debugs(74, 5, "Parser: bytes processed=" << (aBuf.length()-buf_.length()));
-        PROF_stop(HttpParserParseReplyLine);
  
          // syntax errors already
          if (retcode < 0) {
              parsingStage_ = HTTP_PARSE_DONE;
-            statusCode_ = Http::scInvalidHeader;
+            parseStatusCode = Http::scInvalidHeader;
              return false;
          }
      }
  
      // stage 3: locate the mime header block
      if (parsingStage_ == HTTP_PARSE_MIME) {
-        if (!findMimeBlock("Response", Config.maxReplyHeaderSize))
+        if (!grabMimeBlock("Response", Config.maxReplyHeaderSize))
              return false;
      }
  
      return !needsMoreData();
  }
+