Source Format Enforcement (#763)

[thirdparty/squid.git] / src / http / one / Parser.h
diff --git a/src/http/one/Parser.h b/src/http/one/Parser.h

index ff1c8d290969c96ab5b6c5f6277dd9b62267085d..96fae6864211b27860d7a1bebdbebf7f8ce495ce 100644 (file)
--- a/src/http/one/Parser.h
+++ b/src/http/one/Parser.h
@@ -1,5 +1,5 @@
  /*
- * Copyright (C) 1996-2015 The Squid Software Foundation and contributors
+ * Copyright (C) 1996-2021 The Squid Software Foundation and contributors
   *
   * Squid software is distributed under GPLv2+ license and includes
   * contributions from numerous individuals and organizations.
@@ -11,21 +11,22 @@
  
  #include "anyp/ProtocolVersion.h"
  #include "http/one/forward.h"
-#include "SBuf.h"
-
-namespace Parser {
-class Tokenizer;
-}
+#include "http/StatusCode.h"
+#include "parser/forward.h"
+#include "sbuf/SBuf.h"
  
  namespace Http {
  namespace One {
  
  // Parser states
  enum ParseState {
-    HTTP_PARSE_NONE,     ///< initialized, but nothing usefully parsed yet
-    HTTP_PARSE_FIRST,    ///< HTTP/1 message first-line
-    HTTP_PARSE_MIME,     ///< HTTP/1 mime-header block
-    HTTP_PARSE_DONE      ///< parsed a message header, or reached a terminal syntax error
+    HTTP_PARSE_NONE,      ///< initialized, but nothing usefully parsed yet
+    HTTP_PARSE_FIRST,     ///< HTTP/1 message first-line
+    HTTP_PARSE_CHUNK_SZ,  ///< HTTP/1.1 chunked encoding chunk-size
+    HTTP_PARSE_CHUNK_EXT, ///< HTTP/1.1 chunked encoding chunk-ext
+    HTTP_PARSE_CHUNK,     ///< HTTP/1.1 chunked encoding chunk-data
+    HTTP_PARSE_MIME,      ///< HTTP/1 mime-header block
+    HTTP_PARSE_DONE       ///< parsed a message header, or reached a terminal syntax error
  };
  
  /** HTTP/1.x protocol parser
@@ -33,15 +34,20 @@ enum ParseState {
   * Works on a raw character I/O buffer and tokenizes the content into
   * the major CRLF delimited segments of an HTTP/1 protocol message:
   *
- * \item first-line (request-line / simple-request / status-line)
- * \item mime-header 0*( header-name ':' SP field-value CRLF)
+ * \li first-line (request-line / simple-request / status-line)
+ * \li mime-header 0*( header-name ':' SP field-value CRLF)
   */
  class Parser : public RefCountable
  {
  public:
      typedef SBuf::size_type size_type;
+    typedef ::Parser::Tokenizer Tokenizer;
  
-    Parser() : parsingStage_(HTTP_PARSE_NONE) {}
+    Parser() = default;
+    Parser(const Parser &) = default;
+    Parser &operator =(const Parser &) = default;
+    Parser(Parser &&) = default;
+    Parser &operator =(Parser &&) = default;
      virtual ~Parser() {}
  
      /// Set this parser back to a default state.
@@ -78,7 +84,7 @@ public:
      const AnyP::ProtocolVersion & messageProtocol() const {return msgProtocol_;}
  
      /**
-     * Scan the mime header block (badly) for a header with teh given name.
+     * Scan the mime header block (badly) for a Host header.
       *
       * BUG: omits lines when searching for headers with obs-fold or multiple entries.
       *
@@ -86,15 +92,54 @@ public:
       *
       * \return A pointer to a field-value of the first matching field-name, or NULL.
       */
-    char *getHeaderField(const char *name);
+    char *getHostHeaderField();
  
      /// the remaining unprocessed section of buffer
      const SBuf &remaining() const {return buf_;}
  
+    /**
+     * HTTP status code resulting from the parse process,
+     * to be used when handling invalid messages.
+     *
+     * Http::scNone indicates incomplete parse,
+     * Http::scOkay indicates no error,
+     * other codes represent a parse error.
+     */
+    Http::StatusCode parseStatusCode = Http::scNone;
+
+    /// Whitespace between regular protocol elements.
+    /// Seen in RFCs as OWS, RWS, BWS, SP/HTAB but may be "relaxed" by us.
+    /// See also: DelimiterCharacters().
+    static const CharacterSet &WhitespaceCharacters();
+
+    /// Whitespace between protocol elements in restricted contexts like
+    /// request line, status line, asctime-date, and credentials
+    /// Seen in RFCs as SP but may be "relaxed" by us.
+    /// See also: WhitespaceCharacters().
+    /// XXX: Misnamed and overused.
+    static const CharacterSet &DelimiterCharacters();
+
  protected:
-    /// detect and skip the CRLF or (if tolerant) LF line terminator
-    /// consume from the tokenizer and return true only if found
-    bool skipLineTerminator(::Parser::Tokenizer &tok) const;
+    /**
+     * detect and skip the CRLF or (if tolerant) LF line terminator
+     * consume from the tokenizer.
+     *
+     * \throws exception on bad or InsufficientInput (an incomplete or
+     *         missing line terminator means more data is needed).
+     * Returns normally only if the line terminator was found and consumed.
+     */
+    void skipLineTerminator(Tokenizer &) const;
+
+    /**
+     * Scan to find the mime headers block for current message.
+     *
+     * \retval true   If the mime block (or a block's non-existence) has been
+     *                identified accurately within limit characters.
+     *                mimeHeaderBlock_ has been updated and buf_ consumed.
+     *
+     * \retval false  An error occurred, or no mime terminator found within limit.
+     */
+    bool grabMimeBlock(const char *which, const size_t limit);
  
      /// RFC 7230 section 2.6 - 7 magic octets
      static const SBuf Http1magic;
@@ -103,15 +148,29 @@ protected:
      SBuf buf_;
  
      /// what stage the parser is currently up to
-    ParseState parsingStage_;
+    ParseState parsingStage_ = HTTP_PARSE_NONE;
  
      /// what protocol label has been found in the first line (if any)
      AnyP::ProtocolVersion msgProtocol_;
  
      /// buffer holding the mime headers (if any)
      SBuf mimeHeaderBlock_;
+
+    /// Whether the "invalid HTTP as HTTP/0.9" hack expects a mime header block
+    bool hackExpectsMime_ = false;
+
+private:
+    void cleanMimePrefix();
+    void unfoldMime();
  };
  
+/// skips and, if needed, warns about RFC 7230 BWS ("bad" whitespace)
+/// \throws InsufficientInput when the end of BWS cannot be confirmed
+void ParseBws(Parser::Tokenizer &);
+
+/// the right debugs() level for logging HTTP violation messages
+int ErrorLevel();
+
  } // namespace One
  } // namespace Http