src/http/one/Parser.h

   1 /*
   2  * Copyright (C) 1996-2018 The Squid Software Foundation and contributors
   3  *
   4  * Squid software is distributed under GPLv2+ license and includes
   5  * contributions from numerous individuals and organizations.
   6  * Please see the COPYING and CONTRIBUTORS files for details.
   7  */
   8
   9 #ifndef _SQUID_SRC_HTTP_ONE_PARSER_H
  10 #define _SQUID_SRC_HTTP_ONE_PARSER_H
  11
  12 #include "anyp/ProtocolVersion.h"
  13 #include "http/one/forward.h"
  14 #include "http/StatusCode.h"
  15 #include "sbuf/SBuf.h"
  16
  17 namespace Http {
  18 namespace One {
  19
   20 /// Parser states: the stage an HTTP/1 parser is currently in (stored in Parser::parsingStage_)
   21 enum ParseState {
   22     HTTP_PARSE_NONE,      ///< initialized, but nothing usefully parsed yet (initial value of Parser::parsingStage_)
   23     HTTP_PARSE_FIRST,     ///< HTTP/1 message first-line
   24     HTTP_PARSE_CHUNK_SZ,  ///< HTTP/1.1 chunked encoding chunk-size
   25     HTTP_PARSE_CHUNK_EXT, ///< HTTP/1.1 chunked encoding chunk-ext
   26     HTTP_PARSE_CHUNK,     ///< HTTP/1.1 chunked encoding chunk-data
   27     HTTP_PARSE_MIME,      ///< HTTP/1 mime-header block
   28     HTTP_PARSE_DONE       ///< parsed a message header, or reached a terminal syntax error; Parser::needsMoreData() is false here
   29 };
  30
   31 /** HTTP/1.x protocol parser
   32  *
   33  * Works on a raw character I/O buffer and tokenizes the content into
   34  * the major CRLF delimited segments of an HTTP/1 protocol message:
   35  *
   36  * \item first-line (request-line / simple-request / status-line)
   37  * \item mime-header 0*( header-name ':' SP field-value CRLF)
   38  */
   39 class Parser : public RefCountable
   40 {
   41 public:
   42     typedef SBuf::size_type size_type; ///< size/length type, same as SBuf::size_type
   43
   44     Parser() = default;
   45     Parser(const Parser &) = default;
   46     Parser &operator =(const Parser &) = default;
   47     Parser(Parser &&) = default;
   48     Parser &operator =(Parser &&) = default;
   49     virtual ~Parser() {}
   50
   51     /// Set this parser back to a default state.
   52     /// Will DROP any reference to a buffer (does not free).
   53     virtual void clear() = 0;
   54
   55     /// attempt to parse a message from the buffer
   56     /// \retval true if a full message was found and parsed
   57     /// \retval false if incomplete, invalid or no message was found
   58     virtual bool parse(const SBuf &aBuf) = 0;
   59
   60     /** Whether the parser is waiting on more data to complete parsing a message.
   61      * Use to distinguish between incomplete data and error results
   62      * when parse() returns false. True until the HTTP_PARSE_DONE stage is reached.
   63      */
   64     bool needsMoreData() const {return parsingStage_!=HTTP_PARSE_DONE;}
   65
   66     /// size in bytes of the first line including CRLF terminator
   67     virtual size_type firstLineSize() const = 0;
   68
   69     /// size in bytes of the message headers including CRLF terminator(s)
   70     /// but excluding first-line bytes
   71     size_type headerBlockSize() const {return mimeHeaderBlock_.length();}
   72
   73     /// size in bytes of HTTP message block, includes first-line and mime headers
   74     /// excludes any body/entity/payload bytes
   75     /// excludes any garbage prefix before the first-line
   76     size_type messageHeaderSize() const {return firstLineSize() + headerBlockSize();}
   77
   78     /// buffer containing HTTP mime headers, excluding message first-line.
   79     SBuf mimeHeader() const {return mimeHeaderBlock_;}
   80
   81     /// the protocol label for this message
   82     const AnyP::ProtocolVersion & messageProtocol() const {return msgProtocol_;}
   83
   84     /**
   85      * Scan the mime header block (badly) for a header with the given name.
   86      *
   87      * BUG: omits lines when searching for headers with obs-fold or multiple entries.
   88      *
   89      * BUG: limits output to just 1KB when Squid accepts up to 64KB line length.
   90      *
   91      * \return A pointer to a field-value of the first matching field-name, or NULL.
   92      */
   93     char *getHeaderField(const char *name);
   94
   95     /// the remaining unprocessed section of buffer
   96     const SBuf &remaining() const {return buf_;}
   97
   98     /**
   99      * HTTP status code resulting from the parse process.
  100      * Intended for use when handling invalid messages.
  101      *
  102      * Http::scNone indicates incomplete parse,
  103      * Http::scOkay indicates no error,
  104      * other codes represent a parse error.
  105      */
  106     Http::StatusCode parseStatusCode = Http::scNone;
  107
  108     /// Whitespace between regular protocol elements.
  109     /// Seen in RFCs as OWS, RWS, BWS, SP/HTAB but may be "relaxed" by us.
  110     /// See also: DelimiterCharacters().
  111     static const CharacterSet &WhitespaceCharacters();
  112
  113     /// Whitespace between protocol elements in restricted contexts like
  114     /// request line, status line, asctime-date, and credentials
  115     /// Seen in RFCs as SP but may be "relaxed" by us.
  116     /// See also: WhitespaceCharacters().
  117     /// XXX: Misnamed and overused.
  118     static const CharacterSet &DelimiterCharacters();
  119
  120 protected:
  121     /**
  122      * Detect and skip the CRLF or (if tolerant) LF line terminator,
  123      * consuming it from the tokenizer.
  124      *
  125      * throws if non-terminator is detected.
  126      * \retval true only if line terminator found.
  127      * \retval false incomplete or missing line terminator, need more data.
  128      */
  129     bool skipLineTerminator(Http1::Tokenizer &tok) const;
  130
  131     /**
  132      * Scan to find the mime headers block for current message.
  133      * \param limit maximum number of characters within which the block must be identified
  134      * \retval true   If mime block (or a block's non-existence) has been
  135      *                identified accurately within limit characters.
  136      *                mimeHeaderBlock_ has been updated and buf_ consumed.
  137      *
  138      * \retval false  An error occurred, or no mime terminator found within limit.
  139      */
  140     bool grabMimeBlock(const char *which, const size_t limit); // NOTE(review): 'which' is undocumented in this header; presumably a descriptive label — confirm in the implementation
  141
  142     /// RFC 7230 section 2.6 - 7 magic octets
  143     static const SBuf Http1magic;
  144
  145     /// bytes remaining to be parsed
  146     SBuf buf_;
  147
  148     /// what stage the parser is currently up to
  149     ParseState parsingStage_ = HTTP_PARSE_NONE;
  150
  151     /// what protocol label has been found in the first line (if any)
  152     AnyP::ProtocolVersion msgProtocol_;
  153
  154     /// buffer holding the mime headers (if any)
  155     SBuf mimeHeaderBlock_;
  156
  157     /// Whether the invalid HTTP as HTTP/0.9 hack expects a mime header block
  158     bool hackExpectsMime_ = false;
  159
  160 private:
  161     void cleanMimePrefix(); // NOTE(review): undocumented private helper; only declared in this header
  162     void unfoldMime(); // NOTE(review): undocumented private helper; only declared in this header
  163 };
 164
  165 /// Skips and, if needed, warns about RFC 7230 BWS ("bad" whitespace), working through the given tokenizer.
  166 /// \returns true (always; unlike all the skip*() functions)
  167 bool ParseBws(Tokenizer &tok);
 168
  169 /// \returns the right debugs() level for logging HTTP violation messages
  170 int ErrorLevel();
 171
 172 } // namespace One
 173 } // namespace Http
 174
 175 #endif /*  _SQUID_SRC_HTTP_ONE_PARSER_H */
 176