]>
Commit | Line | Data |
---|---|---|
48a37aee | 1 | /* |
77b1029d | 2 | * Copyright (C) 1996-2020 The Squid Software Foundation and contributors |
48a37aee AJ |
3 | * |
4 | * Squid software is distributed under GPLv2+ license and includes | |
5 | * contributions from numerous individuals and organizations. | |
6 | * Please see the COPYING and CONTRIBUTORS files for details. | |
7 | */ | |
8 | ||
83aacd9a AJ |
9 | #ifndef _SQUID_SRC_HTTP_ONE_PARSER_H |
10 | #define _SQUID_SRC_HTTP_ONE_PARSER_H | |
4c14658e | 11 | |
c99510dd AJ |
12 | #include "anyp/ProtocolVersion.h" |
13 | #include "http/one/forward.h" | |
f1d5359e | 14 | #include "http/StatusCode.h" |
417da400 | 15 | #include "parser/forward.h" |
65e41a45 | 16 | #include "sbuf/SBuf.h" |
4c14658e | 17 | |
bb86dcd4 | 18 | namespace Http { |
1b51ee7b | 19 | namespace One { |
bb86dcd4 | 20 | |
4c14658e | 21 | // Parser states |
678451c0 | 22 | enum ParseState { |
350ec67a AJ |
23 | HTTP_PARSE_NONE, ///< initialized, but nothing usefully parsed yet |
24 | HTTP_PARSE_FIRST, ///< HTTP/1 message first-line | |
25 | HTTP_PARSE_CHUNK_SZ, ///< HTTP/1.1 chunked encoding chunk-size | |
26 | HTTP_PARSE_CHUNK_EXT, ///< HTTP/1.1 chunked encoding chunk-ext | |
27 | HTTP_PARSE_CHUNK, ///< HTTP/1.1 chunked encoding chunk-data | |
28 | HTTP_PARSE_MIME, ///< HTTP/1 mime-header block | |
29 | HTTP_PARSE_DONE ///< parsed a message header, or reached a terminal syntax error | |
678451c0 | 30 | }; |
4c14658e | 31 | |
36a9c964 | 32 | /** HTTP/1.x protocol parser |
4c14658e | 33 | * |
00589b8e | 34 | * Works on a raw character I/O buffer and tokenizes the content into |
36a9c964 | 35 | * the major CRLF delimited segments of an HTTP/1 protocol message: |
4c14658e | 36 | * |
f439fbd2 AJ |
37 | * \li first-line (request-line / simple-request / status-line) |
38 | * \li mime-header 0*( header-name ':' SP field-value CRLF) | |
4c14658e | 39 | */ |
7322c9dd | 40 | class Parser : public RefCountable |
4c14658e AJ |
41 | { |
42 | public: | |
8e677087 | 43 | typedef SBuf::size_type size_type; |
417da400 | 44 | typedef ::Parser::Tokenizer Tokenizer; |
8e677087 | 45 | |
20b1beab AJ |
46 | Parser() = default; |
47 | Parser(const Parser &) = default; | |
48 | Parser &operator =(const Parser &) = default; | |
49 | Parser(Parser &&) = default; | |
50 | Parser &operator =(Parser &&) = default; | |
f9688132 | 51 | virtual ~Parser() {} |
4c14658e AJ |
52 | |
53 | /// Set this parser back to a default state. | |
54 | /// Will DROP any reference to a buffer (does not free). | |
f9688132 | 55 | virtual void clear() = 0; |
4c14658e | 56 | |
36a9c964 AJ |
57 | /// attempt to parse a message from the buffer |
58 | /// \retval true if a full message was found and parsed | |
59 | /// \retval false if incomplete, invalid or no message was found | |
60 | virtual bool parse(const SBuf &aBuf) = 0; | |
f9daf571 | 61 | |
36a9c964 AJ |
62 | /** Whether the parser is waiting on more data to complete parsing a message. |
63 | * Use to distinguish between incomplete data and error results | |
64 | * when parse() returns false. | |
87abd755 | 65 | */ |
36a9c964 | 66 | bool needsMoreData() const {return parsingStage_!=HTTP_PARSE_DONE;} |
f9daf571 AJ |
67 | |
68 | /// size in bytes of the first line including CRLF terminator | |
8e677087 | 69 | virtual size_type firstLineSize() const = 0; |
7e1d6c48 | 70 | |
f4880526 | 71 | /// size in bytes of the message headers including CRLF terminator(s) |
7322c9dd | 72 | /// but excluding first-line bytes |
8e677087 | 73 | size_type headerBlockSize() const {return mimeHeaderBlock_.length();} |
7e1d6c48 | 74 | |
7322c9dd | 75 | /// size in bytes of HTTP message block, includes first-line and mime headers |
7e1d6c48 | 76 | /// excludes any body/entity/payload bytes |
7322c9dd | 77 | /// excludes any garbage prefix before the first-line |
8e677087 | 78 | size_type messageHeaderSize() const {return firstLineSize() + headerBlockSize();} |
7e1d6c48 | 79 | |
7322c9dd | 80 | /// buffer containing HTTP mime headers, excluding message first-line. |
36a9c964 | 81 | SBuf mimeHeader() const {return mimeHeaderBlock_;} |
7322c9dd AJ |
82 | |
83 | /// the protocol label for this message | |
84 | const AnyP::ProtocolVersion & messageProtocol() const {return msgProtocol_;} | |
afff15b2 | 85 | |
a4181565 | 86 | /** |
2a51e34e | 87 | * Scan the mime header block (badly) for a Host header. |
687696c1 AJ |
88 | * |
89 | * BUG: omits lines when searching for headers with obs-fold or multiple entries. | |
90 | * | |
91 | * BUG: limits output to just 1KB when Squid accepts up to 64KB line length. | |
92 | * | |
a4181565 AJ |
93 | * \return A pointer to a field-value of the first matching field-name, or NULL. |
94 | */ | |
2a51e34e | 95 | char *getHostHeaderField(); |
a4181565 | 96 | |
b749de75 AJ |
97 | /// the remaining unprocessed section of buffer |
98 | const SBuf &remaining() const {return buf_;} | |
99 | ||
f1d5359e AJ |
100 | /** |
101 | * HTTP status code resulting from the parse process. | |
102 | * Intended for use when handling invalid messages. | |
103 | * | |
104 | * Http::scNone indicates incomplete parse, | |
105 | * Http::scOkay indicates no error, | |
106 | * other codes represent a parse error. | |
107 | */ | |
7a010fb2 | 108 | Http::StatusCode parseStatusCode = Http::scNone; |
f1d5359e | 109 | |
26f0a359 AR |
110 | /// Whitespace between regular protocol elements. |
111 | /// Seen in RFCs as OWS, RWS, BWS, SP/HTAB but may be "relaxed" by us. | |
112 | /// See also: DelimiterCharacters(). | |
113 | static const CharacterSet &WhitespaceCharacters(); | |
114 | ||
115 | /// Whitespace between protocol elements in restricted contexts like | |
116 | /// request line, status line, asctime-date, and credentials | |
117 | /// Seen in RFCs as SP but may be "relaxed" by us. | |
118 | /// See also: WhitespaceCharacters(). | |
119 | /// XXX: Misnamed and overused. | |
a1b9ec20 AR |
120 | static const CharacterSet &DelimiterCharacters(); |
121 | ||
b749de75 | 122 | protected: |
188ad27f AJ |
123 | /** |
124 | * detect and skip the CRLF or (if tolerant) LF line terminator | |
125 | * consume from the tokenizer. | |
126 | * | |
417da400 | 127 | * \throws exception on bad or InsufficientInput. |
188ad27f AJ |
128 | * Returns normally only if a line terminator was found and skipped. |
129 | * An incomplete or missing line terminator (need more data) is reported by throwing, not by a return value. | |
130 | */ | |
417da400 | 131 | void skipLineTerminator(Tokenizer &) const; |
b8f86fd2 AJ |
132 | |
133 | /** | |
f8cab755 | 134 | * Scan to find the mime headers block for current message. |
b8f86fd2 | 135 | * |
f8cab755 AJ |
136 | * \retval true If mime block (or a block's non-existence) has been |
137 | * identified accurately within limit characters. | |
138 | * mimeHeaderBlock_ has been updated and buf_ consumed. | |
139 | * | |
61beade2 | 140 | * \retval false An error occurred, or no mime terminator found within limit. |
b8f86fd2 | 141 | */ |
f8cab755 | 142 | bool grabMimeBlock(const char *which, const size_t limit); |
f1d5359e | 143 | |
9651320a AJ |
144 | /// RFC 7230 section 2.6 - 7 magic octets |
145 | static const SBuf Http1magic; | |
146 | ||
b749de75 AJ |
147 | /// bytes remaining to be parsed |
148 | SBuf buf_; | |
74f478f8 | 149 | |
7322c9dd | 150 | /// what stage the parser is currently up to |
7a010fb2 | 151 | ParseState parsingStage_ = HTTP_PARSE_NONE; |
7322c9dd | 152 | |
7a4fa6a0 AJ |
153 | /// what protocol label has been found in the first line (if any) |
154 | AnyP::ProtocolVersion msgProtocol_; | |
7322c9dd | 155 | |
7a4fa6a0 | 156 | /// buffer holding the mime headers (if any) |
7322c9dd | 157 | SBuf mimeHeaderBlock_; |
e47e0802 AJ |
158 | |
159 | /// Whether the invalid HTTP as HTTP/0.9 hack expects a mime header block | |
7a010fb2 | 160 | bool hackExpectsMime_ = false; |
00237269 AJ |
161 | |
162 | private: | |
163 | void cleanMimePrefix(); | |
164 | void unfoldMime(); | |
7322c9dd AJ |
165 | }; |
166 | ||
26f0a359 | 167 | /// skips and, if needed, warns about RFC 7230 BWS ("bad" whitespace) |
417da400 EB |
168 | /// \throws InsufficientInput when the end of BWS cannot be confirmed |
169 | void ParseBws(Parser::Tokenizer &); | |
26f0a359 AR |
170 | |
171 | /// the right debugs() level for logging HTTP violation messages | |
172 | int ErrorLevel(); | |
173 | ||
1b51ee7b | 174 | } // namespace One |
bb86dcd4 AJ |
175 | } // namespace Http |
176 | ||
83aacd9a | 177 | #endif /* _SQUID_SRC_HTTP_ONE_PARSER_H */ |
f53969cc | 178 |