]>
Commit | Line | Data |
---|---|---|
48a37aee | 1 | /* |
5b74111a | 2 | * Copyright (C) 1996-2018 The Squid Software Foundation and contributors |
48a37aee AJ |
3 | * |
4 | * Squid software is distributed under GPLv2+ license and includes | |
5 | * contributions from numerous individuals and organizations. | |
6 | * Please see the COPYING and CONTRIBUTORS files for details. | |
7 | */ | |
8 | ||
83aacd9a AJ |
9 | #ifndef _SQUID_SRC_HTTP_ONE_PARSER_H |
10 | #define _SQUID_SRC_HTTP_ONE_PARSER_H | |
4c14658e | 11 | |
c99510dd AJ |
12 | #include "anyp/ProtocolVersion.h" |
13 | #include "http/one/forward.h" | |
f1d5359e | 14 | #include "http/StatusCode.h" |
65e41a45 | 15 | #include "sbuf/SBuf.h" |
4c14658e | 16 | |
bb86dcd4 | 17 | namespace Http { |
1b51ee7b | 18 | namespace One { |
bb86dcd4 | 19 | |
4c14658e | 20 | // Parser states: the stage an HTTP/1 parser has reached while processing a message |
678451c0 | 21 | enum ParseState {
350ec67a AJ |
22 | HTTP_PARSE_NONE, ///< initialized, but nothing usefully parsed yet |
23 | HTTP_PARSE_FIRST, ///< HTTP/1 message first-line | |
24 | HTTP_PARSE_CHUNK_SZ, ///< HTTP/1.1 chunked encoding chunk-size | |
25 | HTTP_PARSE_CHUNK_EXT, ///< HTTP/1.1 chunked encoding chunk-ext | |
26 | HTTP_PARSE_CHUNK, ///< HTTP/1.1 chunked encoding chunk-data | |
27 | HTTP_PARSE_MIME, ///< HTTP/1 mime-header block | |
28 | HTTP_PARSE_DONE ///< parsed a message header, or reached a terminal syntax error | |
678451c0 | 29 | }; |
4c14658e | 30 | |
36a9c964 | 31 | /** HTTP/1.x protocol parser
4c14658e | 32 | * |
00589b8e | 33 | * Works on a raw character I/O buffer and tokenizes the content into
36a9c964 | 34 | * the major CRLF delimited segments of an HTTP/1 protocol message:
4c14658e | 35 | * |
7322c9dd | 36 | * \item first-line (request-line / simple-request / status-line)
36a9c964 | 37 | * \item mime-header 0*( header-name ':' SP field-value CRLF)
4c14658e | 38 | */ |
7322c9dd | 39 | class Parser : public RefCountable
4c14658e AJ |
40 | { |
41 | public: | |
8e677087 AJ |
42 | typedef SBuf::size_type size_type; |
43 | ||
20b1beab AJ |
44 | Parser() = default; |
45 | Parser(const Parser &) = default; | |
46 | Parser &operator =(const Parser &) = default; | |
47 | Parser(Parser &&) = default; | |
48 | Parser &operator =(Parser &&) = default; | |
f9688132 | 49 | virtual ~Parser() {} |
4c14658e AJ |
50 |
51 | /// Set this parser back to a default state. | |
52 | /// Will DROP any reference to a buffer (does not free). | |
f9688132 | 53 | virtual void clear() = 0; |
4c14658e | 54 | |
36a9c964 AJ |
55 | /// attempt to parse a message from the given buffer (aBuf) |
56 | /// \retval true if a full message was found and parsed | |
57 | /// \retval false if incomplete, invalid or no message was found | |
58 | virtual bool parse(const SBuf &aBuf) = 0; | |
f9daf571 | 59 | |
36a9c964 AJ |
60 | /** Whether the parser is waiting on more data to complete parsing a message. |
61 | * Use to distinguish between incomplete data and error results | |
62 | * when parse() returns false: true while the parsing stage has not reached HTTP_PARSE_DONE. | |
87abd755 | 63 | */ |
36a9c964 | 64 | bool needsMoreData() const {return parsingStage_!=HTTP_PARSE_DONE;} |
f9daf571 AJ |
65 |
66 | /// size in bytes of the first line including CRLF terminator | |
8e677087 | 67 | virtual size_type firstLineSize() const = 0; |
7e1d6c48 | 68 | |
f4880526 | 69 | /// size in bytes of the message headers including CRLF terminator(s) |
7322c9dd | 70 | /// but excluding first-line bytes |
8e677087 | 71 | size_type headerBlockSize() const {return mimeHeaderBlock_.length();} |
7e1d6c48 | 72 | |
7322c9dd | 73 | /// size in bytes of HTTP message block, includes first-line and mime headers |
7e1d6c48 | 74 | /// excludes any body/entity/payload bytes |
7322c9dd | 75 | /// excludes any garbage prefix before the first-line |
8e677087 | 76 | size_type messageHeaderSize() const {return firstLineSize() + headerBlockSize();} |
7e1d6c48 | 77 | |
7322c9dd | 78 | /// buffer containing HTTP mime headers, excluding message first-line. |
36a9c964 | 79 | SBuf mimeHeader() const {return mimeHeaderBlock_;} |
7322c9dd AJ |
80 |
81 | /// the protocol label for this message | |
82 | const AnyP::ProtocolVersion & messageProtocol() const {return msgProtocol_;} | |
afff15b2 | 83 | |
a4181565 | 84 | /** |
f1d5359e | 85 | * Scan the mime header block (badly) for a header with the given name.
687696c1 AJ |
86 | * |
87 | * BUG: omits lines when searching for headers with obs-fold or multiple entries. | |
88 | * | |
89 | * BUG: limits output to just 1KB when Squid accepts up to 64KB line length. | |
90 | * | |
a4181565 AJ |
91 | * \return A pointer to a field-value of the first matching field-name, or NULL. |
92 | */ | |
93 | char *getHeaderField(const char *name); | |
94 | ||
b749de75 AJ |
95 | /// the remaining unprocessed section of buffer |
96 | const SBuf &remaining() const {return buf_;} | |
97 | ||
f1d5359e AJ |
98 | /** |
99 | * HTTP status code resulting from the parse process. | |
100 | * To be used when handling invalid messages. | |
101 | * | |
102 | * Http::scNone indicates incomplete parse, | |
103 | * Http::scOkay indicates no error, | |
104 | * other codes represent a parse error. | |
105 | */ | |
7a010fb2 | 106 | Http::StatusCode parseStatusCode = Http::scNone; |
f1d5359e | 107 | |
26f0a359 AR |
108 | /// Whitespace between regular protocol elements. |
109 | /// Seen in RFCs as OWS, RWS, BWS, SP/HTAB but may be "relaxed" by us. | |
110 | /// See also: DelimiterCharacters(). | |
111 | static const CharacterSet &WhitespaceCharacters(); | |
112 | ||
113 | /// Whitespace between protocol elements in restricted contexts like | |
114 | /// request line, status line, asctime-date, and credentials | |
115 | /// Seen in RFCs as SP but may be "relaxed" by us. | |
116 | /// See also: WhitespaceCharacters(). | |
117 | /// XXX: Misnamed and overused. | |
a1b9ec20 AR |
118 | static const CharacterSet &DelimiterCharacters(); |
119 | ||
b749de75 | 120 | protected: |
188ad27f AJ |
121 | /** |
122 | * Detect and skip the CRLF or (if tolerant) LF line terminator, | |
123 | * consuming it from the tokenizer. | |
124 | * | |
125 | * Throws if a non-terminator is detected. | |
126 | * \retval true only if line terminator found. | |
127 | * \retval false incomplete or missing line terminator, need more data. | |
128 | */ | |
f29718b0 | 129 | bool skipLineTerminator(Http1::Tokenizer &tok) const; |
b8f86fd2 AJ |
130 |
131 | /** | |
f8cab755 | 132 | * Scan to find the mime headers block for current message. |
b8f86fd2 | 133 | * |
f8cab755 AJ |
134 | * \retval true If mime block (or a block's non-existence) has been |
135 | * identified accurately within limit characters. | |
136 | * mimeHeaderBlock_ has been updated and buf_ consumed. | |
137 | * | |
61beade2 | 138 | * \retval false An error occurred, or no mime terminator found within limit. |
b8f86fd2 | 139 | */ |
f8cab755 | 140 | bool grabMimeBlock(const char *which, const size_t limit); |
f1d5359e | 141 | |
9651320a AJ |
142 | /// RFC 7230 section 2.6 - 7 magic octets |
143 | static const SBuf Http1magic; | |
144 | ||
b749de75 AJ |
145 | /// bytes remaining to be parsed |
146 | SBuf buf_; | |
74f478f8 | 147 | |
7322c9dd | 148 | /// what stage the parser is currently up to |
7a010fb2 | 149 | ParseState parsingStage_ = HTTP_PARSE_NONE; |
7322c9dd | 150 | |
7a4fa6a0 AJ |
151 | /// what protocol label has been found in the first line (if any) |
152 | AnyP::ProtocolVersion msgProtocol_; | |
7322c9dd | 153 | |
7a4fa6a0 | 154 | /// buffer holding the mime headers (if any) |
7322c9dd | 155 | SBuf mimeHeaderBlock_; |
e47e0802 AJ |
156 |
157 | /// Whether the invalid-HTTP-as-HTTP/0.9 hack expects a mime header block | |
7a010fb2 | 158 | bool hackExpectsMime_ = false; |
00237269 AJ |
159 |
160 | private: | |
161 | void cleanMimePrefix(); | |
162 | void unfoldMime(); | |
7322c9dd AJ |
163 | }; |
164 | ||
26f0a359 AR |
165 | /// Skips and, if needed, warns about RFC 7230 BWS ("bad" whitespace). |
166 | /// \returns true (always; unlike all the skip*() functions) | |
167 | bool ParseBws(Tokenizer &tok); | |
168 | ||
169 | /// \returns the right debugs() level for logging HTTP violation messages | |
170 | int ErrorLevel(); | |
171 | ||
1b51ee7b | 172 | } // namespace One |
bb86dcd4 AJ |
173 | } // namespace Http |
174 | ||
83aacd9a | 175 | #endif /* _SQUID_SRC_HTTP_ONE_PARSER_H */ |
f53969cc | 176 |