]> git.ipfire.org Git - thirdparty/squid.git/blame - src/http/one/Parser.h
Source Format Enforcement (#763)
[thirdparty/squid.git] / src / http / one / Parser.h
CommitLineData
48a37aee 1/*
f70aedc4 2 * Copyright (C) 1996-2021 The Squid Software Foundation and contributors
48a37aee
AJ
3 *
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
7 */
8
83aacd9a
AJ
9#ifndef _SQUID_SRC_HTTP_ONE_PARSER_H
10#define _SQUID_SRC_HTTP_ONE_PARSER_H
4c14658e 11
c99510dd
AJ
12#include "anyp/ProtocolVersion.h"
13#include "http/one/forward.h"
f1d5359e 14#include "http/StatusCode.h"
417da400 15#include "parser/forward.h"
65e41a45 16#include "sbuf/SBuf.h"
4c14658e 17
bb86dcd4 18namespace Http {
1b51ee7b 19namespace One {
bb86dcd4 20
/// Parser states
enum ParseState {
    /// initialized, but nothing usefully parsed yet
    HTTP_PARSE_NONE,

    /// HTTP/1 message first-line
    HTTP_PARSE_FIRST,

    /// HTTP/1.1 chunked encoding chunk-size
    HTTP_PARSE_CHUNK_SZ,

    /// HTTP/1.1 chunked encoding chunk-ext
    HTTP_PARSE_CHUNK_EXT,

    /// HTTP/1.1 chunked encoding chunk-data
    HTTP_PARSE_CHUNK,

    /// HTTP/1 mime-header block
    HTTP_PARSE_MIME,

    /// parsed a message header, or reached a terminal syntax error
    HTTP_PARSE_DONE
};
4c14658e 31
36a9c964 32/** HTTP/1.x protocol parser
4c14658e 33 *
00589b8e 34 * Works on a raw character I/O buffer and tokenizes the content into
36a9c964 35 * the major CRLF delimited segments of an HTTP/1 procotol message:
4c14658e 36 *
f439fbd2
AJ
37 * \li first-line (request-line / simple-request / status-line)
38 * \li mime-header 0*( header-name ':' SP field-value CRLF)
4c14658e 39 */
7322c9dd 40class Parser : public RefCountable
4c14658e
AJ
41{
42public:
8e677087 43 typedef SBuf::size_type size_type;
417da400 44 typedef ::Parser::Tokenizer Tokenizer;
8e677087 45
20b1beab
AJ
46 Parser() = default;
47 Parser(const Parser &) = default;
48 Parser &operator =(const Parser &) = default;
49 Parser(Parser &&) = default;
50 Parser &operator =(Parser &&) = default;
f9688132 51 virtual ~Parser() {}
4c14658e
AJ
52
53 /// Set this parser back to a default state.
54 /// Will DROP any reference to a buffer (does not free).
f9688132 55 virtual void clear() = 0;
4c14658e 56
36a9c964
AJ
57 /// attempt to parse a message from the buffer
58 /// \retval true if a full message was found and parsed
59 /// \retval false if incomplete, invalid or no message was found
60 virtual bool parse(const SBuf &aBuf) = 0;
f9daf571 61
36a9c964
AJ
62 /** Whether the parser is waiting on more data to complete parsing a message.
63 * Use to distinguish between incomplete data and error results
64 * when parse() returns false.
87abd755 65 */
36a9c964 66 bool needsMoreData() const {return parsingStage_!=HTTP_PARSE_DONE;}
f9daf571
AJ
67
68 /// size in bytes of the first line including CRLF terminator
8e677087 69 virtual size_type firstLineSize() const = 0;
7e1d6c48 70
f4880526 71 /// size in bytes of the message headers including CRLF terminator(s)
7322c9dd 72 /// but excluding first-line bytes
8e677087 73 size_type headerBlockSize() const {return mimeHeaderBlock_.length();}
7e1d6c48 74
7322c9dd 75 /// size in bytes of HTTP message block, includes first-line and mime headers
7e1d6c48 76 /// excludes any body/entity/payload bytes
7322c9dd 77 /// excludes any garbage prefix before the first-line
8e677087 78 size_type messageHeaderSize() const {return firstLineSize() + headerBlockSize();}
7e1d6c48 79
7322c9dd 80 /// buffer containing HTTP mime headers, excluding message first-line.
36a9c964 81 SBuf mimeHeader() const {return mimeHeaderBlock_;}
7322c9dd
AJ
82
83 /// the protocol label for this message
84 const AnyP::ProtocolVersion & messageProtocol() const {return msgProtocol_;}
afff15b2 85
a4181565 86 /**
2a51e34e 87 * Scan the mime header block (badly) for a Host header.
687696c1
AJ
88 *
89 * BUG: omits lines when searching for headers with obs-fold or multiple entries.
90 *
91 * BUG: limits output to just 1KB when Squid accepts up to 64KB line length.
92 *
a4181565
AJ
93 * \return A pointer to a field-value of the first matching field-name, or NULL.
94 */
2a51e34e 95 char *getHostHeaderField();
a4181565 96
b749de75
AJ
97 /// the remaining unprocessed section of buffer
98 const SBuf &remaining() const {return buf_;}
99
f1d5359e
AJ
100 /**
101 * HTTP status code resulting from the parse process.
102 * to be used on the invalid message handling.
103 *
104 * Http::scNone indicates incomplete parse,
105 * Http::scOkay indicates no error,
106 * other codes represent a parse error.
107 */
7a010fb2 108 Http::StatusCode parseStatusCode = Http::scNone;
f1d5359e 109
26f0a359
AR
110 /// Whitespace between regular protocol elements.
111 /// Seen in RFCs as OWS, RWS, BWS, SP/HTAB but may be "relaxed" by us.
112 /// See also: DelimiterCharacters().
113 static const CharacterSet &WhitespaceCharacters();
114
115 /// Whitespace between protocol elements in restricted contexts like
116 /// request line, status line, asctime-date, and credentials
117 /// Seen in RFCs as SP but may be "relaxed" by us.
118 /// See also: WhitespaceCharacters().
119 /// XXX: Misnamed and overused.
a1b9ec20
AR
120 static const CharacterSet &DelimiterCharacters();
121
b749de75 122protected:
188ad27f
AJ
123 /**
124 * detect and skip the CRLF or (if tolerant) LF line terminator
125 * consume from the tokenizer.
126 *
417da400 127 * \throws exception on bad or InsuffientInput.
188ad27f
AJ
128 * \retval true only if line terminator found.
129 * \retval false incomplete or missing line terminator, need more data.
130 */
417da400 131 void skipLineTerminator(Tokenizer &) const;
b8f86fd2
AJ
132
133 /**
f8cab755 134 * Scan to find the mime headers block for current message.
b8f86fd2 135 *
f8cab755
AJ
136 * \retval true If mime block (or a blocks non-existence) has been
137 * identified accurately within limit characters.
138 * mimeHeaderBlock_ has been updated and buf_ consumed.
139 *
61beade2 140 * \retval false An error occurred, or no mime terminator found within limit.
b8f86fd2 141 */
f8cab755 142 bool grabMimeBlock(const char *which, const size_t limit);
f1d5359e 143
9651320a
AJ
144 /// RFC 7230 section 2.6 - 7 magic octets
145 static const SBuf Http1magic;
146
b749de75
AJ
147 /// bytes remaining to be parsed
148 SBuf buf_;
74f478f8 149
7322c9dd 150 /// what stage the parser is currently up to
7a010fb2 151 ParseState parsingStage_ = HTTP_PARSE_NONE;
7322c9dd 152
7a4fa6a0
AJ
153 /// what protocol label has been found in the first line (if any)
154 AnyP::ProtocolVersion msgProtocol_;
7322c9dd 155
7a4fa6a0 156 /// buffer holding the mime headers (if any)
7322c9dd 157 SBuf mimeHeaderBlock_;
e47e0802
AJ
158
159 /// Whether the invalid HTTP as HTTP/0.9 hack expects a mime header block
7a010fb2 160 bool hackExpectsMime_ = false;
00237269
AJ
161
162private:
163 void cleanMimePrefix();
164 void unfoldMime();
7322c9dd
AJ
165};
166
26f0a359 167/// skips and, if needed, warns about RFC 7230 BWS ("bad" whitespace)
417da400
EB
168/// \throws InsufficientInput when the end of BWS cannot be confirmed
169void ParseBws(Parser::Tokenizer &);
26f0a359
AR
170
/// \returns the right debugs() level for logging HTTP violation messages
int ErrorLevel();
173
1b51ee7b 174} // namespace One
bb86dcd4
AJ
175} // namespace Http
176
83aacd9a 177#endif /* _SQUID_SRC_HTTP_ONE_PARSER_H */
f53969cc 178