2 * Copyright (C) 1996-2018 The Squid Software Foundation and contributors
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
9 #ifndef _SQUID_SRC_HTTP_ONE_PARSER_H
10 #define _SQUID_SRC_HTTP_ONE_PARSER_H
12 #include "anyp/ProtocolVersion.h"
13 #include "http/one/forward.h"
14 #include "http/StatusCode.h"
15 #include "sbuf/SBuf.h"
22 HTTP_PARSE_NONE
, ///< initialized, but nothing usefully parsed yet
23 HTTP_PARSE_FIRST
, ///< HTTP/1 message first-line
24 HTTP_PARSE_CHUNK_SZ
, ///< HTTP/1.1 chunked encoding chunk-size
25 HTTP_PARSE_CHUNK_EXT
, ///< HTTP/1.1 chunked encoding chunk-ext
26 HTTP_PARSE_CHUNK
, ///< HTTP/1.1 chunked encoding chunk-data
27 HTTP_PARSE_MIME
, ///< HTTP/1 mime-header block
28 HTTP_PARSE_DONE
///< parsed a message header, or reached a terminal syntax error
31 /** HTTP/1.x protocol parser
33 * Works on a raw character I/O buffer and tokenizes the content into
34 * the major CRLF delimited segments of an HTTP/1 protocol message:
36 * \item first-line (request-line / simple-request / status-line)
37 * \item mime-header 0*( header-name ':' SP field-value CRLF)
39 class Parser
: public RefCountable
42 typedef SBuf::size_type size_type
;
45 Parser(const Parser
&) = default;
46 Parser
&operator =(const Parser
&) = default;
47 Parser(Parser
&&) = default;
48 Parser
&operator =(Parser
&&) = default;
51 /// Set this parser back to a default state.
52 /// Will DROP any reference to a buffer (does not free).
53 virtual void clear() = 0;
55 /// attempt to parse a message from the buffer
56 /// \retval true if a full message was found and parsed
57 /// \retval false if incomplete, invalid or no message was found
58 virtual bool parse(const SBuf
&aBuf
) = 0;
60 /** Whether the parser is waiting on more data to complete parsing a message.
61 * Use to distinguish between incomplete data and error results
62 * when parse() returns false.
64 bool needsMoreData() const {return parsingStage_
!=HTTP_PARSE_DONE // every stage before HTTP_PARSE_DONE means the parser is still waiting
;} // HTTP_PARSE_DONE covers both a parsed header and a terminal syntax error, so this is false in either case
66 /// size in bytes of the first line including CRLF terminator
67 virtual size_type
firstLineSize() const = 0;
69 /// size in bytes of the message headers including CRLF terminator(s)
70 /// but excluding first-line bytes
71 size_type
headerBlockSize() const {return mimeHeaderBlock_
.length();} // simply the length of the stored mimeHeaderBlock_ buffer
73 /// size in bytes of HTTP message block, includes first-line and mime headers
74 /// excludes any body/entity/payload bytes
75 /// excludes any garbage prefix before the first-line
76 size_type
messageHeaderSize() const {return firstLineSize() + headerBlockSize();} // firstLineSize() is pure virtual, so the first-line part comes from the derived parser
78 /// buffer containing HTTP mime headers, excluding message first-line.
79 SBuf
mimeHeader() const {return mimeHeaderBlock_
;} // returned by value (an SBuf object), not as a reference to the member
81 /// the protocol label for this message
82 const AnyP::ProtocolVersion
& messageProtocol() const {return msgProtocol_
;}
85 * Scan the mime header block (badly) for a header with the given name.
87 * BUG: omits lines when searching for headers with obs-fold or multiple entries.
89 * BUG: limits output to just 1KB when Squid accepts up to 64KB line length.
91 * \return A pointer to a field-value of the first matching field-name, or NULL.
93 char *getHeaderField(const char *name
);
95 /// the remaining unprocessed section of buffer
96 const SBuf
&remaining() const {return buf_
;} // hands out a const reference to the member buf_; no copy is made here
99 * HTTP status code resulting from the parse process.
100 * To be used when handling an invalid message.
102 * Http::scNone indicates incomplete parse,
103 * Http::scOkay indicates no error,
104 * other codes represent a parse error.
106 Http::StatusCode parseStatusCode
= Http::scNone
;
108 /// Whitespace between regular protocol elements.
109 /// Seen in RFCs as OWS, RWS, BWS, SP/HTAB but may be "relaxed" by us.
110 /// See also: DelimiterCharacters().
111 static const CharacterSet
&WhitespaceCharacters();
113 /// Whitespace between protocol elements in restricted contexts like
114 /// request line, status line, asctime-date, and credentials
115 /// Seen in RFCs as SP but may be "relaxed" by us.
116 /// See also: WhitespaceCharacters().
117 /// XXX: Misnamed and overused.
118 static const CharacterSet
&DelimiterCharacters();
122 * detect and skip the CRLF or (if tolerant) LF line terminator
123 * consume from the tokenizer.
125 * throws if non-terminator is detected.
126 * \retval true only if line terminator found.
127 * \retval false incomplete or missing line terminator, need more data.
129 bool skipLineTerminator(Http1::Tokenizer
&tok
) const;
132 * Scan to find the mime headers block for current message.
134 * \retval true If mime block (or a block's non-existence) has been
135 * identified accurately within limit characters.
136 * mimeHeaderBlock_ has been updated and buf_ consumed.
138 * \retval false An error occurred, or no mime terminator found within limit.
140 bool grabMimeBlock(const char *which
, const size_t limit
);
142 /// RFC 7230 section 2.6 - 7 magic octets
143 static const SBuf Http1magic
;
145 /// bytes remaining to be parsed
148 /// what stage the parser is currently up to
149 ParseState parsingStage_
= HTTP_PARSE_NONE
;
151 /// what protocol label has been found in the first line (if any)
152 AnyP::ProtocolVersion msgProtocol_
;
154 /// buffer holding the mime headers (if any)
155 SBuf mimeHeaderBlock_
;
157 /// Whether the invalid HTTP as HTTP/0.9 hack expects a mime header block
158 bool hackExpectsMime_
= false;
161 void cleanMimePrefix();
165 /// skips and, if needed, warns about RFC 7230 BWS ("bad" whitespace)
166 /// \returns true (always; unlike all the skip*() functions)
167 bool ParseBws(Tokenizer
&tok
);
169 /// the right debugs() level for logging HTTP violation messages
175 #endif /* _SQUID_SRC_HTTP_ONE_PARSER_H */