]> git.ipfire.org Git - thirdparty/squid.git/blob - src/http/one/Parser.h
Parser-NG: HTTP Response Parser upgrade
[thirdparty/squid.git] / src / http / one / Parser.h
1 /*
2 * Copyright (C) 1996-2015 The Squid Software Foundation and contributors
3 *
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
7 */
8
9 #ifndef _SQUID_SRC_HTTP_ONE_PARSER_H
10 #define _SQUID_SRC_HTTP_ONE_PARSER_H
11
12 #include "anyp/ProtocolVersion.h"
13 #include "http/one/forward.h"
14 #include "http/StatusCode.h"
15 #include "SBuf.h"
16
// Forward declaration only: this header names the tokenizer type solely as a
// reference parameter (Http::One::Parser::skipLineTerminator), so its full
// definition is not required here.
// Note: inside Http::One the unqualified name "Parser" denotes the class
// declared below, so this namespace has to be spelled ::Parser there.
namespace Parser {
class Tokenizer;
}
20
21 namespace Http {
22 namespace One {
23
/// Parser states.
/// Identifies which part of an HTTP/1 message the parser is working on.
/// Enumerators keep their implicit values (0..3) in declaration order.
enum ParseState {
    HTTP_PARSE_NONE,  ///< initialized, but nothing usefully parsed yet
    HTTP_PARSE_FIRST, ///< HTTP/1 message first-line
    HTTP_PARSE_MIME,  ///< HTTP/1 mime-header block
    HTTP_PARSE_DONE   ///< parsed a message header, or reached a terminal syntax error
};
31
32 /** HTTP/1.x protocol parser
33 *
34 * Works on a raw character I/O buffer and tokenizes the content into
35 * the major CRLF delimited segments of an HTTP/1 procotol message:
36 *
37 * \item first-line (request-line / simple-request / status-line)
38 * \item mime-header 0*( header-name ':' SP field-value CRLF)
39 */
40 class Parser : public RefCountable
41 {
42 public:
43 typedef SBuf::size_type size_type;
44
45 Parser() : parseStatusCode(Http::scNone), parsingStage_(HTTP_PARSE_NONE) {}
46 virtual ~Parser() {}
47
48 /// Set this parser back to a default state.
49 /// Will DROP any reference to a buffer (does not free).
50 virtual void clear() = 0;
51
52 /// attempt to parse a message from the buffer
53 /// \retval true if a full message was found and parsed
54 /// \retval false if incomplete, invalid or no message was found
55 virtual bool parse(const SBuf &aBuf) = 0;
56
57 /** Whether the parser is waiting on more data to complete parsing a message.
58 * Use to distinguish between incomplete data and error results
59 * when parse() returns false.
60 */
61 bool needsMoreData() const {return parsingStage_!=HTTP_PARSE_DONE;}
62
63 /// size in bytes of the first line including CRLF terminator
64 virtual size_type firstLineSize() const = 0;
65
66 /// size in bytes of the message headers including CRLF terminator(s)
67 /// but excluding first-line bytes
68 size_type headerBlockSize() const {return mimeHeaderBlock_.length();}
69
70 /// size in bytes of HTTP message block, includes first-line and mime headers
71 /// excludes any body/entity/payload bytes
72 /// excludes any garbage prefix before the first-line
73 size_type messageHeaderSize() const {return firstLineSize() + headerBlockSize();}
74
75 /// buffer containing HTTP mime headers, excluding message first-line.
76 SBuf mimeHeader() const {return mimeHeaderBlock_;}
77
78 /// the protocol label for this message
79 const AnyP::ProtocolVersion & messageProtocol() const {return msgProtocol_;}
80
81 /**
82 * Scan the mime header block (badly) for a header with the given name.
83 *
84 * BUG: omits lines when searching for headers with obs-fold or multiple entries.
85 *
86 * BUG: limits output to just 1KB when Squid accepts up to 64KB line length.
87 *
88 * \return A pointer to a field-value of the first matching field-name, or NULL.
89 */
90 char *getHeaderField(const char *name);
91
92 /// the remaining unprocessed section of buffer
93 const SBuf &remaining() const {return buf_;}
94
95 /**
96 * HTTP status code resulting from the parse process.
97 * to be used on the invalid message handling.
98 *
99 * Http::scNone indicates incomplete parse,
100 * Http::scOkay indicates no error,
101 * other codes represent a parse error.
102 */
103 Http::StatusCode parseStatusCode;
104
105 protected:
106 /// detect and skip the CRLF or (if tolerant) LF line terminator
107 /// consume from the tokenizer and return true only if found
108 bool skipLineTerminator(::Parser::Tokenizer &tok) const;
109
110 /**
111 * Scan to find the mime headers block for current message.
112 *
113 * \retval true If mime block (or a blocks non-existence) has been
114 * identified accurately within limit characters.
115 * mimeHeaderBlock_ has been updated and buf_ consumed.
116 *
117 * \retval false An error occured, or no mime terminator found within limit.
118 */
119 bool grabMimeBlock(const char *which, const size_t limit);
120
121 /// RFC 7230 section 2.6 - 7 magic octets
122 static const SBuf Http1magic;
123
124 /// bytes remaining to be parsed
125 SBuf buf_;
126
127 /// what stage the parser is currently up to
128 ParseState parsingStage_;
129
130 /// what protocol label has been found in the first line (if any)
131 AnyP::ProtocolVersion msgProtocol_;
132
133 /// buffer holding the mime headers (if any)
134 SBuf mimeHeaderBlock_;
135 };
136
137 } // namespace One
138 } // namespace Http
139
140 #endif /* _SQUID_SRC_HTTP_ONE_PARSER_H */
141