3 #include "http/one/RequestParser.h"
4 #include "http/ProtocolVersion.h"
5 #include "mime_header.h"
6 #include "profiler/Profiler.h"
7 #include "SquidConfig.h"
9 Http::One::RequestParser::RequestParser() :
11 request_parse_status(Http::scNone
)
13 req
.start
= req
.end
= -1;
14 req
.m_start
= req
.m_end
= -1;
15 req
.u_start
= req
.u_end
= -1;
16 req
.v_start
= req
.v_end
= -1;
20 * Skip garbage (empty lines and, when tolerated, leading spaces) received ahead of the first line of a new request message.
22 * Governed by RFC 7230 section 3.5
24 * In the interest of robustness, a server that is expecting to receive
25 * and parse a request-line SHOULD ignore at least one empty line (CRLF)
26 * received prior to the request-line.
29 * Parsing state is stored between calls to avoid repeating buffer scans.
30 * If garbage is found the parsing offset is incremented.
33 Http::One::RequestParser::skipGarbageLines()
35 if (Config
.onoff
.relaxed_header_parser
) {
36 if (Config
.onoff
.relaxed_header_parser
< 0 && (buf_
[0] == '\r' || buf_
[0] == '\n'))
37 debugs(74, DBG_IMPORTANT
, "WARNING: Invalid HTTP Request: " <<
38 "CRLF bytes received ahead of request-line. " <<
39 "Ignored due to relaxed_header_parser.");
40 // Be tolerant of prefix empty lines
41 // i.e. any series of either \n or \r\n with no other characters and no repeated \r
42 while (!buf_
.isEmpty() && (buf_
[0] == '\n' || (buf_
[0] == '\r' && buf_
[1] == '\n'))) {
47 /* XXX: this is a Squid-specific tolerance
48 * it appears never to have been relevant outside our unit-tests
49 * because the ConnStateData parser loop starts with consumeWhitespace()
50 * which absorbs any SP HTAB VTAB CR LF characters.
51 * But unit-tests called the HttpParser method directly without that pruning.
53 #if USE_HTTP_VIOLATIONS
54 if (Config
.onoff
.relaxed_header_parser
) {
55 if (Config
.onoff
.relaxed_header_parser
< 0 && buf_
[0] == ' ')
56 debugs(74, DBG_IMPORTANT
, "WARNING: Invalid HTTP Request: " <<
57 "Whitespace bytes received ahead of method. " <<
58 "Ignored due to relaxed_header_parser.");
59 // Be tolerant of prefix spaces (other bytes are valid method values)
60 while (!buf_
.isEmpty() && buf_
[0] == ' ') {
68 * Attempt to parse the first line of a new request message.
71 * RFC 1945 section 5.1
72 * RFC 7230 section 3.1 and 3.5
74 * Parsing state is stored between calls. However the current implementation
75 * begins parsing from scratch on every call.
76 * The return value tells you whether the parsing state fields are valid or not.
78 * \retval -1 an error occurred. request_parse_status indicates HTTP status result.
79 * \retval 1 successful parse. member fields contain the request-line items
80 * \retval 0 more data is needed to complete the parse
83 Http::One::RequestParser::parseRequestFirstLine()
85 int second_word
= -1; // track the suspected URI start
86 int first_whitespace
= -1, last_whitespace
= -1; // track the first and last SP byte
87 int line_end
= -1; // tracks the last byte BEFORE terminal \r\n or \n sequence
89 debugs(74, 5, "parsing possible request: buf.length=" << buf_
.length());
90 debugs(74, DBG_DATA
, buf_
);
92 // Single-pass parse: (provided we have the whole line anyway)
96 for (SBuf::size_type i
= 0; i
< buf_
.length(); ++i
) {
97 // track first and last whitespace (SP only)
100 if (first_whitespace
< req
.start
)
101 first_whitespace
= i
;
104 // track next non-SP/non-HT byte after first_whitespace
105 if (second_word
< first_whitespace
&& buf_
[i
] != ' ' && buf_
[i
] != '\t') {
109 // locate line terminator
110 if (buf_
[i
] == '\n') {
115 if (i
< buf_
.length() - 1 && buf_
[i
] == '\r') {
116 if (Config
.onoff
.relaxed_header_parser
) {
117 if (Config
.onoff
.relaxed_header_parser
< 0 && buf_
[i
+ 1] == '\r')
118 debugs(74, DBG_IMPORTANT
, "WARNING: Invalid HTTP Request: " <<
119 "Series of carriage-return bytes received prior to line terminator. " <<
120 "Ignored due to relaxed_header_parser.");
122 // Be tolerant of invalid multiple \r prior to terminal \n
123 if (buf_
[i
+ 1] == '\n' || buf_
[i
+ 1] == '\r')
125 while (i
< buf_
.length() - 1 && buf_
[i
+ 1] == '\r')
128 if (buf_
[i
+ 1] == '\n') {
133 if (buf_
[i
+ 1] == '\n') {
140 // RFC 7230 section 3.1.1 does not prohibit embedded CR like RFC 2616 used to.
141 // However it does explicitly state an exact syntax which omits un-encoded CR
142 // and defines 400 (Bad Request) as the required action when
143 // handed an invalid request-line.
144 request_parse_status
= Http::scBadRequest
;
150 // DoS protection against long first-line
151 if ((size_t)buf_
.length() >= Config
.maxRequestHeaderSize
) {
152 debugs(33, 5, "Too large request-line");
153 // RFC 7230 section 3.1.1 mandates a 414 response if the URL is longer than acceptable.
154 request_parse_status
= Http::scUriTooLong
;
158 debugs(74, 5, "Parser: retval 0: from " << req
.start
<<
159 "->" << req
.end
<< ": needs more data to complete first line.");
163 // NP: we have now seen EOL, more-data (0) cannot occur.
164 // From here on any failure is -1, success is 1
168 // DoS protection against long first-line
169 if ((size_t)(req
.end
-req
.start
) >= Config
.maxRequestHeaderSize
) {
170 debugs(33, 5, "Too large request-line");
171 request_parse_status
= Http::scUriTooLong
;
175 // Process what we now know about the line structure into field offsets
176 // generating HTTP status for any aborts as we go.
178 // First non-whitespace = beginning of method
179 if (req
.start
> line_end
) {
180 request_parse_status
= Http::scBadRequest
;
183 req
.m_start
= req
.start
;
185 // First whitespace = end of method
186 if (first_whitespace
> line_end
|| first_whitespace
< req
.start
) {
187 request_parse_status
= Http::scBadRequest
; // no method
190 req
.m_end
= first_whitespace
- 1;
191 if (req
.m_end
< req
.m_start
) {
192 request_parse_status
= Http::scBadRequest
; // missing URI?
197 const SBuf tmp
= buf_
.substr(req
.m_start
, req
.m_end
- req
.m_start
+ 1);
198 method_
= HttpRequestMethod(tmp
);
200 // First non-whitespace after first SP = beginning of URL+Version
201 if (second_word
> line_end
|| second_word
< req
.start
) {
202 request_parse_status
= Http::scBadRequest
; // missing URI
205 req
.u_start
= second_word
;
207 // RFC 1945: SP and version following URI are optional, marking version 0.9
208 // we identify this by the last whitespace being earlier than URI start
209 if (last_whitespace
< second_word
&& last_whitespace
>= req
.start
) {
210 msgProtocol_
= Http::ProtocolVersion(0,9);
211 req
.u_end
= line_end
;
212 uri_
= buf_
.substr(req
.u_start
, req
.u_end
- req
.u_start
+ 1);
213 request_parse_status
= Http::scOkay
; // HTTP/0.9
216 // otherwise last whitespace is somewhere after end of URI.
217 req
.u_end
= last_whitespace
;
218 // crop any trailing whitespace in the area we think of as URI
219 for (; req
.u_end
>= req
.u_start
&& xisspace(buf_
[req
.u_end
]); --req
.u_end
);
221 if (req
.u_end
< req
.u_start
) {
222 request_parse_status
= Http::scBadRequest
; // missing URI
225 uri_
= buf_
.substr(req
.u_start
, req
.u_end
- req
.u_start
+ 1);
227 // Last whitespace SP = before start of protocol/version
228 if (last_whitespace
>= line_end
) {
229 request_parse_status
= Http::scBadRequest
; // missing version
232 req
.v_start
= last_whitespace
+ 1;
233 req
.v_end
= line_end
;
235 /* RFC 7230 section 2.6 : handle unsupported HTTP major versions cleanly. */
236 if ((req
.v_end
- req
.v_start
+1) < (int)Http1magic
.length() || !buf_
.substr(req
.v_start
, SBuf::npos
).startsWith(Http1magic
)) {
237 // non-HTTP/1 protocols not supported / implemented.
238 request_parse_status
= Http::scHttpVersionNotSupported
;
241 // NP: magic octets include the protocol name and major version DIGIT.
242 msgProtocol_
.protocol
= AnyP::PROTO_HTTP
;
243 msgProtocol_
.major
= 1;
245 int i
= req
.v_start
+ Http1magic
.length() -1;
247 // catch missing minor part
248 if (++i
> line_end
) {
249 request_parse_status
= Http::scHttpVersionNotSupported
;
252 /* next should be one or more digits */
253 if (!isdigit(buf_
[i
])) {
254 request_parse_status
= Http::scHttpVersionNotSupported
;
258 for (; i
<= line_end
&& (isdigit(buf_
[i
])) && min
< 65536; ++i
) {
260 min
= min
+ (buf_
[i
]) - '0';
262 // catch too-big values or trailing garbage
263 if (min
>= 65536 || i
< line_end
) {
264 request_parse_status
= Http::scHttpVersionNotSupported
;
267 msgProtocol_
.minor
= min
;
270 * Rightio - we have all the schtuff. Return 1 (success); we've got enough.
272 request_parse_status
= Http::scOkay
;
277 Http::One::RequestParser::parse(const SBuf
&aBuf
)
280 debugs(74, DBG_DATA
, "Parse buf={length=" << aBuf
.length() << ", data='" << aBuf
<< "'}");
282 // stage 1: locate the request-line
283 if (parsingStage_
== HTTP_PARSE_NONE
) {
286 // if we hit something before EOS treat it as a message
288 parsingStage_
= HTTP_PARSE_FIRST
;
293 // stage 2: parse the request-line
294 if (parsingStage_
== HTTP_PARSE_FIRST
) {
295 PROF_start(HttpParserParseReqLine
);
296 const int retcode
= parseRequestFirstLine();
298 // first-line (or a look-alike) found successfully.
300 buf_
.consume(firstLineSize()); // first line bytes including CRLF terminator are now done.
301 parsingStage_
= HTTP_PARSE_MIME
;
304 debugs(74, 5, "request-line: retval " << retcode
<< ": from " << req
.start
<< "->" << req
.end
<<
305 " line={" << aBuf
.length() << ", data='" << aBuf
<< "'}");
306 debugs(74, 5, "request-line: method " << req
.m_start
<< "->" << req
.m_end
<< " (" << method_
<< ")");
307 debugs(74, 5, "request-line: url " << req
.u_start
<< "->" << req
.u_end
<< " (" << uri_
<< ")");
308 debugs(74, 5, "request-line: proto " << req
.v_start
<< "->" << req
.v_end
<< " (" << msgProtocol_
<< ")");
309 debugs(74, 5, "Parser: bytes processed=" << (aBuf
.length()-buf_
.length()));
310 PROF_stop(HttpParserParseReqLine
);
312 // syntax errors already found in the request-line, nothing more to parse
314 parsingStage_
= HTTP_PARSE_DONE
;
319 // stage 3: locate the mime header block
320 if (parsingStage_
== HTTP_PARSE_MIME
) {
321 // HTTP/1.x request-line is valid and parsing completed.
322 if (msgProtocol_
.major
== 1) {
323 /* NOTE: HTTP/0.9 requests do not have a mime header block.
324 * So the rest of the code will need to deal with '0'-byte headers
325 * (i.e. none, so don't try parsing them)
327 int64_t mimeHeaderBytes
= 0;
328 // XXX: c_str() reallocates. performance regression.
329 if ((mimeHeaderBytes
= headersEnd(buf_
.c_str(), buf_
.length())) == 0) {
330 if (buf_
.length()+firstLineSize() >= Config
.maxRequestHeaderSize
) {
331 debugs(33, 5, "Too large request");
332 request_parse_status
= Http::scRequestHeaderFieldsTooLarge
;
333 parsingStage_
= HTTP_PARSE_DONE
;
335 debugs(33, 5, "Incomplete request, waiting for end of headers");
338 mimeHeaderBlock_
= buf_
.consume(mimeHeaderBytes
);
339 debugs(74, 5, "mime header (0-" << mimeHeaderBytes
<< ") {" << mimeHeaderBlock_
<< "}");
342 debugs(33, 3, "Missing HTTP/1.x identifier");
344 // NP: we do not do any further stages here yet so go straight to DONE
345 parsingStage_
= HTTP_PARSE_DONE
;
347 // Squid could handle these headers, but admin does not want to
348 if (messageHeaderSize() >= Config
.maxRequestHeaderSize
) {
349 debugs(33, 5, "Too large request");
350 request_parse_status
= Http::scRequestHeaderFieldsTooLarge
;
355 return !needsMoreData();