3 #include "http/one/RequestParser.h"
4 #include "http/ProtocolVersion.h"
5 #include "mime_header.h"
6 #include "profiler/Profiler.h"
7 #include "SquidConfig.h"
10 Http::One::RequestParser::clear()
12 Http1::Parser::clear();
14 request_parse_status
= Http::scNone
;
15 req
.start
= req
.end
= -1;
16 req
.m_start
= req
.m_end
= -1;
17 req
.u_start
= req
.u_end
= -1;
18 req
.v_start
= req
.v_end
= -1;
19 method_
= HttpRequestMethod();
23 * Attempt to parse the first line of a new request message.
25 * Governed by RFC 2616 section 4.1
27 * In the interest of robustness, servers SHOULD ignore any empty
28 * line(s) received where a Request-Line is expected. In other words, if
29 * the server is reading the protocol stream at the beginning of a
30 * message and receives a CRLF first, it should ignore the CRLF.
32 * ... To restate what is explicitly forbidden by the
33 * BNF, an HTTP/1.1 client MUST NOT preface or follow a request with an
37 * Parsing state is stored between calls to avoid repeating buffer scans.
38 * If garbage is found the parsing offset is incremented.
41 Http::One::RequestParser::skipGarbageLines()
43 #if WHEN_RFC_COMPLIANT // CRLF or bare-LF is what RFC 2616 tolerant parsers do ...
44 if (Config
.onoff
.relaxed_header_parser
) {
45 if (Config
.onoff
.relaxed_header_parser
< 0 && (buf
[0] == '\r' || buf
[0] == '\n'))
46 debugs(74, DBG_IMPORTANT
, "WARNING: Invalid HTTP Request: " <<
47 "CRLF bytes received ahead of request-line. " <<
48 "Ignored due to relaxed_header_parser.");
49 // Be tolerant of prefix empty lines
50 // ie any series of either \n or \r\n with no other characters and no repeated \r
51 while (!buf
.isEmpty() && (buf
[0] == '\n' || (buf
[0] == '\r' && buf
[1] == '\n'))) {
57 /* XXX: this is a Squid-specific tolerance
58 * it appears never to have been relevant outside out unit-tests
59 * because the ConnStateData parser loop starts with consumeWhitespace()
60 * which absorbs any SP HTAB VTAB CR LF characters.
61 * But unit-tests called the HttpParser method directly without that pruning.
63 #if USE_HTTP_VIOLATIONS
64 if (Config
.onoff
.relaxed_header_parser
) {
65 if (Config
.onoff
.relaxed_header_parser
< 0 && buf
[0] == ' ')
66 debugs(74, DBG_IMPORTANT
, "WARNING: Invalid HTTP Request: " <<
67 "Whitespace bytes received ahead of method. " <<
68 "Ignored due to relaxed_header_parser.");
69 // Be tolerant of prefix spaces (other bytes are valid method values)
70 while (!buf
.isEmpty() && buf
[0] == ' ') {
78 * Attempt to parse the first line of a new request message.
81 * RFC 1945 section 5.1
82 * RFC 2616 section 5.1
84 * Parsing state is stored between calls. However the current implementation
85 * begins parsing from scratch on every call.
86 * The return value tells you whether the parsing state fields are valid or not.
88 * \retval -1 an error occurred. request_parse_status indicates HTTP status result.
89 * \retval 1 successful parse. member fields contain the request-line items
90 * \retval 0 more data is needed to complete the parse
93 Http::One::RequestParser::parseRequestFirstLine()
95 int second_word
= -1; // track the suspected URI start
96 int first_whitespace
= -1, last_whitespace
= -1; // track the first and last SP byte
97 int line_end
= -1; // tracks the last byte BEFORE terminal \r\n or \n sequence
99 debugs(74, 5, "parsing possible request: buf.length=" << buf
.length());
100 debugs(74, DBG_DATA
, buf
);
102 // Single-pass parse: (provided we have the whole line anyways)
106 for (SBuf::size_type i
= 0; i
< buf
.length(); ++i
) {
107 // track first and last whitespace (SP only)
110 if (first_whitespace
< req
.start
)
111 first_whitespace
= i
;
114 // track next non-SP/non-HT byte after first_whitespace
115 if (second_word
< first_whitespace
&& buf
[i
] != ' ' && buf
[i
] != '\t') {
119 // locate line terminator
120 if (buf
[i
] == '\n') {
125 if (i
< buf
.length() - 1 && buf
[i
] == '\r') {
126 if (Config
.onoff
.relaxed_header_parser
) {
127 if (Config
.onoff
.relaxed_header_parser
< 0 && buf
[i
+ 1] == '\r')
128 debugs(74, DBG_IMPORTANT
, "WARNING: Invalid HTTP Request: " <<
129 "Series of carriage-return bytes received prior to line terminator. " <<
130 "Ignored due to relaxed_header_parser.");
132 // Be tolerant of invalid multiple \r prior to terminal \n
133 if (buf
[i
+ 1] == '\n' || buf
[i
+ 1] == '\r')
135 while (i
< buf
.length() - 1 && buf
[i
+ 1] == '\r')
138 if (buf
[i
+ 1] == '\n') {
143 if (buf
[i
+ 1] == '\n') {
150 // RFC 2616 section 5.1
151 // "No CR or LF is allowed except in the final CRLF sequence"
152 request_parse_status
= Http::scBadRequest
;
158 // DoS protection against long first-line
159 if ((size_t)buf
.length() >= Config
.maxRequestHeaderSize
) {
160 debugs(33, 5, "Too large request-line");
161 // XXX: return URL-too-log status code if second_whitespace is not yet found.
162 request_parse_status
= Http::scHeaderTooLarge
;
166 debugs(74, 5, "Parser: retval 0: from " << req
.start
<<
167 "->" << req
.end
<< ": needs more data to complete first line.");
171 // NP: we have now seen EOL, more-data (0) cannot occur.
172 // From here on any failure is -1, success is 1
176 // DoS protection against long first-line
177 if ((size_t)(req
.end
-req
.start
) >= Config
.maxRequestHeaderSize
) {
178 debugs(33, 5, "Too large request-line");
179 request_parse_status
= Http::scHeaderTooLarge
;
183 // Process what we now know about the line structure into field offsets
184 // generating HTTP status for any aborts as we go.
186 // First non-whitespace = beginning of method
187 if (req
.start
> line_end
) {
188 request_parse_status
= Http::scBadRequest
;
191 req
.m_start
= req
.start
;
193 // First whitespace = end of method
194 if (first_whitespace
> line_end
|| first_whitespace
< req
.start
) {
195 request_parse_status
= Http::scBadRequest
; // no method
198 req
.m_end
= first_whitespace
- 1;
199 if (req
.m_end
< req
.m_start
) {
200 request_parse_status
= Http::scBadRequest
; // missing URI?
205 SBuf tmp
= buf
.substr(req
.m_start
, req
.m_end
- req
.m_start
+ 1);
206 method_
= HttpRequestMethod(tmp
);
208 // First non-whitespace after first SP = beginning of URL+Version
209 if (second_word
> line_end
|| second_word
< req
.start
) {
210 request_parse_status
= Http::scBadRequest
; // missing URI
213 req
.u_start
= second_word
;
215 // RFC 1945: SP and version following URI are optional, marking version 0.9
216 // we identify this by the last whitespace being earlier than URI start
217 if (last_whitespace
< second_word
&& last_whitespace
>= req
.start
) {
218 msgProtocol_
= Http::ProtocolVersion(0,9);
219 req
.u_end
= line_end
;
220 uri_
= buf
.substr(req
.u_start
, req
.u_end
- req
.u_start
+ 1);
221 request_parse_status
= Http::scOkay
; // HTTP/0.9
224 // otherwise last whitespace is somewhere after end of URI.
225 req
.u_end
= last_whitespace
;
226 // crop any trailing whitespace in the area we think of as URI
227 for (; req
.u_end
>= req
.u_start
&& xisspace(buf
[req
.u_end
]); --req
.u_end
);
229 if (req
.u_end
< req
.u_start
) {
230 request_parse_status
= Http::scBadRequest
; // missing URI
233 uri_
= buf
.substr(req
.u_start
, req
.u_end
- req
.u_start
+ 1);
235 // Last whitespace SP = before start of protocol/version
236 if (last_whitespace
>= line_end
) {
237 request_parse_status
= Http::scBadRequest
; // missing version
240 req
.v_start
= last_whitespace
+ 1;
241 req
.v_end
= line_end
;
243 // We only accept HTTP protocol requests right now.
244 // TODO: accept other protocols; RFC 2326 (RTSP protocol) etc
245 if ((req
.v_end
- req
.v_start
+1) < 5 || buf
.substr(req
.v_start
, 5).caseCmp(SBuf("HTTP/")) != 0) {
246 #if USE_HTTP_VIOLATIONS
247 // being lax; old parser accepted strange versions
248 // there is a LOT of cases which are ambiguous, therefore we cannot use relaxed_header_parser here.
249 msgProtocol_
= Http::ProtocolVersion(0,9);
250 req
.u_end
= line_end
;
251 request_parse_status
= Http::scOkay
; // treat as HTTP/0.9
254 // protocol not supported / implemented.
255 request_parse_status
= Http::scHttpVersionNotSupported
;
259 msgProtocol_
.protocol
= AnyP::PROTO_HTTP
;
261 int i
= req
.v_start
+ sizeof("HTTP/") -1;
263 /* next should be 1 or more digits */
264 if (!isdigit(buf
[i
])) {
265 request_parse_status
= Http::scHttpVersionNotSupported
;
269 for (; i
<= line_end
&& (isdigit(buf
[i
])) && maj
< 65536; ++i
) {
271 maj
= maj
+ (buf
[i
]) - '0';
273 // catch too-big values or missing remainders
274 if (maj
>= 65536 || i
> line_end
) {
275 request_parse_status
= Http::scHttpVersionNotSupported
;
278 msgProtocol_
.major
= maj
;
280 /* next should be .; we -have- to have this as we have a whole line.. */
282 request_parse_status
= Http::scHttpVersionNotSupported
;
285 // catch missing minor part
286 if (++i
> line_end
) {
287 request_parse_status
= Http::scHttpVersionNotSupported
;
290 /* next should be one or more digits */
291 if (!isdigit(buf
[i
])) {
292 request_parse_status
= Http::scHttpVersionNotSupported
;
296 for (; i
<= line_end
&& (isdigit(buf
[i
])) && min
< 65536; ++i
) {
298 min
= min
+ (buf
[i
]) - '0';
300 // catch too-big values or trailing garbage
301 if (min
>= 65536 || i
< line_end
) {
302 request_parse_status
= Http::scHttpVersionNotSupported
;
305 msgProtocol_
.minor
= min
;
307 /* RFC 2616 section 10.5.6 : handle unsupported HTTP major versions cleanly. */
308 /* We currently only support 0.9, 1.0, 1.1 properly in this parser */
309 if ((maj
== 0 && min
!= 9) || (maj
> 1)) {
310 request_parse_status
= Http::scHttpVersionNotSupported
;
315 * Rightio - we have all the schtuff. Return true; we've got enough.
317 request_parse_status
= Http::scOkay
;
322 Http::One::RequestParser::parse(const SBuf
&aBuf
)
325 debugs(74, DBG_DATA
, "Parse buf={length=" << aBuf
.length() << ", data='" << aBuf
<< "'}");
327 // stage 1: locate the request-line
328 if (parsingStage_
== HTTP_PARSE_NONE
) {
331 // if we hit something before EOS treat it as a message
333 parsingStage_
= HTTP_PARSE_FIRST
;
338 // stage 2: parse the request-line
339 if (parsingStage_
== HTTP_PARSE_FIRST
) {
340 PROF_start(HttpParserParseReqLine
);
341 const int retcode
= parseRequestFirstLine();
343 // first-line (or a look-alike) found successfully.
345 buf
.consume(firstLineSize()); // first line bytes including CRLF terminator are now done.
346 parsingStage_
= HTTP_PARSE_MIME
;
349 debugs(74, 5, "request-line: retval " << retcode
<< ": from " << req
.start
<< "->" << req
.end
<<
350 " line={" << aBuf
.length() << ", data='" << aBuf
<< "'}");
351 debugs(74, 5, "request-line: method " << req
.m_start
<< "->" << req
.m_end
<< " (" << method_
<< ")");
352 debugs(74, 5, "request-line: url " << req
.u_start
<< "->" << req
.u_end
<< " (" << uri_
<< ")");
353 debugs(74, 5, "request-line: proto " << req
.v_start
<< "->" << req
.v_end
<< " (" << msgProtocol_
<< ")");
354 debugs(74, 5, "Parser: bytes processed=" << (aBuf
.length()-buf
.length()));
355 PROF_stop(HttpParserParseReqLine
);
357 // syntax errors already
359 parsingStage_
= HTTP_PARSE_DONE
;
364 // stage 3: locate the mime header block
365 if (parsingStage_
== HTTP_PARSE_MIME
) {
366 // HTTP/1.x request-line is valid and parsing completed.
367 if (msgProtocol_
.major
== 1) {
368 /* NOTE: HTTP/0.9 requests do not have a mime header block.
369 * So the rest of the code will need to deal with '0'-byte headers
370 * (ie, none, so don't try parsing em)
372 int64_t mimeHeaderBytes
= 0;
373 if ((mimeHeaderBytes
= headersEnd(buf
.c_str(), buf
.length())) == 0) {
374 if (buf
.length()+firstLineSize() >= Config
.maxRequestHeaderSize
) {
375 debugs(33, 5, "Too large request");
376 request_parse_status
= Http::scHeaderTooLarge
;
377 parsingStage_
= HTTP_PARSE_DONE
;
379 debugs(33, 5, "Incomplete request, waiting for end of headers");
382 mimeHeaderBlock_
= buf
.substr(req
.end
+1, mimeHeaderBytes
);
383 buf
.consume(mimeHeaderBytes
); // done with these bytes now.
386 debugs(33, 3, "Missing HTTP/1.x identifier");
388 // NP: we do not do any further stages here yet so go straight to DONE
389 parsingStage_
= HTTP_PARSE_DONE
;
391 // Squid could handle these headers, but admin does not want to
392 if (messageHeaderSize() >= Config
.maxRequestHeaderSize
) {
393 debugs(33, 5, "Too large request");
394 request_parse_status
= Http::scHeaderTooLarge
;
399 return !needsMoreData();