2 * Copyright (C) 1996-2014 The Squid Software Foundation and contributors
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
11 #include "HttpParser.h"
12 #include "profiler/Profiler.h"
13 #include "SquidConfig.h"
// NOTE(review): the signature line for these statements is not present in this
// extract. Presumably this is the body of HttpParser::clear(), which reset()
// calls to empty the state - confirm against the full file.
//
// Returns the parser to its idle state and forgets any earlier parse result.
18 state
= HTTP_PARSE_NONE
;
19 request_parse_status
= Http::scNone
;
// Every offset pair below is set to -1. The size helpers later in this file
// assert that offsets are != -1 before using them, so -1 serves as the
// not-yet-located marker.
22 req
.start
= req
.end
= -1;
23 hdr_start
= hdr_end
= -1;
24 req
.m_start
= req
.m_end
= -1;
25 req
.u_start
= req
.u_end
= -1;
26 req
.v_start
= req
.v_end
= -1;
// Protocol version numbers are reset to 0 (not -1 like the offsets above).
27 req
.v_maj
= req
.v_min
= 0;
// Re-arms the parser for a new request: wipes all prior state through clear(),
// switches state to HTTP_PARSE_NEW, and logs the buffer at debug section 74,
// level 5.
//
// aBuf - request bytes to be parsed.
// len  - integer supplied alongside aBuf.
// NOTE(review): the statements that store aBuf and len are not visible in this
// extract; presumably they populate buf and bufsiz, which
// parseRequestFirstLine() reads - confirm against the full file.
31 HttpParser::reset(const char *aBuf
, int len
)
33 clear(); // empty the state.
34 state
= HTTP_PARSE_NEW
;
// NOTE(review): buf is streamed here as a C string; presumably this relies on
// the caller providing a NUL-terminated buffer - confirm.
37 debugs(74, 5, HERE
<< "Request buffer is " << buf
);
// Parses the request-line (method, URI, optional protocol version) held in buf.
//
// Results are recorded as byte offsets in req.* (start/end, m_*, u_*, v_*) and
// as an HTTP status in request_parse_status (scBadRequest,
// scHttpVersionNotSupported and scOkay are the values assigned below).
//
// Return convention, as stated by the comments and debug text inside this
// function: 0 means more data is needed to complete the first line; once the
// end of line has been seen, any failure is -1 and success is 1.
// NOTE(review): the return statements themselves are on lines that are not
// visible in this extract.
41 HttpParser::parseRequestFirstLine()
43 int second_word
= -1; // track the suspected URI start
44 int first_whitespace
= -1, last_whitespace
= -1; // track the first and last SP byte
45 int line_end
= -1; // tracks the last byte BEFORE terminal \r\n or \n sequence
47 debugs(74, 5, HERE
<< "parsing possible request: " << buf
);
49 // Single-pass parse: (provided we have the whole line anyways)
// relaxed_header_parser is tested twice: any non-zero value enables the
// tolerant behaviour, and a value below zero additionally emits a warning at
// DBG_IMPORTANT.
52 if (Config
.onoff
.relaxed_header_parser
) {
53 if (Config
.onoff
.relaxed_header_parser
< 0 && buf
[req
.start
] == ' ')
54 debugs(74, DBG_IMPORTANT
, "WARNING: Invalid HTTP Request: " <<
55 "Whitespace bytes received ahead of method. " <<
56 "Ignored due to relaxed_header_parser.");
57 // Be tolerant of prefix spaces (other bytes are valid method values)
58 for (; req
.start
< bufsiz
&& buf
[req
.start
] == ' '; ++req
.start
);
// Main scan over the buffer; it gathers the whitespace positions, the start of
// the second word, and the line terminator.
61 for (int i
= 0; i
< bufsiz
; ++i
) {
62 // track first and last whitespace (SP only)
65 if (first_whitespace
< req
.start
)
69 // track next non-SP/non-HT byte after first_whitespace
70 if (second_word
< first_whitespace
&& buf
[i
] != ' ' && buf
[i
] != '\t') {
74 // locate line terminator
// The i < bufsiz - 1 bound keeps the buf[i + 1] look-ahead below inside the
// buffer.
80 if (i
< bufsiz
- 1 && buf
[i
] == '\r') {
81 if (Config
.onoff
.relaxed_header_parser
) {
82 if (Config
.onoff
.relaxed_header_parser
< 0 && buf
[i
+ 1] == '\r')
83 debugs(74, DBG_IMPORTANT
, "WARNING: Invalid HTTP Request: " <<
84 "Series of carriage-return bytes received prior to line terminator. " <<
85 "Ignored due to relaxed_header_parser.");
87 // Be tolerant of invalid multiple \r prior to terminal \n
88 if (buf
[i
+ 1] == '\n' || buf
[i
+ 1] == '\r')
90 while (i
< bufsiz
- 1 && buf
[i
+ 1] == '\r')
93 if (buf
[i
+ 1] == '\n') {
98 if (buf
[i
+ 1] == '\n') {
105 // RFC 2616 section 5.1
106 // "No CR or LF is allowed except in the final CRLF sequence"
107 request_parse_status
= Http::scBadRequest
;
112 debugs(74, 5, "Parser: retval 0: from " << req
.start
<<
113 "->" << req
.end
<< ": needs more data to complete first line.");
117 // NP: we have now seen EOL, more-data (0) cannot occur.
118 // From here on any failure is -1, success is 1
122 // Process what we now know about the line structure into field offsets
123 // generating HTTP status for any aborts as we go.
125 // First non-whitespace = beginning of method
126 if (req
.start
> line_end
) {
127 request_parse_status
= Http::scBadRequest
;
130 req
.m_start
= req
.start
;
132 // First whitespace = end of method
133 if (first_whitespace
> line_end
|| first_whitespace
< req
.start
) {
134 request_parse_status
= Http::scBadRequest
; // no method
137 req
.m_end
= first_whitespace
- 1;
138 if (req
.m_end
< req
.m_start
) {
139 request_parse_status
= Http::scBadRequest
; // missing URI?
143 // First non-whitespace after first SP = beginning of URL+Version
144 if (second_word
> line_end
|| second_word
< req
.start
) {
145 request_parse_status
= Http::scBadRequest
; // missing URI
148 req
.u_start
= second_word
;
150 // RFC 1945: SP and version following URI are optional, marking version 0.9
151 // we identify this by the last whitespace being earlier than URI start
152 if (last_whitespace
< second_word
&& last_whitespace
>= req
.start
) {
155 req
.u_end
= line_end
;
156 request_parse_status
= Http::scOkay
; // HTTP/0.9
159 // otherwise last whitespace is somewhere after end of URI.
160 req
.u_end
= last_whitespace
;
161 // crop any trailing whitespace in the area we think of as URI
162 for (; req
.u_end
>= req
.u_start
&& xisspace(buf
[req
.u_end
]); --req
.u_end
);
164 if (req
.u_end
< req
.u_start
) {
165 request_parse_status
= Http::scBadRequest
; // missing URI
169 // Last whitespace SP = before start of protocol/version
170 if (last_whitespace
>= line_end
) {
171 request_parse_status
= Http::scBadRequest
; // missing version
174 req
.v_start
= last_whitespace
+ 1;
175 req
.v_end
= line_end
;
177 // We only accept HTTP protocol requests right now.
178 // TODO: accept other protocols; RFC 2326 (RTSP protocol) etc
// The version token must be at least five bytes long and begin with HTTP/
// (compared case-insensitively through strncasecmp).
179 if ((req
.v_end
- req
.v_start
+1) < 5 || strncasecmp(&buf
[req
.v_start
], "HTTP/", 5) != 0) {
180 #if USE_HTTP_VIOLATIONS
181 // being lax; old parser accepted strange versions
182 // there is a LOT of cases which are ambiguous, therefore we cannot use relaxed_header_parser here.
185 req
.u_end
= line_end
;
186 request_parse_status
= Http::scOkay
; // treat as HTTP/0.9
189 // protocol not supported / implemented.
190 request_parse_status
= Http::scHttpVersionNotSupported
;
// Step over the five-byte protocol prefix so that i indexes the first byte of
// the major version number.
195 int i
= req
.v_start
+ sizeof("HTTP/") -1;
197 /* next should be 1 or more digits */
// NOTE(review): isdigit() is handed an element of buf directly. If buf is a
// plain char pointer (as the aBuf parameter of reset() is), bytes above 0x7F
// are negative where char is signed, which isdigit() does not accept; a cast
// to unsigned char looks advisable - confirm. The whitespace test earlier in
// this function goes through xisspace() rather than the raw ctype call.
198 if (!isdigit(buf
[i
])) {
199 request_parse_status
= Http::scHttpVersionNotSupported
;
// Accumulates the major version; the maj < 65536 bound stops the loop once the
// value is out of range.
// NOTE(review): only the add-digit statement is visible here; a
// multiply-by-ten step is presumably on a line missing from this extract -
// confirm.
203 for (; i
<= line_end
&& (isdigit(buf
[i
])) && maj
< 65536; ++i
) {
205 maj
= maj
+ (buf
[i
]) - '0';
207 // catch too-big values or missing remainders
208 if (maj
>= 65536 || i
> line_end
) {
209 request_parse_status
= Http::scHttpVersionNotSupported
;
214 /* next should be .; we -have- to have this as we have a whole line.. */
216 request_parse_status
= Http::scHttpVersionNotSupported
;
219 // catch missing minor part
220 if (++i
> line_end
) {
221 request_parse_status
= Http::scHttpVersionNotSupported
;
224 /* next should be one or more digits */
225 if (!isdigit(buf
[i
])) {
226 request_parse_status
= Http::scHttpVersionNotSupported
;
// Accumulates the minor version with the same bound as the major loop above.
230 for (; i
<= line_end
&& (isdigit(buf
[i
])) && min
< 65536; ++i
) {
232 min
= min
+ (buf
[i
]) - '0';
234 // catch too-big values or trailing garbage
// NOTE(review): possible off-by-one. The loop above runs while i <= line_end,
// so a fully consumed version leaves i == line_end + 1. A single non-digit
// byte sitting exactly at line_end stops the loop with i == line_end, which
// the i < line_end test below does not flag. i <= line_end looks like the
// intended condition - confirm before changing.
235 if (min
>= 65536 || i
< line_end
) {
236 request_parse_status
= Http::scHttpVersionNotSupported
;
242 * Rightio - we have all the schtuff. Return true; we've got enough.
244 request_parse_status
= Http::scOkay
;
// Free-function wrapper around hmsg->parseRequestFirstLine().
// The call is bracketed by PROF_start / PROF_stop, and the result code is then
// logged at debug section 74, level 5 together with every request-line offset
// (overall span, method, url, version) and the parsed major/minor numbers.
// NOTE(review): the return statement is not visible in this extract;
// presumably retcode is handed back to the caller - confirm.
249 HttpParserParseReqLine(HttpParser
*hmsg
)
251 PROF_start(HttpParserParseReqLine
);
252 int retcode
= hmsg
->parseRequestFirstLine();
253 debugs(74, 5, "Parser: retval " << retcode
<< ": from " << hmsg
->req
.start
<<
254 "->" << hmsg
->req
.end
<< ": method " << hmsg
->req
.m_start
<< "->" <<
255 hmsg
->req
.m_end
<< "; url " << hmsg
->req
.u_start
<< "->" << hmsg
->req
.u_end
<<
256 "; version " << hmsg
->req
.v_start
<< "->" << hmsg
->req
.v_end
<< " (" << hmsg
->req
.v_maj
<<
257 "/" << hmsg
->req
.v_min
<< ")");
258 PROF_stop(HttpParserParseReqLine
);
263 /* XXX This should eventually turn into something inlined or #define'd */
// Returns the inclusive length of the span req.start..req.end, computed as
// req.end - req.start + 1.
// Preconditions enforced by assert: the parser state is HTTP_PARSE_NEW and
// both offsets have been set (neither is still -1).
265 HttpParserReqSz(HttpParser
*hp
)
267 assert(hp
->state
== HTTP_PARSE_NEW
);
268 assert(hp
->req
.start
!= -1);
269 assert(hp
->req
.end
!= -1);
270 return hp
->req
.end
- hp
->req
.start
+ 1;
274 * This +1 makes it 'right' but won't make any sense if
275 * there's a 0 byte header? This won't happen normally - a valid header
276 * is at -least- a blank line (\n, or \r\n.)
// Returns the inclusive length of the span hdr_start..hdr_end, computed as
// hdr_end - hdr_start + 1.
// Preconditions enforced by assert: the parser state is HTTP_PARSE_NEW and
// both header offsets have been set (neither is still -1).
279 HttpParserHdrSz(HttpParser
*hp
)
281 assert(hp
->state
== HTTP_PARSE_NEW
);
282 assert(hp
->hdr_start
!= -1);
283 assert(hp
->hdr_end
!= -1);
284 return hp
->hdr_end
- hp
->hdr_start
+ 1;
// Returns the address inside hp->buf at offset hdr_start. The result points
// into the parser buffer; nothing is copied here.
// Preconditions enforced by assert: the parser state is HTTP_PARSE_NEW and
// both header offsets have been set (neither is still -1).
288 HttpParserHdrBuf(HttpParser
*hp
)
290 assert(hp
->state
== HTTP_PARSE_NEW
);
291 assert(hp
->hdr_start
!= -1);
292 assert(hp
->hdr_end
!= -1);
293 return hp
->buf
+ hp
->hdr_start
;
// Returns the inclusive length of the span req.start..hdr_end, computed as
// hdr_end - req.start + 1.
// NOTE(review): unlike HttpParserReqSz, HttpParserHdrSz and HttpParserHdrBuf
// in this file, nothing here asserts the parser state or that the offsets
// have been set; with an offset still at -1 the result is not meaningful.
// Consider adding the same assertions - confirm with callers first.
297 HttpParserRequestLen(HttpParser
*hp
)
299 return hp
->hdr_end
- hp
->req
.start
+ 1;