3 #include "HttpParser.h"
5 #include "profiler/Profiler.h"
// Puts every parser bookkeeping field back to its unset value:
// state and request_parse_status go to their NONE constants, all byte
// offsets (req.start/end, hdr_start/end, method, URI and version ranges)
// go to -1, and the parsed version numbers go to 0.
// NOTE(review): the enclosing function header is not visible in this view;
// presumably this is the body of the clear() routine that reset() calls - confirm.
10 state
= HTTP_PARSE_NONE
;
11 request_parse_status
= HTTP_STATUS_NONE
;
// -1 is the sentinel for an offset that has not been located yet
// (the size accessors further down assert against it).
14 req
.start
= req
.end
= -1;
15 hdr_start
= hdr_end
= -1;
16 req
.m_start
= req
.m_end
= -1;
17 req
.u_start
= req
.u_end
= -1;
18 req
.v_start
= req
.v_end
= -1;
19 req
.v_maj
= req
.v_min
= 0;
// Prepares the parser for a new buffer: wipes all previous state through
// clear(), marks the parser as HTTP_PARSE_NEW and logs the buffer at
// debug section 74, level 5.
// aBuf: start of the bytes to parse.
// len:  number of bytes available at aBuf.
// NOTE(review): the lines that store aBuf and len are not visible in this
// view; presumably they are copied into buf and bufsiz - confirm.
23 HttpParser::reset(const char *aBuf
, int len
)
25 clear(); // empty the state.
26 state
= HTTP_PARSE_NEW
;
29 debugs(74, 5, HERE
<< "Request buffer is " << buf
);
// Parses the first line of an HTTP request held in buf. It records the byte
// offsets of the method, URI and version inside req and sets
// request_parse_status to the HTTP status matching the outcome.
// Per the inline notes below: 0 means more data is needed to complete the
// line, -1 means failure and 1 means success.
// NOTE(review): several original lines (return statements, braces and some
// assignments) are not visible in this view, so the comments added here
// describe only what the visible lines show.
33 HttpParser::parseRequestFirstLine()
35 int second_word
= -1; // track the suspected URI start
36 int first_whitespace
= -1, last_whitespace
= -1; // track the first and last SP byte
37 int line_end
= -1; // tracks the last byte BEFORE terminal \r\n or \n sequence
39 debugs(74, 5, HERE
<< "parsing possible request: " << buf
);
41 // Single-pass parse: (provided we have the whole line anyways)
// Leading-space tolerance only applies when relaxed_header_parser is enabled;
// a negative setting additionally emits the warning below.
44 if (Config
.onoff
.relaxed_header_parser
) {
45 if (Config
.onoff
.relaxed_header_parser
< 0 && buf
[req
.start
] == ' ')
46 debugs(74, DBG_IMPORTANT
, "WARNING: Invalid HTTP Request: " <<
47 "Whitespace bytes received ahead of method. " <<
48 "Ignored due to relaxed_header_parser.");
49 // Be tolerant of prefix spaces (other bytes are valid method values)
50 for (; req
.start
< bufsiz
&& buf
[req
.start
] == ' '; ++req
.start
);
// Walk the buffer from index 0 up to bufsiz, collecting whitespace
// positions and looking for the line terminator.
53 for (int i
= 0; i
< bufsiz
; ++i
) {
54 // track first and last whitespace (SP only)
57 if (first_whitespace
< req
.start
)
61 // track next non-SP/non-HT byte after first_whitespace
62 if (second_word
< first_whitespace
&& buf
[i
] != ' ' && buf
[i
] != '\t') {
66 // locate line terminator
// The i < bufsiz - 1 guard keeps the buf[i + 1] look-ahead inside the buffer.
72 if (i
< bufsiz
- 1 && buf
[i
] == '\r') {
73 if (Config
.onoff
.relaxed_header_parser
) {
74 if (Config
.onoff
.relaxed_header_parser
< 0 && buf
[i
+ 1] == '\r')
75 debugs(74, DBG_IMPORTANT
, "WARNING: Invalid HTTP Request: " <<
76 "Series of carriage-return bytes received prior to line terminator. " <<
77 "Ignored due to relaxed_header_parser.");
79 // Be tolerant of invalid multiple \r prior to terminal \n
80 if (buf
[i
+ 1] == '\n' || buf
[i
+ 1] == '\r')
82 while (i
< bufsiz
- 1 && buf
[i
+ 1] == '\r')
85 if (buf
[i
+ 1] == '\n') {
90 if (buf
[i
+ 1] == '\n') {
97 // RFC 2616 section 5.1
98 // "No CR or LF is allowed except in the final CRLF sequence"
99 request_parse_status
= HTTP_BAD_REQUEST
;
104 debugs(74, 5, "Parser: retval 0: from " << req
.start
<<
105 "->" << req
.end
<< ": needs more data to complete first line.");
109 // NP: we have now seen EOL, more-data (0) cannot occur.
110 // From here on any failure is -1, success is 1
114 // Process what we now know about the line structure into field offsets
115 // generating HTTP status for any aborts as we go.
117 // First non-whitespace = beginning of method
118 if (req
.start
> line_end
) {
119 request_parse_status
= HTTP_BAD_REQUEST
;
122 req
.m_start
= req
.start
;
124 // First whitespace = end of method
125 if (first_whitespace
> line_end
|| first_whitespace
< req
.start
) {
126 request_parse_status
= HTTP_BAD_REQUEST
; // no method
// m_end is an inclusive offset: the byte just before the first SP.
129 req
.m_end
= first_whitespace
- 1;
130 if (req
.m_end
< req
.m_start
) {
131 request_parse_status
= HTTP_BAD_REQUEST
; // missing URI?
135 // First non-whitespace after first SP = beginning of URL+Version
136 if (second_word
> line_end
|| second_word
< req
.start
) {
137 request_parse_status
= HTTP_BAD_REQUEST
; // missing URI
140 req
.u_start
= second_word
;
142 // RFC 1945: SP and version following URI are optional, marking version 0.9
143 // we identify this by the last whitespace being earlier than URI start
144 if (last_whitespace
< second_word
&& last_whitespace
>= req
.start
) {
147 req
.u_end
= line_end
;
148 request_parse_status
= HTTP_OK
; // HTTP/0.9
151 // otherwise last whitespace is somewhere after end of URI.
152 req
.u_end
= last_whitespace
;
153 // crop any trailing whitespace in the area we think of as URI
154 for (; req
.u_end
>= req
.u_start
&& xisspace(buf
[req
.u_end
]); --req
.u_end
);
156 if (req
.u_end
< req
.u_start
) {
157 request_parse_status
= HTTP_BAD_REQUEST
; // missing URI
161 // Last whitespace SP = before start of protocol/version
162 if (last_whitespace
>= line_end
) {
163 request_parse_status
= HTTP_BAD_REQUEST
; // missing version
166 req
.v_start
= last_whitespace
+ 1;
167 req
.v_end
= line_end
;
169 // We only accept HTTP protocol requests right now.
170 // TODO: accept other protocols; RFC 2326 (RTSP protocol) etc
// The length check runs first so the 5-byte prefix compare is only made
// when the version token holds at least 5 bytes.
171 if ((req
.v_end
- req
.v_start
+1) < 5 || strncasecmp(&buf
[req
.v_start
], "HTTP/", 5) != 0) {
172 #if USE_HTTP_VIOLATIONS
173 // being lax; old parser accepted strange versions
174 // there is a LOT of cases which are ambiguous, therefore we cannot use relaxed_header_parser here.
177 req
.u_end
= line_end
;
178 request_parse_status
= HTTP_OK
; // treat as HTTP/0.9
181 request_parse_status
= HTTP_HTTP_VERSION_NOT_SUPPORTED
; // protocol not supported / implemented.
// i starts at the first byte after the HTTP/ prefix of the version token.
186 int i
= req
.v_start
+ sizeof("HTTP/") -1;
188 /* next should be 1 or more digits */
// NOTE(review): if buf holds plain char, passing it to isdigit without an
// unsigned char cast is undefined for negative values - confirm the buf type.
189 if (!isdigit(buf
[i
])) {
190 request_parse_status
= HTTP_HTTP_VERSION_NOT_SUPPORTED
;
// Accumulate the major version digits; the loop stops at the first
// non-digit, at line_end, or once maj reaches 65536.
// NOTE(review): the declaration of maj and any multiply-by-ten step are
// not visible in this view - confirm against the full file.
194 for (; i
<= line_end
&& (isdigit(buf
[i
])) && maj
< 65536; ++i
) {
196 maj
= maj
+ (buf
[i
]) - '0';
198 // catch too-big values or missing remainders
199 if (maj
>= 65536 || i
> line_end
) {
200 request_parse_status
= HTTP_HTTP_VERSION_NOT_SUPPORTED
;
205 /* next should be .; we -have- to have this as we have a whole line.. */
207 request_parse_status
= HTTP_HTTP_VERSION_NOT_SUPPORTED
;
210 // catch missing minor part
211 if (++i
> line_end
) {
212 request_parse_status
= HTTP_HTTP_VERSION_NOT_SUPPORTED
;
215 /* next should be one or more digits */
216 if (!isdigit(buf
[i
])) {
217 request_parse_status
= HTTP_HTTP_VERSION_NOT_SUPPORTED
;
// Same accumulation pattern as the major version, applied to min.
221 for (; i
<= line_end
&& (isdigit(buf
[i
])) && min
< 65536; ++i
) {
223 min
= min
+ (buf
[i
]) - '0';
225 // catch too-big values or trailing garbage
226 if (min
>= 65536 || i
< line_end
) {
227 request_parse_status
= HTTP_HTTP_VERSION_NOT_SUPPORTED
;
233 * Rightio - we have all the schtuff. Return true; we've got enough.
235 request_parse_status
= HTTP_OK
;
// Free-function wrapper around hmsg->parseRequestFirstLine().
// The call is bracketed by PROF_start/PROF_stop, and the resulting offsets
// (request, method, url, version) plus the parsed major/minor numbers are
// logged at debug section 74, level 5.
// hmsg: parser whose buffer is to be examined.
// NOTE(review): the return statement is not visible in this view;
// presumably retcode is handed back to the caller - confirm.
240 HttpParserParseReqLine(HttpParser
*hmsg
)
242 PROF_start(HttpParserParseReqLine
);
243 int retcode
= hmsg
->parseRequestFirstLine();
244 debugs(74, 5, "Parser: retval " << retcode
<< ": from " << hmsg
->req
.start
<<
245 "->" << hmsg
->req
.end
<< ": method " << hmsg
->req
.m_start
<< "->" <<
246 hmsg
->req
.m_end
<< "; url " << hmsg
->req
.u_start
<< "->" << hmsg
->req
.u_end
<<
247 "; version " << hmsg
->req
.v_start
<< "->" << hmsg
->req
.v_end
<< " (" << hmsg
->req
.v_maj
<<
248 "/" << hmsg
->req
.v_min
<< ")");
249 PROF_stop(HttpParserParseReqLine
);
254 /* XXX This should eventually turn into something inlined or #define'd */
// Returns the size in bytes of the request line recorded in hp, computed
// as req.end - req.start + 1 (both offsets are inclusive, hence the +1).
// Asserts that hp is in state HTTP_PARSE_NEW and that both offsets have
// been set, i.e. are not the -1 sentinel.
256 HttpParserReqSz(HttpParser
*hp
)
258 assert(hp
->state
== HTTP_PARSE_NEW
);
259 assert(hp
->req
.start
!= -1);
260 assert(hp
->req
.end
!= -1);
261 return hp
->req
.end
- hp
->req
.start
+ 1;
265 * This +1 makes it 'right' but won't make any sense if
266 * there's a 0 byte header? This won't happen normally - a valid header
267 * is at -least- a blank line (\n, or \r\n.)
// Returns the size in bytes of the header region recorded in hp, computed
// as hdr_end - hdr_start + 1 (inclusive offsets).
// Asserts that hp is in state HTTP_PARSE_NEW and that both header offsets
// have been set, i.e. are not the -1 sentinel.
270 HttpParserHdrSz(HttpParser
*hp
)
272 assert(hp
->state
== HTTP_PARSE_NEW
);
273 assert(hp
->hdr_start
!= -1);
274 assert(hp
->hdr_end
!= -1);
275 return hp
->hdr_end
- hp
->hdr_start
+ 1;
// Returns a pointer to the first header byte: hp->buf advanced by
// hp->hdr_start. The pointer refers into the buffer held by hp; no copy
// is made here.
// Asserts that hp is in state HTTP_PARSE_NEW and that both header offsets
// have been set, i.e. are not the -1 sentinel.
279 HttpParserHdrBuf(HttpParser
*hp
)
281 assert(hp
->state
== HTTP_PARSE_NEW
);
282 assert(hp
->hdr_start
!= -1);
283 assert(hp
->hdr_end
!= -1);
284 return hp
->buf
+ hp
->hdr_start
;
// Returns the byte count from the start of the request line through the
// end of the headers: hdr_end - req.start + 1 (inclusive offsets).
// NOTE(review): unlike the sibling size accessors, no assertions on state
// or on the -1 sentinels are visible in this view; with unset offsets the
// result would be meaningless - confirm callers only use it after a
// successful parse.
288 HttpParserRequestLen(HttpParser
*hp
)
290 return hp
->hdr_end
- hp
->req
.start
+ 1;