]>
Commit | Line | Data |
---|---|---|
4c14658e AJ |
1 | #include "config.h" |
2 | #include "Debug.h" | |
3 | #include "HttpParser.h" | |
4 | #include "structs.h" | |
5 | ||
6 | void | |
7 | HttpParser::clear() | |
8 | { | |
9 | state = HTTP_PARSE_NONE; | |
10 | request_parse_status = HTTP_STATUS_NONE; | |
11 | buf = NULL; | |
12 | bufsiz = 0; | |
13 | req_start = req_end = -1; | |
14 | hdr_start = hdr_end = -1; | |
15 | m_start = m_end = -1; | |
16 | u_start = u_end = -1; | |
17 | v_start = v_end = -1; | |
18 | v_maj = v_min = 0; | |
19 | } | |
20 | ||
21 | void | |
22 | HttpParser::reset(const char *aBuf, int len) | |
23 | { | |
24 | clear(); // empty the state. | |
25 | state = HTTP_PARSE_NEW; | |
26 | buf = aBuf; | |
27 | bufsiz = len; | |
28 | debugs(74, 5, HERE << "Request buffer is " << buf); | |
29 | } | |
30 | ||
31 | int | |
32 | HttpParser::parseRequestFirstLine() | |
33 | { | |
34 | int second_word = -1; // track the suspected URI start | |
35 | int first_whitespace = -1, last_whitespace = -1; // track the first and last SP byte | |
36 | int line_end = -1; // tracks the last byte BEFORE terminal \r\n or \n sequence | |
37 | ||
38 | debugs(74, 5, HERE << "parsing possible request: " << buf); | |
39 | ||
40 | // Single-pass parse: (provided we have the whole line anyways) | |
41 | ||
42 | req_start = 0; | |
43 | if (Config.onoff.relaxed_header_parser) { | |
44 | if (Config.onoff.relaxed_header_parser < 0 && buf[req_start] == ' ') | |
45 | debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " << | |
46 | "Whitespace bytes received ahead of method. " << | |
47 | "Ignored due to relaxed_header_parser."); | |
48 | // Be tolerant of prefix spaces (other bytes are valid method values) | |
49 | for (; req_start < bufsiz && buf[req_start] == ' '; req_start++); | |
50 | } | |
51 | req_end = -1; | |
52 | for (int i = 0; i < bufsiz; i++) { | |
53 | // track first and last whitespace (SP only) | |
54 | if (buf[i] == ' ') { | |
55 | last_whitespace = i; | |
56 | if (first_whitespace < req_start) | |
57 | first_whitespace = i; | |
58 | } | |
59 | ||
60 | // track next non-SP/non-HT byte after first_whitespace | |
61 | if (second_word < first_whitespace && buf[i] != ' ' && buf[i] != '\t') { | |
62 | second_word = i; | |
63 | } | |
64 | ||
65 | // locate line terminator | |
66 | if (buf[i] == '\n') { | |
67 | req_end = i; | |
68 | line_end = i - 1; | |
69 | break; | |
70 | } | |
71 | if (i < bufsiz - 1 && buf[i] == '\r') { | |
72 | if (Config.onoff.relaxed_header_parser) { | |
73 | if (Config.onoff.relaxed_header_parser < 0 && buf[i + 1] == '\r') | |
74 | debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " << | |
75 | "Series of carriage-return bytes received prior to line terminator. " << | |
76 | "Ignored due to relaxed_header_parser."); | |
77 | ||
78 | // Be tolerant of invalid multiple \r prior to terminal \n | |
79 | if (buf[i + 1] == '\n' || buf[i + 1] == '\r') | |
80 | line_end = i - 1; | |
81 | while (i < bufsiz - 1 && buf[i + 1] == '\r') | |
82 | i++; | |
83 | ||
84 | if (buf[i + 1] == '\n') { | |
85 | req_end = i + 1; | |
86 | break; | |
87 | } | |
88 | } else { | |
89 | if (buf[i + 1] == '\n') { | |
90 | req_end = i + 1; | |
91 | line_end = i - 1; | |
92 | break; | |
93 | } | |
94 | } | |
95 | ||
96 | // RFC 2616 section 5.1 | |
97 | // "No CR or LF is allowed except in the final CRLF sequence" | |
98 | request_parse_status = HTTP_BAD_REQUEST; | |
99 | return -1; | |
100 | } | |
101 | } | |
102 | if (req_end == -1) { | |
103 | debugs(74, 5, "Parser: retval 0: from " << req_start << | |
104 | "->" << req_end << ": needs more data to complete first line."); | |
105 | return 0; | |
106 | } | |
107 | ||
108 | // NP: we have now seen EOL, more-data (0) cannot occur. | |
109 | // From here on any failure is -1, success is 1 | |
110 | ||
111 | ||
112 | // Input Validation: | |
113 | ||
114 | // Process what we now know about the line structure into field offsets | |
115 | // generating HTTP status for any aborts as we go. | |
116 | ||
117 | // First non-whitespace = beginning of method | |
118 | if (req_start > line_end) { | |
119 | request_parse_status = HTTP_BAD_REQUEST; | |
120 | return -1; | |
121 | } | |
122 | m_start = req_start; | |
123 | ||
124 | // First whitespace = end of method | |
125 | if (first_whitespace > line_end || first_whitespace < req_start) { | |
126 | request_parse_status = HTTP_BAD_REQUEST; // no method | |
127 | return -1; | |
128 | } | |
129 | m_end = first_whitespace - 1; | |
130 | if (m_end < m_start) { | |
131 | request_parse_status = HTTP_BAD_REQUEST; // missing URI? | |
132 | return -1; | |
133 | } | |
134 | ||
135 | // First non-whitespace after first SP = beginning of URL+Version | |
136 | if (second_word > line_end || second_word < req_start) { | |
137 | request_parse_status = HTTP_BAD_REQUEST; // missing URI | |
138 | return -1; | |
139 | } | |
140 | u_start = second_word; | |
141 | ||
142 | // RFC 1945: SP and version following URI are optional, marking version 0.9 | |
143 | // we identify this by the last whitespace being earlier than URI start | |
144 | if (last_whitespace < second_word && last_whitespace >= req_start) { | |
145 | v_maj = 0; | |
146 | v_min = 9; | |
147 | u_end = line_end; | |
148 | request_parse_status = HTTP_OK; // HTTP/0.9 | |
149 | return 1; | |
150 | } else { | |
151 | // otherwise last whitespace is somewhere after end of URI. | |
152 | u_end = last_whitespace; | |
153 | // crop any trailing whitespace in the area we think of as URI | |
154 | for (; u_end >= u_start && xisspace(buf[u_end]); u_end--); | |
155 | } | |
156 | if (u_end < u_start) { | |
157 | request_parse_status = HTTP_BAD_REQUEST; // missing URI | |
158 | return -1; | |
159 | } | |
160 | ||
161 | // Last whitespace SP = before start of protocol/version | |
162 | if (last_whitespace >= line_end) { | |
163 | request_parse_status = HTTP_BAD_REQUEST; // missing version | |
164 | return -1; | |
165 | } | |
166 | v_start = last_whitespace + 1; | |
167 | v_end = line_end; | |
168 | ||
169 | // We only accept HTTP protocol requests right now. | |
170 | // TODO: accept other protocols; RFC 2326 (RTSP protocol) etc | |
171 | if ((v_end - v_start +1) < 5 || strncasecmp(&buf[v_start], "HTTP/", 5) != 0) { | |
172 | #if USE_HTTP_VIOLATIONS | |
173 | // being lax; old parser accepted strange versions | |
174 | // there is a LOT of cases which are ambiguous, therefore we cannot use relaxed_header_parser here. | |
175 | v_maj = 0; | |
176 | v_min = 9; | |
177 | u_end = line_end; | |
178 | request_parse_status = HTTP_OK; // treat as HTTP/0.9 | |
179 | return 1; | |
180 | #else | |
181 | request_parse_status = HTTP_HTTP_VERSION_NOT_SUPPORTED; // protocol not supported / implemented. | |
182 | return -1; | |
183 | #endif | |
184 | } | |
185 | ||
186 | int i = v_start + sizeof("HTTP/") -1; | |
187 | ||
188 | /* next should be 1 or more digits */ | |
189 | if (!isdigit(buf[i])) { | |
190 | request_parse_status = HTTP_HTTP_VERSION_NOT_SUPPORTED; | |
191 | return -1; | |
192 | } | |
193 | int maj = 0; | |
194 | for (; i <= line_end && (isdigit(buf[i])) && maj < 65536; i++) { | |
195 | maj = maj * 10; | |
196 | maj = maj + (buf[i]) - '0'; | |
197 | } | |
198 | // catch too-big values or missing remainders | |
199 | if (maj >= 65536 || i > line_end) { | |
200 | request_parse_status = HTTP_HTTP_VERSION_NOT_SUPPORTED; | |
201 | return -1; | |
202 | } | |
203 | v_maj = maj; | |
204 | ||
205 | /* next should be .; we -have- to have this as we have a whole line.. */ | |
206 | if (buf[i] != '.') { | |
207 | request_parse_status = HTTP_HTTP_VERSION_NOT_SUPPORTED; | |
208 | return -1; | |
209 | } | |
210 | // catch missing minor part | |
211 | if (++i > line_end) { | |
212 | request_parse_status = HTTP_HTTP_VERSION_NOT_SUPPORTED; | |
213 | return -1; | |
214 | } | |
215 | /* next should be one or more digits */ | |
216 | if (!isdigit(buf[i])) { | |
217 | request_parse_status = HTTP_HTTP_VERSION_NOT_SUPPORTED; | |
218 | return -1; | |
219 | } | |
220 | int min = 0; | |
221 | for (; i <= line_end && (isdigit(buf[i])) && min < 65536; i++) { | |
222 | min = min * 10; | |
223 | min = min + (buf[i]) - '0'; | |
224 | } | |
225 | // catch too-big values or trailing garbage | |
226 | if (min >= 65536 || i < line_end) { | |
227 | request_parse_status = HTTP_HTTP_VERSION_NOT_SUPPORTED; | |
228 | return -1; | |
229 | } | |
230 | v_min = min; | |
231 | ||
232 | /* | |
233 | * Rightio - we have all the schtuff. Return true; we've got enough. | |
234 | */ | |
235 | request_parse_status = HTTP_OK; | |
236 | return 1; | |
237 | } | |
238 | ||
239 | int | |
240 | HttpParserParseReqLine(HttpParser *hmsg) | |
241 | { | |
242 | PROF_start(HttpParserParseReqLine); | |
243 | int retcode = hmsg->parseRequestFirstLine(); | |
244 | debugs(74, 5, "Parser: retval " << retcode << ": from " << hmsg->req_start << | |
245 | "->" << hmsg->req_end << ": method " << hmsg->m_start << "->" << | |
246 | hmsg->m_end << "; url " << hmsg->u_start << "->" << hmsg->u_end << | |
247 | "; version " << hmsg->v_start << "->" << hmsg->v_end << " (" << hmsg->v_maj << | |
248 | "/" << hmsg->v_min << ")"); | |
249 | PROF_stop(HttpParserParseReqLine); | |
250 | return retcode; | |
251 | } | |
252 | ||
253 | #if MSGDODEBUG | |
254 | /* XXX This should eventually turn into something inlined or #define'd */ | |
255 | int | |
256 | HttpParserReqSz(HttpParser *hp) | |
257 | { | |
258 | assert(hp->state == HTTP_PARSE_NEW); | |
259 | assert(hp->req_start != -1); | |
260 | assert(hp->req_end != -1); | |
261 | return hp->req_end - hp->req_start + 1; | |
262 | } | |
263 | ||
264 | /* | |
265 | * This +1 makes it 'right' but won't make any sense if | |
266 | * there's a 0 byte header? This won't happen normally - a valid header | |
267 | * is at -least- a blank line (\n, or \r\n.) | |
268 | */ | |
269 | int | |
270 | HttpParserHdrSz(HttpParser *hp) | |
271 | { | |
272 | assert(hp->state == HTTP_PARSE_NEW); | |
273 | assert(hp->hdr_start != -1); | |
274 | assert(hp->hdr_end != -1); | |
275 | return hp->hdr_end - hp->hdr_start + 1; | |
276 | } | |
277 | ||
278 | const char * | |
279 | HttpParserHdrBuf(HttpParser *hp) | |
280 | { | |
281 | assert(hp->state == HTTP_PARSE_NEW); | |
282 | assert(hp->hdr_start != -1); | |
283 | assert(hp->hdr_end != -1); | |
284 | return hp->buf + hp->hdr_start; | |
285 | } | |
286 | ||
287 | int | |
288 | HttpParserRequestLen(HttpParser *hp) | |
289 | { | |
290 | return hp->hdr_end - hp->req_start + 1; | |
291 | } | |
292 | #endif | |
293 |