]>
Commit | Line | Data |
---|---|---|
f7f3304a | 1 | #include "squid.h" |
4c14658e AJ |
2 | #include "Debug.h" |
3 | #include "HttpParser.h" | |
582c2af2 | 4 | #include "profiler/Profiler.h" |
4d5904f7 | 5 | #include "SquidConfig.h" |
4c14658e AJ |
6 | |
7 | void | |
8 | HttpParser::clear() | |
9 | { | |
10 | state = HTTP_PARSE_NONE; | |
955394ce | 11 | request_parse_status = Http::scNone; |
4c14658e AJ |
12 | buf = NULL; |
13 | bufsiz = 0; | |
74f478f8 | 14 | req.start = req.end = -1; |
4c14658e | 15 | hdr_start = hdr_end = -1; |
74f478f8 AJ |
16 | req.m_start = req.m_end = -1; |
17 | req.u_start = req.u_end = -1; | |
18 | req.v_start = req.v_end = -1; | |
19 | req.v_maj = req.v_min = 0; | |
4c14658e AJ |
20 | } |
21 | ||
22 | void | |
23 | HttpParser::reset(const char *aBuf, int len) | |
24 | { | |
25 | clear(); // empty the state. | |
26 | state = HTTP_PARSE_NEW; | |
27 | buf = aBuf; | |
28 | bufsiz = len; | |
29 | debugs(74, 5, HERE << "Request buffer is " << buf); | |
30 | } | |
31 | ||
32 | int | |
33 | HttpParser::parseRequestFirstLine() | |
34 | { | |
35 | int second_word = -1; // track the suspected URI start | |
36 | int first_whitespace = -1, last_whitespace = -1; // track the first and last SP byte | |
37 | int line_end = -1; // tracks the last byte BEFORE terminal \r\n or \n sequence | |
38 | ||
39 | debugs(74, 5, HERE << "parsing possible request: " << buf); | |
40 | ||
41 | // Single-pass parse: (provided we have the whole line anyways) | |
42 | ||
74f478f8 | 43 | req.start = 0; |
4c14658e | 44 | if (Config.onoff.relaxed_header_parser) { |
74f478f8 | 45 | if (Config.onoff.relaxed_header_parser < 0 && buf[req.start] == ' ') |
4c14658e AJ |
46 | debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " << |
47 | "Whitespace bytes received ahead of method. " << | |
48 | "Ignored due to relaxed_header_parser."); | |
49 | // Be tolerant of prefix spaces (other bytes are valid method values) | |
95dc7ff4 | 50 | for (; req.start < bufsiz && buf[req.start] == ' '; ++req.start); |
4c14658e | 51 | } |
74f478f8 | 52 | req.end = -1; |
95dc7ff4 | 53 | for (int i = 0; i < bufsiz; ++i) { |
4c14658e AJ |
54 | // track first and last whitespace (SP only) |
55 | if (buf[i] == ' ') { | |
56 | last_whitespace = i; | |
74f478f8 | 57 | if (first_whitespace < req.start) |
4c14658e AJ |
58 | first_whitespace = i; |
59 | } | |
60 | ||
61 | // track next non-SP/non-HT byte after first_whitespace | |
62 | if (second_word < first_whitespace && buf[i] != ' ' && buf[i] != '\t') { | |
63 | second_word = i; | |
64 | } | |
65 | ||
66 | // locate line terminator | |
67 | if (buf[i] == '\n') { | |
74f478f8 | 68 | req.end = i; |
4c14658e AJ |
69 | line_end = i - 1; |
70 | break; | |
71 | } | |
72 | if (i < bufsiz - 1 && buf[i] == '\r') { | |
73 | if (Config.onoff.relaxed_header_parser) { | |
74 | if (Config.onoff.relaxed_header_parser < 0 && buf[i + 1] == '\r') | |
75 | debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " << | |
76 | "Series of carriage-return bytes received prior to line terminator. " << | |
77 | "Ignored due to relaxed_header_parser."); | |
78 | ||
79 | // Be tolerant of invalid multiple \r prior to terminal \n | |
80 | if (buf[i + 1] == '\n' || buf[i + 1] == '\r') | |
81 | line_end = i - 1; | |
82 | while (i < bufsiz - 1 && buf[i + 1] == '\r') | |
95dc7ff4 | 83 | ++i; |
4c14658e AJ |
84 | |
85 | if (buf[i + 1] == '\n') { | |
74f478f8 | 86 | req.end = i + 1; |
4c14658e AJ |
87 | break; |
88 | } | |
89 | } else { | |
90 | if (buf[i + 1] == '\n') { | |
74f478f8 | 91 | req.end = i + 1; |
4c14658e AJ |
92 | line_end = i - 1; |
93 | break; | |
94 | } | |
95 | } | |
96 | ||
97 | // RFC 2616 section 5.1 | |
98 | // "No CR or LF is allowed except in the final CRLF sequence" | |
955394ce | 99 | request_parse_status = Http::scBadRequest; |
4c14658e AJ |
100 | return -1; |
101 | } | |
102 | } | |
74f478f8 AJ |
103 | if (req.end == -1) { |
104 | debugs(74, 5, "Parser: retval 0: from " << req.start << | |
105 | "->" << req.end << ": needs more data to complete first line."); | |
4c14658e AJ |
106 | return 0; |
107 | } | |
108 | ||
109 | // NP: we have now seen EOL, more-data (0) cannot occur. | |
110 | // From here on any failure is -1, success is 1 | |
111 | ||
4c14658e AJ |
112 | // Input Validation: |
113 | ||
114 | // Process what we now know about the line structure into field offsets | |
115 | // generating HTTP status for any aborts as we go. | |
116 | ||
117 | // First non-whitespace = beginning of method | |
74f478f8 | 118 | if (req.start > line_end) { |
955394ce | 119 | request_parse_status = Http::scBadRequest; |
4c14658e AJ |
120 | return -1; |
121 | } | |
74f478f8 | 122 | req.m_start = req.start; |
4c14658e AJ |
123 | |
124 | // First whitespace = end of method | |
74f478f8 | 125 | if (first_whitespace > line_end || first_whitespace < req.start) { |
955394ce | 126 | request_parse_status = Http::scBadRequest; // no method |
4c14658e AJ |
127 | return -1; |
128 | } | |
74f478f8 AJ |
129 | req.m_end = first_whitespace - 1; |
130 | if (req.m_end < req.m_start) { | |
955394ce | 131 | request_parse_status = Http::scBadRequest; // missing URI? |
4c14658e AJ |
132 | return -1; |
133 | } | |
134 | ||
135 | // First non-whitespace after first SP = beginning of URL+Version | |
74f478f8 | 136 | if (second_word > line_end || second_word < req.start) { |
955394ce | 137 | request_parse_status = Http::scBadRequest; // missing URI |
4c14658e AJ |
138 | return -1; |
139 | } | |
74f478f8 | 140 | req.u_start = second_word; |
4c14658e AJ |
141 | |
142 | // RFC 1945: SP and version following URI are optional, marking version 0.9 | |
143 | // we identify this by the last whitespace being earlier than URI start | |
74f478f8 AJ |
144 | if (last_whitespace < second_word && last_whitespace >= req.start) { |
145 | req.v_maj = 0; | |
146 | req.v_min = 9; | |
147 | req.u_end = line_end; | |
955394ce | 148 | request_parse_status = Http::scOkay; // HTTP/0.9 |
4c14658e AJ |
149 | return 1; |
150 | } else { | |
151 | // otherwise last whitespace is somewhere after end of URI. | |
74f478f8 | 152 | req.u_end = last_whitespace; |
4c14658e | 153 | // crop any trailing whitespace in the area we think of as URI |
5e263176 | 154 | for (; req.u_end >= req.u_start && xisspace(buf[req.u_end]); --req.u_end); |
4c14658e | 155 | } |
74f478f8 | 156 | if (req.u_end < req.u_start) { |
955394ce | 157 | request_parse_status = Http::scBadRequest; // missing URI |
4c14658e AJ |
158 | return -1; |
159 | } | |
160 | ||
161 | // Last whitespace SP = before start of protocol/version | |
162 | if (last_whitespace >= line_end) { | |
955394ce | 163 | request_parse_status = Http::scBadRequest; // missing version |
4c14658e AJ |
164 | return -1; |
165 | } | |
74f478f8 AJ |
166 | req.v_start = last_whitespace + 1; |
167 | req.v_end = line_end; | |
4c14658e AJ |
168 | |
169 | // We only accept HTTP protocol requests right now. | |
170 | // TODO: accept other protocols; RFC 2326 (RTSP protocol) etc | |
74f478f8 | 171 | if ((req.v_end - req.v_start +1) < 5 || strncasecmp(&buf[req.v_start], "HTTP/", 5) != 0) { |
4c14658e AJ |
172 | #if USE_HTTP_VIOLATIONS |
173 | // being lax; old parser accepted strange versions | |
174 | // there is a LOT of cases which are ambiguous, therefore we cannot use relaxed_header_parser here. | |
74f478f8 AJ |
175 | req.v_maj = 0; |
176 | req.v_min = 9; | |
177 | req.u_end = line_end; | |
955394ce | 178 | request_parse_status = Http::scOkay; // treat as HTTP/0.9 |
4c14658e AJ |
179 | return 1; |
180 | #else | |
955394ce AJ |
181 | // protocol not supported / implemented. |
182 | request_parse_status = Http::scHttpVersionNotSupported; | |
4c14658e AJ |
183 | return -1; |
184 | #endif | |
185 | } | |
186 | ||
74f478f8 | 187 | int i = req.v_start + sizeof("HTTP/") -1; |
4c14658e AJ |
188 | |
189 | /* next should be 1 or more digits */ | |
190 | if (!isdigit(buf[i])) { | |
955394ce | 191 | request_parse_status = Http::scHttpVersionNotSupported; |
4c14658e AJ |
192 | return -1; |
193 | } | |
194 | int maj = 0; | |
95dc7ff4 | 195 | for (; i <= line_end && (isdigit(buf[i])) && maj < 65536; ++i) { |
4c14658e AJ |
196 | maj = maj * 10; |
197 | maj = maj + (buf[i]) - '0'; | |
198 | } | |
199 | // catch too-big values or missing remainders | |
200 | if (maj >= 65536 || i > line_end) { | |
955394ce | 201 | request_parse_status = Http::scHttpVersionNotSupported; |
4c14658e AJ |
202 | return -1; |
203 | } | |
74f478f8 | 204 | req.v_maj = maj; |
4c14658e AJ |
205 | |
206 | /* next should be .; we -have- to have this as we have a whole line.. */ | |
207 | if (buf[i] != '.') { | |
955394ce | 208 | request_parse_status = Http::scHttpVersionNotSupported; |
4c14658e AJ |
209 | return -1; |
210 | } | |
211 | // catch missing minor part | |
212 | if (++i > line_end) { | |
955394ce | 213 | request_parse_status = Http::scHttpVersionNotSupported; |
4c14658e AJ |
214 | return -1; |
215 | } | |
216 | /* next should be one or more digits */ | |
217 | if (!isdigit(buf[i])) { | |
955394ce | 218 | request_parse_status = Http::scHttpVersionNotSupported; |
4c14658e AJ |
219 | return -1; |
220 | } | |
221 | int min = 0; | |
95dc7ff4 | 222 | for (; i <= line_end && (isdigit(buf[i])) && min < 65536; ++i) { |
4c14658e AJ |
223 | min = min * 10; |
224 | min = min + (buf[i]) - '0'; | |
225 | } | |
226 | // catch too-big values or trailing garbage | |
227 | if (min >= 65536 || i < line_end) { | |
955394ce | 228 | request_parse_status = Http::scHttpVersionNotSupported; |
4c14658e AJ |
229 | return -1; |
230 | } | |
74f478f8 | 231 | req.v_min = min; |
4c14658e AJ |
232 | |
233 | /* | |
234 | * Rightio - we have all the schtuff. Return true; we've got enough. | |
235 | */ | |
955394ce | 236 | request_parse_status = Http::scOkay; |
4c14658e AJ |
237 | return 1; |
238 | } | |
239 | ||
240 | int | |
241 | HttpParserParseReqLine(HttpParser *hmsg) | |
242 | { | |
243 | PROF_start(HttpParserParseReqLine); | |
244 | int retcode = hmsg->parseRequestFirstLine(); | |
74f478f8 AJ |
245 | debugs(74, 5, "Parser: retval " << retcode << ": from " << hmsg->req.start << |
246 | "->" << hmsg->req.end << ": method " << hmsg->req.m_start << "->" << | |
247 | hmsg->req.m_end << "; url " << hmsg->req.u_start << "->" << hmsg->req.u_end << | |
248 | "; version " << hmsg->req.v_start << "->" << hmsg->req.v_end << " (" << hmsg->req.v_maj << | |
249 | "/" << hmsg->req.v_min << ")"); | |
4c14658e AJ |
250 | PROF_stop(HttpParserParseReqLine); |
251 | return retcode; | |
252 | } | |
253 | ||
254 | #if MSGDODEBUG | |
255 | /* XXX This should eventually turn into something inlined or #define'd */ | |
256 | int | |
257 | HttpParserReqSz(HttpParser *hp) | |
258 | { | |
259 | assert(hp->state == HTTP_PARSE_NEW); | |
74f478f8 AJ |
260 | assert(hp->req.start != -1); |
261 | assert(hp->req.end != -1); | |
262 | return hp->req.end - hp->req.start + 1; | |
4c14658e AJ |
263 | } |
264 | ||
265 | /* | |
266 | * This +1 makes it 'right' but won't make any sense if | |
267 | * there's a 0 byte header? This won't happen normally - a valid header | |
268 | * is at -least- a blank line (\n, or \r\n.) | |
269 | */ | |
270 | int | |
271 | HttpParserHdrSz(HttpParser *hp) | |
272 | { | |
273 | assert(hp->state == HTTP_PARSE_NEW); | |
274 | assert(hp->hdr_start != -1); | |
275 | assert(hp->hdr_end != -1); | |
276 | return hp->hdr_end - hp->hdr_start + 1; | |
277 | } | |
278 | ||
279 | const char * | |
280 | HttpParserHdrBuf(HttpParser *hp) | |
281 | { | |
282 | assert(hp->state == HTTP_PARSE_NEW); | |
283 | assert(hp->hdr_start != -1); | |
284 | assert(hp->hdr_end != -1); | |
285 | return hp->buf + hp->hdr_start; | |
286 | } | |
287 | ||
288 | int | |
289 | HttpParserRequestLen(HttpParser *hp) | |
290 | { | |
74f478f8 | 291 | return hp->hdr_end - hp->req.start + 1; |
4c14658e AJ |
292 | } |
293 | #endif | |
294 |