]>
Commit | Line | Data |
---|---|---|
f7f3304a | 1 | #include "squid.h" |
4c14658e | 2 | #include "Debug.h" |
5bc8fb2c | 3 | #include "http/Http1Parser.h" |
582c2af2 | 4 | #include "profiler/Profiler.h" |
4d5904f7 | 5 | #include "SquidConfig.h" |
4c14658e AJ |
6 | |
7 | void | |
bb86dcd4 | 8 | Http::Http1Parser::clear() |
4c14658e | 9 | { |
b6a7fc85 | 10 | completedState_ = HTTP_PARSE_NONE; |
955394ce | 11 | request_parse_status = Http::scNone; |
4c14658e AJ |
12 | buf = NULL; |
13 | bufsiz = 0; | |
b6a7fc85 | 14 | parseOffset_ = 0; |
74f478f8 | 15 | req.start = req.end = -1; |
4c14658e | 16 | hdr_start = hdr_end = -1; |
74f478f8 AJ |
17 | req.m_start = req.m_end = -1; |
18 | req.u_start = req.u_end = -1; | |
19 | req.v_start = req.v_end = -1; | |
5aedd08d | 20 | msgProtocol_ = AnyP::ProtocolVersion(); |
4c14658e AJ |
21 | } |
22 | ||
23 | void | |
bb86dcd4 | 24 | Http::Http1Parser::reset(const char *aBuf, int len) |
4c14658e AJ |
25 | { |
26 | clear(); // empty the state. | |
b6a7fc85 | 27 | completedState_ = HTTP_PARSE_NEW; |
4c14658e AJ |
28 | buf = aBuf; |
29 | bufsiz = len; | |
b6a7fc85 | 30 | debugs(74, DBG_DATA, "Request parse " << Raw("buf", buf, bufsiz)); |
4c14658e AJ |
31 | } |
32 | ||
33 | int | |
bb86dcd4 | 34 | Http::Http1Parser::parseRequestFirstLine() |
4c14658e AJ |
35 | { |
36 | int second_word = -1; // track the suspected URI start | |
37 | int first_whitespace = -1, last_whitespace = -1; // track the first and last SP byte | |
38 | int line_end = -1; // tracks the last byte BEFORE terminal \r\n or \n sequence | |
39 | ||
b6a7fc85 AJ |
40 | debugs(74, 5, "parsing possible request: bufsiz=" << bufsiz << ", offset=" << parseOffset_); |
41 | debugs(74, DBG_DATA, Raw("(buf+offset)", buf+parseOffset_, bufsiz-parseOffset_)); | |
4c14658e AJ |
42 | |
43 | // Single-pass parse: (provided we have the whole line anyways) | |
44 | ||
b6a7fc85 AJ |
45 | assert(completedState_ == HTTP_PARSE_NEW); |
46 | ||
47 | req.start = parseOffset_; // avoid re-parsing any portion we managed to complete | |
4c14658e | 48 | if (Config.onoff.relaxed_header_parser) { |
74f478f8 | 49 | if (Config.onoff.relaxed_header_parser < 0 && buf[req.start] == ' ') |
4c14658e AJ |
50 | debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " << |
51 | "Whitespace bytes received ahead of method. " << | |
52 | "Ignored due to relaxed_header_parser."); | |
53 | // Be tolerant of prefix spaces (other bytes are valid method values) | |
95dc7ff4 | 54 | for (; req.start < bufsiz && buf[req.start] == ' '; ++req.start); |
b6a7fc85 | 55 | parseOffset_ = req.start; |
4c14658e | 56 | } |
74f478f8 | 57 | req.end = -1; |
95dc7ff4 | 58 | for (int i = 0; i < bufsiz; ++i) { |
4c14658e AJ |
59 | // track first and last whitespace (SP only) |
60 | if (buf[i] == ' ') { | |
61 | last_whitespace = i; | |
74f478f8 | 62 | if (first_whitespace < req.start) |
4c14658e AJ |
63 | first_whitespace = i; |
64 | } | |
65 | ||
66 | // track next non-SP/non-HT byte after first_whitespace | |
67 | if (second_word < first_whitespace && buf[i] != ' ' && buf[i] != '\t') { | |
68 | second_word = i; | |
69 | } | |
70 | ||
71 | // locate line terminator | |
72 | if (buf[i] == '\n') { | |
74f478f8 | 73 | req.end = i; |
4c14658e AJ |
74 | line_end = i - 1; |
75 | break; | |
76 | } | |
77 | if (i < bufsiz - 1 && buf[i] == '\r') { | |
78 | if (Config.onoff.relaxed_header_parser) { | |
79 | if (Config.onoff.relaxed_header_parser < 0 && buf[i + 1] == '\r') | |
80 | debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " << | |
81 | "Series of carriage-return bytes received prior to line terminator. " << | |
82 | "Ignored due to relaxed_header_parser."); | |
83 | ||
84 | // Be tolerant of invalid multiple \r prior to terminal \n | |
85 | if (buf[i + 1] == '\n' || buf[i + 1] == '\r') | |
86 | line_end = i - 1; | |
87 | while (i < bufsiz - 1 && buf[i + 1] == '\r') | |
95dc7ff4 | 88 | ++i; |
4c14658e AJ |
89 | |
90 | if (buf[i + 1] == '\n') { | |
74f478f8 | 91 | req.end = i + 1; |
4c14658e AJ |
92 | break; |
93 | } | |
94 | } else { | |
95 | if (buf[i + 1] == '\n') { | |
74f478f8 | 96 | req.end = i + 1; |
4c14658e AJ |
97 | line_end = i - 1; |
98 | break; | |
99 | } | |
100 | } | |
101 | ||
102 | // RFC 2616 section 5.1 | |
103 | // "No CR or LF is allowed except in the final CRLF sequence" | |
955394ce | 104 | request_parse_status = Http::scBadRequest; |
4c14658e AJ |
105 | return -1; |
106 | } | |
107 | } | |
74f478f8 AJ |
108 | if (req.end == -1) { |
109 | debugs(74, 5, "Parser: retval 0: from " << req.start << | |
110 | "->" << req.end << ": needs more data to complete first line."); | |
4c14658e AJ |
111 | return 0; |
112 | } | |
113 | ||
114 | // NP: we have now seen EOL, more-data (0) cannot occur. | |
115 | // From here on any failure is -1, success is 1 | |
116 | ||
4c14658e AJ |
117 | // Input Validation: |
118 | ||
119 | // Process what we now know about the line structure into field offsets | |
120 | // generating HTTP status for any aborts as we go. | |
121 | ||
122 | // First non-whitespace = beginning of method | |
74f478f8 | 123 | if (req.start > line_end) { |
955394ce | 124 | request_parse_status = Http::scBadRequest; |
4c14658e AJ |
125 | return -1; |
126 | } | |
74f478f8 | 127 | req.m_start = req.start; |
4c14658e AJ |
128 | |
129 | // First whitespace = end of method | |
74f478f8 | 130 | if (first_whitespace > line_end || first_whitespace < req.start) { |
955394ce | 131 | request_parse_status = Http::scBadRequest; // no method |
4c14658e AJ |
132 | return -1; |
133 | } | |
74f478f8 AJ |
134 | req.m_end = first_whitespace - 1; |
135 | if (req.m_end < req.m_start) { | |
955394ce | 136 | request_parse_status = Http::scBadRequest; // missing URI? |
4c14658e AJ |
137 | return -1; |
138 | } | |
139 | ||
140 | // First non-whitespace after first SP = beginning of URL+Version | |
74f478f8 | 141 | if (second_word > line_end || second_word < req.start) { |
955394ce | 142 | request_parse_status = Http::scBadRequest; // missing URI |
4c14658e AJ |
143 | return -1; |
144 | } | |
74f478f8 | 145 | req.u_start = second_word; |
4c14658e AJ |
146 | |
147 | // RFC 1945: SP and version following URI are optional, marking version 0.9 | |
148 | // we identify this by the last whitespace being earlier than URI start | |
74f478f8 | 149 | if (last_whitespace < second_word && last_whitespace >= req.start) { |
5aedd08d | 150 | msgProtocol_ = Http::ProtocolVersion(0,9); |
74f478f8 | 151 | req.u_end = line_end; |
955394ce | 152 | request_parse_status = Http::scOkay; // HTTP/0.9 |
b6a7fc85 | 153 | parseOffset_ = line_end; |
4c14658e AJ |
154 | return 1; |
155 | } else { | |
156 | // otherwise last whitespace is somewhere after end of URI. | |
74f478f8 | 157 | req.u_end = last_whitespace; |
4c14658e | 158 | // crop any trailing whitespace in the area we think of as URI |
5e263176 | 159 | for (; req.u_end >= req.u_start && xisspace(buf[req.u_end]); --req.u_end); |
4c14658e | 160 | } |
74f478f8 | 161 | if (req.u_end < req.u_start) { |
955394ce | 162 | request_parse_status = Http::scBadRequest; // missing URI |
4c14658e AJ |
163 | return -1; |
164 | } | |
165 | ||
166 | // Last whitespace SP = before start of protocol/version | |
167 | if (last_whitespace >= line_end) { | |
955394ce | 168 | request_parse_status = Http::scBadRequest; // missing version |
4c14658e AJ |
169 | return -1; |
170 | } | |
74f478f8 AJ |
171 | req.v_start = last_whitespace + 1; |
172 | req.v_end = line_end; | |
4c14658e AJ |
173 | |
174 | // We only accept HTTP protocol requests right now. | |
175 | // TODO: accept other protocols; RFC 2326 (RTSP protocol) etc | |
74f478f8 | 176 | if ((req.v_end - req.v_start +1) < 5 || strncasecmp(&buf[req.v_start], "HTTP/", 5) != 0) { |
4c14658e AJ |
177 | #if USE_HTTP_VIOLATIONS |
178 | // being lax; old parser accepted strange versions | |
179 | // there is a LOT of cases which are ambiguous, therefore we cannot use relaxed_header_parser here. | |
5aedd08d | 180 | msgProtocol_ = Http::ProtocolVersion(0,9); |
74f478f8 | 181 | req.u_end = line_end; |
955394ce | 182 | request_parse_status = Http::scOkay; // treat as HTTP/0.9 |
b6a7fc85 AJ |
183 | completedState_ = HTTP_PARSE_FIRST; |
184 | parseOffset_ = req.end; | |
4c14658e AJ |
185 | return 1; |
186 | #else | |
955394ce AJ |
187 | // protocol not supported / implemented. |
188 | request_parse_status = Http::scHttpVersionNotSupported; | |
4c14658e AJ |
189 | return -1; |
190 | #endif | |
191 | } | |
5aedd08d | 192 | msgProtocol_.protocol = AnyP::PROTO_HTTP; |
4c14658e | 193 | |
74f478f8 | 194 | int i = req.v_start + sizeof("HTTP/") -1; |
4c14658e AJ |
195 | |
196 | /* next should be 1 or more digits */ | |
197 | if (!isdigit(buf[i])) { | |
955394ce | 198 | request_parse_status = Http::scHttpVersionNotSupported; |
4c14658e AJ |
199 | return -1; |
200 | } | |
201 | int maj = 0; | |
95dc7ff4 | 202 | for (; i <= line_end && (isdigit(buf[i])) && maj < 65536; ++i) { |
4c14658e AJ |
203 | maj = maj * 10; |
204 | maj = maj + (buf[i]) - '0'; | |
205 | } | |
206 | // catch too-big values or missing remainders | |
207 | if (maj >= 65536 || i > line_end) { | |
955394ce | 208 | request_parse_status = Http::scHttpVersionNotSupported; |
4c14658e AJ |
209 | return -1; |
210 | } | |
5aedd08d | 211 | msgProtocol_.major = maj; |
4c14658e AJ |
212 | |
213 | /* next should be .; we -have- to have this as we have a whole line.. */ | |
214 | if (buf[i] != '.') { | |
955394ce | 215 | request_parse_status = Http::scHttpVersionNotSupported; |
4c14658e AJ |
216 | return -1; |
217 | } | |
218 | // catch missing minor part | |
219 | if (++i > line_end) { | |
955394ce | 220 | request_parse_status = Http::scHttpVersionNotSupported; |
4c14658e AJ |
221 | return -1; |
222 | } | |
223 | /* next should be one or more digits */ | |
224 | if (!isdigit(buf[i])) { | |
955394ce | 225 | request_parse_status = Http::scHttpVersionNotSupported; |
4c14658e AJ |
226 | return -1; |
227 | } | |
228 | int min = 0; | |
95dc7ff4 | 229 | for (; i <= line_end && (isdigit(buf[i])) && min < 65536; ++i) { |
4c14658e AJ |
230 | min = min * 10; |
231 | min = min + (buf[i]) - '0'; | |
232 | } | |
233 | // catch too-big values or trailing garbage | |
234 | if (min >= 65536 || i < line_end) { | |
955394ce | 235 | request_parse_status = Http::scHttpVersionNotSupported; |
4c14658e AJ |
236 | return -1; |
237 | } | |
5aedd08d | 238 | msgProtocol_.minor = min; |
4c14658e AJ |
239 | |
240 | /* | |
241 | * Rightio - we have all the schtuff. Return true; we've got enough. | |
242 | */ | |
955394ce | 243 | request_parse_status = Http::scOkay; |
b6a7fc85 AJ |
244 | parseOffset_ = req.end+1; // req.end is the \n byte. Next parse step needs to start *after* that byte. |
245 | completedState_ = HTTP_PARSE_FIRST; | |
4c14658e AJ |
246 | return 1; |
247 | } | |
248 | ||
87abd755 | 249 | bool |
bb86dcd4 | 250 | Http::Http1Parser::parseRequest() |
4c14658e AJ |
251 | { |
252 | PROF_start(HttpParserParseReqLine); | |
afff15b2 AJ |
253 | int retcode = parseRequestFirstLine(); |
254 | debugs(74, 5, "Parser: retval " << retcode << ": from " << req.start << | |
255 | "->" << req.end << ": method " << req.m_start << "->" << | |
256 | req.m_end << "; url " << req.u_start << "->" << req.u_end << | |
5aedd08d | 257 | "; proto-version " << req.v_start << "->" << req.v_end << " (" << msgProtocol_ << ")"); |
4c14658e | 258 | PROF_stop(HttpParserParseReqLine); |
87abd755 AJ |
259 | |
260 | if (retcode != 0) | |
261 | completedState_ = HTTP_PARSE_DONE; | |
262 | ||
263 | return (retcode > 0); | |
4c14658e | 264 | } |