]> git.ipfire.org Git - thirdparty/squid.git/blob - src/HttpParser.cc
SourceLayout: shuffle StatusCode.h to http/StatusCode.h
[thirdparty/squid.git] / src / HttpParser.cc
1 #include "squid.h"
2 #include "Debug.h"
3 #include "HttpParser.h"
4 #include "profiler/Profiler.h"
5 #include "SquidConfig.h"
6
7 void
8 HttpParser::clear()
9 {
10 state = HTTP_PARSE_NONE;
11 request_parse_status = Http::scNone;
12 buf = NULL;
13 bufsiz = 0;
14 req.start = req.end = -1;
15 hdr_start = hdr_end = -1;
16 req.m_start = req.m_end = -1;
17 req.u_start = req.u_end = -1;
18 req.v_start = req.v_end = -1;
19 req.v_maj = req.v_min = 0;
20 }
21
22 void
23 HttpParser::reset(const char *aBuf, int len)
24 {
25 clear(); // empty the state.
26 state = HTTP_PARSE_NEW;
27 buf = aBuf;
28 bufsiz = len;
29 debugs(74, 5, HERE << "Request buffer is " << buf);
30 }
31
32 int
33 HttpParser::parseRequestFirstLine()
34 {
35 int second_word = -1; // track the suspected URI start
36 int first_whitespace = -1, last_whitespace = -1; // track the first and last SP byte
37 int line_end = -1; // tracks the last byte BEFORE terminal \r\n or \n sequence
38
39 debugs(74, 5, HERE << "parsing possible request: " << buf);
40
41 // Single-pass parse: (provided we have the whole line anyways)
42
43 req.start = 0;
44 if (Config.onoff.relaxed_header_parser) {
45 if (Config.onoff.relaxed_header_parser < 0 && buf[req.start] == ' ')
46 debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " <<
47 "Whitespace bytes received ahead of method. " <<
48 "Ignored due to relaxed_header_parser.");
49 // Be tolerant of prefix spaces (other bytes are valid method values)
50 for (; req.start < bufsiz && buf[req.start] == ' '; ++req.start);
51 }
52 req.end = -1;
53 for (int i = 0; i < bufsiz; ++i) {
54 // track first and last whitespace (SP only)
55 if (buf[i] == ' ') {
56 last_whitespace = i;
57 if (first_whitespace < req.start)
58 first_whitespace = i;
59 }
60
61 // track next non-SP/non-HT byte after first_whitespace
62 if (second_word < first_whitespace && buf[i] != ' ' && buf[i] != '\t') {
63 second_word = i;
64 }
65
66 // locate line terminator
67 if (buf[i] == '\n') {
68 req.end = i;
69 line_end = i - 1;
70 break;
71 }
72 if (i < bufsiz - 1 && buf[i] == '\r') {
73 if (Config.onoff.relaxed_header_parser) {
74 if (Config.onoff.relaxed_header_parser < 0 && buf[i + 1] == '\r')
75 debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " <<
76 "Series of carriage-return bytes received prior to line terminator. " <<
77 "Ignored due to relaxed_header_parser.");
78
79 // Be tolerant of invalid multiple \r prior to terminal \n
80 if (buf[i + 1] == '\n' || buf[i + 1] == '\r')
81 line_end = i - 1;
82 while (i < bufsiz - 1 && buf[i + 1] == '\r')
83 ++i;
84
85 if (buf[i + 1] == '\n') {
86 req.end = i + 1;
87 break;
88 }
89 } else {
90 if (buf[i + 1] == '\n') {
91 req.end = i + 1;
92 line_end = i - 1;
93 break;
94 }
95 }
96
97 // RFC 2616 section 5.1
98 // "No CR or LF is allowed except in the final CRLF sequence"
99 request_parse_status = Http::scBadRequest;
100 return -1;
101 }
102 }
103 if (req.end == -1) {
104 debugs(74, 5, "Parser: retval 0: from " << req.start <<
105 "->" << req.end << ": needs more data to complete first line.");
106 return 0;
107 }
108
109 // NP: we have now seen EOL, more-data (0) cannot occur.
110 // From here on any failure is -1, success is 1
111
112 // Input Validation:
113
114 // Process what we now know about the line structure into field offsets
115 // generating HTTP status for any aborts as we go.
116
117 // First non-whitespace = beginning of method
118 if (req.start > line_end) {
119 request_parse_status = Http::scBadRequest;
120 return -1;
121 }
122 req.m_start = req.start;
123
124 // First whitespace = end of method
125 if (first_whitespace > line_end || first_whitespace < req.start) {
126 request_parse_status = Http::scBadRequest; // no method
127 return -1;
128 }
129 req.m_end = first_whitespace - 1;
130 if (req.m_end < req.m_start) {
131 request_parse_status = Http::scBadRequest; // missing URI?
132 return -1;
133 }
134
135 // First non-whitespace after first SP = beginning of URL+Version
136 if (second_word > line_end || second_word < req.start) {
137 request_parse_status = Http::scBadRequest; // missing URI
138 return -1;
139 }
140 req.u_start = second_word;
141
142 // RFC 1945: SP and version following URI are optional, marking version 0.9
143 // we identify this by the last whitespace being earlier than URI start
144 if (last_whitespace < second_word && last_whitespace >= req.start) {
145 req.v_maj = 0;
146 req.v_min = 9;
147 req.u_end = line_end;
148 request_parse_status = Http::scOkay; // HTTP/0.9
149 return 1;
150 } else {
151 // otherwise last whitespace is somewhere after end of URI.
152 req.u_end = last_whitespace;
153 // crop any trailing whitespace in the area we think of as URI
154 for (; req.u_end >= req.u_start && xisspace(buf[req.u_end]); --req.u_end);
155 }
156 if (req.u_end < req.u_start) {
157 request_parse_status = Http::scBadRequest; // missing URI
158 return -1;
159 }
160
161 // Last whitespace SP = before start of protocol/version
162 if (last_whitespace >= line_end) {
163 request_parse_status = Http::scBadRequest; // missing version
164 return -1;
165 }
166 req.v_start = last_whitespace + 1;
167 req.v_end = line_end;
168
169 // We only accept HTTP protocol requests right now.
170 // TODO: accept other protocols; RFC 2326 (RTSP protocol) etc
171 if ((req.v_end - req.v_start +1) < 5 || strncasecmp(&buf[req.v_start], "HTTP/", 5) != 0) {
172 #if USE_HTTP_VIOLATIONS
173 // being lax; old parser accepted strange versions
174 // there is a LOT of cases which are ambiguous, therefore we cannot use relaxed_header_parser here.
175 req.v_maj = 0;
176 req.v_min = 9;
177 req.u_end = line_end;
178 request_parse_status = Http::scOkay; // treat as HTTP/0.9
179 return 1;
180 #else
181 // protocol not supported / implemented.
182 request_parse_status = Http::scHttpVersionNotSupported;
183 return -1;
184 #endif
185 }
186
187 int i = req.v_start + sizeof("HTTP/") -1;
188
189 /* next should be 1 or more digits */
190 if (!isdigit(buf[i])) {
191 request_parse_status = Http::scHttpVersionNotSupported;
192 return -1;
193 }
194 int maj = 0;
195 for (; i <= line_end && (isdigit(buf[i])) && maj < 65536; ++i) {
196 maj = maj * 10;
197 maj = maj + (buf[i]) - '0';
198 }
199 // catch too-big values or missing remainders
200 if (maj >= 65536 || i > line_end) {
201 request_parse_status = Http::scHttpVersionNotSupported;
202 return -1;
203 }
204 req.v_maj = maj;
205
206 /* next should be .; we -have- to have this as we have a whole line.. */
207 if (buf[i] != '.') {
208 request_parse_status = Http::scHttpVersionNotSupported;
209 return -1;
210 }
211 // catch missing minor part
212 if (++i > line_end) {
213 request_parse_status = Http::scHttpVersionNotSupported;
214 return -1;
215 }
216 /* next should be one or more digits */
217 if (!isdigit(buf[i])) {
218 request_parse_status = Http::scHttpVersionNotSupported;
219 return -1;
220 }
221 int min = 0;
222 for (; i <= line_end && (isdigit(buf[i])) && min < 65536; ++i) {
223 min = min * 10;
224 min = min + (buf[i]) - '0';
225 }
226 // catch too-big values or trailing garbage
227 if (min >= 65536 || i < line_end) {
228 request_parse_status = Http::scHttpVersionNotSupported;
229 return -1;
230 }
231 req.v_min = min;
232
233 /*
234 * Rightio - we have all the schtuff. Return true; we've got enough.
235 */
236 request_parse_status = Http::scOkay;
237 return 1;
238 }
239
240 int
241 HttpParserParseReqLine(HttpParser *hmsg)
242 {
243 PROF_start(HttpParserParseReqLine);
244 int retcode = hmsg->parseRequestFirstLine();
245 debugs(74, 5, "Parser: retval " << retcode << ": from " << hmsg->req.start <<
246 "->" << hmsg->req.end << ": method " << hmsg->req.m_start << "->" <<
247 hmsg->req.m_end << "; url " << hmsg->req.u_start << "->" << hmsg->req.u_end <<
248 "; version " << hmsg->req.v_start << "->" << hmsg->req.v_end << " (" << hmsg->req.v_maj <<
249 "/" << hmsg->req.v_min << ")");
250 PROF_stop(HttpParserParseReqLine);
251 return retcode;
252 }
253
254 #if MSGDODEBUG
255 /* XXX This should eventually turn into something inlined or #define'd */
256 int
257 HttpParserReqSz(HttpParser *hp)
258 {
259 assert(hp->state == HTTP_PARSE_NEW);
260 assert(hp->req.start != -1);
261 assert(hp->req.end != -1);
262 return hp->req.end - hp->req.start + 1;
263 }
264
265 /*
266 * This +1 makes it 'right' but won't make any sense if
267 * there's a 0 byte header? This won't happen normally - a valid header
268 * is at -least- a blank line (\n, or \r\n.)
269 */
270 int
271 HttpParserHdrSz(HttpParser *hp)
272 {
273 assert(hp->state == HTTP_PARSE_NEW);
274 assert(hp->hdr_start != -1);
275 assert(hp->hdr_end != -1);
276 return hp->hdr_end - hp->hdr_start + 1;
277 }
278
279 const char *
280 HttpParserHdrBuf(HttpParser *hp)
281 {
282 assert(hp->state == HTTP_PARSE_NEW);
283 assert(hp->hdr_start != -1);
284 assert(hp->hdr_end != -1);
285 return hp->buf + hp->hdr_start;
286 }
287
288 int
289 HttpParserRequestLen(HttpParser *hp)
290 {
291 return hp->hdr_end - hp->req.start + 1;
292 }
293 #endif
294