]> git.ipfire.org Git - thirdparty/squid.git/blob - src/HttpParser.cc
Boilerplate: update copyright blurbs on src/
[thirdparty/squid.git] / src / HttpParser.cc
1 /*
2 * Copyright (C) 1996-2014 The Squid Software Foundation and contributors
3 *
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
7 */
8
9 #include "squid.h"
10 #include "Debug.h"
11 #include "HttpParser.h"
12 #include "profiler/Profiler.h"
13 #include "SquidConfig.h"
14
15 void
16 HttpParser::clear()
17 {
18 state = HTTP_PARSE_NONE;
19 request_parse_status = Http::scNone;
20 buf = NULL;
21 bufsiz = 0;
22 req.start = req.end = -1;
23 hdr_start = hdr_end = -1;
24 req.m_start = req.m_end = -1;
25 req.u_start = req.u_end = -1;
26 req.v_start = req.v_end = -1;
27 req.v_maj = req.v_min = 0;
28 }
29
30 void
31 HttpParser::reset(const char *aBuf, int len)
32 {
33 clear(); // empty the state.
34 state = HTTP_PARSE_NEW;
35 buf = aBuf;
36 bufsiz = len;
37 debugs(74, 5, HERE << "Request buffer is " << buf);
38 }
39
40 int
41 HttpParser::parseRequestFirstLine()
42 {
43 int second_word = -1; // track the suspected URI start
44 int first_whitespace = -1, last_whitespace = -1; // track the first and last SP byte
45 int line_end = -1; // tracks the last byte BEFORE terminal \r\n or \n sequence
46
47 debugs(74, 5, HERE << "parsing possible request: " << buf);
48
49 // Single-pass parse: (provided we have the whole line anyways)
50
51 req.start = 0;
52 if (Config.onoff.relaxed_header_parser) {
53 if (Config.onoff.relaxed_header_parser < 0 && buf[req.start] == ' ')
54 debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " <<
55 "Whitespace bytes received ahead of method. " <<
56 "Ignored due to relaxed_header_parser.");
57 // Be tolerant of prefix spaces (other bytes are valid method values)
58 for (; req.start < bufsiz && buf[req.start] == ' '; ++req.start);
59 }
60 req.end = -1;
61 for (int i = 0; i < bufsiz; ++i) {
62 // track first and last whitespace (SP only)
63 if (buf[i] == ' ') {
64 last_whitespace = i;
65 if (first_whitespace < req.start)
66 first_whitespace = i;
67 }
68
69 // track next non-SP/non-HT byte after first_whitespace
70 if (second_word < first_whitespace && buf[i] != ' ' && buf[i] != '\t') {
71 second_word = i;
72 }
73
74 // locate line terminator
75 if (buf[i] == '\n') {
76 req.end = i;
77 line_end = i - 1;
78 break;
79 }
80 if (i < bufsiz - 1 && buf[i] == '\r') {
81 if (Config.onoff.relaxed_header_parser) {
82 if (Config.onoff.relaxed_header_parser < 0 && buf[i + 1] == '\r')
83 debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " <<
84 "Series of carriage-return bytes received prior to line terminator. " <<
85 "Ignored due to relaxed_header_parser.");
86
87 // Be tolerant of invalid multiple \r prior to terminal \n
88 if (buf[i + 1] == '\n' || buf[i + 1] == '\r')
89 line_end = i - 1;
90 while (i < bufsiz - 1 && buf[i + 1] == '\r')
91 ++i;
92
93 if (buf[i + 1] == '\n') {
94 req.end = i + 1;
95 break;
96 }
97 } else {
98 if (buf[i + 1] == '\n') {
99 req.end = i + 1;
100 line_end = i - 1;
101 break;
102 }
103 }
104
105 // RFC 2616 section 5.1
106 // "No CR or LF is allowed except in the final CRLF sequence"
107 request_parse_status = Http::scBadRequest;
108 return -1;
109 }
110 }
111 if (req.end == -1) {
112 debugs(74, 5, "Parser: retval 0: from " << req.start <<
113 "->" << req.end << ": needs more data to complete first line.");
114 return 0;
115 }
116
117 // NP: we have now seen EOL, more-data (0) cannot occur.
118 // From here on any failure is -1, success is 1
119
120 // Input Validation:
121
122 // Process what we now know about the line structure into field offsets
123 // generating HTTP status for any aborts as we go.
124
125 // First non-whitespace = beginning of method
126 if (req.start > line_end) {
127 request_parse_status = Http::scBadRequest;
128 return -1;
129 }
130 req.m_start = req.start;
131
132 // First whitespace = end of method
133 if (first_whitespace > line_end || first_whitespace < req.start) {
134 request_parse_status = Http::scBadRequest; // no method
135 return -1;
136 }
137 req.m_end = first_whitespace - 1;
138 if (req.m_end < req.m_start) {
139 request_parse_status = Http::scBadRequest; // missing URI?
140 return -1;
141 }
142
143 // First non-whitespace after first SP = beginning of URL+Version
144 if (second_word > line_end || second_word < req.start) {
145 request_parse_status = Http::scBadRequest; // missing URI
146 return -1;
147 }
148 req.u_start = second_word;
149
150 // RFC 1945: SP and version following URI are optional, marking version 0.9
151 // we identify this by the last whitespace being earlier than URI start
152 if (last_whitespace < second_word && last_whitespace >= req.start) {
153 req.v_maj = 0;
154 req.v_min = 9;
155 req.u_end = line_end;
156 request_parse_status = Http::scOkay; // HTTP/0.9
157 return 1;
158 } else {
159 // otherwise last whitespace is somewhere after end of URI.
160 req.u_end = last_whitespace;
161 // crop any trailing whitespace in the area we think of as URI
162 for (; req.u_end >= req.u_start && xisspace(buf[req.u_end]); --req.u_end);
163 }
164 if (req.u_end < req.u_start) {
165 request_parse_status = Http::scBadRequest; // missing URI
166 return -1;
167 }
168
169 // Last whitespace SP = before start of protocol/version
170 if (last_whitespace >= line_end) {
171 request_parse_status = Http::scBadRequest; // missing version
172 return -1;
173 }
174 req.v_start = last_whitespace + 1;
175 req.v_end = line_end;
176
177 // We only accept HTTP protocol requests right now.
178 // TODO: accept other protocols; RFC 2326 (RTSP protocol) etc
179 if ((req.v_end - req.v_start +1) < 5 || strncasecmp(&buf[req.v_start], "HTTP/", 5) != 0) {
180 #if USE_HTTP_VIOLATIONS
181 // being lax; old parser accepted strange versions
182 // there is a LOT of cases which are ambiguous, therefore we cannot use relaxed_header_parser here.
183 req.v_maj = 0;
184 req.v_min = 9;
185 req.u_end = line_end;
186 request_parse_status = Http::scOkay; // treat as HTTP/0.9
187 return 1;
188 #else
189 // protocol not supported / implemented.
190 request_parse_status = Http::scHttpVersionNotSupported;
191 return -1;
192 #endif
193 }
194
195 int i = req.v_start + sizeof("HTTP/") -1;
196
197 /* next should be 1 or more digits */
198 if (!isdigit(buf[i])) {
199 request_parse_status = Http::scHttpVersionNotSupported;
200 return -1;
201 }
202 int maj = 0;
203 for (; i <= line_end && (isdigit(buf[i])) && maj < 65536; ++i) {
204 maj = maj * 10;
205 maj = maj + (buf[i]) - '0';
206 }
207 // catch too-big values or missing remainders
208 if (maj >= 65536 || i > line_end) {
209 request_parse_status = Http::scHttpVersionNotSupported;
210 return -1;
211 }
212 req.v_maj = maj;
213
214 /* next should be .; we -have- to have this as we have a whole line.. */
215 if (buf[i] != '.') {
216 request_parse_status = Http::scHttpVersionNotSupported;
217 return -1;
218 }
219 // catch missing minor part
220 if (++i > line_end) {
221 request_parse_status = Http::scHttpVersionNotSupported;
222 return -1;
223 }
224 /* next should be one or more digits */
225 if (!isdigit(buf[i])) {
226 request_parse_status = Http::scHttpVersionNotSupported;
227 return -1;
228 }
229 int min = 0;
230 for (; i <= line_end && (isdigit(buf[i])) && min < 65536; ++i) {
231 min = min * 10;
232 min = min + (buf[i]) - '0';
233 }
234 // catch too-big values or trailing garbage
235 if (min >= 65536 || i < line_end) {
236 request_parse_status = Http::scHttpVersionNotSupported;
237 return -1;
238 }
239 req.v_min = min;
240
241 /*
242 * Rightio - we have all the schtuff. Return true; we've got enough.
243 */
244 request_parse_status = Http::scOkay;
245 return 1;
246 }
247
248 int
249 HttpParserParseReqLine(HttpParser *hmsg)
250 {
251 PROF_start(HttpParserParseReqLine);
252 int retcode = hmsg->parseRequestFirstLine();
253 debugs(74, 5, "Parser: retval " << retcode << ": from " << hmsg->req.start <<
254 "->" << hmsg->req.end << ": method " << hmsg->req.m_start << "->" <<
255 hmsg->req.m_end << "; url " << hmsg->req.u_start << "->" << hmsg->req.u_end <<
256 "; version " << hmsg->req.v_start << "->" << hmsg->req.v_end << " (" << hmsg->req.v_maj <<
257 "/" << hmsg->req.v_min << ")");
258 PROF_stop(HttpParserParseReqLine);
259 return retcode;
260 }
261
262 #if MSGDODEBUG
263 /* XXX This should eventually turn into something inlined or #define'd */
264 int
265 HttpParserReqSz(HttpParser *hp)
266 {
267 assert(hp->state == HTTP_PARSE_NEW);
268 assert(hp->req.start != -1);
269 assert(hp->req.end != -1);
270 return hp->req.end - hp->req.start + 1;
271 }
272
273 /*
274 * This +1 makes it 'right' but won't make any sense if
275 * there's a 0 byte header? This won't happen normally - a valid header
276 * is at -least- a blank line (\n, or \r\n.)
277 */
278 int
279 HttpParserHdrSz(HttpParser *hp)
280 {
281 assert(hp->state == HTTP_PARSE_NEW);
282 assert(hp->hdr_start != -1);
283 assert(hp->hdr_end != -1);
284 return hp->hdr_end - hp->hdr_start + 1;
285 }
286
287 const char *
288 HttpParserHdrBuf(HttpParser *hp)
289 {
290 assert(hp->state == HTTP_PARSE_NEW);
291 assert(hp->hdr_start != -1);
292 assert(hp->hdr_end != -1);
293 return hp->buf + hp->hdr_start;
294 }
295
296 int
297 HttpParserRequestLen(HttpParser *hp)
298 {
299 return hp->hdr_end - hp->req.start + 1;
300 }
301 #endif
302