]> git.ipfire.org Git - thirdparty/squid.git/blame - src/HttpParser.cc
SourceLayout: separate HttpParser from HttpMsg and HttpRequest files
[thirdparty/squid.git] / src / HttpParser.cc
CommitLineData
4c14658e
AJ
1#include "config.h"
2#include "Debug.h"
3#include "HttpParser.h"
4#include "structs.h"
5
6void
7HttpParser::clear()
8{
9 state = HTTP_PARSE_NONE;
10 request_parse_status = HTTP_STATUS_NONE;
11 buf = NULL;
12 bufsiz = 0;
13 req_start = req_end = -1;
14 hdr_start = hdr_end = -1;
15 m_start = m_end = -1;
16 u_start = u_end = -1;
17 v_start = v_end = -1;
18 v_maj = v_min = 0;
19}
20
21void
22HttpParser::reset(const char *aBuf, int len)
23{
24 clear(); // empty the state.
25 state = HTTP_PARSE_NEW;
26 buf = aBuf;
27 bufsiz = len;
28 debugs(74, 5, HERE << "Request buffer is " << buf);
29}
30
31int
32HttpParser::parseRequestFirstLine()
33{
34 int second_word = -1; // track the suspected URI start
35 int first_whitespace = -1, last_whitespace = -1; // track the first and last SP byte
36 int line_end = -1; // tracks the last byte BEFORE terminal \r\n or \n sequence
37
38 debugs(74, 5, HERE << "parsing possible request: " << buf);
39
40 // Single-pass parse: (provided we have the whole line anyways)
41
42 req_start = 0;
43 if (Config.onoff.relaxed_header_parser) {
44 if (Config.onoff.relaxed_header_parser < 0 && buf[req_start] == ' ')
45 debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " <<
46 "Whitespace bytes received ahead of method. " <<
47 "Ignored due to relaxed_header_parser.");
48 // Be tolerant of prefix spaces (other bytes are valid method values)
49 for (; req_start < bufsiz && buf[req_start] == ' '; req_start++);
50 }
51 req_end = -1;
52 for (int i = 0; i < bufsiz; i++) {
53 // track first and last whitespace (SP only)
54 if (buf[i] == ' ') {
55 last_whitespace = i;
56 if (first_whitespace < req_start)
57 first_whitespace = i;
58 }
59
60 // track next non-SP/non-HT byte after first_whitespace
61 if (second_word < first_whitespace && buf[i] != ' ' && buf[i] != '\t') {
62 second_word = i;
63 }
64
65 // locate line terminator
66 if (buf[i] == '\n') {
67 req_end = i;
68 line_end = i - 1;
69 break;
70 }
71 if (i < bufsiz - 1 && buf[i] == '\r') {
72 if (Config.onoff.relaxed_header_parser) {
73 if (Config.onoff.relaxed_header_parser < 0 && buf[i + 1] == '\r')
74 debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " <<
75 "Series of carriage-return bytes received prior to line terminator. " <<
76 "Ignored due to relaxed_header_parser.");
77
78 // Be tolerant of invalid multiple \r prior to terminal \n
79 if (buf[i + 1] == '\n' || buf[i + 1] == '\r')
80 line_end = i - 1;
81 while (i < bufsiz - 1 && buf[i + 1] == '\r')
82 i++;
83
84 if (buf[i + 1] == '\n') {
85 req_end = i + 1;
86 break;
87 }
88 } else {
89 if (buf[i + 1] == '\n') {
90 req_end = i + 1;
91 line_end = i - 1;
92 break;
93 }
94 }
95
96 // RFC 2616 section 5.1
97 // "No CR or LF is allowed except in the final CRLF sequence"
98 request_parse_status = HTTP_BAD_REQUEST;
99 return -1;
100 }
101 }
102 if (req_end == -1) {
103 debugs(74, 5, "Parser: retval 0: from " << req_start <<
104 "->" << req_end << ": needs more data to complete first line.");
105 return 0;
106 }
107
108 // NP: we have now seen EOL, more-data (0) cannot occur.
109 // From here on any failure is -1, success is 1
110
111
112 // Input Validation:
113
114 // Process what we now know about the line structure into field offsets
115 // generating HTTP status for any aborts as we go.
116
117 // First non-whitespace = beginning of method
118 if (req_start > line_end) {
119 request_parse_status = HTTP_BAD_REQUEST;
120 return -1;
121 }
122 m_start = req_start;
123
124 // First whitespace = end of method
125 if (first_whitespace > line_end || first_whitespace < req_start) {
126 request_parse_status = HTTP_BAD_REQUEST; // no method
127 return -1;
128 }
129 m_end = first_whitespace - 1;
130 if (m_end < m_start) {
131 request_parse_status = HTTP_BAD_REQUEST; // missing URI?
132 return -1;
133 }
134
135 // First non-whitespace after first SP = beginning of URL+Version
136 if (second_word > line_end || second_word < req_start) {
137 request_parse_status = HTTP_BAD_REQUEST; // missing URI
138 return -1;
139 }
140 u_start = second_word;
141
142 // RFC 1945: SP and version following URI are optional, marking version 0.9
143 // we identify this by the last whitespace being earlier than URI start
144 if (last_whitespace < second_word && last_whitespace >= req_start) {
145 v_maj = 0;
146 v_min = 9;
147 u_end = line_end;
148 request_parse_status = HTTP_OK; // HTTP/0.9
149 return 1;
150 } else {
151 // otherwise last whitespace is somewhere after end of URI.
152 u_end = last_whitespace;
153 // crop any trailing whitespace in the area we think of as URI
154 for (; u_end >= u_start && xisspace(buf[u_end]); u_end--);
155 }
156 if (u_end < u_start) {
157 request_parse_status = HTTP_BAD_REQUEST; // missing URI
158 return -1;
159 }
160
161 // Last whitespace SP = before start of protocol/version
162 if (last_whitespace >= line_end) {
163 request_parse_status = HTTP_BAD_REQUEST; // missing version
164 return -1;
165 }
166 v_start = last_whitespace + 1;
167 v_end = line_end;
168
169 // We only accept HTTP protocol requests right now.
170 // TODO: accept other protocols; RFC 2326 (RTSP protocol) etc
171 if ((v_end - v_start +1) < 5 || strncasecmp(&buf[v_start], "HTTP/", 5) != 0) {
172#if USE_HTTP_VIOLATIONS
173 // being lax; old parser accepted strange versions
174 // there is a LOT of cases which are ambiguous, therefore we cannot use relaxed_header_parser here.
175 v_maj = 0;
176 v_min = 9;
177 u_end = line_end;
178 request_parse_status = HTTP_OK; // treat as HTTP/0.9
179 return 1;
180#else
181 request_parse_status = HTTP_HTTP_VERSION_NOT_SUPPORTED; // protocol not supported / implemented.
182 return -1;
183#endif
184 }
185
186 int i = v_start + sizeof("HTTP/") -1;
187
188 /* next should be 1 or more digits */
189 if (!isdigit(buf[i])) {
190 request_parse_status = HTTP_HTTP_VERSION_NOT_SUPPORTED;
191 return -1;
192 }
193 int maj = 0;
194 for (; i <= line_end && (isdigit(buf[i])) && maj < 65536; i++) {
195 maj = maj * 10;
196 maj = maj + (buf[i]) - '0';
197 }
198 // catch too-big values or missing remainders
199 if (maj >= 65536 || i > line_end) {
200 request_parse_status = HTTP_HTTP_VERSION_NOT_SUPPORTED;
201 return -1;
202 }
203 v_maj = maj;
204
205 /* next should be .; we -have- to have this as we have a whole line.. */
206 if (buf[i] != '.') {
207 request_parse_status = HTTP_HTTP_VERSION_NOT_SUPPORTED;
208 return -1;
209 }
210 // catch missing minor part
211 if (++i > line_end) {
212 request_parse_status = HTTP_HTTP_VERSION_NOT_SUPPORTED;
213 return -1;
214 }
215 /* next should be one or more digits */
216 if (!isdigit(buf[i])) {
217 request_parse_status = HTTP_HTTP_VERSION_NOT_SUPPORTED;
218 return -1;
219 }
220 int min = 0;
221 for (; i <= line_end && (isdigit(buf[i])) && min < 65536; i++) {
222 min = min * 10;
223 min = min + (buf[i]) - '0';
224 }
225 // catch too-big values or trailing garbage
226 if (min >= 65536 || i < line_end) {
227 request_parse_status = HTTP_HTTP_VERSION_NOT_SUPPORTED;
228 return -1;
229 }
230 v_min = min;
231
232 /*
233 * Rightio - we have all the schtuff. Return true; we've got enough.
234 */
235 request_parse_status = HTTP_OK;
236 return 1;
237}
238
239int
240HttpParserParseReqLine(HttpParser *hmsg)
241{
242 PROF_start(HttpParserParseReqLine);
243 int retcode = hmsg->parseRequestFirstLine();
244 debugs(74, 5, "Parser: retval " << retcode << ": from " << hmsg->req_start <<
245 "->" << hmsg->req_end << ": method " << hmsg->m_start << "->" <<
246 hmsg->m_end << "; url " << hmsg->u_start << "->" << hmsg->u_end <<
247 "; version " << hmsg->v_start << "->" << hmsg->v_end << " (" << hmsg->v_maj <<
248 "/" << hmsg->v_min << ")");
249 PROF_stop(HttpParserParseReqLine);
250 return retcode;
251}
252
253#if MSGDODEBUG
254/* XXX This should eventually turn into something inlined or #define'd */
255int
256HttpParserReqSz(HttpParser *hp)
257{
258 assert(hp->state == HTTP_PARSE_NEW);
259 assert(hp->req_start != -1);
260 assert(hp->req_end != -1);
261 return hp->req_end - hp->req_start + 1;
262}
263
264/*
265 * This +1 makes it 'right' but won't make any sense if
266 * there's a 0 byte header? This won't happen normally - a valid header
267 * is at -least- a blank line (\n, or \r\n.)
268 */
269int
270HttpParserHdrSz(HttpParser *hp)
271{
272 assert(hp->state == HTTP_PARSE_NEW);
273 assert(hp->hdr_start != -1);
274 assert(hp->hdr_end != -1);
275 return hp->hdr_end - hp->hdr_start + 1;
276}
277
278const char *
279HttpParserHdrBuf(HttpParser *hp)
280{
281 assert(hp->state == HTTP_PARSE_NEW);
282 assert(hp->hdr_start != -1);
283 assert(hp->hdr_end != -1);
284 return hp->buf + hp->hdr_start;
285}
286
287int
288HttpParserRequestLen(HttpParser *hp)
289{
290 return hp->hdr_end - hp->req_start + 1;
291}
292#endif
293