]> git.ipfire.org Git - thirdparty/squid.git/blob - src/http/one/ResponseParser.cc
merge from trunk-r14667
[thirdparty/squid.git] / src / http / one / ResponseParser.cc
1 /*
2 * Copyright (C) 1996-2016 The Squid Software Foundation and contributors
3 *
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
7 */
8
9 #include "squid.h"
10 #include "Debug.h"
11 #include "http/one/ResponseParser.h"
12 #include "http/one/Tokenizer.h"
13 #include "http/ProtocolVersion.h"
14 #include "profiler/Profiler.h"
15 #include "SquidConfig.h"
16
/// Prefix ("magic") identifying an ICY response status-line.
/// NP: the trailing SP is part of the magic, so the status-code is parsed
/// immediately after it (see the ICY branch of parseResponseFirstLine()).
const SBuf Http::One::ResponseParser::IcyMagic("ICY ");
18
19 Http1::Parser::size_type
20 Http::One::ResponseParser::firstLineSize() const
21 {
22 Http1::Parser::size_type result = 0;
23
24 switch (msgProtocol_.protocol)
25 {
26 case AnyP::PROTO_HTTP:
27 result += Http1magic.length();
28 break;
29 case AnyP::PROTO_ICY:
30 result += IcyMagic.length();
31 break;
32 default: // no other protocols supported
33 return result;
34 }
35 // NP: the parser does not accept >2 DIGIT for version numbers
36 if (msgProtocol_.minor > 9)
37 result += 2;
38 else
39 result += 1;
40
41 result += 5; /* 5 octets in: SP status SP */
42 result += reasonPhrase_.length();
43 result += 2; /* CRLF terminator */
44 return result;
45 }
46
47 // NP: we found the protocol version and consumed it already.
48 // just need the status code and reason phrase
49 int
50 Http::One::ResponseParser::parseResponseStatusAndReason(Http1::Tokenizer &tok, const CharacterSet &WspDelim)
51 {
52 if (!completedStatus_) {
53 debugs(74, 9, "seek status-code in: " << tok.remaining().substr(0,10) << "...");
54 /* RFC 7230 section 3.1.2 - status code is 3 DIGIT octets.
55 * There is no limit on what those octets may be.
56 * 000 through 999 are all valid.
57 */
58 int64_t statusValue;
59 if (tok.int64(statusValue, 10, false, 3) && tok.skipOne(WspDelim)) {
60
61 debugs(74, 6, "found int64 status-code=" << statusValue);
62 statusCode_ = static_cast<Http::StatusCode>(statusValue);
63
64 buf_ = tok.remaining(); // resume checkpoint
65 completedStatus_ = true;
66
67 } else if (tok.atEnd()) {
68 debugs(74, 6, "Parser needs more data");
69 return 0; // need more to be sure we have it all
70
71 } else {
72 debugs(74, 6, "invalid status-line. invalid code.");
73 return -1; // invalid status, a single SP terminator required
74 }
75 // NOTE: any whitespace after the single SP is part of the reason phrase.
76 }
77
78 if (tok.atEnd())
79 return 0; // need more to be sure we have it all
80
81 /* RFC 7230 says we SHOULD ignore the reason phrase content
82 * but it has a definite valid vs invalid character set.
83 * We interpret the SHOULD as ignoring absence and syntax, but
84 * producing an error if it contains an invalid octet.
85 */
86
87 debugs(74, 9, "seek reason-phrase in: " << tok.remaining().substr(0,50) << "...");
88
89 // if we got here we are still looking for reason-phrase bytes
90 static const CharacterSet phraseChars = CharacterSet::WSP + CharacterSet::VCHAR + CharacterSet::OBSTEXT;
91 (void)tok.prefix(reasonPhrase_, phraseChars); // optional, no error if missing
92 if (skipLineTerminator(tok)) {
93 debugs(74, DBG_DATA, "parse remaining buf={length=" << tok.remaining().length() << ", data='" << tok.remaining() << "'}");
94 buf_ = tok.remaining(); // resume checkpoint
95 return 1;
96 }
97 reasonPhrase_.clear();
98
99 if (tok.atEnd())
100 return 0; // need more to be sure we have it all
101
102 debugs(74, 6, "invalid status-line. garbage in reason phrase.");
103 return -1;
104 }
105
106 /**
107 * Attempt to parse the method field out of an HTTP message status-line.
108 *
109 * Governed by:
110 * RFC 1945 section 6.1
111 * RFC 7230 section 2.6, 3.1 and 3.5
112 *
113 * Parsing state is stored between calls. The current implementation uses
114 * checkpoints after each successful status-line field.
115 * The return value tells you whether the parsing is completed or not.
116 *
117 * \retval -1 an error occurred.
118 * \retval 1 successful parse. statusCode_ and maybe reasonPhrase_ are filled and buffer consumed including first delimiter.
119 * \retval 0 more data is needed to complete the parse
120 */
121 int
122 Http::One::ResponseParser::parseResponseFirstLine()
123 {
124 Http1::Tokenizer tok(buf_);
125
126 CharacterSet WspDelim = CharacterSet::SP; // strict parse only accepts SP
127
128 if (Config.onoff.relaxed_header_parser) {
129 // RFC 7230 section 3.5
130 // tolerant parser MAY accept any of SP, HTAB, VT (%x0B), FF (%x0C), or bare CR
131 // as whitespace between status-line fields
132 WspDelim += CharacterSet::HTAB
133 + CharacterSet("VT,FF","\x0B\x0C")
134 + CharacterSet::CR;
135 }
136
137 if (msgProtocol_.protocol != AnyP::PROTO_NONE) {
138 debugs(74, 6, "continue incremental parse for " << msgProtocol_);
139 debugs(74, DBG_DATA, "parse remaining buf={length=" << tok.remaining().length() << ", data='" << tok.remaining() << "'}");
140 // we already found the magic, but not the full line. keep going.
141 return parseResponseStatusAndReason(tok, WspDelim);
142
143 } else if (tok.skip(Http1magic)) {
144 debugs(74, 6, "found prefix magic " << Http1magic);
145 // HTTP Response status-line parse
146
147 // magic contains major version, still need to find minor DIGIT
148 int64_t verMinor;
149 if (tok.int64(verMinor, 10, false, 1) && tok.skipOne(WspDelim)) {
150 msgProtocol_.protocol = AnyP::PROTO_HTTP;
151 msgProtocol_.major = 1;
152 msgProtocol_.minor = static_cast<unsigned int>(verMinor);
153
154 debugs(74, 6, "found version=" << msgProtocol_);
155
156 debugs(74, DBG_DATA, "parse remaining buf={length=" << tok.remaining().length() << ", data='" << tok.remaining() << "'}");
157 buf_ = tok.remaining(); // resume checkpoint
158 return parseResponseStatusAndReason(tok, WspDelim);
159
160 } else if (tok.atEnd())
161 return 0; // need more to be sure we have it all
162 else
163 return -1; // invalid version or delimiter, a single SP terminator required
164
165 } else if (tok.skip(IcyMagic)) {
166 debugs(74, 6, "found prefix magic " << IcyMagic);
167 // ICY Response status-line parse (same as HTTP/1 after the magic version)
168 msgProtocol_.protocol = AnyP::PROTO_ICY;
169 // NP: ICY has no /major.minor details
170 debugs(74, DBG_DATA, "parse remaining buf={length=" << tok.remaining().length() << ", data='" << tok.remaining() << "'}");
171 buf_ = tok.remaining(); // resume checkpoint
172 return parseResponseStatusAndReason(tok, WspDelim);
173
174 } else if (buf_.length() > Http1magic.length() && buf_.length() > IcyMagic.length()) {
175 debugs(74, 2, "unknown/missing prefix magic. Interpreting as HTTP/0.9");
176 // found something that looks like an HTTP/0.9 response
177 // Gateway/Transform it into HTTP/1.1
178 msgProtocol_ = Http::ProtocolVersion(1,1);
179 // XXX: probably should use version 0.9 here and upgrade on output,
180 // but the old code did 1.1 transformation now.
181 statusCode_ = Http::scOkay;
182 static const SBuf gatewayPhrase("Gatewaying");
183 reasonPhrase_ = gatewayPhrase;
184 static const SBuf fakeHttpMimeBlock("X-Transformed-From: HTTP/0.9\r\n"
185 /* Server: visible_appname_string */
186 "Mime-Version: 1.0\r\n"
187 /* Date: squid_curtime */
188 "Expires: -1\r\n\r\n");
189 mimeHeaderBlock_ = fakeHttpMimeBlock;
190 parsingStage_ = HTTP_PARSE_DONE;
191 return 1; // no more parsing
192 }
193
194 return 0; // need more to parse anything.
195 }
196
197 bool
198 Http::One::ResponseParser::parse(const SBuf &aBuf)
199 {
200 buf_ = aBuf;
201 debugs(74, DBG_DATA, "Parse buf={length=" << aBuf.length() << ", data='" << aBuf << "'}");
202
203 // stage 1: locate the status-line
204 if (parsingStage_ == HTTP_PARSE_NONE) {
205 // RFC 7230 explicitly states whether garbage whitespace is to be handled
206 // at each point of the message framing boundaries.
207 // It omits mentioning garbage prior to HTTP Responses.
208 // Therefore, if we receive anything at all treat it as Response message.
209 if (!buf_.isEmpty())
210 parsingStage_ = HTTP_PARSE_FIRST;
211 else
212 return false;
213 }
214
215 // stage 2: parse the status-line
216 if (parsingStage_ == HTTP_PARSE_FIRST) {
217 PROF_start(HttpParserParseReplyLine);
218
219 const int retcode = parseResponseFirstLine();
220
221 // first-line (or a look-alike) found successfully.
222 if (retcode > 0 && parsingStage_ == HTTP_PARSE_FIRST)
223 parsingStage_ = HTTP_PARSE_MIME;
224 debugs(74, 5, "status-line: retval " << retcode);
225 debugs(74, 5, "status-line: proto " << msgProtocol_);
226 debugs(74, 5, "status-line: status-code " << statusCode_);
227 debugs(74, 5, "status-line: reason-phrase " << reasonPhrase_);
228 debugs(74, 5, "Parser: bytes processed=" << (aBuf.length()-buf_.length()));
229 PROF_stop(HttpParserParseReplyLine);
230
231 // syntax errors already
232 if (retcode < 0) {
233 parsingStage_ = HTTP_PARSE_DONE;
234 parseStatusCode = Http::scInvalidHeader;
235 return false;
236 }
237 }
238
239 // stage 3: locate the mime header block
240 if (parsingStage_ == HTTP_PARSE_MIME) {
241 if (!grabMimeBlock("Response", Config.maxReplyHeaderSize))
242 return false;
243 }
244
245 return !needsMoreData();
246 }
247