]> git.ipfire.org Git - thirdparty/squid.git/blob - src/http/one/ResponseParser.cc
Merge from trunk rev.13866
[thirdparty/squid.git] / src / http / one / ResponseParser.cc
1 #include "squid.h"
2 #include "Debug.h"
3 #include "http/one/ResponseParser.h"
4 #include "http/ProtocolVersion.h"
5 #include "parser/Tokenizer.h"
6 #include "profiler/Profiler.h"
7 #include "SquidConfig.h"
8
// Magic prefix ("ICY" followed by one SP) that opens an ICY status-line.
// parseResponseFirstLine() skips it and firstLineSize() counts its length.
9 const SBuf Http::One::ResponseParser::IcyMagic("ICY ");
10
11 Http1::Parser::size_type
12 Http::One::ResponseParser::firstLineSize() const
13 {
14 Http1::Parser::size_type result = 0;
15
16 switch (msgProtocol_.protocol)
17 {
18 case AnyP::PROTO_HTTP:
19 result += Http1magic.length();
20 break;
21 case AnyP::PROTO_ICY:
22 result += IcyMagic.length();
23 break;
24 default: // no other protocols supported
25 return result;
26 }
27 // NP: the parser does not accept >2 DIGIT for version numbers
28 if (msgProtocol_.minor >10)
29 result += 2;
30 else
31 result += 1;
32
33 result += 5; /* 5 octets in: SP status SP */
34 result += reasonPhrase_.length();
35 result += 2; /* CRLF terminator */
36 return result;
37 }
38
39 // NP: we found the protocol version and consumed it already.
40 // just need the status code and reason phrase
41 const int
42 Http::One::ResponseParser::parseResponseStatusAndReason()
43 {
44 if (buf_.isEmpty())
45 return 0;
46
47 ::Parser::Tokenizer tok(buf_);
48
49 if (!completedStatus_) {
50 debugs(74, 9, "seek status-code in: " << tok.remaining().substr(0,10) << "...");
51 SBuf status;
52 // status code is 3 DIGIT octets
53 // NP: search space is >3 to get terminator character)
54 if(!tok.prefix(status, CharacterSet::DIGIT, 4))
55 return -1; // invalid status
56 // NOTE: multiple SP or non-SP bytes between version and status code are invalid.
57 if (tok.atEnd())
58 return 0; // need more to be sure we have it all
59 if(!tok.skip(' '))
60 return -1; // invalid status, a single SP terminator required
61 // NOTE: any whitespace after the single SP is part of the reason phrase.
62
63 debugs(74, 6, "found string status-code=" << status);
64
65 // get the actual numeric value of the 0-3 digits we found
66 ::Parser::Tokenizer t2(status);
67 int64_t statusValue;
68 if (!t2.int64(statusValue))
69 return -1; // ouch. digits not forming a valid number?
70 debugs(74, 6, "found int64 status-code=" << statusValue);
71 if (statusValue < 0 || statusValue > 999)
72 return -1; // ouch. digits not within valid status code range.
73
74 statusCode_ = static_cast<Http::StatusCode>(statusValue);
75
76 buf_ = tok.remaining(); // resume checkpoint
77 completedStatus_ = true;
78 }
79
80 if (tok.atEnd())
81 return 0; // need more to be sure we have it all
82
83 /* RFC 7230 says we SHOULD ignore the reason phrase content
84 * but it has a definite valid vs invalid character set.
85 * We interpret the SHOULD as ignoring absence and syntax, but
86 * producing an error if it contains an invalid octet.
87 */
88
89 debugs(74, 9, "seek reason-phrase in: " << tok.remaining().substr(0,50) << "...");
90
91 // if we got here we are still looking for reason-phrase bytes
92 static const CharacterSet phraseChars = CharacterSet::WSP + CharacterSet::VCHAR + CharacterSet::OBSTEXT;
93 tok.prefix(reasonPhrase_, phraseChars); // optional, no error if missing
94 tok.skip('\r'); // optional trailing CR
95
96 if (tok.atEnd())
97 return 0; // need more to be sure we have it all
98
99 // LF existence matters
100 if (!tok.skip('\n')) {
101 reasonPhrase_.clear();
102 return -1; // found invalid characters in the phrase
103 }
104
105 debugs(74, DBG_DATA, "parse remaining buf={length=" << tok.remaining().length() << ", data='" << tok.remaining() << "'}");
106 buf_ = tok.remaining(); // resume checkpoint
107 return 1;
108 }
109
110 const int
111 Http::One::ResponseParser::parseResponseFirstLine()
112 {
113 ::Parser::Tokenizer tok(buf_);
114
115 if (msgProtocol_.protocol != AnyP::PROTO_NONE) {
116 debugs(74, 6, "continue incremental parse for " << msgProtocol_);
117 debugs(74, DBG_DATA, "parse remaining buf={length=" << tok.remaining().length() << ", data='" << tok.remaining() << "'}");
118 // we already found the magic, but not the full line. keep going.
119 return parseResponseStatusAndReason();
120
121 } else if (tok.skip(Http1magic)) {
122 debugs(74, 6, "found prefix magic " << Http1magic);
123 // HTTP Response status-line parse
124
125 // magic contains major version, still need to find minor
126 SBuf verMinor;
127 // NP: we limit to 2-digits for speed, there really is no limit
128 // XXX: the protocols we accept dont have valid versions > 10 anyway
129 if (!tok.prefix(verMinor, CharacterSet::DIGIT, 2))
130 return -1; // invalid version minor code
131 if (tok.atEnd())
132 return 0; // need more to be sure we have it all
133 if(!tok.skip(' '))
134 return -1; // invalid version, a single SP terminator required
135
136 debugs(74, 6, "found string version-minor=" << verMinor);
137
138 // get the actual numeric value of the 0-3 digits we found
139 ::Parser::Tokenizer t2(verMinor);
140 int64_t tvm = 0;
141 if (!t2.int64(tvm))
142 return -1; // ouch. digits not forming a valid number?
143 msgProtocol_.minor = static_cast<unsigned int>(tvm);
144
145 msgProtocol_.protocol = AnyP::PROTO_HTTP;
146 msgProtocol_.major = 1;
147
148 debugs(74, 6, "found version=" << msgProtocol_);
149
150 debugs(74, DBG_DATA, "parse remaining buf={length=" << tok.remaining().length() << ", data='" << tok.remaining() << "'}");
151 buf_ = tok.remaining(); // resume checkpoint
152 return parseResponseStatusAndReason();
153
154 } else if (tok.skip(IcyMagic)) {
155 debugs(74, 6, "found prefix magic " << IcyMagic);
156 // ICY Response status-line parse (same as HTTP/1 after the magic version)
157 msgProtocol_.protocol = AnyP::PROTO_ICY;
158 // NP: ICY has no /major.minor details
159 debugs(74, DBG_DATA, "parse remaining buf={length=" << tok.remaining().length() << ", data='" << tok.remaining() << "'}");
160 buf_ = tok.remaining(); // resume checkpoint
161 return parseResponseStatusAndReason();
162
163 } else if (buf_.length() > Http1magic.length() && buf_.length() > IcyMagic.length()) {
164 debugs(74, 2, "unknown/missing prefix magic. Interpreting as HTTP/0.9");
165 // found something that looks like an HTTP/0.9 response
166 // Gateway/Transform it into HTTP/1.1
167 msgProtocol_ = Http::ProtocolVersion(1,1);
168 // XXX: probably should use version 0.9 here and upgrade on output,
169 // but the old code did 1.1 transformation now.
170 statusCode_ = Http::scOkay;
171 static const SBuf gatewayPhrase("Gatewaying");
172 reasonPhrase_ = gatewayPhrase;
173 static const SBuf fakeHttpMimeBlock("X-Transformed-From: HTTP/0.9\r\n"
174 /* Server: visible_appname_string */
175 "Mime-Version: 1.0\r\n"
176 /* Date: squid_curtime */
177 "Expires: -1\r\n\r\n");
178 mimeHeaderBlock_ = fakeHttpMimeBlock;
179 parsingStage_ = HTTP_PARSE_DONE;
180 return 1; // no more parsing
181 }
182
183 return 0; // need more to parse anything.
184 }
185
186 bool
187 Http::One::ResponseParser::parse(const SBuf &aBuf)
188 {
189 buf_ = aBuf;
190 debugs(74, DBG_DATA, "Parse buf={length=" << aBuf.length() << ", data='" << aBuf << "'}");
191
192 // stage 1: locate the status-line
193 if (parsingStage_ == HTTP_PARSE_NONE) {
194 // RFC 7230 explicitly states whether garbage whitespace is to be handled
195 // at each point of the message framing boundaries.
196 // It omits mentioning garbage prior to HTTP Responses.
197 // Therefore, if we receive anything at all treat it as Response message.
198 if (!buf_.isEmpty())
199 parsingStage_ = HTTP_PARSE_FIRST;
200 else
201 return false;
202 }
203
204 // stage 2: parse the status-line
205 if (parsingStage_ == HTTP_PARSE_FIRST) {
206 PROF_start(HttpParserParseReplyLine);
207
208 int retcode = parseResponseFirstLine();
209
210 // first-line (or a look-alike) found successfully.
211 if (retcode > 0)
212 parsingStage_ = HTTP_PARSE_MIME;
213 debugs(74, 5, "status-line: retval " << retcode);
214 debugs(74, 5, "status-line: proto " << msgProtocol_);
215 debugs(74, 5, "status-line: status-code " << statusCode_);
216 debugs(74, 5, "status-line: reason-phrase " << reasonPhrase_);
217 debugs(74, 5, "Parser: bytes processed=" << (aBuf.length()-buf_.length()));
218 PROF_stop(HttpParserParseReplyLine);
219
220 // syntax errors already
221 if (retcode < 0) {
222 parsingStage_ = HTTP_PARSE_DONE;
223 statusCode_ = Http::scInvalidHeader;
224 return false;
225 }
226 }
227
228 // stage 3: locate the mime header block
229 if (parsingStage_ == HTTP_PARSE_MIME) {
230 if (!findMimeBlock("Response", Config.maxReplyHeaderSize))
231 return false;
232 }
233
234 return !needsMoreData();
235 }