]>
Commit | Line | Data |
---|---|---|
ad20e647 | 1 | /* |
4ac4a490 | 2 | * Copyright (C) 1996-2017 The Squid Software Foundation and contributors |
ad20e647 AJ |
3 | * |
4 | * Squid software is distributed under GPLv2+ license and includes | |
5 | * contributions from numerous individuals and organizations. | |
6 | * Please see the COPYING and CONTRIBUTORS files for details. | |
7 | */ | |
8 | ||
f1d5359e AJ |
9 | #include "squid.h" |
10 | #include "Debug.h" | |
11 | #include "http/one/ResponseParser.h" | |
f29718b0 | 12 | #include "http/one/Tokenizer.h" |
f1d5359e | 13 | #include "http/ProtocolVersion.h" |
f1d5359e AJ |
14 | #include "profiler/Profiler.h" |
15 | #include "SquidConfig.h" | |
16 | ||
/// Prefix that parseResponseFirstLine() skips to recognize an ICY response
/// status-line. The trailing SP is part of the magic.
const SBuf Http::One::ResponseParser::IcyMagic("ICY ");
18 | ||
19 | Http1::Parser::size_type | |
20 | Http::One::ResponseParser::firstLineSize() const | |
21 | { | |
22 | Http1::Parser::size_type result = 0; | |
23 | ||
24 | switch (msgProtocol_.protocol) | |
25 | { | |
26 | case AnyP::PROTO_HTTP: | |
27 | result += Http1magic.length(); | |
28 | break; | |
29 | case AnyP::PROTO_ICY: | |
30 | result += IcyMagic.length(); | |
31 | break; | |
32 | default: // no other protocols supported | |
33 | return result; | |
34 | } | |
35 | // NP: the parser does not accept >2 DIGIT for version numbers | |
b8f86fd2 | 36 | if (msgProtocol_.minor > 9) |
f1d5359e AJ |
37 | result += 2; |
38 | else | |
39 | result += 1; | |
40 | ||
41 | result += 5; /* 5 octets in: SP status SP */ | |
42 | result += reasonPhrase_.length(); | |
05f32cc2 | 43 | result += 2; /* CRLF terminator */ |
f1d5359e AJ |
44 | return result; |
45 | } | |
46 | ||
47 | // NP: we found the protocol version and consumed it already. | |
48 | // just need the status code and reason phrase | |
5aea71e7 | 49 | int |
f29718b0 | 50 | Http::One::ResponseParser::parseResponseStatusAndReason(Http1::Tokenizer &tok, const CharacterSet &WspDelim) |
f1d5359e | 51 | { |
f1d5359e | 52 | if (!completedStatus_) { |
db6a29e1 | 53 | debugs(74, 9, "seek status-code in: " << tok.remaining().substr(0,10) << "..."); |
b8f86fd2 AJ |
54 | /* RFC 7230 section 3.1.2 - status code is 3 DIGIT octets. |
55 | * There is no limit on what those octets may be. | |
56 | * 000 through 999 are all valid. | |
57 | */ | |
58 | int64_t statusValue; | |
59 | if (tok.int64(statusValue, 10, false, 3) && tok.skipOne(WspDelim)) { | |
f1d5359e | 60 | |
b8f86fd2 AJ |
61 | debugs(74, 6, "found int64 status-code=" << statusValue); |
62 | statusCode_ = static_cast<Http::StatusCode>(statusValue); | |
db6a29e1 | 63 | |
b8f86fd2 AJ |
64 | buf_ = tok.remaining(); // resume checkpoint |
65 | completedStatus_ = true; | |
f1d5359e | 66 | |
b8f86fd2 AJ |
67 | } else if (tok.atEnd()) { |
68 | debugs(74, 6, "Parser needs more data"); | |
69 | return 0; // need more to be sure we have it all | |
f1d5359e | 70 | |
b8f86fd2 AJ |
71 | } else { |
72 | debugs(74, 6, "invalid status-line. invalid code."); | |
73 | return -1; // invalid status, a single SP terminator required | |
74 | } | |
75 | // NOTE: any whitespace after the single SP is part of the reason phrase. | |
f1d5359e AJ |
76 | } |
77 | ||
f1d5359e AJ |
78 | /* RFC 7230 says we SHOULD ignore the reason phrase content |
79 | * but it has a definite valid vs invalid character set. | |
80 | * We interpret the SHOULD as ignoring absence and syntax, but | |
81 | * producing an error if it contains an invalid octet. | |
82 | */ | |
83 | ||
db6a29e1 AJ |
84 | debugs(74, 9, "seek reason-phrase in: " << tok.remaining().substr(0,50) << "..."); |
85 | ||
f1d5359e AJ |
86 | // if we got here we are still looking for reason-phrase bytes |
87 | static const CharacterSet phraseChars = CharacterSet::WSP + CharacterSet::VCHAR + CharacterSet::OBSTEXT; | |
b8f86fd2 | 88 | (void)tok.prefix(reasonPhrase_, phraseChars); // optional, no error if missing |
188ad27f AJ |
89 | try { |
90 | if (skipLineTerminator(tok)) { | |
91 | debugs(74, DBG_DATA, "parse remaining buf={length=" << tok.remaining().length() << ", data='" << tok.remaining() << "'}"); | |
92 | buf_ = tok.remaining(); // resume checkpoint | |
93 | return 1; | |
94 | } | |
95 | reasonPhrase_.clear(); | |
f1d5359e AJ |
96 | return 0; // need more to be sure we have it all |
97 | ||
188ad27f AJ |
98 | } catch (const std::exception &ex) { |
99 | debugs(74, 6, "invalid status-line: " << ex.what()); | |
100 | } | |
b8f86fd2 | 101 | return -1; |
f1d5359e AJ |
102 | } |
103 | ||
b8f86fd2 AJ |
104 | /** |
105 | * Attempt to parse the method field out of an HTTP message status-line. | |
106 | * | |
107 | * Governed by: | |
108 | * RFC 1945 section 6.1 | |
109 | * RFC 7230 section 2.6, 3.1 and 3.5 | |
110 | * | |
111 | * Parsing state is stored between calls. The current implementation uses | |
112 | * checkpoints after each successful status-line field. | |
113 | * The return value tells you whether the parsing is completed or not. | |
114 | * | |
115 | * \retval -1 an error occurred. | |
116 | * \retval 1 successful parse. statusCode_ and maybe reasonPhrase_ are filled and buffer consumed including first delimiter. | |
117 | * \retval 0 more data is needed to complete the parse | |
118 | */ | |
5aea71e7 | 119 | int |
f1d5359e AJ |
120 | Http::One::ResponseParser::parseResponseFirstLine() |
121 | { | |
f29718b0 | 122 | Http1::Tokenizer tok(buf_); |
f1d5359e | 123 | |
b8f86fd2 AJ |
124 | CharacterSet WspDelim = CharacterSet::SP; // strict parse only accepts SP |
125 | ||
126 | if (Config.onoff.relaxed_header_parser) { | |
127 | // RFC 7230 section 3.5 | |
128 | // tolerant parser MAY accept any of SP, HTAB, VT (%x0B), FF (%x0C), or bare CR | |
129 | // as whitespace between status-line fields | |
130 | WspDelim += CharacterSet::HTAB | |
1810a0cb SM |
131 | + CharacterSet("VT,FF","\x0B\x0C") |
132 | + CharacterSet::CR; | |
b8f86fd2 AJ |
133 | } |
134 | ||
f1d5359e | 135 | if (msgProtocol_.protocol != AnyP::PROTO_NONE) { |
db6a29e1 AJ |
136 | debugs(74, 6, "continue incremental parse for " << msgProtocol_); |
137 | debugs(74, DBG_DATA, "parse remaining buf={length=" << tok.remaining().length() << ", data='" << tok.remaining() << "'}"); | |
f1d5359e | 138 | // we already found the magic, but not the full line. keep going. |
b8f86fd2 | 139 | return parseResponseStatusAndReason(tok, WspDelim); |
f1d5359e AJ |
140 | |
141 | } else if (tok.skip(Http1magic)) { | |
db6a29e1 | 142 | debugs(74, 6, "found prefix magic " << Http1magic); |
f1d5359e AJ |
143 | // HTTP Response status-line parse |
144 | ||
b8f86fd2 AJ |
145 | // magic contains major version, still need to find minor DIGIT |
146 | int64_t verMinor; | |
147 | if (tok.int64(verMinor, 10, false, 1) && tok.skipOne(WspDelim)) { | |
148 | msgProtocol_.protocol = AnyP::PROTO_HTTP; | |
149 | msgProtocol_.major = 1; | |
150 | msgProtocol_.minor = static_cast<unsigned int>(verMinor); | |
f1d5359e | 151 | |
b8f86fd2 | 152 | debugs(74, 6, "found version=" << msgProtocol_); |
db6a29e1 | 153 | |
b8f86fd2 AJ |
154 | debugs(74, DBG_DATA, "parse remaining buf={length=" << tok.remaining().length() << ", data='" << tok.remaining() << "'}"); |
155 | buf_ = tok.remaining(); // resume checkpoint | |
156 | return parseResponseStatusAndReason(tok, WspDelim); | |
db6a29e1 | 157 | |
b8f86fd2 AJ |
158 | } else if (tok.atEnd()) |
159 | return 0; // need more to be sure we have it all | |
160 | else | |
161 | return -1; // invalid version or delimiter, a single SP terminator required | |
f1d5359e AJ |
162 | |
163 | } else if (tok.skip(IcyMagic)) { | |
db6a29e1 | 164 | debugs(74, 6, "found prefix magic " << IcyMagic); |
f1d5359e AJ |
165 | // ICY Response status-line parse (same as HTTP/1 after the magic version) |
166 | msgProtocol_.protocol = AnyP::PROTO_ICY; | |
167 | // NP: ICY has no /major.minor details | |
db6a29e1 | 168 | debugs(74, DBG_DATA, "parse remaining buf={length=" << tok.remaining().length() << ", data='" << tok.remaining() << "'}"); |
f1d5359e | 169 | buf_ = tok.remaining(); // resume checkpoint |
b8f86fd2 | 170 | return parseResponseStatusAndReason(tok, WspDelim); |
f1d5359e AJ |
171 | |
172 | } else if (buf_.length() > Http1magic.length() && buf_.length() > IcyMagic.length()) { | |
db6a29e1 | 173 | debugs(74, 2, "unknown/missing prefix magic. Interpreting as HTTP/0.9"); |
f1d5359e | 174 | // found something that looks like an HTTP/0.9 response |
db6a29e1 | 175 | // Gateway/Transform it into HTTP/1.1 |
f1d5359e AJ |
176 | msgProtocol_ = Http::ProtocolVersion(1,1); |
177 | // XXX: probably should use version 0.9 here and upgrade on output, | |
178 | // but the old code did 1.1 transformation now. | |
179 | statusCode_ = Http::scOkay; | |
180 | static const SBuf gatewayPhrase("Gatewaying"); | |
181 | reasonPhrase_ = gatewayPhrase; | |
182 | static const SBuf fakeHttpMimeBlock("X-Transformed-From: HTTP/0.9\r\n" | |
183 | /* Server: visible_appname_string */ | |
184 | "Mime-Version: 1.0\r\n" | |
185 | /* Date: squid_curtime */ | |
186 | "Expires: -1\r\n\r\n"); | |
db6a29e1 AJ |
187 | mimeHeaderBlock_ = fakeHttpMimeBlock; |
188 | parsingStage_ = HTTP_PARSE_DONE; | |
f1d5359e AJ |
189 | return 1; // no more parsing |
190 | } | |
191 | ||
192 | return 0; // need more to parse anything. | |
193 | } | |
194 | ||
195 | bool | |
196 | Http::One::ResponseParser::parse(const SBuf &aBuf) | |
197 | { | |
198 | buf_ = aBuf; | |
199 | debugs(74, DBG_DATA, "Parse buf={length=" << aBuf.length() << ", data='" << aBuf << "'}"); | |
200 | ||
201 | // stage 1: locate the status-line | |
202 | if (parsingStage_ == HTTP_PARSE_NONE) { | |
203 | // RFC 7230 explicitly states whether garbage whitespace is to be handled | |
204 | // at each point of the message framing boundaries. | |
205 | // It omits mentioning garbage prior to HTTP Responses. | |
206 | // Therefore, if we receive anything at all treat it as Response message. | |
207 | if (!buf_.isEmpty()) | |
208 | parsingStage_ = HTTP_PARSE_FIRST; | |
209 | else | |
210 | return false; | |
211 | } | |
212 | ||
213 | // stage 2: parse the status-line | |
214 | if (parsingStage_ == HTTP_PARSE_FIRST) { | |
215 | PROF_start(HttpParserParseReplyLine); | |
216 | ||
f8cab755 | 217 | const int retcode = parseResponseFirstLine(); |
f1d5359e AJ |
218 | |
219 | // first-line (or a look-alike) found successfully. | |
617b7cca | 220 | if (retcode > 0 && parsingStage_ == HTTP_PARSE_FIRST) |
f1d5359e AJ |
221 | parsingStage_ = HTTP_PARSE_MIME; |
222 | debugs(74, 5, "status-line: retval " << retcode); | |
223 | debugs(74, 5, "status-line: proto " << msgProtocol_); | |
224 | debugs(74, 5, "status-line: status-code " << statusCode_); | |
225 | debugs(74, 5, "status-line: reason-phrase " << reasonPhrase_); | |
226 | debugs(74, 5, "Parser: bytes processed=" << (aBuf.length()-buf_.length())); | |
227 | PROF_stop(HttpParserParseReplyLine); | |
228 | ||
229 | // syntax errors already | |
230 | if (retcode < 0) { | |
231 | parsingStage_ = HTTP_PARSE_DONE; | |
ada1f18c | 232 | parseStatusCode = Http::scInvalidHeader; |
f1d5359e AJ |
233 | return false; |
234 | } | |
235 | } | |
236 | ||
237 | // stage 3: locate the mime header block | |
238 | if (parsingStage_ == HTTP_PARSE_MIME) { | |
f8cab755 | 239 | if (!grabMimeBlock("Response", Config.maxReplyHeaderSize)) |
f1d5359e AJ |
240 | return false; |
241 | } | |
242 | ||
243 | return !needsMoreData(); | |
244 | } | |
1810a0cb | 245 |