]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | * Copyright (C) 1996-2020 The Squid Software Foundation and contributors | |
3 | * | |
4 | * Squid software is distributed under GPLv2+ license and includes | |
5 | * contributions from numerous individuals and organizations. | |
6 | * Please see the COPYING and CONTRIBUTORS files for details. | |
7 | */ | |
8 | ||
9 | #include "squid.h" | |
10 | #include "Debug.h" | |
11 | #include "http/one/ResponseParser.h" | |
12 | #include "http/ProtocolVersion.h" | |
13 | #include "parser/Tokenizer.h" | |
14 | #include "profiler/Profiler.h" | |
15 | #include "SquidConfig.h" | |
16 | ||
/// Magic prefix that parseResponseFirstLine() looks for at the start of an
/// ICY response status-line. Note that the literal ends with a SP octet.
const SBuf Http::One::ResponseParser::IcyMagic("ICY ");
18 | ||
19 | Http1::Parser::size_type | |
20 | Http::One::ResponseParser::firstLineSize() const | |
21 | { | |
22 | Http1::Parser::size_type result = 0; | |
23 | ||
24 | switch (msgProtocol_.protocol) | |
25 | { | |
26 | case AnyP::PROTO_HTTP: | |
27 | result += Http1magic.length(); | |
28 | break; | |
29 | case AnyP::PROTO_ICY: | |
30 | result += IcyMagic.length(); | |
31 | break; | |
32 | default: // no other protocols supported | |
33 | return result; | |
34 | } | |
35 | // NP: the parser does not accept >2 DIGIT for version numbers | |
36 | if (msgProtocol_.minor > 9) | |
37 | result += 2; | |
38 | else | |
39 | result += 1; | |
40 | ||
41 | result += 5; /* 5 octets in: SP status SP */ | |
42 | result += reasonPhrase_.length(); | |
43 | result += 2; /* CRLF terminator */ | |
44 | return result; | |
45 | } | |
46 | ||
47 | // NP: we found the protocol version and consumed it already. | |
48 | // just need the status code and reason phrase | |
49 | int | |
50 | Http::One::ResponseParser::parseResponseStatusAndReason(Tokenizer &tok, const CharacterSet &WspDelim) | |
51 | { | |
52 | if (!completedStatus_) { | |
53 | debugs(74, 9, "seek status-code in: " << tok.remaining().substr(0,10) << "..."); | |
54 | /* RFC 7230 section 3.1.2 - status code is 3 DIGIT octets. | |
55 | * There is no limit on what those octets may be. | |
56 | * 000 through 999 are all valid. | |
57 | */ | |
58 | int64_t statusValue; | |
59 | if (tok.int64(statusValue, 10, false, 3) && tok.skipOne(WspDelim)) { | |
60 | ||
61 | debugs(74, 6, "found int64 status-code=" << statusValue); | |
62 | statusCode_ = static_cast<Http::StatusCode>(statusValue); | |
63 | ||
64 | buf_ = tok.remaining(); // resume checkpoint | |
65 | completedStatus_ = true; | |
66 | ||
67 | } else if (tok.atEnd()) { | |
68 | debugs(74, 6, "Parser needs more data"); | |
69 | return 0; // need more to be sure we have it all | |
70 | ||
71 | } else { | |
72 | debugs(74, 6, "invalid status-line. invalid code."); | |
73 | return -1; // invalid status, a single SP terminator required | |
74 | } | |
75 | // NOTE: any whitespace after the single SP is part of the reason phrase. | |
76 | } | |
77 | ||
78 | /* RFC 7230 says we SHOULD ignore the reason phrase content | |
79 | * but it has a definite valid vs invalid character set. | |
80 | * We interpret the SHOULD as ignoring absence and syntax, but | |
81 | * producing an error if it contains an invalid octet. | |
82 | */ | |
83 | ||
84 | debugs(74, 9, "seek reason-phrase in: " << tok.remaining().substr(0,50) << "..."); | |
85 | ||
86 | // if we got here we are still looking for reason-phrase bytes | |
87 | static const CharacterSet phraseChars = CharacterSet::WSP + CharacterSet::VCHAR + CharacterSet::OBSTEXT; | |
88 | (void)tok.prefix(reasonPhrase_, phraseChars); // optional, no error if missing | |
89 | try { | |
90 | skipLineTerminator(tok); | |
91 | buf_ = tok.remaining(); // resume checkpoint | |
92 | debugs(74, DBG_DATA, Raw("leftovers", buf_.rawContent(), buf_.length())); | |
93 | return 1; | |
94 | } catch (const InsufficientInput &) { | |
95 | reasonPhrase_.clear(); | |
96 | return 0; // need more to be sure we have it all | |
97 | } catch (const std::exception &ex) { | |
98 | debugs(74, 6, "invalid status-line: " << ex.what()); | |
99 | } | |
100 | return -1; | |
101 | } | |
102 | ||
103 | /** | |
104 | * Attempt to parse the method field out of an HTTP message status-line. | |
105 | * | |
106 | * Governed by: | |
107 | * RFC 1945 section 6.1 | |
108 | * RFC 7230 section 2.6, 3.1 and 3.5 | |
109 | * | |
110 | * Parsing state is stored between calls. The current implementation uses | |
111 | * checkpoints after each successful status-line field. | |
112 | * The return value tells you whether the parsing is completed or not. | |
113 | * | |
114 | * \retval -1 an error occurred. | |
115 | * \retval 1 successful parse. statusCode_ and maybe reasonPhrase_ are filled and buffer consumed including first delimiter. | |
116 | * \retval 0 more data is needed to complete the parse | |
117 | */ | |
118 | int | |
119 | Http::One::ResponseParser::parseResponseFirstLine() | |
120 | { | |
121 | Tokenizer tok(buf_); | |
122 | ||
123 | const CharacterSet &WspDelim = DelimiterCharacters(); | |
124 | ||
125 | if (msgProtocol_.protocol != AnyP::PROTO_NONE) { | |
126 | debugs(74, 6, "continue incremental parse for " << msgProtocol_); | |
127 | debugs(74, DBG_DATA, "parse remaining buf={length=" << tok.remaining().length() << ", data='" << tok.remaining() << "'}"); | |
128 | // we already found the magic, but not the full line. keep going. | |
129 | return parseResponseStatusAndReason(tok, WspDelim); | |
130 | ||
131 | } else if (tok.skip(Http1magic)) { | |
132 | debugs(74, 6, "found prefix magic " << Http1magic); | |
133 | // HTTP Response status-line parse | |
134 | ||
135 | // magic contains major version, still need to find minor DIGIT | |
136 | int64_t verMinor; | |
137 | if (tok.int64(verMinor, 10, false, 1) && tok.skipOne(WspDelim)) { | |
138 | msgProtocol_.protocol = AnyP::PROTO_HTTP; | |
139 | msgProtocol_.major = 1; | |
140 | msgProtocol_.minor = static_cast<unsigned int>(verMinor); | |
141 | ||
142 | debugs(74, 6, "found version=" << msgProtocol_); | |
143 | ||
144 | debugs(74, DBG_DATA, "parse remaining buf={length=" << tok.remaining().length() << ", data='" << tok.remaining() << "'}"); | |
145 | buf_ = tok.remaining(); // resume checkpoint | |
146 | return parseResponseStatusAndReason(tok, WspDelim); | |
147 | ||
148 | } else if (tok.atEnd()) | |
149 | return 0; // need more to be sure we have it all | |
150 | else | |
151 | return -1; // invalid version or delimiter, a single SP terminator required | |
152 | ||
153 | } else if (tok.skip(IcyMagic)) { | |
154 | debugs(74, 6, "found prefix magic " << IcyMagic); | |
155 | // ICY Response status-line parse (same as HTTP/1 after the magic version) | |
156 | msgProtocol_.protocol = AnyP::PROTO_ICY; | |
157 | // NP: ICY has no /major.minor details | |
158 | debugs(74, DBG_DATA, "parse remaining buf={length=" << tok.remaining().length() << ", data='" << tok.remaining() << "'}"); | |
159 | buf_ = tok.remaining(); // resume checkpoint | |
160 | return parseResponseStatusAndReason(tok, WspDelim); | |
161 | } else if (buf_.length() < Http1magic.length() && Http1magic.startsWith(buf_)) { | |
162 | debugs(74, 7, Raw("valid HTTP/1 prefix", buf_.rawContent(), buf_.length())); | |
163 | return 0; | |
164 | } else if (buf_.length() < IcyMagic.length() && IcyMagic.startsWith(buf_)) { | |
165 | debugs(74, 7, Raw("valid ICY prefix", buf_.rawContent(), buf_.length())); | |
166 | return 0; | |
167 | } else { | |
168 | debugs(74, 2, "unknown/missing prefix magic. Interpreting as HTTP/0.9"); | |
169 | // found something that looks like an HTTP/0.9 response | |
170 | // Gateway/Transform it into HTTP/1.1 | |
171 | msgProtocol_ = Http::ProtocolVersion(1,1); | |
172 | // XXX: probably should use version 0.9 here and upgrade on output, | |
173 | // but the old code did 1.1 transformation now. | |
174 | statusCode_ = Http::scOkay; | |
175 | static const SBuf gatewayPhrase("Gatewaying"); | |
176 | reasonPhrase_ = gatewayPhrase; | |
177 | static const SBuf fakeHttpMimeBlock("X-Transformed-From: HTTP/0.9\r\n" | |
178 | /* Server: visible_appname_string */ | |
179 | "Mime-Version: 1.0\r\n" | |
180 | /* Date: squid_curtime */ | |
181 | "Expires: -1\r\n\r\n"); | |
182 | mimeHeaderBlock_ = fakeHttpMimeBlock; | |
183 | parsingStage_ = HTTP_PARSE_DONE; | |
184 | return 1; // no more parsing | |
185 | } | |
186 | ||
187 | // unreachable | |
188 | assert(false); | |
189 | return -1; | |
190 | } | |
191 | ||
192 | bool | |
193 | Http::One::ResponseParser::parse(const SBuf &aBuf) | |
194 | { | |
195 | buf_ = aBuf; | |
196 | debugs(74, DBG_DATA, "Parse buf={length=" << aBuf.length() << ", data='" << aBuf << "'}"); | |
197 | ||
198 | // stage 1: locate the status-line | |
199 | if (parsingStage_ == HTTP_PARSE_NONE) { | |
200 | // RFC 7230 explicitly states whether garbage whitespace is to be handled | |
201 | // at each point of the message framing boundaries. | |
202 | // It omits mentioning garbage prior to HTTP Responses. | |
203 | // Therefore, if we receive anything at all treat it as Response message. | |
204 | if (!buf_.isEmpty()) | |
205 | parsingStage_ = HTTP_PARSE_FIRST; | |
206 | else | |
207 | return false; | |
208 | } | |
209 | ||
210 | // stage 2: parse the status-line | |
211 | if (parsingStage_ == HTTP_PARSE_FIRST) { | |
212 | PROF_start(HttpParserParseReplyLine); | |
213 | ||
214 | const int retcode = parseResponseFirstLine(); | |
215 | ||
216 | // first-line (or a look-alike) found successfully. | |
217 | if (retcode > 0 && parsingStage_ == HTTP_PARSE_FIRST) | |
218 | parsingStage_ = HTTP_PARSE_MIME; | |
219 | debugs(74, 5, "status-line: retval " << retcode); | |
220 | debugs(74, 5, "status-line: proto " << msgProtocol_); | |
221 | debugs(74, 5, "status-line: status-code " << statusCode_); | |
222 | debugs(74, 5, "status-line: reason-phrase " << reasonPhrase_); | |
223 | debugs(74, 5, "Parser: bytes processed=" << (aBuf.length()-buf_.length())); | |
224 | PROF_stop(HttpParserParseReplyLine); | |
225 | ||
226 | // syntax errors already | |
227 | if (retcode < 0) { | |
228 | parsingStage_ = HTTP_PARSE_DONE; | |
229 | parseStatusCode = Http::scInvalidHeader; | |
230 | return false; | |
231 | } | |
232 | } | |
233 | ||
234 | // stage 3: locate the mime header block | |
235 | if (parsingStage_ == HTTP_PARSE_MIME) { | |
236 | if (!grabMimeBlock("Response", Config.maxReplyHeaderSize)) | |
237 | return false; | |
238 | } | |
239 | ||
240 | return !needsMoreData(); | |
241 | } | |
242 |