]> git.ipfire.org Git - thirdparty/squid.git/blob - src/http/one/ResponseParser.cc
SourceLayout: Move debugs()-related code into src/debug/ (#984)
[thirdparty/squid.git] / src / http / one / ResponseParser.cc
1 /*
2 * Copyright (C) 1996-2022 The Squid Software Foundation and contributors
3 *
4 * Squid software is distributed under GPLv2+ license and includes
5 * contributions from numerous individuals and organizations.
6 * Please see the COPYING and CONTRIBUTORS files for details.
7 */
8
9 #include "squid.h"
10 #include "base/Raw.h"
11 #include "debug/Stream.h"
12 #include "http/one/ResponseParser.h"
13 #include "http/ProtocolVersion.h"
14 #include "parser/Tokenizer.h"
15 #include "sbuf/Stream.h"
16 #include "SquidConfig.h"
17
// Status-line prefix ("magic") that marks an ICY response.
// The trailing SP is part of the magic itself.
18 const SBuf Http::One::ResponseParser::IcyMagic("ICY ");
19
20 Http1::Parser::size_type
21 Http::One::ResponseParser::firstLineSize() const
22 {
23 Http1::Parser::size_type result = 0;
24
25 switch (msgProtocol_.protocol)
26 {
27 case AnyP::PROTO_HTTP:
28 result += Http1magic.length();
29 break;
30 case AnyP::PROTO_ICY:
31 result += IcyMagic.length();
32 break;
33 default: // no other protocols supported
34 return result;
35 }
36 // NP: the parser does not accept >2 DIGIT for version numbers
37 if (msgProtocol_.minor > 9)
38 result += 2;
39 else
40 result += 1;
41
42 result += 5; /* 5 octets in: SP status SP */
43 result += reasonPhrase_.length();
44 result += 2; /* CRLF terminator */
45 return result;
46 }
47
48 // NP: we found the protocol version and consumed it already.
49 // just need the status code and reason phrase
50 int
51 Http::One::ResponseParser::parseResponseStatusAndReason(Tokenizer &tok)
52 {
53 try {
54 if (!completedStatus_) {
55 debugs(74, 9, "seek status-code in: " << tok.remaining().substr(0,10) << "...");
56 ParseResponseStatus(tok, statusCode_);
57 buf_ = tok.remaining(); // resume checkpoint
58 completedStatus_ = true;
59 }
60 // NOTE: any whitespace after the single SP is part of the reason phrase.
61
62 /* RFC 7230 says we SHOULD ignore the reason phrase content
63 * but it has a definite valid vs invalid character set.
64 * We interpret the SHOULD as ignoring absence and syntax, but
65 * producing an error if it contains an invalid octet.
66 */
67
68 debugs(74, 9, "seek reason-phrase in: " << tok.remaining().substr(0,50) << "...");
69 // if we got here we are still looking for reason-phrase bytes
70 static const CharacterSet phraseChars = CharacterSet::WSP + CharacterSet::VCHAR + CharacterSet::OBSTEXT;
71 (void)tok.prefix(reasonPhrase_, phraseChars); // optional, no error if missing
72 skipLineTerminator(tok);
73 buf_ = tok.remaining(); // resume checkpoint
74 debugs(74, DBG_DATA, Raw("leftovers", buf_.rawContent(), buf_.length()));
75 return 1;
76 } catch (const InsufficientInput &) {
77 reasonPhrase_.clear();
78 return 0; // need more to be sure we have it all
79 } catch (const std::exception &ex) {
80 debugs(74, 6, "invalid status-line: " << ex.what());
81 }
82 return -1;
83 }
84
85 void
86 Http::One::ResponseParser::ParseResponseStatus(Tokenizer &tok, StatusCode &code)
87 {
88 int64_t statusValue;
89 if (tok.int64(statusValue, 10, false, 3) && tok.skipOne(Parser::DelimiterCharacters())) {
90 debugs(74, 6, "raw status-code=" << statusValue);
91 code = static_cast<StatusCode>(statusValue); // may be invalid
92
93 // RFC 7230 Section 3.1.2 says status-code is exactly three DIGITs
94 if (code <= 99)
95 throw TextException(ToSBuf("status-code too short: ", code), Here());
96
97 // Codes with a non-standard first digit (a.k.a. response class) are
98 // considered semantically invalid per the following HTTP WG discussion:
99 // https://lists.w3.org/Archives/Public/ietf-http-wg/2010AprJun/0354.html
100 if (code >= 600)
101 throw TextException(ToSBuf("status-code from an invalid response class: ", code), Here());
102 } else if (tok.atEnd()) {
103 throw InsufficientInput();
104 } else {
105 throw TextException("syntactically invalid status-code area", Here());
106 }
107 }
108
109 /**
110 * Attempt to parse the method field out of an HTTP message status-line.
111 *
112 * Governed by:
113 * RFC 1945 section 6.1
114 * RFC 7230 section 2.6, 3.1 and 3.5
115 *
116 * Parsing state is stored between calls. The current implementation uses
117 * checkpoints after each successful status-line field.
118 * The return value tells you whether the parsing is completed or not.
119 *
120 * \retval -1 an error occurred.
121 * \retval 1 successful parse. statusCode_ and maybe reasonPhrase_ are filled and buffer consumed including first delimiter.
122 * \retval 0 more data is needed to complete the parse
123 */
124 int
125 Http::One::ResponseParser::parseResponseFirstLine()
126 {
127 Tokenizer tok(buf_);
128
129 if (msgProtocol_.protocol != AnyP::PROTO_NONE) {
130 debugs(74, 6, "continue incremental parse for " << msgProtocol_);
131 debugs(74, DBG_DATA, "parse remaining buf={length=" << tok.remaining().length() << ", data='" << tok.remaining() << "'}");
132 // we already found the magic, but not the full line. keep going.
133 return parseResponseStatusAndReason(tok);
134
135 } else if (tok.skip(Http1magic)) {
136 debugs(74, 6, "found prefix magic " << Http1magic);
137 // HTTP Response status-line parse
138
139 // magic contains major version, still need to find minor DIGIT
140 int64_t verMinor;
141 const auto &WspDelim = DelimiterCharacters();
142 if (tok.int64(verMinor, 10, false, 1) && tok.skipOne(WspDelim)) {
143 msgProtocol_.protocol = AnyP::PROTO_HTTP;
144 msgProtocol_.major = 1;
145 msgProtocol_.minor = static_cast<unsigned int>(verMinor);
146
147 debugs(74, 6, "found version=" << msgProtocol_);
148
149 debugs(74, DBG_DATA, "parse remaining buf={length=" << tok.remaining().length() << ", data='" << tok.remaining() << "'}");
150 buf_ = tok.remaining(); // resume checkpoint
151 return parseResponseStatusAndReason(tok);
152
153 } else if (tok.atEnd())
154 return 0; // need more to be sure we have it all
155 else
156 return -1; // invalid version or delimiter, a single SP terminator required
157
158 } else if (tok.skip(IcyMagic)) {
159 debugs(74, 6, "found prefix magic " << IcyMagic);
160 // ICY Response status-line parse (same as HTTP/1 after the magic version)
161 msgProtocol_.protocol = AnyP::PROTO_ICY;
162 // NP: ICY has no /major.minor details
163 debugs(74, DBG_DATA, "parse remaining buf={length=" << tok.remaining().length() << ", data='" << tok.remaining() << "'}");
164 buf_ = tok.remaining(); // resume checkpoint
165 return parseResponseStatusAndReason(tok);
166 } else if (buf_.length() < Http1magic.length() && Http1magic.startsWith(buf_)) {
167 debugs(74, 7, Raw("valid HTTP/1 prefix", buf_.rawContent(), buf_.length()));
168 return 0;
169 } else if (buf_.length() < IcyMagic.length() && IcyMagic.startsWith(buf_)) {
170 debugs(74, 7, Raw("valid ICY prefix", buf_.rawContent(), buf_.length()));
171 return 0;
172 } else {
173 debugs(74, 2, "unknown/missing prefix magic. Interpreting as HTTP/0.9");
174 // found something that looks like an HTTP/0.9 response
175 // Gateway/Transform it into HTTP/1.1
176 msgProtocol_ = Http::ProtocolVersion(1,1);
177 // XXX: probably should use version 0.9 here and upgrade on output,
178 // but the old code did 1.1 transformation now.
179 statusCode_ = Http::scOkay;
180 static const SBuf gatewayPhrase("Gatewaying");
181 reasonPhrase_ = gatewayPhrase;
182 static const SBuf fakeHttpMimeBlock("X-Transformed-From: HTTP/0.9\r\n"
183 /* Server: visible_appname_string */
184 "Mime-Version: 1.0\r\n"
185 /* Date: squid_curtime */
186 "Expires: -1\r\n\r\n");
187 mimeHeaderBlock_ = fakeHttpMimeBlock;
188 parsingStage_ = HTTP_PARSE_DONE;
189 return 1; // no more parsing
190 }
191
192 // unreachable
193 assert(false);
194 return -1;
195 }
196
197 bool
198 Http::One::ResponseParser::parse(const SBuf &aBuf)
199 {
200 buf_ = aBuf;
201 debugs(74, DBG_DATA, "Parse buf={length=" << aBuf.length() << ", data='" << aBuf << "'}");
202
203 // stage 1: locate the status-line
204 if (parsingStage_ == HTTP_PARSE_NONE) {
205 // RFC 7230 explicitly states whether garbage whitespace is to be handled
206 // at each point of the message framing boundaries.
207 // It omits mentioning garbage prior to HTTP Responses.
208 // Therefore, if we receive anything at all treat it as Response message.
209 if (!buf_.isEmpty())
210 parsingStage_ = HTTP_PARSE_FIRST;
211 else
212 return false;
213 }
214
215 // stage 2: parse the status-line
216 if (parsingStage_ == HTTP_PARSE_FIRST) {
217 const int retcode = parseResponseFirstLine();
218
219 // first-line (or a look-alike) found successfully.
220 if (retcode > 0 && parsingStage_ == HTTP_PARSE_FIRST)
221 parsingStage_ = HTTP_PARSE_MIME;
222 debugs(74, 5, "status-line: retval " << retcode);
223 debugs(74, 5, "status-line: proto " << msgProtocol_);
224 debugs(74, 5, "status-line: status-code " << statusCode_);
225 debugs(74, 5, "status-line: reason-phrase " << reasonPhrase_);
226 debugs(74, 5, "Parser: bytes processed=" << (aBuf.length()-buf_.length()));
227
228 // syntax errors already
229 if (retcode < 0) {
230 parsingStage_ = HTTP_PARSE_DONE;
231 parseStatusCode = Http::scInvalidHeader;
232 return false;
233 }
234 }
235
236 // stage 3: locate the mime header block
237 if (parsingStage_ == HTTP_PARSE_MIME) {
238 if (!grabMimeBlock("Response", Config.maxReplyHeaderSize))
239 return false;
240 }
241
242 return !needsMoreData();
243 }
244