/*
 * src/http/one/ResponseParser.cc
 *
 * Copyright (C) 1996-2022 The Squid Software Foundation and contributors
 *
 * Squid software is distributed under GPLv2+ license and includes
 * contributions from numerous individuals and organizations.
 * Please see the COPYING and CONTRIBUTORS files for details.
 */
#include "debug/Stream.h"
#include "http/one/ResponseParser.h"
#include "http/ProtocolVersion.h"
#include "parser/Tokenizer.h"
#include "sbuf/Stream.h"
#include "SquidConfig.h"

#include <cstdint>
#include <exception>
18 const SBuf
Http::One::ResponseParser::IcyMagic("ICY ");
20 Http1::Parser::size_type
21 Http::One::ResponseParser::firstLineSize() const
23 Http1::Parser::size_type result
= 0;
25 switch (msgProtocol_
.protocol
)
27 case AnyP::PROTO_HTTP
:
28 result
+= Http1magic
.length();
31 result
+= IcyMagic
.length();
33 default: // no other protocols supported
36 // NP: the parser does not accept >2 DIGIT for version numbers
37 if (msgProtocol_
.minor
> 9)
42 result
+= 5; /* 5 octets in: SP status SP */
43 result
+= reasonPhrase_
.length();
44 result
+= 2; /* CRLF terminator */
48 // NP: we found the protocol version and consumed it already.
49 // just need the status code and reason phrase
51 Http::One::ResponseParser::parseResponseStatusAndReason(Tokenizer
&tok
)
54 if (!completedStatus_
) {
55 debugs(74, 9, "seek status-code in: " << tok
.remaining().substr(0,10) << "...");
56 ParseResponseStatus(tok
, statusCode_
);
57 buf_
= tok
.remaining(); // resume checkpoint
58 completedStatus_
= true;
60 // NOTE: any whitespace after the single SP is part of the reason phrase.
62 /* RFC 7230 says we SHOULD ignore the reason phrase content
63 * but it has a definite valid vs invalid character set.
64 * We interpret the SHOULD as ignoring absence and syntax, but
65 * producing an error if it contains an invalid octet.
68 debugs(74, 9, "seek reason-phrase in: " << tok
.remaining().substr(0,50) << "...");
69 // if we got here we are still looking for reason-phrase bytes
70 static const CharacterSet phraseChars
= CharacterSet::WSP
+ CharacterSet::VCHAR
+ CharacterSet::OBSTEXT
;
71 (void)tok
.prefix(reasonPhrase_
, phraseChars
); // optional, no error if missing
72 skipLineTerminator(tok
);
73 buf_
= tok
.remaining(); // resume checkpoint
74 debugs(74, DBG_DATA
, Raw("leftovers", buf_
.rawContent(), buf_
.length()));
76 } catch (const InsufficientInput
&) {
77 reasonPhrase_
.clear();
78 return 0; // need more to be sure we have it all
79 } catch (const std::exception
&ex
) {
80 debugs(74, 6, "invalid status-line: " << ex
.what());
86 Http::One::ResponseParser::ParseResponseStatus(Tokenizer
&tok
, StatusCode
&code
)
89 if (tok
.int64(statusValue
, 10, false, 3) && tok
.skipOne(Parser::DelimiterCharacters())) {
90 debugs(74, 6, "raw status-code=" << statusValue
);
91 code
= static_cast<StatusCode
>(statusValue
); // may be invalid
93 // RFC 7230 Section 3.1.2 says status-code is exactly three DIGITs
95 throw TextException(ToSBuf("status-code too short: ", code
), Here());
97 // Codes with a non-standard first digit (a.k.a. response class) are
98 // considered semantically invalid per the following HTTP WG discussion:
99 // https://lists.w3.org/Archives/Public/ietf-http-wg/2010AprJun/0354.html
101 throw TextException(ToSBuf("status-code from an invalid response class: ", code
), Here());
102 } else if (tok
.atEnd()) {
103 throw InsufficientInput();
105 throw TextException("syntactically invalid status-code area", Here());
110 * Attempt to parse the method field out of an HTTP message status-line.
113 * RFC 1945 section 6.1
114 * RFC 7230 section 2.6, 3.1 and 3.5
116 * Parsing state is stored between calls. The current implementation uses
117 * checkpoints after each successful status-line field.
118 * The return value tells you whether the parsing is completed or not.
120 * \retval -1 an error occurred.
121 * \retval 1 successful parse. statusCode_ and maybe reasonPhrase_ are filled and buffer consumed including first delimiter.
122 * \retval 0 more data is needed to complete the parse
125 Http::One::ResponseParser::parseResponseFirstLine()
129 if (msgProtocol_
.protocol
!= AnyP::PROTO_NONE
) {
130 debugs(74, 6, "continue incremental parse for " << msgProtocol_
);
131 debugs(74, DBG_DATA
, "parse remaining buf={length=" << tok
.remaining().length() << ", data='" << tok
.remaining() << "'}");
132 // we already found the magic, but not the full line. keep going.
133 return parseResponseStatusAndReason(tok
);
135 } else if (tok
.skip(Http1magic
)) {
136 debugs(74, 6, "found prefix magic " << Http1magic
);
137 // HTTP Response status-line parse
139 // magic contains major version, still need to find minor DIGIT
141 const auto &WspDelim
= DelimiterCharacters();
142 if (tok
.int64(verMinor
, 10, false, 1) && tok
.skipOne(WspDelim
)) {
143 msgProtocol_
.protocol
= AnyP::PROTO_HTTP
;
144 msgProtocol_
.major
= 1;
145 msgProtocol_
.minor
= static_cast<unsigned int>(verMinor
);
147 debugs(74, 6, "found version=" << msgProtocol_
);
149 debugs(74, DBG_DATA
, "parse remaining buf={length=" << tok
.remaining().length() << ", data='" << tok
.remaining() << "'}");
150 buf_
= tok
.remaining(); // resume checkpoint
151 return parseResponseStatusAndReason(tok
);
153 } else if (tok
.atEnd())
154 return 0; // need more to be sure we have it all
156 return -1; // invalid version or delimiter, a single SP terminator required
158 } else if (tok
.skip(IcyMagic
)) {
159 debugs(74, 6, "found prefix magic " << IcyMagic
);
160 // ICY Response status-line parse (same as HTTP/1 after the magic version)
161 msgProtocol_
.protocol
= AnyP::PROTO_ICY
;
162 // NP: ICY has no /major.minor details
163 debugs(74, DBG_DATA
, "parse remaining buf={length=" << tok
.remaining().length() << ", data='" << tok
.remaining() << "'}");
164 buf_
= tok
.remaining(); // resume checkpoint
165 return parseResponseStatusAndReason(tok
);
166 } else if (buf_
.length() < Http1magic
.length() && Http1magic
.startsWith(buf_
)) {
167 debugs(74, 7, Raw("valid HTTP/1 prefix", buf_
.rawContent(), buf_
.length()));
169 } else if (buf_
.length() < IcyMagic
.length() && IcyMagic
.startsWith(buf_
)) {
170 debugs(74, 7, Raw("valid ICY prefix", buf_
.rawContent(), buf_
.length()));
173 debugs(74, 2, "unknown/missing prefix magic. Interpreting as HTTP/0.9");
174 // found something that looks like an HTTP/0.9 response
175 // Gateway/Transform it into HTTP/1.1
176 msgProtocol_
= Http::ProtocolVersion(1,1);
177 // XXX: probably should use version 0.9 here and upgrade on output,
178 // but the old code did 1.1 transformation now.
179 statusCode_
= Http::scOkay
;
180 static const SBuf
gatewayPhrase("Gatewaying");
181 reasonPhrase_
= gatewayPhrase
;
182 static const SBuf
fakeHttpMimeBlock("X-Transformed-From: HTTP/0.9\r\n"
183 /* Server: visible_appname_string */
184 "Mime-Version: 1.0\r\n"
185 /* Date: squid_curtime */
186 "Expires: -1\r\n\r\n");
187 mimeHeaderBlock_
= fakeHttpMimeBlock
;
188 parsingStage_
= HTTP_PARSE_DONE
;
189 return 1; // no more parsing
198 Http::One::ResponseParser::parse(const SBuf
&aBuf
)
201 debugs(74, DBG_DATA
, "Parse buf={length=" << aBuf
.length() << ", data='" << aBuf
<< "'}");
203 // stage 1: locate the status-line
204 if (parsingStage_
== HTTP_PARSE_NONE
) {
205 // RFC 7230 explicitly states whether garbage whitespace is to be handled
206 // at each point of the message framing boundaries.
207 // It omits mentioning garbage prior to HTTP Responses.
208 // Therefore, if we receive anything at all treat it as Response message.
210 parsingStage_
= HTTP_PARSE_FIRST
;
215 // stage 2: parse the status-line
216 if (parsingStage_
== HTTP_PARSE_FIRST
) {
217 const int retcode
= parseResponseFirstLine();
219 // first-line (or a look-alike) found successfully.
220 if (retcode
> 0 && parsingStage_
== HTTP_PARSE_FIRST
)
221 parsingStage_
= HTTP_PARSE_MIME
;
222 debugs(74, 5, "status-line: retval " << retcode
);
223 debugs(74, 5, "status-line: proto " << msgProtocol_
);
224 debugs(74, 5, "status-line: status-code " << statusCode_
);
225 debugs(74, 5, "status-line: reason-phrase " << reasonPhrase_
);
226 debugs(74, 5, "Parser: bytes processed=" << (aBuf
.length()-buf_
.length()));
228 // syntax errors already
230 parsingStage_
= HTTP_PARSE_DONE
;
231 parseStatusCode
= Http::scInvalidHeader
;
236 // stage 3: locate the mime header block
237 if (parsingStage_
== HTTP_PARSE_MIME
) {
238 if (!grabMimeBlock("Response", Config
.maxReplyHeaderSize
))
242 return !needsMoreData();