]> git.ipfire.org Git - thirdparty/squid.git/blame - src/http/one/ResponseParser.cc
Parser-NG: HTTP Response Parser upgrade
[thirdparty/squid.git] / src / http / one / ResponseParser.cc
CommitLineData
f1d5359e
AJ
1#include "squid.h"
2#include "Debug.h"
3#include "http/one/ResponseParser.h"
4#include "http/ProtocolVersion.h"
5#include "parser/Tokenizer.h"
6#include "profiler/Profiler.h"
7#include "SquidConfig.h"
8
9const SBuf Http::One::ResponseParser::IcyMagic("ICY ");
10
11Http1::Parser::size_type
12Http::One::ResponseParser::firstLineSize() const
13{
14 Http1::Parser::size_type result = 0;
15
16 switch (msgProtocol_.protocol)
17 {
18 case AnyP::PROTO_HTTP:
19 result += Http1magic.length();
20 break;
21 case AnyP::PROTO_ICY:
22 result += IcyMagic.length();
23 break;
24 default: // no other protocols supported
25 return result;
26 }
27 // NP: the parser does not accept >2 DIGIT for version numbers
b8f86fd2 28 if (msgProtocol_.minor > 9)
f1d5359e
AJ
29 result += 2;
30 else
31 result += 1;
32
33 result += 5; /* 5 octets in: SP status SP */
34 result += reasonPhrase_.length();
05f32cc2 35 result += 2; /* CRLF terminator */
f1d5359e
AJ
36 return result;
37}
38
39// NP: we found the protocol version and consumed it already.
40// just need the status code and reason phrase
41const int
b8f86fd2 42Http::One::ResponseParser::parseResponseStatusAndReason(::Parser::Tokenizer &tok, const CharacterSet &WspDelim)
f1d5359e 43{
f1d5359e 44 if (!completedStatus_) {
db6a29e1 45 debugs(74, 9, "seek status-code in: " << tok.remaining().substr(0,10) << "...");
b8f86fd2
AJ
46 /* RFC 7230 section 3.1.2 - status code is 3 DIGIT octets.
47 * There is no limit on what those octets may be.
48 * 000 through 999 are all valid.
49 */
50 int64_t statusValue;
51 if (tok.int64(statusValue, 10, false, 3) && tok.skipOne(WspDelim)) {
f1d5359e 52
b8f86fd2
AJ
53 debugs(74, 6, "found int64 status-code=" << statusValue);
54 statusCode_ = static_cast<Http::StatusCode>(statusValue);
db6a29e1 55
b8f86fd2
AJ
56 buf_ = tok.remaining(); // resume checkpoint
57 completedStatus_ = true;
f1d5359e 58
b8f86fd2
AJ
59 } else if (tok.atEnd()) {
60 debugs(74, 6, "Parser needs more data");
61 return 0; // need more to be sure we have it all
f1d5359e 62
b8f86fd2
AJ
63 } else {
64 debugs(74, 6, "invalid status-line. invalid code.");
65 return -1; // invalid status, a single SP terminator required
66 }
67 // NOTE: any whitespace after the single SP is part of the reason phrase.
f1d5359e
AJ
68 }
69
70 if (tok.atEnd())
71 return 0; // need more to be sure we have it all
72
73 /* RFC 7230 says we SHOULD ignore the reason phrase content
74 * but it has a definite valid vs invalid character set.
75 * We interpret the SHOULD as ignoring absence and syntax, but
76 * producing an error if it contains an invalid octet.
77 */
78
db6a29e1
AJ
79 debugs(74, 9, "seek reason-phrase in: " << tok.remaining().substr(0,50) << "...");
80
f1d5359e
AJ
81 // if we got here we are still looking for reason-phrase bytes
82 static const CharacterSet phraseChars = CharacterSet::WSP + CharacterSet::VCHAR + CharacterSet::OBSTEXT;
b8f86fd2
AJ
83 (void)tok.prefix(reasonPhrase_, phraseChars); // optional, no error if missing
84 if (skipLineTerminator(tok)) {
85 debugs(74, DBG_DATA, "parse remaining buf={length=" << tok.remaining().length() << ", data='" << tok.remaining() << "'}");
86 buf_ = tok.remaining(); // resume checkpoint
87 return 1;
88 }
89 reasonPhrase_.clear();
f1d5359e
AJ
90
91 if (tok.atEnd())
92 return 0; // need more to be sure we have it all
93
b8f86fd2
AJ
94 debugs(74, 6, "invalid status-line. garbage in reason phrase.");
95 return -1;
f1d5359e
AJ
96}
97
b8f86fd2
AJ
98/**
99 * Attempt to parse the method field out of an HTTP message status-line.
100 *
101 * Governed by:
102 * RFC 1945 section 6.1
103 * RFC 7230 section 2.6, 3.1 and 3.5
104 *
105 * Parsing state is stored between calls. The current implementation uses
106 * checkpoints after each successful status-line field.
107 * The return value tells you whether the parsing is completed or not.
108 *
109 * \retval -1 an error occurred.
110 * \retval 1 successful parse. statusCode_ and maybe reasonPhrase_ are filled and buffer consumed including first delimiter.
111 * \retval 0 more data is needed to complete the parse
112 */
f1d5359e
AJ
113const int
114Http::One::ResponseParser::parseResponseFirstLine()
115{
116 ::Parser::Tokenizer tok(buf_);
117
b8f86fd2
AJ
118 CharacterSet WspDelim = CharacterSet::SP; // strict parse only accepts SP
119
120 if (Config.onoff.relaxed_header_parser) {
121 // RFC 7230 section 3.5
122 // tolerant parser MAY accept any of SP, HTAB, VT (%x0B), FF (%x0C), or bare CR
123 // as whitespace between status-line fields
124 WspDelim += CharacterSet::HTAB
125 + CharacterSet("VT,FF","\x0B\x0C")
126 + CharacterSet::CR;
127 }
128
f1d5359e 129 if (msgProtocol_.protocol != AnyP::PROTO_NONE) {
db6a29e1
AJ
130 debugs(74, 6, "continue incremental parse for " << msgProtocol_);
131 debugs(74, DBG_DATA, "parse remaining buf={length=" << tok.remaining().length() << ", data='" << tok.remaining() << "'}");
f1d5359e 132 // we already found the magic, but not the full line. keep going.
b8f86fd2 133 return parseResponseStatusAndReason(tok, WspDelim);
f1d5359e
AJ
134
135 } else if (tok.skip(Http1magic)) {
db6a29e1 136 debugs(74, 6, "found prefix magic " << Http1magic);
f1d5359e
AJ
137 // HTTP Response status-line parse
138
b8f86fd2
AJ
139 // magic contains major version, still need to find minor DIGIT
140 int64_t verMinor;
141 if (tok.int64(verMinor, 10, false, 1) && tok.skipOne(WspDelim)) {
142 msgProtocol_.protocol = AnyP::PROTO_HTTP;
143 msgProtocol_.major = 1;
144 msgProtocol_.minor = static_cast<unsigned int>(verMinor);
f1d5359e 145
b8f86fd2 146 debugs(74, 6, "found version=" << msgProtocol_);
db6a29e1 147
b8f86fd2
AJ
148 debugs(74, DBG_DATA, "parse remaining buf={length=" << tok.remaining().length() << ", data='" << tok.remaining() << "'}");
149 buf_ = tok.remaining(); // resume checkpoint
150 return parseResponseStatusAndReason(tok, WspDelim);
db6a29e1 151
b8f86fd2
AJ
152 } else if (tok.atEnd())
153 return 0; // need more to be sure we have it all
154 else
155 return -1; // invalid version or delimiter, a single SP terminator required
f1d5359e
AJ
156
157 } else if (tok.skip(IcyMagic)) {
db6a29e1 158 debugs(74, 6, "found prefix magic " << IcyMagic);
f1d5359e
AJ
159 // ICY Response status-line parse (same as HTTP/1 after the magic version)
160 msgProtocol_.protocol = AnyP::PROTO_ICY;
161 // NP: ICY has no /major.minor details
db6a29e1 162 debugs(74, DBG_DATA, "parse remaining buf={length=" << tok.remaining().length() << ", data='" << tok.remaining() << "'}");
f1d5359e 163 buf_ = tok.remaining(); // resume checkpoint
b8f86fd2 164 return parseResponseStatusAndReason(tok, WspDelim);
f1d5359e
AJ
165
166 } else if (buf_.length() > Http1magic.length() && buf_.length() > IcyMagic.length()) {
db6a29e1 167 debugs(74, 2, "unknown/missing prefix magic. Interpreting as HTTP/0.9");
f1d5359e 168 // found something that looks like an HTTP/0.9 response
db6a29e1 169 // Gateway/Transform it into HTTP/1.1
f1d5359e
AJ
170 msgProtocol_ = Http::ProtocolVersion(1,1);
171 // XXX: probably should use version 0.9 here and upgrade on output,
172 // but the old code did 1.1 transformation now.
173 statusCode_ = Http::scOkay;
174 static const SBuf gatewayPhrase("Gatewaying");
175 reasonPhrase_ = gatewayPhrase;
176 static const SBuf fakeHttpMimeBlock("X-Transformed-From: HTTP/0.9\r\n"
177 /* Server: visible_appname_string */
178 "Mime-Version: 1.0\r\n"
179 /* Date: squid_curtime */
180 "Expires: -1\r\n\r\n");
db6a29e1
AJ
181 mimeHeaderBlock_ = fakeHttpMimeBlock;
182 parsingStage_ = HTTP_PARSE_DONE;
f1d5359e
AJ
183 return 1; // no more parsing
184 }
185
186 return 0; // need more to parse anything.
187}
188
189bool
190Http::One::ResponseParser::parse(const SBuf &aBuf)
191{
192 buf_ = aBuf;
193 debugs(74, DBG_DATA, "Parse buf={length=" << aBuf.length() << ", data='" << aBuf << "'}");
194
195 // stage 1: locate the status-line
196 if (parsingStage_ == HTTP_PARSE_NONE) {
197 // RFC 7230 explicitly states whether garbage whitespace is to be handled
198 // at each point of the message framing boundaries.
199 // It omits mentioning garbage prior to HTTP Responses.
200 // Therefore, if we receive anything at all treat it as Response message.
201 if (!buf_.isEmpty())
202 parsingStage_ = HTTP_PARSE_FIRST;
203 else
204 return false;
205 }
206
207 // stage 2: parse the status-line
208 if (parsingStage_ == HTTP_PARSE_FIRST) {
209 PROF_start(HttpParserParseReplyLine);
210
f8cab755 211 const int retcode = parseResponseFirstLine();
f1d5359e
AJ
212
213 // first-line (or a look-alike) found successfully.
214 if (retcode > 0)
215 parsingStage_ = HTTP_PARSE_MIME;
216 debugs(74, 5, "status-line: retval " << retcode);
217 debugs(74, 5, "status-line: proto " << msgProtocol_);
218 debugs(74, 5, "status-line: status-code " << statusCode_);
219 debugs(74, 5, "status-line: reason-phrase " << reasonPhrase_);
220 debugs(74, 5, "Parser: bytes processed=" << (aBuf.length()-buf_.length()));
221 PROF_stop(HttpParserParseReplyLine);
222
223 // syntax errors already
224 if (retcode < 0) {
225 parsingStage_ = HTTP_PARSE_DONE;
db6a29e1 226 statusCode_ = Http::scInvalidHeader;
f1d5359e
AJ
227 return false;
228 }
229 }
230
231 // stage 3: locate the mime header block
232 if (parsingStage_ == HTTP_PARSE_MIME) {
f8cab755 233 if (!grabMimeBlock("Response", Config.maxReplyHeaderSize))
f1d5359e
AJ
234 return false;
235 }
236
237 return !needsMoreData();
238}