]> git.ipfire.org Git - thirdparty/squid.git/blob - src/http/one/ResponseParser.cc
Add HTTP Response parser
[thirdparty/squid.git] / src / http / one / ResponseParser.cc
1 #include "squid.h"
2 #include "Debug.h"
3 #include "http/one/ResponseParser.h"
4 #include "http/ProtocolVersion.h"
5 #include "parser/Tokenizer.h"
6 #include "profiler/Profiler.h"
7 #include "SquidConfig.h"
8
// Magic prefix that identifies an ICY response status-line.
// The trailing SP is part of the magic: parseResponseFirstLine() skips the
// whole token and then goes straight to the status code.
9 const SBuf Http::One::ResponseParser::IcyMagic("ICY ");
10
11 Http1::Parser::size_type
12 Http::One::ResponseParser::firstLineSize() const
13 {
14 Http1::Parser::size_type result = 0;
15
16 switch (msgProtocol_.protocol)
17 {
18 case AnyP::PROTO_HTTP:
19 result += Http1magic.length();
20 break;
21 case AnyP::PROTO_ICY:
22 result += IcyMagic.length();
23 break;
24 default: // no other protocols supported
25 return result;
26 }
27 // NP: the parser does not accept >2 DIGIT for version numbers
28 if (msgProtocol_.minor >10)
29 result += 2;
30 else
31 result += 1;
32
33 result += 5; /* 5 octets in: SP status SP */
34 result += reasonPhrase_.length();
35 return result;
36 }
37
38 // NP: we found the protocol version and consumed it already.
39 // just need the status code and reason phrase
40 const int
41 Http::One::ResponseParser::parseResponseStatusAndReason()
42 {
43 if (buf_.isEmpty())
44 return 0;
45
46 ::Parser::Tokenizer tok(buf_);
47
48 if (!completedStatus_) {
49 SBuf status;
50 // status code is 3 DIGIT octets
51 if(!tok.prefix(status, CharacterSet::DIGIT, 3))
52 return -1; // invalid status
53 // NOTE: multiple SP or non-SP bytes between version and status code are invalid.
54 if (tok.atEnd())
55 return 0; // need more to be sure we have it all
56 if(!tok.skip(' '))
57 return -1; // invalid status, a single SP terminator required
58 // NOTE: any whitespace after the single SP is part of the reason phrase.
59
60 // get the actual numeric value of the 0-3 digits we found
61 ::Parser::Tokenizer t2(status);
62 int64_t statusValue;
63 if (!t2.int64(statusValue))
64 return -1; // ouch. digits not forming a valid number?
65 if (statusValue < 0 || statusValue > 999)
66 return -1; // ouch. digits not within valid status code range.
67
68 statusCode_ = static_cast<Http::StatusCode>(statusValue);
69
70 buf_ = tok.remaining(); // resume checkpoint
71 completedStatus_ = true;
72 }
73
74 if (tok.atEnd())
75 return 0; // need more to be sure we have it all
76
77 /* RFC 7230 says we SHOULD ignore the reason phrase content
78 * but it has a definite valid vs invalid character set.
79 * We interpret the SHOULD as ignoring absence and syntax, but
80 * producing an error if it contains an invalid octet.
81 */
82
83 // if we got here we are still looking for reason-phrase bytes
84 static const CharacterSet phraseChars = CharacterSet::WSP + CharacterSet::VCHAR + CharacterSet::OBSTEXT;
85 tok.prefix(reasonPhrase_, phraseChars); // optional, no error if missing
86 tok.skip('\r'); // optional trailing CR
87
88 if (tok.atEnd())
89 return 0; // need more to be sure we have it all
90
91 // LF existence matters
92 if (!tok.skip('\n')) {
93 reasonPhrase_.clear();
94 return -1; // found invalid characters in the phrase
95 }
96
97 buf_ = tok.remaining(); // resume checkpoint
98 return 1;
99 }
100
101 const int
102 Http::One::ResponseParser::parseResponseFirstLine()
103 {
104 ::Parser::Tokenizer tok(buf_);
105
106 if (msgProtocol_.protocol != AnyP::PROTO_NONE) {
107 // we already found the magic, but not the full line. keep going.
108 return parseResponseStatusAndReason();
109
110 } else if (tok.skip(Http1magic)) {
111 // HTTP Response status-line parse
112
113 // magic contains major version, still need to find minor
114 SBuf verMinor;
115 // NP: we limit to 2-digits for speed, there really is no limit
116 // XXX: the protocols we accept dont have valid versions > 10 anyway
117 if (!tok.prefix(verMinor, CharacterSet::DIGIT, 2))
118 return -1; // invalid version minor code
119 if (tok.atEnd())
120 return 0; // need more to be sure we have it all
121 if(!tok.skip(' '))
122 return -1; // invalid version, a single SP terminator required
123
124 // get the actual numeric value of the 0-3 digits we found
125 ::Parser::Tokenizer t2(verMinor);
126 int64_t tvm = 0;
127 if (!t2.int64(tvm))
128 return -1; // ouch. digits not forming a valid number?
129 msgProtocol_.minor = static_cast<unsigned int>(tvm);
130
131 msgProtocol_.protocol = AnyP::PROTO_HTTP;
132 msgProtocol_.major = 1;
133 buf_ = tok.remaining(); // resume checkpoint
134 return parseResponseStatusAndReason();
135
136 } else if (tok.skip(IcyMagic)) {
137 // ICY Response status-line parse (same as HTTP/1 after the magic version)
138 msgProtocol_.protocol = AnyP::PROTO_ICY;
139 // NP: ICY has no /major.minor details
140 buf_ = tok.remaining(); // resume checkpoint
141 return parseResponseStatusAndReason();
142
143 } else if (buf_.length() > Http1magic.length() && buf_.length() > IcyMagic.length()) {
144 // found something that looks like an HTTP/0.9 response
145 msgProtocol_ = Http::ProtocolVersion(1,1);
146 // XXX: probably should use version 0.9 here and upgrade on output,
147 // but the old code did 1.1 transformation now.
148 statusCode_ = Http::scOkay;
149 static const SBuf gatewayPhrase("Gatewaying");
150 reasonPhrase_ = gatewayPhrase;
151 static const SBuf fakeHttpMimeBlock("X-Transformed-From: HTTP/0.9\r\n"
152 /* Server: visible_appname_string */
153 "Mime-Version: 1.0\r\n"
154 /* Date: squid_curtime */
155 "Expires: -1\r\n\r\n");
156 return 1; // no more parsing
157 }
158
159 return 0; // need more to parse anything.
160 }
161
162 bool
163 Http::One::ResponseParser::parse(const SBuf &aBuf)
164 {
165 buf_ = aBuf;
166 debugs(74, DBG_DATA, "Parse buf={length=" << aBuf.length() << ", data='" << aBuf << "'}");
167
168 // stage 1: locate the status-line
169 if (parsingStage_ == HTTP_PARSE_NONE) {
170 // RFC 7230 explicitly states whether garbage whitespace is to be handled
171 // at each point of the message framing boundaries.
172 // It omits mentioning garbage prior to HTTP Responses.
173 // Therefore, if we receive anything at all treat it as Response message.
174 if (!buf_.isEmpty())
175 parsingStage_ = HTTP_PARSE_FIRST;
176 else
177 return false;
178 }
179
180 // stage 2: parse the status-line
181 if (parsingStage_ == HTTP_PARSE_FIRST) {
182 PROF_start(HttpParserParseReplyLine);
183
184 int retcode = parseResponseFirstLine();
185
186 // first-line (or a look-alike) found successfully.
187 if (retcode > 0)
188 parsingStage_ = HTTP_PARSE_MIME;
189 debugs(74, 5, "status-line: retval " << retcode);
190 debugs(74, 5, "status-line: proto " << msgProtocol_);
191 debugs(74, 5, "status-line: status-code " << statusCode_);
192 debugs(74, 5, "status-line: reason-phrase " << reasonPhrase_);
193 debugs(74, 5, "Parser: bytes processed=" << (aBuf.length()-buf_.length()));
194 PROF_stop(HttpParserParseReplyLine);
195
196 // syntax errors already
197 if (retcode < 0) {
198 parsingStage_ = HTTP_PARSE_DONE;
199 statusCode_ = scInvalidHeader;
200 return false;
201 }
202 }
203
204 // stage 3: locate the mime header block
205 if (parsingStage_ == HTTP_PARSE_MIME) {
206 if (!findMimeBlock("Response", Config.maxReplyHeaderSize))
207 return false;
208 }
209
210 return !needsMoreData();
211 }