+/*
+ * Copyright (C) 1996-2023 The Squid Software Foundation and contributors
+ *
+ * Squid software is distributed under GPLv2+ license and includes
+ * contributions from numerous individuals and organizations.
+ * Please see the COPYING and CONTRIBUTORS files for details.
+ */
+
#include "squid.h"
-#include "Debug.h"
+#include "base/Raw.h"
+#include "debug/Stream.h"
#include "http/one/ResponseParser.h"
#include "http/ProtocolVersion.h"
#include "parser/Tokenizer.h"
-#include "profiler/Profiler.h"
+#include "sbuf/Stream.h"
#include "SquidConfig.h"
const SBuf Http::One::ResponseParser::IcyMagic("ICY ");
return result;
}
// NP: the parser does not accept >2 DIGIT for version numbers
- if (msgProtocol_.minor >10)
+ if (msgProtocol_.minor > 9)
result += 2;
else
result += 1;
// NP: we found the protocol version and consumed it already.
// just need the status code and reason phrase
-const int
-Http::One::ResponseParser::parseResponseStatusAndReason()
+int
+Http::One::ResponseParser::parseResponseStatusAndReason(Tokenizer &tok)
{
- if (buf_.isEmpty())
- return 0;
-
- ::Parser::Tokenizer tok(buf_);
-
- if (!completedStatus_) {
- debugs(74, 9, "seek status-code in: " << tok.remaining().substr(0,10) << "...");
- SBuf status;
- // status code is 3 DIGIT octets
- // NP: search space is >3 to get terminator character)
- if(!tok.prefix(status, CharacterSet::DIGIT, 4))
- return -1; // invalid status
- // NOTE: multiple SP or non-SP bytes between version and status code are invalid.
- if (tok.atEnd())
- return 0; // need more to be sure we have it all
- if(!tok.skip(' '))
- return -1; // invalid status, a single SP terminator required
+ try {
+ if (!completedStatus_) {
+ debugs(74, 9, "seek status-code in: " << tok.remaining().substr(0,10) << "...");
+ ParseResponseStatus(tok, statusCode_);
+ buf_ = tok.remaining(); // resume checkpoint
+ completedStatus_ = true;
+ }
// NOTE: any whitespace after the single SP is part of the reason phrase.
- debugs(74, 6, "found string status-code=" << status);
-
- // get the actual numeric value of the 0-3 digits we found
- ::Parser::Tokenizer t2(status);
- int64_t statusValue;
- if (!t2.int64(statusValue))
- return -1; // ouch. digits not forming a valid number?
- debugs(74, 6, "found int64 status-code=" << statusValue);
- if (statusValue < 0 || statusValue > 999)
- return -1; // ouch. digits not within valid status code range.
-
- statusCode_ = static_cast<Http::StatusCode>(statusValue);
-
+ /* RFC 7230 says we SHOULD ignore the reason phrase content
+ * but it has a definite valid vs invalid character set.
+ * We interpret the SHOULD as ignoring absence and syntax, but
+ * producing an error if it contains an invalid octet.
+ */
+
+ debugs(74, 9, "seek reason-phrase in: " << tok.remaining().substr(0,50) << "...");
+ // if we got here we are still looking for reason-phrase bytes
+ static const CharacterSet phraseChars = CharacterSet::WSP + CharacterSet::VCHAR + CharacterSet::OBSTEXT;
+ (void)tok.prefix(reasonPhrase_, phraseChars); // optional, no error if missing
+ skipLineTerminator(tok);
buf_ = tok.remaining(); // resume checkpoint
- completedStatus_ = true;
- }
-
- if (tok.atEnd())
- return 0; // need more to be sure we have it all
-
- /* RFC 7230 says we SHOULD ignore the reason phrase content
- * but it has a definite valid vs invalid character set.
- * We interpret the SHOULD as ignoring absence and syntax, but
- * producing an error if it contains an invalid octet.
- */
-
- debugs(74, 9, "seek reason-phrase in: " << tok.remaining().substr(0,50) << "...");
-
- // if we got here we are still looking for reason-phrase bytes
- static const CharacterSet phraseChars = CharacterSet::WSP + CharacterSet::VCHAR + CharacterSet::OBSTEXT;
- tok.prefix(reasonPhrase_, phraseChars); // optional, no error if missing
- tok.skip('\r'); // optional trailing CR
-
- if (tok.atEnd())
- return 0; // need more to be sure we have it all
-
- // LF existence matters
- if (!tok.skip('\n')) {
+ debugs(74, DBG_DATA, Raw("leftovers", buf_.rawContent(), buf_.length()));
+ return 1;
+ } catch (const InsufficientInput &) {
reasonPhrase_.clear();
- return -1; // found invalid characters in the phrase
+ return 0; // need more to be sure we have it all
+ } catch (const std::exception &ex) {
+ debugs(74, 6, "invalid status-line: " << ex.what());
}
+ return -1;
+}
- debugs(74, DBG_DATA, "parse remaining buf={length=" << tok.remaining().length() << ", data='" << tok.remaining() << "'}");
- buf_ = tok.remaining(); // resume checkpoint
- return 1;
+void
+Http::One::ResponseParser::ParseResponseStatus(Tokenizer &tok, StatusCode &code)
+{
+ int64_t statusValue;
+ if (tok.int64(statusValue, 10, false, 3) && tok.skipOne(Parser::DelimiterCharacters())) {
+ debugs(74, 6, "raw status-code=" << statusValue);
+ code = static_cast<StatusCode>(statusValue); // may be invalid
+
+ // RFC 7230 Section 3.1.2 says status-code is exactly three DIGITs
+ if (code <= 99)
+ throw TextException(ToSBuf("status-code too short: ", code), Here());
+
+ // Codes with a non-standard first digit (a.k.a. response class) are
+ // considered semantically invalid per the following HTTP WG discussion:
+ // https://lists.w3.org/Archives/Public/ietf-http-wg/2010AprJun/0354.html
+ if (code >= 600)
+ throw TextException(ToSBuf("status-code from an invalid response class: ", code), Here());
+ } else if (tok.atEnd()) {
+ throw InsufficientInput();
+ } else {
+ throw TextException("syntactically invalid status-code area", Here());
+ }
}
-const int
+/**
+ * Attempt to parse the method field out of an HTTP message status-line.
+ *
+ * Governed by:
+ * RFC 1945 section 6.1
+ * RFC 7230 section 2.6, 3.1 and 3.5
+ *
+ * Parsing state is stored between calls. The current implementation uses
+ * checkpoints after each successful status-line field.
+ * The return value tells you whether the parsing is completed or not.
+ *
+ * \retval -1 an error occurred.
+ * \retval 1 successful parse. statusCode_ and maybe reasonPhrase_ are filled and buffer consumed including first delimiter.
+ * \retval 0 more data is needed to complete the parse
+ */
+int
Http::One::ResponseParser::parseResponseFirstLine()
{
- ::Parser::Tokenizer tok(buf_);
+ Tokenizer tok(buf_);
if (msgProtocol_.protocol != AnyP::PROTO_NONE) {
debugs(74, 6, "continue incremental parse for " << msgProtocol_);
debugs(74, DBG_DATA, "parse remaining buf={length=" << tok.remaining().length() << ", data='" << tok.remaining() << "'}");
// we already found the magic, but not the full line. keep going.
- return parseResponseStatusAndReason();
+ return parseResponseStatusAndReason(tok);
} else if (tok.skip(Http1magic)) {
debugs(74, 6, "found prefix magic " << Http1magic);
// HTTP Response status-line parse
- // magic contains major version, still need to find minor
- SBuf verMinor;
- // NP: we limit to 2-digits for speed, there really is no limit
- // XXX: the protocols we accept dont have valid versions > 10 anyway
- if (!tok.prefix(verMinor, CharacterSet::DIGIT, 2))
- return -1; // invalid version minor code
- if (tok.atEnd())
- return 0; // need more to be sure we have it all
- if(!tok.skip(' '))
- return -1; // invalid version, a single SP terminator required
-
- debugs(74, 6, "found string version-minor=" << verMinor);
+ // magic contains major version, still need to find minor DIGIT
+ int64_t verMinor;
+ const auto &WspDelim = DelimiterCharacters();
+ if (tok.int64(verMinor, 10, false, 1) && tok.skipOne(WspDelim)) {
+ msgProtocol_.protocol = AnyP::PROTO_HTTP;
+ msgProtocol_.major = 1;
+ msgProtocol_.minor = static_cast<unsigned int>(verMinor);
- // get the actual numeric value of the 0-3 digits we found
- ::Parser::Tokenizer t2(verMinor);
- int64_t tvm = 0;
- if (!t2.int64(tvm))
- return -1; // ouch. digits not forming a valid number?
- msgProtocol_.minor = static_cast<unsigned int>(tvm);
+ debugs(74, 6, "found version=" << msgProtocol_);
- msgProtocol_.protocol = AnyP::PROTO_HTTP;
- msgProtocol_.major = 1;
+ debugs(74, DBG_DATA, "parse remaining buf={length=" << tok.remaining().length() << ", data='" << tok.remaining() << "'}");
+ buf_ = tok.remaining(); // resume checkpoint
+ return parseResponseStatusAndReason(tok);
- debugs(74, 6, "found version=" << msgProtocol_);
-
- debugs(74, DBG_DATA, "parse remaining buf={length=" << tok.remaining().length() << ", data='" << tok.remaining() << "'}");
- buf_ = tok.remaining(); // resume checkpoint
- return parseResponseStatusAndReason();
+ } else if (tok.atEnd())
+ return 0; // need more to be sure we have it all
+ else
+ return -1; // invalid version or delimiter, a single SP terminator required
} else if (tok.skip(IcyMagic)) {
debugs(74, 6, "found prefix magic " << IcyMagic);
// NP: ICY has no /major.minor details
debugs(74, DBG_DATA, "parse remaining buf={length=" << tok.remaining().length() << ", data='" << tok.remaining() << "'}");
buf_ = tok.remaining(); // resume checkpoint
- return parseResponseStatusAndReason();
-
- } else if (buf_.length() > Http1magic.length() && buf_.length() > IcyMagic.length()) {
+ return parseResponseStatusAndReason(tok);
+ } else if (buf_.length() < Http1magic.length() && Http1magic.startsWith(buf_)) {
+ debugs(74, 7, Raw("valid HTTP/1 prefix", buf_.rawContent(), buf_.length()));
+ return 0;
+ } else if (buf_.length() < IcyMagic.length() && IcyMagic.startsWith(buf_)) {
+ debugs(74, 7, Raw("valid ICY prefix", buf_.rawContent(), buf_.length()));
+ return 0;
+ } else {
debugs(74, 2, "unknown/missing prefix magic. Interpreting as HTTP/0.9");
// found something that looks like an HTTP/0.9 response
// Gateway/Transform it into HTTP/1.1
return 1; // no more parsing
}
- return 0; // need more to parse anything.
+ // unreachable
+ assert(false);
+ return -1;
}
bool
// stage 2: parse the status-line
if (parsingStage_ == HTTP_PARSE_FIRST) {
- PROF_start(HttpParserParseReplyLine);
-
- int retcode = parseResponseFirstLine();
+ const int retcode = parseResponseFirstLine();
// first-line (or a look-alike) found successfully.
- if (retcode > 0)
+ if (retcode > 0 && parsingStage_ == HTTP_PARSE_FIRST)
parsingStage_ = HTTP_PARSE_MIME;
debugs(74, 5, "status-line: retval " << retcode);
debugs(74, 5, "status-line: proto " << msgProtocol_);
debugs(74, 5, "status-line: status-code " << statusCode_);
debugs(74, 5, "status-line: reason-phrase " << reasonPhrase_);
debugs(74, 5, "Parser: bytes processed=" << (aBuf.length()-buf_.length()));
- PROF_stop(HttpParserParseReplyLine);
// syntax errors already
if (retcode < 0) {
parsingStage_ = HTTP_PARSE_DONE;
- statusCode_ = Http::scInvalidHeader;
+ parseStatusCode = Http::scInvalidHeader;
return false;
}
}
// stage 3: locate the mime header block
if (parsingStage_ == HTTP_PARSE_MIME) {
- if (!findMimeBlock("Response", Config.maxReplyHeaderSize))
+ if (!grabMimeBlock("Response", Config.maxReplyHeaderSize))
return false;
}
return !needsMoreData();
}
+