/*
- * Copyright (C) 1996-2015 The Squid Software Foundation and contributors
+ * Copyright (C) 1996-2017 The Squid Software Foundation and contributors
*
* Squid software is distributed under GPLv2+ license and includes
* contributions from numerous individuals and organizations.
#include "squid.h"
#include "Debug.h"
#include "http/one/RequestParser.h"
+#include "http/one/Tokenizer.h"
#include "http/ProtocolVersion.h"
-#include "mime_header.h"
#include "profiler/Profiler.h"
#include "SquidConfig.h"
-Http::One::RequestParser::RequestParser() :
+/// Picks the debugs() level used when reporting request-line parsing errors:
+/// DBG_IMPORTANT when relaxed_header_parser is negative, level 5 otherwise.
+inline static int
+ErrorLevel()
+{
+    if (Config.onoff.relaxed_header_parser < 0)
+        return DBG_IMPORTANT;
+
+    return 5;
+}
+
+/// \param preserveParsed stored in preserveParsed_; when true, each parse()
+/// call appends its newly parsed bytes to parsed_
+Http::One::RequestParser::RequestParser(bool preserveParsed) :
Parser(),
- request_parse_status(Http::scNone)
+ preserveParsed_(preserveParsed)
+{}
+
+/// \returns the request-line size computed from the stored method and URI,
+/// assuming single-SP delimiters, a one-digit-dot-one-digit version and CRLF
+Http1::Parser::size_type
+Http::One::RequestParser::firstLineSize() const
{
- req.start = req.end = -1;
- req.m_start = req.m_end = -1;
- req.u_start = req.u_end = -1;
- req.v_start = req.v_end = -1;
+ // RFC 7230 section 2.6
+ /* method SP request-target SP "HTTP/" DIGIT "." DIGIT CRLF */
+ // 12 == SP (1) + SP (1) + "HTTP/" DIGIT "." DIGIT (8) + CRLF (2)
+ return method_.image().length() + uri_.length() + 12;
}
/**
buf_.consume(1);
}
}
+}
+
+/**
+ * Attempt to parse the method field out of an HTTP message request-line.
+ *
+ * Governed by:
+ * RFC 1945 section 5.1
+ * RFC 7230 section 2.6, 3.1 and 3.5
+ *
+ * \param tok tokenizer to extract the method (and the delimiters after it) from
+ * \retval true method_ is set and the delimiter(s) after it were accepted
+ * \retval false parseStatusCode is set to Http::scBadRequest
+ */
+bool
+Http::One::RequestParser::parseMethodField(Http1::Tokenizer &tok)
+{
+ // method field is a sequence of TCHAR.
+ // Limit to 32 characters to prevent overly long sequences of non-HTTP
+ // being sucked in before mismatch is detected. 32 is itself annoyingly
+ // big but there are methods registered by IANA that reach 17 bytes:
+ // http://www.iana.org/assignments/http-methods
+ static const size_t maxMethodLength = 32; // TODO: make this configurable?
+
+ SBuf methodFound;
+ if (!tok.prefix(methodFound, CharacterSet::TCHAR, maxMethodLength)) {
+ debugs(33, ErrorLevel(), "invalid request-line: missing or malformed method");
+ parseStatusCode = Http::scBadRequest;
+ return false;
+ }
+ method_ = HttpRequestMethod(methodFound);
- /* XXX: this is a Squid-specific tolerance
- * it appears never to have been relevant outside out unit-tests
- * because the ConnStateData parser loop starts with consumeWhitespace()
- * which absorbs any SP HTAB VTAB CR LF characters.
- * But unit-tests called the HttpParser method directly without that pruning.
+ // the method must be followed by delimiter character(s); skipDelimiter()
+ // judges whether the number skipped here is acceptable
+ if (!skipDelimiter(tok.skipAll(DelimiterCharacters()), "after method"))
+ return false;
+
+ return true;
+}
+
+/// The characters which are truly valid within a URI (per RFC 3986).
+/// The set is built once, on first use, and returned by reference.
+static const CharacterSet &
+UriValidCharacters()
+{
+ /* RFC 3986 section 2:
+ * "
+ * A URI is composed from a limited set of characters consisting of
+ * digits, letters, and a few graphic symbols.
+ * "
*/
+ static const CharacterSet UriChars =
+ CharacterSet("URI-Chars","") +
+ // RFC 3986 section 2.2 - reserved characters
+ CharacterSet("gen-delims", ":/?#[]@") +
+ CharacterSet("sub-delims", "!$&'()*+,;=") +
+ // RFC 3986 section 2.3 - unreserved characters
+ CharacterSet::ALPHA +
+ CharacterSet::DIGIT +
+ CharacterSet("unreserved", "-._~") +
+ // RFC 3986 section 2.1 - percent encoding "%" HEXDIG
+ CharacterSet("pct-encoded", "%") +
+ CharacterSet::HEXDIG;
+
+ return UriChars;
+}
+
+/// Characters which Squid will accept in the HTTP request-target (URI).
+/// With relaxed_header_parser off, this is exactly UriValidCharacters().
+/// With it on, DelimiterCharacters() are accepted as well and, in builds with
+/// USE_HTTP_VIOLATIONS, so are the RFC 2396 "unwise" characters and the
+/// 128-255 range.
+const CharacterSet &
+Http::One::RequestParser::RequestTargetCharacters()
+{
+ if (Config.onoff.relaxed_header_parser) {
#if USE_HTTP_VIOLATIONS
+ static const CharacterSet RelaxedExtended =
+ UriValidCharacters() +
+ // accept whitespace (extended), it will be dealt with later
+ DelimiterCharacters() +
+ // RFC 2396 unwise character set which must never be transmitted
+ // in un-escaped form. But many web services do anyway.
+ CharacterSet("RFC2396-unwise","\"\\|^<>`{}") +
+ // UTF-8 because we want to be future-proof
+ CharacterSet("UTF-8", 128, 255);
+
+ return RelaxedExtended;
+#else
+ static const CharacterSet RelaxedCompliant =
+ UriValidCharacters() +
+ // accept whitespace (extended), it will be dealt with later.
+ DelimiterCharacters();
+
+ return RelaxedCompliant;
+#endif
+ }
+
+ // the strict parser only accepts what the RFCs say we can
+ return UriValidCharacters();
+}
+
+/**
+ * Attempt to parse the request-target (URI) field of an HTTP request-line.
+ *
+ * Extracts the leading run of RequestTargetCharacters() from tok.
+ *
+ * \retval true uri_ holds the extracted request-target
+ * \retval false parseStatusCode is Http::scBadRequest when no acceptable
+ * characters were found, or Http::scUriTooLong when the limit is exceeded
+ */
+bool
+Http::One::RequestParser::parseUriField(Http1::Tokenizer &tok)
+{
+    /* The URI length is capped at (64KB - 1) octets.
+     *
+     * The cap is less arbitrary than it looks: old SquidString objects
+     * cannot store strings larger than 64KB, so the limit has to stay until
+     * they have all been replaced with SBuf.
+     *
+     * For comparison, RFC 7230 section 3.1.1 recommends supporting at least
+     * 8000 octets for the whole line, including method and version.
+     */
+    const size_t maxUriLength = static_cast<size_t>((64*1024)-1);
+
+    SBuf uriFound;
+    const bool foundUri = tok.prefix(uriFound, RequestTargetCharacters());
+    if (!foundUri) {
+        parseStatusCode = Http::scBadRequest;
+        debugs(33, ErrorLevel(), "invalid request-line: missing or malformed URI");
+        return false;
+    }
+
+    const auto uriLength = uriFound.length();
+    if (uriLength > maxUriLength) {
+        // RFC 7230 section 3.1.1 makes the 414 response mandatory (MUST)
+        parseStatusCode = Http::scUriTooLong;
+        debugs(33, ErrorLevel(), "invalid request-line: " << uriLength <<
+               "-byte URI exceeds " << maxUriLength << "-byte limit");
+        return false;
+    }
+
+    uri_ = uriFound;
+    return true;
+}
+
+/**
+ * Attempt to parse the HTTP-version field at the end of the request-line.
+ *
+ * Works backwards from the end of tok. A GET request-line without a
+ * recognizable version is assumed to use HTTP/0.9 syntax, in which case tok
+ * is restored to the state it had on entry.
+ *
+ * \retval true msgProtocol_ is set
+ * \retval false no version was found and the method is not GET;
+ * parseStatusCode is set to Http::scBadRequest
+ */
+bool
+Http::One::RequestParser::parseHttpVersionField(Http1::Tokenizer &tok)
+{
+    static const SBuf http1p0("HTTP/1.0");
+    static const SBuf http1p1("HTTP/1.1");
+    const auto savedTok = tok;
+
+    // Fast path: the vast majority of requests end with HTTP/1.1 ...
+    if (tok.skipSuffix(http1p1)) {
+        msgProtocol_ = Http::ProtocolVersion(1, 1);
+        return true;
+    }
+
+    // ... or with HTTP/1.0
+    if (tok.skipSuffix(http1p0)) {
+        msgProtocol_ = Http::ProtocolVersion(1, 0);
+        return true;
+    }
+
+    // General case, RFC 7230 section 2.6:
+    //   HTTP-version = HTTP-name "/" DIGIT "." DIGIT
+    static const CharacterSet period("Decimal point", ".");
+    static const SBuf proto("HTTP/");
+    SBuf majorDigit;
+    SBuf minorDigit;
+    const bool foundVersion =
+        tok.suffix(minorDigit, CharacterSet::DIGIT) &&
+        tok.skipOneTrailing(period) &&
+        tok.suffix(majorDigit, CharacterSet::DIGIT) &&
+        tok.skipSuffix(proto);
+    if (foundVersion) {
+        // unsupported multiple digit version numbers are reported as '0.0'
+        unsigned int major = 0;
+        unsigned int minor = 0;
+        if (majorDigit.length() <= 1 && minorDigit.length() <= 1) {
+            major = *majorDigit.rawContent() - '0';
+            minor = *minorDigit.rawContent() - '0';
+        }
+        msgProtocol_ = Http::ProtocolVersion(major, minor);
+        return true;
+    }
+
+    if (method_ != Http::METHOD_GET) {
+        debugs(33, ErrorLevel(), "invalid request-line: not HTTP");
+        parseStatusCode = Http::scBadRequest;
+        return false;
+    }
+
+    // A GET request might use HTTP/0.9 syntax:
+    // RFC 1945 - no HTTP version field at all
+    tok = savedTok; // in case the URI ends with a digit
+    // report this assumption as an error if configured to triage parsing
+    debugs(33, ErrorLevel(), "assuming HTTP/0.9 request-line");
+    msgProtocol_ = Http::ProtocolVersion(0,9);
+    return true;
+}
+
+/**
+ * Skip characters separating request-line fields.
+ * To handle bidirectional parsing, the caller does the actual skipping and
+ * we just check how many characters the caller has skipped.
+ *
+ * \param count number of delimiter characters the caller has skipped
+ * \param where position description appended to the error message
+ * \retval true the number of skipped delimiters is acceptable
+ * \retval false parseStatusCode is set to Http::scBadRequest
+ */
+bool
+Http::One::RequestParser::skipDelimiter(const size_t count, const char *where)
+{
+    // count is unsigned, so "nothing skipped" can only be exactly zero
+    if (count == 0) {
+        debugs(33, ErrorLevel(), "invalid request-line: missing delimiter " << where);
+        parseStatusCode = Http::scBadRequest;
+        return false;
+    }
+
+    // tolerant parser allows multiple whitespace characters between request-line fields
+    if (count > 1 && !Config.onoff.relaxed_header_parser) {
+        debugs(33, ErrorLevel(), "invalid request-line: too many delimiters " << where);
+        parseStatusCode = Http::scBadRequest;
+        return false;
+    }
+
+    return true;
+}
+
+/// Parse CRs at the end of request-line, just before the terminating LF.
+/// With relaxed_header_parser on, any number of trailing CRs (including none)
+/// is skipped. Otherwise a trailing CR must be present; if it is missing,
+/// parseStatusCode is set to Http::scBadRequest and false is returned.
+bool
+Http::One::RequestParser::skipTrailingCrs(Http1::Tokenizer &tok)
+{
if (Config.onoff.relaxed_header_parser) {
- if (Config.onoff.relaxed_header_parser < 0 && buf_[0] == ' ')
- debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " <<
- "Whitespace bytes received ahead of method. " <<
- "Ignored due to relaxed_header_parser.");
- // Be tolerant of prefix spaces (other bytes are valid method values)
- while (!buf_.isEmpty() && buf_[0] == ' ') {
- buf_.consume(1);
+ (void)tok.skipAllTrailing(CharacterSet::CR); // optional; multiple OK
+ } else {
+ if (!tok.skipOneTrailing(CharacterSet::CR)) {
+ debugs(33, ErrorLevel(), "invalid request-line: missing CR before LF");
+ parseStatusCode = Http::scBadRequest;
+ return false;
}
}
-#endif
+ return true;
}
/**
*
* Governed by:
* RFC 1945 section 5.1
- * RFC 7230 section 3.1 and 3.5
+ * RFC 7230 section 2.6, 3.1 and 3.5
*
- * Parsing state is stored between calls. However the current implementation
- * begins parsing from scratch on every call.
- * The return value tells you whether the parsing state fields are valid or not.
- *
- * \retval -1 an error occurred. request_parse_status indicates HTTP status result.
+ * \retval -1 an error occurred. parseStatusCode indicates HTTP status result.
* \retval 1 successful parse. member fields contain the request-line items
* \retval 0 more data is needed to complete the parse
*/
int
Http::One::RequestParser::parseRequestFirstLine()
{
- int second_word = -1; // track the suspected URI start
- int first_whitespace = -1, last_whitespace = -1; // track the first and last SP byte
- int line_end = -1; // tracks the last byte BEFORE terminal \r\n or \n sequence
-
debugs(74, 5, "parsing possible request: buf.length=" << buf_.length());
debugs(74, DBG_DATA, buf_);
- // Single-pass parse: (provided we have the whole line anyways)
+ SBuf line;
- req.start = 0;
- req.end = -1;
- for (SBuf::size_type i = 0; i < buf_.length(); ++i) {
- // track first and last whitespace (SP only)
- if (buf_[i] == ' ') {
- last_whitespace = i;
- if (first_whitespace < req.start)
- first_whitespace = i;
- }
+ // Earlier, skipGarbageLines() took care of any leading LFs (if allowed).
+ // Now, the request line has to end at the first LF.
+ static const CharacterSet lineChars = CharacterSet::LF.complement("notLF");
+ ::Parser::Tokenizer lineTok(buf_);
+ if (!lineTok.prefix(line, lineChars) || !lineTok.skip('\n')) {
+ if (buf_.length() >= Config.maxRequestHeaderSize) {
+ /* who should we blame for our failure to parse this line? */
- // track next non-SP/non-HT byte after first_whitespace
- if (second_word < first_whitespace && buf_[i] != ' ' && buf_[i] != '\t') {
- second_word = i;
- }
+ Http1::Tokenizer methodTok(buf_);
+ if (!parseMethodField(methodTok))
+ return -1; // blame a bad method (or its delimiter)
- // locate line terminator
- if (buf_[i] == '\n') {
- req.end = i;
- line_end = i - 1;
- break;
- }
- if (i < buf_.length() - 1 && buf_[i] == '\r') {
- if (Config.onoff.relaxed_header_parser) {
- if (Config.onoff.relaxed_header_parser < 0 && buf_[i + 1] == '\r')
- debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " <<
- "Series of carriage-return bytes received prior to line terminator. " <<
- "Ignored due to relaxed_header_parser.");
-
- // Be tolerant of invalid multiple \r prior to terminal \n
- if (buf_[i + 1] == '\n' || buf_[i + 1] == '\r')
- line_end = i - 1;
- while (i < buf_.length() - 1 && buf_[i + 1] == '\r')
- ++i;
-
- if (buf_[i + 1] == '\n') {
- req.end = i + 1;
- break;
- }
- } else {
- if (buf_[i + 1] == '\n') {
- req.end = i + 1;
- line_end = i - 1;
- break;
- }
- }
-
- // RFC 7230 section 3.1.1 does not prohibit embeded CR like RFC 2616 used to.
- // However it does explicitly state an exact syntax which omits un-encoded CR
- // and defines 400 (Bad Request) as the required action when
- // handed an invalid request-line.
- request_parse_status = Http::scBadRequest;
+ // assume it is the URI
+ debugs(74, ErrorLevel(), "invalid request-line: URI exceeds " <<
+ Config.maxRequestHeaderSize << "-byte limit");
+ parseStatusCode = Http::scUriTooLong;
return -1;
}
- }
-
- if (req.end == -1) {
- // DoS protection against long first-line
- if ((size_t)buf_.length() >= Config.maxRequestHeaderSize) {
- debugs(33, 5, "Too large request-line");
- // RFC 7230 section 3.1.1 mandatory 414 response if URL longer than acceptible.
- request_parse_status = Http::scUriTooLong;
- return -1;
- }
-
- debugs(74, 5, "Parser: retval 0: from " << req.start <<
- "->" << req.end << ": needs more data to complete first line.");
+ debugs(74, 5, "Parser needs more data");
return 0;
}
- // NP: we have now seen EOL, more-data (0) cannot occur.
- // From here on any failure is -1, success is 1
+ // tok covers the request-line without its terminating LF
+ Http1::Tokenizer tok(line);
- // Input Validation:
-
- // DoS protection against long first-line
- if ((size_t)(req.end-req.start) >= Config.maxRequestHeaderSize) {
- debugs(33, 5, "Too large request-line");
- request_parse_status = Http::scUriTooLong;
+ if (!parseMethodField(tok))
return -1;
- }
-
- // Process what we now know about the line structure into field offsets
- // generating HTTP status for any aborts as we go.
- // First non-whitespace = beginning of method
- if (req.start > line_end) {
- request_parse_status = Http::scBadRequest;
+ /* now parse backwards, to leave just the URI */
+ if (!skipTrailingCrs(tok))
return -1;
- }
- req.m_start = req.start;
- // First whitespace = end of method
- if (first_whitespace > line_end || first_whitespace < req.start) {
- request_parse_status = Http::scBadRequest; // no method
- return -1;
- }
- req.m_end = first_whitespace - 1;
- if (req.m_end < req.m_start) {
- request_parse_status = Http::scBadRequest; // missing URI?
+ if (!parseHttpVersionField(tok))
return -1;
- }
- /* Set method_ */
- const SBuf tmp = buf_.substr(req.m_start, req.m_end - req.m_start + 1);
- method_ = HttpRequestMethod(tmp);
-
- // First non-whitespace after first SP = beginning of URL+Version
- if (second_word > line_end || second_word < req.start) {
- request_parse_status = Http::scBadRequest; // missing URI
+ // NOTE(review): presumably http0() means the HTTP/0.9 syntax was assumed,
+ // which has no version field and hence no delimiter before it -- confirm
+ if (!http0() && !skipDelimiter(tok.skipAllTrailing(DelimiterCharacters()), "before protocol version"))
return -1;
- }
- req.u_start = second_word;
- // RFC 1945: SP and version following URI are optional, marking version 0.9
- // we identify this by the last whitespace being earlier than URI start
- if (last_whitespace < second_word && last_whitespace >= req.start) {
- msgProtocol_ = Http::ProtocolVersion(0,9);
- req.u_end = line_end;
- uri_ = buf_.substr(req.u_start, req.u_end - req.u_start + 1);
- request_parse_status = Http::scOkay; // HTTP/0.9
- return 1;
- } else {
- // otherwise last whitespace is somewhere after end of URI.
- req.u_end = last_whitespace;
- // crop any trailing whitespace in the area we think of as URI
- for (; req.u_end >= req.u_start && xisspace(buf_[req.u_end]); --req.u_end);
- }
- if (req.u_end < req.u_start) {
- request_parse_status = Http::scBadRequest; // missing URI
- return -1;
- }
- uri_ = buf_.substr(req.u_start, req.u_end - req.u_start + 1);
+ /* parsed everything before and after the URI */
- // Last whitespace SP = before start of protocol/version
- if (last_whitespace >= line_end) {
- request_parse_status = Http::scBadRequest; // missing version
+ if (!parseUriField(tok))
return -1;
- }
- req.v_start = last_whitespace + 1;
- req.v_end = line_end;
- /* RFC 7230 section 2.6 : handle unsupported HTTP major versions cleanly. */
- if ((req.v_end - req.v_start +1) < (int)Http1magic.length() || !buf_.substr(req.v_start, SBuf::npos).startsWith(Http1magic)) {
- // non-HTTP/1 protocols not supported / implemented.
- request_parse_status = Http::scHttpVersionNotSupported;
+ if (!tok.atEnd()) {
+ debugs(33, ErrorLevel(), "invalid request-line: garbage after URI");
+ parseStatusCode = Http::scBadRequest;
return -1;
}
- // NP: magic octets include the protocol name and major version DIGIT.
- msgProtocol_.protocol = AnyP::PROTO_HTTP;
- msgProtocol_.major = 1;
- int i = req.v_start + Http1magic.length() -1;
+ parseStatusCode = Http::scOkay;
+ buf_ = lineTok.remaining(); // incremental parse checkpoint
+ return 1;
+}
- // catch missing minor part
- if (++i > line_end) {
- request_parse_status = Http::scHttpVersionNotSupported;
- return -1;
- }
- /* next should be one or more digits */
- if (!isdigit(buf_[i])) {
- request_parse_status = Http::scHttpVersionNotSupported;
- return -1;
- }
- int min = 0;
- for (; i <= line_end && (isdigit(buf_[i])) && min < 65536; ++i) {
- min = min * 10;
- min = min + (buf_[i]) - '0';
- }
- // catch too-big values or trailing garbage
- if (min >= 65536 || i < line_end) {
- request_parse_status = Http::scHttpVersionNotSupported;
- return -1;
+/// Runs doParse() on aBuf and returns its result. When preserveParsed_ is set,
+/// the leading bytes of aBuf that are no longer in remaining() after the call
+/// are appended to parsed_.
+bool
+Http::One::RequestParser::parse(const SBuf &aBuf)
+{
+ const bool result = doParse(aBuf);
+ if (preserveParsed_) {
+ assert(aBuf.length() >= remaining().length());
+ parsed_.append(aBuf.substr(0, aBuf.length() - remaining().length())); // newly parsed bytes
}
- msgProtocol_.minor = min;
- /*
- * Rightio - we have all the schtuff. Return true; we've got enough.
- */
- request_parse_status = Http::scOkay;
- return 1;
+ return result;
}
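+// XXX: stale comment -- it says "raw is not a reference because a reference might point back to our own buf_ or parsed_", but the parameter below is named aBuf and is taken by const reference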
bool
-Http::One::RequestParser::parse(const SBuf &aBuf)
+Http::One::RequestParser::doParse(const SBuf &aBuf)
{
buf_ = aBuf;
debugs(74, DBG_DATA, "Parse buf={length=" << aBuf.length() << ", data='" << aBuf << "'}");
// first-line (or a look-alike) found successfully.
if (retcode > 0) {
- buf_.consume(firstLineSize()); // first line bytes including CRLF terminator are now done.
parsingStage_ = HTTP_PARSE_MIME;
}
- debugs(74, 5, "request-line: retval " << retcode << ": from " << req.start << "->" << req.end <<
- " line={" << aBuf.length() << ", data='" << aBuf << "'}");
- debugs(74, 5, "request-line: method " << req.m_start << "->" << req.m_end << " (" << method_ << ")");
- debugs(74, 5, "request-line: url " << req.u_start << "->" << req.u_end << " (" << uri_ << ")");
- debugs(74, 5, "request-line: proto " << req.v_start << "->" << req.v_end << " (" << msgProtocol_ << ")");
+ debugs(74, 5, "request-line: retval " << retcode << ": line={" << aBuf.length() << ", data='" << aBuf << "'}");
+ debugs(74, 5, "request-line: method: " << method_);
+ debugs(74, 5, "request-line: url: " << uri_);
+ debugs(74, 5, "request-line: proto: " << msgProtocol_);
debugs(74, 5, "Parser: bytes processed=" << (aBuf.length()-buf_.length()));
PROF_stop(HttpParserParseReqLine);
// stage 3: locate the mime header block
if (parsingStage_ == HTTP_PARSE_MIME) {
// HTTP/1.x request-line is valid and parsing completed.
- if (msgProtocol_.major == 1) {
- /* NOTE: HTTP/0.9 requests do not have a mime header block.
- * So the rest of the code will need to deal with '0'-byte headers
- * (ie, none, so don't try parsing em)
- */
- int64_t mimeHeaderBytes = 0;
- // XXX: c_str() reallocates. performance regression.
- if ((mimeHeaderBytes = headersEnd(buf_.c_str(), buf_.length())) == 0) {
- if (buf_.length()+firstLineSize() >= Config.maxRequestHeaderSize) {
- debugs(33, 5, "Too large request");
- request_parse_status = Http::scRequestHeaderFieldsTooLarge;
- parsingStage_ = HTTP_PARSE_DONE;
- } else
- debugs(33, 5, "Incomplete request, waiting for end of headers");
- return false;
- }
- mimeHeaderBlock_ = buf_.consume(mimeHeaderBytes);
- debugs(74, 5, "mime header (0-" << mimeHeaderBytes << ") {" << mimeHeaderBlock_ << "}");
-
- } else
- debugs(33, 3, "Missing HTTP/1.x identifier");
-
- // NP: we do not do any further stages here yet so go straight to DONE
- parsingStage_ = HTTP_PARSE_DONE;
-
- // Squid could handle these headers, but admin does not want to
- if (messageHeaderSize() >= Config.maxRequestHeaderSize) {
- debugs(33, 5, "Too large request");
- request_parse_status = Http::scRequestHeaderFieldsTooLarge;
+ if (!grabMimeBlock("Request", Config.maxRequestHeaderSize)) {
+ if (parseStatusCode == Http::scHeaderTooLarge)
+ parseStatusCode = Http::scRequestHeaderFieldsTooLarge;
return false;
}
}