[thirdparty/squid.git] / src / http / one / RequestParser.cc

/*
 * Copyright (C) 1996-2017 The Squid Software Foundation and contributors
 *
 * Squid software is distributed under GPLv2+ license and includes
 * contributions from numerous individuals and organizations.
 * Please see the COPYING and CONTRIBUTORS files for details.
 */

#include "squid.h"
#include "Debug.h"
#include "http/one/RequestParser.h"
#include "http/one/Tokenizer.h"
#include "http/ProtocolVersion.h"
#include "profiler/Profiler.h"
#include "SquidConfig.h"

/// Picks the debugs() level used when reporting request parsing errors:
/// DBG_IMPORTANT when Config.onoff.relaxed_header_parser is negative,
/// and level 5 in every other case.
inline static int
ErrorLevel()
{
    if (Config.onoff.relaxed_header_parser < 0)
        return DBG_IMPORTANT;

    return 5;
}

/// Creates a request parser on top of a default-constructed Parser base.
/// \param preserveParsed  stored in preserveParsed_; when true, parse()
///        appends the bytes consumed by each call to parsed_.
Http::One::RequestParser::RequestParser(bool preserveParsed) :
    Parser(),
    preserveParsed_(preserveParsed)
{}

/// Computes the request-line size from the lengths of the parsed method
/// and URI plus a fixed allowance for the rest of the request-line syntax.
Http1::Parser::size_type
Http::One::RequestParser::firstLineSize() const
{
    // RFC 7230 section 2.6 request-line layout:
    //   method SP request-target SP "HTTP/" DIGIT "." DIGIT CRLF
    // Everything other than the method and the request-target is fixed-size:
    //   SP(1) + SP(1) + "HTTP/"(5) + DIGIT "." DIGIT(3) + CRLF(2) = 12
    const auto methodLength = method_.image().length();
    const auto targetLength = uri_.length();
    return methodLength + targetLength + 12;
}

/**
 * Discard empty lines received ahead of the request-line.
 *
 * Governed by RFC 7230 section 3.5
 *  "
 *    In the interest of robustness, a server that is expecting to receive
 *    and parse a request-line SHOULD ignore at least one empty line (CRLF)
 *    received prior to the request-line.
 *  "
 *
 * Does nothing unless relaxed_header_parser is enabled. Otherwise the
 * leading bytes are consumed directly from buf_ while they form a series
 * of LF or CRLF line terminators. A CR that is not (yet) followed by LF
 * is left in buf_ untouched.
 *
 * Every buf_ index is checked against the buffer size before it is read,
 * so neither an empty buffer nor a buffer ending in a lone CR is indexed
 * past its end.
 */
void
Http::One::RequestParser::skipGarbageLines()
{
    if (!Config.onoff.relaxed_header_parser)
        return;

    // a negative setting means "tolerate, but warn about it"
    if (Config.onoff.relaxed_header_parser < 0 && !buf_.isEmpty() &&
            (buf_[0] == '\r' || buf_[0] == '\n')) {
        debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " <<
               "CRLF bytes received ahead of request-line. " <<
               "Ignored due to relaxed_header_parser.");
    }

    // Be tolerant of prefix empty lines
    // ie any series of either \n or \r\n with no other characters and no repeated \r
    while (!buf_.isEmpty()) {
        const bool bareLf = (buf_[0] == '\n');
        // only look at the second byte when there is one
        const bool crLf = (buf_[0] == '\r' && buf_.length() > 1 && buf_[1] == '\n');
        if (!bareLf && !crLf)
            break;

        // one byte at a time: the LF of a CRLF pair is removed by the next iteration
        buf_.consume(1);
    }
}

/**
 * Extract the method token from the front of a request-line and validate
 * the delimiter(s) that follow it.
 *
 * Governed by:
 *  RFC 1945 section 5.1
 *  RFC 7230 section 2.6, 3.1 and 3.5
 *
 * \param tok  tokenizer positioned at the start of the request-line;
 *             advanced past the method and the delimiters after it
 * \retval true   method_ holds the parsed method and the delimiter was accepted
 * \retval false  parseStatusCode has been set to Http::scBadRequest
 */
bool
Http::One::RequestParser::parseMethodField(Http1::Tokenizer &tok)
{
    // The method is a run of TCHAR. Cap it at 32 characters so that long
    // runs of non-HTTP input are rejected early. 32 is generous, but IANA
    // has registered methods as long as 17 bytes:
    //  http://www.iana.org/assignments/http-methods
    static const size_t methodLengthLimit = 32; // TODO: make this configurable?

    SBuf token;
    const bool gotMethod = tok.prefix(token, CharacterSet::TCHAR, methodLengthLimit);
    if (!gotMethod) {
        debugs(33, ErrorLevel(), "invalid request-line: missing or malformed method");
        parseStatusCode = Http::scBadRequest;
        return false;
    }
    method_ = HttpRequestMethod(token);

    // the tokenizer does the skipping; skipDelimiter() only judges the count
    const auto delimiterCount = tok.skipAll(DelimiterCharacters());
    return skipDelimiter(delimiterCount, "after method");
}

/// The characters that are genuinely valid inside a URI, built once on
/// first use from the character classes named in RFC 3986 section 2.
static const CharacterSet &
UriValidCharacters()
{
    /* RFC 3986 section 2:
     * "
     *   A URI is composed from a limited set of characters consisting of
     *   digits, letters, and a few graphic symbols.
     * "
     */

    // RFC 3986 section 2.2 - reserved characters
    static const CharacterSet reserved =
        CharacterSet("gen-delims", ":/?#[]@") +
        CharacterSet("sub-delims", "!$&'()*+,;=");

    // RFC 3986 section 2.3 - unreserved characters
    static const CharacterSet unreserved =
        CharacterSet("unreserved", "-._~") +
        CharacterSet::ALPHA +
        CharacterSet::DIGIT;

    // RFC 3986 section 2.1 - percent encoding "%" HEXDIG
    static const CharacterSet percentEncoded =
        CharacterSet("pct-encoded", "%") +
        CharacterSet::HEXDIG;

    // start from the empty, labelled set so the label stays on the left-hand side
    static const CharacterSet UriChars =
        CharacterSet("URI-Chars","") + reserved + unreserved + percentEncoded;

    return UriChars;
}

/// Characters which Squid will accept in the HTTP request-target (URI).
/// With relaxed_header_parser disabled this is exactly UriValidCharacters();
/// otherwise delimiter characters are accepted as well and, in
/// USE_HTTP_VIOLATIONS builds, so are the RFC 2396 "unwise" characters and
/// all octets from 128 to 255.
const CharacterSet &
Http::One::RequestParser::RequestTargetCharacters()
{
    // strict parse only accepts what the RFC says we can
    if (!Config.onoff.relaxed_header_parser)
        return UriValidCharacters();

#if USE_HTTP_VIOLATIONS
    static const CharacterSet RelaxedExtended =
        UriValidCharacters() +
        // whitespace (extended) is let through here and dealt with later
        DelimiterCharacters() +
        // RFC 2396 "unwise" characters: never supposed to be sent
        // un-escaped, yet many web services send them anyway
        CharacterSet("RFC2396-unwise","\"\\|^<>`{}") +
        // UTF-8 because we want to be future-proof
        CharacterSet("UTF-8", 128, 255);

    return RelaxedExtended;
#else
    static const CharacterSet RelaxedCompliant =
        UriValidCharacters() +
        // whitespace (extended) is let through here and dealt with later
        DelimiterCharacters();

    return RelaxedCompliant;
#endif
}

/**
 * Extract the request-target from what is left of the request-line.
 *
 * \param tok  tokenizer holding the not yet parsed part of the request-line
 * \retval true   uri_ holds the accepted request-target
 * \retval false  parseStatusCode is Http::scBadRequest when no acceptable
 *                characters were found, or Http::scUriTooLong when the
 *                request-target exceeds the length limit
 */
bool
Http::One::RequestParser::parseUriField(Http1::Tokenizer &tok)
{
    /* The URI is capped at 64KB minus one byte.
     *
     * The cap is less arbitrary than it looks: old SquidString objects
     * cannot store strings larger than 64KB, so the limit has to stay
     * until all of them have been replaced with SBuf.
     *
     * For comparison, RFC 7230 section 3.1.1 RECOMMENDS supporting at least
     * 8000 octets for the whole line, including method and version.
     */
    const size_t uriLengthLimit = static_cast<size_t>((64*1024)-1);

    SBuf target;
    const bool gotTarget = tok.prefix(target, RequestTargetCharacters());
    if (!gotTarget) {
        debugs(33, ErrorLevel(), "invalid request-line: missing or malformed URI");
        parseStatusCode = Http::scBadRequest;
        return false;
    }

    const bool tooLong = target.length() > uriLengthLimit;
    if (tooLong) {
        // RFC 7230 section 3.1.1 mandatory (MUST) 414 response
        debugs(33, ErrorLevel(), "invalid request-line: " << target.length() <<
               "-byte URI exceeds " << uriLengthLimit << "-byte limit");
        parseStatusCode = Http::scUriTooLong;
        return false;
    }

    uri_ = target;
    return true;
}

/**
 * Parse the HTTP-version found at the END of the request-line and store
 * the result in msgProtocol_.
 *
 * \param tok  tokenizer whose trailing bytes are examined; on success the
 *             version text is removed from its end. When the HTTP/0.9
 *             fallback is taken the tokenizer is restored to the state it
 *             had on entry.
 * \retval true   msgProtocol_ has been set
 * \retval false  no version was found and the method is not GET;
 *                parseStatusCode has been set to Http::scBadRequest
 */
bool
Http::One::RequestParser::parseHttpVersionField(Http1::Tokenizer &tok)
{
    static const SBuf http1p0("HTTP/1.0");
    static const SBuf http1p1("HTTP/1.1");
    static const CharacterSet period("Decimal point", ".");
    static const SBuf proto("HTTP/");

    // snapshot for the HTTP/0.9 fallback below
    const auto untouchedTok = tok;

    // Optimization: the vast majority of requests say HTTP/1.1 or HTTP/1.0,
    // so try those exact strings first.
    if (tok.skipSuffix(http1p1)) {
        msgProtocol_ = Http::ProtocolVersion(1, 1);
        return true;
    }

    if (tok.skipSuffix(http1p0)) {
        msgProtocol_ = Http::ProtocolVersion(1, 0);
        return true;
    }

    // RFC 7230 section 2.6:
    // HTTP-version  = HTTP-name "/" DIGIT "." DIGIT
    // Matched back to front: minor digits, the period, major digits, "HTTP/".
    SBuf minorDigits;
    SBuf majorDigits;
    const bool genericVersion =
        tok.suffix(minorDigits, CharacterSet::DIGIT) &&
        tok.skipOneTrailing(period) &&
        tok.suffix(majorDigits, CharacterSet::DIGIT) &&
        tok.skipSuffix(proto);

    if (genericVersion) {
        // use '0.0' for unsupported multiple digit version numbers
        unsigned int majorVersion = 0;
        unsigned int minorVersion = 0;
        const bool oneDigitEach = majorDigits.length() <= 1 && minorDigits.length() <= 1;
        if (oneDigitEach) {
            majorVersion = *majorDigits.rawContent() - '0';
            minorVersion = *minorDigits.rawContent() - '0';
        }
        msgProtocol_ = Http::ProtocolVersion(majorVersion, minorVersion);
        return true;
    }

    // Only a GET request may use the HTTP/0.9 syntax
    if (method_ != Http::METHOD_GET) {
        debugs(33, ErrorLevel(), "invalid request-line: not HTTP");
        parseStatusCode = Http::scBadRequest;
        return false;
    }

    // RFC 1945 - no HTTP version field at all
    tok = untouchedTok; // in case the URI ends with a digit
    // report this assumption as an error if configured to triage parsing
    debugs(33, ErrorLevel(), "assuming HTTP/0.9 request-line");
    msgProtocol_ = Http::ProtocolVersion(0,9);
    return true;
}

/**
 * Validate the delimiter found between two request-line fields.
 * To handle bidirectional parsing, the caller does the actual skipping and
 * this method only checks how many characters the caller has skipped.
 *
 * \param count  number of delimiter characters the caller skipped
 * \param where  description of the delimiter position, used in debugging output
 * \retval true   exactly one delimiter, or several when relaxed_header_parser is on
 * \retval false  no delimiter, or too many for the strict parser;
 *                parseStatusCode has been set to Http::scBadRequest
 */
bool
Http::One::RequestParser::skipDelimiter(const size_t count, const char *where)
{
    // count is unsigned, so "nothing skipped" is the only value below one
    if (count == 0) {
        debugs(33, ErrorLevel(), "invalid request-line: missing delimiter " << where);
        parseStatusCode = Http::scBadRequest;
        return false;
    }

    // only the tolerant parser allows multiple whitespace characters between request-line fields
    const bool strictParser = !Config.onoff.relaxed_header_parser;
    if (strictParser && count > 1) {
        debugs(33, ErrorLevel(), "invalid request-line: too many delimiters " << where);
        parseStatusCode = Http::scBadRequest;
        return false;
    }

    return true;
}

/// Remove the CR(s) at the end of the request-line, just before the
/// terminating LF. The relaxed parser accepts any number of CRs, including
/// none. The strict parser requires one and otherwise fails with
/// parseStatusCode set to Http::scBadRequest.
bool
Http::One::RequestParser::skipTrailingCrs(Http1::Tokenizer &tok)
{
    if (Config.onoff.relaxed_header_parser) {
        (void)tok.skipAllTrailing(CharacterSet::CR); // optional; multiple OK
        return true;
    }

    if (tok.skipOneTrailing(CharacterSet::CR))
        return true;

    debugs(33, ErrorLevel(), "invalid request-line: missing CR before LF");
    parseStatusCode = Http::scBadRequest;
    return false;
}

/**
 * Attempt to parse the first line of a new request message.
 *
 * Governed by:
 *  RFC 1945 section 5.1
 *  RFC 7230 section 2.6, 3.1 and 3.5
 *
 * \retval -1  an error occurred. parseStatusCode indicates HTTP status result.
 * \retval  1  successful parse. member fields contain the request-line items
 * \retval  0  more data is needed to complete the parse
 */
int
Http::One::RequestParser::parseRequestFirstLine()
{
    debugs(74, 5, "parsing possible request: buf.length=" << buf_.length());
    debugs(74, DBG_DATA, buf_);

    SBuf line;

    // Earlier, skipGarbageLines() took care of any leading LFs (if allowed).
    // Now, the request line has to end at the first LF.
    static const CharacterSet lineChars = CharacterSet::LF.complement("notLF");
    ::Parser::Tokenizer lineTok(buf_);
    if (!lineTok.prefix(line, lineChars) || !lineTok.skip('\n')) {
        if (buf_.length() >= Config.maxRequestHeaderSize) {
            /* who should we blame for our failure to parse this line? */

            Http1::Tokenizer methodTok(buf_);
            if (!parseMethodField(methodTok))
                return -1; // blame a bad method (or its delimiter)

            // assume it is the URI
            debugs(74, ErrorLevel(), "invalid request-line: URI exceeds " <<
                   Config.maxRequestHeaderSize << "-byte limit");
            parseStatusCode = Http::scUriTooLong;
            return -1;
        }
        debugs(74, 5, "Parser needs more data");
        return 0;
    }

    Http1::Tokenizer tok(line);

    if (!parseMethodField(tok))
        return -1;

    /* now parse backwards, to leave just the URI */
    if (!skipTrailingCrs(tok))
        return -1;

    if (!parseHttpVersionField(tok))
        return -1;

    if (!http0() && !skipDelimiter(tok.skipAllTrailing(DelimiterCharacters()), "before protocol version"))
        return -1;

    /* parsed everything before and after the URI */

    if (!parseUriField(tok))
        return -1;

    if (!tok.atEnd()) {
        debugs(33, ErrorLevel(), "invalid request-line: garbage after URI");
        parseStatusCode = Http::scBadRequest;
        return -1;
    }

    parseStatusCode = Http::scOkay;
    buf_ = lineTok.remaining(); // incremental parse checkpoint
    return 1;
}

bool
Http::One::RequestParser::parse(const SBuf &aBuf)
{
    const bool result = doParse(aBuf);
    if (preserveParsed_) {
        assert(aBuf.length() >= remaining().length());
        parsed_.append(aBuf.substr(0, aBuf.length() - remaining().length())); // newly parsed bytes
    }

    return result;
}

/// Copies aBuf into buf_ and advances the parser through its stages:
/// skipping prefix empty lines, parsing the request-line, and locating the
/// mime header block. Returns the negation of needsMoreData() when no stage
/// bails out early, and false otherwise.
// NOTE(review): an earlier comment here claimed the input is not taken by
// reference because a reference might point back to our own buf_ or parsed_,
// yet aBuf IS a const reference. Confirm that callers never pass a buffer
// aliasing those members.
bool
Http::One::RequestParser::doParse(const SBuf &aBuf)
{
    buf_ = aBuf;
    debugs(74, DBG_DATA, "Parse buf={length=" << aBuf.length() << ", data='" << aBuf << "'}");

    // stage 1: locate the request-line
    if (parsingStage_ == HTTP_PARSE_NONE) {
        skipGarbageLines();

        // nothing but (skipped) empty lines so far: wait for more input
        if (buf_.isEmpty())
            return false;

        // if we hit something before EOS treat it as a message
        parsingStage_ = HTTP_PARSE_FIRST;
    }

    // stage 2: parse the request-line
    if (parsingStage_ == HTTP_PARSE_FIRST) {
        PROF_start(HttpParserParseReqLine);
        const int lineResult = parseRequestFirstLine();

        debugs(74, 5, "request-line: retval " << lineResult << ": line={" << aBuf.length() << ", data='" << aBuf << "'}");
        debugs(74, 5, "request-line: method: " << method_);
        debugs(74, 5, "request-line: url: " << uri_);
        debugs(74, 5, "request-line: proto: " << msgProtocol_);
        debugs(74, 5, "Parser: bytes processed=" << (aBuf.length()-buf_.length()));
        PROF_stop(HttpParserParseReqLine);

        // syntax errors already
        if (lineResult < 0) {
            parsingStage_ = HTTP_PARSE_DONE;
            return false;
        }

        // first-line (or a look-alike) found successfully.
        // A zero result keeps the current stage so the line is retried later.
        if (lineResult > 0)
            parsingStage_ = HTTP_PARSE_MIME;
    }

    // stage 3: locate the mime header block
    // (only reached once the HTTP/1.x request-line is valid and fully parsed)
    if (parsingStage_ == HTTP_PARSE_MIME && !grabMimeBlock("Request", Config.maxRequestHeaderSize)) {
        // report the request-specific flavour of the "too large" status
        if (parseStatusCode == Http::scHeaderTooLarge)
            parseStatusCode = Http::scRequestHeaderFieldsTooLarge;
        return false;
    }

    return !needsMoreData();
}
Commit	Line	Data
eac61ce1	1	/*
4ac4a490	2	* Copyright (C) 1996-2017 The Squid Software Foundation and contributors
eac61ce1 AJ	3	*
	4	* Squid software is distributed under GPLv2+ license and includes
	5	* contributions from numerous individuals and organizations.
	6	* Please see the COPYING and CONTRIBUTORS files for details.
	7	*/
	8
f7f3304a	9	#include "squid.h"
4c14658e	10	#include "Debug.h"
c99510dd	11	#include "http/one/RequestParser.h"
f29718b0	12	#include "http/one/Tokenizer.h"
c99510dd	13	#include "http/ProtocolVersion.h"
582c2af2	14	#include "profiler/Profiler.h"
4d5904f7	15	#include "SquidConfig.h"
4c14658e	16
e02f963c AR	17	// the right debugs() level for parsing errors
	18	inline static int
	19	ErrorLevel() {
	20	return Config.onoff.relaxed_header_parser < 0 ? DBG_IMPORTANT : 5;
	21	}
	22
6b2b6cfe CT	23	Http::One::RequestParser::RequestParser(bool preserveParsed) :
	24	Parser(),
	25	preserveParsed_(preserveParsed)
947ca0c6 AJ	26	{}
	27
	28	Http1::Parser::size_type
	29	Http::One::RequestParser::firstLineSize() const
7322c9dd	30	{
947ca0c6 AJ	31	// RFC 7230 section 2.6
	32	/* method SP request-target SP "HTTP/" DIGIT "." DIGIT CRLF */
	33	return method_.image().length() + uri_.length() + 12;
4c14658e AJ	34	}
4c14658e AJ	35
c11191e0 AJ	36	/**
	37	* Attempt to parse the first line of a new request message.
	38	*
a4c74dd8	39	* Governed by RFC 7230 section 3.5
c11191e0	40	* "
a4c74dd8 AJ	41	* In the interest of robustness, a server that is expecting to receive
	42	* and parse a request-line SHOULD ignore at least one empty line (CRLF)
	43	* received prior to the request-line.
c11191e0 AJ	44	* "
	45	*
	46	* Parsing state is stored between calls to avoid repeating buffer scans.
cbcd99df	47	* If garbage is found the parsing offset is incremented.
c11191e0	48	*/
cbcd99df	49	void
678451c0	50	Http::One::RequestParser::skipGarbageLines()
c11191e0	51	{
c11191e0	52	if (Config.onoff.relaxed_header_parser) {
b749de75	53	if (Config.onoff.relaxed_header_parser < 0 && (buf_[0] == '\r' \|\| buf_[0] == '\n'))
c11191e0 AJ	54	debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " <<
	55	"CRLF bytes received ahead of request-line. " <<
	56	"Ignored due to relaxed_header_parser.");
	57	// Be tolerant of prefix empty lines
cbcd99df	58	// ie any series of either \n or \r\n with no other characters and no repeated \r
b749de75 AJ	59	while (!buf_.isEmpty() && (buf_[0] == '\n' \|\| (buf_[0] == '\r' && buf_[1] == '\n'))) {
b749de75 AJ	60	buf_.consume(1);
7a4fa6a0	61	}
c11191e0	62	}
c11191e0 AJ	63	}
	64
	65	/**
947ca0c6	66	* Attempt to parse the method field out of an HTTP message request-line.
c11191e0 AJ	67	*
	68	* Governed by:
	69	* RFC 1945 section 5.1
947ca0c6	70	* RFC 7230 section 2.6, 3.1 and 3.5
c11191e0	71	*/
e02f963c AR	72	bool
e02f963c AR	73	Http::One::RequestParser::parseMethodField(Http1::Tokenizer &tok)
4c14658e	74	{
e03114f8	75	// method field is a sequence of TCHAR.
e02f963c AR	76	// Limit to 32 characters to prevent overly long sequences of non-HTTP
	77	// being sucked in before mismatch is detected. 32 is itself annoyingly
	78	// big but there are methods registered by IANA that reach 17 bytes:
	79	// http://www.iana.org/assignments/http-methods
	80	static const size_t maxMethodLength = 32; // TODO: make this configurable?
4c14658e	81
e02f963c AR	82	SBuf methodFound;
	83	if (!tok.prefix(methodFound, CharacterSet::TCHAR, maxMethodLength)) {
	84	debugs(33, ErrorLevel(), "invalid request-line: missing or malformed method");
de158bf5	85	parseStatusCode = Http::scBadRequest;
e02f963c	86	return false;
947ca0c6	87	}
e02f963c	88	method_ = HttpRequestMethod(methodFound);
f8b58a68 EB	89
	90	if (!skipDelimiter(tok.skipAll(DelimiterCharacters()), "after method"))
	91	return false;
	92
e02f963c	93	return true;
947ca0c6	94	}
4c14658e	95
e02f963c AR	96	/// the characters which truly are valid within URI
	97	static const CharacterSet &
	98	UriValidCharacters()
78a63ed1	99	{
78a63ed1 AJ	100	/* RFC 3986 section 2:
	101	* "
	102	* A URI is composed from a limited set of characters consisting of
	103	* digits, letters, and a few graphic symbols.
	104	* "
	105	*/
e02f963c AR	106	static const CharacterSet UriChars =
	107	CharacterSet("URI-Chars","") +
	108	// RFC 3986 section 2.2 - reserved characters
	109	CharacterSet("gen-delims", ":/?#[]@") +
	110	CharacterSet("sub-delims", "!$&'()*+,;=") +
	111	// RFC 3986 section 2.3 - unreserved characters
	112	CharacterSet::ALPHA +
	113	CharacterSet::DIGIT +
	114	CharacterSet("unreserved", "-._~") +
	115	// RFC 3986 section 2.1 - percent encoding "%" HEXDIG
	116	CharacterSet("pct-encoded", "%") +
	117	CharacterSet::HEXDIG;
78a63ed1 AJ	118
	119	return UriChars;
	120	}
016a316b	121
e02f963c AR	122	/// characters which Squid will accept in the HTTP request-target (URI)
	123	const CharacterSet &
	124	Http::One::RequestParser::RequestTargetCharacters()
947ca0c6	125	{
e02f963c AR	126	if (Config.onoff.relaxed_header_parser) {
	127	#if USE_HTTP_VIOLATIONS
	128	static const CharacterSet RelaxedExtended =
	129	UriValidCharacters() +
	130	// accept whitespace (extended), it will be dealt with later
	131	DelimiterCharacters() +
	132	// RFC 2396 unwise character set which must never be transmitted
	133	// in un-escaped form. But many web services do anyway.
	134	CharacterSet("RFC2396-unwise","\"\\\|^<>`{}") +
	135	// UTF-8 because we want to be future-proof
	136	CharacterSet("UTF-8", 128, 255);
	137
	138	return RelaxedExtended;
	139	#else
	140	static const CharacterSet RelaxedCompliant =
	141	UriValidCharacters() +
	142	// accept whitespace (extended), it will be dealt with later.
	143	DelimiterCharacters();
	144
	145	return RelaxedCompliant;
	146	#endif
	147	}
	148
	149	// strict parse only accepts what the RFC say we can
	150	return UriValidCharacters();
	151	}
947ca0c6	152
e02f963c AR	153	bool
	154	Http::One::RequestParser::parseUriField(Http1::Tokenizer &tok)
	155	{
947ca0c6 AJ	156	/* Arbitrary 64KB URI upper length limit.
	157	*
	158	* Not quite as arbitrary as it seems though. Old SquidString objects
	159	* cannot store strings larger than 64KB, so we must limit until they
	160	* have all been replaced with SBuf.
	161	*
	162	* Not that it matters but RFC 7230 section 3.1.1 requires (RECOMMENDED)
	163	* at least 8000 octets for the whole line, including method and version.
	164	*/
e02f963c	165	const size_t maxUriLength = static_cast<size_t>((64*1024)-1);
016a316b	166
947ca0c6	167	SBuf uriFound;
e02f963c AR	168	if (!tok.prefix(uriFound, RequestTargetCharacters())) {
	169	parseStatusCode = Http::scBadRequest;
	170	debugs(33, ErrorLevel(), "invalid request-line: missing or malformed URI");
	171	return false;
016a316b AJ	172	}
016a316b AJ	173
e02f963c	174	if (uriFound.length() > maxUriLength) {
e03114f8	175	// RFC 7230 section 3.1.1 mandatory (MUST) 414 response
de158bf5	176	parseStatusCode = Http::scUriTooLong;
e02f963c AR	177	debugs(33, ErrorLevel(), "invalid request-line: " << uriFound.length() <<
	178	"-byte URI exceeds " << maxUriLength << "-byte limit");
	179	return false;
4c14658e	180	}
e02f963c AR	181
	182	uri_ = uriFound;
	183	return true;
947ca0c6	184	}
4c14658e	185
e02f963c	186	bool
f29718b0	187	Http::One::RequestParser::parseHttpVersionField(Http1::Tokenizer &tok)
947ca0c6	188	{
294083a1 EB	189	static const SBuf http1p0("HTTP/1.0");
294083a1 EB	190	static const SBuf http1p1("HTTP/1.1");
e02f963c	191	const auto savedTok = tok;
4c14658e	192
294083a1 EB	193	// Optimization: Expect (and quickly parse) HTTP/1.1 or HTTP/1.0 in
	194	// the vast majority of cases.
	195	if (tok.skipSuffix(http1p1)) {
	196	msgProtocol_ = Http::ProtocolVersion(1, 1);
e02f963c	197	return true;
294083a1 EB	198	} else if (tok.skipSuffix(http1p0)) {
	199	msgProtocol_ = Http::ProtocolVersion(1, 0);
	200	return true;
	201	} else {
	202	// RFC 7230 section 2.6:
	203	// HTTP-version = HTTP-name "/" DIGIT "." DIGIT
	204	static const CharacterSet period("Decimal point", ".");
	205	static const SBuf proto("HTTP/");
	206	SBuf majorDigit;
	207	SBuf minorDigit;
	208	if (tok.suffix(minorDigit, CharacterSet::DIGIT) &&
	209	tok.skipOneTrailing(period) &&
	210	tok.suffix(majorDigit, CharacterSet::DIGIT) &&
	211	tok.skipSuffix(proto)) {
	212	const bool multiDigits = majorDigit.length() > 1 \|\| minorDigit.length() > 1;
	213	// use '0.0' for unsupported multiple digit version numbers
	214	const unsigned int major = multiDigits ? 0 : (*majorDigit.rawContent() - '0');
	215	const unsigned int minor = multiDigits ? 0 : (*minorDigit.rawContent() - '0');
	216	msgProtocol_ = Http::ProtocolVersion(major, minor);
	217	return true;
	218	}
4c14658e AJ	219	}
4c14658e AJ	220
e02f963c AR	221	// A GET request might use HTTP/0.9 syntax
	222	if (method_ == Http::METHOD_GET) {
	223	// RFC 1945 - no HTTP version field at all
	224	tok = savedTok; // in case the URI ends with a digit
	225	// report this assumption as an error if configured to triage parsing
	226	debugs(33, ErrorLevel(), "assuming HTTP/0.9 request-line");
	227	msgProtocol_ = Http::ProtocolVersion(0,9);
	228	return true;
4c14658e	229	}
4c14658e	230
e02f963c AR	231	debugs(33, ErrorLevel(), "invalid request-line: not HTTP");
	232	parseStatusCode = Http::scBadRequest;
	233	return false;
	234	}
4c14658e	235
e02f963c AR	236	/**
	237	* Skip characters separating request-line fields.
	238	* To handle bidirectional parsing, the caller does the actual skipping and
	239	* we just check how many character the caller has skipped.
	240	*/
	241	bool
f8b58a68	242	Http::One::RequestParser::skipDelimiter(const size_t count, const char *where)
e02f963c AR	243	{
e02f963c AR	244	if (count <= 0) {
f8b58a68	245	debugs(33, ErrorLevel(), "invalid request-line: missing delimiter " << where);
e02f963c AR	246	parseStatusCode = Http::scBadRequest;
	247	return false;
	248	}
e03114f8	249
e02f963c AR	250	// tolerant parser allows multiple whitespace characters between request-line fields
e02f963c AR	251	if (count > 1 && !Config.onoff.relaxed_header_parser) {
f8b58a68	252	debugs(33, ErrorLevel(), "invalid request-line: too many delimiters " << where);
e02f963c AR	253	parseStatusCode = Http::scBadRequest;
	254	return false;
	255	}
947ca0c6	256
e02f963c AR	257	return true;
e02f963c AR	258	}
4c14658e	259
e02f963c AR	260	/// Parse CRs at the end of request-line, just before the terminating LF.
	261	bool
	262	Http::One::RequestParser::skipTrailingCrs(Http1::Tokenizer &tok)
	263	{
	264	if (Config.onoff.relaxed_header_parser) {
	265	(void)tok.skipAllTrailing(CharacterSet::CR); // optional; multiple OK
	266	} else {
	267	if (!tok.skipOneTrailing(CharacterSet::CR)) {
	268	debugs(33, ErrorLevel(), "invalid request-line: missing CR before LF");
	269	parseStatusCode = Http::scBadRequest;
	270	return false;
	271	}
	272	}
	273	return true;
947ca0c6	274	}
274bd5ad	275
947ca0c6 AJ	276	/**
	277	* Attempt to parse the first line of a new request message.
	278	*
	279	* Governed by:
	280	* RFC 1945 section 5.1
	281	* RFC 7230 section 2.6, 3.1 and 3.5
	282	*
de158bf5	283	* \retval -1 an error occurred. parseStatusCode indicates HTTP status result.
947ca0c6 AJ	284	* \retval 1 successful parse. member fields contain the request-line items
	285	* \retval 0 more data is needed to complete the parse
	286	*/
	287	int
	288	Http::One::RequestParser::parseRequestFirstLine()
	289	{
947ca0c6 AJ	290	debugs(74, 5, "parsing possible request: buf.length=" << buf_.length());
947ca0c6 AJ	291	debugs(74, DBG_DATA, buf_);
4c14658e	292
e02f963c	293	SBuf line;
947ca0c6	294
e02f963c AR	295	// Earlier, skipGarbageLines() took care of any leading LFs (if allowed).
	296	// Now, the request line has to end at the first LF.
	297	static const CharacterSet lineChars = CharacterSet::LF.complement("notLF");
	298	::Parser::Tokenizer lineTok(buf_);
	299	if (!lineTok.prefix(line, lineChars) \|\| !lineTok.skip('\n')) {
f8b58a68 EB	300	if (buf_.length() >= Config.maxRequestHeaderSize) {
	301	/* who should we blame for our failure to parse this line? */
	302
	303	Http1::Tokenizer methodTok(buf_);
	304	if (!parseMethodField(methodTok))
	305	return -1; // blame a bad method (or its delimiter)
	306
	307	// assume it is the URI
	308	debugs(74, ErrorLevel(), "invalid request-line: URI exceeds " <<
a95f4c73	309	Config.maxRequestHeaderSize << "-byte limit");
f8b58a68 EB	310	parseStatusCode = Http::scUriTooLong;
	311	return -1;
	312	}
947ca0c6 AJ	313	debugs(74, 5, "Parser needs more data");
947ca0c6 AJ	314	return 0;
4c14658e AJ	315	}
4c14658e AJ	316
e02f963c	317	Http1::Tokenizer tok(line);
78a63ed1	318
e02f963c AR	319	if (!parseMethodField(tok))
e02f963c AR	320	return -1;
e47e0802	321
e02f963c AR	322	/* now parse backwards, to leave just the URI */
	323	if (!skipTrailingCrs(tok))
	324	return -1;
	325
	326	if (!parseHttpVersionField(tok))
	327	return -1;
947ca0c6	328
f8b58a68	329	if (!http0() && !skipDelimiter(tok.skipAllTrailing(DelimiterCharacters()), "before protocol version"))
e02f963c AR	330	return -1;
	331
	332	/* parsed everything before and after the URI */
	333
	334	if (!parseUriField(tok))
	335	return -1;
	336
	337	if (!tok.atEnd()) {
	338	debugs(33, ErrorLevel(), "invalid request-line: garbage after URI");
	339	parseStatusCode = Http::scBadRequest;
	340	return -1;
4c14658e	341	}
4c14658e	342
e02f963c AR	343	parseStatusCode = Http::scOkay;
	344	buf_ = lineTok.remaining(); // incremental parse checkpoint
	345	return 1;
4c14658e	346	}
7a4fa6a0	347
87abd755	348	bool
36a9c964	349	Http::One::RequestParser::parse(const SBuf &aBuf)
6b2b6cfe CT	350	{
	351	const bool result = doParse(aBuf);
	352	if (preserveParsed_) {
	353	assert(aBuf.length() >= remaining().length());
	354	parsed_.append(aBuf.substr(0, aBuf.length() - remaining().length())); // newly parsed bytes
	355	}
	356
	357	return result;
	358	}
	359
	360	// raw is not a reference because a reference might point back to our own buf_ or parsed_
	361	bool
	362	Http::One::RequestParser::doParse(const SBuf &aBuf)
4c14658e	363	{
b749de75	364	buf_ = aBuf;
36a9c964 AJ	365	debugs(74, DBG_DATA, "Parse buf={length=" << aBuf.length() << ", data='" << aBuf << "'}");
36a9c964 AJ	366
cbcd99df	367	// stage 1: locate the request-line
36a9c964	368	if (parsingStage_ == HTTP_PARSE_NONE) {
cbcd99df	369	skipGarbageLines();
cbcd99df AJ	370
cbcd99df AJ	371	// if we hit something before EOS treat it as a message
b749de75	372	if (!buf_.isEmpty())
cbcd99df AJ	373	parsingStage_ = HTTP_PARSE_FIRST;
cbcd99df AJ	374	else
f9daf571	375	return false;
cbcd99df	376	}
c11191e0	377
cbcd99df AJ	378	// stage 2: parse the request-line
cbcd99df AJ	379	if (parsingStage_ == HTTP_PARSE_FIRST) {
f4880526	380	PROF_start(HttpParserParseReqLine);
678451c0	381	const int retcode = parseRequestFirstLine();
e4cff825 AJ	382
	383	// first-line (or a look-alike) found successfully.
	384	if (retcode > 0) {
e4cff825 AJ	385	parsingStage_ = HTTP_PARSE_MIME;
	386	}
	387
947ca0c6 AJ	388	debugs(74, 5, "request-line: retval " << retcode << ": line={" << aBuf.length() << ", data='" << aBuf << "'}");
	389	debugs(74, 5, "request-line: method: " << method_);
	390	debugs(74, 5, "request-line: url: " << uri_);
	391	debugs(74, 5, "request-line: proto: " << msgProtocol_);
b749de75	392	debugs(74, 5, "Parser: bytes processed=" << (aBuf.length()-buf_.length()));
f4880526	393	PROF_stop(HttpParserParseReqLine);
cbcd99df AJ	394
cbcd99df AJ	395	// syntax errors already
f4880526	396	if (retcode < 0) {
cbcd99df	397	parsingStage_ = HTTP_PARSE_DONE;
f4880526 AJ	398	return false;
	399	}
	400	}
	401
	402	// stage 3: locate the mime header block
cbcd99df	403	if (parsingStage_ == HTTP_PARSE_MIME) {
f4880526	404	// HTTP/1.x request-line is valid and parsing completed.
f8cab755	405	if (!grabMimeBlock("Request", Config.maxRequestHeaderSize)) {
f1d5359e AJ	406	if (parseStatusCode == Http::scHeaderTooLarge)
f1d5359e AJ	407	parseStatusCode = Http::scRequestHeaderFieldsTooLarge;
016a316b AJ	408	return false;
016a316b AJ	409	}
f4880526	410	}
87abd755	411
36a9c964	412	return !needsMoreData();
4c14658e	413	}
f53969cc	414