[thirdparty/squid.git] / src / http / one / RequestParser.cc

/*
 * Copyright (C) 1996-2021 The Squid Software Foundation and contributors
 *
 * Squid software is distributed under GPLv2+ license and includes
 * contributions from numerous individuals and organizations.
 * Please see the COPYING and CONTRIBUTORS files for details.
 */

#include "squid.h"
#include "Debug.h"
#include "http/one/RequestParser.h"
#include "http/ProtocolVersion.h"
#include "parser/Tokenizer.h"
#include "profiler/Profiler.h"
#include "SquidConfig.h"

/// \returns the request-line length computed from the stored method image and
/// request-target, plus a fixed allowance for the other request-line elements
Http1::Parser::size_type
Http::One::RequestParser::firstLineSize() const
{
    // RFC 7230 section 2.6 request-line layout:
    //   method SP request-target SP "HTTP/" DIGIT "." DIGIT CRLF
    // Besides the method and the request-target, that is 12 bytes:
    //   SP (1) + SP (1) + "HTTP/" DIGIT "." DIGIT (8) + CRLF (2)
    const auto methodLength = method_.image().length();
    const auto targetLength = uri_.length();
    return methodLength + targetLength + 12;
}

/**
 * Skip empty lines received ahead of the request-line.
 *
 * Governed by RFC 7230 section 3.5
 *  "
 *    In the interest of robustness, a server that is expecting to receive
 *    and parse a request-line SHOULD ignore at least one empty line (CRLF)
 *    received prior to the request-line.
 *  "
 *
 * Does nothing unless relaxed_header_parser is enabled. Otherwise, leading
 * bare-LF and CRLF sequences are consumed from buf_. A CR that is not
 * followed by LF stops the skipping and is left in buf_.
 */
void
Http::One::RequestParser::skipGarbageLines()
{
    if (!Config.onoff.relaxed_header_parser)
        return;

    // Nothing to skip, and buf_[0] must not be read from an empty buffer.
    if (buf_.isEmpty())
        return;

    // a negative setting means "tolerate, but warn"
    if (Config.onoff.relaxed_header_parser < 0 && (buf_[0] == '\r' || buf_[0] == '\n'))
        debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " <<
               "CRLF bytes received ahead of request-line. " <<
               "Ignored due to relaxed_header_parser.");

    // Be tolerant of prefix empty lines
    // ie any series of either \n or \r\n with no other characters and no repeated \r
    while (!buf_.isEmpty()) {
        const bool bareLf = (buf_[0] == '\n');
        // Only look at the second byte when the buffer actually has one;
        // NOTE(review): SBuf::operator[] is presumably unchecked - confirm.
        const bool crLf = !bareLf && buf_.length() > 1 &&
                          buf_[0] == '\r' && buf_[1] == '\n';
        if (!bareLf && !crLf)
            break;

        // One byte at a time: the CR of a CRLF pair goes first and its LF is
        // then consumed as a bare LF by the next iteration.
        buf_.consume(1);
    }
}

/**
 * Extract the method token that starts an HTTP request-line and validate the
 * delimiter(s) that follow it.
 *
 * Governed by:
 *  RFC 1945 section 5.1
 *  RFC 7230 section 2.6, 3.1 and 3.5
 *
 * \retval true   method_ has been set and the delimiter check passed
 * \retval false  parseStatusCode has been set to Http::scBadRequest
 */
bool
Http::One::RequestParser::parseMethodField(Tokenizer &tok)
{
    // The method is a run of TCHAR. It is capped at 32 characters so that
    // long stretches of non-HTTP input are rejected early. 32 is generous,
    // but IANA-registered methods reach 17 bytes:
    //  http://www.iana.org/assignments/http-methods
    static const size_t maxMethodLength = 32; // TODO: make this configurable?

    SBuf token;
    const bool gotMethod = tok.prefix(token, CharacterSet::TCHAR, maxMethodLength);
    if (!gotMethod) {
        debugs(33, ErrorLevel(), "invalid request-line: missing or malformed method");
        parseStatusCode = Http::scBadRequest;
        return false;
    }

    method_ = HttpRequestMethod(token);

    // skipDelimiter() reports and records the error itself
    const auto delimiterCount = tok.skipAll(DelimiterCharacters());
    return skipDelimiter(delimiterCount, "after method");
}

/// The set of characters that are genuinely valid inside a URI.
/// The set is built once, on first use, and shared by all callers.
static const CharacterSet &
UriValidCharacters()
{
    /* RFC 3986 section 2:
     * "
     *   A URI is composed from a limited set of characters consisting of
     *   digits, letters, and a few graphic symbols.
     * "
     */
    static const CharacterSet UriChars = []() -> CharacterSet {
        // RFC 3986 section 2.2 - reserved characters
        const CharacterSet genDelims("gen-delims", ":/?#[]@");
        const CharacterSet subDelims("sub-delims", "!$&'()*+,;=");
        // RFC 3986 section 2.3 - unreserved characters (besides ALPHA and DIGIT)
        const CharacterSet unreservedMarks("unreserved", "-._~");
        // RFC 3986 section 2.1 - percent encoding "%" HEXDIG
        const CharacterSet percent("pct-encoded", "%");

        // The union is evaluated left to right, starting from the empty
        // "URI-Chars" set.
        return CharacterSet("URI-Chars","") +
               genDelims +
               subDelims +
               CharacterSet::ALPHA +
               CharacterSet::DIGIT +
               unreservedMarks +
               percent +
               CharacterSet::HEXDIG;
    }();

    return UriChars;
}

/// The characters Squid accepts in an HTTP request-target (URI).
/// The answer depends on relaxed_header_parser and, at build time, on
/// USE_HTTP_VIOLATIONS.
const CharacterSet &
Http::One::RequestParser::RequestTargetCharacters()
{
    // strict parse only accepts what the RFC say we can
    if (!Config.onoff.relaxed_header_parser)
        return UriValidCharacters();

#if USE_HTTP_VIOLATIONS
    static const CharacterSet RelaxedExtended =
        UriValidCharacters() +
        // whitespace is let through here (extended); it is handled later
        DelimiterCharacters() +
        // The RFC 2396 "unwise" characters must never be sent un-escaped,
        // but many web services send them anyway.
        CharacterSet("RFC2396-unwise","\"\\|^<>`{}") +
        // UTF-8 octets, to be future-proof
        CharacterSet("UTF-8", 128, 255);

    return RelaxedExtended;
#else
    static const CharacterSet RelaxedCompliant =
        UriValidCharacters() +
        // whitespace is let through here (extended); it is handled later
        DelimiterCharacters();

    return RelaxedCompliant;
#endif
}

/**
 * Extract the request-target from what is left of the request-line.
 *
 * \retval true   uri_ has been set to the extracted request-target
 * \retval false  parseStatusCode has been set to Http::scBadRequest (missing
 *                or malformed URI) or Http::scUriTooLong (URI over the limit)
 */
bool
Http::One::RequestParser::parseUriField(Tokenizer &tok)
{
    /* The URI is limited to just under 64KB.
     *
     * The limit is less arbitrary than it looks: old SquidString objects
     * cannot store strings larger than 64KB, so the cap stays until they
     * have all been replaced with SBuf.
     *
     * For comparison, RFC 7230 section 3.1.1 recommends (RECOMMENDED)
     * supporting at least 8000 octets for the whole line, including method
     * and version.
     */
    static const size_t maxUriLength = static_cast<size_t>((64*1024)-1);

    SBuf target;
    const bool gotTarget = tok.prefix(target, RequestTargetCharacters());
    if (!gotTarget) {
        parseStatusCode = Http::scBadRequest;
        debugs(33, ErrorLevel(), "invalid request-line: missing or malformed URI");
        return false;
    }

    const auto targetLength = target.length();
    if (targetLength > maxUriLength) {
        // RFC 7230 section 3.1.1 mandatory (MUST) 414 response
        parseStatusCode = Http::scUriTooLong;
        debugs(33, ErrorLevel(), "invalid request-line: " << targetLength <<
               "-byte URI exceeds " << maxUriLength << "-byte limit");
        return false;
    }

    uri_ = target;
    return true;
}

/**
 * Parse the HTTP-version found at the end of the request-line, working
 * backwards from the end of tok.
 *
 * When no version is found, a GET request is assumed to use HTTP/0.9 syntax
 * and tok is restored to the state it had on entry.
 *
 * \retval true   msgProtocol_ has been set
 * \retval false  parseStatusCode has been set to Http::scBadRequest
 */
bool
Http::One::RequestParser::parseHttpVersionField(Tokenizer &tok)
{
    static const SBuf http1p0("HTTP/1.0");
    static const SBuf http1p1("HTTP/1.1");
    const auto tokOnEntry = tok;

    // Optimization: the vast majority of requests end in HTTP/1.1 or
    // HTTP/1.0, so try those exact suffixes first.
    if (tok.skipSuffix(http1p1)) {
        msgProtocol_ = Http::ProtocolVersion(1, 1);
        return true;
    }

    if (tok.skipSuffix(http1p0)) {
        msgProtocol_ = Http::ProtocolVersion(1, 0);
        return true;
    }

    // RFC 7230 section 2.6:
    // HTTP-version  = HTTP-name "/" DIGIT "." DIGIT
    static const CharacterSet period("Decimal point", ".");
    static const SBuf proto("HTTP/");
    SBuf majorDigit;
    SBuf minorDigit;
    // matched right to left: minor digits, ".", major digits, "HTTP/"
    const bool sawVersion =
        tok.suffix(minorDigit, CharacterSet::DIGIT) &&
        tok.skipOneTrailing(period) &&
        tok.suffix(majorDigit, CharacterSet::DIGIT) &&
        tok.skipSuffix(proto);

    if (sawVersion) {
        // unsupported multiple digit version numbers are recorded as '0.0'
        unsigned int majorVersion = 0;
        unsigned int minorVersion = 0;
        const bool singleDigits = majorDigit.length() <= 1 && minorDigit.length() <= 1;
        if (singleDigits) {
            majorVersion = *majorDigit.rawContent() - '0';
            minorVersion = *minorDigit.rawContent() - '0';
        }
        msgProtocol_ = Http::ProtocolVersion(majorVersion, minorVersion);
        return true;
    }

    // Anything other than GET cannot be an HTTP/0.9 request
    if (!(method_ == Http::METHOD_GET)) {
        debugs(33, ErrorLevel(), "invalid request-line: not HTTP");
        parseStatusCode = Http::scBadRequest;
        return false;
    }

    // RFC 1945 - no HTTP version field at all
    tok = tokOnEntry; // undo partial matches, in case the URI ends with a digit
    // report this assumption as an error if configured to triage parsing
    debugs(33, ErrorLevel(), "assuming HTTP/0.9 request-line");
    msgProtocol_ = Http::ProtocolVersion(0,9);
    return true;
}

/**
 * Validate the delimiter separating two request-line fields.
 *
 * Because the request-line is parsed from both ends, the caller performs the
 * actual skipping; this method only judges how many characters were skipped.
 *
 * \param count  the number of delimiter characters the caller skipped
 * \param where  a description of the delimiter position, for error reporting
 * \retval true   the delimiter is acceptable
 * \retval false  parseStatusCode has been set to Http::scBadRequest
 */
bool
Http::One::RequestParser::skipDelimiter(const size_t count, const char *where)
{
    // exactly one delimiter is fine in both strict and relaxed modes
    if (count == 1)
        return true;

    if (count == 0) {
        debugs(33, ErrorLevel(), "invalid request-line: missing delimiter " << where);
        parseStatusCode = Http::scBadRequest;
        return false;
    }

    // Here count > 1: only the tolerant parser allows multiple whitespace
    // characters between request-line fields.
    if (Config.onoff.relaxed_header_parser)
        return true;

    debugs(33, ErrorLevel(), "invalid request-line: too many delimiters " << where);
    parseStatusCode = Http::scBadRequest;
    return false;
}

/// Remove the CR(s) that end the request-line, just before the terminating LF.
/// The relaxed parser accepts any number of CRs (including none); the strict
/// parser requires one and sets parseStatusCode to Http::scBadRequest without it.
bool
Http::One::RequestParser::skipTrailingCrs(Tokenizer &tok)
{
    if (Config.onoff.relaxed_header_parser) {
        (void)tok.skipAllTrailing(CharacterSet::CR); // optional; multiple OK
        return true;
    }

    if (tok.skipOneTrailing(CharacterSet::CR))
        return true;

    debugs(33, ErrorLevel(), "invalid request-line: missing CR before LF");
    parseStatusCode = Http::scBadRequest;
    return false;
}

/**
 * Try to parse the request-line at the start of buf_.
 *
 * The line is cut at the first LF. The method is parsed from the front, the
 * trailing CR(s) and the HTTP-version from the back, and whatever remains in
 * the middle must be the request-target.
 *
 * Governed by:
 *  RFC 1945 section 5.1
 *  RFC 7230 section 2.6, 3.1 and 3.5
 *
 * \retval -1  an error occurred. parseStatusCode indicates HTTP status result.
 * \retval  1  successful parse. member fields contain the request-line items
 * \retval  0  more data is needed to complete the parse
 */
int
Http::One::RequestParser::parseRequestFirstLine()
{
    debugs(74, 5, "parsing possible request: buf.length=" << buf_.length());
    debugs(74, DBG_DATA, buf_);

    // skipGarbageLines() has already dealt with leading LFs (if allowed),
    // so the request line must end at the first LF.
    static const CharacterSet lineChars = CharacterSet::LF.complement("notLF");
    SBuf line;
    Tokenizer lineTok(buf_);
    const bool haveWholeLine = lineTok.prefix(line, lineChars) && lineTok.skip('\n');

    if (!haveWholeLine) {
        // NOTE(review): a buffer that begins with LF presumably lands here too,
        // if prefix() fails on a zero-length match - confirm.
        if (buf_.length() < Config.maxRequestHeaderSize) {
            debugs(74, 5, "Parser needs more data");
            return 0;
        }

        /* The buffer is full and still has no line end; decide what to blame. */

        Tokenizer methodTok(buf_);
        if (!parseMethodField(methodTok))
            return -1; // blame a bad method (or its delimiter)

        // the method was fine, so assume the URI is at fault
        debugs(74, ErrorLevel(), "invalid request-line: URI exceeds " <<
               Config.maxRequestHeaderSize << "-byte limit");
        parseStatusCode = Http::scUriTooLong;
        return -1;
    }

    Tokenizer tok(line);

    // forwards: the method and the delimiter after it
    if (!parseMethodField(tok))
        return -1;

    /* backwards from the end of the line, to leave just the URI */
    if (!skipTrailingCrs(tok))
        return -1;

    if (!parseHttpVersionField(tok))
        return -1;

    // an HTTP/0.9 request-line has no version and thus no delimiter before it
    if (!http0()) {
        const auto delimiterCount = tok.skipAllTrailing(DelimiterCharacters());
        if (!skipDelimiter(delimiterCount, "before protocol version"))
            return -1;
    }

    /* everything before and after the URI has been parsed */

    if (!parseUriField(tok))
        return -1;

    if (!tok.atEnd()) {
        debugs(33, ErrorLevel(), "invalid request-line: garbage after URI");
        parseStatusCode = Http::scBadRequest;
        return -1;
    }

    parseStatusCode = Http::scOkay;
    buf_ = lineTok.remaining(); // incremental parse checkpoint
    return 1;
}

/// Run doParse() on aBuf and, when preserveParsed_ is set, append the bytes
/// consumed by this call to parsed_.
/// \returns whatever doParse() returned
bool
Http::One::RequestParser::parse(const SBuf &aBuf)
{
    const bool result = doParse(aBuf);
    if (!preserveParsed_)
        return result;

    // The consumed bytes are the leading part of aBuf that is no longer in
    // remaining().
    const auto unparsedLength = remaining().length();
    assert(aBuf.length() >= unparsedLength);
    parsed_.append(aBuf.substr(0, aBuf.length() - unparsedLength)); // newly parsed bytes

    return result;
}

/// Advance parsing of the request using aBuf as the input.
/// aBuf is assigned to buf_ first; after that it is only read for debugging.
/// \returns false when no request has started yet, on a request-line syntax
/// error, or when grabMimeBlock() fails; otherwise !needsMoreData()
bool
Http::One::RequestParser::doParse(const SBuf &aBuf)
{
    buf_ = aBuf;
    debugs(74, DBG_DATA, "Parse buf={length=" << aBuf.length() << ", data='" << aBuf << "'}");

    // stage 1: locate the request-line
    if (parsingStage_ == HTTP_PARSE_NONE) {
        skipGarbageLines();

        // nothing (left) to look at yet
        if (buf_.isEmpty())
            return false;

        // anything found before EOS is treated as a message
        parsingStage_ = HTTP_PARSE_FIRST;
    }

    // stage 2: parse the request-line
    if (parsingStage_ == HTTP_PARSE_FIRST) {
        PROF_start(HttpParserParseReqLine);
        const int lineResult = parseRequestFirstLine();

        // a first-line (or a look-alike) was found
        if (lineResult > 0)
            parsingStage_ = HTTP_PARSE_MIME;

        debugs(74, 5, "request-line: retval " << lineResult << ": line={" << aBuf.length() << ", data='" << aBuf << "'}");
        debugs(74, 5, "request-line: method: " << method_);
        debugs(74, 5, "request-line: url: " << uri_);
        debugs(74, 5, "request-line: proto: " << msgProtocol_);
        debugs(74, 5, "Parser: bytes processed=" << (aBuf.length()-buf_.length()));
        PROF_stop(HttpParserParseReqLine);

        // a syntax error ends the parsing
        if (lineResult < 0) {
            parsingStage_ = HTTP_PARSE_DONE;
            return false;
        }
    }

    // stage 3: locate the mime header block
    // (reached once an HTTP/1.x request-line has been parsed successfully)
    if (parsingStage_ == HTTP_PARSE_MIME &&
            !grabMimeBlock("Request", Config.maxRequestHeaderSize)) {
        // report the request-specific variant of the "too large" status
        if (parseStatusCode == Http::scHeaderTooLarge)
            parseStatusCode = Http::scRequestHeaderFieldsTooLarge;
        return false;
    }

    return !needsMoreData();
}
Commit	Line	Data
eac61ce1	1	/*
f70aedc4	2	* Copyright (C) 1996-2021 The Squid Software Foundation and contributors
eac61ce1 AJ	3	*
	4	* Squid software is distributed under GPLv2+ license and includes
	5	* contributions from numerous individuals and organizations.
	6	* Please see the COPYING and CONTRIBUTORS files for details.
	7	*/
	8
f7f3304a	9	#include "squid.h"
4c14658e	10	#include "Debug.h"
c99510dd AJ	11	#include "http/one/RequestParser.h"
c99510dd AJ	12	#include "http/ProtocolVersion.h"
417da400	13	#include "parser/Tokenizer.h"
582c2af2	14	#include "profiler/Profiler.h"
4d5904f7	15	#include "SquidConfig.h"
4c14658e	16
947ca0c6 AJ	17	Http1::Parser::size_type
947ca0c6 AJ	18	Http::One::RequestParser::firstLineSize() const
7322c9dd	19	{
947ca0c6 AJ	20	// RFC 7230 section 2.6
	21	/* method SP request-target SP "HTTP/" DIGIT "." DIGIT CRLF */
	22	return method_.image().length() + uri_.length() + 12;
4c14658e AJ	23	}
4c14658e AJ	24
c11191e0 AJ	25	/**
	26	* Attempt to parse the first line of a new request message.
	27	*
a4c74dd8	28	* Governed by RFC 7230 section 3.5
c11191e0	29	* "
a4c74dd8 AJ	30	* In the interest of robustness, a server that is expecting to receive
	31	* and parse a request-line SHOULD ignore at least one empty line (CRLF)
	32	* received prior to the request-line.
c11191e0 AJ	33	* "
	34	*
	35	* Parsing state is stored between calls to avoid repeating buffer scans.
cbcd99df	36	* If garbage is found the parsing offset is incremented.
c11191e0	37	*/
cbcd99df	38	void
678451c0	39	Http::One::RequestParser::skipGarbageLines()
c11191e0	40	{
c11191e0	41	if (Config.onoff.relaxed_header_parser) {
b749de75	42	if (Config.onoff.relaxed_header_parser < 0 && (buf_[0] == '\r' \|\| buf_[0] == '\n'))
c11191e0 AJ	43	debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " <<
	44	"CRLF bytes received ahead of request-line. " <<
	45	"Ignored due to relaxed_header_parser.");
	46	// Be tolerant of prefix empty lines
cbcd99df	47	// ie any series of either \n or \r\n with no other characters and no repeated \r
b749de75 AJ	48	while (!buf_.isEmpty() && (buf_[0] == '\n' \|\| (buf_[0] == '\r' && buf_[1] == '\n'))) {
b749de75 AJ	49	buf_.consume(1);
7a4fa6a0	50	}
c11191e0	51	}
c11191e0 AJ	52	}
	53
	54	/**
947ca0c6	55	* Attempt to parse the method field out of an HTTP message request-line.
c11191e0 AJ	56	*
	57	* Governed by:
	58	* RFC 1945 section 5.1
947ca0c6	59	* RFC 7230 section 2.6, 3.1 and 3.5
c11191e0	60	*/
e02f963c	61	bool
417da400	62	Http::One::RequestParser::parseMethodField(Tokenizer &tok)
4c14658e	63	{
e03114f8	64	// method field is a sequence of TCHAR.
e02f963c AR	65	// Limit to 32 characters to prevent overly long sequences of non-HTTP
	66	// being sucked in before mismatch is detected. 32 is itself annoyingly
	67	// big but there are methods registered by IANA that reach 17 bytes:
	68	// http://www.iana.org/assignments/http-methods
	69	static const size_t maxMethodLength = 32; // TODO: make this configurable?
4c14658e	70
e02f963c AR	71	SBuf methodFound;
	72	if (!tok.prefix(methodFound, CharacterSet::TCHAR, maxMethodLength)) {
	73	debugs(33, ErrorLevel(), "invalid request-line: missing or malformed method");
de158bf5	74	parseStatusCode = Http::scBadRequest;
e02f963c	75	return false;
947ca0c6	76	}
e02f963c	77	method_ = HttpRequestMethod(methodFound);
f8b58a68 EB	78
	79	if (!skipDelimiter(tok.skipAll(DelimiterCharacters()), "after method"))
	80	return false;
	81
e02f963c	82	return true;
947ca0c6	83	}
4c14658e	84
e02f963c AR	85	/// the characters which truly are valid within URI
	86	static const CharacterSet &
	87	UriValidCharacters()
78a63ed1	88	{
78a63ed1 AJ	89	/* RFC 3986 section 2:
	90	* "
	91	* A URI is composed from a limited set of characters consisting of
	92	* digits, letters, and a few graphic symbols.
	93	* "
	94	*/
e02f963c AR	95	static const CharacterSet UriChars =
	96	CharacterSet("URI-Chars","") +
	97	// RFC 3986 section 2.2 - reserved characters
	98	CharacterSet("gen-delims", ":/?#[]@") +
	99	CharacterSet("sub-delims", "!$&'()*+,;=") +
	100	// RFC 3986 section 2.3 - unreserved characters
	101	CharacterSet::ALPHA +
	102	CharacterSet::DIGIT +
	103	CharacterSet("unreserved", "-._~") +
	104	// RFC 3986 section 2.1 - percent encoding "%" HEXDIG
	105	CharacterSet("pct-encoded", "%") +
	106	CharacterSet::HEXDIG;
78a63ed1 AJ	107
	108	return UriChars;
	109	}
016a316b	110
e02f963c AR	111	/// characters which Squid will accept in the HTTP request-target (URI)
	112	const CharacterSet &
	113	Http::One::RequestParser::RequestTargetCharacters()
947ca0c6	114	{
e02f963c AR	115	if (Config.onoff.relaxed_header_parser) {
	116	#if USE_HTTP_VIOLATIONS
	117	static const CharacterSet RelaxedExtended =
	118	UriValidCharacters() +
	119	// accept whitespace (extended), it will be dealt with later
	120	DelimiterCharacters() +
	121	// RFC 2396 unwise character set which must never be transmitted
	122	// in un-escaped form. But many web services do anyway.
	123	CharacterSet("RFC2396-unwise","\"\\\|^<>`{}") +
	124	// UTF-8 because we want to be future-proof
	125	CharacterSet("UTF-8", 128, 255);
	126
	127	return RelaxedExtended;
	128	#else
	129	static const CharacterSet RelaxedCompliant =
	130	UriValidCharacters() +
	131	// accept whitespace (extended), it will be dealt with later.
	132	DelimiterCharacters();
	133
	134	return RelaxedCompliant;
	135	#endif
	136	}
	137
	138	// strict parse only accepts what the RFC say we can
	139	return UriValidCharacters();
	140	}
947ca0c6	141
e02f963c	142	bool
417da400	143	Http::One::RequestParser::parseUriField(Tokenizer &tok)
e02f963c	144	{
947ca0c6 AJ	145	/* Arbitrary 64KB URI upper length limit.
	146	*
	147	* Not quite as arbitrary as it seems though. Old SquidString objects
	148	* cannot store strings larger than 64KB, so we must limit until they
	149	* have all been replaced with SBuf.
	150	*
	151	* Not that it matters but RFC 7230 section 3.1.1 requires (RECOMMENDED)
	152	* at least 8000 octets for the whole line, including method and version.
	153	*/
e02f963c	154	const size_t maxUriLength = static_cast<size_t>((64*1024)-1);
016a316b	155
947ca0c6	156	SBuf uriFound;
e02f963c AR	157	if (!tok.prefix(uriFound, RequestTargetCharacters())) {
	158	parseStatusCode = Http::scBadRequest;
	159	debugs(33, ErrorLevel(), "invalid request-line: missing or malformed URI");
	160	return false;
016a316b AJ	161	}
016a316b AJ	162
e02f963c	163	if (uriFound.length() > maxUriLength) {
e03114f8	164	// RFC 7230 section 3.1.1 mandatory (MUST) 414 response
de158bf5	165	parseStatusCode = Http::scUriTooLong;
e02f963c AR	166	debugs(33, ErrorLevel(), "invalid request-line: " << uriFound.length() <<
	167	"-byte URI exceeds " << maxUriLength << "-byte limit");
	168	return false;
4c14658e	169	}
e02f963c AR	170
	171	uri_ = uriFound;
	172	return true;
947ca0c6	173	}
4c14658e	174
e02f963c	175	bool
417da400	176	Http::One::RequestParser::parseHttpVersionField(Tokenizer &tok)
947ca0c6	177	{
294083a1 EB	178	static const SBuf http1p0("HTTP/1.0");
294083a1 EB	179	static const SBuf http1p1("HTTP/1.1");
e02f963c	180	const auto savedTok = tok;
4c14658e	181
294083a1 EB	182	// Optimization: Expect (and quickly parse) HTTP/1.1 or HTTP/1.0 in
	183	// the vast majority of cases.
	184	if (tok.skipSuffix(http1p1)) {
	185	msgProtocol_ = Http::ProtocolVersion(1, 1);
e02f963c	186	return true;
294083a1 EB	187	} else if (tok.skipSuffix(http1p0)) {
	188	msgProtocol_ = Http::ProtocolVersion(1, 0);
	189	return true;
	190	} else {
	191	// RFC 7230 section 2.6:
	192	// HTTP-version = HTTP-name "/" DIGIT "." DIGIT
	193	static const CharacterSet period("Decimal point", ".");
	194	static const SBuf proto("HTTP/");
	195	SBuf majorDigit;
	196	SBuf minorDigit;
	197	if (tok.suffix(minorDigit, CharacterSet::DIGIT) &&
	198	tok.skipOneTrailing(period) &&
	199	tok.suffix(majorDigit, CharacterSet::DIGIT) &&
	200	tok.skipSuffix(proto)) {
	201	const bool multiDigits = majorDigit.length() > 1 \|\| minorDigit.length() > 1;
	202	// use '0.0' for unsupported multiple digit version numbers
	203	const unsigned int major = multiDigits ? 0 : (*majorDigit.rawContent() - '0');
	204	const unsigned int minor = multiDigits ? 0 : (*minorDigit.rawContent() - '0');
	205	msgProtocol_ = Http::ProtocolVersion(major, minor);
	206	return true;
	207	}
4c14658e AJ	208	}
4c14658e AJ	209
e02f963c AR	210	// A GET request might use HTTP/0.9 syntax
	211	if (method_ == Http::METHOD_GET) {
	212	// RFC 1945 - no HTTP version field at all
	213	tok = savedTok; // in case the URI ends with a digit
	214	// report this assumption as an error if configured to triage parsing
	215	debugs(33, ErrorLevel(), "assuming HTTP/0.9 request-line");
	216	msgProtocol_ = Http::ProtocolVersion(0,9);
	217	return true;
4c14658e	218	}
4c14658e	219
e02f963c AR	220	debugs(33, ErrorLevel(), "invalid request-line: not HTTP");
	221	parseStatusCode = Http::scBadRequest;
	222	return false;
	223	}
4c14658e	224
e02f963c AR	225	/**
	226	* Skip characters separating request-line fields.
	227	* To handle bidirectional parsing, the caller does the actual skipping and
	228	* we just check how many character the caller has skipped.
	229	*/
	230	bool
f8b58a68	231	Http::One::RequestParser::skipDelimiter(const size_t count, const char *where)
e02f963c AR	232	{
e02f963c AR	233	if (count <= 0) {
f8b58a68	234	debugs(33, ErrorLevel(), "invalid request-line: missing delimiter " << where);
e02f963c AR	235	parseStatusCode = Http::scBadRequest;
	236	return false;
	237	}
e03114f8	238
e02f963c AR	239	// tolerant parser allows multiple whitespace characters between request-line fields
e02f963c AR	240	if (count > 1 && !Config.onoff.relaxed_header_parser) {
f8b58a68	241	debugs(33, ErrorLevel(), "invalid request-line: too many delimiters " << where);
e02f963c AR	242	parseStatusCode = Http::scBadRequest;
	243	return false;
	244	}
947ca0c6	245
e02f963c AR	246	return true;
e02f963c AR	247	}
4c14658e	248
e02f963c AR	249	/// Parse CRs at the end of request-line, just before the terminating LF.
e02f963c AR	250	bool
417da400	251	Http::One::RequestParser::skipTrailingCrs(Tokenizer &tok)
e02f963c AR	252	{
	253	if (Config.onoff.relaxed_header_parser) {
	254	(void)tok.skipAllTrailing(CharacterSet::CR); // optional; multiple OK
	255	} else {
	256	if (!tok.skipOneTrailing(CharacterSet::CR)) {
	257	debugs(33, ErrorLevel(), "invalid request-line: missing CR before LF");
	258	parseStatusCode = Http::scBadRequest;
	259	return false;
	260	}
	261	}
	262	return true;
947ca0c6	263	}
274bd5ad	264
947ca0c6 AJ	265	/**
	266	* Attempt to parse the first line of a new request message.
	267	*
	268	* Governed by:
	269	* RFC 1945 section 5.1
	270	* RFC 7230 section 2.6, 3.1 and 3.5
	271	*
de158bf5	272	* \retval -1 an error occurred. parseStatusCode indicates HTTP status result.
947ca0c6 AJ	273	* \retval 1 successful parse. member fields contain the request-line items
	274	* \retval 0 more data is needed to complete the parse
	275	*/
	276	int
	277	Http::One::RequestParser::parseRequestFirstLine()
	278	{
947ca0c6 AJ	279	debugs(74, 5, "parsing possible request: buf.length=" << buf_.length());
947ca0c6 AJ	280	debugs(74, DBG_DATA, buf_);
4c14658e	281
e02f963c	282	SBuf line;
947ca0c6	283
e02f963c AR	284	// Earlier, skipGarbageLines() took care of any leading LFs (if allowed).
	285	// Now, the request line has to end at the first LF.
	286	static const CharacterSet lineChars = CharacterSet::LF.complement("notLF");
417da400	287	Tokenizer lineTok(buf_);
e02f963c	288	if (!lineTok.prefix(line, lineChars) \|\| !lineTok.skip('\n')) {
f8b58a68 EB	289	if (buf_.length() >= Config.maxRequestHeaderSize) {
	290	/* who should we blame for our failure to parse this line? */
	291
417da400	292	Tokenizer methodTok(buf_);
f8b58a68 EB	293	if (!parseMethodField(methodTok))
	294	return -1; // blame a bad method (or its delimiter)
	295
	296	// assume it is the URI
	297	debugs(74, ErrorLevel(), "invalid request-line: URI exceeds " <<
a95f4c73	298	Config.maxRequestHeaderSize << "-byte limit");
f8b58a68 EB	299	parseStatusCode = Http::scUriTooLong;
	300	return -1;
	301	}
947ca0c6 AJ	302	debugs(74, 5, "Parser needs more data");
947ca0c6 AJ	303	return 0;
4c14658e AJ	304	}
4c14658e AJ	305
417da400	306	Tokenizer tok(line);
78a63ed1	307
e02f963c AR	308	if (!parseMethodField(tok))
e02f963c AR	309	return -1;
e47e0802	310
e02f963c AR	311	/* now parse backwards, to leave just the URI */
	312	if (!skipTrailingCrs(tok))
	313	return -1;
	314
	315	if (!parseHttpVersionField(tok))
	316	return -1;
947ca0c6	317
f8b58a68	318	if (!http0() && !skipDelimiter(tok.skipAllTrailing(DelimiterCharacters()), "before protocol version"))
e02f963c AR	319	return -1;
	320
	321	/* parsed everything before and after the URI */
	322
	323	if (!parseUriField(tok))
	324	return -1;
	325
	326	if (!tok.atEnd()) {
	327	debugs(33, ErrorLevel(), "invalid request-line: garbage after URI");
	328	parseStatusCode = Http::scBadRequest;
	329	return -1;
4c14658e	330	}
4c14658e	331
e02f963c AR	332	parseStatusCode = Http::scOkay;
	333	buf_ = lineTok.remaining(); // incremental parse checkpoint
	334	return 1;
4c14658e	335	}
7a4fa6a0	336
87abd755	337	bool
36a9c964	338	Http::One::RequestParser::parse(const SBuf &aBuf)
6b2b6cfe CT	339	{
	340	const bool result = doParse(aBuf);
	341	if (preserveParsed_) {
	342	assert(aBuf.length() >= remaining().length());
	343	parsed_.append(aBuf.substr(0, aBuf.length() - remaining().length())); // newly parsed bytes
	344	}
	345
	346	return result;
	347	}
	348
	349	// raw is not a reference because a reference might point back to our own buf_ or parsed_
	350	bool
	351	Http::One::RequestParser::doParse(const SBuf &aBuf)
4c14658e	352	{
b749de75	353	buf_ = aBuf;
36a9c964 AJ	354	debugs(74, DBG_DATA, "Parse buf={length=" << aBuf.length() << ", data='" << aBuf << "'}");
36a9c964 AJ	355
cbcd99df	356	// stage 1: locate the request-line
36a9c964	357	if (parsingStage_ == HTTP_PARSE_NONE) {
cbcd99df	358	skipGarbageLines();
cbcd99df AJ	359
cbcd99df AJ	360	// if we hit something before EOS treat it as a message
b749de75	361	if (!buf_.isEmpty())
cbcd99df AJ	362	parsingStage_ = HTTP_PARSE_FIRST;
cbcd99df AJ	363	else
f9daf571	364	return false;
cbcd99df	365	}
c11191e0	366
cbcd99df AJ	367	// stage 2: parse the request-line
cbcd99df AJ	368	if (parsingStage_ == HTTP_PARSE_FIRST) {
f4880526	369	PROF_start(HttpParserParseReqLine);
678451c0	370	const int retcode = parseRequestFirstLine();
e4cff825 AJ	371
	372	// first-line (or a look-alike) found successfully.
	373	if (retcode > 0) {
e4cff825 AJ	374	parsingStage_ = HTTP_PARSE_MIME;
	375	}
	376
947ca0c6 AJ	377	debugs(74, 5, "request-line: retval " << retcode << ": line={" << aBuf.length() << ", data='" << aBuf << "'}");
	378	debugs(74, 5, "request-line: method: " << method_);
	379	debugs(74, 5, "request-line: url: " << uri_);
	380	debugs(74, 5, "request-line: proto: " << msgProtocol_);
b749de75	381	debugs(74, 5, "Parser: bytes processed=" << (aBuf.length()-buf_.length()));
f4880526	382	PROF_stop(HttpParserParseReqLine);
cbcd99df AJ	383
cbcd99df AJ	384	// syntax errors already
f4880526	385	if (retcode < 0) {
cbcd99df	386	parsingStage_ = HTTP_PARSE_DONE;
f4880526 AJ	387	return false;
	388	}
	389	}
	390
	391	// stage 3: locate the mime header block
cbcd99df	392	if (parsingStage_ == HTTP_PARSE_MIME) {
f4880526	393	// HTTP/1.x request-line is valid and parsing completed.
f8cab755	394	if (!grabMimeBlock("Request", Config.maxRequestHeaderSize)) {
f1d5359e AJ	395	if (parseStatusCode == Http::scHeaderTooLarge)
f1d5359e AJ	396	parseStatusCode = Http::scRequestHeaderFieldsTooLarge;
016a316b AJ	397	return false;
016a316b AJ	398	}
f4880526	399	}
87abd755	400
36a9c964	401	return !needsMoreData();
4c14658e	402	}
f53969cc	403