[thirdparty/squid.git] / src / http / one / RequestParser.cc

/*
 * Copyright (C) 1996-2014 The Squid Software Foundation and contributors
 *
 * Squid software is distributed under GPLv2+ license and includes
 * contributions from numerous individuals and organizations.
 * Please see the COPYING and CONTRIBUTORS files for details.
 */

#include "squid.h"
#include "Debug.h"
#include "http/one/RequestParser.h"
#include "http/ProtocolVersion.h"
#include "mime_header.h"
#include "profiler/Profiler.h"
#include "SquidConfig.h"

/// Creates a parser that has no parse status yet and no request-line
/// offsets recorded.
Http::One::RequestParser::RequestParser() :
    Parser(),
    request_parse_status(Http::scNone)
{
    // -1 is the "not located" value for every offset pair

    // whole request-line
    req.end = -1;
    req.start = -1;

    // method
    req.m_end = -1;
    req.m_start = -1;

    // URI
    req.u_end = -1;
    req.u_start = -1;

    // protocol version
    req.v_end = -1;
    req.v_start = -1;
}

/**
 * Discard empty lines (and, when built with USE_HTTP_VIOLATIONS, SP bytes)
 * found in front of a new request message.
 *
 * Governed by RFC 7230 section 3.5
 *  "
 *    In the interest of robustness, a server that is expecting to receive
 *    and parse a request-line SHOULD ignore at least one empty line (CRLF)
 *    received prior to the request-line.
 *  "
 *
 * Nothing is skipped unless relaxed_header_parser is enabled; a negative
 * setting additionally logs a warning about the tolerated bytes.
 * Skipped bytes are consumed from the front of buf_, which may leave
 * buf_ empty.
 */
void
Http::One::RequestParser::skipGarbageLines()
{
    if (Config.onoff.relaxed_header_parser) {
        // NP: only look at buf_[0] when there is at least one byte to look at
        if (Config.onoff.relaxed_header_parser < 0 && !buf_.isEmpty() && (buf_[0] == '\r' || buf_[0] == '\n'))
            debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " <<
                   "CRLF bytes received ahead of request-line. " <<
                   "Ignored due to relaxed_header_parser.");
        // Be tolerant of prefix empty lines
        // ie any series of either \n or \r\n with no other characters and no repeated \r
        // NP: buf_[1] is only examined once a second byte has been received,
        //     so a lone trailing \r is left in place rather than read past.
        while (!buf_.isEmpty() && (buf_[0] == '\n' || (buf_.length() > 1 && buf_[0] == '\r' && buf_[1] == '\n'))) {
            buf_.consume(1);
        }
    }

    /* XXX: this is a Squid-specific tolerance
     * it appears never to have been relevant outside our unit-tests
     * because the ConnStateData parser loop starts with consumeWhitespace()
     * which absorbs any SP HTAB VTAB CR LF characters.
     * But unit-tests called the HttpParser method directly without that pruning.
     */
#if USE_HTTP_VIOLATIONS
    if (Config.onoff.relaxed_header_parser) {
        // NP: the empty-line skipping above may have drained the buffer
        if (Config.onoff.relaxed_header_parser < 0 && !buf_.isEmpty() && buf_[0] == ' ')
            debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " <<
                   "Whitespace bytes received ahead of method. " <<
                   "Ignored due to relaxed_header_parser.");
        // Be tolerant of prefix spaces (other bytes are valid method values)
        while (!buf_.isEmpty() && buf_[0] == ' ') {
            buf_.consume(1);
        }
    }
#endif
}

/**
 * Attempt to parse the first line of a new request message.
 *
 * Governed by:
 *  RFC 1945 section 5.1
 *  RFC 7230 section 3.1 and 3.5
 *
 * Parsing state is stored between calls. However the current implementation
 * begins parsing from scratch on every call.
 * The return value tells you whether the parsing state fields are valid or not.
 *
 * The line is scanned once to locate the first SP, the last SP, the first
 * non-SP/non-HT byte after the first SP and the line terminator. Those
 * positions are then turned into the method, URI and version offsets in req,
 * and into method_, uri_ and msgProtocol_.
 *
 * \retval -1  an error occurred. request_parse_status indicates HTTP status result.
 * \retval  1  successful parse. member fields contain the request-line items
 * \retval  0  more data is needed to complete the parse
 */
int
Http::One::RequestParser::parseRequestFirstLine()
{
    int second_word = -1; // track the suspected URI start
    int first_whitespace = -1, last_whitespace = -1; // track the first and last SP byte
    int line_end = -1; // tracks the last byte BEFORE terminal \r\n or \n sequence

    debugs(74, 5, "parsing possible request: buf.length=" << buf_.length());
    debugs(74, DBG_DATA, buf_);

    // Single-pass parse: (provided we have the whole line anyways)

    req.start = 0;
    req.end = -1;
    for (SBuf::size_type i = 0; i < buf_.length(); ++i) {
        // track first and last whitespace (SP only)
        if (buf_[i] == ' ') {
            last_whitespace = i;
            if (first_whitespace < req.start)
                first_whitespace = i;
        }

        // track next non-SP/non-HT byte after first_whitespace
        if (second_word < first_whitespace && buf_[i] != ' ' && buf_[i] != '\t') {
            second_word = i;
        }

        // locate line terminator
        if (buf_[i] == '\n') {
            req.end = i;
            line_end = i - 1;
            break;
        }
        // NP: a CR in the very last received byte is not examined yet;
        //     we need the byte following it to classify it.
        if (i < buf_.length() - 1 && buf_[i] == '\r') {
            if (Config.onoff.relaxed_header_parser) {
                if (Config.onoff.relaxed_header_parser < 0 && buf_[i + 1] == '\r')
                    debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " <<
                           "Series of carriage-return bytes received prior to line terminator. " <<
                           "Ignored due to relaxed_header_parser.");

                // Be tolerant of invalid multiple \r prior to terminal \n
                if (buf_[i + 1] == '\n' || buf_[i + 1] == '\r')
                    line_end = i - 1;
                while (i < buf_.length() - 1 && buf_[i + 1] == '\r')
                    ++i;

                // The CR series ran up to the end of the received bytes.
                // Do not read past the buffer; leave req.end unset so the
                // "needs more data" handling below decides what to do.
                if (i + 1 >= buf_.length())
                    break;

                if (buf_[i + 1] == '\n') {
                    req.end = i + 1;
                    break;
                }
            } else {
                if (buf_[i + 1] == '\n') {
                    req.end = i + 1;
                    line_end = i - 1;
                    break;
                }
            }

            // RFC 7230 section 3.1.1 does not prohibit embedded CR like RFC 2616 used to.
            // However it does explicitly state an exact syntax which omits un-encoded CR
            // and defines 400 (Bad Request) as the required action when
            // handed an invalid request-line.
            request_parse_status = Http::scBadRequest;
            return -1;
        }
    }

    if (req.end == -1) {
        // DoS protection against long first-line
        if ((size_t)buf_.length() >= Config.maxRequestHeaderSize) {
            debugs(33, 5, "Too large request-line");
            // RFC 7230 section 3.1.1 mandatory 414 response if URL longer than acceptable.
            request_parse_status = Http::scUriTooLong;
            return -1;
        }

        debugs(74, 5, "Parser: retval 0: from " << req.start <<
               "->" << req.end << ": needs more data to complete first line.");
        return 0;
    }

    // NP: we have now seen EOL, more-data (0) cannot occur.
    //     From here on any failure is -1, success is 1

    // Input Validation:

    // DoS protection against long first-line
    if ((size_t)(req.end-req.start) >= Config.maxRequestHeaderSize) {
        debugs(33, 5, "Too large request-line");
        request_parse_status = Http::scUriTooLong;
        return -1;
    }

    // Process what we now know about the line structure into field offsets
    // generating HTTP status for any aborts as we go.

    // First non-whitespace = beginning of method
    if (req.start > line_end) {
        request_parse_status = Http::scBadRequest; // empty line
        return -1;
    }
    req.m_start = req.start;

    // First whitespace = end of method
    if (first_whitespace > line_end || first_whitespace < req.start) {
        request_parse_status = Http::scBadRequest; // no SP delimiter after the method
        return -1;
    }
    req.m_end = first_whitespace - 1;
    if (req.m_end < req.m_start) {
        request_parse_status = Http::scBadRequest; // line starts with SP: empty method
        return -1;
    }

    /* Set method_ */
    const SBuf tmp = buf_.substr(req.m_start, req.m_end - req.m_start + 1);
    method_ = HttpRequestMethod(tmp);

    // First non-whitespace after first SP = beginning of URL+Version
    if (second_word > line_end || second_word < req.start) {
        request_parse_status = Http::scBadRequest; // missing URI
        return -1;
    }
    req.u_start = second_word;

    // RFC 1945: SP and version following URI are optional, marking version 0.9
    // we identify this by the last whitespace being earlier than URI start
    if (last_whitespace < second_word && last_whitespace >= req.start) {
        msgProtocol_ = Http::ProtocolVersion(0,9);
        req.u_end = line_end;
        uri_ = buf_.substr(req.u_start, req.u_end - req.u_start + 1);
        request_parse_status = Http::scOkay; // HTTP/0.9
        return 1;
    } else {
        // otherwise last whitespace is somewhere after end of URI.
        req.u_end = last_whitespace;
        // crop any trailing whitespace in the area we think of as URI
        for (; req.u_end >= req.u_start && xisspace(buf_[req.u_end]); --req.u_end);
    }
    if (req.u_end < req.u_start) {
        request_parse_status = Http::scBadRequest; // missing URI
        return -1;
    }
    uri_ = buf_.substr(req.u_start, req.u_end - req.u_start + 1);

    // Last whitespace SP = before start of protocol/version
    if (last_whitespace >= line_end) {
        request_parse_status = Http::scBadRequest; // missing version
        return -1;
    }
    req.v_start = last_whitespace + 1;
    req.v_end = line_end;

    /* RFC 7230 section 2.6 : handle unsupported HTTP major versions cleanly. */
    if ((req.v_end - req.v_start +1) < (int)Http1magic.length() || !buf_.substr(req.v_start, SBuf::npos).startsWith(Http1magic)) {
        // non-HTTP/1 protocols not supported / implemented.
        request_parse_status = Http::scHttpVersionNotSupported;
        return -1;
    }
    // NP: magic octets include the protocol name and major version DIGIT.
    msgProtocol_.protocol = AnyP::PROTO_HTTP;
    msgProtocol_.major = 1;

    // i = last byte of the magic octets; the minor version starts right after
    int i = req.v_start + Http1magic.length() -1;

    // catch missing minor part
    if (++i > line_end) {
        request_parse_status = Http::scHttpVersionNotSupported;
        return -1;
    }
    /* next should be one or more digits */
    // NP: isdigit() is undefined for negative char values, so the (untrusted)
    //     input byte is converted to unsigned char first.
    if (!isdigit(static_cast<unsigned char>(buf_[i]))) {
        request_parse_status = Http::scHttpVersionNotSupported;
        return -1;
    }
    int min = 0;
    for (; i <= line_end && isdigit(static_cast<unsigned char>(buf_[i])) && min < 65536; ++i) {
        min = min * 10;
        min = min + (buf_[i]) - '0';
    }
    // catch too-big values or trailing garbage
    // NP: a fully consumed version leaves i one past line_end. Any i that is
    //     still within the line (including i == line_end) stopped on a
    //     non-digit byte.
    if (min >= 65536 || i <= line_end) {
        request_parse_status = Http::scHttpVersionNotSupported;
        return -1;
    }
    msgProtocol_.minor = min;

    /*
     * Rightio - we have all the schtuff. Return true; we've got enough.
     */
    request_parse_status = Http::scOkay;
    return 1;
}

bool
Http::One::RequestParser::parse(const SBuf &aBuf)
{
    buf_ = aBuf;
    debugs(74, DBG_DATA, "Parse buf={length=" << aBuf.length() << ", data='" << aBuf << "'}");

    // stage 1: locate the request-line
    if (parsingStage_ == HTTP_PARSE_NONE) {
        skipGarbageLines();

        // if we hit something before EOS treat it as a message
        if (!buf_.isEmpty())
            parsingStage_ = HTTP_PARSE_FIRST;
        else
            return false;
    }

    // stage 2: parse the request-line
    if (parsingStage_ == HTTP_PARSE_FIRST) {
        PROF_start(HttpParserParseReqLine);
        const int retcode = parseRequestFirstLine();

        // first-line (or a look-alike) found successfully.
        if (retcode > 0) {
            buf_.consume(firstLineSize()); // first line bytes including CRLF terminator are now done.
            parsingStage_ = HTTP_PARSE_MIME;
        }

        debugs(74, 5, "request-line: retval " << retcode << ": from " << req.start << "->" << req.end <<
               " line={" << aBuf.length() << ", data='" << aBuf << "'}");
        debugs(74, 5, "request-line: method " << req.m_start << "->" << req.m_end << " (" << method_ << ")");
        debugs(74, 5, "request-line: url " << req.u_start << "->" << req.u_end << " (" << uri_ << ")");
        debugs(74, 5, "request-line: proto " << req.v_start << "->" << req.v_end << " (" << msgProtocol_ << ")");
        debugs(74, 5, "Parser: bytes processed=" << (aBuf.length()-buf_.length()));
        PROF_stop(HttpParserParseReqLine);

        // syntax errors already
        if (retcode < 0) {
            parsingStage_ = HTTP_PARSE_DONE;
            return false;
        }
    }

    // stage 3: locate the mime header block
    if (parsingStage_ == HTTP_PARSE_MIME) {
        // HTTP/1.x request-line is valid and parsing completed.
        if (msgProtocol_.major == 1) {
            /* NOTE: HTTP/0.9 requests do not have a mime header block.
             *       So the rest of the code will need to deal with '0'-byte headers
             *       (ie, none, so don't try parsing em)
             */
            int64_t mimeHeaderBytes = 0;
            // XXX: c_str() reallocates. performance regression.
            if ((mimeHeaderBytes = headersEnd(buf_.c_str(), buf_.length())) == 0) {
                if (buf_.length()+firstLineSize() >= Config.maxRequestHeaderSize) {
                    debugs(33, 5, "Too large request");
                    request_parse_status = Http::scRequestHeaderFieldsTooLarge;
                    parsingStage_ = HTTP_PARSE_DONE;
                } else
                    debugs(33, 5, "Incomplete request, waiting for end of headers");
                return false;
            }
            mimeHeaderBlock_ = buf_.consume(mimeHeaderBytes);
            debugs(74, 5, "mime header (0-" << mimeHeaderBytes << ") {" << mimeHeaderBlock_ << "}");

        } else
            debugs(33, 3, "Missing HTTP/1.x identifier");

        // NP: we do not do any further stages here yet so go straight to DONE
        parsingStage_ = HTTP_PARSE_DONE;

        // Squid could handle these headers, but admin does not want to
        if (messageHeaderSize() >= Config.maxRequestHeaderSize) {
            debugs(33, 5, "Too large request");
            request_parse_status = Http::scRequestHeaderFieldsTooLarge;
            return false;
        }
    }

    return !needsMoreData();
}
Commit	Line	Data
eac61ce1 AJ	1	/*
	2	* Copyright (C) 1996-2014 The Squid Software Foundation and contributors
	3	*
	4	* Squid software is distributed under GPLv2+ license and includes
	5	* contributions from numerous individuals and organizations.
	6	* Please see the COPYING and CONTRIBUTORS files for details.
	7	*/
	8
f7f3304a	9	#include "squid.h"
4c14658e	10	#include "Debug.h"
c99510dd AJ	11	#include "http/one/RequestParser.h"
c99510dd AJ	12	#include "http/ProtocolVersion.h"
f4880526	13	#include "mime_header.h"
582c2af2	14	#include "profiler/Profiler.h"
4d5904f7	15	#include "SquidConfig.h"
4c14658e	16
f9688132	17	Http::One::RequestParser::RequestParser() :
f53969cc SM	18	Parser(),
f53969cc SM	19	request_parse_status(Http::scNone)
7322c9dd	20	{
74f478f8	21	req.start = req.end = -1;
74f478f8 AJ	22	req.m_start = req.m_end = -1;
	23	req.u_start = req.u_end = -1;
	24	req.v_start = req.v_end = -1;
4c14658e AJ	25	}
4c14658e AJ	26
c11191e0 AJ	27	/**
	28	* Attempt to parse the first line of a new request message.
	29	*
a4c74dd8	30	* Governed by RFC 7230 section 3.5
c11191e0	31	* "
a4c74dd8 AJ	32	* In the interest of robustness, a server that is expecting to receive
	33	* and parse a request-line SHOULD ignore at least one empty line (CRLF)
	34	* received prior to the request-line.
c11191e0 AJ	35	* "
	36	*
	37	* Parsing state is stored between calls to avoid repeating buffer scans.
cbcd99df	38	* If garbage is found the parsing offset is incremented.
c11191e0	39	*/
cbcd99df	40	void
678451c0	41	Http::One::RequestParser::skipGarbageLines()
c11191e0	42	{
c11191e0	43	if (Config.onoff.relaxed_header_parser) {
b749de75	44	if (Config.onoff.relaxed_header_parser < 0 && (buf_[0] == '\r' \|\| buf_[0] == '\n'))
c11191e0 AJ	45	debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " <<
	46	"CRLF bytes received ahead of request-line. " <<
	47	"Ignored due to relaxed_header_parser.");
	48	// Be tolerant of prefix empty lines
cbcd99df	49	// ie any series of either \n or \r\n with no other characters and no repeated \r
b749de75 AJ	50	while (!buf_.isEmpty() && (buf_[0] == '\n' \|\| (buf_[0] == '\r' && buf_[1] == '\n'))) {
b749de75 AJ	51	buf_.consume(1);
7a4fa6a0	52	}
c11191e0	53	}
c11191e0 AJ	54
	55	/* XXX: this is a Squid-specific tolerance
	56	* it appears never to have been relevant outside out unit-tests
	57	* because the ConnStateData parser loop starts with consumeWhitespace()
	58	* which absorbs any SP HTAB VTAB CR LF characters.
	59	* But unit-tests called the HttpParser method directly without that pruning.
	60	*/
	61	#if USE_HTTP_VIOLATIONS
	62	if (Config.onoff.relaxed_header_parser) {
b749de75	63	if (Config.onoff.relaxed_header_parser < 0 && buf_[0] == ' ')
c11191e0 AJ	64	debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " <<
	65	"Whitespace bytes received ahead of method. " <<
	66	"Ignored due to relaxed_header_parser.");
	67	// Be tolerant of prefix spaces (other bytes are valid method values)
b749de75 AJ	68	while (!buf_.isEmpty() && buf_[0] == ' ') {
b749de75 AJ	69	buf_.consume(1);
7a4fa6a0	70	}
c11191e0 AJ	71	}
c11191e0 AJ	72	#endif
c11191e0 AJ	73	}
	74
	75	/**
	76	* Attempt to parse the first line of a new request message.
	77	*
	78	* Governed by:
	79	* RFC 1945 section 5.1
a4c74dd8	80	* RFC 7230 section 3.1 and 3.5
c11191e0 AJ	81	*
	82	* Parsing state is stored between calls. However the current implementation
	83	* begins parsing from scratch on every call.
	84	* The return value tells you whether the parsing state fields are valid or not.
	85	*
	86	* \retval -1 an error occurred. request_parse_status indicates HTTP status result.
	87	* \retval 1 successful parse. member fields contain the request-line items
	88	* \retval 0 more data is needed to complete the parse
	89	*/
4c14658e	90	int
678451c0	91	Http::One::RequestParser::parseRequestFirstLine()
4c14658e AJ	92	{
	93	int second_word = -1; // track the suspected URI start
	94	int first_whitespace = -1, last_whitespace = -1; // track the first and last SP byte
	95	int line_end = -1; // tracks the last byte BEFORE terminal \r\n or \n sequence
	96
b749de75 AJ	97	debugs(74, 5, "parsing possible request: buf.length=" << buf_.length());
b749de75 AJ	98	debugs(74, DBG_DATA, buf_);
4c14658e AJ	99
	100	// Single-pass parse: (provided we have the whole line anyways)
	101
7a4fa6a0	102	req.start = 0;
74f478f8	103	req.end = -1;
b749de75	104	for (SBuf::size_type i = 0; i < buf_.length(); ++i) {
4c14658e	105	// track first and last whitespace (SP only)
b749de75	106	if (buf_[i] == ' ') {
4c14658e	107	last_whitespace = i;
74f478f8	108	if (first_whitespace < req.start)
4c14658e AJ	109	first_whitespace = i;
	110	}
	111
	112	// track next non-SP/non-HT byte after first_whitespace
b749de75	113	if (second_word < first_whitespace && buf_[i] != ' ' && buf_[i] != '\t') {
4c14658e AJ	114	second_word = i;
	115	}
	116
	117	// locate line terminator
b749de75	118	if (buf_[i] == '\n') {
74f478f8	119	req.end = i;
4c14658e AJ	120	line_end = i - 1;
	121	break;
	122	}
b749de75	123	if (i < buf_.length() - 1 && buf_[i] == '\r') {
4c14658e	124	if (Config.onoff.relaxed_header_parser) {
b749de75	125	if (Config.onoff.relaxed_header_parser < 0 && buf_[i + 1] == '\r')
4c14658e AJ	126	debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " <<
	127	"Series of carriage-return bytes received prior to line terminator. " <<
	128	"Ignored due to relaxed_header_parser.");
	129
	130	// Be tolerant of invalid multiple \r prior to terminal \n
b749de75	131	if (buf_[i + 1] == '\n' \|\| buf_[i + 1] == '\r')
4c14658e	132	line_end = i - 1;
b749de75	133	while (i < buf_.length() - 1 && buf_[i + 1] == '\r')
95dc7ff4	134	++i;
4c14658e	135
b749de75	136	if (buf_[i + 1] == '\n') {
74f478f8	137	req.end = i + 1;
4c14658e AJ	138	break;
	139	}
	140	} else {
b749de75	141	if (buf_[i + 1] == '\n') {
74f478f8	142	req.end = i + 1;
4c14658e AJ	143	line_end = i - 1;
	144	break;
	145	}
	146	}
	147
a4c74dd8 AJ	148	// RFC 7230 section 3.1.1 does not prohibit embeded CR like RFC 2616 used to.
	149	// However it does explicitly state an exact syntax which omits un-encoded CR
	150	// and defines 400 (Bad Request) as the required action when
	151	// handed an invalid request-line.
955394ce	152	request_parse_status = Http::scBadRequest;
4c14658e AJ	153	return -1;
	154	}
	155	}
016a316b	156
74f478f8	157	if (req.end == -1) {
016a316b	158	// DoS protection against long first-line
b749de75	159	if ((size_t)buf_.length() >= Config.maxRequestHeaderSize) {
016a316b	160	debugs(33, 5, "Too large request-line");
a4c74dd8 AJ	161	// RFC 7230 section 3.1.1 mandatory 414 response if URL longer than acceptible.
a4c74dd8 AJ	162	request_parse_status = Http::scUriTooLong;
016a316b AJ	163	return -1;
	164	}
	165
74f478f8 AJ	166	debugs(74, 5, "Parser: retval 0: from " << req.start <<
74f478f8 AJ	167	"->" << req.end << ": needs more data to complete first line.");
4c14658e AJ	168	return 0;
	169	}
	170
	171	// NP: we have now seen EOL, more-data (0) cannot occur.
	172	// From here on any failure is -1, success is 1
	173
4c14658e AJ	174	// Input Validation:
4c14658e AJ	175
016a316b AJ	176	// DoS protection against long first-line
	177	if ((size_t)(req.end-req.start) >= Config.maxRequestHeaderSize) {
	178	debugs(33, 5, "Too large request-line");
a4c74dd8	179	request_parse_status = Http::scUriTooLong;
016a316b AJ	180	return -1;
	181	}
	182
4c14658e AJ	183	// Process what we now know about the line structure into field offsets
	184	// generating HTTP status for any aborts as we go.
	185
	186	// First non-whitespace = beginning of method
74f478f8	187	if (req.start > line_end) {
955394ce	188	request_parse_status = Http::scBadRequest;
4c14658e AJ	189	return -1;
4c14658e AJ	190	}
74f478f8	191	req.m_start = req.start;
4c14658e AJ	192
4c14658e AJ	193	// First whitespace = end of method
74f478f8	194	if (first_whitespace > line_end \|\| first_whitespace < req.start) {
955394ce	195	request_parse_status = Http::scBadRequest; // no method
4c14658e AJ	196	return -1;
4c14658e AJ	197	}
74f478f8 AJ	198	req.m_end = first_whitespace - 1;
74f478f8 AJ	199	if (req.m_end < req.m_start) {
955394ce	200	request_parse_status = Http::scBadRequest; // missing URI?
4c14658e AJ	201	return -1;
	202	}
	203
274bd5ad	204	/* Set method_ */
b749de75	205	const SBuf tmp = buf_.substr(req.m_start, req.m_end - req.m_start + 1);
7a4fa6a0	206	method_ = HttpRequestMethod(tmp);
274bd5ad	207
4c14658e	208	// First non-whitespace after first SP = beginning of URL+Version
74f478f8	209	if (second_word > line_end \|\| second_word < req.start) {
955394ce	210	request_parse_status = Http::scBadRequest; // missing URI
4c14658e AJ	211	return -1;
4c14658e AJ	212	}
74f478f8	213	req.u_start = second_word;
4c14658e AJ	214
	215	// RFC 1945: SP and version following URI are optional, marking version 0.9
	216	// we identify this by the last whitespace being earlier than URI start
74f478f8	217	if (last_whitespace < second_word && last_whitespace >= req.start) {
5aedd08d	218	msgProtocol_ = Http::ProtocolVersion(0,9);
74f478f8	219	req.u_end = line_end;
b749de75	220	uri_ = buf_.substr(req.u_start, req.u_end - req.u_start + 1);
955394ce	221	request_parse_status = Http::scOkay; // HTTP/0.9
4c14658e AJ	222	return 1;
	223	} else {
	224	// otherwise last whitespace is somewhere after end of URI.
74f478f8	225	req.u_end = last_whitespace;
4c14658e	226	// crop any trailing whitespace in the area we think of as URI
b749de75	227	for (; req.u_end >= req.u_start && xisspace(buf_[req.u_end]); --req.u_end);
4c14658e	228	}
74f478f8	229	if (req.u_end < req.u_start) {
955394ce	230	request_parse_status = Http::scBadRequest; // missing URI
4c14658e AJ	231	return -1;
4c14658e AJ	232	}
b749de75	233	uri_ = buf_.substr(req.u_start, req.u_end - req.u_start + 1);
4c14658e AJ	234
	235	// Last whitespace SP = before start of protocol/version
	236	if (last_whitespace >= line_end) {
955394ce	237	request_parse_status = Http::scBadRequest; // missing version
4c14658e AJ	238	return -1;
4c14658e AJ	239	}
74f478f8 AJ	240	req.v_start = last_whitespace + 1;
74f478f8 AJ	241	req.v_end = line_end;
4c14658e	242
a4c74dd8	243	/* RFC 7230 section 2.6 : handle unsupported HTTP major versions cleanly. */
b749de75	244	if ((req.v_end - req.v_start +1) < (int)Http1magic.length() \|\| !buf_.substr(req.v_start, SBuf::npos).startsWith(Http1magic)) {
9651320a	245	// non-HTTP/1 protocols not supported / implemented.
955394ce	246	request_parse_status = Http::scHttpVersionNotSupported;
4c14658e	247	return -1;
4c14658e	248	}
9651320a	249	// NP: magic octets include the protocol name and major version DIGIT.
5aedd08d	250	msgProtocol_.protocol = AnyP::PROTO_HTTP;
9651320a	251	msgProtocol_.major = 1;
4c14658e	252
9651320a	253	int i = req.v_start + Http1magic.length() -1;
4c14658e	254
4c14658e AJ	255	// catch missing minor part
4c14658e AJ	256	if (++i > line_end) {
955394ce	257	request_parse_status = Http::scHttpVersionNotSupported;
4c14658e AJ	258	return -1;
	259	}
	260	/* next should be one or more digits */
b749de75	261	if (!isdigit(buf_[i])) {
955394ce	262	request_parse_status = Http::scHttpVersionNotSupported;
4c14658e AJ	263	return -1;
	264	}
	265	int min = 0;
b749de75	266	for (; i <= line_end && (isdigit(buf_[i])) && min < 65536; ++i) {
4c14658e	267	min = min * 10;
b749de75	268	min = min + (buf_[i]) - '0';
4c14658e AJ	269	}
	270	// catch too-big values or trailing garbage
	271	if (min >= 65536 \|\| i < line_end) {
955394ce	272	request_parse_status = Http::scHttpVersionNotSupported;
4c14658e AJ	273	return -1;
4c14658e AJ	274	}
5aedd08d	275	msgProtocol_.minor = min;
4c14658e AJ	276
	277	/*
	278	* Rightio - we have all the schtuff. Return true; we've got enough.
	279	*/
955394ce	280	request_parse_status = Http::scOkay;
4c14658e AJ	281	return 1;
4c14658e AJ	282	}
7a4fa6a0	283
87abd755	284	bool
36a9c964	285	Http::One::RequestParser::parse(const SBuf &aBuf)
4c14658e	286	{
b749de75	287	buf_ = aBuf;
36a9c964 AJ	288	debugs(74, DBG_DATA, "Parse buf={length=" << aBuf.length() << ", data='" << aBuf << "'}");
36a9c964 AJ	289
cbcd99df	290	// stage 1: locate the request-line
36a9c964	291	if (parsingStage_ == HTTP_PARSE_NONE) {
cbcd99df	292	skipGarbageLines();
cbcd99df AJ	293
cbcd99df AJ	294	// if we hit something before EOS treat it as a message
b749de75	295	if (!buf_.isEmpty())
cbcd99df AJ	296	parsingStage_ = HTTP_PARSE_FIRST;
cbcd99df AJ	297	else
f9daf571	298	return false;
cbcd99df	299	}
c11191e0	300
cbcd99df AJ	301	// stage 2: parse the request-line
cbcd99df AJ	302	if (parsingStage_ == HTTP_PARSE_FIRST) {
f4880526	303	PROF_start(HttpParserParseReqLine);
678451c0	304	const int retcode = parseRequestFirstLine();
e4cff825 AJ	305
	306	// first-line (or a look-alike) found successfully.
	307	if (retcode > 0) {
b749de75	308	buf_.consume(firstLineSize()); // first line bytes including CRLF terminator are now done.
e4cff825 AJ	309	parsingStage_ = HTTP_PARSE_MIME;
	310	}
	311
7a4fa6a0	312	debugs(74, 5, "request-line: retval " << retcode << ": from " << req.start << "->" << req.end <<
e4cff825	313	" line={" << aBuf.length() << ", data='" << aBuf << "'}");
9ff1b8ca	314	debugs(74, 5, "request-line: method " << req.m_start << "->" << req.m_end << " (" << method_ << ")");
5f3cc9a2	315	debugs(74, 5, "request-line: url " << req.u_start << "->" << req.u_end << " (" << uri_ << ")");
f4880526	316	debugs(74, 5, "request-line: proto " << req.v_start << "->" << req.v_end << " (" << msgProtocol_ << ")");
b749de75	317	debugs(74, 5, "Parser: bytes processed=" << (aBuf.length()-buf_.length()));
f4880526	318	PROF_stop(HttpParserParseReqLine);
cbcd99df AJ	319
cbcd99df AJ	320	// syntax errors already
f4880526	321	if (retcode < 0) {
cbcd99df	322	parsingStage_ = HTTP_PARSE_DONE;
f4880526 AJ	323	return false;
	324	}
	325	}
	326
	327	// stage 3: locate the mime header block
cbcd99df	328	if (parsingStage_ == HTTP_PARSE_MIME) {
f4880526 AJ	329	// HTTP/1.x request-line is valid and parsing completed.
	330	if (msgProtocol_.major == 1) {
	331	/* NOTE: HTTP/0.9 requests do not have a mime header block.
	332	* So the rest of the code will need to deal with '0'-byte headers
	333	* (ie, none, so don't try parsing em)
	334	*/
eb1bd364	335	int64_t mimeHeaderBytes = 0;
2169fd4d	336	// XXX: c_str() reallocates. performance regression.
b749de75 AJ	337	if ((mimeHeaderBytes = headersEnd(buf_.c_str(), buf_.length())) == 0) {
b749de75 AJ	338	if (buf_.length()+firstLineSize() >= Config.maxRequestHeaderSize) {
016a316b	339	debugs(33, 5, "Too large request");
a4c74dd8	340	request_parse_status = Http::scRequestHeaderFieldsTooLarge;
cbcd99df	341	parsingStage_ = HTTP_PARSE_DONE;
7a4fa6a0	342	} else
016a316b	343	debugs(33, 5, "Incomplete request, waiting for end of headers");
7a4fa6a0	344	return false;
f4880526	345	}
38012e61 AJ	346	mimeHeaderBlock_ = buf_.consume(mimeHeaderBytes);
38012e61 AJ	347	debugs(74, 5, "mime header (0-" << mimeHeaderBytes << ") {" << mimeHeaderBlock_ << "}");
f4880526	348
7a4fa6a0	349	} else
f4880526	350	debugs(33, 3, "Missing HTTP/1.x identifier");
7a4fa6a0	351
cbcd99df AJ	352	// NP: we do not do any further stages here yet so go straight to DONE
cbcd99df AJ	353	parsingStage_ = HTTP_PARSE_DONE;
016a316b AJ	354
	355	// Squid could handle these headers, but admin does not want to
	356	if (messageHeaderSize() >= Config.maxRequestHeaderSize) {
	357	debugs(33, 5, "Too large request");
a4c74dd8	358	request_parse_status = Http::scRequestHeaderFieldsTooLarge;
016a316b AJ	359	return false;
016a316b AJ	360	}
f4880526	361	}
87abd755	362
36a9c964	363	return !needsMoreData();
4c14658e	364	}
f53969cc	365