From: Marcin Siodelski Date: Tue, 13 Nov 2018 13:27:10 +0000 (+0100) Subject: [#57,!122] Improve HTTP body parsing performance. X-Git-Tag: 268-reservation-mode-is-not-global_base~28^2~9 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=2ffe7e666a8f9b57b954c97221574627e7673017;p=thirdparty%2Fkea.git [#57,!122] Improve HTTP body parsing performance. --- diff --git a/src/lib/http/http_message_parser_base.cc b/src/lib/http/http_message_parser_base.cc index 9adc2d6363..f3b12b0767 100644 --- a/src/lib/http/http_message_parser_base.cc +++ b/src/lib/http/http_message_parser_base.cc @@ -136,13 +136,33 @@ void HttpMessageParserBase::stateWithReadHandler(const std::string& handler_name, boost::function after_read_logic) { - char c = getNextFromBuffer(); + std::string bytes; + getNextFromBuffer(bytes); // Do nothing if we reached the end of buffer. if (getNextEvent() != NEED_MORE_DATA_EVT) { switch(getNextEvent()) { case DATA_READ_OK_EVT: case MORE_DATA_PROVIDED_EVT: - after_read_logic(c); + after_read_logic(bytes[0]); + break; + default: + invalidEventError(handler_name, getNextEvent()); + } + } +} + +void +HttpMessageParserBase::stateWithMultiReadHandler(const std::string& handler_name, + boost::function + after_read_logic) { + std::string bytes; + getNextFromBuffer(bytes, 0); + // Do nothing if we reached the end of buffer. + if (getNextEvent() != NEED_MORE_DATA_EVT) { + switch(getNextEvent()) { + case DATA_READ_OK_EVT: + case MORE_DATA_PROVIDED_EVT: + after_read_logic(bytes); break; default: invalidEventError(handler_name, getNextEvent()); @@ -163,10 +183,10 @@ HttpMessageParserBase::onModelFailure(const std::string& explanation) { } } -char -HttpMessageParserBase::getNextFromBuffer() { +void +HttpMessageParserBase::getNextFromBuffer(std::string& bytes, const size_t limit) { unsigned int ev = getNextEvent(); - char c = '\0'; + bytes = "\0"; // The caller should always provide additional data when the // NEED_MORE_DATA_EVT occurs. If the next event is still // NEED_MORE_DATA_EVT it indicates that the caller hasn't provided @@ -178,8 +198,8 @@ HttpMessageParserBase::getNextFromBuffer() { " a deadlock. This is a Kea HTTP server logic error!"); } else { - // Try to pop next character from the buffer. - const bool data_exist = popNextFromBuffer(c); + // Try to retrieve characters from the buffer. + const bool data_exist = popNextFromBuffer(bytes, limit); if (!data_exist) { // There is no more data so it is really not possible that we're // at MORE_DATA_PROVIDED_EVT. @@ -193,11 +213,10 @@ HttpMessageParserBase::getNextFromBuffer() { } else { // If there is no more data we should set NEED_MORE_DATA_EVT // event to indicate that new data should be provided. - transition(getCurrState(), NEED_MORE_DATA_EVT); + postNextEvent(NEED_MORE_DATA_EVT); } } } - return (c); } void @@ -224,16 +243,23 @@ HttpMessageParserBase::parseEndedHandler() { } bool -HttpMessageParserBase::popNextFromBuffer(char& next) { +HttpMessageParserBase::popNextFromBuffer(std::string& next, const size_t limit) { // If there are any characters in the buffer, pop next. if (buffer_pos_ < buffer_.size()) { - next = buffer_[buffer_pos_++]; + next = buffer_.substr(buffer_pos_, limit == 0 ? std::string::npos : limit); + + if (limit > 0) { + buffer_pos_ += limit; + } + + if ((buffer_pos_ > buffer_.size()) || (limit == 0)) { + buffer_pos_ = buffer_.size(); + } return (true); } return (false); } - bool HttpMessageParserBase::isChar(const char c) const { // was (c >= 0) && (c <= 127) diff --git a/src/lib/http/http_message_parser_base.h b/src/lib/http/http_message_parser_base.h index 5dc9e90ddb..cbff213bd4 100644 --- a/src/lib/http/http_message_parser_base.h +++ b/src/lib/http/http_message_parser_base.h @@ -12,7 +12,6 @@ #include #include #include -#include namespace isc { namespace http { @@ -199,6 +198,26 @@ protected: boost::function after_read_logic); + /// @brief Generic parser handler which reads multiple bytes of data and + /// parses it using specified callback function. + /// + /// This handler is mostly used for parsing body of the HTTP message, + /// where we don't validate the content read. Reading multiple bytes + /// is the most efficient. If there is no more data it simply returns. + /// Otherwise, if the next event is DATA_READ_OK_EVT or + /// MORE_DATA_PROVIDED_EVT, it calls the provided callback function to + /// parse the new byte of data. + /// + /// @param handler_name Name of the handler function which called this + /// method. + /// @param after_read_logic Callback function to parse multiple bytes of + /// data. This callback function implements state specific logic. + /// + /// @throw HttpRequestParserError when invalid event occurred. + void stateWithMultiReadHandler(const std::string& handler_name, + boost::function + after_read_logic); + /// @brief Transition parser to failure state. /// /// This method transitions the parser to @ref HTTP_PARSE_FAILED_ST and @@ -213,12 +232,15 @@ protected: /// failure. virtual void onModelFailure(const std::string& explanation); - /// @brief Retrieves next byte of data from the buffer. + /// @brief Retrieves next bytes of data from the buffer. /// /// During normal operation, when there is no more data in the buffer, /// the parser sets NEED_MORE_DATA_EVT as next event to signal the need for /// calling @ref HttpMessageParserBase::postBuffer. /// + /// @param [out] bytes Reference to the variable where read data should be stored. + /// @param limit Maximum number of bytes to be read. + /// /// @throw HttpMessageParserBaseError If current event is already set to /// NEED_MORE_DATA_EVT or MORE_DATA_PROVIDED_EVT. In the former case, it /// indicates that the caller failed to provide new data using @@ -227,7 +249,7 @@ protected: /// parser was changed from NEED_MORE_DATA_EVT or the data were provided /// but the data buffer is empty. In both cases, it is an internal server /// error. - char getNextFromBuffer(); + void getNextFromBuffer(std::string& bytes, const size_t limit = 1); /// @brief This method is called when invalid event occurred in a particular /// parser state. @@ -255,9 +277,10 @@ protected: /// /// @param [out] next A reference to the variable where read data should be /// stored. + /// @param limit Maximum number of characters to be read. /// - /// @return true if character was successfully read, false otherwise. - bool popNextFromBuffer(char& next); + /// @return true if data was successfully read, false otherwise. + bool popNextFromBuffer(std::string& next, const size_t limit = 1); /// @brief Checks if specified value is a character. /// @@ -278,7 +301,7 @@ protected: HttpMessage& message_; /// @brief Internal buffer from which parser reads data. - std::vector buffer_; + std::string buffer_; /// @brief Position of the next character to read from the buffer. size_t buffer_pos_; diff --git a/src/lib/http/request_parser.cc b/src/lib/http/request_parser.cc index 50119c2ef5..98547040bd 100644 --- a/src/lib/http/request_parser.cc +++ b/src/lib/http/request_parser.cc @@ -142,18 +142,19 @@ HttpRequestParser::defineStates() { void HttpRequestParser::receiveStartHandler() { - char c = getNextFromBuffer(); + std::string bytes; + getNextFromBuffer(bytes); if (getNextEvent() != NEED_MORE_DATA_EVT) { switch(getNextEvent()) { case START_EVT: // The first byte should contain a first character of the // HTTP method name. - if (!isChar(c) || isCtl(c) || isSpecial(c)) { - parseFailure("invalid first character " + std::string(1, c) + + if (!isChar(bytes[0]) || isCtl(bytes[0]) || isSpecial(bytes[0])) { + parseFailure("invalid first character " + std::string(1, bytes[0]) + " in HTTP method name"); } else { - context_->method_.push_back(c); + context_->method_.push_back(bytes[0]); transition(HTTP_METHOD_ST, DATA_READ_OK_EVT); } break; @@ -433,14 +434,19 @@ HttpRequestParser::headerValueHandler() { void HttpRequestParser::bodyHandler() { - stateWithReadHandler("bodyHandler", [this](const char c) { + stateWithMultiReadHandler("bodyHandler", [this](const std::string& body) { // We don't validate the body at this stage. Simply record the // number of characters specified within "Content-Length". - context_->body_.push_back(c); - if (context_->body_.length() < - request_.getHeaderValueAsUint64("Content-Length")) { + context_->body_ += body; + size_t content_length = request_.getHeaderValueAsUint64("Content-Length"); + if (context_->body_.length() < content_length) { transition(HTTP_BODY_ST, DATA_READ_OK_EVT); + } else { + // If there was some extraneous data, ignore it. + if (context_->body_.length() > content_length) { + context_->body_.resize(content_length); + } transition(HTTP_PARSE_OK_ST, HTTP_PARSE_OK_EVT); } }); diff --git a/src/lib/http/response_parser.cc b/src/lib/http/response_parser.cc index b412a5b02b..9bbace0d74 100644 --- a/src/lib/http/response_parser.cc +++ b/src/lib/http/response_parser.cc @@ -155,15 +155,16 @@ HttpResponseParser::defineStates() { void HttpResponseParser::receiveStartHandler() { - char c = getNextFromBuffer(); + std::string bytes; + getNextFromBuffer(bytes); if (getNextEvent() != NEED_MORE_DATA_EVT) { switch(getNextEvent()) { case START_EVT: - if (c == 'H') { + if (bytes[0] == 'H') { transition(HTTP_VERSION_T1_ST, DATA_READ_OK_EVT); } else { - parseFailure("unexpected first character " + std::string(1, c) + + parseFailure("unexpected first character " + std::string(1, bytes[0]) + ": expected \'H\'"); } break; @@ -435,14 +436,19 @@ HttpResponseParser::headerValueHandler() { void HttpResponseParser::bodyHandler() { - stateWithReadHandler("bodyHandler", [this](const char c) { + stateWithMultiReadHandler("bodyHandler", [this](const std::string& body) { // We don't validate the body at this stage. Simply record the // number of characters specified within "Content-Length". - context_->body_.push_back(c); - if (context_->body_.length() < - response_.getHeaderValueAsUint64("Content-Length")) { + context_->body_ += body; + size_t content_length = response_.getHeaderValueAsUint64("Content-Length"); + if (context_->body_.length() < content_length) { transition(HTTP_BODY_ST, DATA_READ_OK_EVT); + } else { + // If there was some extraneous data, ignore it. + if (context_->body_.length() > content_length) { + context_->body_.resize(content_length); + } transition(HTTP_PARSE_OK_ST, HTTP_PARSE_OK_EVT); } });