From: Amos Jeffries Date: Fri, 1 Nov 2013 23:47:58 +0000 (-0700) Subject: Make Http parser incremental X-Git-Tag: merge-candidate-3-v1~506^2~113 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=b6a7fc859fe563a1c6b66914bc83b5081d94d7bb;p=thirdparty%2Fsquid.git Make Http parser incremental This makes the parser track the start of the un-parsed buffer region and resume parsing at that point instead of reset to the beginning. - add state to mark completion of the first line (request-line) - add unit test for incremental parse states including BWS - make the state tracking member of HttpParser private --- diff --git a/src/HttpParser.cc b/src/HttpParser.cc index a9c6949576..240e4baa01 100644 --- a/src/HttpParser.cc +++ b/src/HttpParser.cc @@ -7,10 +7,11 @@ void HttpParser::clear() { - state = HTTP_PARSE_NONE; + completedState_ = HTTP_PARSE_NONE; request_parse_status = Http::scNone; buf = NULL; bufsiz = 0; + parseOffset_ = 0; req.start = req.end = -1; hdr_start = hdr_end = -1; req.m_start = req.m_end = -1; @@ -23,10 +24,10 @@ void HttpParser::reset(const char *aBuf, int len) { clear(); // empty the state. - state = HTTP_PARSE_NEW; + completedState_ = HTTP_PARSE_NEW; buf = aBuf; bufsiz = len; - debugs(74, 5, HERE << "Request buffer is " << buf); + debugs(74, DBG_DATA, "Request parse " << Raw("buf", buf, bufsiz)); } int @@ -36,11 +37,14 @@ HttpParser::parseRequestFirstLine() int first_whitespace = -1, last_whitespace = -1; // track the first and last SP byte int line_end = -1; // tracks the last byte BEFORE terminal \r\n or \n sequence - debugs(74, 5, HERE << "parsing possible request: " << buf); + debugs(74, 5, "parsing possible request: bufsiz=" << bufsiz << ", offset=" << parseOffset_); + debugs(74, DBG_DATA, Raw("(buf+offset)", buf+parseOffset_, bufsiz-parseOffset_)); // Single-pass parse: (provided we have the whole line anyways) - req.start = 0; + assert(completedState_ == HTTP_PARSE_NEW); + + req.start = parseOffset_; // avoid re-parsing any portion we managed to complete if (Config.onoff.relaxed_header_parser) { if (Config.onoff.relaxed_header_parser < 0 && buf[req.start] == ' ') debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " << @@ -48,6 +52,7 @@ HttpParser::parseRequestFirstLine() "Ignored due to relaxed_header_parser."); // Be tolerant of prefix spaces (other bytes are valid method values) for (; req.start < bufsiz && buf[req.start] == ' '; ++req.start); + parseOffset_ = req.start; } req.end = -1; for (int i = 0; i < bufsiz; ++i) { @@ -146,6 +151,7 @@ HttpParser::parseRequestFirstLine() req.v_min = 9; req.u_end = line_end; request_parse_status = Http::scOkay; // HTTP/0.9 + parseOffset_ = line_end; return 1; } else { // otherwise last whitespace is somewhere after end of URI. @@ -176,6 +182,8 @@ HttpParser::parseRequestFirstLine() req.v_min = 9; req.u_end = line_end; request_parse_status = Http::scOkay; // treat as HTTP/0.9 + completedState_ = HTTP_PARSE_FIRST; + parseOffset_ = req.end; return 1; #else // protocol not supported / implemented. @@ -234,6 +242,8 @@ HttpParser::parseRequestFirstLine() * Rightio - we have all the schtuff. Return true; we've got enough. */ request_parse_status = Http::scOkay; + parseOffset_ = req.end+1; // req.end is the \n byte. Next parse step needs to start *after* that byte. + completedState_ = HTTP_PARSE_FIRST; return 1; } diff --git a/src/HttpParser.h b/src/HttpParser.h index af3d899042..7e5250137e 100644 --- a/src/HttpParser.h +++ b/src/HttpParser.h @@ -7,6 +7,7 @@ // Parser states #define HTTP_PARSE_NONE 0 // nothing. completely unset state. #define HTTP_PARSE_NEW 1 // initialized, but nothing usefully parsed yet. +#define HTTP_PARSE_FIRST 2 // have parsed request first line /** HTTP protocol parser. * @@ -38,6 +39,9 @@ public: /// Reset the parser for use on a new buffer. void reset(const char *aBuf, int len); + /// whether the parser is already processing the buffer + bool isDone() const {return completedState_==HTTP_PARSE_FIRST;} + /** * Attempt to parse the first line of a new request message. * @@ -56,7 +60,6 @@ public: int parseRequestFirstLine(); public: - uint8_t state; const char *buf; int bufsiz; @@ -78,6 +81,13 @@ public: * Http::scNone indicates incomplete parse, Http::scOkay indicates no error. */ Http::StatusCode request_parse_status; + +private: + /// byte offset for non-parsed region of the buffer + size_t parseOffset_; + + /// what stage the parser is currently up to + uint8_t completedState_; }; // Legacy functions diff --git a/src/client_side.cc b/src/client_side.cc index e25e7b093c..f34e7b0494 100644 --- a/src/client_side.cc +++ b/src/client_side.cc @@ -2975,7 +2975,7 @@ ConnStateData::clientParseRequests() // Loop while we have read bytes that are not needed for producing the body // On errors, bodyPipe may become nil, but readMore will be cleared while (in.notYetUsed > 0 && !bodyPipe && flags.readMore) { - connStripBufferWhitespace(this); + connStripBufferWhitespace(this); // XXX: should not be needed anymore. /* Don't try to parse if the buffer is empty */ if (in.notYetUsed == 0) @@ -2991,7 +2991,14 @@ ConnStateData::clientParseRequests() /* Begin the parsing */ PROF_start(parseHttpRequest); - parser_ = new HttpParser(in.buf, in.notYetUsed); + + // parser is incremental. Generate new parser state if we, + // a) dont have one already + // b) have completed the previous request parsing already + if (!parser_ || parser_->isDone()) + parser_ = new HttpParser(in.buf, in.notYetUsed); + else // update the buffer space being parsed + parser_->bufsiz = in.notYetUsed; /* Process request */ Http::ProtocolVersion http_ver; diff --git a/src/tests/testHttpParser.cc b/src/tests/testHttpParser.cc index 5dde9b28d2..c2f4d940ba 100644 --- a/src/tests/testHttpParser.cc +++ b/src/tests/testHttpParser.cc @@ -3,6 +3,9 @@ #include +#define private public +#define protected public + #include "testHttpParser.h" #include "HttpParser.h" #include "Mem.h" @@ -1019,3 +1022,51 @@ testHttpParser::testParseRequestLineInvalid() CPPUNIT_ASSERT_EQUAL(0, output.req.v_min); input.reset(); } + +void +testHttpParser::testDripFeed() +{ + // Simulate a client drip-feeding Squid a few bytes at a time. + // extend the size of the buffer from 0 bytes to full request length + // calling the parser repeatedly as visible data grows. + + MemBuf mb; + mb.init(1024, 1024); + mb.append(" ", 12); + int garbageEnd = mb.contentSize(); + mb.append("GET http://example.com/ HTTP/1.1\r\n", 34); + int reqLineEnd = mb.contentSize(); + mb.append("\n", 1); + + HttpParser hp(mb.content(), 0); + + // only relaxed parser accepts the garbage whitespace + Config.onoff.relaxed_header_parser = 1; + + for (; hp.bufsiz < mb.contentSize(); ++hp.bufsiz) { + int parseResult = hp.parseRequestFirstLine(); + +#if WHEN_TEST_DEBUG_IS_NEEDED + printf("%d/%d :: %d, %d, %d '%c'\n", hp.bufsiz, mb.contentSize(), + garbageEnd, reqLineEnd, parseResult, + mb.content()[hp.bufsiz]); +#endif + + // before end of garbage found its a moving offset. + if (hp.bufsiz < garbageEnd) { + CPPUNIT_ASSERT_EQUAL(hp.bufsiz, (int)hp.parseOffset_); + continue; + } + + // before request line found, parse announces incomplete + if (hp.bufsiz < reqLineEnd) { + CPPUNIT_ASSERT_EQUAL(garbageEnd, (int)hp.parseOffset_); + CPPUNIT_ASSERT_EQUAL(0, parseResult); + continue; + } + + // once request line is found (AND the following \n) current parser announces success + CPPUNIT_ASSERT_EQUAL(reqLineEnd, (int)hp.parseOffset_); + CPPUNIT_ASSERT_EQUAL(1, parseResult); + } +} diff --git a/src/tests/testHttpParser.h b/src/tests/testHttpParser.h index d557e915b7..065d6bcbb8 100644 --- a/src/tests/testHttpParser.h +++ b/src/tests/testHttpParser.h @@ -11,6 +11,7 @@ class testHttpParser : public CPPUNIT_NS::TestFixture CPPUNIT_TEST( testParseRequestLineProtocols ); CPPUNIT_TEST( testParseRequestLineStrange ); CPPUNIT_TEST( testParseRequestLineInvalid ); + CPPUNIT_TEST( testDripFeed ); CPPUNIT_TEST_SUITE_END(); protected: @@ -22,6 +23,8 @@ protected: void testParseRequestLineProtocols(); // protocol tokens handled correctly void testParseRequestLineStrange(); // strange but valid lines accepted void testParseRequestLineInvalid(); // rejection of invalid lines happens + + void testDripFeed(); // test incremental parse works }; #endif