void
HttpParser::clear()
{
- state = HTTP_PARSE_NONE;
+ completedState_ = HTTP_PARSE_NONE;
request_parse_status = Http::scNone;
buf = NULL;
bufsiz = 0;
+ parseOffset_ = 0;
req.start = req.end = -1;
hdr_start = hdr_end = -1;
req.m_start = req.m_end = -1;
HttpParser::reset(const char *aBuf, int len)
{
clear(); // empty the state.
- state = HTTP_PARSE_NEW;
+ completedState_ = HTTP_PARSE_NEW;
buf = aBuf;
bufsiz = len;
- debugs(74, 5, HERE << "Request buffer is " << buf);
+ debugs(74, DBG_DATA, "Request parse " << Raw("buf", buf, bufsiz));
}
int
int first_whitespace = -1, last_whitespace = -1; // track the first and last SP byte
int line_end = -1; // tracks the last byte BEFORE terminal \r\n or \n sequence
- debugs(74, 5, HERE << "parsing possible request: " << buf);
+ debugs(74, 5, "parsing possible request: bufsiz=" << bufsiz << ", offset=" << parseOffset_);
+ debugs(74, DBG_DATA, Raw("(buf+offset)", buf+parseOffset_, bufsiz-parseOffset_));
// Single-pass parse: (provided we have the whole line anyways)
- req.start = 0;
+ assert(completedState_ == HTTP_PARSE_NEW);
+
+ req.start = parseOffset_; // avoid re-parsing any portion we managed to complete
if (Config.onoff.relaxed_header_parser) {
if (Config.onoff.relaxed_header_parser < 0 && buf[req.start] == ' ')
debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " <<
"Ignored due to relaxed_header_parser.");
// Be tolerant of prefix spaces (other bytes are valid method values)
for (; req.start < bufsiz && buf[req.start] == ' '; ++req.start);
+ parseOffset_ = req.start;
}
req.end = -1;
for (int i = 0; i < bufsiz; ++i) {
req.v_min = 9;
req.u_end = line_end;
request_parse_status = Http::scOkay; // HTTP/0.9
+ parseOffset_ = line_end;
return 1;
} else {
// otherwise last whitespace is somewhere after end of URI.
req.v_min = 9;
req.u_end = line_end;
request_parse_status = Http::scOkay; // treat as HTTP/0.9
+ completedState_ = HTTP_PARSE_FIRST;
+ parseOffset_ = req.end;
return 1;
#else
// protocol not supported / implemented.
* Rightio - we have all the schtuff. Return true; we've got enough.
*/
request_parse_status = Http::scOkay;
+ parseOffset_ = req.end+1; // req.end is the \n byte. Next parse step needs to start *after* that byte.
+ completedState_ = HTTP_PARSE_FIRST;
return 1;
}
// Parser states
#define HTTP_PARSE_NONE 0 // nothing. completely unset state.
#define HTTP_PARSE_NEW 1 // initialized, but nothing usefully parsed yet.
+#define HTTP_PARSE_FIRST 2 // have parsed request first line
/** HTTP protocol parser.
*
/// Reset the parser for use on a new buffer.
void reset(const char *aBuf, int len);
+ /// whether the parser is already processing the buffer
+ bool isDone() const {return completedState_==HTTP_PARSE_FIRST;}
+
/**
* Attempt to parse the first line of a new request message.
*
int parseRequestFirstLine();
public:
- uint8_t state;
const char *buf;
int bufsiz;
* Http::scNone indicates incomplete parse, Http::scOkay indicates no error.
*/
Http::StatusCode request_parse_status;
+
+private:
+ /// byte offset for non-parsed region of the buffer
+ size_t parseOffset_;
+
+ /// what stage the parser is currently up to
+ uint8_t completedState_;
};
// Legacy functions
// Loop while we have read bytes that are not needed for producing the body
// On errors, bodyPipe may become nil, but readMore will be cleared
while (in.notYetUsed > 0 && !bodyPipe && flags.readMore) {
- connStripBufferWhitespace(this);
+ connStripBufferWhitespace(this); // XXX: should not be needed anymore.
/* Don't try to parse if the buffer is empty */
if (in.notYetUsed == 0)
/* Begin the parsing */
PROF_start(parseHttpRequest);
- parser_ = new HttpParser(in.buf, in.notYetUsed);
+
+ // parser is incremental. Generate new parser state if we,
+ // a) dont have one already
+ // b) have completed the previous request parsing already
+ if (!parser_ || parser_->isDone())
+ parser_ = new HttpParser(in.buf, in.notYetUsed);
+ else // update the buffer space being parsed
+ parser_->bufsiz = in.notYetUsed;
/* Process request */
Http::ProtocolVersion http_ver;
#include <cppunit/TestAssert.h>
+#define private public
+#define protected public
+
#include "testHttpParser.h"
#include "HttpParser.h"
#include "Mem.h"
CPPUNIT_ASSERT_EQUAL(0, output.req.v_min);
input.reset();
}
+
+void
+testHttpParser::testDripFeed()
+{
+ // Simulate a client drip-feeding Squid a few bytes at a time.
+ // extend the size of the buffer from 0 bytes to full request length
+ // calling the parser repeatedly as visible data grows.
+
+ MemBuf mb;
+ mb.init(1024, 1024);
+ mb.append(" ", 12);
+ int garbageEnd = mb.contentSize();
+ mb.append("GET http://example.com/ HTTP/1.1\r\n", 34);
+ int reqLineEnd = mb.contentSize();
+ mb.append("\n", 1);
+
+ HttpParser hp(mb.content(), 0);
+
+ // only relaxed parser accepts the garbage whitespace
+ Config.onoff.relaxed_header_parser = 1;
+
+ for (; hp.bufsiz < mb.contentSize(); ++hp.bufsiz) {
+ int parseResult = hp.parseRequestFirstLine();
+
+#if WHEN_TEST_DEBUG_IS_NEEDED
+ printf("%d/%d :: %d, %d, %d '%c'\n", hp.bufsiz, mb.contentSize(),
+ garbageEnd, reqLineEnd, parseResult,
+ mb.content()[hp.bufsiz]);
+#endif
+
+ // before end of garbage found its a moving offset.
+ if (hp.bufsiz < garbageEnd) {
+ CPPUNIT_ASSERT_EQUAL(hp.bufsiz, (int)hp.parseOffset_);
+ continue;
+ }
+
+ // before request line found, parse announces incomplete
+ if (hp.bufsiz < reqLineEnd) {
+ CPPUNIT_ASSERT_EQUAL(garbageEnd, (int)hp.parseOffset_);
+ CPPUNIT_ASSERT_EQUAL(0, parseResult);
+ continue;
+ }
+
+ // once request line is found (AND the following \n) current parser announces success
+ CPPUNIT_ASSERT_EQUAL(reqLineEnd, (int)hp.parseOffset_);
+ CPPUNIT_ASSERT_EQUAL(1, parseResult);
+ }
+}
CPPUNIT_TEST( testParseRequestLineProtocols );
CPPUNIT_TEST( testParseRequestLineStrange );
CPPUNIT_TEST( testParseRequestLineInvalid );
+ CPPUNIT_TEST( testDripFeed );
CPPUNIT_TEST_SUITE_END();
protected:
void testParseRequestLineProtocols(); // protocol tokens handled correctly
void testParseRequestLineStrange(); // strange but valid lines accepted
void testParseRequestLineInvalid(); // rejection of invalid lines happens
+
+ void testDripFeed(); // test incremental parse works
};
#endif