Fix parser buffer accounting for dropped garbage bytes

author Amos Jeffries <squid3@treenet.co.nz>

Sat, 4 Jan 2014 23:10:48 +0000 (15:10 -0800)

committer Amos Jeffries <squid3@treenet.co.nz>

Sat, 4 Jan 2014 23:10:48 +0000 (15:10 -0800)
author Amos Jeffries <squid3@treenet.co.nz>
Sat, 4 Jan 2014 23:10:48 +0000 (15:10 -0800)
committer Amos Jeffries <squid3@treenet.co.nz>
Sat, 4 Jan 2014 23:10:48 +0000 (15:10 -0800)
diff --git a/src/client_side.cc b/src/client_side.cc

index 9531b199f57f66bfcb44dabeddecc988917a4bf9..56d8fa437995840f7994286b6a65e4b7d25c0efe 100644 (file)
--- a/src/client_side.cc
+++ b/src/client_side.cc
@@ -2234,12 +2234,6 @@ parseHttpRequest(ConnStateData *csd, Http1::RequestParser &hp)
      }
  
      /* We know the whole request is in hp.buf now */
-    size_t req_sz = hp.messageHeaderSize();
-    assert(req_sz <= (size_t) hp.bufsiz);
-
-    /* Will the following be true with HTTP/0.9 requests? probably not .. */
-    /* So the rest of the code will need to deal with '0'-byte headers (ie, none, so don't try parsing em) */
-    assert(req_sz > 0);
  
      /* deny CONNECT via accelerated ports */
      if (hp.method() == Http::METHOD_CONNECT && csd->port && csd->port->flags.accelSurrogate) {
@@ -2560,7 +2554,6 @@ clientProcessRequest(ConnStateData *conn, Http1::RequestParser &hp, ClientSocket
  {
      ClientHttpRequest *http = context->http;
      HttpRequest::Pointer request;
-    bool notedUseOfBuffer = false;
      bool chunked = false;
      bool mustReplyToOptions = false;
      bool unsupportedTe = false;
@@ -2786,10 +2779,6 @@ clientProcessRequest(ConnStateData *conn, Http1::RequestParser &hp, ClientSocket
          request->body_pipe = conn->expectRequestBody(
                                   chunked ? -1 : request->content_length);
  
-        // consume header early so that body pipe gets just the body
-        connNoteUseOfBuffer(conn, http->req_sz);
-        notedUseOfBuffer = true;
-
          /* Is it too large? */
          if (!chunked && // if chunked, we will check as we accumulate
                  clientIsRequestBodyTooLargeForPolicy(request->content_length)) {
@@ -2822,9 +2811,6 @@ clientProcessRequest(ConnStateData *conn, Http1::RequestParser &hp, ClientSocket
      http->doCallouts();
  
  finish:
-    if (!notedUseOfBuffer)
-        connNoteUseOfBuffer(conn, http->req_sz);
-
      /*
       * DPW 2007-05-18
       * Moved the TCP_RESET feature from clientReplyContext::sendMoreData
@@ -2914,6 +2900,11 @@ ConnStateData::clientParseRequests()
  
          /* Process request */
          ClientSocketContext *context = parseHttpRequest(this, *parser_);
+        if (parser_->messageOffset()) {
+            // nothing but prefix garbage in the buffer. consume it.
+            connNoteUseOfBuffer(this, parser_->messageOffset());
+            parser_->noteBufferShift(parser_->messageOffset());
+        }
          PROF_stop(parseHttpRequest);
  
          /* status -1 or 1 */
@@ -2923,6 +2914,9 @@ ConnStateData::clientParseRequests()
                                               CommTimeoutCbPtrFun(clientLifetimeTimeout, context->http));
              commSetConnTimeout(clientConnection, Config.Timeout.lifetime, timeoutCall);
  
+            // Request has now been shifted out of the buffer.
+            // Consume header early so that next action starts with just the next bytes.
+            connNoteUseOfBuffer(this, parser_->messageHeaderSize() + parser_->messageOffset());
              clientProcessRequest(this, *parser_, context);
  
              parsed_req = true; // XXX: do we really need to parse everything right NOW ?
diff --git a/src/http/Http1Parser.cc b/src/http/Http1Parser.cc

index 0ee5bff501ba89967fef9bdf80c4b9e66bd668e4..ff5c84276d94cb7ad6c5712176ecbb1902926afd 100644 (file)
--- a/src/http/Http1Parser.cc
+++ b/src/http/Http1Parser.cc
@@ -40,6 +40,44 @@ Http1::Parser::reset(const char *aBuf, int len)
      debugs(74, DBG_DATA, "Parse " << Raw("buf", buf, bufsiz));
  }
  
+void
+Http1::RequestParser::noteBufferShift(int64_t n)
+{
+    bufsiz -= n;
+
+    // if parsing done, ignore buffer changes.
+    if (completedState_ == HTTP_PARSE_DONE)
+        return;
+
+    // shift the parser resume point to match buffer content
+    parseOffset_ -= n;
+
+#if WHEN_INCREMENTAL_PARSING
+
+    // if have not yet finished request-line
+    if (completedState_ == HTTP_PARSE_NEW) {
+        // check for and adjust the known request-line offsets.
+
+        /* TODO: when the first-line is parsed incrementally we
+         * will need to recalculate the offsets for req.*
+         * For now, they are all re-calculated based on parserOffset_
+         * with each parse attempt.
+         */
+    }
+
+    // if finished request-line but not mime header
+    // adjust the mime header states
+    if (completedState_ == HTTP_PARSE_FIRST) {
+        /* TODO: when the mime-header is parsed incrementally we
+         * will need to store the initial offset of mime-header block
+         * instead of locatign it from req.end or parseOffset_.
+         * Since req.end may no longer be valid, and parseOffset_ may
+         * have moved into the mime-block interior.
+         */
+    }
+#endif
+}
+
  /**
   * Attempt to parse the first line of a new request message.
   *
@@ -343,7 +381,7 @@ Http1::RequestParser::parse()
      // stage 1: locate the request-line
      if (completedState_ == HTTP_PARSE_NEW) {
          if (skipGarbageLines() && (size_t)bufsiz < parseOffset_)
-            return 0;
+            return false;
      }
  
      // stage 2: parse the request-line
diff --git a/src/http/Http1Parser.h b/src/http/Http1Parser.h

index 609f8bd5f07fc00ffc6406f7008b4e9e59f33f9f..2679725138c565cea051339be9ce8731cd3bd2ab 100644 (file)
--- a/src/http/Http1Parser.h
+++ b/src/http/Http1Parser.h
@@ -45,14 +45,23 @@ public:
      /// Reset the parser for use on a new buffer.
      void reset(const char *aBuf, int len);
  
+    /** Adjust parser state to account for a buffer shift of n bytes.
+     *
+     * The leftmost n bytes bytes have been dropped and all other
+     * bytes shifted left n positions.
+     */
+    virtual void noteBufferShift(int64_t n) = 0;
+
      /** Whether the parser is already done processing the buffer.
       * Use to determine between incomplete data and errors results
-     * from the parse methods.
+     * from the parse.
       */
      bool isDone() const {return completedState_==HTTP_PARSE_DONE;}
  
-    /// size in bytes of the first line
-    /// including CRLF terminator
+    /// number of bytes in buffer before the message
+    virtual int64_t messageOffset() const = 0;
+
+    /// size in bytes of the first line including CRLF terminator
      virtual int64_t firstLineSize() const = 0;
  
      /// size in bytes of the message headers including CRLF terminator(s)
@@ -111,6 +120,8 @@ public:
      RequestParser() : Parser() {}
      RequestParser(const char *aBuf, int len) : Parser(aBuf, len) {}
      virtual void clear();
+    virtual void noteBufferShift(int64_t n);
+    virtual int64_t messageOffset() const {return req.start;};
      virtual int64_t firstLineSize() const {return req.end - req.start + 1;}
      virtual bool parse();
author	Amos Jeffries <squid3@treenet.co.nz>
	Sat, 4 Jan 2014 23:10:48 +0000 (15:10 -0800)
committer	Amos Jeffries <squid3@treenet.co.nz>
	Sat, 4 Jan 2014 23:10:48 +0000 (15:10 -0800)
src/client_side.cc		patch \| blob \| blame \| history
src/http/Http1Parser.cc		patch \| blob \| blame \| history
src/http/Http1Parser.h		patch \| blob \| blame \| history