From: Amos Jeffries <squid3@treenet.co.nz>
Date: Mon, 30 Dec 2013 21:22:56 +0000 (-0800)
Subject: Make Http1Parser::parseRequestFirstLine() private and document
X-Git-Tag: merge-candidate-3-v1~506^2~77
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=c11191e0f9af998fae4b454f403d0a62db7d1970;p=thirdparty%2Fsquid.git

Make Http1Parser::parseRequestFirstLine() private and document

Also shuffle the prefix garbage tolerance processing to a separate method
skipGarbageLines() and document the intended operations (it is currently
non-conformant with RFC 2616).
---

diff --git a/src/http/Http1Parser.cc b/src/http/Http1Parser.cc
index db289f1130..b7041fa0c9 100644
--- a/src/http/Http1Parser.cc
+++ b/src/http/Http1Parser.cc
@@ -34,6 +34,77 @@ Http::Http1Parser::reset(const char *aBuf, int len)
     debugs(74, DBG_DATA, "Request parse " << Raw("buf", buf, bufsiz));
 }
 
+/**
+ * Attempt to parse the first line of a new request message.
+ *
+ * Governed by RFC 2616 section 4.1
+ *  "
+ *    In the interest of robustness, servers SHOULD ignore any empty
+ *    line(s) received where a Request-Line is expected. In other words, if
+ *    the server is reading the protocol stream at the beginning of a
+ *    message and receives a CRLF first, it should ignore the CRLF.
+ *
+ *    ... To restate what is explicitly forbidden by the
+ *    BNF, an HTTP/1.1 client MUST NOT preface or follow a request with an
+ *    extra CRLF.
+ *  "
+ *
+ * Parsing state is stored between calls to avoid repeating buffer scans.
+ * \return true if garbage whitespace was found
+ */
+bool
+Http::Http1Parser::skipGarbageLines()
+{
+    req.start = parseOffset_; // avoid re-parsing any portion we managed to complete
+
+#if WHEN_RFC_COMPLIANT // CRLF or bare-LF is what RFC 2616 tolerant parsers do ...
+    if (Config.onoff.relaxed_header_parser) {
+        if (Config.onoff.relaxed_header_parser < 0 && (buf[req.start] == '\r' || buf[req.start] == '\n'))
+            debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " <<
+                   "CRLF bytes received ahead of request-line. " <<
+                   "Ignored due to relaxed_header_parser.");
+        // Be tolerant of prefix empty lines
+        for (; req.start < bufsiz && (buf[req.start] == '\n' || ((buf[req.start] == '\r' && (buf[req.start+1] == '\n')); ++req.start);
+        parseOffset_ = req.start;
+    }
+#endif
+
+    /* XXX: this is a Squid-specific tolerance
+     * it appears never to have been relevant outside out unit-tests
+     * because the ConnStateData parser loop starts with consumeWhitespace()
+     * which absorbs any SP HTAB VTAB CR LF characters.
+     * But unit-tests called the HttpParser method directly without that pruning.
+     */
+#if USE_HTTP_VIOLATIONS
+    if (Config.onoff.relaxed_header_parser) {
+        if (Config.onoff.relaxed_header_parser < 0 && buf[req.start] == ' ')
+            debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " <<
+                   "Whitespace bytes received ahead of method. " <<
+                   "Ignored due to relaxed_header_parser.");
+        // Be tolerant of prefix spaces (other bytes are valid method values)
+        for (; req.start < bufsiz && buf[req.start] == ' '; ++req.start);
+        parseOffset_ = req.start;
+    }
+#endif
+
+    return (parseOffset_ > 0);
+}
+
+/**
+ * Attempt to parse the first line of a new request message.
+ *
+ * Governed by:
+ *  RFC 1945 section 5.1
+ *  RFC 2616 section 5.1
+ *
+ * Parsing state is stored between calls. However the current implementation
+ * begins parsing from scratch on every call.
+ * The return value tells you whether the parsing state fields are valid or not.
+ *
+ * \retval -1  an error occurred. request_parse_status indicates HTTP status result.
+ * \retval  1  successful parse. member fields contain the request-line items
+ * \retval  0  more data is needed to complete the parse
+ */
 int
 Http::Http1Parser::parseRequestFirstLine()
 {
@@ -47,15 +118,6 @@ Http::Http1Parser::parseRequestFirstLine()
     // Single-pass parse: (provided we have the whole line anyways)
 
     req.start = parseOffset_; // avoid re-parsing any portion we managed to complete
-    if (Config.onoff.relaxed_header_parser) {
-        if (Config.onoff.relaxed_header_parser < 0 && buf[req.start] == ' ')
-            debugs(74, DBG_IMPORTANT, "WARNING: Invalid HTTP Request: " <<
-                   "Whitespace bytes received ahead of method. " <<
-                   "Ignored due to relaxed_header_parser.");
-        // Be tolerant of prefix spaces (other bytes are valid method values)
-        for (; req.start < bufsiz && buf[req.start] == ' '; ++req.start);
-        parseOffset_ = req.start;
-    }
     req.end = -1;
     for (int i = 0; i < bufsiz; ++i) {
         // track first and last whitespace (SP only)
@@ -256,6 +318,11 @@ bool
 Http::Http1Parser::parseRequest()
 {
     // stage 1: locate the request-line
+    if (completedState_ == HTTP_PARSE_NEW) {
+        if (skipGarbageLines() && (size_t)bufsiz < parseOffset_)
+            return 0;
+    }
+
     // stage 2: parse the request-line
     if (completedState_ == HTTP_PARSE_NEW) {
         PROF_start(HttpParserParseReqLine);
diff --git a/src/http/Http1Parser.h b/src/http/Http1Parser.h
index c50685e159..b217edfdc7 100644
--- a/src/http/Http1Parser.h
+++ b/src/http/Http1Parser.h
@@ -72,26 +72,8 @@ public:
      * \return true if a valid request was parsed.
      * \note Use isDone() method to determine between incomplete parse and errors.
      */
-    // TODO: parse more than just the request-line
     bool parseRequest();
 
-    /**
-     * Attempt to parse the first line of a new request message.
-     *
-     * Governed by:
-     *  RFC 1945 section 5.1
-     *  RFC 2616 section 5.1
-     *
-     * Parsing state is stored between calls. However the current implementation
-     * begins parsing from scratch on every call.
-     * The return value tells you whether the parsing state fields are valid or not.
-     *
-     * \retval -1  an error occurred. request_parse_status indicates HTTP status result.
-     * \retval  1  successful parse. member fields contain the request-line items
-     * \retval  0  more data is needed to complete the parse
-     */
-    int parseRequestFirstLine();
-
 public:
     const char *buf;
     int bufsiz;
@@ -122,6 +104,9 @@ public:
     Http::StatusCode request_parse_status;
 
 private:
+    bool skipGarbageLines();
+    int parseRequestFirstLine();
+
     /// byte offset for non-parsed region of the buffer
     size_t parseOffset_;