HTTP: validate Content-Length header values

author Alex Rousskov <rousskov@measurement-factory.com>

Tue, 6 Sep 2016 02:45:20 +0000 (14:45 +1200)

committer Amos Jeffries <squid3@treenet.co.nz>

Tue, 6 Sep 2016 02:45:20 +0000 (14:45 +1200)
author Alex Rousskov <rousskov@measurement-factory.com>
Tue, 6 Sep 2016 02:45:20 +0000 (14:45 +1200)
committer Amos Jeffries <squid3@treenet.co.nz>
Tue, 6 Sep 2016 02:45:20 +0000 (14:45 +1200)
diff --git a/src/HttpHeader.cc b/src/HttpHeader.cc

index 7275dae788f7ebf05e79a62ec26cfe2b22177e96..0197509c23e101c998714fef401012ea190471c5 100644 (file)
--- a/src/HttpHeader.cc
+++ b/src/HttpHeader.cc
@@ -12,6 +12,7 @@
  #include "base/EnumIterator.h"
  #include "base64.h"
  #include "globals.h"
+#include "http/ContentLengthInterpreter.h"
  #include "HttpHdrCc.h"
  #include "HttpHdrContRange.h"
  #include "HttpHdrScTarget.h" // also includes HttpHdrSc.h
@@ -320,7 +321,6 @@ HttpHeader::parse(const char *header_start, size_t hdrLen)
  {
      const char *field_ptr = header_start;
      const char *header_end = header_start + hdrLen; // XXX: remove
-    HttpHeaderEntry *e, *e2;
      int warnOnError = (Config.onoff.relaxed_header_parser <= 0 ? DBG_IMPORTANT : 2);
  
      PROF_start(HttpHeaderParse);
@@ -338,6 +338,7 @@ HttpHeader::parse(const char *header_start, size_t hdrLen)
          return 0;
      }
  
+    Http::ContentLengthInterpreter clen(warnOnError);
      /* common format headers are "<name>:[ws]<value>" lines delimited by <CRLF>.
       * continuation lines start with a (single) space or tab */
      while (field_ptr < header_end) {
@@ -419,6 +420,7 @@ HttpHeader::parse(const char *header_start, size_t hdrLen)
              break;      /* terminating blank line */
          }
  
+        HttpHeaderEntry *e;
          if ((e = HttpHeaderEntry::parse(field_start, field_end)) == NULL) {
              debugs(55, warnOnError, "WARNING: unparseable HTTP header field {" <<
                     getStringPrefix(field_start, field_end-field_start) << "}");
@@ -432,45 +434,15 @@ HttpHeader::parse(const char *header_start, size_t hdrLen)
              return 0;
          }
  
-        // XXX: RFC 7230 Section 3.3.3 item #4 requires sending a 502 error in
-        // several cases that we do not yet cover. TODO: Rewrite to cover more.
-        if (e->id == Http::HdrType::CONTENT_LENGTH && (e2 = findEntry(e->id)) != nullptr) {
-            if (e->value != e2->value) {
-                int64_t l1, l2;
-                debugs(55, warnOnError, "WARNING: found two conflicting content-length headers in {" <<
-                       getStringPrefix(header_start, hdrLen) << "}");
-
-                if (!Config.onoff.relaxed_header_parser) {
-                    delete e;
-                    PROF_stop(HttpHeaderParse);
-                    clean();
-                    return 0;
-                }
-
-                if (!httpHeaderParseOffset(e->value.termedBuf(), &l1)) {
-                    debugs(55, DBG_IMPORTANT, "WARNING: Unparseable content-length '" << e->value << "'");
-                    delete e;
-                    continue;
-                } else if (!httpHeaderParseOffset(e2->value.termedBuf(), &l2)) {
-                    debugs(55, DBG_IMPORTANT, "WARNING: Unparseable content-length '" << e2->value << "'");
-                    delById(e2->id);
-                } else {
-                    if (l1 != l2)
-                        conflictingContentLength_ = true;
-                    delete e;
-                    continue;
-                }
-            } else {
-                debugs(55, warnOnError, "NOTICE: found double content-length header");
-                delete e;
+        if (e->id == Http::HdrType::CONTENT_LENGTH && !clen.checkField(e->value)) {
+            delete e;
  
-                if (Config.onoff.relaxed_header_parser)
-                    continue;
+            if (Config.onoff.relaxed_header_parser)
+                continue; // clen has printed any necessary warnings
  
-                PROF_stop(HttpHeaderParse);
-                clean();
-                return 0;
-            }
+            PROF_stop(HttpHeaderParse);
+            clean();
+            return 0;
          }
  
          if (e->id == Http::HdrType::OTHER && stringHasWhitespace(e->name.termedBuf())) {
@@ -488,14 +460,29 @@ HttpHeader::parse(const char *header_start, size_t hdrLen)
          addEntry(e);
      }
  
+    if (clen.headerWideProblem) {
+        debugs(55, warnOnError, "WARNING: " << clen.headerWideProblem <<
+               " Content-Length field values in" <<
+               Raw("header", header_start, hdrLen));
+    }
+
      if (chunked()) {
          // RFC 2616 section 4.4: ignore Content-Length with Transfer-Encoding
+        // RFC 7230 section 3.3.3 #3: Transfer-Encoding overrides Content-Length
          delById(Http::HdrType::CONTENT_LENGTH);
-        // RFC 7230 section 3.3.3 #4: ignore Content-Length conflicts with Transfer-Encoding
-        conflictingContentLength_ = false;
-    } else if (conflictingContentLength_) {
-        // ensure our callers do not see the conflicting Content-Length value
+        // and clen state becomes irrelevant
+    } else if (clen.sawBad) {
+        // ensure our callers do not accidentally see bad Content-Length values
          delById(Http::HdrType::CONTENT_LENGTH);
+        conflictingContentLength_ = true; // TODO: Rename to badContentLength_.
+    } else if (clen.needsSanitizing) {
+        // RFC 7230 section 3.3.2: MUST either reject or ... [sanitize];
+        // ensure our callers see a clean Content-Length value or none at all
+        delById(Http::HdrType::CONTENT_LENGTH);
+        if (clen.sawGood) {
+            putInt64(Http::HdrType::CONTENT_LENGTH, clen.value);
+            debugs(55, 5, "sanitized Content-Length to be " << clen.value);
+        }
      }
  
      PROF_stop(HttpHeaderParse);
@@ -1479,12 +1466,9 @@ int64_t
  HttpHeaderEntry::getInt64() const
  {
      int64_t val = -1;
-    int ok = httpHeaderParseOffset(value.termedBuf(), &val);
-    httpHeaderNoteParsedEntry(id, value, ok == 0);
-    /* XXX: Should we check ok - ie
-     * return ok ? -1 : value;
-     */
-    return val;
+    const bool ok = httpHeaderParseOffset(value.termedBuf(), &val);
+    httpHeaderNoteParsedEntry(id, value, ok);
+    return val; // remains -1 if !ok (XXX: bad method API)
  }
  
  static void
diff --git a/src/HttpHeaderTools.cc b/src/HttpHeaderTools.cc

index af5c2bd880b07bb3b7633d8aeaa9e35b7e53ccc9..7015f1b9b8929ea3a0c7885677c8e1ea0eb73b3f 100644 (file)
--- a/src/HttpHeaderTools.cc
+++ b/src/HttpHeaderTools.cc
@@ -138,18 +138,29 @@ httpHeaderParseInt(const char *start, int *value)
      return 1;
  }
  
-int
-httpHeaderParseOffset(const char *start, int64_t * value)
+bool
+httpHeaderParseOffset(const char *start, int64_t *value, char **endPtr)
  {
+    char *end = nullptr;
      errno = 0;
-    int64_t res = strtoll(start, NULL, 10);
-    if (!res && EINVAL == errno) {   /* maybe not portable? */
-        debugs(66, 7, "failed to parse offset in " << start);
-        return 0;
+    const int64_t res = strtoll(start, &end, 10);
+    if (errno && !res) { // strtoll() reported an error and produced no number
+        debugs(66, 7, "failed to parse malformed offset in " << start);
+        return false;
+    }
+    if (errno == ERANGE && (res == LLONG_MIN || res == LLONG_MAX)) { // out-of-range input: reject overflow and underflow
+        debugs(66, 7, "failed to parse huge offset in " << start);
+        return false;
+    }
+    if (start == end) { // strtoll() consumed nothing, so no digits were found
+        debugs(66, 7, "failed to parse empty offset");
+        return false;
      }
      *value = res;
+    if (endPtr) // the caller may omit endPtr
+        *endPtr = end;
      debugs(66, 7, "offset " << start << " parsed as " << res);
-    return 1;
+    return true;
  }
  
  /**
diff --git a/src/HttpHeaderTools.h b/src/HttpHeaderTools.h

index bec824ec02c82ec0635a745a871ddb3c8a1abcde..24e5bc88c1520aaaa6e8c57e2f76f1865f17af14 100644 (file)
--- a/src/HttpHeaderTools.h
+++ b/src/HttpHeaderTools.h
@@ -117,7 +117,13 @@ public:
      bool quoted;
  };
  
-int httpHeaderParseOffset(const char *start, int64_t * off);
+/// A strtoll(10) wrapper that checks for strtoll() failures and other problems.
+/// XXX: This function is not fully compatible with some HTTP syntax rules.
+/// Just like strtoll(), allows whitespace prefix, a sign, and _any_ suffix.
+/// Requires at least one digit to be present.
+/// Sets *offPtr and (when endPtr is not nil) *endPtr if and only if no problems were found.
+/// \return true if and only if no problems were found.
+bool httpHeaderParseOffset(const char *start, int64_t *offPtr, char **endPtr = nullptr);
  
  int httpHeaderHasConnDir(const HttpHeader * hdr, const char *directive);
  int httpHeaderParseInt(const char *start, int *val);
diff --git a/src/http/ContentLengthInterpreter.cc b/src/http/ContentLengthInterpreter.cc

new file mode 100644 (file)

index 0000000..61c1518
--- /dev/null
+++ b/src/http/ContentLengthInterpreter.cc
@@ -0,0 +1,134 @@
+/*
+ * Copyright (C) 1996-2016 The Squid Software Foundation and contributors
+ *
+ * Squid software is distributed under GPLv2+ license and includes
+ * contributions from numerous individuals and organizations.
+ * Please see the COPYING and CONTRIBUTORS files for details.
+ */
+
+/* DEBUG: section 55    HTTP Header */
+
+#include "squid.h"
+#include "base/CharacterSet.h"
+#include "Debug.h"
+#include "http/ContentLengthInterpreter.h"
+#include "http/one/Parser.h"
+#include "HttpHeaderTools.h"
+#include "SquidConfig.h"
+#include "SquidString.h"
+#include "StrList.h"
+
+Http::ContentLengthInterpreter::ContentLengthInterpreter(const int aDebugLevel):
+    value(-1), // placeholder; checkValue() stores the first good value here
+    headerWideProblem(nullptr), // neither "Duplicate" nor "Conflicting" values found yet
+    debugLevel(aDebugLevel), // level for the Negative, Trailing garbage, and List-like warnings
+    sawBad(false),
+    needsSanitizing(false),
+    sawGood(false)
+{
+}
+
+/// \return whether every character in [suffix, end), i.e. the text after the Content-Length number, is a delimiter
+bool
+Http::ContentLengthInterpreter::goodSuffix(const char *suffix, const char * const end) const
+{
+    // optimize for the common case of nothing following the number (no delimiter set needed)
+    if (suffix == end)
+        return true;
+
+    for (const CharacterSet &delimiters = Http::One::Parser::DelimiterCharacters();
+            suffix < end; ++suffix) {
+        if (!delimiters[*suffix]) // a non-delimiter character is not allowed after the number
+            return false;
+    }
+    // needsSanitizing = true; // TODO: Always remove trailing whitespace?
+    return true; // including empty suffix
+}
+
+/// handles a single-token Content-Length value; \return true iff it is well-formed and the first good value seen
+/// rawValue null-termination requirements are those of httpHeaderParseOffset()
+bool
+Http::ContentLengthInterpreter::checkValue(const char *rawValue, const int valueSize)
+{
+    Must(!sawBad); // checkField() and checkList() stop feeding values once sawBad is set
+
+    int64_t latestValue = -1;
+    char *suffix = nullptr;
+    // TODO: Handle malformed values with leading signs (e.g., "-0" or "+1").
+    if (!httpHeaderParseOffset(rawValue, &latestValue, &suffix)) {
+        debugs(55, DBG_IMPORTANT, "WARNING: Malformed" << Raw("Content-Length", rawValue, valueSize)); // NOTE(review): DBG_IMPORTANT here, debugLevel in the sibling warnings -- confirm intended
+        sawBad = true;
+        return false;
+    }
+
+    if (latestValue < 0) {
+        debugs(55, debugLevel, "WARNING: Negative" << Raw("Content-Length", rawValue, valueSize));
+        sawBad = true;
+        return false;
+    }
+
+    // check for garbage between the end of the parsed number and the end of the value
+    if (!goodSuffix(suffix, rawValue + valueSize)) {
+        debugs(55, debugLevel, "WARNING: Trailing garbage in" << Raw("Content-Length", rawValue, valueSize));
+        sawBad = true;
+        return false;
+    }
+
+    if (sawGood) {
+        /* we have found at least two, possibly identical values */
+
+        needsSanitizing = true; // replace identical values with a single value
+
+        const bool conflicting = value != latestValue;
+        if (conflicting)
+            headerWideProblem = "Conflicting"; // overwrite any lesser problem
+        else if (!headerWideProblem) // preserve a possibly worse problem
+            headerWideProblem = "Duplicate";
+
+        // with relaxed_header_parser, identical values are permitted
+        sawBad = !Config.onoff.relaxed_header_parser || conflicting;
+        return false; // conflicting or duplicate
+    }
+
+    sawGood = true; // first good value: remember it as the intended length
+    value = latestValue;
+    return true;
+}
+
+/// handles Content-Length: a, b, c
+bool
+Http::ContentLengthInterpreter::checkList(const String &list)
+{
+    Must(!sawBad);
+
+    if (!Config.onoff.relaxed_header_parser) {
+        debugs(55, debugLevel, "WARNING: List-like" << Raw("Content-Length", list.rawBuf(), list.size()));
+        sawBad = true;
+        return false;
+    }
+
+    needsSanitizing = true; // remove extra commas (at least)
+
+    const char *pos = nullptr;
+    const char *item = nullptr;;
+    int ilen = -1;
+    while (strListGetItem(&list, ',', &item, &ilen, &pos)) {
+        if (!checkValue(item, ilen) && sawBad)
+            break;
+        // keep going after a duplicate value to find conflicting ones
+    }
+    return false; // no need to keep this list field; it will be sanitized away
+}
+
+bool
+Http::ContentLengthInterpreter::checkField(const String &rawValue) // entry point: examines one Content-Length field value
+{
+    if (sawBad)
+        return false; // one rotten apple is enough to spoil all of them
+
+    // TODO: Optimize by always parsing the first integer first.
+    return rawValue.pos(',') ? // a comma anywhere makes the value list-like
+           checkList(rawValue) :
+           checkValue(rawValue.rawBuf(), rawValue.size());
+}
+
diff --git a/src/http/ContentLengthInterpreter.h b/src/http/ContentLengthInterpreter.h

new file mode 100644 (file)

index 0000000..128c34d
--- /dev/null
+++ b/src/http/ContentLengthInterpreter.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 1996-2016 The Squid Software Foundation and contributors
+ *
+ * Squid software is distributed under GPLv2+ license and includes
+ * contributions from numerous individuals and organizations.
+ * Please see the COPYING and CONTRIBUTORS files for details.
+ */
+
+#ifndef SQUID_SRC_HTTP_CONTENTLENGTH_INTERPRETER_H
+#define SQUID_SRC_HTTP_CONTENTLENGTH_INTERPRETER_H
+
+class String;
+
+namespace Http
+{
+
+/// Finds the intended Content-Length value while parsing message-header fields.
+/// Deals with complications such as value lists and/or repeated fields.
+class ContentLengthInterpreter
+{
+public:
+    explicit ContentLengthInterpreter(const int aDebugLevel);
+
+    /// updates history based on the given message-header field
+    /// \return true iff the field should be added/remembered for future use
+    bool checkField(const String &field);
+
+    /// intended Content-Length value if sawGood is set and sawBad is not set
+    /// meaningless otherwise
+    int64_t value;
+
+    /* for debugging (declared here to minimize padding) */
+    const char *headerWideProblem; ///< worst header-wide problem found (or nil)
+    const int debugLevel; ///< debugging level for certain warnings
+
+    /// whether an unacceptable (malformed, negative, conflicting, or disallowed duplicate/list) value was present
+    bool sawBad;
+
+    /// whether all remembered fields should be removed
+    /// removed fields ought to be replaced with the intended value (if known)
+    /// irrelevant if sawBad is set
+    bool needsSanitizing;
+
+    /// whether a valid field value was present, possibly among problematic ones
+    /// irrelevant if sawBad is set
+    bool sawGood;
+
+protected:
+    bool goodSuffix(const char *suffix, const char * const end) const; ///< whether [suffix, end) holds only delimiter characters
+    bool checkValue(const char *start, const int size); ///< handles a single-token value
+    bool checkList(const String &list); ///< handles a comma-separated list of values
+};
+
+} // namespace Http
+
+#endif /* SQUID_SRC_HTTP_CONTENTLENGTH_INTERPRETER_H */
+
diff --git a/src/http/Makefile.am b/src/http/Makefile.am

index b3071bb162879bf4c1b16239ec10457eb63334cd..43ea4fa5b8c015d7605c58612e3965488a231d5c 100644 (file)
--- a/src/http/Makefile.am
+++ b/src/http/Makefile.am
@@ -14,6 +14,8 @@ DIST_SUBDIRS = one url_rewriters
  noinst_LTLIBRARIES = libhttp.la
  
  libhttp_la_SOURCES = \
+       ContentLengthInterpreter.cc \
+       ContentLengthInterpreter.h \
         forward.h \
         MethodType.cc \
         MethodType.h \
diff --git a/src/http/one/Parser.h b/src/http/one/Parser.h

index b1e819a945ed9f1d5d5bc242c99a552e5a5a84ec..cdf0c3a605467fdb4e5405fafdffa44f52abfa58 100644 (file)
--- a/src/http/one/Parser.h
+++ b/src/http/one/Parser.h
@@ -106,6 +106,10 @@ public:
       */
      Http::StatusCode parseStatusCode;
  
+    /// the characters which are to be considered valid whitespace
+    /// (WSP / BSP / OWS)
+    static const CharacterSet &DelimiterCharacters();
+
  protected:
      /**
       * detect and skip the CRLF or (if tolerant) LF line terminator
@@ -117,10 +121,6 @@ protected:
       */
      bool skipLineTerminator(Http1::Tokenizer &tok) const;
  
-    /// the characters which are to be considered valid whitespace
-    /// (WSP / BSP / OWS)
-    static const CharacterSet &DelimiterCharacters();
-
      /**
       * Scan to find the mime headers block for current message.
       *
author	Alex Rousskov <rousskov@measurement-factory.com>
	Tue, 6 Sep 2016 02:45:20 +0000 (14:45 +1200)
committer	Amos Jeffries <squid3@treenet.co.nz>
	Tue, 6 Sep 2016 02:45:20 +0000 (14:45 +1200)
src/HttpHeader.cc		patch \| blob \| blame \| history
src/HttpHeaderTools.cc		patch \| blob \| blame \| history
src/HttpHeaderTools.h		patch \| blob \| blame \| history
src/http/ContentLengthInterpreter.cc	[new file with mode: 0644]	patch \| blob
src/http/ContentLengthInterpreter.h	[new file with mode: 0644]	patch \| blob
src/http/Makefile.am		patch \| blob \| blame \| history
src/http/one/Parser.h		patch \| blob \| blame \| history