#include "base/EnumIterator.h"
#include "base64.h"
#include "globals.h"
+#include "http/ContentLengthInterpreter.h"
#include "HttpHdrCc.h"
#include "HttpHdrContRange.h"
#include "HttpHdrScTarget.h" // also includes HttpHdrSc.h
{
const char *field_ptr = header_start;
const char *header_end = header_start + hdrLen; // XXX: remove
- HttpHeaderEntry *e, *e2;
int warnOnError = (Config.onoff.relaxed_header_parser <= 0 ? DBG_IMPORTANT : 2);
PROF_start(HttpHeaderParse);
return 0;
}
+ Http::ContentLengthInterpreter clen(warnOnError);
/* common format headers are "<name>:[ws]<value>" lines delimited by <CRLF>.
* continuation lines start with a (single) space or tab */
while (field_ptr < header_end) {
break; /* terminating blank line */
}
+ HttpHeaderEntry *e;
if ((e = HttpHeaderEntry::parse(field_start, field_end)) == NULL) {
debugs(55, warnOnError, "WARNING: unparseable HTTP header field {" <<
getStringPrefix(field_start, field_end-field_start) << "}");
return 0;
}
- // XXX: RFC 7230 Section 3.3.3 item #4 requires sending a 502 error in
- // several cases that we do not yet cover. TODO: Rewrite to cover more.
- if (e->id == Http::HdrType::CONTENT_LENGTH && (e2 = findEntry(e->id)) != nullptr) {
- if (e->value != e2->value) {
- int64_t l1, l2;
- debugs(55, warnOnError, "WARNING: found two conflicting content-length headers in {" <<
- getStringPrefix(header_start, hdrLen) << "}");
-
- if (!Config.onoff.relaxed_header_parser) {
- delete e;
- PROF_stop(HttpHeaderParse);
- clean();
- return 0;
- }
-
- if (!httpHeaderParseOffset(e->value.termedBuf(), &l1)) {
- debugs(55, DBG_IMPORTANT, "WARNING: Unparseable content-length '" << e->value << "'");
- delete e;
- continue;
- } else if (!httpHeaderParseOffset(e2->value.termedBuf(), &l2)) {
- debugs(55, DBG_IMPORTANT, "WARNING: Unparseable content-length '" << e2->value << "'");
- delById(e2->id);
- } else {
- if (l1 != l2)
- conflictingContentLength_ = true;
- delete e;
- continue;
- }
- } else {
- debugs(55, warnOnError, "NOTICE: found double content-length header");
- delete e;
+ if (e->id == Http::HdrType::CONTENT_LENGTH && !clen.checkField(e->value)) {
+ delete e;
- if (Config.onoff.relaxed_header_parser)
- continue;
+ if (Config.onoff.relaxed_header_parser)
+ continue; // clen has printed any necessary warnings
- PROF_stop(HttpHeaderParse);
- clean();
- return 0;
- }
+ PROF_stop(HttpHeaderParse);
+ clean();
+ return 0;
}
if (e->id == Http::HdrType::OTHER && stringHasWhitespace(e->name.termedBuf())) {
addEntry(e);
}
+ if (clen.headerWideProblem) {
+ debugs(55, warnOnError, "WARNING: " << clen.headerWideProblem <<
+ " Content-Length field values in" <<
+ Raw("header", header_start, hdrLen));
+ }
+
if (chunked()) {
// RFC 2616 section 4.4: ignore Content-Length with Transfer-Encoding
+ // RFC 7230 section 3.3.3 #3: Transfer-Encoding overwrites Content-Length
delById(Http::HdrType::CONTENT_LENGTH);
- // RFC 7230 section 3.3.3 #4: ignore Content-Length conflicts with Transfer-Encoding
- conflictingContentLength_ = false;
- } else if (conflictingContentLength_) {
- // ensure our callers do not see the conflicting Content-Length value
+ // and clen state becomes irrelevant
+ } else if (clen.sawBad) {
+ // ensure our callers do not accidentally see bad Content-Length values
delById(Http::HdrType::CONTENT_LENGTH);
+ conflictingContentLength_ = true; // TODO: Rename to badContentLength_.
+ } else if (clen.needsSanitizing) {
+ // RFC 7230 section 3.3.2: MUST either reject or ... [sanitize];
+ // ensure our callers see a clean Content-Length value or none at all
+ delById(Http::HdrType::CONTENT_LENGTH);
+ if (clen.sawGood) {
+ putInt64(Http::HdrType::CONTENT_LENGTH, clen.value);
+ debugs(55, 5, "sanitized Content-Length to be " << clen.value);
+ }
}
PROF_stop(HttpHeaderParse);
HttpHeaderEntry::getInt64() const
{
int64_t val = -1;
- int ok = httpHeaderParseOffset(value.termedBuf(), &val);
- httpHeaderNoteParsedEntry(id, value, ok == 0);
- /* XXX: Should we check ok - ie
- * return ok ? -1 : value;
- */
- return val;
+ const bool ok = httpHeaderParseOffset(value.termedBuf(), &val);
+ httpHeaderNoteParsedEntry(id, value, ok);
+ return val; // remains -1 if !ok (XXX: bad method API)
}
static void
return 1;
}
-int
-httpHeaderParseOffset(const char *start, int64_t * value)
+bool
+httpHeaderParseOffset(const char *start, int64_t *value, char **endPtr)
{
+ char *end = nullptr;
errno = 0;
- int64_t res = strtoll(start, NULL, 10);
- if (!res && EINVAL == errno) { /* maybe not portable? */
- debugs(66, 7, "failed to parse offset in " << start);
- return 0;
+ const int64_t res = strtoll(start, &end, 10);
+ if (errno && !res) {
+ debugs(66, 7, "failed to parse malformed offset in " << start);
+ return false;
+ }
+ if (errno == ERANGE && (res == LLONG_MIN || res == LLONG_MAX)) { // no overflow
+ debugs(66, 7, "failed to parse huge offset in " << start);
+ return false;
+ }
+ if (start == end) {
+ debugs(66, 7, "failed to parse empty offset");
+ return false;
}
*value = res;
+ if (endPtr)
+ *endPtr = end;
debugs(66, 7, "offset " << start << " parsed as " << res);
- return 1;
+ return true;
}
/**
bool quoted;
};
-int httpHeaderParseOffset(const char *start, int64_t * off);
+/// A strtoll(10) wrapper that checks for strtoll() failures and other problems.
+/// XXX: This function is not fully compatible with some HTTP syntax rules.
+/// Just like strtoll(), allows whitespace prefix, a sign, and _any_ suffix.
+/// Requires at least one digit to be present.
+/// Sets "off" and "end" arguments if and only if no problems were found.
+/// \return true if and only if no problems were found.
+bool httpHeaderParseOffset(const char *start, int64_t *offPtr, char **endPtr = nullptr);
int httpHeaderHasConnDir(const HttpHeader * hdr, const char *directive);
int httpHeaderParseInt(const char *start, int *val);
--- /dev/null
+/*
+ * Copyright (C) 1996-2016 The Squid Software Foundation and contributors
+ *
+ * Squid software is distributed under GPLv2+ license and includes
+ * contributions from numerous individuals and organizations.
+ * Please see the COPYING and CONTRIBUTORS files for details.
+ */
+
+/* DEBUG: section 55 HTTP Header */
+
+#include "squid.h"
+#include "base/CharacterSet.h"
+#include "Debug.h"
+#include "http/ContentLengthInterpreter.h"
+#include "http/one/Parser.h"
+#include "HttpHeaderTools.h"
+#include "SquidConfig.h"
+#include "SquidString.h"
+#include "StrList.h"
+
+Http::ContentLengthInterpreter::ContentLengthInterpreter(const int aDebugLevel):
+ value(-1),
+ headerWideProblem(nullptr),
+ debugLevel(aDebugLevel),
+ sawBad(false),
+ needsSanitizing(false),
+ sawGood(false)
+{
+}
+
+/// checks whether all characters after the Content-Length are allowed
+bool
+Http::ContentLengthInterpreter::goodSuffix(const char *suffix, const char * const end) const
+{
+ // optimize for the common case that does not need delimiters
+ if (suffix == end)
+ return true;
+
+ for (const CharacterSet &delimiters = Http::One::Parser::DelimiterCharacters();
+ suffix < end; ++suffix) {
+ if (!delimiters[*suffix])
+ return false;
+ }
+ // needsSanitizing = true; // TODO: Always remove trailing whitespace?
+ return true; // including empty suffix
+}
+
+/// handles a single-token Content-Length value
+/// rawValue null-termination requirements are those of httpHeaderParseOffset()
+bool
+Http::ContentLengthInterpreter::checkValue(const char *rawValue, const int valueSize)
+{
+ Must(!sawBad);
+
+ int64_t latestValue = -1;
+ char *suffix = nullptr;
+ // TODO: Handle malformed values with leading signs (e.g., "-0" or "+1").
+ if (!httpHeaderParseOffset(rawValue, &latestValue, &suffix)) {
+ debugs(55, DBG_IMPORTANT, "WARNING: Malformed" << Raw("Content-Length", rawValue, valueSize));
+ sawBad = true;
+ return false;
+ }
+
+ if (latestValue < 0) {
+ debugs(55, debugLevel, "WARNING: Negative" << Raw("Content-Length", rawValue, valueSize));
+ sawBad = true;
+ return false;
+ }
+
+ // check for garbage after the number
+ if (!goodSuffix(suffix, rawValue + valueSize)) {
+ debugs(55, debugLevel, "WARNING: Trailing garbage in" << Raw("Content-Length", rawValue, valueSize));
+ sawBad = true;
+ return false;
+ }
+
+ if (sawGood) {
+ /* we have found at least two, possibly identical values */
+
+ needsSanitizing = true; // replace identical values with a single value
+
+ const bool conflicting = value != latestValue;
+ if (conflicting)
+ headerWideProblem = "Conflicting"; // overwrite any lesser problem
+ else if (!headerWideProblem) // preserve a possibly worse problem
+ headerWideProblem = "Duplicate";
+
+ // with relaxed_header_parser, identical values are permitted
+ sawBad = !Config.onoff.relaxed_header_parser || conflicting;
+ return false; // conflicting or duplicate
+ }
+
+ sawGood = true;
+ value = latestValue;
+ return true;
+}
+
+/// handles Content-Length: a, b, c
+bool
+Http::ContentLengthInterpreter::checkList(const String &list)
+{
+ Must(!sawBad);
+
+ if (!Config.onoff.relaxed_header_parser) {
+ debugs(55, debugLevel, "WARNING: List-like" << Raw("Content-Length", list.rawBuf(), list.size()));
+ sawBad = true;
+ return false;
+ }
+
+ needsSanitizing = true; // remove extra commas (at least)
+
+ const char *pos = nullptr;
+ const char *item = nullptr;;
+ int ilen = -1;
+ while (strListGetItem(&list, ',', &item, &ilen, &pos)) {
+ if (!checkValue(item, ilen) && sawBad)
+ break;
+ // keep going after a duplicate value to find conflicting ones
+ }
+ return false; // no need to keep this list field; it will be sanitized away
+}
+
+bool
+Http::ContentLengthInterpreter::checkField(const String &rawValue)
+{
+ if (sawBad)
+ return false; // one rotten apple is enough to spoil all of them
+
+ // TODO: Optimize by always parsing the first integer first.
+ return rawValue.pos(',') ?
+ checkList(rawValue) :
+ checkValue(rawValue.rawBuf(), rawValue.size());
+}
+
--- /dev/null
+/*
+ * Copyright (C) 1996-2016 The Squid Software Foundation and contributors
+ *
+ * Squid software is distributed under GPLv2+ license and includes
+ * contributions from numerous individuals and organizations.
+ * Please see the COPYING and CONTRIBUTORS files for details.
+ */
+
+#ifndef SQUID_SRC_HTTP_CONTENTLENGTH_INTERPRETER_H
+#define SQUID_SRC_HTTP_CONTENTLENGTH_INTERPRETER_H
+
+class String;
+
+namespace Http
+{
+
+/// Finds the intended Content-Length value while parsing message-header fields.
+/// Deals with complications such as value lists and/or repeated fields.
+class ContentLengthInterpreter
+{
+public:
+ explicit ContentLengthInterpreter(const int aDebugLevel);
+
+ /// updates history based on the given message-header field
+ /// \return true iff the field should be added/remembered for future use
+ bool checkField(const String &field);
+
+ /// intended Content-Length value if sawGood is set and sawBad is not set
+ /// meaningless otherwise
+ int64_t value;
+
+ /* for debugging (declared here to minimize padding) */
+ const char *headerWideProblem; ///< worst header-wide problem found (or nil)
+ const int debugLevel; ///< debugging level for certain warnings
+
+ /// whether a malformed Content-Length value was present
+ bool sawBad;
+
+ /// whether all remembered fields should be removed
+ /// removed fields ought to be replaced with the intended value (if known)
+ /// irrelevant if sawBad is set
+ bool needsSanitizing;
+
+ /// whether a valid field value was present, possibly among problematic ones
+ /// irrelevant if sawBad is set
+ bool sawGood;
+
+protected:
+ bool goodSuffix(const char *suffix, const char * const end) const;
+ bool checkValue(const char *start, const int size);
+ bool checkList(const String &list);
+};
+
+} // namespace Http
+
+#endif /* SQUID_SRC_HTTP_CONTENTLENGTH_INTERPRETER_H */
+
noinst_LTLIBRARIES = libhttp.la
libhttp_la_SOURCES = \
+ ContentLengthInterpreter.cc \
+ ContentLengthInterpreter.h \
forward.h \
MethodType.cc \
MethodType.h \
*/
Http::StatusCode parseStatusCode;
+ /// the characters which are to be considered valid whitespace
+ /// (WSP / BSP / OWS)
+ static const CharacterSet &DelimiterCharacters();
+
protected:
/**
* detect and skip the CRLF or (if tolerant) LF line terminator
*/
bool skipLineTerminator(Http1::Tokenizer &tok) const;
- /// the characters which are to be considered valid whitespace
- /// (WSP / BSP / OWS)
- static const CharacterSet &DelimiterCharacters();
-
/**
* Scan to find the mime headers block for current message.
*