src/http/ContentLengthInterpreter.cc

   1 /*
   2  * Copyright (C) 1996-2021 The Squid Software Foundation and contributors
   3  *
   4  * Squid software is distributed under GPLv2+ license and includes
   5  * contributions from numerous individuals and organizations.
   6  * Please see the COPYING and CONTRIBUTORS files for details.
   7  */
   8
   9 /* DEBUG: section 55    HTTP Header */
  10
  11 #include "squid.h"
  12 #include "base/CharacterSet.h"
  13 #include "Debug.h"
  14 #include "http/ContentLengthInterpreter.h"
  15 #include "http/one/Parser.h"
  16 #include "HttpHeaderTools.h"
  17 #include "SquidConfig.h"
  18 #include "SquidString.h"
  19 #include "StrList.h"
  20
  21 Http::ContentLengthInterpreter::ContentLengthInterpreter():
  22     value(-1),
  23     headerWideProblem(nullptr),
  24     debugLevel(Config.onoff.relaxed_header_parser <= 0 ? DBG_IMPORTANT : 2),
  25     sawBad(false),
  26     needsSanitizing(false),
  27     sawGood(false),
  28     prohibitedAndIgnored_(nullptr)
  29 {
  30 }
  31
  32 /// checks whether all characters before the Content-Length number are allowed
  33 /// \returns the start of the digit sequence (or nil on errors)
  34 const char *
  35 Http::ContentLengthInterpreter::findDigits(const char *prefix, const char * const valueEnd) const
  36 {
  37     // skip leading OWS in RFC 7230's `OWS field-value OWS`
  38     const CharacterSet &whitespace = Http::One::Parser::WhitespaceCharacters();
  39     while (prefix < valueEnd) {
  40         const auto ch = *prefix;
  41         if (CharacterSet::DIGIT[ch])
  42             return prefix; // common case: a pre-trimmed field value
  43         if (!whitespace[ch])
  44             return nullptr; // (trimmed) length does not start with a digit
  45         ++prefix;
  46     }
  47     return nullptr; // empty or whitespace-only value
  48 }
  49
  50 /// checks whether all characters after the Content-Length are allowed
  51 bool
  52 Http::ContentLengthInterpreter::goodSuffix(const char *suffix, const char * const end) const
  53 {
  54     // optimize for the common case that does not need delimiters
  55     if (suffix == end)
  56         return true;
  57
  58     for (const CharacterSet &delimiters = Http::One::Parser::DelimiterCharacters();
  59             suffix < end; ++suffix) {
  60         if (!delimiters[*suffix])
  61             return false;
  62     }
  63     // needsSanitizing = true; // TODO: Always remove trailing whitespace?
  64     return true; // including empty suffix
  65 }
  66
  67 /// handles a single-token Content-Length value
  68 /// rawValue null-termination requirements are those of httpHeaderParseOffset()
  69 bool
  70 Http::ContentLengthInterpreter::checkValue(const char *rawValue, const int valueSize)
  71 {
  72     Must(!sawBad);
  73
  74     const auto valueEnd = rawValue + valueSize;
  75
  76     const auto digits = findDigits(rawValue, valueEnd);
  77     if (!digits) {
  78         debugs(55, debugLevel, "WARNING: Leading garbage or empty value in" << Raw("Content-Length", rawValue, valueSize));
  79         sawBad = true;
  80         return false;
  81     }
  82
  83     int64_t latestValue = -1;
  84     char *suffix = nullptr;
  85
  86     if (!httpHeaderParseOffset(digits, &latestValue, &suffix)) {
  87         debugs(55, DBG_IMPORTANT, "WARNING: Malformed" << Raw("Content-Length", rawValue, valueSize));
  88         sawBad = true;
  89         return false;
  90     }
  91
  92     if (latestValue < 0) {
  93         debugs(55, debugLevel, "WARNING: Negative" << Raw("Content-Length", rawValue, valueSize));
  94         sawBad = true;
  95         return false;
  96     }
  97
  98     // check for garbage after the number
  99     if (!goodSuffix(suffix, valueEnd)) {
 100         debugs(55, debugLevel, "WARNING: Trailing garbage in" << Raw("Content-Length", rawValue, valueSize));
 101         sawBad = true;
 102         return false;
 103     }
 104
 105     if (sawGood) {
 106         /* we have found at least two, possibly identical values */
 107
 108         needsSanitizing = true; // replace identical values with a single value
 109
 110         const bool conflicting = value != latestValue;
 111         if (conflicting)
 112             headerWideProblem = "Conflicting"; // overwrite any lesser problem
 113         else if (!headerWideProblem) // preserve a possibly worse problem
 114             headerWideProblem = "Duplicate";
 115
 116         // with relaxed_header_parser, identical values are permitted
 117         sawBad = !Config.onoff.relaxed_header_parser || conflicting;
 118         return false; // conflicting or duplicate
 119     }
 120
 121     sawGood = true;
 122     value = latestValue;
 123     return true;
 124 }
 125
 126 /// handles Content-Length: a, b, c
 127 bool
 128 Http::ContentLengthInterpreter::checkList(const String &list)
 129 {
 130     Must(!sawBad);
 131
 132     if (!Config.onoff.relaxed_header_parser) {
 133         debugs(55, debugLevel, "WARNING: List-like" << Raw("Content-Length", list.rawBuf(), list.size()));
 134         sawBad = true;
 135         return false;
 136     }
 137
 138     needsSanitizing = true; // remove extra commas (at least)
 139
 140     const char *pos = nullptr;
 141     const char *item = nullptr;;
 142     int ilen = -1;
 143     while (strListGetItem(&list, ',', &item, &ilen, &pos)) {
 144         if (!checkValue(item, ilen) && sawBad)
 145             break;
 146         // keep going after a duplicate value to find conflicting ones
 147     }
 148     return false; // no need to keep this list field; it will be sanitized away
 149 }
 150
 151 bool
 152 Http::ContentLengthInterpreter::checkField(const String &rawValue)
 153 {
 154     if (sawBad)
 155         return false; // one rotten apple is enough to spoil all of them
 156
 157     // TODO: Optimize by always parsing the first integer first.
 158     return rawValue.pos(',') ?
 159            checkList(rawValue) :
 160            checkValue(rawValue.rawBuf(), rawValue.size());
 161 }
 162