From: Alex Rousskov Date: Fri, 3 Mar 2017 23:18:25 +0000 (-0700) Subject: Fixed URI scheme case-sensitivity treatment broken since r14802. X-Git-Tag: M-staged-PR71~228 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=50a43a49966ff66fdb66befee01f3c2475e83832;p=thirdparty%2Fsquid.git Fixed URI scheme case-sensitivity treatment broken since r14802. A parsed value for the AnyP::UriScheme image constructor parameter was stored without toLower() canonicalization for known protocols (e.g., Squid would store "HTTP" instead of "http" after successfully parsing "HTTP://EXAMPLE.COM/" in urlParseFinish()). Without that canonicalization step, Squid violated various HTTP caching rules related to URI comparison (and served fewer hits) when dealing with absolute URLs containing non-lowercase HTTP scheme. According to my limited tests, URL-based ACLs are not affected by this bug, but I have not investigated how URL-based ACL code differs from caching code when it comes to stored URL access and whether some ACLs are actually affected in some environments. --- diff --git a/src/anyp/UriScheme.cc b/src/anyp/UriScheme.cc index 6e2e86ca9d..2782051b99 100644 --- a/src/anyp/UriScheme.cc +++ b/src/anyp/UriScheme.cc @@ -11,24 +11,41 @@ #include "squid.h" #include "anyp/UriScheme.h" +AnyP::UriScheme::LowercaseSchemeNames AnyP::UriScheme::LowercaseSchemeNames_; + AnyP::UriScheme::UriScheme(AnyP::ProtocolType const aScheme, const char *img) : theScheme_(aScheme) { - if (img) - // image could be provided explicitly (case-sensitive) - image_ = img; - - else if (theScheme_ == AnyP::PROTO_UNKNOWN) - // image could be actually unknown and not provided - image_ = "(unknown)"; - - else if (theScheme_ > AnyP::PROTO_NONE && theScheme_ < AnyP::PROTO_MAX) { - // image could be implied by a registered transfer protocol - // which use upper-case labels, so down-case for scheme image - image_ = AnyP::ProtocolType_str[theScheme_]; - image_.toLower(); + // RFC 3986 section 3.1: schemes are case-insensitive. + + // To improve diagnostic, remember exactly how an unsupported scheme looks like. + // XXX: Object users may rely on toLower() canonicalization that we refuse to provide. + if (img && theScheme_ == AnyP::PROTO_UNKNOWN) + image_ = img; + + // XXX: A broken caller supplies an image of an absent scheme? + // XXX: We assume that the caller is using a lower-case image. + else if (img && theScheme_ == AnyP::PROTO_NONE) + image_ = img; + + else if (theScheme_ > AnyP::PROTO_NONE && theScheme_ < AnyP::PROTO_MAX) + image_ = LowercaseSchemeNames_.at(theScheme_); + // else, the image remains empty (e.g., "://example.com/") + // hopefully, theScheme_ is PROTO_NONE here +} + +void +AnyP::UriScheme::Init() +{ + if (LowercaseSchemeNames_.empty()) { + LowercaseSchemeNames_.reserve(sizeof(SBuf) * AnyP::PROTO_MAX); + // TODO: use base/EnumIterator.h if possible + for (int i = AnyP::PROTO_NONE; i < AnyP::PROTO_MAX; ++i) { + SBuf image(ProtocolType_str[i]); + image.toLower(); + LowercaseSchemeNames_.emplace_back(image); + } } - // else, image is an empty string ("://example.com/") } unsigned short diff --git a/src/anyp/UriScheme.h b/src/anyp/UriScheme.h index 1ad4f2a3c9..f0e0a6f433 100644 --- a/src/anyp/UriScheme.h +++ b/src/anyp/UriScheme.h @@ -23,7 +23,10 @@ namespace AnyP class UriScheme { public: + typedef std::vector LowercaseSchemeNames; + UriScheme() : theScheme_(AnyP::PROTO_NONE) {} + /// \param img Explicit scheme representation for unknown/none schemes. UriScheme(AnyP::ProtocolType const aScheme, const char *img = nullptr); UriScheme(const AnyP::UriScheme &o) : theScheme_(o.theScheme_), image_(o.image_) {} UriScheme(AnyP::UriScheme &&) = default; @@ -47,7 +50,14 @@ public: unsigned short defaultPort() const; + /// initializes down-cased protocol scheme names array + static void Init(); + private: + /// optimization: stores down-cased protocol scheme names, copied from + /// AnyP::ProtocolType_str + static LowercaseSchemeNames LowercaseSchemeNames_; + /// This is a typecode pointer into the enum/registry of protocols handled. AnyP::ProtocolType theScheme_; diff --git a/src/main.cc b/src/main.cc index 7e064f717b..46e81df483 100644 --- a/src/main.cc +++ b/src/main.cc @@ -12,6 +12,7 @@ #include "AccessLogEntry.h" #include "acl/Acl.h" #include "acl/Asn.h" +#include "anyp/UriScheme.h" #include "auth/Config.h" #include "auth/Gadgets.h" #include "AuthReg.h" @@ -1499,6 +1500,8 @@ SquidMain(int argc, char **argv) Mem::Init(); + AnyP::UriScheme::Init(); + storeFsInit(); /* required for config parsing */ /* TODO: call the FS::Clean() in shutdown to do Fs cleanups */ diff --git a/src/tests/testHttpRequest.cc b/src/tests/testHttpRequest.cc index b8fe51f1b5..35d9d99b68 100644 --- a/src/tests/testHttpRequest.cc +++ b/src/tests/testHttpRequest.cc @@ -31,6 +31,7 @@ void testHttpRequest::setUp() { Mem::Init(); + AnyP::UriScheme::Init(); httpHeaderInitModule(); } diff --git a/src/tests/testURL.cc b/src/tests/testURL.cc index d88c7b13d1..1f22265399 100644 --- a/src/tests/testURL.cc +++ b/src/tests/testURL.cc @@ -25,6 +25,7 @@ void testURL::setUp() { Mem::Init(); + AnyP::UriScheme::Init(); } /*