From ad365edba79f043387446a36a930700ecc9d6d19 Mon Sep 17 00:00:00 2001 From: Alex Rousskov Date: Sun, 19 Oct 2025 19:42:38 +0000 Subject: [PATCH] Bug 5501: Squid may exit when ACLs decode an invalid URI (#2145) 2025/08/14 09:28:51| FATAL: invalid pct-encoded triplet exception location: Uri.cc(102) Decode The bug affects url_regex and urllogin ACLs. However, not every use of those ACLs results in a FATAL exit. The exact preconditions are unknown. Three pct-encoding (RFC 3986) error handling algorithms were considered: ### Algorithm A: An ACL that cannot decode, mismatches This algorithm is similar to the algorithm used for handling "ACL is used in context without ALE" and similar errors, but there is a significant context difference: Those "without ALE" errors are Squid misconfigurations or bugs! Decoding failures, on the other hand, are caused by request properties outside of admin or Squid control. With this algorithm, a request can easily avoid a "deny urlHasX" rule match by injecting an invalid pct-encoding (e.g., `X%bad`). Such injections may not be practical for URLs of most resources outside of client control because most servers are unlikely to recognize the malformed URL as something useful for the client. As for resources that client does control, a urlHasX ACL cannot be effective for those anyway because the client can change URLs. Algorithm A does not let Squid admins match problematic URLs! ### Algorithm B: An ACL that cannot decode X, tests raw/encoded X With this algorithm, a request can trigger some "allow urlHasY" rule matches by injecting an invalid pct-encoding that looks like Y (e.g., if an "allow" rule looks for the word `good`, a request may contain a `%good` or `%XXgood` sequence). Just like with algorithm A, such injections probably have little practical value, for similar reasons. Algorithm B lets Squid admins match problematic URLs. ### Algorithm C: An ACL that cannot decode X, tests partially decoded X With this algorithm, a "partially decoded X" is X where invalid pct-encoding sequences (or their parts) are left "as is" while valid pct-encoding triplets are decoded. This is actually a family of similar algorithms because there are multiple ways to define invalid pct-encoding sequence boundaries in certain URLs! For example, `%6Fne%f%6Fo` can be replaced with `one%foo` or `one%f%6Fo`. This additional complexity/uncertainty aggravates the two concerns below. Algorithm B notes apply to algorithm C as well. Algorithm C lets admins match problematic URLs but, again, it requires that admins know exactly how Squid is going to isolate problematic pct-encoding triplets (e.g., skip/leave just `%` byte that starts an invalid pct-encoding sequence or the following two bytes as well). Algorithm C family includes rfc1738_unescape() behavior. That decoding function was used for the two ACLs before commit cbb9bf12 and commit 226394f2 started to use AnyP::Uri::Decode() added in commit 26256f28. For example, rfc1738_unescape() decodes `%%` as `%` and leaves some other invalid pct-encoding one-, two-, and three-byte sequences in the decoded result. It is unlikely that many admins know exactly what that old decoding does, but they could tune their rules to "work" as they expect for specific cases. Those rules could stop working after the above commits (v7.0.1+) and this change, to their surprise. This change implements Algorithm B: * Unlike Algorithm A, B allows admins to match bad URLs. * Unlike Algorithm C, B does not force admins to guess how Squid mangles a bad URL before matching it. Also updated ACLs documentation to reflect current implementation. --- src/acl/Url.cc | 5 ++--- src/acl/UrlLogin.cc | 4 ++-- src/anyp/Uri.cc | 20 ++++++++++++++++---- src/anyp/Uri.h | 10 +++++++++- src/cf.data.pre | 23 ++++++++++++++++++++++- src/clients/FtpGateway.cc | 17 +++++++++++++++-- src/tests/testURL.cc | 14 +++++++++----- 7 files changed, 75 insertions(+), 18 deletions(-) diff --git a/src/acl/Url.cc b/src/acl/Url.cc index 12a3702c07..4c1563ec52 100644 --- a/src/acl/Url.cc +++ b/src/acl/Url.cc @@ -20,8 +20,7 @@ Acl::UrlCheck::match(ACLChecklist * const ch) const auto checklist = Filled(ch); // TODO: Consider refactoring so that effectiveRequestUri() returns decoded URI. - auto decodedUri = AnyP::Uri::Decode(checklist->request->effectiveRequestUri()); - const auto result = data->match(decodedUri.c_str()); - return result; + // XXX: c_str() truncates where %00 was decoded + return data->match(AnyP::Uri::DecodeOrDupe(checklist->request->effectiveRequestUri()).c_str()); } diff --git a/src/acl/UrlLogin.cc b/src/acl/UrlLogin.cc index c4c5fed944..f13d70b635 100644 --- a/src/acl/UrlLogin.cc +++ b/src/acl/UrlLogin.cc @@ -24,7 +24,7 @@ Acl::UrlLoginCheck::match(ACLChecklist * const ch) return 0; // nothing can match } - auto decodedUserInfo = AnyP::Uri::Decode(checklist->request->url.userInfo()); - return data->match(decodedUserInfo.c_str()); + // XXX: c_str() truncates where %00 was decoded + return data->match(AnyP::Uri::DecodeOrDupe(checklist->request->url.userInfo()).c_str()); } diff --git a/src/anyp/Uri.cc b/src/anyp/Uri.cc index 2b648866b8..0c80f10212 100644 --- a/src/anyp/Uri.cc +++ b/src/anyp/Uri.cc @@ -101,7 +101,7 @@ AnyP::Uri::Encode(const SBuf &buf, const CharacterSet &ignore) return output; } -SBuf +std::optional AnyP::Uri::Decode(const SBuf &buf) { SBuf output; @@ -114,16 +114,28 @@ AnyP::Uri::Decode(const SBuf &buf) // we are either at '%' or at end of input if (tok.skip('%')) { + const auto rawBytesAfterPercent = tok.remaining(); int64_t hex1 = 0, hex2 = 0; - if (tok.int64(hex1, 16, false, 1) && tok.int64(hex2, 16, false, 1)) + if (tok.int64(hex1, 16, false, 1) && tok.int64(hex2, 16, false, 1)) { output.append(static_cast((hex1 << 4) | hex2)); - else - throw TextException("invalid pct-encoded triplet", Here()); + } else { + // see TestUri::testEncoding() for invalid pct-encoding sequence examples + debugs(23, 3, "invalid pct-encoding sequence starting at %" << rawBytesAfterPercent); + return std::nullopt; + } } } return output; } +SBuf +AnyP::Uri::DecodeOrDupe(const SBuf &input) +{ + if (const auto decoded = Decode(input)) + return *decoded; + return input; +} + const SBuf & AnyP::Uri::Asterisk() { diff --git a/src/anyp/Uri.h b/src/anyp/Uri.h index 1c0361cc64..70ff6b7411 100644 --- a/src/anyp/Uri.h +++ b/src/anyp/Uri.h @@ -116,7 +116,15 @@ public: static SBuf Encode(const SBuf &, const CharacterSet &expected); /// %-decode the given buffer - static SBuf Decode(const SBuf &); + /// \retval std::nullopt on decoding failures + /// \sa DecodeOrDupe() + static std::optional Decode(const SBuf &); + + /// %-decode the given buffer + /// \retval decoded input if input obeys RFC 3986 Percent-Encoding rules + /// \retval an input copy if input violates RFC 3986 Percent-Encoding rules + /// \sa Decode() + static SBuf DecodeOrDupe(const SBuf &input); /** * The authority-form URI for currently stored values. diff --git a/src/cf.data.pre b/src/cf.data.pre index 451d64b67d..898fc9af48 100644 --- a/src/cf.data.pre +++ b/src/cf.data.pre @@ -1245,8 +1245,29 @@ ENDIF acl aclname url_regex [-i] ^http:// ... # POSIX extended regex matching on whole URL [fast] + # + # If request URL contains only valid pct-encoded triplets (RFC 3986), + # all of them are decoded before matching (e.g., `%25` triplet is + # replaced with a single `%` character). If request URL contains at + # least one `%` character that does not start a valid pct-encoded + # triplet (e.g., `%%`, `%X`, or `%2Y`), then the URL is not decoded at + # all (i.e. the raw request URL is used for matching). + # + # If a request URL is decoded as described above, then all request URL + # characters starting with the decoded `%00` pct-encoded triplet (if + # any) are ignored during matching. There is currently no way to match + # that triplet itself in a correctly percent-encoded URL. + # + # ACL parameters are not decoded. + acl aclname urllogin [-i] [^a-zA-Z0-9] ... - # POSIX extended regex matching on URL login field + # POSIX extended regex matching on URL login field [fast] + # + # This ACL does not match requests with a URL that lacks a login field. + # + # This ACL handles RFC 3986 pct-encoded triplets in the login field as + # url_regex ACL handles those triplets in the entire request URL. + acl aclname urlpath_regex [-i] \.gif$ ... # POSIX extended regex matching on URL path [fast] diff --git a/src/clients/FtpGateway.cc b/src/clients/FtpGateway.cc index 1001ad057d..6352ce8c05 100644 --- a/src/clients/FtpGateway.cc +++ b/src/clients/FtpGateway.cc @@ -142,6 +142,7 @@ public: int checkAuth(const HttpHeader * req_hdr); void checkUrlpath(); + std::optional decodedRequestUriPath() const; void buildTitleUrl(); void writeReplyBody(const char *, size_t len); void completeForwarding() override; @@ -2303,6 +2304,15 @@ ftpReadQuit(Ftp::Gateway * ftpState) ftpState->serverComplete(); } +/// absolute request URI path after successful decoding of all pct-encoding sequences +std::optional +Ftp::Gateway::decodedRequestUriPath() const +{ + return AnyP::Uri::Decode(request->url.absolutePath()); +} + +/// \prec !ftpState->flags.try_slash_hack +/// \prec ftpState->decodedRequestUriPath() static void ftpTrySlashHack(Ftp::Gateway * ftpState) { @@ -2315,8 +2325,9 @@ ftpTrySlashHack(Ftp::Gateway * ftpState) wordlistDestroy(&ftpState->pathcomps); /* Build the new path */ + // XXX: Conversion to c-string effectively truncates where %00 was decoded safe_free(ftpState->filepath); - ftpState->filepath = SBufToCstring(AnyP::Uri::Decode(ftpState->request->url.absolutePath())); + ftpState->filepath = SBufToCstring(ftpState->decodedRequestUriPath().value()); /* And off we go */ ftpGetFile(ftpState); @@ -2371,13 +2382,15 @@ ftpFail(Ftp::Gateway *ftpState) " reply code " << code << "flags(" << (ftpState->flags.isdir?"IS_DIR,":"") << (ftpState->flags.try_slash_hack?"TRY_SLASH_HACK":"") << "), " << + "decodable_filepath=" << bool(ftpState->decodedRequestUriPath()) << ' ' << "mdtm=" << ftpState->mdtm << ", size=" << ftpState->theSize << "slashhack=" << (slashHack? "T":"F")); /* Try the / hack to support "Netscape" FTP URL's for retrieving files */ if (!ftpState->flags.isdir && /* Not a directory */ !ftpState->flags.try_slash_hack && !slashHack && /* Not doing slash hack */ - ftpState->mdtm <= 0 && ftpState->theSize < 0) { /* Not known as a file */ + ftpState->mdtm <= 0 && ftpState->theSize < 0 && /* Not known as a file */ + ftpState->decodedRequestUriPath()) { switch (ftpState->state) { diff --git a/src/tests/testURL.cc b/src/tests/testURL.cc index b7609df5c4..b53abe12e7 100644 --- a/src/tests/testURL.cc +++ b/src/tests/testURL.cc @@ -104,7 +104,9 @@ TestUri::testEncoding() }; for (const auto &testCase: basicTestCases) { - CPPUNIT_ASSERT_EQUAL(testCase.first, AnyP::Uri::Decode(testCase.second)); + const auto decoded = AnyP::Uri::Decode(testCase.second); + CPPUNIT_ASSERT(decoded); + CPPUNIT_ASSERT_EQUAL(testCase.first, *decoded); CPPUNIT_ASSERT_EQUAL(testCase.second, AnyP::Uri::Encode(testCase.first, CharacterSet::RFC3986_UNRESERVED())); }; @@ -112,6 +114,7 @@ TestUri::testEncoding() SBuf("%"), SBuf("%%"), SBuf("%%%"), + SBuf("%0"), SBuf("%1"), SBuf("%1Z"), SBuf("%1\000", 2), @@ -122,10 +125,11 @@ TestUri::testEncoding() for (const auto &invalidEncoding: invalidEncodings) { // test various input positions of an invalid escape sequence - CPPUNIT_ASSERT_THROW(AnyP::Uri::Decode(invalidEncoding), TextException); - CPPUNIT_ASSERT_THROW(AnyP::Uri::Decode(ToSBuf("word", invalidEncoding)), TextException); - CPPUNIT_ASSERT_THROW(AnyP::Uri::Decode(ToSBuf(invalidEncoding, "word")), TextException); - CPPUNIT_ASSERT_THROW(AnyP::Uri::Decode(ToSBuf("word", invalidEncoding, "word")), TextException); + CPPUNIT_ASSERT(!AnyP::Uri::Decode(invalidEncoding)); + CPPUNIT_ASSERT(!AnyP::Uri::Decode(ToSBuf("word", invalidEncoding))); + CPPUNIT_ASSERT(!AnyP::Uri::Decode(ToSBuf(invalidEncoding, "word"))); + CPPUNIT_ASSERT(!AnyP::Uri::Decode(ToSBuf("word", invalidEncoding, "word"))); + CPPUNIT_ASSERT_EQUAL(invalidEncoding, AnyP::Uri::DecodeOrDupe(invalidEncoding)); }; } -- 2.47.3