2025/08/14 09:28:51| FATAL: invalid pct-encoded triplet
exception location: Uri.cc(102) Decode
The bug affects url_regex and urllogin ACLs. However, not every use of
those ACLs results in a FATAL exit. The exact preconditions are unknown.
Three pct-encoding (RFC 3986) error handling algorithms were considered:
### Algorithm A: An ACL that cannot decode, mismatches
This algorithm is similar to the algorithm used for handling "ACL is
used in context without ALE" and similar errors, but there is a
significant context difference: Those "without ALE" errors are Squid
misconfigurations or bugs! Decoding failures, on the other hand, are
caused by request properties outside of admin or Squid control.
With this algorithm, a request can easily avoid a "deny urlHasX" rule
match by injecting an invalid pct-encoding (e.g., `X%bad`). Such
injections may not be practical for URLs of most resources outside of
client control because most servers are unlikely to recognize the
malformed URL as something useful for the client. As for resources that
the client does control, a urlHasX ACL cannot be effective anyway
because the client can change URLs.
Algorithm A does not let Squid admins match problematic URLs!
### Algorithm B: An ACL that cannot decode X, tests raw/encoded X
With this algorithm, a request can trigger some "allow urlHasY" rule
matches by injecting an invalid pct-encoding that looks like Y (e.g., if
an "allow" rule looks for the word `good`, a request may contain a
`%good` or `%XXgood` sequence). Just like with algorithm A, such
injections probably have little practical value, for similar reasons.
Algorithm B lets Squid admins match problematic URLs.
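A minimal standalone sketch of Algorithm B (hypothetical helper names, not
Squid's actual AnyP::Uri code): decode the whole input if possible; on any
invalid pct-encoding sequence, fall back to matching the raw input.

```cpp
#include <cctype>
#include <optional>
#include <string>

// Hypothetical decoder: returns the fully decoded input, or nullopt if any
// pct-encoding sequence is invalid (all-or-nothing, like Decode() above).
static std::optional<std::string> pctDecode(const std::string &in)
{
    std::string out;
    for (std::size_t i = 0; i < in.size(); ++i) {
        if (in[i] != '%') {
            out += in[i];
            continue;
        }
        if (i + 2 >= in.size() ||
                !std::isxdigit(static_cast<unsigned char>(in[i + 1])) ||
                !std::isxdigit(static_cast<unsigned char>(in[i + 2])))
            return std::nullopt; // invalid pct-encoding sequence
        out += static_cast<char>(std::stoi(in.substr(i + 1, 2), nullptr, 16));
        i += 2;
    }
    return out;
}

// Algorithm B: an ACL that cannot decode X tests raw/encoded X.
static std::string subjectForMatching(const std::string &url)
{
    if (const auto decoded = pctDecode(url))
        return *decoded; // every triplet was valid; match the decoded URL
    return url; // at least one invalid sequence; match the raw URL
}
```

With this fallback, a rule looking for the word `good` does match a raw
`%XXgood` sequence, as described above.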
### Algorithm C: An ACL that cannot decode X, tests partially decoded X
With this algorithm, a "partially decoded X" is X where invalid
pct-encoding sequences (or their parts) are left "as is" while valid
pct-encoding triplets are decoded. This is actually a family of similar
algorithms because there are multiple ways to define invalid
pct-encoding sequence boundaries in certain URLs! For example,
`%6Fne%f%6Fo` can be replaced with `one%foo` or `one%f%6Fo`. This
additional complexity/uncertainty aggravates the two concerns below.
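That boundary ambiguity can be illustrated with two hypothetical members of
this family (simplified sketches, not actual Squid code): one keeps just the
`%` byte of an invalid sequence, the other keeps the `%` byte and the two
bytes after it.

```cpp
#include <cctype>
#include <string>

static bool isHexDigit(const char c)
{
    return std::isxdigit(static_cast<unsigned char>(c));
}

// Variant 1: leave just the '%' byte of an invalid sequence as is and
// resume decoding at the very next byte.
static std::string partialDecodeKeepPercent(const std::string &in)
{
    std::string out;
    for (std::size_t i = 0; i < in.size(); ++i) {
        if (in[i] == '%' && i + 2 < in.size() &&
                isHexDigit(in[i + 1]) && isHexDigit(in[i + 2])) {
            out += static_cast<char>(std::stoi(in.substr(i + 1, 2), nullptr, 16));
            i += 2;
        } else {
            out += in[i]; // '%' (or any other byte) copied as is
        }
    }
    return out;
}

// Variant 2: leave the '%' byte and the two bytes after it as is, then
// resume decoding after them.
static std::string partialDecodeKeepTriplet(const std::string &in)
{
    std::string out;
    for (std::size_t i = 0; i < in.size(); ++i) {
        if (in[i] != '%') {
            out += in[i];
            continue;
        }
        if (i + 2 < in.size() && isHexDigit(in[i + 1]) && isHexDigit(in[i + 2]))
            out += static_cast<char>(std::stoi(in.substr(i + 1, 2), nullptr, 16));
        else
            out += in.substr(i, 3); // keep the invalid "%XY" bytes as is
        i += 2;
    }
    return out;
}
```

For the `%6Fne%f%6Fo` example above, variant 1 yields `one%foo` while
variant 2 yields `one%f%6Fo`.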
Algorithm B notes apply to algorithm C as well.
Algorithm C lets admins match problematic URLs but, again, it requires
that admins know exactly how Squid is going to isolate problematic
pct-encoding triplets (e.g., skip/leave just `%` byte that starts an
invalid pct-encoding sequence or the following two bytes as well).
Algorithm C family includes rfc1738_unescape() behavior. That decoding
function was used for the two ACLs before commits cbb9bf12 and 226394f2
started to use AnyP::Uri::Decode() added in commit 26256f28.
For example, rfc1738_unescape() decodes `%%` as `%` and leaves some
other invalid pct-encoding one-, two-, and three-byte sequences in the
decoded result. It is unlikely that many admins know exactly what that
old decoding does, but they could tune their rules to "work" as they
expect for specific cases. Those rules could stop working after the
above commits (v7.0.1+) and this change, to their surprise.
This change implements Algorithm B:
* Unlike Algorithm A, B allows admins to match bad URLs.
* Unlike Algorithm C, B does not force admins to guess how Squid
mangles a bad URL before matching it.
Also updated the documentation of the two ACLs to reflect the current
implementation.
const auto checklist = Filled(ch);
// TODO: Consider refactoring so that effectiveRequestUri() returns decoded URI.
- auto decodedUri = AnyP::Uri::Decode(checklist->request->effectiveRequestUri());
- const auto result = data->match(decodedUri.c_str());
- return result;
+ // XXX: c_str() truncates where %00 was decoded
+ return data->match(AnyP::Uri::DecodeOrDupe(checklist->request->effectiveRequestUri()).c_str());
}
return 0; // nothing can match
}
- auto decodedUserInfo = AnyP::Uri::Decode(checklist->request->url.userInfo());
- return data->match(decodedUserInfo.c_str());
+ // XXX: c_str() truncates where %00 was decoded
+ return data->match(AnyP::Uri::DecodeOrDupe(checklist->request->url.userInfo()).c_str());
}
return output;
}
-SBuf
+std::optional<SBuf>
AnyP::Uri::Decode(const SBuf &buf)
{
SBuf output;
// we are either at '%' or at end of input
if (tok.skip('%')) {
+ const auto rawBytesAfterPercent = tok.remaining();
int64_t hex1 = 0, hex2 = 0;
- if (tok.int64(hex1, 16, false, 1) && tok.int64(hex2, 16, false, 1))
+ if (tok.int64(hex1, 16, false, 1) && tok.int64(hex2, 16, false, 1)) {
output.append(static_cast<char>((hex1 << 4) | hex2));
- else
- throw TextException("invalid pct-encoded triplet", Here());
+ } else {
+ // see TestUri::testEncoding() for invalid pct-encoding sequence examples
+ debugs(23, 3, "invalid pct-encoding sequence starting at %" << rawBytesAfterPercent);
+ return std::nullopt;
+ }
}
}
return output;
}
+SBuf
+AnyP::Uri::DecodeOrDupe(const SBuf &input)
+{
+ if (const auto decoded = Decode(input))
+ return *decoded;
+ return input;
+}
+
const SBuf &
AnyP::Uri::Asterisk()
{
static SBuf Encode(const SBuf &, const CharacterSet &expected);
/// %-decode the given buffer
- static SBuf Decode(const SBuf &);
+ /// \retval std::nullopt on decoding failures
+ /// \sa DecodeOrDupe()
+ static std::optional<SBuf> Decode(const SBuf &);
+
+ /// %-decode the given buffer
+ /// \retval decoded input if input obeys RFC 3986 Percent-Encoding rules
+ /// \retval an input copy if input violates RFC 3986 Percent-Encoding rules
+ /// \sa Decode()
+ static SBuf DecodeOrDupe(const SBuf &input);
/**
* The authority-form URI for currently stored values.
acl aclname url_regex [-i] ^http:// ...
# POSIX extended regex matching on whole URL [fast]
+ #
+ # If request URL contains only valid pct-encoded triplets (RFC 3986),
+ # all of them are decoded before matching (e.g., `%25` triplet is
+ # replaced with a single `%` character). If request URL contains at
+ # least one `%` character that does not start a valid pct-encoded
+ # triplet (e.g., `%%`, `%X`, or `%2Y`), then the URL is not decoded at
+ # all (i.e. the raw request URL is used for matching).
+ #
+ # If a request URL is decoded as described above, then all request URL
+ # characters starting with the decoded `%00` pct-encoded triplet (if
+ # any) are ignored during matching. There is currently no way to match
+ # that triplet itself in a correctly percent-encoded URL.
+ #
+ # ACL parameters are not decoded.
+
acl aclname urllogin [-i] [^a-zA-Z0-9] ...
- # POSIX extended regex matching on URL login field
+ # POSIX extended regex matching on URL login field [fast]
+ #
+ # This ACL does not match requests with a URL that lacks a login field.
+ #
+ # This ACL handles RFC 3986 pct-encoded triplets in the login field as
+ # url_regex ACL handles those triplets in the entire request URL.
+
acl aclname urlpath_regex [-i] \.gif$ ...
# POSIX extended regex matching on URL path [fast]
int checkAuth(const HttpHeader * req_hdr);
void checkUrlpath();
+ std::optional<SBuf> decodedRequestUriPath() const;
void buildTitleUrl();
void writeReplyBody(const char *, size_t len);
void completeForwarding() override;
ftpState->serverComplete();
}
+/// absolute request URI path after successful decoding of all pct-encoding sequences
+std::optional<SBuf>
+Ftp::Gateway::decodedRequestUriPath() const
+{
+ return AnyP::Uri::Decode(request->url.absolutePath());
+}
+
+/// \prec !ftpState->flags.try_slash_hack
+/// \prec ftpState->decodedRequestUriPath()
static void
ftpTrySlashHack(Ftp::Gateway * ftpState)
{
wordlistDestroy(&ftpState->pathcomps);
/* Build the new path */
+ // XXX: Conversion to c-string effectively truncates where %00 was decoded
safe_free(ftpState->filepath);
- ftpState->filepath = SBufToCstring(AnyP::Uri::Decode(ftpState->request->url.absolutePath()));
+ ftpState->filepath = SBufToCstring(ftpState->decodedRequestUriPath().value());
/* And off we go */
ftpGetFile(ftpState);
" reply code " << code << "flags(" <<
(ftpState->flags.isdir?"IS_DIR,":"") <<
(ftpState->flags.try_slash_hack?"TRY_SLASH_HACK":"") << "), " <<
+ "decodable_filepath=" << bool(ftpState->decodedRequestUriPath()) << ' ' <<
"mdtm=" << ftpState->mdtm << ", size=" << ftpState->theSize <<
"slashhack=" << (slashHack? "T":"F"));
/* Try the / hack to support "Netscape" FTP URL's for retrieving files */
if (!ftpState->flags.isdir && /* Not a directory */
!ftpState->flags.try_slash_hack && !slashHack && /* Not doing slash hack */
- ftpState->mdtm <= 0 && ftpState->theSize < 0) { /* Not known as a file */
+ ftpState->mdtm <= 0 && ftpState->theSize < 0 && /* Not known as a file */
+ ftpState->decodedRequestUriPath()) {
switch (ftpState->state) {
};
for (const auto &testCase: basicTestCases) {
- CPPUNIT_ASSERT_EQUAL(testCase.first, AnyP::Uri::Decode(testCase.second));
+ const auto decoded = AnyP::Uri::Decode(testCase.second);
+ CPPUNIT_ASSERT(decoded);
+ CPPUNIT_ASSERT_EQUAL(testCase.first, *decoded);
CPPUNIT_ASSERT_EQUAL(testCase.second, AnyP::Uri::Encode(testCase.first, CharacterSet::RFC3986_UNRESERVED()));
};
SBuf("%"),
SBuf("%%"),
SBuf("%%%"),
+ SBuf("%0"),
SBuf("%1"),
SBuf("%1Z"),
SBuf("%1\000", 2),
for (const auto &invalidEncoding: invalidEncodings) {
// test various input positions of an invalid escape sequence
- CPPUNIT_ASSERT_THROW(AnyP::Uri::Decode(invalidEncoding), TextException);
- CPPUNIT_ASSERT_THROW(AnyP::Uri::Decode(ToSBuf("word", invalidEncoding)), TextException);
- CPPUNIT_ASSERT_THROW(AnyP::Uri::Decode(ToSBuf(invalidEncoding, "word")), TextException);
- CPPUNIT_ASSERT_THROW(AnyP::Uri::Decode(ToSBuf("word", invalidEncoding, "word")), TextException);
+ CPPUNIT_ASSERT(!AnyP::Uri::Decode(invalidEncoding));
+ CPPUNIT_ASSERT(!AnyP::Uri::Decode(ToSBuf("word", invalidEncoding)));
+ CPPUNIT_ASSERT(!AnyP::Uri::Decode(ToSBuf(invalidEncoding, "word")));
+ CPPUNIT_ASSERT(!AnyP::Uri::Decode(ToSBuf("word", invalidEncoding, "word")));
+ CPPUNIT_ASSERT_EQUAL(invalidEncoding, AnyP::Uri::DecodeOrDupe(invalidEncoding));
};
}