]> git.ipfire.org Git - thirdparty/squid.git/commitdiff
Add origin-form and absolute-path to URI API (#2141)
authorAmos Jeffries <yadij@users.noreply.github.com>
Sat, 30 Aug 2025 04:00:29 +0000 (04:00 +0000)
committerSquid Anubis <squid-anubis@squid-cache.org>
Sat, 30 Aug 2025 04:00:32 +0000 (04:00 +0000)
Convert callers of Uri::path() as appropriate to their needs.

These methods provide %-encoded URI components for use in
display or normalized processing. The path() method is now only
guaranteed to provide the URL-path component in un-escaped form
(though query has not yet been moved).

14 files changed:
src/HttpRequest.cc
src/acl/UrlPath.cc
src/adaptation/Service.cc
src/adaptation/icap/ModXact.cc
src/anyp/Uri.cc
src/anyp/Uri.h
src/carp.cc
src/clients/FtpGateway.cc
src/clients/WhoisGateway.cc
src/errorpage.cc
src/format/Format.cc
src/http.cc
src/internal.cc
src/tests/stub_libanyp.cc

index e5f48f6dbd9c660baf7b4b301664b71f5f437ebb..0a9c01604f50fdd178f85135789e29af6c881aef 100644 (file)
@@ -344,9 +344,7 @@ HttpRequest::pack(Packable * p) const
 {
     assert(p);
     /* pack request-line */
-    p->appendf(SQUIDSBUFPH " " SQUIDSBUFPH " HTTP/%d.%d\r\n",
-               SQUIDSBUFPRINT(method.image()), SQUIDSBUFPRINT(url.path()),
-               http_ver.major, http_ver.minor);
+    packFirstLineInto(p, false /* origin-form */);
     /* headers */
     header.packInto(p);
     /* trailer */
@@ -368,7 +366,7 @@ int
 HttpRequest::prefixLen() const
 {
     return method.image().length() + 1 +
-           url.path().length() + 1 +
+           url.originForm().length() + 1 +
            4 + 1 + 3 + 2 +
            header.len + 2;
 }
@@ -470,7 +468,7 @@ HttpRequest::clearError()
 void
 HttpRequest::packFirstLineInto(Packable * p, bool full_uri) const
 {
-    const SBuf tmp(full_uri ? effectiveRequestUri() : url.path());
+    const SBuf tmp(full_uri ? effectiveRequestUri() : url.originForm());
 
     // form HTTP request-line
     p->appendf(SQUIDSBUFPH " " SQUIDSBUFPH " HTTP/%d.%d\r\n",
index 868cacba164fa0f65808d06987142f00401bf16e..ba5c7c19da4cd429d00f21c3c4c5f70686b8bf7a 100644 (file)
 #include "acl/FilledChecklist.h"
 #include "acl/UrlPath.h"
 #include "HttpRequest.h"
-#include "rfc1738.h"
 
 int
 Acl::UrlPathCheck::match(ACLChecklist * const ch)
 {
     const auto checklist = Filled(ch);
+    auto urlPath = checklist->request->url.path();
 
-    if (checklist->request->url.path().isEmpty())
+    if (urlPath.isEmpty())
         return -1;
 
-    char *esc_buf = SBufToCstring(checklist->request->url.path());
-    rfc1738_unescape(esc_buf);
-    int result = data->match(esc_buf);
-    xfree(esc_buf);
-    return result;
+    return data->match(urlPath.c_str());
 }
-
index a9dde861e368ab7c6164c480dcf750d4b0b8b005..1b5c802376f3b2c124052401dcb8af6c012e7db1 100644 (file)
@@ -49,7 +49,7 @@ Adaptation::Service::wants(const ServiceFilter &filter) const
         // Sending a message to a service that does not want it is useless.
         // note that we cannot check wantsUrl for service that is not "up"
         // note that even essential services are skipped on unwanted URLs!
-        return wantsUrl(filter.request->url.path());
+        return wantsUrl(filter.request->url.absolutePath());
     }
 
     // The service is down and is either not bypassable or not probed due
index 7763beb69ff71f35f228409490e52b468a442362..9f8783fe52b7b5bfb504bc14dae36f0a7e51889c 100644 (file)
@@ -1634,10 +1634,9 @@ void Adaptation::Icap::ModXact::decideOnPreview()
         return;
     }
 
-    const SBuf urlPath(virginRequest().url.path());
     size_t wantedSize;
-    if (!service().wantsPreview(urlPath, wantedSize)) {
-        debugs(93, 5, "should not offer preview for " << urlPath);
+    if (!service().wantsPreview(virginRequest().url.absolutePath(), wantedSize)) {
+        debugs(93, 5, "should not offer preview for " << virginRequest().url.absolutePath());
         return;
     }
 
index 6527e6316c34558dc504e6e46b2c637b5586dae6..2b648866b8fa3ec6250ece82d7d618acec35976d 100644 (file)
@@ -50,6 +50,25 @@ UserInfoChars()
     return userInfoValid;
 }
 
+/// Characters which are valid within a URI path section
+static const CharacterSet &
+PathChars()
+{
+    /*
+     * RFC 3986 section 3.3
+     *
+     *   path          = path-abempty    ; begins with "/" or is empty
+     * ...
+     *   path-abempty  = *( "/" segment )
+     *   segment       = *pchar
+     *   pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
+     */
+    static const auto pathValid = CharacterSet("path", "/:@-._~%!$&'()*+,;=") +
+                                  CharacterSet::ALPHA +
+                                  CharacterSet::DIGIT;
+    return pathValid;
+}
+
 /**
  * Governed by RFC 3986 section 2.1
  */
@@ -695,6 +714,7 @@ AnyP::Uri::touch()
     absolute_.clear();
     authorityHttp_.clear();
     authorityWithPort_.clear();
+    absolutePath_.clear();
 }
 
 SBuf &
@@ -745,12 +765,23 @@ AnyP::Uri::absolute() const
             absolute_.append(host());
             absolute_.append(":", 1);
         }
-        absolute_.append(path()); // TODO: Encode each URI subcomponent in path_ as needed.
+        absolute_.append(absolutePath());
     }
 
     return absolute_;
 }
 
+SBuf &
+AnyP::Uri::absolutePath() const
+{
+    if (absolutePath_.isEmpty()) {
+        // TODO: Encode each URI subcomponent in path_ as needed.
+        absolutePath_ = Encode(path(), PathChars());
+    }
+
+    return absolutePath_;
+}
+
 /* XXX: Performance: This is an *almost* duplicate of HttpRequest::effectiveRequestUri(). But elides the query-string.
  *        After copying it on in the first place! Would be less code to merge the two with a flag parameter.
  *        and never copy the query-string part in the first place
index 108a337d3c9350847abb3e57083d68fdb1c53a06..1c0361cc64572652f518e4213613aba9af53877a 100644 (file)
@@ -102,9 +102,6 @@ public:
      * Implements RFC 3986 section 5.2.3
      *
      * The caller must ensure relUrl is a valid relative-path.
-     *
-     * NP: absolute-path are also accepted, but path() method
-     * should be used instead when possible.
      */
     void addRelativePath(const char *relUrl);
 
@@ -142,6 +139,12 @@ public:
      */
     SBuf &absolute() const;
 
+    /// RFC 3986 section 4.2 relative reference called 'absolute-path'.
+    SBuf &absolutePath() const;
+
+    /// The RFC 7230 origin-form URI for currently stored values.
+    SBuf &originForm() const { return absolutePath(); }
+
 private:
     void parseUrn(Parser::Tokenizer&);
 
@@ -187,6 +190,7 @@ private:
     mutable SBuf authorityHttp_;     ///< RFC 7230 section 5.3.3 authority, maybe without default-port
     mutable SBuf authorityWithPort_; ///< RFC 7230 section 5.3.3 authority with explicit port
     mutable SBuf absolute_;          ///< RFC 7230 section 5.3.2 absolute-URI
+    mutable SBuf absolutePath_; ///< RFC 3986 section 4.2 absolute-path
 };
 
 inline std::ostream &
@@ -202,7 +206,7 @@ operator <<(std::ostream &os, const Uri &url)
         os << "//" << url.authority();
 
     // path is what it is - including absent
-    os << url.path();
+    os << url.absolutePath();
     return os;
 }
 
index 2dd43aa33c8479a5dd20834c07ebc015b0766abd..b04ee7b87604de2e5287df93a47cd67ee44abb12 100644 (file)
@@ -186,13 +186,13 @@ carpSelectParent(PeerSelector *ps)
             }
             if (tp->options.carp_key.path) {
                 // XXX: fix when path and query are separate
-                key.append(request->url.path().substr(0,request->url.path().find('?'))); // 0..N
+                key.append(request->url.absolutePath().substr(0,request->url.absolutePath().find('?'))); // 0..N
             }
             if (tp->options.carp_key.params) {
                 // XXX: fix when path and query are separate
                 SBuf::size_type pos;
-                if ((pos=request->url.path().find('?')) != SBuf::npos)
-                    key.append(request->url.path().substr(pos)); // N..npos
+                if ((pos=request->url.absolutePath().find('?')) != SBuf::npos)
+                    key.append(request->url.absolutePath().substr(pos)); // N..npos
             }
         }
         // if the url-based key is empty, e.g. because the user is
index b5bbb1def310d854a0959159c10c1576d873857a..1001ad057d92fa0e082548646d6b2c79341495b9 100644 (file)
@@ -1078,6 +1078,8 @@ Ftp::Gateway::checkAuth(const HttpHeader * req_hdr)
 void
 Ftp::Gateway::checkUrlpath()
 {
+    // TODO: parse FTP URL syntax properly in AnyP::Uri::parse()
+
     // If typecode was specified, extract it and leave just the filename in
     // url.path. Tolerate trailing garbage or missing typecode value. Roughly:
     // [filename] ;type=[typecode char] [trailing garbage]
@@ -1126,7 +1128,7 @@ Ftp::Gateway::buildTitleUrl()
     SBuf authority = request->url.authority(request->url.getScheme() != AnyP::PROTO_FTP);
 
     title_url.append(authority);
-    title_url.append(request->url.path());
+    title_url.append(request->url.absolutePath());
 
     base_href = "ftp://";
 
@@ -1161,7 +1163,7 @@ Ftp::Gateway::start()
     checkUrlpath();
     buildTitleUrl();
     debugs(9, 5, "FD " << (ctrl.conn ? ctrl.conn->fd : -1) << " : host=" << request->url.host() <<
-           ", path=" << request->url.path() << ", user=" << user << ", passwd=" << password);
+           ", path=" << request->url.absolutePath() << ", user=" << user << ", passwd=" << password);
     state = BEGIN;
     Ftp::Client::start();
 }
@@ -2304,7 +2306,6 @@ ftpReadQuit(Ftp::Gateway * ftpState)
 static void
 ftpTrySlashHack(Ftp::Gateway * ftpState)
 {
-    char *path;
     ftpState->flags.try_slash_hack = 1;
     /* Free old paths */
 
@@ -2313,14 +2314,9 @@ ftpTrySlashHack(Ftp::Gateway * ftpState)
     if (ftpState->pathcomps)
         wordlistDestroy(&ftpState->pathcomps);
 
+    /* Build the new path */
     safe_free(ftpState->filepath);
-
-    /* Build the new path (urlpath begins with /) */
-    path = SBufToCstring(ftpState->request->url.path());
-
-    rfc1738_unescape(path);
-
-    ftpState->filepath = path;
+    ftpState->filepath = SBufToCstring(AnyP::Uri::Decode(ftpState->request->url.absolutePath()));
 
     /* And off we go */
     ftpGetFile(ftpState);
@@ -2610,7 +2606,7 @@ Ftp::UrlWith2f(HttpRequest * request)
         return nil;
     }
 
-    if (request->url.path()[0] == '/') {
+    if (request->url.path().startsWith(AnyP::Uri::SlashPath())) {
         newbuf.append(request->url.path());
         request->url.path(newbuf);
     } else if (!request->url.path().startsWith(newbuf)) {
index 07f05ecd87e5ceaf24bea6a4153d08dc799f9c04..9fe59f15eddc3aa56913eab103a028cf1e8e4014 100644 (file)
@@ -64,10 +64,10 @@ whoisStart(FwdState * fwd)
     p->entry->lock("whoisStart");
     comm_add_close_handler(fwd->serverConnection()->fd, whoisClose, p);
 
-    size_t l = p->request->url.path().length() + 3;
+    size_t l = p->request->url.absolutePath().length() + 3;
     char *buf = (char *)xmalloc(l);
 
-    const SBuf str_print = p->request->url.path().substr(1);
+    const SBuf str_print = p->request->url.absolutePath().substr(1);
     snprintf(buf, l, SQUIDSBUFPH "\r\n", SQUIDSBUFPRINT(str_print));
 
     AsyncCall::Pointer writeCall = commCbCall(5,5, "whoisWriteComplete",
index d7a588d099f95811f438fb74551a4aaa52d45275..4634c83ec2fad98d521087dc6158bf602ac54f9e 100644 (file)
@@ -1142,7 +1142,7 @@ ErrorState::compileLegacyCode(Build &build)
     case 'R':
         if (building_deny_info_url) {
             if (request != nullptr) {
-                const SBuf &tmp = request->url.path();
+                const SBuf &tmp = request->url.absolutePath();
                 mb.append(tmp.rawContent(), tmp.length());
                 no_urlescape = 1;
             } else
@@ -1152,7 +1152,7 @@ ErrorState::compileLegacyCode(Build &build)
         if (request) {
             mb.appendf(SQUIDSBUFPH " " SQUIDSBUFPH " %s/%d.%d\n",
                        SQUIDSBUFPRINT(request->method.image()),
-                       SQUIDSBUFPRINT(request->url.path()),
+                       SQUIDSBUFPRINT(request->url.originForm()),
                        AnyP::ProtocolType_str[request->http_ver.protocol],
                        request->http_ver.major, request->http_ver.minor);
             request->header.packInto(&mb, true); //hide authorization data
index 486da88c3293f3d010f7323b905bbbb96dc14990..3612590d8efc211946a36ea2e2004d39a0e13e9d 100644 (file)
@@ -1074,7 +1074,7 @@ Format::Format::assemble(MemBuf &mb, const AccessLogEntry::Pointer &al, int logS
         case LFT_REQUEST_URLPATH_OLD_31:
         case LFT_CLIENT_REQ_URLPATH:
             if (al->request) {
-                sb = al->request->url.path();
+                sb = al->request->url.absolutePath();
                 out = sb.c_str();
                 quote = 1;
             }
@@ -1149,7 +1149,7 @@ Format::Format::assemble(MemBuf &mb, const AccessLogEntry::Pointer &al, int logS
 
         case LFT_SERVER_REQ_URLPATH:
             if (al->adapted_request) {
-                sb = al->adapted_request->url.path();
+                sb = al->adapted_request->url.absolutePath();
                 out = sb.c_str();
                 quote = 1;
             }
index 66184e049460e9534eefcce12941a3a5c220ce73..fdc9af4e9d9194663862b3088dc150b93ed1ebfa 100644 (file)
@@ -2373,7 +2373,7 @@ HttpStateData::buildRequestPrefix(MemBuf * mb)
      * not the one we are sending. Needs checking.
      */
     const AnyP::ProtocolVersion httpver = Http::ProtocolVersion();
-    const SBuf url(flags.toOrigin ? request->url.path() : request->effectiveRequestUri());
+    const SBuf url(flags.toOrigin ? request->url.originForm() : request->effectiveRequestUri());
     mb->appendf(SQUIDSBUFPH " " SQUIDSBUFPH " %s/%d.%d\r\n",
                 SQUIDSBUFPRINT(request->method.image()),
                 SQUIDSBUFPRINT(url),
index 9b25c3939ffdf138aa8dbb8808960884870a6a6d..724563f6814a2f87f92faf7c80ef68effe82add2 100644 (file)
@@ -34,7 +34,7 @@ internalStart(const Comm::ConnectionPointer &clientConn, HttpRequest * request,
     ErrorState *err;
 
     Assure(request);
-    const SBuf upath = request->url.path();
+    const SBuf upath = request->url.absolutePath();
     debugs(76, 3, clientConn << " requesting '" << upath << "'");
 
     Assure(request->flags.internal);
index 23b0f3f28a3f1b032e28ed511545befd5c3792ee..d6c992c614d35c10fd38b4a629113380d7d73b8e 100644 (file)
@@ -31,6 +31,7 @@ const SBuf &AnyP::Uri::Asterisk()
 }
 SBuf &AnyP::Uri::authority(bool) const STUB_RETVAL(nil)
 SBuf &AnyP::Uri::absolute() const STUB_RETVAL(nil)
+SBuf &AnyP::Uri::absolutePath() const STUB_RETVAL(nil)
 void urlInitialize() STUB
 const char *urlCanonicalFakeHttps(const HttpRequest *) STUB_RETVAL(nullptr)
 bool urlIsRelative(const char *) STUB_RETVAL(false)