From ff60b102574694e9599bfaa72524493c3c8e8f33 Mon Sep 17 00:00:00 2001 From: Amos Jeffries Date: Sat, 30 Aug 2025 04:00:29 +0000 Subject: [PATCH] Add origin-form and absolute-path to URI API (#2141) Convert callers of Uri::path() as appropriate to their needs. These methods provide %-encoded URI components for use in display or normalized processing. The path() method is now only guaranteed to provide the URL-path component in un-escaped form (though query has not yet been moved). --- src/HttpRequest.cc | 8 +++----- src/acl/UrlPath.cc | 11 +++-------- src/adaptation/Service.cc | 2 +- src/adaptation/icap/ModXact.cc | 5 ++--- src/anyp/Uri.cc | 33 ++++++++++++++++++++++++++++++++- src/anyp/Uri.h | 12 ++++++++---- src/carp.cc | 6 +++--- src/clients/FtpGateway.cc | 18 +++++++----------- src/clients/WhoisGateway.cc | 4 ++-- src/errorpage.cc | 4 ++-- src/format/Format.cc | 4 ++-- src/http.cc | 2 +- src/internal.cc | 2 +- src/tests/stub_libanyp.cc | 1 + 14 files changed, 68 insertions(+), 44 deletions(-) diff --git a/src/HttpRequest.cc b/src/HttpRequest.cc index e5f48f6dbd..0a9c01604f 100644 --- a/src/HttpRequest.cc +++ b/src/HttpRequest.cc @@ -344,9 +344,7 @@ HttpRequest::pack(Packable * p) const { assert(p); /* pack request-line */ - p->appendf(SQUIDSBUFPH " " SQUIDSBUFPH " HTTP/%d.%d\r\n", - SQUIDSBUFPRINT(method.image()), SQUIDSBUFPRINT(url.path()), - http_ver.major, http_ver.minor); + packFirstLineInto(p, false /* origin-form */); /* headers */ header.packInto(p); /* trailer */ @@ -368,7 +366,7 @@ int HttpRequest::prefixLen() const { return method.image().length() + 1 + - url.path().length() + 1 + + url.originForm().length() + 1 + 4 + 1 + 3 + 2 + header.len + 2; } @@ -470,7 +468,7 @@ HttpRequest::clearError() void HttpRequest::packFirstLineInto(Packable * p, bool full_uri) const { - const SBuf tmp(full_uri ? effectiveRequestUri() : url.path()); + const SBuf tmp(full_uri ? effectiveRequestUri() : url.originForm()); // form HTTP request-line p->appendf(SQUIDSBUFPH " " SQUIDSBUFPH " HTTP/%d.%d\r\n", diff --git a/src/acl/UrlPath.cc b/src/acl/UrlPath.cc index 868cacba16..ba5c7c19da 100644 --- a/src/acl/UrlPath.cc +++ b/src/acl/UrlPath.cc @@ -12,20 +12,15 @@ #include "acl/FilledChecklist.h" #include "acl/UrlPath.h" #include "HttpRequest.h" -#include "rfc1738.h" int Acl::UrlPathCheck::match(ACLChecklist * const ch) { const auto checklist = Filled(ch); + auto urlPath = checklist->request->url.path(); - if (checklist->request->url.path().isEmpty()) + if (urlPath.isEmpty()) return -1; - char *esc_buf = SBufToCstring(checklist->request->url.path()); - rfc1738_unescape(esc_buf); - int result = data->match(esc_buf); - xfree(esc_buf); - return result; + return data->match(urlPath.c_str()); } - diff --git a/src/adaptation/Service.cc b/src/adaptation/Service.cc index a9dde861e3..1b5c802376 100644 --- a/src/adaptation/Service.cc +++ b/src/adaptation/Service.cc @@ -49,7 +49,7 @@ Adaptation::Service::wants(const ServiceFilter &filter) const // Sending a message to a service that does not want it is useless. // note that we cannot check wantsUrl for service that is not "up" // note that even essential services are skipped on unwanted URLs! - return wantsUrl(filter.request->url.path()); + return wantsUrl(filter.request->url.absolutePath()); } // The service is down and is either not bypassable or not probed due diff --git a/src/adaptation/icap/ModXact.cc b/src/adaptation/icap/ModXact.cc index 7763beb69f..9f8783fe52 100644 --- a/src/adaptation/icap/ModXact.cc +++ b/src/adaptation/icap/ModXact.cc @@ -1634,10 +1634,9 @@ void Adaptation::Icap::ModXact::decideOnPreview() return; } - const SBuf urlPath(virginRequest().url.path()); size_t wantedSize; - if (!service().wantsPreview(urlPath, wantedSize)) { - debugs(93, 5, "should not offer preview for " << urlPath); + if (!service().wantsPreview(virginRequest().url.absolutePath(), wantedSize)) { + debugs(93, 5, "should not offer preview for " << virginRequest().url.absolutePath()); return; } diff --git a/src/anyp/Uri.cc b/src/anyp/Uri.cc index 6527e6316c..2b648866b8 100644 --- a/src/anyp/Uri.cc +++ b/src/anyp/Uri.cc @@ -50,6 +50,25 @@ UserInfoChars() return userInfoValid; } +/// Characters which are valid within a URI path section +static const CharacterSet & +PathChars() +{ + /* + * RFC 3986 section 3.3 + * + * path = path-abempty ; begins with "/" or is empty + * ... + * path-abempty = *( "/" segment ) + * segment = *pchar + * pchar = unreserved / pct-encoded / sub-delims / ":" / "@" + */ + static const auto pathValid = CharacterSet("path", "/:@-._~%!$&'()*+,;=") + + CharacterSet::ALPHA + + CharacterSet::DIGIT; + return pathValid; +} + /** * Governed by RFC 3986 section 2.1 */ @@ -695,6 +714,7 @@ AnyP::Uri::touch() absolute_.clear(); authorityHttp_.clear(); authorityWithPort_.clear(); + absolutePath_.clear(); } SBuf & @@ -745,12 +765,23 @@ AnyP::Uri::absolute() const absolute_.append(host()); absolute_.append(":", 1); } - absolute_.append(path()); // TODO: Encode each URI subcomponent in path_ as needed. + absolute_.append(absolutePath()); } return absolute_; } +SBuf & +AnyP::Uri::absolutePath() const +{ + if (absolutePath_.isEmpty()) { + // TODO: Encode each URI subcomponent in path_ as needed. + absolutePath_ = Encode(path(), PathChars()); + } + + return absolutePath_; +} + /* XXX: Performance: This is an *almost* duplicate of HttpRequest::effectiveRequestUri(). But elides the query-string. * After copying it on in the first place! Would be less code to merge the two with a flag parameter. * and never copy the query-string part in the first place diff --git a/src/anyp/Uri.h b/src/anyp/Uri.h index 108a337d3c..1c0361cc64 100644 --- a/src/anyp/Uri.h +++ b/src/anyp/Uri.h @@ -102,9 +102,6 @@ public: * Implements RFC 3986 section 5.2.3 * * The caller must ensure relUrl is a valid relative-path. - * - * NP: absolute-path are also accepted, but path() method - * should be used instead when possible. */ void addRelativePath(const char *relUrl); @@ -142,6 +139,12 @@ public: */ SBuf &absolute() const; + /// RFC 3986 section 4.2 relative reference called 'absolute-path'. + SBuf &absolutePath() const; + + /// The RFC 7230 origin-form URI for currently stored values. + SBuf &originForm() const { return absolutePath(); } + private: void parseUrn(Parser::Tokenizer&); @@ -187,6 +190,7 @@ private: mutable SBuf authorityHttp_; ///< RFC 7230 section 5.3.3 authority, maybe without default-port mutable SBuf authorityWithPort_; ///< RFC 7230 section 5.3.3 authority with explicit port mutable SBuf absolute_; ///< RFC 7230 section 5.3.2 absolute-URI + mutable SBuf absolutePath_; ///< RFC 3986 section 4.2 absolute-path }; inline std::ostream & @@ -202,7 +206,7 @@ operator <<(std::ostream &os, const Uri &url) os << "//" << url.authority(); // path is what it is - including absent - os << url.path(); + os << url.absolutePath(); return os; } diff --git a/src/carp.cc b/src/carp.cc index 2dd43aa33c..b04ee7b876 100644 --- a/src/carp.cc +++ b/src/carp.cc @@ -186,13 +186,13 @@ carpSelectParent(PeerSelector *ps) } if (tp->options.carp_key.path) { // XXX: fix when path and query are separate - key.append(request->url.path().substr(0,request->url.path().find('?'))); // 0..N + key.append(request->url.absolutePath().substr(0,request->url.absolutePath().find('?'))); // 0..N } if (tp->options.carp_key.params) { // XXX: fix when path and query are separate SBuf::size_type pos; - if ((pos=request->url.path().find('?')) != SBuf::npos) - key.append(request->url.path().substr(pos)); // N..npos + if ((pos=request->url.absolutePath().find('?')) != SBuf::npos) + key.append(request->url.absolutePath().substr(pos)); // N..npos } } // if the url-based key is empty, e.g. because the user is diff --git a/src/clients/FtpGateway.cc b/src/clients/FtpGateway.cc index b5bbb1def3..1001ad057d 100644 --- a/src/clients/FtpGateway.cc +++ b/src/clients/FtpGateway.cc @@ -1078,6 +1078,8 @@ Ftp::Gateway::checkAuth(const HttpHeader * req_hdr) void Ftp::Gateway::checkUrlpath() { + // TODO: parse FTP URL syntax properly in AnyP::Uri::parse() + // If typecode was specified, extract it and leave just the filename in // url.path. Tolerate trailing garbage or missing typecode value. Roughly: // [filename] ;type=[typecode char] [trailing garbage] @@ -1126,7 +1128,7 @@ Ftp::Gateway::buildTitleUrl() SBuf authority = request->url.authority(request->url.getScheme() != AnyP::PROTO_FTP); title_url.append(authority); - title_url.append(request->url.path()); + title_url.append(request->url.absolutePath()); base_href = "ftp://"; @@ -1161,7 +1163,7 @@ Ftp::Gateway::start() checkUrlpath(); buildTitleUrl(); debugs(9, 5, "FD " << (ctrl.conn ? ctrl.conn->fd : -1) << " : host=" << request->url.host() << - ", path=" << request->url.path() << ", user=" << user << ", passwd=" << password); + ", path=" << request->url.absolutePath() << ", user=" << user << ", passwd=" << password); state = BEGIN; Ftp::Client::start(); } @@ -2304,7 +2306,6 @@ ftpReadQuit(Ftp::Gateway * ftpState) static void ftpTrySlashHack(Ftp::Gateway * ftpState) { - char *path; ftpState->flags.try_slash_hack = 1; /* Free old paths */ @@ -2313,14 +2314,9 @@ ftpTrySlashHack(Ftp::Gateway * ftpState) if (ftpState->pathcomps) wordlistDestroy(&ftpState->pathcomps); + /* Build the new path */ safe_free(ftpState->filepath); - - /* Build the new path (urlpath begins with /) */ - path = SBufToCstring(ftpState->request->url.path()); - - rfc1738_unescape(path); - - ftpState->filepath = path; + ftpState->filepath = SBufToCstring(AnyP::Uri::Decode(ftpState->request->url.absolutePath())); /* And off we go */ ftpGetFile(ftpState); @@ -2610,7 +2606,7 @@ Ftp::UrlWith2f(HttpRequest * request) return nil; } - if (request->url.path()[0] == '/') { + if (request->url.path().startsWith(AnyP::Uri::SlashPath())) { newbuf.append(request->url.path()); request->url.path(newbuf); } else if (!request->url.path().startsWith(newbuf)) { diff --git a/src/clients/WhoisGateway.cc b/src/clients/WhoisGateway.cc index 07f05ecd87..9fe59f15ed 100644 --- a/src/clients/WhoisGateway.cc +++ b/src/clients/WhoisGateway.cc @@ -64,10 +64,10 @@ whoisStart(FwdState * fwd) p->entry->lock("whoisStart"); comm_add_close_handler(fwd->serverConnection()->fd, whoisClose, p); - size_t l = p->request->url.path().length() + 3; + size_t l = p->request->url.absolutePath().length() + 3; char *buf = (char *)xmalloc(l); - const SBuf str_print = p->request->url.path().substr(1); + const SBuf str_print = p->request->url.absolutePath().substr(1); snprintf(buf, l, SQUIDSBUFPH "\r\n", SQUIDSBUFPRINT(str_print)); AsyncCall::Pointer writeCall = commCbCall(5,5, "whoisWriteComplete", diff --git a/src/errorpage.cc b/src/errorpage.cc index d7a588d099..4634c83ec2 100644 --- a/src/errorpage.cc +++ b/src/errorpage.cc @@ -1142,7 +1142,7 @@ ErrorState::compileLegacyCode(Build &build) case 'R': if (building_deny_info_url) { if (request != nullptr) { - const SBuf &tmp = request->url.path(); + const SBuf &tmp = request->url.absolutePath(); mb.append(tmp.rawContent(), tmp.length()); no_urlescape = 1; } else @@ -1152,7 +1152,7 @@ ErrorState::compileLegacyCode(Build &build) if (request) { mb.appendf(SQUIDSBUFPH " " SQUIDSBUFPH " %s/%d.%d\n", SQUIDSBUFPRINT(request->method.image()), - SQUIDSBUFPRINT(request->url.path()), + SQUIDSBUFPRINT(request->url.originForm()), AnyP::ProtocolType_str[request->http_ver.protocol], request->http_ver.major, request->http_ver.minor); request->header.packInto(&mb, true); //hide authorization data diff --git a/src/format/Format.cc b/src/format/Format.cc index 486da88c32..3612590d8e 100644 --- a/src/format/Format.cc +++ b/src/format/Format.cc @@ -1074,7 +1074,7 @@ Format::Format::assemble(MemBuf &mb, const AccessLogEntry::Pointer &al, int logS case LFT_REQUEST_URLPATH_OLD_31: case LFT_CLIENT_REQ_URLPATH: if (al->request) { - sb = al->request->url.path(); + sb = al->request->url.absolutePath(); out = sb.c_str(); quote = 1; } @@ -1149,7 +1149,7 @@ Format::Format::assemble(MemBuf &mb, const AccessLogEntry::Pointer &al, int logS case LFT_SERVER_REQ_URLPATH: if (al->adapted_request) { - sb = al->adapted_request->url.path(); + sb = al->adapted_request->url.absolutePath(); out = sb.c_str(); quote = 1; } diff --git a/src/http.cc b/src/http.cc index 66184e0494..fdc9af4e9d 100644 --- a/src/http.cc +++ b/src/http.cc @@ -2373,7 +2373,7 @@ HttpStateData::buildRequestPrefix(MemBuf * mb) * not the one we are sending. Needs checking. */ const AnyP::ProtocolVersion httpver = Http::ProtocolVersion(); - const SBuf url(flags.toOrigin ? request->url.path() : request->effectiveRequestUri()); + const SBuf url(flags.toOrigin ? request->url.originForm() : request->effectiveRequestUri()); mb->appendf(SQUIDSBUFPH " " SQUIDSBUFPH " %s/%d.%d\r\n", SQUIDSBUFPRINT(request->method.image()), SQUIDSBUFPRINT(url), diff --git a/src/internal.cc b/src/internal.cc index 9b25c3939f..724563f681 100644 --- a/src/internal.cc +++ b/src/internal.cc @@ -34,7 +34,7 @@ internalStart(const Comm::ConnectionPointer &clientConn, HttpRequest * request, ErrorState *err; Assure(request); - const SBuf upath = request->url.path(); + const SBuf upath = request->url.absolutePath(); debugs(76, 3, clientConn << " requesting '" << upath << "'"); Assure(request->flags.internal); diff --git a/src/tests/stub_libanyp.cc b/src/tests/stub_libanyp.cc index 23b0f3f28a..d6c992c614 100644 --- a/src/tests/stub_libanyp.cc +++ b/src/tests/stub_libanyp.cc @@ -31,6 +31,7 @@ const SBuf &AnyP::Uri::Asterisk() } SBuf &AnyP::Uri::authority(bool) const STUB_RETVAL(nil) SBuf &AnyP::Uri::absolute() const STUB_RETVAL(nil) +SBuf &AnyP::Uri::absolutePath() const STUB_RETVAL(nil) void urlInitialize() STUB const char *urlCanonicalFakeHttps(const HttpRequest *) STUB_RETVAL(nullptr) bool urlIsRelative(const char *) STUB_RETVAL(false) -- 2.47.3