From 1ac1d4d37b6f464a4d84379267093c7320e22112 Mon Sep 17 00:00:00 2001 From: Amos Jeffries Date: Sat, 25 Apr 2020 06:08:35 +0000 Subject: [PATCH] Cleanup: remove urlHostname hacks (#615) The urlHostname() function and class it uses employ a very dirty hack to parse a guess at the hostname portion of a URL. AnyP::Uri objects can be used instead for better parsing of URLs with far more accurate hostname extraction in a wider set of URI. --- src/anyp/Uri.cc | 99 --------------------------------------- src/anyp/Uri.h | 1 - src/client_side_reply.cc | 10 ++-- src/clients/Client.cc | 2 +- src/htcp.cc | 25 ++++------ src/htcp.h | 2 +- src/http.cc | 4 +- src/neighbors.cc | 4 +- src/neighbors.h | 2 +- src/tests/stub_libanyp.cc | 1 - src/urn.cc | 16 +++---- 11 files changed, 28 insertions(+), 138 deletions(-) diff --git a/src/anyp/Uri.cc b/src/anyp/Uri.cc index 083408113a..d2c60e4080 100644 --- a/src/anyp/Uri.cc +++ b/src/anyp/Uri.cc @@ -890,105 +890,6 @@ urlCheckRequest(const HttpRequest * r) return rc; } -/* - * Quick-n-dirty host extraction from a URL. Steps: - * Look for a colon - * Skip any '/' after the colon - * Copy the next SQUID_MAXHOSTNAMELEN bytes to host[] - * Look for an ending '/' or ':' and terminate - * Look for login info preceded by '@' - */ - -class URLHostName -{ - -public: - char * extract(char const *url); - -private: - static char Host [SQUIDHOSTNAMELEN]; - void init(char const *); - void findHostStart(); - void trimTrailingChars(); - void trimAuth(); - char const *hostStart; - char const *url; -}; - -char * -urlHostname(const char *url) -{ - return URLHostName().extract(url); -} - -char URLHostName::Host[SQUIDHOSTNAMELEN]; - -void -URLHostName::init(char const *aUrl) -{ - Host[0] = '\0'; - url = aUrl; -} - -void -URLHostName::findHostStart() -{ - if (NULL == (hostStart = strchr(url, ':'))) - return; - - ++hostStart; - - while (*hostStart != '\0' && *hostStart == '/') - ++hostStart; - - if (*hostStart == ']') - ++hostStart; -} - -void -URLHostName::trimTrailingChars() -{ - char *t; - - if ((t = strchr(Host, '/'))) - *t = '\0'; - - if ((t = strrchr(Host, ':'))) - *t = '\0'; - - if ((t = strchr(Host, ']'))) - *t = '\0'; -} - -void -URLHostName::trimAuth() -{ - char *t; - - if ((t = strrchr(Host, '@'))) { - ++t; - memmove(Host, t, strlen(t) + 1); - } -} - -char * -URLHostName::extract(char const *aUrl) -{ - init(aUrl); - findHostStart(); - - if (hostStart == NULL) - return NULL; - - xstrncpy(Host, hostStart, SQUIDHOSTNAMELEN); - - trimTrailingChars(); - - trimAuth(); - - return Host; -} - AnyP::Uri::Uri(AnyP::UriScheme const &aScheme) : scheme_(aScheme), hostIsNumeric_(false), diff --git a/src/anyp/Uri.h b/src/anyp/Uri.h index edd6ac4f88..39cd199c1a 100644 --- a/src/anyp/Uri.h +++ b/src/anyp/Uri.h @@ -245,7 +245,6 @@ enum MatchDomainNameFlags { */ int matchDomainName(const char *host, const char *domain, MatchDomainNameFlags flags = mdnNone); int urlCheckRequest(const HttpRequest *); -char *urlHostname(const char *url); void urlExtMethodConfigure(void); #endif /* SQUID_SRC_ANYP_URI_H */ diff --git a/src/client_side_reply.cc b/src/client_side_reply.cc index d5fec1f826..4f4b51ef74 100644 --- a/src/client_side_reply.cc +++ b/src/client_side_reply.cc @@ -913,7 +913,7 @@ purgeEntriesByUrl(HttpRequest * req, const char *url) const cache_key *key = storeKeyPublic(url, m); debugs(88, 5, m << ' ' << url << ' ' << storeKeyText(key)); #if USE_HTCP - neighborsHtcpClear(nullptr, url, req, m, HTCP_CLR_INVALIDATION); + neighborsHtcpClear(nullptr, req, m, HTCP_CLR_INVALIDATION); #endif Store::Root().evictIfFound(key); } @@ -1051,7 +1051,7 @@ clientReplyContext::purgeDoPurgeGet(StoreEntry *newEntry) /* Release the cached URI */ debugs(88, 4, "clientPurgeRequest: GET '" << newEntry->url() << "'" ); #if USE_HTCP - neighborsHtcpClear(newEntry, NULL, http->request, HttpRequestMethod(Http::METHOD_GET), HTCP_CLR_PURGE); + neighborsHtcpClear(newEntry, http->request, HttpRequestMethod(Http::METHOD_GET), HTCP_CLR_PURGE); #endif newEntry->release(true); purgeStatus = Http::scOkay; @@ -1067,7 +1067,7 @@ clientReplyContext::purgeDoPurgeHead(StoreEntry *newEntry) if (newEntry) { debugs(88, 4, "HEAD " << newEntry->url()); #if USE_HTCP - neighborsHtcpClear(newEntry, NULL, http->request, HttpRequestMethod(Http::METHOD_HEAD), HTCP_CLR_PURGE); + neighborsHtcpClear(newEntry, http->request, HttpRequestMethod(Http::METHOD_HEAD), HTCP_CLR_PURGE); #endif newEntry->release(true); purgeStatus = Http::scOkay; @@ -1083,7 +1083,7 @@ clientReplyContext::purgeDoPurgeHead(StoreEntry *newEntry) if (entry) { debugs(88, 4, "Vary GET " << entry->url()); #if USE_HTCP - neighborsHtcpClear(entry, NULL, http->request, HttpRequestMethod(Http::METHOD_GET), HTCP_CLR_PURGE); + neighborsHtcpClear(entry, http->request, HttpRequestMethod(Http::METHOD_GET), HTCP_CLR_PURGE); #endif entry->release(true); purgeStatus = Http::scOkay; @@ -1094,7 +1094,7 @@ clientReplyContext::purgeDoPurgeHead(StoreEntry *newEntry) if (entry) { debugs(88, 4, "Vary HEAD " << entry->url()); #if USE_HTCP - neighborsHtcpClear(entry, NULL, http->request, HttpRequestMethod(Http::METHOD_HEAD), HTCP_CLR_PURGE); + neighborsHtcpClear(entry, http->request, HttpRequestMethod(Http::METHOD_HEAD), HTCP_CLR_PURGE); #endif entry->release(true); purgeStatus = Http::scOkay; diff --git a/src/clients/Client.cc b/src/clients/Client.cc index 722aa0f44e..31e07b4bef 100644 --- a/src/clients/Client.cc +++ b/src/clients/Client.cc @@ -426,7 +426,7 @@ Client::getMoreRequestBody(MemBuf &buf) static bool sameUrlHosts(const char *url1, const char *url2) { - // XXX: Want urlHostname() here, but it uses static storage and copying + // XXX: Want AnyP::Uri::parse() here, but it uses static storage and copying const char *host1 = strchr(url1, ':'); const char *host2 = strchr(url2, ':'); diff --git a/src/htcp.cc b/src/htcp.cc index a71a1d30d5..fa04feb204 100644 --- a/src/htcp.cc +++ b/src/htcp.cc @@ -145,7 +145,7 @@ public: public: const char *method = nullptr; - char *uri = nullptr; + const char *uri = nullptr; char *version = nullptr; char *req_hdrs = nullptr; size_t reqHdrsSz = 0; ///< size of the req_hdrs content @@ -816,6 +816,7 @@ htcpTstReply(htcpDataHeader * dhdr, StoreEntry * e, htcpSpecifier * spec, Ip::Ad if (spec) { stuff.S.method = spec->method; + stuff.S.request = spec->request; stuff.S.uri = spec->uri; stuff.S.version = spec->version; stuff.S.req_hdrs = spec->req_hdrs; @@ -850,15 +851,15 @@ htcpTstReply(htcpDataHeader * dhdr, StoreEntry * e, htcpSpecifier * spec, Ip::Ad hdr.clean(); #if USE_ICMP - if (char *host = urlHostname(spec->uri)) { + if (const char *host = spec->request->url.host()) { int rtt = 0; int hops = 0; int samp = 0; netdbHostData(host, &samp, &rtt, &hops); if (rtt || hops) { - char cto_buf[128]; - snprintf(cto_buf, 128, "%s %d %f %d", + char cto_buf[SQUIDHOSTNAMELEN+128]; + snprintf(cto_buf, sizeof(cto_buf), "%s %d %f %d", host, samp, 0.001 * rtt, hops); hdr.putExt("Cache-to-Origin", cto_buf); } @@ -1563,7 +1564,7 @@ htcpQuery(StoreEntry * e, HttpRequest * req, CachePeer * p) * Send an HTCP CLR message for a specified item to a given CachePeer. */ void -htcpClear(StoreEntry * e, const char *uri, HttpRequest * req, const HttpRequestMethod &, CachePeer * p, htcp_clr_reason reason) +htcpClear(StoreEntry * e, HttpRequest * req, const HttpRequestMethod &, CachePeer * p, htcp_clr_reason reason) { static char pkt[8192]; ssize_t pktlen; @@ -1585,14 +1586,9 @@ htcpClear(StoreEntry * e, const char *uri, HttpRequest * req, const HttpRequestM SBuf sb = req->method.image(); stuff.S.method = sb.c_str(); - if (e == NULL || e->mem_obj == NULL) { - if (uri == NULL) { - return; - } - stuff.S.uri = xstrdup(uri); - } else { - stuff.S.uri = (char *) e->url(); - } + stuff.S.request = req; + SBuf uri = req->effectiveRequestUri(); + stuff.S.uri = uri.c_str(); stuff.S.version = vbuf; if (reason != HTCP_CLR_INVALIDATION) { HttpStateData::httpBuildRequestHeader(req, e, NULL, &hdr, flags); @@ -1607,9 +1603,6 @@ htcpClear(StoreEntry * e, const char *uri, HttpRequest * req, const HttpRequestM if (reason != HTCP_CLR_INVALIDATION) { mb.clean(); } - if (e == NULL) { - xfree(stuff.S.uri); - } if (!pktlen) { debugs(31, 3, "htcpClear: htcpBuildPacket() failed"); return; diff --git a/src/htcp.h b/src/htcp.h index 850ba9bc36..0b5ab6eb32 100644 --- a/src/htcp.h +++ b/src/htcp.h @@ -61,7 +61,7 @@ void htcpOpenPorts(void); int htcpQuery(StoreEntry * e, HttpRequest * req, CachePeer * p); /// \ingroup ServerProtocolHTCP -void htcpClear(StoreEntry * e, const char *uri, HttpRequest * req, const HttpRequestMethod &method, CachePeer * p, htcp_clr_reason reason); +void htcpClear(StoreEntry * e, HttpRequest * req, const HttpRequestMethod &method, CachePeer * p, htcp_clr_reason reason); /// \ingroup ServerProtocolHTCP void htcpSocketShutdown(void); diff --git a/src/http.cc b/src/http.cc index fc34c90466..916fd410c4 100644 --- a/src/http.cc +++ b/src/http.cc @@ -253,7 +253,7 @@ httpMaybeRemovePublic(StoreEntry * e, Http::StatusCode status) if (pe != NULL) { assert(e != pe); #if USE_HTCP - neighborsHtcpClear(e, nullptr, e->mem_obj->request.getRaw(), e->mem_obj->method, HTCP_CLR_INVALIDATION); + neighborsHtcpClear(e, e->mem_obj->request.getRaw(), e->mem_obj->method, HTCP_CLR_INVALIDATION); #endif pe->release(true); } @@ -270,7 +270,7 @@ httpMaybeRemovePublic(StoreEntry * e, Http::StatusCode status) if (pe != NULL) { assert(e != pe); #if USE_HTCP - neighborsHtcpClear(e, nullptr, e->mem_obj->request.getRaw(), HttpRequestMethod(Http::METHOD_HEAD), HTCP_CLR_INVALIDATION); + neighborsHtcpClear(e, e->mem_obj->request.getRaw(), HttpRequestMethod(Http::METHOD_HEAD), HTCP_CLR_INVALIDATION); #endif pe->release(true); } diff --git a/src/neighbors.cc b/src/neighbors.cc index 18764be8e6..d908e7f9ad 100644 --- a/src/neighbors.cc +++ b/src/neighbors.cc @@ -1811,7 +1811,7 @@ neighborsHtcpReply(const cache_key * key, HtcpReplyData * htcp, const Ip::Addres * Send HTCP CLR messages to all peers configured to receive them. */ void -neighborsHtcpClear(StoreEntry * e, const char *uri, HttpRequest * req, const HttpRequestMethod &method, htcp_clr_reason reason) +neighborsHtcpClear(StoreEntry * e, HttpRequest * req, const HttpRequestMethod &method, htcp_clr_reason reason) { CachePeer *p; char buf[128]; @@ -1827,7 +1827,7 @@ neighborsHtcpClear(StoreEntry * e, const char *uri, HttpRequest * req, const Htt continue; } debugs(15, 3, "neighborsHtcpClear: sending CLR to " << p->in_addr.toUrl(buf, 128)); - htcpClear(e, uri, req, method, p, reason); + htcpClear(e, req, method, p, reason); } } diff --git a/src/neighbors.h b/src/neighbors.h index 298bdb9b0c..5fccc4880f 100644 --- a/src/neighbors.h +++ b/src/neighbors.h @@ -40,7 +40,7 @@ void neighborsUdpAck(const cache_key *, icp_common_t *, const Ip::Address &); void neighborAdd(const char *, const char *, int, int, int, int, int); void neighbors_init(void); #if USE_HTCP -void neighborsHtcpClear(StoreEntry *, const char *, HttpRequest *, const HttpRequestMethod &, htcp_clr_reason); +void neighborsHtcpClear(StoreEntry *, HttpRequest *, const HttpRequestMethod &, htcp_clr_reason); #endif CachePeer *peerFindByName(const char *); CachePeer *peerFindByNameAndPort(const char *, unsigned short); diff --git a/src/tests/stub_libanyp.cc b/src/tests/stub_libanyp.cc index fd43c8c149..7d49a6faf7 100644 --- a/src/tests/stub_libanyp.cc +++ b/src/tests/stub_libanyp.cc @@ -38,6 +38,5 @@ char *urlRInternal(const char *, unsigned short, const char *, const char *) STU char *urlInternal(const char *, const char *) STUB_RETVAL(nullptr) int matchDomainName(const char *, const char *, enum MatchDomainNameFlags) STUB_RETVAL(0) int urlCheckRequest(const HttpRequest *) STUB_RETVAL(0) -char *urlHostname(const char *) STUB_RETVAL(nullptr) void urlExtMethodConfigure() STUB diff --git a/src/urn.cc b/src/urn.cc index cd794bdfca..32f3b6688d 100644 --- a/src/urn.cc +++ b/src/urn.cc @@ -387,7 +387,6 @@ urnParseReply(const char *inbuf, const HttpRequestMethod& m) { char *buf = xstrdup(inbuf); char *token; - char *host; url_entry *list; url_entry *old; int n = 32; @@ -406,24 +405,23 @@ urnParseReply(const char *inbuf, const HttpRequestMethod& m) safe_free(old); } - host = urlHostname(token); - - if (NULL == host) + AnyP::Uri uri; + if (!uri.parse(m, SBuf(token)) || !*uri.host()) continue; #if USE_ICMP - list[i].rtt = netdbHostRtt(host); + list[i].rtt = netdbHostRtt(uri.host()); if (0 == list[i].rtt) { - debugs(52, 3, "urnParseReply: Pinging " << host); - netdbPingSite(host); + debugs(52, 3, "Pinging " << uri.host()); + netdbPingSite(uri.host()); } #else list[i].rtt = 0; #endif - list[i].url = xstrdup(token); - list[i].host = xstrdup(host); + list[i].url = xstrdup(uri.absolute().c_str()); + list[i].host = xstrdup(uri.host()); // TODO: Use storeHas() or lock/unlock entry to avoid creating unlocked // ones. list[i].flags.cached = storeGetPublic(list[i].url, m) ? 1 : 0; -- 2.39.5