From: Eduard Bagdasaryan Date: Tue, 17 Jul 2018 15:31:23 +0000 (+0000) Subject: Fix %>ru logging of huge URLs (#229) X-Git-Tag: M-staged-PR229 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=bec110e46f9dfba5ac076e337d9aedc9046d4c1e;p=thirdparty%2Fsquid.git Fix %>ru logging of huge URLs (#229) When dealing with an HTTP request header that Squid can parse but that contains request URI length exceeding the 8K limit, Squid should log the URL (prefix) instead of a dash. Logging the URL helps with triaging these unusual requests. The older %ru (LFT_REQUEST_URI) was already logging these huge URLs, but %>ru (LFT_CLIENT_REQ_URI) was logging a dash. Now both log the URL (or its prefix). As a side effect, %>ru now also logs error:request-too-large, error:transaction-end-before-headers and other Squid-specific pseudo-URLs, as appropriate. Also refactored request- and URI-recording code to reduce chances of similar inconsistencies reappearing in the future. Also, honor strip_query_terms in %ru for large URLs. Not stripping query string in %ru was a security problem. Also fixed a bug with "redirected" flag calculation in ClientHttpRequest::handleAdaptedHeader(). In general, http->url and request->url should not be compared directly, because the latter always passes through uri_whitespace cleanup, while the former does not. Also fixed a bug with possibly wrong %ru after redirection: ClientHttpRequest::log_uri was not updated in this case. Also initialize AccessLogEntry::request and AccessLogEntry::notes ASAP. Before this change, these fields were initialized in ClientHttpRequest::doCallouts(). It is better to initialize them just after the request object is created so that ACLs, running before doCallouts(), could have them at hand. There are at least three such ACLs: force_request_body_continuation, spoof_client_ip and spoof_client_ip. Also synced %ru and %>ru documentation with the current code. --- diff --git a/src/AccessLogEntry.cc b/src/AccessLogEntry.cc index 73d8f73133..1d87505ea6 100644 --- a/src/AccessLogEntry.cc +++ b/src/AccessLogEntry.cc @@ -117,3 +117,15 @@ AccessLogEntry::~AccessLogEntry() #endif } +const SBuf * +AccessLogEntry::effectiveVirginUrl() const +{ + const SBuf *effectiveUrl = request ? &request->url.absolute() : &virginUrlForMissingRequest_; + if (effectiveUrl && !effectiveUrl->isEmpty()) + return effectiveUrl; + // We can not use ALE::url here because it may contain a request URI after + // adaptation/redirection. When the request is missing, a non-empty ALE::url + // means that we missed a setVirginUrlForMissingRequest() call somewhere. + return nullptr; +} + diff --git a/src/AccessLogEntry.h b/src/AccessLogEntry.h index 3e436c0364..b462edf34f 100644 --- a/src/AccessLogEntry.h +++ b/src/AccessLogEntry.h @@ -228,6 +228,24 @@ public: } icap; #endif + + /// Effective URI of the received client (or equivalent) HTTP request or, + /// in rare cases where that information was not collected, a nil pointer. + /// Receiving errors are represented by "error:..." URIs. + /// Adaptations and redirections do not affect this URI. + const SBuf *effectiveVirginUrl() const; + + /// Remember Client URI (or equivalent) when there is no HttpRequest. + void setVirginUrlForMissingRequest(const SBuf &vu) + { + if (!request) + virginUrlForMissingRequest_ = vu; + } + +private: + /// Client URI (or equivalent) for effectiveVirginUrl() when HttpRequest is + /// missing. This member is ignored unless the request member is nil. + SBuf virginUrlForMissingRequest_; }; class ACLChecklist; diff --git a/src/Downloader.cc b/src/Downloader.cc index c78c3c85a1..2e95265234 100644 --- a/src/Downloader.cc +++ b/src/Downloader.cc @@ -151,12 +151,10 @@ Downloader::buildRequest() "\n----------"); ClientHttpRequest *const http = new ClientHttpRequest(nullptr); - http->request = request; - HTTPMSGLOCK(http->request); + http->initRequest(request); http->req_sz = 0; // XXX: performance regression. c_str() reallocates http->uri = xstrdup(url_.c_str()); - setLogUri (http, urlCanonicalClean(request)); context_ = new DownloaderContext(this, http); StoreIOBuffer tempBuffer; diff --git a/src/HttpRequest.cc b/src/HttpRequest.cc index 2c5858faa7..72cb7f279e 100644 --- a/src/HttpRequest.cc +++ b/src/HttpRequest.cc @@ -738,6 +738,12 @@ HttpRequest::manager(const CbcPointer &aMgr, const AccessLogEntry } } +char * +HttpRequest::canonicalCleanUrl() const +{ + return urlCanonicalCleanWithoutRequest(effectiveRequestUri(), method, url.getScheme()); +} + /// a helper for validating FindListeningPortAddress()-found address candidates static const Ip::Address * FindListeningPortAddressInAddress(const Ip::Address *ip) diff --git a/src/HttpRequest.h b/src/HttpRequest.h index f8b7d4a8f7..eee849adcd 100644 --- a/src/HttpRequest.h +++ b/src/HttpRequest.h @@ -70,6 +70,10 @@ public: /// whether the client is likely to be able to handle a 1xx reply bool canHandle1xx() const; + /// \returns a pointer to a local static buffer containing request URI + /// that honors strip_query_terms and %-encodes unsafe URI characters + char *canonicalCleanUrl() const; + #if USE_ADAPTATION /// Returns possibly nil history, creating it if adapt. logging is enabled Adaptation::History::Pointer adaptLogHistory() const; diff --git a/src/Makefile.am b/src/Makefile.am index dc11f88657..c83c3781bb 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -1367,7 +1367,6 @@ tests_testCacheManager_SOURCES = \ tests/stub_SwapDir.cc \ MemStore.cc \ $(UNLINKDSOURCE) \ - tests/stub_libanyp.cc \ urn.h \ urn.cc \ wccp2.h \ @@ -1799,7 +1798,6 @@ tests_testEvent_SOURCES = \ tests/stub_tunnel.cc \ MemStore.cc \ $(UNLINKDSOURCE) \ - tests/stub_libanyp.cc \ urn.h \ urn.cc \ wccp2.h \ @@ -2033,7 +2031,6 @@ tests_testEventLoop_SOURCES = \ tests/stub_tunnel.cc \ MemStore.cc \ $(UNLINKDSOURCE) \ - tests/stub_libanyp.cc \ urn.h \ urn.cc \ wccp2.h \ @@ -2262,7 +2259,6 @@ tests_test_http_range_SOURCES = \ tools.cc \ tests/stub_tunnel.cc \ $(UNLINKDSOURCE) \ - tests/stub_libanyp.cc \ urn.h \ urn.cc \ wccp2.h \ diff --git a/src/anyp/Uri.cc b/src/anyp/Uri.cc index f2c2b222a0..f570e6f3a3 100644 --- a/src/anyp/Uri.cc +++ b/src/anyp/Uri.cc @@ -515,15 +515,15 @@ AnyP::Uri::absolute() const * and never copy the query-string part in the first place */ char * -urlCanonicalClean(const HttpRequest * request) +urlCanonicalCleanWithoutRequest(const SBuf &url, const HttpRequestMethod &method, const AnyP::UriScheme &scheme) { LOCAL_ARRAY(char, buf, MAX_URL); - snprintf(buf, sizeof(buf), SQUIDSBUFPH, SQUIDSBUFPRINT(request->effectiveRequestUri())); + snprintf(buf, sizeof(buf), SQUIDSBUFPH, SQUIDSBUFPRINT(url)); buf[sizeof(buf)-1] = '\0'; // URN, CONNECT method, and non-stripped URIs can go straight out - if (Config.onoff.strip_query_terms && !(request->method == Http::METHOD_CONNECT || request->url.getScheme() == AnyP::PROTO_URN)) { + if (Config.onoff.strip_query_terms && !(method == Http::METHOD_CONNECT || scheme == AnyP::PROTO_URN)) { // strip anything AFTER a question-mark // leaving the '?' in place if (auto t = strchr(buf, '?')) { @@ -555,7 +555,7 @@ urlCanonicalFakeHttps(const HttpRequest * request) } // else do the normal complete canonical thing. - return urlCanonicalClean(request); + return request->canonicalCleanUrl(); } /* @@ -936,3 +936,55 @@ AnyP::Uri::Uri(AnyP::UriScheme const &aScheme) : *host_=0; } +// TODO: fix code duplication with AnyP::Uri::parse() +char * +AnyP::Uri::cleanup(const char *uri) +{ + int flags = 0; + char *cleanedUri = nullptr; + switch (Config.uri_whitespace) { + case URI_WHITESPACE_ALLOW: + flags |= RFC1738_ESCAPE_NOSPACE; + // fall through to next case + case URI_WHITESPACE_ENCODE: + flags |= RFC1738_ESCAPE_UNESCAPED; + cleanedUri = xstrndup(rfc1738_do_escape(uri, flags), MAX_URL); + break; + + case URI_WHITESPACE_CHOP: { + flags |= RFC1738_ESCAPE_UNESCAPED; + const auto pos = strcspn(uri, w_space); + char *choppedUri = nullptr; + if (pos < strlen(uri)) + choppedUri = xstrndup(uri, pos + 1); + cleanedUri = xstrndup(rfc1738_do_escape(choppedUri ? choppedUri : uri, flags), MAX_URL); + cleanedUri[pos] = '\0'; + xfree(choppedUri); + } + break; + + case URI_WHITESPACE_DENY: + case URI_WHITESPACE_STRIP: + default: { + // TODO: avoid duplication with urlParse() + const char *t; + char *tmp_uri = static_cast(xmalloc(strlen(uri) + 1)); + char *q = tmp_uri; + t = uri; + while (*t) { + if (!xisspace(*t)) { + *q = *t; + ++q; + } + ++t; + } + *q = '\0'; + cleanedUri = xstrndup(rfc1738_escape_unescaped(tmp_uri), MAX_URL); + xfree(tmp_uri); + } + break; + } + + assert(cleanedUri); + return cleanedUri; +} diff --git a/src/anyp/Uri.h b/src/anyp/Uri.h index 5806438e2b..67f0af59f6 100644 --- a/src/anyp/Uri.h +++ b/src/anyp/Uri.h @@ -61,6 +61,9 @@ public: bool parse(const HttpRequestMethod &, const char *url); + /// \return a new URI that honors uri_whitespace + static char *cleanup(const char *uri); + AnyP::UriScheme const & getScheme() const {return scheme_;} /// convert the URL scheme to that given @@ -179,7 +182,10 @@ operator <<(std::ostream &os, const AnyP::Uri &url) class HttpRequest; void urlInitialize(void); -char *urlCanonicalClean(const HttpRequest *); +/// call HttpRequest::canonicalCleanUrl() instead if you have HttpRequest +/// \returns a pointer to a local static buffer containing request URI +/// that honors strip_query_terms and %-encodes unsafe URI characters +char *urlCanonicalCleanWithoutRequest(const SBuf &url, const HttpRequestMethod &, const AnyP::UriScheme &); const char *urlCanonicalFakeHttps(const HttpRequest * request); bool urlIsRelative(const char *); char *urlMakeAbsolute(const HttpRequest *, const char *); diff --git a/src/cf.data.pre b/src/cf.data.pre index a62d972c0c..181253fa72 100644 --- a/src/cf.data.pre +++ b/src/cf.data.pre @@ -4434,19 +4434,42 @@ DOC_START The is a string with embedded % format codes - % format codes all follow the same basic structure where all but - the formatcode is optional. Output strings are automatically escaped - as required according to their context and the output format - modifiers are usually not needed, but can be specified if an explicit - output format is desired. + % format codes all follow the same basic structure where all + components but the formatcode are optional and usually unnecessary, + especially when dealing with common codes. - % ["|[|'|#|/] [-] [[0]width] [{arg}] formatcode [{arg}] + % [encoding] [-] [[0]width] [{arg}] formatcode [{arg}] - " output in quoted string format - [ output in squid text log format as used by log_mime_hdrs - # output in URL quoted format - / output in shell \-escaped format - ' output as-is + encoding escapes or otherwise protects "special" characters: + + " Quoted string encoding where quote(") and + backslash(\) characters are \-escaped while + CR, LF, and TAB characters are encoded as \r, + \n, and \t two-character sequences. + + [ Custom Squid encoding where percent(%), square + brackets([]), backslash(\) and characters with + codes outside of [32,126] range are %-encoded. + SP is not encoded. Used by log_mime_hdrs. + + # URL encoding (a.k.a. percent-encoding) where + all URL unsafe and control characters (per RFC + 1738) are %-encoded. + + / Shell-like encoding where quote(") and + backslash(\) characters are \-escaped while CR + and LF characters are encoded as \r and \n + two-character sequences. Values containing SP + character(s) are surrounded by quotes("). + + ' Raw/as-is encoding with no escaping/quoting. + + Default encoding: When no explicit encoding is + specified, each %code determines its own encoding. + Most %codes use raw/as-is encoding, but some codes use + a so called "pass-through URL encoding" where all URL + unsafe and control characters (per RFC 1738) are + %-encoded, but the percent character(%) is left as is. - left aligned @@ -4583,8 +4606,40 @@ DOC_START [http::]rm Request method (GET/POST etc) [http::]>rm Request method from client [http::]ru Request URL from client + + [http::]ru Request URL received (or computed) and sanitized + + Logs request URI received from the client, a + request adaptation service, or a request + redirector (whichever was applied last). + + Computed URLs are URIs of internally generated + requests and various "error:..." URIs. + + Honors strip_query_terms and uri_whitespace. + + This field is not encoded by default. Encoding + this field using variants of %-encoding will + clash with uri_whitespace modifications that + also use %-encoding. + + [http::]>ru Request URL received from the client (or computed) + + Computed URLs are URIs of internally generated + requests and various "error:..." URIs. + + Unlike %ru, this request URI is not affected + by request adaptation, URL rewriting services, + and strip_query_terms. + + Honors uri_whitespace. + + This field is using pass-through URL encoding + by default. Encoding this field using other + variants of %-encoding will clash with + uri_whitespace modifications that also use + %-encoding. + [http::]rs Request URL scheme from client [http::]data, this); @@ -1012,8 +1010,7 @@ ConnStateData::abortRequestParsing(const char *const uri) { ClientHttpRequest *http = new ClientHttpRequest(this); http->req_sz = inBuf.length(); - http->uri = xstrdup(uri); - setLogUri (http, uri); + http->setErrorUri(uri); auto *context = new Http::Stream(clientConnection, http); StoreIOBuffer tempBuffer; tempBuffer.data = context->reqbuf; @@ -1085,57 +1082,6 @@ findTrailingHTTPVersion(const char *uriAndHTTPVersion, const char *end) return NULL; } -void -setLogUri(ClientHttpRequest * http, char const *uri, bool cleanUrl) -{ - safe_free(http->log_uri); - - if (!cleanUrl) - // The uri is already clean just dump it. - http->log_uri = xstrndup(uri, MAX_URL); - else { - int flags = 0; - switch (Config.uri_whitespace) { - case URI_WHITESPACE_ALLOW: - flags |= RFC1738_ESCAPE_NOSPACE; - - case URI_WHITESPACE_ENCODE: - flags |= RFC1738_ESCAPE_UNESCAPED; - http->log_uri = xstrndup(rfc1738_do_escape(uri, flags), MAX_URL); - break; - - case URI_WHITESPACE_CHOP: { - flags |= RFC1738_ESCAPE_NOSPACE; - flags |= RFC1738_ESCAPE_UNESCAPED; - http->log_uri = xstrndup(rfc1738_do_escape(uri, flags), MAX_URL); - int pos = strcspn(http->log_uri, w_space); - http->log_uri[pos] = '\0'; - } - break; - - case URI_WHITESPACE_DENY: - case URI_WHITESPACE_STRIP: - default: { - const char *t; - char *tmp_uri = static_cast(xmalloc(strlen(uri) + 1)); - char *q = tmp_uri; - t = uri; - while (*t) { - if (!xisspace(*t)) { - *q = *t; - ++q; - } - ++t; - } - *q = '\0'; - http->log_uri = xstrndup(rfc1738_escape_unescaped(tmp_uri), MAX_URL); - xfree(tmp_uri); - } - break; - } - } -} - static void prepareAcceleratedURL(ConnStateData * conn, ClientHttpRequest *http, const Http1::RequestParserPointer &hp) { @@ -1499,12 +1445,6 @@ bool ConnStateData::serveDelayedError(Http::Stream *context) debugs(33, 5, "Responding with delated error for " << http->uri); repContext->setReplyToStoreEntry(sslServerBump->entry, "delayed SslBump error"); - // save the original request for logging purposes - if (!context->http->al->request) { - context->http->al->request = http->request; - HTTPMSGLOCK(context->http->al->request); - } - // Get error details from the fake certificate-peeking request. http->request->detailError(sslServerBump->request->errType, sslServerBump->request->errDetail); context->pullData(); @@ -1547,11 +1487,6 @@ bool ConnStateData::serveDelayedError(Http::Stream *context) SQUID_X509_V_ERR_DOMAIN_MISMATCH, srvCert.get(), nullptr); err->detail = errDetail; - // Save the original request for logging purposes. - if (!context->http->al->request) { - context->http->al->request = request; - HTTPMSGLOCK(context->http->al->request); - } repContext->setReplyToError(request->method, err); assert(context->http->out.offset == 0); context->pullData(); @@ -1661,12 +1596,12 @@ clientProcessRequest(ConnStateData *conn, const Http1::RequestParserPointer &hp, request->url.host(internalHostname()); request->url.port(getMyPort()); http->flags.internal = true; + http->setLogUriToRequestUri(); } else debugs(33, 2, "internal URL found: " << request->url.getScheme() << "://" << request->url.authority(true) << " (not this proxy)"); } request->flags.internal = http->flags.internal; - setLogUri (http, urlCanonicalClean(request.getRaw())); if (!isFtp) { // XXX: for non-HTTP messages instantiate a different Http::Message child type @@ -3469,8 +3404,7 @@ ConnStateData::buildFakeRequest(Http::MethodType const method, SBuf &useHost, un request->method = method; request->url.host(useHost.c_str()); request->url.port(usePort); - http->request = request.getRaw(); - HTTPMSGLOCK(http->request); + http->initRequest(request.getRaw()); request->manager(this, http->al); @@ -3486,7 +3420,6 @@ ConnStateData::buildFakeRequest(Http::MethodType const method, SBuf &useHost, un inBuf = payload; flags.readMore = false; - setLogUri(http, urlCanonicalClean(request.getRaw())); return http; } @@ -4164,9 +4097,7 @@ ConnStateData::checkLogging() ClientHttpRequest http(this); http.req_sz = inBuf.length(); // XXX: Or we died while waiting for the pinned connection to become idle. - char const *uri = "error:transaction-end-before-headers"; - http.uri = xstrdup(uri); - setLogUri(&http, uri); + http.setErrorUri("error:transaction-end-before-headers"); } bool diff --git a/src/client_side.h b/src/client_side.h index d8a2ebc8b9..459b66567e 100644 --- a/src/client_side.h +++ b/src/client_side.h @@ -418,8 +418,6 @@ private: NotePairs::Pointer theNotes; }; -void setLogUri(ClientHttpRequest * http, char const *uri, bool cleanUrl = false); - const char *findTrailingHTTPVersion(const char *uriAndHTTPVersion, const char *end = NULL); int varyEvaluateMatch(StoreEntry * entry, HttpRequest * req); diff --git a/src/client_side_reply.cc b/src/client_side_reply.cc index 1366bd0b59..355863266b 100644 --- a/src/client_side_reply.cc +++ b/src/client_side_reply.cc @@ -2285,7 +2285,10 @@ clientReplyContext::createStoreEntry(const HttpRequestMethod& m, RequestFlags re if (http->request == NULL) { const MasterXaction::Pointer mx = new MasterXaction(XactionInitiator::initClient); - http->request = new HttpRequest(m, AnyP::PROTO_NONE, "http", null_string, mx); + // XXX: These fake URI parameters shadow the real (or error:...) URI. + // TODO: Either always set the request earlier and assert here OR use + // http->uri (converted to Anyp::Uri) to create this catch-all request. + const_cast(http->request) = new HttpRequest(m, AnyP::PROTO_NONE, "http", null_string, mx); HTTPMSGLOCK(http->request); } diff --git a/src/client_side_request.cc b/src/client_side_request.cc index 74c6f41d7c..69c18315af 100644 --- a/src/client_side_request.cc +++ b/src/client_side_request.cc @@ -49,6 +49,7 @@ #include "Parsing.h" #include "profiler/Profiler.h" #include "redirect.h" +#include "rfc1738.h" #include "SquidConfig.h" #include "SquidTime.h" #include "Store.h" @@ -357,8 +358,6 @@ clientBeginRequest(const HttpRequestMethod& method, char const *url, CSCB * stre if (header) request->header.update(header); - http->log_uri = xstrdup(urlCanonicalClean(request)); - /* http struct now ready */ /* @@ -389,8 +388,7 @@ clientBeginRequest(const HttpRequestMethod& method, char const *url, CSCB * stre request->http_ver = Http::ProtocolVersion(); - http->request = request; - HTTPMSGLOCK(http->request); + http->initRequest(request); /* optional - skip the access check ? */ http->calloutContext = new ClientRequestContext(http); @@ -1273,12 +1271,8 @@ ClientRequestContext::clientRedirectDone(const Helper::Reply &reply) " from request " << old_request << " to " << new_request); } - // update the current working ClientHttpRequest fields - xfree(http->uri); - http->uri = SBufToCstring(new_request->effectiveRequestUri()); - HTTPMSGUNLOCK(old_request); - http->request = new_request; - HTTPMSGLOCK(http->request); + http->resetRequest(new_request); + old_request = nullptr; } else { debugs(85, DBG_CRITICAL, "ERROR: URL-rewrite produces invalid request: " << old_request->method << " " << urlNote << " " << old_request->http_ver); @@ -1649,6 +1643,52 @@ ClientHttpRequest::loggingEntry(StoreEntry *newEntry) loggingEntry_->lock("ClientHttpRequest::loggingEntry"); } +void +ClientHttpRequest::initRequest(HttpRequest *aRequest) +{ + assignRequest(aRequest); + if (const auto csd = getConn()) { + if (!csd->notes()->empty()) + request->notes()->appendNewOnly(csd->notes().getRaw()); + } + // al is created in the constructor + assert(al); + if (!al->request) { + al->request = request; + HTTPMSGLOCK(al->request); + al->syncNotes(request); + } +} + +void +ClientHttpRequest::resetRequest(HttpRequest *newRequest) +{ + assert(request != newRequest); + clearRequest(); + assignRequest(newRequest); + xfree(uri); + uri = SBufToCstring(request->effectiveRequestUri()); +} + +void +ClientHttpRequest::assignRequest(HttpRequest *newRequest) +{ + assert(newRequest); + assert(!request); + const_cast(request) = newRequest; + HTTPMSGLOCK(request); + setLogUriToRequestUri(); +} + +void +ClientHttpRequest::clearRequest() +{ + HttpRequest *oldRequest = request; + HTTPMSGUNLOCK(oldRequest); + const_cast(request) = nullptr; + absorbLogUri(nullptr); +} + /* * doCallouts() - This function controls the order of "callout" * executions, including non-blocking access control checks, the @@ -1689,20 +1729,6 @@ ClientHttpRequest::doCallouts() { assert(calloutContext); - auto &ale = calloutContext->http->al; - /*Save the original request for logging purposes*/ - if (!ale->request) { - ale->request = request; - HTTPMSGLOCK(ale->request); - - // Make the previously set client connection ID available as annotation. - if (ConnStateData *csd = calloutContext->http->getConn()) { - if (!csd->notes()->empty()) - calloutContext->http->request->notes()->appendNewOnly(csd->notes().getRaw()); - } - ale->syncNotes(calloutContext->http->request); - } - if (!calloutContext->error) { // CVE-2009-0801: verify the Host: header is consistent with other known details. if (!calloutContext->host_header_verify_done) { @@ -1863,6 +1889,53 @@ ClientHttpRequest::doCallouts() #endif } +void +ClientHttpRequest::setLogUriToRequestUri() +{ + assert(request); + const auto canonicalUri = request->canonicalCleanUrl(); + absorbLogUri(xstrndup(canonicalUri, MAX_URL)); +} + +void +ClientHttpRequest::setLogUriToRawUri(const char *rawUri, const HttpRequestMethod &method) +{ + assert(rawUri); + // Should(!request); + + // TODO: SBuf() performance regression, fix by converting rawUri to SBuf + char *canonicalUri = urlCanonicalCleanWithoutRequest(SBuf(rawUri), method, AnyP::UriScheme()); + + absorbLogUri(AnyP::Uri::cleanup(canonicalUri)); + + char *cleanedRawUri = AnyP::Uri::cleanup(rawUri); + al->setVirginUrlForMissingRequest(SBuf(cleanedRawUri)); + xfree(cleanedRawUri); +} + +void +ClientHttpRequest::absorbLogUri(char *aUri) +{ + xfree(log_uri); + const_cast(log_uri) = aUri; +} + +void +ClientHttpRequest::setErrorUri(const char *aUri) +{ + assert(!uri); + assert(aUri); + // Should(!request); + + uri = xstrdup(aUri); + // TODO: SBuf() performance regression, fix by converting setErrorUri() parameter to SBuf + const SBuf errorUri(aUri); + const auto canonicalUri = urlCanonicalCleanWithoutRequest(errorUri, HttpRequestMethod(), AnyP::UriScheme()); + absorbLogUri(xstrndup(canonicalUri, MAX_URL)); + + al->setVirginUrlForMissingRequest(errorUri); +} + #if USE_ADAPTATION /// Initiate an asynchronous adaptation transaction which will call us back. void @@ -1907,23 +1980,10 @@ ClientHttpRequest::handleAdaptedHeader(Http::Message *msg) assert(msg); if (HttpRequest *new_req = dynamic_cast(msg)) { - /* - * Replace the old request with the new request. - */ - HTTPMSGUNLOCK(request); - request = new_req; - HTTPMSGLOCK(request); - // update the new message to flag whether URL re-writing was done on it - if (request->effectiveRequestUri().cmp(uri) != 0) - request->flags.redirected = 1; - - /* - * Store the new URI for logging - */ - xfree(uri); - uri = SBufToCstring(request->effectiveRequestUri()); - setLogUri(this, urlCanonicalClean(request)); + if (request->effectiveRequestUri() != new_req->effectiveRequestUri()) + new_req->flags.redirected = true; + resetRequest(new_req); assert(request->method.id()); } else if (HttpReply *new_rep = dynamic_cast(msg)) { debugs(85,3,HERE << "REQMOD reply is HTTP reply"); diff --git a/src/client_side_request.h b/src/client_side_request.h index d3a6ec7529..dc692d624f 100644 --- a/src/client_side_request.h +++ b/src/client_side_request.h @@ -72,14 +72,38 @@ public: } } + /// Initializes the current request with the virgin request. + /// Call this method when the virgin request becomes known. + /// To update the current request later, use resetRequest(). + void initRequest(HttpRequest *); + + /// Resets the current request to the latest adapted or redirected + /// request. Call this every time adaptation or redirection changes + /// the request. To set the virgin request, use initRequest(). + void resetRequest(HttpRequest *); + /** Details of the client socket which produced us. * Treat as read-only for the lifetime of this HTTP request. */ Comm::ConnectionPointer clientConnection; - HttpRequest *request; /* Parsed URL ... */ + /// Request currently being handled by ClientHttpRequest. + /// Usually remains nil until the virgin request header is parsed or faked. + /// Starts as a virgin request; see initRequest(). + /// Adaptation and redirections replace it; see resetRequest(). + HttpRequest * const request; + + /// Usually starts as a URI received from the client, with scheme and host + /// added if needed. Is used to create the virgin request for initRequest(). + /// URIs of adapted/redirected requests replace it via resetRequest(). char *uri; - char *log_uri; + + // TODO: remove this field and store the URI directly in al->url + /// Cleaned up URI of the current (virgin or adapted/redirected) request, + /// computed URI of an internally-generated requests, or + /// one of the hard-coded "error:..." URIs. + char * const log_uri; + String store_id; /* StoreID for transactions where the request member is nil */ struct Out { @@ -122,6 +146,18 @@ public: ClientRequestContext *calloutContext; void doCallouts(); + // The three methods below prepare log_uri and friends for future logging. + // Call the best-fit method whenever the current request or its URI changes. + + /// sets log_uri when we know the current request + void setLogUriToRequestUri(); + /// sets log_uri to a parsed request URI when Squid fails to parse or + /// validate other request components, yielding no current request + void setLogUriToRawUri(const char *rawUri, const HttpRequestMethod &); + /// sets log_uri and uri to an internally-generated "error:..." URI when + /// neither the current request nor the parsed request URI are known + void setErrorUri(const char *errorUri); + /// Build an error reply. For use with the callouts. void calloutsError(const err_type error, const int errDetail); @@ -135,6 +171,14 @@ public: #endif private: + /// assigns log_uri with aUri without copying the entire C-string + void absorbLogUri(char *aUri); + /// resets the current request and log_uri to nil + void clearRequest(); + /// initializes the current unassigned request to the virgin request + /// sets the current request, asserting that it was unset + void assignRequest(HttpRequest *aRequest); + int64_t maxReplyBodySize_; StoreEntry *entry_; StoreEntry *loggingEntry_; diff --git a/src/comm/TcpAcceptor.cc b/src/comm/TcpAcceptor.cc index 2973ba0031..ca009ff6d3 100644 --- a/src/comm/TcpAcceptor.cc +++ b/src/comm/TcpAcceptor.cc @@ -264,6 +264,7 @@ logAcceptError(const Comm::ConnectionPointer &conn) AccessLogEntry::Pointer al = new AccessLogEntry; al->tcpClient = conn; al->url = "error:accept-client-connection"; + al->setVirginUrlForMissingRequest(al->url); ACLFilledChecklist ch(nullptr, nullptr, nullptr); ch.src_addr = conn->remote; ch.my_addr = conn->local; diff --git a/src/format/Format.cc b/src/format/Format.cc index 56cd6cce14..82d13bf8b0 100644 --- a/src/format/Format.cc +++ b/src/format/Format.cc @@ -985,9 +985,8 @@ Format::Format::assemble(MemBuf &mb, const AccessLogEntry::Pointer &al, int logS break; case LFT_CLIENT_REQ_URI: - // original client URI - if (al->request) { - sb = al->request->effectiveRequestUri(); + if (const auto uri = al->effectiveVirginUrl()) { + sb = *uri; out = sb.c_str(); quote = 1; } diff --git a/src/htcp.cc b/src/htcp.cc index 8c3fa71026..f77d09dcbe 100644 --- a/src/htcp.cc +++ b/src/htcp.cc @@ -274,6 +274,7 @@ htcpSyncAle(AccessLogEntryPointer &al, const Ip::Address &caddr, const int opcod al->cache.caddr = caddr; al->htcp.opcode = htcpOpcodeStr[opcode]; al->url = url; + al->setVirginUrlForMissingRequest(al->url); // HTCP transactions do not wait al->cache.start_time = current_time; al->cache.trTime.tv_sec = 0; diff --git a/src/icp_v2.cc b/src/icp_v2.cc index 3a56b690ab..57dfde381e 100644 --- a/src/icp_v2.cc +++ b/src/icp_v2.cc @@ -80,6 +80,7 @@ icpSyncAle(AccessLogEntryPointer &al, const Ip::Address &caddr, const char *url, al->icp.opcode = ICP_QUERY; al->cache.caddr = caddr; al->url = url; + al->setVirginUrlForMissingRequest(al->url); // XXX: move to use icp.clientReply instead al->http.clientReplySz.payloadData = len; al->cache.start_time = current_time; diff --git a/src/servers/FtpServer.cc b/src/servers/FtpServer.cc index 37acca4ba0..c775968cea 100644 --- a/src/servers/FtpServer.cc +++ b/src/servers/FtpServer.cc @@ -749,10 +749,9 @@ Ftp::Server::parseOneRequest() } ClientHttpRequest *const http = new ClientHttpRequest(this); - http->request = request; - HTTPMSGLOCK(http->request); http->req_sz = tok.parsedSize(); http->uri = newUri; + http->initRequest(request); Http::Stream *const result = new Http::Stream(clientConnection, http); @@ -1736,8 +1735,6 @@ Ftp::Server::setReply(const int code, const char *msg) HttpReply *const reply = Ftp::HttpReplyWrapper(code, msg, Http::scNoContent, 0); - setLogUri(http, urlCanonicalClean(http->request)); - clientStreamNode *const node = context->getClientReplyContext(); clientReplyContext *const repContext = dynamic_cast(node->data.getRaw()); diff --git a/src/servers/Http1Server.cc b/src/servers/Http1Server.cc index b0ae347682..0ab2dbee73 100644 --- a/src/servers/Http1Server.cc +++ b/src/servers/Http1Server.cc @@ -118,8 +118,10 @@ Http::One::Server::buildHttpRequest(Http::StreamPointer &context) // else use default ERR_INVALID_REQ set above. break; } - // setLogUri should called before repContext->setReplyToError - setLogUri(http, http->uri, true); + // setReplyToError() requires log_uri + // must be already initialized via ConnStateData::abortRequestParsing() + assert(http->log_uri); + const char * requestErrorBytes = inBuf.c_str(); if (!clientTunnelOnError(this, context, request, parser_->method(), errPage)) { setReplyError(context, request, parser_->method(), errPage, parser_->parseStatusCode, requestErrorBytes); @@ -135,8 +137,8 @@ Http::One::Server::buildHttpRequest(Http::StreamPointer &context) mx->tcpClient = clientConnection; if ((request = HttpRequest::FromUrl(http->uri, mx, parser_->method())) == NULL) { debugs(33, 5, "Invalid URL: " << http->uri); - // setLogUri should called before repContext->setReplyToError - setLogUri(http, http->uri, true); + // setReplyToError() requires log_uri + http->setLogUriToRawUri(http->uri, parser_->method()); const char * requestErrorBytes = inBuf.c_str(); if (!clientTunnelOnError(this, context, request, parser_->method(), ERR_INVALID_URL)) { @@ -154,8 +156,8 @@ Http::One::Server::buildHttpRequest(Http::StreamPointer &context) (parser_->messageProtocol().major > 1) ) { debugs(33, 5, "Unsupported HTTP version discovered. :\n" << parser_->messageProtocol()); - // setLogUri should called before repContext->setReplyToError - setLogUri(http, http->uri, true); + // setReplyToError() requires log_uri + http->setLogUriToRawUri(http->uri, parser_->method()); const char * requestErrorBytes = NULL; //HttpParserHdrBuf(parser_); if (!clientTunnelOnError(this, context, request, parser_->method(), ERR_UNSUP_HTTPVERSION)) { @@ -168,8 +170,8 @@ Http::One::Server::buildHttpRequest(Http::StreamPointer &context) /* compile headers */ if (parser_->messageProtocol().major >= 1 && !request->parseHeader(*parser_.getRaw())) { debugs(33, 5, "Failed to parse request headers:\n" << parser_->mimeHeader()); - // setLogUri should called before repContext->setReplyToError - setLogUri(http, http->uri, true); + // setReplyToError() requires log_uri + http->setLogUriToRawUri(http->uri, parser_->method()); const char * requestErrorBytes = NULL; //HttpParserHdrBuf(parser_); if (!clientTunnelOnError(this, context, request, parser_->method(), ERR_INVALID_REQ)) { setReplyError(context, request, parser_->method(), ERR_INVALID_REQ, Http::scBadRequest, requestErrorBytes); @@ -195,8 +197,7 @@ Http::One::Server::buildHttpRequest(Http::StreamPointer &context) request->notes()->append(notes().getRaw()); } - http->request = request.getRaw(); - HTTPMSGLOCK(http->request); + http->initRequest(request.getRaw()); return true; } @@ -242,8 +243,8 @@ Http::One::Server::processParsedRequest(Http::StreamPointer &context) if (!supportedExpect) { clientStreamNode *node = context->getClientReplyContext(); quitAfterError(request.getRaw()); - // setLogUri should called before repContext->setReplyToError - setLogUri(http, urlCanonicalClean(request.getRaw())); + // setReplyToError() requires log_uri + assert(http->log_uri); clientReplyContext *repContext = dynamic_cast(node->data.getRaw()); assert (repContext); repContext->setReplyToError(ERR_INVALID_REQ, Http::scExpectationFailed, request->method, http->uri, diff --git a/src/tests/stub_client_side.cc b/src/tests/stub_client_side.cc index e69a4b646f..d30cdf6dc0 100644 --- a/src/tests/stub_client_side.cc +++ b/src/tests/stub_client_side.cc @@ -52,7 +52,6 @@ void ConnStateData::buildSslCertGenerationParams(Ssl::CertificateProperties &) S bool ConnStateData::serveDelayedError(Http::Stream *) STUB_RETVAL(false) #endif -void setLogUri(ClientHttpRequest *, char const *, bool) STUB const char *findTrailingHTTPVersion(const char *, const char *) STUB_RETVAL(NULL) int varyEvaluateMatch(StoreEntry *, HttpRequest *) STUB_RETVAL(0) void clientOpenListenSockets(void) STUB diff --git a/src/tests/stub_libanyp.cc b/src/tests/stub_libanyp.cc index 0a7d347da2..40306ef74d 100644 --- a/src/tests/stub_libanyp.cc +++ b/src/tests/stub_libanyp.cc @@ -31,7 +31,6 @@ const SBuf &AnyP::Uri::Asterisk() SBuf &AnyP::Uri::authority(bool) const STUB_RETVAL(nil) SBuf &AnyP::Uri::absolute() const STUB_RETVAL(nil) void urlInitialize() STUB -char *urlCanonicalClean(const HttpRequest *) STUB_RETVAL(nullptr) const char *urlCanonicalFakeHttps(const HttpRequest *) STUB_RETVAL(nullptr) bool urlIsRelative(const char *) STUB_RETVAL(false) char *urlMakeAbsolute(const HttpRequest *, const char *)STUB_RETVAL(nullptr)