]> git.ipfire.org Git - thirdparty/squid.git/commitdiff
Fix %>ru logging of huge URLs (#229) M-staged-PR229
authorEduard Bagdasaryan <eduard.bagdasaryan@measurement-factory.com>
Tue, 17 Jul 2018 15:31:23 +0000 (15:31 +0000)
committerSquid Anubis <squid-anubis@squid-cache.org>
Tue, 17 Jul 2018 15:41:56 +0000 (15:41 +0000)
When dealing with an HTTP request header that Squid can parse but that
contains request URI length exceeding the 8K limit, Squid should log the
URL (prefix) instead of a dash. Logging the URL helps with triaging
these unusual requests. The older %ru (LFT_REQUEST_URI) was already
logging these huge URLs, but %>ru (LFT_CLIENT_REQ_URI) was logging a
dash. Now both log the URL (or its prefix).

As a side effect, %>ru now also logs error:request-too-large,
error:transaction-end-before-headers and other Squid-specific
pseudo-URLs, as appropriate.

Also refactored request- and URI-recording code to reduce chances of
similar inconsistencies reappearing in the future.

Also, honor strip_query_terms in %ru for large URLs. Not stripping query
string in %ru was a security problem.

Also fixed a bug with "redirected" flag calculation in
ClientHttpRequest::handleAdaptedHeader(). In general, http->url and
request->url should not be compared directly, because the latter always
passes through uri_whitespace cleanup, while the former does not.

Also fixed a bug with possibly wrong %ru after redirection:
ClientHttpRequest::log_uri was not updated in this case.

Also initialize AccessLogEntry::request and AccessLogEntry::notes ASAP.
Before this change, these fields were initialized in
ClientHttpRequest::doCallouts(). It is better to initialize them just
after the request object is created so that ACLs, running before
doCallouts(), could have them at hand. There are at least three such
ACLs: force_request_body_continuation, spoof_client_ip and
spoof_client_ip.

Also synced %ru and %>ru documentation with the current code.

22 files changed:
src/AccessLogEntry.cc
src/AccessLogEntry.h
src/Downloader.cc
src/HttpRequest.cc
src/HttpRequest.h
src/Makefile.am
src/anyp/Uri.cc
src/anyp/Uri.h
src/cf.data.pre
src/client_side.cc
src/client_side.h
src/client_side_reply.cc
src/client_side_request.cc
src/client_side_request.h
src/comm/TcpAcceptor.cc
src/format/Format.cc
src/htcp.cc
src/icp_v2.cc
src/servers/FtpServer.cc
src/servers/Http1Server.cc
src/tests/stub_client_side.cc
src/tests/stub_libanyp.cc

index 73d8f7313364cdf33c7359d18bb3ecbcfda3a7f5..1d87505ea6b425f74ecdb5442c79e264dcbd550c 100644 (file)
@@ -117,3 +117,15 @@ AccessLogEntry::~AccessLogEntry()
 #endif
 }
 
+const SBuf *
+AccessLogEntry::effectiveVirginUrl() const
+{
+    const SBuf *effectiveUrl = request ? &request->url.absolute() : &virginUrlForMissingRequest_;
+    if (effectiveUrl && !effectiveUrl->isEmpty())
+        return effectiveUrl;
+    // We can not use ALE::url here because it may contain a request URI after
+    // adaptation/redirection. When the request is missing, a non-empty ALE::url
+    // means that we missed a setVirginUrlForMissingRequest() call somewhere.
+    return nullptr;
+}
+
index 3e436c0364616fd11d6aa437b2314b88a974da97..b462edf34f4c7296dd5c15e349619969d78ff871 100644 (file)
@@ -228,6 +228,24 @@ public:
     }
     icap;
 #endif
+
+    /// Effective URI of the received client (or equivalent) HTTP request or,
+    /// in rare cases where that information was not collected, a nil pointer.
+    /// Receiving errors are represented by "error:..." URIs.
+    /// Adaptations and redirections do not affect this URI.
+    const SBuf *effectiveVirginUrl() const;
+
+    /// Remember Client URI (or equivalent) when there is no HttpRequest.
+    void setVirginUrlForMissingRequest(const SBuf &vu)
+    {
+        if (!request)
+            virginUrlForMissingRequest_ = vu;
+    }
+
+private:
+    /// Client URI (or equivalent) for effectiveVirginUrl() when HttpRequest is
+    /// missing. This member is ignored unless the request member is nil.
+    SBuf virginUrlForMissingRequest_;
 };
 
 class ACLChecklist;
index c78c3c85a1396d0bbc64d4126001be592299bb29..2e95265234b1b6d325c96dffa1633cd88824c13e 100644 (file)
@@ -151,12 +151,10 @@ Downloader::buildRequest()
            "\n----------");
 
     ClientHttpRequest *const http = new ClientHttpRequest(nullptr);
-    http->request = request;
-    HTTPMSGLOCK(http->request);
+    http->initRequest(request);
     http->req_sz = 0;
     // XXX: performance regression. c_str() reallocates
     http->uri = xstrdup(url_.c_str());
-    setLogUri (http, urlCanonicalClean(request));
 
     context_ = new DownloaderContext(this, http);
     StoreIOBuffer tempBuffer;
index 2c5858faa7373c02589dc7741e6570c68cdcd2e1..72cb7f279e2579e2e9a7df54eac61123baac949d 100644 (file)
@@ -738,6 +738,12 @@ HttpRequest::manager(const CbcPointer<ConnStateData> &aMgr, const AccessLogEntry
     }
 }
 
+char *
+HttpRequest::canonicalCleanUrl() const
+{
+    return urlCanonicalCleanWithoutRequest(effectiveRequestUri(), method, url.getScheme());
+}
+
 /// a helper for validating FindListeningPortAddress()-found address candidates
 static const Ip::Address *
 FindListeningPortAddressInAddress(const Ip::Address *ip)
index f8b7d4a8f774073aa949516707e9b701fe1ccd32..eee849adcd45b332b15cc20cab3ce56ad7c371f0 100644 (file)
@@ -70,6 +70,10 @@ public:
     /// whether the client is likely to be able to handle a 1xx reply
     bool canHandle1xx() const;
 
+    /// \returns a pointer to a local static buffer containing request URI
+    /// that honors strip_query_terms and %-encodes unsafe URI characters
+    char *canonicalCleanUrl() const;
+
 #if USE_ADAPTATION
     /// Returns possibly nil history, creating it if adapt. logging is enabled
     Adaptation::History::Pointer adaptLogHistory() const;
index dc11f8865714005b27e97dce3b9eb980018cdc20..c83c3781bb2f4e41601579f0ee8e816ab6ce8fcd 100644 (file)
@@ -1367,7 +1367,6 @@ tests_testCacheManager_SOURCES = \
        tests/stub_SwapDir.cc \
        MemStore.cc \
        $(UNLINKDSOURCE) \
-       tests/stub_libanyp.cc \
        urn.h \
        urn.cc \
        wccp2.h \
@@ -1799,7 +1798,6 @@ tests_testEvent_SOURCES = \
        tests/stub_tunnel.cc \
        MemStore.cc \
        $(UNLINKDSOURCE) \
-       tests/stub_libanyp.cc \
        urn.h \
        urn.cc \
        wccp2.h \
@@ -2033,7 +2031,6 @@ tests_testEventLoop_SOURCES = \
        tests/stub_tunnel.cc \
        MemStore.cc \
        $(UNLINKDSOURCE) \
-       tests/stub_libanyp.cc \
        urn.h \
        urn.cc \
        wccp2.h \
@@ -2262,7 +2259,6 @@ tests_test_http_range_SOURCES = \
        tools.cc \
        tests/stub_tunnel.cc \
        $(UNLINKDSOURCE) \
-       tests/stub_libanyp.cc \
        urn.h \
        urn.cc \
        wccp2.h \
index f2c2b222a0a3cf51aea4787a71cfcde5ab511137..f570e6f3a3b3b0feca8d30c66ac8ab8ef3281d01 100644 (file)
@@ -515,15 +515,15 @@ AnyP::Uri::absolute() const
  *        and never copy the query-string part in the first place
  */
 char *
-urlCanonicalClean(const HttpRequest * request)
+urlCanonicalCleanWithoutRequest(const SBuf &url, const HttpRequestMethod &method, const AnyP::UriScheme &scheme)
 {
     LOCAL_ARRAY(char, buf, MAX_URL);
 
-    snprintf(buf, sizeof(buf), SQUIDSBUFPH, SQUIDSBUFPRINT(request->effectiveRequestUri()));
+    snprintf(buf, sizeof(buf), SQUIDSBUFPH, SQUIDSBUFPRINT(url));
     buf[sizeof(buf)-1] = '\0';
 
     // URN, CONNECT method, and non-stripped URIs can go straight out
-    if (Config.onoff.strip_query_terms && !(request->method == Http::METHOD_CONNECT || request->url.getScheme() == AnyP::PROTO_URN)) {
+    if (Config.onoff.strip_query_terms && !(method == Http::METHOD_CONNECT || scheme == AnyP::PROTO_URN)) {
         // strip anything AFTER a question-mark
         // leaving the '?' in place
         if (auto t = strchr(buf, '?')) {
@@ -555,7 +555,7 @@ urlCanonicalFakeHttps(const HttpRequest * request)
     }
 
     // else do the normal complete canonical thing.
-    return urlCanonicalClean(request);
+    return request->canonicalCleanUrl();
 }
 
 /*
@@ -936,3 +936,55 @@ AnyP::Uri::Uri(AnyP::UriScheme const &aScheme) :
     *host_=0;
 }
 
+// TODO: fix code duplication with AnyP::Uri::parse()
+char *
+AnyP::Uri::cleanup(const char *uri)
+{
+    int flags = 0;
+    char *cleanedUri = nullptr;
+    switch (Config.uri_whitespace) {
+    case URI_WHITESPACE_ALLOW:
+        flags |= RFC1738_ESCAPE_NOSPACE;
+        // fall through to next case
+    case URI_WHITESPACE_ENCODE:
+        flags |= RFC1738_ESCAPE_UNESCAPED;
+        cleanedUri = xstrndup(rfc1738_do_escape(uri, flags), MAX_URL);
+        break;
+
+    case URI_WHITESPACE_CHOP: {
+        flags |= RFC1738_ESCAPE_UNESCAPED;
+        const auto pos = strcspn(uri, w_space);
+        char *choppedUri = nullptr;
+        if (pos < strlen(uri))
+            choppedUri = xstrndup(uri, pos + 1);
+        cleanedUri = xstrndup(rfc1738_do_escape(choppedUri ? choppedUri : uri, flags), MAX_URL);
+        cleanedUri[pos] = '\0';
+        xfree(choppedUri);
+    }
+    break;
+
+    case URI_WHITESPACE_DENY:
+    case URI_WHITESPACE_STRIP:
+    default: {
+        // TODO: avoid duplication with urlParse()
+        const char *t;
+        char *tmp_uri = static_cast<char*>(xmalloc(strlen(uri) + 1));
+        char *q = tmp_uri;
+        t = uri;
+        while (*t) {
+            if (!xisspace(*t)) {
+                *q = *t;
+                ++q;
+            }
+            ++t;
+        }
+        *q = '\0';
+        cleanedUri = xstrndup(rfc1738_escape_unescaped(tmp_uri), MAX_URL);
+        xfree(tmp_uri);
+    }
+    break;
+    }
+
+    assert(cleanedUri);
+    return cleanedUri;
+}
index 5806438e2bb84c21c45859689e3d4884205a5ac7..67f0af59f61b75b02c0d08f6adffedcc4db4a52e 100644 (file)
@@ -61,6 +61,9 @@ public:
 
     bool parse(const HttpRequestMethod &, const char *url);
 
+    /// \return a new URI that honors uri_whitespace
+    static char *cleanup(const char *uri);
+
     AnyP::UriScheme const & getScheme() const {return scheme_;}
 
     /// convert the URL scheme to that given
@@ -179,7 +182,10 @@ operator <<(std::ostream &os, const AnyP::Uri &url)
 class HttpRequest;
 
 void urlInitialize(void);
-char *urlCanonicalClean(const HttpRequest *);
+/// call HttpRequest::canonicalCleanUrl() instead if you have HttpRequest
+/// \returns a pointer to a local static buffer containing request URI
+/// that honors strip_query_terms and %-encodes unsafe URI characters
+char *urlCanonicalCleanWithoutRequest(const SBuf &url, const HttpRequestMethod &, const AnyP::UriScheme &);
 const char *urlCanonicalFakeHttps(const HttpRequest * request);
 bool urlIsRelative(const char *);
 char *urlMakeAbsolute(const HttpRequest *, const char *);
index a62d972c0cef00461dbf77d3b3fef206f35ea747..181253fa72adc08b94b5ad64441029f83bd86243 100644 (file)
@@ -4434,19 +4434,42 @@ DOC_START
 
        The <format specification> is a string with embedded % format codes
 
-       % format codes all follow the same basic structure where all but
-       the formatcode is optional. Output strings are automatically escaped
-       as required according to their context and the output format
-       modifiers are usually not needed, but can be specified if an explicit
-       output format is desired.
+       % format codes all follow the same basic structure where all
+       components but the formatcode are optional and usually unnecessary,
+       especially when dealing with common codes.
 
-               % ["|[|'|#|/] [-] [[0]width] [{arg}] formatcode [{arg}]
+               % [encoding] [-] [[0]width] [{arg}] formatcode [{arg}]
 
-               "       output in quoted string format
-               [       output in squid text log format as used by log_mime_hdrs
-               #       output in URL quoted format
-               /       output in shell \-escaped format
-               '       output as-is
+               encoding escapes or otherwise protects "special" characters:
+
+                       "       Quoted string encoding where quote(") and
+                               backslash(\) characters are \-escaped while
+                               CR, LF, and TAB characters are encoded as \r,
+                               \n, and \t two-character sequences.
+
+                       [       Custom Squid encoding where percent(%), square
+                               brackets([]), backslash(\) and characters with
+                               codes outside of [32,126] range are %-encoded.
+                               SP is not encoded. Used by log_mime_hdrs.
+
+                       #       URL encoding (a.k.a. percent-encoding) where
+                               all URL unsafe and control characters (per RFC
+                               1738) are %-encoded.
+
+                       /       Shell-like encoding where quote(") and
+                               backslash(\) characters are \-escaped while CR
+                               and LF characters are encoded as \r and \n
+                               two-character sequences. Values containing SP
+                               character(s) are surrounded by quotes(").
+
+                       '       Raw/as-is encoding with no escaping/quoting.
+
+                       Default encoding: When no explicit encoding is
+                       specified, each %code determines its own encoding.
+                       Most %codes use raw/as-is encoding, but some codes use
+                       a so called "pass-through URL encoding" where all URL
+                       unsafe and control characters (per RFC 1738) are
+                       %-encoded, but the percent character(%) is left as is.
 
                -       left aligned
 
@@ -4583,8 +4606,40 @@ DOC_START
                [http::]rm      Request method (GET/POST etc)
                [http::]>rm     Request method from client
                [http::]<rm     Request method sent to server or peer
-               [http::]ru      Request URL from client (historic, filtered for logging)
-               [http::]>ru     Request URL from client
+
+               [http::]ru      Request URL received (or computed) and sanitized
+
+                               Logs request URI received from the client, a
+                               request adaptation service, or a request
+                               redirector (whichever was applied last).
+
+                               Computed URLs are URIs of internally generated
+                               requests and various "error:..." URIs.
+
+                               Honors strip_query_terms and uri_whitespace.
+
+                               This field is not encoded by default. Encoding
+                               this field using variants of %-encoding will
+                               clash with uri_whitespace modifications that
+                               also use %-encoding.
+
+               [http::]>ru     Request URL received from the client (or computed)
+
+                               Computed URLs are URIs of internally generated
+                               requests and various "error:..." URIs.
+
+                               Unlike %ru, this request URI is not affected
+                               by request adaptation, URL rewriting services,
+                               and strip_query_terms.
+
+                               Honors uri_whitespace.
+
+                               This field is using pass-through URL encoding
+                               by default. Encoding this field using other
+                               variants of %-encoding will clash with
+                               uri_whitespace modifications that also use
+                               %-encoding.
+
                [http::]<ru     Request URL sent to server or peer
                [http::]>rs     Request URL scheme from client
                [http::]<rs     Request URL scheme sent to server or peer
index fe9570450e50b5d5f0a81539f858c87d831382c4..a503eaa7d52aca8bece396a06552ee7c7db0c1ac 100644 (file)
 #include "mime_header.h"
 #include "parser/Tokenizer.h"
 #include "profiler/Profiler.h"
-#include "rfc1738.h"
 #include "security/NegotiationHistory.h"
 #include "servers/forward.h"
 #include "SquidConfig.h"
@@ -474,10 +473,9 @@ void
 ClientHttpRequest::freeResources()
 {
     safe_free(uri);
-    safe_free(log_uri);
     safe_free(redirect.location);
     range_iter.boundary.clean();
-    HTTPMSGUNLOCK(request);
+    clearRequest();
 
     if (client_stream.tail)
         clientStreamAbort((clientStreamNode *)client_stream.tail->data, this);
@@ -1012,8 +1010,7 @@ ConnStateData::abortRequestParsing(const char *const uri)
 {
     ClientHttpRequest *http = new ClientHttpRequest(this);
     http->req_sz = inBuf.length();
-    http->uri = xstrdup(uri);
-    setLogUri (http, uri);
+    http->setErrorUri(uri);
     auto *context = new Http::Stream(clientConnection, http);
     StoreIOBuffer tempBuffer;
     tempBuffer.data = context->reqbuf;
@@ -1085,57 +1082,6 @@ findTrailingHTTPVersion(const char *uriAndHTTPVersion, const char *end)
     return NULL;
 }
 
-void
-setLogUri(ClientHttpRequest * http, char const *uri, bool cleanUrl)
-{
-    safe_free(http->log_uri);
-
-    if (!cleanUrl)
-        // The uri is already clean just dump it.
-        http->log_uri = xstrndup(uri, MAX_URL);
-    else {
-        int flags = 0;
-        switch (Config.uri_whitespace) {
-        case URI_WHITESPACE_ALLOW:
-            flags |= RFC1738_ESCAPE_NOSPACE;
-
-        case URI_WHITESPACE_ENCODE:
-            flags |= RFC1738_ESCAPE_UNESCAPED;
-            http->log_uri = xstrndup(rfc1738_do_escape(uri, flags), MAX_URL);
-            break;
-
-        case URI_WHITESPACE_CHOP: {
-            flags |= RFC1738_ESCAPE_NOSPACE;
-            flags |= RFC1738_ESCAPE_UNESCAPED;
-            http->log_uri = xstrndup(rfc1738_do_escape(uri, flags), MAX_URL);
-            int pos = strcspn(http->log_uri, w_space);
-            http->log_uri[pos] = '\0';
-        }
-        break;
-
-        case URI_WHITESPACE_DENY:
-        case URI_WHITESPACE_STRIP:
-        default: {
-            const char *t;
-            char *tmp_uri = static_cast<char*>(xmalloc(strlen(uri) + 1));
-            char *q = tmp_uri;
-            t = uri;
-            while (*t) {
-                if (!xisspace(*t)) {
-                    *q = *t;
-                    ++q;
-                }
-                ++t;
-            }
-            *q = '\0';
-            http->log_uri = xstrndup(rfc1738_escape_unescaped(tmp_uri), MAX_URL);
-            xfree(tmp_uri);
-        }
-        break;
-        }
-    }
-}
-
 static void
 prepareAcceleratedURL(ConnStateData * conn, ClientHttpRequest *http, const Http1::RequestParserPointer &hp)
 {
@@ -1499,12 +1445,6 @@ bool ConnStateData::serveDelayedError(Http::Stream *context)
         debugs(33, 5, "Responding with delated error for " << http->uri);
         repContext->setReplyToStoreEntry(sslServerBump->entry, "delayed SslBump error");
 
-        // save the original request for logging purposes
-        if (!context->http->al->request) {
-            context->http->al->request = http->request;
-            HTTPMSGLOCK(context->http->al->request);
-        }
-
         // Get error details from the fake certificate-peeking request.
         http->request->detailError(sslServerBump->request->errType, sslServerBump->request->errDetail);
         context->pullData();
@@ -1547,11 +1487,6 @@ bool ConnStateData::serveDelayedError(Http::Stream *context)
                     SQUID_X509_V_ERR_DOMAIN_MISMATCH,
                     srvCert.get(), nullptr);
                 err->detail = errDetail;
-                // Save the original request for logging purposes.
-                if (!context->http->al->request) {
-                    context->http->al->request = request;
-                    HTTPMSGLOCK(context->http->al->request);
-                }
                 repContext->setReplyToError(request->method, err);
                 assert(context->http->out.offset == 0);
                 context->pullData();
@@ -1661,12 +1596,12 @@ clientProcessRequest(ConnStateData *conn, const Http1::RequestParserPointer &hp,
             request->url.host(internalHostname());
             request->url.port(getMyPort());
             http->flags.internal = true;
+            http->setLogUriToRequestUri();
         } else
             debugs(33, 2, "internal URL found: " << request->url.getScheme() << "://" << request->url.authority(true) << " (not this proxy)");
     }
 
     request->flags.internal = http->flags.internal;
-    setLogUri (http, urlCanonicalClean(request.getRaw()));
 
     if (!isFtp) {
         // XXX: for non-HTTP messages instantiate a different Http::Message child type
@@ -3469,8 +3404,7 @@ ConnStateData::buildFakeRequest(Http::MethodType const method, SBuf &useHost, un
     request->method = method;
     request->url.host(useHost.c_str());
     request->url.port(usePort);
-    http->request = request.getRaw();
-    HTTPMSGLOCK(http->request);
+    http->initRequest(request.getRaw());
 
     request->manager(this, http->al);
 
@@ -3486,7 +3420,6 @@ ConnStateData::buildFakeRequest(Http::MethodType const method, SBuf &useHost, un
     inBuf = payload;
     flags.readMore = false;
 
-    setLogUri(http, urlCanonicalClean(request.getRaw()));
     return http;
 }
 
@@ -4164,9 +4097,7 @@ ConnStateData::checkLogging()
     ClientHttpRequest http(this);
     http.req_sz = inBuf.length();
     // XXX: Or we died while waiting for the pinned connection to become idle.
-    char const *uri = "error:transaction-end-before-headers";
-    http.uri = xstrdup(uri);
-    setLogUri(&http, uri);
+    http.setErrorUri("error:transaction-end-before-headers");
 }
 
 bool
index d8a2ebc8b9f3621a262c90a807ee4ff683eb0922..459b66567ea2b1c1e0312d22e608931f929ea8c1 100644 (file)
@@ -418,8 +418,6 @@ private:
     NotePairs::Pointer theNotes;
 };
 
-void setLogUri(ClientHttpRequest * http, char const *uri, bool cleanUrl = false);
-
 const char *findTrailingHTTPVersion(const char *uriAndHTTPVersion, const char *end = NULL);
 
 int varyEvaluateMatch(StoreEntry * entry, HttpRequest * req);
index 1366bd0b59fe0c9dc36d9acaa9b3c5b19fad59c3..355863266b3330b2e2874f2e6a901b6cafad8430 100644 (file)
@@ -2285,7 +2285,10 @@ clientReplyContext::createStoreEntry(const HttpRequestMethod& m, RequestFlags re
 
     if (http->request == NULL) {
         const MasterXaction::Pointer mx = new MasterXaction(XactionInitiator::initClient);
-        http->request = new HttpRequest(m, AnyP::PROTO_NONE, "http", null_string, mx);
+        // XXX: These fake URI parameters shadow the real (or error:...) URI.
+        // TODO: Either always set the request earlier and assert here OR use
+        // http->uri (converted to Anyp::Uri) to create this catch-all request.
+        const_cast<HttpRequest *&>(http->request) =  new HttpRequest(m, AnyP::PROTO_NONE, "http", null_string, mx);
         HTTPMSGLOCK(http->request);
     }
 
index 74c6f41d7cacaa2529df0832156f96e5f442591e..69c18315af6af35ed43c6e0ab2029b39b8fee47f 100644 (file)
@@ -49,6 +49,7 @@
 #include "Parsing.h"
 #include "profiler/Profiler.h"
 #include "redirect.h"
+#include "rfc1738.h"
 #include "SquidConfig.h"
 #include "SquidTime.h"
 #include "Store.h"
@@ -357,8 +358,6 @@ clientBeginRequest(const HttpRequestMethod& method, char const *url, CSCB * stre
     if (header)
         request->header.update(header);
 
-    http->log_uri = xstrdup(urlCanonicalClean(request));
-
     /* http struct now ready */
 
     /*
@@ -389,8 +388,7 @@ clientBeginRequest(const HttpRequestMethod& method, char const *url, CSCB * stre
 
     request->http_ver = Http::ProtocolVersion();
 
-    http->request = request;
-    HTTPMSGLOCK(http->request);
+    http->initRequest(request);
 
     /* optional - skip the access check ? */
     http->calloutContext = new ClientRequestContext(http);
@@ -1273,12 +1271,8 @@ ClientRequestContext::clientRedirectDone(const Helper::Reply &reply)
                                " from request " << old_request << " to " << new_request);
                     }
 
-                    // update the current working ClientHttpRequest fields
-                    xfree(http->uri);
-                    http->uri = SBufToCstring(new_request->effectiveRequestUri());
-                    HTTPMSGUNLOCK(old_request);
-                    http->request = new_request;
-                    HTTPMSGLOCK(http->request);
+                    http->resetRequest(new_request);
+                    old_request = nullptr;
                 } else {
                     debugs(85, DBG_CRITICAL, "ERROR: URL-rewrite produces invalid request: " <<
                            old_request->method << " " << urlNote << " " << old_request->http_ver);
@@ -1649,6 +1643,52 @@ ClientHttpRequest::loggingEntry(StoreEntry *newEntry)
         loggingEntry_->lock("ClientHttpRequest::loggingEntry");
 }
 
+void
+ClientHttpRequest::initRequest(HttpRequest *aRequest)
+{
+    assignRequest(aRequest);
+    if (const auto csd = getConn()) {
+        if (!csd->notes()->empty())
+            request->notes()->appendNewOnly(csd->notes().getRaw());
+    }
+    // al is created in the constructor
+    assert(al);
+    if (!al->request) {
+        al->request = request;
+        HTTPMSGLOCK(al->request);
+        al->syncNotes(request);
+    }
+}
+
+void
+ClientHttpRequest::resetRequest(HttpRequest *newRequest)
+{
+    assert(request != newRequest);
+    clearRequest();
+    assignRequest(newRequest);
+    xfree(uri);
+    uri = SBufToCstring(request->effectiveRequestUri());
+}
+
+void
+ClientHttpRequest::assignRequest(HttpRequest *newRequest)
+{
+    assert(newRequest);
+    assert(!request);
+    const_cast<HttpRequest *&>(request) = newRequest;
+    HTTPMSGLOCK(request);
+    setLogUriToRequestUri();
+}
+
+void
+ClientHttpRequest::clearRequest()
+{
+    HttpRequest *oldRequest = request;
+    HTTPMSGUNLOCK(oldRequest);
+    const_cast<HttpRequest *&>(request) = nullptr;
+    absorbLogUri(nullptr);
+}
+
 /*
  * doCallouts() - This function controls the order of "callout"
  * executions, including non-blocking access control checks, the
@@ -1689,20 +1729,6 @@ ClientHttpRequest::doCallouts()
 {
     assert(calloutContext);
 
-    auto &ale = calloutContext->http->al;
-    /*Save the original request for logging purposes*/
-    if (!ale->request) {
-        ale->request = request;
-        HTTPMSGLOCK(ale->request);
-
-        // Make the previously set client connection ID available as annotation.
-        if (ConnStateData *csd = calloutContext->http->getConn()) {
-            if (!csd->notes()->empty())
-                calloutContext->http->request->notes()->appendNewOnly(csd->notes().getRaw());
-        }
-        ale->syncNotes(calloutContext->http->request);
-    }
-
     if (!calloutContext->error) {
         // CVE-2009-0801: verify the Host: header is consistent with other known details.
         if (!calloutContext->host_header_verify_done) {
@@ -1863,6 +1889,53 @@ ClientHttpRequest::doCallouts()
 #endif
 }
 
+void
+ClientHttpRequest::setLogUriToRequestUri()
+{
+    assert(request);
+    const auto canonicalUri = request->canonicalCleanUrl();
+    absorbLogUri(xstrndup(canonicalUri, MAX_URL));
+}
+
+void
+ClientHttpRequest::setLogUriToRawUri(const char *rawUri, const HttpRequestMethod &method)
+{
+    assert(rawUri);
+    // Should(!request);
+
+    // TODO: SBuf() performance regression, fix by converting rawUri to SBuf
+    char *canonicalUri = urlCanonicalCleanWithoutRequest(SBuf(rawUri), method, AnyP::UriScheme());
+
+    absorbLogUri(AnyP::Uri::cleanup(canonicalUri));
+
+    char *cleanedRawUri = AnyP::Uri::cleanup(rawUri);
+    al->setVirginUrlForMissingRequest(SBuf(cleanedRawUri));
+    xfree(cleanedRawUri);
+}
+
+void
+ClientHttpRequest::absorbLogUri(char *aUri)
+{
+    xfree(log_uri);
+    const_cast<char *&>(log_uri) = aUri;
+}
+
+void
+ClientHttpRequest::setErrorUri(const char *aUri)
+{
+    assert(!uri);
+    assert(aUri);
+    // Should(!request);
+
+    uri = xstrdup(aUri);
+    // TODO: SBuf() performance regression, fix by converting setErrorUri() parameter to SBuf
+    const SBuf errorUri(aUri);
+    const auto canonicalUri = urlCanonicalCleanWithoutRequest(errorUri, HttpRequestMethod(), AnyP::UriScheme());
+    absorbLogUri(xstrndup(canonicalUri, MAX_URL));
+
+    al->setVirginUrlForMissingRequest(errorUri);
+}
+
 #if USE_ADAPTATION
 /// Initiate an asynchronous adaptation transaction which will call us back.
 void
@@ -1907,23 +1980,10 @@ ClientHttpRequest::handleAdaptedHeader(Http::Message *msg)
     assert(msg);
 
     if (HttpRequest *new_req = dynamic_cast<HttpRequest*>(msg)) {
-        /*
-         * Replace the old request with the new request.
-         */
-        HTTPMSGUNLOCK(request);
-        request = new_req;
-        HTTPMSGLOCK(request);
-
         // update the new message to flag whether URL re-writing was done on it
-        if (request->effectiveRequestUri().cmp(uri) != 0)
-            request->flags.redirected = 1;
-
-        /*
-         * Store the new URI for logging
-         */
-        xfree(uri);
-        uri = SBufToCstring(request->effectiveRequestUri());
-        setLogUri(this, urlCanonicalClean(request));
+        if (request->effectiveRequestUri() != new_req->effectiveRequestUri())
+            new_req->flags.redirected = true;
+        resetRequest(new_req);
         assert(request->method.id());
     } else if (HttpReply *new_rep = dynamic_cast<HttpReply*>(msg)) {
         debugs(85,3,HERE << "REQMOD reply is HTTP reply");
index d3a6ec7529cb43e536eb983a16b02344193cdb3f..dc692d624f775c71e526a92475a47534271f339d 100644 (file)
@@ -72,14 +72,38 @@ public:
         }
     }
 
+    /// Initializes the current request with the virgin request.
+    /// Call this method when the virgin request becomes known.
+    /// To update the current request later, use resetRequest().
+    void initRequest(HttpRequest *);
+
+    /// Resets the current request to the latest adapted or redirected
+    /// request. Call this every time adaptation or redirection changes
+    /// the request. To set the virgin request, use initRequest().
+    void resetRequest(HttpRequest *);
+
     /** Details of the client socket which produced us.
      * Treat as read-only for the lifetime of this HTTP request.
      */
     Comm::ConnectionPointer clientConnection;
 
-    HttpRequest *request;       /* Parsed URL ... */
+    /// Request currently being handled by ClientHttpRequest.
+    /// Usually remains nil until the virgin request header is parsed or faked.
+    /// Starts as a virgin request; see initRequest().
+    /// Adaptation and redirections replace it; see resetRequest().
+    HttpRequest * const request;
+
+    /// Usually starts as a URI received from the client, with scheme and host
+    /// added if needed. Is used to create the virgin request for initRequest().
+    /// URIs of adapted/redirected requests replace it via resetRequest().
     char *uri;
-    char *log_uri;
+
+    // TODO: remove this field and store the URI directly in al->url
+    /// Cleaned up URI of the current (virgin or adapted/redirected) request,
+    /// computed URI of an internally-generated requests, or
+    /// one of the hard-coded "error:..." URIs.
+    char * const log_uri;
+
     String store_id; /* StoreID for transactions where the request member is nil */
 
     struct Out {
@@ -122,6 +146,18 @@ public:
     ClientRequestContext *calloutContext;
     void doCallouts();
 
+    // The three methods below prepare log_uri and friends for future logging.
+    // Call the best-fit method whenever the current request or its URI changes.
+
+    /// sets log_uri when we know the current request
+    void setLogUriToRequestUri();
+    /// sets log_uri to a parsed request URI when Squid fails to parse or
+    /// validate other request components, yielding no current request
+    void setLogUriToRawUri(const char *rawUri, const HttpRequestMethod &);
+    /// sets log_uri and uri to an internally-generated "error:..." URI when
+    /// neither the current request nor the parsed request URI are known
+    void setErrorUri(const char *errorUri);
+
     /// Build an error reply. For use with the callouts.
     void calloutsError(const err_type error, const int errDetail);
 
@@ -135,6 +171,14 @@ public:
 #endif
 
 private:
+    /// assigns log_uri with aUri without copying the entire C-string
+    void absorbLogUri(char *aUri);
+    /// resets the current request and log_uri to nil
+    void clearRequest();
+    /// initializes the current unassigned request to the virgin request
+    /// sets the current request, asserting that it was unset
+    void assignRequest(HttpRequest *aRequest);
+
     int64_t maxReplyBodySize_;
     StoreEntry *entry_;
     StoreEntry *loggingEntry_;
index 2973ba0031488674758f54327b37e223240f4e63..ca009ff6d38008b220638312c605897c92915ebc 100644 (file)
@@ -264,6 +264,7 @@ logAcceptError(const Comm::ConnectionPointer &conn)
     AccessLogEntry::Pointer al = new AccessLogEntry;
     al->tcpClient = conn;
     al->url = "error:accept-client-connection";
+    al->setVirginUrlForMissingRequest(al->url);
     ACLFilledChecklist ch(nullptr, nullptr, nullptr);
     ch.src_addr = conn->remote;
     ch.my_addr = conn->local;
index 56cd6cce1435863fce27fd0a505993ceeca8a03d..82d13bf8b0eae2e66a4fc3f665cf11403f4f1b00 100644 (file)
@@ -985,9 +985,8 @@ Format::Format::assemble(MemBuf &mb, const AccessLogEntry::Pointer &al, int logS
             break;
 
         case LFT_CLIENT_REQ_URI:
-            // original client URI
-            if (al->request) {
-                sb = al->request->effectiveRequestUri();
+            if (const auto uri = al->effectiveVirginUrl()) {
+                sb = *uri;
                 out = sb.c_str();
                 quote = 1;
             }
index 8c3fa7102629b2270be39d09bf39f1931a77bdcb..f77d09dcbec7df419fba846c36e30883cb28f28e 100644 (file)
@@ -274,6 +274,7 @@ htcpSyncAle(AccessLogEntryPointer &al, const Ip::Address &caddr, const int opcod
     al->cache.caddr = caddr;
     al->htcp.opcode = htcpOpcodeStr[opcode];
     al->url = url;
+    al->setVirginUrlForMissingRequest(al->url);
     // HTCP transactions do not wait
     al->cache.start_time = current_time;
     al->cache.trTime.tv_sec = 0;
index 3a56b690abd171f2d376b197a53d61b559a96c53..57dfde381ebf4ebbfec680caddbe183e5e1d174e 100644 (file)
@@ -80,6 +80,7 @@ icpSyncAle(AccessLogEntryPointer &al, const Ip::Address &caddr, const char *url,
     al->icp.opcode = ICP_QUERY;
     al->cache.caddr = caddr;
     al->url = url;
+    al->setVirginUrlForMissingRequest(al->url);
     // XXX: move to use icp.clientReply instead
     al->http.clientReplySz.payloadData = len;
     al->cache.start_time = current_time;
index 37acca4ba0c10793559068776dcf5ea14716f242..c775968cea8546f564ece7c7ffba16285cbc776d 100644 (file)
@@ -749,10 +749,9 @@ Ftp::Server::parseOneRequest()
     }
 
     ClientHttpRequest *const http = new ClientHttpRequest(this);
-    http->request = request;
-    HTTPMSGLOCK(http->request);
     http->req_sz = tok.parsedSize();
     http->uri = newUri;
+    http->initRequest(request);
 
     Http::Stream *const result =
         new Http::Stream(clientConnection, http);
@@ -1736,8 +1735,6 @@ Ftp::Server::setReply(const int code, const char *msg)
 
     HttpReply *const reply = Ftp::HttpReplyWrapper(code, msg, Http::scNoContent, 0);
 
-    setLogUri(http, urlCanonicalClean(http->request));
-
     clientStreamNode *const node = context->getClientReplyContext();
     clientReplyContext *const repContext =
         dynamic_cast<clientReplyContext *>(node->data.getRaw());
index b0ae347682e23ff157b7a9b53cbd3538b95701ed..0ab2dbee73850c2ccd4c2b2428cca2a92229da0e 100644 (file)
@@ -118,8 +118,10 @@ Http::One::Server::buildHttpRequest(Http::StreamPointer &context)
             // else use default ERR_INVALID_REQ set above.
             break;
         }
-        // setLogUri should called before repContext->setReplyToError
-        setLogUri(http, http->uri, true);
+        // setReplyToError() requires log_uri
+        // must be already initialized via ConnStateData::abortRequestParsing()
+        assert(http->log_uri);
+
         const char * requestErrorBytes = inBuf.c_str();
         if (!clientTunnelOnError(this, context, request, parser_->method(), errPage)) {
             setReplyError(context, request, parser_->method(), errPage, parser_->parseStatusCode, requestErrorBytes);
@@ -135,8 +137,8 @@ Http::One::Server::buildHttpRequest(Http::StreamPointer &context)
     mx->tcpClient = clientConnection;
     if ((request = HttpRequest::FromUrl(http->uri, mx, parser_->method())) == NULL) {
         debugs(33, 5, "Invalid URL: " << http->uri);
-        // setLogUri should called before repContext->setReplyToError
-        setLogUri(http, http->uri, true);
+        // setReplyToError() requires log_uri
+        http->setLogUriToRawUri(http->uri, parser_->method());
 
         const char * requestErrorBytes = inBuf.c_str();
         if (!clientTunnelOnError(this, context, request, parser_->method(), ERR_INVALID_URL)) {
@@ -154,8 +156,8 @@ Http::One::Server::buildHttpRequest(Http::StreamPointer &context)
             (parser_->messageProtocol().major > 1) ) {
 
         debugs(33, 5, "Unsupported HTTP version discovered. :\n" << parser_->messageProtocol());
-        // setLogUri should called before repContext->setReplyToError
-        setLogUri(http, http->uri,  true);
+        // setReplyToError() requires log_uri
+        http->setLogUriToRawUri(http->uri, parser_->method());
 
         const char * requestErrorBytes = NULL; //HttpParserHdrBuf(parser_);
         if (!clientTunnelOnError(this, context, request, parser_->method(), ERR_UNSUP_HTTPVERSION)) {
@@ -168,8 +170,8 @@ Http::One::Server::buildHttpRequest(Http::StreamPointer &context)
     /* compile headers */
     if (parser_->messageProtocol().major >= 1 && !request->parseHeader(*parser_.getRaw())) {
         debugs(33, 5, "Failed to parse request headers:\n" << parser_->mimeHeader());
-        // setLogUri should called before repContext->setReplyToError
-        setLogUri(http, http->uri, true);
+        // setReplyToError() requires log_uri
+        http->setLogUriToRawUri(http->uri, parser_->method());
         const char * requestErrorBytes = NULL; //HttpParserHdrBuf(parser_);
         if (!clientTunnelOnError(this, context, request, parser_->method(), ERR_INVALID_REQ)) {
             setReplyError(context, request, parser_->method(), ERR_INVALID_REQ, Http::scBadRequest, requestErrorBytes);
@@ -195,8 +197,7 @@ Http::One::Server::buildHttpRequest(Http::StreamPointer &context)
         request->notes()->append(notes().getRaw());
     }
 
-    http->request = request.getRaw();
-    HTTPMSGLOCK(http->request);
+    http->initRequest(request.getRaw());
 
     return true;
 }
@@ -242,8 +243,8 @@ Http::One::Server::processParsedRequest(Http::StreamPointer &context)
         if (!supportedExpect) {
             clientStreamNode *node = context->getClientReplyContext();
             quitAfterError(request.getRaw());
-            // setLogUri should called before repContext->setReplyToError
-            setLogUri(http, urlCanonicalClean(request.getRaw()));
+            // setReplyToError() requires log_uri
+            assert(http->log_uri);
             clientReplyContext *repContext = dynamic_cast<clientReplyContext *>(node->data.getRaw());
             assert (repContext);
             repContext->setReplyToError(ERR_INVALID_REQ, Http::scExpectationFailed, request->method, http->uri,
index e69a4b646f9a2d2c401149dab60143257e2af88e..d30cdf6dc034cd4bfe19897bbe35b58b1da9b286 100644 (file)
@@ -52,7 +52,6 @@ void ConnStateData::buildSslCertGenerationParams(Ssl::CertificateProperties &) S
 bool ConnStateData::serveDelayedError(Http::Stream *) STUB_RETVAL(false)
 #endif
 
-void setLogUri(ClientHttpRequest *, char const *, bool) STUB
 const char *findTrailingHTTPVersion(const char *, const char *) STUB_RETVAL(NULL)
 int varyEvaluateMatch(StoreEntry *, HttpRequest *) STUB_RETVAL(0)
 void clientOpenListenSockets(void) STUB
index 0a7d347da2bb39895deafb094b91fd6e47d3b20a..40306ef74d36a5fe080995dfff44f9baab98bc11 100644 (file)
@@ -31,7 +31,6 @@ const SBuf &AnyP::Uri::Asterisk()
 SBuf &AnyP::Uri::authority(bool) const STUB_RETVAL(nil)
 SBuf &AnyP::Uri::absolute() const STUB_RETVAL(nil)
 void urlInitialize() STUB
-char *urlCanonicalClean(const HttpRequest *) STUB_RETVAL(nullptr)
 const char *urlCanonicalFakeHttps(const HttpRequest *) STUB_RETVAL(nullptr)
 bool urlIsRelative(const char *) STUB_RETVAL(false)
 char *urlMakeAbsolute(const HttpRequest *, const char *)STUB_RETVAL(nullptr)