]> git.ipfire.org Git - thirdparty/squid.git/commitdiff
Fix %>ru logging of huge URLs (#259)
authorEduard Bagdasaryan <eduard.bagdasaryan@measurement-factory.com>
Fri, 20 Jul 2018 22:01:17 +0000 (01:01 +0300)
committerAmos Jeffries <yadij@users.noreply.github.com>
Sat, 21 Jul 2018 12:22:31 +0000 (00:22 +1200)
When dealing with an HTTP request header that Squid can parse but that
contains request URI length exceeding the 8K limit, Squid should log the
URL (prefix) instead of a dash. Logging the URL helps with triaging
these unusual requests. The older %ru (LFT_REQUEST_URI) was already
logging these huge URLs, but %>ru (LFT_CLIENT_REQ_URI) was logging a
dash. Now both log the URL (or its prefix).

As a side effect, %>ru now also logs error:request-too-large,
error:transaction-end-before-headers and other Squid-specific
pseudo-URLs, as appropriate.

Also refactored request- and URI-recording code to reduce chances of
similar inconsistencies reappearing in the future.

Also, honor strip_query_terms in %ru for large URLs. Not stripping query
string in %ru was a security problem.

Also fixed a bug with "redirected" flag calculation in
ClientHttpRequest::handleAdaptedHeader(). In general, http->url and
request->url should not be compared directly, because the latter always
passes through uri_whitespace cleanup, while the former does not.

Also fixed a bug with possibly wrong %ru after redirection:
ClientHttpRequest::log_uri was not updated in this case.

Also initialize AccessLogEntry::request and AccessLogEntry::notes ASAP.
Before this change, these fields were initialized in
ClientHttpRequest::doCallouts(). It is better to initialize them just
after the request object is created so that ACLs, running before
doCallouts(), could have them at hand. There are at least three such
ACLs: force_request_body_continuation, spoof_client_ip and
spoof_client_ip.

Also synced %ru and %>ru documentation with the current code.

This is a v4 port of master bec110e.

22 files changed:
src/AccessLogEntry.cc
src/AccessLogEntry.h
src/Downloader.cc
src/HttpRequest.cc
src/HttpRequest.h
src/Makefile.am
src/anyp/Uri.cc
src/anyp/Uri.h
src/cf.data.pre
src/client_side.cc
src/client_side.h
src/client_side_reply.cc
src/client_side_request.cc
src/client_side_request.h
src/comm/TcpAcceptor.cc
src/format/Format.cc
src/htcp.cc
src/icp_v2.cc
src/servers/FtpServer.cc
src/servers/Http1Server.cc
src/tests/stub_client_side.cc
src/tests/stub_libanyp.cc

index 25126e68b57503294537da710c4f6454a16a096e..f3679256868fa9500c97002aa624cb049fe1b7cc 100644 (file)
@@ -112,3 +112,15 @@ AccessLogEntry::~AccessLogEntry()
 #endif
 }
 
+const SBuf *
+AccessLogEntry::effectiveVirginUrl() const
+{
+    const SBuf *effectiveUrl = request ? &request->url.absolute() : &virginUrlForMissingRequest_;
+    if (effectiveUrl && !effectiveUrl->isEmpty())
+        return effectiveUrl;
+    // We can not use ALE::url here because it may contain a request URI after
+    // adaptation/redirection. When the request is missing, a non-empty ALE::url
+    // means that we missed a setVirginUrlForMissingRequest() call somewhere.
+    return nullptr;
+}
+
index 41d909f739480c78b1e79f5153100f5847cdb946..782add58fe2fd4c5e131a6d0d8d65330cecae51f 100644 (file)
@@ -271,6 +271,24 @@ public:
     }
     icap;
 #endif
+
+    /// Effective URI of the received client (or equivalent) HTTP request or,
+    /// in rare cases where that information was not collected, a nil pointer.
+    /// Receiving errors are represented by "error:..." URIs.
+    /// Adaptations and redirections do not affect this URI.
+    const SBuf *effectiveVirginUrl() const;
+
+    /// Remember Client URI (or equivalent) when there is no HttpRequest.
+    void setVirginUrlForMissingRequest(const SBuf &vu)
+    {
+        if (!request)
+            virginUrlForMissingRequest_ = vu;
+    }
+
+private:
+    /// Client URI (or equivalent) for effectiveVirginUrl() when HttpRequest is
+    /// missing. This member is ignored unless the request member is nil.
+    SBuf virginUrlForMissingRequest_;
 };
 
 class ACLChecklist;
index c78c3c85a1396d0bbc64d4126001be592299bb29..2e95265234b1b6d325c96dffa1633cd88824c13e 100644 (file)
@@ -151,12 +151,10 @@ Downloader::buildRequest()
            "\n----------");
 
     ClientHttpRequest *const http = new ClientHttpRequest(nullptr);
-    http->request = request;
-    HTTPMSGLOCK(http->request);
+    http->initRequest(request);
     http->req_sz = 0;
     // XXX: performance regression. c_str() reallocates
     http->uri = xstrdup(url_.c_str());
-    setLogUri (http, urlCanonicalClean(request));
 
     context_ = new DownloaderContext(this, http);
     StoreIOBuffer tempBuffer;
index dee671372b13751d619ba1b8e48b0994a3aab682..921c57f82dbb3b328cda7221b054926452a21e24 100644 (file)
@@ -673,6 +673,12 @@ HttpRequest::effectiveRequestUri() const
     return url.absolute();
 }
 
+char *
+HttpRequest::canonicalCleanUrl() const
+{
+    return urlCanonicalCleanWithoutRequest(effectiveRequestUri(), method, url.getScheme());
+}
+
 void
 HttpRequest::manager(const CbcPointer<ConnStateData> &aMgr, const AccessLogEntryPointer &al)
 {
index 38c0b8a4bf98bb0743452fe4a9e81e694012978a..450bb782f93b9b53e757ac206295f4ea6fca3557 100644 (file)
@@ -70,6 +70,10 @@ public:
     /// whether the client is likely to be able to handle a 1xx reply
     bool canHandle1xx() const;
 
+    /// \returns a pointer to a local static buffer containing request URI
+    /// that honors strip_query_terms and %-encodes unsafe URI characters
+    char *canonicalCleanUrl() const;
+
 #if USE_ADAPTATION
     /// Returns possibly nil history, creating it if adapt. logging is enabled
     Adaptation::History::Pointer adaptLogHistory() const;
index 0efba2455872f157452d05789f54ffdec87d7d2c..7df05fd146714d854278a0cc8d28fc24eefb4d43 100644 (file)
@@ -1370,7 +1370,6 @@ tests_testCacheManager_SOURCES = \
        tests/stub_SwapDir.cc \
        MemStore.cc \
        $(UNLINKDSOURCE) \
-       tests/stub_libanyp.cc \
        urn.h \
        urn.cc \
        wccp2.h \
@@ -1804,7 +1803,6 @@ tests_testEvent_SOURCES = \
        tests/stub_tunnel.cc \
        MemStore.cc \
        $(UNLINKDSOURCE) \
-       tests/stub_libanyp.cc \
        urn.h \
        urn.cc \
        wccp2.h \
@@ -2040,7 +2038,6 @@ tests_testEventLoop_SOURCES = \
        tests/stub_tunnel.cc \
        MemStore.cc \
        $(UNLINKDSOURCE) \
-       tests/stub_libanyp.cc \
        urn.h \
        urn.cc \
        wccp2.h \
@@ -2271,7 +2268,6 @@ tests_test_http_range_SOURCES = \
        tools.cc \
        tests/stub_tunnel.cc \
        $(UNLINKDSOURCE) \
-       tests/stub_libanyp.cc \
        urn.h \
        urn.cc \
        wccp2.h \
index f2c2b222a0a3cf51aea4787a71cfcde5ab511137..9c025bb6daf68712f498e36364f75e8a826f9b29 100644 (file)
@@ -515,15 +515,15 @@ AnyP::Uri::absolute() const
  *        and never copy the query-string part in the first place
  */
 char *
-urlCanonicalClean(const HttpRequest * request)
+urlCanonicalCleanWithoutRequest(const SBuf &url, const HttpRequestMethod &method, const AnyP::UriScheme &scheme)
 {
     LOCAL_ARRAY(char, buf, MAX_URL);
 
-    snprintf(buf, sizeof(buf), SQUIDSBUFPH, SQUIDSBUFPRINT(request->effectiveRequestUri()));
+    snprintf(buf, sizeof(buf), SQUIDSBUFPH, SQUIDSBUFPRINT(url));
     buf[sizeof(buf)-1] = '\0';
 
     // URN, CONNECT method, and non-stripped URIs can go straight out
-    if (Config.onoff.strip_query_terms && !(request->method == Http::METHOD_CONNECT || request->url.getScheme() == AnyP::PROTO_URN)) {
+    if (Config.onoff.strip_query_terms && !(method == Http::METHOD_CONNECT || scheme == AnyP::PROTO_URN)) {
         // strip anything AFTER a question-mark
         // leaving the '?' in place
         if (auto t = strchr(buf, '?')) {
@@ -555,7 +555,7 @@ urlCanonicalFakeHttps(const HttpRequest * request)
     }
 
     // else do the normal complete canonical thing.
-    return urlCanonicalClean(request);
+    return request->canonicalCleanUrl();
 }
 
 /*
@@ -936,3 +936,56 @@ AnyP::Uri::Uri(AnyP::UriScheme const &aScheme) :
     *host_=0;
 }
 
+// TODO: fix code duplication with AnyP::Uri::parse()
+char *
+AnyP::Uri::cleanup(const char *uri)
+{
+    int flags = 0;
+    char *cleanedUri = nullptr;
+    switch (Config.uri_whitespace) {
+    case URI_WHITESPACE_ALLOW:
+        flags |= RFC1738_ESCAPE_NOSPACE;
+        // fall through to next case
+    case URI_WHITESPACE_ENCODE:
+        flags |= RFC1738_ESCAPE_UNESCAPED;
+        cleanedUri = xstrndup(rfc1738_do_escape(uri, flags), MAX_URL);
+        break;
+
+    case URI_WHITESPACE_CHOP: {
+        flags |= RFC1738_ESCAPE_UNESCAPED;
+        const auto pos = strcspn(uri, w_space);
+        char *choppedUri = nullptr;
+        if (pos < strlen(uri))
+            choppedUri = xstrndup(uri, pos + 1);
+        cleanedUri = xstrndup(rfc1738_do_escape(choppedUri ? choppedUri : uri, flags), MAX_URL);
+        cleanedUri[pos] = '\0';
+        xfree(choppedUri);
+    }
+    break;
+
+    case URI_WHITESPACE_DENY:
+    case URI_WHITESPACE_STRIP:
+    default: {
+        // TODO: avoid duplication with urlParse()
+        const char *t;
+        char *tmp_uri = static_cast<char*>(xmalloc(strlen(uri) + 1));
+        char *q = tmp_uri;
+        t = uri;
+        while (*t) {
+            if (!xisspace(*t)) {
+                *q = *t;
+                ++q;
+            }
+            ++t;
+        }
+        *q = '\0';
+        cleanedUri = xstrndup(rfc1738_escape_unescaped(tmp_uri), MAX_URL);
+        xfree(tmp_uri);
+    }
+    break;
+    }
+
+    assert(cleanedUri);
+    return cleanedUri;
+}
+
index 5806438e2bb84c21c45859689e3d4884205a5ac7..67f0af59f61b75b02c0d08f6adffedcc4db4a52e 100644 (file)
@@ -61,6 +61,9 @@ public:
 
     bool parse(const HttpRequestMethod &, const char *url);
 
+    /// \return a new URI that honors uri_whitespace
+    static char *cleanup(const char *uri);
+
     AnyP::UriScheme const & getScheme() const {return scheme_;}
 
     /// convert the URL scheme to that given
@@ -179,7 +182,10 @@ operator <<(std::ostream &os, const AnyP::Uri &url)
 class HttpRequest;
 
 void urlInitialize(void);
-char *urlCanonicalClean(const HttpRequest *);
+/// call HttpRequest::canonicalCleanUrl() instead if you have HttpRequest
+/// \returns a pointer to a local static buffer containing request URI
+/// that honors strip_query_terms and %-encodes unsafe URI characters
+char *urlCanonicalCleanWithoutRequest(const SBuf &url, const HttpRequestMethod &, const AnyP::UriScheme &);
 const char *urlCanonicalFakeHttps(const HttpRequest * request);
 bool urlIsRelative(const char *);
 char *urlMakeAbsolute(const HttpRequest *, const char *);
index 77f12d54a28a9e0c506bb9c928f2648113de21b3..4e014365baaf1225bbd948282b6bf506ca98856e 100644 (file)
@@ -4293,19 +4293,42 @@ DOC_START
 
        The <format specification> is a string with embedded % format codes
 
-       % format codes all follow the same basic structure where all but
-       the formatcode is optional. Output strings are automatically escaped
-       as required according to their context and the output format
-       modifiers are usually not needed, but can be specified if an explicit
-       output format is desired.
+       % format codes all follow the same basic structure where all
+       components but the formatcode are optional and usually unnecessary,
+       especially when dealing with common codes.
 
-               % ["|[|'|#|/] [-] [[0]width] [{arg}] formatcode [{arg}]
+               % [encoding] [-] [[0]width] [{arg}] formatcode [{arg}]
 
-               "       output in quoted string format
-               [       output in squid text log format as used by log_mime_hdrs
-               #       output in URL quoted format
-               /       output in shell \-escaped format
-               '       output as-is
+               encoding escapes or otherwise protects "special" characters:
+
+                       "       Quoted string encoding where quote(") and
+                               backslash(\) characters are \-escaped while
+                               CR, LF, and TAB characters are encoded as \r,
+                               \n, and \t two-character sequences.
+
+                       [       Custom Squid encoding where percent(%), square
+                               brackets([]), backslash(\) and characters with
+                               codes outside of [32,126] range are %-encoded.
+                               SP is not encoded. Used by log_mime_hdrs.
+
+                       #       URL encoding (a.k.a. percent-encoding) where
+                               all URL unsafe and control characters (per RFC
+                               1738) are %-encoded.
+
+                       /       Shell-like encoding where quote(") and
+                               backslash(\) characters are \-escaped while CR
+                               and LF characters are encoded as \r and \n
+                               two-character sequences. Values containing SP
+                               character(s) are surrounded by quotes(").
+
+                       '       Raw/as-is encoding with no escaping/quoting.
+
+                       Default encoding: When no explicit encoding is
+                       specified, each %code determines its own encoding.
+                       Most %codes use raw/as-is encoding, but some codes use
+                       a so called "pass-through URL encoding" where all URL
+                       unsafe and control characters (per RFC 1738) are
+                       %-encoded, but the percent character(%) is left as is.
 
                -       left aligned
 
@@ -4411,8 +4434,40 @@ DOC_START
                [http::]rm      Request method (GET/POST etc)
                [http::]>rm     Request method from client
                [http::]<rm     Request method sent to server or peer
-               [http::]ru      Request URL from client (historic, filtered for logging)
-               [http::]>ru     Request URL from client
+
+               [http::]ru      Request URL received (or computed) and sanitized
+
+                               Logs request URI received from the client, a
+                               request adaptation service, or a request
+                               redirector (whichever was applied last).
+
+                               Computed URLs are URIs of internally generated
+                               requests and various "error:..." URIs.
+
+                               Honors strip_query_terms and uri_whitespace.
+
+                               This field is not encoded by default. Encoding
+                               this field using variants of %-encoding will
+                               clash with uri_whitespace modifications that
+                               also use %-encoding.
+
+               [http::]>ru     Request URL received from the client (or computed)
+
+                               Computed URLs are URIs of internally generated
+                               requests and various "error:..." URIs.
+
+                               Unlike %ru, this request URI is not affected
+                               by request adaptation, URL rewriting services,
+                               and strip_query_terms.
+
+                               Honors uri_whitespace.
+
+                               This field is using pass-through URL encoding
+                               by default. Encoding this field using other
+                               variants of %-encoding will clash with
+                               uri_whitespace modifications that also use
+                               %-encoding.
+
                [http::]<ru     Request URL sent to server or peer
                [http::]>rs     Request URL scheme from client
                [http::]<rs     Request URL scheme sent to server or peer
index ae3d96af3873417424044f328be53636ff3dd2ce..d61e278dc0154c8ed81ea3f04f5715a37fb1260b 100644 (file)
 #include "mime_header.h"
 #include "parser/Tokenizer.h"
 #include "profiler/Profiler.h"
-#include "rfc1738.h"
 #include "security/NegotiationHistory.h"
 #include "servers/forward.h"
 #include "SquidConfig.h"
@@ -473,10 +472,9 @@ void
 ClientHttpRequest::freeResources()
 {
     safe_free(uri);
-    safe_free(log_uri);
     safe_free(redirect.location);
     range_iter.boundary.clean();
-    HTTPMSGUNLOCK(request);
+    clearRequest();
 
     if (client_stream.tail)
         clientStreamAbort((clientStreamNode *)client_stream.tail->data, this);
@@ -1011,8 +1009,7 @@ ConnStateData::abortRequestParsing(const char *const uri)
 {
     ClientHttpRequest *http = new ClientHttpRequest(this);
     http->req_sz = inBuf.length();
-    http->uri = xstrdup(uri);
-    setLogUri (http, uri);
+    http->setErrorUri(uri);
     auto *context = new Http::Stream(clientConnection, http);
     StoreIOBuffer tempBuffer;
     tempBuffer.data = context->reqbuf;
@@ -1084,57 +1081,6 @@ findTrailingHTTPVersion(const char *uriAndHTTPVersion, const char *end)
     return NULL;
 }
 
-void
-setLogUri(ClientHttpRequest * http, char const *uri, bool cleanUrl)
-{
-    safe_free(http->log_uri);
-
-    if (!cleanUrl)
-        // The uri is already clean just dump it.
-        http->log_uri = xstrndup(uri, MAX_URL);
-    else {
-        int flags = 0;
-        switch (Config.uri_whitespace) {
-        case URI_WHITESPACE_ALLOW:
-            flags |= RFC1738_ESCAPE_NOSPACE;
-
-        case URI_WHITESPACE_ENCODE:
-            flags |= RFC1738_ESCAPE_UNESCAPED;
-            http->log_uri = xstrndup(rfc1738_do_escape(uri, flags), MAX_URL);
-            break;
-
-        case URI_WHITESPACE_CHOP: {
-            flags |= RFC1738_ESCAPE_NOSPACE;
-            flags |= RFC1738_ESCAPE_UNESCAPED;
-            http->log_uri = xstrndup(rfc1738_do_escape(uri, flags), MAX_URL);
-            int pos = strcspn(http->log_uri, w_space);
-            http->log_uri[pos] = '\0';
-        }
-        break;
-
-        case URI_WHITESPACE_DENY:
-        case URI_WHITESPACE_STRIP:
-        default: {
-            const char *t;
-            char *tmp_uri = static_cast<char*>(xmalloc(strlen(uri) + 1));
-            char *q = tmp_uri;
-            t = uri;
-            while (*t) {
-                if (!xisspace(*t)) {
-                    *q = *t;
-                    ++q;
-                }
-                ++t;
-            }
-            *q = '\0';
-            http->log_uri = xstrndup(rfc1738_escape_unescaped(tmp_uri), MAX_URL);
-            xfree(tmp_uri);
-        }
-        break;
-        }
-    }
-}
-
 static void
 prepareAcceleratedURL(ConnStateData * conn, ClientHttpRequest *http, const Http1::RequestParserPointer &hp)
 {
@@ -1498,12 +1444,6 @@ bool ConnStateData::serveDelayedError(Http::Stream *context)
         debugs(33, 5, "Responding with delated error for " << http->uri);
         repContext->setReplyToStoreEntry(sslServerBump->entry, "delayed SslBump error");
 
-        // save the original request for logging purposes
-        if (!context->http->al->request) {
-            context->http->al->request = http->request;
-            HTTPMSGLOCK(context->http->al->request);
-        }
-
         // Get error details from the fake certificate-peeking request.
         http->request->detailError(sslServerBump->request->errType, sslServerBump->request->errDetail);
         context->pullData();
@@ -1546,11 +1486,6 @@ bool ConnStateData::serveDelayedError(Http::Stream *context)
                     SQUID_X509_V_ERR_DOMAIN_MISMATCH,
                     srvCert.get(), nullptr);
                 err->detail = errDetail;
-                // Save the original request for logging purposes.
-                if (!context->http->al->request) {
-                    context->http->al->request = request;
-                    HTTPMSGLOCK(context->http->al->request);
-                }
                 repContext->setReplyToError(request->method, err);
                 assert(context->http->out.offset == 0);
                 context->pullData();
@@ -1660,12 +1595,12 @@ clientProcessRequest(ConnStateData *conn, const Http1::RequestParserPointer &hp,
             request->url.host(internalHostname());
             request->url.port(getMyPort());
             http->flags.internal = true;
+            http->setLogUriToRequestUri();
         } else
             debugs(33, 2, "internal URL found: " << request->url.getScheme() << "://" << request->url.authority(true) << " (not this proxy)");
     }
 
     request->flags.internal = http->flags.internal;
-    setLogUri (http, urlCanonicalClean(request.getRaw()));
 
     if (!isFtp) {
         // XXX: for non-HTTP messages instantiate a different HttpMsg child type
@@ -3468,8 +3403,7 @@ ConnStateData::buildFakeRequest(Http::MethodType const method, SBuf &useHost, un
     request->method = method;
     request->url.host(useHost.c_str());
     request->url.port(usePort);
-    http->request = request.getRaw();
-    HTTPMSGLOCK(http->request);
+    http->initRequest(request.getRaw());
 
     request->manager(this, http->al);
 
@@ -3485,7 +3419,6 @@ ConnStateData::buildFakeRequest(Http::MethodType const method, SBuf &useHost, un
     inBuf = payload;
     flags.readMore = false;
 
-    setLogUri(http, urlCanonicalClean(request.getRaw()));
     return http;
 }
 
@@ -4154,9 +4087,7 @@ ConnStateData::checkLogging()
     ClientHttpRequest http(this);
     http.req_sz = inBuf.length();
     // XXX: Or we died while waiting for the pinned connection to become idle.
-    char const *uri = "error:transaction-end-before-headers";
-    http.uri = xstrdup(uri);
-    setLogUri(&http, uri);
+    http.setErrorUri("error:transaction-end-before-headers");
 }
 
 bool
index 867b52b564272ff955cd6440ee1f9a73a21c3429..bd4ceb0dd45e9e3ef16bad086e4ae8ed2d45cbd2 100644 (file)
@@ -412,8 +412,6 @@ private:
     SBuf connectionTag_; ///< clt_conn_tag=Tag annotation for client connection
 };
 
-void setLogUri(ClientHttpRequest * http, char const *uri, bool cleanUrl = false);
-
 const char *findTrailingHTTPVersion(const char *uriAndHTTPVersion, const char *end = NULL);
 
 int varyEvaluateMatch(StoreEntry * entry, HttpRequest * req);
index 3bee9feb8570b488aa702b59986913ad1ea08f51..6ddd8ff7323e86a71a75fcf4c0db7ebd6f7061bf 100644 (file)
@@ -2261,7 +2261,10 @@ clientReplyContext::createStoreEntry(const HttpRequestMethod& m, RequestFlags re
 
     if (http->request == NULL) {
         const MasterXaction::Pointer mx = new MasterXaction(XactionInitiator::initClient);
-        http->request = new HttpRequest(m, AnyP::PROTO_NONE, "http", null_string, mx);
+        // XXX: These fake URI parameters shadow the real (or error:...) URI.
+        // TODO: Either always set the request earlier and assert here OR use
+        // http->uri (converted to Anyp::Uri) to create this catch-all request.
+        const_cast<HttpRequest *&>(http->request) =  new HttpRequest(m, AnyP::PROTO_NONE, "http", null_string, mx);
         HTTPMSGLOCK(http->request);
     }
 
index e619392b809012aae36f62cda9ba9581d6f43c52..07ab78fbf753a9b43310344086ed6f1be1fe4ced 100644 (file)
@@ -48,6 +48,7 @@
 #include "Parsing.h"
 #include "profiler/Profiler.h"
 #include "redirect.h"
+#include "rfc1738.h"
 #include "SquidConfig.h"
 #include "SquidTime.h"
 #include "Store.h"
@@ -358,8 +359,6 @@ clientBeginRequest(const HttpRequestMethod& method, char const *url, CSCB * stre
     if (header)
         request->header.update(header);
 
-    http->log_uri = xstrdup(urlCanonicalClean(request));
-
     /* http struct now ready */
 
     /*
@@ -390,8 +389,7 @@ clientBeginRequest(const HttpRequestMethod& method, char const *url, CSCB * stre
 
     request->http_ver = Http::ProtocolVersion();
 
-    http->request = request;
-    HTTPMSGLOCK(http->request);
+    http->initRequest(request);
 
     /* optional - skip the access check ? */
     http->calloutContext = new ClientRequestContext(http);
@@ -1279,12 +1277,8 @@ ClientRequestContext::clientRedirectDone(const Helper::Reply &reply)
                                " from request " << old_request << " to " << new_request);
                     }
 
-                    // update the current working ClientHttpRequest fields
-                    xfree(http->uri);
-                    http->uri = SBufToCstring(new_request->effectiveRequestUri());
-                    HTTPMSGUNLOCK(old_request);
-                    http->request = new_request;
-                    HTTPMSGLOCK(http->request);
+                    http->resetRequest(new_request);
+                    old_request = nullptr;
                 } else {
                     debugs(85, DBG_CRITICAL, "ERROR: URL-rewrite produces invalid request: " <<
                            old_request->method << " " << urlNote << " " << old_request->http_ver);
@@ -1654,6 +1648,56 @@ ClientHttpRequest::loggingEntry(StoreEntry *newEntry)
         loggingEntry_->lock("ClientHttpRequest::loggingEntry");
 }
 
+void
+ClientHttpRequest::initRequest(HttpRequest *aRequest)
+{
+    assignRequest(aRequest);
+    if (const auto csd = getConn()) {
+        if (!csd->connectionTag().isEmpty()) {
+            if (!request->notes)
+                request->notes = new NotePairs;
+            // TODO: assert if "clt_conn_tag" already added?
+            request->notes->add("clt_conn_tag", SBuf(csd->connectionTag()).c_str());
+        }
+    }
+    // al is created in the constructor
+    assert(al);
+    if (!al->request) {
+        al->request = request;
+        HTTPMSGLOCK(al->request);
+        (void)SyncNotes(*al, *request);
+    }
+}
+
+void
+ClientHttpRequest::resetRequest(HttpRequest *newRequest)
+{
+    assert(request != newRequest);
+    clearRequest();
+    assignRequest(newRequest);
+    xfree(uri);
+    uri = SBufToCstring(request->effectiveRequestUri());
+}
+
+void
+ClientHttpRequest::assignRequest(HttpRequest *newRequest)
+{
+    assert(newRequest);
+    assert(!request);
+    const_cast<HttpRequest *&>(request) = newRequest;
+    HTTPMSGLOCK(request);
+    setLogUriToRequestUri();
+}
+
+void
+ClientHttpRequest::clearRequest()
+{
+    HttpRequest *oldRequest = request;
+    HTTPMSGUNLOCK(oldRequest);
+    const_cast<HttpRequest *&>(request) = nullptr;
+    absorbLogUri(nullptr);
+}
+
 /*
  * doCallouts() - This function controls the order of "callout"
  * executions, including non-blocking access control checks, the
@@ -1694,19 +1738,6 @@ ClientHttpRequest::doCallouts()
 {
     assert(calloutContext);
 
-    /*Save the original request for logging purposes*/
-    if (!calloutContext->http->al->request) {
-        calloutContext->http->al->request = request;
-        HTTPMSGLOCK(calloutContext->http->al->request);
-
-        NotePairs &notes = SyncNotes(*calloutContext->http->al, *calloutContext->http->request);
-        // Make the previously set client connection ID available as annotation.
-        if (ConnStateData *csd = calloutContext->http->getConn()) {
-            if (!csd->connectionTag().isEmpty())
-                notes.add("clt_conn_tag", SBuf(csd->connectionTag()).c_str());
-        }
-    }
-
     if (!calloutContext->error) {
         // CVE-2009-0801: verify the Host: header is consistent with other known details.
         if (!calloutContext->host_header_verify_done) {
@@ -1871,6 +1902,53 @@ ClientHttpRequest::doCallouts()
 #endif
 }
 
+void
+ClientHttpRequest::setLogUriToRequestUri()
+{
+    assert(request);
+    const auto canonicalUri = request->canonicalCleanUrl();
+    absorbLogUri(xstrndup(canonicalUri, MAX_URL));
+}
+
+void
+ClientHttpRequest::setLogUriToRawUri(const char *rawUri, const HttpRequestMethod &method)
+{
+    assert(rawUri);
+    // Should(!request);
+
+    // TODO: SBuf() performance regression, fix by converting rawUri to SBuf
+    char *canonicalUri = urlCanonicalCleanWithoutRequest(SBuf(rawUri), method, AnyP::UriScheme());
+
+    absorbLogUri(AnyP::Uri::cleanup(canonicalUri));
+
+    char *cleanedRawUri = AnyP::Uri::cleanup(rawUri);
+    al->setVirginUrlForMissingRequest(SBuf(cleanedRawUri));
+    xfree(cleanedRawUri);
+}
+
+void
+ClientHttpRequest::absorbLogUri(char *aUri)
+{
+    xfree(log_uri);
+    const_cast<char *&>(log_uri) = aUri;
+}
+
+void
+ClientHttpRequest::setErrorUri(const char *aUri)
+{
+    assert(!uri);
+    assert(aUri);
+    // Should(!request);
+
+    uri = xstrdup(aUri);
+    // TODO: SBuf() performance regression, fix by converting setErrorUri() parameter to SBuf
+    const SBuf errorUri(aUri);
+    const auto canonicalUri = urlCanonicalCleanWithoutRequest(errorUri, HttpRequestMethod(), AnyP::UriScheme());
+    absorbLogUri(xstrndup(canonicalUri, MAX_URL));
+
+    al->setVirginUrlForMissingRequest(errorUri);
+}
+
 #if !_USE_INLINE_
 #include "client_side_request.cci"
 #endif
@@ -1919,23 +1997,11 @@ ClientHttpRequest::handleAdaptedHeader(HttpMsg *msg)
     assert(msg);
 
     if (HttpRequest *new_req = dynamic_cast<HttpRequest*>(msg)) {
-        /*
-         * Replace the old request with the new request.
-         */
-        HTTPMSGUNLOCK(request);
-        request = new_req;
-        HTTPMSGLOCK(request);
 
         // update the new message to flag whether URL re-writing was done on it
-        if (request->effectiveRequestUri().cmp(uri) != 0)
-            request->flags.redirected = 1;
-
-        /*
-         * Store the new URI for logging
-         */
-        xfree(uri);
-        uri = SBufToCstring(request->effectiveRequestUri());
-        setLogUri(this, urlCanonicalClean(request));
+        if (request->effectiveRequestUri() != new_req->effectiveRequestUri())
+            new_req->flags.redirected = true;
+        resetRequest(new_req);
         assert(request->method.id());
     } else if (HttpReply *new_rep = dynamic_cast<HttpReply*>(msg)) {
         debugs(85,3,HERE << "REQMOD reply is HTTP reply");
index cd2e196f1f42a5017b4ca01fca12d30ba95d78ec..3c316667cca2fd0217a6a738795b95cfe1d66838 100644 (file)
@@ -63,14 +63,37 @@ public:
     _SQUID_INLINE_ ConnStateData * getConn() const;
     _SQUID_INLINE_ void setConn(ConnStateData *);
 
+    /// Initializes the current request with the virgin request.
+    /// Call this method when the virgin request becomes known.
+    /// To update the current request later, use resetRequest().
+    void initRequest(HttpRequest *);
+
+    /// Resets the current request to the latest adapted or redirected
+    /// request. Call this every time adaptation or redirection changes
+    /// the request. To set the virgin request, use initRequest().
+    void resetRequest(HttpRequest *);
+
     /** Details of the client socket which produced us.
      * Treat as read-only for the lifetime of this HTTP request.
      */
     Comm::ConnectionPointer clientConnection;
 
-    HttpRequest *request;       /* Parsed URL ... */
+    /// Request currently being handled by ClientHttpRequest.
+    /// Starts as a virgin request; see initRequest().
+    /// Usually remains nil until the virgin request header is parsed or faked.
+    /// Adaptation and redirections replace it; see resetRequest().
+    HttpRequest * const request;
+    /// Usually starts as a URI received from the client, with scheme and host
+    /// added if needed. Is used to create the virgin request for initRequest().
+    /// URIs of adapted/redirected requests replace it via resetRequest().
     char *uri;
-    char *log_uri;
+
+    // TODO: remove this field and store the URI directly in al->url
+    /// Cleaned up URI of the current (virgin or adapted/redirected) request,
+    /// computed URI of an internally-generated requests, or
+    /// one of the hard-coded "error:..." URIs.
+    char * const log_uri;
+
     String store_id; /* StoreID for transactions where the request member is nil */
 
     struct Out {
@@ -113,6 +136,18 @@ public:
     ClientRequestContext *calloutContext;
     void doCallouts();
 
+    // The three methods below prepare log_uri and friends for future logging.
+    // Call the best-fit method whenever the current request or its URI changes.
+
+    /// sets log_uri when we know the current request
+    void setLogUriToRequestUri();
+    /// sets log_uri to a parsed request URI when Squid fails to parse or
+    /// validate other request components, yielding no current request
+    void setLogUriToRawUri(const char *rawUri, const HttpRequestMethod &);
+    /// sets log_uri and uri to an internally-generated "error:..." URI when
+    /// neither the current request nor the parsed request URI are known
+    void setErrorUri(const char *errorUri);
+
     /// Build an error reply. For use with the callouts.
     void calloutsError(const err_type error, const int errDetail);
 
@@ -126,6 +161,14 @@ public:
 #endif
 
 private:
+    /// assigns log_uri with aUri without copying the entire C-string
+    void absorbLogUri(char *aUri);
+    /// resets the current request and log_uri to nil
+    void clearRequest();
+    /// initializes the current unassigned request to the virgin request
+    /// sets the current request, asserting that it was unset
+    void assignRequest(HttpRequest *aRequest);
+
     int64_t maxReplyBodySize_;
     StoreEntry *entry_;
     StoreEntry *loggingEntry_;
index 1d722738474c79ea15c7ba118bded43aedf77b0d..618b85dca0b27727676f1c1fd162a21bda6337eb 100644 (file)
@@ -264,6 +264,7 @@ logAcceptError(const Comm::ConnectionPointer &conn)
     AccessLogEntry::Pointer al = new AccessLogEntry;
     al->tcpClient = conn;
     al->url = "error:accept-client-connection";
+    al->setVirginUrlForMissingRequest(al->url);
     ACLFilledChecklist ch(nullptr, nullptr, nullptr);
     ch.src_addr = conn->remote;
     ch.my_addr = conn->local;
index e19efb0bd3788288bbca87a57a371272c912f279..46fc671677c9b975519e45a042d12752e8200a21 100644 (file)
@@ -979,9 +979,8 @@ Format::Format::assemble(MemBuf &mb, const AccessLogEntry::Pointer &al, int logS
             break;
 
         case LFT_CLIENT_REQ_URI:
-            // original client URI
-            if (al->request) {
-                sb = al->request->effectiveRequestUri();
+            if (const auto uri = al->effectiveVirginUrl()) {
+                sb = *uri;
                 out = sb.c_str();
                 quote = 1;
             }
index 18c5259e4e62dab8fc4010e9946c346b06734b0c..e424291ad16f0d45441e0c5f2398484b8a525fb7 100644 (file)
@@ -1579,6 +1579,7 @@ htcpLogHtcp(Ip::Address &caddr, int opcode, LogTags logcode, const char *url)
         return;
     al->htcp.opcode = htcpOpcodeStr[opcode];
     al->url = url;
+    al->setVirginUrlForMissingRequest(al->url);
     al->cache.caddr = caddr;
     al->cache.code = logcode;
     al->cache.trTime.tv_sec = 0;
index 31483c544327ee8d9773cf4d1c1cf0223da8e1ec..0adac9a2b8e48c5ac0b3ec1dd1eefd139412c408 100644 (file)
@@ -198,6 +198,8 @@ icpLogIcp(const Ip::Address &caddr, const LogTags &logcode, int len, const char
 
     al->url = url;
 
+    al->setVirginUrlForMissingRequest(al->url);
+
     al->cache.caddr = caddr;
 
     // XXX: move to use icp.clientReply instead
index 37acca4ba0c10793559068776dcf5ea14716f242..c775968cea8546f564ece7c7ffba16285cbc776d 100644 (file)
@@ -749,10 +749,9 @@ Ftp::Server::parseOneRequest()
     }
 
     ClientHttpRequest *const http = new ClientHttpRequest(this);
-    http->request = request;
-    HTTPMSGLOCK(http->request);
     http->req_sz = tok.parsedSize();
     http->uri = newUri;
+    http->initRequest(request);
 
     Http::Stream *const result =
         new Http::Stream(clientConnection, http);
@@ -1736,8 +1735,6 @@ Ftp::Server::setReply(const int code, const char *msg)
 
     HttpReply *const reply = Ftp::HttpReplyWrapper(code, msg, Http::scNoContent, 0);
 
-    setLogUri(http, urlCanonicalClean(http->request));
-
     clientStreamNode *const node = context->getClientReplyContext();
     clientReplyContext *const repContext =
         dynamic_cast<clientReplyContext *>(node->data.getRaw());
index c23a934f9f91dcc729d5b3e655a5511ca5ca49b1..8b8a82fcbeeb2857c42666a70f1678a2804bed51 100644 (file)
@@ -118,8 +118,10 @@ Http::One::Server::buildHttpRequest(Http::StreamPointer &context)
             // else use default ERR_INVALID_REQ set above.
             break;
         }
-        // setLogUri should called before repContext->setReplyToError
-        setLogUri(http, http->uri, true);
+        // setReplyToError() requires log_uri
+        // must be already initialized via ConnStateData::abortRequestParsing()
+        assert(http->log_uri);
+
         const char * requestErrorBytes = inBuf.c_str();
         if (!clientTunnelOnError(this, context, request, parser_->method(), errPage)) {
             setReplyError(context, request, parser_->method(), errPage, parser_->parseStatusCode, requestErrorBytes);
@@ -135,8 +137,8 @@ Http::One::Server::buildHttpRequest(Http::StreamPointer &context)
     mx->tcpClient = clientConnection;
     if ((request = HttpRequest::FromUrl(http->uri, mx, parser_->method())) == NULL) {
         debugs(33, 5, "Invalid URL: " << http->uri);
-        // setLogUri should called before repContext->setReplyToError
-        setLogUri(http, http->uri, true);
+        // setReplyToError() requires log_uri
+        http->setLogUriToRawUri(http->uri, parser_->method());
 
         const char * requestErrorBytes = inBuf.c_str();
         if (!clientTunnelOnError(this, context, request, parser_->method(), ERR_INVALID_URL)) {
@@ -154,8 +156,8 @@ Http::One::Server::buildHttpRequest(Http::StreamPointer &context)
             (parser_->messageProtocol().major > 1) ) {
 
         debugs(33, 5, "Unsupported HTTP version discovered. :\n" << parser_->messageProtocol());
-        // setLogUri should called before repContext->setReplyToError
-        setLogUri(http, http->uri,  true);
+        // setReplyToError() requires log_uri
+        http->setLogUriToRawUri(http->uri, parser_->method());
 
         const char * requestErrorBytes = NULL; //HttpParserHdrBuf(parser_);
         if (!clientTunnelOnError(this, context, request, parser_->method(), ERR_UNSUP_HTTPVERSION)) {
@@ -168,8 +170,8 @@ Http::One::Server::buildHttpRequest(Http::StreamPointer &context)
     /* compile headers */
     if (parser_->messageProtocol().major >= 1 && !request->parseHeader(*parser_.getRaw())) {
         debugs(33, 5, "Failed to parse request headers:\n" << parser_->mimeHeader());
-        // setLogUri should called before repContext->setReplyToError
-        setLogUri(http, http->uri, true);
+        // setReplyToError() requires log_uri
+        http->setLogUriToRawUri(http->uri, parser_->method());
         const char * requestErrorBytes = NULL; //HttpParserHdrBuf(parser_);
         if (!clientTunnelOnError(this, context, request, parser_->method(), ERR_INVALID_REQ)) {
             setReplyError(context, request, parser_->method(), ERR_INVALID_REQ, Http::scBadRequest, requestErrorBytes);
@@ -188,8 +190,7 @@ Http::One::Server::buildHttpRequest(Http::StreamPointer &context)
         request->header.putStr(Http::HOST, tmp.c_str());
     }
 
-    http->request = request.getRaw();
-    HTTPMSGLOCK(http->request);
+    http->initRequest(request.getRaw());
 
     return true;
 }
@@ -235,8 +236,8 @@ Http::One::Server::processParsedRequest(Http::StreamPointer &context)
         if (!supportedExpect) {
             clientStreamNode *node = context->getClientReplyContext();
             quitAfterError(request.getRaw());
-            // setLogUri should called before repContext->setReplyToError
-            setLogUri(http, urlCanonicalClean(request.getRaw()));
+            // setReplyToError() requires log_uri
+            assert(http->log_uri);
             clientReplyContext *repContext = dynamic_cast<clientReplyContext *>(node->data.getRaw());
             assert (repContext);
             repContext->setReplyToError(ERR_INVALID_REQ, Http::scExpectationFailed, request->method, http->uri,
index 079d245cd93fbc3cae5d31259719e90f6c4c022e..bedf9beb3a2c10cc553cb9d6673631952d4db40c 100644 (file)
@@ -51,7 +51,6 @@ void ConnStateData::buildSslCertGenerationParams(Ssl::CertificateProperties &) S
 bool ConnStateData::serveDelayedError(Http::Stream *) STUB_RETVAL(false)
 #endif
 
-void setLogUri(ClientHttpRequest *, char const *, bool) STUB
 const char *findTrailingHTTPVersion(const char *, const char *) STUB_RETVAL(NULL)
 int varyEvaluateMatch(StoreEntry *, HttpRequest *) STUB_RETVAL(0)
 void clientOpenListenSockets(void) STUB
index 0a7d347da2bb39895deafb094b91fd6e47d3b20a..40306ef74d36a5fe080995dfff44f9baab98bc11 100644 (file)
@@ -31,7 +31,6 @@ const SBuf &AnyP::Uri::Asterisk()
 SBuf &AnyP::Uri::authority(bool) const STUB_RETVAL(nil)
 SBuf &AnyP::Uri::absolute() const STUB_RETVAL(nil)
 void urlInitialize() STUB
-char *urlCanonicalClean(const HttpRequest *) STUB_RETVAL(nullptr)
 const char *urlCanonicalFakeHttps(const HttpRequest *) STUB_RETVAL(nullptr)
 bool urlIsRelative(const char *) STUB_RETVAL(false)
 char *urlMakeAbsolute(const HttpRequest *, const char *)STUB_RETVAL(nullptr)