Bug 1961 partial: refactor 'canonical' URI creation

author Amos Jeffries <squid3@treenet.co.nz>

Thu, 16 Jul 2015 14:55:27 +0000 (07:55 -0700)

committer Amos Jeffries <squid3@treenet.co.nz>

Thu, 16 Jul 2015 14:55:27 +0000 (07:55 -0700)
author Amos Jeffries <squid3@treenet.co.nz>
Thu, 16 Jul 2015 14:55:27 +0000 (07:55 -0700)
committer Amos Jeffries <squid3@treenet.co.nz>
Thu, 16 Jul 2015 14:55:27 +0000 (07:55 -0700)
diff --git a/src/URL.h b/src/URL.h

index c33dc53e19cafa6446f96e8abe5f4f3bfea4b9ca..3ae18ead79819f5f4902773448a850c6e3ff7558 100644 (file)
--- a/src/URL.h
+++ b/src/URL.h
@@ -77,6 +77,15 @@ public:
       */
      SBuf &authority(bool requirePort = false) const;
  
+    /**
+     * The absolute-form URI for currently stored values.
+     *
+     * As defined by RFC 7230 section 5.3.3 this form omits the
+     * userinfo@ field from RFC 3986 defined authority segments
+     * when the protocol scheme is http: or https:.
+     */
+    SBuf &absolute() const;
+
  private:
      /**
       \par
@@ -116,7 +125,7 @@ private:
      // pre-assembled URL forms
      mutable SBuf authorityHttp_;     ///< RFC 7230 section 5.3.3 authority, maybe without default-port
      mutable SBuf authorityWithPort_; ///< RFC 7230 section 5.3.3 authority with explicit port
-    mutable SBuf canonical_;         ///< full absolute-URI
+    mutable SBuf absolute_;          ///< RFC 7230 section 5.3.2 absolute-URI
  };
  
  inline std::ostream &
diff --git a/src/url.cc b/src/url.cc

index 626d92bf1f6e53f1039c321444346cfc2f7ee36d..e0bf23ae26e33118bc784367209e9a027256feb5 100644 (file)
--- a/src/url.cc
+++ b/src/url.cc
@@ -474,6 +474,7 @@ urnParse(const HttpRequestMethod& method, char *urn, HttpRequest *request)
  void
  URL::touch()
  {
+    absolute_.clear();
      authorityHttp_.clear();
      authorityWithPort_.clear();
  }
@@ -496,39 +497,44 @@ URL::authority(bool requirePort) const
      return requirePort ? authorityWithPort_ : authorityHttp_;
  }
  
+SBuf &
+URL::absolute() const
+{
+    if (absolute_.isEmpty()) {
+        // TODO: most URL will be much shorter, avoid allocating this much
+        absolute_.reserveCapacity(MAX_URL);
+
+        absolute_.appendf("%s:", getScheme().c_str());
+        if (getScheme() != AnyP::PROTO_URN) {
+            absolute_.append("//", 2);
+            const bool omitUserInfo = getScheme() == AnyP::PROTO_HTTP ||
+                                      getScheme() != AnyP::PROTO_HTTPS ||
+                                      userInfo().isEmpty();
+            if (!omitUserInfo) {
+                absolute_.append(userInfo());
+                absolute_.append("@", 1);
+            }
+            absolute_.append(authority());
+        }
+        absolute_.append(path());
+    }
+
+    return absolute_;
+}
+
  const char *
  urlCanonical(HttpRequest * request)
  {
-    LOCAL_ARRAY(char, urlbuf, MAX_URL);
-
      if (request->canonical)
          return request->canonical;
  
-    if (request->url.getScheme() == AnyP::PROTO_URN) {
-        snprintf(urlbuf, MAX_URL, "urn:" SQUIDSBUFPH,
-                 SQUIDSBUFPRINT(request->url.path()));
-    } else {
-        SBuf authorityForm;
-        switch (request->method.id()) {
+    SBuf url;
+    if (request->method.id() == Http::METHOD_CONNECT)
+        url = request->url.authority(true); // host:port
+    else
+        url = request->url.absolute();
  
-        case Http::METHOD_CONNECT:
-            authorityForm = request->url.authority(true); // host:port
-            snprintf(urlbuf, MAX_URL, SQUIDSBUFPH, SQUIDSBUFPRINT(authorityForm));
-            break;
-
-        default: {
-            authorityForm = request->url.authority(); // host[:port]
-            snprintf(urlbuf, MAX_URL, "%s://" SQUIDSBUFPH "%s" SQUIDSBUFPH SQUIDSBUFPH,
-                     request->url.getScheme().c_str(),
-                     SQUIDSBUFPRINT(request->url.userInfo()),
-                     !request->url.userInfo().isEmpty() ? "@" : "",
-                     SQUIDSBUFPRINT(authorityForm),
-                     SQUIDSBUFPRINT(request->url.path()));
-        }
-        }
-    }
-
-    return (request->canonical = xstrdup(urlbuf));
+    return (request->canonical = xstrndup(url.rawContent(), url.length()+1));
  }
  
  /** \todo AYJ: Performance: This is an *almost* duplicate of urlCanonical. But elides the query-string.
@@ -539,35 +545,21 @@ char *
  urlCanonicalClean(const HttpRequest * request)
  {
      LOCAL_ARRAY(char, buf, MAX_URL);
-    char *t;
  
-    if (request->url.getScheme() == AnyP::PROTO_URN) {
-        snprintf(buf, MAX_URL, "urn:" SQUIDSBUFPH,
-                 SQUIDSBUFPRINT(request->url.path()));
-    } else {
-        SBuf authorityForm;
-        switch (request->method.id()) {
+    snprintf(buf, sizeof(buf), "%s", urlCanonical(const_cast<HttpRequest *>(request)));
+    buf[sizeof(buf)-1] = '\0';
  
-        case Http::METHOD_CONNECT:
-            authorityForm = request->url.authority(true); // host:port
-            snprintf(buf, MAX_URL, SQUIDSBUFPH, SQUIDSBUFPRINT(authorityForm));
-            break;
+    // URN, CONNECT method, and non-stripped URIs can go straight out
+    if (!(request->url.getScheme() == AnyP::PROTO_URN ||
+          !Config.onoff.strip_query_terms ||
+          request->method == Http::METHOD_CONNECT
+          )) {
  
-        default: {
-            authorityForm = request->url.authority(); // host[:port]
-            snprintf(buf, MAX_URL, "%s://" SQUIDSBUFPH "%s" SQUIDSBUFPH SQUIDSBUFPH,
-                     request->url.getScheme().c_str(),
-                     SQUIDSBUFPRINT(request->url.userInfo()),
-                     !request->url.userInfo().isEmpty() ? "@" : "",
-                     SQUIDSBUFPRINT(authorityForm),
-                     SQUIDSBUFPRINT(request->url.path()));
-
-            // strip arguments AFTER a question-mark
-            if (Config.onoff.strip_query_terms)
-                if ((t = strchr(buf, '?')))
-                    *(++t) = '\0';
+        // strip anything AFTER a question-mark
+        // leaving the '?' in place
+        if (auto t = strchr(buf, '?')) {
+            *(++t) = '\0';
          }
-        } // switch
      }
  
      if (stringHasCntl(buf))
@@ -647,8 +639,10 @@ urlMakeAbsolute(const HttpRequest * req, const char *relUrl)
      char *urlbuf = (char *)xmalloc(MAX_URL * sizeof(char));
  
      if (req->url.getScheme() == AnyP::PROTO_URN) {
-        snprintf(urlbuf, MAX_URL, "urn:" SQUIDSBUFPH,
-                 SQUIDSBUFPRINT(req->url.path()));
+        // XXX: this is what the original code did, but it seems to break the
+        // intended behaviour of this function. It returns the stored URN path,
+        // not converting the given one into a URN...
+        snprintf(urlbuf, MAX_URL, SQUIDSBUFPH, SQUIDSBUFPRINT(req->url.absolute()));
          return (urlbuf);
      }
  
@@ -662,6 +656,7 @@ urlMakeAbsolute(const HttpRequest * req, const char *relUrl)
      // if the first char is '/' assume its a relative path
      // XXX: this breaks on scheme-relative URLs,
      // but we should not see those outside ESI, and rarely there.
+    // XXX: also breaks on any URL containing a '/' in the query-string portion
      if (relUrl[0] == '/') {
          xstrncpy(&urlbuf[urllen], relUrl, MAX_URL - urllen - 1);
      } else {
author	Amos Jeffries <squid3@treenet.co.nz>
	Thu, 16 Jul 2015 14:55:27 +0000 (07:55 -0700)
committer	Amos Jeffries <squid3@treenet.co.nz>
	Thu, 16 Jul 2015 14:55:27 +0000 (07:55 -0700)
src/URL.h		patch \| blob \| blame \| history
src/url.cc		patch \| blob \| blame \| history