From: Daniel Stenberg <daniel@haxx.se>
Date: Wed, 17 Apr 2024 08:42:28 +0000 (+0200)
Subject: urlapi: fix relative redirects to fragment-only
X-Git-Tag: curl-8_8_0~198
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=c37b694e467bf5771e5e661056c0c269f5f5ec23;p=thirdparty%2Fcurl.git

urlapi: fix relative redirects to fragment-only

Using the URL API for a redirect URL when the redirected-to string
starts with a hash, ie is only a fragment, the API would produce the
wrong final URL.

Adjusted test 1560 to test for several new redirect cases.

Closes #13394
---

diff --git a/lib/urlapi.c b/lib/urlapi.c
index e610d97964..ab96ae218a 100644
--- a/lib/urlapi.c
+++ b/lib/urlapi.c
@@ -264,6 +264,7 @@ static CURLcode concat_url(char *base, const char *relurl, char **newurl)
   const char *useurl = relurl;
   CURLcode result = CURLE_OK;
   CURLUcode uc;
+  bool skip_slash = FALSE;
   *newurl = NULL;
 
   /* protsep points to the start of the host name */
@@ -283,48 +284,50 @@ static CURLcode concat_url(char *base, const char *relurl, char **newurl)
       *pathsep = 0;
 
     /* we have a relative path to append to the last slash if there's one
-       available, or if the new URL is just a query string (starts with a
-       '?')  we append the new one at the end of the entire currently worked
-       out URL */
-    if(useurl[0] != '?') {
+       available, or the new URL is just a query string (starts with a '?') or
+       a fragment (starts with '#') we append the new one at the end of the
+       current URL */
+    if((useurl[0] != '?') && (useurl[0] != '#')) {
       pathsep = strrchr(protsep, '/');
       if(pathsep)
         *pathsep = 0;
-    }
 
-    /* Check if there's any slash after the host name, and if so, remember
-       that position instead */
-    pathsep = strchr(protsep, '/');
-    if(pathsep)
-      protsep = pathsep + 1;
-    else
-      protsep = NULL;
+      /* Check if there's any slash after the host name, and if so, remember
+         that position instead */
+      pathsep = strchr(protsep, '/');
+      if(pathsep)
+        protsep = pathsep + 1;
+      else
+        protsep = NULL;
 
-    /* now deal with one "./" or any amount of "../" in the newurl
-       and act accordingly */
+      /* now deal with one "./" or any amount of "../" in the newurl
+         and act accordingly */
 
-    if((useurl[0] == '.') && (useurl[1] == '/'))
-      useurl += 2; /* just skip the "./" */
+      if((useurl[0] == '.') && (useurl[1] == '/'))
+        useurl += 2; /* just skip the "./" */
 
-    while((useurl[0] == '.') &&
-          (useurl[1] == '.') &&
-          (useurl[2] == '/')) {
-      level++;
-      useurl += 3; /* pass the "../" */
-    }
+      while((useurl[0] == '.') &&
+            (useurl[1] == '.') &&
+            (useurl[2] == '/')) {
+        level++;
+        useurl += 3; /* pass the "../" */
+      }
 
-    if(protsep) {
-      while(level--) {
-        /* cut off one more level from the right of the original URL */
-        pathsep = strrchr(protsep, '/');
-        if(pathsep)
-          *pathsep = 0;
-        else {
-          *protsep = 0;
-          break;
+      if(protsep) {
+        while(level--) {
+          /* cut off one more level from the right of the original URL */
+          pathsep = strrchr(protsep, '/');
+          if(pathsep)
+            *pathsep = 0;
+          else {
+            *protsep = 0;
+            break;
+          }
         }
       }
     }
+    else
+      skip_slash = TRUE;
   }
   else {
     /* We got a new absolute path for this server */
@@ -370,7 +373,7 @@ static CURLcode concat_url(char *base, const char *relurl, char **newurl)
     return result;
 
   /* check if we need to append a slash */
-  if(('/' == useurl[0]) || (protsep && !*protsep) || ('?' == useurl[0]))
+  if(('/' == useurl[0]) || (protsep && !*protsep) || skip_slash)
     ;
   else {
     result = Curl_dyn_addn(&newest, "/", 1);
diff --git a/tests/libtest/lib1560.c b/tests/libtest/lib1560.c
index 2f7c76a433..66f944d0d6 100644
--- a/tests/libtest/lib1560.c
+++ b/tests/libtest/lib1560.c
@@ -1081,6 +1081,54 @@ static CURLUcode updateurl(CURLU *u, const char *cmd, unsigned int setflags)
 }
 
 static const struct redircase set_url_list[] = {
+  {"http://example.org/",
+   "../path/././../../moo",
+   "http://example.org/moo",
+   0, 0, CURLUE_OK},
+  {"http://example.org/",
+   "//example.org/../path/../../",
+   "http://example.org/",
+   0, 0, CURLUE_OK},
+  {"http://example.org/",
+   "///example.org/../path/../../",
+   "http://example.org/",
+   0, 0, CURLUE_OK},
+  {"http://example.org/foo/bar",
+   ":23",
+   "http://example.org/foo/:23",
+   0, 0, CURLUE_OK},
+  {"http://example.org/foo/bar",
+   "\\x",
+   "http://example.org/foo/\\x",
+   /* WHATWG disagrees */
+   0, 0, CURLUE_OK},
+  {"http://example.org/foo/bar",
+   "#/",
+   "http://example.org/foo/bar#/",
+   0, 0, CURLUE_OK},
+  {"http://example.org/foo/bar",
+   "?/",
+   "http://example.org/foo/bar?/",
+   0, 0, CURLUE_OK},
+  {"http://example.org/foo/bar",
+   "#;?",
+   "http://example.org/foo/bar#;?",
+   0, 0, CURLUE_OK},
+  {"http://example.org/foo/bar",
+   "#",
+   "http://example.org/foo/bar",
+   /* This happens because the parser removes empty fragments */
+   0, 0, CURLUE_OK},
+  {"http://example.org/foo/bar",
+   "?",
+   "http://example.org/foo/bar",
+   /* This happens because the parser removes empty queries */
+   0, 0, CURLUE_OK},
+  {"http://example.org/foo/bar",
+   "?#",
+   "http://example.org/foo/bar",
+   /* This happens because the parser removes empty queries and fragments */
+   0, 0, CURLUE_OK},
   {"http://example.com/please/../gimme/%TESTNUMBER?foobar#hello",
    "http://example.net/there/it/is/../../tes t case=/%TESTNUMBER0002? yes no",
    "http://example.net/there/tes%20t%20case=/%TESTNUMBER0002?+yes+no",