From: Daniel Stenberg <daniel@haxx.se>
Date: Thu, 8 Jun 2023 11:15:09 +0000 (+0200)
Subject: urlapi: have *set(PATH) prepend a slash if one is missing
X-Git-Tag: curl-8_2_0~133
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=3c9256c8a0b5a36dde28b49cc8df8c6d0aae1f48;p=thirdparty%2Fcurl.git

urlapi: have *set(PATH) prepend a slash if one is missing

Previously the code would only do that for the path when extracting the
full URL, which meant that a subsequent curl_url_get() of the path would
(unexpectedly) still return it without the leading slash.

Amend lib1560 to verify this. Clarify the curl_url_set() docs about it.

Bug: https://curl.se/mail/lib-2023-06/0015.html
Closes #11272
Reported-by: Pedro Henrique
---

diff --git a/docs/libcurl/curl_url_set.3 b/docs/libcurl/curl_url_set.3
index 034974e111..435818e8bf 100644
--- a/docs/libcurl/curl_url_set.3
+++ b/docs/libcurl/curl_url_set.3
@@ -101,8 +101,8 @@ Port cannot be URL encoded on set. The given port number is provided as a
 string and the decimal number must be between 1 and 65535. Anything else will
 return an error.
 .IP CURLUPART_PATH
-If a path is set in the URL without a leading slash, a slash will be inserted
-automatically when this URL is read from the handle.
+If a path is set in the URL without a leading slash, a slash will be prepended
+automatically.
 .IP CURLUPART_QUERY
 The query part will also get spaces converted to pluses when asked to URL
 encode on set with the \fICURLU_URLENCODE\fP bit.
diff --git a/lib/urlapi.c b/lib/urlapi.c
index 07df6d65d8..7b2498c40e 100644
--- a/lib/urlapi.c
+++ b/lib/urlapi.c
@@ -1547,7 +1547,7 @@ CURLUcode curl_url_get(const CURLU *u, CURLUPart what,
         }
       }
 
-      url = aprintf("%s://%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
+      url = aprintf("%s://%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
                     scheme,
                     u->user ? u->user : "",
                     u->password ? ":": "",
@@ -1558,7 +1558,6 @@ CURLUcode curl_url_get(const CURLU *u, CURLUPart what,
                     allochost ? allochost : u->host,
                     port ? ":": "",
                     port ? port : "",
-                    (u->path && (u->path[0] != '/')) ? "/": "",
                     u->path ? u->path : "/",
                     (u->query && u->query[0]) ? "?": "",
                     (u->query && u->query[0]) ? u->query : "",
@@ -1640,6 +1639,7 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what,
   bool urlencode = (flags & CURLU_URLENCODE)? 1 : 0;
   bool plusencode = FALSE;
   bool urlskipslash = FALSE;
+  bool leadingslash = FALSE;
   bool appendquery = FALSE;
   bool equalsencode = FALSE;
 
@@ -1751,6 +1751,7 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what,
   break;
   case CURLUPART_PATH:
     urlskipslash = TRUE;
+    leadingslash = TRUE; /* enforce */
     storep = &u->path;
     break;
   case CURLUPART_QUERY:
@@ -1801,16 +1802,21 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what,
   {
     const char *newp = part;
     size_t nalloc = strlen(part);
+    struct dynbuf enc;
 
     if(nalloc > CURL_MAX_INPUT_LENGTH)
       /* excessive input length */
       return CURLUE_MALFORMED_INPUT;
 
+    Curl_dyn_init(&enc, nalloc * 3 + 1 + leadingslash);
+
+    if(leadingslash && (part[0] != '/')) {
+      CURLcode result = Curl_dyn_addn(&enc, "/", 1);
+      if(result)
+        return CURLUE_OUT_OF_MEMORY;
+    }
     if(urlencode) {
       const unsigned char *i;
-      struct dynbuf enc;
-
-      Curl_dyn_init(&enc, nalloc * 3 + 1);
 
       for(i = (const unsigned char *)part; *i; i++) {
         CURLcode result;
@@ -1838,14 +1844,13 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what,
             return CURLUE_OUT_OF_MEMORY;
         }
       }
-      newp = Curl_dyn_ptr(&enc);
     }
     else {
       char *p;
-      newp = strdup(part);
-      if(!newp)
+      CURLcode result = Curl_dyn_add(&enc, part);
+      if(result)
         return CURLUE_OUT_OF_MEMORY;
-      p = (char *)newp;
+      p = Curl_dyn_ptr(&enc);
       while(*p) {
         /* make sure percent encoded are lower case */
         if((*p == '%') && ISXDIGIT(p[1]) && ISXDIGIT(p[2]) &&
@@ -1858,6 +1863,7 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what,
           p++;
       }
     }
+    newp = Curl_dyn_ptr(&enc);
 
     if(appendquery) {
       /* Append the 'newp' string onto the old query. Add a '&' separator if
@@ -1866,24 +1872,24 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what,
       size_t querylen = u->query ? strlen(u->query) : 0;
       bool addamperand = querylen && (u->query[querylen -1] != '&');
       if(querylen) {
-        struct dynbuf enc;
-        Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
+        struct dynbuf qbuf;
+        Curl_dyn_init(&qbuf, CURL_MAX_INPUT_LENGTH);
 
-        if(Curl_dyn_addn(&enc, u->query, querylen)) /* add original query */
+        if(Curl_dyn_addn(&qbuf, u->query, querylen)) /* add original query */
           goto nomem;
 
         if(addamperand) {
-          if(Curl_dyn_addn(&enc, "&", 1))
+          if(Curl_dyn_addn(&qbuf, "&", 1))
             goto nomem;
         }
-        if(Curl_dyn_add(&enc, newp))
+        if(Curl_dyn_add(&qbuf, newp))
           goto nomem;
-        free((char *)newp);
+        Curl_dyn_free(&enc);
         free(*storep);
-        *storep = Curl_dyn_ptr(&enc);
+        *storep = Curl_dyn_ptr(&qbuf);
         return CURLUE_OK;
 nomem:
-        free((char *)newp);
+        Curl_dyn_free(&enc);
         return CURLUE_OUT_OF_MEMORY;
       }
     }
@@ -1895,7 +1901,7 @@ nomem:
       }
       else {
         if(!n || hostname_check(u, (char *)newp, n)) {
-          free((char *)newp);
+          Curl_dyn_free(&enc);
           return CURLUE_BAD_HOSTNAME;
         }
       }
diff --git a/tests/libtest/lib1560.c b/tests/libtest/lib1560.c
index 449f937a9d..0eca0fda72 100644
--- a/tests/libtest/lib1560.c
+++ b/tests/libtest/lib1560.c
@@ -108,6 +108,16 @@ struct setcase {
   CURLUcode pcode; /* for updating parts */
 };
 
+struct setgetcase {
+  const char *in;
+  const char *set;
+  const char *out;
+  unsigned int urlflags; /* for setting the URL */
+  unsigned int setflags; /* for updating parts */
+  unsigned int getflags; /* for getting parts */
+  CURLUcode pcode; /* for updating parts */
+};
+
 struct testcase {
   const char *in;
   const char *out;
@@ -747,8 +757,33 @@ static int checkurl(const char *org, const char *url, const char *out)
   return 0;
 }
 
+/* 1. Set the URL
+   2. Set components
+   3. Extract all components (not URL)
+*/
+static const struct setgetcase setget_parts_list[] = {
+  {"https://example.com",
+   "path=get,",
+   "https | [11] | [12] | [13] | example.com | [15] | /get | [16] | [17]",
+   0, 0, 0, CURLUE_OK},
+  {"https://example.com",
+   "path=/get,",
+   "https | [11] | [12] | [13] | example.com | [15] | /get | [16] | [17]",
+   0, 0, 0, CURLUE_OK},
+  {"https://example.com",
+   "path=g e t,",
+   "https | [11] | [12] | [13] | example.com | [15] | /g%20e%20t | "
+   "[16] | [17]",
+   0, CURLU_URLENCODE, 0, CURLUE_OK},
+  {NULL, NULL, NULL, 0, 0, 0, CURLUE_OK}
+};
+
 /* !checksrc! disable SPACEBEFORECOMMA 1 */
 static const struct setcase set_parts_list[] = {
+  {"https://example.com",
+   "path=get,",
+   "https://example.com/get",
+   0, 0, CURLUE_OK, CURLUE_OK},
   {"https://example.com/",
    "scheme=ftp+-.123,",
    "ftp+-.123://example.com/",
@@ -1120,6 +1155,54 @@ static int set_url(void)
   return error;
 }
 
+/* 1. Set a URL
+   2. Set one or more parts
+   3. Extract and compare all parts - not the URL
+*/
+static int setget_parts(void)
+{
+  int i;
+  int error = 0;
+
+  for(i = 0; setget_parts_list[i].set && !error; i++) {
+    CURLUcode rc;
+    CURLU *urlp = curl_url();
+    if(!urlp) {
+      error++;
+      break;
+    }
+    if(setget_parts_list[i].in)
+      rc = curl_url_set(urlp, CURLUPART_URL, setget_parts_list[i].in,
+                        setget_parts_list[i].urlflags);
+    else
+      rc = CURLUE_OK;
+    if(!rc) {
+      char *url = NULL;
+      CURLUcode uc = updateurl(urlp, setget_parts_list[i].set,
+                               setget_parts_list[i].setflags);
+
+      if(uc != setget_parts_list[i].pcode) {
+        fprintf(stderr, "updateurl\nin: %s\nreturned %d (expected %d)\n",
+                setget_parts_list[i].set, (int)uc, setget_parts_list[i].pcode);
+        error++;
+      }
+      if(!uc) {
+        if(checkparts(urlp, setget_parts_list[i].set, setget_parts_list[i].out,
+                      setget_parts_list[i].getflags))
+          error++;        /* add */
+      }
+      curl_free(url);
+    }
+    else if(rc != CURLUE_OK) {
+      fprintf(stderr, "Set parts\nin: %s\nreturned %d (expected %d)\n",
+              setget_parts_list[i].in, (int)rc, 0);
+      error++;
+    }
+    curl_url_cleanup(urlp);
+  }
+  return error;
+}
+
 static int set_parts(void)
 {
   int i;
@@ -1593,6 +1676,9 @@ int test(char *URL)
 {
   (void)URL; /* not used */
 
+  if(setget_parts())
+    return 10;
+
   if(get_url())
     return 3;