urlapi: fix parsing URL without slash with CURLU_URLENCODE

author Daniel Stenberg <daniel@haxx.se>

Tue, 18 Oct 2022 13:54:06 +0000 (15:54 +0200)

committer Daniel Stenberg <daniel@haxx.se>

Thu, 20 Oct 2022 06:56:53 +0000 (08:56 +0200)
author Daniel Stenberg <daniel@haxx.se>
Tue, 18 Oct 2022 13:54:06 +0000 (15:54 +0200)
committer Daniel Stenberg <daniel@haxx.se>
Thu, 20 Oct 2022 06:56:53 +0000 (08:56 +0200)
diff --git a/lib/urlapi.c b/lib/urlapi.c

index 736123527c11b02eb3a0a7132d77032deb1765a5..66cc87707f9ae89429401dfbb0a32e19486ec039 100644 (file)
--- a/lib/urlapi.c
+++ b/lib/urlapi.c
@@ -116,26 +116,26 @@ static const char *find_host_sep(const char *url)
  }
  
  /*
- * Decide in an encoding-independent manner whether a character in a
- * URL must be escaped. The same criterion must be used in strlen_url()
- * and strcpy_url().
+ * Decide in an encoding-independent manner whether a character in a URL must
+ * be escaped. This is used in urlencode_str().
   */
  static bool urlchar_needs_escaping(int c)
  {
    return !(ISCNTRL(c) || ISSPACE(c) || ISGRAPH(c));
  }
  
-/* strcpy_url() creates a url in an output dynbuf and URL-encodes the spaces
- * in the source URL accordingly.
+/* urlencode_str() writes data into an output dynbuf and URL-encodes the
+ * spaces in the source URL accordingly.
   *
   * URL encoding should be skipped for host names, otherwise IDN resolution
   * will fail.
- *
   */
-static CURLUcode strcpy_url(struct dynbuf *o, const char *url, bool relative)
+static CURLUcode urlencode_str(struct dynbuf *o, const char *url,
+                               size_t len, bool relative,
+                               bool query)
  {
    /* we must add this with whitespace-replacing */
-  bool left = TRUE;
+  bool left = !query;
    const unsigned char *iptr;
    const unsigned char *host_sep = (const unsigned char *) url;
  
@@ -143,8 +143,7 @@ static CURLUcode strcpy_url(struct dynbuf *o, const char *url, bool relative)
      host_sep = (const unsigned char *) find_host_sep(url);
  
    for(iptr = (unsigned char *)url;    /* read from here */
-      *iptr;         /* until zero byte */
-      iptr++) {
+      len; iptr++, len--) {
  
      if(iptr < host_sep) {
        if(Curl_dyn_addn(o, iptr, 1))
@@ -361,7 +360,7 @@ static char *concat_url(char *base, const char *relurl)
    }
  
    /* then append the new piece on the right side */
-  strcpy_url(&newest, useurl, !host_changed);
+  urlencode_str(&newest, useurl, strlen(useurl), !host_changed, FALSE);
  
    return Curl_dyn_ptr(&newest);
  }
@@ -1130,16 +1129,6 @@ static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
      }
    }
  
-  if(*path && (flags & CURLU_URLENCODE)) {
-    struct dynbuf enc;
-    Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
-    if(strcpy_url(&enc, path, TRUE)) { /* consider it relative */
-      result = CURLUE_OUT_OF_MEMORY;
-      goto fail;
-    }
-    path = u->path = Curl_dyn_ptr(&enc);
-  }
-
    fragment = strchr(path, '#');
    if(fragment) {
      fraglen = strlen(fragment);
@@ -1163,12 +1152,25 @@ static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
      size_t qlen = strlen(query) - fraglen; /* includes '?' */
      pathlen = strlen(path) - qlen - fraglen;
      if(qlen > 1) {
-      u->query = Curl_memdup(query + 1, qlen);
-      if(!u->query) {
-        result = CURLUE_OUT_OF_MEMORY;
-        goto fail;
+      if(qlen && (flags & CURLU_URLENCODE)) {
+        struct dynbuf enc;
+        Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
+        /* skip the leading question mark */
+        if(urlencode_str(&enc, query + 1, qlen - 1, TRUE, TRUE)) {
+          result = CURLUE_OUT_OF_MEMORY;
+          goto fail;
+        }
+        qlen = Curl_dyn_len(&enc);
+        query = u->query = Curl_dyn_ptr(&enc);
+      }
+      else {
+        u->query = Curl_memdup(query + 1, qlen);
+        if(!u->query) {
+          result = CURLUE_OUT_OF_MEMORY;
+          goto fail;
+        }
+        u->query[qlen - 1] = 0;
        }
-      u->query[qlen - 1] = 0;
  
        if(junkscan(u->query, flags)) {
          result = CURLUE_BAD_QUERY;
@@ -1187,6 +1189,17 @@ static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
    else
      pathlen = strlen(path) - fraglen;
  
+  if(pathlen && (flags & CURLU_URLENCODE)) {
+    struct dynbuf enc;
+    Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
+    if(urlencode_str(&enc, path, pathlen, TRUE, FALSE)) {
+      result = CURLUE_OUT_OF_MEMORY;
+      goto fail;
+    }
+    pathlen = Curl_dyn_len(&enc);
+    path = u->path = Curl_dyn_ptr(&enc);
+  }
+
    if(!pathlen) {
      /* there is no path left, unset */
      path = NULL;
@@ -1563,13 +1576,15 @@ CURLUcode curl_url_get(CURLU *u, CURLUPart what,
      break;
    }
    if(ptr) {
-    *part = strdup(ptr);
+    size_t partlen = strlen(ptr);
+    size_t i = 0;
+    *part = Curl_memdup(ptr, partlen + 1);
      if(!*part)
        return CURLUE_OUT_OF_MEMORY;
      if(plusdecode) {
        /* convert + to space */
-      char *plus;
-      for(plus = *part; *plus; ++plus) {
+      char *plus = *part;
+      for(i = 0; i < partlen; ++plus, i++) {
          if(*plus == '+')
            *plus = ' ';
        }
@@ -1586,11 +1601,13 @@ CURLUcode curl_url_get(CURLU *u, CURLUPart what,
          return CURLUE_URLDECODE;
        }
        *part = decoded;
+      partlen = dlen;
      }
      if(urlencode) {
        struct dynbuf enc;
        Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
-      if(strcpy_url(&enc, *part, TRUE)) /* consider it relative */
+      if(urlencode_str(&enc, *part, partlen, TRUE,
+                       what == CURLUPART_QUERY))
          return CURLUE_OUT_OF_MEMORY;
        free(*part);
        *part = Curl_dyn_ptr(&enc);
diff --git a/tests/libtest/lib1560.c b/tests/libtest/lib1560.c

index 4016266bfc3d1e2304e84e05701b58152cbf219e..42300cfd7d622814ddb8a78a271ef46e3526e464 100644 (file)
--- a/tests/libtest/lib1560.c
+++ b/tests/libtest/lib1560.c
@@ -138,6 +138,12 @@ struct clearurlcase {
  };
  
  static const struct testcase get_parts_list[] ={
+  {"https://user@example.net?he l lo",
+   "https | user | [12] | [13] | example.net | [15] | / | he+l+lo | [17]",
+   CURLU_ALLOW_SPACE, CURLU_URLENCODE, CURLUE_OK},
+  {"https://user@example.net?he l lo",
+   "https | user | [12] | [13] | example.net | [15] | / | he l lo | [17]",
+   CURLU_ALLOW_SPACE, 0, CURLUE_OK},
    {"https://exam{}[]ple.net", "", 0, 0, CURLUE_BAD_HOSTNAME},
    {"https://exam{ple.net", "", 0, 0, CURLUE_BAD_HOSTNAME},
    {"https://exam}ple.net", "", 0, 0, CURLUE_BAD_HOSTNAME},
@@ -849,6 +855,18 @@ static CURLUcode updateurl(CURLU *u, const char *cmd, unsigned int setflags)
  }
  
  static const struct redircase set_url_list[] = {
+  {"http://example.com/please/../gimme/%TESTNUMBER?foobar#hello",
+   "http://example.net/there/it/is/../../tes t case=/%TESTNUMBER0002? yes no",
+   "http://example.net/there/tes%20t%20case=/%TESTNUMBER0002?+yes+no",
+   0, CURLU_URLENCODE|CURLU_ALLOW_SPACE, CURLUE_OK},
+  {"http://local.test?redirect=http://local.test:80?-321",
+   "http://local.test:80?-123",
+   "http://local.test:80/?-123",
+   0, CURLU_URLENCODE|CURLU_ALLOW_SPACE, CURLUE_OK},
+  {"http://local.test?redirect=http://local.test:80?-321",
+   "http://local.test:80?-123",
+   "http://local.test:80/?-123",
+   0, 0, CURLUE_OK},
    {"http://example.org/static/favicon/wikipedia.ico",
     "//fake.example.com/licenses/by-sa/3.0/",
     "http://fake.example.com/licenses/by-sa/3.0/",
author	Daniel Stenberg <daniel@haxx.se>
	Tue, 18 Oct 2022 13:54:06 +0000 (15:54 +0200)
committer	Daniel Stenberg <daniel@haxx.se>
	Thu, 20 Oct 2022 06:56:53 +0000 (08:56 +0200)
lib/urlapi.c		patch \| blob \| blame \| history
tests/libtest/lib1560.c		patch \| blob \| blame \| history