urlapi: fix redirect to a new fragment or query (only)

author Daniel Stenberg <daniel@haxx.se>

Sat, 28 Dec 2024 13:47:01 +0000 (14:47 +0100)

committer Daniel Stenberg <daniel@haxx.se>

Mon, 30 Dec 2024 07:23:26 +0000 (08:23 +0100)
author Daniel Stenberg <daniel@haxx.se>
Sat, 28 Dec 2024 13:47:01 +0000 (14:47 +0100)
committer Daniel Stenberg <daniel@haxx.se>
Mon, 30 Dec 2024 07:23:26 +0000 (08:23 +0100)
diff --git a/docs/libcurl/opts/CURLOPT_PATH_AS_IS.md b/docs/libcurl/opts/CURLOPT_PATH_AS_IS.md

index ef78004320e5625180f2f8949fde58a1b5bce2b6..90ebe00db5b8a895dbe6b4fa89d07e5935864644 100644 (file)
--- a/docs/libcurl/opts/CURLOPT_PATH_AS_IS.md
+++ b/docs/libcurl/opts/CURLOPT_PATH_AS_IS.md
@@ -41,6 +41,9 @@ order to try out server implementations.
  
  By default libcurl normalizes such sequences before using the path.
  
+This option applies to the *first* request libcurl issues. When following
+redirects, it may no longer apply.
+
  The corresponding flag for the curl_url_set(3) function is called
  **CURLU_PATH_AS_IS**.
  
diff --git a/lib/urlapi.c b/lib/urlapi.c

index 98c8f6fe6d5ce08c919edfbf6a5a87aa1e263484..55c2aa8ffa0af01f844ce6ab60a0f812d76d2912 100644 (file)
--- a/lib/urlapi.c
+++ b/lib/urlapi.c
@@ -258,78 +258,41 @@ static CURLcode concat_url(char *base, const char *relurl, char **newurl)
     problems in the future...
    */
    struct dynbuf newest;
-  char *protsep;
-  char *pathsep;
    bool host_changed = FALSE;
    const char *useurl = relurl;
    CURLcode result = CURLE_OK;
    CURLUcode uc;
-  bool skip_slash = FALSE;
-  *newurl = NULL;
-
    /* protsep points to the start of the hostname */
-  protsep = strstr(base, "//");
+  char *protsep = strstr(base, "//");
+  DEBUGASSERT(protsep);
    if(!protsep)
      protsep = base;
    else
      protsep += 2; /* pass the slashes */
  
-  if('/' != relurl[0]) {
-    int level = 0;
-
-    /* First we need to find out if there is a ?-letter in the URL,
+  *newurl = NULL;
+  if(('/' != relurl[0]) && ('#' != relurl[0])) {
+    /* First we need to find out if there is a ?-letter in the original URL,
         and cut it and the right-side of that off */
-    pathsep = strchr(protsep, '?');
+    char *pathsep = strchr(protsep, '?');
      if(pathsep)
        *pathsep = 0;
-
-    /* we have a relative path to append to the last slash if there is one
-       available, or the new URL is just a query string (starts with a '?') or
-       a fragment (starts with '#') we append the new one at the end of the
-       current URL */
-    if((useurl[0] != '?') && (useurl[0] != '#')) {
-      pathsep = strrchr(protsep, '/');
+    else {
+      /* if not, cut off the potential fragment */
+      pathsep = strchr(protsep, '#');
        if(pathsep)
          *pathsep = 0;
+    }
  
-      /* Check if there is any slash after the hostname, and if so, remember
-         that position instead */
-      pathsep = strchr(protsep, '/');
+    /* if the redirect-to piece is not just a query, cut the path after the
+       last slash */
+    if(useurl[0] != '?') {
+      pathsep = strrchr(protsep, '/');
        if(pathsep)
-        protsep = pathsep + 1;
-      else
-        protsep = NULL;
-
-      /* now deal with one "./" or any amount of "../" in the newurl
-         and act accordingly */
-
-      if((useurl[0] == '.') && (useurl[1] == '/'))
-        useurl += 2; /* just skip the "./" */
-
-      while((useurl[0] == '.') &&
-            (useurl[1] == '.') &&
-            (useurl[2] == '/')) {
-        level++;
-        useurl += 3; /* pass the "../" */
-      }
-
-      if(protsep) {
-        while(level--) {
-          /* cut off one more level from the right of the original URL */
-          pathsep = strrchr(protsep, '/');
-          if(pathsep)
-            *pathsep = 0;
-          else {
-            *protsep = 0;
-            break;
-          }
-        }
-      }
+        pathsep[1] = 0; /* leave the slash */
      }
-    else
-      skip_slash = TRUE;
    }
-  else {
+  else if('/' == relurl[0]) {
      /* We got a new absolute path for this server */
  
      if(relurl[1] == '/') {
@@ -341,29 +304,20 @@ static CURLcode concat_url(char *base, const char *relurl, char **newurl)
        host_changed = TRUE;
      }
      else {
-      /* cut off the original URL from the first slash, or deal with URLs
-         without slash */
-      pathsep = strchr(protsep, '/');
-      if(pathsep) {
-        /* When people use badly formatted URLs, such as
-           "http://www.example.com?dir=/home/daniel" we must not use the first
-           slash, if there is a ?-letter before it! */
-        char *sep = strchr(protsep, '?');
-        if(sep && (sep < pathsep))
-          pathsep = sep;
+      /* cut the original URL at first slash */
+      char *pathsep = strchr(protsep, '/');
+      if(pathsep)
          *pathsep = 0;
-      }
-      else {
-        /* There was no slash. Now, since we might be operating on a badly
-           formatted URL, such as "http://www.example.com?id=2380" which does
-           not use a slash separator as it is supposed to, we need to check
-           for a ?-letter as well! */
-        pathsep = strchr(protsep, '?');
-        if(pathsep)
-          *pathsep = 0;
-      }
      }
    }
+  else {
+    /* the relative piece starts with '#' */
+
+    /* If there is a fragment in the original URL, cut it off */
+    char *pathsep = strchr(protsep, '#');
+    if(pathsep)
+      *pathsep = 0;
+  }
  
    Curl_dyn_init(&newest, CURL_MAX_INPUT_LENGTH);
  
@@ -372,15 +326,6 @@ static CURLcode concat_url(char *base, const char *relurl, char **newurl)
    if(result)
      return result;
  
-  /* check if we need to append a slash */
-  if(('/' == useurl[0]) || (protsep && !*protsep) || skip_slash)
-    ;
-  else {
-    result = Curl_dyn_addn(&newest, "/", 1);
-    if(result)
-      return result;
-  }
-
    /* then append the new piece on the right side */
    uc = urlencode_str(&newest, useurl, strlen(useurl), !host_changed,
                       FALSE);
@@ -1882,7 +1827,7 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what,
      if(result)
        return cc2cu(result);
  
-    uc = parseurl_and_replace(redired_url, u, flags);
+    uc = parseurl_and_replace(redired_url, u, flags&~CURLU_PATH_AS_IS);
      free(redired_url);
      return uc;
    }
diff --git a/tests/data/test391 b/tests/data/test391

index 24428a08f0f2a9048cb939f14fc25e476ab863ad..279c562de3173e43c7856b0da164ded6f529662e 100644 (file)
--- a/tests/data/test391
+++ b/tests/data/test391
@@ -62,7 +62,7 @@ Host: %HOSTIP:%HTTPPORT
  User-Agent: curl/%VERSION\r
  Accept: */*\r
  \r
-GET /../%TESTNUMBER0002 HTTP/1.1\r
+GET /%TESTNUMBER0002 HTTP/1.1\r
  Host: %HOSTIP:%HTTPPORT\r
  User-Agent: curl/%VERSION\r
  Accept: */*\r
diff --git a/tests/libtest/lib1560.c b/tests/libtest/lib1560.c

index d5253a739a107469858e5b225979b3a6463a5db3..3103c69f20900e1c7c309510c5a42c6506d60be1 100644 (file)
--- a/tests/libtest/lib1560.c
+++ b/tests/libtest/lib1560.c
@@ -1143,6 +1143,38 @@ static CURLUcode updateurl(CURLU *u, const char *cmd, unsigned int setflags)
  }
  
  static const struct redircase set_url_list[] = {
+  {"http://example.org#withs/ash", "/moo#frag",
+   "http://example.org/moo#frag",
+   0, 0, CURLUE_OK},
+  {"http://example.org/", "../path/././../././../moo",
+   "http://example.org/moo",
+   0, 0, CURLUE_OK},
+
+  {"http://example.org?bar/moo", "?weird",
+   "http://example.org/?weird", 0, 0, CURLUE_OK},
+  {"http://example.org/foo?bar", "?weird",
+   "http://example.org/foo?weird", 0, 0, CURLUE_OK},
+  {"http://example.org/foo", "?weird",
+   "http://example.org/foo?weird", 0, 0, CURLUE_OK},
+  {"http://example.org", "?weird",
+   "http://example.org/?weird", 0, 0, CURLUE_OK},
+  {"http://example.org/#original", "?weird#moo",
+   "http://example.org/?weird#moo", 0, 0, CURLUE_OK},
+
+  {"http://example.org?bar/moo#yes/path", "#new/slash",
+   "http://example.org/?bar/moo#new/slash", 0, 0, CURLUE_OK},
+  {"http://example.org/foo?bar", "#weird",
+   "http://example.org/foo?bar#weird", 0, 0, CURLUE_OK},
+  {"http://example.org/foo?bar#original", "#weird",
+   "http://example.org/foo?bar#weird", 0, 0, CURLUE_OK},
+  {"http://example.org/foo#original", "#weird",
+   "http://example.org/foo#weird", 0, 0, CURLUE_OK},
+  {"http://example.org/#original", "#weird",
+   "http://example.org/#weird", 0, 0, CURLUE_OK},
+  {"http://example.org#original", "#weird",
+   "http://example.org/#weird", 0, 0, CURLUE_OK},
+  {"http://example.org/foo?bar", "moo?hey#weird",
+   "http://example.org/moo?hey#weird", 0, 0, CURLUE_OK},
    {"http://example.org/",
     "../path/././../../moo",
     "http://example.org/moo",
author	Daniel Stenberg <daniel@haxx.se>
	Sat, 28 Dec 2024 13:47:01 +0000 (14:47 +0100)
committer	Daniel Stenberg <daniel@haxx.se>
	Mon, 30 Dec 2024 07:23:26 +0000 (08:23 +0100)
docs/libcurl/opts/CURLOPT_PATH_AS_IS.md		patch \| blob \| blame \| history
lib/urlapi.c		patch \| blob \| blame \| history
tests/data/test391		patch \| blob \| blame \| history
tests/libtest/lib1560.c		patch \| blob \| blame \| history