urlapi: remove percent encoded dot sequences from the URL path

author Daniel Stenberg <daniel@haxx.se>

Sat, 29 Mar 2025 18:10:40 +0000 (19:10 +0100)

committer Daniel Stenberg <daniel@haxx.se>

Mon, 31 Mar 2025 06:52:26 +0000 (08:52 +0200)
author Daniel Stenberg <daniel@haxx.se>
Sat, 29 Mar 2025 18:10:40 +0000 (19:10 +0100)
committer Daniel Stenberg <daniel@haxx.se>
Mon, 31 Mar 2025 06:52:26 +0000 (08:52 +0200)
diff --git a/lib/urlapi.c b/lib/urlapi.c

index e4266a59f06fd8a23025297504389f85485e4458..cdd64f9a897208536f1ce1f05536d8952397b362 100644 (file)
--- a/lib/urlapi.c
+++ b/lib/urlapi.c
@@ -762,6 +762,25 @@ CURLUcode Curl_url_set_authority(CURLU *u, const char *authority)
   * https://datatracker.ietf.org/doc/html/rfc3986#section-5.2.4
   */
  
+static bool is_dot(const char **str, size_t *clen)
+{
+  const char *p = *str;
+  if(*p == '.') {
+    (*str)++;
+    (*clen)--;
+    return TRUE;
+  }
+  else if((*clen >= 3) &&
+          (p[0] == '%') && (p[1] == '2') && ((p[2] | 0x20) == 'e')) {
+    *str += 3;
+    *clen -= 3;
+    return TRUE;
+  }
+  return FALSE;
+}
+
+#define ISSLASH(x) ((x) == '/')
+
  /*
   * dedotdotify()
   * @unittest: 1395
@@ -770,8 +789,7 @@ CURLUcode Curl_url_set_authority(CURLU *u, const char *authority)
   * passed in and strips them off according to the rules in RFC 3986 section
   * 5.2.4.
   *
- * The function handles a query part ('?' + stuff) appended but it expects
- * that fragments ('#' + stuff) have already been cut off.
+ * The function handles a path. It should not contain the query nor fragment.
   *
   * RETURNS
   *
@@ -780,112 +798,109 @@ CURLUcode Curl_url_set_authority(CURLU *u, const char *authority)
  UNITTEST int dedotdotify(const char *input, size_t clen, char **outp);
  UNITTEST int dedotdotify(const char *input, size_t clen, char **outp)
  {
-  char *outptr;
-  const char *endp = &input[clen];
-  char *out;
+  struct dynbuf out;
+  CURLcode result = CURLE_OK;
  
    *outp = NULL;
    /* the path always starts with a slash, and a slash has not dot */
-  if((clen < 2) || !memchr(input, '.', clen))
+  if(clen < 2)
      return 0;
  
-  out = malloc(clen + 1);
-  if(!out)
-    return 1; /* out of memory */
-
-  *out = 0; /* null-terminates, for inputs like "./" */
-  outptr = out;
-
-  do {
-    bool dotdot = TRUE;
-    if(*input == '.') {
-      /*  A. If the input buffer begins with a prefix of "../" or "./", then
-          remove that prefix from the input buffer; otherwise, */
-
-      if(!strncmp("./", input, 2)) {
-        input += 2;
-        clen -= 2;
-      }
-      else if(!strncmp("../", input, 3)) {
-        input += 3;
-        clen -= 3;
-      }
-      /*  D. if the input buffer consists only of "." or "..", then remove
-          that from the input buffer; otherwise, */
+  Curl_dyn_init(&out, clen + 1);
+
+  /*  A. If the input buffer begins with a prefix of "../" or "./", then
+      remove that prefix from the input buffer; otherwise, */
+  if(is_dot(&input, &clen)) {
+    const char *p = input;
+    size_t blen = clen;
+
+    if(!clen)
+      /* . [end] */
+      goto end;
+    else if(ISSLASH(*p)) {
+      /* one dot followed by a slash */
+      input = p + 1;
+      clen--;
+    }
  
-      else if(!strcmp(".", input) || !strcmp("..", input) ||
-              !strncmp(".?", input, 2) || !strncmp("..?", input, 3)) {
-        *out = 0;
-        break;
+    /*  D. if the input buffer consists only of "." or "..", then remove
+        that from the input buffer; otherwise, */
+    else if(is_dot(&p, &blen)) {
+      if(!blen)
+        /* .. [end] */
+        goto end;
+      else if(ISSLASH(*p)) {
+        /* ../ */
+        input = p + 1;
+        clen = blen - 1;
        }
-      else
-        dotdot = FALSE;
      }
-    else if(*input == '/') {
+  }
+
+  while(clen && !result) { /* until end of path content */
+    if(ISSLASH(*input)) {
+      const char *p = &input[1];
+      size_t blen = clen - 1;
        /*  B. if the input buffer begins with a prefix of "/./" or "/.", where
            "."  is a complete path segment, then replace that prefix with "/" in
            the input buffer; otherwise, */
-      if(!strncmp("/./", input, 3)) {
-        input += 2;
-        clen -= 2;
-      }
-      else if(!strcmp("/.", input) || !strncmp("/.?", input, 3)) {
-        *outptr++ = '/';
-        *outptr = 0;
-        break;
-      }
-
-      /*  C. if the input buffer begins with a prefix of "/../" or "/..",
-          where ".." is a complete path segment, then replace that prefix with
-          "/" in the input buffer and remove the last segment and its
-          preceding "/" (if any) from the output buffer; otherwise, */
-
-      else if(!strncmp("/../", input, 4)) {
-        input += 3;
-        clen -= 3;
-        /* remove the last segment from the output buffer */
-        while(outptr > out) {
-          outptr--;
-          if(*outptr == '/')
-            break;
+      if(is_dot(&p, &blen)) {
+        if(!blen) { /* /. */
+          result = Curl_dyn_addn(&out, "/", 1);
+          break;
          }
-        *outptr = 0; /* null-terminate where it stops */
-      }
-      else if(!strcmp("/..", input) || !strncmp("/..?", input, 4)) {
-        /* remove the last segment from the output buffer */
-        while(outptr > out) {
-          outptr--;
-          if(*outptr == '/')
-            break;
+        else if(ISSLASH(*p)) { /* /./ */
+          input = p;
+          clen = blen;
+          continue;
+        }
+
+        /*  C. if the input buffer begins with a prefix of "/../" or "/..",
+            where ".." is a complete path segment, then replace that prefix
+            with "/" in the input buffer and remove the last segment and its
+            preceding "/" (if any) from the output buffer; otherwise, */
+        else if(is_dot(&p, &blen) && (ISSLASH(*p) || !blen)) {
+          /* remove the last segment from the output buffer */
+          size_t len = Curl_dyn_len(&out);
+          if(len) {
+            char *ptr = Curl_dyn_ptr(&out);
+            char *last = memrchr(ptr, '/', len);
+            if(last)
+              /* trim the output at the slash */
+              Curl_dyn_setlen(&out, last - ptr);
+          }
+
+          if(blen) { /* /../ */
+            input = p;
+            clen = blen;
+            continue;
+          }
+          result = Curl_dyn_addn(&out, "/", 1);
+          break;
          }
-        *outptr++ = '/';
-        *outptr = 0; /* null-terminate where it stops */
-        break;
        }
-      else
-        dotdot = FALSE;
-    }
-    else
-      dotdot = FALSE;
-
-    if(!dotdot) {
-      /*  E. move the first path segment in the input buffer to the end of
-          the output buffer, including the initial "/" character (if any) and
-          any subsequent characters up to, but not including, the next "/"
-          character or the end of the input buffer. */
-
-      do {
-        *outptr++ = *input++;
-        clen--;
-      } while(*input && (*input != '/') && (*input != '?'));
-      *outptr = 0;
      }
  
-    /* continue until end of path */
-  } while(input < endp);
+    /*  E. move the first path segment in the input buffer to the end of
+        the output buffer, including the initial "/" character (if any) and
+        any subsequent characters up to, but not including, the next "/"
+        character or the end of the input buffer. */
  
-  *outp = out;
-  return 0; /* success */
+    result = Curl_dyn_addn(&out, input, 1);
+    input++;
+    clen--;
+  }
+end:
+  if(!result) {
+    if(Curl_dyn_len(&out))
+      *outp = Curl_dyn_ptr(&out);
+    else {
+      *outp = strdup("");
+      if(!*outp)
+        return 1;
+    }
+  }
+  return result ? 1 : 0; /* success */
  }
  
  static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
diff --git a/tests/libtest/lib1560.c b/tests/libtest/lib1560.c

index 0a8b244d7686b21f99062bee2d6fbf436c01eff8..3d93163a9a1dd3119cf0e79e15ced72811c02d29 100644 (file)
--- a/tests/libtest/lib1560.c
+++ b/tests/libtest/lib1560.c
@@ -1218,6 +1218,12 @@ static const struct redircase set_url_list[] = {
    {"http://example.org/", "../path/././../././../moo",
     "http://example.org/moo",
     0, 0, CURLUE_OK},
+  {"http://example.org/", ".%2e/path/././../%2E/./../moo",
+   "http://example.org/moo",
+   0, 0, CURLUE_OK},
+  {"http://example.org/", ".%2e/path/./%2e/.%2E/%2E/./%2e%2E/moo",
+   "http://example.org/moo",
+   0, 0, CURLUE_OK},
  
    {"http://example.org?bar/moo", "?weird",
     "http://example.org/?weird", 0, 0, CURLUE_OK},
diff --git a/tests/unit/unit1395.c b/tests/unit/unit1395.c

index dd1f29a307e3ece0008288439b468363f80b513a..b467fe6f0810e6dc64f89bd6d7ee7553cb191c23 100644 (file)
--- a/tests/unit/unit1395.c
+++ b/tests/unit/unit1395.c
@@ -48,28 +48,72 @@ UNITTEST_START
    unsigned int i;
    int fails = 0;
    const struct dotdot pairs[] = {
+    { "%2f%2e%2e%2f/../a", "%2f%2e%2e%2f/a" },
+    { "%2f%2e%2e%2f/../", "%2f%2e%2e%2f/" },
+    { "%2f%2e%2e%2f/.", "%2f%2e%2e%2f/" },
+    { "%2f%2e%2e%2f/", "%2f%2e%2e%2f/" },
+    { "%2f%2e%2e%2f", "%2f%2e%2e%2f" },
+    { "%2f%2e%2e%2", "%2f%2e%2e%2" },
+    { "%2f%2e%2e%", "%2f%2e%2e%" },
+    { "%2f%2e%2e", "%2f%2e%2e" },
+    { "%2f%2e%2", "%2f%2e%2" },
+    { "%2f%2e%", "%2f%2e%" },
+    { "%2f%2e", "%2f%2e" },
+    { "%2f%2", "%2f%2" },
+    { "%2f%", "%2f%" },
+    { "%2f", "%2f" },
+    { "%2", "%2" },
+    { "%", NULL },
+    { "2", NULL },
+    { "e", NULL },
+    { ".", NULL },
+    { "./", "" },
+    { "..", "" },
+    { "../", "" },
+    { "../a", "a" },
+    { "///moo.", "///moo." },
+    { ".///moo.", "//moo." },
+    { "./moo..", "moo.." },
+    { "./moo../", "moo../" },
+    { "./moo../.m", "moo../.m" },
+    { "./moo", "moo" },
+    { "../moo", "moo" },
+    { "../moo?", "moo?" },
+    { "../moo?#", "moo?#" },
+    { "../moo?#?..", "moo?#?.." },
+    { "/../moo/..", "/" },
+    { "/a/c/%2e%2E/b", "/a/b" },
+    { "/a/%2e/g", "/a/g" },
+    { "/a/b/c/./g", "/a/b/c/g" },
+    { "/a/c/../b", "/a/b" },
      { "/a/b/c/./../../g", "/a/g" },
+    { "/a/b/c/./%2e%2E/../g", "/a/g" },
+    { "/a/b/c/./../%2e%2E/g", "/a/g" },
+    { "/a/b/c/%2E/%2e%2E/%2e%2E/g", "/a/g" },
      { "mid/content=5/../6", "mid/6" },
      { "/hello/../moo", "/moo" },
      { "/1/../1", "/1" },
      { "/1/./1", "/1/1" },
+    { "/1/%2e/1", "/1/1" },
+    { "/1/%2E/1", "/1/1" },
      { "/1/..", "/" },
      { "/1/.", "/1/" },
+    { "/1/%2e", "/1/" },
+    { "/1/%2E", "/1/" },
      { "/1/./..", "/" },
+    { "/1/%2e/.%2E", "/" },
+    { "/1/./%2e.", "/" },
      { "/1/./../2", "/2" },
      { "/hello/1/./../2", "/hello/2" },
-    { "test/this", NULL },
+    { "test/this", "test/this" },
      { "test/this/../now", "test/now" },
      { "/1../moo../foo", "/1../moo../foo"},
      { "/../../moo", "/moo"},
      { "/../../moo?", "/moo?"},
-    { "/123?", NULL},
-    { "/../moo/..?", "/" },
+    { "/123?", "/123?" },
      { "/", NULL },
      { "", NULL },
      { "/.../", "/.../" },
-    { "./moo", "moo" },
-    { "../moo", "moo" },
      { "/.", "/" },
      { "/..", "/" },
      { "/moo/..", "/" },
author	Daniel Stenberg <daniel@haxx.se>
	Sat, 29 Mar 2025 18:10:40 +0000 (19:10 +0100)
committer	Daniel Stenberg <daniel@haxx.se>
	Mon, 31 Mar 2025 06:52:26 +0000 (08:52 +0200)
lib/urlapi.c		patch \| blob \| blame \| history
tests/libtest/lib1560.c		patch \| blob \| blame \| history
tests/unit/unit1395.c		patch \| blob \| blame \| history