urlapi: stop extracting hostname from file:// URLs on Windows

author Daniel Stenberg <daniel@haxx.se>

Mon, 13 Apr 2026 12:55:16 +0000 (14:55 +0200)

committer Daniel Stenberg <daniel@haxx.se>

Tue, 14 Apr 2026 10:09:48 +0000 (12:09 +0200)
author Daniel Stenberg <daniel@haxx.se>
Mon, 13 Apr 2026 12:55:16 +0000 (14:55 +0200)
committer Daniel Stenberg <daniel@haxx.se>
Tue, 14 Apr 2026 10:09:48 +0000 (12:09 +0200)
diff --git a/lib/urlapi-int.h b/lib/urlapi-int.h

index f635e9ae683a6a709b7df7282edb0e1e1671bd83..129ee0481f70d132ffd9327fff1d172ce741a7a2 100644 (file)
--- a/lib/urlapi-int.h
+++ b/lib/urlapi-int.h
@@ -49,6 +49,10 @@ struct Curl_URL {
  #define HOST_IPV4    2
  #define HOST_IPV6    3
  
+#define QUERY_NO      2
+#define QUERY_NOT_YET 3 /* becomes QUERY_YES at the first '?' */
+#define QUERY_YES     4
+
  size_t Curl_is_absolute_url(const char *url, char *buf, size_t buflen,
                              bool guess_scheme);
  
diff --git a/lib/urlapi.c b/lib/urlapi.c

index 3a254b7bf3f22a50ec79222b9d5e8543e940a4e2..a3036dcf6afc39d4ac588ba055e5378d45ba3e2e 100644 (file)
--- a/lib/urlapi.c
+++ b/lib/urlapi.c
@@ -115,20 +115,25 @@ static const char *find_host_sep(const char *url)
   * URL encoding should be skipped for hostnames, otherwise IDN resolution
   * will fail.
   *
+ * 'query' is QUERY_YES for a query part, QUERY_NO for a non-query part, or
+ * QUERY_NOT_YET to switch from non-query to query at the first question mark.
+ *
   * @unittest: 1675
   */
  UNITTEST CURLUcode urlencode_str(struct dynbuf *o, const char *url,
                                   size_t len, bool relative,
-                                 bool query);
+                                 unsigned int query);
  UNITTEST CURLUcode urlencode_str(struct dynbuf *o, const char *url,
                                   size_t len, bool relative,
-                                 bool query)
+                                 unsigned int query)
  {
    /* we must add this with whitespace-replacing */
    const unsigned char *iptr;
    const unsigned char *host_sep = (const unsigned char *)url;
    CURLcode result = CURLE_OK;
  
+  DEBUGASSERT((query >= QUERY_NO) && (query <= QUERY_YES));
+
    if(!relative) {
      size_t n;
      host_sep = (const unsigned char *)find_host_sep(url);
@@ -141,7 +146,7 @@ UNITTEST CURLUcode urlencode_str(struct dynbuf *o, const char *url,
  
    for(iptr = host_sep; len && !result; iptr++, len--) {
      if(*iptr == ' ') {
-      if(!query)
+      if(query != QUERY_YES)
          result = curlx_dyn_addn(o, "%20", 3);
        else
          result = curlx_dyn_addn(o, "+", 1);
@@ -151,7 +156,8 @@ UNITTEST CURLUcode urlencode_str(struct dynbuf *o, const char *url,
        Curl_hexbyte(&out[1], *iptr);
        result = curlx_dyn_addn(o, out, 3);
      }
-    else if(*iptr == '%' && ISXDIGIT(iptr[1]) && ISXDIGIT(iptr[2]) &&
+    else if(*iptr == '%' && (len >= 3) &&
+            ISXDIGIT(iptr[1]) && ISXDIGIT(iptr[2]) &&
              (ISLOWER(iptr[1]) || ISLOWER(iptr[2]))) {
        /* uppercase it */
        unsigned char hex = (unsigned char)((curlx_hexval(iptr[1]) << 4) |
@@ -164,8 +170,8 @@ UNITTEST CURLUcode urlencode_str(struct dynbuf *o, const char *url,
      }
      else {
        result = curlx_dyn_addn(o, iptr, 1);
-      if(*iptr == '?')
-        query = TRUE;
+      if(*iptr == '?' && (query == QUERY_NOT_YET))
+        query = QUERY_YES;
      }
    }
  
@@ -832,15 +838,12 @@ end:
   * @unittest: 1675
   */
  UNITTEST CURLUcode parse_file(const char *url, size_t urllen, CURLU *u,
-                              struct dynbuf *host, const char **pathp,
-                              size_t *pathlenp);
+                              const char **pathp, size_t *pathlenp);
  UNITTEST CURLUcode parse_file(const char *url, size_t urllen, CURLU *u,
-                              struct dynbuf *host, const char **pathp,
-                              size_t *pathlenp)
+                              const char **pathp, size_t *pathlenp)
  {
    const char *path;
    size_t pathlen;
-  bool uncpath = FALSE;
    if(urllen <= 6)
      /* file:/ is not enough to actually be a complete file: URL */
      return CURLUE_BAD_FILE_URL;
@@ -872,9 +875,6 @@ UNITTEST CURLUcode parse_file(const char *url, size_t urllen, CURLU *u,
       *
       *  o the hostname is a FQDN that resolves to this machine, or
       *
-     *  o it is an UNC String transformed to an URI (Windows only, RFC 8089
-     *    Appendix E.3).
-     *
       * For brevity, we only consider URLs with empty, "localhost", or
       * "127.0.0.1" hostnames as local, otherwise as an UNC String.
       *
@@ -889,42 +889,16 @@ UNITTEST CURLUcode parse_file(const char *url, size_t urllen, CURLU *u,
           checkprefix("127.0.0.1/", ptr)) {
          ptr += 9; /* now points to the slash after the host */
        }
-      else {
-#ifdef _WIN32
-        size_t len;
-
-        /* the hostname, NetBIOS computer name, can not contain disallowed
-           chars, and the delimiting slash character must be appended to the
-           hostname */
-        path = strpbrk(ptr, "/\\:*?\"<>|");
-        if(!path || *path != '/')
-          return CURLUE_BAD_FILE_URL;
-
-        len = path - ptr;
-        if(len) {
-          CURLcode code = curlx_dyn_addn(host, ptr, len);
-          if(code)
-            return cc2cu(code);
-          uncpath = TRUE;
-        }
-
-        ptr -= 2; /* now points to the // before the host in UNC */
-#else
+      else
          /* Invalid file://hostname/, expected localhost or 127.0.0.1 or
             none */
          return CURLUE_BAD_FILE_URL;
-#endif
-      }
      }
  
      path = ptr;
      pathlen = urllen - (ptr - url);
    }
  
-  if(!uncpath)
-    /* no host for file: URLs by default */
-    curlx_dyn_reset(host);
-
  #if !defined(_WIN32) && !defined(MSDOS) && !defined(__CYGWIN__)
    /* Do not allow Windows drive letters when not in Windows.
     * This catches both "file:/c:" and "file:c:" */
@@ -1033,7 +1007,7 @@ static CURLUcode handle_fragment(CURLU *u, const char *fragment,
      if(flags & CURLU_URLENCODE) {
        struct dynbuf enc;
        curlx_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
-      ures = urlencode_str(&enc, fragment + 1, fraglen - 1, TRUE, FALSE);
+      ures = urlencode_str(&enc, fragment + 1, fraglen - 1, TRUE, QUERY_NO);
        if(ures)
          return ures;
        u->fragment = curlx_dyn_ptr(&enc);
@@ -1057,7 +1031,7 @@ static CURLUcode handle_query(CURLU *u, const char *query,
        CURLUcode ures;
        curlx_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
        /* skip the leading question mark */
-      ures = urlencode_str(&enc, query + 1, qlen - 1, TRUE, TRUE);
+      ures = urlencode_str(&enc, query + 1, qlen - 1, TRUE, QUERY_YES);
        if(ures)
          return ures;
        u->query = curlx_dyn_ptr(&enc);
@@ -1085,7 +1059,7 @@ static CURLUcode handle_path(CURLU *u, const char *path,
    if(pathlen && (flags & CURLU_URLENCODE)) {
      struct dynbuf enc;
      curlx_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
-    ures = urlencode_str(&enc, path, pathlen, TRUE, FALSE);
+    ures = urlencode_str(&enc, path, pathlen, TRUE, QUERY_NO);
      if(ures)
        return ures;
      pathlen = curlx_dyn_len(&enc);
@@ -1145,7 +1119,7 @@ static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
    /* handle the file: scheme */
    if(schemelen && !strcmp(schemebuf, "file")) {
      is_file = TRUE;
-    ures = parse_file(url, urllen, u, &host, &path, &pathlen);
+    ures = parse_file(url, urllen, u, &path, &pathlen);
    }
    else {
      const char *hostp = NULL;
@@ -1287,7 +1261,8 @@ static CURLUcode redirect_url(const char *base, const char *relurl,
    curlx_dyn_init(&urlbuf, CURL_MAX_INPUT_LENGTH);
  
    if(!curlx_dyn_addn(&urlbuf, base, prelen) &&
-     !urlencode_str(&urlbuf, useurl, strlen(useurl), !host_changed, FALSE)) {
+     !urlencode_str(&urlbuf, useurl, strlen(useurl), !host_changed,
+                    QUERY_NOT_YET)) {
      uc = parseurl_and_replace(curlx_dyn_ptr(&urlbuf), u,
                                flags & ~U_CURLU_PATH_AS_IS);
    }
@@ -1407,7 +1382,8 @@ static CURLUcode urlget_format(const CURLU *u, CURLUPart what,
    if(urlencode) {
      struct dynbuf enc;
      curlx_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
-    uc = urlencode_str(&enc, part, partlen, TRUE, what == CURLUPART_QUERY);
+    uc = urlencode_str(&enc, part, partlen, TRUE, what == CURLUPART_QUERY ?
+                       QUERY_YES : QUERY_NO);
      curlx_free(part);
      if(uc)
        return uc;
diff --git a/tests/unit/unit1675.c b/tests/unit/unit1675.c

index e43ac1e7fea3bc803c3cdfcdf530d5973c8e095c..25616c6727672fad7f0007b01fab101f7ef4d259 100644 (file)
--- a/tests/unit/unit1675.c
+++ b/tests/unit/unit1675.c
@@ -145,35 +145,37 @@ static CURLcode test_unit1675(const char *arg)
      struct urlencode_test {
        const char *in;
        bool relative;
-      bool query;
+      unsigned int query;
        const char *out;
      };
      const struct urlencode_test tests[] = {
-      {"http://leave\x01/hello\x01world", FALSE, FALSE,
+      {"http://leave\x01/hello\x01world", FALSE, QUERY_NO,
         "http://leave\x01/hello%01world"},
-      {"http://leave/hello\x01world", FALSE, FALSE,
+      {"http://leave/hello\x01world", FALSE, QUERY_NO,
         "http://leave/hello%01world"},
-      {"http://le ave/hello\x01world", FALSE, FALSE,
+      {"http://le ave/hello\x01world", FALSE, QUERY_NO,
         "http://le ave/hello%01world"},
-      {"hello\x01world", TRUE, FALSE, "hello%01world"},
-      {"hello\xf0world", TRUE, FALSE, "hello%F0world"},
-      {"hello world", TRUE, FALSE, "hello%20world"},
-      {"hello%20world", TRUE, FALSE, "hello%20world"},
-      {"hello world", TRUE, TRUE, "hello+world"},
-      {"a+b c", TRUE, FALSE, "a+b%20c"},
-      {"a%20b%20c", TRUE, FALSE, "a%20b%20c"},
-      {"a%aab%aac", TRUE, FALSE, "a%AAb%AAc"},
-      {"a%aab%AAc", TRUE, FALSE, "a%AAb%AAc"},
-      {"w%w%x", TRUE, FALSE, "w%w%x"},
-      {"w%wf%xf", TRUE, FALSE, "w%wf%xf"},
-      {"w%fw%fw", TRUE, FALSE, "w%fw%fw"},
-      {"a+b c", TRUE, TRUE, "a+b+c"},
-      {"/foo/bar", TRUE, FALSE, "/foo/bar"},
-      {"/foo/bar", TRUE, TRUE, "/foo/bar"},
-      {"/foo/ bar", TRUE, FALSE, "/foo/%20bar"},
-      {"/foo/ bar", TRUE, TRUE, "/foo/+bar"},
-      {"~-._", TRUE, FALSE, "~-._"},
-      {"~-._", TRUE, TRUE, "~-._"},
+      {"hello\x01world", TRUE, QUERY_NO, "hello%01world"},
+      {"hello\xf0world", TRUE, QUERY_NO, "hello%F0world"},
+      {"hello world", TRUE, QUERY_NO, "hello%20world"},
+      {"hello%20world", TRUE, QUERY_NO, "hello%20world"},
+      {"hello world", TRUE, QUERY_YES, "hello+world"},
+      {"a+b c", TRUE, QUERY_NO, "a+b%20c"},
+      {"a%20b%20c", TRUE, QUERY_NO, "a%20b%20c"},
+      {"a%aab%aac", TRUE, QUERY_NO, "a%AAb%AAc"},
+      {"a%aab%AAc", TRUE, QUERY_NO, "a%AAb%AAc"},
+      {"w%w%x", TRUE, QUERY_NO, "w%w%x"},
+      {"w%wf%xf", TRUE, QUERY_NO, "w%wf%xf"},
+      {"w%fw%fw", TRUE, QUERY_NO, "w%fw%fw"},
+      {"a+b c", TRUE, QUERY_YES, "a+b+c"},
+      {"/foo/bar", TRUE, QUERY_NO, "/foo/bar"},
+      {"/foo/bar", TRUE, QUERY_YES, "/foo/bar"},
+      {"/foo/ bar", TRUE, QUERY_NO, "/foo/%20bar"},
+      {"/foo/ bar", TRUE, QUERY_YES, "/foo/+bar"},
+      {"~-._", TRUE, QUERY_NO, "~-._"},
+      {"~-._", TRUE, QUERY_YES, "~-._"},
+      {"foo bar?foo bar", TRUE, QUERY_NO, "foo%20bar?foo%20bar"},
+      {"foo bar?foo bar", TRUE, QUERY_NOT_YET, "foo%20bar?foo+bar"},
      };
  
      curlx_dyn_init(&out, 256);
@@ -259,18 +261,16 @@ static CURLcode test_unit1675(const char *arg)
      unsigned int i;
      struct file_test {
        const char *in;
-      const char *out_host;
        const char *out_path;
        bool fine;
      };
      const struct file_test tests[] = {
-      {"file:///etc/hosts", "", "/etc/hosts", TRUE},
-      {"file://localhost/etc/hosts", "", "/etc/hosts", TRUE},
-      {"file://apple/etc/hosts", "", "/etc/hosts", FALSE},
+      {"file:///etc/hosts", "/etc/hosts", TRUE},
+      {"file://localhost/etc/hosts", "/etc/hosts", TRUE},
+      {"file://apple/etc/hosts", "/etc/hosts", FALSE},
  #ifdef _WIN32
-      {"file:///c:/windows/system32", "", "c:/windows/system32", TRUE},
-      {"file://localhost/c:/windows/system32", "",
-       "c:/windows/system32", TRUE},
+      {"file:///c:/windows/system32", "c:/windows/system32", TRUE},
+      {"file://localhost/c:/windows/system32", "c:/windows/system32", TRUE},
  #endif
      };
  
@@ -280,28 +280,19 @@ static CURLcode test_unit1675(const char *arg)
        if(!u)
          return CURLE_OUT_OF_MEMORY;
  
-      uc = parse_file(tests[i].in, strlen(tests[i].in), u, &host, &path,
-                      &pathlen);
+      uc = parse_file(tests[i].in, strlen(tests[i].in), u, &path, &pathlen);
        if(!tests[i].fine && !uc) {
          curl_mfprintf(stderr, "Unexpectedly fine for input '%s'\n",
                        tests[i].in);
          fails++;
        }
-      else if(tests[i].out_host[0]) {
-        /* expecting a hostname output */
-        if(!curlx_dyn_len(&host) ||
-           strcmp(curlx_dyn_ptr(&host), tests[i].out_host))
-          error = TRUE;
-      }
        if(tests[i].fine &&
           (uc ||
            strncmp(path, tests[i].out_path, pathlen) ||
            strlen(tests[i].out_path) != pathlen)) {
          curl_mfprintf(stderr, "parse_file('%s') failed:"
-                      " expected host '%s', path '%s'; got host '%s',"
-                      " path '%.*s'\n",
-                      tests[i].in, tests[i].out_host, tests[i].out_path,
-                      uc ? "error" : curlx_dyn_ptr(&host),
+                      " expected path '%s'; got path '%.*s'\n",
+                      tests[i].in, tests[i].out_path,
                        (int)pathlen, path);
          fails++;
        }
author	Daniel Stenberg <daniel@haxx.se>
	Mon, 13 Apr 2026 12:55:16 +0000 (14:55 +0200)
committer	Daniel Stenberg <daniel@haxx.se>
	Tue, 14 Apr 2026 10:09:48 +0000 (12:09 +0200)
lib/urlapi-int.h		patch | blob | blame | history
lib/urlapi.c		patch | blob | blame | history
tests/unit/unit1675.c		patch | blob | blame | history