urlapi: leaner with fewer allocs

author Daniel Stenberg <daniel@haxx.se>

Thu, 1 Sep 2022 08:16:24 +0000 (10:16 +0200)

committer Daniel Stenberg <daniel@haxx.se>

Wed, 7 Sep 2022 08:21:45 +0000 (10:21 +0200)
author Daniel Stenberg <daniel@haxx.se>
Thu, 1 Sep 2022 08:16:24 +0000 (10:16 +0200)
committer Daniel Stenberg <daniel@haxx.se>
Wed, 7 Sep 2022 08:21:45 +0000 (10:21 +0200)
diff --git a/docs/DYNBUF.md b/docs/DYNBUF.md

index a984a41e47672f19f21cc12be9ff1bc456e55370..7ed8d80ec42ae9c670847264ba0ebe47852c20a0 100644 (file)
--- a/docs/DYNBUF.md
+++ b/docs/DYNBUF.md
@@ -76,7 +76,8 @@ CURLcode Curl_dyn_tail(struct dynbuf *s, size_t length);
  
  Keep `length` bytes of the buffer tail (the last `length` bytes of the
  buffer). The rest of the buffer is dropped. The specified `length` must not be
-larger than the buffer length.
+larger than the buffer length. To instead keep the leading part, see
+`Curl_dyn_setlen()`.
  
  ## ptr
  
@@ -106,3 +107,13 @@ size_t Curl_dyn_len(const struct dynbuf *s);
  
  Returns the length of the buffer in bytes. Does not include the terminating
  zero byte.
+
+## setlen
+
+```c
+CURLcode Curl_dyn_setlen(struct dynbuf *s, size_t len);
+```
+
+Sets the new shorter length of the buffer in number of bytes. Keeps the
+leftmost set number of bytes, discards the rest. To instead keep the tail part
+of the buffer, see `Curl_dyn_tail()`.
diff --git a/lib/Makefile.inc b/lib/Makefile.inc

index af47fe6a20f8e7bcf06fadc5c2b85034fb1c3b70..0c8972ce5f7b451759e858a21073d9560e862bcd 100644 (file)
--- a/lib/Makefile.inc
+++ b/lib/Makefile.inc
@@ -128,7 +128,6 @@ LIB_CFILES =         \
    curl_threads.c     \
    dict.c             \
    doh.c              \
-  dotdot.c           \
    dynbuf.c           \
    easy.c             \
    easygetopt.c       \
@@ -262,7 +261,6 @@ LIB_HFILES =         \
    curlx.h            \
    dict.h             \
    doh.h              \
-  dotdot.h           \
    dynbuf.h           \
    easy_lock.h        \
    easyif.h           \
diff --git a/lib/dotdot.c b/lib/dotdot.c

deleted file mode 100644 (file)

index 0b04531..0000000
--- a/lib/dotdot.c
+++ /dev/null
@@ -1,184 +0,0 @@
-/***************************************************************************
- *                                  _   _ ____  _
- *  Project                     ___| | | |  _ \| |
- *                             / __| | | | |_) | |
- *                            | (__| |_| |  _ <| |___
- *                             \___|\___/|_| \_\_____|
- *
- * Copyright (C) 1998 - 2022, Daniel Stenberg, <daniel@haxx.se>, et al.
- *
- * This software is licensed as described in the file COPYING, which
- * you should have received as part of this distribution. The terms
- * are also available at https://curl.se/docs/copyright.html.
- *
- * You may opt to use, copy, modify, merge, publish, distribute and/or sell
- * copies of the Software, and permit persons to whom the Software is
- * furnished to do so, under the terms of the COPYING file.
- *
- * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
- * KIND, either express or implied.
- *
- * SPDX-License-Identifier: curl
- *
- ***************************************************************************/
-
-#include "curl_setup.h"
-
-#include <curl/curl.h>
-
-#include "dotdot.h"
-#include "curl_memory.h"
-
-/* The last #include file should be: */
-#include "memdebug.h"
-
-/*
- * "Remove Dot Segments"
- * https://datatracker.ietf.org/doc/html/rfc3986#section-5.2.4
- */
-
-/*
- * Curl_dedotdotify()
- * @unittest: 1395
- *
- * This function gets a null-terminated path with dot and dotdot sequences
- * passed in and strips them off according to the rules in RFC 3986 section
- * 5.2.4.
- *
- * The function handles a query part ('?' + stuff) appended but it expects
- * that fragments ('#' + stuff) have already been cut off.
- *
- * RETURNS
- *
- * an allocated dedotdotified output string
- */
-char *Curl_dedotdotify(const char *input)
-{
-  size_t inlen = strlen(input);
-  char *clone;
-  size_t clen = inlen; /* the length of the cloned input */
-  char *out = malloc(inlen + 1);
-  char *outptr;
-  char *orgclone;
-  char *queryp;
-  if(!out)
-    return NULL; /* out of memory */
-
-  *out = 0; /* null-terminates, for inputs like "./" */
-
-  /* get a cloned copy of the input */
-  clone = strdup(input);
-  if(!clone) {
-    free(out);
-    return NULL;
-  }
-  orgclone = clone;
-  outptr = out;
-
-  if(!*clone) {
-    /* zero length string, return that */
-    free(out);
-    return clone;
-  }
-
-  /*
-   * To handle query-parts properly, we must find it and remove it during the
-   * dotdot-operation and then append it again at the end to the output
-   * string.
-   */
-  queryp = strchr(clone, '?');
-  if(queryp)
-    *queryp = 0;
-
-  do {
-
-    /*  A.  If the input buffer begins with a prefix of "../" or "./", then
-        remove that prefix from the input buffer; otherwise, */
-
-    if(!strncmp("./", clone, 2)) {
-      clone += 2;
-      clen -= 2;
-    }
-    else if(!strncmp("../", clone, 3)) {
-      clone += 3;
-      clen -= 3;
-    }
-
-    /*  B.  if the input buffer begins with a prefix of "/./" or "/.", where
-        "."  is a complete path segment, then replace that prefix with "/" in
-        the input buffer; otherwise, */
-    else if(!strncmp("/./", clone, 3)) {
-      clone += 2;
-      clen -= 2;
-    }
-    else if(!strcmp("/.", clone)) {
-      clone[1]='/';
-      clone++;
-      clen -= 1;
-    }
-
-    /*  C.  if the input buffer begins with a prefix of "/../" or "/..", where
-        ".." is a complete path segment, then replace that prefix with "/" in
-        the input buffer and remove the last segment and its preceding "/" (if
-        any) from the output buffer; otherwise, */
-
-    else if(!strncmp("/../", clone, 4)) {
-      clone += 3;
-      clen -= 3;
-      /* remove the last segment from the output buffer */
-      while(outptr > out) {
-        outptr--;
-        if(*outptr == '/')
-          break;
-      }
-      *outptr = 0; /* null-terminate where it stops */
-    }
-    else if(!strcmp("/..", clone)) {
-      clone[2]='/';
-      clone += 2;
-      clen -= 2;
-      /* remove the last segment from the output buffer */
-      while(outptr > out) {
-        outptr--;
-        if(*outptr == '/')
-          break;
-      }
-      *outptr = 0; /* null-terminate where it stops */
-    }
-
-    /*  D.  if the input buffer consists only of "." or "..", then remove
-        that from the input buffer; otherwise, */
-
-    else if(!strcmp(".", clone) || !strcmp("..", clone)) {
-      *clone = 0;
-      *out = 0;
-    }
-
-    else {
-      /*  E.  move the first path segment in the input buffer to the end of
-          the output buffer, including the initial "/" character (if any) and
-          any subsequent characters up to, but not including, the next "/"
-          character or the end of the input buffer. */
-
-      do {
-        *outptr++ = *clone++;
-        clen--;
-      } while(*clone && (*clone != '/'));
-      *outptr = 0;
-    }
-
-  } while(*clone);
-
-  if(queryp) {
-    size_t qlen;
-    /* There was a query part, append that to the output. The 'clone' string
-       may now have been altered so we copy from the original input string
-       from the correct index. */
-    size_t oindex = queryp - orgclone;
-    qlen = strlen(&input[oindex]);
-    memcpy(outptr, &input[oindex], qlen + 1); /* include the end zero byte */
-  }
-
-  free(orgclone);
-  return out;
-}
diff --git a/lib/dotdot.h b/lib/dotdot.h

deleted file mode 100644 (file)

index 4ffe72d..0000000
--- a/lib/dotdot.h
+++ /dev/null
@@ -1,27 +0,0 @@
-#ifndef HEADER_CURL_DOTDOT_H
-#define HEADER_CURL_DOTDOT_H
-/***************************************************************************
- *                                  _   _ ____  _
- *  Project                     ___| | | |  _ \| |
- *                             / __| | | | |_) | |
- *                            | (__| |_| |  _ <| |___
- *                             \___|\___/|_| \_\_____|
- *
- * Copyright (C) 1998 - 2022, Daniel Stenberg, <daniel@haxx.se>, et al.
- *
- * This software is licensed as described in the file COPYING, which
- * you should have received as part of this distribution. The terms
- * are also available at https://curl.se/docs/copyright.html.
- *
- * You may opt to use, copy, modify, merge, publish, distribute and/or sell
- * copies of the Software, and permit persons to whom the Software is
- * furnished to do so, under the terms of the COPYING file.
- *
- * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
- * KIND, either express or implied.
- *
- * SPDX-License-Identifier: curl
- *
- ***************************************************************************/
-char *Curl_dedotdotify(const char *input);
-#endif /* HEADER_CURL_DOTDOT_H */
diff --git a/lib/dynbuf.c b/lib/dynbuf.c

index 3b907dbe2e69126e11fde59b5c7bcda9711d0075..0b1cf9afd85657be71256333d881f8e688ce1ea4 100644 (file)
--- a/lib/dynbuf.c
+++ b/lib/dynbuf.c
@@ -128,7 +128,6 @@ void Curl_dyn_reset(struct dynbuf *s)
    s->leng = 0;
  }
  
-#ifdef USE_NGTCP2
  /*
   * Specify the size of the tail to keep (number of bytes from the end of the
   * buffer). The rest will be dropped.
@@ -153,7 +152,6 @@ CURLcode Curl_dyn_tail(struct dynbuf *s, size_t trail)
    return CURLE_OK;
  
  }
-#endif
  
  /*
   * Appends a buffer with length.
@@ -255,3 +253,18 @@ size_t Curl_dyn_len(const struct dynbuf *s)
    DEBUGASSERT(!s->leng || s->bufr);
    return s->leng;
  }
+
+/*
+ * Set a new (smaller) length.
+ */
+CURLcode Curl_dyn_setlen(struct dynbuf *s, size_t set)
+{
+  DEBUGASSERT(s);
+  DEBUGASSERT(s->init == DYNINIT);
+  DEBUGASSERT(!s->leng || s->bufr);
+  if(set > s->leng)
+    return CURLE_BAD_FUNCTION_ARGUMENT;
+  s->leng = set;
+  s->bufr[s->leng] = 0;
+  return CURLE_OK;
+}
diff --git a/lib/dynbuf.h b/lib/dynbuf.h

index c1e97235dee5f49a868e816e076b7077cabce980..7deba6a5fd7964a75111c1e9f9426a7ae3236cfc 100644 (file)
--- a/lib/dynbuf.h
+++ b/lib/dynbuf.h
@@ -38,6 +38,7 @@
  #define Curl_dyn_len(a) curlx_dyn_len(a)
  #define Curl_dyn_reset(a) curlx_dyn_reset(a)
  #define Curl_dyn_tail(a,b) curlx_dyn_tail(a,b)
+#define Curl_dyn_setlen(a,b) curlx_dyn_setlen(a,b)
  #define curlx_dynbuf dynbuf /* for the struct name */
  #endif
  
@@ -63,6 +64,7 @@ CURLcode Curl_dyn_vaddf(struct dynbuf *s, const char *fmt, va_list ap)
    WARN_UNUSED_RESULT;
  void Curl_dyn_reset(struct dynbuf *s);
  CURLcode Curl_dyn_tail(struct dynbuf *s, size_t trail);
+CURLcode Curl_dyn_setlen(struct dynbuf *s, size_t set);
  char *Curl_dyn_ptr(const struct dynbuf *s);
  unsigned char *Curl_dyn_uptr(const struct dynbuf *s);
  size_t Curl_dyn_len(const struct dynbuf *s);
diff --git a/lib/url.c b/lib/url.c

index e3d963ee5c6ac8f51d95cac30f8e57d0eb8493a2..231a00fecb6bf5f2fa5a006209ae2ae41f85ee9a 100644 (file)
--- a/lib/url.c
+++ b/lib/url.c
@@ -128,7 +128,6 @@ bool curl_win32_idn_to_ascii(const char *in, char **out);
  #include "http_proxy.h"
  #include "conncache.h"
  #include "multihandle.h"
-#include "dotdot.h"
  #include "strdup.h"
  #include "setopt.h"
  #include "altsvc.h"
diff --git a/lib/urlapi-int.h b/lib/urlapi-int.h

index a03aa888af1851bd4504949c0a2d8aa64e2ed22c..5d9db8ccc7b96e67ad7775d02ac333907895ebc4 100644 (file)
--- a/lib/urlapi-int.h
+++ b/lib/urlapi-int.h
@@ -25,10 +25,11 @@
   ***************************************************************************/
  #include "curl_setup.h"
  
-bool Curl_is_absolute_url(const char *url, char *scheme, size_t buflen);
+size_t Curl_is_absolute_url(const char *url, char *buf, size_t buflen);
  
  #ifdef DEBUGBUILD
-CURLUcode Curl_parse_port(struct Curl_URL *u, char *hostname, bool);
+CURLUcode Curl_parse_port(struct Curl_URL *u, struct dynbuf *host,
+                          bool has_scheme);
  #endif
  
  #endif /* HEADER_CURL_URLAPI_INT_H */
diff --git a/lib/urlapi.c b/lib/urlapi.c

index 0e07d0ba3eef5b8980acc6b4b8f7a5299e0625a7..baecb84b92ec76cf4223bad84ad0ecdf60d04bd7 100644 (file)
--- a/lib/urlapi.c
+++ b/lib/urlapi.c
@@ -27,12 +27,12 @@
  #include "urldata.h"
  #include "urlapi-int.h"
  #include "strcase.h"
-#include "dotdot.h"
  #include "url.h"
  #include "escape.h"
  #include "curl_ctype.h"
  #include "inet_pton.h"
  #include "inet_ntop.h"
+#include "strdup.h"
  
  /* The last 3 #include files should be in this order */
  #include "curl_printf.h"
@@ -68,9 +68,6 @@ struct Curl_URL {
    char *path;
    char *query;
    char *fragment;
-
-  char *scratch; /* temporary scratch area */
-  char *temppath; /* temporary path pointer */
    long portnum; /* the numerical version */
  };
  
@@ -88,8 +85,6 @@ static void free_urlhandle(struct Curl_URL *u)
    free(u->path);
    free(u->query);
    free(u->fragment);
-  free(u->scratch);
-  free(u->temppath);
  }
  
  /*
@@ -130,64 +125,19 @@ static bool urlchar_needs_escaping(int c)
    return !(ISCNTRL(c) || ISSPACE(c) || ISGRAPH(c));
  }
  
-/*
- * strlen_url() returns the length of the given URL if the spaces within the
- * URL were properly URL encoded.
- * URL encoding should be skipped for host names, otherwise IDN resolution
- * will fail.
- */
-static size_t strlen_url(const char *url, bool relative)
-{
-  const unsigned char *ptr;
-  size_t newlen = 0;
-  bool left = TRUE; /* left side of the ? */
-  const unsigned char *host_sep = (const unsigned char *) url;
-
-  if(!relative)
-    host_sep = (const unsigned char *) find_host_sep(url);
-
-  for(ptr = (unsigned char *)url; *ptr; ptr++) {
-
-    if(ptr < host_sep) {
-      ++newlen;
-      continue;
-    }
-
-    if(*ptr == ' ') {
-      if(left)
-        newlen += 3;
-      else
-        newlen++;
-      continue;
-    }
-
-    if (*ptr == '?')
-      left = FALSE;
-
-    if(urlchar_needs_escaping(*ptr))
-      newlen += 2;
-
-    newlen++;
-  }
-
-  return newlen;
-}
-
-/* strcpy_url() copies a url to a output buffer and URL-encodes the spaces in
- * the source URL accordingly.
+/* strcpy_url() creates a url in an output dynbuf and URL-encodes the spaces
+ * in the source URL accordingly.
+ *
   * URL encoding should be skipped for host names, otherwise IDN resolution
   * will fail.
   *
- * Returns TRUE if something was updated.
   */
-static bool strcpy_url(char *output, const char *url, bool relative)
+static CURLUcode strcpy_url(struct dynbuf *o, const char *url, bool relative)
  {
    /* we must add this with whitespace-replacing */
    bool left = TRUE;
    const unsigned char *iptr;
-  char *optr = output;
    const unsigned char *host_sep = (const unsigned char *) url;
-  bool changed = FALSE;
  
    if(!relative)
      host_sep = (const unsigned char *) find_host_sep(url);
@@ -197,19 +147,20 @@ static bool strcpy_url(char *output, const char *url, bool relative)
        iptr++) {
  
      if(iptr < host_sep) {
-      *optr++ = *iptr;
+      if(Curl_dyn_addn(o, iptr, 1))
+        return CURLUE_OUT_OF_MEMORY;
        continue;
      }
  
      if(*iptr == ' ') {
        if(left) {
-        *optr++='%'; /* add a '%' */
-        *optr++='2'; /* add a '2' */
-        *optr++='0'; /* add a '0' */
+        if(Curl_dyn_addn(o, "%20", 3))
+          return CURLUE_OUT_OF_MEMORY;
+      }
+      else {
+        if(Curl_dyn_addn(o, "+", 1))
+          return CURLUE_OUT_OF_MEMORY;
        }
-      else
-        *optr++='+'; /* add a '+' here */
-      changed = TRUE;
        continue;
      }
  
@@ -217,24 +168,24 @@ static bool strcpy_url(char *output, const char *url, bool relative)
        left = FALSE;
  
      if(urlchar_needs_escaping(*iptr)) {
-      msnprintf(optr, 4, "%%%02x", *iptr);
-      changed = TRUE;
-      optr += 3;
+      if(Curl_dyn_addf(o, "%%%02x", *iptr))
+        return CURLUE_OUT_OF_MEMORY;
+    }
+    else {
+      if(Curl_dyn_addn(o, iptr, 1))
+        return CURLUE_OUT_OF_MEMORY;
      }
-    else
-      *optr++ = *iptr;
    }
-  *optr = 0; /* null-terminate output buffer */
  
-  return changed;
+  return CURLUE_OK;
  }
  
  /*
- * Returns true if the given URL is absolute (as opposed to relative). Returns
- * the scheme in the buffer if TRUE and 'buf' is non-NULL. The buflen must
- * be larger than MAX_SCHEME_LEN if buf is set.
+ * Returns the length of the scheme if the given URL is absolute (as opposed
+ * to relative). Stores the scheme in the buffer if TRUE and 'buf' is
+ * non-NULL. The buflen must be larger than MAX_SCHEME_LEN if buf is set.
   */
-bool Curl_is_absolute_url(const char *url, char *buf, size_t buflen)
+size_t Curl_is_absolute_url(const char *url, char *buf, size_t buflen)
  {
    int i;
    DEBUGASSERT(!buf || (buflen > MAX_SCHEME_LEN));
@@ -243,7 +194,7 @@ bool Curl_is_absolute_url(const char *url, char *buf, size_t buflen)
      buf[0] = 0; /* always leave a defined value in buf */
  #ifdef WIN32
    if(STARTS_WITH_DRIVE_PREFIX(url))
-    return FALSE;
+    return 0;
  #endif
    for(i = 0; i < MAX_SCHEME_LEN; ++i) {
      char s = url[i];
@@ -257,15 +208,17 @@ bool Curl_is_absolute_url(const char *url, char *buf, size_t buflen)
      }
    }
    if(i && (url[i] == ':') && (url[i + 1] == '/')) {
+    /* the length of the scheme is the name part only */
+    size_t len = i;
      if(buf) {
        buf[i] = 0;
        while(i--) {
          buf[i] = Curl_raw_tolower(url[i]);
        }
      }
-    return TRUE;
+    return len;
    }
-  return FALSE;
+  return 0;
  }
  
  /*
@@ -273,34 +226,26 @@ bool Curl_is_absolute_url(const char *url, char *buf, size_t buflen)
   * URL-encodes any spaces.
   * The returned pointer must be freed by the caller unless NULL
   * (returns NULL on out of memory).
+ *
+ * Note that this function destroys the 'base' string.
   */
-static char *concat_url(const char *base, const char *relurl)
+static char *concat_url(char *base, const char *relurl)
  {
    /***
     TRY to append this new path to the old URL
     to the right of the host part. Oh crap, this is doomed to cause
     problems in the future...
    */
-  char *newest;
+  struct dynbuf newest;
    char *protsep;
    char *pathsep;
-  size_t newlen;
    bool host_changed = FALSE;
-
    const char *useurl = relurl;
-  size_t urllen;
-
-  /* we must make our own copy of the URL to play with, as it may
-     point to read-only data */
-  char *url_clone = strdup(base);
-
-  if(!url_clone)
-    return NULL; /* skip out of this NOW */
  
    /* protsep points to the start of the host name */
-  protsep = strstr(url_clone, "//");
+  protsep = strstr(base, "//");
    if(!protsep)
-    protsep = url_clone;
+    protsep = base;
    else
      protsep += 2; /* pass the slashes */
  
@@ -393,38 +338,24 @@ static char *concat_url(const char *base, const char *relurl)
      }
    }
  
-  /* If the new part contains a space, this is a mighty stupid redirect
-     but we still make an effort to do "right". To the left of a '?'
-     letter we replace each space with %20 while it is replaced with '+'
-     on the right side of the '?' letter.
-  */
-  newlen = strlen_url(useurl, !host_changed);
-
-  urllen = strlen(url_clone);
-
-  newest = malloc(urllen + 1 + /* possible slash */
-                  newlen + 1 /* zero byte */);
-
-  if(!newest) {
-    free(url_clone); /* don't leak this */
-    return NULL;
-  }
+  Curl_dyn_init(&newest, CURL_MAX_INPUT_LENGTH);
  
    /* copy over the root url part */
-  memcpy(newest, url_clone, urllen);
+  if(Curl_dyn_add(&newest, base))
+    return NULL;
  
    /* check if we need to append a slash */
    if(('/' == useurl[0]) || (protsep && !*protsep) || ('?' == useurl[0]))
      ;
-  else
-    newest[urllen++]='/';
+  else {
+    if(Curl_dyn_addn(&newest, "/", 1))
+      return NULL;
+  }
  
    /* then append the new piece on the right side */
-  strcpy_url(&newest[urllen], useurl, !host_changed);
+  strcpy_url(&newest, useurl, !host_changed);
  
-  free(url_clone);
-
-  return newest;
+  return Curl_dyn_ptr(&newest);
  }
  
  /* scan for byte values < 31 or 127 */
@@ -458,7 +389,7 @@ static bool junkscan(const char *part, unsigned int flags)
   *
   */
  static CURLUcode parse_hostname_login(struct Curl_URL *u,
-                                      char **hostname,
+                                      struct dynbuf *host,
                                        unsigned int flags)
  {
    CURLUcode result = CURLUE_OK;
@@ -468,23 +399,27 @@ static CURLUcode parse_hostname_login(struct Curl_URL *u,
    char *optionsp = NULL;
    const struct Curl_handler *h = NULL;
  
-  /* At this point, we're hoping all the other special cases have
-   * been taken care of, so conn->host.name is at most
-   *    [user[:password][;options]]@]hostname
+  /* At this point, we assume all the other special cases have been taken
+   * care of, so the host is at most
+   *
+   *   [user[:password][;options]]@]hostname
     *
     * We need somewhere to put the embedded details, so do that first.
     */
  
-  char *ptr = strchr(*hostname, '@');
-  char *login = *hostname;
+  char *login = Curl_dyn_ptr(host);
+  char *ptr;
+
+  DEBUGASSERT(login);
  
+  ptr = strchr(login, '@');
    if(!ptr)
      goto out;
  
    /* We will now try to extract the
     * possible login information in a string like:
     * ftp://user:password@ftp.my.site:8021/README */
-  *hostname = ++ptr;
+  ptr++;
  
    /* if this is a known scheme, get some details */
    if(u->scheme)
@@ -530,6 +465,10 @@ static CURLUcode parse_hostname_login(struct Curl_URL *u,
      u->options = optionsp;
    }
  
+  /* move the name to the start of the host buffer */
+  if(Curl_dyn_tail(host, strlen(ptr)))
+    return CURLUE_OUT_OF_MEMORY;
+
    return CURLUE_OK;
    out:
  
@@ -543,13 +482,13 @@ static CURLUcode parse_hostname_login(struct Curl_URL *u,
    return result;
  }
  
-UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, char *hostname,
+UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, struct dynbuf *host,
                                     bool has_scheme)
  {
    char *portptr = NULL;
    char endbracket;
    int len;
-
+  char *hostname = Curl_dyn_ptr(host);
    /*
     * Find the end of an IPv6 address, either on the ']' ending bracket or
     * a percent-encoded zone index.
@@ -586,6 +525,7 @@ UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, char *hostname,
      char *rest;
      long port;
      char portbuf[7];
+    size_t keep = portptr - hostname;
  
      /* Browser behavior adaptation. If there's a colon with no digits after,
         just cut off the name there which makes us ignore the colon and just
@@ -594,15 +534,15 @@ UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, char *hostname,
         Don't do it if the URL has no scheme, to make something that looks like
         a scheme not work!
      */
-    if(!portptr[1]) {
-      *portptr = '\0';
+    Curl_dyn_setlen(host, keep);
+    portptr++;
+    if(!*portptr)
        return has_scheme ? CURLUE_OK : CURLUE_BAD_PORT_NUMBER;
-    }
  
-    if(!ISDIGIT(portptr[1]))
+    if(!ISDIGIT(*portptr))
        return CURLUE_BAD_PORT_NUMBER;
  
-    port = strtol(portptr + 1, &rest, 10);  /* Port number must be decimal */
+    port = strtol(portptr, &rest, 10);  /* Port number must be decimal */
  
      if(port > 0xffff)
        return CURLUE_BAD_PORT_NUMBER;
@@ -610,7 +550,6 @@ UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, char *hostname,
      if(rest[0])
        return CURLUE_BAD_PORT_NUMBER;
  
-    *portptr++ = '\0'; /* cut off the name there */
      *rest = 0;
      /* generate a new port number string to get rid of leading zeroes etc */
      msnprintf(portbuf, sizeof(portbuf), "%ld", port);
@@ -623,12 +562,15 @@ UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, char *hostname,
    return CURLUE_OK;
  }
  
-static CURLUcode hostname_check(struct Curl_URL *u, char *hostname)
+static CURLUcode hostname_check(struct Curl_URL *u, char *hostname,
+                                size_t hlen) /* length of hostname */
  {
    size_t len;
-  size_t hlen = strlen(hostname);
+  DEBUGASSERT(hostname);
  
-  if(hostname[0] == '[') {
+  if(!hostname[0])
+    return CURLUE_NO_HOST;
+  else if(hostname[0] == '[') {
      const char *l = "0123456789abcdefABCDEF:.";
      if(hlen < 4) /* '[::]' is the shortest possible valid string */
        return CURLUE_BAD_IPV6;
@@ -692,8 +634,6 @@ static CURLUcode hostname_check(struct Curl_URL *u, char *hostname)
        /* hostname with bad content */
        return CURLUE_BAD_HOSTNAME;
    }
-  if(!hostname[0])
-    return CURLUE_NO_HOST;
    return CURLUE_OK;
  }
  
@@ -787,79 +727,231 @@ static bool ipv4_normalize(const char *hostname, char *outp, size_t olen)
    return TRUE;
  }
  
-/* return strdup'ed version in 'outp', possibly percent decoded */
-static CURLUcode decode_host(char *hostname, char **outp)
+/* if necessary, replace the host content with a URL decoded version */
+static CURLUcode decode_host(struct dynbuf *host)
  {
    char *per = NULL;
-  if(hostname[0] != '[')
+  const char *hostname = Curl_dyn_ptr(host);
+  if(hostname[0] == '[')
      /* only decode if not an ipv6 numerical */
-    per = strchr(hostname, '%');
-  if(!per) {
-    *outp = strdup(hostname);
-    if(!*outp)
-      return CURLUE_OUT_OF_MEMORY;
-  }
+    return CURLUE_OK;
+  per = strchr(hostname, '%');
+  if(!per)
+    /* nothing to decode */
+    return CURLUE_OK;
    else {
-    /* might be encoded */
+    /* encoded */
      size_t dlen;
-    CURLcode result = Curl_urldecode(hostname, 0, outp, &dlen, REJECT_CTRL);
+    char *decoded;
+    CURLcode result = Curl_urldecode(hostname, 0, &decoded, &dlen,
+                                     REJECT_CTRL);
      if(result)
        return CURLUE_BAD_HOSTNAME;
+    Curl_dyn_reset(host);
+    result = Curl_dyn_addn(host, decoded, dlen);
+    free(decoded);
+    if(result)
+      return CURLUE_OUT_OF_MEMORY;
    }
  
    return CURLUE_OK;
  }
  
-static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags)
+/*
+ * "Remove Dot Segments"
+ * https://datatracker.ietf.org/doc/html/rfc3986#section-5.2.4
+ */
+
+/*
+ * dedotdotify()
+ * @unittest: 1395
+ *
+ * This function gets a null-terminated path with dot and dotdot sequences
+ * passed in and strips them off according to the rules in RFC 3986 section
+ * 5.2.4.
+ *
+ * The function handles a query part ('?' + stuff) appended but it expects
+ * that fragments ('#' + stuff) have already been cut off.
+ *
+ * Note that this funciton *writes* a byte into the source buffer during its
+ * operation.
+ *
+ * RETURNS
+ *
+ * an allocated dedotdotified output string
+ */
+UNITTEST char *dedotdotify(const char *input, size_t clen);
+UNITTEST char *dedotdotify(const char *input, size_t clen)
  {
-  char *path;
-  bool path_alloced = FALSE;
+  char *out = malloc(clen + 1);
+  char *outptr;
+  const char *orginput = input;
+  char *queryp;
+  if(!out)
+    return NULL; /* out of memory */
+
+  *out = 0; /* null-terminates, for inputs like "./" */
+  outptr = out;
+
+  if(!*input)
+    /* zero length input string, return that */
+    return out;
+
+  /*
+   * To handle query-parts properly, we must find it and remove it during the
+   * dotdot-operation and then append it again at the end to the output
+   * string.
+   */
+  queryp = strchr(input, '?');
+
+  do {
+    bool dotdot = TRUE;
+    if(*input == '.') {
+      /*  A.  If the input buffer begins with a prefix of "../" or "./", then
+          remove that prefix from the input buffer; otherwise, */
+
+      if(!strncmp("./", input, 2)) {
+        input += 2;
+        clen -= 2;
+      }
+      else if(!strncmp("../", input, 3)) {
+        input += 3;
+        clen -= 3;
+      }
+      /*  D.  if the input buffer consists only of "." or "..", then remove
+          that from the input buffer; otherwise, */
+
+      else if(!strcmp(".", input) || !strcmp("..", input) ||
+              !strncmp(".?", input, 2) || !strncmp("..?", input, 3)) {
+        *out = 0;
+        break;
+      }
+      else
+        dotdot = FALSE;
+    }
+    else if(*input == '/') {
+      /*  B.  if the input buffer begins with a prefix of "/./" or "/.", where
+          "."  is a complete path segment, then replace that prefix with "/" in
+          the input buffer; otherwise, */
+      if(!strncmp("/./", input, 3)) {
+        input += 2;
+        clen -= 2;
+      }
+      else if(!strcmp("/.", input) || !strncmp("/.?", input, 3)) {
+        *outptr++ = '/';
+        *outptr = 0;
+        break;
+      }
+
+      /*  C.  if the input buffer begins with a prefix of "/../" or "/..",
+          where ".." is a complete path segment, then replace that prefix with
+          "/" in the input buffer and remove the last segment and its
+          preceding "/" (if any) from the output buffer; otherwise, */
+
+      else if(!strncmp("/../", input, 4)) {
+        input += 3;
+        clen -= 3;
+        /* remove the last segment from the output buffer */
+        while(outptr > out) {
+          outptr--;
+          if(*outptr == '/')
+            break;
+        }
+        *outptr = 0; /* null-terminate where it stops */
+      }
+      else if(!strcmp("/..", input) || !strncmp("/..?", input, 4)) {
+        /* remove the last segment from the output buffer */
+        while(outptr > out) {
+          outptr--;
+          if(*outptr == '/')
+            break;
+        }
+        *outptr++ = '/';
+        *outptr = 0; /* null-terminate where it stops */
+        break;
+      }
+      else
+        dotdot = FALSE;
+    }
+    else
+      dotdot = FALSE;
+
+    if(!dotdot) {
+      /*  E.  move the first path segment in the input buffer to the end of
+          the output buffer, including the initial "/" character (if any) and
+          any subsequent characters up to, but not including, the next "/"
+          character or the end of the input buffer. */
+
+      do {
+        *outptr++ = *input++;
+        clen--;
+      } while(*input && (*input != '/') && (*input != '?'));
+      *outptr = 0;
+    }
+
+    /* continue until end of input string OR, if there is a terminating
+       query part, stop there */
+  } while(*input && (!queryp || (input < queryp)));
+
+  if(queryp) {
+    size_t qlen;
+    /* There was a query part, append that to the output. */
+    size_t oindex = queryp - orginput;
+    qlen = strlen(&orginput[oindex]);
+    memcpy(outptr, &orginput[oindex], qlen + 1); /* include zero byte */
+  }
+
+  return out;
+}
+
+static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
+{
+  const char *path;
+  size_t pathlen;
    bool uncpath = FALSE;
-  char *hostname;
    char *query = NULL;
    char *fragment = NULL;
-  CURLUcode result;
-  bool url_has_scheme = FALSE;
    char schemebuf[MAX_SCHEME_LEN + 1];
    const char *schemep = NULL;
    size_t schemelen = 0;
    size_t urllen;
+  CURLUcode result = CURLUE_OK;
+  size_t fraglen = 0;
+  struct dynbuf host;
  
    DEBUGASSERT(url);
  
+  Curl_dyn_init(&host, CURL_MAX_INPUT_LENGTH);
+
    /*************************************************************
     * Parse the URL.
     ************************************************************/
    /* allocate scratch area */
    urllen = strlen(url);
-  if(urllen > CURL_MAX_INPUT_LENGTH)
+  if(urllen > CURL_MAX_INPUT_LENGTH) {
      /* excessive input length */
-    return CURLUE_MALFORMED_INPUT;
-
-  path = u->scratch = malloc(urllen * 2 + 2);
-  if(!path)
-    return CURLUE_OUT_OF_MEMORY;
-
-  hostname = &path[urllen + 1];
-  hostname[0] = 0;
-
-  if(Curl_is_absolute_url(url, schemebuf, sizeof(schemebuf))) {
-    url_has_scheme = TRUE;
-    schemelen = strlen(schemebuf);
+    result = CURLUE_MALFORMED_INPUT;
+    goto fail;
    }
  
+  schemelen = Curl_is_absolute_url(url, schemebuf, sizeof(schemebuf));
+
    /* handle the file: scheme */
-  if(url_has_scheme && !strcmp(schemebuf, "file")) {
-    if(urllen <= 6)
+  if(schemelen && !strcmp(schemebuf, "file")) {
+    if(urllen <= 6) {
        /* file:/ is not enough to actually be a complete file: URL */
-      return CURLUE_BAD_FILE_URL;
+      result = CURLUE_BAD_FILE_URL;
+      goto fail;
+    }
  
      /* path has been allocated large enough to hold this */
-    strcpy(path, &url[5]);
+    path = (char *)&url[5];
  
-    u->scheme = strdup("file");
-    if(!u->scheme)
-      return CURLUE_OUT_OF_MEMORY;
+    schemep = u->scheme = strdup("file");
+    if(!u->scheme) {
+      result = CURLUE_OUT_OF_MEMORY;
+      goto fail;
+    }
  
      /* Extra handling URLs with an authority component (i.e. that start with
       * "file://")
@@ -869,7 +961,7 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags)
       */
      if(path[0] == '/' && path[1] == '/') {
        /* swallow the two slashes */
-      char *ptr = &path[2];
+      const char *ptr = &path[2];
  
        /*
         * According to RFC 8089, a file: URL can be reliably dereferenced if:
@@ -905,13 +997,17 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags)
               chars, and the delimiting slash character must be appended to the
               host name */
            path = strpbrk(ptr, "/\\:*?\"<>|");
-          if(!path || *path != '/')
-            return CURLUE_BAD_FILE_URL;
+          if(!path || *path != '/') {
+            result = CURLUE_BAD_FILE_URL;
+            goto fail;
+          }
  
            len = path - ptr;
            if(len) {
-            memcpy(hostname, ptr, len);
-            hostname[len] = 0;
+            if(Curl_dyn_addn(&host, ptr, len)) {
+              result = CURLUE_OUT_OF_MEMORY;
+              goto fail;
+            }
              uncpath = TRUE;
            }
  
@@ -919,7 +1015,8 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags)
  #else
            /* Invalid file://hostname/, expected localhost or 127.0.0.1 or
               none */
-          return CURLUE_BAD_FILE_URL;
+          result = CURLUE_BAD_FILE_URL;
+          goto fail;
  #endif
          }
        }
@@ -928,7 +1025,8 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags)
      }
  
      if(!uncpath)
-        hostname = NULL; /* no host for file: URLs by default */
+      /* no host for file: URLs by default */
+      Curl_dyn_reset(&host);
  
  #if !defined(MSDOS) && !defined(WIN32) && !defined(__CYGWIN__)
      /* Don't allow Windows drive letters when not in Windows.
@@ -936,13 +1034,14 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags)
      if(('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) ||
         STARTS_WITH_URL_DRIVE_PREFIX(path)) {
        /* File drive letters are only accepted in MSDOS/Windows */
-      return CURLUE_BAD_FILE_URL;
+      result = CURLUE_BAD_FILE_URL;
+      goto fail;
      }
  #else
      /* If the path starts with a slash and a drive letter, ditch the slash */
      if('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) {
        /* This cannot be done with strcpy, as the memory chunks overlap! */
-      memmove(path, &path[1], strlen(&path[1]) + 1);
+      path++;
      }
  #endif
  
@@ -952,32 +1051,39 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags)
      const char *p;
      const char *hostp;
      size_t len;
-    path[0] = 0;
  
-    if(url_has_scheme) {
+    if(schemelen) {
        int i = 0;
        p = &url[schemelen + 1];
        while(p && (*p == '/') && (i < 4)) {
          p++;
          i++;
        }
-      if((i < 1) || (i>3))
+      if((i < 1) || (i>3)) {
          /* less than one or more than three slashes */
-        return CURLUE_BAD_SLASHES;
+        result = CURLUE_BAD_SLASHES;
+        goto fail;
+      }
  
        schemep = schemebuf;
        if(!Curl_builtin_scheme(schemep) &&
-         !(flags & CURLU_NON_SUPPORT_SCHEME))
-        return CURLUE_UNSUPPORTED_SCHEME;
+         !(flags & CURLU_NON_SUPPORT_SCHEME)) {
+        result = CURLUE_UNSUPPORTED_SCHEME;
+        goto fail;
+      }
  
-      if(junkscan(schemep, flags))
-        return CURLUE_BAD_SCHEME;
+      if(junkscan(schemep, flags)) {
+        result = CURLUE_BAD_SCHEME;
+        goto fail;
+      }
      }
      else {
        /* no scheme! */
  
-      if(!(flags & (CURLU_DEFAULT_SCHEME|CURLU_GUESS_SCHEME)))
-        return CURLUE_BAD_SCHEME;
+      if(!(flags & (CURLU_DEFAULT_SCHEME|CURLU_GUESS_SCHEME))) {
+        result = CURLUE_BAD_SCHEME;
+        goto fail;
+      }
        if(flags & CURLU_DEFAULT_SCHEME)
          schemep = DEFAULT_SCHEME;
  
@@ -994,122 +1100,156 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags)
  
      len = p - hostp;
      if(len) {
-      memcpy(hostname, hostp, len);
-      hostname[len] = 0;
+      if(Curl_dyn_addn(&host, hostp, len)) {
+        result = CURLUE_OUT_OF_MEMORY;
+        goto fail;
+      }
      }
      else {
-      if(!(flags & CURLU_NO_AUTHORITY))
-        return CURLUE_NO_HOST;
+      if(!(flags & CURLU_NO_AUTHORITY)) {
+        result = CURLUE_NO_HOST;
+        goto fail;
+      }
      }
  
-    strcpy(path, p);
+    path = (char *)p;
  
      if(schemep) {
        u->scheme = strdup(schemep);
-      if(!u->scheme)
-        return CURLUE_OUT_OF_MEMORY;
+      if(!u->scheme) {
+        result = CURLUE_OUT_OF_MEMORY;
+        goto fail;
+      }
      }
    }
  
-  if((flags & CURLU_URLENCODE) && path[0]) {
-    /* worst case output length is 3x the original! */
-    char *newp = malloc(strlen(path) * 3);
-    if(!newp)
-      return CURLUE_OUT_OF_MEMORY;
-    path_alloced = TRUE;
-    strcpy_url(newp, path, TRUE); /* consider it relative */
-    u->temppath = path = newp;
+  if(*path && (flags & CURLU_URLENCODE)) {
+    struct dynbuf enc;
+    Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
+    if(strcpy_url(&enc, path, TRUE)) { /* consider it relative */
+      result = CURLUE_OUT_OF_MEMORY;
+      goto fail;
+    }
+    path = u->path = Curl_dyn_ptr(&enc);
    }
  
    fragment = strchr(path, '#');
    if(fragment) {
-    *fragment++ = 0;
-    if(junkscan(fragment, flags))
-      return CURLUE_BAD_FRAGMENT;
-    if(fragment[0]) {
-      u->fragment = strdup(fragment);
-      if(!u->fragment)
-        return CURLUE_OUT_OF_MEMORY;
+    fraglen = strlen(fragment);
+    if(fraglen > 1) {
+      /* skip the leading '#' in the copy but include the terminating null */
+      u->fragment = Curl_memdup(fragment + 1, fraglen);
+      if(!u->fragment) {
+        result = CURLUE_OUT_OF_MEMORY;
+        goto fail;
+      }
+
+      if(junkscan(u->fragment, flags)) {
+        result = CURLUE_BAD_FRAGMENT;
+        goto fail;
+      }
      }
    }
  
    query = strchr(path, '?');
-  if(query) {
-    *query++ = 0;
-    if(junkscan(query, flags))
-      return CURLUE_BAD_QUERY;
-    /* done even if the query part is a blank string */
-    u->query = strdup(query);
-    if(!u->query)
-      return CURLUE_OUT_OF_MEMORY;
-  }
+  if(query && (!fragment || (query < fragment))) {
+    size_t qlen = strlen(query) - fraglen; /* includes '?' */
+    pathlen = strlen(path) - qlen - fraglen;
+    if(qlen > 1) {
+      u->query = Curl_memdup(query + 1, qlen);
+      if(!u->query) {
+        result = CURLUE_OUT_OF_MEMORY;
+        goto fail;
+      }
+      u->query[qlen - 1] = 0;
  
-  if(junkscan(path, flags))
-    return CURLUE_BAD_PATH;
+      if(junkscan(u->query, flags)) {
+        result = CURLUE_BAD_QUERY;
+        goto fail;
+      }
+    }
+    else {
+      /* single byte query */
+      u->query = strdup("");
+      if(!u->query) {
+        result = CURLUE_OUT_OF_MEMORY;
+        goto fail;
+      }
+    }
+  }
+  else
+    pathlen = strlen(path) - fraglen;
  
-  if(!path[0])
-    /* if there's no path left set, unset */
+  if(!pathlen) {
+    /* there is no path left, unset */
      path = NULL;
+  }
    else {
+    if(!u->path) {
+      u->path = Curl_memdup(path, pathlen + 1);
+      if(!u->path) {
+        result = CURLUE_OUT_OF_MEMORY;
+        goto fail;
+      }
+      u->path[pathlen] = 0;
+      path = u->path;
+    }
+    else if(flags & CURLU_URLENCODE)
+      /* it might have encoded more than just the path so cut it */
+      u->path[pathlen] = 0;
+
+    if(junkscan(u->path, flags)) {
+      result = CURLUE_BAD_PATH;
+      goto fail;
+    }
+
      if(!(flags & CURLU_PATH_AS_IS)) {
        /* remove ../ and ./ sequences according to RFC3986 */
-      char *newp = Curl_dedotdotify(path);
-      if(!newp)
-        return CURLUE_OUT_OF_MEMORY;
-
-      if(strcmp(newp, path)) {
-        /* if we got a new version */
-        if(path_alloced)
-          Curl_safefree(u->temppath);
-        u->temppath = path = newp;
-        path_alloced = TRUE;
+      char *newp = dedotdotify((char *)path, pathlen);
+      if(!newp) {
+        result = CURLUE_OUT_OF_MEMORY;
+        goto fail;
        }
-      else
-        free(newp);
+      free(u->path);
+      u->path = newp;
      }
-
-    u->path = path_alloced?path:strdup(path);
-    if(!u->path)
-      return CURLUE_OUT_OF_MEMORY;
-    u->temppath = NULL; /* used now */
    }
  
-  if(hostname) {
+  if(Curl_dyn_len(&host)) {
      char normalized_ipv4[sizeof("255.255.255.255") + 1];
  
      /*
       * Parse the login details and strip them out of the host name.
       */
-    result = parse_hostname_login(u, &hostname, flags);
-    if(result)
-      return result;
-
-    result = Curl_parse_port(u, hostname, url_has_scheme);
+    result = parse_hostname_login(u, &host, flags);
+    if(!result)
+      result = Curl_parse_port(u, &host, schemelen);
      if(result)
-      return result;
+      goto fail;
  
-    if(junkscan(hostname, flags))
-      return CURLUE_BAD_HOSTNAME;
+    if(junkscan(Curl_dyn_ptr(&host), flags)) {
+      result = CURLUE_BAD_HOSTNAME;
+      goto fail;
+    }
  
-    if(0 == strlen(hostname) && (flags & CURLU_NO_AUTHORITY)) {
-      /* Skip hostname check, it's allowed to be empty. */
-      u->host = strdup("");
+    if(ipv4_normalize(Curl_dyn_ptr(&host),
+                      normalized_ipv4, sizeof(normalized_ipv4))) {
+      Curl_dyn_reset(&host);
+      if(Curl_dyn_add(&host, normalized_ipv4)) {
+        result = CURLUE_OUT_OF_MEMORY;
+        goto fail;
+      }
      }
      else {
-      if(ipv4_normalize(hostname, normalized_ipv4, sizeof(normalized_ipv4)))
-        u->host = strdup(normalized_ipv4);
-      else {
-        result = decode_host(hostname, &u->host);
-        if(result)
-          return result;
-        result = hostname_check(u, u->host);
-        if(result)
-          return result;
-      }
+      result = decode_host(&host);
+      if(!result)
+        result = hostname_check(u, Curl_dyn_ptr(&host), Curl_dyn_len(&host));
+      if(result)
+        goto fail;
      }
-    if(!u->host)
-      return CURLUE_OUT_OF_MEMORY;
+
      if((flags & CURLU_GUESS_SCHEME) && !schemep) {
+      const char *hostname = Curl_dyn_ptr(&host);
        /* legacy curl-style guess based on host name */
        if(checkprefix("ftp.", hostname))
          schemep = "ftp";
@@ -1127,27 +1267,26 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags)
          schemep = "http";
  
        u->scheme = strdup(schemep);
-      if(!u->scheme)
-        return CURLUE_OUT_OF_MEMORY;
+      if(!u->scheme) {
+        result = CURLUE_OUT_OF_MEMORY;
+        goto fail;
+      }
+    }
+  }
+  else if(flags & CURLU_NO_AUTHORITY) {
+    /* allowed to be empty. */
+    if(Curl_dyn_add(&host, "")) {
+      result = CURLUE_OUT_OF_MEMORY;
+      goto fail;
      }
    }
  
-  Curl_safefree(u->scratch);
-  Curl_safefree(u->temppath);
+  u->host = Curl_dyn_ptr(&host);
  
-  return CURLUE_OK;
-}
-
-/*
- * Parse the URL and set the relevant members of the Curl_URL struct.
- */
-static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
-{
-  CURLUcode result = seturl(url, u, flags);
-  if(result) {
-    free_urlhandle(u);
-    memset(u, 0, sizeof(struct Curl_URL));
-  }
+  return result;
+  fail:
+  Curl_dyn_free(&host);
+  free_urlhandle(u);
    return result;
  }
  
@@ -1165,8 +1304,6 @@ static CURLUcode parseurl_and_replace(const char *url, CURLU *u,
      free_urlhandle(u);
      *u = tmpurl;
    }
-  else
-    free_urlhandle(&tmpurl);
    return result;
  }
  
@@ -1344,14 +1481,13 @@ CURLUcode curl_url_get(CURLU *u, CURLUPart what,
        if(u->host[0] == '[') {
          if(u->zoneid) {
            /* make it '[ host %25 zoneid ]' */
+          struct dynbuf enc;
            size_t hostlen = strlen(u->host);
-          size_t alen = hostlen + 3 + strlen(u->zoneid) + 1;
-          allochost = malloc(alen);
-          if(!allochost)
+          Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
+          if(Curl_dyn_addf(&enc, "%.*s%%25%s]", (int)hostlen - 1, u->host,
+                           u->zoneid))
              return CURLUE_OUT_OF_MEMORY;
-          memcpy(allochost, u->host, hostlen - 1);
-          msnprintf(&allochost[hostlen - 1], alen - hostlen + 1,
-                    "%%25%s]", u->zoneid);
+          allochost = Curl_dyn_ptr(&enc);
          }
        }
        else if(urlencode) {
@@ -1362,32 +1498,32 @@ CURLUcode curl_url_get(CURLU *u, CURLUPart what,
        else {
          /* only encode '%' in output host name */
          char *host = u->host;
-        size_t pcount = 0;
+        bool percent = FALSE;
          /* first, count number of percents present in the name */
          while(*host) {
-          if(*host == '%')
-            pcount++;
+          if(*host == '%') {
+            percent = TRUE;
+            break;
+          }
            host++;
          }
-        /* if there were percents, encode the host name */
-        if(pcount) {
-          size_t hostlen = strlen(u->host);
-          size_t alen = hostlen + 2 * pcount + 1;
-          char *o = allochost = malloc(alen);
-          if(!allochost)
-            return CURLUE_OUT_OF_MEMORY;
-
+        /* if there were percent(s), encode the host name */
+        if(percent) {
+          struct dynbuf enc;
+          CURLcode result;
+          Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
            host = u->host;
            while(*host) {
-            if(*host == '%') {
-              memcpy(o, "%25", 3);
-              o += 3;
-              host++;
-              continue;
-            }
-            *o++ = *host++;
+            if(*host == '%')
+              result = Curl_dyn_addn(&enc, "%25", 3);
+            else
+              result = Curl_dyn_addn(&enc, host, 1);
+            if(result)
+              return CURLUE_OUT_OF_MEMORY;
+            host++;
            }
-          *o = '\0';
+          free(u->host);
+          u->host = Curl_dyn_ptr(&enc);
          }
        }
  
@@ -1445,16 +1581,12 @@ CURLUcode curl_url_get(CURLU *u, CURLUPart what,
        *part = decoded;
      }
      if(urlencode) {
-      /* worst case output length is 3x the original! */
-      char *newp = malloc(strlen(*part) * 3);
-      if(!newp)
+      struct dynbuf enc;
+      Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
+      if(strcpy_url(&enc, *part, TRUE)) /* consider it relative */
          return CURLUE_OUT_OF_MEMORY;
-      if(strcpy_url(newp, *part, TRUE)) { /* consider it relative */
-        free(*part);
-        *part = newp;
-      }
-      else
-        free(newp);
+      free(*part);
+      *part = Curl_dyn_ptr(&enc);
      }
  
      return CURLUE_OK;
@@ -1628,14 +1760,16 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what,
  
      if(urlencode) {
        const unsigned char *i;
-      char *o;
-      char *enc = malloc(nalloc * 3 + 1); /* for worst case! */
-      if(!enc)
-        return CURLUE_OUT_OF_MEMORY;
-      for(i = (const unsigned char *)part, o = enc; *i; i++) {
+      struct dynbuf enc;
+
+      Curl_dyn_init(&enc, nalloc * 3 + 1);
+
+      for(i = (const unsigned char *)part; *i; i++) {
+        CURLcode result;
          if((*i == ' ') && plusencode) {
-          *o = '+';
-          o++;
+          result = Curl_dyn_addn(&enc, "+", 1);
+          if(result)
+            return CURLUE_OUT_OF_MEMORY;
          }
          else if(Curl_isunreserved(*i) ||
                  ((*i == '/') && urlskipslash) ||
@@ -1643,16 +1777,17 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what,
            if((*i == '=') && equalsencode)
              /* only skip the first equals sign */
              equalsencode = FALSE;
-          *o = *i;
-          o++;
+          result = Curl_dyn_addn(&enc, i, 1);
+          if(result)
+            return CURLUE_OUT_OF_MEMORY;
          }
          else {
-          msnprintf(o, 4, "%%%02x", *i);
-          o += 3;
+          result = Curl_dyn_addf(&enc, "%%%02x", *i);
+          if(result)
+            return CURLUE_OUT_OF_MEMORY;
          }
        }
-      *o = 0; /* null-terminate */
-      newp = enc;
+      newp = Curl_dyn_ptr(&enc);
      }
      else {
        char *p;
@@ -1674,34 +1809,41 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what,
      }
  
      if(appendquery) {
-      /* Append the string onto the old query. Add a '&' separator if none is
-         present at the end of the existing query already */
+      /* Append the 'newp' string onto the old query. Add a '&' separator if
+         none is present at the end of the existing query already */
+
        size_t querylen = u->query ? strlen(u->query) : 0;
        bool addamperand = querylen && (u->query[querylen -1] != '&');
        if(querylen) {
-        size_t newplen = strlen(newp);
-        char *p = malloc(querylen + addamperand + newplen + 1);
-        if(!p) {
-          free((char *)newp);
-          return CURLUE_OUT_OF_MEMORY;
+        struct dynbuf enc;
+        Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
+
+        if(Curl_dyn_addn(&enc, u->query, querylen)) /* add original query */
+          goto nomem;
+
+        if(addamperand) {
+          if(Curl_dyn_addn(&enc, "&", 1))
+            goto nomem;
          }
-        strcpy(p, u->query); /* original query */
-        if(addamperand)
-          p[querylen] = '&'; /* ampersand */
-        strcpy(&p[querylen + addamperand], newp); /* new suffix */
+        if(Curl_dyn_add(&enc, newp))
+          goto nomem;
          free((char *)newp);
          free(*storep);
-        *storep = p;
+        *storep = Curl_dyn_ptr(&enc);
          return CURLUE_OK;
+        nomem:
+        free((char *)newp);
+        return CURLUE_OUT_OF_MEMORY;
        }
      }
  
      if(what == CURLUPART_HOST) {
-      if(0 == strlen(newp) && (flags & CURLU_NO_AUTHORITY)) {
+      size_t n = strlen(newp);
+      if(!n && (flags & CURLU_NO_AUTHORITY)) {
          /* Skip hostname check, it's allowed to be empty. */
        }
        else {
-        if(hostname_check(u, (char *)newp)) {
+        if(hostname_check(u, (char *)newp, n)) {
            free((char *)newp);
            return CURLUE_BAD_HOSTNAME;
          }
diff --git a/tests/unit/unit1395.c b/tests/unit/unit1395.c

index 79115dd9038fd69da7ef1c0cee0f0e9e2eea8aa1..c37f942d355a831ae8ec1271f0d6fe89be4c245d 100644 (file)
--- a/tests/unit/unit1395.c
+++ b/tests/unit/unit1395.c
@@ -23,7 +23,8 @@
   ***************************************************************************/
  #include "curlcheck.h"
  
-#include "dotdot.h"
+/* copied from urlapi.c */
+extern char *dedotdotify(const char *input, size_t clen);
  
  #include "memdebug.h"
  
@@ -77,7 +78,7 @@ UNITTEST_START
    };
  
    for(i = 0; i < sizeof(pairs)/sizeof(pairs[0]); i++) {
-    char *out = Curl_dedotdotify(pairs[i].input);
+    char *out = dedotdotify(pairs[i].input, strlen(pairs[i].input));
      abort_unless(out != NULL, "returned NULL!");
  
      if(strcmp(out, pairs[i].output)) {
diff --git a/tests/unit/unit1653.c b/tests/unit/unit1653.c

index 5b6e0a1c55153d212f6aa1c56250ef53b746b94c..40a21dd066baaab648a552acb670029064353bff 100644 (file)
--- a/tests/unit/unit1653.c
+++ b/tests/unit/unit1653.c
@@ -44,6 +44,19 @@ unit_stop(void)
  
  #define free_and_clear(x) free(x); x = NULL
  
+static CURLUcode parse_port(CURLU *url,
+                           char *h, bool has_scheme)
+{
+  struct dynbuf host;
+  CURLUcode ret;
+  Curl_dyn_init(&host, 10000);
+  if(Curl_dyn_add(&host, h))
+    return CURLUE_OUT_OF_MEMORY;
+  ret = Curl_parse_port(url, &host, has_scheme);
+  Curl_dyn_free(&host);
+  return ret;
+}
+
  UNITTEST_START
  {
    CURLUcode ret;
@@ -57,8 +70,8 @@ UNITTEST_START
    ipv6port = strdup("[fe80::250:56ff:fea7:da15]");
    if(!ipv6port)
      goto fail;
-  ret = Curl_parse_port(u, ipv6port, FALSE);
-  fail_unless(ret == CURLUE_OK, "Curl_parse_port returned error");
+  ret = parse_port(u, ipv6port, FALSE);
+  fail_unless(ret == CURLUE_OK, "parse_port returned error");
    ret = curl_url_get(u, CURLUPART_PORT, &portnum, CURLU_NO_DEFAULT_PORT);
    fail_unless(ret != CURLUE_OK, "curl_url_get portnum returned something");
    free_and_clear(ipv6port);
@@ -71,8 +84,8 @@ UNITTEST_START
    ipv6port = strdup("[fe80::250:56ff:fea7:da15|");
    if(!ipv6port)
      goto fail;
-  ret = Curl_parse_port(u, ipv6port, FALSE);
-  fail_unless(ret != CURLUE_OK, "Curl_parse_port true on error");
+  ret = parse_port(u, ipv6port, FALSE);
+  fail_unless(ret != CURLUE_OK, "parse_port true on error");
    free_and_clear(ipv6port);
    curl_url_cleanup(u);
  
@@ -82,8 +95,8 @@ UNITTEST_START
    ipv6port = strdup("[fe80::250:56ff;fea7:da15]:80");
    if(!ipv6port)
      goto fail;
-  ret = Curl_parse_port(u, ipv6port, FALSE);
-  fail_unless(ret != CURLUE_OK, "Curl_parse_port true on error");
+  ret = parse_port(u, ipv6port, FALSE);
+  fail_unless(ret != CURLUE_OK, "parse_port true on error");
    free_and_clear(ipv6port);
    curl_url_cleanup(u);
  
@@ -94,8 +107,8 @@ UNITTEST_START
    ipv6port = strdup("[fe80::250:56ff:fea7:da15%25eth3]:80");
    if(!ipv6port)
      goto fail;
-  ret = Curl_parse_port(u, ipv6port, FALSE);
-  fail_unless(ret == CURLUE_OK, "Curl_parse_port returned error");
+  ret = parse_port(u, ipv6port, FALSE);
+  fail_unless(ret == CURLUE_OK, "parse_port returned error");
    ret = curl_url_get(u, CURLUPART_PORT, &portnum, 0);
    fail_unless(ret == CURLUE_OK, "curl_url_get portnum returned error");
    fail_unless(portnum && !strcmp(portnum, "80"), "Check portnumber");
@@ -110,8 +123,8 @@ UNITTEST_START
    ipv6port = strdup("[fe80::250:56ff:fea7:da15%25eth3]");
    if(!ipv6port)
      goto fail;
-  ret = Curl_parse_port(u, ipv6port, FALSE);
-  fail_unless(ret == CURLUE_OK, "Curl_parse_port returned error");
+  ret = parse_port(u, ipv6port, FALSE);
+  fail_unless(ret == CURLUE_OK, "parse_port returned error");
    free_and_clear(ipv6port);
    curl_url_cleanup(u);
  
@@ -122,8 +135,8 @@ UNITTEST_START
    ipv6port = strdup("[fe80::250:56ff:fea7:da15]:81");
    if(!ipv6port)
      goto fail;
-  ret = Curl_parse_port(u, ipv6port, FALSE);
-  fail_unless(ret == CURLUE_OK, "Curl_parse_port returned error");
+  ret = parse_port(u, ipv6port, FALSE);
+  fail_unless(ret == CURLUE_OK, "parse_port returned error");
    ret = curl_url_get(u, CURLUPART_PORT, &portnum, 0);
    fail_unless(ret == CURLUE_OK, "curl_url_get portnum returned error");
    fail_unless(portnum && !strcmp(portnum, "81"), "Check portnumber");
@@ -138,8 +151,8 @@ UNITTEST_START
    ipv6port = strdup("[fe80::250:56ff:fea7:da15];81");
    if(!ipv6port)
      goto fail;
-  ret = Curl_parse_port(u, ipv6port, FALSE);
-  fail_unless(ret != CURLUE_OK, "Curl_parse_port true on error");
+  ret = parse_port(u, ipv6port, FALSE);
+  fail_unless(ret != CURLUE_OK, "parse_port true on error");
    free_and_clear(ipv6port);
    curl_url_cleanup(u);
  
@@ -149,8 +162,8 @@ UNITTEST_START
    ipv6port = strdup("[fe80::250:56ff:fea7:da15]80");
    if(!ipv6port)
      goto fail;
-  ret = Curl_parse_port(u, ipv6port, FALSE);
-  fail_unless(ret != CURLUE_OK, "Curl_parse_port true on error");
+  ret = parse_port(u, ipv6port, FALSE);
+  fail_unless(ret != CURLUE_OK, "parse_port true on error");
    free_and_clear(ipv6port);
    curl_url_cleanup(u);
  
@@ -162,8 +175,8 @@ UNITTEST_START
    ipv6port = strdup("[fe80::250:56ff:fea7:da15]:");
    if(!ipv6port)
      goto fail;
-  ret = Curl_parse_port(u, ipv6port, TRUE);
-  fail_unless(ret == CURLUE_OK, "Curl_parse_port returned error");
+  ret = parse_port(u, ipv6port, TRUE);
+  fail_unless(ret == CURLUE_OK, "parse_port returned error");
    free_and_clear(ipv6port);
    curl_url_cleanup(u);
  
@@ -174,8 +187,8 @@ UNITTEST_START
    ipv6port = strdup("[fe80::250:56ff:fea7:da15!25eth3]:80");
    if(!ipv6port)
      goto fail;
-  ret = Curl_parse_port(u, ipv6port, FALSE);
-  fail_unless(ret != CURLUE_OK, "Curl_parse_port returned non-error");
+  ret = parse_port(u, ipv6port, FALSE);
+  fail_unless(ret != CURLUE_OK, "parse_port returned non-error");
    free_and_clear(ipv6port);
    curl_url_cleanup(u);
  
@@ -186,8 +199,8 @@ UNITTEST_START
    ipv6port = strdup("[fe80::250:56ff:fea7:da15%eth3]:80");
    if(!ipv6port)
      goto fail;
-  ret = Curl_parse_port(u, ipv6port, FALSE);
-  fail_unless(ret == CURLUE_OK, "Curl_parse_port returned error");
+  ret = parse_port(u, ipv6port, FALSE);
+  fail_unless(ret == CURLUE_OK, "parse_port returned error");
    free_and_clear(ipv6port);
    curl_url_cleanup(u);
  
@@ -200,8 +213,8 @@ UNITTEST_START
                      "aaaaaaaaaaaaaaaaaaaaaa:");
    if(!ipv6port)
      goto fail;
-  ret = Curl_parse_port(u, ipv6port, FALSE);
-  fail_unless(ret == CURLUE_BAD_PORT_NUMBER, "Curl_parse_port did wrong");
+  ret = parse_port(u, ipv6port, FALSE);
+  fail_unless(ret == CURLUE_BAD_PORT_NUMBER, "parse_port did wrong");
    fail:
    free(ipv6port);
    curl_url_cleanup(u);
author	Daniel Stenberg <daniel@haxx.se>
	Thu, 1 Sep 2022 08:16:24 +0000 (10:16 +0200)
committer	Daniel Stenberg <daniel@haxx.se>
	Wed, 7 Sep 2022 08:21:45 +0000 (10:21 +0200)
docs/DYNBUF.md		patch \| blob \| blame \| history
lib/Makefile.inc		patch \| blob \| blame \| history
lib/dotdot.c	[deleted file]	patch \| blob \| blame \| history
lib/dotdot.h	[deleted file]	patch \| blob \| blame \| history
lib/dynbuf.c		patch \| blob \| blame \| history
lib/dynbuf.h		patch \| blob \| blame \| history
lib/url.c		patch \| blob \| blame \| history
lib/urlapi-int.h		patch \| blob \| blame \| history
lib/urlapi.c		patch \| blob \| blame \| history
tests/unit/unit1395.c		patch \| blob \| blame \| history
tests/unit/unit1653.c		patch \| blob \| blame \| history