From: Daniel Stenberg <daniel@haxx.se>
Date: Tue, 18 Feb 2025 22:03:09 +0000 (+0100)
Subject: cookie: convert to using strparse
X-Git-Tag: curl-8_13_0~423
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=1aea05a6c2699e80c75936d58569851555acd603;p=thirdparty%2Fcurl.git

cookie: convert to using strparse

- using strparse cleans up the code and makes it easier to read and follow
- remove ? handling never used - since the path is provided without queries nowadays
- simplify sanitize_cookie_path
- avoid the strdup in pathmatch()

Closes #16386
---

diff --git a/lib/cookie.c b/lib/cookie.c
index d4ac256600..22a7681752 100644
--- a/lib/cookie.c
+++ b/lib/cookie.c
@@ -159,12 +159,10 @@ static bool cookie_tailmatch(const char *cookie_domain,
  * matching cookie path and URL path
  * RFC6265 5.1.4 Paths and Path-Match
  */
-static bool pathmatch(const char *cookie_path, const char *request_uri)
+static bool pathmatch(const char *cookie_path, const char *uri_path)
 {
   size_t cookie_path_len;
   size_t uri_path_len;
-  char *uri_path = NULL;
-  char *pos;
   bool ret = FALSE;
 
   /* cookie_path must not have last '/' separator. ex: /sample */
@@ -174,19 +172,9 @@ static bool pathmatch(const char *cookie_path, const char *request_uri)
     return TRUE;
   }
 
-  uri_path = strdup(request_uri);
-  if(!uri_path)
-    return FALSE;
-  pos = strchr(uri_path, '?');
-  if(pos)
-    *pos = 0x0;
-
   /* #-fragments are already cut off! */
-  if(0 == strlen(uri_path) || uri_path[0] != '/') {
-    strstore(&uri_path, "/", 1);
-    if(!uri_path)
-      return FALSE;
-  }
+  if(0 == strlen(uri_path) || uri_path[0] != '/')
+    uri_path = "/";
 
   /*
    * here, RFC6265 5.1.4 says
@@ -200,16 +188,12 @@ static bool pathmatch(const char *cookie_path, const char *request_uri)
 
   uri_path_len = strlen(uri_path);
 
-  if(uri_path_len < cookie_path_len) {
-    ret = FALSE;
+  if(uri_path_len < cookie_path_len)
     goto pathmatched;
-  }
 
   /* not using checkprefix() because matching should be case-sensitive */
-  if(strncmp(cookie_path, uri_path, cookie_path_len)) {
-    ret = FALSE;
+  if(strncmp(cookie_path, uri_path, cookie_path_len))
     goto pathmatched;
-  }
 
   /* The cookie-path and the uri-path are identical. */
   if(cookie_path_len == uri_path_len) {
@@ -223,10 +207,7 @@ static bool pathmatch(const char *cookie_path, const char *request_uri)
     goto pathmatched;
   }
 
-  ret = FALSE;
-
 pathmatched:
-  free(uri_path);
   return ret;
 }
 
@@ -300,34 +281,27 @@ static size_t cookiehash(const char * const domain)
  */
 static char *sanitize_cookie_path(const char *cookie_path)
 {
-  size_t len;
-  char *new_path = strdup(cookie_path);
-  if(!new_path)
-    return NULL;
+  size_t len = strlen(cookie_path);
 
-  /* some stupid site sends path attribute with '"'. */
-  len = strlen(new_path);
-  if(new_path[0] == '\"') {
-    memmove(new_path, new_path + 1, len);
+  /* some sites send path attribute within '"'. */
+  if(cookie_path[0] == '\"') {
+    cookie_path++;
     len--;
   }
-  if(len && (new_path[len - 1] == '\"')) {
-    new_path[--len] = 0x0;
-  }
+  if(len && (cookie_path[len - 1] == '\"'))
+    len--;
 
   /* RFC6265 5.2.4 The Path Attribute */
-  if(new_path[0] != '/') {
+  if(cookie_path[0] != '/')
     /* Let cookie-path be the default-path. */
-    strstore(&new_path, "/", 1);
-    return new_path;
-  }
+    return strdup("/");
 
+  /* remove trailing slash */
   /* convert /hoge/ to /hoge */
-  if(len && new_path[len - 1] == '/') {
-    new_path[len - 1] = 0x0;
-  }
+  if(len && cookie_path[len - 1] == '/')
+    len--;
 
-  return new_path;
+  return Curl_memdup0(cookie_path, len);
 }
 
 /*
@@ -369,9 +343,12 @@ void Curl_cookie_loadfiles(struct Curl_easy *data)
  */
 static void strstore(char **str, const char *newstr, size_t len)
 {
-  DEBUGASSERT(newstr);
   DEBUGASSERT(str);
   free(*str);
+  if(!len) {
+    len++;
+    newstr = "";
+  }
   *str = Curl_memdup0(newstr, len);
 }
 
@@ -515,50 +492,30 @@ parse_cookie_header(struct Curl_easy *data,
 
   now = time(NULL);
   do {
-    size_t vlen;
-    size_t nlen;
-
-    while(ISBLANK(*ptr))
-      ptr++;
+    struct Curl_str name;
+    struct Curl_str val;
 
     /* we have a <name>=<value> pair or a stand-alone word here */
-    nlen = strcspn(ptr, ";\t\r\n=");
-    if(nlen) {
+    if(!Curl_str_cspn(&ptr, &name, ";\t\r\n=")) {
       bool done = FALSE;
       bool sep = FALSE;
-      const char *namep = ptr;
-      const char *valuep;
-
-      ptr += nlen;
+      Curl_str_trimblanks(&name);
 
-      /* trim trailing spaces and tabs after name */
-      while(nlen && ISBLANK(namep[nlen - 1]))
-        nlen--;
+      if(!Curl_str_single(&ptr, '=')) {
+        sep = TRUE; /* a '=' was used */
+        if(!Curl_str_cspn(&ptr, &val, ";\r\n")) {
+          Curl_str_trimblanks(&val);
 
-      if(*ptr == '=') {
-        ptr++;
-        /* Skip spaces and tabs before the value */
-        while(ISBLANK(*ptr))
-          ptr++;
-
-        vlen = strcspn(ptr, ";\r\n");
-        valuep = ptr;
-        sep = TRUE;
-        ptr = &valuep[vlen];
-
-        /* Strip off trailing whitespace from the value */
-        while(vlen && ISBLANK(valuep[vlen-1]))
-          vlen--;
-
-        /* Reject cookies with a TAB inside the value */
-        if(memchr(valuep, '\t', vlen)) {
-          infof(data, "cookie contains TAB, dropping");
-          return CERR_TAB;
+          /* Reject cookies with a TAB inside the value */
+          if(memchr(val.str, '\t', val.len)) {
+            infof(data, "cookie contains TAB, dropping");
+            return CERR_TAB;
+          }
         }
       }
       else {
-        valuep = NULL;
-        vlen = 0;
+        val.str = NULL;
+        val.len = 0;
       }
 
       /*
@@ -566,10 +523,10 @@ parse_cookie_header(struct Curl_easy *data,
        * combination of name + contents. Chrome and Firefox support 4095 or
        * 4096 bytes combo
        */
-      if(nlen >= (MAX_NAME-1) || vlen >= (MAX_NAME-1) ||
-         ((nlen + vlen) > MAX_NAME)) {
+      if(name.len >= (MAX_NAME-1) || val.len >= (MAX_NAME-1) ||
+         ((name.len + val.len) > MAX_NAME)) {
         infof(data, "oversized cookie dropped, name/val %zu + %zu bytes",
-              nlen, vlen);
+              name.len, val.len);
         return CERR_TOO_BIG;
       }
 
@@ -579,12 +536,10 @@ parse_cookie_header(struct Curl_easy *data,
        * "the rest". Prefixes must start with '__' and end with a '-', so
        * only test for names where that can possibly be true.
        */
-      if(nlen >= 7 && namep[0] == '_' && namep[1] == '_') {
-        if(strncasecompare("__Secure-", namep, 9))
-          co->prefix_secure = TRUE;
-        else if(strncasecompare("__Host-", namep, 7))
-          co->prefix_host = TRUE;
-      }
+      if(strncasecompare("__Secure-", name.str, 9))
+        co->prefix_secure = TRUE;
+      else if(strncasecompare("__Host-", name.str, 7))
+        co->prefix_host = TRUE;
 
       /*
        * Use strstore() below to properly deal with received cookie
@@ -598,8 +553,8 @@ parse_cookie_header(struct Curl_easy *data,
           /* Bad name/value pair. */
           return CERR_NO_SEP;
 
-        strstore(&co->name, namep, nlen);
-        strstore(&co->value, valuep, vlen);
+        strstore(&co->name, name.str, name.len);
+        strstore(&co->value, val.str, val.len);
         done = TRUE;
         if(!co->name || !co->value)
           return CERR_NO_NAME_VALUE;
@@ -609,7 +564,7 @@ parse_cookie_header(struct Curl_easy *data,
           return CERR_INVALID_OCTET;
         }
       }
-      else if(!vlen) {
+      else if(!val.len) {
         /*
          * this was a "<name>=" with no content, and we must allow
          * 'secure' and 'httponly' specified this weirdly
@@ -620,7 +575,7 @@ parse_cookie_header(struct Curl_easy *data,
          * using a secure protocol, or when the cookie is being set by
          * reading from file
          */
-        if((nlen == 6) && strncasecompare("secure", namep, 6)) {
+        if(Curl_str_casecompare(&name, "secure")) {
           if(secure || !ci->running) {
             co->secure = TRUE;
           }
@@ -628,7 +583,7 @@ parse_cookie_header(struct Curl_easy *data,
             return CERR_BAD_SECURE;
           }
         }
-        else if((nlen == 8) && strncasecompare("httponly", namep, 8))
+        else if(Curl_str_casecompare(&name, "httponly"))
           co->httponly = TRUE;
         else if(sep)
           /* there was a '=' so we are not done parsing this field */
@@ -636,8 +591,8 @@ parse_cookie_header(struct Curl_easy *data,
       }
       if(done)
         ;
-      else if((nlen == 4) && strncasecompare("path", namep, 4)) {
-        strstore(&co->path, valuep, vlen);
+      else if(Curl_str_casecompare(&name, "path")) {
+        strstore(&co->path, val.str, val.len);
         if(!co->path)
           return CERR_OUT_OF_MEMORY;
         free(co->spath); /* if this is set again */
@@ -645,8 +600,7 @@ parse_cookie_header(struct Curl_easy *data,
         if(!co->spath)
           return CERR_OUT_OF_MEMORY;
       }
-      else if((nlen == 6) &&
-              strncasecompare("domain", namep, 6) && vlen) {
+      else if(Curl_str_casecompare(&name, "domain") && val.len) {
         bool is_ip;
 
         /*
@@ -654,10 +608,8 @@ parse_cookie_header(struct Curl_easy *data,
          * the given domain is not valid and thus cannot be set.
          */
 
-        if('.' == valuep[0]) {
-          valuep++; /* ignore preceding dot */
-          vlen--;
-        }
+        if('.' == val.str[0])
+          Curl_str_nudge(&val, 1);
 
 #ifndef USE_LIBPSL
         /*
@@ -665,17 +617,17 @@ parse_cookie_header(struct Curl_easy *data,
          * TLD or otherwise "protected" suffix. To reduce risk, we require a
          * dot OR the exact hostname being "localhost".
          */
-        if(bad_domain(valuep, vlen))
+        if(bad_domain(val.str, val.len))
           domain = ":";
 #endif
 
-        is_ip = Curl_host_is_ipnum(domain ? domain : valuep);
+        is_ip = Curl_host_is_ipnum(domain ? domain : val.str);
 
         if(!domain
-           || (is_ip && !strncmp(valuep, domain, vlen) &&
-               (vlen == strlen(domain)))
-           || (!is_ip && cookie_tailmatch(valuep, vlen, domain))) {
-          strstore(&co->domain, valuep, vlen);
+           || (is_ip && !strncmp(val.str, domain, val.len) &&
+               (val.len == strlen(domain)))
+           || (!is_ip && cookie_tailmatch(val.str, val.len, domain))) {
+          strstore(&co->domain, val.str, val.len);
           if(!co->domain)
             return CERR_OUT_OF_MEMORY;
 
@@ -689,14 +641,14 @@ parse_cookie_header(struct Curl_easy *data,
            * not a domain to which the current host belongs. Mark as bad.
            */
           infof(data, "skipped cookie with bad tailmatch domain: %s",
-                valuep);
+                val.str);
           return CERR_NO_TAILMATCH;
         }
       }
-      else if((nlen == 7) && strncasecompare("version", namep, 7)) {
+      else if(Curl_str_casecompare(&name, "version")) {
         /* just ignore */
       }
-      else if((nlen == 7) && strncasecompare("max-age", namep, 7)) {
+      else if(Curl_str_casecompare(&name, "max-age") && val.len) {
         /*
          * Defined in RFC2109:
          *
@@ -707,7 +659,7 @@ parse_cookie_header(struct Curl_easy *data,
          * cookie should be discarded immediately.
          */
         int rc;
-        const char *maxage = valuep;
+        const char *maxage = val.str;
         if(*maxage == '\"')
           maxage++;
         rc = Curl_str_number(&maxage, &co->expires, CURL_OFF_T_MAX);
@@ -734,8 +686,8 @@ parse_cookie_header(struct Curl_easy *data,
         }
         cap_expires(now, co);
       }
-      else if((nlen == 7) && strncasecompare("expires", namep, 7)) {
-        if(!co->expires && (vlen < MAX_DATE_LENGTH)) {
+      else if(Curl_str_casecompare(&name, "expires") && val.len) {
+        if(!co->expires && (val.len < MAX_DATE_LENGTH)) {
           /*
            * Let max-age have priority.
            *
@@ -743,8 +695,8 @@ parse_cookie_header(struct Curl_easy *data,
            * will be treated as a session cookie
            */
           char dbuf[MAX_DATE_LENGTH + 1];
-          memcpy(dbuf, valuep, vlen);
-          dbuf[vlen] = 0;
+          memcpy(dbuf, val.str, val.len);
+          dbuf[val.len] = 0;
           co->expires = Curl_getdate_capped(dbuf);
 
           /*
@@ -764,15 +716,8 @@ parse_cookie_header(struct Curl_easy *data,
        * Else, this is the second (or more) name we do not know about!
        */
     }
-    else {
-      /* this is an "illegal" <what>=<this> pair */
-    }
 
-    while(ISBLANK(*ptr))
-      ptr++;
-    if(*ptr == ';')
-      ptr++;
-    else
+    if(Curl_str_single(&ptr, ';'))
       break;
   } while(1);
 
@@ -785,23 +730,11 @@ parse_cookie_header(struct Curl_easy *data,
 
   if(!co->path && path) {
     /*
-     * No path was given in the header line, set the default. Note that the
-     * passed-in path to this function MAY have a '?' and following part that
-     * MUST NOT be stored as part of the path.
+     * No path was given in the header line, set the default.
      */
-    char *queryp = strchr(path, '?');
-
-    /*
-     * queryp is where the interesting part of the path ends, so now we
-     * want to the find the last
-     */
-    char *endslash;
-    if(!queryp)
-      endslash = strrchr(path, '/');
-    else
-      endslash = memrchr(path, '/', (queryp - path));
+    const char *endslash = strrchr(path, '/');
     if(endslash) {
-      size_t pathlen = (endslash-path + 1); /* include end slash */
+      size_t pathlen = (endslash - path + 1); /* include end slash */
       co->path = Curl_memdup0(path, pathlen);
       if(co->path) {
         co->spath = sanitize_cookie_path(co->path);
diff --git a/lib/strparse.c b/lib/strparse.c
index 30a20f7367..5bfbdcbe0e 100644
--- a/lib/strparse.c
+++ b/lib/strparse.c
@@ -210,3 +210,34 @@ int Curl_str_nudge(struct Curl_str *str, size_t num)
   }
   return STRE_OVERFLOW;
 }
+
+/* Get the following character sequence that consists only of bytes not
+   present in the 'reject' string. Like strcspn(). */
+int Curl_str_cspn(const char **linep, struct Curl_str *out, const char *reject)
+{
+  const char *s = *linep;
+  size_t len;
+  DEBUGASSERT(linep && *linep);
+
+  len = strcspn(s, reject);
+  if(len) {
+    out->str = s;
+    out->len = len;
+    *linep = &s[len];
+    return STRE_OK;
+  }
+  out->str = NULL;
+  out->len = 0;
+  return STRE_SHORT;
+}
+
+/* remove ISBLANK()s from both ends of the string */
+void Curl_str_trimblanks(struct Curl_str *out)
+{
+  while(out->len && ISBLANK(*out->str))
+    Curl_str_nudge(out, 1);
+
+  /* trim trailing spaces and tabs */
+  while(out->len && ISBLANK(out->str[out->len - 1]))
+    out->len--;
+}
diff --git a/lib/strparse.h b/lib/strparse.h
index 946cf5b2ba..41aa3fed10 100644
--- a/lib/strparse.h
+++ b/lib/strparse.h
@@ -81,4 +81,7 @@ int Curl_str_casecompare(struct Curl_str *str, const char *check);
 
 int Curl_str_nudge(struct Curl_str *str, size_t num);
 
+int Curl_str_cspn(const char **linep, struct Curl_str *out, const char *cspn);
+void Curl_str_trimblanks(struct Curl_str *out);
+
 #endif /* HEADER_CURL_STRPARSE_H */