strparse: string parsing helper functions

author Daniel Stenberg <daniel@haxx.se>

Fri, 6 Dec 2024 13:31:38 +0000 (14:31 +0100)

committer Daniel Stenberg <daniel@haxx.se>

Thu, 12 Dec 2024 15:00:52 +0000 (16:00 +0100)
author Daniel Stenberg <daniel@haxx.se>
Fri, 6 Dec 2024 13:31:38 +0000 (14:31 +0100)
committer Daniel Stenberg <daniel@haxx.se>
Thu, 12 Dec 2024 15:00:52 +0000 (16:00 +0100)
diff --git a/docs/Makefile.am b/docs/Makefile.am

index d944111b1ad59424bbc345b04a793d25c53ceb35..8320184970b8eaaf6bbca46d16ff1c02de7b499f 100644 (file)
--- a/docs/Makefile.am
+++ b/docs/Makefile.am
@@ -56,6 +56,7 @@ INTERNALDOCS =                                  \
   internals/NEW-PROTOCOL.md                      \
   internals/README.md                            \
   internals/SPLAY.md                             \
+ internals/STRPARSE.md                          \
   internals/WEBSOCKET.md
  
  EXTRA_DIST =                                    \
diff --git a/docs/internals/STRPARSE.md b/docs/internals/STRPARSE.md

new file mode 100644 (file)

index 0000000..a50448c
--- /dev/null
+++ b/docs/internals/STRPARSE.md
@@ -0,0 +1,109 @@
+<!--
+Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
+
+SPDX-License-Identifier: curl
+-->
+
+# String parsing with `strparse`
+
+The functions take input via a pointer to a pointer, which allows the
+functions to advance the pointer on success which then by extension allows
+"chaining" of functions like this example that gets a word, a space and then a
+second word:
+
+~~~c
+if(Curl_str_word(&line, &word1, MAX) ||
+   Curl_str_singlespace(&line) ||
+   Curl_str_word(&line, &word2, MAX))
+  fprintf(stderr, "ERROR\n");
+~~~
+
+## Strings
+
+The functions that return string information do so by populating a
+`struct Curl_str`:
+
+~~~c
+struct Curl_str {
+  char *str;
+  size_t len;
+};
+~~~
+
+## `Curl_str_word`
+
+~~~c
+int Curl_str_word(char **linep, struct Curl_str *out, const size_t max);
+~~~
+
+Get a sequence of bytes until the first space or the end of the string. Return
+non-zero on error. There is no way to include a space in the word, no sort of
+escaping. The word must be at least one byte, otherwise it is considered an
+error.
+
+`max` is the longest accepted word, or it returns error.
+
+On a successful return, `linep` is updated to point to the byte immediately
+following the parsed word.
+
+## `Curl_str_until`
+
+~~~c
+int Curl_str_until(char **linep, struct Curl_str *out, const size_t max,
+                   char delim);
+~~~
+
+Like `Curl_str_word` but instead of parsing to space, it parses to a given
+custom non-zero delimiter byte `delim`.
+
+`max` is the longest accepted word, or it returns error.
+
+The parsed word must be at least one byte, otherwise it is considered an
+error.
+
+## `Curl_str_quotedword`
+
+~~~c
+int Curl_str_quotedword(char **linep, struct Curl_str *out, const size_t max);
+~~~
+
+Get a "quoted" word. This means everything that is provided within a leading
+and an ending double quote character. No escaping possible.
+
+`max` is the longest accepted word, or it returns error.
+
+The parsed word must be at least one byte, otherwise it is considered an
+error.
+
+## `Curl_str_single`
+
+~~~c
+int Curl_str_single(char **linep, char byte);
+~~~
+
+Advance over a single character provided in `byte`. Return non-zero on error.
+
+## `Curl_str_singlespace`
+
+~~~c
+int Curl_str_singlespace(char **linep);
+~~~
+
+Advance over a single ASCII space. Return non-zero on error.
+
+## `Curl_str_number`
+
+~~~c
+int Curl_str_number(char **linep, size_t *nump, size_t max);
+~~~
+
+Get an unsigned decimal number. Leading zeroes are just swallowed. Return
+non-zero on error.
+
+## `Curl_str_newline`
+
+~~~c
+int Curl_str_newline(char **linep);
+~~~
+
+Check for a single CR or LF. Return non-zero on error.
diff --git a/lib/Makefile.inc b/lib/Makefile.inc

index 1d3f69a23c3ce5029344b13fe0d98654449771f5..8529246160226fdca79d30763a018d0e93f545a1 100644 (file)
--- a/lib/Makefile.inc
+++ b/lib/Makefile.inc
@@ -223,6 +223,7 @@ LIB_CFILES =         \
    strcase.c          \
    strdup.c           \
    strerror.c         \
+  strparse.c         \
    strtok.c           \
    strtoofft.c        \
    system_win32.c     \
@@ -358,6 +359,7 @@ LIB_HFILES =         \
    strcase.h          \
    strdup.h           \
    strerror.h         \
+  strparse.h         \
    strtok.h           \
    strtoofft.h        \
    system_win32.h     \
diff --git a/lib/altsvc.c b/lib/altsvc.c

index ea37b0afc1a6f631694f9d2a4afe73f49c795fa3..a3ab368c50146e40ac4a3d32dddbee8f4f7a85f0 100644 (file)
--- a/lib/altsvc.c
+++ b/lib/altsvc.c
@@ -40,6 +40,7 @@
  #include "rename.h"
  #include "strdup.h"
  #include "inet_pton.h"
+#include "strparse.h"
  
  /* The last 3 #include files should be in this order */
  #include "curl_printf.h"
@@ -47,25 +48,26 @@
  #include "memdebug.h"
  
  #define MAX_ALTSVC_LINE 4095
-#define MAX_ALTSVC_DATELENSTR "64"
-#define MAX_ALTSVC_DATELEN 64
-#define MAX_ALTSVC_HOSTLENSTR "512"
-#define MAX_ALTSVC_HOSTLEN 512
-#define MAX_ALTSVC_ALPNLENSTR "10"
+#define MAX_ALTSVC_DATELEN 256
+#define MAX_ALTSVC_HOSTLEN 2048
  #define MAX_ALTSVC_ALPNLEN 10
  
  #define H3VERSION "h3"
  
-static enum alpnid alpn2alpnid(char *name)
+static enum alpnid alpn2alpnid(char *name, size_t len)
  {
-  if(strcasecompare(name, "h1"))
-    return ALPN_h1;
-  if(strcasecompare(name, "h2"))
-    return ALPN_h2;
-  if(strcasecompare(name, H3VERSION))
-    return ALPN_h3;
-  if(strcasecompare(name, "http/1.1"))
-    return ALPN_h1;
+  if(len == 2) {
+    if(strncasecompare(name, "h1", 2))
+      return ALPN_h1;
+    if(strncasecompare(name, "h2", 2))
+      return ALPN_h2;
+    if(strncasecompare(name, "h3", 2))
+      return ALPN_h3;
+  }
+  else if(len == 8) {
+    if(strncasecompare(name, "http/1.1", 8))
+      return ALPN_h1;
+  }
    return ALPN_none; /* unknown, probably rubbish input */
  }
  
@@ -93,18 +95,17 @@ static void altsvc_free(struct altsvc *as)
  }
  
  static struct altsvc *altsvc_createid(const char *srchost,
+                                      size_t hlen,
                                        const char *dsthost,
                                        size_t dlen, /* dsthost length */
                                        enum alpnid srcalpnid,
                                        enum alpnid dstalpnid,
-                                      unsigned int srcport,
-                                      unsigned int dstport)
+                                      size_t srcport,
+                                      size_t dstport)
  {
    struct altsvc *as = calloc(1, sizeof(struct altsvc));
-  size_t hlen;
    if(!as)
      return NULL;
-  hlen = strlen(srchost);
    DEBUGASSERT(hlen);
    DEBUGASSERT(dlen);
    if(!hlen || !dlen) {
@@ -136,8 +137,8 @@ static struct altsvc *altsvc_createid(const char *srchost,
  
    as->src.alpnid = srcalpnid;
    as->dst.alpnid = dstalpnid;
-  as->src.port = curlx_ultous(srcport);
-  as->dst.port = curlx_ultous(dstport);
+  as->src.port = (unsigned short)srcport;
+  as->dst.port = (unsigned short)dstport;
  
    return as;
  error:
@@ -145,18 +146,19 @@ error:
    return NULL;
  }
  
-static struct altsvc *altsvc_create(char *srchost,
-                                    char *dsthost,
-                                    char *srcalpn,
-                                    char *dstalpn,
-                                    unsigned int srcport,
-                                    unsigned int dstport)
+static struct altsvc *altsvc_create(struct Curl_str *srchost,
+                                    struct Curl_str *dsthost,
+                                    struct Curl_str *srcalpn,
+                                    struct Curl_str *dstalpn,
+                                    size_t srcport,
+                                    size_t dstport)
  {
-  enum alpnid dstalpnid = alpn2alpnid(dstalpn);
-  enum alpnid srcalpnid = alpn2alpnid(srcalpn);
+  enum alpnid dstalpnid = alpn2alpnid(dstalpn->str, dstalpn->len);
+  enum alpnid srcalpnid = alpn2alpnid(srcalpn->str, srcalpn->len);
    if(!srcalpnid || !dstalpnid)
      return NULL;
-  return altsvc_createid(srchost, dsthost, strlen(dsthost),
+  return altsvc_createid(srchost->str, srchost->len,
+                         dsthost->str, dsthost->len,
                           srcalpnid, dstalpnid,
                           srcport, dstport);
  }
@@ -167,31 +169,50 @@ static CURLcode altsvc_add(struct altsvcinfo *asi, char *line)
    /* Example line:
       h2 example.com 443 h3 shiny.example.com 8443 "20191231 10:00:00" 1
     */
-  char srchost[MAX_ALTSVC_HOSTLEN + 1];
-  char dsthost[MAX_ALTSVC_HOSTLEN + 1];
-  char srcalpn[MAX_ALTSVC_ALPNLEN + 1];
-  char dstalpn[MAX_ALTSVC_ALPNLEN + 1];
-  char date[MAX_ALTSVC_DATELEN + 1];
-  unsigned int srcport;
-  unsigned int dstport;
-  unsigned int prio;
-  unsigned int persist;
-  int rc;
-
-  rc = sscanf(line,
-              "%" MAX_ALTSVC_ALPNLENSTR "s %" MAX_ALTSVC_HOSTLENSTR "s %u "
-              "%" MAX_ALTSVC_ALPNLENSTR "s %" MAX_ALTSVC_HOSTLENSTR "s %u "
-              "\"%" MAX_ALTSVC_DATELENSTR "[^\"]\" %u %u",
-              srcalpn, srchost, &srcport,
-              dstalpn, dsthost, &dstport,
-              date, &persist, &prio);
-  if(9 == rc) {
+  struct Curl_str srchost;
+  struct Curl_str dsthost;
+  struct Curl_str srcalpn;
+  struct Curl_str dstalpn;
+  struct Curl_str date;
+  size_t srcport;
+  size_t dstport;
+  size_t persist;
+  size_t prio;
+
+  if(Curl_str_word(&line, &srcalpn, MAX_ALTSVC_ALPNLEN) ||
+     Curl_str_singlespace(&line) ||
+     Curl_str_word(&line, &srchost, MAX_ALTSVC_HOSTLEN) ||
+     Curl_str_singlespace(&line) ||
+     Curl_str_number(&line, &srcport, 65535) ||
+     Curl_str_singlespace(&line) ||
+     Curl_str_word(&line, &dstalpn, MAX_ALTSVC_ALPNLEN) ||
+     Curl_str_singlespace(&line) ||
+     Curl_str_word(&line, &dsthost, MAX_ALTSVC_HOSTLEN) ||
+     Curl_str_singlespace(&line) ||
+     Curl_str_number(&line, &dstport, 65535) ||
+     Curl_str_singlespace(&line) ||
+     Curl_str_quotedword(&line, &date, MAX_ALTSVC_DATELEN) ||
+     Curl_str_singlespace(&line) ||
+     Curl_str_number(&line, &persist, 1) ||
+     Curl_str_singlespace(&line) ||
+     Curl_str_number(&line, &prio, 0) ||
+     Curl_str_newline(&line))
+    ;
+  else {
      struct altsvc *as;
-    time_t expires = Curl_getdate_capped(date);
-    as = altsvc_create(srchost, dsthost, srcalpn, dstalpn, srcport, dstport);
+    char dbuf[MAX_ALTSVC_DATELEN + 1];
+    time_t expires;
+
+    /* The date parser works on a null terminated string. The maximum length
+       is upheld by Curl_str_quotedword(). */
+    memcpy(dbuf, date.str, date.len);
+    dbuf[date.len] = 0;
+    expires = Curl_getdate_capped(dbuf);
+    as = altsvc_create(&srchost, &dsthost, &srcalpn, &dstalpn, srcport,
+                       dstport);
      if(as) {
        as->expires = expires;
-      as->prio = prio;
+      as->prio = 0; /* not supported, so just set zero */
        as->persist = persist ? 1 : 0;
        Curl_llist_append(&asi->list, as, &as->node);
      }
@@ -471,8 +492,6 @@ static time_t altsvc_debugtime(void *unused)
  #define time(x) altsvc_debugtime(x)
  #endif
  
-#define ISNEWLINE(x) (((x) == '\n') || (x) == '\r')
-
  /*
   * Curl_altsvc_parse() takes an incoming alt-svc response header and stores
   * the data correctly in the cache.
@@ -495,6 +514,8 @@ CURLcode Curl_altsvc_parse(struct Curl_easy *data,
    unsigned short dstport = srcport; /* the same by default */
    CURLcode result = getalnum(&p, alpnbuf, sizeof(alpnbuf));
    size_t entries = 0;
+  size_t alpnlen = strlen(alpnbuf);
+  size_t srchostlen = strlen(srchost);
  #ifdef CURL_DISABLE_VERBOSE_STRINGS
    (void)data;
  #endif
@@ -515,7 +536,7 @@ CURLcode Curl_altsvc_parse(struct Curl_easy *data,
    do {
      if(*p == '=') {
        /* [protocol]="[host][:port]" */
-      enum alpnid dstalpnid = alpn2alpnid(alpnbuf); /* the same by default */
+      enum alpnid dstalpnid = alpn2alpnid(alpnbuf, alpnlen);
        p++;
        if(*p == '\"') {
          const char *dsthost = "";
@@ -633,7 +654,8 @@ CURLcode Curl_altsvc_parse(struct Curl_easy *data,
                 this is the first entry of the line. */
              altsvc_flush(asi, srcalpnid, srchost, srcport);
  
-          as = altsvc_createid(srchost, dsthost, dstlen,
+          as = altsvc_createid(srchost, srchostlen,
+                               dsthost, dstlen,
                                 srcalpnid, dstalpnid,
                                 srcport, dstport);
            if(as) {
diff --git a/lib/curl_ctype.h b/lib/curl_ctype.h

index 7f0d0cc2916d08e63a9373708fdacc1c4ed7f490..b70acf3c5a66174381ec361f06527ad29e5afd74 100644 (file)
--- a/lib/curl_ctype.h
+++ b/lib/curl_ctype.h
@@ -46,6 +46,6 @@
  #define ISURLPUNTCS(x) (((x) == '-') || ((x) == '.') || ((x) == '_') || \
                          ((x) == '~'))
  #define ISUNRESERVED(x) (ISALNUM(x) || ISURLPUNTCS(x))
-
+#define ISNEWLINE(x) (((x) == '\n') || (x) == '\r')
  
  #endif /* HEADER_CURL_CTYPE_H */
diff --git a/lib/hsts.c b/lib/hsts.c

index 5b0137263b08af77df2521381cfb9ba62c9aa20e..99452b61a6ab0ac989c6e8b82dbc93733d02d25e 100644 (file)
--- a/lib/hsts.c
+++ b/lib/hsts.c
@@ -41,6 +41,7 @@
  #include "rename.h"
  #include "share.h"
  #include "strdup.h"
+#include "strparse.h"
  
  /* The last 3 #include files should be in this order */
  #include "curl_printf.h"
@@ -48,10 +49,8 @@
  #include "memdebug.h"
  
  #define MAX_HSTS_LINE 4095
-#define MAX_HSTS_HOSTLEN 256
-#define MAX_HSTS_HOSTLENSTR "256"
-#define MAX_HSTS_DATELEN 64
-#define MAX_HSTS_DATELENSTR "64"
+#define MAX_HSTS_HOSTLEN 2048
+#define MAX_HSTS_DATELEN 256
  #define UNLIMITED "unlimited"
  
  #if defined(DEBUGBUILD) || defined(UNITTESTS)
@@ -109,14 +108,13 @@ void Curl_hsts_cleanup(struct hsts **hp)
  
  static CURLcode hsts_create(struct hsts *h,
                              const char *hostname,
+                            size_t hlen,
                              bool subdomains,
                              curl_off_t expires)
  {
-  size_t hlen;
    DEBUGASSERT(h);
    DEBUGASSERT(hostname);
  
-  hlen = strlen(hostname);
    if(hlen && (hostname[hlen - 1] == '.'))
      /* strip off any trailing dot */
      --hlen;
@@ -150,6 +148,7 @@ CURLcode Curl_hsts_parse(struct hsts *h, const char *hostname,
    bool subdomains = FALSE;
    struct stsentry *sts;
    time_t now = time(NULL);
+  size_t hlen = strlen(hostname);
  
    if(Curl_host_is_ipnum(hostname))
      /* "explicit IP address identification of all forms is excluded."
@@ -218,7 +217,7 @@ CURLcode Curl_hsts_parse(struct hsts *h, const char *hostname,
  
    if(!expires) {
      /* remove the entry if present verbatim (without subdomain match) */
-    sts = Curl_hsts(h, hostname, FALSE);
+    sts = Curl_hsts(h, hostname, hlen, FALSE);
      if(sts) {
        Curl_node_remove(&sts->node);
        hsts_free(sts);
@@ -233,14 +232,14 @@ CURLcode Curl_hsts_parse(struct hsts *h, const char *hostname,
      expires += now;
  
    /* check if it already exists */
-  sts = Curl_hsts(h, hostname, FALSE);
+  sts = Curl_hsts(h, hostname, hlen, FALSE);
    if(sts) {
      /* just update these fields */
      sts->expires = expires;
      sts->includeSubDomains = subdomains;
    }
    else
-    return hsts_create(h, hostname, subdomains, expires);
+    return hsts_create(h, hostname, hlen, subdomains, expires);
  
    return CURLE_OK;
  }
@@ -252,12 +251,11 @@ CURLcode Curl_hsts_parse(struct hsts *h, const char *hostname,
   * attempted.
   */
  struct stsentry *Curl_hsts(struct hsts *h, const char *hostname,
-                           bool subdomain)
+                           size_t hlen, bool subdomain)
  {
    struct stsentry *bestsub = NULL;
    if(h) {
      time_t now = time(NULL);
-    size_t hlen = strlen(hostname);
      struct Curl_llist_node *e;
      struct Curl_llist_node *n;
      size_t blen = 0;
@@ -424,29 +422,40 @@ static CURLcode hsts_add(struct hsts *h, char *line)
       example.com "20191231 10:00:00"
       .example.net "20191231 10:00:00"
     */
-  char host[MAX_HSTS_HOSTLEN + 1];
-  char date[MAX_HSTS_DATELEN + 1];
-  int rc;
-
-  rc = sscanf(line,
-              "%" MAX_HSTS_HOSTLENSTR "s \"%" MAX_HSTS_DATELENSTR "[^\"]\"",
-              host, date);
-  if(2 == rc) {
-    time_t expires = strcmp(date, UNLIMITED) ? Curl_getdate_capped(date) :
-      TIME_T_MAX;
+  struct Curl_str host;
+  struct Curl_str date;
+
+  if(Curl_str_word(&line, &host, MAX_HSTS_HOSTLEN) ||
+     Curl_str_singlespace(&line) ||
+     Curl_str_quotedword(&line, &date, MAX_HSTS_DATELEN) ||
+     Curl_str_newline(&line))
+    ;
+  else {
      CURLcode result = CURLE_OK;
-    char *p = host;
      bool subdomain = FALSE;
      struct stsentry *e;
-    if(p[0] == '.') {
-      p++;
+    char dbuf[MAX_HSTS_DATELEN + 1];
+    time_t expires;
+
+    /* The date parser works on a null terminated string. The maximum length
+       is upheld by Curl_str_quotedword(). */
+    memcpy(dbuf, date.str, date.len);
+    dbuf[date.len] = 0;
+
+    expires = strcmp(dbuf, UNLIMITED) ? Curl_getdate_capped(dbuf) :
+      TIME_T_MAX;
+
+    if(host.str[0] == '.') {
+      host.str++;
+      host.len--;
        subdomain = TRUE;
      }
      /* only add it if not already present */
-    e = Curl_hsts(h, p, subdomain);
+    e = Curl_hsts(h, host.str, host.len, subdomain);
      if(!e)
-      result = hsts_create(h, p, subdomain, expires);
-    else if(strcasecompare(p, e->host)) {
+      result = hsts_create(h, host.str, host.len, subdomain, expires);
+    else if((strlen(e->host) == host.len) &&
+            strncasecompare(host.str, e->host, host.len)) {
        /* the same hostname, use the largest expire time */
        if(expires > e->expires)
          e->expires = expires;
@@ -488,7 +497,7 @@ static CURLcode hsts_pull(struct Curl_easy *data, struct hsts *h)
            expires = Curl_getdate_capped(e.expire);
          else
            expires = TIME_T_MAX; /* the end of time */
-        result = hsts_create(h, e.name,
+        result = hsts_create(h, e.name, strlen(e.name),
                               /* bitfield to bool conversion: */
                               e.includeSubDomains ? TRUE : FALSE,
                               expires);
diff --git a/lib/hsts.h b/lib/hsts.h

index 1c544f97bd8a081a974a53e934730af9c5a781e7..e8d0f9d552357b6daffb9b1e0edd5474dac03a03 100644 (file)
--- a/lib/hsts.h
+++ b/lib/hsts.h
@@ -52,7 +52,7 @@ void Curl_hsts_cleanup(struct hsts **hp);
  CURLcode Curl_hsts_parse(struct hsts *h, const char *hostname,
                           const char *sts);
  struct stsentry *Curl_hsts(struct hsts *h, const char *hostname,
-                           bool subdomain);
+                           size_t hlen, bool subdomain);
  CURLcode Curl_hsts_save(struct Curl_easy *data, struct hsts *h,
                          const char *file);
  CURLcode Curl_hsts_loadfile(struct Curl_easy *data,
diff --git a/lib/http_aws_sigv4.c b/lib/http_aws_sigv4.c

index 5d4848fed2b4dc83d32726cfe4e182f18e5eeabc..1e67a3fbe9c8ad137ceb6a8f6aab12e480e1a87e 100644 (file)
--- a/lib/http_aws_sigv4.c
+++ b/lib/http_aws_sigv4.c
@@ -35,6 +35,7 @@
  #include "parsedate.h"
  #include "sendf.h"
  #include "escape.h"
+#include "strparse.h"
  
  #include <time.h>
  
@@ -118,8 +119,6 @@ static void trim_headers(struct curl_slist *head)
  
  /* maximum length for the aws sivg4 parts */
  #define MAX_SIGV4_LEN 64
-#define MAX_SIGV4_LEN_TXT "64"
-
  #define DATE_HDR_KEY_LEN (MAX_SIGV4_LEN + sizeof("X--Date"))
  
  /* string been x-PROVIDER-date:TIMESTAMP, I need +1 for ':' */
@@ -160,7 +159,8 @@ static int compare_header_names(const char *a, const char *b)
  static CURLcode make_headers(struct Curl_easy *data,
                               const char *hostname,
                               char *timestamp,
-                             char *provider1,
+                             const char *provider1,
+                             size_t plen, /* length of provider1 */
                               char **date_header,
                               char *content_sha256_header,
                               struct dynbuf *canonical_headers,
@@ -174,16 +174,16 @@ static CURLcode make_headers(struct Curl_easy *data,
    struct curl_slist *l;
    bool again = TRUE;
  
-  /* provider1 mid */
-  Curl_strntolower(provider1, provider1, strlen(provider1));
-  provider1[0] = Curl_raw_toupper(provider1[0]);
-
-  msnprintf(date_hdr_key, DATE_HDR_KEY_LEN, "X-%s-Date", provider1);
+  msnprintf(date_hdr_key, DATE_HDR_KEY_LEN, "X-%.*s-Date",
+            (int)plen, provider1);
+  /* provider1 ucfirst */
+  Curl_strntolower(&date_hdr_key[2], provider1, plen);
+  date_hdr_key[2] = Curl_raw_toupper(provider1[0]);
  
-  /* provider1 lowercase */
-  Curl_strntolower(provider1, provider1, 1); /* first byte only */
    msnprintf(date_full_hdr, DATE_FULL_HDR_LEN,
-            "x-%s-date:%s", provider1, timestamp);
+            "x-%.*s-date:%s", (int)plen, provider1, timestamp);
+  /* provider1 lowercase */
+  Curl_strntolower(&date_full_hdr[2], provider1, plen);
  
    if(!Curl_checkheaders(data, STRCONST("Host"))) {
      char *fullhost;
@@ -336,6 +336,7 @@ fail:
  /* try to parse a payload hash from the content-sha256 header */
  static char *parse_content_sha_hdr(struct Curl_easy *data,
                                     const char *provider1,
+                                   size_t plen,
                                     size_t *value_len)
  {
    char key[CONTENT_SHA256_KEY_LEN];
@@ -343,7 +344,8 @@ static char *parse_content_sha_hdr(struct Curl_easy *data,
    char *value;
    size_t len;
  
-  key_len = msnprintf(key, sizeof(key), "x-%s-content-sha256", provider1);
+  key_len = msnprintf(key, sizeof(key), "x-%.*s-content-sha256",
+                      (int)plen, provider1);
  
    value = Curl_checkheaders(data, key, key_len);
    if(!value)
@@ -389,6 +391,7 @@ static CURLcode calc_payload_hash(struct Curl_easy *data,
  
  static CURLcode calc_s3_payload_hash(struct Curl_easy *data,
                                       Curl_HttpReq httpreq, char *provider1,
+                                     size_t plen,
                                       unsigned char *sha_hash,
                                       char *sha_hex, char *header)
  {
@@ -415,7 +418,7 @@ static CURLcode calc_s3_payload_hash(struct Curl_easy *data,
  
    /* format the required content-sha256 header */
    msnprintf(header, CONTENT_SHA256_HDR_LEN,
-            "x-%s-content-sha256: %s", provider1, sha_hex);
+            "x-%.*s-content-sha256: %s", (int)plen, provider1, sha_hex);
  
    ret = CURLE_OK;
  fail:
@@ -571,12 +574,11 @@ CURLcode Curl_output_aws_sigv4(struct Curl_easy *data, bool proxy)
    CURLcode result = CURLE_OUT_OF_MEMORY;
    struct connectdata *conn = data->conn;
    size_t len;
-  const char *arg;
-  char provider0[MAX_SIGV4_LEN + 1]="";
-  char provider1[MAX_SIGV4_LEN + 1]="";
-  char region[MAX_SIGV4_LEN + 1]="";
-  char service[MAX_SIGV4_LEN + 1]="";
-  bool sign_as_s3 = FALSE;
+  char *line;
+  struct Curl_str provider0;
+  struct Curl_str provider1;
+  struct Curl_str region = { NULL, 0};
+  struct Curl_str service = { NULL, 0};
    const char *hostname = conn->host.name;
    time_t clock;
    struct tm tm;
@@ -625,27 +627,31 @@ CURLcode Curl_output_aws_sigv4(struct Curl_easy *data, bool proxy)
     * AWS is the default because most of non-amazon providers
     * are still using aws:amz as a prefix.
     */
-  arg = data->set.str[STRING_AWS_SIGV4] ?
-    data->set.str[STRING_AWS_SIGV4] : "aws:amz";
+  line = data->set.str[STRING_AWS_SIGV4] ?
+    data->set.str[STRING_AWS_SIGV4] : (char *)"aws:amz";
  
-  /* provider1[:provider2[:region[:service]]]
+  /* provider0[:provider1[:region[:service]]]
  
       No string can be longer than N bytes of non-whitespace
    */
-  (void)sscanf(arg, "%" MAX_SIGV4_LEN_TXT "[^:]"
-               ":%" MAX_SIGV4_LEN_TXT "[^:]"
-               ":%" MAX_SIGV4_LEN_TXT "[^:]"
-               ":%" MAX_SIGV4_LEN_TXT "s",
-               provider0, provider1, region, service);
-  if(!provider0[0]) {
+  if(Curl_str_until(&line, &provider0, MAX_SIGV4_LEN, ':')) {
      failf(data, "first aws-sigv4 provider cannot be empty");
      result = CURLE_BAD_FUNCTION_ARGUMENT;
      goto fail;
    }
-  else if(!provider1[0])
-    strcpy(provider1, provider0);
+  if(Curl_str_single(&line, ':') ||
+     Curl_str_until(&line, &provider1, MAX_SIGV4_LEN, ':')) {
+    provider1.str = provider0.str;
+    provider1.len = provider0.len;
+  }
+  else if(Curl_str_single(&line, ':') ||
+          Curl_str_until(&line, &region, MAX_SIGV4_LEN, ':') ||
+          Curl_str_single(&line, ':') ||
+          Curl_str_until(&line, &service, MAX_SIGV4_LEN, ':')) {
+    /* nothing to do */
+  }
  
-  if(!service[0]) {
+  if(!service.len) {
      char *hostdot = strchr(hostname, '.');
      if(!hostdot) {
        failf(data, "aws-sigv4: service missing in parameters and hostname");
@@ -658,12 +664,13 @@ CURLcode Curl_output_aws_sigv4(struct Curl_easy *data, bool proxy)
        result = CURLE_URL_MALFORMAT;
        goto fail;
      }
-    memcpy(service, hostname, len);
-    service[len] = '\0';
+    service.str = (char *)hostname;
+    service.len = len;
  
-    infof(data, "aws_sigv4: picked service %s from host", service);
+    infof(data, "aws_sigv4: picked service %.*s from host",
+          (int)service.len, service.str);
  
-    if(!region[0]) {
+    if(!region.len) {
        const char *reg = hostdot + 1;
        const char *hostreg = strchr(reg, '.');
        if(!hostreg) {
@@ -677,25 +684,29 @@ CURLcode Curl_output_aws_sigv4(struct Curl_easy *data, bool proxy)
          result = CURLE_URL_MALFORMAT;
          goto fail;
        }
-      memcpy(region, reg, len);
-      region[len] = '\0';
-      infof(data, "aws_sigv4: picked region %s from host", region);
+      region.str = (char *)reg;
+      region.len = len;
+      infof(data, "aws_sigv4: picked region %.*s from host",
+            (int)region.len, region.str);
      }
    }
  
    Curl_http_method(data, conn, &method, &httpreq);
  
-  /* AWS S3 requires a x-amz-content-sha256 header, and supports special
-   * values like UNSIGNED-PAYLOAD */
-  sign_as_s3 = (strcasecompare(provider0, "aws") &&
-                strcasecompare(service, "s3"));
-
-  payload_hash = parse_content_sha_hdr(data, provider1, &payload_hash_len);
+  payload_hash = parse_content_sha_hdr(data, provider1.str, provider1.len,
+                                       &payload_hash_len);
  
    if(!payload_hash) {
+    /* AWS S3 requires a x-amz-content-sha256 header, and supports special
+     * values like UNSIGNED-PAYLOAD */
+    bool sign_as_s3 = ((provider0.len == 3) &&
+                       strncasecompare(provider0.str, "aws", 3)) &&
+      ((service.len == 2) && strncasecompare(service.str, "s3", 2));
+
      if(sign_as_s3)
-      result = calc_s3_payload_hash(data, httpreq, provider1, sha_hash,
-                                    sha_hex, content_sha256_hdr);
+      result = calc_s3_payload_hash(data, httpreq,
+                                    provider1.str, provider1.len,
+                                    sha_hash, sha_hex, content_sha256_hdr);
      else
        result = calc_payload_hash(data, sha_hash, sha_hex);
      if(result)
@@ -726,7 +737,8 @@ CURLcode Curl_output_aws_sigv4(struct Curl_easy *data, bool proxy)
      goto fail;
    }
  
-  result = make_headers(data, hostname, timestamp, provider1,
+  result = make_headers(data, hostname, timestamp,
+                        provider1.str, provider1.len,
                          &date_header, content_sha256_hdr,
                          &canonical_headers, &signed_headers);
    if(result)
@@ -771,14 +783,18 @@ CURLcode Curl_output_aws_sigv4(struct Curl_easy *data, bool proxy)
  
    DEBUGF(infof(data, "Canonical request: %s", canonical_request));
  
-  /* provider 0 lowercase */
-  Curl_strntolower(provider0, provider0, strlen(provider0));
-  request_type = aprintf("%s4_request", provider0);
+  request_type = aprintf("%.*s4_request", (int)provider0.len, provider0.str);
    if(!request_type)
      goto fail;
  
-  credential_scope = aprintf("%s/%s/%s/%s",
-                             date, region, service, request_type);
+  /* provider0 is lowercased *after* aprintf() so that the buffer can be
+     written to */
+  Curl_strntolower(request_type, request_type, provider0.len);
+
+  credential_scope = aprintf("%s/%.*s/%.*s/%s",
+                             date, (int)region.len, region.str,
+                             (int)service.len, service.str,
+                             request_type);
    if(!credential_scope)
      goto fail;
  
@@ -788,42 +804,41 @@ CURLcode Curl_output_aws_sigv4(struct Curl_easy *data, bool proxy)
  
    sha256_to_hex(sha_hex, sha_hash);
  
-  /* provider 0 uppercase */
-  Curl_strntoupper(provider0, provider0, strlen(provider0));
-
    /*
     * Google allows using RSA key instead of HMAC, so this code might change
     * in the future. For now we only support HMAC.
     */
-  str_to_sign = aprintf("%s4-HMAC-SHA256\n" /* Algorithm */
+  str_to_sign = aprintf("%.*s4-HMAC-SHA256\n" /* Algorithm */
                          "%s\n" /* RequestDateTime */
                          "%s\n" /* CredentialScope */
                          "%s",  /* HashedCanonicalRequest in hex */
-                        provider0,
+                        (int)provider0.len, provider0.str,
                          timestamp,
                          credential_scope,
                          sha_hex);
-  if(!str_to_sign) {
+  if(!str_to_sign)
      goto fail;
-  }
  
-  /* provider 0 uppercase */
-  secret = aprintf("%s4%s", provider0,
+  /* make provider0 part done uppercase */
+  Curl_strntoupper(str_to_sign, provider0.str, provider0.len);
+
+  secret = aprintf("%.*s4%s", (int)provider0.len, provider0.str,
                     data->state.aptr.passwd ?
                     data->state.aptr.passwd : "");
    if(!secret)
      goto fail;
+  /* make provider0 part done uppercase */
+  Curl_strntoupper(secret, provider0.str, provider0.len);
  
    HMAC_SHA256(secret, strlen(secret), date, strlen(date), sign0);
-  HMAC_SHA256(sign0, sizeof(sign0), region, strlen(region), sign1);
-  HMAC_SHA256(sign1, sizeof(sign1), service, strlen(service), sign0);
+  HMAC_SHA256(sign0, sizeof(sign0), region.str, region.len, sign1);
+  HMAC_SHA256(sign1, sizeof(sign1), service.str, service.len, sign0);
    HMAC_SHA256(sign0, sizeof(sign0), request_type, strlen(request_type), sign1);
    HMAC_SHA256(sign1, sizeof(sign1), str_to_sign, strlen(str_to_sign), sign0);
  
    sha256_to_hex(sha_hex, sign0);
  
-  /* provider 0 uppercase */
-  auth_headers = aprintf("Authorization: %s4-HMAC-SHA256 "
+  auth_headers = aprintf("Authorization: %.*s4-HMAC-SHA256 "
                           "Credential=%s/%s, "
                           "SignedHeaders=%s, "
                           "Signature=%s\r\n"
@@ -834,7 +849,7 @@ CURLcode Curl_output_aws_sigv4(struct Curl_easy *data, bool proxy)
                            */
                           "%s"
                           "%s", /* optional sha256 header includes \r\n */
-                         provider0,
+                         (int)provider0.len, provider0.str,
                           user,
                           credential_scope,
                           Curl_dyn_ptr(&signed_headers),
@@ -844,6 +859,9 @@ CURLcode Curl_output_aws_sigv4(struct Curl_easy *data, bool proxy)
    if(!auth_headers) {
      goto fail;
    }
+  /* provider 0 uppercase */
+  Curl_strntoupper(&auth_headers[sizeof("Authorization: ") - 1],
+                   provider0.str, provider0.len);
  
    Curl_safefree(data->state.aptr.userpwd);
    data->state.aptr.userpwd = auth_headers;
diff --git a/lib/strparse.c b/lib/strparse.c

new file mode 100644 (file)

index 0000000..dce0825
--- /dev/null
+++ b/lib/strparse.c
@@ -0,0 +1,136 @@
+/***************************************************************************
+ *                                  _   _ ____  _
+ *  Project                     ___| | | |  _ \| |
+ *                             / __| | | | |_) | |
+ *                            | (__| |_| |  _ <| |___
+ *                             \___|\___/|_| \_\_____|
+ *
+ * Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
+ *
+ * This software is licensed as described in the file COPYING, which
+ * you should have received as part of this distribution. The terms
+ * are also available at https://curl.se/docs/copyright.html.
+ *
+ * You may opt to use, copy, modify, merge, publish, distribute and/or sell
+ * copies of the Software, and permit persons to whom the Software is
+ * furnished to do so, under the terms of the COPYING file.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ * SPDX-License-Identifier: curl
+ *
+ ***************************************************************************/
+
+#include "strparse.h"
+
+/* Get a word until the first DELIM or end of string. At least one byte long.
+   On success 'out' refers to the word (pointing into the line, so it is not
+   necessarily null-terminated) and *linep points at the delimiter or at the
+   terminating null byte. On error 'out' is cleared and *linep is untouched.
+   Returns STRE_OK, STRE_SHORT (empty word) or STRE_BIG (the word is longer
+   than 'max' bytes).
+   return non-zero on error */
+int Curl_str_until(char **linep, struct Curl_str *out,
+                   const size_t max, char delim)
+{
+  char *s;
+  size_t len = 0;
+  DEBUGASSERT(linep && *linep && out && max && delim);
+
+  s = *linep; /* dereference only after the assert has checked linep */
+  out->str = NULL;
+  out->len = 0;
+  while(*s && (*s != delim)) {
+    s++;
+    if(++len > max)
+      return STRE_BIG; /* a word of exactly 'max' bytes is still fine */
+  }
+  if(!len)
+    return STRE_SHORT;
+  out->str = *linep;
+  out->len = len;
+  *linep = s; /* point to the first byte after the word */
+  return STRE_OK;
+}
+
+/* Get a word that ends at the first space character (' ') or at the end of
+   the string. This is Curl_str_until() with a space as delimiter, so the
+   word must be at least one byte and at most 'max' bytes long.
+   return non-zero on error */
+int Curl_str_word(char **linep, struct Curl_str *out,
+                  const size_t max)
+{
+  const char space = ' '; /* the only byte that ends a word here */
+  return Curl_str_until(linep, out, max, space);
+}
+
+
+/* Get a "quoted" word. No escaping possible.
+   The line must start with a double quote. On success 'out' refers to the
+   bytes between the quotes (possibly none; pointing into the line) and
+   *linep points to the byte after the closing quote. On error 'out' is
+   cleared and *linep is untouched. Returns STRE_OK, STRE_BEGQUOTE (no
+   opening quote), STRE_ENDQUOTE (no closing quote) or STRE_BIG (more than
+   'max' bytes between the quotes).
+   return non-zero on error */
+int Curl_str_quotedword(char **linep, struct Curl_str *out,
+                        const size_t max)
+{
+  char *s;
+  size_t len = 0;
+  DEBUGASSERT(linep && *linep && out && max);
+
+  s = *linep; /* dereference only after the assert has checked linep */
+  out->str = NULL;
+  out->len = 0;
+  if(*s != '\"')
+    return STRE_BEGQUOTE;
+  s++;
+  while(*s && (*s != '\"')) {
+    s++;
+    if(++len > max)
+      return STRE_BIG;
+  }
+  if(*s != '\"')
+    return STRE_ENDQUOTE; /* hit end of string before the closing quote */
+  out->str = (*linep) + 1; /* skip the opening quote */
+  out->len = len;
+  *linep = s + 1; /* continue after the closing quote */
+  return STRE_OK;
+}
+
+/* Step over one byte, but only if it equals 'byte'. On a mismatch (which
+   includes being at the end of the string when 'byte' is non-zero) *linep is
+   left where it was and STRE_BYTE is returned.
+   return non-zero on error */
+int Curl_str_single(char **linep, char byte)
+{
+  DEBUGASSERT(linep && *linep);
+  if(**linep == byte) {
+    (*linep)++; /* move over it */
+    return STRE_OK;
+  }
+  return STRE_BYTE;
+}
+
+/* Step over exactly one space character (' '). This is Curl_str_single()
+   with a space as the expected byte, so any other byte yields STRE_BYTE.
+   return non-zero on error */
+int Curl_str_singlespace(char **linep)
+{
+  const char space = ' ';
+  return Curl_str_single(linep, space);
+}
+
+/* Get an unsigned decimal number. Leading zeroes are accepted.
+   Digits are consumed until the first non-digit byte. If the line does not
+   start with a digit, nothing is consumed, *nump becomes 0 and STRE_OK is
+   returned. Returns STRE_OVERFLOW when the value does not fit in a size_t
+   and STRE_BIG when it exceeds 'max'; in both cases *nump is 0 and *linep
+   is left at the digit that triggered the error (earlier digits remain
+   consumed).
+   return non-zero on error */
+int Curl_str_number(char **linep, size_t *nump, size_t max)
+{
+  size_t value = 0;
+  DEBUGASSERT(linep && *linep && nump);
+  *nump = 0;
+  for(; ISDIGIT(**linep); (*linep)++) {
+    const int digit = **linep - '0';
+    /* check before multiplying so that the arithmetic cannot wrap */
+    if(value > ((SIZE_T_MAX - digit) / 10))
+      return STRE_OVERFLOW;
+    value = value * 10 + digit;
+    if(value > max)
+      return STRE_BIG;
+  }
+  *nump = value;
+  return STRE_OK;
+}
+
+/* Step over one newline byte (CR or LF, as decided by ISNEWLINE). Only a
+   single byte is consumed per call. Without a newline byte at *linep the
+   pointer is left alone and STRE_NEWLINE is returned.
+   return non-zero on error */
+int Curl_str_newline(char **linep)
+{
+  DEBUGASSERT(linep && *linep);
+  if(!ISNEWLINE(**linep))
+    return STRE_NEWLINE;
+  (*linep)++;
+  return STRE_OK;
+}
diff --git a/lib/strparse.h b/lib/strparse.h

new file mode 100644 (file)

index 0000000..189927b
--- /dev/null
+++ b/lib/strparse.h
@@ -0,0 +1,71 @@
+#ifndef HEADER_CURL_STRPARSE_H
+#define HEADER_CURL_STRPARSE_H
+/***************************************************************************
+ *                                  _   _ ____  _
+ *  Project                     ___| | | |  _ \| |
+ *                             / __| | | | |_) | |
+ *                            | (__| |_| |  _ <| |___
+ *                             \___|\___/|_| \_\_____|
+ *
+ * Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
+ *
+ * This software is licensed as described in the file COPYING, which
+ * you should have received as part of this distribution. The terms
+ * are also available at https://curl.se/docs/copyright.html.
+ *
+ * You may opt to use, copy, modify, merge, publish, distribute and/or sell
+ * copies of the Software, and permit persons to whom the Software is
+ * furnished to do so, under the terms of the COPYING file.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ * SPDX-License-Identifier: curl
+ *
+ ***************************************************************************/
+#include "curl_setup.h"
+
+/* return codes from the Curl_str_* parsing functions */
+#define STRE_OK       0 /* success */
+#define STRE_BIG      1 /* word or number exceeds the provided max */
+#define STRE_SHORT    2 /* word is empty (zero bytes long) */
+#define STRE_BEGQUOTE 3 /* quoted word lacks the opening quote */
+#define STRE_ENDQUOTE 4 /* quoted word lacks the closing quote */
+#define STRE_BYTE     5 /* the expected byte is not at this position */
+#define STRE_NEWLINE  6 /* no newline byte at this position */
+#define STRE_OVERFLOW 7 /* number does not fit in a size_t */
+
+/* A string reference as filled in by the parsing functions: 'str' points at
+   the first byte and 'len' is the number of bytes. The parsers make 'str'
+   point into the parsed line, so the data is not necessarily followed by a
+   null byte at 'len'. On parse errors the word parsers set 'str' to NULL and
+   'len' to 0. */
+struct Curl_str {
+  char *str;  /* start of the string, inside the parsed line */
+  size_t len; /* number of bytes in the string */
+};
+
+/* Get a word until the first space or end of string. The word must be at
+   least one byte and at most 'max' bytes long; the space is not consumed.
+   return non-zero on error */
+int Curl_str_word(char **linep, struct Curl_str *out, const size_t max);
+
+/* Get a word until the first DELIM or end of string. The word must be at
+   least one byte and at most 'max' bytes long; DELIM itself is not consumed.
+   return non-zero on error */
+int Curl_str_until(char **linep, struct Curl_str *out, const size_t max,
+                   char delim);
+
+/* Get a "quoted" word. No escaping possible. At most 'max' bytes may appear
+   between the double quotes (zero bytes is accepted); on success both quotes
+   are consumed and 'out' holds only what was between them.
+   return non-zero on error */
+int Curl_str_quotedword(char **linep, struct Curl_str *out, const size_t max);
+
+/* Advance over a single character, which must be equal to 'byte'.
+   return non-zero on error */
+int Curl_str_single(char **linep, char byte);
+
+/* Advance over a single space (' ') character.
+   return non-zero on error */
+int Curl_str_singlespace(char **linep);
+
+/* Get an unsigned decimal number that is no larger than 'max'. Leading
+   zeroes are accepted. If there is no digit at *linep, 0 is stored in *nump
+   and success is returned.
+   return non-zero on error */
+int Curl_str_number(char **linep, size_t *nump, size_t max);
+
+/* Check for CR or LF and advance over that single byte if present.
+   return non-zero on error */
+int Curl_str_newline(char **linep);
+
+#endif /* HEADER_CURL_STRPARSE_H */
diff --git a/lib/url.c b/lib/url.c

index 436edd891e1b22071e98f8c4befa7bfe2ea94b30..ea0298ac6aff5b568cce412558a26542f3159924 100644 (file)
--- a/lib/url.c
+++ b/lib/url.c
@@ -1827,7 +1827,7 @@ static CURLcode parseurlandfillconn(struct Curl_easy *data,
    /* HSTS upgrade */
    if(data->hsts && strcasecompare("http", data->state.up.scheme)) {
      /* This MUST use the IDN decoded name */
-    if(Curl_hsts(data->hsts, conn->host.name, TRUE)) {
+    if(Curl_hsts(data->hsts, conn->host.name, strlen(conn->host.name), TRUE)) {
        char *url;
        Curl_safefree(data->state.up.scheme);
        uc = curl_url_set(uh, CURLUPART_SCHEME, "https", 0);
diff --git a/tests/FILEFORMAT.md b/tests/FILEFORMAT.md

index b15274e17ae9b455a09086afc6f2e644b7ff8115..4ee907273ec725d30afd9390efd53f9956a249ec 100644 (file)
--- a/tests/FILEFORMAT.md
+++ b/tests/FILEFORMAT.md
@@ -453,6 +453,7 @@ Features testable here are:
  - `Kerberos`
  - `Largefile`
  - `large-time` (time_t is larger than 32-bit)
+- `large-size` (size_t is larger than 32-bit)
  - `ld_preload`
  - `libssh2`
  - `libssh`
diff --git a/tests/data/Makefile.am b/tests/data/Makefile.am

index 8e2c31863a151ded1c43bf29beb898508be5bf8e..bcdd965808052ddf4981feeb1a53d43b18fcbe20 100644 (file)
--- a/tests/data/Makefile.am
+++ b/tests/data/Makefile.am
@@ -218,7 +218,7 @@ test1620 test1621 \
  test1630 test1631 test1632 test1633 test1634 test1635 \
  \
  test1650 test1651 test1652 test1653 test1654 test1655 test1656 \
-test1660 test1661 test1662 test1663 \
+test1660 test1661 test1662 test1663 test1664 \
  \
  test1670 test1671 \
  \
diff --git a/tests/data/test1654 b/tests/data/test1654

index 3bd6f65c0130448ae56445543f8d7b6ad2a91775..e759c681a4e94215c0de233f55acb1a37ac0c907 100644 (file)
--- a/tests/data/test1654
+++ b/tests/data/test1654
@@ -27,13 +27,13 @@ alt-svc
  %LOGDIR/%TESTNUMBER
  </command>
  <file name="%LOGDIR/%TESTNUMBER" mode="text">
-h2 example.com 443 h3 shiny.example.com 8443 "20191231 00:00:00" 0 1
+h2 example.com 443 h3 shiny.example.com 8443 "20191231 00:00:00" 0 0
  # a comment
-h2 foo.example.com 443 h3 shiny.example.com 8443 "20291231 23:30:00" 0 1
-  h1 example.com 443 h3 shiny.example.com 8443 "20121231 00:00:01" 0 1
-       h3 example.com 443 h3 shiny.example.com 8443 "20131231 00:00:00" 0 1
+h2 foo.example.com 443 h3 shiny.example.com 8443 "20291231 23:30:00" 0 0
+  h1 example.com 443 h3 shiny.example.com 8443 "20121231 00:00:01" 0 0
+       h3 example.com 443 h3 shiny.example.com 8443 "20131231 00:00:00" 0 0
      # also a comment
-bad example.com 443 h3 shiny.example.com 8443 "20191231 00:00:00" 0 1
+bad example.com 443 h3 shiny.example.com 8443 "20191231 00:00:00" 0 0
  rubbish
  </file>
  </client>
@@ -41,10 +41,10 @@ rubbish
  <file name="%LOGDIR/%TESTNUMBER-out" mode="text">
  # Your alt-svc cache. https://curl.se/docs/alt-svc.html
  # This file was generated by libcurl! Edit at your own risk.
-h2 example.com 443 h3 shiny.example.com 8443 "20191231 00:00:00" 0 1
-h2 foo.example.com 443 h3 shiny.example.com 8443 "20291231 23:30:00" 0 1
-h1 example.com 443 h3 shiny.example.com 8443 "20121231 00:00:01" 0 1
-h3 example.com 443 h3 shiny.example.com 8443 "20131231 00:00:00" 0 1
+h2 example.com 443 h3 shiny.example.com 8443 "20191231 00:00:00" 0 0
+h2 foo.example.com 443 h3 shiny.example.com 8443 "20291231 23:30:00" 0 0
+h1 example.com 443 h3 shiny.example.com 8443 "20121231 00:00:01" 0 0
+h3 example.com 443 h3 shiny.example.com 8443 "20131231 00:00:00" 0 0
  h1 example.org 8080 h2 example.com 8080 "20190125 22:34:21" 0 0
  h1 2.example.org 8080 h3 2.example.org 8080 "20190125 22:34:21" 0 0
  h1 3.example.org 8080 h2 example.com 8080 "20190125 22:34:21" 0 0
diff --git a/tests/data/test1664 b/tests/data/test1664

new file mode 100644 (file)

index 0000000..b5f0fa6
--- /dev/null
+++ b/tests/data/test1664
@@ -0,0 +1,120 @@
+<testcase>
+<info>
+<keywords>
+unittest
+strparse
+</keywords>
+</info>
+
+#
+# Client-side
+<client>
+<server>
+none
+</server>
+<features>
+unittest
+large-size
+</features>
+<name>
+unit tests for strparse.c string parsing functions
+</name>
+</client>
+
+<verify>
+<stdout>
+Curl_str_word
+0: ("word") 0, "word" [4], line 4
+1: ("word ") 0, "word" [4], line 4
+2: (" word ") 2, "" [0], line 0
+3: ("wo rd") 0, "wo" [2], line 2
+4: ("word(") 0, "word(" [5], line 5
+5: ("wor(d") 0, "wor(d" [5], line 5
+6: ("perfect") 0, "perfect" [7], line 7
+7: ("") 2, "" [0], line 0
+8: ("longerth") 1, "" [0], line 0
+Curl_str_until
+0: ("word") 0, "wor" [3], line 3
+1: ("word ") 0, "wor" [3], line 3
+2: (" word ") 0, " wor" [4], line 4
+3: ("wo rd") 0, "wo r" [4], line 4
+4: ("word(") 0, "wor" [3], line 3
+5: ("wor(d") 0, "wor(" [4], line 4
+6: ("perfect") 0, "perfect" [7], line 7
+7: ("") 2, "" [0], line 0
+8: ("longerth") 1, "" [0], line 0
+Curl_str_quotedword
+0: (""word"") 0, "word" [4], line 6
+1: (""word") 4, "" [0], line 0
+2: ("word"") 3, "" [0], line 0
+3: (""word""") 0, "word" [4], line 6
+4: (""word" ") 0, "word" [4], line 6
+5: (" "word"") 3, "" [0], line 0
+6: (""perfect"") 0, "perfect" [7], line 9
+7: (""p r e t"") 0, "p r e t" [7], line 9
+8: (""perfec\"") 0, "perfec\" [7], line 9
+9: ("""") 0, "" [0], line 2
+10: ("") 3, "" [0], line 0
+11: (""longerth"") 1, "" [0], line 0
+Curl_str_single
+0: ("a") 0, line 1
+1: ("aa") 0, line 1
+2: ("A") 5, line 0
+3: ("b") 5, line 0
+4: ("\") 5, line 0
+5: (" ") 5, line 0
+6: ("") 5, line 0
+Curl_str_singlespace
+0: ("a") 5, line 0
+1: ("aa") 5, line 0
+2: ("A") 5, line 0
+3: ("b") 5, line 0
+4: ("\") 5, line 0
+5: (" ") 0, line 1
+6: ("  ") 5, line 0
+7: ("
+") 5, line 0
+8: ("") 5, line 0
+Curl_str_single
+0: ("a") 0, line 1
+1: ("aa") 0, line 1
+2: ("A") 5, line 0
+3: ("b") 5, line 0
+4: ("\") 5, line 0
+5: (" ") 5, line 0
+6: ("") 5, line 0
+Curl_str_number
+0: ("1") 0, [1] line 1
+1: ("10000") 1, [0] line 4
+2: ("1234") 0, [1234] line 4
+3: ("1235") 0, [1235] line 4
+4: ("1236") 1, [0] line 3
+5: ("01234") 0, [1234] line 5
+6: ("00000000000000000000000000001234") 0, [1234] line 32
+7: ("0123 345") 0, [123] line 4
+8: ("0123O345") 0, [123] line 4
+9: ("-12") 0, [0] line 0
+10: (" 123") 0, [0] line 0
+11: ("") 0, [0] line 0
+Curl_str_number / max
+0: ("9223372036854775808") 0, [9223372036854775808] line 19
+1: ("9223372036854775809") 0, [9223372036854775809] line 19
+2: ("18446744073709551615") 0, [18446744073709551615] line 20
+3: ("18446744073709551616") 7, [0] line 19
+4: ("18446744073709551617") 7, [0] line 19
+Curl_str_newline
+0: ("a") 6, line 0
+1: ("aa") 6, line 0
+2: ("A") 6, line 0
+3: ("b") 6, line 0
+4: ("\") 6, line 0
+5: (" ") 6, line 0
+6: ("
+") 0, line 1
+7: ("\r") 0, line 1
+8: ("\r
+") 0, line 1
+9: ("") 6, line 0
+</stdout>
+</verify>
+</testcase>
diff --git a/tests/runtests.pl b/tests/runtests.pl

index 01307f8d122c8854c038d61c488bf2a79d50581a..7cfbd65479a941a8b6e4423f556c02d606369344 100755 (executable)
--- a/tests/runtests.pl
+++ b/tests/runtests.pl
@@ -823,6 +823,7 @@ sub checksystemfeatures {
      $feature{"headers-api"} = 1;
      $feature{"xattr"} = 1;
      $feature{"large-time"} = 1;
+    $feature{"large-size"} = 1;
      $feature{"sha512-256"} = 1;
      $feature{"local-http"} = servers::localhttp();
      $feature{"codeset-utf8"} = lc(langinfo(CODESET())) eq "utf-8";
diff --git a/tests/server/disabled.c b/tests/server/disabled.c

index 057ab36fc615d0cfceefdc9271fed78077df4ebb..ef047c54855118cba959425fd712b503c9302469 100644 (file)
--- a/tests/server/disabled.c
+++ b/tests/server/disabled.c
@@ -101,6 +101,9 @@ static const char *disabled[]={
  #if (SIZEOF_TIME_T < 5)
    "large-time",
  #endif
+#if (SIZEOF_SIZE_T < 5)
+  "large-size",
+#endif
  #ifndef CURL_HAVE_SHA512_256
    "sha512-256",
  #endif
diff --git a/tests/unit/Makefile.inc b/tests/unit/Makefile.inc

index d17540bceaff447fbd06f830ebab234250c8959a..c523189233448483ee8b575c1bd47db99b315b91 100644 (file)
--- a/tests/unit/Makefile.inc
+++ b/tests/unit/Makefile.inc
@@ -39,7 +39,7 @@ UNITPROGS = unit1300          unit1302 unit1303 unit1304 unit1305 unit1307 \
   unit1608 unit1609 unit1610 unit1611 unit1612 unit1614 unit1615 unit1616 \
   unit1620 unit1621 \
   unit1650 unit1651 unit1652 unit1653 unit1654 unit1655 unit1656 \
- unit1660 unit1661 unit1663 \
+ unit1660 unit1661 unit1663 unit1664 \
   unit2600 unit2601 unit2602 unit2603 unit2604 \
   unit3200 \
   unit3205
@@ -132,6 +132,8 @@ unit1661_SOURCES = unit1661.c $(UNITFILES)
  
  unit1663_SOURCES = unit1663.c $(UNITFILES)
  
+unit1664_SOURCES = unit1664.c $(UNITFILES)
+
  unit2600_SOURCES = unit2600.c $(UNITFILES)
  
  unit2601_SOURCES = unit2601.c $(UNITFILES)
diff --git a/tests/unit/unit1660.c b/tests/unit/unit1660.c

index 46ab93ed8f87493d4671fc0b422e173949591f72..5ae8d0e9a7f4494aea9c1bfea086b5ff836691f8 100644 (file)
--- a/tests/unit/unit1660.c
+++ b/tests/unit/unit1660.c
@@ -156,7 +156,7 @@ UNITTEST_START
      }
  
      chost = headers[i].chost ? headers[i].chost : headers[i].host;
-    e = Curl_hsts(h, chost, TRUE);
+    e = Curl_hsts(h, chost, strlen(chost), TRUE);
      showsts(e, chost);
    }
  
@@ -165,7 +165,7 @@ UNITTEST_START
    /* verify that it is exists for 7 seconds */
    chost = "expire.example";
    for(i = 100; i < 110; i++) {
-    e = Curl_hsts(h, chost, TRUE);
+    e = Curl_hsts(h, chost, strlen(chost), TRUE);
      showsts(e, chost);
      deltatime++; /* another second passed */
    }
diff --git a/tests/unit/unit1664.c b/tests/unit/unit1664.c

new file mode 100644 (file)

index 0000000..3a2da81
--- /dev/null
+++ b/tests/unit/unit1664.c
@@ -0,0 +1,254 @@
+/***************************************************************************
+ *                                  _   _ ____  _
+ *  Project                     ___| | | |  _ \| |
+ *                             / __| | | | |_) | |
+ *                            | (__| |_| |  _ <| |___
+ *                             \___|\___/|_| \_\_____|
+ *
+ * Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
+ *
+ * This software is licensed as described in the file COPYING, which
+ * you should have received as part of this distribution. The terms
+ * are also available at https://curl.se/docs/copyright.html.
+ *
+ * You may opt to use, copy, modify, merge, publish, distribute and/or sell
+ * copies of the Software, and permit persons to whom the Software is
+ * furnished to do so, under the terms of the COPYING file.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ * SPDX-License-Identifier: curl
+ *
+ ***************************************************************************/
+#include "curlcheck.h"
+
+#ifdef HAVE_NETINET_IN_H
+#include <netinet/in.h>
+#endif
+#ifdef HAVE_NETINET_IN6_H
+#include <netinet/in6.h>
+#endif
+
+#include <curl/curl.h>
+
+#include "strparse.h"
+
+#include "memdebug.h" /* LAST include file */
+
+/* Setup step for this unit test: calls global_init(CURL_GLOBAL_ALL) and
+   returns the value of 'res', which starts out as CURLE_OK.
+   NOTE(review): global_init() is not defined in this file; it is presumably
+   a test-harness macro that may read or write the local 'res' - confirm
+   before renaming or removing that variable. */
+static CURLcode unit_setup(void)
+{
+  CURLcode res = CURLE_OK;
+  global_init(CURL_GLOBAL_ALL);
+  return res;
+}
+
+/* Teardown step for this unit test: calls curl_global_cleanup().
+   NOTE(review): presumably invoked by the UNITTEST_START/UNITTEST_STOP
+   harness macros - confirm in curlcheck.h. */
+static void unit_stop(void)
+{
+  curl_global_cleanup();
+}
+
+/* Runs every strparse function over a table of inputs and prints, for each
+   input: its index, the input string, the return code, the parsed result
+   (where there is one) and how many bytes the line pointer advanced. The
+   printed text is what gets compared with the expected stdout of the test
+   case, so every format string here must keep producing the same output. */
+UNITTEST_START
+{
+  static const char *wordparse[] = {
+    "word",
+    "word ",
+    " word ",
+    "wo rd",
+    "word(",
+    "wor(d",
+    "perfect",
+    "",
+    "longerth",
+    NULL
+  };
+
+  int i;
+  /* words are limited to 7 bytes: "perfect" fits, "longerth" does not */
+  printf("Curl_str_word\n");
+  for(i = 0; wordparse[i]; i++) {
+    struct Curl_str out;
+    char *line = (char *)wordparse[i];
+    char *orgline = line;
+    int rc = Curl_str_word(&line, &out, 7);
+    printf("%d: (\"%s\") %d, \"%.*s\" [%d], line %d\n",
+           i, orgline, rc, (int)out.len, out.str, (int)out.len,
+           (int)(line - orgline));
+  }
+
+  /* same inputs again, now with 'd' as the delimiter */
+  printf("Curl_str_until\n");
+  for(i = 0; wordparse[i]; i++) {
+    struct Curl_str out;
+    char *line = (char *)wordparse[i];
+    char *orgline = line;
+    int rc = Curl_str_until(&line, &out, 7, 'd');
+    printf("%d: (\"%s\") %d, \"%.*s\" [%d], line %d\n",
+           i, orgline, rc, (int)out.len, out.str, (int)out.len,
+           (int)(line - orgline));
+  }
+
+  {
+    static const char *qwords[] = {
+      "\"word\"",
+      "\"word",
+      "word\"",
+      "\"word\"\"",
+      "\"word\" ",
+      " \"word\"",
+      "\"perfect\"",
+      "\"p r e t\"",
+      "\"perfec\\\"",
+      "\"\"",
+      "",
+      "\"longerth\"",
+      NULL
+    };
+
+    printf("Curl_str_quotedword\n");
+    for(i = 0; qwords[i]; i++) {
+      struct Curl_str out;
+      char *line = (char *)qwords[i];
+      char *orgline = line;
+      int rc = Curl_str_quotedword(&line, &out, 7);
+      printf("%d: (\"%s\") %d, \"%.*s\" [%d], line %d\n",
+             i, orgline, rc, (int)out.len, out.str, (int)out.len,
+             (int)(line - orgline));
+    }
+  }
+
+  {
+    static const char *single[] = {
+      "a",
+      "aa",
+      "A",
+      "b",
+      "\\",
+      " ",
+      "",
+      NULL
+    };
+    printf("Curl_str_single\n");
+    for(i = 0; single[i]; i++) {
+      char *line = (char *)single[i];
+      char *orgline = line;
+      int rc = Curl_str_single(&line, 'a');
+      printf("%d: (\"%s\") %d, line %d\n",
+             i, orgline, rc, (int)(line - orgline));
+    }
+  }
+  {
+    static const char *single[] = {
+      "a",
+      "aa",
+      "A",
+      "b",
+      "\\",
+      " ",
+      "\t",
+      "\n",
+      "",
+      NULL
+    };
+    printf("Curl_str_singlespace\n");
+    for(i = 0; single[i]; i++) {
+      char *line = (char *)single[i];
+      char *orgline = line;
+      int rc = Curl_str_singlespace(&line);
+      printf("%d: (\"%s\") %d, line %d\n",
+             i, orgline, rc, (int)(line - orgline));
+    }
+  }
+
+  /* this repeats the Curl_str_single run from above; the expected output of
+     the test case lists that section twice, so it has to stay */
+  {
+    static const char *single[] = {
+      "a",
+      "aa",
+      "A",
+      "b",
+      "\\",
+      " ",
+      "",
+      NULL
+    };
+    printf("Curl_str_single\n");
+    for(i = 0; single[i]; i++) {
+      char *line = (char *)single[i];
+      char *orgline = line;
+      int rc = Curl_str_single(&line, 'a');
+      printf("%d: (\"%s\") %d, line %d\n",
+             i, orgline, rc, (int)(line - orgline));
+    }
+  }
+  {
+    static const char *nums[] = {
+      "1",
+      "10000",
+      "1234",
+      "1235",
+      "1236",
+      "01234",
+      "00000000000000000000000000001234",
+      "0123 345",
+      "0123O345",
+      "-12",
+      " 123",
+      "",
+      NULL
+    };
+    /* 1235 is the largest accepted value in this run */
+    printf("Curl_str_number\n");
+    for(i = 0; nums[i]; i++) {
+      size_t num;
+      char *line = (char *)nums[i];
+      char *orgline = line;
+      int rc = Curl_str_number(&line, &num, 1235);
+      printf("%d: (\"%s\") %d, [%u] line %d\n",
+             i, orgline, rc, (unsigned int)num, (int)(line - orgline));
+    }
+  }
+
+  {
+    /* SIZE_T_MAX is typically 18446744073709551615 */
+    static const char *nums[] = {
+      "9223372036854775808", /* 2^63 */
+      "9223372036854775809", /* 2^63 + 1 */
+      "18446744073709551615", /* 2^64 - 1 */
+      "18446744073709551616", /* 2^64 */
+      "18446744073709551617", /* 2^64 + 1 */
+      NULL
+    };
+    printf("Curl_str_number / max\n");
+    for(i = 0; nums[i]; i++) {
+      size_t num;
+      char *line = (char *)nums[i];
+      char *orgline = line;
+      int rc = Curl_str_number(&line, &num, SIZE_T_MAX);
+      printf("%d: (\"%s\") %d, [%zu] line %d\n",
+             i, orgline, rc, num, (int)(line - orgline));
+    }
+  }
+
+  {
+    static const char *newl[] = {
+      "a",
+      "aa",
+      "A",
+      "b",
+      "\\",
+      " ",
+      "\n",
+      "\r",
+      "\r\n",
+      "",
+      NULL
+    };
+    printf("Curl_str_newline\n");
+    for(i = 0; newl[i]; i++) {
+      char *line = (char *)newl[i];
+      char *orgline = line;
+      int rc = Curl_str_newline(&line);
+      printf("%d: (\"%s\") %d, line %d\n",
+             i, orgline, rc, (int)(line - orgline));
+    }
+  }
+
+}
+UNITTEST_STOP
author	Daniel Stenberg <daniel@haxx.se>
	Fri, 6 Dec 2024 13:31:38 +0000 (14:31 +0100)
committer	Daniel Stenberg <daniel@haxx.se>
	Thu, 12 Dec 2024 15:00:52 +0000 (16:00 +0100)
docs/Makefile.am		patch \| blob \| blame \| history
docs/internals/STRPARSE.md	[new file with mode: 0644]	patch \| blob
lib/Makefile.inc		patch \| blob \| blame \| history
lib/altsvc.c		patch \| blob \| blame \| history
lib/curl_ctype.h		patch \| blob \| blame \| history
lib/hsts.c		patch \| blob \| blame \| history
lib/hsts.h		patch \| blob \| blame \| history
lib/http_aws_sigv4.c		patch \| blob \| blame \| history
lib/strparse.c	[new file with mode: 0644]	patch \| blob
lib/strparse.h	[new file with mode: 0644]	patch \| blob
lib/url.c		patch \| blob \| blame \| history
tests/FILEFORMAT.md		patch \| blob \| blame \| history
tests/data/Makefile.am		patch \| blob \| blame \| history
tests/data/test1654		patch \| blob \| blame \| history
tests/data/test1664	[new file with mode: 0644]	patch \| blob
tests/runtests.pl		patch \| blob \| blame \| history
tests/server/disabled.c		patch \| blob \| blame \| history
tests/unit/Makefile.inc		patch \| blob \| blame \| history
tests/unit/unit1660.c		patch \| blob \| blame \| history
tests/unit/unit1664.c	[new file with mode: 0644]	patch \| blob