URI normalization: Allow and normalize UTF-8

author Alberto Leiva Popper <ydahhrk@gmail.com>

Thu, 8 May 2025 22:18:54 +0000 (16:18 -0600)

committer Alberto Leiva Popper <ydahhrk@gmail.com>

Thu, 8 May 2025 22:18:54 +0000 (16:18 -0600)
author Alberto Leiva Popper <ydahhrk@gmail.com>
Thu, 8 May 2025 22:18:54 +0000 (16:18 -0600)
committer Alberto Leiva Popper <ydahhrk@gmail.com>
Thu, 8 May 2025 22:18:54 +0000 (16:18 -0600)
diff --git a/src/types/uri.c b/src/types/uri.c

index b689ebe34e5160a6df85a84031f1e9c9d4676d1c..9d010c4ed4a1ab4593934e1168fb8a00915d6fc1 100644 (file)
--- a/src/types/uri.c
+++ b/src/types/uri.c
@@ -9,10 +9,9 @@
  
  /*
   * XXX IPv6 addresses
- * XXX UTF-8
   */
  
-#define URI_ALLOW_UNKNOWN_SCHEME (1 << 1)
+#define URI_ALLOW_UNKNOWN_SCHEME (1 << 0)
  
  static error_msg EM_SCHEME_EMPTY = "Scheme seems empty";
  static error_msg EM_SCHEME_1ST = "First scheme character is not a letter";
@@ -22,6 +21,7 @@ static error_msg EM_SCHEME_UNKNOWN = "Unknown scheme";
  static error_msg EM_SCHEME_NOTREMOTE = "Missing \"://\"";
  static error_msg EM_PCT_NOTHEX = "Invalid hexadecimal digit in percent encoding";
  static error_msg EM_PCT_NOT3 = "Unterminated percent-encoding";
+static error_msg EM_UTF8 = "Invalid UTF-8";
  static error_msg EM_USERINFO_BADCHR = "Illegal character in userinfo component";
  static error_msg EM_USERINFO_DISALLOWED = "Protocol disallows userinfo";
  static error_msg EM_HOST_BADCHR = "Illegal character in host component";
@@ -33,8 +33,8 @@ static error_msg EM_QUERY_DISALLOWED = "Protocol disallows query";
  static error_msg EM_QF_BADCHR = "Illegal character in query or fragment";
  static error_msg EM_FRAGMENT_DISALLOWED = "Protocol disallows fragment";
  
-struct sized_string {
-       char const *str;
+struct sized_ustring {
+       unsigned char const *str;
         size_t len;
  };
  
@@ -70,13 +70,13 @@ struct schema_metadata const RSYNC = {
  };
  
  static bool
-is_proto(struct sized_string *scheme, char const *proto)
+is_proto(struct sized_ustring *scheme, char const *proto)
  {
-       return strncasecmp(scheme->str, proto, scheme->len) == 0;
+       return strncasecmp((char const *)scheme->str, proto, scheme->len) == 0;
  }
  
  static struct schema_metadata const *
-get_metadata(struct sized_string *scheme)
+get_metadata(struct sized_ustring *scheme)
  {
         if (scheme->len != 5)
                 return NULL;
@@ -90,37 +90,37 @@ get_metadata(struct sized_string *scheme)
  }
  
  static bool
-is_lowercase(char chr)
+is_lowercase(unsigned char chr)
  {
         return 'a' <= chr && chr <= 'z';
  }
  
  static bool
-is_uppercase(char chr)
+is_uppercase(unsigned char chr)
  {
         return 'A' <= chr && chr <= 'Z';
  }
  
  static bool
-is_lowercase_hex(char chr)
+is_lowercase_hex(unsigned char chr)
  {
         return 'a' <= chr && chr <= 'f';
  }
  
  static bool
-is_uppercase_hex(char chr)
+is_uppercase_hex(unsigned char chr)
  {
         return 'A' <= chr && chr <= 'F';
  }
  
  static bool
-is_digit(char chr)
+is_digit(unsigned char chr)
  {
         return '0' <= chr && chr <= '9';
  }
  
  static bool
-is_symbol(char chr, char const *symbols)
+is_symbol(unsigned char chr, char const *symbols)
  {
         for (; symbols[0] != '\0'; symbols++)
                 if (chr == symbols[0])
@@ -129,13 +129,13 @@ is_symbol(char chr, char const *symbols)
  }
  
  static char
-to_lowercase(char uppercase)
+to_lowercase(unsigned char uppercase)
  {
         return uppercase - ('A' - 'a');
  }
  
  static char
-to_uppercase(char chr)
+to_uppercase(unsigned char chr)
  {
         return is_lowercase(chr) ? (chr + ('A' - 'a')) : chr;
  }
@@ -144,8 +144,8 @@ static void
  approve_chara(struct uri_buffer *buf, char chr)
  {
         if (buf->d >= buf->capacity) {
-               /* It seems this is dead code. */
-               buf->capacity += 16;
+               /* Needed when we convert UTF-8 to percent-encoding */
+               buf->capacity += 32;
                 buf->dst = prealloc(buf->dst, buf->capacity);
         }
  
@@ -153,8 +153,8 @@ approve_chara(struct uri_buffer *buf, char chr)
  }
  
  static void
-collect_authority(char const *auth, char const **at, char const **colon,
-    char const **end)
+collect_authority(unsigned char const *auth, unsigned char const **at,
+    unsigned char const **colon, unsigned char const **end)
  {
         *at = NULL;
         *colon = NULL;
@@ -181,7 +181,7 @@ collect_authority(char const *auth, char const **at, char const **colon,
  }
  
  static void
-collect_path(char const *path, char const **end)
+collect_path(unsigned char const *path, unsigned char const **end)
  {
         for (; true; path++)
                 if (path[0] == '\0' || path[0] == '?' || path[0] == '#') {
@@ -191,7 +191,7 @@ collect_path(char const *path, char const **end)
  }
  
  static void
-collect_query(char const *query, char const **end)
+collect_query(unsigned char const *query, unsigned char const **end)
  {
         for (; true; query++)
                 if (query[0] == '\0' || query[0] == '#') {
@@ -201,7 +201,7 @@ collect_query(char const *query, char const **end)
  }
  
  static void
-collect_fragment(char const *fragment, char const **end)
+collect_fragment(unsigned char const *fragment, unsigned char const **end)
  {
         for (; true; fragment++)
                 if (fragment[0] == '\0') {
@@ -211,9 +211,9 @@ collect_fragment(char const *fragment, char const **end)
  }
  
  static error_msg
-normalize_scheme(struct uri_buffer *buf, struct sized_string *scheme)
+normalize_scheme(struct uri_buffer *buf, struct sized_ustring *scheme)
  {
-       char chr;
+       unsigned char chr;
         array_index c;
  
         chr = scheme->str[0];
@@ -241,7 +241,7 @@ normalize_scheme(struct uri_buffer *buf, struct sized_string *scheme)
  }
  
  static bool
-is_unreserved(char chr)
+is_unreserved(unsigned char chr)
  {
         return is_lowercase(chr)
             || is_uppercase(chr)
@@ -250,13 +250,13 @@ is_unreserved(char chr)
  }
  
  static bool
-is_subdelim(char chr)
+is_subdelim(unsigned char chr)
  {
         return is_symbol(chr, "!$&'()*+,;=");
  }
  
  static error_msg
-char2hex(char chr, unsigned int *hex)
+uchar2hex(unsigned char chr, unsigned int *hex)
  {
         if (is_digit(chr)) {
                 *hex = chr - '0';
@@ -275,7 +275,7 @@ char2hex(char chr, unsigned int *hex)
  }
  
  static error_msg
-approve_pct_encoded(struct uri_buffer *buf, struct sized_string *sstr,
+approve_pct_encoded(struct uri_buffer *buf, struct sized_ustring *sstr,
      array_index *offset)
  {
         array_index off;
@@ -289,10 +289,10 @@ approve_pct_encoded(struct uri_buffer *buf, struct sized_string *sstr,
         if (sstr->len - off < 3)
                 return EM_PCT_NOT3;
  
-       error = char2hex(sstr->str[off + 1], &hex1);
+       error = uchar2hex(sstr->str[off + 1], &hex1);
         if (error)
                 return error;
-       error = char2hex(sstr->str[off + 2], &hex2);
+       error = uchar2hex(sstr->str[off + 2], &hex2);
         if (error)
                 return error;
  
@@ -311,11 +311,84 @@ approve_pct_encoded(struct uri_buffer *buf, struct sized_string *sstr,
         return NULL;
  }
  
+static bool
+is_utf8(unsigned char chr) /* True for any non-ASCII byte; does NOT validate the sequence */
+{
+       return chr & 0x80; /* High bit set: lead byte, continuation byte, or junk */
+}
+
+static char
+bin2hex(unsigned char bin) /* Nibble to uppercase hex digit; the visible callers only pass 0-15 */
+{
+       return bin + ((bin < 10) ? '0' : ('A' - 10)); /* 0-9 => '0'-'9', 10-15 => 'A'-'F' */
+}
+
+static void
+approve_bin(struct uri_buffer *buf, unsigned char chr) /* Emits chr as a "%XX" triplet (uppercase hex) */
+{
+       approve_chara(buf, '%');
+       approve_chara(buf, bin2hex(chr >> 4)); /* High nibble first */
+       approve_chara(buf, bin2hex(chr & 0xF)); /* Then low nibble */
+}
+
  static error_msg
-normalize_userinfo(struct uri_buffer *buf, struct sized_string *userinfo)
+approve_utf8(struct uri_buffer *buf, struct sized_ustring *sstr,
+    array_index *offset) /* in: index of the lead byte; out (on success): index of the sequence's last byte */
+{
+       array_index off;
+       unsigned char chr1;
+       unsigned char chr2;
+       unsigned char chr3;
+       unsigned char chr4;
+
+       off = *offset; /* NOTE: only bit patterns are checked below; overlong forms and surrogates are not rejected */
+       if (sstr->len - off < 2) /* A lone high-bit byte can never be a complete sequence */
+               return EM_UTF8;
+       chr1 = sstr->str[off];
+       chr2 = sstr->str[off + 1];
+       if ((chr1 & 0xE0) == 0xC0 && (chr2 & 0xC0) == 0x80) { /* 110xxxxx 10xxxxxx */
+               approve_bin(buf, chr1);
+               approve_bin(buf, chr2);
+               *offset += 1; /* Leave the offset on the last byte consumed */
+               return NULL;
+       }
+
+       if (sstr->len - off < 3)
+               return EM_UTF8;
+       chr3 = sstr->str[off + 2];
+       if ((chr1 & 0xF0) == 0xE0 && /* 1110xxxx 10xxxxxx 10xxxxxx */
+           (chr2 & 0xC0) == 0x80 &&
+           (chr3 & 0xC0) == 0x80) {
+               approve_bin(buf, chr1);
+               approve_bin(buf, chr2);
+               approve_bin(buf, chr3);
+               *offset += 2;
+               return NULL;
+       }
+
+       if (sstr->len - off < 4)
+               return EM_UTF8;
+       chr4 = sstr->str[off + 3];
+       if ((chr1 & 0xF8) == 0xF0 && /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
+           (chr2 & 0xC0) == 0x80 &&
+           (chr3 & 0xC0) == 0x80 &&
+           (chr4 & 0xC0) == 0x80) {
+               approve_bin(buf, chr1);
+               approve_bin(buf, chr2);
+               approve_bin(buf, chr3);
+               approve_bin(buf, chr4);
+               *offset += 3;
+               return NULL;
+       }
+
+       return EM_UTF8; /* No 2-, 3- or 4-byte pattern matched; nothing was written to buf */
+}
+
+static error_msg
+normalize_userinfo(struct uri_buffer *buf, struct sized_ustring *userinfo)
  {
         array_index c;
-       char chr;
+       unsigned char chr;
         error_msg error;
  
         if (userinfo->len == 0)
@@ -333,7 +406,11 @@ normalize_userinfo(struct uri_buffer *buf, struct sized_string *userinfo)
                         approve_chara(buf, chr);
                 else if (chr == ':')
                         approve_chara(buf, chr);
-               else
+               else if (is_utf8(chr)) {
+                       error = approve_utf8(buf, userinfo, &c);
+                       if (error)
+                               return error;
+               } else
                         return EM_USERINFO_BADCHR;
         }
  
@@ -342,10 +419,10 @@ normalize_userinfo(struct uri_buffer *buf, struct sized_string *userinfo)
  }
  
  static error_msg
-normalize_host(struct uri_buffer *buf, struct sized_string *host)
+normalize_host(struct uri_buffer *buf, struct sized_ustring *host)
  {
         array_index c;
-       char chr;
+       unsigned char chr;
         error_msg error;
  
         for (c = 0; c < host->len; c++) {
@@ -360,7 +437,11 @@ normalize_host(struct uri_buffer *buf, struct sized_string *host)
                                 return error;
                 } else if (is_subdelim(chr))
                         approve_chara(buf, chr);
-               else
+               else if (is_utf8(chr)) {
+                       error = approve_utf8(buf, host, &c);
+                       if (error)
+                               return error;
+               } else
                         return EM_HOST_BADCHR;
         }
  
@@ -368,11 +449,11 @@ normalize_host(struct uri_buffer *buf, struct sized_string *host)
  }
  
  static error_msg
-normalize_port(struct uri_buffer *buf, struct sized_string *port,
+normalize_port(struct uri_buffer *buf, struct sized_ustring *port,
      struct schema_metadata const *schema)
  {
         array_index c;
-       char chr;
+       unsigned char chr;
         unsigned int portnum;
  
         if (port->len == 0)
@@ -397,8 +478,8 @@ normalize_port(struct uri_buffer *buf, struct sized_string *port,
         return NULL;
  }
  
-static char const *
-strnchr(char const *str, size_t n, char chr)
+static unsigned char const *
+strnchr(unsigned char const *str, size_t n, unsigned char chr)
  {
         array_index s;
         for (s = 0; s < n; s++)
@@ -408,7 +489,7 @@ strnchr(char const *str, size_t n, char chr)
  }
  
  static bool
-next_segment(struct sized_string *path, struct sized_string *segment)
+next_segment(struct sized_ustring *path, struct sized_ustring *segment)
  {
         segment->str += segment->len + 1;
         if (segment->str > (path->str + path->len))
@@ -427,11 +508,11 @@ rewind_buffer(struct uri_buffer *buf, size_t limit)
  }
  
  static error_msg
-normalize_path(struct uri_buffer *buf, struct sized_string *path)
+normalize_path(struct uri_buffer *buf, struct sized_ustring *path)
  {
-       struct sized_string segment;
+       struct sized_ustring segment;
         array_index i;
-       char chr;
+       unsigned char chr;
         size_t limit;
         error_msg error;
  
@@ -456,7 +537,11 @@ normalize_path(struct uri_buffer *buf, struct sized_string *path)
                                         return error;
                         } else if (is_subdelim(chr) || is_symbol(chr, ":@"))
                                 approve_chara(buf, chr);
-                       else
+                       else if (is_utf8(chr)) {
+                               error = approve_utf8(buf, &segment, &i);
+                               if (error)
+                                       return error;
+                       } else
                                 return EM_PATH_BADCHR;
                 }
  
@@ -477,11 +562,11 @@ normalize_path(struct uri_buffer *buf, struct sized_string *path)
  }
  
  static error_msg
-normalize_post_path(struct uri_buffer *buf, struct sized_string *post,
+normalize_post_path(struct uri_buffer *buf, struct sized_ustring *post,
      char prefix)
  {
         array_index c;
-       char chr;
+       unsigned char chr;
         error_msg error;
  
         if (post->len == 0)
@@ -500,7 +585,11 @@ normalize_post_path(struct uri_buffer *buf, struct sized_string *post,
                         approve_chara(buf, chr);
                 else if (is_symbol(chr, ":@/?"))
                         approve_chara(buf, chr);
-               else
+               else if (is_utf8(chr)) {
+                       error = approve_utf8(buf, post, &c);
+                       if (error)
+                               return error;
+               } else
                         return EM_QF_BADCHR;
         }
  
@@ -508,7 +597,7 @@ normalize_post_path(struct uri_buffer *buf, struct sized_string *post,
  }
  
  static void
-print_component(char const *name, struct sized_string *component)
+print_component(char const *name, struct sized_ustring *component)
  {
         pr_clutter("  %s: %.*s (len:%zu)", name, (int)component->len,
             component->str, component->len);
@@ -521,20 +610,20 @@ print_component(char const *name, struct sized_string *component)
   * and needs to be released.
   */
  static error_msg
-url_normalize(char const *url, int flags, char **result)
+url_normalize(unsigned char const *url, int flags, char **result)
  {
-       struct sized_string scheme;
-       struct sized_string authority;
-       struct sized_string userinfo;
-       struct sized_string host;
-       struct sized_string port;
-       struct sized_string path;
-       struct sized_string query;
-       struct sized_string fragment;
+       struct sized_ustring scheme;
+       struct sized_ustring authority;
+       struct sized_ustring userinfo;
+       struct sized_ustring host;
+       struct sized_ustring port;
+       struct sized_ustring path;
+       struct sized_ustring query;
+       struct sized_ustring fragment;
  
-       char const *cursor;
-       char const *at;
-       char const *colon;
+       unsigned char const *cursor;
+       unsigned char const *at;
+       unsigned char const *colon;
  
         struct schema_metadata const *meta;
         struct uri_buffer buf;
@@ -543,7 +632,7 @@ url_normalize(char const *url, int flags, char **result)
         pr_clutter("-----------------------");
         pr_clutter("input: %s", url);
  
-       cursor = strchr(url, ':');
+       cursor = (unsigned char const *)strchr((char const *)url, ':');
         if (!cursor)
                 return EM_SCHEME_NOCOLON;
         if (cursor == url)
@@ -698,7 +787,7 @@ uri_init(struct uri *url, char const *str)
         char *normal;
         error_msg error;
  
-       error = url_normalize(str, 0, &normal);
+       error = url_normalize((unsigned char const *)str, 0, &normal);
         if (error)
                 return error;
  
diff --git a/test/types/uri_test.c b/test/types/uri_test.c

index aaa8320c2f1b49ac6463bf01ab4beb6af02f9b8b..3278e60971c89b47acdaebb3855c62bd5dcc1791 100644 (file)
--- a/test/types/uri_test.c
+++ b/test/types/uri_test.c
@@ -31,23 +31,27 @@ START_TEST(test_rewind)
  END_TEST
  
  #define TEST_NORMALIZE(dirty, clean)                                   \
-       ck_assert_pstr_eq(NULL, url_normalize(dirty, 0, &normal));      \
+       ck_assert_pstr_eq(NULL, url_normalize(                          \
+               (unsigned char *)dirty, 0, &normal                      \
+       ));                                                             \
         ck_assert_str_eq(clean, normal);                                \
         free(normal)
  
  #define TEST_NORMALIZE_AUS(dirty, clean)                               \
         ck_assert_ptr_eq(NULL, url_normalize(                           \
-               dirty, URI_ALLOW_UNKNOWN_SCHEME, &normal                \
+               (unsigned char *)dirty, URI_ALLOW_UNKNOWN_SCHEME, &normal \
         ));                                                             \
         ck_assert_str_eq(clean, normal);                                \
         free(normal)
  
  #define TEST_NORMALIZE_FAIL(dirty, error)                              \
-       ck_assert_str_eq(error, url_normalize(dirty, 0, &normal));
+       ck_assert_str_eq(error, url_normalize(                          \
+               (unsigned char *)dirty, 0, &normal                      \
+       ));
  
  #define TEST_NORMALIZE_FAIL_AUS(dirty, error)                          \
         ck_assert_str_eq(error, url_normalize(                          \
-               dirty, URI_ALLOW_UNKNOWN_SCHEME, &normal                \
+               (unsigned char *)dirty, URI_ALLOW_UNKNOWN_SCHEME, &normal \
         ));
  
  START_TEST(awkward_dot_dotting)
@@ -92,6 +96,9 @@ START_TEST(pct_encoding)
         TEST_NORMALIZE("https://%7C/", "https://%7C/");
         TEST_NORMALIZE("https://%7c/", "https://%7C/");
  
+       TEST_NORMALIZE("https://a%6fa/", "https://aoa/");
+       TEST_NORMALIZE("https://a%7ca/", "https://a%7Ca/");
+
         TEST_NORMALIZE_FAIL("https://%6G", EM_PCT_NOTHEX);
         TEST_NORMALIZE_FAIL("https://%G6", EM_PCT_NOTHEX);
  
@@ -173,6 +180,71 @@ START_TEST(test_same_origin)
  }
  END_TEST
  
+static unsigned char const ASCI = 'a'; /* 0_______ */
+static unsigned char const CONT = 0x80;        /* 10______ */
+static unsigned char const DUO = 0xC0; /* 110_____ */
+static unsigned char const TRIO = 0xE0;        /* 1110____ */
+static unsigned char const QUAD = 0xF0;        /* 11110___ */
+static unsigned char const CHRS[] = { ASCI, CONT, DUO, TRIO, QUAD, 0 }; /* One sample per byte class; 0-terminated */
+
+static void
+test_utf8_fail(unsigned char chr1, unsigned char chr2, /* Expects EM_UTF8 for host bytes chr1..chr4 */
+    unsigned char chr3, unsigned char chr4)
+{
+       char *normal; /* Referenced by the TEST_NORMALIZE_FAIL macro */
+       char messy[32];
+
+       if (chr1 == ASCI && chr2 == ASCI && chr3 == ASCI && chr4 == ASCI) /* These early returns skip the well-formed combinations */
+               return;
+       if (chr1 == ASCI && chr2 == ASCI && chr3 == DUO && chr4 == CONT)
+               return;
+       if (chr1 == ASCI && chr2 == DUO && chr3 == CONT && chr4 == ASCI)
+               return;
+       if (chr1 == DUO && chr2 == CONT && chr3 == ASCI && chr4 == ASCI)
+               return;
+       if (chr1 == DUO && chr2 == CONT && chr3 == DUO && chr4 == CONT)
+               return;
+       if (chr1 == ASCI && chr2 == TRIO && chr3 == CONT && chr4 == CONT)
+               return;
+       if (chr1 == TRIO && chr2 == CONT && chr3 == CONT && chr4 == ASCI)
+               return;
+       if (chr1 == QUAD && chr2 == CONT && chr3 == CONT && chr4 == CONT)
+               return;
+
+       strcpy(messy, "https://----/"); /* Indexes 8-11 are the "----" placeholder host */
+       messy[8] = chr1;
+       messy[9] = chr2;
+       messy[10] = chr3;
+       messy[11] = chr4;
+       TEST_NORMALIZE_FAIL(messy, EM_UTF8);
+}
+
+START_TEST(test_utf8)
+{
+       char *normal; /* Referenced by the TEST_NORMALIZE* macros */
+       array_index c1, c2, c3, c4;
+
+       TEST_NORMALIZE("https://a.β.c/", "https://a.%CE%B2.c/"); /* 2-byte sequence */
+       TEST_NORMALIZE("https://a.砦.c/", "https://a.%E7%A0%A6.c/"); /* 3-byte sequence */
+       TEST_NORMALIZE("https://a.𝆑.c/", "https://a.%F0%9D%86%91.c/"); /* 4-byte sequence */
+
+       TEST_NORMALIZE_FAIL_AUS("βsync://a.b.c/", EM_SCHEME_1ST); /* Per component: scheme and port reject, the rest encode */
+       TEST_NORMALIZE_FAIL_AUS("rsβnc://a.b.c/", EM_SCHEME_NTH);
+       TEST_NORMALIZE("rsync://β@a.b.c/", "rsync://%CE%B2@a.b.c/");
+       TEST_NORMALIZE_FAIL("rsync://a.b.c:β/", EM_PORT_BADCHR);
+       TEST_NORMALIZE("https://a.b.c/β", "https://a.b.c/%CE%B2");
+       TEST_NORMALIZE("https://a.b.c/?β", "https://a.b.c/?%CE%B2");
+       TEST_NORMALIZE("https://a.b.c/#β", "https://a.b.c/#%CE%B2");
+
+       for (c1 = 0; CHRS[c1]; c1++) /* Every 4-byte arrangement of the CHRS byte classes */
+               for (c2 = 0; CHRS[c2]; c2++)
+                       for (c3 = 0; CHRS[c3]; c3++)
+                               for (c4 = 0; CHRS[c4]; c4++)
+                                       test_utf8_fail(CHRS[c1], CHRS[c2],
+                                           CHRS[c3], CHRS[c4]);
+}
+END_TEST
+
  START_TEST(test_unknown_protocols)
  {
         char *normal;
@@ -381,10 +453,8 @@ START_TEST(https_grammar)
         TEST_NORMALIZE_FAIL("https:", EM_SCHEME_NOTREMOTE);
         TEST_NORMALIZE_FAIL("https:/", EM_SCHEME_NOTREMOTE);
         TEST_NORMALIZE_FAIL("https://", EM_HOST_EMPTY);
-       TEST_NORMALIZE_FAIL("https://a.β.c/", EM_HOST_BADCHR);
-       TEST_NORMALIZE_FAIL("https://a.b.c/β", EM_PATH_BADCHR);
  
-       /* I think everything else is already tested below. */
+       /* I think everything else is already tested elsewhere. */
  }
  END_TEST
  
@@ -540,8 +610,6 @@ START_TEST(rsync_grammar)
         TEST_NORMALIZE_FAIL("rsync:", EM_SCHEME_NOTREMOTE);
         TEST_NORMALIZE_FAIL("rsync:/", EM_SCHEME_NOTREMOTE);
         TEST_NORMALIZE_FAIL("rsync://", EM_HOST_EMPTY);
-       TEST_NORMALIZE_FAIL("rsync://a.β.c/", EM_HOST_BADCHR);
-       TEST_NORMALIZE_FAIL("rsync://a.b.c/β", EM_PATH_BADCHR);
  
         TEST_NORMALIZE("rsync://a.b.c/m", "rsync://a.b.c/m");
         TEST_NORMALIZE("rsync://a.b.c/m/r", "rsync://a.b.c/m/r");
@@ -603,6 +671,7 @@ static Suite *create_suite(void)
         tcase_add_test(misc, test_unknown_protocols);
         tcase_add_test(misc, awkward_dot_dotting);
         tcase_add_test(misc, test_same_origin);
+       tcase_add_test(misc, test_utf8);
  
         generic = tcase_create("RFC 3986 (generic URI)");
         tcase_add_test(generic, pct_encoding);
author	Alberto Leiva Popper <ydahhrk@gmail.com>
	Thu, 8 May 2025 22:18:54 +0000 (16:18 -0600)
committer	Alberto Leiva Popper <ydahhrk@gmail.com>
	Thu, 8 May 2025 22:18:54 +0000 (16:18 -0600)
src/types/uri.c		patch \| blob \| blame \| history
test/types/uri_test.c		patch \| blob \| blame \| history