From: Alberto Leiva Popper <ydahhrk@gmail.com>
Date: Thu, 8 May 2025 22:18:54 +0000 (-0600)
Subject: URI normalization: Allow and normalize UTF-8
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=c3e2fa6c228038032559a95d9a4a1b7a2c8e3df3;p=thirdparty%2FFORT-validator.git

URI normalization: Allow and normalize UTF-8

Was previously rejecting non-ASCII characters.
It now converts them to percent-encoding.
---

diff --git a/src/types/uri.c b/src/types/uri.c
index b689ebe3..9d010c4e 100644
--- a/src/types/uri.c
+++ b/src/types/uri.c
@@ -9,10 +9,9 @@
 
 /*
  * XXX IPv6 addresses
- * XXX UTF-8
  */
 
-#define URI_ALLOW_UNKNOWN_SCHEME (1 << 1)
+#define URI_ALLOW_UNKNOWN_SCHEME (1 << 0)
 
 static error_msg EM_SCHEME_EMPTY = "Scheme seems empty";
 static error_msg EM_SCHEME_1ST = "First scheme character is not a letter";
@@ -22,6 +21,7 @@ static error_msg EM_SCHEME_UNKNOWN = "Unknown scheme";
 static error_msg EM_SCHEME_NOTREMOTE = "Missing \"://\"";
 static error_msg EM_PCT_NOTHEX = "Invalid hexadecimal digit in percent encoding";
 static error_msg EM_PCT_NOT3 = "Unterminated percent-encoding";
+static error_msg EM_UTF8 = "Invalid UTF-8";
 static error_msg EM_USERINFO_BADCHR = "Illegal character in userinfo component";
 static error_msg EM_USERINFO_DISALLOWED = "Protocol disallows userinfo";
 static error_msg EM_HOST_BADCHR = "Illegal character in host component";
@@ -33,8 +33,8 @@ static error_msg EM_QUERY_DISALLOWED = "Protocol disallows query";
 static error_msg EM_QF_BADCHR = "Illegal character in query or fragment";
 static error_msg EM_FRAGMENT_DISALLOWED = "Protocol disallows fragment";
 
-struct sized_string {
-	char const *str;
+struct sized_ustring {
+	unsigned char const *str;
 	size_t len;
 };
 
@@ -70,13 +70,13 @@ struct schema_metadata const RSYNC = {
 };
 
 static bool
-is_proto(struct sized_string *scheme, char const *proto)
+is_proto(struct sized_ustring *scheme, char const *proto)
 {
-	return strncasecmp(scheme->str, proto, scheme->len) == 0;
+	return strncasecmp((char const *)scheme->str, proto, scheme->len) == 0;
 }
 
 static struct schema_metadata const *
-get_metadata(struct sized_string *scheme)
+get_metadata(struct sized_ustring *scheme)
 {
 	if (scheme->len != 5)
 		return NULL;
@@ -90,37 +90,37 @@ get_metadata(struct sized_string *scheme)
 }
 
 static bool
-is_lowercase(char chr)
+is_lowercase(unsigned char chr)
 {
 	return 'a' <= chr && chr <= 'z';
 }
 
 static bool
-is_uppercase(char chr)
+is_uppercase(unsigned char chr)
 {
 	return 'A' <= chr && chr <= 'Z';
 }
 
 static bool
-is_lowercase_hex(char chr)
+is_lowercase_hex(unsigned char chr)
 {
 	return 'a' <= chr && chr <= 'f';
 }
 
 static bool
-is_uppercase_hex(char chr)
+is_uppercase_hex(unsigned char chr)
 {
 	return 'A' <= chr && chr <= 'F';
 }
 
 static bool
-is_digit(char chr)
+is_digit(unsigned char chr)
 {
 	return '0' <= chr && chr <= '9';
 }
 
 static bool
-is_symbol(char chr, char const *symbols)
+is_symbol(unsigned char chr, char const *symbols)
 {
 	for (; symbols[0] != '\0'; symbols++)
 		if (chr == symbols[0])
@@ -129,13 +129,13 @@ is_symbol(char chr, char const *symbols)
 }
 
 static char
-to_lowercase(char uppercase)
+to_lowercase(unsigned char uppercase)
 {
 	return uppercase - ('A' - 'a');
 }
 
 static char
-to_uppercase(char chr)
+to_uppercase(unsigned char chr)
 {
 	return is_lowercase(chr) ? (chr + ('A' - 'a')) : chr;
 }
@@ -144,8 +144,8 @@ static void
 approve_chara(struct uri_buffer *buf, char chr)
 {
 	if (buf->d >= buf->capacity) {
-		/* It seems this is dead code. */
-		buf->capacity += 16;
+		/* Needed when we convert UTF-8 to percent-encoding */
+		buf->capacity += 32;
 		buf->dst = prealloc(buf->dst, buf->capacity);
 	}
 
@@ -153,8 +153,8 @@ approve_chara(struct uri_buffer *buf, char chr)
 }
 
 static void
-collect_authority(char const *auth, char const **at, char const **colon,
-    char const **end)
+collect_authority(unsigned char const *auth, unsigned char const **at,
+    unsigned char const **colon, unsigned char const **end)
 {
 	*at = NULL;
 	*colon = NULL;
@@ -181,7 +181,7 @@ collect_authority(char const *auth, char const **at, char const **colon,
 }
 
 static void
-collect_path(char const *path, char const **end)
+collect_path(unsigned char const *path, unsigned char const **end)
 {
 	for (; true; path++)
 		if (path[0] == '\0' || path[0] == '?' || path[0] == '#') {
@@ -191,7 +191,7 @@ collect_path(char const *path, char const **end)
 }
 
 static void
-collect_query(char const *query, char const **end)
+collect_query(unsigned char const *query, unsigned char const **end)
 {
 	for (; true; query++)
 		if (query[0] == '\0' || query[0] == '#') {
@@ -201,7 +201,7 @@ collect_query(char const *query, char const **end)
 }
 
 static void
-collect_fragment(char const *fragment, char const **end)
+collect_fragment(unsigned char const *fragment, unsigned char const **end)
 {
 	for (; true; fragment++)
 		if (fragment[0] == '\0') {
@@ -211,9 +211,9 @@ collect_fragment(char const *fragment, char const **end)
 }
 
 static error_msg
-normalize_scheme(struct uri_buffer *buf, struct sized_string *scheme)
+normalize_scheme(struct uri_buffer *buf, struct sized_ustring *scheme)
 {
-	char chr;
+	unsigned char chr;
 	array_index c;
 
 	chr = scheme->str[0];
@@ -241,7 +241,7 @@ normalize_scheme(struct uri_buffer *buf, struct sized_string *scheme)
 }
 
 static bool
-is_unreserved(char chr)
+is_unreserved(unsigned char chr)
 {
 	return is_lowercase(chr)
 	    || is_uppercase(chr)
@@ -250,13 +250,13 @@ is_unreserved(char chr)
 }
 
 static bool
-is_subdelim(char chr)
+is_subdelim(unsigned char chr)
 {
 	return is_symbol(chr, "!$&'()*+,;=");
 }
 
 static error_msg
-char2hex(char chr, unsigned int *hex)
+uchar2hex(unsigned char chr, unsigned int *hex)
 {
 	if (is_digit(chr)) {
 		*hex = chr - '0';
@@ -275,7 +275,7 @@ char2hex(char chr, unsigned int *hex)
 }
 
 static error_msg
-approve_pct_encoded(struct uri_buffer *buf, struct sized_string *sstr,
+approve_pct_encoded(struct uri_buffer *buf, struct sized_ustring *sstr,
     array_index *offset)
 {
 	array_index off;
@@ -289,10 +289,10 @@ approve_pct_encoded(struct uri_buffer *buf, struct sized_string *sstr,
 	if (sstr->len - off < 3)
 		return EM_PCT_NOT3;
 
-	error = char2hex(sstr->str[off + 1], &hex1);
+	error = uchar2hex(sstr->str[off + 1], &hex1);
 	if (error)
 		return error;
-	error = char2hex(sstr->str[off + 2], &hex2);
+	error = uchar2hex(sstr->str[off + 2], &hex2);
 	if (error)
 		return error;
 
@@ -311,11 +311,84 @@ approve_pct_encoded(struct uri_buffer *buf, struct sized_string *sstr,
 	return NULL;
 }
 
+static bool
+is_utf8(unsigned char chr)
+{
+	return chr & 0x80;
+}
+
+static char
+bin2hex(unsigned char bin)
+{
+	return bin + ((bin < 10) ? '0' : ('A' - 10));
+}
+
+static void
+approve_bin(struct uri_buffer *buf, unsigned char chr)
+{
+	approve_chara(buf, '%');
+	approve_chara(buf, bin2hex(chr >> 4));
+	approve_chara(buf, bin2hex(chr & 0xF));
+}
+
 static error_msg
-normalize_userinfo(struct uri_buffer *buf, struct sized_string *userinfo)
+approve_utf8(struct uri_buffer *buf, struct sized_ustring *sstr,
+    array_index *offset)
+{
+	array_index off;
+	unsigned char chr1;
+	unsigned char chr2;
+	unsigned char chr3;
+	unsigned char chr4;
+
+	off = *offset;
+	if (sstr->len - off < 2)
+		return EM_UTF8;
+	chr1 = sstr->str[off];
+	chr2 = sstr->str[off + 1];
+	if ((chr1 & 0xE0) == 0xC0 && (chr2 & 0xC0) == 0x80) {
+		approve_bin(buf, chr1);
+		approve_bin(buf, chr2);
+		*offset += 1;
+		return NULL;
+	}
+
+	if (sstr->len - off < 3)
+		return EM_UTF8;
+	chr3 = sstr->str[off + 2];
+	if ((chr1 & 0xF0) == 0xE0 &&
+	    (chr2 & 0xC0) == 0x80 &&
+	    (chr3 & 0xC0) == 0x80) {
+		approve_bin(buf, chr1);
+		approve_bin(buf, chr2);
+		approve_bin(buf, chr3);
+		*offset += 2;
+		return NULL;
+	}
+
+	if (sstr->len - off < 4)
+		return EM_UTF8;
+	chr4 = sstr->str[off + 3];
+	if ((chr1 & 0xF8) == 0xF0 &&
+	    (chr2 & 0xC0) == 0x80 &&
+	    (chr3 & 0xC0) == 0x80 &&
+	    (chr4 & 0xC0) == 0x80) {
+		approve_bin(buf, chr1);
+		approve_bin(buf, chr2);
+		approve_bin(buf, chr3);
+		approve_bin(buf, chr4);
+		*offset += 3;
+		return NULL;
+	}
+
+	return EM_UTF8;
+}
+
+static error_msg
+normalize_userinfo(struct uri_buffer *buf, struct sized_ustring *userinfo)
 {
 	array_index c;
-	char chr;
+	unsigned char chr;
 	error_msg error;
 
 	if (userinfo->len == 0)
@@ -333,7 +406,11 @@ normalize_userinfo(struct uri_buffer *buf, struct sized_string *userinfo)
 			approve_chara(buf, chr);
 		else if (chr == ':')
 			approve_chara(buf, chr);
-		else
+		else if (is_utf8(chr)) {
+			error = approve_utf8(buf, userinfo, &c);
+			if (error)
+				return error;
+		} else
 			return EM_USERINFO_BADCHR;
 	}
 
@@ -342,10 +419,10 @@ normalize_userinfo(struct uri_buffer *buf, struct sized_string *userinfo)
 }
 
 static error_msg
-normalize_host(struct uri_buffer *buf, struct sized_string *host)
+normalize_host(struct uri_buffer *buf, struct sized_ustring *host)
 {
 	array_index c;
-	char chr;
+	unsigned char chr;
 	error_msg error;
 
 	for (c = 0; c < host->len; c++) {
@@ -360,7 +437,11 @@ normalize_host(struct uri_buffer *buf, struct sized_string *host)
 				return error;
 		} else if (is_subdelim(chr))
 			approve_chara(buf, chr);
-		else
+		else if (is_utf8(chr)) {
+			error = approve_utf8(buf, host, &c);
+			if (error)
+				return error;
+		} else
 			return EM_HOST_BADCHR;
 	}
 
@@ -368,11 +449,11 @@ normalize_host(struct uri_buffer *buf, struct sized_string *host)
 }
 
 static error_msg
-normalize_port(struct uri_buffer *buf, struct sized_string *port,
+normalize_port(struct uri_buffer *buf, struct sized_ustring *port,
     struct schema_metadata const *schema)
 {
 	array_index c;
-	char chr;
+	unsigned char chr;
 	unsigned int portnum;
 
 	if (port->len == 0)
@@ -397,8 +478,8 @@ normalize_port(struct uri_buffer *buf, struct sized_string *port,
 	return NULL;
 }
 
-static char const *
-strnchr(char const *str, size_t n, char chr)
+static unsigned char const *
+strnchr(unsigned char const *str, size_t n, unsigned char chr)
 {
 	array_index s;
 	for (s = 0; s < n; s++)
@@ -408,7 +489,7 @@ strnchr(char const *str, size_t n, char chr)
 }
 
 static bool
-next_segment(struct sized_string *path, struct sized_string *segment)
+next_segment(struct sized_ustring *path, struct sized_ustring *segment)
 {
 	segment->str += segment->len + 1;
 	if (segment->str > (path->str + path->len))
@@ -427,11 +508,11 @@ rewind_buffer(struct uri_buffer *buf, size_t limit)
 }
 
 static error_msg
-normalize_path(struct uri_buffer *buf, struct sized_string *path)
+normalize_path(struct uri_buffer *buf, struct sized_ustring *path)
 {
-	struct sized_string segment;
+	struct sized_ustring segment;
 	array_index i;
-	char chr;
+	unsigned char chr;
 	size_t limit;
 	error_msg error;
 
@@ -456,7 +537,11 @@ normalize_path(struct uri_buffer *buf, struct sized_string *path)
 					return error;
 			} else if (is_subdelim(chr) || is_symbol(chr, ":@"))
 				approve_chara(buf, chr);
-			else
+			else if (is_utf8(chr)) {
+				error = approve_utf8(buf, &segment, &i);
+				if (error)
+					return error;
+			} else
 				return EM_PATH_BADCHR;
 		}
 
@@ -477,11 +562,11 @@ normalize_path(struct uri_buffer *buf, struct sized_string *path)
 }
 
 static error_msg
-normalize_post_path(struct uri_buffer *buf, struct sized_string *post,
+normalize_post_path(struct uri_buffer *buf, struct sized_ustring *post,
     char prefix)
 {
 	array_index c;
-	char chr;
+	unsigned char chr;
 	error_msg error;
 
 	if (post->len == 0)
@@ -500,7 +585,11 @@ normalize_post_path(struct uri_buffer *buf, struct sized_string *post,
 			approve_chara(buf, chr);
 		else if (is_symbol(chr, ":@/?"))
 			approve_chara(buf, chr);
-		else
+		else if (is_utf8(chr)) {
+			error = approve_utf8(buf, post, &c);
+			if (error)
+				return error;
+		} else
 			return EM_QF_BADCHR;
 	}
 
@@ -508,7 +597,7 @@ normalize_post_path(struct uri_buffer *buf, struct sized_string *post,
 }
 
 static void
-print_component(char const *name, struct sized_string *component)
+print_component(char const *name, struct sized_ustring *component)
 {
 	pr_clutter("  %s: %.*s (len:%zu)", name, (int)component->len,
 	    component->str, component->len);
@@ -521,20 +610,20 @@ print_component(char const *name, struct sized_string *component)
  * and needs to be released.
  */
 static error_msg
-url_normalize(char const *url, int flags, char **result)
+url_normalize(unsigned char const *url, int flags, char **result)
 {
-	struct sized_string scheme;
-	struct sized_string authority;
-	struct sized_string userinfo;
-	struct sized_string host;
-	struct sized_string port;
-	struct sized_string path;
-	struct sized_string query;
-	struct sized_string fragment;
+	struct sized_ustring scheme;
+	struct sized_ustring authority;
+	struct sized_ustring userinfo;
+	struct sized_ustring host;
+	struct sized_ustring port;
+	struct sized_ustring path;
+	struct sized_ustring query;
+	struct sized_ustring fragment;
 
-	char const *cursor;
-	char const *at;
-	char const *colon;
+	unsigned char const *cursor;
+	unsigned char const *at;
+	unsigned char const *colon;
 
 	struct schema_metadata const *meta;
 	struct uri_buffer buf;
@@ -543,7 +632,7 @@ url_normalize(char const *url, int flags, char **result)
 	pr_clutter("-----------------------");
 	pr_clutter("input: %s", url);
 
-	cursor = strchr(url, ':');
+	cursor = (unsigned char const *)strchr((char const *)url, ':');
 	if (!cursor)
 		return EM_SCHEME_NOCOLON;
 	if (cursor == url)
@@ -698,7 +787,7 @@ uri_init(struct uri *url, char const *str)
 	char *normal;
 	error_msg error;
 
-	error = url_normalize(str, 0, &normal);
+	error = url_normalize((unsigned char const *)str, 0, &normal);
 	if (error)
 		return error;
 
diff --git a/test/types/uri_test.c b/test/types/uri_test.c
index aaa8320c..3278e609 100644
--- a/test/types/uri_test.c
+++ b/test/types/uri_test.c
@@ -31,23 +31,27 @@ START_TEST(test_rewind)
 END_TEST
 
 #define TEST_NORMALIZE(dirty, clean)					\
-	ck_assert_pstr_eq(NULL, url_normalize(dirty, 0, &normal));	\
+	ck_assert_pstr_eq(NULL, url_normalize(				\
+		(unsigned char *)dirty, 0, &normal			\
+	));								\
 	ck_assert_str_eq(clean, normal);				\
 	free(normal)
 
 #define TEST_NORMALIZE_AUS(dirty, clean)				\
 	ck_assert_ptr_eq(NULL, url_normalize(				\
-		dirty, URI_ALLOW_UNKNOWN_SCHEME, &normal		\
+		(unsigned char *)dirty, URI_ALLOW_UNKNOWN_SCHEME, &normal \
 	));								\
 	ck_assert_str_eq(clean, normal);				\
 	free(normal)
 
 #define TEST_NORMALIZE_FAIL(dirty, error)				\
-	ck_assert_str_eq(error, url_normalize(dirty, 0, &normal));
+	ck_assert_str_eq(error, url_normalize(				\
+		(unsigned char *)dirty, 0, &normal			\
+	));
 
 #define TEST_NORMALIZE_FAIL_AUS(dirty, error)				\
 	ck_assert_str_eq(error, url_normalize(				\
-		dirty, URI_ALLOW_UNKNOWN_SCHEME, &normal		\
+		(unsigned char *)dirty, URI_ALLOW_UNKNOWN_SCHEME, &normal \
 	));
 
 START_TEST(awkward_dot_dotting)
@@ -92,6 +96,9 @@ START_TEST(pct_encoding)
 	TEST_NORMALIZE("https://%7C/", "https://%7C/");
 	TEST_NORMALIZE("https://%7c/", "https://%7C/");
 
+	TEST_NORMALIZE("https://a%6fa/", "https://aoa/");
+	TEST_NORMALIZE("https://a%7ca/", "https://a%7Ca/");
+
 	TEST_NORMALIZE_FAIL("https://%6G", EM_PCT_NOTHEX);
 	TEST_NORMALIZE_FAIL("https://%G6", EM_PCT_NOTHEX);
 
@@ -173,6 +180,71 @@ START_TEST(test_same_origin)
 }
 END_TEST
 
+static unsigned char const ASCI = 'a';	/* 0_______ */
+static unsigned char const CONT = 0x80;	/* 10______ */
+static unsigned char const DUO = 0xC0;	/* 110_____ */
+static unsigned char const TRIO = 0xE0;	/* 1110____ */
+static unsigned char const QUAD = 0xF0;	/* 11110___ */
+static unsigned char const CHRS[] = { ASCI, CONT, DUO, TRIO, QUAD, 0 };
+
+static void
+test_utf8_fail(unsigned char chr1, unsigned char chr2,
+    unsigned char chr3, unsigned char chr4)
+{
+	char *normal;
+	char messy[32];
+
+	if (chr1 == ASCI && chr2 == ASCI && chr3 == ASCI && chr4 == ASCI)
+		return;
+	if (chr1 == ASCI && chr2 == ASCI && chr3 == DUO && chr4 == CONT)
+		return;
+	if (chr1 == ASCI && chr2 == DUO && chr3 == CONT && chr4 == ASCI)
+		return;
+	if (chr1 == DUO && chr2 == CONT && chr3 == ASCI && chr4 == ASCI)
+		return;
+	if (chr1 == DUO && chr2 == CONT && chr3 == DUO && chr4 == CONT)
+		return;
+	if (chr1 == ASCI && chr2 == TRIO && chr3 == CONT && chr4 == CONT)
+		return;
+	if (chr1 == TRIO && chr2 == CONT && chr3 == CONT && chr4 == ASCI)
+		return;
+	if (chr1 == QUAD && chr2 == CONT && chr3 == CONT && chr4 == CONT)
+		return;
+
+	strcpy(messy, "https://----/");
+	messy[8] = chr1;
+	messy[9] = chr2;
+	messy[10] = chr3;
+	messy[11] = chr4;
+	TEST_NORMALIZE_FAIL(messy, EM_UTF8);
+}
+
+START_TEST(test_utf8)
+{
+	char *normal;
+	array_index c1, c2, c3, c4;
+
+	TEST_NORMALIZE("https://a.Î².c/", "https://a.%CE%B2.c/");
+	TEST_NORMALIZE("https://a.ç ¦.c/", "https://a.%E7%A0%A6.c/");
+	TEST_NORMALIZE("https://a.ð.c/", "https://a.%F0%9D%86%91.c/");
+
+	TEST_NORMALIZE_FAIL_AUS("Î²sync://a.b.c/", EM_SCHEME_1ST);
+	TEST_NORMALIZE_FAIL_AUS("rsÎ²nc://a.b.c/", EM_SCHEME_NTH);
+	TEST_NORMALIZE("rsync://Î²@a.b.c/", "rsync://%CE%B2@a.b.c/");
+	TEST_NORMALIZE_FAIL("rsync://a.b.c:Î²/", EM_PORT_BADCHR);
+	TEST_NORMALIZE("https://a.b.c/Î²", "https://a.b.c/%CE%B2");
+	TEST_NORMALIZE("https://a.b.c/?Î²", "https://a.b.c/?%CE%B2");
+	TEST_NORMALIZE("https://a.b.c/#Î²", "https://a.b.c/#%CE%B2");
+
+	for (c1 = 0; CHRS[c1]; c1++)
+		for (c2 = 0; CHRS[c2]; c2++)
+			for (c3 = 0; CHRS[c3]; c3++)
+				for (c4 = 0; CHRS[c4]; c4++)
+					test_utf8_fail(CHRS[c1], CHRS[c2],
+					    CHRS[c3], CHRS[c4]);
+}
+END_TEST
+
 START_TEST(test_unknown_protocols)
 {
 	char *normal;
@@ -381,10 +453,8 @@ START_TEST(https_grammar)
 	TEST_NORMALIZE_FAIL("https:", EM_SCHEME_NOTREMOTE);
 	TEST_NORMALIZE_FAIL("https:/", EM_SCHEME_NOTREMOTE);
 	TEST_NORMALIZE_FAIL("https://", EM_HOST_EMPTY);
-	TEST_NORMALIZE_FAIL("https://a.Î².c/", EM_HOST_BADCHR);
-	TEST_NORMALIZE_FAIL("https://a.b.c/Î²", EM_PATH_BADCHR);
 
-	/* I think everything else is already tested below. */
+	/* I think everything else is already tested elsewhere. */
 }
 END_TEST
 
@@ -540,8 +610,6 @@ START_TEST(rsync_grammar)
 	TEST_NORMALIZE_FAIL("rsync:", EM_SCHEME_NOTREMOTE);
 	TEST_NORMALIZE_FAIL("rsync:/", EM_SCHEME_NOTREMOTE);
 	TEST_NORMALIZE_FAIL("rsync://", EM_HOST_EMPTY);
-	TEST_NORMALIZE_FAIL("rsync://a.Î².c/", EM_HOST_BADCHR);
-	TEST_NORMALIZE_FAIL("rsync://a.b.c/Î²", EM_PATH_BADCHR);
 
 	TEST_NORMALIZE("rsync://a.b.c/m", "rsync://a.b.c/m");
 	TEST_NORMALIZE("rsync://a.b.c/m/r", "rsync://a.b.c/m/r");
@@ -603,6 +671,7 @@ static Suite *create_suite(void)
 	tcase_add_test(misc, test_unknown_protocols);
 	tcase_add_test(misc, awkward_dot_dotting);
 	tcase_add_test(misc, test_same_origin);
+	tcase_add_test(misc, test_utf8);
 
 	generic = tcase_create("RFC 3986 (generic URI)");
 	tcase_add_test(generic, pct_encoding);