#include "log.h"
#include "types/path.h"
-/*
- * XXX IPv6 addresses
- */
-
#define URI_ALLOW_UNKNOWN_SCHEME (1 << 0)
static error_msg EM_SCHEME_EMPTY = "Scheme seems empty";
static error_msg EM_USERINFO_DISALLOWED = "Protocol disallows userinfo";
static error_msg EM_HOST_BADCHR = "Illegal character in host component";
static error_msg EM_HOST_EMPTY = "Protocol disallows empty host";
+static error_msg EM_HOST_LITERAL = "Unparseable IP literal in the host";
static error_msg EM_PORT_BADCHR = "Illegal non-digit character in port component";
static error_msg EM_PORT_RANGE = "Port value is out of range";
static error_msg EM_PATH_BADCHR = "Illegal character in path component";
return '0' <= chr && chr <= '9';
}
+/*
+ * Tells whether @chr is accepted by any of is_digit(), is_lowercase_hex() or
+ * is_uppercase_hex().
+ */
+static bool
+is_hexdigit(unsigned char chr)
+{
+	if (is_digit(chr))
+		return true;
+	if (is_lowercase_hex(chr))
+		return true;
+	return is_uppercase_hex(chr);
+}
+
static bool
is_symbol(unsigned char chr, char const *symbols)
{
collect_authority(unsigned char const *auth, unsigned char const **at,
unsigned char const **colon, unsigned char const **end)
{
+ bool v6skip;
+
*at = NULL;
*colon = NULL;
+ v6skip = false;
for (; true; auth++) {
- switch (auth[0]) {
- case '/':
- case '?':
- case '#':
- case '\0':
- *end = auth;
- return;
- case '@':
- if ((*at) == NULL) {
- *colon = NULL; /* Was a password if not null */
- *at = auth;
+ if (v6skip) {
+ switch (auth[0]) {
+ case ']':
+ v6skip = false;
+ continue;
+ case '\0':
+ *end = auth;
+ return;
+ }
+ } else {
+ switch (auth[0]) {
+ case '/':
+ case '?':
+ case '#':
+ case '\0':
+ *end = auth;
+ return;
+ case '@':
+ if ((*at) == NULL) {
+ /* Was a password if not null */
+ *colon = NULL;
+ *at = auth;
+ }
+ break;
+ case ':':
+ *colon = auth;
+ break;
+ case '[':
+ v6skip = true;
+ break;
}
- break;
- case ':':
- *colon = auth;
- break;
}
}
}
}
static error_msg
-normalize_scheme(struct uri_buffer *buf, struct sized_ustring *scheme)
+normalize_scheme(struct uri_buffer *buf, struct sized_ustring const *scheme)
{
unsigned char chr;
array_index c;
}
static error_msg
-approve_pct_encoded(struct uri_buffer *buf, struct sized_ustring *sstr,
+approve_pct_encoded(struct uri_buffer *buf, struct sized_ustring const *sstr,
array_index *offset)
{
array_index off;
}
static error_msg
-approve_utf8(struct uri_buffer *buf, struct sized_ustring *sstr,
+approve_utf8(struct uri_buffer *buf, struct sized_ustring const *sstr,
array_index *offset)
{
array_index off;
}
static error_msg
-normalize_userinfo(struct uri_buffer *buf, struct sized_ustring *userinfo)
+normalize_userinfo(struct uri_buffer *buf, struct sized_ustring const *userinfo)
{
array_index c;
unsigned char chr;
return NULL;
}
+/*
+ * Validates the content of an IPvFuture literal (brackets excluded) and
+ * copies it into @buf.
+ *
+ * Accepted shape: one marker character, one or more hex digits, a dot, then
+ * one or more characters that are unreserved, sub-delims or ':'.
+ *
+ * The marker at @ipf->str[0] is not inspected; a lowercase 'v' is always
+ * written in its place.
+ *
+ * Returns NULL on success, EM_HOST_LITERAL otherwise. On failure, @buf keeps
+ * whatever was already appended before the offending character.
+ */
+static error_msg
+normalize_ipvfuture(struct uri_buffer *buf, struct sized_ustring const *ipf)
+{
+	array_index dot;
+	array_index pos;
+	unsigned char ch;
+
+	approve_chara(buf, 'v');
+
+	/* Version: copy hex digits until something else shows up. */
+	for (dot = 1; dot < ipf->len && is_hexdigit(ipf->str[dot]); dot++)
+		approve_chara(buf, ipf->str[dot]);
+
+	/* That "something else" has to exist, and has to be the dot. */
+	if (dot >= ipf->len || ipf->str[dot] != '.')
+		return EM_HOST_LITERAL;
+	/* The dot right after the marker means the version was empty. */
+	if (dot == 1)
+		return EM_HOST_LITERAL;
+	approve_chara(buf, '.');
+
+	/* Value: at least one character, all from the allowed set. */
+	pos = dot + 1;
+	if (pos == ipf->len)
+		return EM_HOST_LITERAL;
+	while (pos < ipf->len) {
+		ch = ipf->str[pos++];
+		if (!is_unreserved(ch) && !is_subdelim(ch) && ch != ':')
+			return EM_HOST_LITERAL;
+		approve_chara(buf, ch);
+	}
+
+	return NULL;
+}
+
+/*
+ * Normalizes the content of a bracketed IPv6 literal (brackets excluded)
+ * into @buf.
+ *
+ * The text is parsed with inet_pton() and printed back with inet_ntop(), so
+ * @buf receives whatever spelling inet_ntop() picks for the address.
+ *
+ * Returns NULL on success; EM_HOST_LITERAL if @v6 plus a terminator does not
+ * fit in INET6_ADDRSTRLEN bytes, or if either conversion fails.
+ */
+static error_msg
+normalize_ipv6(struct uri_buffer *buf, struct sized_ustring const *v6)
+{
+	char input[INET6_ADDRSTRLEN];
+	char output[INET6_ADDRSTRLEN];
+	struct in6_addr binary;
+	char const *cursor;
+
+	/* inet_pton() needs a terminated string, so work on a local copy. */
+	if (v6->len >= INET6_ADDRSTRLEN)
+		return EM_HOST_LITERAL;
+	memcpy(input, v6->str, v6->len);
+	input[v6->len] = '\0';
+
+	if (inet_pton(AF_INET6, input, &binary) != 1)
+		return EM_HOST_LITERAL;
+	if (inet_ntop(AF_INET6, &binary, output, sizeof(output)) == NULL)
+		return EM_HOST_LITERAL;
+
+	for (cursor = output; *cursor != '\0'; cursor++)
+		approve_chara(buf, *cursor);
+
+	return NULL;
+}
+
+/*
+ * Normalizes an IP-literal host, "[" ( IPv6address / IPvFuture ) "]", into
+ * @buf.
+ *
+ * @lit must span the whole literal, brackets included. Its first byte is
+ * skipped without being checked. The content between the brackets goes to
+ * normalize_ipvfuture() when it starts with "v" or "V" (ABNF string literals
+ * are case-insensitive, so both spellings introduce an IPvFuture), and to
+ * normalize_ipv6() otherwise.
+ *
+ * Returns NULL on success; EM_HOST_LITERAL if @lit is shorter than three
+ * bytes or does not end in "]"; otherwise whatever the content normalizer
+ * returned. The closing bracket is appended to @buf even when the content
+ * was rejected.
+ */
+static error_msg
+normalize_ip_literal(struct uri_buffer *buf, struct sized_ustring const *lit)
+{
+	struct sized_ustring content;
+	error_msg error;
+
+	/* Two brackets plus at least one content byte. */
+	if (lit->len < 3)
+		return EM_HOST_LITERAL;
+	if (lit->str[lit->len - 1] != ']')
+		return EM_HOST_LITERAL;
+
+	/* Strip the brackets. */
+	content.str = lit->str + 1;
+	content.len = lit->len - 2;
+
+	approve_chara(buf, '[');
+	if (content.str[0] == 'v' || content.str[0] == 'V')
+		error = normalize_ipvfuture(buf, &content);
+	else
+		error = normalize_ipv6(buf, &content);
+	approve_chara(buf, ']');
+
+	return error;
+}
+
static error_msg
normalize_host(struct uri_buffer *buf, struct sized_ustring *host)
{
unsigned char chr;
error_msg error;
+ if (host->len == 0)
+ return NULL;
+
+ if (host->str[0] == '[')
+ return normalize_ip_literal(buf, host);
+
for (c = 0; c < host->len; c++) {
chr = host->str[c];
if (is_uppercase(chr))
}
static error_msg
-normalize_port(struct uri_buffer *buf, struct sized_ustring *port,
+normalize_port(struct uri_buffer *buf, struct sized_ustring const *port,
struct schema_metadata const *schema)
{
array_index c;
}
static bool
-next_segment(struct sized_ustring *path, struct sized_ustring *segment)
+next_segment(struct sized_ustring const *path, struct sized_ustring *segment)
{
segment->str += segment->len + 1;
if (segment->str > (path->str + path->len))
}
static error_msg
-normalize_path(struct uri_buffer *buf, struct sized_ustring *path)
+normalize_path(struct uri_buffer *buf, struct sized_ustring const *path)
{
struct sized_ustring segment;
array_index i;
}
static error_msg
-normalize_post_path(struct uri_buffer *buf, struct sized_ustring *post,
+normalize_post_path(struct uri_buffer *buf, struct sized_ustring const *post,
char prefix)
{
array_index c;
(unsigned char *)dirty, URI_ALLOW_UNKNOWN_SCHEME, &normal \
));
-START_TEST(awkward_dot_dotting)
+START_TEST(test_awkward_dot_dotting)
{
char *normal;
}
END_TEST
-START_TEST(test_port)
+/*
+ * Host component, IP-literal form: what gets accepted (and how it is expected
+ * to be rewritten), and what gets rejected with EM_HOST_LITERAL.
+ */
+START_TEST(test_3986_host)
+{
+	char *normal;
+
+	printf("rfc3986#3.2.2: Host\n");
+
+	/* IPv6: accepted; expected output is compressed and lowercase */
+	TEST_NORMALIZE("https://[2001:db8::1]/", "https://[2001:db8::1]/");
+	TEST_NORMALIZE("https://[2001:0db8::1]/", "https://[2001:db8::1]/");
+	TEST_NORMALIZE("https://[2001:db8::0001]:1234/",
+	    "https://[2001:db8::1]:1234/");
+	TEST_NORMALIZE("https://[::]/", "https://[::]/");
+	TEST_NORMALIZE("https://[0::]/", "https://[::]/");
+	TEST_NORMALIZE("https://[2001:db8:0:0:0:0:0:1]/",
+	    "https://[2001:db8::1]/");
+	TEST_NORMALIZE(
+	    "https://[FFFF:FFFF:FFFF:FFFF:FFFF:FFFF:255.255.255.255]/",
+	    "https://[ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff]/");
+
+	/* Broken brackets, or content that is not an address */
+	TEST_NORMALIZE_FAIL("https://[]/", EM_HOST_LITERAL);
+	TEST_NORMALIZE_FAIL("https://[/", EM_HOST_LITERAL);
+	TEST_NORMALIZE_FAIL("https://[2001:db8::/", EM_HOST_LITERAL);
+	TEST_NORMALIZE_FAIL("https://[2001:db8::]a/", EM_HOST_LITERAL);
+	TEST_NORMALIZE_FAIL(
+	    "https://[FFFF:FFFF:FFFF:FFFF:FFFF:FFFF:255.255.255.2555]/",
+	    EM_HOST_LITERAL);
+	TEST_NORMALIZE_FAIL("https://[potato]/", EM_HOST_LITERAL);
+	TEST_NORMALIZE_FAIL("https://[2001:db8::1][2001:db8::1]/",
+	    EM_HOST_LITERAL);
+
+	/* IPvFuture: accepted, expected unchanged */
+	TEST_NORMALIZE("https://[v1.1]/", "https://[v1.1]/");
+	TEST_NORMALIZE("https://[v19.a-z.A_Z~0:9!$&'()*+,;=]/",
+	    "https://[v19.a-z.A_Z~0:9!$&'()*+,;=]/");
+
+	/* IPvFuture: missing version, missing dot, or missing value */
+	TEST_NORMALIZE_FAIL("https://[v]/", EM_HOST_LITERAL);
+	TEST_NORMALIZE_FAIL("https://[v1g]/", EM_HOST_LITERAL);
+	TEST_NORMALIZE_FAIL("https://[v.]/", EM_HOST_LITERAL);
+	TEST_NORMALIZE_FAIL("https://[v1.]/", EM_HOST_LITERAL);
+
+	/* IPvFuture: characters the value does not admit */
+	TEST_NORMALIZE_FAIL("https://[v1.%]/", EM_HOST_LITERAL);
+	TEST_NORMALIZE_FAIL("https://[v1.%31]/", EM_HOST_LITERAL);
+	TEST_NORMALIZE_FAIL("https://[v1./]/", EM_HOST_LITERAL);
+	TEST_NORMALIZE_FAIL("https://[v1.?]/", EM_HOST_LITERAL);
+	TEST_NORMALIZE_FAIL("https://[v1.#]/", EM_HOST_LITERAL);
+	TEST_NORMALIZE_FAIL("https://[v1.[]/", EM_HOST_LITERAL);
+	TEST_NORMALIZE_FAIL("https://[v1.]]/", EM_HOST_LITERAL);
+	TEST_NORMALIZE_FAIL("https://[v1.@]/", EM_HOST_LITERAL);
+}
+END_TEST
+
+START_TEST(test_3986_port)
{
char *normal;
}
END_TEST
-START_TEST(pct_encoding)
+START_TEST(test_3986_pct_encoding)
{
char *normal;
}
END_TEST
-START_TEST(reserved_unchanged)
+START_TEST(test_3986_reserved)
{
char *normal;
}
END_TEST
-START_TEST(test_query)
+START_TEST(test_3986_query)
{
char *normal;
}
END_TEST
-START_TEST(test_fragment)
+START_TEST(test_3986_fragment)
{
char *normal;
}
END_TEST
-START_TEST(lowercase_scheme_and_host)
+START_TEST(test_3986_lowercase_scheme_and_host)
{
char *normal;
}
END_TEST
-START_TEST(decode_unreserved_characters)
+START_TEST(test_3986_unreserved_characters)
{
char *normal;
}
END_TEST
-START_TEST(path_segment_normalization)
+START_TEST(test_3986_path_segment_normalization)
{
char *normal;
}
END_TEST
-START_TEST(all_the_above_combined)
+START_TEST(test_3986_aggregated)
{
char *normal;
}
END_TEST
-START_TEST(scheme_based_normalization)
+START_TEST(test_3986_scheme_based_normalization)
{
char *normal;
}
END_TEST
-START_TEST(https_grammar)
+START_TEST(test_https_grammar)
{
char *normal;
}
END_TEST
-START_TEST(https_default_port)
+START_TEST(test_https_default_port)
{
char *normal;
}
END_TEST
-START_TEST(disallow_http_empty_host)
+START_TEST(test_https_disallow_empty_host)
{
char *normal;
}
END_TEST
-START_TEST(provide_default_path)
+START_TEST(test_https_provide_default_path)
{
char *normal;
}
END_TEST
-START_TEST(scheme_and_host_lowercase)
+START_TEST(test_https_scheme_and_host_lowercase)
{
char *normal;
}
END_TEST
-START_TEST(not_reserved_not_pct_encoded)
+START_TEST(test_https_not_reserved_not_pct_encoded)
{
char *normal;
}
END_TEST
-START_TEST(aggregated_423)
+START_TEST(test_https_aggregated)
{
char *normal;
}
END_TEST
-START_TEST(disallow_https_userinfo)
+START_TEST(test_https_disallow_userinfo)
{
char *normal;
}
END_TEST
-START_TEST(rsync_grammar)
+START_TEST(test_rsync_grammar)
{
char *normal;
TEST_NORMALIZE("rsync://a.b.c/m/r", "rsync://a.b.c/m/r");
TEST_NORMALIZE("rsync://user@a.b.c:1234", "rsync://user@a.b.c:1234/");
TEST_NORMALIZE("rsync://a.b.c", "rsync://a.b.c/");
- TEST_NORMALIZE_FAIL("rsync://[@a.b.c", EM_USERINFO_BADCHR);
+ TEST_NORMALIZE_FAIL("rsync://]@a.b.c", EM_USERINFO_BADCHR);
/* hier-part = path-absolute */
/* ie. "rsync:/" [ pchar+ ( "/" pchar* )* ] */
}
END_TEST
-START_TEST(rsync_default_port)
+START_TEST(test_rsync_default_port)
{
char *normal;
misc = tcase_create("Miscellaneous");
tcase_add_test(misc, test_rewind);
tcase_add_test(misc, test_unknown_protocols);
- tcase_add_test(misc, awkward_dot_dotting);
+ tcase_add_test(misc, test_awkward_dot_dotting);
tcase_add_test(misc, test_same_origin);
tcase_add_test(misc, test_utf8);
generic = tcase_create("RFC 3986 (generic URI)");
- tcase_add_test(generic, pct_encoding);
- tcase_add_test(generic, reserved_unchanged);
- tcase_add_test(generic, test_port);
- tcase_add_test(generic, test_query);
- tcase_add_test(generic, test_fragment);
- tcase_add_test(generic, lowercase_scheme_and_host);
- tcase_add_test(generic, decode_unreserved_characters);
- tcase_add_test(generic, path_segment_normalization);
- tcase_add_test(generic, all_the_above_combined);
- tcase_add_test(generic, scheme_based_normalization);
+ tcase_add_test(generic, test_3986_pct_encoding);
+ tcase_add_test(generic, test_3986_reserved);
+ tcase_add_test(generic, test_3986_host);
+ tcase_add_test(generic, test_3986_port);
+ tcase_add_test(generic, test_3986_query);
+ tcase_add_test(generic, test_3986_fragment);
+ tcase_add_test(generic, test_3986_lowercase_scheme_and_host);
+ tcase_add_test(generic, test_3986_unreserved_characters);
+ tcase_add_test(generic, test_3986_path_segment_normalization);
+ tcase_add_test(generic, test_3986_aggregated);
+ tcase_add_test(generic, test_3986_scheme_based_normalization);
https = tcase_create("RFC 9110 (https)");
- tcase_add_test(https, https_grammar);
- tcase_add_test(https, https_default_port);
- tcase_add_test(https, disallow_http_empty_host);
- tcase_add_test(https, provide_default_path);
- tcase_add_test(https, scheme_and_host_lowercase);
- tcase_add_test(https, not_reserved_not_pct_encoded);
- tcase_add_test(https, aggregated_423);
- tcase_add_test(https, disallow_https_userinfo);
+ tcase_add_test(https, test_https_grammar);
+ tcase_add_test(https, test_https_default_port);
+ tcase_add_test(https, test_https_disallow_empty_host);
+ tcase_add_test(https, test_https_provide_default_path);
+ tcase_add_test(https, test_https_scheme_and_host_lowercase);
+ tcase_add_test(https, test_https_not_reserved_not_pct_encoded);
+ tcase_add_test(https, test_https_aggregated);
+ tcase_add_test(https, test_https_disallow_userinfo);
rsync = tcase_create("RFC 5781 (rsync)");
- tcase_add_test(rsync, rsync_grammar);
- tcase_add_test(rsync, rsync_default_port);
+ tcase_add_test(rsync, test_rsync_grammar);
+ tcase_add_test(rsync, test_rsync_default_port);
suite = suite_create("url");
suite_add_tcase(suite, misc);