git.ipfire.org Git - thirdparty/FORT-validator.git/commitdiff
URI normalization: Allow and normalize IP literals in host
author Alberto Leiva Popper <ydahhrk@gmail.com>
Thu, 8 May 2025 22:33:54 +0000 (16:33 -0600)
committer Alberto Leiva Popper <ydahhrk@gmail.com>
Thu, 8 May 2025 22:33:54 +0000 (16:33 -0600)
src/types/uri.c
test/types/uri_test.c

index 9d010c4ed4a1ab4593934e1168fb8a00915d6fc1..c1070a1eec0b7edfb50d8cc1aa6d5484ca1a164d 100644 (file)
@@ -7,10 +7,6 @@
 #include "log.h"
 #include "types/path.h"
 
-/*
- * XXX IPv6 addresses
- */
-
 #define URI_ALLOW_UNKNOWN_SCHEME (1 << 0)
 
 static error_msg EM_SCHEME_EMPTY = "Scheme seems empty";
@@ -26,6 +22,7 @@ static error_msg EM_USERINFO_BADCHR = "Illegal character in userinfo component";
 static error_msg EM_USERINFO_DISALLOWED = "Protocol disallows userinfo";
 static error_msg EM_HOST_BADCHR = "Illegal character in host component";
 static error_msg EM_HOST_EMPTY = "Protocol disallows empty host";
+static error_msg EM_HOST_LITERAL = "Unparseable IP literal in the host";
 static error_msg EM_PORT_BADCHR = "Illegal non-digit character in port component";
 static error_msg EM_PORT_RANGE = "Port value is out of range";
 static error_msg EM_PATH_BADCHR = "Illegal character in path component";
@@ -119,6 +116,12 @@ is_digit(unsigned char chr)
        return '0' <= chr && chr <= '9';
 }
 
+/*
+ * True if @chr is accepted by any of is_digit(), is_lowercase_hex() or
+ * is_uppercase_hex(); the helpers are tried in that order.
+ */
+static bool
+is_hexdigit(unsigned char chr)
+{
+       if (is_digit(chr))
+               return true;
+       if (is_lowercase_hex(chr))
+               return true;
+       return is_uppercase_hex(chr);
+}
+
 static bool
 is_symbol(unsigned char chr, char const *symbols)
 {
@@ -156,26 +159,44 @@ static void
 collect_authority(unsigned char const *auth, unsigned char const **at,
     unsigned char const **colon, unsigned char const **end)
 {
+       bool v6skip;
+
        *at = NULL;
        *colon = NULL;
+       v6skip = false;
 
        for (; true; auth++) {
-               switch (auth[0]) {
-               case '/':
-               case '?':
-               case '#':
-               case '\0':
-                       *end = auth;
-                       return;
-               case '@':
-                       if ((*at) == NULL) {
-                               *colon = NULL; /* Was a password if not null */
-                               *at = auth;
+               if (v6skip) {
+                       switch (auth[0]) {
+                       case ']':
+                               v6skip = false;
+                               continue;
+                       case '\0':
+                               *end = auth;
+                               return;
+                       }
+               } else {
+                       switch (auth[0]) {
+                       case '/':
+                       case '?':
+                       case '#':
+                       case '\0':
+                               *end = auth;
+                               return;
+                       case '@':
+                               if ((*at) == NULL) {
+                                       /* Was a password if not null */
+                                       *colon = NULL;
+                                       *at = auth;
+                               }
+                               break;
+                       case ':':
+                               *colon = auth;
+                               break;
+                       case '[':
+                               v6skip = true;
+                               break;
                        }
-                       break;
-               case ':':
-                       *colon = auth;
-                       break;
                }
        }
 }
@@ -211,7 +232,7 @@ collect_fragment(unsigned char const *fragment, unsigned char const **end)
 }
 
 static error_msg
-normalize_scheme(struct uri_buffer *buf, struct sized_ustring *scheme)
+normalize_scheme(struct uri_buffer *buf, struct sized_ustring const *scheme)
 {
        unsigned char chr;
        array_index c;
@@ -275,7 +296,7 @@ uchar2hex(unsigned char chr, unsigned int *hex)
 }
 
 static error_msg
-approve_pct_encoded(struct uri_buffer *buf, struct sized_ustring *sstr,
+approve_pct_encoded(struct uri_buffer *buf, struct sized_ustring const *sstr,
     array_index *offset)
 {
        array_index off;
@@ -332,7 +353,7 @@ approve_bin(struct uri_buffer *buf, unsigned char chr)
 }
 
 static error_msg
-approve_utf8(struct uri_buffer *buf, struct sized_ustring *sstr,
+approve_utf8(struct uri_buffer *buf, struct sized_ustring const *sstr,
     array_index *offset)
 {
        array_index off;
@@ -385,7 +406,7 @@ approve_utf8(struct uri_buffer *buf, struct sized_ustring *sstr,
 }
 
 static error_msg
-normalize_userinfo(struct uri_buffer *buf, struct sized_ustring *userinfo)
+normalize_userinfo(struct uri_buffer *buf, struct sized_ustring const *userinfo)
 {
        array_index c;
        unsigned char chr;
@@ -418,6 +439,94 @@ normalize_userinfo(struct uri_buffer *buf, struct sized_ustring *userinfo)
        return NULL;
 }
 
+/*
+ * Validates the inside of an IPvFuture literal (brackets already removed)
+ * and hands every accepted character to approve_chara().
+ *
+ * Accepted shape: one leading byte (not inspected here; the caller is
+ * expected to have matched it), one or more hex digits, a '.', and then one
+ * or more characters that are unreserved, sub-delims or ':'.
+ * The leading byte is always emitted as a lowercase 'v'.
+ *
+ * Returns NULL on success and EM_HOST_LITERAL on any violation. Characters
+ * approved before the violation was found are not taken back.
+ */
+static error_msg
+normalize_ipvfuture(struct uri_buffer *buf, struct sized_ustring const *ipf)
+{
+       array_index dot;
+       array_index pos;
+       unsigned char chr;
+
+       approve_chara(buf, 'v');
+
+       /* Version: copy hex digits until the '.' separator shows up. */
+       for (dot = 1; dot < ipf->len; dot++) {
+               chr = ipf->str[dot];
+               if (is_hexdigit(chr)) {
+                       approve_chara(buf, chr);
+                       continue;
+               }
+               if (chr != '.')
+                       return EM_HOST_LITERAL;
+               break;
+       }
+
+       if (dot >= ipf->len)
+               return EM_HOST_LITERAL; /* Ran out of input; no '.' */
+       if (dot == 1)
+               return EM_HOST_LITERAL; /* '.' right after the 'v' */
+
+       approve_chara(buf, '.');
+
+       pos = dot + 1;
+       if (pos == ipf->len)
+               return EM_HOST_LITERAL; /* Nothing follows the '.' */
+
+       while (pos < ipf->len) {
+               chr = ipf->str[pos++];
+               if (!is_unreserved(chr) && !is_subdelim(chr) && chr != ':')
+                       return EM_HOST_LITERAL;
+               approve_chara(buf, chr);
+       }
+
+       return NULL;
+}
+
+/*
+ * Parses @v6 (the text between the brackets) as an IPv6 address and hands
+ * the inet_ntop() rendering of the parsed address to approve_chara(), one
+ * character at a time.
+ *
+ * Returns NULL on success; EM_HOST_LITERAL if the text does not fit in an
+ * INET6_ADDRSTRLEN buffer, or if inet_pton() or inet_ntop() reject it.
+ */
+static error_msg
+normalize_ipv6(struct uri_buffer *buf, struct sized_ustring const *v6)
+{
+       char text[INET6_ADDRSTRLEN];
+       char canon[INET6_ADDRSTRLEN];
+       struct in6_addr bin;
+       char const *cursor;
+
+       /* One byte of @text is reserved for the terminator. */
+       if ((INET6_ADDRSTRLEN - 1) < v6->len)
+               return EM_HOST_LITERAL;
+
+       /* inet_pton() wants a NUL-terminated string; @v6 is length-based. */
+       memcpy(text, v6->str, v6->len);
+       text[v6->len] = '\0';
+
+       if (inet_pton(AF_INET6, text, &bin) != 1)
+               return EM_HOST_LITERAL;
+       if (inet_ntop(AF_INET6, &bin, canon, sizeof(canon)) == NULL)
+               return EM_HOST_LITERAL;
+
+       for (cursor = canon; *cursor != '\0'; cursor++)
+               approve_chara(buf, *cursor);
+
+       return NULL;
+}
+
+/*
+ * Normalizes a bracketed IP literal host ("[" IPv6address / IPvFuture "]").
+ *
+ * @lit must span the whole host component, brackets included. The opening
+ * bracket, the normalized content and the closing bracket are handed to
+ * approve_chara(), in that order.
+ *
+ * Returns NULL on success, EM_HOST_LITERAL if the literal is too short, is
+ * not terminated by ']', or its content does not parse.
+ */
+static error_msg
+normalize_ip_literal(struct uri_buffer *buf, struct sized_ustring const *lit)
+{
+       struct sized_ustring content;
+       error_msg error;
+
+       /* Brackets plus at least one character of content. */
+       if (lit->len < 3)
+               return EM_HOST_LITERAL;
+       if (lit->str[lit->len - 1] != ']')
+               return EM_HOST_LITERAL;
+
+       content.str = lit->str + 1;
+       content.len = lit->len - 2;
+
+       approve_chara(buf, '[');
+       /*
+        * ABNF quoted literals are case-insensitive, so "V" introduces an
+        * IPvFuture just like "v" does. normalize_ipvfuture() does not look
+        * at the first byte and always emits a lowercase 'v'.
+        */
+       error = (content.str[0] == 'v' || content.str[0] == 'V')
+           ? normalize_ipvfuture(buf, &content)
+           : normalize_ipv6(buf, &content);
+       approve_chara(buf, ']');
+
+       return error;
+}
+
 static error_msg
 normalize_host(struct uri_buffer *buf, struct sized_ustring *host)
 {
@@ -425,6 +534,12 @@ normalize_host(struct uri_buffer *buf, struct sized_ustring *host)
        unsigned char chr;
        error_msg error;
 
+       if (host->len == 0)
+               return NULL;
+
+       if (host->str[0] == '[')
+               return normalize_ip_literal(buf, host);
+
        for (c = 0; c < host->len; c++) {
                chr = host->str[c];
                if (is_uppercase(chr))
@@ -449,7 +564,7 @@ normalize_host(struct uri_buffer *buf, struct sized_ustring *host)
 }
 
 static error_msg
-normalize_port(struct uri_buffer *buf, struct sized_ustring *port,
+normalize_port(struct uri_buffer *buf, struct sized_ustring const *port,
     struct schema_metadata const *schema)
 {
        array_index c;
@@ -489,7 +604,7 @@ strnchr(unsigned char const *str, size_t n, unsigned char chr)
 }
 
 static bool
-next_segment(struct sized_ustring *path, struct sized_ustring *segment)
+next_segment(struct sized_ustring const *path, struct sized_ustring *segment)
 {
        segment->str += segment->len + 1;
        if (segment->str > (path->str + path->len))
@@ -508,7 +623,7 @@ rewind_buffer(struct uri_buffer *buf, size_t limit)
 }
 
 static error_msg
-normalize_path(struct uri_buffer *buf, struct sized_ustring *path)
+normalize_path(struct uri_buffer *buf, struct sized_ustring const *path)
 {
        struct sized_ustring segment;
        array_index i;
@@ -562,7 +677,7 @@ normalize_path(struct uri_buffer *buf, struct sized_ustring *path)
 }
 
 static error_msg
-normalize_post_path(struct uri_buffer *buf, struct sized_ustring *post,
+normalize_post_path(struct uri_buffer *buf, struct sized_ustring const *post,
     char prefix)
 {
        array_index c;
index 3278e60971c89b47acdaebb3855c62bd5dcc1791..bac7831e3903fd23ca1bcb6da385719750e2d14a 100644 (file)
@@ -54,7 +54,7 @@ END_TEST
                (unsigned char *)dirty, URI_ALLOW_UNKNOWN_SCHEME, &normal \
        ));
 
-START_TEST(awkward_dot_dotting)
+START_TEST(test_awkward_dot_dotting)
 {
        char *normal;
 
@@ -70,7 +70,46 @@ START_TEST(awkward_dot_dotting)
 }
 END_TEST
 
-START_TEST(test_port)
+/*
+ * RFC 3986 section 3.2.2: bracketed IP literals in the host component.
+ * TEST_NORMALIZE(dirty, clean) expects @dirty to normalize into @clean;
+ * TEST_NORMALIZE_FAIL(dirty, msg) expects normalization to fail with @msg.
+ */
+START_TEST(test_3986_host)
+{
+       char *normal;
+
+       printf("rfc3986#3.2.2: Host\n");
+
+       /* Well-formed IPv6: leading zeroes and zero runs should collapse. */
+       TEST_NORMALIZE("https://[2001:db8::1]/", "https://[2001:db8::1]/");
+       TEST_NORMALIZE("https://[2001:0db8::1]/", "https://[2001:db8::1]/");
+       TEST_NORMALIZE("https://[2001:db8::0001]:1234/", "https://[2001:db8::1]:1234/");
+       TEST_NORMALIZE("https://[::]/", "https://[::]/");
+       TEST_NORMALIZE("https://[0::]/", "https://[::]/");
+       TEST_NORMALIZE("https://[2001:db8:0:0:0:0:0:1]/", "https://[2001:db8::1]/");
+
+       /* Broken bracketing: empty, unclosed, or followed by garbage. */
+       TEST_NORMALIZE_FAIL("https://[]/", EM_HOST_LITERAL);
+       TEST_NORMALIZE_FAIL("https://[/", EM_HOST_LITERAL);
+       TEST_NORMALIZE_FAIL("https://[2001:db8::/", EM_HOST_LITERAL);
+       TEST_NORMALIZE_FAIL("https://[2001:db8::]a/", EM_HOST_LITERAL);
+       /* IPvFuture: "v" hex-digits "." then unreserved, sub-delims or ':'. */
+       TEST_NORMALIZE_FAIL("https://[v]/", EM_HOST_LITERAL);
+       TEST_NORMALIZE_FAIL("https://[v1g]/", EM_HOST_LITERAL);
+       TEST_NORMALIZE_FAIL("https://[v.]/", EM_HOST_LITERAL);
+       TEST_NORMALIZE_FAIL("https://[v1.]/", EM_HOST_LITERAL);
+       TEST_NORMALIZE("https://[v1.1]/", "https://[v1.1]/");
+       TEST_NORMALIZE("https://[v19.a-z.A_Z~0:9!$&'()*+,;=]/", "https://[v19.a-z.A_Z~0:9!$&'()*+,;=]/");
+       TEST_NORMALIZE_FAIL("https://[v1.%]/", EM_HOST_LITERAL);
+       TEST_NORMALIZE_FAIL("https://[v1.%31]/", EM_HOST_LITERAL);
+       TEST_NORMALIZE_FAIL("https://[v1./]/", EM_HOST_LITERAL);
+       TEST_NORMALIZE_FAIL("https://[v1.?]/", EM_HOST_LITERAL);
+       TEST_NORMALIZE_FAIL("https://[v1.#]/", EM_HOST_LITERAL);
+       TEST_NORMALIZE_FAIL("https://[v1.[]/", EM_HOST_LITERAL);
+       TEST_NORMALIZE_FAIL("https://[v1.]]/", EM_HOST_LITERAL);
+       TEST_NORMALIZE_FAIL("https://[v1.@]/", EM_HOST_LITERAL);
+       /* IPv6 with a dotted-quad tail, an out-of-range tail, and nonsense. */
+       TEST_NORMALIZE("https://[FFFF:FFFF:FFFF:FFFF:FFFF:FFFF:255.255.255.255]/", "https://[ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff]/");
+       TEST_NORMALIZE_FAIL("https://[FFFF:FFFF:FFFF:FFFF:FFFF:FFFF:255.255.255.2555]/", EM_HOST_LITERAL);
+       TEST_NORMALIZE_FAIL("https://[potato]/", EM_HOST_LITERAL);
+
+       /* Two literals back to back are not a single host. */
+       TEST_NORMALIZE_FAIL("https://[2001:db8::1][2001:db8::1]/", EM_HOST_LITERAL);
+}
+END_TEST
+
+START_TEST(test_3986_port)
 {
        char *normal;
 
@@ -84,7 +123,7 @@ START_TEST(test_port)
 }
 END_TEST
 
-START_TEST(pct_encoding)
+START_TEST(test_3986_pct_encoding)
 {
        char *normal;
 
@@ -285,7 +324,7 @@ START_TEST(test_unknown_protocols)
 }
 END_TEST
 
-START_TEST(reserved_unchanged)
+START_TEST(test_3986_reserved)
 {
        char *normal;
 
@@ -307,7 +346,7 @@ START_TEST(reserved_unchanged)
 }
 END_TEST
 
-START_TEST(test_query)
+START_TEST(test_3986_query)
 {
        char *normal;
 
@@ -331,7 +370,7 @@ START_TEST(test_query)
 }
 END_TEST
 
-START_TEST(test_fragment)
+START_TEST(test_3986_fragment)
 {
        char *normal;
 
@@ -356,7 +395,7 @@ START_TEST(test_fragment)
 }
 END_TEST
 
-START_TEST(lowercase_scheme_and_host)
+START_TEST(test_3986_lowercase_scheme_and_host)
 {
        char *normal;
 
@@ -372,7 +411,7 @@ START_TEST(lowercase_scheme_and_host)
 }
 END_TEST
 
-START_TEST(decode_unreserved_characters)
+START_TEST(test_3986_unreserved_characters)
 {
        char *normal;
 
@@ -383,7 +422,7 @@ START_TEST(decode_unreserved_characters)
 }
 END_TEST
 
-START_TEST(path_segment_normalization)
+START_TEST(test_3986_path_segment_normalization)
 {
        char *normal;
 
@@ -415,7 +454,7 @@ START_TEST(path_segment_normalization)
 }
 END_TEST
 
-START_TEST(all_the_above_combined)
+START_TEST(test_3986_aggregated)
 {
        char *normal;
 
@@ -426,7 +465,7 @@ START_TEST(all_the_above_combined)
 }
 END_TEST
 
-START_TEST(scheme_based_normalization)
+START_TEST(test_3986_scheme_based_normalization)
 {
        char *normal;
 
@@ -437,7 +476,7 @@ START_TEST(scheme_based_normalization)
 }
 END_TEST
 
-START_TEST(https_grammar)
+START_TEST(test_https_grammar)
 {
        char *normal;
 
@@ -458,7 +497,7 @@ START_TEST(https_grammar)
 }
 END_TEST
 
-START_TEST(https_default_port)
+START_TEST(test_https_default_port)
 {
        char *normal;
 
@@ -477,7 +516,7 @@ START_TEST(https_default_port)
 }
 END_TEST
 
-START_TEST(disallow_http_empty_host)
+START_TEST(test_https_disallow_empty_host)
 {
        char *normal;
 
@@ -497,7 +536,7 @@ START_TEST(disallow_http_empty_host)
 }
 END_TEST
 
-START_TEST(provide_default_path)
+START_TEST(test_https_provide_default_path)
 {
        char *normal;
 
@@ -508,7 +547,7 @@ START_TEST(provide_default_path)
 }
 END_TEST
 
-START_TEST(scheme_and_host_lowercase)
+START_TEST(test_https_scheme_and_host_lowercase)
 {
        char *normal;
 
@@ -520,7 +559,7 @@ START_TEST(scheme_and_host_lowercase)
 }
 END_TEST
 
-START_TEST(not_reserved_not_pct_encoded)
+START_TEST(test_https_not_reserved_not_pct_encoded)
 {
        char *normal;
 
@@ -572,7 +611,7 @@ START_TEST(not_reserved_not_pct_encoded)
 }
 END_TEST
 
-START_TEST(aggregated_423)
+START_TEST(test_https_aggregated)
 {
        char *normal;
 
@@ -584,7 +623,7 @@ START_TEST(aggregated_423)
 }
 END_TEST
 
-START_TEST(disallow_https_userinfo)
+START_TEST(test_https_disallow_userinfo)
 {
        char *normal;
 
@@ -596,7 +635,7 @@ START_TEST(disallow_https_userinfo)
 }
 END_TEST
 
-START_TEST(rsync_grammar)
+START_TEST(test_rsync_grammar)
 {
        char *normal;
 
@@ -621,7 +660,7 @@ START_TEST(rsync_grammar)
        TEST_NORMALIZE("rsync://a.b.c/m/r", "rsync://a.b.c/m/r");
        TEST_NORMALIZE("rsync://user@a.b.c:1234", "rsync://user@a.b.c:1234/");
        TEST_NORMALIZE("rsync://a.b.c", "rsync://a.b.c/");
-       TEST_NORMALIZE_FAIL("rsync://[@a.b.c", EM_USERINFO_BADCHR);
+       TEST_NORMALIZE_FAIL("rsync://]@a.b.c", EM_USERINFO_BADCHR);
 
        /* hier-part     = path-absolute */
        /* ie. "rsync:/" [ pchar+ ( "/" pchar* )* ] */
@@ -649,7 +688,7 @@ START_TEST(rsync_grammar)
 }
 END_TEST
 
-START_TEST(rsync_default_port)
+START_TEST(test_rsync_default_port)
 {
        char *normal;
 
@@ -669,35 +708,36 @@ static Suite *create_suite(void)
        misc = tcase_create("Miscellaneous");
        tcase_add_test(misc, test_rewind);
        tcase_add_test(misc, test_unknown_protocols);
-       tcase_add_test(misc, awkward_dot_dotting);
+       tcase_add_test(misc, test_awkward_dot_dotting);
        tcase_add_test(misc, test_same_origin);
        tcase_add_test(misc, test_utf8);
 
        generic = tcase_create("RFC 3986 (generic URI)");
-       tcase_add_test(generic, pct_encoding);
-       tcase_add_test(generic, reserved_unchanged);
-       tcase_add_test(generic, test_port);
-       tcase_add_test(generic, test_query);
-       tcase_add_test(generic, test_fragment);
-       tcase_add_test(generic, lowercase_scheme_and_host);
-       tcase_add_test(generic, decode_unreserved_characters);
-       tcase_add_test(generic, path_segment_normalization);
-       tcase_add_test(generic, all_the_above_combined);
-       tcase_add_test(generic, scheme_based_normalization);
+       tcase_add_test(generic, test_3986_pct_encoding);
+       tcase_add_test(generic, test_3986_reserved);
+       tcase_add_test(generic, test_3986_host);
+       tcase_add_test(generic, test_3986_port);
+       tcase_add_test(generic, test_3986_query);
+       tcase_add_test(generic, test_3986_fragment);
+       tcase_add_test(generic, test_3986_lowercase_scheme_and_host);
+       tcase_add_test(generic, test_3986_unreserved_characters);
+       tcase_add_test(generic, test_3986_path_segment_normalization);
+       tcase_add_test(generic, test_3986_aggregated);
+       tcase_add_test(generic, test_3986_scheme_based_normalization);
 
        https = tcase_create("RFC 9110 (https)");
-       tcase_add_test(https, https_grammar);
-       tcase_add_test(https, https_default_port);
-       tcase_add_test(https, disallow_http_empty_host);
-       tcase_add_test(https, provide_default_path);
-       tcase_add_test(https, scheme_and_host_lowercase);
-       tcase_add_test(https, not_reserved_not_pct_encoded);
-       tcase_add_test(https, aggregated_423);
-       tcase_add_test(https, disallow_https_userinfo);
+       tcase_add_test(https, test_https_grammar);
+       tcase_add_test(https, test_https_default_port);
+       tcase_add_test(https, test_https_disallow_empty_host);
+       tcase_add_test(https, test_https_provide_default_path);
+       tcase_add_test(https, test_https_scheme_and_host_lowercase);
+       tcase_add_test(https, test_https_not_reserved_not_pct_encoded);
+       tcase_add_test(https, test_https_aggregated);
+       tcase_add_test(https, test_https_disallow_userinfo);
 
        rsync = tcase_create("RFC 5781 (rsync)");
-       tcase_add_test(rsync, rsync_grammar);
-       tcase_add_test(rsync, rsync_default_port);
+       tcase_add_test(rsync, test_rsync_grammar);
+       tcase_add_test(rsync, test_rsync_default_port);
 
        suite = suite_create("url");
        suite_add_tcase(suite, misc);