From: Alberto Leiva Popper Date: Thu, 8 May 2025 22:16:08 +0000 (-0600) Subject: URI normalization: Propagate error messages properly X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=ae2d3ef4bf2ce0f86752a49a79f0134752deb763;p=thirdparty%2FFORT-validator.git URI normalization: Propagate error messages properly Forgot to clean this up in the previous commit. Was printing errors in standard output as a quick fix. --- diff --git a/src/http.c b/src/http.c index e4551095..2f9bd504 100644 --- a/src/http.c +++ b/src/http.c @@ -249,15 +249,18 @@ static int check_same_origin(struct uri const *src, char const *redirect) { struct uri redirect_url; + error_msg errmsg; int error; - error = uri_init(&redirect_url, redirect); - if (error) - return error; + errmsg = uri_init(&redirect_url, redirect); + if (errmsg) + return pr_val_err("Cannot parse redirect '%s' as a URI: %s", + redirect, errmsg); - if (!uri_same_origin(src, &redirect_url)) - error = pr_val_err("%s is redirecting to %s; disallowing because of different origin.", - uri_str(src), uri_str(&redirect_url)); + error = uri_same_origin(src, &redirect_url) + ? 
0 + : pr_val_err("%s is redirecting to %s; disallowing because of different origin.", + uri_str(src), uri_str(&redirect_url)); uri_cleanup(&redirect_url); return error; diff --git a/src/init.c b/src/init.c index 665f12d5..ef6642c1 100644 --- a/src/init.c +++ b/src/init.c @@ -1,5 +1,6 @@ #include "init.h" +#include #include "config.h" #include "http.h" #include "types/path.h" @@ -9,23 +10,25 @@ fetch_url(char const *url, char const *filename) { struct uri uri; char *path; + error_msg errmsg; int error; - error = uri_init(&uri, url); - if (error) - return error; + errmsg = uri_init(&uri, url); + if (errmsg) { + fprintf(stderr, "Invalid URI '%s': %s", url, errmsg); + return EINVAL; + } path = path_join(config_get_tal(), filename); error = http_download(&uri, path, 0, NULL); - if (error) { + if (error) fprintf(stderr, "Couldn't fetch '%s': %s\n", path, strerror(abs(error))); - goto end; - } + else + fprintf(stdout, "Successfully fetched '%s'!\n\n", path); - fprintf(stdout, "Successfully fetched '%s'!\n\n", path); -end: free(path); + free(path); uri_cleanup(&uri); return error; } diff --git a/src/json_util.c b/src/json_util.c index 65217ca2..9c1b8ae9 100644 --- a/src/json_util.c +++ b/src/json_util.c @@ -26,23 +26,23 @@ json_get_str(json_t *parent, char const *name, char const **result) return 0; } -/* Result needs to be cleant up. */ +/* @result needs cleanup. 
*/ int json_get_uri(json_t *parent, char const *name, struct uri *result) { char const *str; int error; + error_msg errmsg; memset(result, 0, sizeof(*result)); error = json_get_str(parent, name, &str); if (error) return error; - error = uri_init(result, str); - if (error) { - pr_op_err("Malformed URL: %s", str); - return -error; - } + errmsg = uri_init(result, str); + if (errmsg) + return pr_op_err("'%s' does not seem to be a URI: %s", + str, errmsg); return 0; } diff --git a/src/object/certificate.c b/src/object/certificate.c index 17f453fe..5b9e8774 100644 --- a/src/object/certificate.c +++ b/src/object/certificate.c @@ -1391,6 +1391,7 @@ gn2uri(GENERAL_NAME *ad, struct uri *uri) int ptype; char *str; int error; + error_msg errmsg; asn1str = GENERAL_NAME_get0_value(ad, &ptype); if (ptype != GEN_URI) { @@ -1409,10 +1410,14 @@ gn2uri(GENERAL_NAME *ad, struct uri *uri) error = ia5s2string(asn1str, &str); if (error) return error; - error = uri_init(uri, str); - free(str); - return error; + errmsg = uri_init(uri, str); + if (errmsg) + pr_val_warn("Cannot parse GENERAL_NAME '%s' as a URI: %s", + str, errmsg); + + free(str); + return errmsg ? 
EINVAL : 0; } static int diff --git a/src/object/tal.c b/src/object/tal.c index d91b8186..f322cf50 100644 --- a/src/object/tal.c +++ b/src/object/tal.c @@ -52,6 +52,7 @@ read_content(char *fc /* File Content */, struct tal *tal) char *nl; /* New Line */ bool cr; /* Carriage return */ struct uri url; + error_msg error; /* Comment section */ while (fc[0] == '#') { @@ -72,13 +73,14 @@ read_content(char *fc /* File Content */, struct tal *tal) if (is_blank(fc)) break; - if (uri_init(&url, fc) == 0) { + error = uri_init(&url, fc); + if (!error) { if (uri_is_https(&url) || uri_is_rsync(&url)) uris_add(&tal->urls, &url); else uri_cleanup(&url); } else { - pr_op_debug("Cannot parse '%s' as a URI; ignoring.", fc); + pr_op_debug("Ignoring URI '%s': %s", fc, error); } fc = nl + cr + 1; diff --git a/src/print_file.c b/src/print_file.c index d9f73fd5..f4a7e19b 100644 --- a/src/print_file.c +++ b/src/print_file.c @@ -28,10 +28,14 @@ static BIO * __rsync2bio(char const *src, char const *dst) { struct uri url; + error_msg errmsg; int error; - if (uri_init(&url, src) != 0) + errmsg = uri_init(&url, src); + if (errmsg) { + pr_op_err("Invalid URI: %s", errmsg); return NULL; + } // XXX use the cache diff --git a/src/rrdp.c b/src/rrdp.c index 3d99c3a0..74bd7834 100644 --- a/src/rrdp.c +++ b/src/rrdp.c @@ -477,6 +477,7 @@ static int parse_file_metadata(xmlTextReaderPtr reader, struct file_metadata *meta) { xmlChar *xmlattr; + error_msg errmsg; int error; memset(meta, 0, sizeof(*meta)); @@ -484,10 +485,11 @@ parse_file_metadata(xmlTextReaderPtr reader, struct file_metadata *meta) xmlattr = parse_string(reader, RRDP_ATTR_URI); if (xmlattr == NULL) return -EINVAL; - error = uri_init(&meta->uri, (char const *)xmlattr); + errmsg = uri_init(&meta->uri, (char const *)xmlattr); xmlFree(xmlattr); - if (error) - return -EINVAL; + if (errmsg) + return pr_val_err("Cannot parse '%s' as a URI: %s", + xmlattr, errmsg); error = parse_hash(reader, &meta->hash, &meta->hash_len); if (error) { @@ -1473,6 
+1475,7 @@ json2files(json_t *jparent, char *parent, struct rrdp_state *state) char const *path; unsigned long id, max_id; int error; + error_msg errmsg; error = json_get_object(jparent, "files", &jfiles); if (error < 0) { @@ -1490,9 +1493,9 @@ json2files(json_t *jparent, char *parent, struct rrdp_state *state) pr_op_warn("RRDP file URL '%s' is not a string.", jkey); continue; } - error = uri_init(&url, jkey); - if (error) { - pr_op_warn("Cannot parse '%s' as a URI.", jkey); + errmsg = uri_init(&url, jkey); + if (errmsg) { + pr_op_warn("Cannot parse '%s' as a URI: %s", jkey, errmsg); continue; } @@ -1506,8 +1509,7 @@ json2files(json_t *jparent, char *parent, struct rrdp_state *state) continue; } - error = hex2ulong(path + parent_len + 1, &id); - if (error) { + if (hex2ulong(path + parent_len + 1, &id) != 0) { pr_op_warn("RRDP file '%s' is not a hexadecimal number.", path); uri_cleanup(&url); continue; diff --git a/src/types/uri.c b/src/types/uri.c index 886f91a0..b689ebe3 100644 --- a/src/types/uri.c +++ b/src/types/uri.c @@ -7,8 +7,32 @@ #include "log.h" #include "types/path.h" +/* + * XXX IPv6 addresses + * XXX UTF-8 + */ + #define URI_ALLOW_UNKNOWN_SCHEME (1 << 1) +static error_msg EM_SCHEME_EMPTY = "Scheme seems empty"; +static error_msg EM_SCHEME_1ST = "First scheme character is not a letter"; +static error_msg EM_SCHEME_NTH = "Scheme character is not letter, digit, plus, period or hyphen"; +static error_msg EM_SCHEME_NOCOLON = "Scheme not terminated"; +static error_msg EM_SCHEME_UNKNOWN = "Unknown scheme"; +static error_msg EM_SCHEME_NOTREMOTE = "Missing \"://\""; +static error_msg EM_PCT_NOTHEX = "Invalid hexadecimal digit in percent encoding"; +static error_msg EM_PCT_NOT3 = "Unterminated percent-encoding"; +static error_msg EM_USERINFO_BADCHR = "Illegal character in userinfo component"; +static error_msg EM_USERINFO_DISALLOWED = "Protocol disallows userinfo"; +static error_msg EM_HOST_BADCHR = "Illegal character in host component"; +static error_msg 
EM_HOST_EMPTY = "Protocol disallows empty host"; +static error_msg EM_PORT_BADCHR = "Illegal non-digit character in port component"; +static error_msg EM_PORT_RANGE = "Port value is out of range"; +static error_msg EM_PATH_BADCHR = "Illegal character in path component"; +static error_msg EM_QUERY_DISALLOWED = "Protocol disallows query"; +static error_msg EM_QF_BADCHR = "Illegal character in query or fragment"; +static error_msg EM_FRAGMENT_DISALLOWED = "Protocol disallows fragment"; + struct sized_string { char const *str; size_t len; @@ -39,7 +63,8 @@ struct schema_metadata const HTTPS = { struct schema_metadata const RSYNC = { .default_port = 873, .allow_userinfo = true, - .allow_empty_host = true, + /* Seems actually allowed, but RPKI doesn't like it. */ + .allow_empty_host = false, .allow_query = false, .allow_fragment = false, }; @@ -76,6 +101,18 @@ is_uppercase(char chr) return 'A' <= chr && chr <= 'Z'; } +static bool +is_lowercase_hex(char chr) +{ + return 'a' <= chr && chr <= 'f'; +} + +static bool +is_uppercase_hex(char chr) +{ + return 'A' <= chr && chr <= 'F'; +} + static bool is_digit(char chr) { @@ -103,13 +140,6 @@ to_uppercase(char chr) return is_lowercase(chr) ? 
(chr + ('A' - 'a')) : chr; } -static bool -invalid(char const *errmsg) -{ - printf("%s\n", errmsg); - return false; -} - static void approve_chara(struct uri_buffer *buf, char chr) { @@ -122,7 +152,7 @@ approve_chara(struct uri_buffer *buf, char chr) buf->dst[buf->d++] = chr; } -static bool +static void collect_authority(char const *auth, char const **at, char const **colon, char const **end) { @@ -136,7 +166,7 @@ collect_authority(char const *auth, char const **at, char const **colon, case '#': case '\0': *end = auth; - return true; + return; case '@': if ((*at) == NULL) { *colon = NULL; /* Was a password if not null */ @@ -180,22 +210,19 @@ collect_fragment(char const *fragment, char const **end) } } -static bool +static error_msg normalize_scheme(struct uri_buffer *buf, struct sized_string *scheme) { char chr; array_index c; - if (scheme->len == 0) - return invalid("Scheme seems empty."); - chr = scheme->str[0]; if (is_lowercase(chr)) approve_chara(buf, chr); else if (is_uppercase(chr)) approve_chara(buf, to_lowercase(chr)); else - return invalid("First character is not a letter."); + return EM_SCHEME_1ST; for (c = 1; c < scheme->len; c++) { chr = scheme->str[c]; @@ -204,13 +231,13 @@ normalize_scheme(struct uri_buffer *buf, struct sized_string *scheme) else if (is_uppercase(chr)) approve_chara(buf, to_lowercase(chr)); else - return invalid("Schema character is not letter, digit, plus, period or hyphen."); + return EM_SCHEME_NTH; } approve_chara(buf, ':'); approve_chara(buf, '/'); approve_chara(buf, '/'); - return true; + return NULL; } static bool @@ -228,27 +255,26 @@ is_subdelim(char chr) return is_symbol(chr, "!$&'()*+,;="); } -static bool +static error_msg char2hex(char chr, unsigned int *hex) { if (is_digit(chr)) { *hex = chr - '0'; - return true; + return NULL; } - if (is_uppercase(chr)) { + if (is_uppercase_hex(chr)) { *hex = chr - 'A' + 10; - return true; + return NULL; } - if (is_lowercase(chr)) { + if (is_lowercase_hex(chr)) { *hex = chr - 'a' + 10; - 
return true; + return NULL; } - printf("Invalid hex digit: %c\n", chr); - return invalid("Invalid hexadecimal digit."); + return EM_PCT_NOTHEX; } -static bool +static error_msg approve_pct_encoded(struct uri_buffer *buf, struct sized_string *sstr, array_index *offset) { @@ -256,84 +282,71 @@ approve_pct_encoded(struct uri_buffer *buf, struct sized_string *sstr, unsigned int hex1; unsigned int hex2; unsigned int val; + error_msg error; off = *offset; if (sstr->len - off < 3) - return invalid("Unterminated %-encoding."); + return EM_PCT_NOT3; - if (!char2hex(sstr->str[off + 1], &hex1)) - return false; - if (!char2hex(sstr->str[off + 2], &hex2)) - return false; + error = char2hex(sstr->str[off + 1], &hex1); + if (error) + return error; + error = char2hex(sstr->str[off + 2], &hex2); + if (error) + return error; val = (hex1 << 4) | hex2; if (is_unreserved(val)) { approve_chara(buf, val); *offset += 2; - return true; + return NULL; } approve_chara(buf, '%'); approve_chara(buf, to_uppercase(sstr->str[off + 1])); approve_chara(buf, to_uppercase(sstr->str[off + 2])); *offset += 2; - return true; -} - -static bool -handle_pchar(struct uri_buffer *buf, struct sized_string *sstr, - array_index *offset) -{ - char chr = sstr->str[*offset]; - - if (is_unreserved(chr)) - approve_chara(buf, chr); - else if (chr == '%') - approve_pct_encoded(buf, sstr, offset); - else if (is_subdelim(chr)) - approve_chara(buf, chr); - else if (chr == ':' || chr == '@') - approve_chara(buf, chr); - else - return false; - return true; + return NULL; } -static bool +static error_msg normalize_userinfo(struct uri_buffer *buf, struct sized_string *userinfo) { array_index c; char chr; + error_msg error; if (userinfo->len == 0) - return true; + return NULL; for (c = 0; c < userinfo->len; c++) { chr = userinfo->str[c]; if (is_unreserved(chr)) approve_chara(buf, chr); else if (chr == '%') { - if (!approve_pct_encoded(buf, userinfo, &c)) - return false; + error = approve_pct_encoded(buf, userinfo, &c); + if 
(error) + return error; } else if (is_subdelim(chr)) approve_chara(buf, chr); else if (chr == ':') approve_chara(buf, chr); else - return invalid("Illegal character in userinfo section."); + return EM_USERINFO_BADCHR; } approve_chara(buf, '@'); - return true; + return NULL; } -static bool +static error_msg normalize_host(struct uri_buffer *buf, struct sized_string *host) { array_index c; char chr; + error_msg error; for (c = 0; c < host->len; c++) { chr = host->str[c]; @@ -342,18 +355,19 @@ normalize_host(struct uri_buffer *buf, struct sized_string *host) else if (is_unreserved(chr)) approve_chara(buf, chr); else if (chr == '%') { - if (!approve_pct_encoded(buf, host, &c)) - return false; + error = approve_pct_encoded(buf, host, &c); + if (error) + return error; } else if (is_subdelim(chr)) approve_chara(buf, chr); else - return invalid("Illegal character in host section."); + return EM_HOST_BADCHR; } - return true; + return NULL; } -static bool +static error_msg normalize_port(struct uri_buffer *buf, struct sized_string *port, struct schema_metadata const *schema) { @@ -362,25 +376,25 @@ normalize_port(struct uri_buffer *buf, struct sized_string *port, unsigned int portnum; if (port->len == 0) - return true; + return NULL; portnum = 0; for (c = 0; c < port->len; c++) { chr = port->str[c]; if (!is_digit(chr)) - return invalid("Illegal non-digit character in port section."); + return EM_PORT_BADCHR; portnum = 10 * portnum + (chr - '0'); - if (portnum > 0xFFFF) - return invalid("Port value is too large."); + if (portnum == 0 || portnum > 0xFFFF) + return EM_PORT_RANGE; } if (schema && (portnum == schema->default_port)) - return true; + return NULL; approve_chara(buf, ':'); for (c = 0; c < port->len; c++) approve_chara(buf, port->str[c]); - return true; + return NULL; } static char const * @@ -412,17 +426,18 @@ rewind_buffer(struct uri_buffer *buf, size_t limit) ; } -static bool +static error_msg normalize_path(struct uri_buffer *buf, struct sized_string *path) { 
struct sized_string segment; array_index i; char chr; size_t limit; + error_msg error; if (path->len == 0) { approve_chara(buf, '/'); - return true; + return NULL; } segment.str = path->str; @@ -436,12 +451,13 @@ normalize_path(struct uri_buffer *buf, struct sized_string *path) if (is_unreserved(chr)) approve_chara(buf, chr); else if (chr == '%') { - if (!approve_pct_encoded(buf, &segment, &i)) - return false; + error = approve_pct_encoded(buf, &segment, &i); + if (error) + return error; } else if (is_subdelim(chr) || is_symbol(chr, ":@")) approve_chara(buf, chr); else - return invalid("Illegal character in path section."); + return EM_PATH_BADCHR; } if (buf->dst[buf->d - 2] == '/' && @@ -457,38 +473,55 @@ normalize_path(struct uri_buffer *buf, struct sized_string *path) if (limit == buf->d) approve_chara(buf, '/'); - return true; + return NULL; } -static bool +static error_msg normalize_post_path(struct uri_buffer *buf, struct sized_string *post, char prefix) { array_index c; char chr; + error_msg error; if (post->len == 0) - return true; + return NULL; approve_chara(buf, prefix); for (c = 1; c < post->len; c++) { - if (handle_pchar(buf, post, &c)) - continue; chr = post->str[c]; - if (chr == ':' || chr == '@') + if (is_unreserved(chr)) + approve_chara(buf, chr); + else if (chr == '%') { + error = approve_pct_encoded(buf, post, &c); + if (error) + return error; + } else if (is_subdelim(chr)) + approve_chara(buf, chr); + else if (is_symbol(chr, ":@/?")) approve_chara(buf, chr); else - return invalid("Illegal character in query section."); + return EM_QF_BADCHR; } - return true; + return NULL; +} + +static void +print_component(char const *name, struct sized_string *component) +{ + pr_clutter(" %s: %.*s (len:%zu)", name, (int)component->len, + component->str, component->len); } /* * See RFC 3986. Basically, "rsync://%61.b/./c/.././%64/." -> "rsync://a.b/d" + * + * The return value is an error message. 
If NULL, the URL was stored in @result + * and needs to be released. */ -static char * -url_normalize(char const *url, int flags) +static error_msg +url_normalize(char const *url, int flags, char **result) { struct sized_string scheme; struct sized_string authority; @@ -505,43 +538,36 @@ url_normalize(char const *url, int flags) struct schema_metadata const *meta; struct uri_buffer buf; + char const *error; pr_clutter("-----------------------"); pr_clutter("input: %s", url); cursor = strchr(url, ':'); - if (!cursor) { - printf("Schema not terminated\n"); - return NULL; - } + if (!cursor) + return EM_SCHEME_NOCOLON; + if (cursor == url) + return EM_SCHEME_EMPTY; scheme.str = url; scheme.len = cursor - url; - pr_clutter(" scheme: %.*s (len:%zu)", (int)scheme.len, scheme.str, scheme.len); + print_component("scheme", &scheme); + meta = get_metadata(&scheme); - if (!(flags & URI_ALLOW_UNKNOWN_SCHEME) && !meta) { - printf("Unknown scheme\n"); - return NULL; - } + if (!(flags & URI_ALLOW_UNKNOWN_SCHEME) && !meta) + return EM_SCHEME_UNKNOWN; - if (cursor[1] != '/' || cursor[2] != '/') { - printf("Missing \"://\"\n"); - return NULL; - } + if (cursor[1] != '/' || cursor[2] != '/') + return EM_SCHEME_NOTREMOTE; authority.str = cursor + 3; - if (!collect_authority(authority.str, &at, &colon, &cursor)) - return NULL; + collect_authority(authority.str, &at, &colon, &cursor); authority.len = cursor - authority.str; - pr_clutter(" authority: %.*s (len:%zu)", (int)authority.len, authority.str, authority.len); - if (authority.len == 0) - return NULL; + print_component("authority", &authority); if (at != NULL) { - if (meta && !meta->allow_userinfo) { - printf("Protocol disallows userinfo.\n"); - return NULL; - } + if (meta && !meta->allow_userinfo) + return EM_USERINFO_DISALLOWED; userinfo.str = authority.str; userinfo.len = at - authority.str; @@ -562,14 +588,12 @@ url_normalize(char const *url, int flags) port.len = 0; } - if (host.len == 0 && meta && !meta->allow_empty_host) { - 
printf("Protocol disallows empty host.\n"); - return NULL; - } + if (host.len == 0 && meta && !meta->allow_empty_host) + return EM_HOST_EMPTY; - pr_clutter(" userinfo: %.*s (len:%zu)", (int)userinfo.len, userinfo.str, userinfo.len); - pr_clutter(" host: %.*s (len:%zu)", (int)host.len, host.str, host.len); - pr_clutter(" port: %.*s (len:%zu)", (int)port.len, port.str, port.len); + print_component("userinfo", &userinfo); + print_component("host", &host); + print_component("port", &port); if (cursor[0] == '\0') { memset(&path, 0, sizeof(path)); @@ -588,10 +612,8 @@ url_normalize(char const *url, int flags) break; case '?': - if (meta && !meta->allow_query) { - printf("Protocol disallows query.\n"); - return NULL; - } + if (meta && !meta->allow_query) + return EM_QUERY_DISALLOWED; query.str = cursor; collect_query(query.str + 1, &cursor); @@ -611,10 +633,8 @@ url_normalize(char const *url, int flags) case '#': memset(&query, 0, sizeof(query)); -frag: if (meta && !meta->allow_fragment) { - printf("Protocol disallows fragment.\n"); - return NULL; - } +frag: if (meta && !meta->allow_fragment) + return EM_FRAGMENT_DISALLOWED; fragment.str = cursor; collect_fragment(fragment.str + 1, &cursor); fragment.len = cursor - fragment.str; @@ -626,9 +646,9 @@ frag: if (meta && !meta->allow_fragment) { } } - pr_clutter(" path: %.*s (len:%zu)", (int)path.len, path.str, path.len); - pr_clutter(" query: %.*s (len:%zu)", (int)query.len, query.str, query.len); - pr_clutter(" fragment: %.*s (len:%zu)", (int)fragment.len, fragment.str, fragment.len); + print_component("path", &path); + print_component("query", &query); + print_component("fragment", &fragment); buf.capacity = scheme.len + authority.len + path.len + query.len + fragment.len + 5; /* "://" + maybe '/' + '\0' */ @@ -636,51 +656,54 @@ frag: if (meta && !meta->allow_fragment) { buf.d = 0; pr_clutter("-> Normalizing scheme."); - if (!normalize_scheme(&buf, &scheme)) + error = normalize_scheme(&buf, &scheme); + if (error) goto 
cancel; pr_clutter("-> Normalizing userinfo."); - if (!normalize_userinfo(&buf, &userinfo)) + error = normalize_userinfo(&buf, &userinfo); + if (error) goto cancel; pr_clutter("-> Normalizing host."); - if (!normalize_host(&buf, &host)) + error = normalize_host(&buf, &host); + if (error) goto cancel; pr_clutter("-> Normalizing port."); - if (!normalize_port(&buf, &port, meta)) + error = normalize_port(&buf, &port, meta); + if (error) goto cancel; pr_clutter("-> Normalizing path."); - if (!normalize_path(&buf, &path)) + error = normalize_path(&buf, &path); + if (error) goto cancel; pr_clutter("-> Normalizing query."); - if (!normalize_post_path(&buf, &query, '?')) + error = normalize_post_path(&buf, &query, '?'); + if (error) goto cancel; pr_clutter("-> Normalizing fragment."); - if (!normalize_post_path(&buf, &fragment, '#')) + error = normalize_post_path(&buf, &fragment, '#'); + if (error) goto cancel; approve_chara(&buf, '\0'); - return buf.dst; + *result = buf.dst; + return NULL; cancel: free(buf.dst); - return NULL; + return error; } -int +error_msg uri_init(struct uri *url, char const *str) { char *normal; + error_msg error; - normal = url_normalize(str, 0); - if (!normal) - return EINVAL; + error = url_normalize(str, 0, &normal); + if (error) + return error; __URI_INIT(url, normal); - - if (!uri_is_https(url) && !uri_is_rsync(url)) { - free(normal); - return ENOTSUP; - } - - return 0; + return NULL; } /* @str must already be normalized. 
*/ diff --git a/src/types/uri.h b/src/types/uri.h index 601e0490..2fa555bf 100644 --- a/src/types/uri.h +++ b/src/types/uri.h @@ -13,7 +13,9 @@ struct uri { size_t _len; }; -int uri_init(struct uri *, char const *); +typedef char const *error_msg; + +error_msg uri_init(struct uri *, char const *); void __uri_init(struct uri *, char const *, size_t); #define __URI_INIT(uri, str) __uri_init(uri, str, strlen(str)) void uri_copy(struct uri *, struct uri const *); diff --git a/test/cache_test.c b/test/cache_test.c index de5e554c..4548d6b1 100644 --- a/test/cache_test.c +++ b/test/cache_test.c @@ -111,7 +111,7 @@ run_dl_rsync(char *caRepository, int expected_err, unsigned int expected_calls) struct sia_uris sias = { 0 }; struct cache_cage *cage; - ck_assert_int_eq(0, uri_init(&sias.caRepository, caRepository)); + ck_assert_ptr_eq(NULL, uri_init(&sias.caRepository, caRepository)); cage = NULL; rsync_counter = 0; @@ -149,7 +149,7 @@ run_dl_https(char const *url, unsigned int expected_calls, struct uri uri; char const *result; - ck_assert_int_eq(0, uri_init(&uri, url)); + ck_assert_ptr_eq(NULL, uri_init(&uri, url)); rsync_counter = 0; https_counter = 0; @@ -173,7 +173,7 @@ ck_cage(struct cache_cage *cage, char const *url, struct uri uri; struct cache_node const *bkp; - ck_assert_int_eq(0, uri_init(&uri, url)); + ck_assert_ptr_eq(NULL, uri_init(&uri, url)); ck_assert_str(refresh, cage_map_file(cage, &uri)); @@ -213,11 +213,11 @@ queue_commit(char const *rpkiNotify, char const *caRepository, struct uri rn, cr; if (rpkiNotify) - ck_assert_int_eq(0, uri_init(&rn, rpkiNotify)); + ck_assert_ptr_eq(NULL, uri_init(&rn, rpkiNotify)); else memset(&rn, 0, sizeof(rn)); if (caRepository) - ck_assert_int_eq(0, uri_init(&cr, caRepository)); + ck_assert_ptr_eq(NULL, uri_init(&cr, caRepository)); else memset(&cr, 0, sizeof(cr)); @@ -302,7 +302,7 @@ init_node_rsync(struct cache_node *node, char *url, char *path, { node->key.id = url; node->key.idlen = strlen(url); - ck_assert_int_eq(0, 
uri_init(&node->key.rsync, url)); + ck_assert_ptr_eq(NULL, uri_init(&node->key.rsync, url)); node->path = path; node->state = fresh ? DLS_FRESH : DLS_OUTDATED; /* XXX (test) */ node->dlerr = dlerr; @@ -315,7 +315,7 @@ init_node_https(struct cache_node *node, char *url, char *path, { node->key.id = url; node->key.idlen = strlen(url); - ck_assert_int_eq(0, uri_init(&node->key.http, url)); + ck_assert_ptr_eq(NULL, uri_init(&node->key.http, url)); node->path = path; node->state = fresh ? DLS_FRESH : DLS_OUTDATED; node->dlerr = dlerr; @@ -682,12 +682,12 @@ START_TEST(test_https_cleanup) } /* 3 */ - ck_assert_int_eq(0, uri_init(&map.url, "https://domain/rpki/ta50.cer")); + ck_assert_ptr_eq(NULL, uri_init(&map.url, "https://domain/rpki/ta50.cer")); map.path = pstrdup("https/50"); cache_commit_file(&map); map_cleanup(&map); - ck_assert_int_eq(0, uri_init(&map.url, "https://domain/rpki/ta52.cer")); + ck_assert_ptr_eq(NULL, uri_init(&map.url, "https://domain/rpki/ta52.cer")); map.path = pstrdup("https/52"); cache_commit_file(&map); map_cleanup(&map); @@ -701,12 +701,12 @@ START_TEST(test_https_cleanup) new_iteration(false); /* 4 */ - ck_assert_int_eq(0, uri_init(&map.url, "https://domain/rpki/ta50.cer")); + ck_assert_ptr_eq(NULL, uri_init(&map.url, "https://domain/rpki/ta50.cer")); map.path = pstrdup("fallback/0"); cache_commit_file(&map); map_cleanup(&map); - ck_assert_int_eq(0, uri_init(&map.url, "https://domain/rpki/ta51.cer")); + ck_assert_ptr_eq(NULL, uri_init(&map.url, "https://domain/rpki/ta51.cer")); map.path = pstrdup("https/51"); cache_commit_file(&map); map_cleanup(&map); @@ -816,12 +816,12 @@ START_TEST(test_context) dls[1] = SHDR("3") PBLSH("rsync://x.y.z/mod5/rpp3/a.cer", "Rm9ydAo=") STAIL; dls[2] = NULL; - ck_assert_int_eq(0, uri_init(&file_url, FILE_URL)); + ck_assert_ptr_eq(NULL, uri_init(&file_url, FILE_URL)); printf("1. 
1st CA succeeds on RRDP\n"); print_tree(); - ck_assert_int_eq(0, uri_init(&sias.rpkiNotify, RPKI_NOTIFY)); - ck_assert_int_eq(0, uri_init(&sias.caRepository, CA_REPOSITORY)); + ck_assert_ptr_eq(NULL, uri_init(&sias.rpkiNotify, RPKI_NOTIFY)); + ck_assert_ptr_eq(NULL, uri_init(&sias.caRepository, CA_REPOSITORY)); ck_assert_int_eq(0, cache_refresh_by_sias(&sias, &cage)); ck_assert_str_eq(RPKI_NOTIFY, uri_str(&cage->rpkiNotify)); ck_assert_str_eq(FILE_RRDP_PATH, cage_map_file(cage, &file_url)); @@ -849,7 +849,7 @@ START_TEST(test_context) rpp.files = pzalloc(sizeof(struct cache_mapping)); uri_copy(&rpp.files->url, &file_url); rpp.files->path = pstrdup(FILE_RRDP_PATH); - ck_assert_int_eq(0, uri_init(&sias.rpkiNotify, RPKI_NOTIFY)); + ck_assert_ptr_eq(NULL, uri_init(&sias.rpkiNotify, RPKI_NOTIFY)); cache_commit_rpp(&sias.rpkiNotify, &sias.caRepository, &rpp); rpp.nfiles = 1; @@ -869,7 +869,7 @@ START_TEST(test_context) ck_assert_int_eq(true, cage_disable_refresh(cage)); ck_assert_str_eq("fallback/1/0", cage_map_file(cage, &file_url)); - ck_assert_int_eq(0, uri_init(&sias.rpkiNotify, RPKI_NOTIFY)); + ck_assert_ptr_eq(NULL, uri_init(&sias.rpkiNotify, RPKI_NOTIFY)); ck_assert_int_eq(0, cache_refresh_by_sias(&sias, &cage)); ck_assert_str_eq(RPKI_NOTIFY, uri_str(&cage->rpkiNotify)); ck_assert_str_eq(FILE_RRDP_PATH, cage_map_file(cage, &file_url)); @@ -965,7 +965,7 @@ CACHE_FILE_ADD(struct rrdp_state *state, char const *url, char *path) { struct uri uri; - ck_assert_int_eq(0, uri_init(&uri, url)); + ck_assert_ptr_eq(NULL, uri_init(&uri, url)); ck_assert_ptr_ne(NULL, cache_file_add(state, &uri, pstrdup(path))); uri_cleanup(&uri); } diff --git a/test/rrdp_test.c b/test/rrdp_test.c index 7eaa97e1..03aa129d 100644 --- a/test/rrdp_test.c +++ b/test/rrdp_test.c @@ -403,7 +403,7 @@ START_TEST(test_parse_notification_ok) struct uri nurl; ck_assert_int_eq(0, relax_ng_init()); - ck_assert_int_eq(0, uri_init(&nurl, "https://host/notification.xml")); + ck_assert_ptr_eq(NULL, 
uri_init(&nurl, "https://host/notification.xml")); ck_assert_int_eq(0, parse_notification(&nurl, "resources/rrdp/notif-ok.xml", &notif)); uri_cleanup(&nurl); @@ -441,7 +441,7 @@ START_TEST(test_parse_notification_0deltas) struct uri nurl; ck_assert_int_eq(0, relax_ng_init()); - ck_assert_int_eq(0, uri_init(&nurl, "https://host/notification.xml")); + ck_assert_ptr_eq(NULL, uri_init(&nurl, "https://host/notification.xml")); ck_assert_int_eq(0, parse_notification(&nurl, "resources/rrdp/notif-0deltas.xml", &notif)); uri_cleanup(&nurl); @@ -467,7 +467,7 @@ START_TEST(test_parse_notification_large_serial) struct uri nurl; ck_assert_int_eq(0, relax_ng_init()); - ck_assert_int_eq(0, uri_init(&nurl, "https://host/notification.xml")); + ck_assert_ptr_eq(NULL, uri_init(&nurl, "https://host/notification.xml")); ck_assert_int_eq(0, parse_notification(&nurl, "resources/rrdp/notif-large-serial.xml", &notif)); uri_cleanup(&nurl); @@ -501,7 +501,7 @@ test_parse_notification_error(char *file) struct uri nurl; ck_assert_int_eq(0, relax_ng_init()); - ck_assert_int_eq(0, uri_init(&nurl, "https://host/notification.xml")); + ck_assert_ptr_eq(NULL, uri_init(&nurl, "https://host/notification.xml")); ck_assert_int_eq(-EINVAL, parse_notification(&nurl, file, &notif)); uri_cleanup(&nurl); diff --git a/test/rrdp_update_test.c b/test/rrdp_update_test.c index 29edf560..0766675f 100644 --- a/test/rrdp_update_test.c +++ b/test/rrdp_update_test.c @@ -91,7 +91,7 @@ START_TEST(startup) seq.pathlen = strlen(seq.prefix); seq.free_prefix = false; - ck_assert_int_eq(0, uri_init(&url, URL)); + ck_assert_ptr_eq(NULL, uri_init(&url, URL)); dls[0] = NHDR("3") NSS("https://host/9d-8/3/snapshot.xml", @@ -106,7 +106,7 @@ START_TEST(startup) ck_assert_uint_eq(true, changed); ck_file("rrdp/0/0"); /* "rrdp//" */ - ck_assert_int_eq(0, uri_init(&maps[0].url, "rsync://a/b/c.cer")); + ck_assert_ptr_eq(NULL, uri_init(&maps[0].url, "rsync://a/b/c.cer")); maps[0].path = "rrdp/0/0"; memset(&maps[1], 0, sizeof(maps[1])); 
ck_state(TEST_SESSION, "3", 1, maps, state); diff --git a/test/types/uri_test.c b/test/types/uri_test.c index e19b0482..aaa8320c 100644 --- a/test/types/uri_test.c +++ b/test/types/uri_test.c @@ -31,17 +31,24 @@ START_TEST(test_rewind) END_TEST #define TEST_NORMALIZE(dirty, clean) \ - normal = url_normalize(dirty, 0); \ + ck_assert_pstr_eq(NULL, url_normalize(dirty, 0, &normal)); \ ck_assert_str_eq(clean, normal); \ free(normal) #define TEST_NORMALIZE_AUS(dirty, clean) \ - normal = url_normalize(dirty, URI_ALLOW_UNKNOWN_SCHEME); \ + ck_assert_ptr_eq(NULL, url_normalize( \ + dirty, URI_ALLOW_UNKNOWN_SCHEME, &normal \ + )); \ ck_assert_str_eq(clean, normal); \ free(normal) -#define TEST_NORMALIZE_FAIL(dirty) \ - ck_assert_ptr_eq(NULL, url_normalize(dirty, 0)); +#define TEST_NORMALIZE_FAIL(dirty, error) \ + ck_assert_str_eq(error, url_normalize(dirty, 0, &normal)); + +#define TEST_NORMALIZE_FAIL_AUS(dirty, error) \ + ck_assert_str_eq(error, url_normalize( \ + dirty, URI_ALLOW_UNKNOWN_SCHEME, &normal \ + )); START_TEST(awkward_dot_dotting) { @@ -59,6 +66,82 @@ START_TEST(awkward_dot_dotting) } END_TEST +START_TEST(test_port) +{ + char *normal; + + printf("rfc3986#3.2.3: Port\n"); + + TEST_NORMALIZE_FAIL("https://a:-1/", EM_PORT_BADCHR); + TEST_NORMALIZE_FAIL("https://a:0/", EM_PORT_RANGE); + TEST_NORMALIZE("https://a:1/", "https://a:1/"); + TEST_NORMALIZE("https://a:65535/", "https://a:65535/"); + TEST_NORMALIZE_FAIL("https://a:65536/", EM_PORT_RANGE); +} +END_TEST + +START_TEST(pct_encoding) +{ + char *normal; + + printf("3986#2.1: Percent encoding\n"); + + TEST_NORMALIZE("https://%61/", "https://a/"); + TEST_NORMALIZE("https://%6f/", "https://o/"); + TEST_NORMALIZE("https://%6F/", "https://o/"); + TEST_NORMALIZE("https://%7C/", "https://%7C/"); + TEST_NORMALIZE("https://%7c/", "https://%7C/"); + + TEST_NORMALIZE_FAIL("https://%6G", EM_PCT_NOTHEX); + TEST_NORMALIZE_FAIL("https://%G6", EM_PCT_NOTHEX); + + /* Host */ + TEST_NORMALIZE_FAIL("https://%6", EM_PCT_NOT3); + 
TEST_NORMALIZE_FAIL("https://%", EM_PCT_NOT3); + TEST_NORMALIZE_FAIL("https://%6:", EM_PCT_NOT3); + TEST_NORMALIZE_FAIL("https://%:", EM_PCT_NOT3); + TEST_NORMALIZE_FAIL("https://%6/", EM_PCT_NOT3); + TEST_NORMALIZE_FAIL("https://%/", EM_PCT_NOT3); + TEST_NORMALIZE_FAIL("https://%6?", EM_PCT_NOT3); + TEST_NORMALIZE_FAIL("https://%?", EM_PCT_NOT3); + TEST_NORMALIZE_FAIL("https://%6#", EM_PCT_NOT3); + TEST_NORMALIZE_FAIL("https://%#", EM_PCT_NOT3); + + /* Userinfo */ + TEST_NORMALIZE("rsync://%61@a/", "rsync://a@a/"); + TEST_NORMALIZE_FAIL("rsync://%6@a", EM_PCT_NOT3); + TEST_NORMALIZE_FAIL("rsync://%@a", EM_PCT_NOT3); + + /* Port */ + TEST_NORMALIZE_FAIL("rsync://a:%31/", EM_PORT_BADCHR); + TEST_NORMALIZE_FAIL("rsync://a:%3", EM_PORT_BADCHR); + TEST_NORMALIZE_FAIL("rsync://a:%", EM_PORT_BADCHR); + + /* Path */ + TEST_NORMALIZE("https://a/%41", "https://a/A"); + TEST_NORMALIZE_FAIL("https://a/%4", EM_PCT_NOT3); + TEST_NORMALIZE_FAIL("https://a/%", EM_PCT_NOT3); + TEST_NORMALIZE_FAIL("https://a/%4/", EM_PCT_NOT3); + TEST_NORMALIZE_FAIL("https://a/%/", EM_PCT_NOT3); + TEST_NORMALIZE_FAIL("https://a/%4?", EM_PCT_NOT3); + TEST_NORMALIZE_FAIL("https://a/%?", EM_PCT_NOT3); + TEST_NORMALIZE_FAIL("https://a/%4#", EM_PCT_NOT3); + TEST_NORMALIZE_FAIL("https://a/%#", EM_PCT_NOT3); + + /* Query */ + TEST_NORMALIZE("https://a/?%30", "https://a/?0"); + TEST_NORMALIZE_FAIL("https://a/?%3", EM_PCT_NOT3); + TEST_NORMALIZE_FAIL("https://a/?%", EM_PCT_NOT3); + TEST_NORMALIZE_FAIL("https://a/?%3#", EM_PCT_NOT3); + TEST_NORMALIZE_FAIL("https://a/?%#", EM_PCT_NOT3); + + /* Fragment */ + TEST_NORMALIZE("https://a/#%30", "https://a/#0"); + TEST_NORMALIZE_FAIL("https://a/#%3", EM_PCT_NOT3); + TEST_NORMALIZE_FAIL("https://a/#%", EM_PCT_NOT3); +} +END_TEST + #define ck_assert_origin(expected, s1, s2) \ do { \ __URI_INIT(&u1, s1); \ @@ -96,10 +179,37 @@ START_TEST(test_unknown_protocols) printf("Unknown protocols\n"); - TEST_NORMALIZE_FAIL("httpz://a.b.c/d"); - 
TEST_NORMALIZE_FAIL("abcde://a.b.c/d"); + TEST_NORMALIZE_AUS("https://a.b.c/d", "https://a.b.c/d"); + TEST_NORMALIZE("https://a.b.c/d", "https://a.b.c/d"); + TEST_NORMALIZE_AUS("http://a.b.c/d", "http://a.b.c/d"); + TEST_NORMALIZE_FAIL("http://a.b.c/d", EM_SCHEME_UNKNOWN); + + TEST_NORMALIZE_FAIL("httpz://a.b.c/d", EM_SCHEME_UNKNOWN); + TEST_NORMALIZE_FAIL("abcde://a.b.c/d", EM_SCHEME_UNKNOWN); + TEST_NORMALIZE_FAIL("://a.b.c/d", EM_SCHEME_EMPTY); + TEST_NORMALIZE_FAIL("0abc://a.b.c/d", EM_SCHEME_UNKNOWN); + TEST_NORMALIZE_FAIL("9abc://a.b.c/d", EM_SCHEME_UNKNOWN); + TEST_NORMALIZE_FAIL("+abc://a.b.c/d", EM_SCHEME_UNKNOWN); + TEST_NORMALIZE_FAIL(".abc://a.b.c/d", EM_SCHEME_UNKNOWN); + TEST_NORMALIZE_FAIL("-abc://a.b.c/d", EM_SCHEME_UNKNOWN); + TEST_NORMALIZE_FAIL("a_b://a.b.c/d", EM_SCHEME_UNKNOWN); + TEST_NORMALIZE_FAIL("a~b://a.b.c/d", EM_SCHEME_UNKNOWN); + TEST_NORMALIZE_AUS("httpz://a.b.c/d", "httpz://a.b.c/d"); TEST_NORMALIZE_AUS("abcde://a.b.c/d", "abcde://a.b.c/d"); + TEST_NORMALIZE_FAIL_AUS("://a.b.c/d", EM_SCHEME_EMPTY); + TEST_NORMALIZE_FAIL_AUS("0abc://a.b.c/d", EM_SCHEME_1ST); + TEST_NORMALIZE_FAIL_AUS("9abc://a.b.c/d", EM_SCHEME_1ST); + TEST_NORMALIZE_FAIL_AUS("+abc://a.b.c/d", EM_SCHEME_1ST); + TEST_NORMALIZE_FAIL_AUS(".abc://a.b.c/d", EM_SCHEME_1ST); + TEST_NORMALIZE_FAIL_AUS("-abc://a.b.c/d", EM_SCHEME_1ST); + TEST_NORMALIZE_AUS("a0b://a.b.c/d", "a0b://a.b.c/d"); + TEST_NORMALIZE_AUS("a9b://a.b.c/d", "a9b://a.b.c/d"); + TEST_NORMALIZE_AUS("a+b://a.b.c/d", "a+b://a.b.c/d"); + TEST_NORMALIZE_AUS("a.b://a.b.c/d", "a.b://a.b.c/d"); + TEST_NORMALIZE_AUS("a-b://a.b.c/d", "a-b://a.b.c/d"); + TEST_NORMALIZE_FAIL_AUS("a_b://a.b.c/d", EM_SCHEME_NTH); + TEST_NORMALIZE_FAIL_AUS("a~b://a.b.c/d", EM_SCHEME_NTH); } END_TEST @@ -125,6 +235,55 @@ START_TEST(reserved_unchanged) } END_TEST +START_TEST(test_query) +{ + char *normal; + + printf("3986#3.4: Query\n"); + + TEST_NORMALIZE("https://a/?azAZ09-._~%31!$&'()*+,;=:@/?", "https://a/?azAZ09-._~1!$&'()*+,;=:@/?"); + 
TEST_NORMALIZE("https://a/?azAZ09-._~%31!$&'()*+,;=:@/?#", "https://a/?azAZ09-._~1!$&'()*+,;=:@/?#"); + + TEST_NORMALIZE_FAIL("https://a/?[", EM_QF_BADCHR); + TEST_NORMALIZE_FAIL("https://a/?]", EM_QF_BADCHR); + TEST_NORMALIZE_FAIL("https://a/? ", EM_QF_BADCHR); + TEST_NORMALIZE_FAIL("https://a/?\"", EM_QF_BADCHR); + TEST_NORMALIZE_FAIL("https://a/?<", EM_QF_BADCHR); + TEST_NORMALIZE_FAIL("https://a/?>", EM_QF_BADCHR); + TEST_NORMALIZE_FAIL("https://a/?\\", EM_QF_BADCHR); + TEST_NORMALIZE_FAIL("https://a/?^", EM_QF_BADCHR); + TEST_NORMALIZE_FAIL("https://a/?`", EM_QF_BADCHR); + TEST_NORMALIZE_FAIL("https://a/?{", EM_QF_BADCHR); + TEST_NORMALIZE_FAIL("https://a/?}", EM_QF_BADCHR); + TEST_NORMALIZE_FAIL("https://a/?|", EM_QF_BADCHR); +} +END_TEST + +START_TEST(test_fragment) +{ + char *normal; + + printf("3986#3.6: Fragment\n"); + + TEST_NORMALIZE("https://a/#azAZ09-._~%31!$&'()*+,;=:@/?", "https://a/#azAZ09-._~1!$&'()*+,;=:@/?"); + TEST_NORMALIZE("https://a/#azAZ09-._~%31!$&'()*+,;=:@/?", "https://a/#azAZ09-._~1!$&'()*+,;=:@/?"); + + TEST_NORMALIZE_FAIL("https://a/##", EM_QF_BADCHR); + TEST_NORMALIZE_FAIL("https://a/#[", EM_QF_BADCHR); + TEST_NORMALIZE_FAIL("https://a/#]", EM_QF_BADCHR); + TEST_NORMALIZE_FAIL("https://a/# ", EM_QF_BADCHR); + TEST_NORMALIZE_FAIL("https://a/#\"", EM_QF_BADCHR); + TEST_NORMALIZE_FAIL("https://a/#<", EM_QF_BADCHR); + TEST_NORMALIZE_FAIL("https://a/#>", EM_QF_BADCHR); + TEST_NORMALIZE_FAIL("https://a/#\\", EM_QF_BADCHR); + TEST_NORMALIZE_FAIL("https://a/#^", EM_QF_BADCHR); + TEST_NORMALIZE_FAIL("https://a/#`", EM_QF_BADCHR); + TEST_NORMALIZE_FAIL("https://a/#{", EM_QF_BADCHR); + TEST_NORMALIZE_FAIL("https://a/#}", EM_QF_BADCHR); + TEST_NORMALIZE_FAIL("https://a/#|", EM_QF_BADCHR); +} +END_TEST + START_TEST(lowercase_scheme_and_host) { char *normal; @@ -208,20 +367,22 @@ END_TEST START_TEST(https_grammar) { + char *normal; + printf("9110#4.2.2: https-URI = \"https\" \"://\" authority path-abempty [ \"?\" query ]\n"); printf(" authority = 
host [ \":\" port ]\n"); printf(" path-abempty = *( \"/\" segment )\n"); printf(" segment = *pchar\n"); - TEST_NORMALIZE_FAIL(""); - TEST_NORMALIZE_FAIL("h"); - TEST_NORMALIZE_FAIL("http"); - TEST_NORMALIZE_FAIL("https"); - TEST_NORMALIZE_FAIL("https:"); - TEST_NORMALIZE_FAIL("https:/"); - TEST_NORMALIZE_FAIL("https://"); - TEST_NORMALIZE_FAIL("https://a.β.c/"); - TEST_NORMALIZE_FAIL("https://a.b.c/β"); + TEST_NORMALIZE_FAIL("", EM_SCHEME_NOCOLON); + TEST_NORMALIZE_FAIL("h", EM_SCHEME_NOCOLON); + TEST_NORMALIZE_FAIL("http", EM_SCHEME_NOCOLON); + TEST_NORMALIZE_FAIL("https", EM_SCHEME_NOCOLON); + TEST_NORMALIZE_FAIL("https:", EM_SCHEME_NOTREMOTE); + TEST_NORMALIZE_FAIL("https:/", EM_SCHEME_NOTREMOTE); + TEST_NORMALIZE_FAIL("https://", EM_HOST_EMPTY); + TEST_NORMALIZE_FAIL("https://a.β.c/", EM_HOST_BADCHR); + TEST_NORMALIZE_FAIL("https://a.b.c/β", EM_PATH_BADCHR); /* I think everything else is already tested below. */ } @@ -254,15 +415,15 @@ START_TEST(disallow_http_empty_host) printf("(Also 9110#4.2.3: Empty path normalizes to '/')\n"); TEST_NORMALIZE("https://a", "https://a/"); - TEST_NORMALIZE_FAIL("https://"); + TEST_NORMALIZE_FAIL("https://", EM_HOST_EMPTY); TEST_NORMALIZE("https://a/f/g", "https://a/f/g"); - TEST_NORMALIZE_FAIL("https:///f/g"); + TEST_NORMALIZE_FAIL("https:///f/g", EM_HOST_EMPTY); TEST_NORMALIZE("https://a:1234/f/g", "https://a:1234/f/g"); - TEST_NORMALIZE_FAIL("https://:1234/f/g"); + TEST_NORMALIZE_FAIL("https://:1234/f/g", EM_HOST_EMPTY); TEST_NORMALIZE("https://a?123", "https://a/?123"); - TEST_NORMALIZE_FAIL("https://?123"); + TEST_NORMALIZE_FAIL("https://?123", EM_HOST_EMPTY); TEST_NORMALIZE("https://a#123", "https://a/#123"); - TEST_NORMALIZE_FAIL("https://#123"); + TEST_NORMALIZE_FAIL("https://#123", EM_HOST_EMPTY); } END_TEST @@ -360,8 +521,8 @@ START_TEST(disallow_https_userinfo) printf("9110#4.2.4: Disallow https userinfo\n"); TEST_NORMALIZE("https://c.d.e/f/g", "https://c.d.e/f/g"); - TEST_NORMALIZE_FAIL("https://a@c.d.e/f/g"); - 
TEST_NORMALIZE_FAIL("https://a:b@c.d.e/f/g"); + TEST_NORMALIZE_FAIL("https://a@c.d.e/f/g", EM_USERINFO_DISALLOWED); + TEST_NORMALIZE_FAIL("https://a:b@c.d.e/f/g", EM_USERINFO_DISALLOWED); } END_TEST @@ -372,50 +533,51 @@ START_TEST(rsync_grammar) printf("5781#2: rsync://[user@]host[:PORT]/Source\n"); printf("rsyncuri = \"rsync:\" hier-part\n"); - TEST_NORMALIZE_FAIL(""); - TEST_NORMALIZE_FAIL("r"); - TEST_NORMALIZE_FAIL("rsyn"); - TEST_NORMALIZE_FAIL("rsync"); - TEST_NORMALIZE_FAIL("rsync:"); - TEST_NORMALIZE_FAIL("rsync:/"); - TEST_NORMALIZE_FAIL("rsync://"); - TEST_NORMALIZE_FAIL("rsync://a.β.c/"); - TEST_NORMALIZE_FAIL("rsync://a.b.c/β"); + TEST_NORMALIZE_FAIL("", EM_SCHEME_NOCOLON); + TEST_NORMALIZE_FAIL("r", EM_SCHEME_NOCOLON); + TEST_NORMALIZE_FAIL("rsyn", EM_SCHEME_NOCOLON); + TEST_NORMALIZE_FAIL("rsync", EM_SCHEME_NOCOLON); + TEST_NORMALIZE_FAIL("rsync:", EM_SCHEME_NOTREMOTE); + TEST_NORMALIZE_FAIL("rsync:/", EM_SCHEME_NOTREMOTE); + TEST_NORMALIZE_FAIL("rsync://", EM_HOST_EMPTY); + TEST_NORMALIZE_FAIL("rsync://a.β.c/", EM_HOST_BADCHR); + TEST_NORMALIZE_FAIL("rsync://a.b.c/β", EM_PATH_BADCHR); TEST_NORMALIZE("rsync://a.b.c/m", "rsync://a.b.c/m"); TEST_NORMALIZE("rsync://a.b.c/m/r", "rsync://a.b.c/m/r"); - TEST_NORMALIZE_FAIL("rsync://a.b.c/m/r?query"); - TEST_NORMALIZE_FAIL("rsync://a.b.c/m/r#fragment"); + TEST_NORMALIZE_FAIL("rsync://a.b.c/m/r?query", EM_QUERY_DISALLOWED); + TEST_NORMALIZE_FAIL("rsync://a.b.c/m/r#fragment", EM_FRAGMENT_DISALLOWED); /* hier-part = "//" authority path-abempty */ TEST_NORMALIZE("rsync://user@a.b.c:1234/m/r", "rsync://user@a.b.c:1234/m/r"); TEST_NORMALIZE("rsync://a.b.c/m/r", "rsync://a.b.c/m/r"); TEST_NORMALIZE("rsync://user@a.b.c:1234", "rsync://user@a.b.c:1234/"); TEST_NORMALIZE("rsync://a.b.c", "rsync://a.b.c/"); + TEST_NORMALIZE_FAIL("rsync://[@a.b.c", EM_USERINFO_BADCHR); /* hier-part = path-absolute */ /* ie. "rsync:/" [ pchar+ ( "/" pchar* )* ] */ /* (These refer to local files. The RFC allows them, but Fort shouldn't.) 
*/ - TEST_NORMALIZE_FAIL("rsync:/"); - TEST_NORMALIZE_FAIL("rsync:/a"); - TEST_NORMALIZE_FAIL("rsync:/a/"); - TEST_NORMALIZE_FAIL("rsync:/a/a"); - TEST_NORMALIZE_FAIL("rsync:/a/a/a"); - TEST_NORMALIZE_FAIL("rsync:/abc/def/xyz"); - TEST_NORMALIZE_FAIL("rsync:/abc////def//xyz"); + TEST_NORMALIZE_FAIL("rsync:/", EM_SCHEME_NOTREMOTE); + TEST_NORMALIZE_FAIL("rsync:/a", EM_SCHEME_NOTREMOTE); + TEST_NORMALIZE_FAIL("rsync:/a/", EM_SCHEME_NOTREMOTE); + TEST_NORMALIZE_FAIL("rsync:/a/a", EM_SCHEME_NOTREMOTE); + TEST_NORMALIZE_FAIL("rsync:/a/a/a", EM_SCHEME_NOTREMOTE); + TEST_NORMALIZE_FAIL("rsync:/abc/def/xyz", EM_SCHEME_NOTREMOTE); + TEST_NORMALIZE_FAIL("rsync:/abc////def//xyz", EM_SCHEME_NOTREMOTE); /* hier-part = path-rootless */ /* ie. "rsync:" pchar+ ( "/" pchar* )* */ /* (Also local paths. Disallowed by Fort needs.) */ - TEST_NORMALIZE_FAIL("rsync:a"); - TEST_NORMALIZE_FAIL("rsync:aa"); - TEST_NORMALIZE_FAIL("rsync:aa/"); - TEST_NORMALIZE_FAIL("rsync:aa/a"); - TEST_NORMALIZE_FAIL("rsync:aa/aa"); - TEST_NORMALIZE_FAIL("rsync:aa///aa"); + TEST_NORMALIZE_FAIL("rsync:a", EM_SCHEME_NOTREMOTE); + TEST_NORMALIZE_FAIL("rsync:aa", EM_SCHEME_NOTREMOTE); + TEST_NORMALIZE_FAIL("rsync:aa/", EM_SCHEME_NOTREMOTE); + TEST_NORMALIZE_FAIL("rsync:aa/a", EM_SCHEME_NOTREMOTE); + TEST_NORMALIZE_FAIL("rsync:aa/aa", EM_SCHEME_NOTREMOTE); + TEST_NORMALIZE_FAIL("rsync:aa///aa", EM_SCHEME_NOTREMOTE); /* hier-part = path-empty */ - TEST_NORMALIZE_FAIL("rsync:"); + TEST_NORMALIZE_FAIL("rsync:", EM_SCHEME_NOTREMOTE); } END_TEST @@ -443,7 +605,11 @@ static Suite *create_suite(void) tcase_add_test(misc, test_same_origin); generic = tcase_create("RFC 3986 (generic URI)"); + tcase_add_test(generic, pct_encoding); tcase_add_test(generic, reserved_unchanged); + tcase_add_test(generic, test_port); + tcase_add_test(generic, test_query); + tcase_add_test(generic, test_fragment); tcase_add_test(generic, lowercase_scheme_and_host); tcase_add_test(generic, decode_unreserved_characters); tcase_add_test(generic, 
path_segment_normalization);