]> git.ipfire.org Git - thirdparty/FORT-validator.git/commitdiff
URI normalization: Propagate error messages properly
author Alberto Leiva Popper <ydahhrk@gmail.com>
Thu, 8 May 2025 22:16:08 +0000 (16:16 -0600)
committer Alberto Leiva Popper <ydahhrk@gmail.com>
Thu, 8 May 2025 22:16:08 +0000 (16:16 -0600)
Forgot to clean this up in the previous commit.
The URI normalizer was printing its errors to standard output as a quick fix.

13 files changed:
src/http.c
src/init.c
src/json_util.c
src/object/certificate.c
src/object/tal.c
src/print_file.c
src/rrdp.c
src/types/uri.c
src/types/uri.h
test/cache_test.c
test/rrdp_test.c
test/rrdp_update_test.c
test/types/uri_test.c

index e455109579a567918c59f24bc5ea13b621692d09..2f9bd5049a7a7bef17dcefc762c7b0277e0343ee 100644 (file)
@@ -249,15 +249,18 @@ static int
 check_same_origin(struct uri const *src, char const *redirect)
 {
        struct uri redirect_url;
+       error_msg errmsg;
        int error;
 
-       error = uri_init(&redirect_url, redirect);
-       if (error)
-               return error;
+       errmsg = uri_init(&redirect_url, redirect);
+       if (errmsg)
+               return pr_val_err("Cannot parse redirect '%s' as a URI: %s",
+                   redirect, errmsg);
 
-       if (!uri_same_origin(src, &redirect_url))
-               error = pr_val_err("%s is redirecting to %s; disallowing because of different origin.",
-                   uri_str(src), uri_str(&redirect_url));
+       error = uri_same_origin(src, &redirect_url)
+           ? 0
+           : pr_val_err("%s is redirecting to %s; disallowing because of different origin.",
+                 uri_str(src), uri_str(&redirect_url));
 
        uri_cleanup(&redirect_url);
        return error;
index 665f12d57a6eaa16a71ee5ef6dea6c1673c66b02..ef6642c1dcc0c2ecc60f7086020b4c85af0fb4a7 100644 (file)
@@ -1,5 +1,6 @@
 #include "init.h"
 
+#include <errno.h>
 #include "config.h"
 #include "http.h"
 #include "types/path.h"
@@ -9,23 +10,25 @@ fetch_url(char const *url, char const *filename)
 {
        struct uri uri;
        char *path;
+       error_msg errmsg;
        int error;
 
-       error = uri_init(&uri, url);
-       if (error)
-               return error;
+       errmsg = uri_init(&uri, url);
+       if (errmsg) {
+               fprintf(stderr, "Invalid URI '%s': %s\n", url, errmsg);
+               return EINVAL;
+       }
 
        path = path_join(config_get_tal(), filename);
 
        error = http_download(&uri, path, 0, NULL);
-       if (error) {
+       if (error)
                fprintf(stderr, "Couldn't fetch '%s': %s\n",
                    path, strerror(abs(error)));
-               goto end;
-       }
+       else
+               fprintf(stdout, "Successfully fetched '%s'!\n\n", path);
 
-       fprintf(stdout, "Successfully fetched '%s'!\n\n", path);
-end:   free(path);
+       free(path);
        uri_cleanup(&uri);
        return error;
 }
index 65217ca2d6bce7f841440bb66450448d279211cf..9c1b8ae950fcb3b95f4c9d514af1abda9b481e99 100644 (file)
@@ -26,23 +26,23 @@ json_get_str(json_t *parent, char const *name, char const **result)
        return 0;
 }
 
-/* Result needs to be cleant up. */
+/* @result needs cleanup. */
 int
 json_get_uri(json_t *parent, char const *name, struct uri *result)
 {
        char const *str;
        int error;
+       error_msg errmsg;
 
        memset(result, 0, sizeof(*result));
 
        error = json_get_str(parent, name, &str);
        if (error)
                return error;
-       error = uri_init(result, str);
-       if (error) {
-               pr_op_err("Malformed URL: %s", str);
-               return -error;
-       }
+       errmsg = uri_init(result, str);
+       if (errmsg)
+               return pr_op_err("'%s' does not seem to be a URI: %s",
+                   str, errmsg);
 
        return 0;
 }
index 17f453fe1f9a55b24d7c1bfd3ac24cb2c9191c41..5b9e8774edc684edaf62df659ea2e66597ee4c6f 100644 (file)
@@ -1391,6 +1391,7 @@ gn2uri(GENERAL_NAME *ad, struct uri *uri)
        int ptype;
        char *str;
        int error;
+       error_msg errmsg;
 
        asn1str = GENERAL_NAME_get0_value(ad, &ptype);
        if (ptype != GEN_URI) {
@@ -1409,10 +1410,14 @@ gn2uri(GENERAL_NAME *ad, struct uri *uri)
        error = ia5s2string(asn1str, &str);
        if (error)
                return error;
-       error = uri_init(uri, str);
-       free(str);
 
-       return error;
+       errmsg = uri_init(uri, str);
+       if (errmsg)
+               pr_val_warn("Cannot parse GENERAL_NAME '%s' as a URI: %s",
+                   str, errmsg);
+
+       free(str);
+       return errmsg ? EINVAL : 0;
 }
 
 static int
index d91b818686080416c45143f4a518626465e6d3de..f322cf50530fbb68387a40a8b3b22fe026280b2c 100644 (file)
@@ -52,6 +52,7 @@ read_content(char *fc /* File Content */, struct tal *tal)
        char *nl; /* New Line */
        bool cr; /* Carriage return */
        struct uri url;
+       error_msg error;
 
        /* Comment section */
        while (fc[0] == '#') {
@@ -72,13 +73,14 @@ read_content(char *fc /* File Content */, struct tal *tal)
                if (is_blank(fc))
                        break;
 
-               if (uri_init(&url, fc) == 0) {
+               error = uri_init(&url, fc);
+               if (!error) {
                        if (uri_is_https(&url) || uri_is_rsync(&url))
                                uris_add(&tal->urls, &url);
                        else
                                uri_cleanup(&url);
                } else {
-                       pr_op_debug("Cannot parse '%s' as a URI; ignoring.", fc);
+                       pr_op_debug("Ignoring URI '%s': %s", fc, error);
                }
 
                fc = nl + cr + 1;
index d9f73fd5db4365ec2cb878ba9bc3a518b576dfb7..f4a7e19bbac35256490444907317f96a379957b2 100644 (file)
@@ -28,10 +28,14 @@ static BIO *
 __rsync2bio(char const *src, char const *dst)
 {
        struct uri url;
+       error_msg errmsg;
        int error;
 
-       if (uri_init(&url, src) != 0)
+       errmsg = uri_init(&url, src);
+       if (errmsg) {
+               pr_op_err("Invalid URI '%s': %s", src, errmsg);
+               return NULL;
+       }
 
        // XXX use the cache
 
index 3d99c3a01eb6c20bd4a4cdc1a2749814b079e3f2..74bd78342c68f7e22ea5abc520f12d265d0cf6f2 100644 (file)
@@ -477,6 +477,7 @@ static int
 parse_file_metadata(xmlTextReaderPtr reader, struct file_metadata *meta)
 {
        xmlChar *xmlattr;
+       error_msg errmsg;
        int error;
 
        memset(meta, 0, sizeof(*meta));
@@ -484,10 +485,15 @@ parse_file_metadata(xmlTextReaderPtr reader, struct file_metadata *meta)
        xmlattr = parse_string(reader, RRDP_ATTR_URI);
        if (xmlattr == NULL)
                return -EINVAL;
-       error = uri_init(&meta->uri, (char const *)xmlattr);
-       xmlFree(xmlattr);
-       if (error)
-               return -EINVAL;
+       errmsg = uri_init(&meta->uri, (char const *)xmlattr);
+       if (errmsg) {
+               /* Report before freeing; the message reads xmlattr. */
+               error = pr_val_err("Cannot parse '%s' as a URI: %s",
+                   (char const *)xmlattr, errmsg);
+               xmlFree(xmlattr);
+               return error;
+       }
+       xmlFree(xmlattr);
 
        error = parse_hash(reader, &meta->hash, &meta->hash_len);
        if (error) {
@@ -1473,6 +1475,7 @@ json2files(json_t *jparent, char *parent, struct rrdp_state *state)
        char const *path;
        unsigned long id, max_id;
        int error;
+       error_msg errmsg;
 
        error = json_get_object(jparent, "files", &jfiles);
        if (error < 0) {
@@ -1490,9 +1493,9 @@ json2files(json_t *jparent, char *parent, struct rrdp_state *state)
                        pr_op_warn("RRDP file URL '%s' is not a string.", jkey);
                        continue;
                }
-               error = uri_init(&url, jkey);
-               if (error) {
-                       pr_op_warn("Cannot parse '%s' as a URI.", jkey);
+               errmsg = uri_init(&url, jkey);
+               if (errmsg) {
+                       pr_op_warn("Cannot parse '%s' as a URI: %s", jkey, errmsg);
                        continue;
                }
 
@@ -1506,8 +1509,7 @@ json2files(json_t *jparent, char *parent, struct rrdp_state *state)
                        continue;
                }
 
-               error = hex2ulong(path + parent_len + 1, &id);
-               if (error) {
+               if (hex2ulong(path + parent_len + 1, &id) != 0) {
                        pr_op_warn("RRDP file '%s' is not a hexadecimal number.", path);
                        uri_cleanup(&url);
                        continue;
index 886f91a065b7e16326868648e57d05be93e4a4fa..b689ebe34e5160a6df85a84031f1e9c9d4676d1c 100644 (file)
@@ -7,8 +7,32 @@
 #include "log.h"
 #include "types/path.h"
 
+/*
+ * XXX IPv6 addresses
+ * XXX UTF-8
+ */
+
 #define URI_ALLOW_UNKNOWN_SCHEME (1 << 1)
 
+static error_msg const EM_SCHEME_EMPTY = "Scheme seems empty";
+static error_msg const EM_SCHEME_1ST = "First scheme character is not a letter";
+static error_msg const EM_SCHEME_NTH = "Scheme character is not letter, digit, plus, period or hyphen";
+static error_msg const EM_SCHEME_NOCOLON = "Scheme not terminated";
+static error_msg const EM_SCHEME_UNKNOWN = "Unknown scheme";
+static error_msg const EM_SCHEME_NOTREMOTE = "Missing \"://\"";
+static error_msg const EM_PCT_NOTHEX = "Invalid hexadecimal digit in percent encoding";
+static error_msg const EM_PCT_NOT3 = "Unterminated percent-encoding";
+static error_msg const EM_USERINFO_BADCHR = "Illegal character in userinfo component";
+static error_msg const EM_USERINFO_DISALLOWED = "Protocol disallows userinfo";
+static error_msg const EM_HOST_BADCHR = "Illegal character in host component";
+static error_msg const EM_HOST_EMPTY = "Protocol disallows empty host";
+static error_msg const EM_PORT_BADCHR = "Illegal non-digit character in port component";
+static error_msg const EM_PORT_RANGE = "Port value is out of range";
+static error_msg const EM_PATH_BADCHR = "Illegal character in path component";
+static error_msg const EM_QUERY_DISALLOWED = "Protocol disallows query";
+static error_msg const EM_QF_BADCHR = "Illegal character in query or fragment";
+static error_msg const EM_FRAGMENT_DISALLOWED = "Protocol disallows fragment";
+
 struct sized_string {
        char const *str;
        size_t len;
@@ -39,7 +63,8 @@ struct schema_metadata const HTTPS = {
 struct schema_metadata const RSYNC = {
        .default_port = 873,
        .allow_userinfo = true,
-       .allow_empty_host = true,
+       /* Seems actually allowed, but RPKI doesn't like it. */
+       .allow_empty_host = false,
        .allow_query = false,
        .allow_fragment = false,
 };
@@ -76,6 +101,18 @@ is_uppercase(char chr)
        return 'A' <= chr && chr <= 'Z';
 }
 
+static bool
+is_lowercase_hex(char chr)
+{
+       return 'a' <= chr && chr <= 'f';
+}
+
+static bool
+is_uppercase_hex(char chr)
+{
+       return 'A' <= chr && chr <= 'F';
+}
+
 static bool
 is_digit(char chr)
 {
@@ -103,13 +140,6 @@ to_uppercase(char chr)
        return is_lowercase(chr) ? (chr + ('A' - 'a')) : chr;
 }
 
-static bool
-invalid(char const *errmsg)
-{
-       printf("%s\n", errmsg);
-       return false;
-}
-
 static void
 approve_chara(struct uri_buffer *buf, char chr)
 {
@@ -122,7 +152,7 @@ approve_chara(struct uri_buffer *buf, char chr)
        buf->dst[buf->d++] = chr;
 }
 
-static bool
+static void
 collect_authority(char const *auth, char const **at, char const **colon,
     char const **end)
 {
@@ -136,7 +166,7 @@ collect_authority(char const *auth, char const **at, char const **colon,
                case '#':
                case '\0':
                        *end = auth;
-                       return true;
+                       return;
                case '@':
                        if ((*at) == NULL) {
                                *colon = NULL; /* Was a password if not null */
@@ -180,22 +210,19 @@ collect_fragment(char const *fragment, char const **end)
                }
 }
 
-static bool
+static error_msg
 normalize_scheme(struct uri_buffer *buf, struct sized_string *scheme)
 {
        char chr;
        array_index c;
 
-       if (scheme->len == 0)
-               return invalid("Scheme seems empty.");
-
        chr = scheme->str[0];
        if (is_lowercase(chr))
                approve_chara(buf, chr);
        else if (is_uppercase(chr))
                approve_chara(buf, to_lowercase(chr));
        else
-               return invalid("First character is not a letter.");
+               return EM_SCHEME_1ST;
 
        for (c = 1; c < scheme->len; c++) {
                chr = scheme->str[c];
@@ -204,13 +231,13 @@ normalize_scheme(struct uri_buffer *buf, struct sized_string *scheme)
                else if (is_uppercase(chr))
                        approve_chara(buf, to_lowercase(chr));
                else
-                       return invalid("Schema character is not letter, digit, plus, period or hyphen.");
+                       return EM_SCHEME_NTH;
        }
 
        approve_chara(buf, ':');
        approve_chara(buf, '/');
        approve_chara(buf, '/');
-       return true;
+       return NULL;
 }
 
 static bool
@@ -228,27 +255,26 @@ is_subdelim(char chr)
        return is_symbol(chr, "!$&'()*+,;=");
 }
 
-static bool
+static error_msg
 char2hex(char chr, unsigned int *hex)
 {
        if (is_digit(chr)) {
                *hex = chr - '0';
-               return true;
+               return NULL;
        }
-       if (is_uppercase(chr)) {
+       if (is_uppercase_hex(chr)) {
                *hex = chr - 'A' + 10;
-               return true;
+               return NULL;
        }
-       if (is_lowercase(chr)) {
+       if (is_lowercase_hex(chr)) {
                *hex = chr - 'a' + 10;
-               return true;
+               return NULL;
        }
 
-       printf("Invalid hex digit: %c\n", chr);
-       return invalid("Invalid hexadecimal digit.");
+       return EM_PCT_NOTHEX;
 }
 
-static bool
+static error_msg
 approve_pct_encoded(struct uri_buffer *buf, struct sized_string *sstr,
     array_index *offset)
 {
@@ -256,84 +282,71 @@ approve_pct_encoded(struct uri_buffer *buf, struct sized_string *sstr,
        unsigned int hex1;
        unsigned int hex2;
        unsigned int val;
+       error_msg error;
 
        off = *offset;
 
        if (sstr->len - off < 3)
-               return invalid("Unterminated %-encoding.");
+               return EM_PCT_NOT3;
 
-       if (!char2hex(sstr->str[off + 1], &hex1))
-               return false;
-       if (!char2hex(sstr->str[off + 2], &hex2))
-               return false;
+       error = char2hex(sstr->str[off + 1], &hex1);
+       if (error)
+               return error;
+       error = char2hex(sstr->str[off + 2], &hex2);
+       if (error)
+               return error;
 
        val = (hex1 << 4) | hex2;
 
        if (is_unreserved(val)) {
                approve_chara(buf, val);
                *offset += 2;
-               return true;
+               return NULL;
        }
 
        approve_chara(buf, '%');
        approve_chara(buf, to_uppercase(sstr->str[off + 1]));
        approve_chara(buf, to_uppercase(sstr->str[off + 2]));
        *offset += 2;
-       return true;
-}
-
-static bool
-handle_pchar(struct uri_buffer *buf, struct sized_string *sstr,
-    array_index *offset)
-{
-       char chr = sstr->str[*offset];
-
-       if (is_unreserved(chr))
-               approve_chara(buf, chr);
-       else if (chr == '%')
-               approve_pct_encoded(buf, sstr, offset);
-       else if (is_subdelim(chr))
-               approve_chara(buf, chr);
-       else if (chr == ':' || chr == '@')
-               approve_chara(buf, chr);
-       else
-               return false;
-       return true;
+       return NULL;
 }
 
-static bool
+static error_msg
 normalize_userinfo(struct uri_buffer *buf, struct sized_string *userinfo)
 {
        array_index c;
        char chr;
+       error_msg error;
 
        if (userinfo->len == 0)
-               return true;
+               return NULL;
 
        for (c = 0; c < userinfo->len; c++) {
                chr = userinfo->str[c];
                if (is_unreserved(chr))
                        approve_chara(buf, chr);
                else if (chr == '%') {
-                       if (!approve_pct_encoded(buf, userinfo, &c))
-                               return false;
+                       error = approve_pct_encoded(buf, userinfo, &c);
+                       if (error)
+                               return error;
                } else if (is_subdelim(chr))
                        approve_chara(buf, chr);
                else if (chr == ':')
                        approve_chara(buf, chr);
                else
-                       return invalid("Illegal character in userinfo section.");
+                       return EM_USERINFO_BADCHR;
        }
 
        approve_chara(buf, '@');
-       return true;
+       return NULL;
 }
 
-static bool
+static error_msg
 normalize_host(struct uri_buffer *buf, struct sized_string *host)
 {
        array_index c;
        char chr;
+       error_msg error;
 
        for (c = 0; c < host->len; c++) {
                chr = host->str[c];
@@ -342,18 +355,19 @@ normalize_host(struct uri_buffer *buf, struct sized_string *host)
                else if (is_unreserved(chr))
                        approve_chara(buf, chr);
                else if (chr == '%') {
-                       if (!approve_pct_encoded(buf, host, &c))
-                               return false;
+                       error = approve_pct_encoded(buf, host, &c);
+                       if (error)
+                               return error;
                } else if (is_subdelim(chr))
                        approve_chara(buf, chr);
                else
-                       return invalid("Illegal character in host section.");
+                       return EM_HOST_BADCHR;
        }
 
-       return true;
+       return NULL;
 }
 
-static bool
+static error_msg
 normalize_port(struct uri_buffer *buf, struct sized_string *port,
     struct schema_metadata const *schema)
 {
@@ -362,25 +376,25 @@ normalize_port(struct uri_buffer *buf, struct sized_string *port,
        unsigned int portnum;
 
        if (port->len == 0)
-               return true;
+               return NULL;
 
        portnum = 0;
        for (c = 0; c < port->len; c++) {
                chr = port->str[c];
                if (!is_digit(chr))
-                       return invalid("Illegal non-digit character in port section.");
+                       return EM_PORT_BADCHR;
                portnum = 10 * portnum + (chr - '0');
-               if (portnum > 0xFFFF)
-                       return invalid("Port value is too large.");
+               if (portnum == 0 || portnum > 0xFFFF)
+                       return EM_PORT_RANGE;
        }
 
        if (schema && (portnum == schema->default_port))
-               return true;
+               return NULL;
 
        approve_chara(buf, ':');
        for (c = 0; c < port->len; c++)
                approve_chara(buf, port->str[c]);
-       return true;
+       return NULL;
 }
 
 static char const *
@@ -412,17 +426,18 @@ rewind_buffer(struct uri_buffer *buf, size_t limit)
                ;
 }
 
-static bool
+static error_msg
 normalize_path(struct uri_buffer *buf, struct sized_string *path)
 {
        struct sized_string segment;
        array_index i;
        char chr;
        size_t limit;
+       error_msg error;
 
        if (path->len == 0) {
                approve_chara(buf, '/');
-               return true;
+               return NULL;
        }
 
        segment.str = path->str;
@@ -436,12 +451,13 @@ normalize_path(struct uri_buffer *buf, struct sized_string *path)
                        if (is_unreserved(chr))
                                approve_chara(buf, chr);
                        else if (chr == '%') {
-                               if (!approve_pct_encoded(buf, &segment, &i))
-                                       return false;
+                               error = approve_pct_encoded(buf, &segment, &i);
+                               if (error)
+                                       return error;
                        } else if (is_subdelim(chr) || is_symbol(chr, ":@"))
                                approve_chara(buf, chr);
                        else
-                               return invalid("Illegal character in path section.");
+                               return EM_PATH_BADCHR;
                }
 
                if (buf->dst[buf->d - 2] == '/' &&
@@ -457,38 +473,55 @@ normalize_path(struct uri_buffer *buf, struct sized_string *path)
 
        if (limit == buf->d)
                approve_chara(buf, '/');
-       return true;
+       return NULL;
 }
 
-static bool
+static error_msg
 normalize_post_path(struct uri_buffer *buf, struct sized_string *post,
     char prefix)
 {
        array_index c;
        char chr;
+       error_msg error;
 
        if (post->len == 0)
-               return true;
+               return NULL;
 
        approve_chara(buf, prefix);
        for (c = 1; c < post->len; c++) {
-               if (handle_pchar(buf, post, &c))
-                       continue;
                chr = post->str[c];
-               if (chr == ':' || chr == '@')
+               if (is_unreserved(chr))
+                       approve_chara(buf, chr);
+               else if (chr == '%') {
+                       error = approve_pct_encoded(buf, post, &c);
+                       if (error)
+                               return error;
+               } else if (is_subdelim(chr))
+                       approve_chara(buf, chr);
+               else if (is_symbol(chr, ":@/?"))
                        approve_chara(buf, chr);
                else
-                       return invalid("Illegal character in query section.");
+                       return EM_QF_BADCHR;
        }
 
-       return true;
+       return NULL;
+}
+
+static void
+print_component(char const *name, struct sized_string *component)
+{
+       pr_clutter("  %s: %.*s (len:%zu)", name, (int)component->len,
+           component->str, component->len);
 }
 
 /*
  * See RFC 3986. Basically, "rsync://%61.b/./c/.././%64/." -> "rsync://a.b/d"
+ *
+ * The return value is an error message. If NULL, the URL was stored in @result
+ * and needs to be released.
  */
-static char *
-url_normalize(char const *url, int flags)
+static error_msg
+url_normalize(char const *url, int flags, char **result)
 {
        struct sized_string scheme;
        struct sized_string authority;
@@ -505,43 +538,36 @@ url_normalize(char const *url, int flags)
 
        struct schema_metadata const *meta;
        struct uri_buffer buf;
+       error_msg error;
 
        pr_clutter("-----------------------");
        pr_clutter("input: %s", url);
 
        cursor = strchr(url, ':');
-       if (!cursor) {
-               printf("Schema not terminated\n");
-               return NULL;
-       }
+       if (!cursor)
+               return EM_SCHEME_NOCOLON;
+       if (cursor == url)
+               return EM_SCHEME_EMPTY;
 
        scheme.str = url;
        scheme.len = cursor - url;
-       pr_clutter("  scheme: %.*s (len:%zu)", (int)scheme.len, scheme.str, scheme.len);
+       print_component("scheme", &scheme);
+
        meta = get_metadata(&scheme);
-       if (!(flags & URI_ALLOW_UNKNOWN_SCHEME) && !meta) {
-               printf("Unknown scheme\n");
-               return NULL;
-       }
+       if (!(flags & URI_ALLOW_UNKNOWN_SCHEME) && !meta)
+               return EM_SCHEME_UNKNOWN;
 
-       if (cursor[1] != '/' || cursor[2] != '/') {
-               printf("Missing \"://\"\n");
-               return NULL;
-       }
+       if (cursor[1] != '/' || cursor[2] != '/')
+               return EM_SCHEME_NOTREMOTE;
 
        authority.str = cursor + 3;
-       if (!collect_authority(authority.str, &at, &colon, &cursor))
-               return NULL;
+       collect_authority(authority.str, &at, &colon, &cursor);
        authority.len = cursor - authority.str;
-       pr_clutter("  authority: %.*s (len:%zu)", (int)authority.len, authority.str, authority.len);
-       if (authority.len == 0)
-               return NULL;
+       print_component("authority", &authority);
 
        if (at != NULL) {
-               if (meta && !meta->allow_userinfo) {
-                       printf("Protocol disallows userinfo.\n");
-                       return NULL;
-               }
+               if (meta && !meta->allow_userinfo)
+                       return EM_USERINFO_DISALLOWED;
 
                userinfo.str = authority.str;
                userinfo.len = at - authority.str;
@@ -562,14 +588,12 @@ url_normalize(char const *url, int flags)
                port.len = 0;
        }
 
-       if (host.len == 0 && meta && !meta->allow_empty_host) {
-               printf("Protocol disallows empty host.\n");
-               return NULL;
-       }
+       if (host.len == 0 && meta && !meta->allow_empty_host)
+               return EM_HOST_EMPTY;
 
-       pr_clutter("  userinfo: %.*s (len:%zu)", (int)userinfo.len, userinfo.str, userinfo.len);
-       pr_clutter("  host: %.*s (len:%zu)", (int)host.len, host.str, host.len);
-       pr_clutter("  port: %.*s (len:%zu)", (int)port.len, port.str, port.len);
+       print_component("userinfo", &userinfo);
+       print_component("host", &host);
+       print_component("port", &port);
 
        if (cursor[0] == '\0') {
                memset(&path, 0, sizeof(path));
@@ -588,10 +612,8 @@ url_normalize(char const *url, int flags)
                        break;
 
                case '?':
-                       if (meta && !meta->allow_query) {
-                               printf("Protocol disallows query.\n");
-                               return NULL;
-                       }
+                       if (meta && !meta->allow_query)
+                               return EM_QUERY_DISALLOWED;
 
                        query.str = cursor;
                        collect_query(query.str + 1, &cursor);
@@ -611,10 +633,8 @@ url_normalize(char const *url, int flags)
                case '#':
                        memset(&query, 0, sizeof(query));
 
-frag:                  if (meta && !meta->allow_fragment) {
-                               printf("Protocol disallows fragment.\n");
-                               return NULL;
-                       }
+frag:                  if (meta && !meta->allow_fragment)
+                               return EM_FRAGMENT_DISALLOWED;
                        fragment.str = cursor;
                        collect_fragment(fragment.str + 1, &cursor);
                        fragment.len = cursor - fragment.str;
@@ -626,9 +646,9 @@ frag:                       if (meta && !meta->allow_fragment) {
                }
        }
 
-       pr_clutter("  path: %.*s (len:%zu)", (int)path.len, path.str, path.len);
-       pr_clutter("  query: %.*s (len:%zu)", (int)query.len, query.str, query.len);
-       pr_clutter("  fragment: %.*s (len:%zu)", (int)fragment.len, fragment.str, fragment.len);
+       print_component("path", &path);
+       print_component("query", &query);
+       print_component("fragment", &fragment);
 
        buf.capacity = scheme.len + authority.len + path.len
            + query.len + fragment.len + 5; /* "://" + maybe '/' + '\0' */
@@ -636,51 +656,54 @@ frag:                     if (meta && !meta->allow_fragment) {
        buf.d = 0;
 
        pr_clutter("-> Normalizing scheme.");
-       if (!normalize_scheme(&buf, &scheme))
+       error = normalize_scheme(&buf, &scheme);
+       if (error)
                goto cancel;
        pr_clutter("-> Normalizing userinfo.");
-       if (!normalize_userinfo(&buf, &userinfo))
+       error = normalize_userinfo(&buf, &userinfo);
+       if (error)
                goto cancel;
        pr_clutter("-> Normalizing host.");
-       if (!normalize_host(&buf, &host))
+       error = normalize_host(&buf, &host);
+       if (error)
                goto cancel;
        pr_clutter("-> Normalizing port.");
-       if (!normalize_port(&buf, &port, meta))
+       error = normalize_port(&buf, &port, meta);
+       if (error)
                goto cancel;
        pr_clutter("-> Normalizing path.");
-       if (!normalize_path(&buf, &path))
+       error = normalize_path(&buf, &path);
+       if (error)
                goto cancel;
        pr_clutter("-> Normalizing query.");
-       if (!normalize_post_path(&buf, &query, '?'))
+       error = normalize_post_path(&buf, &query, '?');
+       if (error)
                goto cancel;
        pr_clutter("-> Normalizing fragment.");
-       if (!normalize_post_path(&buf, &fragment, '#'))
+       error = normalize_post_path(&buf, &fragment, '#');
+       if (error)
                goto cancel;
 
        approve_chara(&buf, '\0');
-       return buf.dst;
+       *result = buf.dst;
+       return NULL;
 
 cancel:        free(buf.dst);
-       return NULL;
+       return error;
 }
 
-int
+error_msg
 uri_init(struct uri *url, char const *str)
 {
        char *normal;
+       error_msg error;
 
-       normal = url_normalize(str, 0);
-       if (!normal)
-               return EINVAL;
+       error = url_normalize(str, 0, &normal);
+       if (error)
+               return error;
 
        __URI_INIT(url, normal);
-
-       if (!uri_is_https(url) && !uri_is_rsync(url)) {
-               free(normal);
-               return ENOTSUP;
-       }
-
-       return 0;
+       return NULL;
 }
 
 /* @str must already be normalized. */
index 601e0490b24ef2f906fe69ae0464093119a64010..2fa555bf3cc05a1612d39ef2b00508d907eee416 100644 (file)
@@ -13,7 +13,9 @@ struct uri {
        size_t _len;
 };
 
-int uri_init(struct uri *, char const *);
+typedef char const *error_msg;
+
+error_msg uri_init(struct uri *, char const *);
 void __uri_init(struct uri *, char const *, size_t);
 #define __URI_INIT(uri, str) __uri_init(uri, str, strlen(str))
 void uri_copy(struct uri *, struct uri const *);
index de5e554c8ba363ad80b9fd3445b14c7d41e00ef4..4548d6b1a1a1325146bb3c4ed5bf9a2487f4cca9 100644 (file)
@@ -111,7 +111,7 @@ run_dl_rsync(char *caRepository, int expected_err, unsigned int expected_calls)
        struct sia_uris sias = { 0 };
        struct cache_cage *cage;
 
-       ck_assert_int_eq(0, uri_init(&sias.caRepository, caRepository));
+       ck_assert_ptr_eq(NULL, uri_init(&sias.caRepository, caRepository));
        cage = NULL;
 
        rsync_counter = 0;
@@ -149,7 +149,7 @@ run_dl_https(char const *url, unsigned int expected_calls,
        struct uri uri;
        char const *result;
 
-       ck_assert_int_eq(0, uri_init(&uri, url));
+       ck_assert_ptr_eq(NULL, uri_init(&uri, url));
 
        rsync_counter = 0;
        https_counter = 0;
@@ -173,7 +173,7 @@ ck_cage(struct cache_cage *cage, char const *url,
        struct uri uri;
        struct cache_node const *bkp;
 
-       ck_assert_int_eq(0, uri_init(&uri, url));
+       ck_assert_ptr_eq(NULL, uri_init(&uri, url));
 
        ck_assert_str(refresh, cage_map_file(cage, &uri));
 
@@ -213,11 +213,11 @@ queue_commit(char const *rpkiNotify, char const *caRepository,
        struct uri rn, cr;
 
        if (rpkiNotify)
-               ck_assert_int_eq(0, uri_init(&rn, rpkiNotify));
+               ck_assert_ptr_eq(NULL, uri_init(&rn, rpkiNotify));
        else
                memset(&rn, 0, sizeof(rn));
        if (caRepository)
-               ck_assert_int_eq(0, uri_init(&cr, caRepository));
+               ck_assert_ptr_eq(NULL, uri_init(&cr, caRepository));
        else
                memset(&cr, 0, sizeof(cr));
 
@@ -302,7 +302,7 @@ init_node_rsync(struct cache_node *node, char *url, char *path,
 {
        node->key.id = url;
        node->key.idlen = strlen(url);
-       ck_assert_int_eq(0, uri_init(&node->key.rsync, url));
+       ck_assert_ptr_eq(NULL, uri_init(&node->key.rsync, url));
        node->path = path;
        node->state = fresh ? DLS_FRESH : DLS_OUTDATED; /* XXX (test) */
        node->dlerr = dlerr;
@@ -315,7 +315,7 @@ init_node_https(struct cache_node *node, char *url, char *path,
 {
        node->key.id = url;
        node->key.idlen = strlen(url);
-       ck_assert_int_eq(0, uri_init(&node->key.http, url));
+       ck_assert_ptr_eq(NULL, uri_init(&node->key.http, url));
        node->path = path;
        node->state = fresh ? DLS_FRESH : DLS_OUTDATED;
        node->dlerr = dlerr;
@@ -682,12 +682,12 @@ START_TEST(test_https_cleanup)
        }
 
        /* 3 */
-       ck_assert_int_eq(0, uri_init(&map.url, "https://domain/rpki/ta50.cer"));
+       ck_assert_ptr_eq(NULL, uri_init(&map.url, "https://domain/rpki/ta50.cer"));
        map.path = pstrdup("https/50");
        cache_commit_file(&map);
        map_cleanup(&map);
 
-       ck_assert_int_eq(0, uri_init(&map.url, "https://domain/rpki/ta52.cer"));
+       ck_assert_ptr_eq(NULL, uri_init(&map.url, "https://domain/rpki/ta52.cer"));
        map.path = pstrdup("https/52");
        cache_commit_file(&map);
        map_cleanup(&map);
@@ -701,12 +701,12 @@ START_TEST(test_https_cleanup)
        new_iteration(false);
 
        /* 4 */
-       ck_assert_int_eq(0, uri_init(&map.url, "https://domain/rpki/ta50.cer"));
+       ck_assert_ptr_eq(NULL, uri_init(&map.url, "https://domain/rpki/ta50.cer"));
        map.path = pstrdup("fallback/0");
        cache_commit_file(&map);
        map_cleanup(&map);
 
-       ck_assert_int_eq(0, uri_init(&map.url, "https://domain/rpki/ta51.cer"));
+       ck_assert_ptr_eq(NULL, uri_init(&map.url, "https://domain/rpki/ta51.cer"));
        map.path = pstrdup("https/51");
        cache_commit_file(&map);
        map_cleanup(&map);
@@ -816,12 +816,12 @@ START_TEST(test_context)
        dls[1] = SHDR("3") PBLSH("rsync://x.y.z/mod5/rpp3/a.cer", "Rm9ydAo=") STAIL;
        dls[2] = NULL;
 
-       ck_assert_int_eq(0, uri_init(&file_url, FILE_URL));
+       ck_assert_ptr_eq(NULL, uri_init(&file_url, FILE_URL));
 
        printf("1. 1st CA succeeds on RRDP\n");
        print_tree();
-       ck_assert_int_eq(0, uri_init(&sias.rpkiNotify, RPKI_NOTIFY));
-       ck_assert_int_eq(0, uri_init(&sias.caRepository, CA_REPOSITORY));
+       ck_assert_ptr_eq(NULL, uri_init(&sias.rpkiNotify, RPKI_NOTIFY));
+       ck_assert_ptr_eq(NULL, uri_init(&sias.caRepository, CA_REPOSITORY));
        ck_assert_int_eq(0, cache_refresh_by_sias(&sias, &cage));
        ck_assert_str_eq(RPKI_NOTIFY, uri_str(&cage->rpkiNotify));
        ck_assert_str_eq(FILE_RRDP_PATH, cage_map_file(cage, &file_url));
@@ -849,7 +849,7 @@ START_TEST(test_context)
        rpp.files = pzalloc(sizeof(struct cache_mapping));
        uri_copy(&rpp.files->url, &file_url);
        rpp.files->path = pstrdup(FILE_RRDP_PATH);
-       ck_assert_int_eq(0, uri_init(&sias.rpkiNotify, RPKI_NOTIFY));
+       ck_assert_ptr_eq(NULL, uri_init(&sias.rpkiNotify, RPKI_NOTIFY));
        cache_commit_rpp(&sias.rpkiNotify, &sias.caRepository, &rpp);
 
        rpp.nfiles = 1;
@@ -869,7 +869,7 @@ START_TEST(test_context)
        ck_assert_int_eq(true, cage_disable_refresh(cage));
        ck_assert_str_eq("fallback/1/0", cage_map_file(cage, &file_url));
 
-       ck_assert_int_eq(0, uri_init(&sias.rpkiNotify, RPKI_NOTIFY));
+       ck_assert_ptr_eq(NULL, uri_init(&sias.rpkiNotify, RPKI_NOTIFY));
        ck_assert_int_eq(0, cache_refresh_by_sias(&sias, &cage));
        ck_assert_str_eq(RPKI_NOTIFY, uri_str(&cage->rpkiNotify));
        ck_assert_str_eq(FILE_RRDP_PATH, cage_map_file(cage, &file_url));
@@ -965,7 +965,7 @@ CACHE_FILE_ADD(struct rrdp_state *state, char const *url, char *path)
 {
        struct uri uri;
 
-       ck_assert_int_eq(0, uri_init(&uri, url));
+       ck_assert_ptr_eq(NULL, uri_init(&uri, url));
        ck_assert_ptr_ne(NULL, cache_file_add(state, &uri, pstrdup(path)));
        uri_cleanup(&uri);
 }
index 7eaa97e15451e66c2ae36e6b0fca7045c06a4b26..03aa129db089e9ffa1ac2219a733ff915f2822b4 100644 (file)
@@ -403,7 +403,7 @@ START_TEST(test_parse_notification_ok)
        struct uri nurl;
 
        ck_assert_int_eq(0, relax_ng_init());
-       ck_assert_int_eq(0, uri_init(&nurl, "https://host/notification.xml"));
+       ck_assert_ptr_eq(NULL, uri_init(&nurl, "https://host/notification.xml"));
        ck_assert_int_eq(0, parse_notification(&nurl,
            "resources/rrdp/notif-ok.xml", &notif));
        uri_cleanup(&nurl);
@@ -441,7 +441,7 @@ START_TEST(test_parse_notification_0deltas)
        struct uri nurl;
 
        ck_assert_int_eq(0, relax_ng_init());
-       ck_assert_int_eq(0, uri_init(&nurl, "https://host/notification.xml"));
+       ck_assert_ptr_eq(NULL, uri_init(&nurl, "https://host/notification.xml"));
        ck_assert_int_eq(0, parse_notification(&nurl,
            "resources/rrdp/notif-0deltas.xml", &notif));
        uri_cleanup(&nurl);
@@ -467,7 +467,7 @@ START_TEST(test_parse_notification_large_serial)
        struct uri nurl;
 
        ck_assert_int_eq(0, relax_ng_init());
-       ck_assert_int_eq(0, uri_init(&nurl, "https://host/notification.xml"));
+       ck_assert_ptr_eq(NULL, uri_init(&nurl, "https://host/notification.xml"));
        ck_assert_int_eq(0, parse_notification(&nurl,
            "resources/rrdp/notif-large-serial.xml", &notif));
        uri_cleanup(&nurl);
@@ -501,7 +501,7 @@ test_parse_notification_error(char *file)
        struct uri nurl;
 
        ck_assert_int_eq(0, relax_ng_init());
-       ck_assert_int_eq(0, uri_init(&nurl, "https://host/notification.xml"));
+       ck_assert_ptr_eq(NULL, uri_init(&nurl, "https://host/notification.xml"));
        ck_assert_int_eq(-EINVAL, parse_notification(&nurl, file, &notif));
        uri_cleanup(&nurl);
 
index 29edf560211b8767088048465a3034f9d39378ff..0766675fc29f05dd742b60653062964dd54a7fb4 100644 (file)
@@ -91,7 +91,7 @@ START_TEST(startup)
        seq.pathlen = strlen(seq.prefix);
        seq.free_prefix = false;
 
-       ck_assert_int_eq(0, uri_init(&url, URL));
+       ck_assert_ptr_eq(NULL, uri_init(&url, URL));
 
        dls[0] = NHDR("3")
                NSS("https://host/9d-8/3/snapshot.xml",
@@ -106,7 +106,7 @@ START_TEST(startup)
        ck_assert_uint_eq(true, changed);
        ck_file("rrdp/0/0"); /* "rrdp/<first-cage>/<c.cer>" */
 
-       ck_assert_int_eq(0, uri_init(&maps[0].url, "rsync://a/b/c.cer"));
+       ck_assert_ptr_eq(NULL, uri_init(&maps[0].url, "rsync://a/b/c.cer"));
        maps[0].path = "rrdp/0/0";
        memset(&maps[1], 0, sizeof(maps[1]));
        ck_state(TEST_SESSION, "3", 1, maps, state);
index e19b0482bb3d4594c89e82851dd2d3840e214270..aaa8320c2f1b49ac6463bf01ab4beb6af02f9b8b 100644 (file)
@@ -31,17 +31,24 @@ START_TEST(test_rewind)
 END_TEST
 
 #define TEST_NORMALIZE(dirty, clean)                                   \
-       normal = url_normalize(dirty, 0);                               \
+       ck_assert_pstr_eq(NULL, url_normalize(dirty, 0, &normal));      \
        ck_assert_str_eq(clean, normal);                                \
        free(normal)
 
 #define TEST_NORMALIZE_AUS(dirty, clean)                               \
-       normal = url_normalize(dirty, URI_ALLOW_UNKNOWN_SCHEME);        \
+       ck_assert_ptr_eq(NULL, url_normalize(                           \
+               dirty, URI_ALLOW_UNKNOWN_SCHEME, &normal                \
+       ));                                                             \
        ck_assert_str_eq(clean, normal);                                \
        free(normal)
 
-#define TEST_NORMALIZE_FAIL(dirty)                                     \
-       ck_assert_ptr_eq(NULL, url_normalize(dirty, 0));
+#define TEST_NORMALIZE_FAIL(dirty, error)                              \
+       ck_assert_str_eq(error, url_normalize(dirty, 0, &normal)); /* No flags: the returned message must equal `error`. Needs a `normal` variable in the caller's scope. */
+
+#define TEST_NORMALIZE_FAIL_AUS(dirty, error)                          \
+       ck_assert_str_eq(error, url_normalize(                          \
+               dirty, URI_ALLOW_UNKNOWN_SCHEME, &normal                \
+       )); /* Same check as TEST_NORMALIZE_FAIL, but with URI_ALLOW_UNKNOWN_SCHEME set. */
 
 START_TEST(awkward_dot_dotting)
 {
@@ -59,6 +66,82 @@ START_TEST(awkward_dot_dotting)
 }
 END_TEST
 
+START_TEST(test_port) /* Port component: values at and around the edges of the accepted range. */
+{
+       char *normal; /* Referenced by name inside the TEST_NORMALIZE* macros. */
+
+       printf("rfc3986#3.2.3: Port\n");
+
+       TEST_NORMALIZE_FAIL("https://a:-1/", EM_PORT_BADCHR); /* Reported as a bad character, not as out of range. */
+       TEST_NORMALIZE_FAIL("https://a:0/", EM_PORT_RANGE); /* Zero is out of range. */
+       TEST_NORMALIZE("https://a:1/", "https://a:1/"); /* Lowest port expected to pass. */
+       TEST_NORMALIZE("https://a:65535/", "https://a:65535/"); /* Highest port expected to pass. */
+       TEST_NORMALIZE_FAIL("https://a:65536/", EM_PORT_RANGE); /* One past the highest accepted value. */
+}
+END_TEST
+
+START_TEST(pct_encoding) /* Percent-escape handling, checked component by component. */
+{
+       char *normal; /* Referenced by name inside the TEST_NORMALIZE* macros. */
+
+       printf("3986#2.1: Percent encoding\n");
+
+       TEST_NORMALIZE("https://%61/", "https://a/"); /* Escaped letters are expected to come back decoded... */
+       TEST_NORMALIZE("https://%6f/", "https://o/"); /* ...whether the hex digits are lowercase... */
+       TEST_NORMALIZE("https://%6F/", "https://o/"); /* ...or uppercase. */
+       TEST_NORMALIZE("https://%7C/", "https://%7C/"); /* %7C ('|') is expected to stay escaped. */
+       TEST_NORMALIZE("https://%7c/", "https://%7C/"); /* A kept escape gets uppercase hex digits. */
+
+       TEST_NORMALIZE_FAIL("https://%6G", EM_PCT_NOTHEX); /* Non-hex character in the second position. */
+       TEST_NORMALIZE_FAIL("https://%G6", EM_PCT_NOTHEX); /* Non-hex character in the first position. */
+
+       /* Host: escape cut short by end of input, ':', '/', '?' or '#' */
+       TEST_NORMALIZE_FAIL("https://%6", EM_PCT_NOT3);
+       TEST_NORMALIZE_FAIL("https://%", EM_PCT_NOT3);
+       TEST_NORMALIZE_FAIL("https://%6:", EM_PCT_NOT3);
+       TEST_NORMALIZE_FAIL("https://%:", EM_PCT_NOT3);
+       TEST_NORMALIZE_FAIL("https://%6/", EM_PCT_NOT3);
+       TEST_NORMALIZE_FAIL("https://%/", EM_PCT_NOT3);
+       TEST_NORMALIZE_FAIL("https://%6?", EM_PCT_NOT3);
+       TEST_NORMALIZE_FAIL("https://%?", EM_PCT_NOT3);
+       TEST_NORMALIZE_FAIL("https://%6#", EM_PCT_NOT3);
+       TEST_NORMALIZE_FAIL("https://%#", EM_PCT_NOT3);
+
+       /* Userinfo: a complete escape is decoded; one cut short by '@' is rejected */
+       TEST_NORMALIZE("rsync://%61@a/", "rsync://a@a/");
+       TEST_NORMALIZE_FAIL("rsync://%6@a", EM_PCT_NOT3);
+       TEST_NORMALIZE_FAIL("rsync://%@a", EM_PCT_NOT3);
+
+       /* Port: escapes are rejected as bad characters, even %31 (an escaped '1') */
+       TEST_NORMALIZE_FAIL("rsync://a:%31/", EM_PORT_BADCHR);
+       TEST_NORMALIZE_FAIL("rsync://a:%3", EM_PORT_BADCHR);
+       TEST_NORMALIZE_FAIL("rsync://a:%", EM_PORT_BADCHR);
+
+       /* Path: a complete escape is decoded; one cut short by end of input, '/', '?' or '#' is rejected */
+       TEST_NORMALIZE("https://a/%41", "https://a/A");
+       TEST_NORMALIZE_FAIL("https://a/%4", EM_PCT_NOT3);
+       TEST_NORMALIZE_FAIL("https://a/%", EM_PCT_NOT3);
+       TEST_NORMALIZE_FAIL("https://a/%4/", EM_PCT_NOT3);
+       TEST_NORMALIZE_FAIL("https://a/%/", EM_PCT_NOT3);
+       TEST_NORMALIZE_FAIL("https://a/%4?", EM_PCT_NOT3);
+       TEST_NORMALIZE_FAIL("https://a/%?", EM_PCT_NOT3);
+       TEST_NORMALIZE_FAIL("https://a/%4#", EM_PCT_NOT3);
+       TEST_NORMALIZE_FAIL("https://a/%#", EM_PCT_NOT3);
+
+       /* Query: a complete escape is decoded; one cut short by end of input or '#' is rejected */
+       TEST_NORMALIZE("https://a/?%30", "https://a/?0");
+       TEST_NORMALIZE_FAIL("https://a/?%3", EM_PCT_NOT3);
+       TEST_NORMALIZE_FAIL("https://a/?%", EM_PCT_NOT3);
+       TEST_NORMALIZE_FAIL("https://a/?%3#", EM_PCT_NOT3);
+       TEST_NORMALIZE_FAIL("https://a/?%#", EM_PCT_NOT3);
+
+       /* Fragment: a complete escape is decoded; one cut short by end of input is rejected */
+       TEST_NORMALIZE("https://a/#%30", "https://a/#0");
+       TEST_NORMALIZE_FAIL("https://a/#%3", EM_PCT_NOT3);
+       TEST_NORMALIZE_FAIL("https://a/#%", EM_PCT_NOT3);
+}
+END_TEST
+
 #define ck_assert_origin(expected, s1, s2)                             \
        do {                                                            \
                __URI_INIT(&u1, s1);                                    \
@@ -96,10 +179,37 @@ START_TEST(test_unknown_protocols)
 
        printf("Unknown protocols\n");
 
-       TEST_NORMALIZE_FAIL("httpz://a.b.c/d");
-       TEST_NORMALIZE_FAIL("abcde://a.b.c/d");
+       TEST_NORMALIZE_AUS("https://a.b.c/d", "https://a.b.c/d");
+       TEST_NORMALIZE("https://a.b.c/d", "https://a.b.c/d");
+       TEST_NORMALIZE_AUS("http://a.b.c/d", "http://a.b.c/d");
+       TEST_NORMALIZE_FAIL("http://a.b.c/d", EM_SCHEME_UNKNOWN);
+
+       TEST_NORMALIZE_FAIL("httpz://a.b.c/d", EM_SCHEME_UNKNOWN);
+       TEST_NORMALIZE_FAIL("abcde://a.b.c/d", EM_SCHEME_UNKNOWN);
+       TEST_NORMALIZE_FAIL("://a.b.c/d", EM_SCHEME_EMPTY);
+       TEST_NORMALIZE_FAIL("0abc://a.b.c/d", EM_SCHEME_UNKNOWN);
+       TEST_NORMALIZE_FAIL("9abc://a.b.c/d", EM_SCHEME_UNKNOWN);
+       TEST_NORMALIZE_FAIL("+abc://a.b.c/d", EM_SCHEME_UNKNOWN);
+       TEST_NORMALIZE_FAIL(".abc://a.b.c/d", EM_SCHEME_UNKNOWN);
+       TEST_NORMALIZE_FAIL("-abc://a.b.c/d", EM_SCHEME_UNKNOWN);
+       TEST_NORMALIZE_FAIL("a_b://a.b.c/d", EM_SCHEME_UNKNOWN);
+       TEST_NORMALIZE_FAIL("a~b://a.b.c/d", EM_SCHEME_UNKNOWN);
+
        TEST_NORMALIZE_AUS("httpz://a.b.c/d", "httpz://a.b.c/d");
        TEST_NORMALIZE_AUS("abcde://a.b.c/d", "abcde://a.b.c/d");
+       TEST_NORMALIZE_FAIL_AUS("://a.b.c/d", EM_SCHEME_EMPTY);
+       TEST_NORMALIZE_FAIL_AUS("0abc://a.b.c/d", EM_SCHEME_1ST);
+       TEST_NORMALIZE_FAIL_AUS("9abc://a.b.c/d", EM_SCHEME_1ST);
+       TEST_NORMALIZE_FAIL_AUS("+abc://a.b.c/d", EM_SCHEME_1ST);
+       TEST_NORMALIZE_FAIL_AUS(".abc://a.b.c/d", EM_SCHEME_1ST);
+       TEST_NORMALIZE_FAIL_AUS("-abc://a.b.c/d", EM_SCHEME_1ST);
+       TEST_NORMALIZE_AUS("a0b://a.b.c/d", "a0b://a.b.c/d");
+       TEST_NORMALIZE_AUS("a9b://a.b.c/d", "a9b://a.b.c/d");
+       TEST_NORMALIZE_AUS("a+b://a.b.c/d", "a+b://a.b.c/d");
+       TEST_NORMALIZE_AUS("a.b://a.b.c/d", "a.b://a.b.c/d");
+       TEST_NORMALIZE_AUS("a-b://a.b.c/d", "a-b://a.b.c/d");
+       TEST_NORMALIZE_FAIL_AUS("a_b://a.b.c/d", EM_SCHEME_NTH);
+       TEST_NORMALIZE_FAIL_AUS("a~b://a.b.c/d", EM_SCHEME_NTH);
 }
 END_TEST
 
@@ -125,6 +235,55 @@ START_TEST(reserved_unchanged)
 }
 END_TEST
 
+START_TEST(test_query) /* Query component: characters expected to pass and characters expected to be rejected. */
+{
+       char *normal; /* Referenced by name inside the TEST_NORMALIZE* macros. */
+
+       printf("3986#3.4: Query\n");
+
+       TEST_NORMALIZE("https://a/?azAZ09-._~%31!$&'()*+,;=:@/?", "https://a/?azAZ09-._~1!$&'()*+,;=:@/?"); /* Only the %31 escape changes (to '1'); everything else is kept verbatim. */
+       TEST_NORMALIZE("https://a/?azAZ09-._~%31!$&'()*+,;=:@/?#", "https://a/?azAZ09-._~1!$&'()*+,;=:@/?#"); /* Same query followed by an empty fragment; the trailing '#' is kept. */
+
+       TEST_NORMALIZE_FAIL("https://a/?[", EM_QF_BADCHR); /* From here on: one rejected character per case. */
+       TEST_NORMALIZE_FAIL("https://a/?]", EM_QF_BADCHR);
+       TEST_NORMALIZE_FAIL("https://a/? ", EM_QF_BADCHR);
+       TEST_NORMALIZE_FAIL("https://a/?\"", EM_QF_BADCHR);
+       TEST_NORMALIZE_FAIL("https://a/?<", EM_QF_BADCHR);
+       TEST_NORMALIZE_FAIL("https://a/?>", EM_QF_BADCHR);
+       TEST_NORMALIZE_FAIL("https://a/?\\", EM_QF_BADCHR);
+       TEST_NORMALIZE_FAIL("https://a/?^", EM_QF_BADCHR);
+       TEST_NORMALIZE_FAIL("https://a/?`", EM_QF_BADCHR);
+       TEST_NORMALIZE_FAIL("https://a/?{", EM_QF_BADCHR);
+       TEST_NORMALIZE_FAIL("https://a/?}", EM_QF_BADCHR);
+       TEST_NORMALIZE_FAIL("https://a/?|", EM_QF_BADCHR);
+}
+END_TEST
+
+START_TEST(test_fragment) /* Fragment component: characters expected to pass and characters expected to be rejected. */
+{
+       char *normal; /* Referenced by name inside the TEST_NORMALIZE* macros. */
+
+       printf("3986#3.6: Fragment\n");
+
+       TEST_NORMALIZE("https://a/#azAZ09-._~%31!$&'()*+,;=:@/?", "https://a/#azAZ09-._~1!$&'()*+,;=:@/?"); /* Only the %31 escape changes (to '1'); everything else is kept verbatim. */
+       TEST_NORMALIZE("https://a/#azAZ09-._~%31!$&'()*+,;=:@/?", "https://a/#azAZ09-._~1!$&'()*+,;=:@/?"); /* NOTE(review): identical to the assertion above; possibly a leftover from mirroring test_query. Confirm whether a different input was intended. */
+
+       TEST_NORMALIZE_FAIL("https://a/##", EM_QF_BADCHR); /* A second '#' inside the fragment is rejected. */
+       TEST_NORMALIZE_FAIL("https://a/#[", EM_QF_BADCHR); /* From here on: the same rejected characters as in test_query. */
+       TEST_NORMALIZE_FAIL("https://a/#]", EM_QF_BADCHR);
+       TEST_NORMALIZE_FAIL("https://a/# ", EM_QF_BADCHR);
+       TEST_NORMALIZE_FAIL("https://a/#\"", EM_QF_BADCHR);
+       TEST_NORMALIZE_FAIL("https://a/#<", EM_QF_BADCHR);
+       TEST_NORMALIZE_FAIL("https://a/#>", EM_QF_BADCHR);
+       TEST_NORMALIZE_FAIL("https://a/#\\", EM_QF_BADCHR);
+       TEST_NORMALIZE_FAIL("https://a/#^", EM_QF_BADCHR);
+       TEST_NORMALIZE_FAIL("https://a/#`", EM_QF_BADCHR);
+       TEST_NORMALIZE_FAIL("https://a/#{", EM_QF_BADCHR);
+       TEST_NORMALIZE_FAIL("https://a/#}", EM_QF_BADCHR);
+       TEST_NORMALIZE_FAIL("https://a/#|", EM_QF_BADCHR);
+}
+END_TEST
+
 START_TEST(lowercase_scheme_and_host)
 {
        char *normal;
@@ -208,20 +367,22 @@ END_TEST
 
 START_TEST(https_grammar)
 {
+       char *normal;
+
        printf("9110#4.2.2: https-URI     = \"https\" \"://\" authority path-abempty [ \"?\" query ]\n");
        printf("            authority     = host [ \":\" port ]\n");
        printf("            path-abempty  = *( \"/\" segment )\n");
        printf("            segment       = *pchar\n");
 
-       TEST_NORMALIZE_FAIL("");
-       TEST_NORMALIZE_FAIL("h");
-       TEST_NORMALIZE_FAIL("http");
-       TEST_NORMALIZE_FAIL("https");
-       TEST_NORMALIZE_FAIL("https:");
-       TEST_NORMALIZE_FAIL("https:/");
-       TEST_NORMALIZE_FAIL("https://");
-       TEST_NORMALIZE_FAIL("https://a.β.c/");
-       TEST_NORMALIZE_FAIL("https://a.b.c/β");
+       TEST_NORMALIZE_FAIL("", EM_SCHEME_NOCOLON);
+       TEST_NORMALIZE_FAIL("h", EM_SCHEME_NOCOLON);
+       TEST_NORMALIZE_FAIL("http", EM_SCHEME_NOCOLON);
+       TEST_NORMALIZE_FAIL("https", EM_SCHEME_NOCOLON);
+       TEST_NORMALIZE_FAIL("https:", EM_SCHEME_NOTREMOTE);
+       TEST_NORMALIZE_FAIL("https:/", EM_SCHEME_NOTREMOTE);
+       TEST_NORMALIZE_FAIL("https://", EM_HOST_EMPTY);
+       TEST_NORMALIZE_FAIL("https://a.β.c/", EM_HOST_BADCHR);
+       TEST_NORMALIZE_FAIL("https://a.b.c/β", EM_PATH_BADCHR);
 
        /* I think everything else is already tested below. */
 }
@@ -254,15 +415,15 @@ START_TEST(disallow_http_empty_host)
        printf("(Also 9110#4.2.3: Empty path normalizes to '/')\n");
 
        TEST_NORMALIZE("https://a", "https://a/");
-       TEST_NORMALIZE_FAIL("https://");
+       TEST_NORMALIZE_FAIL("https://", EM_HOST_EMPTY);
        TEST_NORMALIZE("https://a/f/g", "https://a/f/g");
-       TEST_NORMALIZE_FAIL("https:///f/g");
+       TEST_NORMALIZE_FAIL("https:///f/g", EM_HOST_EMPTY);
        TEST_NORMALIZE("https://a:1234/f/g", "https://a:1234/f/g");
-       TEST_NORMALIZE_FAIL("https://:1234/f/g");
+       TEST_NORMALIZE_FAIL("https://:1234/f/g", EM_HOST_EMPTY);
        TEST_NORMALIZE("https://a?123", "https://a/?123");
-       TEST_NORMALIZE_FAIL("https://?123");
+       TEST_NORMALIZE_FAIL("https://?123", EM_HOST_EMPTY);
        TEST_NORMALIZE("https://a#123", "https://a/#123");
-       TEST_NORMALIZE_FAIL("https://#123");
+       TEST_NORMALIZE_FAIL("https://#123", EM_HOST_EMPTY);
 }
 END_TEST
 
@@ -360,8 +521,8 @@ START_TEST(disallow_https_userinfo)
        printf("9110#4.2.4: Disallow https userinfo\n");
 
        TEST_NORMALIZE("https://c.d.e/f/g", "https://c.d.e/f/g");
-       TEST_NORMALIZE_FAIL("https://a@c.d.e/f/g");
-       TEST_NORMALIZE_FAIL("https://a:b@c.d.e/f/g");
+       TEST_NORMALIZE_FAIL("https://a@c.d.e/f/g", EM_USERINFO_DISALLOWED);
+       TEST_NORMALIZE_FAIL("https://a:b@c.d.e/f/g", EM_USERINFO_DISALLOWED);
 }
 END_TEST
 
@@ -372,50 +533,51 @@ START_TEST(rsync_grammar)
        printf("5781#2: rsync://[user@]host[:PORT]/Source\n");
        printf("rsyncuri        = \"rsync:\" hier-part\n");
 
-       TEST_NORMALIZE_FAIL("");
-       TEST_NORMALIZE_FAIL("r");
-       TEST_NORMALIZE_FAIL("rsyn");
-       TEST_NORMALIZE_FAIL("rsync");
-       TEST_NORMALIZE_FAIL("rsync:");
-       TEST_NORMALIZE_FAIL("rsync:/");
-       TEST_NORMALIZE_FAIL("rsync://");
-       TEST_NORMALIZE_FAIL("rsync://a.β.c/");
-       TEST_NORMALIZE_FAIL("rsync://a.b.c/β");
+       TEST_NORMALIZE_FAIL("", EM_SCHEME_NOCOLON);
+       TEST_NORMALIZE_FAIL("r", EM_SCHEME_NOCOLON);
+       TEST_NORMALIZE_FAIL("rsyn", EM_SCHEME_NOCOLON);
+       TEST_NORMALIZE_FAIL("rsync", EM_SCHEME_NOCOLON);
+       TEST_NORMALIZE_FAIL("rsync:", EM_SCHEME_NOTREMOTE);
+       TEST_NORMALIZE_FAIL("rsync:/", EM_SCHEME_NOTREMOTE);
+       TEST_NORMALIZE_FAIL("rsync://", EM_HOST_EMPTY);
+       TEST_NORMALIZE_FAIL("rsync://a.β.c/", EM_HOST_BADCHR);
+       TEST_NORMALIZE_FAIL("rsync://a.b.c/β", EM_PATH_BADCHR);
 
        TEST_NORMALIZE("rsync://a.b.c/m", "rsync://a.b.c/m");
        TEST_NORMALIZE("rsync://a.b.c/m/r", "rsync://a.b.c/m/r");
-       TEST_NORMALIZE_FAIL("rsync://a.b.c/m/r?query");
-       TEST_NORMALIZE_FAIL("rsync://a.b.c/m/r#fragment");
+       TEST_NORMALIZE_FAIL("rsync://a.b.c/m/r?query", EM_QUERY_DISALLOWED);
+       TEST_NORMALIZE_FAIL("rsync://a.b.c/m/r#fragment", EM_FRAGMENT_DISALLOWED);
 
        /* hier-part     = "//" authority path-abempty */
        TEST_NORMALIZE("rsync://user@a.b.c:1234/m/r", "rsync://user@a.b.c:1234/m/r");
        TEST_NORMALIZE("rsync://a.b.c/m/r", "rsync://a.b.c/m/r");
        TEST_NORMALIZE("rsync://user@a.b.c:1234", "rsync://user@a.b.c:1234/");
        TEST_NORMALIZE("rsync://a.b.c", "rsync://a.b.c/");
+       TEST_NORMALIZE_FAIL("rsync://[@a.b.c", EM_USERINFO_BADCHR);
 
        /* hier-part     = path-absolute */
        /* ie. "rsync:/" [ pchar+ ( "/" pchar* )* ] */
        /* (These refer to local files. The RFC allows them, but Fort shouldn't.) */
-       TEST_NORMALIZE_FAIL("rsync:/");
-       TEST_NORMALIZE_FAIL("rsync:/a");
-       TEST_NORMALIZE_FAIL("rsync:/a/");
-       TEST_NORMALIZE_FAIL("rsync:/a/a");
-       TEST_NORMALIZE_FAIL("rsync:/a/a/a");
-       TEST_NORMALIZE_FAIL("rsync:/abc/def/xyz");
-       TEST_NORMALIZE_FAIL("rsync:/abc////def//xyz");
+       TEST_NORMALIZE_FAIL("rsync:/", EM_SCHEME_NOTREMOTE);
+       TEST_NORMALIZE_FAIL("rsync:/a", EM_SCHEME_NOTREMOTE);
+       TEST_NORMALIZE_FAIL("rsync:/a/", EM_SCHEME_NOTREMOTE);
+       TEST_NORMALIZE_FAIL("rsync:/a/a", EM_SCHEME_NOTREMOTE);
+       TEST_NORMALIZE_FAIL("rsync:/a/a/a", EM_SCHEME_NOTREMOTE);
+       TEST_NORMALIZE_FAIL("rsync:/abc/def/xyz", EM_SCHEME_NOTREMOTE);
+       TEST_NORMALIZE_FAIL("rsync:/abc////def//xyz", EM_SCHEME_NOTREMOTE);
 
        /* hier-part     = path-rootless */
        /* ie. "rsync:" pchar+ ( "/" pchar* )* */
        /* (Also local paths. Disallowed by Fort needs.) */
-       TEST_NORMALIZE_FAIL("rsync:a");
-       TEST_NORMALIZE_FAIL("rsync:aa");
-       TEST_NORMALIZE_FAIL("rsync:aa/");
-       TEST_NORMALIZE_FAIL("rsync:aa/a");
-       TEST_NORMALIZE_FAIL("rsync:aa/aa");
-       TEST_NORMALIZE_FAIL("rsync:aa///aa");
+       TEST_NORMALIZE_FAIL("rsync:a", EM_SCHEME_NOTREMOTE);
+       TEST_NORMALIZE_FAIL("rsync:aa", EM_SCHEME_NOTREMOTE);
+       TEST_NORMALIZE_FAIL("rsync:aa/", EM_SCHEME_NOTREMOTE);
+       TEST_NORMALIZE_FAIL("rsync:aa/a", EM_SCHEME_NOTREMOTE);
+       TEST_NORMALIZE_FAIL("rsync:aa/aa", EM_SCHEME_NOTREMOTE);
+       TEST_NORMALIZE_FAIL("rsync:aa///aa", EM_SCHEME_NOTREMOTE);
 
        /* hier-part     = path-empty */
-       TEST_NORMALIZE_FAIL("rsync:");
+       TEST_NORMALIZE_FAIL("rsync:", EM_SCHEME_NOTREMOTE);
 }
 END_TEST
 
@@ -443,7 +605,11 @@ static Suite *create_suite(void)
        tcase_add_test(misc, test_same_origin);
 
        generic = tcase_create("RFC 3986 (generic URI)");
+       tcase_add_test(generic, pct_encoding);
        tcase_add_test(generic, reserved_unchanged);
+       tcase_add_test(generic, test_port);
+       tcase_add_test(generic, test_query);
+       tcase_add_test(generic, test_fragment);
        tcase_add_test(generic, lowercase_scheme_and_host);
        tcase_add_test(generic, decode_unreserved_characters);
        tcase_add_test(generic, path_segment_normalization);