From: Alberto Leiva Popper Date: Mon, 4 Mar 2024 01:49:20 +0000 (-0600) Subject: Normalize both URLs and local paths X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=e6f0e2617a8de9abd615ad12097990a1a1ec4250;p=thirdparty%2FFORT-validator.git Normalize both URLs and local paths ("Normalization" = collapsing `.` and `..`) Before, only local paths were being normalized. This was because, by some twisted logic, I was under the impression that `https://a.b.c/d` and `https://a.b.c/d/.` are technically supposed to be different identifiers. My recent visit to RFC3986 disproved this. But regardless of what the standards say, it doesn't make sense to treat those URLs differently in the context of RP validation. They will be cached into the same directory. Not that this aspect of the code used to be incorrect, to the best of my knowledge. Fort needs normalization for proper pre-download identification (which in turn is needed to prevent a file from being downloaded twice during the same iteration). Old code used to identify by local path, and the upcoming session desync commit will need to index by remote path. Hence the need for this change. --- diff --git a/src/alloc.c b/src/alloc.c index 00a1b09c..8bc33ff5 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -60,3 +60,15 @@ pstrdup(const char *s) return result; } + +char * +pstrndup(char const *s, size_t size) +{ + char *result; + + result = strndup(s, size); + if (result == NULL) + enomem_panic(); + + return result; +} diff --git a/src/alloc.h b/src/alloc.h index b9e34e5a..5bd04975 100644 --- a/src/alloc.h +++ b/src/alloc.h @@ -20,5 +20,6 @@ void *prealloc(void *ptr, size_t size); /* strdup(), but panic on allocation failure. */ char *pstrdup(char const *s); +char *pstrndup(char const *s, size_t size); #endif /* SRC_ALLOC_H_ */ diff --git a/src/cache/local_cache.c b/src/cache/local_cache.c index 3f431d67..9d98bd4b 100644 --- a/src/cache/local_cache.c +++ b/src/cache/local_cache.c @@ -482,15 +482,12 @@ static int get_url(struct rpki_uri *uri, const char *tal, struct rpki_uri **url) { char const *guri, *c; - char *guri2; + char *gcopy; unsigned int slashes; int error; - if (uri_get_type(uri) != UT_RPP) { - uri_refget(uri); - *url = uri; - return 0; - } + if (uri_get_type(uri) != UT_RPP) + goto reuse_uri; /* * Careful with this code. rsync(1): @@ -534,6 +531,9 @@ get_url(struct rpki_uri *uri, const char *tal, struct rpki_uri **url) * But note: This only works if we're synchronizing a directory. * But this is fine, because this hack stacks with the minimum common * path performance hack. + * + * Minimum common path performance hack: rsync the rsync module root, + * not every RPP separately. The former is much faster. */ guri = uri_get_global(uri); @@ -541,27 +541,35 @@ get_url(struct rpki_uri *uri, const char *tal, struct rpki_uri **url) for (c = guri; *c != '\0'; c++) { if (*c == '/') { slashes++; - if (slashes == 4) - return __uri_create(url, tal, UT_RPP, - NULL, guri, c - guri + 1); + if (slashes == 4) { + if (c[1] == '\0') + goto reuse_uri; + gcopy = pstrndup(guri, c - guri + 1); + goto gcopy2url; + } } } - if (slashes == 3 && *(c - 1) != '/') { - guri2 = pstrdup(guri); /* Remove const */ - guri2[c - guri] = '/'; - error = __uri_create(url, tal, UT_RPP, NULL, guri2, - c - guri + 1); - free(guri2); - return error; + if (slashes == 3 && c[-1] != '/') { + gcopy = pmalloc(c - guri + 2); + memcpy(gcopy, guri, c - guri); + gcopy[c - guri] = '/'; + gcopy[c - guri + 1] = '\0'; + goto gcopy2url; } - /* - * Minimum common path performance hack: rsync the rsync module root, - * not every RPP separately. The former is much faster. - */ return pr_val_err("Can't rsync URL '%s': The URL seems to be missing a domain or rsync module.", guri); + +reuse_uri: + uri_refget(uri); + *url = uri; + return 0; + +gcopy2url: + error = uri_create(url, tal, UT_RPP, NULL, gcopy); + free(gcopy); + return error; } static bool diff --git a/src/data_structure/path_builder.c b/src/data_structure/path_builder.c index a9ab658c..5dbee72c 100644 --- a/src/data_structure/path_builder.c +++ b/src/data_structure/path_builder.c @@ -15,12 +15,13 @@ #endif #define MAX_CAPACITY 4096u +/* @reserve needs to be < INITIAL_CAPACITY. */ void -pb_init(struct path_builder *pb) +__pb_init(struct path_builder *pb, size_t reserve) { pb->string = pmalloc(INITIAL_CAPACITY); - pb->string[0] = 0; - pb->len = 0; + pb->string[reserve] = 0; + pb->len = reserve; pb->capacity = INITIAL_CAPACITY; } diff --git a/src/data_structure/path_builder.h b/src/data_structure/path_builder.h index 89989d83..45713c8c 100644 --- a/src/data_structure/path_builder.h +++ b/src/data_structure/path_builder.h @@ -9,7 +9,8 @@ struct path_builder { size_t capacity; }; -void pb_init(struct path_builder *); +void __pb_init(struct path_builder *, size_t); +#define pb_init(pb) __pb_init(pb, 0) int pb_init_cache(struct path_builder *, char const *, char const *); /* diff --git a/src/object/certificate.c b/src/object/certificate.c index bd4dc248..920a561c 100644 --- a/src/object/certificate.c +++ b/src/object/certificate.c @@ -1479,11 +1479,12 @@ end: * Create @uri from the @ad */ static int -uri_create_ad(struct rpki_uri **uri, ACCESS_DESCRIPTION *ad, enum uri_type type, - bool is_notif) +uri_create_ad(struct rpki_uri **uri, ACCESS_DESCRIPTION *ad, enum uri_type type) { ASN1_STRING *asn1str; + char *str; int ptype; + int error; asn1str = GENERAL_NAME_get0_value(ad->location, &ptype); @@ -1513,7 +1514,7 @@ uri_create_ad(struct rpki_uri **uri, ACCESS_DESCRIPTION *ad, enum uri_type type, /* * GEN_URI signals an IA5String. * IA5String is a subset of ASCII, so this cast is safe. - * No guarantees of a NULL chara, though. + * No guarantees of a NULL chara though, which is why we need a dup. * * TODO (testers) According to RFC 5280, accessLocation can be an IRI * somehow converted into URI form. I don't think that's an issue @@ -1522,9 +1523,15 @@ uri_create_ad(struct rpki_uri **uri, ACCESS_DESCRIPTION *ad, enum uri_type type, * directory our g2l version of @asn1_string should contain. * But ask the testers to keep an eye on it anyway. */ - return __uri_create(uri, - tal_get_file_name(validation_tal(state_retrieve())), type, NULL, - ASN1_STRING_get0_data(asn1str), ASN1_STRING_length(asn1str)); + str = pstrndup((char const *)ASN1_STRING_get0_data(asn1str), + ASN1_STRING_length(asn1str)); + + error = uri_create(uri, + tal_get_file_name(validation_tal(state_retrieve())), + type, NULL, str); + + free(str); + return error; } /** @@ -1558,8 +1565,7 @@ handle_ad(int nid, struct ad_metadata const *meta, SIGNATURE_INFO_ACCESS *ia, for (i = 0; i < sk_ACCESS_DESCRIPTION_num(ia); i++) { ad = sk_ACCESS_DESCRIPTION_value(ia, i); if (OBJ_obj2nid(ad->method) == nid) { - error = uri_create_ad(&uri, ad, meta->type, - meta == &RPKI_NOTIFY); + error = uri_create_ad(&uri, ad, meta->type); switch (error) { case 0: break; diff --git a/src/object/manifest.c b/src/object/manifest.c index f527db86..02b12c7c 100644 --- a/src/object/manifest.c +++ b/src/object/manifest.c @@ -25,7 +25,7 @@ cage(struct rpki_uri **uri, struct rpki_uri *notif) return uri_create_caged(uri, tal_get_file_name(validation_tal(state_retrieve())), notif, - uri_get_global(*uri), uri_get_global_len(*uri)); + uri_get_global(*uri)); } static int diff --git a/src/types/uri.c b/src/types/uri.c index e381bf18..753c3b5c 100644 --- a/src/types/uri.c +++ b/src/types/uri.c @@ -35,8 +35,6 @@ struct rpki_uri { * non-ASCII characters. */ char *global; - /** Length of @global. */ - size_t global_len; /** * "Local URI". @@ -107,27 +105,65 @@ validate_url_character(int character) : pr_val_err("URL has non-printable character code '%d'.", character); } +static int append_guri(struct path_builder *, char const *, char const *, + int, bool); + /** - * Initializes @uri->global* by cloning @str. - * This function does not assume that @str is null-terminated. + * Initializes @uri->global by building a normalized version of @str. */ static int -str2global(char const *str, size_t str_len, struct rpki_uri *uri) +str2global(char const *str, struct rpki_uri *uri) { +#define SCHEMA_LEN 8 /* strlen("rsync://"), strlen("https://") */ + + char const *s; + char const *pfx; int error; - size_t i; + struct path_builder pb; - for (i = 0; i < str_len; i++) { - error = validate_url_character(str[i]); + if (str == NULL){ + uri->global = NULL; + return 0; + } + + for (s = str; s[0] != '\0'; s++) { + error = validate_url_character(s[0]); if (error) return error; } - uri->global = pmalloc(str_len + 1); - strncpy(uri->global, str, str_len); - uri->global[str_len] = '\0'; - uri->global_len = str_len; + pfx = NULL; + error = 0; + + switch (uri->type) { + case UT_TA_RSYNC: + case UT_RPP: + case UT_CAGED: + case UT_AIA: + case UT_SO: + case UT_MFT: + pfx = "rsync://"; + error = ENOTRSYNC; + break; + case UT_TA_HTTP: + case UT_NOTIF: + case UT_TMP: + pfx = "https://"; + error = ENOTHTTPS; + break; + } + + if (pfx == NULL) + pr_crit("Unknown URI type: %u", uri->type); + + __pb_init(&pb, SCHEMA_LEN - 1); + error = append_guri(&pb, str, pfx, error, true); + if (error) { + pb_cleanup(&pb); + return error; + } + uri->global = strncpy(pb.string, str, SCHEMA_LEN); return 0; } @@ -207,7 +243,6 @@ ia5str2global(struct rpki_uri *uri, char const *mft, IA5String_t *ia5) joined[dir_len + ia5->size] = '\0'; uri->global = joined; - uri->global_len = dir_len + ia5->size; return 0; } @@ -315,9 +350,10 @@ get_rrdp_workspace(struct path_builder *pb, char const *tal, if (error) return error; - error = append_guri(pb, notif->global, "https://", ENOTHTTPS, true); + error = pb_append(pb, ¬if->global[SCHEMA_LEN]); if (error) pb_cleanup(pb); + return error; } @@ -325,16 +361,16 @@ get_rrdp_workspace(struct path_builder *pb, char const *tal, * Maps "rsync://a.b.c/d/e.cer" into "/rsync/a.b.c/d/e.cer". */ static int -map_simple(struct rpki_uri *uri, char const *tal, char const *gprefix, int err) +map_simple(struct rpki_uri *uri, char const *tal, char const *subdir) { struct path_builder pb; int error; - error = pb_init_cache(&pb, tal, NULL); + error = pb_init_cache(&pb, tal, subdir); if (error) return error; - error = append_guri(&pb, uri->global, gprefix, err, false); + error = pb_append(&pb, &uri->global[SCHEMA_LEN]); if (error) { pb_cleanup(&pb); return error; @@ -358,16 +394,17 @@ map_caged(struct rpki_uri *uri, char const *tal, struct rpki_uri *notif) if (error) return error; - if (uri->global[0] == '\0') - goto end; /* Caller is only interested in the cage. */ + if (uri->global == NULL) + goto success; /* Caller is only interested in the cage. */ - error = append_guri(&pb, uri->global, "rsync://", ENOTRSYNC, true); + error = pb_append(&pb, &uri->global[SCHEMA_LEN]); if (error) { pb_cleanup(&pb); return error; } -end: uri->local = pb.string; +success: + uri->local = pb.string; return 0; } @@ -378,11 +415,11 @@ autocomplete_local(struct rpki_uri *uri, char const *tal, switch (uri->type) { case UT_TA_RSYNC: case UT_RPP: - return map_simple(uri, tal, "rsync://", ENOTRSYNC); + return map_simple(uri, tal, "rsync"); case UT_TA_HTTP: case UT_NOTIF: - return map_simple(uri, tal, "https://", ENOTHTTPS); + return map_simple(uri, tal, "https"); case UT_TMP: return cache_tmpfile(&uri->local); @@ -403,24 +440,26 @@ autocomplete_local(struct rpki_uri *uri, char const *tal, /* * I think the reason why @guri is not a char * is to convey that it doesn't * need to be NULL terminated, but I'm not sure. + * + * FIXME callers now need to ensure @guri is NULL-terminated. */ int -__uri_create(struct rpki_uri **result, char const *tal, enum uri_type type, - struct rpki_uri *notif, void const *guri, size_t guri_len) +uri_create(struct rpki_uri **result, char const *tal, enum uri_type type, + struct rpki_uri *notif, char const *guri) { struct rpki_uri *uri; int error; uri = pmalloc(sizeof(struct rpki_uri)); + uri->type = type; + uri->references = 1; - error = str2global(guri, guri_len, uri); + error = str2global(guri, uri); if (error) { free(uri); return error; } - uri->type = type; - error = autocomplete_local(uri, tal, notif); if (error) { free(uri->global); @@ -428,8 +467,6 @@ __uri_create(struct rpki_uri **result, char const *tal, enum uri_type type, return error; } - uri->references = 1; - *result = uri; return 0; } @@ -446,6 +483,8 @@ uri_create_mft(struct rpki_uri **result, char const *tal, int error; uri = pmalloc(sizeof(struct rpki_uri)); + uri->type = (notif == NULL) ? UT_RPP : UT_CAGED; + uri->references = 1; error = ia5str2global(uri, mft->global, ia5); if (error) { @@ -453,8 +492,6 @@ uri_create_mft(struct rpki_uri **result, char const *tal, return error; } - uri->type = (notif == NULL) ? UT_RPP : UT_CAGED; - error = autocomplete_local(uri, tal, notif); if (error) { free(uri->global); @@ -462,8 +499,6 @@ uri_create_mft(struct rpki_uri **result, char const *tal, return error; } - uri->references = 1; - *result = uri; return 0; } @@ -514,12 +549,6 @@ uri_get_local(struct rpki_uri *uri) return uri->local; } -size_t -uri_get_global_len(struct rpki_uri *uri) -{ - return uri->global_len; -} - bool uri_equals(struct rpki_uri *u1, struct rpki_uri *u2) { @@ -530,14 +559,17 @@ uri_equals(struct rpki_uri *u1, struct rpki_uri *u2) bool uri_has_extension(struct rpki_uri *uri, char const *ext) { + size_t uri_len; size_t ext_len; int cmp; + uri_len = strlen(uri->global); ext_len = strlen(ext); - if (uri->global_len < ext_len) + + if (uri_len < ext_len) return false; - cmp = strncmp(uri->global + uri->global_len - ext_len, ext, ext_len); + cmp = strncmp(uri->global + uri_len - ext_len, ext, ext_len); return cmp == 0; } diff --git a/src/types/uri.h b/src/types/uri.h index dbfe9c32..3ba86704 100644 --- a/src/types/uri.h +++ b/src/types/uri.h @@ -19,18 +19,16 @@ enum uri_type { struct rpki_uri; -int __uri_create(struct rpki_uri **, char const *, enum uri_type, - struct rpki_uri *, void const *, size_t); +int uri_create(struct rpki_uri **, char const *, enum uri_type, + struct rpki_uri *, char const *); int uri_create_mft(struct rpki_uri **, char const *, struct rpki_uri *, struct rpki_uri *, IA5String_t *); struct rpki_uri *uri_create_cache(char const *); -#define uri_create(uri, tal, type, notif, guri) \ - __uri_create(uri, tal, type, notif, guri, strlen(guri)) -#define uri_create_caged(uri, tal, notif, guri, guri_len) \ - __uri_create(uri, tal, UT_CAGED, notif, guri, guri_len) +#define uri_create_caged(uri, tal, notif, guri) \ + uri_create(uri, tal, UT_CAGED, notif, guri) #define uri_create_cage(uri, tal, notif) \ - uri_create_caged(uri, tal, notif, "", 0) + uri_create_caged(uri, tal, notif, NULL) struct rpki_uri *uri_refget(struct rpki_uri *); void uri_refput(struct rpki_uri *); @@ -41,7 +39,6 @@ void uri_refput(struct rpki_uri *); */ char const *uri_get_global(struct rpki_uri *); char const *uri_get_local(struct rpki_uri *); -size_t uri_get_global_len(struct rpki_uri *); bool uri_equals(struct rpki_uri *, struct rpki_uri *); bool uri_has_extension(struct rpki_uri *, char const *); diff --git a/test/crypto/hash_test.c b/test/crypto/hash_test.c index 5673aa58..c870c9af 100644 --- a/test/crypto/hash_test.c +++ b/test/crypto/hash_test.c @@ -45,7 +45,6 @@ START_TEST(test_hash) hash_setup(); uri.global = "https://example.com/resources/lorem-ipsum.txt"; - uri.global_len = strlen(uri.global); uri.local = "resources/lorem-ipsum.txt"; uri.type = UT_TA_HTTP; uri.references = 1; diff --git a/test/rrdp_test.c b/test/rrdp_test.c index e5ac7acb..cf67f788 100644 --- a/test/rrdp_test.c +++ b/test/rrdp_test.c @@ -413,7 +413,6 @@ START_TEST(test_parse_snapshot_bad_publish) ck_assert_int_eq(0, relax_ng_init()); notif_uri.global = "https://example.com/notification.xml"; - notif_uri.global_len = strlen(notif_uri.global); notif_uri.local = "cache/example.com/notification.xml"; notif_uri.type = UT_NOTIF; notif_uri.references = 1; diff --git a/test/types/uri_test.c b/test/types/uri_test.c index 8b611e8d..e80f308a 100644 --- a/test/types/uri_test.c +++ b/test/types/uri_test.c @@ -56,7 +56,7 @@ START_TEST(test_constructor) uri_refput(uri); ck_assert_int_eq(0, URI_CREATE_HTTP(uri, "https://a.b.c/")); - ck_assert_str_eq("https://a.b.c/", uri_get_global(uri)); + ck_assert_str_eq("https://a.b.c", uri_get_global(uri)); ck_assert_str_eq("tmp/test.tal/https/a.b.c", uri_get_local(uri)); uri_refput(uri); @@ -71,22 +71,22 @@ START_TEST(test_constructor) uri_refput(uri); ck_assert_int_eq(0, URI_CREATE_HTTP(uri, "https://a.b.c/d/..")); - ck_assert_str_eq("https://a.b.c/d/..", uri_get_global(uri)); + ck_assert_str_eq("https://a.b.c", uri_get_global(uri)); ck_assert_str_eq("tmp/test.tal/https/a.b.c", uri_get_local(uri)); uri_refput(uri); ck_assert_int_eq(0, URI_CREATE_HTTP(uri, "https://a.b.c/.")); - ck_assert_str_eq("https://a.b.c/.", uri_get_global(uri)); + ck_assert_str_eq("https://a.b.c", uri_get_global(uri)); ck_assert_str_eq("tmp/test.tal/https/a.b.c", uri_get_local(uri)); uri_refput(uri); ck_assert_int_eq(0, URI_CREATE_HTTP(uri, "https://a.b.c/././d/././e/./.")); - ck_assert_str_eq("https://a.b.c/././d/././e/./.", uri_get_global(uri)); + ck_assert_str_eq("https://a.b.c/d/e", uri_get_global(uri)); ck_assert_str_eq("tmp/test.tal/https/a.b.c/d/e", uri_get_local(uri)); uri_refput(uri); ck_assert_int_eq(0, URI_CREATE_HTTP(uri, "https://a.b.c/a/b/.././..")); - ck_assert_str_eq("https://a.b.c/a/b/.././..", uri_get_global(uri)); + ck_assert_str_eq("https://a.b.c", uri_get_global(uri)); ck_assert_str_eq("tmp/test.tal/https/a.b.c", uri_get_local(uri)); uri_refput(uri);