From: Alberto Leiva Popper Date: Thu, 14 Nov 2024 19:54:14 +0000 (-0600) Subject: Cache cleanup review X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=7711a247beb1119ee1349e9ddc0d13cd6ab08917;p=thirdparty%2FFORT-validator.git Cache cleanup review Fixes several XXXs. - Separate node->mtim into attempt_ts and success_ts. Because they're really two different timestamps; The former is meant for node expiration, the latter for HTTP IMS. - Move removal of orphaned fallbacks to remove_abandoned(). Because orphaned refreshes need the same logic. - Added the (randomly missing) expiration threshold for orphans. It's still missing the implementation of remove_orphaned_files(), but I'm still weighting options, as it seems it's going to be an expensive operation that's rarely going to do anything. --- diff --git a/src/cache.c b/src/cache.c index 5ee086b3..2f4572e3 100644 --- a/src/cache.c +++ b/src/cache.c @@ -26,10 +26,16 @@ #include "types/url.h" #include "types/uthash.h" -enum dl_state { - DLS_OUTDATED = 0, /* Not downloaded yet */ - DLS_ONGOING, /* Download in progress */ - DLS_FRESH, /* Download complete */ +enum node_state { + /* Refresh nodes: Not downloaded yet (stale) */ + /* Fallback nodes: Queued for commit */ + DLS_OUTDATED = 0, + /* Refresh nodes: Download in progress */ + /* Fallback nodes: N/A */ + DLS_ONGOING, + /* Refresh nodes: Download complete */ + /* Fallback nodes: Committed */ + DLS_FRESH, }; /* @@ -50,9 +56,11 @@ enum dl_state { struct cache_node { struct cache_mapping map; - enum dl_state state; - int dlerr; /* Result code of recent download attempt */ - time_t mtim; /* Last successful download time, or zero */ + enum node_state state; + /* Result code of recent dl attempt (DLS_FRESH only) */ + int dlerr; + time_t attempt_ts; /* Refresh: Dl attempt. Fallback: Commit */ + time_t success_ts; /* Refresh: Dl success. Fallback: Commit */ struct rrdp_state *rrdp; @@ -122,7 +130,7 @@ static void __delete_node_cb(struct cache_node const *); #endif static void -delete_node(struct cache_table *tbl, struct cache_node *node) +delete_node(struct cache_table *tbl, struct cache_node *node, void *arg) { #ifdef UNIT_TESTING __delete_node_cb(node); @@ -136,18 +144,25 @@ delete_node(struct cache_table *tbl, struct cache_node *node) } static void -cache_foreach(void (*cb)(struct cache_table *, struct cache_node *)) +foreach_node(void (*cb)(struct cache_table *, struct cache_node *, void *), + void *arg) { struct cache_node *node, *tmp; HASH_ITER(hh, cache.rsync.nodes, node, tmp) - cb(&cache.rsync, node); + cb(&cache.rsync, node, arg); HASH_ITER(hh, cache.https.nodes, node, tmp) - cb(&cache.https, node); + cb(&cache.https, node, arg); HASH_ITER(hh, cache.rrdp.nodes, node, tmp) - cb(&cache.rrdp, node); + cb(&cache.rrdp, node, arg); HASH_ITER(hh, cache.fallback.nodes, node, tmp) - cb(&cache.fallback, node); + cb(&cache.fallback, node, arg); +} + +static void +flush_nodes(void) +{ + foreach_node(delete_node, NULL); } char * @@ -371,10 +386,10 @@ json2node(json_t *json) if (json_get_str(json, "path", &str)) goto fail; node->map.path = pstrdup(str); - error = json_get_int(json, "dlerr", &node->dlerr); + error = json_get_ts(json, "attempt", &node->attempt_ts); if (error != 0 && error != ENOENT) goto fail; - error = json_get_ts(json, "mtim", &node->mtim); + error = json_get_ts(json, "success", &node->success_ts); if (error != 0 && error != ENOENT) goto fail; error = json_get_object(json, "rrdp", &rrdp); @@ -494,7 +509,7 @@ cache_prepare(void) return 0; -fail: cache_foreach(delete_node); +fail: flush_nodes(); unlock_cache(); return error; } @@ -512,9 +527,9 @@ node2json(struct cache_node *node) goto fail; if (json_add_str(json, "path", node->map.path)) goto fail; - if (node->dlerr && json_add_int(json, "dlerr", node->dlerr)) // XXX relevant? + if (node->attempt_ts && json_add_ts(json, "attempt", node->attempt_ts)) goto fail; - if (node->mtim && json_add_ts(json, "mtim", node->mtim)) + if (node->success_ts && json_add_ts(json, "success", node->success_ts)) goto fail; if (node->rrdp) if (json_object_add(json, "rrdp", rrdp_state2json(node->rrdp))) @@ -605,44 +620,39 @@ dl_rsync(struct cache_node *module) if (error) return error; - module->mtim = time_nonfatal(); /* XXX probably not needed */ + module->success_ts = module->attempt_ts; return 0; } static int dl_rrdp(struct cache_node *notif) { - time_t mtim; bool changed; int error; - mtim = time_nonfatal(); - - error = rrdp_update(¬if->map, notif->mtim, &changed, ¬if->rrdp); + error = rrdp_update(¬if->map, notif->success_ts, + &changed, ¬if->rrdp); if (error) return error; if (changed) - notif->mtim = mtim; + notif->success_ts = notif->attempt_ts; return 0; } static int dl_http(struct cache_node *file) { - time_t mtim; bool changed; int error; - mtim = time_nonfatal(); - error = http_download(file->map.url, file->map.path, - file->mtim, &changed); + file->success_ts, &changed); if (error) return error; if (changed) - file->mtim = mtim; + file->success_ts = file->attempt_ts; return 0; } @@ -718,6 +728,7 @@ do_refresh(struct cache_table *tbl, char const *uri, struct cache_node **result) node->state = DLS_ONGOING; mutex_unlock(&tbl->lock); + node->attempt_ts = time_fatal(); node->dlerr = tbl->download(node); downloaded = true; @@ -798,7 +809,7 @@ get_fallback(struct sia_uris *sias) return rsync; if (rsync == NULL) return rrdp; - return (difftime(rsync->mtim, rrdp->mtim) > 0) ? rsync : rrdp; + return (difftime(rsync->success_ts, rrdp->success_ts) > 0) ? rsync : rrdp; } /* Do not free nor modify the result. */ @@ -1045,92 +1056,6 @@ cache_print(void) table_print(&cache.fallback); } -//static void -//cleanup_node(struct rpki_cache *cache, struct cache_node *node, -// time_t last_week) -//{ -// char const *path; -// int error; -// -// path = map_get_path(node->map); -// if (map_get_type(node->map) == MAP_NOTIF) -// goto skip_file; -// -// error = file_exists(path); -// switch (error) { -// case 0: -// break; -// case ENOENT: -// /* Node exists but file doesn't: Delete node */ -// pr_op_debug("Node exists but file doesn't: %s", path); -// delete_node_and_cage(cache, node); -// return; -// default: -// pr_op_err("Trouble cleaning '%s'; stat() returned errno %d: %s", -// map_op_get_printable(node->map), error, strerror(error)); -// } -// -//skip_file: -// if (!is_node_fresh(node, last_week)) { -// pr_op_debug("Deleting expired cache element %s.", path); -// file_rm_rf(path); -// delete_node_and_cage(cache, node); -// } -//} -// -///* -// * "Do not clean." List of mappings that should not be deleted from the cache. -// * Global because nftw doesn't have a generic argument. -// */ -//static struct map_list dnc; -//static pthread_mutex_t dnc_lock = PTHREAD_MUTEX_INITIALIZER; -// -//static bool -//is_cached(char const *_fpath) -//{ -// struct cache_mapping **node; -// char const *fpath, *npath; -// size_t c; -// -// /* -// * This relies on paths being normalized, which is currently done by the -// * struct cache_mapping constructors. -// */ -// -// ARRAYLIST_FOREACH(&dnc, node) { -// fpath = _fpath; -// npath = map_get_path(*node); -// -// for (c = 0; fpath[c] == npath[c]; c++) -// if (fpath[c] == '\0') -// return true; -// if (fpath[c] == '\0' && npath[c] == '/') -// return true; -// if (npath[c] == '\0' && fpath[c] == '/') -// return true; -// } -// -// return false; -//} - -static int -rmf(const char *fpath, const struct stat *sb, int typeflag, struct FTW *ftwbuf) -{ - if (remove(fpath) < 0) - pr_op_warn("Can't remove %s: %s", fpath, strerror(errno)); - else - pr_op_debug("Removed %s.", fpath); - return 0; -} - -static void -cleanup_tmp(void) -{ - if (nftw(CACHE_TMPDIR, rmf, 32, FTW_DEPTH | FTW_PHYS)) - pr_op_warn("Cannot empty the cache's tmp directory: %s", - strerror(errno)); -} - static bool is_fallback(char const *path) { @@ -1160,10 +1085,7 @@ commit_rpp(struct cache_commit *commit, struct cache_node *fb) if (!dst) goto skip; - pr_op_debug("Hard-linking: %s -> %s", src->path, dst); - if (link(src->path, dst) < 0) - pr_op_warn("Could not hard-link cache file: %s", - strerror(errno)); + file_ln(src->path, dst); skip: free(src->path); src->path = pstrdup(dst); @@ -1225,21 +1147,20 @@ next: free(file_path); } static void -commit_fallbacks(void) +commit_fallbacks(time_t now) { struct cache_commit *commit; - struct cache_node *fb, *tmp; - time_t now; + struct cache_node *fb; array_index i; - int error; - - now = time_fatal(); while (!STAILQ_EMPTY(&commits)) { commit = STAILQ_FIRST(&commits); STAILQ_REMOVE_HEAD(&commits, lh); if (commit->caRepository) { + pr_op_debug("Creating fallback for %s (%s)", + commit->caRepository, commit->rpkiNotify); + if (commit->rpkiNotify) { char *key; key = get_rrdp_fallback_key(commit->rpkiNotify, @@ -1260,19 +1181,17 @@ commit_fallbacks(void) } else { /* TA */ struct cache_mapping *map = &commit->files[0]; + pr_op_debug("Creating fallback for %s", map->url); + fb = provide_node(&cache.fallback, map->url); if (is_fallback(map->path)) goto freshen; - pr_op_debug("Hard-linking TA: %s -> %s", - map->path, fb->map.path); - if (link(map->path, fb->map.path) < 0) - pr_op_warn("Could not hard-link cache file: %s", - strerror(errno)); + file_ln(map->path, fb->map.path); } freshen: fb->state = DLS_FRESH; - fb->mtim = now; + fb->attempt_ts = fb->success_ts = now; skip: free(commit->rpkiNotify); free(commit->caRepository); for (i = 0; i < commit->nfiles; i++) { @@ -1282,64 +1201,70 @@ skip: free(commit->rpkiNotify); free(commit->files); free(commit); } - - HASH_ITER(hh, cache.fallback.nodes, fb, tmp) { - if (fb->state == DLS_FRESH) - continue; - - /* - * XXX This one, on the other hand, would definitely benefit - * from an expiration threshold. - */ - pr_op_debug("Removing orphaned fallback: %s", fb->map.path); - error = file_rm_rf(fb->map.path); - if (error) - pr_op_warn("%s removal failed: %s", - fb->map.path, strerror(error)); - delete_node(&cache.fallback, fb); - } } static void -remove_abandoned(void) +remove_abandoned(struct cache_table *table, struct cache_node *node, void *arg) { - // XXX no need to recurse anymore. - /* - nftw_root = cache.rsync; - nftw("rsync", nftw_remove_abandoned, 32, FTW_DEPTH | FTW_PHYS); // XXX + time_t now; + + if (node->state == DLS_FRESH) + return; - nftw_root = cache.https; - nftw("https", nftw_remove_abandoned, 32, FTW_DEPTH | FTW_PHYS); // XXX - */ + now = *((time_t *)arg); + if (difftime(node->attempt_ts + cfg_cache_threshold(), now) < 0) { + file_rm_rf(node->map.path); + delete_node(table, node, NULL); + } } static void -remove_orphaned(struct cache_table *table, struct cache_node *node) +remove_orphaned_nodes(struct cache_table *table, struct cache_node *node, + void *arg) { if (file_exists(node->map.path) == ENOENT) { pr_op_debug("Missing file; deleting node: %s", node->map.path); - delete_node(table, node); + delete_node(table, node, NULL); } } -/* - * Deletes unknown and old untraversed cached files, writes metadata into XML. - */ +static void +remove_orphaned_files(void) +{ + // XXX +} + +/* Deletes obsolete files and nodes from the cache. */ static void cleanup_cache(void) { - // XXX Review + time_t now = time_fatal(); + + /* Delete the entirety of cache/tmp/. */ pr_op_debug("Cleaning up temporal files."); - cleanup_tmp(); + file_rm_rf(CACHE_TMPDIR); - pr_op_debug("Creating fallbacks for valid RPPs."); - commit_fallbacks(); + /* + * Ensure valid RPPs and TAs are linked in fallback, + * by hard-linking the new files. + */ + pr_op_debug("Committing fallbacks."); + commit_fallbacks(now); - pr_op_debug("Cleaning up old abandoned and unknown cache files."); - remove_abandoned(); + /* + * Delete refresh nodes that haven't been downloaded in a while, + * and fallback nodes that haven't been valid in a while. + */ + pr_op_debug("Cleaning up abandoned cache files."); + foreach_node(remove_abandoned, &now); + /* (Paranoid) Delete nodes that are no longer mapped to files. */ pr_op_debug("Cleaning up orphaned nodes."); - cache_foreach(remove_orphaned); + foreach_node(remove_orphaned_nodes, NULL); + + /* (Paranoid) Delete files that are no longer mapped to nodes. */ + pr_op_debug("Cleaning up orphaned files."); + remove_orphaned_files(); } void @@ -1348,7 +1273,7 @@ cache_commit(void) cleanup_cache(); write_index_file(); unlock_cache(); - cache_foreach(delete_node); + flush_nodes(); } void diff --git a/src/file.c b/src/file.c index 3bfe428d..98f224ab 100644 --- a/src/file.c +++ b/src/file.c @@ -192,7 +192,6 @@ file_rm_f(char const *path) static int rm(const char *fpath, const struct stat *sb, int typeflag, struct FTW *ftwbuf) { - pr_op_debug("Deleting %s.", fpath); if (remove(fpath) < 0) pr_op_warn("Cannot delete %s: %s", fpath, strerror(errno)); return 0; @@ -204,22 +203,16 @@ file_rm_rf(char const *path) { int error; + pr_op_debug("rm -rf %s", path); + /* TODO (performance) optimize that 32 */ - errno = 0; - switch (nftw(path, rm, 32, FTW_DEPTH | FTW_PHYS)) { - case 0: - return 0; /* Happy path */ - case -1: - /* - * POSIX requires nftw() to set errno, - * but the Linux man page doesn't mention it at all... - */ + if (nftw(path, rm, 32, FTW_DEPTH | FTW_PHYS) < 0) { error = errno; + pr_op_warn("Cannot remove %s: %s", path, strerror(error)); return error ? error : -1; } - /* This is supposed to be unreachable, but let's not panic. */ - return -1; + return 0; } /* If @force, don't treat EEXIST as an error. */ @@ -241,6 +234,15 @@ file_mkdir(char const *path, bool force) return 0; } +void +file_ln(char const *oldpath, char const *newpath) +{ + pr_op_debug("ln %s %s", oldpath, newpath); + if (link(oldpath, newpath) < 0) + pr_op_warn("Could not hard-link %s to %s: %s", + newpath, oldpath, strerror(errno)); +} + void cseq_init(struct cache_sequence *seq, char *prefix, bool free_prefix) { diff --git a/src/file.h b/src/file.h index f8544f05..6cdf2604 100644 --- a/src/file.h +++ b/src/file.h @@ -37,6 +37,7 @@ int file_exists(char const *); int file_rm_f(char const *); int file_rm_rf(char const *); int file_mkdir(char const *, bool); +void file_ln(char const *, char const *); struct cache_sequence { char *prefix; diff --git a/test/cache_test.c b/test/cache_test.c index 2fc455d4..841a5a10 100644 --- a/test/cache_test.c +++ b/test/cache_test.c @@ -301,9 +301,11 @@ get_days_ago(int days) static time_t epoch; static void -unfreshen(struct cache_table *tbl, struct cache_node *node) +unfreshen(struct cache_table *tbl, struct cache_node *node, void *arg) { node->state = DLS_OUTDATED; + node->attempt_ts -= 4; + node->attempt_ts -= 4; } static int @@ -328,7 +330,7 @@ new_iteration(bool outdate) epoch = outdate ? get_days_ago(30) : get_days_ago(1); pr_op_debug("--- Unfreshening... ---"); - cache_foreach(unfreshen); + foreach_node(unfreshen, NULL); ck_assert_int_eq(0, nftw(".", nftw_unfreshen, 32, FTW_PHYS)); pr_op_debug("---- Tree now stale. ----"); @@ -436,7 +438,7 @@ START_TEST(test_cache_download_rsync_error) } END_TEST -START_TEST(test_rsync_commit) +START_TEST(test_rsync_cleanup) { unsigned int i; @@ -461,7 +463,7 @@ START_TEST(test_rsync_commit) /* Commit 1: Empty -> Empty */ /* Commit 2: Empty -> Empty (just free noise) */ for (i = 0; i < 2; i++) { - commit_fallbacks(); + cleanup_cache(); ck_filesystem("fallback", NULL); new_iteration(false); @@ -471,7 +473,7 @@ START_TEST(test_rsync_commit) queue_commit(NULL, "rsync://domain/mod/rpp0", "rsync/0/0", "rsync/0/1"); queue_commit(NULL, "rsync://domain/mod/rpp2", "rsync/2/0", "rsync/2/1"); queue_commit(NULL, "rsync://domain/mod/rpp3", "rsync/3/0", "rsync/3/2"); - commit_fallbacks(); + cleanup_cache(); ck_filesystem("fallback", /* RPP0 */ "fallback/0/0", "A", "fallback/0/1", "B", /* RPP2 */ "fallback/1/0", "E", "fallback/1/1", "F", @@ -485,7 +487,7 @@ START_TEST(test_rsync_commit) queue_commit(NULL, "rsync://domain/mod/rpp0", "fallback/0/0", "fallback/0/1"); queue_commit(NULL, "rsync://domain/mod/rpp1", "rsync/1/0", "rsync/1/1"); queue_commit(NULL, "rsync://domain/mod/rpp3", "fallback/2/0", "rsync/3/1"); - commit_fallbacks(); + cleanup_cache(); ck_filesystem("fallback", /* RPP0 */ "fallback/0/0", "A", "fallback/0/1", "B", @@ -496,10 +498,10 @@ START_TEST(test_rsync_commit) new_iteration(false); /* Commit 5: Populated -> Empty */ - commit_fallbacks(); + cleanup_cache(); ck_filesystem("fallback", NULL); - cache_foreach(delete_node); + flush_nodes(); } END_TEST @@ -561,8 +563,8 @@ START_TEST(test_cache_download_https_error) } END_TEST -/* See comments at test_rsync_commit(). */ -START_TEST(test_https_commit) +/* See comments at test_rsync_cleanup(). */ +START_TEST(test_https_cleanup) { struct cache_mapping map; unsigned int i; @@ -575,7 +577,7 @@ START_TEST(test_https_commit) /* 1, 2 */ for (i = 0; i < 2; i++) { - commit_fallbacks(); + cleanup_cache(); ck_filesystem("fallback", NULL); new_iteration(false); @@ -588,7 +590,7 @@ START_TEST(test_https_commit) map.url = "https://domain/rpki/ta52.cer"; map.path = "https/52"; cache_commit_file(&map); - commit_fallbacks(); + cleanup_cache(); ck_filesystem("fallback", "fallback/0", "A", "fallback/1", "C", NULL); new_iteration(false); @@ -600,21 +602,21 @@ START_TEST(test_https_commit) map.url = "https://domain/rpki/ta51.cer"; map.path = "https/51"; cache_commit_file(&map); - commit_fallbacks(); + cleanup_cache(); ck_filesystem("fallback", "fallback/0", "A", "fallback/2", "B", NULL); new_iteration(false); /* 5 */ - commit_fallbacks(); + cleanup_cache(); ck_filesystem("fallback", NULL); - cache_foreach(delete_node); + flush_nodes(); } END_TEST -/* See comments at test_rsync_commit(). */ -START_TEST(test_rrdp_commit) +/* See comments at test_rsync_cleanup(). */ +START_TEST(test_rrdp_cleanup) { char const *notif = "https://domain/rpki/notif.xml"; unsigned int i; @@ -635,7 +637,7 @@ START_TEST(test_rrdp_commit) /* 1, 2 */ for (i = 0; i < 2; i++) { - commit_fallbacks(); + cleanup_cache(); ck_filesystem("fallback", NULL); new_iteration(false); @@ -645,7 +647,7 @@ START_TEST(test_rrdp_commit) queue_commit(notif, "rsync://domain/mod/rpp0", "rrdp/0/0", "rrdp/0/1"); queue_commit(notif, "rsync://domain/mod/rpp2", "rrdp/2/0", "rrdp/2/1"); queue_commit(notif, "rsync://domain/mod/rpp3", "rrdp/3/0", "rrdp/3/2"); - commit_fallbacks(); + cleanup_cache(); ck_filesystem("fallback", "fallback/0/0", "A", "fallback/0/1", "B", "fallback/1/0", "E", "fallback/1/1", "F", @@ -658,7 +660,7 @@ START_TEST(test_rrdp_commit) queue_commit(notif, "rsync://domain/mod/rpp0", "fallback/0/0", "fallback/0/1"); queue_commit(notif, "rsync://domain/mod/rpp1", "rrdp/1/0", "rrdp/1/1"); queue_commit(notif, "rsync://domain/mod/rpp3", "fallback/2/0", "rrdp/3/1"); - commit_fallbacks(); + cleanup_cache(); ck_filesystem("fallback", "fallback/0/0", "A", "fallback/0/1", "B", "fallback/2/0", "G", "fallback/2/2", "H", @@ -668,10 +670,10 @@ START_TEST(test_rrdp_commit) new_iteration(false); /* 5 */ - commit_fallbacks(); + cleanup_cache(); ck_filesystem("fallback", NULL); - cache_foreach(delete_node); + flush_nodes(); } END_TEST @@ -731,7 +733,7 @@ START_TEST(test_context) rpp.files->path = pstrdup(FILE_RSYNC_PATH); cache_commit_rpp(NULL, CA_REPOSITORY, &rpp); - commit_fallbacks(); + commit_fallbacks(time_fatal()); /* 4. Redo both CAs, check the fallbacks too */ ck_assert_int_eq(0, cache_refresh_by_sias(&sias, &cage)); @@ -763,15 +765,15 @@ static Suite *create_suite(void) rsync = tcase_create("rsync"); tcase_add_test(rsync, test_cache_download_rsync); tcase_add_test(rsync, test_cache_download_rsync_error); - tcase_add_test(rsync, test_rsync_commit); + tcase_add_test(rsync, test_rsync_cleanup); https = tcase_create("https"); tcase_add_test(https, test_cache_download_https); tcase_add_test(https, test_cache_download_https_error); - tcase_add_test(https, test_https_commit); + tcase_add_test(https, test_https_cleanup); rrdp = tcase_create("rrdp"); - tcase_add_test(rrdp, test_rrdp_commit); + tcase_add_test(rrdp, test_rrdp_cleanup); multi = tcase_create("multi-protocol"); tcase_add_test(multi, test_context); diff --git a/test/mock.c b/test/mock.c index 25b5d97c..9ea4d2cc 100644 --- a/test/mock.c +++ b/test/mock.c @@ -112,7 +112,7 @@ v6addr2str2(struct in6_addr const *addr) MOCK_NULL(config_get_slurm, char const *, void) MOCK(config_get_tal, char const *, "tal/", void) MOCK(config_get_local_repository, char const *, "tmp", void) -MOCK(cfg_cache_threshold, time_t, 60 * 60 * 24 * 7, void) +MOCK(cfg_cache_threshold, time_t, 2, void) MOCK(config_get_mode, enum mode, STANDALONE, void) MOCK_UINT(config_get_rrdp_delta_threshold, 5, void) MOCK_TRUE(config_get_rsync_enabled, void)