From: Alberto Leiva Popper Date: Sat, 25 Nov 2023 22:22:55 +0000 (-0600) Subject: Remove unrecognized files during cache cleanup X-Git-Tag: 1.6.0~4 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=baff8afcdcd47150cf71e65647969ae3c4e789cb;p=thirdparty%2FFORT-validator.git Remove unrecognized files during cache cleanup Before, it used to clean old abandoned files, and nodes for which the files seemed to have disappeared. Now it also deletes files for which the node seems to have disappeared. Been postponing this tweak since ee47fe9d43614a929d591ba6039d751d7577070d. --- diff --git a/src/cache/local_cache.c b/src/cache/local_cache.c index 90af30d4..436f73c1 100644 --- a/src/cache/local_cache.c +++ b/src/cache/local_cache.c @@ -1,5 +1,6 @@ #include "cache/local_cache.h" +#include <ftw.h> #include #include "alloc.h" @@ -9,20 +10,12 @@ #include "json_util.h" #include "log.h" #include "rrdp.h" +#include "data_structure/array_list.h" #include "data_structure/path_builder.h" #include "data_structure/uthash.h" #include "http/http.h" #include "rsync/rsync.h" -#define TAGNAME_BN "basename" -#define TAGNAME_DIRECT "direct-download" -#define TAGNAME_ERROR "latest-result" -#define TAGNAME_TSATTEMPT "attempt-timestamp" -#define TAGNAME_SUCCESS "successful-download" -#define TAGNAME_TSSUCCESS "success-timestamp" -#define TAGNAME_FILE "is-file" -#define TAGNAME_CHILDREN "children" - struct cache_node { struct rpki_uri *url; @@ -677,7 +670,101 @@ cleanup_node(struct rpki_cache *cache, struct cache_node *node, } } -/* Deletes old untraversed cached files, writes metadata into XML */ +/* + * "Do not clean." List of URIs that should not be deleted from the cache. + * Global because nftw doesn't have a generic argument.
+ */ +static struct uri_list dnc; +static pthread_mutex_t dnc_lock = PTHREAD_MUTEX_INITIALIZER; + +static bool +is_cached(char const *_fpath) +{ + struct rpki_uri **node; + char const *fpath, *npath; + size_t c; + + /* + * This relies on paths being normalized, which is currently done by the + * URI constructors. + */ + + ARRAYLIST_FOREACH(&dnc, node) { + fpath = _fpath; + npath = uri_get_local(*node); + + for (c = 0; fpath[c] == npath[c]; c++) + if (fpath[c] == '\0') + return true; + if (fpath[c] == '\0' && npath[c] == '/') + return true; + if (npath[c] == '\0' && fpath[c] == '/') + return true; + } + + return false; +} + +static int +delete_if_unknown(const char *fpath, const struct stat *sb, int typeflag, + struct FTW *ftw) +{ + if (!is_cached(fpath)) { + pr_op_debug("Deleting untracked file or directory %s.", fpath); + remove(fpath); + } + return 0; +} + +static void +delete_unknown_files(struct rpki_cache *cache) +{ + struct cache_node *node, *tmp; + struct rpki_uri *cage; + struct path_builder pb; + int error; + + error = pb_init_cache(&pb, cache->tal, "metadata.json"); + if (error) { + pr_op_err("Cannot delete unknown files from %s's cache: %s", + cache->tal, strerror(error)); + return; + } + + mutex_lock(&dnc_lock); + uris_init(&dnc); + + uris_add(&dnc, uri_create_cache(pb.string)); + HASH_ITER(hh, cache->ht, node, tmp) { + uri_refget(node->url); + uris_add(&dnc, node->url); + + error = __uri_create(&cage, cache->tal, UT_CAGED, node->url, + "", 0); + if (error) { + pr_op_err("Cannot generate %s's cage. 
I'm probably going to end up deleting it from the cache.", + uri_op_get_printable(node->url)); + continue; + } + uris_add(&dnc, cage); + } + + pb_pop(&pb, true); + /* TODO (performance) optimize that 32 */ + error = nftw(pb.string, delete_if_unknown, 32, FTW_PHYS); + if (error) + pr_op_warn("The cache cleanup ended prematurely with error code %d (%s)", + error, strerror(error)); + + uris_cleanup(&dnc); + mutex_unlock(&dnc_lock); + + pb_cleanup(&pb); +} + +/* + * Deletes unknown and old untraversed cached files, writes metadata into XML. + */ static void cache_cleanup(struct rpki_cache *cache) { @@ -687,6 +774,8 @@ cache_cleanup(struct rpki_cache *cache) last_week = get_days_ago(7); HASH_ITER(hh, cache->ht, node, tmp) cleanup_node(cache, node, last_week); + + delete_unknown_files(cache); } void diff --git a/src/file.c b/src/file.c index 085b76d6..2cbc2e66 100644 --- a/src/file.c +++ b/src/file.c @@ -158,6 +158,6 @@ rm(const char *fpath, const struct stat *sb, int typeflag, struct FTW *ftwbuf) int file_rm_rf(char const *path) { - /* FIXME optimize that 32 */ + /* TODO (performance) optimize that 32 */ return nftw(path, rm, 32, FTW_DEPTH | FTW_PHYS); } diff --git a/src/object/tal.c b/src/object/tal.c index 6c68925c..fa8e6341 100644 --- a/src/object/tal.c +++ b/src/object/tal.c @@ -460,8 +460,8 @@ end: fnstack_cleanup(); finish = time(NULL); if (start != ((time_t) -1) && finish != ((time_t) -1)) - pr_op_info("- Tal %s: %.0lfs", args.tal.file_name, - difftime(finish, start)); + pr_op_debug("The %s tree took %.0lf seconds.", + args.tal.file_name, difftime(finish, start)); return NULL; } diff --git a/src/types/uri.c b/src/types/uri.c index 779b4c6c..bc691659 100644 --- a/src/types/uri.c +++ b/src/types/uri.c @@ -462,6 +462,19 @@ uri_create_mft(struct rpki_uri **result, char const *tal, return 0; } +/* Cache-only; global URI and type are meaningless. 
*/ +struct rpki_uri * +uri_create_cache(char const *path) +{ + struct rpki_uri *uri; + + uri = pzalloc(sizeof(struct rpki_uri)); + uri->local = pstrdup(path); + uri->references = 1; + + return uri; +} + struct rpki_uri * uri_refget(struct rpki_uri *uri) { diff --git a/src/types/uri.h b/src/types/uri.h index 9c417a4a..d4d8188c 100644 --- a/src/types/uri.h +++ b/src/types/uri.h @@ -24,6 +24,7 @@ int uri_create(struct rpki_uri **, char const *, enum uri_type, struct rpki_uri *, char const *); int uri_create_mft(struct rpki_uri **, char const *, struct rpki_uri *, struct rpki_uri *, IA5String_t *); +struct rpki_uri *uri_create_cache(char const *); struct rpki_uri *uri_refget(struct rpki_uri *); void uri_refput(struct rpki_uri *);