From 66450e635491b1d56e743bbce5ff558e85b7c687 Mon Sep 17 00:00:00 2001 From: Michael Tremer Date: Fri, 25 Aug 2023 11:41:38 +0000 Subject: [PATCH] filelist: Use libarchive to scan for files It seems that fts_* has threading issues which are difficult to resolve. Since we already use libarchive to read any metadata information, it makes sense to use libarchive to traverse the filesystem tree, too. Signed-off-by: Michael Tremer --- src/libpakfire/filelist.c | 160 +++++++++++++++----------------------- 1 file changed, 63 insertions(+), 97 deletions(-) diff --git a/src/libpakfire/filelist.c b/src/libpakfire/filelist.c index d33b62a01..9302adffb 100644 --- a/src/libpakfire/filelist.c +++ b/src/libpakfire/filelist.c @@ -20,9 +20,7 @@ #include #include -#include #include -#include #include #include @@ -217,55 +215,53 @@ static int is_glob(const char* s) { return 0; } -static int pakfire_filelist_match_patterns(const char* path, const char** patterns) { - int flags = 0; - int r; +struct pakfire_filelist_matches { + const char* root; + const char** includes; + const char** excludes; +}; - for (const char** pattern = patterns; *pattern; pattern++) { - // Match any subdirectories - if (pakfire_string_startswith(path, *pattern)) - return 1; +static int pakfire_filelist_scan_filter(struct archive* archive, void* p, + struct archive_entry* entry) { + const struct pakfire_filelist_matches* matches = p; - // Skip fnmatch if the pattern doesn't have any globbing characters - if (!is_glob(*pattern)) - continue; + // Descend if possible + if (archive_read_disk_can_descend(archive)) + archive_read_disk_descend(archive); - // Reset flags - flags = 0; + // Fetch the path + const char* path = archive_entry_pathname(entry); - /* - fnmatch is way too eager for patterns line /usr/lib/lib*.so which will also match - things like /usr/lib/python3.x/blah/libblubb.so. - To prevent this for absolute file paths, we set the FNM_FILE_NAME flag so that - asterisk (*) won't match any slashes (/). - */ - if (**pattern == '/') - flags |= FNM_FILE_NAME; + // Make the path relative to the root + path = pakfire_path_relpath(matches->root, path); - // Perform matching - r = fnmatch(*pattern, path, flags); + // Skip the root + if (!path || !*path) + return 0; - // Found a match - if (r == 0) - return 1; + // Store the new path + archive_entry_set_pathname(entry, path); - // No match found - else if (r == FNM_NOMATCH) - continue; + // Skip excludes + if (matches->excludes && pakfire_filelist_match_patterns(path, matches->excludes)) + return 0; - // Any other error - else - return r; - } + // Skip what is not included + if (matches->includes && !pakfire_filelist_match_patterns(path, matches->includes)) + return 0; - // No match - return 0; + return 1; } int pakfire_filelist_scan(struct pakfire_filelist* list, const char* root, const char** includes, const char** excludes) { struct pakfire_file* file = NULL; struct archive_entry* entry = NULL; + struct pakfire_filelist_matches matches = { + .root = root, + .includes = includes, + .excludes = excludes, + }; int r = 1; // Root must be absolute @@ -290,73 +286,48 @@ int pakfire_filelist_scan(struct pakfire_filelist* list, const char* root, DEBUG(list->pakfire, " %s\n", *exclude); } + // Create a new disk reader struct archive* reader = pakfire_make_archive_disk_reader(list->pakfire, 1); if (!reader) - return 1; - - // Allocate a new file entry - entry = archive_entry_new(); - if (!entry) goto ERROR; - char* paths[] = { - (char*)root, NULL, - }; - - // Walk through the whole file system tree and find all matching files - FTS* tree = fts_open(paths, FTS_NOCHDIR, 0); - if (!tree) + // Start reading from here + r = archive_read_disk_open(reader, root); + if (r) { + ERROR(list->pakfire, "Could not open %s: %s\n", root, + archive_error_string(reader)); goto ERROR; + } - FTSENT* node = NULL; - const char* path = NULL; - - while ((node = fts_read(tree))) { - // Ignore any directories in post order - if (node->fts_info == FTS_DP) - continue; + // Configure filter function + r = archive_read_disk_set_metadata_filter_callback(reader, + pakfire_filelist_scan_filter, &matches); + if (r) { + ERROR(list->pakfire, "Could not set filter callback: %s\n", + archive_error_string(reader)); + goto ERROR; + } - // Compute the relative path - path = pakfire_path_relpath(root, node->fts_path); - if (!path || !*path) - continue; + // Walk through all files + for (;;) { + r = archive_read_next_header(reader, &entry); - // Skip excludes - if (excludes && pakfire_filelist_match_patterns(path, excludes)) { - DEBUG(list->pakfire, "Skipping %s...\n", path); + // Handle the return code + switch (r) { + // Fall through if everything is okay + case ARCHIVE_OK: + break; - r = fts_set(tree, node, FTS_SKIP); - if (r) + // Return OK when we reached the end of the archive + case ARCHIVE_EOF: + r = 0; goto ERROR; - continue; - } - - // Skip what is not included - if (includes && !pakfire_filelist_match_patterns(path, includes)) { - DEBUG(list->pakfire, "Skipping %s...\n", path); - - // We do not mark the whole tree as to skip because some matches might - // look for file extensions, etc. - continue; - } - - DEBUG(list->pakfire, "Processing %s...\n", path); - - // Reset the file entry - entry = archive_entry_clear(entry); - - // Set path - archive_entry_set_pathname(entry, path); - - // Set source path - archive_entry_copy_sourcepath(entry, node->fts_path); - - // Read all file attributes from disk - r = archive_read_disk_entry_from_file(reader, entry, -1, node->fts_statp); - if (r) { - ERROR(list->pakfire, "Could not read from %s: %m\n", node->fts_path); - goto ERROR; + // Raise any other errors + default: + ERROR(list->pakfire, "Could not read next file: %s\n", + archive_error_string(reader)); + goto ERROR; } // Create file @@ -374,12 +345,7 @@ int pakfire_filelist_scan(struct pakfire_filelist* list, const char* root, pakfire_file_unref(file); } - // Success - r = 0; - ERROR: - if (entry) - archive_entry_free(entry); archive_read_free(reader); return r; -- 2.39.5