From: Michael Tremer Date: Thu, 12 Jan 2023 17:07:44 +0000 (+0000) Subject: build: Try to classify files using libmagic X-Git-Tag: 0.9.29~396 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=71f6f465c5e0b8732d1f5e9cc260edb18b139d33;p=pakfire.git build: Try to classify files using libmagic Just filtering by "*.pm" will result in packages that miss some dependencies, but the provides script generates too many false-positives so that we cannot just pass the entire filelist. This will now filter them using libmagic. Signed-off-by: Michael Tremer --- diff --git a/src/libpakfire/build.c b/src/libpakfire/build.c index 57da89d73..70d45d437 100644 --- a/src/libpakfire/build.c +++ b/src/libpakfire/build.c @@ -265,7 +265,7 @@ struct pakfire_find_deps_ctx { struct pakfire_package* pkg; int dep; struct pakfire_scriptlet* scriptlet; - const char* pattern; + int class; const pcre2_code* filter; struct pakfire_filelist* filelist; @@ -325,8 +325,8 @@ static int pakfire_build_send_filelist(struct pakfire* pakfire, void* data, int goto ERROR; } - // Skip files if we are performing pattern matching - if (ctx->pattern && !pakfire_file_matches(file, ctx->pattern)) + // Skip files that don't match what we are looking for + if (ctx->class && !pakfire_file_matches_class(file, ctx->class)) goto SKIP; // Write path to stdin @@ -454,13 +454,13 @@ ERROR: */ static int pakfire_build_find_deps(struct pakfire_build* build, struct pakfire_package* pkg, int dep, const char* script, - struct pakfire_filelist* filelist, const char* pattern, const pcre2_code* filter) { + struct pakfire_filelist* filelist, const int class, const pcre2_code* filter) { // Construct the context struct pakfire_find_deps_ctx ctx = { .pkg = pkg, .dep = dep, - .pattern = pattern, + .class = class, .filter = filter, // Filelist @@ -509,25 +509,25 @@ static int pakfire_build_find_dependencies(struct pakfire_build* build, // Find all provides r = pakfire_build_find_deps(build, pkg, - PAKFIRE_PKG_PROVIDES, 
"find-provides", filelist, NULL, filter_provides); + PAKFIRE_PKG_PROVIDES, "find-provides", filelist, 0, filter_provides); if (r) goto ERROR; // Find all Perl provides r = pakfire_build_find_deps(build, pkg, - PAKFIRE_PKG_PROVIDES, "perl.prov", filelist, "*.pm", filter_provides); + PAKFIRE_PKG_PROVIDES, "perl.prov", filelist, PAKFIRE_FILE_PERL, filter_provides); if (r) goto ERROR; // Find all requires r = pakfire_build_find_deps(build, pkg, - PAKFIRE_PKG_REQUIRES, "find-requires", filelist, NULL, filter_requires); + PAKFIRE_PKG_REQUIRES, "find-requires", filelist, 0, filter_requires); if (r) goto ERROR; // Find all Perl requires r = pakfire_build_find_deps(build, pkg, - PAKFIRE_PKG_REQUIRES, "perl.req", filelist, "*.pm", filter_requires); + PAKFIRE_PKG_REQUIRES, "perl.req", filelist, PAKFIRE_FILE_PERL, filter_requires); if (r) goto ERROR; diff --git a/src/libpakfire/file.c b/src/libpakfire/file.c index de92f4e47..d6f748a30 100644 --- a/src/libpakfire/file.c +++ b/src/libpakfire/file.c @@ -77,6 +77,9 @@ struct pakfire_file { // Digests struct pakfire_digests digests; + // Class + int class; + // Verification Status int verify_status; @@ -906,6 +909,136 @@ int pakfire_file_remove(struct pakfire_file* file) { return r; } +/* + Classification +*/ + +static int pakfire_file_classify_mode(struct pakfire_file* file) { + // Check for regular files + if (S_ISREG(file->st.st_mode)) + file->class |= PAKFIRE_FILE_REGULAR; + + // Check for directories + else if (S_ISDIR(file->st.st_mode)) + file->class |= PAKFIRE_FILE_DIRECTORY; + + // Check for symlinks + else if (S_ISLNK(file->st.st_mode)) + file->class |= PAKFIRE_FILE_SYMLINK; + + // Check for character devices + else if (S_ISCHR(file->st.st_mode)) + file->class |= PAKFIRE_FILE_CHARACTER; + + // Check for block devices + else if (S_ISBLK(file->st.st_mode)) + file->class |= PAKFIRE_FILE_BLOCK; + + // Check for FIFO pipes + else if (S_ISFIFO(file->st.st_mode)) + file->class |= PAKFIRE_FILE_FIFO; + + // Check for sockets + else 
if (S_ISSOCK(file->st.st_mode)) + file->class |= PAKFIRE_FILE_SOCKET; + + return 0; +} + +static const struct extension { + const char* extension; + int class; +} extensions[] = { + { "*.pm", PAKFIRE_FILE_PERL }, + { "*.pc", PAKFIRE_FILE_PKGCONFIG }, + { NULL , 0 }, +}; + +static int pakfire_file_classify_extension(struct pakfire_file* file) { + for (const struct extension* e = extensions; e->extension; e++) { + if (pakfire_file_matches(file, e->extension)) { + file->class |= e->class; + break; + } + } + + return 0; +} + +static const struct mimetype { + const char* mimetype; + int class; +} mimetypes[] = { + { "application/x-sharedlib", PAKFIRE_FILE_EXECUTABLE }, + { "text/x-perl", PAKFIRE_FILE_PERL }, + { NULL, 0 }, +}; + +static int pakfire_file_classify_magic(struct pakfire_file* file) { + // Don't run this if the file has already been classified + if (file->class & ~PAKFIRE_FILE_REGULAR) + return 0; + + // Fetch the magic cookie + magic_t magic = pakfire_get_magic(file->pakfire); + if (!magic) + return 1; + + // Check the file + const char* mimetype = magic_file(magic, file->abspath); + if (!mimetype) { + ERROR(file->pakfire, "Could not classify %s: %s\n", file->path, magic_error(magic)); + return 1; + } + + DEBUG(file->pakfire, "Classified %s as %s\n", file->path, mimetype); + + for (const struct mimetype* m = mimetypes; m->mimetype; m++) { + if (strcmp(m->mimetype, mimetype) == 0) { + file->class |= m->class; + break; + } + } + + return 0; +} + +int pakfire_file_classify(struct pakfire_file* file) { + int r; + + if (!file->class) { + // First, check the mode so that we won't run magic on directories, symlinks, ... + r = pakfire_file_classify_mode(file); + if (r) + goto ERROR; + + // Only run this for regular files + if (file->class & PAKFIRE_FILE_REGULAR) { + // Then check for the extension + r = pakfire_file_classify_extension(file); + if (r) + goto ERROR; + + // After that, we will use libmagic... 
+ r = pakfire_file_classify_magic(file); + if (r) + goto ERROR; + } + } + + return file->class; + +ERROR: + // Reset the class; callers use the result as a bitmask, so never return the error code + file->class = PAKFIRE_FILE_UNKNOWN; + + return PAKFIRE_FILE_UNKNOWN; +} + +int pakfire_file_matches_class(struct pakfire_file* file, const int class) { + return pakfire_file_classify(file) & class; +} + /* This function tries to remove the file after it has been packaged. diff --git a/src/libpakfire/include/pakfire/file.h b/src/libpakfire/include/pakfire/file.h index 3dcae33a3..4eef5334c 100644 --- a/src/libpakfire/include/pakfire/file.h +++ b/src/libpakfire/include/pakfire/file.h @@ -91,6 +91,24 @@ int pakfire_file_matches(struct pakfire_file* file, const char* pattern); #include +enum pakfire_file_classes { + PAKFIRE_FILE_UNKNOWN = 0, + + // Simple types + PAKFIRE_FILE_REGULAR = (1 << 0), + PAKFIRE_FILE_DIRECTORY = (1 << 1), + PAKFIRE_FILE_SYMLINK = (1 << 2), + PAKFIRE_FILE_CHARACTER = (1 << 3), + PAKFIRE_FILE_BLOCK = (1 << 4), + PAKFIRE_FILE_FIFO = (1 << 5), + PAKFIRE_FILE_SOCKET = (1 << 6), + + // The rest + PAKFIRE_FILE_EXECUTABLE = (1 << 7), + PAKFIRE_FILE_PKGCONFIG = (1 << 8), + PAKFIRE_FILE_PERL = (1 << 9), +}; + int pakfire_file_create_from_path(struct pakfire_file** file, struct pakfire* pakfire, const char* path); int pakfire_file_create_from_archive_entry(struct pakfire_file** file, struct pakfire* pakfire, @@ -107,6 +125,9 @@ int pakfire_file_compute_digests(struct pakfire_file* file, const int types); int pakfire_file_remove(struct pakfire_file* file); int pakfire_file_cleanup(struct pakfire_file* file); +int pakfire_file_classify(struct pakfire_file* file); +int pakfire_file_matches_class(struct pakfire_file* file, const int class); + int pakfire_file_verify(struct pakfire_file* file, int* status); #endif diff --git a/src/libpakfire/include/pakfire/pakfire.h b/src/libpakfire/include/pakfire/pakfire.h index 664adb097..36de4efe5 100644 --- 
a/src/libpakfire/include/pakfire/pakfire.h +++ b/src/libpakfire/include/pakfire/pakfire.h @@ -121,6 +121,7 
@@ int pakfire_sync(struct pakfire* pakfire, int solver_flags, int flags, int* chan #include #include +#include #include #include @@ -149,6 +150,7 @@ int pakfire_is_mountpoint(struct pakfire* pakfire, const char* path); int pakfire_confirm(struct pakfire* pakfire, const char* message, const char* question); +magic_t pakfire_get_magic(struct pakfire* pakfire); gpgme_ctx_t pakfire_get_gpgctx(struct pakfire* pakfire); const char* pakfire_get_distro_name(struct pakfire* pakfire); diff --git a/src/libpakfire/pakfire.c b/src/libpakfire/pakfire.c index 0f3755606..507c94719 100644 --- a/src/libpakfire/pakfire.c +++ b/src/libpakfire/pakfire.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -121,6 +122,9 @@ struct pakfire { // GPG Context gpgme_ctx_t gpgctx; + // Magic Context + magic_t magic; + // States int destroy_on_free:1; int pool_ready:1; @@ -356,6 +360,10 @@ static void pakfire_free(struct pakfire* pakfire) { pakfire_repo_unref(repo); } + // Release Magic Context + if (pakfire->magic) + magic_close(pakfire->magic); + // Release GPGME context if (pakfire->gpgctx) pakfire_keystore_destroy(pakfire, &pakfire->gpgctx); @@ -1028,6 +1036,39 @@ gpgme_ctx_t pakfire_get_gpgctx(struct pakfire* pakfire) { return pakfire->gpgctx; } +magic_t pakfire_get_magic(struct pakfire* pakfire) { + int r; + + // Initialize the context if not already done + if (!pakfire->magic) { + // Allocate a new context + pakfire->magic = magic_open(MAGIC_MIME_TYPE | MAGIC_ERROR | MAGIC_NO_CHECK_TOKENS); + if (!pakfire->magic) { + ERROR(pakfire, "Could not allocate magic context: %m\n"); + return NULL; + } + + // Load the database + r = magic_load(pakfire->magic, NULL); + if (r) { + ERROR(pakfire, "Could not open the magic database: %s\n", + magic_error(pakfire->magic)); + goto ERROR; + } + } + + return pakfire->magic; + +ERROR: + if (pakfire->magic) + magic_close(pakfire->magic); + + // Reset the pointer + pakfire->magic = NULL; + + return NULL; +} + PAKFIRE_EXPORT 
int pakfire_list_keys(struct pakfire* pakfire, struct pakfire_key*** keys) { // Reset keys *keys = NULL;