]> git.ipfire.org Git - people/stevee/pakfire.git/commitdiff
build: Try to classify files using libmagic
authorMichael Tremer <michael.tremer@ipfire.org>
Thu, 12 Jan 2023 17:07:44 +0000 (17:07 +0000)
committerMichael Tremer <michael.tremer@ipfire.org>
Thu, 12 Jan 2023 17:07:44 +0000 (17:07 +0000)
Filtering only by "*.pm" results in packages that miss some
dependencies, but the provides script generates too many false positives
for us to simply pass it the entire filelist.

This will now filter them using libmagic.

Signed-off-by: Michael Tremer <michael.tremer@ipfire.org>
src/libpakfire/build.c
src/libpakfire/file.c
src/libpakfire/include/pakfire/file.h
src/libpakfire/include/pakfire/pakfire.h
src/libpakfire/pakfire.c

index 57da89d73502dd69f43b04ec51f98397543d3d5b..70d45d437147055d256795a9c8433dd159762e66 100644 (file)
@@ -265,7 +265,7 @@ struct pakfire_find_deps_ctx {
        struct pakfire_package* pkg;
        int dep;
        struct pakfire_scriptlet* scriptlet;
-       const char* pattern;
+       int class;
        const pcre2_code* filter;
 
        struct pakfire_filelist* filelist;
@@ -325,8 +325,8 @@ static int pakfire_build_send_filelist(struct pakfire* pakfire, void* data, int
                goto ERROR;
        }
 
-       // Skip files if we are performing pattern matching
-       if (ctx->pattern && !pakfire_file_matches(file, ctx->pattern))
+       // Skip files that don't match what we are looking for
+       if (ctx->class && !pakfire_file_matches_class(file, ctx->class))
                goto SKIP;
 
        // Write path to stdin
@@ -454,13 +454,13 @@ ERROR:
 */
 static int pakfire_build_find_deps(struct pakfire_build* build,
                struct pakfire_package* pkg, int dep, const char* script,
-               struct pakfire_filelist* filelist, const char* pattern, const pcre2_code* filter) {
+               struct pakfire_filelist* filelist, const int class, const pcre2_code* filter) {
 
        // Construct the context
        struct pakfire_find_deps_ctx ctx = {
                .pkg      = pkg,
                .dep      = dep,
-               .pattern  = pattern,
+               .class    = class,
                .filter   = filter,
 
                // Filelist
@@ -509,25 +509,25 @@ static int pakfire_build_find_dependencies(struct pakfire_build* build,
 
        // Find all provides
        r = pakfire_build_find_deps(build, pkg,
-               PAKFIRE_PKG_PROVIDES, "find-provides", filelist, NULL, filter_provides);
+               PAKFIRE_PKG_PROVIDES, "find-provides", filelist, 0, filter_provides);
        if (r)
                goto ERROR;
 
        // Find all Perl provides
        r = pakfire_build_find_deps(build, pkg,
-               PAKFIRE_PKG_PROVIDES, "perl.prov", filelist, "*.pm", filter_provides);
+               PAKFIRE_PKG_PROVIDES, "perl.prov", filelist, PAKFIRE_FILE_PERL, filter_provides);
        if (r)
                goto ERROR;
 
        // Find all requires
        r = pakfire_build_find_deps(build, pkg,
-               PAKFIRE_PKG_REQUIRES, "find-requires", filelist, NULL, filter_requires);
+               PAKFIRE_PKG_REQUIRES, "find-requires", filelist, 0, filter_requires);
        if (r)
                goto ERROR;
 
        // Find all Perl requires
        r = pakfire_build_find_deps(build, pkg,
-               PAKFIRE_PKG_REQUIRES, "perl.req", filelist, "*.pm", filter_requires);
+               PAKFIRE_PKG_REQUIRES, "perl.req", filelist, PAKFIRE_FILE_PERL, filter_requires);
        if (r)
                goto ERROR;
 
index de92f4e47bb825478ce3ac202f5ec01e78da8fd8..d6f748a30cde9af38a9a0e838c32f76e94a00101 100644 (file)
@@ -77,6 +77,9 @@ struct pakfire_file {
        // Digests
        struct pakfire_digests digests;
 
+       // Class
+       int class;
+
        // Verification Status
        int verify_status;
 
@@ -906,6 +909,136 @@ int pakfire_file_remove(struct pakfire_file* file) {
        return r;
 }
 
+/*
+       Classification
+*/
+
+/*
+       Sets the class bit that corresponds to the file type stored in st_mode.
+
+       At most one bit is added. Modes that match none of the types below
+       leave the class untouched. Always returns zero.
+*/
+static int pakfire_file_classify_mode(struct pakfire_file* file) {
+       switch (file->st.st_mode & S_IFMT) {
+               // Regular files
+               case S_IFREG:
+                       file->class |= PAKFIRE_FILE_REGULAR;
+                       break;
+
+               // Directories
+               case S_IFDIR:
+                       file->class |= PAKFIRE_FILE_DIRECTORY;
+                       break;
+
+               // Symlinks
+               case S_IFLNK:
+                       file->class |= PAKFIRE_FILE_SYMLINK;
+                       break;
+
+               // Character devices
+               case S_IFCHR:
+                       file->class |= PAKFIRE_FILE_CHARACTER;
+                       break;
+
+               // Block devices
+               case S_IFBLK:
+                       file->class |= PAKFIRE_FILE_BLOCK;
+                       break;
+
+               // FIFO pipes
+               case S_IFIFO:
+                       file->class |= PAKFIRE_FILE_FIFO;
+                       break;
+
+               // Sockets
+               case S_IFSOCK:
+                       file->class |= PAKFIRE_FILE_SOCKET;
+                       break;
+
+               // Anything else is left unclassified
+               default:
+                       break;
+       }
+
+       return 0;
+}
+
+// Maps patterns that are passed to pakfire_file_matches() to the class they imply.
+// The list is terminated by an entry whose pattern is NULL.
+static const struct extension {
+       const char* extension;
+       int class;
+} extensions[] = {
+       { .extension = "*.pm", .class = PAKFIRE_FILE_PERL },
+       { .extension = "*.pc", .class = PAKFIRE_FILE_PKGCONFIG },
+       { .extension = NULL,   .class = 0 },
+};
+
+/*
+       Adds the class of the first entry in the extensions table whose pattern
+       matches the file. Nothing is changed if no pattern matches.
+
+       Always returns zero.
+*/
+static int pakfire_file_classify_extension(struct pakfire_file* file) {
+       const struct extension* match = extensions;
+
+       // Walk the table until a pattern matches or the terminator is reached
+       while (match->extension && !pakfire_file_matches(file, match->extension))
+               match++;
+
+       // The terminator carries no pattern, so stopping there means no match
+       if (match->extension)
+               file->class |= match->class;
+
+       return 0;
+}
+
+// Maps MIME types to file classes. Entries are compared for exact equality.
+// The list is terminated by an entry whose MIME type is NULL.
+static const struct mimetype {
+       const char* mimetype;
+       int class;
+} mimetypes[] = {
+       // libmagic spells this type "x-sharedlib"; any other spelling never matches
+       { "application/x-sharedlib", PAKFIRE_FILE_EXECUTABLE },
+       { "text/x-perl", PAKFIRE_FILE_PERL },
+       { NULL, 0 },
+};
+
+/*
+       Classifies a file by what libmagic reports for it.
+
+       Does nothing if the file already carries any class bit other than
+       PAKFIRE_FILE_REGULAR. Otherwise the string returned by magic_file() is
+       compared against the mimetypes table and the class of the first exact
+       match is added.
+
+       Returns zero on success and non-zero if the magic context could not be
+       fetched or libmagic could not examine the file.
+*/
+static int pakfire_file_classify_magic(struct pakfire_file* file) {
+       // Any bit besides "regular file" means that the file has been classified already
+       const int classified = file->class & ~PAKFIRE_FILE_REGULAR;
+       if (classified)
+               return 0;
+
+       // Fetch the magic cookie
+       magic_t cookie = pakfire_get_magic(file->pakfire);
+       if (!cookie)
+               return 1;
+
+       // Ask libmagic what this file is
+       const char* type = magic_file(cookie, file->abspath);
+       if (!type) {
+               ERROR(file->pakfire, "Could not classify %s: %s\n", file->path, magic_error(cookie));
+               return 1;
+       }
+
+       DEBUG(file->pakfire, "Classified %s as %s\n", file->path, type);
+
+       // Apply the class of the first table entry that matches exactly
+       const struct mimetype* entry = mimetypes;
+       while (entry->mimetype) {
+               if (strcmp(entry->mimetype, type) == 0) {
+                       file->class |= entry->class;
+                       break;
+               }
+
+               entry++;
+       }
+
+       return 0;
+}
+
+/*
+       Returns the class bitmask of a file, classifying it on first use.
+
+       The result is stored in file->class, so the checks below only run while
+       that is still PAKFIRE_FILE_UNKNOWN.
+
+       If classification fails, the class is reset and PAKFIRE_FILE_UNKNOWN is
+       returned. The error code of the failed step is deliberately not returned:
+       callers mask the result with class bits, and a code of 1 would be
+       indistinguishable from PAKFIRE_FILE_REGULAR.
+*/
+int pakfire_file_classify(struct pakfire_file* file) {
+       int r;
+
+       // Use the stored result if there is one
+       if (file->class)
+               return file->class;
+
+       // First, check the mode so that we won't run magic on directories, symlinks, ...
+       r = pakfire_file_classify_mode(file);
+       if (r)
+               goto ERROR;
+
+       // Only run this for regular files
+       if (file->class & PAKFIRE_FILE_REGULAR) {
+               // Then check for the extension
+               r = pakfire_file_classify_extension(file);
+               if (r)
+                       goto ERROR;
+
+               // After that, we will use libmagic...
+               r = pakfire_file_classify_magic(file);
+               if (r)
+                       goto ERROR;
+       }
+
+       return file->class;
+
+ERROR:
+       // Reset the class so that no partial result is kept
+       file->class = PAKFIRE_FILE_UNKNOWN;
+
+       return PAKFIRE_FILE_UNKNOWN;
+}
+
+// Returns non-zero if the value returned by pakfire_file_classify()
+// shares at least one bit with class, and zero otherwise.
+int pakfire_file_matches_class(struct pakfire_file* file, const int class) {
+       const int classes = pakfire_file_classify(file);
+
+       return classes & class;
+}
+
 /*
        This function tries to remove the file after it has been packaged.
 
index 3dcae33a3cb8acf443d21f636eb63390c411894c..4eef5334cb65ca3258a83c06008650f8f4635536 100644 (file)
@@ -91,6 +91,24 @@ int pakfire_file_matches(struct pakfire_file* file, const char* pattern);
 
 #include <archive_entry.h>
 
+// Bit flags that describe what kind of file something is.
+// Every flag occupies its own bit so that several can be combined in one int.
+enum pakfire_file_classes {
+       // No bit set
+       PAKFIRE_FILE_UNKNOWN      = 0x000,
+
+       // Simple types
+       PAKFIRE_FILE_REGULAR      = 0x001,
+       PAKFIRE_FILE_DIRECTORY    = 0x002,
+       PAKFIRE_FILE_SYMLINK      = 0x004,
+       PAKFIRE_FILE_CHARACTER    = 0x008,
+       PAKFIRE_FILE_BLOCK        = 0x010,
+       PAKFIRE_FILE_FIFO         = 0x020,
+       PAKFIRE_FILE_SOCKET       = 0x040,
+
+       // The rest
+       PAKFIRE_FILE_EXECUTABLE   = 0x080,
+       PAKFIRE_FILE_PKGCONFIG    = 0x100,
+       PAKFIRE_FILE_PERL         = 0x200,
+};
+
 int pakfire_file_create_from_path(struct pakfire_file** file,
        struct pakfire* pakfire, const char* path);
 int pakfire_file_create_from_archive_entry(struct pakfire_file** file, struct pakfire* pakfire,
@@ -107,6 +125,9 @@ int pakfire_file_compute_digests(struct pakfire_file* file, const int types);
 int pakfire_file_remove(struct pakfire_file* file);
 int pakfire_file_cleanup(struct pakfire_file* file);
 
+int pakfire_file_classify(struct pakfire_file* file);
+int pakfire_file_matches_class(struct pakfire_file* file, const int class);
+
 int pakfire_file_verify(struct pakfire_file* file, int* status);
 
 #endif
index 664adb097ad39ce2c7f3b5795d54300479bb90b6..36de4efe53b0becaf9608dc785dbdc12fc28ca1f 100644 (file)
@@ -121,6 +121,7 @@ int pakfire_sync(struct pakfire* pakfire, int solver_flags, int flags, int* chan
 #include <sys/types.h>
 
 #include <gpgme.h>
+#include <magic.h>
 #include <solv/pool.h>
 
 #include <pakfire/config.h>
@@ -149,6 +150,7 @@ int pakfire_is_mountpoint(struct pakfire* pakfire, const char* path);
 
 int pakfire_confirm(struct pakfire* pakfire, const char* message, const char* question);
 
+magic_t pakfire_get_magic(struct pakfire* pakfire);
 gpgme_ctx_t pakfire_get_gpgctx(struct pakfire* pakfire);
 
 const char* pakfire_get_distro_name(struct pakfire* pakfire);
index 0f37556060853b76def159258c4a2faba3fb233d..507c9471990c5b1b555ff224c463e1d9598e0cfb 100644 (file)
@@ -35,6 +35,7 @@
 #include <archive.h>
 #include <archive_entry.h>
 #include <gpgme.h>
+#include <magic.h>
 #include <solv/evr.h>
 #include <solv/pool.h>
 #include <solv/poolarch.h>
@@ -121,6 +122,9 @@ struct pakfire {
        // GPG Context
        gpgme_ctx_t gpgctx;
 
+       // Magic Context
+       magic_t magic;
+
        // States
        int destroy_on_free:1;
        int pool_ready:1;
@@ -356,6 +360,10 @@ static void pakfire_free(struct pakfire* pakfire) {
                pakfire_repo_unref(repo);
        }
 
+       // Release Magic Context
+       if (pakfire->magic)
+               magic_close(pakfire->magic);
+
        // Release GPGME context
        if (pakfire->gpgctx)
                pakfire_keystore_destroy(pakfire, &pakfire->gpgctx);
@@ -1028,6 +1036,39 @@ gpgme_ctx_t pakfire_get_gpgctx(struct pakfire* pakfire) {
        return pakfire->gpgctx;
 }
 
+/*
+       Returns the libmagic context of this pakfire instance.
+
+       The context is opened and its database loaded on the first call; it is
+       then stored in pakfire->magic and handed out again on later calls.
+
+       Returns NULL if the context could not be opened or the database could
+       not be loaded. In that case pakfire->magic stays NULL.
+*/
+magic_t pakfire_get_magic(struct pakfire* pakfire) {
+       // Nothing to do if the context has been set up before
+       if (pakfire->magic)
+               return pakfire->magic;
+
+       // Allocate a new context
+       magic_t magic = magic_open(MAGIC_MIME_TYPE | MAGIC_ERROR | MAGIC_NO_CHECK_TOKENS);
+       if (!magic) {
+               ERROR(pakfire, "Could not allocate magic context: %m\n");
+               return NULL;
+       }
+
+       // Load the database
+       if (magic_load(magic, NULL)) {
+               ERROR(pakfire, "Could not open the magic database: %s\n",
+                       magic_error(magic));
+
+               // Don't keep a context that has no database
+               magic_close(magic);
+               return NULL;
+       }
+
+       // Keep the context for later calls
+       pakfire->magic = magic;
+
+       return pakfire->magic;
+}
+
 PAKFIRE_EXPORT int pakfire_list_keys(struct pakfire* pakfire, struct pakfire_key*** keys) {
        // Reset keys
        *keys = NULL;