From 210aabe9f82dabd2b5d7c31eb39bce07011ff47d Mon Sep 17 00:00:00 2001
From: Michael Tremer
Date: Sat, 11 Mar 2023 15:16:57 +0000
Subject: [PATCH] archive: Store MIME type of files

This is going to be helpful in the build service and is generally
useful metadata.

Signed-off-by: Michael Tremer
---
Review notes (this section is not part of the commit message):

 * What the patch does: adds a "mimetype" string to struct pakfire_file
   (detected with libmagic for regular files), stores it in archives as
   the "PAKFIRE.mimetype" extended attribute, persists it in the "files"
   table of the package database and exposes it to Python as
   File.mimetype.
 * File_get_mimetype() now returns None if no MIME type is set.
   pakfire_file_get_mimetype() returns NULL in that case and
   PyUnicode_FromString() must not be called with NULL.
 * pakfire_db_load_file() only calls pakfire_file_set_mimetype() if the
   column is not NULL. pakfire_db_add_files() binds NULL for files
   without a MIME type, so NULL is an expected value when reading back.
 * Fixed the misspelled table entry "application/x-sharedlibary" to
   "application/x-sharedlib" so that it can match what libmagic reports.
 * NOTE(review): the "mimetype" column is only added to the CREATE TABLE
   statement here. If existing databases are upgraded elsewhere, please
   confirm that the migration adds this column, too.
 * NOTE(review): the "index" hashes below are those of the original
   patch and do not reflect the amended content.

 src/_pakfire/file.c                   | 36 ++++++++++
 src/libpakfire/db.c                   | 63 ++++++++++++++----
 src/libpakfire/file.c                 | 98 ++++++++++++++++++++++-----
 src/libpakfire/include/pakfire/file.h |  7 ++
 src/libpakfire/libpakfire.sym         |  2 +
 src/libpakfire/packager.c             |  7 ++
 6 files changed, 186 insertions(+), 27 deletions(-)

diff --git a/src/_pakfire/file.c b/src/_pakfire/file.c
index 60120151b..fc4df125a 100644
--- a/src/_pakfire/file.c
+++ b/src/_pakfire/file.c
@@ -234,6 +234,35 @@ static PyObject* File_digest(FileObject* self, PyObject* args) {
 	return PyBytes_FromStringAndSize((const char*)digest, length);
 }
 
+static PyObject* File_get_mimetype(FileObject* self) {
+	const char* mimetype = pakfire_file_get_mimetype(self->file);
+
+	// pakfire_file_get_mimetype() returns NULL if no MIME type is set and
+	// PyUnicode_FromString() must not be called with NULL
+	if (!mimetype)
+		Py_RETURN_NONE;
+
+	return PyUnicode_FromString(mimetype);
+}
+
+static int File_set_mimetype(FileObject* self, PyObject* value) {
+	int r;
+
+	// Read the MIME type
+	const char* mimetype = PyUnicode_AsUTF8(value);
+	if (!mimetype)
+		return -1;
+
+	// Set the value
+	r = pakfire_file_set_mimetype(self->file, mimetype);
+	if (r) {
+		PyErr_SetFromErrno(PyExc_OSError);
+		return -1;
+	}
+
+	return 0;
+}
+
 static struct PyMethodDef File_methods[] = {
 	{
 		"digest",
@@ -259,6 +288,13 @@ static struct PyGetSetDef File_getsetters[] = {
 		NULL,
 		NULL,
 	},
+	{
+		"mimetype",
+		(getter)File_get_mimetype,
+		(setter)File_set_mimetype,
+		NULL,
+		NULL,
+	},
 	{
 		"mode",
 		(getter)File_get_mode,
diff --git a/src/libpakfire/db.c b/src/libpakfire/db.c
index 0ca3f7523..9d17ce9a0 100644
--- a/src/libpakfire/db.c
+++ b/src/libpakfire/db.c
@@ -421,6 +421,7 @@ static int pakfire_db_create_schema(struct pakfire_db* db) {
 			"gname TEXT, "
 			"ctime INTEGER, "
 			"mtime INTEGER, "
+			"mimetype TEXT, "
 			"capabilities TEXT, "
 			"digest_sha2_512 BLOB, "
 			"digest_sha2_256 BLOB, "
@@ -955,6 +956,7 @@ static int pakfire_db_add_files(struct pakfire_db* db, unsigned long id, struct
 				"gname, "
 				"ctime, "
 				"mtime, "
+				"mimetype, "
 				"capabilities, "
 				"digest_sha2_512, "
 				"digest_sha2_256, "
@@ -963,7 +965,7 @@ static int pakfire_db_add_files(struct pakfire_db* db, unsigned long id, struct
 				"digest_sha3_512, "
 				"digest_sha3_256"
 			") "
-			"VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)";
+			"VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)";
 
 	// Prepare the statement
 	r = sqlite3_prepare_v2(db->handle, sql, strlen(sql), &stmt, NULL);
@@ -1070,8 +1072,29 @@ static int pakfire_db_add_files(struct pakfire_db* db, unsigned long id, struct
 			goto END;
 		}
 
+		// Bind MIME type
+		const char* mimetype = pakfire_file_get_mimetype(file);
+
+		if (mimetype) {
+			r = sqlite3_bind_text(stmt, 11, mimetype, -1, NULL);
+			if (r) {
+				ERROR(db->pakfire, "Could not bind MIME type: %s\n",
+					sqlite3_errmsg(db->handle));
+				pakfire_file_unref(file);
+				goto END;
+			}
+		} else {
+			r = sqlite3_bind_null(stmt, 11);
+			if (r) {
+				ERROR(db->pakfire, "Could not bind an empty MIME type: %s\n",
+					sqlite3_errmsg(db->handle));
+				pakfire_file_unref(file);
+				goto END;
+			}
+		}
+
 		// Bind capabilities - XXX TODO
-		r = sqlite3_bind_null(stmt, 11);
+		r = sqlite3_bind_null(stmt, 12);
 		if (r) {
 			ERROR(db->pakfire, "Could not bind capabilities: %s\n", sqlite3_errmsg(db->handle));
 			pakfire_file_unref(file);
@@ -1079,7 +1102,7 @@ static int pakfire_db_add_files(struct pakfire_db* db, unsigned long id, struct
 		}
 
 		// SHA2-512 Digest
-		r = pakfire_db_bind_digest(db, stmt, 12, file, PAKFIRE_DIGEST_SHA2_512);
+		r = pakfire_db_bind_digest(db, stmt, 13, file, PAKFIRE_DIGEST_SHA2_512);
 		if (r) {
 			ERROR(db->pakfire, "Could not bind SHA2-512 digest: %s\n",
 				sqlite3_errmsg(db->handle));
@@ -1088,7 +1111,7 @@ static int pakfire_db_add_files(struct pakfire_db* db, unsigned long id, struct
 		}
 
 		// SHA2-256 Digest
-		r = pakfire_db_bind_digest(db, stmt, 13, file, PAKFIRE_DIGEST_SHA2_256);
+		r = pakfire_db_bind_digest(db, stmt, 14, file, PAKFIRE_DIGEST_SHA2_256);
 		if (r) {
 			ERROR(db->pakfire, "Could not bind SHA2-256 digest: %s\n",
 				sqlite3_errmsg(db->handle));
@@ -1097,7 +1120,7 @@ static int pakfire_db_add_files(struct pakfire_db* db, unsigned long id, struct
 		}
 
 		// BLAKE2b512 Digest
-		r = pakfire_db_bind_digest(db, stmt, 14, file, PAKFIRE_DIGEST_BLAKE2B512);
+		r = pakfire_db_bind_digest(db, stmt, 15, file, PAKFIRE_DIGEST_BLAKE2B512);
 		if (r) {
 			ERROR(db->pakfire, "Could not bind BLAKE2b512 digest: %s\n",
 				sqlite3_errmsg(db->handle));
@@ -1106,7 +1129,7 @@ static int pakfire_db_add_files(struct pakfire_db* db, unsigned long id, struct
 		}
 
 		// BLAKE2s256 Digest
-		r = pakfire_db_bind_digest(db, stmt, 15, file, PAKFIRE_DIGEST_BLAKE2S256);
+		r = pakfire_db_bind_digest(db, stmt, 16, file, PAKFIRE_DIGEST_BLAKE2S256);
 		if (r) {
 			ERROR(db->pakfire, "Could not bind BLAKE2s256 digest: %s\n",
 				sqlite3_errmsg(db->handle));
@@ -1115,7 +1138,7 @@ static int pakfire_db_add_files(struct pakfire_db* db, unsigned long id, struct
 		}
 
 		// SHA3-512 Digest
-		r = pakfire_db_bind_digest(db, stmt, 16, file, PAKFIRE_DIGEST_SHA3_512);
+		r = pakfire_db_bind_digest(db, stmt, 17, file, PAKFIRE_DIGEST_SHA3_512);
 		if (r) {
 			ERROR(db->pakfire, "Could not bind SHA3-512 digest: %s\n",
 				sqlite3_errmsg(db->handle));
@@ -1124,7 +1147,7 @@ static int pakfire_db_add_files(struct pakfire_db* db, unsigned long id, struct
 		}
 
 		// SHA3-256 Digest
-		r = pakfire_db_bind_digest(db, stmt, 17, file, PAKFIRE_DIGEST_SHA3_256);
+		r = pakfire_db_bind_digest(db, stmt, 18, file, PAKFIRE_DIGEST_SHA3_256);
 		if (r) {
 			ERROR(db->pakfire, "Could not bind SHA3-256 digest: %s\n",
 				sqlite3_errmsg(db->handle));
@@ -2187,33 +2210,43 @@ static int pakfire_db_load_file(struct pakfire_db* db, struct pakfire_filelist*
 	if (mtime)
 		pakfire_file_set_mtime(file, mtime);
 
+	// MIME type
+	const char* mimetype = (const char*)sqlite3_column_text(stmt, 7);
+
+	// The column is NULL if the file was stored without a MIME type
+	if (mimetype) {
+		r = pakfire_file_set_mimetype(file, mimetype);
+		if (r)
+			goto ERROR;
+	}
+
 	// SHA2-512 Digest
-	r = pakfire_db_load_file_digest(db, file, stmt, PAKFIRE_DIGEST_SHA2_512, 7);
+	r = pakfire_db_load_file_digest(db, file, stmt, PAKFIRE_DIGEST_SHA2_512, 8);
 	if (r)
 		goto ERROR;
 
 	// SHA2-256 Digest
-	r = pakfire_db_load_file_digest(db, file, stmt, PAKFIRE_DIGEST_SHA2_256, 8);
+	r = pakfire_db_load_file_digest(db, file, stmt, PAKFIRE_DIGEST_SHA2_256, 9);
 	if (r)
 		goto ERROR;
 
 	// BLAKE2b512 Digest
-	r = pakfire_db_load_file_digest(db, file, stmt, PAKFIRE_DIGEST_BLAKE2B512, 9);
+	r = pakfire_db_load_file_digest(db, file, stmt, PAKFIRE_DIGEST_BLAKE2B512, 10);
 	if (r)
 		goto ERROR;
 
 	// BLAKE2s256 Digest
-	r = pakfire_db_load_file_digest(db, file, stmt, PAKFIRE_DIGEST_BLAKE2S256, 10);
+	r = pakfire_db_load_file_digest(db, file, stmt, PAKFIRE_DIGEST_BLAKE2S256, 11);
 	if (r)
 		goto ERROR;
 
 	// SHA3-512 Digest
-	r = pakfire_db_load_file_digest(db, file, stmt, PAKFIRE_DIGEST_SHA3_512, 11);
+	r = pakfire_db_load_file_digest(db, file, stmt, PAKFIRE_DIGEST_SHA3_512, 12);
 	if (r)
 		goto ERROR;
 
 	// SHA3-256 Digest
-	r = pakfire_db_load_file_digest(db, file, stmt, PAKFIRE_DIGEST_SHA3_256, 12);
+	r = pakfire_db_load_file_digest(db, file, stmt, PAKFIRE_DIGEST_SHA3_256, 13);
 	if (r)
 		goto ERROR;
 
@@ -2243,6 +2276,7 @@ int pakfire_db_filelist(struct pakfire_db* db, struct pakfire_filelist** filelis
 			"gname, "
 			"ctime, "
 			"mtime, "
+			"mimetype, "
 			"digest_sha2_512, "
 			"digest_sha2_256, "
 			"digest_blake2b512, "
@@ -2339,6 +2373,7 @@ int pakfire_db_package_filelist(struct pakfire_db* db, struct pakfire_filelist**
 			"gname, "
 			"ctime, "
 			"mtime, "
+			"mimetype, "
 			"digest_sha2_512, "
 			"digest_sha2_256, "
 			"digest_blake2b512, "
diff --git a/src/libpakfire/file.c b/src/libpakfire/file.c
index b1f939550..b3c9c8283 100644
--- a/src/libpakfire/file.c
+++ b/src/libpakfire/file.c
@@ -79,6 +79,9 @@ struct pakfire_file {
 	// Digests
 	struct pakfire_digests digests;
 
+	// MIME Type
+	char mimetype[NAME_MAX];
+
 	// Class
 	int class;
 
@@ -96,6 +99,7 @@ struct pakfire_file {
 };
 
 static int pakfire_file_from_archive_entry(struct pakfire_file* file, struct archive_entry* entry) {
+	char* buffer = NULL;
 	const char* path = NULL;
 	const char* attr = NULL;
 	const void* value = NULL;
@@ -158,12 +162,25 @@ static int pakfire_file_from_archive_entry(struct pakfire_file* file, struct arc
 	pakfire_file_set_ctime(file, archive_entry_ctime(entry));
 	pakfire_file_set_mtime(file, archive_entry_mtime(entry));
 
+	// Reset iterating over extended attributes
 	archive_entry_xattr_reset(entry);
 
 	// Read any extended attributes
 	while (archive_entry_xattr_next(entry, &attr, &value, &size) == ARCHIVE_OK) {
+		// MIME type
+		if (strcmp(attr, "PAKFIRE.mimetype") == 0) {
+			// Copy the value into a NULL-terminated buffer
+			r = asprintf(&buffer, "%.*s", (int)size, (const char*)value);
+			if (r < 0)
+				goto ERROR;
+
+			// Assign the value
+			r = pakfire_file_set_mimetype(file, buffer);
+			if (r)
+				goto ERROR;
+
 		// Digest: SHA-3-512
-		if (strcmp(attr, "PAKFIRE.digests.sha3_512") == 0) {
+		} else if (strcmp(attr, "PAKFIRE.digests.sha3_512") == 0) {
 			r = pakfire_file_set_digest(file, PAKFIRE_DIGEST_SHA3_512, value, size);
 			if (r)
 				goto ERROR;
@@ -204,6 +221,9 @@ static int pakfire_file_from_archive_entry(struct pakfire_file* file, struct arc
 	}
 
 ERROR:
+	if (buffer)
+		free(buffer);
+
 	return r;
 }
 
@@ -329,6 +349,13 @@ struct archive_entry* pakfire_file_archive_entry(struct pakfire_file* file, int
 	archive_entry_set_ctime(entry, pakfire_file_get_ctime(file), 0);
 	archive_entry_set_mtime(entry, pakfire_file_get_mtime(file), 0);
 
+	// Set MIME type
+	const char* mimetype = pakfire_file_get_mimetype(file);
+	if (mimetype) {
+		archive_entry_xattr_add_entry(entry,
+			"PAKFIRE.mimetype", mimetype, strlen(mimetype));
+	}
+
 	// Compute any required file digests
 	r = pakfire_file_compute_digests(file, digest_types);
 	if (r)
@@ -1004,6 +1031,51 @@ int pakfire_file_symlink_target_exists(struct pakfire_file* file) {
 	return pakfire_path_exists(file->abspath);
 }
 
+/*
+	MIME Type
+*/
+
+int pakfire_file_detect_mimetype(struct pakfire_file* file) {
+	// Only process regular files
+	if (!S_ISREG(file->st.st_mode))
+		return 0;
+
+	// Skip if MIME type is already set
+	if (*file->mimetype)
+		return 0;
+
+	// Fetch the magic cookie
+	magic_t magic = pakfire_get_magic(file->pakfire);
+	if (!magic)
+		return 1;
+
+	// Check the file
+	const char* mimetype = magic_file(magic, file->abspath);
+	if (!mimetype) {
+		ERROR(file->pakfire, "Could not classify %s: %s\n", file->path, magic_error(magic));
+		return 1;
+	}
+
+	DEBUG(file->pakfire, "Classified %s as %s\n", file->path, mimetype);
+
+	// Store the value
+	return pakfire_file_set_mimetype(file, mimetype);
+}
+
+PAKFIRE_EXPORT const char* pakfire_file_get_mimetype(struct pakfire_file* file) {
+	// Return nothing on an empty mimetype
+	if (!*file->mimetype)
+		return NULL;
+
+	return file->mimetype;
+}
+
+PAKFIRE_EXPORT int pakfire_file_set_mimetype(
+		struct pakfire_file* file, const char* mimetype) {
+	// Store the value
+	return pakfire_string_set(file->mimetype, mimetype);
+}
+
 /*
 	Classification
 */
@@ -1077,30 +1149,30 @@ static const struct mimetype {
 	const char* mimetype;
 	int class;
 } mimetypes[] = {
-	{ "application/x-sharedlibary", PAKFIRE_FILE_EXECUTABLE },
+	{ "application/x-pie-executable", PAKFIRE_FILE_EXECUTABLE },
+	{ "application/x-sharedlib", PAKFIRE_FILE_EXECUTABLE },
 	{ "text/x-perl", PAKFIRE_FILE_PERL },
 	{ NULL, 0 },
 };
 
 static int pakfire_file_classify_magic(struct pakfire_file* file) {
+	int r;
+
 	// Don't run this if the file has already been classified
 	if (file->class & ~PAKFIRE_FILE_REGULAR)
 		return 0;
 
-	// Fetch the magic cookie
-	magic_t magic = pakfire_get_magic(file->pakfire);
-	if (!magic)
-		return 1;
+	// Detect the MIME type
+	r = pakfire_file_detect_mimetype(file);
+	if (r)
+		return r;
 
-	// Check the file
-	const char* mimetype = magic_file(magic, file->abspath);
-	if (!mimetype) {
-		ERROR(file->pakfire, "Could not classify %s: %s\n", file->path, magic_error(magic));
+	// Fetch the MIME type
+	const char* mimetype = pakfire_file_get_mimetype(file);
+	if (!mimetype)
 		return 1;
-	}
-
-	DEBUG(file->pakfire, "Classified %s as %s\n", file->path, mimetype);
 
+	// Match the MIME type with a flag
 	for (const struct mimetype* m = mimetypes; m->mimetype; m++) {
 		if (strcmp(m->mimetype, mimetype) == 0) {
 			file->class |= m->class;
diff --git a/src/libpakfire/include/pakfire/file.h b/src/libpakfire/include/pakfire/file.h
index 801ecb2f2..9aa6dd5c9 100644
--- a/src/libpakfire/include/pakfire/file.h
+++ b/src/libpakfire/include/pakfire/file.h
@@ -81,6 +81,10 @@ const unsigned char* pakfire_file_get_digest(struct pakfire_file* file,
 int pakfire_file_set_digest(struct pakfire_file* file,
 	const enum pakfire_digest_types type, const unsigned char* digest, const size_t length);
 
+// MIME Type
+const char* pakfire_file_get_mimetype(struct pakfire_file* file);
+int pakfire_file_set_mimetype(struct pakfire_file* file, const char* mimetype);
+
 struct pakfire_file* pakfire_file_parse_from_file(const char* list,
 		unsigned int format);
 int pakfire_file_matches(struct pakfire_file* file, const char* pattern);
@@ -146,6 +150,9 @@ int pakfire_file_cleanup(struct pakfire_file* file);
 
 int pakfire_file_symlink_target_exists(struct pakfire_file* file);
 
+// MIME Type
+int pakfire_file_detect_mimetype(struct pakfire_file* file);
+
 int pakfire_file_classify(struct pakfire_file* file);
 int pakfire_file_matches_class(struct pakfire_file* file, const int class);
 
diff --git a/src/libpakfire/libpakfire.sym b/src/libpakfire/libpakfire.sym
index af2a56a3c..762d654a0 100644
--- a/src/libpakfire/libpakfire.sym
+++ b/src/libpakfire/libpakfire.sym
@@ -87,6 +87,7 @@ global:
 	pakfire_file_get_digest;
 	pakfire_file_get_gname;
 	pakfire_file_get_inode;
+	pakfire_file_get_mimetype;
 	pakfire_file_get_mode;
 	pakfire_file_get_mtime;
 	pakfire_file_get_nlink;
@@ -102,6 +103,7 @@ global:
 	pakfire_file_set_digest;
 	pakfire_file_set_gname;
 	pakfire_file_set_inode;
+	pakfire_file_set_mimetype;
 	pakfire_file_set_mode;
 	pakfire_file_set_mtime;
 	pakfire_file_set_nlink;
diff --git a/src/libpakfire/packager.c b/src/libpakfire/packager.c
index 5bacd0259..15c16a721 100644
--- a/src/libpakfire/packager.c
+++ b/src/libpakfire/packager.c
@@ -583,6 +583,8 @@ ERROR:
 }
 
 int pakfire_packager_add_file(struct pakfire_packager* packager, struct pakfire_file* file) {
+	int r;
+
 	// Check input
 	if (!file) {
 		errno = EINVAL;
@@ -607,6 +609,11 @@ int pakfire_packager_add_file(struct pakfire_packager* packager, struct pakfire_
 
 	DEBUG(packager->pakfire, "Adding file to payload: %s\n", path);
 
+	// Detect the MIME type
+	r = pakfire_file_detect_mimetype(file);
+	if (r)
+		return r;
+
 	// Overwrite a couple of things for source archives
 	if (pakfire_package_is_source(packager->pkg)) {
 		// Reset permissions
-- 
2.39.5