From: Michael Tremer Date: Tue, 21 Mar 2023 08:14:12 +0000 (+0000) Subject: archive: Return a file descriptor for any archive files X-Git-Tag: 0.9.29~229 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=893f73039b5d9b194f6c805dd334a9e4bd6fdf67;p=pakfire.git archive: Return a file descriptor for any archive files This is a lot more handy for us later on when we are dealing with any of the payload which might potentially be larger as it can now be read bit by bit. Signed-off-by: Michael Tremer --- diff --git a/Makefile.am b/Makefile.am index 1c0b20631..010f7f4f9 100644 --- a/Makefile.am +++ b/Makefile.am @@ -145,6 +145,8 @@ _pakfire_la_SOURCES = \ src/_pakfire/_pakfiremodule.c \ src/_pakfire/archive.c \ src/_pakfire/archive.h \ + src/_pakfire/archive_file.c \ + src/_pakfire/archive_file.h \ src/_pakfire/errors.h \ src/_pakfire/file.c \ src/_pakfire/file.h \ diff --git a/src/_pakfire/_pakfiremodule.c b/src/_pakfire/_pakfiremodule.c index 26bb5a63b..e4dc018b0 100644 --- a/src/_pakfire/_pakfiremodule.c +++ b/src/_pakfire/_pakfiremodule.c @@ -24,6 +24,7 @@ #include #include "archive.h" +#include "archive_file.h" #include "errors.h" #include "file.h" #include "key.h" @@ -165,6 +166,13 @@ PyMODINIT_FUNC PyInit__pakfire(void) { Py_INCREF(&ArchiveType); PyModule_AddObject(module, "Archive", (PyObject *)&ArchiveType); + // Archive File + if (PyType_Ready(&ArchiveFileType) < 0) + return NULL; + + Py_INCREF(&ArchiveFileType); + PyModule_AddObject(module, "ArchiveFile", (PyObject*)&ArchiveFileType); + // File if (PyType_Ready(&FileType) < 0) return NULL; diff --git a/src/_pakfire/archive.c b/src/_pakfire/archive.c index aedf67cd1..012d57326 100644 --- a/src/_pakfire/archive.c +++ b/src/_pakfire/archive.c @@ -27,6 +27,7 @@ #include #include "archive.h" +#include "archive_file.h" #include "errors.h" #include "key.h" #include "package.h" @@ -79,18 +80,18 @@ static PyObject* Archive_get_format(ArchiveObject* self) { } static PyObject* Archive_read(ArchiveObject* self, PyObject* 
args) { + PyObject* file = NULL; + FILE* f = NULL; const char* filename = NULL; if (!PyArg_ParseTuple(args, "s", &filename)) return NULL; - char* data = NULL; - size_t data_size = 0; - Py_BEGIN_ALLOW_THREADS - int r = pakfire_archive_read(self->archive, filename, &data, &data_size); - if (r) { + // Try to open the file + f = pakfire_archive_read(self->archive, filename); + if (!f) { Py_BLOCK_THREADS PyErr_SetFromErrno(PyExc_OSError); return NULL; @@ -98,16 +99,18 @@ static PyObject* Archive_read(ArchiveObject* self, PyObject* args) { Py_END_ALLOW_THREADS - // Return None if there was no data - if (!data) - Py_RETURN_NONE; + // Map the file to Python + file = new_archive_file(&ArchiveFileType, f); + if (!file) + goto ERROR; - // Create a copy of data and return it to the user - PyObject* bytes = PyBytes_FromStringAndSize(data, data_size); + return file; - free(data); +ERROR: + if (f) + fclose(f); - return bytes; + return NULL; } static PyObject* Archive_verify(ArchiveObject* self) { diff --git a/src/_pakfire/archive_file.c b/src/_pakfire/archive_file.c new file mode 100644 index 000000000..6ccf8bc2e --- /dev/null +++ b/src/_pakfire/archive_file.c @@ -0,0 +1,204 @@ +/*############################################################################# +# # +# Pakfire - The IPFire package management system # +# Copyright (C) 2014 Pakfire development team # +# # +# This program is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published by # +# the Free Software Foundation, either version 3 of the License, or # +# (at your option) any later version. # +# # +# This program is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details. # +# # +# You should have received a copy of the GNU General Public License # +# along with this program. 
If not, see . # +# # +#############################################################################*/ + +#include + +#include "archive_file.h" + +PyObject* new_archive_file(PyTypeObject* type, FILE* f) { + ArchiveFileObject* self = (ArchiveFileObject *)type->tp_alloc(type, 0); + if (self) { + self->f = f; + } + + return (PyObject*)self; +} + +static void ArchiveFile_dealloc(ArchiveFileObject* self) { + if (self->f) + fclose(self->f); + + Py_TYPE(self)->tp_free((PyObject *)self); +} + +static PyObject* ArchiveFile_close(ArchiveFileObject* self) { + int r; + + if (self->f) { + // Close the underlying file descriptor + r = fclose(self->f); + self->f = NULL; + + // Raise any errors + if (r) { + PyErr_SetFromErrno(PyExc_OSError); + return NULL; + } + } + + Py_RETURN_NONE; +} + +static PyObject* ArchiveFile_readblock(ArchiveFileObject* self, ssize_t size) { + PyObject* result = NULL; + char* buffer = NULL; + ssize_t bytes_read = 0; + + Py_BEGIN_ALLOW_THREADS; + + // Allocate a buffer that is large enough to hold the result + buffer = malloc(size); + if (!buffer) { + Py_BLOCK_THREADS; + goto ERROR; + } + + // Read data into the buffer + bytes_read = fread(buffer, 1, size, self->f); + if (bytes_read < 0) { + Py_BLOCK_THREADS; + goto ERROR; + } + + Py_END_ALLOW_THREADS; + + // Copy buffer to Python + result = PyBytes_FromStringAndSize(buffer, bytes_read); + +ERROR: + if (buffer) + free(buffer); + + if (result) + return result; + + PyErr_SetFromErrno(PyExc_OSError); + return NULL; +} + +static PyObject* ArchiveFile_readall(ArchiveFileObject* self) { + PyObject* result = NULL; + const size_t block_length = 8192; + char* buffer = NULL; + size_t buffer_length = 0; + size_t length = 0; + + // Cannot run this after f has been closed + if (!self->f) { + PyErr_SetString(PyExc_ValueError, "I/O operation on closed file"); + return NULL; + } + + Py_BEGIN_ALLOW_THREADS + + for (;;) { + // Increase the buffer by one block + buffer_length += block_length; + + // Allocate the space + 
buffer = realloc(buffer, buffer_length); + if (!buffer) { + Py_BLOCK_THREADS; + PyErr_SetFromErrno(PyExc_OSError); + goto ERROR; + } + + // Determine the beginning of the new block + char* b = buffer + length; + + // Try reading another block + ssize_t bytes_read = fread(b, 1, block_length, self->f); + if (bytes_read < 0) { + Py_BLOCK_THREADS; + PyErr_SetFromErrno(PyExc_OSError); + goto ERROR; + + // EOF + } else if (bytes_read == 0) + break; + + // Increment length + length += bytes_read; + } + + Py_END_ALLOW_THREADS + + // Copy buffer to Python + result = PyBytes_FromStringAndSize(buffer, length); + +ERROR: + if (buffer) + free(buffer); + + return result; +} + +static PyObject* ArchiveFile_read(ArchiveFileObject* self, PyObject* args) { + PyObject* result = NULL; + ssize_t size = -1; + + if (!PyArg_ParseTuple(args, "|n", &size)) + return NULL; + + // Cannot run this after f has been closed + if (!self->f) { + PyErr_SetString(PyExc_ValueError, "I/O operation on closed file"); + return NULL; + } + + if (size > 0) + result = ArchiveFile_readblock(self, size); + else + result = ArchiveFile_readall(self); + + return result; +} + +static struct PyMethodDef Archive_methods[] = { + { + "close", + (PyCFunction)ArchiveFile_close, + METH_NOARGS, + NULL, + }, + { + "read", + (PyCFunction)ArchiveFile_read, + METH_VARARGS, + NULL, + }, + { + "readall", + (PyCFunction)ArchiveFile_readall, + METH_NOARGS, + NULL, + }, + { NULL }, +}; + +PyTypeObject ArchiveFileType = { + PyVarObject_HEAD_INIT(NULL, 0) + tp_name: "_pakfire.ArchiveFile", + tp_basicsize: sizeof(ArchiveFileObject), + tp_flags: Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, + tp_dealloc: (destructor)ArchiveFile_dealloc, + tp_doc: "Archive File object", + tp_methods: Archive_methods, +}; diff --git a/src/_pakfire/archive_file.h b/src/_pakfire/archive_file.h new file mode 100644 index 000000000..a44e02d20 --- /dev/null +++ b/src/_pakfire/archive_file.h @@ -0,0 +1,37 @@ 
+/*############################################################################# +# # +# Pakfire - The IPFire package management system # +# Copyright (C) 2014 Pakfire development team # +# # +# This program is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published by # +# the Free Software Foundation, either version 3 of the License, or # +# (at your option) any later version. # +# # +# This program is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details. # +# # +# You should have received a copy of the GNU General Public License # +# along with this program. If not, see . # +# # +#############################################################################*/ + +#ifndef PYTHON_PAKFIRE_ARCHIVE_FILE_H +#define PYTHON_PAKFIRE_ARCHIVE_FILE_H + +#include + +#include + +typedef struct { + PyObject_HEAD + FILE* f; +} ArchiveFileObject; + +extern PyTypeObject ArchiveFileType; + +PyObject* new_archive_file(PyTypeObject* type, FILE* f); + +#endif /* PYTHON_PAKFIRE_ARCHIVE_FILE_H */ diff --git a/src/libpakfire/archive.c b/src/libpakfire/archive.c index d7f1bfad0..ff83af823 100644 --- a/src/libpakfire/archive.c +++ b/src/libpakfire/archive.c @@ -79,6 +79,24 @@ struct pakfire_archive { int verify; }; +static FILE* pakfire_archive_clone_file(struct pakfire_archive* archive) { + int fd = fileno(archive->f); + if (fd < 0) { + ERROR(archive->pakfire, "Could not fetch the archive's file descriptor: %m\n"); + return NULL; + } + + // Duplicate the file descriptor + fd = dup(fd); + if (fd < 0) { + ERROR(archive->pakfire, "Could not duplicate the file descriptor: %m\n"); + return NULL; + } + + // Re-open a file handle + return fdopen(fd, "r"); +} + static int pakfire_archive_compute_digests(struct pakfire_archive* archive) { int r; @@ -105,7 
+123,11 @@ static void close_archive(struct pakfire_archive* archive, struct archive* a) { /* A helper function that opens the archive for reading */ -static struct archive* open_archive(struct pakfire_archive* archive) { +static struct archive* open_archive(struct pakfire_archive* archive, FILE* f) { + // If no special file descriptor has been given we use the open one + if (!f) + f = archive->f; + // Create a new archive object struct archive* a = archive_read_new(); if (!a) @@ -118,10 +140,10 @@ static struct archive* open_archive(struct pakfire_archive* archive) { archive_read_support_filter_zstd(a); // Start reading from the beginning - rewind(archive->f); + rewind(f); // Try opening the archive file - int r = archive_read_open_FILE(a, archive->f); + int r = archive_read_open_FILE(a, f); if (r) { ERROR(archive->pakfire, "Could not open archive %s: %s\n", archive->path, archive_error_string(a)); @@ -142,7 +164,7 @@ static int pakfire_archive_walk(struct pakfire_archive* archive, int r; // Open the archive file - struct archive* a = open_archive(archive); + struct archive* a = open_archive(archive, NULL); if (!a) return 1; @@ -548,32 +570,92 @@ static int __pakfire_archive_filter_payload(struct pakfire* pakfire, } } -PAKFIRE_EXPORT int pakfire_archive_read(struct pakfire_archive* archive, - const char* path, char** data, size_t* length) { - int found = 0; +/* + Read files from the archive +*/ +struct pakfire_archive_read_cookie { + // A reference to the archive + struct pakfire_archive* archive; + + // A copy of the underlying file descriptor + FILE* f; + + // The opened archive + struct archive* a; +}; + +static ssize_t __pakfire_archive_cookie_read(void* c, char* buffer, size_t size) { + struct pakfire_archive_read_cookie* cookie = (struct pakfire_archive_read_cookie*)c; + + // Read the data directly from the archive + return archive_read_data(cookie->a, buffer, size); +} + +static int __pakfire_archive_cookie_close(void* c) { + struct 
pakfire_archive_read_cookie* cookie = (struct pakfire_archive_read_cookie*)c; + + if (cookie->archive) + pakfire_archive_unref(cookie->archive); + if (cookie->a) + archive_read_free(cookie->a); + if (cookie->f) + fclose(cookie->f); + + // Free the cookie + free(cookie); + + return 0; +} + +static cookie_io_functions_t pakfire_archive_read_functions = { + .read = __pakfire_archive_cookie_read, + .close = __pakfire_archive_cookie_close, +}; + +PAKFIRE_EXPORT FILE* pakfire_archive_read(struct pakfire_archive* archive, const char* path) { + struct pakfire_archive_read_cookie* cookie = NULL; + FILE* f = NULL; int r; // Check if path is absolute if (!path || *path != '/') { errno = EINVAL; - return 1; - - // Check other inputs - } else if (!data || !length) { - errno = EINVAL; - return 1; + return NULL; } // Strip leading / from filenames, because tarballs don't use any leading slashes path = pakfire_path_relpath("/", path); if (!path) - return 1; + return NULL; + + // Allocate a cookie + cookie = calloc(1, sizeof(*cookie)); + if (!cookie) { + ERROR(archive->pakfire, "Could not allocate a cookie: %m\n"); + goto ERROR; + } + + // Store a reference to the archive + cookie->archive = pakfire_archive_ref(archive); + + // Clone the archive file descriptor to read the file independently + cookie->f = pakfire_archive_clone_file(archive); + if (!cookie->f) { + ERROR(archive->pakfire, "Could not duplicate file descriptor for %s: %m\n", + archive->path); + goto ERROR; + } + + // Open the archive + cookie->a = open_archive(archive, cookie->f); + if (!cookie->a) + goto ERROR; // Tries to find a matching file in the archive int __pakfire_archive_read_filter(struct pakfire* pakfire, struct archive* a, - struct archive_entry* e, void* __data) { + struct archive_entry* e, void* data) { // Stop reading the archive after we have found our file - if (found) + if (f) return PAKFIRE_WALK_END; // Fetch path @@ -591,30 +673,38 @@ PAKFIRE_EXPORT int pakfire_archive_read(struct pakfire_archive* 
archive, // Reads a matching file into memory int __pakfire_archive_read(struct pakfire* pakfire, struct archive* a, - struct archive_entry* e, void* __data) { - // We have found our file - found = 1; + struct archive_entry* e, void* data) { + // Create a file descriptor + f = fopencookie(cookie, "r", pakfire_archive_read_functions); + if (!f) { + ERROR(pakfire, "Could not open /%s: %m\n", path); + return PAKFIRE_WALK_ERROR; + } - return pakfire_archive_copy_data_to_buffer(pakfire, a, e, data, length); + return PAKFIRE_WALK_DONE; } // Walk through the archive - r = pakfire_archive_walk(archive, + r = pakfire_walk(archive->pakfire, cookie->a, __pakfire_archive_read, __pakfire_archive_read_filter, NULL); if (r) - return r; + goto ERROR; // Nothing found - if (!found) { + if (!f) { // No such file or directory errno = ENOENT; ERROR(archive->pakfire, "Could not find /%s: %m\n", path); - - return 1; } - return 0; + return f; + +ERROR: + if (cookie) + __pakfire_archive_cookie_close(cookie); + + return NULL; } int pakfire_archive_copy(struct pakfire_archive* archive, const char* path) { @@ -739,7 +829,7 @@ static int __pakfire_archive_extract(struct pakfire_archive* archive, int flags) } // Open the archive - a = open_archive(archive); + a = open_archive(archive, NULL); if (!a) { r = 1; goto ERROR; diff --git a/src/libpakfire/include/pakfire/archive.h b/src/libpakfire/include/pakfire/archive.h index fed3f5727..9a15e0aae 100644 --- a/src/libpakfire/include/pakfire/archive.h +++ b/src/libpakfire/include/pakfire/archive.h @@ -35,8 +35,7 @@ int pakfire_archive_open(struct pakfire_archive** archive, struct pakfire* pakfi struct pakfire_archive* pakfire_archive_ref(struct pakfire_archive* archive); struct pakfire_archive* pakfire_archive_unref(struct pakfire_archive* archive); -int pakfire_archive_read(struct pakfire_archive* archive, const char* filename, - char** data, size_t* data_size); +FILE* pakfire_archive_read(struct pakfire_archive* archive, const char* filename); int 
pakfire_archive_extract(struct pakfire_archive* archive); const char* pakfire_archive_get_path(struct pakfire_archive* archive); diff --git a/tests/libpakfire/archive.c b/tests/libpakfire/archive.c index 4b691fa20..79e7dcb4a 100644 --- a/tests/libpakfire/archive.c +++ b/tests/libpakfire/archive.c @@ -88,7 +88,8 @@ FAIL: static int test_read(const struct test* t) { struct pakfire_archive* archive = NULL; - char* data = NULL; + FILE* f = NULL; + char data[20000]; size_t length = 0; int r = EXIT_FAILURE; @@ -101,7 +102,10 @@ static int test_read(const struct test* t) { ASSERT_SUCCESS(pakfire_archive_open(&archive, t->pakfire, TEST_SRC_PATH TEST_PKG1_PATH)); // Read a file - ASSERT_SUCCESS(pakfire_archive_read(archive, "/usr/bin/beep", &data, &length)); + ASSERT(f = pakfire_archive_read(archive, "/usr/bin/beep")); + + // Read the entire content + ASSERT(length = fread(data, 1, sizeof(data), f)); // Check filesize ASSERT(length == 17192); @@ -109,18 +113,11 @@ static int test_read(const struct test* t) { // Check the first couple of bytes ASSERT_COMPARE(data, beep, sizeof(beep)); - if (data) { - free(data); - data = NULL; - } - // Try to access a file that does not exist - ASSERT_ERRNO(pakfire_archive_read(archive, "/does/not/exist", &data, &length), ENOENT); + ASSERT_ERRNO((pakfire_archive_read(archive, "/does/not/exist") == NULL), ENOENT); // Some invalid calls - ASSERT_ERRNO(pakfire_archive_read(archive, NULL, &data, &length), EINVAL); - ASSERT_ERRNO(pakfire_archive_read(archive, "/does/not/exist", NULL, &length), EINVAL); - ASSERT_ERRNO(pakfire_archive_read(archive, "/does/not/exist", &data, NULL), EINVAL); + ASSERT_ERRNO((pakfire_archive_read(archive, NULL) == NULL), EINVAL); // Everything passed r = EXIT_SUCCESS; @@ -128,8 +125,6 @@ static int test_read(const struct test* t) { FAIL: if (archive) pakfire_archive_unref(archive); - if (data) - free(data); return r; }