archive: Return a file descriptor for any archive files

author Michael Tremer <michael.tremer@ipfire.org>

Tue, 21 Mar 2023 08:14:12 +0000 (08:14 +0000)

committer Michael Tremer <michael.tremer@ipfire.org>

Tue, 21 Mar 2023 08:14:12 +0000 (08:14 +0000)
author Michael Tremer <michael.tremer@ipfire.org>
Tue, 21 Mar 2023 08:14:12 +0000 (08:14 +0000)
committer Michael Tremer <michael.tremer@ipfire.org>
Tue, 21 Mar 2023 08:14:12 +0000 (08:14 +0000)
diff --git a/Makefile.am b/Makefile.am

index 1c0b206317a1e955e7dba009d900f629f11c88da..010f7f4f96a39ee09e4cb41e2374d93b907b61f6 100644 (file)
--- a/Makefile.am
+++ b/Makefile.am
@@ -145,6 +145,8 @@ _pakfire_la_SOURCES = \
         src/_pakfire/_pakfiremodule.c \
         src/_pakfire/archive.c \
         src/_pakfire/archive.h \
+       src/_pakfire/archive_file.c \
+       src/_pakfire/archive_file.h \
         src/_pakfire/errors.h \
         src/_pakfire/file.c \
         src/_pakfire/file.h \
diff --git a/src/_pakfire/_pakfiremodule.c b/src/_pakfire/_pakfiremodule.c

index 26bb5a63be6aa8bb5950f9d76d091b6a71d80750..e4dc018b0b49a6c8f18dc89d8b5f975c103c8070 100644 (file)
--- a/src/_pakfire/_pakfiremodule.c
+++ b/src/_pakfire/_pakfiremodule.c
@@ -24,6 +24,7 @@
  #include <pakfire/arch.h>
  
  #include "archive.h"
+#include "archive_file.h"
  #include "errors.h"
  #include "file.h"
  #include "key.h"
@@ -165,6 +166,13 @@ PyMODINIT_FUNC PyInit__pakfire(void) {
         Py_INCREF(&ArchiveType);
         PyModule_AddObject(module, "Archive", (PyObject *)&ArchiveType);
  
+       // Archive File
+       if (PyType_Ready(&ArchiveFileType) < 0)
+               return NULL;
+
+       Py_INCREF(&ArchiveFileType);
+       PyModule_AddObject(module, "ArchiveFile", (PyObject*)&ArchiveFileType);
+
         // File
         if (PyType_Ready(&FileType) < 0)
                 return NULL;
diff --git a/src/_pakfire/archive.c b/src/_pakfire/archive.c

index aedf67cd18dd5ef81e169a5e2f75fb21d24644f8..012d57326bac0b35192cab3638de0bcfcfd33600 100644 (file)
--- a/src/_pakfire/archive.c
+++ b/src/_pakfire/archive.c
@@ -27,6 +27,7 @@
  #include <pakfire/util.h>
  
  #include "archive.h"
+#include "archive_file.h"
  #include "errors.h"
  #include "key.h"
  #include "package.h"
@@ -79,18 +80,18 @@ static PyObject* Archive_get_format(ArchiveObject* self) {
  }
  
  static PyObject* Archive_read(ArchiveObject* self, PyObject* args) {
+       PyObject* file = NULL;
+       FILE* f = NULL;
         const char* filename = NULL;
  
         if (!PyArg_ParseTuple(args, "s", &filename))
                 return NULL;
  
-       char* data = NULL;
-       size_t data_size = 0;
-
         Py_BEGIN_ALLOW_THREADS
  
-       int r = pakfire_archive_read(self->archive, filename, &data, &data_size);
-       if (r) {
+       // Try to open the file
+       f = pakfire_archive_read(self->archive, filename);
+       if (!f) {
                 Py_BLOCK_THREADS
                 PyErr_SetFromErrno(PyExc_OSError);
                 return NULL;
@@ -98,16 +99,18 @@ static PyObject* Archive_read(ArchiveObject* self, PyObject* args) {
  
         Py_END_ALLOW_THREADS
  
-       // Return None if there was no data
-       if (!data)
-               Py_RETURN_NONE;
+       // Map the file to Python
+       file = new_archive_file(&ArchiveFileType, f);
+       if (!file)
+               goto ERROR;
  
-       // Create a copy of data and return it to the user
-       PyObject* bytes = PyBytes_FromStringAndSize(data, data_size);
+       return file;
  
-       free(data);
+ERROR:
+       if (f)
+               fclose(f);
  
-       return bytes;
+       return NULL;
  }
  
  static PyObject* Archive_verify(ArchiveObject* self) {
diff --git a/src/_pakfire/archive_file.c b/src/_pakfire/archive_file.c

new file mode 100644 (file)

index 0000000..6ccf8bc
--- /dev/null
+++ b/src/_pakfire/archive_file.c
@@ -0,0 +1,204 @@
+/*#############################################################################
+#                                                                             #
+# Pakfire - The IPFire package management system                              #
+# Copyright (C) 2014 Pakfire development team                                 #
+#                                                                             #
+# This program is free software: you can redistribute it and/or modify        #
+# it under the terms of the GNU General Public License as published by        #
+# the Free Software Foundation, either version 3 of the License, or           #
+# (at your option) any later version.                                         #
+#                                                                             #
+# This program is distributed in the hope that it will be useful,             #
+# but WITHOUT ANY WARRANTY; without even the implied warranty of              #
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the               #
+# GNU General Public License for more details.                                #
+#                                                                             #
+# You should have received a copy of the GNU General Public License           #
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.       #
+#                                                                             #
+#############################################################################*/
+
+#include <Python.h>
+
+#include "archive_file.h"
+
+/*
+       Wraps an already opened FILE* in a newly allocated object of the given type.
+
+       Returns the new object, or NULL if tp_alloc failed. In the failure case
+       f is not touched, so the caller still has to close it.
+*/
+PyObject* new_archive_file(PyTypeObject* type, FILE* f) {
+       ArchiveFileObject* object = (ArchiveFileObject *)type->tp_alloc(type, 0);
+
+       // Allocation failed
+       if (!object)
+               return NULL;
+
+       // Store the file handle in the object
+       object->f = f;
+
+       return (PyObject*)object;
+}
+
+/*
+       Destructor: closes the file handle if it is still open and frees the object.
+*/
+static void ArchiveFile_dealloc(ArchiveFileObject* self) {
+       FILE* f = self->f;
+
+       // f is NULL once close() has been called
+       if (f != NULL)
+               fclose(f);
+
+       Py_TYPE(self)->tp_free((PyObject *)self);
+}
+
+/*
+       close(): Closes the underlying file handle.
+
+       Calling this on an already closed file is a no-op. Raises OSError if
+       fclose() reports an error; the handle is forgotten either way.
+*/
+static PyObject* ArchiveFile_close(ArchiveFileObject* self) {
+       // Nothing to do if the file has already been closed
+       if (!self->f)
+               Py_RETURN_NONE;
+
+       // Forget the handle, whatever the outcome of fclose() will be
+       FILE* f = self->f;
+       self->f = NULL;
+
+       // Close the stream and raise any errors
+       if (fclose(f)) {
+               PyErr_SetFromErrno(PyExc_OSError);
+               return NULL;
+       }
+
+       Py_RETURN_NONE;
+}
+
+/*
+       Reads up to size bytes from the file and returns them as a bytes object.
+
+       The result is shorter than size (possibly empty) when the end of the file
+       is reached. The caller is expected to pass size > 0 and to have checked
+       that self->f is open.
+
+       Returns NULL with OSError set if the buffer could not be allocated or the
+       stream reported a read error.
+*/
+static PyObject* ArchiveFile_readblock(ArchiveFileObject* self, ssize_t size) {
+       PyObject* result = NULL;
+       char* buffer = NULL;
+       size_t bytes_read = 0;
+       int error = 0;
+
+       Py_BEGIN_ALLOW_THREADS
+
+       // Allocate a buffer that is large enough to hold the result
+       buffer = malloc(size);
+       if (!buffer) {
+               error = errno ? errno : ENOMEM;
+
+       } else {
+               // Read data into the buffer
+               bytes_read = fread(buffer, 1, size, self->f);
+
+               // fread() returns an unsigned count and cannot signal errors with a
+               // negative value, so a short read has to be checked with ferror()
+               if (bytes_read < (size_t)size && ferror(self->f))
+                       error = errno ? errno : EIO;
+       }
+
+       Py_END_ALLOW_THREADS
+
+       // Raise any errors (errno is restored in case it has been overwritten)
+       if (error) {
+               errno = error;
+               PyErr_SetFromErrno(PyExc_OSError);
+               goto ERROR;
+       }
+
+       // Copy buffer to Python (on failure, this has already set an exception)
+       result = PyBytes_FromStringAndSize(buffer, (Py_ssize_t)bytes_read);
+
+ERROR:
+       free(buffer);
+
+       return result;
+}
+
+/*
+       readall(): Reads everything from the current position to the end of the
+       file and returns it as a bytes object.
+
+       The data is collected in a heap buffer that grows by one block per
+       iteration while the GIL is released.
+
+       Raises ValueError if the file has been closed and OSError if memory
+       could not be allocated or the stream reported a read error.
+*/
+static PyObject* ArchiveFile_readall(ArchiveFileObject* self) {
+       PyObject* result = NULL;
+       const size_t block_length = 8192;
+       char* buffer = NULL;
+       size_t buffer_length = 0;
+       size_t length = 0;
+       int error = 0;
+
+       // Cannot run this after f has been closed
+       if (!self->f) {
+               PyErr_SetString(PyExc_ValueError, "I/O operation on closed file");
+               return NULL;
+       }
+
+       Py_BEGIN_ALLOW_THREADS
+
+       for (;;) {
+               // Increase the buffer by one block. The result is kept in a temporary
+               // pointer so that the old buffer is not leaked if realloc() fails.
+               char* p = realloc(buffer, buffer_length + block_length);
+               if (!p) {
+                       error = errno ? errno : ENOMEM;
+                       break;
+               }
+
+               buffer = p;
+               buffer_length += block_length;
+
+               // Try reading another block to the end of what we have so far
+               size_t bytes_read = fread(buffer + length, 1, block_length, self->f);
+
+               // Increment length
+               length += bytes_read;
+
+               // A short read means either EOF or an error. fread() cannot return
+               // a negative value, so ferror() has to tell the two apart.
+               if (bytes_read < block_length) {
+                       if (ferror(self->f))
+                               error = errno ? errno : EIO;
+                       break;
+               }
+       }
+
+       Py_END_ALLOW_THREADS
+
+       // Raise any errors (errno is restored in case it has been overwritten)
+       if (error) {
+               errno = error;
+               PyErr_SetFromErrno(PyExc_OSError);
+               goto ERROR;
+       }
+
+       // Copy buffer to Python
+       result = PyBytes_FromStringAndSize(buffer, (Py_ssize_t)length);
+
+ERROR:
+       free(buffer);
+
+       return result;
+}
+
+/*
+       read([size]): Reads from the file and returns a bytes object.
+
+       If size is omitted or negative, everything up to the end of the file is
+       read. If size is zero, an empty bytes object is returned without touching
+       the file. Otherwise at most size bytes are read.
+
+       Raises ValueError if the file has been closed.
+*/
+static PyObject* ArchiveFile_read(ArchiveFileObject* self, PyObject* args) {
+       ssize_t size = -1;
+
+       if (!PyArg_ParseTuple(args, "|n", &size))
+               return NULL;
+
+       // Cannot run this after f has been closed
+       if (!self->f) {
+               PyErr_SetString(PyExc_ValueError, "I/O operation on closed file");
+               return NULL;
+       }
+
+       // Read everything if no (or a negative) size was given
+       if (size < 0)
+               return ArchiveFile_readall(self);
+
+       // read(0) returns an empty result instead of reading the whole file
+       if (size == 0)
+               return PyBytes_FromStringAndSize("", 0);
+
+       return ArchiveFile_readblock(self, size);
+}
+
+// Methods of the ArchiveFile type: { name, implementation, calling convention, docstring }
+static struct PyMethodDef Archive_methods[] = {
+       { "close",   (PyCFunction)ArchiveFile_close,   METH_NOARGS,  NULL },
+       { "read",    (PyCFunction)ArchiveFile_read,    METH_VARARGS, NULL },
+       { "readall", (PyCFunction)ArchiveFile_readall, METH_NOARGS,  NULL },
+
+       // Sentinel
+       { NULL },
+};
+
+/*
+       Type object for _pakfire.ArchiveFile.
+
+       No tp_new is set here; objects are created from C by new_archive_file().
+*/
+PyTypeObject ArchiveFileType = {
+       PyVarObject_HEAD_INIT(NULL, 0)
+       .tp_name      = "_pakfire.ArchiveFile",
+       .tp_basicsize = sizeof(ArchiveFileObject),
+       .tp_flags     = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+       .tp_dealloc   = (destructor)ArchiveFile_dealloc,
+       .tp_doc       = "Archive File object",
+       .tp_methods   = Archive_methods,
+};
diff --git a/src/_pakfire/archive_file.h b/src/_pakfire/archive_file.h

new file mode 100644 (file)

index 0000000..a44e02d
--- /dev/null
+++ b/src/_pakfire/archive_file.h
@@ -0,0 +1,37 @@
+/*#############################################################################
+#                                                                             #
+# Pakfire - The IPFire package management system                              #
+# Copyright (C) 2014 Pakfire development team                                 #
+#                                                                             #
+# This program is free software: you can redistribute it and/or modify        #
+# it under the terms of the GNU General Public License as published by        #
+# the Free Software Foundation, either version 3 of the License, or           #
+# (at your option) any later version.                                         #
+#                                                                             #
+# This program is distributed in the hope that it will be useful,             #
+# but WITHOUT ANY WARRANTY; without even the implied warranty of              #
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the               #
+# GNU General Public License for more details.                                #
+#                                                                             #
+# You should have received a copy of the GNU General Public License           #
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.       #
+#                                                                             #
+#############################################################################*/
+
+#ifndef PYTHON_PAKFIRE_ARCHIVE_FILE_H
+#define PYTHON_PAKFIRE_ARCHIVE_FILE_H
+
+#include <Python.h>
+
+#include <stdio.h>
+
+// Python object that wraps a stdio stream
+typedef struct {
+       PyObject_HEAD
+       // The wrapped stream; set by new_archive_file()
+       FILE* f;
+} ArchiveFileObject;
+
+extern PyTypeObject ArchiveFileType;
+
+PyObject* new_archive_file(PyTypeObject* type, FILE* f);
+
+#endif /* PYTHON_PAKFIRE_ARCHIVE_FILE_H */
diff --git a/src/libpakfire/archive.c b/src/libpakfire/archive.c

index d7f1bfad0534b188644f234698187049b9b15ace..ff83af82315bb47b3e3ec0c2c0d3d3991b8878bc 100644 (file)
--- a/src/libpakfire/archive.c
+++ b/src/libpakfire/archive.c
@@ -79,6 +79,24 @@ struct pakfire_archive {
         int verify;
  };
  
+/*
+       Returns a new FILE* that is backed by a duplicate of the archive's
+       file descriptor, or NULL on error (errno is set).
+
+       The caller owns the returned handle and has to fclose() it.
+
+       NOTE(review): Descriptors created by dup() share the file offset with
+       the original descriptor, so reading or seeking through the returned
+       handle also moves the position of archive->f.
+*/
+static FILE* pakfire_archive_clone_file(struct pakfire_archive* archive) {
+       int fd = fileno(archive->f);
+       if (fd < 0) {
+               ERROR(archive->pakfire, "Could not fetch the archive's file descriptor: %m\n");
+               return NULL;
+       }
+
+       // Duplicate the file descriptor
+       int cloned_fd = dup(fd);
+       if (cloned_fd < 0) {
+               ERROR(archive->pakfire, "Could not duplicate the file descriptor: %m\n");
+               return NULL;
+       }
+
+       // Re-open a file handle
+       FILE* f = fdopen(cloned_fd, "r");
+       if (!f) {
+               // fdopen() does not close the descriptor on failure, so we have to
+               // do this here while preserving errno for the caller
+               int saved_errno = errno;
+               close(cloned_fd);
+               errno = saved_errno;
+
+               return NULL;
+       }
+
+       return f;
+}
+
  static int pakfire_archive_compute_digests(struct pakfire_archive* archive) {
         int r;
  
@@ -105,7 +123,11 @@ static void close_archive(struct pakfire_archive* archive, struct archive* a) {
  /*
         A helper function that opens the archive for reading
  */
-static struct archive* open_archive(struct pakfire_archive* archive) {
+static struct archive* open_archive(struct pakfire_archive* archive, FILE* f) {
+       // If no special file descriptor has been given we use the open one
+       if (!f)
+               f = archive->f;
+
         // Create a new archive object
         struct archive* a = archive_read_new();
         if (!a)
@@ -118,10 +140,10 @@ static struct archive* open_archive(struct pakfire_archive* archive) {
         archive_read_support_filter_zstd(a);
  
         // Start reading from the beginning
-       rewind(archive->f);
+       rewind(f);
  
         // Try opening the archive file
-       int r = archive_read_open_FILE(a, archive->f);
+       int r = archive_read_open_FILE(a, f);
         if (r) {
                 ERROR(archive->pakfire, "Could not open archive %s: %s\n",
                         archive->path, archive_error_string(a));
@@ -142,7 +164,7 @@ static int pakfire_archive_walk(struct pakfire_archive* archive,
         int r;
  
         // Open the archive file
-       struct archive* a = open_archive(archive);
+       struct archive* a = open_archive(archive, NULL);
         if (!a)
                 return 1;
  
@@ -548,32 +570,92 @@ static int __pakfire_archive_filter_payload(struct pakfire* pakfire,
         }
  }
  
-PAKFIRE_EXPORT int pakfire_archive_read(struct pakfire_archive* archive,
-               const char* path, char** data, size_t* length) {
-       int found = 0;
+/*
+       Read files from the archive
+*/
+// State behind a FILE* created with fopencookie() by pakfire_archive_read().
+// Everything in here is released by __pakfire_archive_cookie_close().
+struct pakfire_archive_read_cookie {
+       // A reference to the archive (taken with pakfire_archive_ref())
+       struct pakfire_archive* archive;
+
+       // A copy of the underlying file descriptor
+       // (created by pakfire_archive_clone_file())
+       FILE* f;
+
+       // The opened archive (libarchive handle that reads from f)
+       struct archive* a;
+};
+
+/*
+       fopencookie() read callback: Reads up to size bytes of the current
+       archive entry into buffer.
+
+       Returns the number of bytes read, 0 at the end of the entry, or -1 on
+       error with errno set.
+*/
+static ssize_t __pakfire_archive_cookie_read(void* c, char* buffer, size_t size) {
+       struct pakfire_archive_read_cookie* cookie = c;
+
+       // Read the data directly from the archive
+       ssize_t bytes_read = archive_read_data(cookie->a, buffer, size);
+
+       // libarchive reports errors as negative status codes, but stdio expects
+       // -1 and errno to be set
+       if (bytes_read < 0) {
+               int e = archive_errno(cookie->a);
+
+               // archive_errno() may return values that are not valid for errno
+               errno = (e > 0) ? e : EIO;
+
+               return -1;
+       }
+
+       return bytes_read;
+}
+
+/*
+       fopencookie() close callback: Releases everything that is held by the
+       cookie and then the cookie itself. Always returns 0.
+
+       The cookie may be only partially initialised; unset members are skipped.
+*/
+static int __pakfire_archive_cookie_close(void* c) {
+       struct pakfire_archive_read_cookie* self = c;
+
+       // Drop the reference to the archive
+       if (self->archive != NULL)
+               pakfire_archive_unref(self->archive);
+
+       // Free the libarchive reader
+       if (self->a != NULL)
+               archive_read_free(self->a);
+
+       // Close the cloned file handle
+       if (self->f != NULL)
+               fclose(self->f);
+
+       // Free the cookie
+       free(self);
+
+       return 0;
+}
+
+// Callbacks passed to fopencookie() for streams that read a file from an
+// archive. Only read and close are set; write and seek are left as NULL.
+static cookie_io_functions_t pakfire_archive_read_functions = {
+       .read  = __pakfire_archive_cookie_read,
+       .close = __pakfire_archive_cookie_close,
+};
+
+PAKFIRE_EXPORT FILE* pakfire_archive_read(struct pakfire_archive* archive, const char* path) {
+       struct pakfire_archive_read_cookie* cookie = NULL;
+       FILE* f = NULL;
         int r;
  
         // Check if path is absolute
         if (!path || *path != '/') {
                 errno = EINVAL;
-               return 1;
-
-       // Check other inputs
-       } else if (!data || !length) {
-               errno = EINVAL;
-               return 1;
+               return NULL;
         }
  
         // Strip leading / from filenames, because tarballs don't use any leading slashes
         path = pakfire_path_relpath("/", path);
         if (!path)
-               return 1;
+               return NULL;
+
+       // Allocate a cookie
+       cookie = calloc(1, sizeof(*cookie));
+       if (!cookie) {
+               ERROR(archive->pakfire, "Could not allocate a cookie: %m\n");
+               goto ERROR;
+       }
+
+       // Store a reference to the archive
+       cookie->archive = pakfire_archive_ref(archive);
+
+       // Clone the archive file descriptor to read the file independently
+       cookie->f = pakfire_archive_clone_file(archive);
+       if (!cookie->f) {
+               ERROR(archive->pakfire, "Could not duplicate file descriptor for %s: %m\n",
+                       archive->path);
+               goto ERROR;
+       }
+
+       // Open the archive
+       cookie->a = open_archive(archive, cookie->f);
+       if (!cookie->a)
+               goto ERROR;
  
         // Tries to find a matching file in the archive
         int __pakfire_archive_read_filter(struct pakfire* pakfire, struct archive* a,
-                       struct archive_entry* e, void* __data) {
+                       struct archive_entry* e, void* data) {
                 // Stop reading the archive after we have found our file
-               if (found)
+               if (f)
                         return PAKFIRE_WALK_END;
  
                 // Fetch path
@@ -591,30 +673,38 @@ PAKFIRE_EXPORT int pakfire_archive_read(struct pakfire_archive* archive,
  
         // Reads a matching file into memory
         int __pakfire_archive_read(struct pakfire* pakfire, struct archive* a,
-                       struct archive_entry* e, void* __data) {
-               // We have found our file
-               found = 1;
+                       struct archive_entry* e, void* data) {
+               // Create a file descriptor
+               f = fopencookie(cookie, "r", pakfire_archive_read_functions);
+               if (!f) {
+                       ERROR(pakfire, "Could not open /%s: %m\n", path);
+                       return PAKFIRE_WALK_ERROR;
+               }
  
-               return pakfire_archive_copy_data_to_buffer(pakfire, a, e, data, length);
+               return PAKFIRE_WALK_DONE;
         }
  
         // Walk through the archive
-       r = pakfire_archive_walk(archive,
+       r = pakfire_walk(archive->pakfire, cookie->a,
                 __pakfire_archive_read, __pakfire_archive_read_filter, NULL);
         if (r)
-               return r;
+               goto ERROR;
  
         // Nothing found
-       if (!found) {
+       if (!f) {
                 // No such file or directory
                 errno = ENOENT;
  
                 ERROR(archive->pakfire, "Could not find /%s: %m\n", path);
-
-               return 1;
         }
  
-       return 0;
+       return f;
+
+ERROR:
+       if (cookie)
+               __pakfire_archive_cookie_close(cookie);
+
+       return NULL;
  }
  
  int pakfire_archive_copy(struct pakfire_archive* archive, const char* path) {
@@ -739,7 +829,7 @@ static int __pakfire_archive_extract(struct pakfire_archive* archive, int flags)
         }
  
         // Open the archive
-       a = open_archive(archive);
+       a = open_archive(archive, NULL);
         if (!a) {
                 r = 1;
                 goto ERROR;
diff --git a/src/libpakfire/include/pakfire/archive.h b/src/libpakfire/include/pakfire/archive.h

index fed3f5727e333b8822eb0ee5bd1284b7ae966ed1..9a15e0aae41186ff34f23787098e52eba0c2171d 100644 (file)
--- a/src/libpakfire/include/pakfire/archive.h
+++ b/src/libpakfire/include/pakfire/archive.h
@@ -35,8 +35,7 @@ int pakfire_archive_open(struct pakfire_archive** archive, struct pakfire* pakfi
  struct pakfire_archive* pakfire_archive_ref(struct pakfire_archive* archive);
  struct pakfire_archive* pakfire_archive_unref(struct pakfire_archive* archive);
  
-int pakfire_archive_read(struct pakfire_archive* archive, const char* filename,
-       char** data, size_t* data_size);
+FILE* pakfire_archive_read(struct pakfire_archive* archive, const char* filename);
  int pakfire_archive_extract(struct pakfire_archive* archive);
  
  const char* pakfire_archive_get_path(struct pakfire_archive* archive);
diff --git a/tests/libpakfire/archive.c b/tests/libpakfire/archive.c

index 4b691fa203fa19ef78e90e4d5f46f03b43133091..79e7dcb4ac9bd59606e82b11ab5c9a4de9013bee 100644 (file)
--- a/tests/libpakfire/archive.c
+++ b/tests/libpakfire/archive.c
@@ -88,7 +88,8 @@ FAIL:
  
  static int test_read(const struct test* t) {
         struct pakfire_archive* archive = NULL;
-       char* data = NULL;
+       FILE* f = NULL;
+       char data[20000];
         size_t length = 0;
         int r = EXIT_FAILURE;
  
@@ -101,7 +102,10 @@ static int test_read(const struct test* t) {
         ASSERT_SUCCESS(pakfire_archive_open(&archive, t->pakfire, TEST_SRC_PATH TEST_PKG1_PATH));
  
         // Read a file
-       ASSERT_SUCCESS(pakfire_archive_read(archive, "/usr/bin/beep", &data, &length));
+       ASSERT(f = pakfire_archive_read(archive, "/usr/bin/beep"));
+
+       // Read the entire content
+       ASSERT(length = fread(data, 1, sizeof(data), f));
  
         // Check filesize
         ASSERT(length == 17192);
@@ -109,18 +113,11 @@ static int test_read(const struct test* t) {
         // Check the first couple of bytes
         ASSERT_COMPARE(data, beep, sizeof(beep));
  
-       if (data) {
-               free(data);
-               data = NULL;
-       }
-
         // Try to access a file that does not exist
-       ASSERT_ERRNO(pakfire_archive_read(archive, "/does/not/exist", &data, &length), ENOENT);
+       ASSERT_ERRNO((pakfire_archive_read(archive, "/does/not/exist") == NULL), ENOENT);
  
         // Some invalid calls
-       ASSERT_ERRNO(pakfire_archive_read(archive, NULL, &data, &length), EINVAL);
-       ASSERT_ERRNO(pakfire_archive_read(archive, "/does/not/exist", NULL, &length), EINVAL);
-       ASSERT_ERRNO(pakfire_archive_read(archive, "/does/not/exist", &data, NULL), EINVAL);
+       ASSERT_ERRNO((pakfire_archive_read(archive, NULL) == NULL), EINVAL);
  
         // Everything passed
         r = EXIT_SUCCESS;
@@ -128,8 +125,6 @@ static int test_read(const struct test* t) {
  FAIL:
         if (archive)
                 pakfire_archive_unref(archive);
-       if (data)
-               free(data);
  
         return r;
  }
author	Michael Tremer <michael.tremer@ipfire.org>
	Tue, 21 Mar 2023 08:14:12 +0000 (08:14 +0000)
committer	Michael Tremer <michael.tremer@ipfire.org>
	Tue, 21 Mar 2023 08:14:12 +0000 (08:14 +0000)
Makefile.am		patch \| blob \| blame \| history
src/_pakfire/_pakfiremodule.c		patch \| blob \| blame \| history
src/_pakfire/archive.c		patch \| blob \| blame \| history
src/_pakfire/archive_file.c	[new file with mode: 0644]	patch \| blob
src/_pakfire/archive_file.h	[new file with mode: 0644]	patch \| blob
src/libpakfire/archive.c		patch \| blob \| blame \| history
src/libpakfire/include/pakfire/archive.h		patch \| blob \| blame \| history
tests/libpakfire/archive.c		patch \| blob \| blame \| history