]> git.ipfire.org Git - thirdparty/systemd.git/commitdiff
tar-util: properly deal with sparse files
author Lennart Poettering <lennart@poettering.net>
Thu, 21 Aug 2025 17:08:01 +0000 (19:08 +0200)
committer Lennart Poettering <lennart@poettering.net>
Tue, 4 Nov 2025 13:12:39 +0000 (14:12 +0100)
The extractor already deals with sparse files properly (because
archive_read_data_into_fd() does).

Let's make sure the archiver does this too, and attaches the
necessary sparse file metadata to each file.

src/shared/libarchive-util.c
src/shared/libarchive-util.h
src/shared/tar-util.c

index dcd532df1dc6eea88429038939e5cd51842d3bf8..e3387e6e97cac4b9e749c6a87cf7b429a94d5316 100644 (file)
@@ -37,6 +37,7 @@ DLSYM_PROTOTYPE(archive_entry_set_rdevminor) = NULL;
 DLSYM_PROTOTYPE(archive_entry_set_size) = NULL;
 DLSYM_PROTOTYPE(archive_entry_set_symlink) = NULL;
 DLSYM_PROTOTYPE(archive_entry_set_uid) = NULL;
+DLSYM_PROTOTYPE(archive_entry_sparse_add_entry) = NULL;
 DLSYM_PROTOTYPE(archive_entry_symlink) = NULL;
 DLSYM_PROTOTYPE(archive_entry_uid) = NULL;
 #if HAVE_LIBARCHIVE_UID_IS_SET
@@ -61,7 +62,7 @@ DLSYM_PROTOTYPE(archive_write_new) = NULL;
 DLSYM_PROTOTYPE(archive_write_open_FILE) = NULL;
 DLSYM_PROTOTYPE(archive_write_open_fd) = NULL;
 DLSYM_PROTOTYPE(archive_write_set_format_filter_by_ext) = NULL;
-DLSYM_PROTOTYPE(archive_write_set_format_gnutar) = NULL;
+DLSYM_PROTOTYPE(archive_write_set_format_pax) = NULL;
 
 int dlopen_libarchive(void) {
         ELF_NOTE_DLOPEN("archive",
@@ -103,6 +104,7 @@ int dlopen_libarchive(void) {
                         DLSYM_ARG(archive_entry_set_size),
                         DLSYM_ARG(archive_entry_set_symlink),
                         DLSYM_ARG(archive_entry_set_uid),
+                        DLSYM_ARG(archive_entry_sparse_add_entry),
                         DLSYM_ARG(archive_entry_symlink),
                         DLSYM_ARG(archive_entry_uid),
 #if HAVE_LIBARCHIVE_UID_IS_SET
@@ -127,8 +129,7 @@ int dlopen_libarchive(void) {
                         DLSYM_ARG(archive_write_open_FILE),
                         DLSYM_ARG(archive_write_open_fd),
                         DLSYM_ARG(archive_write_set_format_filter_by_ext),
-                        DLSYM_ARG(archive_write_set_format_gnutar)
-        );
+                        DLSYM_ARG(archive_write_set_format_pax));
 }
 
 /* libarchive uses its own file type macros. They happen to be defined the same way as the Linux ones, and
index e58e4d26a543ff5ac38ec574ad005869076b04be..7534b0d016e1ee5511c0c97a9e0ad44128247056 100644 (file)
@@ -33,6 +33,7 @@ extern DLSYM_PROTOTYPE(archive_entry_set_rdevminor);
 extern DLSYM_PROTOTYPE(archive_entry_set_size);
 extern DLSYM_PROTOTYPE(archive_entry_set_symlink);
 extern DLSYM_PROTOTYPE(archive_entry_set_uid);
+extern DLSYM_PROTOTYPE(archive_entry_sparse_add_entry);
 extern DLSYM_PROTOTYPE(archive_entry_symlink);
 extern DLSYM_PROTOTYPE(archive_entry_uid);
 extern DLSYM_PROTOTYPE(archive_entry_xattr_add_entry);
@@ -54,7 +55,7 @@ extern DLSYM_PROTOTYPE(archive_write_new);
 extern DLSYM_PROTOTYPE(archive_write_open_FILE);
 extern DLSYM_PROTOTYPE(archive_write_open_fd);
 extern DLSYM_PROTOTYPE(archive_write_set_format_filter_by_ext);
-extern DLSYM_PROTOTYPE(archive_write_set_format_gnutar);
+extern DLSYM_PROTOTYPE(archive_write_set_format_pax);
 
 #if HAVE_LIBARCHIVE_UID_IS_SET
 extern DLSYM_PROTOTYPE(archive_entry_gid_is_set);
index 9d28009f7195981b6dfb5176f43a1eb16e65d60a..83b346d52ce4a13fedc3e09c39db393fef60c99d 100644 (file)
@@ -804,6 +804,57 @@ bypass:
         return 0;
 }
 
+/* Registers each data extent of 'fd' (found via SEEK_HOLE/SEEK_DATA) on 'entry', then rewinds 'fd' to
+ * offset 0. Returns 0 on success, or the negative result of log_error_errno() if an lseek() call fails. */
+static int archive_generate_sparse(struct archive_entry *entry, int fd) {
+        assert(entry);
+        assert(fd >= 0); /* 0 is a valid fd and negative values are not, hence compare explicitly */
+
+        off_t c = 0;
+        for (;;) {
+                /* Look for the next hole */
+                off_t h = lseek(fd, c, SEEK_HOLE);
+                if (h < 0) {
+                        if (errno != ENXIO)
+                                return log_error_errno(errno, "Failed to issue SEEK_HOLE: %m");
+
+                        /* ENXIO means we've reached the final data of the file and that data isn't
+                         * followed by anything more. Figure out where the end of the file is. */
+                        off_t e = lseek(fd, 0, SEEK_END);
+                        if (e < 0)
+                                return log_error_errno(errno, "Failed to issue SEEK_END: %m");
+
+                        /* Generate sparse entry for final block */
+                        if (e > c && c != 0) {
+                                log_debug("final sparse block %" PRIu64 "…%" PRIu64, (uint64_t) c, (uint64_t) e);
+                                sym_archive_entry_sparse_add_entry(entry, c, e - c);
+                        }
+
+                        break;
+                }
+
+                if (h > c) {
+                        log_debug("inner sparse block %" PRIu64 "…%" PRIu64 " (%" PRIu64 ")", (uint64_t) c, (uint64_t) h, (uint64_t) h - (uint64_t) c);
+                        sym_archive_entry_sparse_add_entry(entry, c, h - c);
+                }
+
+                /* Now look for the next data after the hole */
+                c = lseek(fd, h, SEEK_DATA);
+                if (c < 0) {
+                        if (errno != ENXIO)
+                                return log_error_errno(errno, "Failed to issue SEEK_DATA: %m");
+
+                        /* No data anymore */
+                        break;
+                }
+        }
+
+        if (lseek(fd, 0, SEEK_SET) < 0)
+                return log_error_errno(errno, "Failed to reset seek offset: %m");
+
+        return 0;
+}
+
 static int archive_item(
                 RecurseDirEvent event,
                 const char *path,
@@ -912,17 +963,24 @@ static int archive_item(
                 sym_archive_entry_xattr_add_entry(entry, xa, buf, size);
         }
 
-        if (sym_archive_write_header(d->archive, entry) != ARCHIVE_OK)
-                return log_error_errno(SYNTHETIC_ERRNO(ENOTRECOVERABLE), "Failed to write archive entry header: %s", sym_archive_error_string(d->archive));
-
+        _cleanup_close_ int data_fd = -EBADF;
         if (S_ISREG(sx->stx_mode)) {
-                _cleanup_close_ int data_fd = -EBADF;
-
-                /* Convert the O_PATH fd in a proper fd */
+                /* Convert the O_PATH fd into a proper fd */
                 data_fd = fd_reopen(inode_fd, O_RDONLY|O_CLOEXEC);
                 if (data_fd < 0)
                         return log_error_errno(data_fd, "Failed to open '%s': %m", path);
 
+                r = archive_generate_sparse(entry, data_fd);
+                if (r < 0)
+                        return r;
+        }
+
+        if (sym_archive_write_header(d->archive, entry) != ARCHIVE_OK)
+                return log_error_errno(SYNTHETIC_ERRNO(ENOTRECOVERABLE), "Failed to write archive entry header: %s", sym_archive_error_string(d->archive));
+
+        if (S_ISREG(sx->stx_mode)) {
+                assert(data_fd >= 0);
+
                 for (;;) {
                         char buffer[64*1024];
                         ssize_t l;
@@ -965,7 +1023,7 @@ int tar_c(int tree_fd, int output_fd, const char *filename, TarFlags flags) {
         if (filename)
                 r = sym_archive_write_set_format_filter_by_ext(a, filename);
         else
-                r = sym_archive_write_set_format_gnutar(a);
+                r = sym_archive_write_set_format_pax(a);
         if (r != ARCHIVE_OK)
                 return log_error_errno(SYNTHETIC_ERRNO(ENOTRECOVERABLE), "Failed to set libarchive output format: %s", sym_archive_error_string(a));