git.ipfire.org Git - thirdparty/libarchive.git/commitdiff
Support fully-sparse files 655/head
author Nate Rosenblum <natr@google.com>
Sat, 6 Feb 2016 01:39:03 +0000 (17:39 -0800)
committer Nate Rosenblum <natr@google.com>
Thu, 18 Feb 2016 00:02:23 +0000 (16:02 -0800)
Avoids an edge condition where fully-sparse files would not trigger
sparse file handling, leading to large tar files full of zeros.

libarchive/archive_read_disk_entry_from_file.c
libarchive/archive_read_disk_posix.c
libarchive/test/test_sparse_basic.c

index 38303aa8725b900de952e8b4a5a86e43fc61a1ff..74fe353d9d347bb68a6334ec10632cbb42892480 100644 (file)
@@ -1046,7 +1046,7 @@ setup_sparse(struct archive_read_disk *a,
        struct fiemap *fm;
        struct fiemap_extent *fe;
        int64_t size;
-       int count, do_fiemap;
+       int count, do_fiemap, iters;
        int exit_sts = ARCHIVE_OK;
 
        if (archive_entry_filetype(entry) != AE_IFREG
@@ -1083,7 +1083,7 @@ setup_sparse(struct archive_read_disk *a,
        fm->fm_extent_count = count;
        do_fiemap = 1;
        size = archive_entry_size(entry);
-       for (;;) {
+       for (iters = 0; ; ++iters) {
                int i, r;
 
                r = ioctl(*fd, FS_IOC_FIEMAP, fm); 
@@ -1093,8 +1093,13 @@ setup_sparse(struct archive_read_disk *a,
                         * version(<2.6.28) cannot perfom FS_IOC_FIEMAP. */
                        goto exit_setup_sparse;
                }
-               if (fm->fm_mapped_extents == 0)
+               if (fm->fm_mapped_extents == 0) {
+                       if (iters == 0) {
+                               /* Fully sparse file; insert a zero-length "data" entry */
+                               archive_entry_sparse_add_entry(entry, 0, 0);
+                       }
                        break;
+               }
                fe = fm->fm_extents;
                for (i = 0; i < (int)fm->fm_mapped_extents; i++, fe++) {
                        if (!(fe->fe_flags & FIEMAP_EXTENT_UNWRITTEN)) {
@@ -1139,6 +1144,7 @@ setup_sparse(struct archive_read_disk *a,
        off_t initial_off; /* FreeBSD/Solaris only, so off_t okay here */
        off_t off_s, off_e; /* FreeBSD/Solaris only, so off_t okay here */
        int exit_sts = ARCHIVE_OK;
+       int check_fully_sparse = 0;
 
        if (archive_entry_filetype(entry) != AE_IFREG
            || archive_entry_size(entry) <= 0
@@ -1191,8 +1197,14 @@ setup_sparse(struct archive_read_disk *a,
        while (off_s < size) {
                off_s = lseek(*fd, off_s, SEEK_DATA);
                if (off_s == (off_t)-1) {
-                       if (errno == ENXIO)
-                               break;/* no more hole */
+                       if (errno == ENXIO) {
+                               /* no more data */
+                               if (archive_entry_sparse_count(entry) == 0) {
+                                       /* Potentially a fully-sparse file. */
+                                       check_fully_sparse = 1;
+                               }
+                               break;
+                       }
                        archive_set_error(&a->archive, errno,
                            "lseek(SEEK_HOLE) failed");
                        exit_sts = ARCHIVE_FAILED;
@@ -1216,6 +1228,14 @@ setup_sparse(struct archive_read_disk *a,
                        off_e - off_s);
                off_s = off_e;
        }
+
+       if (check_fully_sparse) {
+               if (lseek(*fd, 0, SEEK_HOLE) == 0 &&
+                       lseek(*fd, 0, SEEK_END) == size) {
+                       /* Fully sparse file; insert a zero-length "data" entry */
+                       archive_entry_sparse_add_entry(entry, 0, 0);
+               }
+       }
 exit_setup_sparse:
        lseek(*fd, initial_off, SEEK_SET);
        return (exit_sts);
index f4805392233d50506f86d5ce50c6ba3acd9b8ff8..067fa6d4ae720e7a0a7796d1d731585e94728ba4 100644 (file)
@@ -717,6 +717,7 @@ _archive_read_data_block(struct archive *_a, const void **buff,
        int r;
        ssize_t bytes;
        size_t buffbytes;
+       int empty_sparse_region = 0;
 
        archive_check_magic(_a, ARCHIVE_READ_DISK_MAGIC, ARCHIVE_STATE_DATA,
            "archive_read_data_block");
@@ -798,6 +799,9 @@ _archive_read_data_block(struct archive *_a, const void **buff,
        if ((int64_t)buffbytes > t->current_sparse->length)
                buffbytes = t->current_sparse->length;
 
+       if (t->current_sparse->length == 0)
+               empty_sparse_region = 1;
+
        /*
         * Skip hole.
         * TODO: Should we consider t->current_filesystem->xfer_align?
@@ -828,7 +832,15 @@ _archive_read_data_block(struct archive *_a, const void **buff,
                }
        } else
                bytes = 0;
-       if (bytes == 0) {
+       /*
+        * Return an EOF unless we've read a leading empty sparse region, which
+        * is used to represent fully-sparse files.
+        *
+        * TODO: it is not technically necessary to check for entry_total == 0,
+        * but this simplifies some unit tests that expect to only read data
+        * regions with length > 0. Consider fixing these tests (test_sparse_basic).
+       */
+       if (bytes == 0 && !(empty_sparse_region && t->entry_total == 0)) {
                /* Get EOF */
                t->entry_eof = 1;
                r = ARCHIVE_EOF;
index 081fa3179a62ef213c5a3f55500a868167b60422..09c0c9f5e8dc26b76b1b99cdb23eacb5f5582db9 100644 (file)
@@ -455,3 +455,37 @@ DEFINE_TEST(test_sparse_basic)
        assertEqualInt(ARCHIVE_OK, archive_read_free(a));
        free(cwd);
 }
+
+DEFINE_TEST(test_fully_sparse_files)
+{
+       char *cwd;
+       struct archive *a;
+
+       const struct sparse sparse_file[] = {
+               { HOLE, 409600 }, { END, 0 }
+       };
+       /* Check whether the filesystem containing CWD can
+        * report the holes of a sparse file. */
+#ifdef PATH_MAX
+       cwd = getcwd(NULL, PATH_MAX);/* Solaris getcwd needs the size. */
+#else
+       cwd = getcwd(NULL, 0);
+#endif
+       if (!assert(cwd != NULL))
+               return;
+       if (!is_sparse_supported(cwd)) {
+               free(cwd);
+               skipping("This filesystem or platform do not support "
+                   "the reporting of the holes of a sparse file through "
+                   "API such as lseek(HOLE)");
+               return;
+       }
+
+       assert((a = archive_read_disk_new()) != NULL);
+
+       /* Fully sparse files are encoded with a zero-length "data" block. */
+       verify_sparse_file(a, "file0", sparse_file, 1, 1);
+
+       assertEqualInt(ARCHIVE_OK, archive_read_free(a));
+       free(cwd);
+}