From b3950dc412310765c78d2e2b42abd026550d561a Mon Sep 17 00:00:00 2001 From: Nate Rosenblum Date: Fri, 5 Feb 2016 17:39:03 -0800 Subject: [PATCH] Support fully-sparse files Avoids an edge condition where fully-sparse files would not trigger sparse file handling, leading to large tar files full of zeros. --- .../archive_read_disk_entry_from_file.c | 30 +++++++++++++--- libarchive/archive_read_disk_posix.c | 14 +++++++- libarchive/test/test_sparse_basic.c | 34 +++++++++++++++++++ 3 files changed, 72 insertions(+), 6 deletions(-) diff --git a/libarchive/archive_read_disk_entry_from_file.c b/libarchive/archive_read_disk_entry_from_file.c index 38303aa87..74fe353d9 100644 --- a/libarchive/archive_read_disk_entry_from_file.c +++ b/libarchive/archive_read_disk_entry_from_file.c @@ -1046,7 +1046,7 @@ setup_sparse(struct archive_read_disk *a, struct fiemap *fm; struct fiemap_extent *fe; int64_t size; - int count, do_fiemap; + int count, do_fiemap, iters; int exit_sts = ARCHIVE_OK; if (archive_entry_filetype(entry) != AE_IFREG @@ -1083,7 +1083,7 @@ setup_sparse(struct archive_read_disk *a, fm->fm_extent_count = count; do_fiemap = 1; size = archive_entry_size(entry); - for (;;) { + for (iters = 0; ; ++iters) { int i, r; r = ioctl(*fd, FS_IOC_FIEMAP, fm); @@ -1093,8 +1093,13 @@ setup_sparse(struct archive_read_disk *a, * version(<2.6.28) cannot perfom FS_IOC_FIEMAP. 
*/ goto exit_setup_sparse; } - if (fm->fm_mapped_extents == 0) + if (fm->fm_mapped_extents == 0) { + if (iters == 0) { + /* Fully sparse file; insert a zero-length "data" entry */ + archive_entry_sparse_add_entry(entry, 0, 0); + } break; + } fe = fm->fm_extents; for (i = 0; i < (int)fm->fm_mapped_extents; i++, fe++) { if (!(fe->fe_flags & FIEMAP_EXTENT_UNWRITTEN)) { @@ -1139,6 +1144,7 @@ setup_sparse(struct archive_read_disk *a, off_t initial_off; /* FreeBSD/Solaris only, so off_t okay here */ off_t off_s, off_e; /* FreeBSD/Solaris only, so off_t okay here */ int exit_sts = ARCHIVE_OK; + int check_fully_sparse = 0; if (archive_entry_filetype(entry) != AE_IFREG || archive_entry_size(entry) <= 0 @@ -1191,8 +1197,14 @@ setup_sparse(struct archive_read_disk *a, while (off_s < size) { off_s = lseek(*fd, off_s, SEEK_DATA); if (off_s == (off_t)-1) { - if (errno == ENXIO) - break;/* no more hole */ + if (errno == ENXIO) { + /* no more hole */ + if (archive_entry_sparse_count(entry) == 0) { + /* Potentially a fully-sparse file. 
*/ + check_fully_sparse = 1; + } + break; + } archive_set_error(&a->archive, errno, "lseek(SEEK_HOLE) failed"); exit_sts = ARCHIVE_FAILED; @@ -1216,6 +1228,14 @@ setup_sparse(struct archive_read_disk *a, off_e - off_s); off_s = off_e; } + + if (check_fully_sparse) { + if (lseek(*fd, 0, SEEK_HOLE) == 0 && + lseek(*fd, 0, SEEK_END) == size) { + /* Fully sparse file; insert a zero-length "data" entry */ + archive_entry_sparse_add_entry(entry, 0, 0); + } + } exit_setup_sparse: lseek(*fd, initial_off, SEEK_SET); return (exit_sts); diff --git a/libarchive/archive_read_disk_posix.c b/libarchive/archive_read_disk_posix.c index f48053922..067fa6d4a 100644 --- a/libarchive/archive_read_disk_posix.c +++ b/libarchive/archive_read_disk_posix.c @@ -717,6 +717,7 @@ _archive_read_data_block(struct archive *_a, const void **buff, int r; ssize_t bytes; size_t buffbytes; + int empty_sparse_region = 0; archive_check_magic(_a, ARCHIVE_READ_DISK_MAGIC, ARCHIVE_STATE_DATA, "archive_read_data_block"); @@ -798,6 +799,9 @@ _archive_read_data_block(struct archive *_a, const void **buff, if ((int64_t)buffbytes > t->current_sparse->length) buffbytes = t->current_sparse->length; + if (t->current_sparse->length == 0) + empty_sparse_region = 1; + /* * Skip hole. * TODO: Should we consider t->current_filesystem->xfer_align? @@ -828,7 +832,15 @@ _archive_read_data_block(struct archive *_a, const void **buff, } } else bytes = 0; - if (bytes == 0) { + /* + * Return an EOF unless we've read a leading empty sparse region, which + * is used to represent fully-sparse files. + * + * TODO: it is not technically necessary to check for entry_total == 0, + * but this simplifies some unit tests that expect to only read data + * regions with length > 0. Consider fixing these tests (test_sparse_basic). 
+ */ + if (bytes == 0 && !(empty_sparse_region && t->entry_total == 0)) { /* Get EOF */ t->entry_eof = 1; r = ARCHIVE_EOF; diff --git a/libarchive/test/test_sparse_basic.c b/libarchive/test/test_sparse_basic.c index 081fa3179..09c0c9f5e 100644 --- a/libarchive/test/test_sparse_basic.c +++ b/libarchive/test/test_sparse_basic.c @@ -455,3 +455,37 @@ DEFINE_TEST(test_sparse_basic) assertEqualInt(ARCHIVE_OK, archive_read_free(a)); free(cwd); } + +DEFINE_TEST(test_fully_sparse_files) +{ + char *cwd; + struct archive *a; + + const struct sparse sparse_file[] = { + { HOLE, 409600 }, { END, 0 } + }; + /* Check whether the filesystem containing the CWD can + * report the holes of a sparse file. */ +#ifdef PATH_MAX + cwd = getcwd(NULL, PATH_MAX);/* Solaris getcwd needs the size. */ +#else + cwd = getcwd(NULL, 0); +#endif + if (!assert(cwd != NULL)) + return; + if (!is_sparse_supported(cwd)) { + free(cwd); + skipping("This filesystem or platform do not support " + "the reporting of the holes of a sparse file through " + "API such as lseek(HOLE)"); + return; + } + + assert((a = archive_read_disk_new()) != NULL); + + /* Fully sparse files are encoded with a zero-length "data" block. */ + verify_sparse_file(a, "file0", sparse_file, 1, 1); + + assertEqualInt(ARCHIVE_OK, archive_read_free(a)); + free(cwd); +} -- 2.47.2