From: Paul Eggert Date: Sat, 2 Nov 2024 20:06:47 +0000 (-0700) Subject: Improve sparse I/O performance X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=568919d77b346bfc833d476761204f3325868c0a;p=thirdparty%2Ftar.git Improve sparse I/O performance * src/sparse.c (sparse_dump_region, sparse_extract_region): Don’t insist on reading and writing sparse files 512 bytes at a time. This resulted in a 4× to 6× performance improvement on my platform. --- diff --git a/NEWS b/NEWS index 56f59dbb..7a5ee22e 100644 --- a/NEWS +++ b/NEWS @@ -1,4 +1,4 @@ -GNU tar NEWS - User visible changes. 2023-09-12 +GNU tar NEWS - User visible changes. 2024-11-02 Please send GNU tar bug reports to version TBD @@ -36,6 +36,10 @@ used, command output will be parsed using strptime(3). ** Transformations that change case (e.g., --transform='s/.*/\L&/') now work correctly with multi-byte characters. +* Performance improvements + +** Sparse files are now read and written with larger blocksizes. + version 1.35 - Sergey Poznyakoff, 2023-07-18 diff --git a/src/sparse.c b/src/sparse.c index d04798b9..9518b3ce 100644 --- a/src/sparse.c +++ b/src/sparse.c @@ -415,7 +415,8 @@ sparse_dump_region (struct tar_sparse_file *file, idx_t i) while (bytes_left > 0) { union block *blk = find_next_block (); - idx_t bufsize = min (bytes_left, BLOCKSIZE); + idx_t avail = available_space_after (blk); + idx_t bufsize = min (bytes_left, avail); idx_t bytes_read = full_read (file->fd, blk->buffer, bufsize); if (bytes_read < BLOCKSIZE) memset (blk->buffer + bytes_read, 0, BLOCKSIZE - bytes_read); @@ -449,7 +450,7 @@ sparse_dump_region (struct tar_sparse_file *file, idx_t i) return false; } - set_next_block_after (blk); + set_next_block_after (blk + (bufsize - 1) / BLOCKSIZE); } return true; @@ -473,15 +474,16 @@ sparse_extract_region (struct tar_sparse_file *file, idx_t i) } else while (write_size > 0) { - idx_t wrbytes = min (write_size, BLOCKSIZE); union block *blk = find_next_block (); if (!blk) { paxerror (0, _("Unexpected EOF in archive")); return false; } - set_next_block_after (blk); - file->dumped_size += BLOCKSIZE; + idx_t avail = available_space_after (blk); + idx_t wrbytes = min (write_size, avail); + set_next_block_after (blk + (wrbytes - 1) / BLOCKSIZE); + file->dumped_size += avail; idx_t count = blocking_write (file->fd, blk->buffer, wrbytes); write_size -= count; mv_size_left (file->stat_info->archive_file_size - file->dumped_size);