extern int list_only;
extern int inplace;
extern int preallocate_files;
+extern int sparse_files;
extern int preserve_perms;
extern int preserve_executability;
extern int open_noatime;
OFF_T do_fallocate(int fd, OFF_T offset, OFF_T length)
{
- int opts = inplace || preallocate_files ? DO_FALLOC_OPTIONS : 0;
+ /* FALLOC_FL_KEEP_SIZE lets --preallocate/--inplace keep the file size at 0
+ * until data is written, but a later hole-punch (for --sparse) can only
+ * deallocate blocks that lie within the file's size -- with KEEP_SIZE the
+ * reserved blocks sit beyond EOF and the punch silently does nothing,
+ * leaving the file fully allocated. So when holes will also be punched,
+ * preallocate at full size instead (write_sparse then punches the nulls). */
+ int opts = (inplace || preallocate_files) && sparse_files <= 0 ? DO_FALLOC_OPTIONS : 0;
int ret;
RETURN_ERROR_IF(dry_run, 0);
RETURN_ERROR_IF_RO_OR_LO;
return length;
return st.st_blocks * S_BLKSIZE;
}
- return 0;
+ /* With FALLOC_FL_KEEP_SIZE the blocks for [0, length) are reserved even
+ * though the file size stays put. Return that reserved length (not 0) so
+ * the caller's preallocated_len is meaningful: write_sparse() needs it to
+ * choose do_punch_hole() over a plain lseek() when turning a null run into
+ * a hole, and the receiver uses it to trim any over-preallocation. (A
+ * stray 0 here, from 2019's switch to KEEP_SIZE, is why --preallocate
+ * --sparse stopped producing sparse files.) */
+ return length;
}
#endif
do_fallocate (--preallocate) and do_punch_hole (sparse writes).
These are receiver-side file operations the resolver restructure also touches.
-Where the filesystem lacks fallocate/punch-hole the calls warn and the transfer
-still completes, so the content assertions hold regardless; the coverage is
-gained wherever the kernel supports them.
+Content must survive everywhere; in addition, where the filesystem really frees
+blocks on FALLOC_FL_PUNCH_HOLE (probed at runtime), --preallocate --sparse must
+end up sparse (st_blocks * 512 below the apparent size).
+That is a regression guard: do_fallocate() must report the preallocated length
+so write_sparse() punches holes in the reserved extent instead of lseek'ing over
+it -- a stray 0 there silently left the file fully allocated.
"""
import os
from rsyncfns import (
FROMDIR, TODIR,
- assert_same, make_data_file, makepath, rmtree, run_rsync, test_skipped,
+ assert_same, make_data_file, makepath, rmtree, run_rsync, test_fail,
+ test_skipped,
)
src = FROMDIR
deep = os.path.join('d1', 'd2', 'd3', 'f')
+
+def allocated(path):
+ return os.stat(path).st_blocks * 512
+
# --preallocate needs fallocate/posix_fallocate, and do_punch_hole needs
# FALLOC_FL_PUNCH_HOLE -- both Linux (and Cygwin) features. macOS, the *BSDs and
# Solaris build without preallocation and reject the option outright ("prealloc-
check=False, capture_output=True).returncode != 0:
test_skipped("--preallocate not supported on this platform")
+def fs_can_punch_holes():
+ """True only where the kernel can deallocate blocks via FALLOC_FL_PUNCH_HOLE
+ -- the mechanism do_punch_hole uses for --sparse. A filesystem may report
+ seek-based sparseness yet still keep every block on a punch (e.g. where
+ rsync's punch falls back to writing zeros), so probe the real capability and
+ assert the hole only where it actually frees blocks."""
+ import ctypes
+ import ctypes.util
+ KEEP_SIZE, PUNCH_HOLE = 0x01, 0x02
+ p = src / 'punch-probe'
+ fd = -1
+ try:
+ libc = ctypes.CDLL(ctypes.util.find_library('c') or 'libc.so.6',
+ use_errno=True)
+ libc.fallocate.argtypes = [ctypes.c_int, ctypes.c_int,
+ ctypes.c_longlong, ctypes.c_longlong]
+ fd = os.open(p, os.O_CREAT | os.O_RDWR | os.O_TRUNC, 0o644)
+ os.write(fd, b'\xff' * 65536)
+ before = os.fstat(fd).st_blocks
+ ret = libc.fallocate(fd, PUNCH_HOLE | KEEP_SIZE, 0, 65536)
+ return ret == 0 and os.fstat(fd).st_blocks < before
+ except (OSError, AttributeError, ValueError):
+ return False
+ finally:
+ if fd >= 0:
+ os.close(fd)
+ try:
+ os.unlink(p)
+ except OSError:
+ pass
+
+
+can_punch = fs_can_punch_holes()
+
def seed_plain(size=1_000_000):
rmtree(src)
assert_same(TODIR / deep, src / deep, label='--preallocate content')
# --- --preallocate --sparse on a holey file: do_fallocate + do_punch_hole ---
+# rsync.1 promises sparse blocks for this combination where the FS supports
+# holes.  Assert it wherever the punch probe succeeded (can_punch): do_fallocate
+# reserves the whole extent, then the zero run must be punched back out so the
+# allocated size (st_blocks * 512) ends up below the apparent size.
seed_holey()
run_rsync('-a', '--preallocate', '--sparse', f'{src}/', f'{TODIR}/')
assert_same(TODIR / deep, src / deep, label='--preallocate --sparse content')
+if can_punch and allocated(TODIR / deep) >= os.path.getsize(TODIR / deep):
+ test_fail(f"--preallocate --sparse left the file fully allocated "
+ f"(allocated {allocated(TODIR / deep)} for a "
+ f"{os.path.getsize(TODIR / deep)}-byte file); the preallocated "
+ "extent's zero run was not punched into a hole")
# --- --inplace --sparse update that introduces a zero run: do_punch_hole ----
# (sparse_end's updating_basis_or_equiv branch punches the hole in place.)