From 06a6758d036886d2e13c0c936fd65cee9eb223b4 Mon Sep 17 00:00:00 2001 From: Paul Eggert Date: Tue, 5 Aug 2025 14:45:18 -0700 Subject: [PATCH] cp: refactor out data copying * po/POTFILES.in, src/local.mk (copy_sources): Add the new file. * src/copy.c: Move the #includes of alignalloc.h, buffer-lcm.h, fadvise.h, full-write.h, ioblksize.h to copy-file-data.c. (enum copy_debug_val, struct copy_debug): Move these decls to copy.h. (punch_hole, create_hole, is_CLONENOTSUP, sparse_copy) (write_zeros, lseek_copy, HAVE_STRUCT_STAT_ST_BLOCKS) (enum scantype, struct scan_inference, infer_scantype): Move to copy-file-data.c. (copy_reg): Move the data-copying part of this function to the new function copy_file_data in copy-file-data.c. * src/copy-file-data.c: New file, taken from part of copy.c. --- po/POTFILES.in | 1 + src/copy-file-data.c | 593 ++++++++++++++++++++++++++++++++++++++++++ src/copy.c | 595 ------------------------------------------- src/copy.h | 27 ++ src/local.mk | 1 + 5 files changed, 622 insertions(+), 595 deletions(-) create mode 100644 src/copy-file-data.c diff --git a/po/POTFILES.in b/po/POTFILES.in index 8e8fb056c7..444202840b 100644 --- a/po/POTFILES.in +++ b/po/POTFILES.in @@ -45,6 +45,7 @@ src/chroot.c src/cksum.c src/comm.c src/copy.c +src/copy-file-data.c src/coreutils.c src/cp.c src/csplit.c diff --git a/src/copy-file-data.c b/src/copy-file-data.c new file mode 100644 index 0000000000..3a139689de --- /dev/null +++ b/src/copy-file-data.c @@ -0,0 +1,593 @@ +/* Copy data from one file to another. + Copyright 1989-2025 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Extracted from copy.c by Paul Eggert. */ + +#include + +#include "system.h" + +#include "alignalloc.h" +#include "backupfile.h" +#include "buffer-lcm.h" +#include "copy.h" +#include "fadvise.h" +#include "full-write.h" +#include "ioblksize.h" + +/* Result of infer_scantype when it returns LSEEK_SCANTYPE. */ +struct scan_inference +{ + /* The offset of the first data block, or -1 if the file has no data. */ + off_t ext_start; + + /* The offset of the first hole. */ + off_t hole_start; +}; + +/* Attempt to punch a hole to avoid any permanent + speculative preallocation on file systems such as XFS. + Return values as per fallocate(2) except ENOSYS etc. are ignored. */ +static int +punch_hole (int fd, off_t offset, off_t length) +{ + int ret = 0; +/* +0 is to work around older defining HAVE_FALLOCATE to empty. */ +#if HAVE_FALLOCATE + 0 +# if defined FALLOC_FL_PUNCH_HOLE && defined FALLOC_FL_KEEP_SIZE + ret = fallocate (fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, + offset, length); + if (ret < 0 && (is_ENOTSUP (errno) || errno == ENOSYS)) + ret = 0; +# endif +#endif + return ret; +} + +/* Create a hole at the end of the file with descriptor FD and name NAME. + The hole is of size SIZE. Assume FD is already at file end, + and advance FD past the newly-created hole. */ +static bool +create_hole (int fd, char const *name, off_t size) +{ + off_t file_end = lseek (fd, size, SEEK_CUR); + + if (file_end < 0) + { + error (0, errno, _("cannot lseek %s"), quoteaf (name)); + return false; + } + + /* Some file systems (like XFS) preallocate when write extending a file. + I.e., a previous write() may have preallocated extra space + that the seek above will not discard. A subsequent write() could + then make this allocation permanent. */ + if (punch_hole (fd, file_end - size, size) < 0) + { + error (0, errno, _("error deallocating %s"), quoteaf (name)); + return false; + } + + return true; +} + +/* Similarly, whether ERR indicates that the copying operation is not + supported or allowed for this file or process, even though the + operation was invoked correctly. */ +static bool +is_CLONENOTSUP (int err) +{ + return err == ENOSYS || err == ENOTTY || is_ENOTSUP (err) + || err == EINVAL || err == EBADF + || err == EXDEV || err == ETXTBSY + || err == EPERM || err == EACCES; +} + +/* Copy the regular file open on SRC_FD/SRC_NAME to DST_FD/DST_NAME, + honoring the MAKE_HOLES setting and using the BUF_SIZE-byte buffer + *ABUF for temporary storage, allocating it lazily if *ABUF is null. + For best results, *ABUF should be well-aligned. + Copy no more than MAX_N_READ bytes. + If HOLE_SIZE, look for holes in the input; *HOLE_SIZE contains + the size of the current hole so far, and update *HOLE_SIZE + at end to be the size of the hole at the end of the copy. + Set *TOTAL_N_READ to the number of bytes read; this counts + the trailing hole, which has not yet been output. + Read and update *DEBUG as needed. + Return true upon successful completion; + print a diagnostic and return false upon error. */ +static bool +sparse_copy (int src_fd, int dest_fd, char **abuf, size_t buf_size, + bool allow_reflink, + char const *src_name, char const *dst_name, + uintmax_t max_n_read, off_t *hole_size, off_t *total_n_read, + struct copy_debug *debug) +{ + *total_n_read = 0; + + if (debug->sparse_detection == COPY_DEBUG_UNKNOWN) + debug->sparse_detection = hole_size ? COPY_DEBUG_YES : COPY_DEBUG_NO; + else if (hole_size && debug->sparse_detection == COPY_DEBUG_EXTERNAL) + debug->sparse_detection = COPY_DEBUG_EXTERNAL_INTERNAL; + + /* If not looking for holes, use copy_file_range if functional, + but don't use if reflink disallowed as that may be implicit. */ + if (!hole_size && allow_reflink) + while (max_n_read) + { + /* Copy at most COPY_MAX bytes at a time; this is min + (SSIZE_MAX, SIZE_MAX) truncated to a value that is + surely aligned well. */ + ssize_t copy_max = MIN (SSIZE_MAX, SIZE_MAX) >> 30 << 30; + ssize_t n_copied = copy_file_range (src_fd, nullptr, dest_fd, nullptr, + MIN (max_n_read, copy_max), 0); + if (n_copied == 0) + { + /* copy_file_range incorrectly returns 0 when reading from + the proc file system on the Linux kernel through at + least 5.6.19 (2020), so fall back on 'read' if the + input file seems empty. */ + if (*total_n_read == 0) + break; + debug->offload = COPY_DEBUG_YES; + return true; + } + if (n_copied < 0) + { + debug->offload = COPY_DEBUG_UNSUPPORTED; + + /* Consider operation unsupported only if no data copied. + For example, EPERM could occur if copy_file_range not enabled + in seccomp filters, so retry with a standard copy. EPERM can + also occur for immutable files, but that would only be in the + edge case where the file is made immutable after creating, + in which case the (more accurate) error is still shown. */ + if (*total_n_read == 0 && is_CLONENOTSUP (errno)) + break; + + /* ENOENT was seen sometimes across CIFS shares, resulting in + no data being copied, but subsequent standard copies succeed. */ + if (*total_n_read == 0 && errno == ENOENT) + break; + + if (errno == EINTR) + n_copied = 0; + else + { + error (0, errno, _("error copying %s to %s"), + quoteaf_n (0, src_name), quoteaf_n (1, dst_name)); + return false; + } + } + debug->offload = COPY_DEBUG_YES; + max_n_read -= n_copied; + *total_n_read += n_copied; + } + else + debug->offload = COPY_DEBUG_AVOIDED; + + + off_t psize = hole_size ? *hole_size : 0; + bool make_hole = !!psize; + + while (max_n_read) + { + if (!*abuf) + *abuf = xalignalloc (getpagesize (), buf_size); + char *buf = *abuf; + ssize_t n_read = read (src_fd, buf, MIN (max_n_read, buf_size)); + if (n_read < 0) + { + if (errno == EINTR) + continue; + error (0, errno, _("error reading %s"), quoteaf (src_name)); + return false; + } + if (n_read == 0) + break; + max_n_read -= n_read; + *total_n_read += n_read; + + /* If looking for holes, loop over the input buffer in chunks + corresponding to the minimum hole size. Otherwise, scan the + whole buffer. struct stat does not report the minimum hole + size for a file, so use ST_NBLOCKSIZE which should be the + minimum for all file systems on this platform. */ + size_t csize = hole_size ? ST_NBLOCKSIZE : buf_size; + char *cbuf = buf; + char *pbuf = buf; + + while (n_read) + { + bool prev_hole = make_hole; + csize = MIN (csize, n_read); + + if (hole_size) + make_hole = is_nul (cbuf, csize); + + bool transition = (make_hole != prev_hole) && psize; + bool last_chunk = n_read == csize && !make_hole; + + if (transition || last_chunk) + { + if (! transition) + psize += csize; + else if (prev_hole) + { + if (! create_hole (dest_fd, dst_name, psize)) + return false; + pbuf = cbuf; + psize = csize; + } + + if (!prev_hole || (transition && last_chunk)) + { + if (full_write (dest_fd, pbuf, psize) != psize) + { + error (0, errno, _("error writing %s"), + quoteaf (dst_name)); + return false; + } + psize = !prev_hole && transition ? csize : 0; + } + } + else /* Coalesce writes/seeks. */ + { + if (ckd_add (&psize, psize, csize)) + { + error (0, 0, _("overflow reading %s"), quoteaf (src_name)); + return false; + } + } + + n_read -= csize; + cbuf += csize; + } + + /* It's tempting to break early here upon a short read from + a regular file. That would save the final read syscall + for each file. Unfortunately that doesn't work for + certain files in /proc or /sys with linux kernels. */ + } + + if (hole_size) + *hole_size = make_hole ? psize : 0; + return true; +} + +/* Write N_BYTES zero bytes to file descriptor FD. Return true if successful. + Upon write failure, set errno and return false. */ +static bool +write_zeros (int fd, off_t n_bytes) +{ + static char *zeros; + static size_t nz = IO_BUFSIZE; + + /* Attempt to use a relatively large calloc'd source buffer for + efficiency, but if that allocation fails, resort to a smaller + statically allocated one. */ + if (zeros == nullptr) + { + static char fallback[1024]; + zeros = calloc (nz, 1); + if (zeros == nullptr) + { + zeros = fallback; + nz = sizeof fallback; + } + } + + while (n_bytes) + { + size_t n = MIN (nz, n_bytes); + if ((full_write (fd, zeros, n)) != n) + return false; + n_bytes -= n; + } + + return true; +} + +#ifdef SEEK_HOLE +/* Perform an efficient extent copy, if possible. This avoids + the overhead of detecting holes in hole-introducing/preserving + copy, and thus makes copying sparse files much more efficient. + Copy from SRC_FD to DEST_FD, using *ABUF (of size BUF_SIZE) for a buffer. + Allocate *ABUF lazily if *ABUF is null. + The input file is of size SRC_TOTAL_SIZE. + Use SPARSE_MODE to determine whether to create holes in the output. + SRC_NAME and DST_NAME are the input and output file names. + Set *HOLE_SIZE to be the size of the hole at the end of the input. + Set *TOTAL_N_READ to the number of bytes read; this counts + the trailing hole, which has not yet been output. + Read and update *DEBUG as needed. + Return true if successful, false (with a diagnostic) otherwise. */ +static bool +lseek_copy (int src_fd, int dest_fd, char **abuf, size_t buf_size, + off_t src_pos, struct scan_inference const *scan_inference, + off_t src_total_size, + enum Sparse_type sparse_mode, + bool allow_reflink, + char const *src_name, char const *dst_name, + off_t *hole_size, off_t *total_n_read, + struct copy_debug *debug) +{ + off_t last_ext_start = src_pos; + off_t last_ext_len = 0; + off_t dest_pos = 0; + + debug->sparse_detection = COPY_DEBUG_EXTERNAL; + + for (off_t ext_start = scan_inference->ext_start; 0 <= ext_start; ) + { + off_t ext_end = (ext_start == 0 + ? scan_inference->hole_start + : lseek (src_fd, ext_start, SEEK_HOLE)); + if (ext_end < 0) + { + if (errno != ENXIO) + goto cannot_lseek; + ext_end = src_total_size; + if (ext_end <= ext_start) + { + /* The input file grew; get its current size. */ + src_total_size = lseek (src_fd, 0, SEEK_END); + if (src_total_size < 0) + goto cannot_lseek; + + /* If the input file shrank after growing, stop copying. */ + if (src_total_size <= ext_start) + break; + + ext_end = src_total_size; + } + } + /* If the input file must have grown, increase its measured size. */ + if (src_total_size < ext_end) + src_total_size = ext_end; + + if (lseek (src_fd, ext_start, SEEK_SET) < 0) + goto cannot_lseek; + + off_t ext_hole_size = ext_start - last_ext_start - last_ext_len; + + if (ext_hole_size) + { + if (sparse_mode == SPARSE_ALWAYS) + *hole_size += ext_hole_size; + else if (sparse_mode != SPARSE_NEVER) + { + if (! create_hole (dest_fd, dst_name, ext_hole_size)) + return false; + } + else + { + /* When not inducing holes and when there is a hole between + the end of the previous extent and the beginning of the + current one, write zeros to the destination file. */ + if (! write_zeros (dest_fd, ext_hole_size)) + { + error (0, errno, _("%s: write failed"), + quotef (dst_name)); + return false; + } + } + } + + off_t ext_len = ext_end - ext_start; + last_ext_start = ext_start; + last_ext_len = ext_len; + + /* Copy this extent, looking for further opportunities to not + bother to write zeros if --sparse=always, since SEEK_HOLE + is conservative and may miss some holes. */ + off_t n_read; + if ( ! sparse_copy (src_fd, dest_fd, abuf, buf_size, + allow_reflink, src_name, dst_name, + ext_len, + sparse_mode == SPARSE_ALWAYS ? hole_size : nullptr, + &n_read, debug)) + return false; + + dest_pos = ext_start + n_read; + if (n_read < ext_len) + { + /* The input file shrank. */ + src_total_size = dest_pos; + break; + } + + ext_start = lseek (src_fd, dest_pos, SEEK_DATA); + if (ext_start < 0 && errno != ENXIO) + goto cannot_lseek; + } + + *hole_size += src_total_size - (last_ext_start + last_ext_len); + *total_n_read = src_total_size; + return true; + + cannot_lseek: + error (0, errno, _("cannot lseek %s"), quoteaf (src_name)); + return false; +} +#endif + +#ifndef HAVE_STRUCT_STAT_ST_BLOCKS +# define HAVE_STRUCT_STAT_ST_BLOCKS 0 +#endif + +/* Type of scan being done on the input when looking for sparseness. */ +enum scantype + { + /* An error was found when determining scantype. */ + ERROR_SCANTYPE, + + /* No fancy scanning; just read and write. */ + PLAIN_SCANTYPE, + + /* Read and examine data looking for zero blocks; useful when + attempting to create sparse output. */ + ZERO_SCANTYPE, + + /* lseek information is available. */ + LSEEK_SCANTYPE, + }; + +/* Return how to scan a file with descriptor FD and stat buffer SB. + The scan starts at POS. + Set *SCAN_INFERENCE if returning LSEEK_SCANTYPE. */ +static enum scantype +infer_scantype (int fd, struct stat const *sb, off_t pos, + struct scan_inference *scan_inference) +{ + /* Try SEEK_HOLE only if this heuristic + suggests the file is sparse. */ + if (! (HAVE_STRUCT_STAT_ST_BLOCKS + && S_ISREG (sb->st_mode) + && STP_NBLOCKS (sb) < sb->st_size / ST_NBLOCKSIZE)) + return PLAIN_SCANTYPE; + +#ifdef SEEK_HOLE + scan_inference->ext_start = lseek (fd, pos, SEEK_DATA); + if (scan_inference->ext_start == pos) + { + scan_inference->hole_start = lseek (fd, pos, SEEK_HOLE); + if (0 <= scan_inference->hole_start) + { + if (scan_inference->hole_start < sb->st_size) + return LSEEK_SCANTYPE; + + /* Though the file likely has holes, SEEK_DATA and SEEK_HOLE + didn't find any. This can happen with file systems like + circa-2025 squashfs that support SEEK_HOLE only trivially. + Fall back on ZERO_SCANTYPE. */ + if (lseek (fd, pos, SEEK_SET) < 0) + return ERROR_SCANTYPE; + } + } + else if (pos < scan_inference->ext_start || errno == ENXIO) + { + scan_inference->hole_start = 0; /* Pacify -Wmaybe-uninitialized. */ + return LSEEK_SCANTYPE; + } + else if (errno != EINVAL && !is_ENOTSUP (errno)) + return ERROR_SCANTYPE; +#endif + + return ZERO_SCANTYPE; +} + +/* Copy data from input file (descriptor IFD, status IST, initial file + offset IPOS, and name INAME) to output file (OFD, OST, OPOS, ONAME). + Copy until IBYTES have been copied or until end of file; + if IBYTES is OFF_T_MAX that suffices to copy to end of file. + Respect copy options X's sparse_mode and reflink_mode settings. + Read and update *DEBUG as needed. */ +bool +copy_file_data (int ifd, struct stat const *ist, off_t ipos, char const *iname, + int ofd, struct stat const *ost, off_t opos, char const *oname, + off_t ibytes, struct cp_options const *x, struct copy_debug *debug) +{ + /* Choose a suitable buffer size; it may be adjusted later. */ + size_t buf_size = io_blksize (ost); + + /* Deal with sparse files. */ + struct scan_inference scan_inference; + enum scantype scantype = infer_scantype (ifd, ist, ipos, &scan_inference); + if (scantype == ERROR_SCANTYPE) + { + error (0, errno, _("cannot lseek %s"), quoteaf (iname)); + return false; + } + bool make_holes + = (S_ISREG (ost->st_mode) + && (x->sparse_mode == SPARSE_ALWAYS + || (x->sparse_mode == SPARSE_AUTO + && scantype != PLAIN_SCANTYPE))); + + fdadvise (ifd, ipos, ibytes, FADVISE_SEQUENTIAL); + + /* If not making a sparse file, try to use a more-efficient + buffer size. */ + if (! make_holes) + { + /* Compute the least common multiple of the input and output + buffer sizes, adjusting for outlandish values. + Note we read in multiples of the reported block size + to support (unusual) devices that have this constraint. */ + size_t blcm_max = MIN (SIZE_MAX, SSIZE_MAX); + size_t blcm = buffer_lcm (io_blksize (ist), buf_size, + blcm_max); + + /* Do not bother with a buffer larger than the input file, plus one + byte to make sure the file has not grown while reading it. */ + if (S_ISREG (ist->st_mode) && ist->st_size < buf_size) + buf_size = ist->st_size + 1; + + /* However, stick with a block size that is a positive multiple of + blcm, overriding the above adjustments. Watch out for + overflow. */ + buf_size += blcm - 1; + buf_size -= buf_size % blcm; + if (buf_size == 0 || blcm_max < buf_size) + buf_size = blcm; + } + + char *buf = nullptr; + bool copy_ok; + off_t hole_size = 0, n_read; + +#ifdef SEEK_HOLE + if (scantype == LSEEK_SCANTYPE) + copy_ok = lseek_copy (ifd, ofd, &buf, buf_size, + ipos, &scan_inference, ist->st_size, + make_holes ? x->sparse_mode : SPARSE_NEVER, + x->reflink_mode != REFLINK_NEVER, + iname, oname, &hole_size, &n_read, debug); +#else + assume (scantype != LSEEK_SCANTYPE); +#endif + + if (scantype != LSEEK_SCANTYPE) + copy_ok = sparse_copy (ifd, ofd, &buf, buf_size, + x->reflink_mode != REFLINK_NEVER, + iname, oname, UINTMAX_MAX, + make_holes ? &hole_size : nullptr, + &n_read, debug); + + if (copy_ok && 0 < hole_size) + { + off_t oend; + if (ckd_add (&oend, opos, n_read) + ? (errno = EOVERFLOW, true) + : make_holes + ? ftruncate (ofd, oend) < 0 + : !write_zeros (ofd, hole_size)) + { + error (0, errno, _("failed to extend %s"), quoteaf (oname)); + copy_ok = false; + } + else if (make_holes + && punch_hole (ofd, oend - hole_size, hole_size) < 0) + { + error (0, errno, _("error deallocating %s"), quoteaf (oname)); + copy_ok = false; + } + } + + alignfree (buf); + return copy_ok; +} diff --git a/src/copy.c b/src/copy.c index b5db72f27d..e9bfff9420 100644 --- a/src/copy.c +++ b/src/copy.c @@ -31,24 +31,19 @@ #include "system.h" #include "acl.h" -#include "alignalloc.h" #include "assure.h" #include "backupfile.h" -#include "buffer-lcm.h" #include "canonicalize.h" #include "copy.h" #include "cp-hash.h" -#include "fadvise.h" #include "fcntl--.h" #include "file-set.h" #include "filemode.h" #include "filenamecat.h" #include "force-link.h" -#include "full-write.h" #include "hash.h" #include "hashcode-file.h" #include "ignore-value.h" -#include "ioblksize.h" #include "issymlink.h" #include "quote.h" #include "renameatu.h" @@ -107,16 +102,6 @@ # define CAN_HARDLINK_SYMLINKS 0 #endif -/* Result of infer_scantype when it returns LSEEK_SCANTYPE. */ -struct scan_inference -{ - /* The offset of the first data block, or -1 if the file has no data. */ - off_t ext_start; - - /* The offset of the first hole. */ - off_t hole_start; -}; - struct dir_list { struct dir_list *parent; @@ -143,24 +128,6 @@ static bool owner_failure_ok (struct cp_options const *x); static char const *top_level_src_name; static char const *top_level_dst_name; -enum copy_debug_val - { - COPY_DEBUG_UNKNOWN, - COPY_DEBUG_NO, - COPY_DEBUG_YES, - COPY_DEBUG_EXTERNAL, - COPY_DEBUG_EXTERNAL_INTERNAL, - COPY_DEBUG_AVOIDED, - COPY_DEBUG_UNSUPPORTED, - }; - -struct copy_debug -{ - enum copy_debug_val offload; - enum copy_debug_val reflink; - enum copy_debug_val sparse_detection; -}; - /* debug info about the last file copy. */ static struct copy_debug copy_debug; @@ -245,55 +212,6 @@ follow_fstatat (int dirfd, char const *filename, struct stat *st, int flags) return result; } -/* Attempt to punch a hole to avoid any permanent - speculative preallocation on file systems such as XFS. - Return values as per fallocate(2) except ENOSYS etc. are ignored. */ - -static int -punch_hole (int fd, off_t offset, off_t length) -{ - int ret = 0; -/* +0 is to work around older defining HAVE_FALLOCATE to empty. */ -#if HAVE_FALLOCATE + 0 -# if defined FALLOC_FL_PUNCH_HOLE && defined FALLOC_FL_KEEP_SIZE - ret = fallocate (fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, - offset, length); - if (ret < 0 && (is_ENOTSUP (errno) || errno == ENOSYS)) - ret = 0; -# endif -#endif - return ret; -} - -/* Create a hole at the end of the file with descriptor FD and name NAME. - The hole is of size SIZE. Assume FD is already at file end, - and advance FD past the newly-created hole. */ - -static bool -create_hole (int fd, char const *name, off_t size) -{ - off_t file_end = lseek (fd, size, SEEK_CUR); - - if (file_end < 0) - { - error (0, errno, _("cannot lseek %s"), quoteaf (name)); - return false; - } - - /* Some file systems (like XFS) preallocate when write extending a file. - I.e., a previous write() may have preallocated extra space - that the seek above will not discard. A subsequent write() could - then make this allocation permanent. */ - if (punch_hole (fd, file_end - size, size) < 0) - { - error (0, errno, _("error deallocating %s"), quoteaf (name)); - return false; - } - - return true; -} - - /* Whether an errno value ERR, set by FICLONE or copy_file_range, indicates that the copying operation has terminally failed, even though it was invoked correctly (so that, e.g, EBADF cannot occur) @@ -305,192 +223,6 @@ is_terminal_error (int err) return err == EIO || err == ENOMEM || err == ENOSPC || err == EDQUOT; } -/* Similarly, whether ERR indicates that the copying operation is not - supported or allowed for this file or process, even though the - operation was invoked correctly. */ - -static bool -is_CLONENOTSUP (int err) -{ - return err == ENOSYS || err == ENOTTY || is_ENOTSUP (err) - || err == EINVAL || err == EBADF - || err == EXDEV || err == ETXTBSY - || err == EPERM || err == EACCES; -} - - -/* Copy the regular file open on SRC_FD/SRC_NAME to DST_FD/DST_NAME, - honoring the MAKE_HOLES setting and using the BUF_SIZE-byte buffer - *ABUF for temporary storage, allocating it lazily if *ABUF is null. - For best results, *ABUF should be well-aligned. - Copy no more than MAX_N_READ bytes. - If HOLE_SIZE, look for holes in the input; *HOLE_SIZE contains - the size of the current hole so far, and update *HOLE_SIZE - at end to be the size of the hole at the end of the copy. - Set *TOTAL_N_READ to the number of bytes read; this counts - the trailing hole, which has not yet been output. - Read and update *DEBUG as needed. - Return true upon successful completion; - print a diagnostic and return false upon error. */ -static bool -sparse_copy (int src_fd, int dest_fd, char **abuf, size_t buf_size, - bool allow_reflink, - char const *src_name, char const *dst_name, - uintmax_t max_n_read, off_t *hole_size, off_t *total_n_read, - struct copy_debug *debug) -{ - *total_n_read = 0; - - if (debug->sparse_detection == COPY_DEBUG_UNKNOWN) - debug->sparse_detection = hole_size ? COPY_DEBUG_YES : COPY_DEBUG_NO; - else if (hole_size && debug->sparse_detection == COPY_DEBUG_EXTERNAL) - debug->sparse_detection = COPY_DEBUG_EXTERNAL_INTERNAL; - - /* If not looking for holes, use copy_file_range if functional, - but don't use if reflink disallowed as that may be implicit. */ - if (!hole_size && allow_reflink) - while (max_n_read) - { - /* Copy at most COPY_MAX bytes at a time; this is min - (SSIZE_MAX, SIZE_MAX) truncated to a value that is - surely aligned well. */ - ssize_t copy_max = MIN (SSIZE_MAX, SIZE_MAX) >> 30 << 30; - ssize_t n_copied = copy_file_range (src_fd, nullptr, dest_fd, nullptr, - MIN (max_n_read, copy_max), 0); - if (n_copied == 0) - { - /* copy_file_range incorrectly returns 0 when reading from - the proc file system on the Linux kernel through at - least 5.6.19 (2020), so fall back on 'read' if the - input file seems empty. */ - if (*total_n_read == 0) - break; - debug->offload = COPY_DEBUG_YES; - return true; - } - if (n_copied < 0) - { - debug->offload = COPY_DEBUG_UNSUPPORTED; - - /* Consider operation unsupported only if no data copied. - For example, EPERM could occur if copy_file_range not enabled - in seccomp filters, so retry with a standard copy. EPERM can - also occur for immutable files, but that would only be in the - edge case where the file is made immutable after creating, - in which case the (more accurate) error is still shown. */ - if (*total_n_read == 0 && is_CLONENOTSUP (errno)) - break; - - /* ENOENT was seen sometimes across CIFS shares, resulting in - no data being copied, but subsequent standard copies succeed. */ - if (*total_n_read == 0 && errno == ENOENT) - break; - - if (errno == EINTR) - n_copied = 0; - else - { - error (0, errno, _("error copying %s to %s"), - quoteaf_n (0, src_name), quoteaf_n (1, dst_name)); - return false; - } - } - debug->offload = COPY_DEBUG_YES; - max_n_read -= n_copied; - *total_n_read += n_copied; - } - else - debug->offload = COPY_DEBUG_AVOIDED; - - - off_t psize = hole_size ? *hole_size : 0; - bool make_hole = !!psize; - - while (max_n_read) - { - if (!*abuf) - *abuf = xalignalloc (getpagesize (), buf_size); - char *buf = *abuf; - ssize_t n_read = read (src_fd, buf, MIN (max_n_read, buf_size)); - if (n_read < 0) - { - if (errno == EINTR) - continue; - error (0, errno, _("error reading %s"), quoteaf (src_name)); - return false; - } - if (n_read == 0) - break; - max_n_read -= n_read; - *total_n_read += n_read; - - /* If looking for holes, loop over the input buffer in chunks - corresponding to the minimum hole size. Otherwise, scan the - whole buffer. struct stat does not report the minimum hole - size for a file, so use ST_NBLOCKSIZE which should be the - minimum for all file systems on this platform. */ - size_t csize = hole_size ? ST_NBLOCKSIZE : buf_size; - char *cbuf = buf; - char *pbuf = buf; - - while (n_read) - { - bool prev_hole = make_hole; - csize = MIN (csize, n_read); - - if (hole_size) - make_hole = is_nul (cbuf, csize); - - bool transition = (make_hole != prev_hole) && psize; - bool last_chunk = n_read == csize && !make_hole; - - if (transition || last_chunk) - { - if (! transition) - psize += csize; - else if (prev_hole) - { - if (! create_hole (dest_fd, dst_name, psize)) - return false; - pbuf = cbuf; - psize = csize; - } - - if (!prev_hole || (transition && last_chunk)) - { - if (full_write (dest_fd, pbuf, psize) != psize) - { - error (0, errno, _("error writing %s"), - quoteaf (dst_name)); - return false; - } - psize = !prev_hole && transition ? csize : 0; - } - } - else /* Coalesce writes/seeks. */ - { - if (ckd_add (&psize, psize, csize)) - { - error (0, 0, _("overflow reading %s"), quoteaf (src_name)); - return false; - } - } - - n_read -= csize; - cbuf += csize; - } - - /* It's tempting to break early here upon a short read from - a regular file. That would save the final read syscall - for each file. Unfortunately that doesn't work for - certain files in /proc or /sys with linux kernels. */ - } - - if (hole_size) - *hole_size = make_hole ? psize : 0; - return true; -} - /* Perform the O(1) btrfs clone operation, if possible. Upon success, return 0. Otherwise, return -1 and set errno. */ static inline int @@ -506,164 +238,6 @@ clone_file (int dest_fd, int src_fd) #endif } -/* Write N_BYTES zero bytes to file descriptor FD. Return true if successful. - Upon write failure, set errno and return false. */ -static bool -write_zeros (int fd, off_t n_bytes) -{ - static char *zeros; - static size_t nz = IO_BUFSIZE; - - /* Attempt to use a relatively large calloc'd source buffer for - efficiency, but if that allocation fails, resort to a smaller - statically allocated one. */ - if (zeros == nullptr) - { - static char fallback[1024]; - zeros = calloc (nz, 1); - if (zeros == nullptr) - { - zeros = fallback; - nz = sizeof fallback; - } - } - - while (n_bytes) - { - size_t n = MIN (nz, n_bytes); - if ((full_write (fd, zeros, n)) != n) - return false; - n_bytes -= n; - } - - return true; -} - -#ifdef SEEK_HOLE -/* Perform an efficient extent copy, if possible. This avoids - the overhead of detecting holes in hole-introducing/preserving - copy, and thus makes copying sparse files much more efficient. - Copy from SRC_FD to DEST_FD, using *ABUF (of size BUF_SIZE) for a buffer. - Allocate *ABUF lazily if *ABUF is null. - The input file is of size SRC_TOTAL_SIZE. - Use SPARSE_MODE to determine whether to create holes in the output. - SRC_NAME and DST_NAME are the input and output file names. - Set *HOLE_SIZE to be the size of the hole at the end of the input. - Set *TOTAL_N_READ to the number of bytes read; this counts - the trailing hole, which has not yet been output. - Read and update *DEBUG as needed. - Return true if successful, false (with a diagnostic) otherwise. */ - -static bool -lseek_copy (int src_fd, int dest_fd, char **abuf, size_t buf_size, - off_t src_pos, struct scan_inference const *scan_inference, - off_t src_total_size, - enum Sparse_type sparse_mode, - bool allow_reflink, - char const *src_name, char const *dst_name, - off_t *hole_size, off_t *total_n_read, - struct copy_debug *debug) -{ - off_t last_ext_start = src_pos; - off_t last_ext_len = 0; - off_t dest_pos = 0; - - debug->sparse_detection = COPY_DEBUG_EXTERNAL; - - for (off_t ext_start = scan_inference->ext_start; 0 <= ext_start; ) - { - off_t ext_end = (ext_start == 0 - ? scan_inference->hole_start - : lseek (src_fd, ext_start, SEEK_HOLE)); - if (ext_end < 0) - { - if (errno != ENXIO) - goto cannot_lseek; - ext_end = src_total_size; - if (ext_end <= ext_start) - { - /* The input file grew; get its current size. */ - src_total_size = lseek (src_fd, 0, SEEK_END); - if (src_total_size < 0) - goto cannot_lseek; - - /* If the input file shrank after growing, stop copying. */ - if (src_total_size <= ext_start) - break; - - ext_end = src_total_size; - } - } - /* If the input file must have grown, increase its measured size. */ - if (src_total_size < ext_end) - src_total_size = ext_end; - - if (lseek (src_fd, ext_start, SEEK_SET) < 0) - goto cannot_lseek; - - off_t ext_hole_size = ext_start - last_ext_start - last_ext_len; - - if (ext_hole_size) - { - if (sparse_mode == SPARSE_ALWAYS) - *hole_size += ext_hole_size; - else if (sparse_mode != SPARSE_NEVER) - { - if (! create_hole (dest_fd, dst_name, ext_hole_size)) - return false; - } - else - { - /* When not inducing holes and when there is a hole between - the end of the previous extent and the beginning of the - current one, write zeros to the destination file. */ - if (! write_zeros (dest_fd, ext_hole_size)) - { - error (0, errno, _("%s: write failed"), - quotef (dst_name)); - return false; - } - } - } - - off_t ext_len = ext_end - ext_start; - last_ext_start = ext_start; - last_ext_len = ext_len; - - /* Copy this extent, looking for further opportunities to not - bother to write zeros if --sparse=always, since SEEK_HOLE - is conservative and may miss some holes. */ - off_t n_read; - if ( ! sparse_copy (src_fd, dest_fd, abuf, buf_size, - allow_reflink, src_name, dst_name, - ext_len, - sparse_mode == SPARSE_ALWAYS ? hole_size : nullptr, - &n_read, debug)) - return false; - - dest_pos = ext_start + n_read; - if (n_read < ext_len) - { - /* The input file shrank. */ - src_total_size = dest_pos; - break; - } - - ext_start = lseek (src_fd, dest_pos, SEEK_DATA); - if (ext_start < 0 && errno != ENXIO) - goto cannot_lseek; - } - - *hole_size += src_total_size - (last_ext_start + last_ext_len); - *total_n_read = src_total_size; - return true; - - cannot_lseek: - error (0, errno, _("cannot lseek %s"), quoteaf (src_name)); - return false; -} -#endif - /* FIXME: describe */ /* FIXME: rewrite this to use a hash table so we avoid the quadratic performance hit that's probably noticeable only on trees deeper @@ -1084,71 +658,6 @@ set_file_security_ctx (char const *dst_name, return true; } -#ifndef HAVE_STRUCT_STAT_ST_BLOCKS -# define HAVE_STRUCT_STAT_ST_BLOCKS 0 -#endif - -/* Type of scan being done on the input when looking for sparseness. */ -enum scantype - { - /* An error was found when determining scantype. */ - ERROR_SCANTYPE, - - /* No fancy scanning; just read and write. */ - PLAIN_SCANTYPE, - - /* Read and examine data looking for zero blocks; useful when - attempting to create sparse output. */ - ZERO_SCANTYPE, - - /* lseek information is available. */ - LSEEK_SCANTYPE, - }; - -/* Return how to scan a file with descriptor FD and stat buffer SB. - The scan starts at POS. - Set *SCAN_INFERENCE if returning LSEEK_SCANTYPE. */ -static enum scantype -infer_scantype (int fd, struct stat const *sb, off_t pos, - struct scan_inference *scan_inference) -{ - /* Try SEEK_HOLE only if this heuristic - suggests the file is sparse. */ - if (! (HAVE_STRUCT_STAT_ST_BLOCKS - && S_ISREG (sb->st_mode) - && STP_NBLOCKS (sb) < sb->st_size / ST_NBLOCKSIZE)) - return PLAIN_SCANTYPE; - -#ifdef SEEK_HOLE - scan_inference->ext_start = lseek (fd, pos, SEEK_DATA); - if (scan_inference->ext_start == pos) - { - scan_inference->hole_start = lseek (fd, pos, SEEK_HOLE); - if (0 <= scan_inference->hole_start) - { - if (scan_inference->hole_start < sb->st_size) - return LSEEK_SCANTYPE; - - /* Though the file likely has holes, SEEK_DATA and SEEK_HOLE - didn't find any. This can happen with file systems like - circa-2025 squashfs that support SEEK_HOLE only trivially. - Fall back on ZERO_SCANTYPE. */ - if (lseek (fd, pos, SEEK_SET) < 0) - return ERROR_SCANTYPE; - } - } - else if (pos < scan_inference->ext_start || errno == ENXIO) - { - scan_inference->hole_start = 0; /* Pacify -Wmaybe-uninitialized. */ - return LSEEK_SCANTYPE; - } - else if (errno != EINVAL && !is_ENOTSUP (errno)) - return ERROR_SCANTYPE; -#endif - - return ZERO_SCANTYPE; -} - #if HAVE_FCLONEFILEAT && !USE_XATTR # include /* Return true if FD has a nontrivial ACL. */ @@ -1205,110 +714,6 @@ handle_clone_fail (int dst_dirfd, char const *dst_relname, return true; } - -/* Copy data from input file (descriptor IFD, status IST, initial file - offset IPOS, and name INAME) to output file (OFD, OST, OPOS, ONAME). - Copy until IBYTES have been copied or until end of file; - if IBYTES is OFF_T_MAX that suffices to copy to end of file. - Respect copy options X's sparse_mode and reflink_mode settings. - Read and update *DEBUG as needed. */ - -static bool -copy_file_data (int ifd, struct stat const *ist, off_t ipos, char const *iname, - int ofd, struct stat const *ost, off_t opos, char const *oname, - off_t ibytes, struct cp_options const *x, struct copy_debug *debug) -{ - /* Choose a suitable buffer size; it may be adjusted later. */ - size_t buf_size = io_blksize (ost); - - /* Deal with sparse files. */ - struct scan_inference scan_inference; - enum scantype scantype = infer_scantype (ifd, ist, ipos, &scan_inference); - if (scantype == ERROR_SCANTYPE) - { - error (0, errno, _("cannot lseek %s"), quoteaf (iname)); - return false; - } - bool make_holes - = (S_ISREG (ost->st_mode) - && (x->sparse_mode == SPARSE_ALWAYS - || (x->sparse_mode == SPARSE_AUTO - && scantype != PLAIN_SCANTYPE))); - - fdadvise (ifd, ipos, ibytes, FADVISE_SEQUENTIAL); - - /* If not making a sparse file, try to use a more-efficient - buffer size. */ - if (! make_holes) - { - /* Compute the least common multiple of the input and output - buffer sizes, adjusting for outlandish values. - Note we read in multiples of the reported block size - to support (unusual) devices that have this constraint. */ - size_t blcm_max = MIN (SIZE_MAX, SSIZE_MAX); - size_t blcm = buffer_lcm (io_blksize (ist), buf_size, - blcm_max); - - /* Do not bother with a buffer larger than the input file, plus one - byte to make sure the file has not grown while reading it. */ - if (S_ISREG (ist->st_mode) && ist->st_size < buf_size) - buf_size = ist->st_size + 1; - - /* However, stick with a block size that is a positive multiple of - blcm, overriding the above adjustments. Watch out for - overflow. */ - buf_size += blcm - 1; - buf_size -= buf_size % blcm; - if (buf_size == 0 || blcm_max < buf_size) - buf_size = blcm; - } - - char *buf = nullptr; - bool copy_ok; - off_t hole_size = 0, n_read; - -#ifdef SEEK_HOLE - if (scantype == LSEEK_SCANTYPE) - copy_ok = lseek_copy (ifd, ofd, &buf, buf_size, - ipos, &scan_inference, ist->st_size, - make_holes ? x->sparse_mode : SPARSE_NEVER, - x->reflink_mode != REFLINK_NEVER, - iname, oname, &hole_size, &n_read, debug); -#else - assume (scantype != LSEEK_SCANTYPE); -#endif - - if (scantype != LSEEK_SCANTYPE) - copy_ok = sparse_copy (ifd, ofd, &buf, buf_size, - x->reflink_mode != REFLINK_NEVER, - iname, oname, UINTMAX_MAX, - make_holes ? &hole_size : nullptr, - &n_read, debug); - - if (copy_ok && 0 < hole_size) - { - off_t oend; - if (ckd_add (&oend, opos, n_read) - ? (errno = EOVERFLOW, true) - : make_holes - ? ftruncate (ofd, oend) < 0 - : !write_zeros (ofd, hole_size)) - { - error (0, errno, _("failed to extend %s"), quoteaf (oname)); - copy_ok = false; - } - else if (make_holes - && punch_hole (ofd, oend - hole_size, hole_size) < 0) - { - error (0, errno, _("error deallocating %s"), quoteaf (oname)); - copy_ok = false; - } - } - - alignfree (buf); - return copy_ok; -} - /* Copy a regular file from SRC_NAME to DST_NAME aka DST_DIRFD+DST_RELNAME. If the source file contains holes, copies holes and blocks of zeros in the source file as holes in the destination file. diff --git a/src/copy.h b/src/copy.h index 35068ea5da..ccb00d8073 100644 --- a/src/copy.h +++ b/src/copy.h @@ -302,12 +302,39 @@ struct cp_options Hash_table *src_info; }; +enum copy_debug_val + { + COPY_DEBUG_UNKNOWN, + COPY_DEBUG_NO, + COPY_DEBUG_YES, + COPY_DEBUG_EXTERNAL, + COPY_DEBUG_EXTERNAL_INTERNAL, + COPY_DEBUG_AVOIDED, + COPY_DEBUG_UNSUPPORTED, + }; + +/* debug info about the last file copy. */ +struct copy_debug +{ + enum copy_debug_val offload; + enum copy_debug_val reflink; + enum copy_debug_val sparse_detection; +}; + bool copy (char const *src_name, char const *dst_name, int dst_dirfd, char const *dst_relname, int nonexistent_dst, const struct cp_options *options, bool *copy_into_self, bool *rename_succeeded) _GL_ATTRIBUTE_NONNULL ((1, 2, 4, 6, 7)); +bool copy_file_data (int ifd, struct stat const *ist, off_t ipos, + char const *iname, + int ofd, struct stat const *ost, off_t opos, + char const *oname, + off_t ibytes, struct cp_options const *x, + struct copy_debug *copy_debug) + _GL_ATTRIBUTE_NONNULL ((2, 4, 6, 8, 10, 11)); + extern bool set_process_security_ctx (char const *src_name, char const *dst_name, mode_t mode, bool new_dst, diff --git a/src/local.mk b/src/local.mk index c7c77a7c95..a2985140f8 100644 --- a/src/local.mk +++ b/src/local.mk @@ -347,6 +347,7 @@ selinux_sources = \ copy_sources = \ src/copy.c \ + src/copy-file-data.c \ src/cp-hash.c \ src/force-link.c \ src/force-link.h -- 2.47.3