cp and install now default to copy-on-write (COW) if available.
cp, install and mv now use the copy_file_range syscall if available.
- Also, they prefer lseek+SEEK_HOLE to ioctl+FS_IOC_FIEMAP on sparse
+ Also, they use lseek+SEEK_HOLE rather than ioctl+FS_IOC_FIEMAP on sparse
files, as lseek is simpler and more portable.
On GNU/Linux systems, ls no longer issues an error message on a
#include "canonicalize.h"
#include "copy.h"
#include "cp-hash.h"
-#include "extent-scan.h"
#include "die.h"
#include "error.h"
#include "fadvise.h"
#include "fcntl--.h"
-#include "fiemap.h"
#include "file-set.h"
#include "filemode.h"
#include "filenamecat.h"
return true;
}
-/* Perform an efficient extent copy, if possible. This avoids
- the overhead of detecting holes in hole-introducing/preserving
- copy, and thus makes copying sparse files much more efficient.
- Upon a successful copy, return true. If the initial extent scan
- fails, set *NORMAL_COPY_REQUIRED to true and return false.
- Upon any other failure, set *NORMAL_COPY_REQUIRED to false and
- return false.
-
- FIXME: Once we no longer need to support Linux kernel versions
- before 3.1 (2011), this function can be retired as it is superseded
- by lseek_copy. That is, we no longer need extent-scan.h and can
- remove any of the code that uses it. */
-static bool
-extent_copy (int src_fd, int dest_fd, char *buf, size_t buf_size,
- size_t hole_size, off_t src_total_size,
- enum Sparse_type sparse_mode,
- bool allow_reflink,
- char const *src_name, char const *dst_name,
- struct extent_scan *scan)
-{
- off_t last_ext_start = 0;
- off_t last_ext_len = 0;
-
- /* Keep track of the output position.
- We may need this at the end, for a final ftruncate. */
- off_t dest_pos = 0;
-
- bool wrote_hole_at_eof = true;
- while (true)
- {
- bool empty_extent = false;
- for (unsigned int i = 0; i < scan->ei_count || empty_extent; i++)
- {
- off_t ext_start;
- off_t ext_len;
- off_t ext_hole_size;
-
- if (i < scan->ei_count)
- {
- ext_start = scan->ext_info[i].ext_logical;
- ext_len = scan->ext_info[i].ext_length;
- }
- else /* empty extent at EOF. */
- {
- i--;
- ext_start = last_ext_start + scan->ext_info[i].ext_length;
- ext_len = 0;
- }
-
- /* Truncate extent to EOF. Extents starting after EOF are
- treated as zero length extents starting right after EOF.
- Generally this will trigger with an extent starting after
- src_total_size, and result in creating a hole or zeros until EOF.
- Though in a file in which extents have changed since src_total_size
- was determined, we might have an extent spanning that size,
- in which case we'll only copy data up to that size. */
- if (src_total_size < ext_start + ext_len)
- {
- if (src_total_size < ext_start)
- ext_start = src_total_size;
- ext_len = src_total_size - ext_start;
- }
-
- ext_hole_size = ext_start - last_ext_start - last_ext_len;
-
- wrote_hole_at_eof = false;
-
- if (ext_hole_size)
- {
- if (lseek (src_fd, ext_start, SEEK_SET) < 0)
- {
- error (0, errno, _("cannot lseek %s"), quoteaf (src_name));
- fail:
- extent_scan_free (scan);
- return false;
- }
-
- if ((empty_extent && sparse_mode == SPARSE_ALWAYS)
- || (!empty_extent && sparse_mode != SPARSE_NEVER))
- {
- if (! create_hole (dest_fd, dst_name,
- sparse_mode == SPARSE_ALWAYS,
- ext_hole_size))
- goto fail;
- wrote_hole_at_eof = true;
- }
- else
- {
- /* When not inducing holes and when there is a hole between
- the end of the previous extent and the beginning of the
- current one, write zeros to the destination file. */
- off_t nzeros = ext_hole_size;
- if (empty_extent)
- nzeros = MIN (src_total_size - dest_pos, ext_hole_size);
-
- if (! write_zeros (dest_fd, nzeros))
- {
- error (0, errno, _("%s: write failed"),
- quotef (dst_name));
- goto fail;
- }
-
- dest_pos = MIN (src_total_size, ext_start);
- }
- }
-
- last_ext_start = ext_start;
-
- /* Treat an unwritten but allocated extent much like a hole.
- I.e., don't read, but don't convert to a hole in the destination,
- unless SPARSE_ALWAYS. */
- /* For now, do not treat FIEMAP_EXTENT_UNWRITTEN specially,
- because that (in combination with no sync) would lead to data
- loss at least on XFS and ext4 when using 2.6.39-rc3 kernels. */
- if (0 && (scan->ext_info[i].ext_flags & FIEMAP_EXTENT_UNWRITTEN))
- {
- empty_extent = true;
- last_ext_len = 0;
- if (ext_len == 0) /* The last extent is empty and processed. */
- empty_extent = false;
- }
- else
- {
- off_t n_read;
- empty_extent = false;
- last_ext_len = ext_len;
- bool read_hole;
-
- if ( ! sparse_copy (src_fd, dest_fd, buf, buf_size,
- sparse_mode == SPARSE_ALWAYS ? hole_size: 0,
- true, allow_reflink, src_name, dst_name,
- ext_len, &n_read, &read_hole))
- goto fail;
-
- dest_pos = ext_start + n_read;
- if (n_read)
- wrote_hole_at_eof = read_hole;
- }
-
- /* If the file ends with unwritten extents not accounted for in the
- size, then skip processing them, and the associated redundant
- read() calls which will always return 0. We will need to
- remove this when we add fallocate() so that we can maintain
- extents beyond the apparent size. */
- if (dest_pos == src_total_size)
- {
- scan->hit_final_extent = true;
- break;
- }
- }
-
- /* Release the space allocated to scan->ext_info. */
- extent_scan_free (scan);
-
- if (scan->hit_final_extent)
- break;
- if (! extent_scan_read (scan) && ! scan->hit_final_extent)
- {
- error (0, errno, _("%s: failed to get extents info"),
- quotef (src_name));
- return false;
- }
- }
-
- /* When the source file ends with a hole, we have to do a little more work,
- since the above copied only up to and including the final extent.
- In order to complete the copy, we may have to insert a hole or write
- zeros in the destination corresponding to the source file's hole-at-EOF.
-
- In addition, if the final extent was a block of zeros at EOF and we've
- just converted them to a hole in the destination, we must call ftruncate
- here in order to record the proper length in the destination. */
- if ((dest_pos < src_total_size || wrote_hole_at_eof)
- && (sparse_mode != SPARSE_NEVER
- ? ftruncate (dest_fd, src_total_size)
- : ! write_zeros (dest_fd, src_total_size - dest_pos)))
- {
- error (0, errno, _("failed to extend %s"), quoteaf (dst_name));
- return false;
- }
-
- if (sparse_mode == SPARSE_ALWAYS && dest_pos < src_total_size
- && punch_hole (dest_fd, dest_pos, src_total_size - dest_pos) < 0)
- {
- error (0, errno, _("error deallocating %s"), quoteaf (dst_name));
- return false;
- }
-
- return true;
-}
-
#ifdef SEEK_HOLE
/* Perform an efficient extent copy, if possible. This avoids
the overhead of detecting holes in hole-introducing/preserving
/* lseek information is available. */
LSEEK_SCANTYPE,
-
- /* Extent information is available. */
- EXTENT_SCANTYPE
};
/* Result of infer_scantype. */
/* Used if infer_scantype returns LSEEK_SCANTYPE. This is the
offset of the first data block, or -1 if the file has no data. */
off_t ext_start;
-
- /* Used if infer_scantype returns EXTENT_SCANTYPE. */
- struct extent_scan extent_scan;
};
/* Return how to scan a file with descriptor FD and stat buffer SB.
return errno == ENXIO ? LSEEK_SCANTYPE : ERROR_SCANTYPE;
#endif
- struct extent_scan *scan = &scan_inference->extent_scan;
- extent_scan_init (fd, scan);
- extent_scan_read (scan);
- return scan->initial_scan_failed ? ZERO_SCANTYPE : EXTENT_SCANTYPE;
+ return ZERO_SCANTYPE;
}
off_t n_read;
bool wrote_hole_at_eof = false;
- if (! (scantype == EXTENT_SCANTYPE
- ? extent_copy (source_desc, dest_desc, buf, buf_size, hole_size,
- src_open_sb.st_size,
- make_holes ? x->sparse_mode : SPARSE_NEVER,
- x->reflink_mode != REFLINK_NEVER,
- src_name, dst_name, &scan_inference.extent_scan)
+ if (! (
#ifdef SEEK_HOLE
- : scantype == LSEEK_SCANTYPE
+ scantype == LSEEK_SCANTYPE
? lseek_copy (source_desc, dest_desc, buf, buf_size, hole_size,
scan_inference.ext_start, src_open_sb.st_size,
make_holes ? x->sparse_mode : SPARSE_NEVER,
x->reflink_mode != REFLINK_NEVER,
src_name, dst_name)
+ :
#endif
- : sparse_copy (source_desc, dest_desc, buf, buf_size,
+ sparse_copy (source_desc, dest_desc, buf, buf_size,
make_holes ? hole_size : 0,
x->sparse_mode == SPARSE_ALWAYS,
x->reflink_mode != REFLINK_NEVER,
+++ /dev/null
-/* extent-scan.c -- core functions for scanning extents
- Copyright (C) 2010-2021 Free Software Foundation, Inc.
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <https://www.gnu.org/licenses/>.
-
- Written by Jie Liu (jeff.liu@oracle.com). */
-
-#include <config.h>
-#include <sys/types.h>
-#include <sys/ioctl.h>
-#include <sys/utsname.h>
-#include <assert.h>
-
-#include "system.h"
-#include "extent-scan.h"
-#include "fiemap.h"
-#include "xstrtol.h"
-
-
-/* Work around Linux kernel issues on BTRFS and EXT4. */
-static bool
-extent_need_sync (void)
-{
- /* For now always return true, to be on the safe side.
- If/when FIEMAP semantics are well defined (before SEEK_HOLE support
- is usable) and kernels implementing them are in use, we may relax
- this once again. */
- return true;
-
-#if FIEMAP_BEHAVIOR_IS_DEFINED_AND_USABLE
- static int need_sync = -1;
-
- if (need_sync == -1)
- {
- struct utsname name;
- need_sync = 0; /* No workaround by default. */
-
-# ifdef __linux__
- if (uname (&name) != -1 && STRNCMP_LIT (name.release, "2.6.") == 0)
- {
- unsigned long val;
- if (xstrtoul (name.release + 4, NULL, 10, &val, NULL) == LONGINT_OK)
- {
- if (val < 39)
- need_sync = 1;
- }
- }
-# endif
- }
-
- return need_sync;
-#endif
-}
-
-/* Initialize the caller-supplied struct extent_scan SCAN for scanning
- SRC_FD, so that it can be passed to extent_scan_read(). */
-extern void
-extent_scan_init (int src_fd, struct extent_scan *scan)
-{
- scan->fd = src_fd;
- scan->ei_count = 0;
- scan->ext_info = NULL;
- scan->scan_start = 0;
- scan->initial_scan_failed = false;
- scan->hit_final_extent = false;
- scan->fm_flags = extent_need_sync () ? FIEMAP_FLAG_SYNC : 0;
-}
-
-#ifdef __linux__
-# ifndef FS_IOC_FIEMAP
-# define FS_IOC_FIEMAP _IOWR ('f', 11, struct fiemap)
-# endif
-/* Call ioctl(2) with FS_IOC_FIEMAP (available in linux 2.6.27) to
- obtain a map of file extents excluding holes. */
-extern bool
-extent_scan_read (struct extent_scan *scan)
-{
- unsigned int si = 0;
- struct extent_info *last_ei = scan->ext_info;
-
- while (true)
- {
- union { struct fiemap f; char c[4096]; } fiemap_buf;
- struct fiemap *fiemap = &fiemap_buf.f;
- struct fiemap_extent *fm_extents = &fiemap->fm_extents[0];
- enum { headersize = offsetof (struct fiemap, fm_extents) };
- enum { count = (sizeof fiemap_buf - headersize) / sizeof *fm_extents };
- verify (count > 1);
-
- /* This is required at least to initialize fiemap->fm_start,
- but also serves (in mid 2010) to appease valgrind, which
- appears not to know the semantics of the FIEMAP ioctl. */
- memset (&fiemap_buf, 0, sizeof fiemap_buf);
-
- fiemap->fm_start = scan->scan_start;
- fiemap->fm_flags = scan->fm_flags;
- fiemap->fm_extent_count = count;
- fiemap->fm_length = FIEMAP_MAX_OFFSET - scan->scan_start;
-
- /* Fall back to the standard copy if the very first ioctl(2)
- call fails. */
- if (ioctl (scan->fd, FS_IOC_FIEMAP, fiemap) < 0)
- {
- if (scan->scan_start == 0)
- scan->initial_scan_failed = true;
- return false;
- }
-
- /* If 0 extents are returned, then no more scans are needed. */
- if (fiemap->fm_mapped_extents == 0)
- {
- scan->hit_final_extent = true;
- return scan->scan_start != 0;
- }
-
- assert (scan->ei_count <= SIZE_MAX - fiemap->fm_mapped_extents);
- scan->ei_count += fiemap->fm_mapped_extents;
- {
- /* last_ei points into a buffer that may be freed via xnrealloc.
- Record its offset and adjust after allocation. */
- size_t prev_idx = last_ei - scan->ext_info;
- scan->ext_info = xnrealloc (scan->ext_info, scan->ei_count,
- sizeof (struct extent_info));
- last_ei = scan->ext_info + prev_idx;
- }
-
- unsigned int i = 0;
- for (i = 0; i < fiemap->fm_mapped_extents; i++)
- {
- assert (fm_extents[i].fe_logical
- <= OFF_T_MAX - fm_extents[i].fe_length);
-
- verify (sizeof last_ei->ext_flags >= sizeof fm_extents->fe_flags);
-
- if (si && last_ei->ext_flags
- == (fm_extents[i].fe_flags & ~FIEMAP_EXTENT_LAST)
- && (last_ei->ext_logical + last_ei->ext_length
- == fm_extents[i].fe_logical))
- {
- /* Merge the current extent into the previous one. */
- last_ei->ext_length += fm_extents[i].fe_length;
- /* Copy flags in case different. */
- last_ei->ext_flags = fm_extents[i].fe_flags;
- }
- else if ((si == 0 && scan->scan_start > fm_extents[i].fe_logical)
- || (si && (last_ei->ext_logical + last_ei->ext_length
- > fm_extents[i].fe_logical)))
- {
- /* BTRFS before 2.6.38 could return overlapping extents
- for sparse files. We adjust the returned extents
- rather than failing, as otherwise it would be inefficient
- to detect this on the initial scan. */
- uint64_t new_logical;
- uint64_t length_adjust;
- if (si == 0)
- new_logical = scan->scan_start;
- else
- {
- /* We could return here if scan->scan_start == 0
- but don't so as to minimize special cases. */
- new_logical = last_ei->ext_logical + last_ei->ext_length;
- }
- length_adjust = new_logical - fm_extents[i].fe_logical;
- /* If an extent is contained within the previous one, fail. */
- if (length_adjust < fm_extents[i].fe_length)
- {
- if (scan->scan_start == 0)
- scan->initial_scan_failed = true;
- return false;
- }
- fm_extents[i].fe_logical = new_logical;
- fm_extents[i].fe_length -= length_adjust;
- /* Process the adjusted extent again. */
- i--;
- continue;
- }
- else
- {
- last_ei = scan->ext_info + si;
- last_ei->ext_logical = fm_extents[i].fe_logical;
- last_ei->ext_length = fm_extents[i].fe_length;
- last_ei->ext_flags = fm_extents[i].fe_flags;
- si++;
- }
- }
-
- if (last_ei->ext_flags & FIEMAP_EXTENT_LAST)
- scan->hit_final_extent = true;
-
- /* If we have enough extents, discard the last as it might
- be merged with one from the next scan. */
- if (si > count && !scan->hit_final_extent)
- last_ei = scan->ext_info + --si - 1;
-
- /* We don't bother reallocating any trailing slots. */
- scan->ei_count = si;
-
- if (scan->hit_final_extent)
- break;
- else
- scan->scan_start = last_ei->ext_logical + last_ei->ext_length;
-
- if (si >= count)
- break;
- }
-
- return true;
-}
-#else
-extern bool
-extent_scan_read (struct extent_scan *scan _GL_UNUSED)
-{
- scan->initial_scan_failed = true;
- errno = ENOTSUP;
- return false;
-}
-#endif
+++ /dev/null
-/* core functions for efficiently reading sparse files
- Copyright (C) 2010-2021 Free Software Foundation, Inc.
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <https://www.gnu.org/licenses/>.
-
- Written by Jie Liu (jeff.liu@oracle.com). */
-
-#ifndef EXTENT_SCAN_H
-# define EXTENT_SCAN_H
-
-/* Structure used to store information of each extent. */
-struct extent_info
-{
- /* Logical offset of an extent. */
- off_t ext_logical;
-
- /* Extent length. */
- off_t ext_length;
-
- /* Extent flags, use it for FIEMAP only, or set it to zero. */
- unsigned int ext_flags;
-};
-
-/* Structure used to reserve extent scan information per file. */
-struct extent_scan
-{
- /* File descriptor that the extent scan is run against. */
- int fd;
-
- /* Next scan start offset. */
- off_t scan_start;
-
- /* Flags to use for scan. */
- unsigned int fm_flags;
-
- /* Number of extent_info entries returned by a scan. */
- size_t ei_count;
-
- /* If true, fall back to a normal copy, either set by the
- failure of ioctl(2) for FIEMAP or lseek(2) with SEEK_DATA. */
- bool initial_scan_failed;
-
- /* If true, the total extent scan per file has been finished. */
- bool hit_final_extent;
-
- /* Extent information: a malloc'd array of ei_count structs. */
- struct extent_info *ext_info;
-};
-
-void extent_scan_init (int src_fd, struct extent_scan *scan);
-
-bool extent_scan_read (struct extent_scan *scan);
-
-static inline void
-extent_scan_free (struct extent_scan *scan)
-{
- free (scan->ext_info);
- scan->ext_info = NULL;
- scan->ei_count = 0;
-}
-
-#endif /* EXTENT_SCAN_H */
+++ /dev/null
-/* FS_IOC_FIEMAP ioctl infrastructure.
- Some portions copyright (C) 2007 Cluster File Systems, Inc
- Authors: Mark Fasheh <mfasheh@suse.com>
- Kalpak Shah <kalpak.shah@sun.com>
- Andreas Dilger <adilger@sun.com>. */
-
-/* Copy from kernel, modified to respect GNU code style by Jie Liu. */
-
-#ifndef _LINUX_FIEMAP_H
-# define _LINUX_FIEMAP_H
-
-# include <stdint.h>
-
-struct fiemap_extent
-{
- /* Logical offset in bytes for the start of the extent
- from the beginning of the file. */
- uint64_t fe_logical;
-
- /* Physical offset in bytes for the start of the extent
- from the beginning of the disk. */
- uint64_t fe_physical;
-
- /* Length in bytes for this extent. */
- uint64_t fe_length;
-
- uint64_t fe_reserved64[2];
-
- /* FIEMAP_EXTENT_* flags for this extent. */
- uint32_t fe_flags;
-
- uint32_t fe_reserved[3];
-};
-
-struct fiemap
-{
- /* Logical offset(inclusive) at which to start mapping(in). */
- uint64_t fm_start;
-
- /* Logical length of mapping which userspace wants(in). */
- uint64_t fm_length;
-
- /* FIEMAP_FLAG_* flags for request(in/out). */
- uint32_t fm_flags;
-
- /* Number of extents that were mapped(out). */
- uint32_t fm_mapped_extents;
-
- /* Size of fm_extents array(in). */
- uint32_t fm_extent_count;
-
- uint32_t fm_reserved;
-
- /* Array of mapped extents(out). */
- struct fiemap_extent fm_extents[FLEXIBLE_ARRAY_MEMBER];
-};
-
-/* The maximum offset that can be mapped for a file. */
-# define FIEMAP_MAX_OFFSET (~0ULL)
-
-/* Sync file data before map. */
-# define FIEMAP_FLAG_SYNC 0x00000001
-
-/* Map extended attribute tree. */
-# define FIEMAP_FLAG_XATTR 0x00000002
-
-# define FIEMAP_FLAGS_COMPAT (FIEMAP_FLAG_SYNC | FIEMAP_FLAG_XATTR)
-
-/* Last extent in file. */
-# define FIEMAP_EXTENT_LAST 0x00000001
-
-/* Data location unknown. */
-# define FIEMAP_EXTENT_UNKNOWN 0x00000002
-
-/* Location still pending, Sets EXTENT_UNKNOWN. */
-# define FIEMAP_EXTENT_DELALLOC 0x00000004
-
-/* Data cannot be read while fs is unmounted. */
-# define FIEMAP_EXTENT_ENCODED 0x00000008
-
-/* Data is encrypted by fs. Sets EXTENT_NO_BYPASS. */
-# define FIEMAP_EXTENT_DATA_ENCRYPTED 0x00000080
-
-/* Extent offsets may not be block aligned. */
-# define FIEMAP_EXTENT_NOT_ALIGNED 0x00000100
-
-/* Data mixed with metadata. Sets EXTENT_NOT_ALIGNED. */
-# define FIEMAP_EXTENT_DATA_INLINE 0x00000200
-
-/* Multiple files in block. Set EXTENT_NOT_ALIGNED. */
-# define FIEMAP_EXTENT_DATA_TAIL 0x00000400
-
-/* Space allocated, but not data (i.e., zero). */
-# define FIEMAP_EXTENT_UNWRITTEN 0x00000800
-
-/* File does not natively support extents. Result merged for efficiency. */
-# define FIEMAP_EXTENT_MERGED 0x00001000
-
-/* Space shared with other files. */
-# define FIEMAP_EXTENT_SHARED 0x00002000
-
-#endif
src/die.h \
src/dircolors.h \
src/expand-common.h \
- src/fiemap.h \
src/find-mount-point.h \
src/fs.h \
src/fs-is-local.h \
copy_sources = \
src/copy.c \
src/cp-hash.c \
- src/extent-scan.c \
- src/extent-scan.h \
src/force-link.c \
src/force-link.h