git.ipfire.org Git - thirdparty/util-linux.git/commitdiff
hexdump: add FIEMAP-based sparse file optimization
author: WanBingjiang <wanbingjiang@webray.com.cn>
Fri, 30 Jan 2026 05:48:24 +0000 (13:48 +0800)
committer: WanBingjiang <wanbingjiang@webray.com.cn>
Fri, 30 Jan 2026 07:16:38 +0000 (15:16 +0800)
Use Linux FIEMAP ioctl to detect sparse file holes and skip reading
them. This significantly improves performance when dumping large
sparse files by avoiding unnecessary disk I/O for hole regions.

Key changes:
- Add fiemap, current_extent, file_size, in_sparse_hole, and region_end
  fields to struct hexdump
- Use check_hole() with cached region boundaries to minimize syscalls
- Only skip holes when vflag==DUP to preserve correct "*" output
- Distinguish real sparse holes from duplicate data in regular files

text-utils/hexdump-display.c
text-utils/hexdump.h

index b605cad44296943d10e7f08d176a17d236c3afc4..13235cd8ec74fe549bb2df2a058b4d81cf13da97 100644 (file)
 #include <sys/param.h>
 #include <sys/stat.h>
 #include <sys/types.h>
+#include <sys/ioctl.h>
 #include <unistd.h>
+#include <fcntl.h>
 #include <errno.h>
 #include <ctype.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#ifdef __linux__
+#include <linux/fiemap.h>
+#include <linux/fs.h>
+#endif
 #include "hexdump.h"
 #include "xalloc.h"
 #include "c.h"
 static void doskip(const char *, int, struct hexdump *);
 static u_char *get(struct hexdump *);
 
+#ifdef __linux__
+#define FIEMAP_EXTENTS_BATCH 256
+
+/*
+ * Drop the extent map cached in @hex (if there is one) and reset the
+ * pointer so later checks see that no map is loaded.
+ */
+static void free_fiemap(struct hexdump *hex)
+{
+       /* free(NULL) is a no-op, so an explicit NULL guard is unnecessary */
+       free(hex->fiemap);
+       hex->fiemap = NULL;
+}
+
+/*
+ * Issue a single FS_IOC_FIEMAP request for the byte range [0, length)
+ * with room for @count extents.
+ *
+ * FIEMAP_FLAG_SYNC asks the kernel to write back dirty data before the
+ * extents are mapped, so data that is still only in the page cache is not
+ * mistaken for a hole.
+ *
+ * Returns a newly allocated map that the caller must free(), or NULL when
+ * the ioctl fails (e.g. the filesystem does not support FIEMAP).
+ */
+static struct fiemap *request_fiemap(int fd, off_t length, unsigned int count)
+{
+       struct fiemap *fm;
+
+       fm = xcalloc(1, sizeof(*fm) + sizeof(struct fiemap_extent) * count);
+       fm->fm_start = 0;
+       fm->fm_length = length;
+       fm->fm_flags = FIEMAP_FLAG_SYNC;
+       fm->fm_extent_count = count;
+
+       if (ioctl(fd, FS_IOC_FIEMAP, fm) < 0) {
+               free(fm);
+               return NULL;
+       }
+       return fm;
+}
+
+/*
+ * Use FIEMAP ioctl to get file extent map for sparse file optimization.
+ * This allows us to skip holes without reading them.
+ *
+ * Resets all sparse-file state in @hex, then, if @fd is a regular file
+ * and the ioctl succeeds, stores the extent map in hex->fiemap. On any
+ * failure hex->fiemap stays NULL, which disables the optimization.
+ */
+static void init_fiemap(struct hexdump *hex, int fd)
+{
+       struct stat st;
+       struct fiemap *fm;
+
+       /* Free previous fiemap (when processing multiple files) */
+       free_fiemap(hex);
+       hex->current_extent = 0;
+       hex->file_size = 0;
+       hex->in_sparse_hole = 0;
+       hex->region_end = 0;
+
+       if (fstat(fd, &st) < 0 || !S_ISREG(st.st_mode))
+               return;
+
+       hex->file_size = st.st_size;
+
+       fm = request_fiemap(fd, st.st_size, FIEMAP_EXTENTS_BATCH);
+       if (!fm)
+               return;
+
+       /*
+        * A completely filled batch whose final extent is not marked as the
+        * last one means more extents exist: retry once with a larger buffer.
+        * A map with zero extents (the entire file is a hole) skips this and
+        * is kept as-is to indicate that.
+        */
+       if (fm->fm_mapped_extents == FIEMAP_EXTENTS_BATCH &&
+           !(fm->fm_extents[FIEMAP_EXTENTS_BATCH - 1].fe_flags & FIEMAP_EXTENT_LAST)) {
+               free(fm);
+               fm = request_fiemap(fd, st.st_size, FIEMAP_EXTENTS_BATCH * 16);
+               if (!fm)
+                       return;
+       }
+
+       hex->fiemap = fm;
+}
+
+/*
+ * Classify @pos as lying in a hole or in data, using the cached extent map.
+ *
+ * Besides the return value, the result is recorded in hex->in_sparse_hole,
+ * and hex->region_end is set to the offset where the current hole/data
+ * region stops. hex->current_extent is advanced to the extent that decided
+ * the answer, so later lookups resume the scan from there.
+ *
+ * Returns: 1 if in hole, 0 if in data.
+ */
+static int check_hole(struct hexdump *hex, off_t pos)
+{
+       struct fiemap *map = hex->fiemap;
+       struct fiemap_extent *extents;
+       unsigned int idx, nextents;
+       int hole;
+
+       /* No map loaded: report data and leave the region empty */
+       if (map == NULL) {
+               hex->region_end = 0;
+               hex->in_sparse_hole = 0;
+               return 0;
+       }
+
+       /* A map without any extent describes a file that is one big hole */
+       nextents = map->fm_mapped_extents;
+       if (nextents == 0) {
+               hex->region_end = hex->file_size;
+               hex->in_sparse_hole = 1;
+               return 1;
+       }
+
+       /* Resume the scan at the extent that answered the previous lookup */
+       extents = map->fm_extents;
+       for (idx = hex->current_extent; idx < nextents; idx++) {
+               off_t start = extents[idx].fe_logical;
+               off_t end = extents[idx].fe_logical + extents[idx].fe_length;
+
+               /* In front of this extent means the gap preceding it */
+               hole = pos < start;
+               if (!hole && pos >= end)
+                       continue;       /* beyond this extent, try the next */
+
+               hex->current_extent = idx;
+               hex->in_sparse_hole = hole;
+               hex->region_end = hole ? start : end;
+               return hole;
+       }
+
+       /*
+        * pos is past every mapped extent. That is a trailing hole only when
+        * the map is known to be complete; with an incomplete map the
+        * optimization is disabled for the rest of the file for safety.
+        */
+       hole = (extents[nextents - 1].fe_flags & FIEMAP_EXTENT_LAST) != 0;
+       hex->in_sparse_hole = hole;
+       hex->region_end = hex->file_size;
+       return hole;
+}
+#endif /* __linux__ */
+
 enum _vflag vflag = FIRST;
 
 static off_t address;                  /* address/offset in stream */
@@ -374,6 +512,49 @@ get(struct hexdump *hex)
                        warnx(_("all input file arguments failed"));
                        goto retnul;
                }
+
+#ifdef __linux__
+               /*
+                * FIEMAP-based sparse file optimization:
+                */
+               if (hex->fiemap) {
+                       off_t curpos = address + nread;
+
+                       if (curpos >= hex->region_end)
+                               check_hole(hex, curpos);
+
+                       if (hex->in_sparse_hole && vflag == DUP) {
+                               int savp_is_zero = 1;
+                               ssize_t j;
+                               for (j = 0; j < hex->blocksize; j++) {
+                                       if (savp[j] != 0) {
+                                               savp_is_zero = 0;
+                                               break;
+                                       }
+                               }
+
+                               // Only if savp is all zeros, we can skip the hole.
+                               if (savp_is_zero) {
+                                       off_t next_data = hex->region_end;
+                                       off_t aligned_pos = (next_data / hex->blocksize) * hex->blocksize;
+                                       if (aligned_pos > curpos) {
+                                               off_t skip = aligned_pos - curpos;
+                                               if (fseeko(stdin, aligned_pos, SEEK_SET) == 0) {
+                                                       address = aligned_pos;
+                                                       if (hex->length != -1)
+                                                               hex->length -= skip;
+                                                       memset(curp, 0, hex->blocksize);
+                                                       need = hex->blocksize;
+                                                       nread = 0;
+                                                       hex->region_end = 0;
+                                                       continue;
+                                               }
+                                       }
+                               }
+                       }
+               }
+#endif
+
                n = fread((char *)curp + nread, sizeof(unsigned char),
                    hex->length == -1 ? need : min(hex->length, need), stdin);
                if (!n) {
@@ -405,6 +586,9 @@ get(struct hexdump *hex)
 retnul:
        free (curp);
        free (savp);
+#ifdef __linux__
+       free_fiemap(hex);
+#endif
        return NULL;
 }
 
@@ -431,6 +615,9 @@ int next(char **argv, struct hexdump *hex)
                                return(0);
                        statok = 0;
                }
+#ifdef __linux__
+               init_fiemap(hex, fileno(stdin));
+#endif
                if (hex->skip)
                        doskip(statok ? *_argv : "stdin", statok, hex);
                if (*_argv)
index 4b17e269830b653ce48d01561e2318c8bd0f4203..6fbf3cb0b9ff8a641f95f322f3a004410a214251 100644 (file)
@@ -92,6 +92,12 @@ struct hexdump {
   int exitval;                         /* final exit value */
   ssize_t length;                      /* max bytes to read */
   off_t skip;                          /* bytes to skip */
+  /* Sparse file optimization using FIEMAP (Linux only) */
+  void *fiemap;                                /* struct fiemap pointer */
+  unsigned int current_extent;         /* current extent index for search */
+  off_t file_size;                     /* total file size */
+  int in_sparse_hole;                  /* currently in a sparse file hole */
+  off_t region_end;                    /* end of current hole/data region */
 };
 
 extern struct hexdump_fu *endfu;