From: WanBingjiang
Date: Fri, 30 Jan 2026 05:48:24 +0000 (+0800)
Subject: hexdump: add FIEMAP-based sparse file optimization
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=03d14f5bc33d82f54578b384437101846622b958;p=thirdparty%2Futil-linux.git

hexdump: add FIEMAP-based sparse file optimization

Use Linux FIEMAP ioctl to detect sparse file holes and skip reading
them. This significantly improves performance when dumping large sparse
files by avoiding unnecessary disk I/O for hole regions.

Key changes:
- Add fiemap, current_extent, file_size, in_sparse_hole, and region_end
  fields to struct hexdump
- Use check_hole() with cached region boundaries to minimize syscalls
- Only skip holes when vflag==DUP to preserve correct "*" output
- Distinguish real sparse holes from duplicate data in regular files
---

Notes on this copy of the patch:
- The header names of the #include lines in the first hunk are missing
  here and have to be restored before the patch can be applied.
- Changed relative to commit 03d14f5bc33d: FIEMAP is requested with
  FIEMAP_FLAG_SYNC; the shared ioctl code moved to fetch_fiemap(); get()
  takes the file offset from ftello(), only skips on a block boundary,
  skips whole blocks, and never skips past hex->length.  The blob id in
  the "index" line of hexdump-display.c no longer matches.

diff --git a/text-utils/hexdump-display.c b/text-utils/hexdump-display.c
index b605cad44..13235cd8e 100644
--- a/text-utils/hexdump-display.c
+++ b/text-utils/hexdump-display.c
@@ -34,12 +34,18 @@
 #include
 #include
 #include
+#include
 #include
+#include
 #include
 #include
 #include
 #include
 #include
+#ifdef __linux__
+#include
+#include
+#endif
 #include "hexdump.h"
 #include "xalloc.h"
 #include "c.h"
@@ -49,6 +55,149 @@
 static void doskip(const char *, int, struct hexdump *);
 static u_char *get(struct hexdump *);
 
+#ifdef __linux__
+#define FIEMAP_EXTENTS_BATCH 256
+
+/* Drop the cached extent map, if any. */
+static void free_fiemap(struct hexdump *hex)
+{
+	free(hex->fiemap);
+	hex->fiemap = NULL;
+}
+
+/*
+ * Ask the kernel for up to @count extents covering the first @size bytes
+ * of @fd.
+ *
+ * FIEMAP_FLAG_SYNC makes the kernel sync the file before mapping it, so
+ * that freshly written data is not missing from the map and then skipped
+ * as if it were a hole.
+ *
+ * Returns a newly allocated map owned by the caller, or NULL if the ioctl
+ * fails.
+ */
+static struct fiemap *fetch_fiemap(int fd, off_t size, unsigned int count)
+{
+	struct fiemap *fm;
+
+	fm = xcalloc(1, sizeof(*fm) + sizeof(struct fiemap_extent) * count);
+	fm->fm_start = 0;
+	fm->fm_length = size;
+	fm->fm_flags = FIEMAP_FLAG_SYNC;
+	fm->fm_extent_count = count;
+
+	if (ioctl(fd, FS_IOC_FIEMAP, fm) < 0) {
+		free(fm);
+		return NULL;
+	}
+	return fm;
+}
+
+/*
+ * Use FIEMAP ioctl to get file extent map for sparse file optimization.
+ * This allows us to skip holes without reading them.
+ *
+ * Any previous map is dropped first.  hex->fiemap is left NULL, which
+ * disables the optimization, when @fd is not a regular file or the ioctl
+ * fails.
+ */
+static void init_fiemap(struct hexdump *hex, int fd)
+{
+	struct stat st;
+	struct fiemap *fm;
+
+	/* Free previous fiemap (when processing multiple files) */
+	free_fiemap(hex);
+	hex->current_extent = 0;
+	hex->file_size = 0;
+	hex->in_sparse_hole = 0;
+	hex->region_end = 0;
+
+	if (fstat(fd, &st) < 0 || !S_ISREG(st.st_mode))
+		return;
+
+	hex->file_size = st.st_size;
+
+	fm = fetch_fiemap(fd, st.st_size, FIEMAP_EXTENTS_BATCH);
+
+	/*
+	 * A full batch whose last extent is not flagged LAST means there are
+	 * more extents: retry once with a larger buffer.  If that is still
+	 * too small, check_hole() treats the unmapped tail as data.
+	 */
+	if (fm && fm->fm_mapped_extents == FIEMAP_EXTENTS_BATCH &&
+	    !(fm->fm_extents[fm->fm_mapped_extents - 1].fe_flags & FIEMAP_EXTENT_LAST)) {
+		free(fm);
+		fm = fetch_fiemap(fd, st.st_size, FIEMAP_EXTENTS_BATCH * 16);
+	}
+
+	/* A map with no extents is kept: it means the whole file is a hole. */
+	hex->fiemap = fm;
+}
+
+/*
+ * Check if position is in a hole.
+ *
+ * Also caches the answer in hex->in_sparse_hole, stores where the current
+ * hole or data region ends in hex->region_end, and, when the search stops
+ * at an extent, remembers its index in hex->current_extent.
+ *
+ * Returns: 1 if in hole, 0 if in data.
+ */
+static int check_hole(struct hexdump *hex, off_t pos)
+{
+	struct fiemap *fm = hex->fiemap;
+	unsigned int i;
+	struct fiemap_extent *last_ext;
+
+	if (!fm) {
+		hex->in_sparse_hole = 0;
+		hex->region_end = 0;
+		return 0;
+	}
+
+	/* If no extents, entire file is a hole */
+	if (fm->fm_mapped_extents == 0) {
+		hex->in_sparse_hole = 1;
+		hex->region_end = hex->file_size;
+		return 1;
+	}
+
+	/* Start search from current_extent for efficiency */
+	for (i = hex->current_extent; i < fm->fm_mapped_extents; i++) {
+		struct fiemap_extent *ext = &fm->fm_extents[i];
+		off_t ext_end = ext->fe_logical + ext->fe_length;
+
+		if (pos < (off_t)ext->fe_logical) {
+			/* pos is before this extent - it's in a hole */
+			hex->current_extent = i;
+			hex->in_sparse_hole = 1;
+			hex->region_end = ext->fe_logical;
+			return 1;
+		}
+		if (pos < ext_end) {
+			/* pos is within this extent - it's in data */
+			hex->current_extent = i;
+			hex->in_sparse_hole = 0;
+			hex->region_end = ext_end;
+			return 0;
+		}
+	}
+
+	last_ext = &fm->fm_extents[fm->fm_mapped_extents - 1];
+	if (last_ext->fe_flags & FIEMAP_EXTENT_LAST) {
+		hex->in_sparse_hole = 1;
+		hex->region_end = hex->file_size;
+		return 1;
+	}
+
+	/* Incomplete extent map - disable optimization for safety */
+	hex->in_sparse_hole = 0;
+	hex->region_end = hex->file_size;
+	return 0;
+}
+#endif /* __linux__ */
+
 enum _vflag vflag = FIRST;
 
 static off_t address;			/* address/offset in stream */
@@ -374,6 +523,67 @@ get(struct hexdump *hex)
 			warnx(_("all input file arguments failed"));
 			goto retnul;
 		}
+
+#ifdef __linux__
+		/*
+		 * FIEMAP-based sparse file optimization.
+		 *
+		 * While repeated all-zero blocks are being collapsed
+		 * (vflag == DUP and savp is all zeros), whole blocks that lie
+		 * inside a hole are stepped over with fseeko() instead of
+		 * being read.  This is only done on a block boundary
+		 * (nread == 0) so bytes already stored in curp are never
+		 * discarded.
+		 */
+		if (hex->fiemap && nread == 0 && vflag == DUP) {
+			/*
+			 * The extent map uses offsets within the current file;
+			 * ask stdio for that offset instead of deriving it
+			 * from the stream address.
+			 */
+			off_t curpos = ftello(stdin);
+
+			if (curpos >= 0 && curpos >= hex->region_end)
+				check_hole(hex, curpos);
+
+			if (curpos >= 0 && hex->in_sparse_hole) {
+				int savp_is_zero = 1;
+				ssize_t j;
+
+				for (j = 0; j < hex->blocksize; j++) {
+					if (savp[j] != 0) {
+						savp_is_zero = 0;
+						break;
+					}
+				}
+
+				/* Only if savp is all zeros, we can skip the hole. */
+				if (savp_is_zero) {
+					/* Whole blocks left before the hole ends. */
+					off_t skip = (hex->region_end - curpos) /
+						hex->blocksize * hex->blocksize;
+
+					/* Never seek past the requested length. */
+					if (hex->length != -1 && skip > hex->length)
+						skip = hex->length /
+							hex->blocksize * hex->blocksize;
+
+					if (skip > 0 &&
+					    fseeko(stdin, curpos + skip, SEEK_SET) == 0) {
+						address += skip;
+						if (hex->length != -1)
+							hex->length -= skip;
+						memset(curp, 0, hex->blocksize);
+						need = hex->blocksize;
+						nread = 0;
+						hex->region_end = 0;
+						continue;
+					}
+				}
+			}
+		}
+#endif
+
 		n = fread((char *)curp + nread, sizeof(unsigned char),
 		    hex->length == -1 ? need : min(hex->length, need), stdin);
 		if (!n) {
@@ -405,6 +615,9 @@ get(struct hexdump *hex)
 retnul:
 	free (curp);
 	free (savp);
+#ifdef __linux__
+	free_fiemap(hex);
+#endif
 	return NULL;
 }
 
@@ -431,6 +644,9 @@ int next(char **argv, struct hexdump *hex)
 				return(0);
 			statok = 0;
 		}
+#ifdef __linux__
+		init_fiemap(hex, fileno(stdin));
+#endif
 		if (hex->skip)
 			doskip(statok ? *_argv : "stdin", statok, hex);
 		if (*_argv)
diff --git a/text-utils/hexdump.h b/text-utils/hexdump.h
index 4b17e2698..6fbf3cb0b 100644
--- a/text-utils/hexdump.h
+++ b/text-utils/hexdump.h
@@ -92,6 +92,12 @@ struct hexdump {
 	int exitval;				/* final exit value */
 	ssize_t length;				/* max bytes to read */
 	off_t skip;				/* bytes to skip */
+	/* Sparse file optimization using FIEMAP (Linux only) */
+	void *fiemap;				/* struct fiemap pointer */
+	unsigned int current_extent;		/* current extent index for search */
+	off_t file_size;			/* total file size */
+	int in_sparse_hole;			/* currently in a sparse file hole */
+	off_t region_end;			/* end of current hole/data region */
 };
 
 extern struct hexdump_fu *endfu;