From: Scott Bauersfeld Date: Mon, 27 Apr 2026 19:26:38 +0000 (+0000) Subject: index-pack, unpack-objects: increase input buffer from 4 KiB to 128 KiB X-Git-Url: http://git.ipfire.org/gitweb/?a=commitdiff_plain;h=d8a3c8d0e89caec2de0da2bde6f54a99b78184ca;p=thirdparty%2Fgit.git index-pack, unpack-objects: increase input buffer from 4 KiB to 128 KiB index-pack and unpack-objects both read pack data from stdin through a 4 KiB static buffer. In index-pack, each fill() flushes consumed bytes to the pack file via write_or_die(), capping every write(2) at 4 KiB. unpack-objects uses the same buffer pattern for reads. On FUSE-backed filesystems every write(2) is a synchronous round trip through the FUSE protocol (userspace -> kernel -> userspace -> back), so the 4 KiB buffer turns a clone into many unnecessary tiny writes with noticeable latency overhead. Increase the buffer from 4 KiB to 128 KiB. Introduce a shared DEFAULT_IO_BUFFER_SIZE constant in git-compat-util.h (next to MAX_IO_SIZE) and use it in index-pack, unpack-objects, and the hashfile layer in csum-file (which already used 128 KiB but hardcoded the value). 
Syscall counts via strace on HTTPS clones of git/git (~296 MB pack, 5 runs per variant, isolated builds from the same v2.54.0 source): index-pack pack file writes: 72,465 -> 24,943 avg (65% fewer) total write() syscalls: 310,192 -> 259,530 avg (16% fewer) writes of exactly 4096 bytes: ~40,077 -> 0 Wall-clock time of git clone over HTTPS onto a FUSE passthrough filesystem with writeback caching disabled, 3 runs per variant: vscode (~1.26 GB pack): 84.5s -> 75.7s avg (10% faster) git/git (~306 MB pack): 22.6s -> 20.0s avg (11% faster) Signed-off-by: Scott Bauersfeld Acked-by: Derrick Stolee Signed-off-by: Junio C Hamano --- diff --git a/builtin/index-pack.c b/builtin/index-pack.c index ca7784dc2c..bb3639641c 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -145,8 +145,7 @@ static int check_self_contained_and_connected; static struct progress *progress; -/* We always read in 4kB chunks. */ -static unsigned char input_buffer[4096]; +static unsigned char input_buffer[DEFAULT_IO_BUFFER_SIZE]; static unsigned int input_offset, input_len; static off_t consumed_bytes; static off_t max_input_size; diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c index e01cf6e360..af67d1a1d3 100644 --- a/builtin/unpack-objects.c +++ b/builtin/unpack-objects.c @@ -23,8 +23,7 @@ static int dry_run, quiet, recover, has_errors, strict; static const char unpack_usage[] = "git unpack-objects [-n] [-q] [-r] [--strict]"; -/* We always read in 4kB chunks. */ -static unsigned char buffer[4096]; +static unsigned char buffer[DEFAULT_IO_BUFFER_SIZE]; static unsigned int offset, len; static off_t consumed_bytes; static off_t max_input_size; diff --git a/csum-file.c b/csum-file.c index 9558177a11..d7a682c2b6 100644 --- a/csum-file.c +++ b/csum-file.c @@ -178,7 +178,7 @@ struct hashfile *hashfd_ext(const struct git_hash_algo *algop, f->algop = unsafe_hash_algo(algop); f->algop->init_fn(&f->ctx); - f->buffer_len = opts->buffer_len ? 
opts->buffer_len : 128 * 1024; + f->buffer_len = opts->buffer_len ? opts->buffer_len : DEFAULT_IO_BUFFER_SIZE; f->buffer = xmalloc(f->buffer_len); f->check_buffer = NULL; diff --git a/git-compat-util.h b/git-compat-util.h index ae1bdc90a4..5024814bd4 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -712,6 +712,12 @@ static inline uint64_t u64_add(uint64_t a, uint64_t b) # endif #endif +/* + * Default buffer size for buffered I/O in index-pack, unpack-objects, + * and the hashfile layer in csum-file. + */ +#define DEFAULT_IO_BUFFER_SIZE (128 * 1024) + #ifdef HAVE_ALLOCA_H # include <alloca.h> # define xalloca(size) (alloca(size))