From: Junio C Hamano <gitster@pobox.com>
Date: Mon, 29 Sep 2025 18:46:45 +0000 (-0700)
Subject: Merge branch 'ps/packfile-store' into next
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=342bb57fc27401ce6abd975ebb73479d9b8086c7;p=thirdparty%2Fgit.git

Merge branch 'ps/packfile-store' into next

Code clean-up around the in-core list of all the pack files and
object database(s).

* ps/packfile-store:
  packfile: refactor `get_packed_git_mru()` to work on packfile store
  packfile: refactor `get_all_packs()` to work on packfile store
  packfile: refactor `get_packed_git()` to work on packfile store
  packfile: move `get_multi_pack_index()` into "midx.c"
  packfile: introduce function to load and add packfiles
  packfile: refactor `install_packed_git()` to work on packfile store
  packfile: split up responsibilities of `reprepare_packed_git()`
  packfile: refactor `prepare_packed_git()` to work on packfile store
  packfile: reorder functions to avoid function declaration
  odb: move kept cache into `struct packfile_store`
  odb: move MRU list of packfiles into `struct packfile_store`
  odb: move packfile map into `struct packfile_store`
  odb: move initialization bit into `struct packfile_store`
  odb: move list of packfiles into `struct packfile_store`
  packfile: introduce a new `struct packfile_store`
---

342bb57fc27401ce6abd975ebb73479d9b8086c7
diff --cc object-file.c
index 17a236d2fe,2bc36ab3ee..4675c8ed6b
--- a/object-file.c
+++ b/object-file.c
@@@ -1331,274 -1243,6 +1331,274 @@@ static int index_core(struct index_stat
  	return ret;
  }
  
 +static int already_written(struct odb_transaction *transaction,
 +			   struct object_id *oid)
 +{
 +	/* Report a duplicate (1) if the object already exists in the repository */
 +	if (odb_has_object(transaction->odb, oid,
 +			   HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR))
 +		return 1;
 +
 +	/* ...or earlier in this transaction; might want to keep the list sorted */
 +	for (uint32_t i = 0; i < transaction->packfile.nr_written; i++)
 +		if (oideq(&transaction->packfile.written[i]->oid, oid))
 +			return 1;
 +
 +	/* This is a new object we need to keep */
 +	return 0;
 +}
 +
 +/* Lazily create the backing packfile; no-op unless writing and none is open yet */
 +static void prepare_packfile_transaction(struct odb_transaction *transaction,
 +					 unsigned flags)
 +{
 +	struct transaction_packfile *state = &transaction->packfile;
 +	if (!(flags & INDEX_WRITE_OBJECT) || state->f)
 +		return;
 +
 +	state->f = create_tmp_packfile(transaction->odb->repo,
 +				       &state->pack_tmp_name);
 +	reset_pack_idx_option(&state->pack_idx_opts);
 +
 +	/* Pretend we are going to write only one object; revisited when flushing */
 +	state->offset = write_pack_header(state->f, 1);
 +	if (!state->offset)
 +		die_errno("unable to write pack header");
 +}
 +
 +/*
 + * Read size bytes from fd, deflating them into the packfile in state
 + * (only when INDEX_WRITE_OBJECT is set in flags) while updating the
 + * hash in ctx. Return 0 on success. Return a negative value when the
 + * resulting pack would exceed the pack size limit and this is not the
 + * first object in the pack, so that the caller can discard what we
 + * wrote from the current pack by truncating it and opening a new one.
 + * The caller will then call us again after rewinding the input fd.
 + *
 + * The already_hashed_to pointer is kept untouched by the caller to
 + * make sure we do not hash the same byte when we are called
 + * again. This way, the caller does not have to checkpoint its hash
 + * status before calling us just in case we ask it to call us again
 + * with a new pack.
 + */
 +static int stream_blob_to_pack(struct transaction_packfile *state,
 +			       struct git_hash_ctx *ctx, off_t *already_hashed_to,
 +			       int fd, size_t size, const char *path,
 +			       unsigned flags)
 +{
 +	git_zstream s;
 +	unsigned char ibuf[16384];
 +	unsigned char obuf[16384];
 +	unsigned hdrlen;
 +	int status = Z_OK;
 +	int write_object = (flags & INDEX_WRITE_OBJECT);
 +	off_t offset = 0;
 +
 +	git_deflate_init(&s, pack_compression_level);
 +
 +	hdrlen = encode_in_pack_object_header(obuf, sizeof(obuf), OBJ_BLOB, size);
 +	s.next_out = obuf + hdrlen;
 +	s.avail_out = sizeof(obuf) - hdrlen;
 +
 +	while (status != Z_STREAM_END) {
 +		if (size && !s.avail_in) {
 +			size_t rsize = size < sizeof(ibuf) ? size : sizeof(ibuf);
 +			ssize_t read_result = read_in_full(fd, ibuf, rsize);
 +			if (read_result < 0)
 +				die_errno("failed to read from '%s'", path);
 +			if ((size_t)read_result != rsize)
 +				die("failed to read %u bytes from '%s'",
 +				    (unsigned)rsize, path);
 +			offset += rsize;
 +			if (*already_hashed_to < offset) {
 +				size_t hsize = offset - *already_hashed_to;
 +				if (rsize < hsize)
 +					hsize = rsize;
 +				if (hsize)
 +					git_hash_update(ctx, ibuf, hsize);
 +				*already_hashed_to = offset;
 +			}
 +			s.next_in = ibuf;
 +			s.avail_in = rsize;
 +			size -= rsize;
 +		}
 +
 +		status = git_deflate(&s, size ? 0 : Z_FINISH); /* finish once no input remains to be read */
 +
 +		if (!s.avail_out || status == Z_STREAM_END) {
 +			if (write_object) {
 +				size_t written = s.next_out - obuf;
 +
 +				/* would we bust the size limit? */
 +				if (state->nr_written &&
 +				    pack_size_limit_cfg &&
 +				    pack_size_limit_cfg < state->offset + written) {
 +					git_deflate_abort(&s);
 +					return -1;
 +				}
 +
 +				hashwrite(state->f, obuf, written);
 +				state->offset += written;
 +			}
 +			s.next_out = obuf;
 +			s.avail_out = sizeof(obuf);
 +		}
 +
 +		switch (status) {
 +		case Z_OK:
 +		case Z_BUF_ERROR:
 +		case Z_STREAM_END:
 +			continue;
 +		default:
 +			die("unexpected deflate failure: %d", status);
 +		}
 +	}
 +	git_deflate_end(&s);
 +	return 0;
 +}
 +
 +static void flush_packfile_transaction(struct odb_transaction *transaction)
 +{
 +	struct transaction_packfile *state = &transaction->packfile;
 +	struct repository *repo = transaction->odb->repo;
 +	unsigned char hash[GIT_MAX_RAWSZ];
 +	struct strbuf packname = STRBUF_INIT;
 +	char *idx_tmp_name = NULL;
 +
 +	if (!state->f)
 +		return;
 +
 +	if (state->nr_written == 0) { /* nothing kept: discard the temporary pack */
 +		close(state->f->fd);
 +		free_hashfile(state->f);
 +		unlink(state->pack_tmp_name);
 +		goto clear_exit;
 +	} else if (state->nr_written == 1) { /* header already claims one object */
 +		finalize_hashfile(state->f, hash, FSYNC_COMPONENT_PACK,
 +				  CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE);
 +	} else { /* more than the one object the header claimed */
 +		int fd = finalize_hashfile(state->f, hash, FSYNC_COMPONENT_PACK, 0);
 +		fixup_pack_header_footer(repo->hash_algo, fd, hash, state->pack_tmp_name,
 +					 state->nr_written, hash,
 +					 state->offset);
 +		close(fd);
 +	}
 +
 +	strbuf_addf(&packname, "%s/pack/pack-%s.",
 +		    repo_get_object_directory(transaction->odb->repo),
 +		    hash_to_hex_algop(hash, repo->hash_algo));
 +
 +	stage_tmp_packfiles(repo, &packname, state->pack_tmp_name,
 +			    state->written, state->nr_written, NULL,
 +			    &state->pack_idx_opts, hash, &idx_tmp_name);
 +	rename_tmp_packfile_idx(repo, &packname, &idx_tmp_name);
 +
 +	for (uint32_t i = 0; i < state->nr_written; i++)
 +		free(state->written[i]);
 +
 +clear_exit:
 +	free(idx_tmp_name);
 +	free(state->pack_tmp_name);
 +	free(state->written);
 +	memset(state, 0, sizeof(*state));
 +
 +	strbuf_release(&packname);
 +	/* Make objects we just wrote available to ourselves; also runs when nothing was written */
- 	reprepare_packed_git(repo);
++	odb_reprepare(repo->objects);
 +}
 +
 +/*
 + * This writes the specified object to a packfile. Objects written here
 + * during the same transaction are written to the same packfile, which
 + * is not flushed until the transaction is flushed or the pack size
 + * limit forces a new pack. The caller is expected to ensure a valid
 + * transaction is set up for objects to be recorded to.
 + *
 + * This also bypasses the usual "convert-to-git" dance, and that is on
 + * purpose. We could write a streaming version of the converting
 + * functions and insert that before feeding the data to fast-import
 + * (or equivalent in-core API described above). However, that is
 + * somewhat complicated, as we do not know the size of the filter
 + * result, which we need to know beforehand when writing a git object.
 + * Since the primary motivation for trying to stream from the working
 + * tree file and to avoid mmaping it in core is to deal with large
 + * binary blobs, they generally do not want to get any conversion, and
 + * callers should avoid this code path when filters are requested.
 + */
 +static int index_blob_packfile_transaction(struct odb_transaction *transaction,
 +					   struct object_id *result_oid, int fd,
 +					   size_t size, const char *path,
 +					   unsigned flags)
 +{
 +	struct transaction_packfile *state = &transaction->packfile;
 +	off_t seekback, already_hashed_to;
 +	struct git_hash_ctx ctx;
 +	unsigned char obuf[16384];
 +	unsigned header_len;
 +	struct hashfile_checkpoint checkpoint;
 +	struct pack_idx_entry *idx = NULL;
 +
 +	seekback = lseek(fd, 0, SEEK_CUR);
 +	if (seekback == (off_t)-1)
 +		return error("cannot find the current offset");
 +
 +	header_len = format_object_header((char *)obuf, sizeof(obuf),
 +					  OBJ_BLOB, size);
 +	transaction->odb->repo->hash_algo->init_fn(&ctx);
 +	git_hash_update(&ctx, obuf, header_len);
 +
 +	/* Note: idx is non-NULL when we are writing */
 +	if ((flags & INDEX_WRITE_OBJECT) != 0) {
 +		CALLOC_ARRAY(idx, 1);
 +
 +		prepare_packfile_transaction(transaction, flags);
 +		hashfile_checkpoint_init(state->f, &checkpoint);
 +	}
 +
 +	already_hashed_to = 0;
 +
 +	while (1) {
 +		prepare_packfile_transaction(transaction, flags); /* reopens a pack after a flush below */
 +		if (idx) {
 +			hashfile_checkpoint(state->f, &checkpoint);
 +			idx->offset = state->offset;
 +			crc32_begin(state->f);
 +		}
 +		if (!stream_blob_to_pack(state, &ctx, &already_hashed_to,
 +					 fd, size, path, flags))
 +			break;
 +		/*
 +		 * Writing this object to the current pack will make
 +		 * it too big; we need to truncate it, start a new
 +		 * pack, and write into it.
 +		 */
 +		if (!idx)
 +			BUG("should not happen");
 +		hashfile_truncate(state->f, &checkpoint);
 +		state->offset = checkpoint.offset;
 +		flush_packfile_transaction(transaction);
 +		if (lseek(fd, seekback, SEEK_SET) == (off_t)-1)
 +			return error("cannot seek back");
 +	}
 +	git_hash_final_oid(result_oid, &ctx);
 +	if (!idx)
 +		return 0;
 +
 +	idx->crc32 = crc32_end(state->f);
 +	if (already_written(transaction, result_oid)) {
 +		hashfile_truncate(state->f, &checkpoint);
 +		state->offset = checkpoint.offset;
 +		free(idx);
 +	} else {
 +		oidcpy(&idx->oid, result_oid);
 +		ALLOC_GROW(state->written,
 +			   state->nr_written + 1,
 +			   state->alloc_written);
 +		state->written[state->nr_written++] = idx;
 +	}
 +	return 0;
 +}
 +
  int index_fd(struct index_state *istate, struct object_id *oid,
  	     int fd, struct stat *st,
  	     enum object_type type, const char *path, unsigned flags)
diff --cc odb.c
index 1fc1488891,65a6cc67b6..00a6e71568
--- a/odb.c
+++ b/odb.c
@@@ -1035,29 -1034,34 +1034,44 @@@ void odb_clear(struct object_database *
  		free((char *) o->cached_objects[i].value.buf);
  	FREE_AND_NULL(o->cached_objects);
  
- 	INIT_LIST_HEAD(&o->packed_git_mru);
  	close_object_store(o);
+ 	packfile_store_free(o->packfiles);
+ 	o->packfiles = NULL;
+ 
+ 	string_list_clear(&o->submodule_source_paths, 0);
+ }
+ 
+ void odb_reprepare(struct object_database *o)
+ {
+ 	struct odb_source *source;
+ 
+ 	obj_read_lock();
  
  	/*
- 	 * `close_object_store()` only closes the packfiles, but doesn't free
- 	 * them. We thus have to do this manually.
+ 	 * Reprepare alt odbs, in case the alternates file was modified
+ 	 * during the course of this process. This only _adds_ odbs to
+ 	 * the linked list, so existing odbs will continue to exist for
+ 	 * the lifetime of the process.
  	 */
- 	for (struct packed_git *p = o->packed_git, *next; p; p = next) {
- 		next = p->next;
- 		free(p);
- 	}
- 	o->packed_git = NULL;
+ 	o->loaded_alternates = 0;
+ 	odb_prepare_alternates(o);
  
- 	hashmap_clear(&o->pack_map);
- 	string_list_clear(&o->submodule_source_paths, 0);
+ 	for (source = o->sources; source; source = source->next)
+ 		odb_clear_loose_cache(source);
+ 
+ 	o->approximate_object_count_valid = 0;
+ 
+ 	packfile_store_reprepare(o->packfiles);
+ 
+ 	obj_read_unlock();
  }
 +
 +struct odb_transaction *odb_transaction_begin(struct object_database *odb)
 +{
 +	return object_file_transaction_begin(odb->sources); /* head of the source list */
 +}
 +
 +void odb_transaction_commit(struct odb_transaction *transaction)
 +{
 +	object_file_transaction_commit(transaction); /* NULL is passed through unchecked */
 +}
diff --cc odb.h
index 9e3dd9d6df,ab39e3605d..7a3cfd34e1
--- a/odb.h
+++ b/odb.h
@@@ -91,8 -90,8 +90,9 @@@ struct odb_source 
  };
  
  struct packed_git;
+ struct packfile_store;
  struct cached_object_entry;
 +struct odb_transaction;
  
  /*
   * The object database encapsulates access to objects in a repository. It
@@@ -192,20 -160,13 +168,26 @@@ struct object_database 
  
  struct object_database *odb_new(struct repository *repo);
  void odb_clear(struct object_database *o);
+ 
+ /*
+  * Clear caches, reload alternates and then reload object sources so that new
+  * objects may become accessible.
+  */
+ void odb_reprepare(struct object_database *o);
  
 +/*
 + * Starts an ODB transaction. Subsequent objects are written to the transaction
 + * and not committed until odb_transaction_commit() is invoked on the
 + * transaction. If the ODB already has a pending transaction, NULL is returned.
 + */
 +struct odb_transaction *odb_transaction_begin(struct object_database *odb);
 +
 +/*
 + * Commits an ODB transaction making the written objects visible. If the
 + * specified transaction is NULL, the function is a no-op.
 + */
 +void odb_transaction_commit(struct odb_transaction *transaction);
 +
  /*
   * Find source by its object directory path. Returns a `NULL` pointer in case
   * the source could not be found.