git.ipfire.org Git - thirdparty/git.git/commitdiff
Merge branch 'ps/packfile-store' into jch
author    Junio C Hamano <gitster@pobox.com>
          Sun, 5 Oct 2025 22:04:13 +0000 (15:04 -0700)
committer Junio C Hamano <gitster@pobox.com>
          Sun, 5 Oct 2025 22:04:13 +0000 (15:04 -0700)
Code clean-up around the in-core list of all the pack files and
object database(s).

* ps/packfile-store:
  packfile: refactor `get_packed_git_mru()` to work on packfile store
  packfile: refactor `get_all_packs()` to work on packfile store
  packfile: refactor `get_packed_git()` to work on packfile store
  packfile: move `get_multi_pack_index()` into "midx.c"
  packfile: introduce function to load and add packfiles
  packfile: refactor `install_packed_git()` to work on packfile store
  packfile: split up responsibilities of `reprepare_packed_git()`
  packfile: refactor `prepare_packed_git()` to work on packfile store
  packfile: reorder functions to avoid function declaration
  odb: move kept cache into `struct packfile_store`
  odb: move MRU list of packfiles into `struct packfile_store`
  odb: move packfile map into `struct packfile_store`
  odb: move initialization bit into `struct packfile_store`
  odb: move list of packfiles into `struct packfile_store`
  packfile: introduce a new `struct packfile_store`
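
Taken together, the series pulls the per-repository pack state out of `struct object_database` (the old `packed_git` list, `packed_git_mru`, and `pack_map` fields visible in the odb.c hunk below) into one structure. A minimal C sketch of what such a structure plausibly holds, inferred from the shortlog alone (field names, types, and layout are assumptions, not the committed definition):

struct packfile_store {
        struct object_database *odb;    /* assumed back-pointer to the owning database */
        struct packed_git *packs;       /* the "list of packfiles" moved out of odb */
        struct list_head mru;           /* MRU order, formerly odb->packed_git_mru */
        struct hashmap map;             /* the "packfile map", formerly odb->pack_map */
        struct packed_git **kept;       /* the "kept cache" of packs marked to keep */
        size_t kept_nr;
        unsigned initialized : 1;       /* "initialization bit" for prepare_packed_git() */
};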

builtin/fast-import.c
builtin/grep.c
builtin/pack-objects.c
builtin/pack-redundant.c
http.c
object-file.c
object-name.c
odb.c
odb.h

diff --cc builtin/fast-import.c
Simple merge
diff --cc builtin/grep.c
Simple merge
diff --cc builtin/pack-objects.c
Simple merge
diff --cc builtin/pack-redundant.c
Simple merge
diff --cc http.c
Simple merge
diff --cc object-file.c
index 17a236d2fe121bc447f73138c9db4a651b07ca22,2bc36ab3ee8cbf2d83c4b3204a7c5df132b934d6..4675c8ed6b67eb8b1f054aa7326f380d9a0a29b5
@@@ -1331,274 -1243,6 +1331,274 @@@ static int index_core(struct index_stat
        return ret;
  }
  
-       reprepare_packed_git(repo);
 +static int already_written(struct odb_transaction *transaction,
 +                         struct object_id *oid)
 +{
 +      /* The object may already exist in the repository */
 +      if (odb_has_object(transaction->odb, oid,
 +                         HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR))
 +              return 1;
 +
 +      /* Might want to keep the list sorted */
 +      for (uint32_t i = 0; i < transaction->packfile.nr_written; i++)
 +              if (oideq(&transaction->packfile.written[i]->oid, oid))
 +                      return 1;
 +
 +      /* This is a new object we need to keep */
 +      return 0;
 +}
 +
 +/* Lazily create backing packfile for the state */
 +static void prepare_packfile_transaction(struct odb_transaction *transaction,
 +                                       unsigned flags)
 +{
 +      struct transaction_packfile *state = &transaction->packfile;
 +      if (!(flags & INDEX_WRITE_OBJECT) || state->f)
 +              return;
 +
 +      state->f = create_tmp_packfile(transaction->odb->repo,
 +                                     &state->pack_tmp_name);
 +      reset_pack_idx_option(&state->pack_idx_opts);
 +
 +      /* Pretend we are going to write only one object */
 +      state->offset = write_pack_header(state->f, 1);
 +      if (!state->offset)
 +              die_errno("unable to write pack header");
 +}
 +
 +/*
 + * Read the contents from fd for size bytes, streaming it to the
 + * packfile in state while updating the hash in ctx. Signal a failure
 + * by returning a negative value when the resulting pack would exceed
 + * the pack size limit and this is not the first object in the pack,
 + * so that the caller can discard what we wrote from the current pack
 + * by truncating it and opening a new one. The caller will then call
 + * us again after rewinding the input fd.
 + *
 + * The already_hashed_to pointer is kept untouched by the caller to
 + * make sure we do not hash the same byte when we are called
 + * again. This way, the caller does not have to checkpoint its hash
 + * status before calling us just in case we ask it to call us again
 + * with a new pack.
 + */
 +static int stream_blob_to_pack(struct transaction_packfile *state,
 +                             struct git_hash_ctx *ctx, off_t *already_hashed_to,
 +                             int fd, size_t size, const char *path,
 +                             unsigned flags)
 +{
 +      git_zstream s;
 +      unsigned char ibuf[16384];
 +      unsigned char obuf[16384];
 +      unsigned hdrlen;
 +      int status = Z_OK;
 +      int write_object = (flags & INDEX_WRITE_OBJECT);
 +      off_t offset = 0;
 +
 +      git_deflate_init(&s, pack_compression_level);
 +
 +      hdrlen = encode_in_pack_object_header(obuf, sizeof(obuf), OBJ_BLOB, size);
 +      s.next_out = obuf + hdrlen;
 +      s.avail_out = sizeof(obuf) - hdrlen;
 +
 +      while (status != Z_STREAM_END) {
 +              if (size && !s.avail_in) {
 +                      size_t rsize = size < sizeof(ibuf) ? size : sizeof(ibuf);
 +                      ssize_t read_result = read_in_full(fd, ibuf, rsize);
 +                      if (read_result < 0)
 +                              die_errno("failed to read from '%s'", path);
 +                      if ((size_t)read_result != rsize)
 +                              die("failed to read %u bytes from '%s'",
 +                                  (unsigned)rsize, path);
 +                      offset += rsize;
 +                      if (*already_hashed_to < offset) {
 +                              size_t hsize = offset - *already_hashed_to;
 +                              if (rsize < hsize)
 +                                      hsize = rsize;
 +                              if (hsize)
 +                                      git_hash_update(ctx, ibuf, hsize);
 +                              *already_hashed_to = offset;
 +                      }
 +                      s.next_in = ibuf;
 +                      s.avail_in = rsize;
 +                      size -= rsize;
 +              }
 +
 +              status = git_deflate(&s, size ? 0 : Z_FINISH);
 +
 +              if (!s.avail_out || status == Z_STREAM_END) {
 +                      if (write_object) {
 +                              size_t written = s.next_out - obuf;
 +
 +                              /* would we bust the size limit? */
 +                              if (state->nr_written &&
 +                                  pack_size_limit_cfg &&
 +                                  pack_size_limit_cfg < state->offset + written) {
 +                                      git_deflate_abort(&s);
 +                                      return -1;
 +                              }
 +
 +                              hashwrite(state->f, obuf, written);
 +                              state->offset += written;
 +                      }
 +                      s.next_out = obuf;
 +                      s.avail_out = sizeof(obuf);
 +              }
 +
 +              switch (status) {
 +              case Z_OK:
 +              case Z_BUF_ERROR:
 +              case Z_STREAM_END:
 +                      continue;
 +              default:
 +                      die("unexpected deflate failure: %d", status);
 +              }
 +      }
 +      git_deflate_end(&s);
 +      return 0;
 +}
 +
 +static void flush_packfile_transaction(struct odb_transaction *transaction)
 +{
 +      struct transaction_packfile *state = &transaction->packfile;
 +      struct repository *repo = transaction->odb->repo;
 +      unsigned char hash[GIT_MAX_RAWSZ];
 +      struct strbuf packname = STRBUF_INIT;
 +      char *idx_tmp_name = NULL;
 +
 +      if (!state->f)
 +              return;
 +
 +      if (state->nr_written == 0) {
 +              close(state->f->fd);
 +              free_hashfile(state->f);
 +              unlink(state->pack_tmp_name);
 +              goto clear_exit;
 +      } else if (state->nr_written == 1) {
 +              finalize_hashfile(state->f, hash, FSYNC_COMPONENT_PACK,
 +                                CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE);
 +      } else {
 +              int fd = finalize_hashfile(state->f, hash, FSYNC_COMPONENT_PACK, 0);
 +              fixup_pack_header_footer(repo->hash_algo, fd, hash, state->pack_tmp_name,
 +                                       state->nr_written, hash,
 +                                       state->offset);
 +              close(fd);
 +      }
 +
 +      strbuf_addf(&packname, "%s/pack/pack-%s.",
 +                  repo_get_object_directory(transaction->odb->repo),
 +                  hash_to_hex_algop(hash, repo->hash_algo));
 +
 +      stage_tmp_packfiles(repo, &packname, state->pack_tmp_name,
 +                          state->written, state->nr_written, NULL,
 +                          &state->pack_idx_opts, hash, &idx_tmp_name);
 +      rename_tmp_packfile_idx(repo, &packname, &idx_tmp_name);
 +
 +      for (uint32_t i = 0; i < state->nr_written; i++)
 +              free(state->written[i]);
 +
 +clear_exit:
 +      free(idx_tmp_name);
 +      free(state->pack_tmp_name);
 +      free(state->written);
 +      memset(state, 0, sizeof(*state));
 +
 +      strbuf_release(&packname);
 +      /* Make objects we just wrote available to ourselves */
++      odb_reprepare(repo->objects);
 +}
 +
 +/*
 + * This writes the specified object to a packfile. Objects written here
 + * during the same transaction are written to the same packfile. The
 + * packfile is not flushed until the transaction is flushed. The caller
 + * is expected to ensure a valid transaction is setup for objects to be
 + * recorded to.
 + *
 + * This also bypasses the usual "convert-to-git" dance, and that is on
 + * purpose. We could write a streaming version of the converting
 + * functions and insert that before feeding the data to fast-import
 + * (or equivalent in-core API described above). However, that is
 + * somewhat complicated, as we do not know the size of the filter
 + * result, which we need to know beforehand when writing a git object.
 + * Since the primary motivation for trying to stream from the working
 + * tree file and to avoid mmaping it in core is to deal with large
 + * binary blobs, they generally do not want to get any conversion, and
 + * callers should avoid this code path when filters are requested.
 + */
 +static int index_blob_packfile_transaction(struct odb_transaction *transaction,
 +                                         struct object_id *result_oid, int fd,
 +                                         size_t size, const char *path,
 +                                         unsigned flags)
 +{
 +      struct transaction_packfile *state = &transaction->packfile;
 +      off_t seekback, already_hashed_to;
 +      struct git_hash_ctx ctx;
 +      unsigned char obuf[16384];
 +      unsigned header_len;
 +      struct hashfile_checkpoint checkpoint;
 +      struct pack_idx_entry *idx = NULL;
 +
 +      seekback = lseek(fd, 0, SEEK_CUR);
 +      if (seekback == (off_t)-1)
 +              return error("cannot find the current offset");
 +
 +      header_len = format_object_header((char *)obuf, sizeof(obuf),
 +                                        OBJ_BLOB, size);
 +      transaction->odb->repo->hash_algo->init_fn(&ctx);
 +      git_hash_update(&ctx, obuf, header_len);
 +
 +      /* Note: idx is non-NULL when we are writing */
 +      if ((flags & INDEX_WRITE_OBJECT) != 0) {
 +              CALLOC_ARRAY(idx, 1);
 +
 +              prepare_packfile_transaction(transaction, flags);
 +              hashfile_checkpoint_init(state->f, &checkpoint);
 +      }
 +
 +      already_hashed_to = 0;
 +
 +      while (1) {
 +              prepare_packfile_transaction(transaction, flags);
 +              if (idx) {
 +                      hashfile_checkpoint(state->f, &checkpoint);
 +                      idx->offset = state->offset;
 +                      crc32_begin(state->f);
 +              }
 +              if (!stream_blob_to_pack(state, &ctx, &already_hashed_to,
 +                                       fd, size, path, flags))
 +                      break;
 +              /*
 +               * Writing this object to the current pack will make
 +               * it too big; we need to truncate it, start a new
 +               * pack, and write into it.
 +               */
 +              if (!idx)
 +                      BUG("should not happen");
 +              hashfile_truncate(state->f, &checkpoint);
 +              state->offset = checkpoint.offset;
 +              flush_packfile_transaction(transaction);
 +              if (lseek(fd, seekback, SEEK_SET) == (off_t)-1)
 +                      return error("cannot seek back");
 +      }
 +      git_hash_final_oid(result_oid, &ctx);
 +      if (!idx)
 +              return 0;
 +
 +      idx->crc32 = crc32_end(state->f);
 +      if (already_written(transaction, result_oid)) {
 +              hashfile_truncate(state->f, &checkpoint);
 +              state->offset = checkpoint.offset;
 +              free(idx);
 +      } else {
 +              oidcpy(&idx->oid, result_oid);
 +              ALLOC_GROW(state->written,
 +                         state->nr_written + 1,
 +                         state->alloc_written);
 +              state->written[state->nr_written++] = idx;
 +      }
 +      return 0;
 +}
 +
  int index_fd(struct index_state *istate, struct object_id *oid,
             int fd, struct stat *st,
             enum object_type type, const char *path, unsigned flags)
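
The transaction-scoped writers above are file-local to object-file.c; callers reach them through the `odb_transaction_begin()`/`odb_transaction_commit()` pair declared in the odb.h hunk further down. A hedged usage sketch (the helper and its body are illustrative; only the begin/commit pair comes from this merge):

/*
 * Sketch: batch several object writes so they end up in one shared
 * packfile. Everything except odb_transaction_begin() and
 * odb_transaction_commit() is assumed.
 */
static void import_blobs(struct repository *repo)
{
        struct odb_transaction *tx = odb_transaction_begin(repo->objects);

        /*
         * tx is NULL if a transaction is already pending; the writes
         * below then join that pending transaction instead.
         */

        /* ... write objects through the usual entry points ... */

        odb_transaction_commit(tx); /* no-op when tx is NULL */
}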
diff --cc object-name.c
Simple merge
diff --cc odb.c
index 1fc14888919684c2ef75ef8ef40b3385c29c7f6b,65a6cc67b61ccf99e7b7293906e6772f2927da9b..00a6e71568b5985c0b344bfebbe92d1e0bff1294
--- 1/odb.c
--- 2/odb.c
+++ b/odb.c
@@@ -1035,29 -1034,34 +1034,44 @@@ void odb_clear(struct object_database *
                free((char *) o->cached_objects[i].value.buf);
        FREE_AND_NULL(o->cached_objects);
  
-       INIT_LIST_HEAD(&o->packed_git_mru);
        close_object_store(o);
+       packfile_store_free(o->packfiles);
+       o->packfiles = NULL;
+       string_list_clear(&o->submodule_source_paths, 0);
+ }
+ 
+ void odb_reprepare(struct object_database *o)
+ {
+       struct odb_source *source;
+       obj_read_lock();
  
        /*
-        * `close_object_store()` only closes the packfiles, but doesn't free
-        * them. We thus have to do this manually.
+        * Reprepare alt odbs, in case the alternates file was modified
+        * during the course of this process. This only _adds_ odbs to
+        * the linked list, so existing odbs will continue to exist for
+        * the lifetime of the process.
         */
-       for (struct packed_git *p = o->packed_git, *next; p; p = next) {
-               next = p->next;
-               free(p);
-       }
-       o->packed_git = NULL;
+       o->loaded_alternates = 0;
+       odb_prepare_alternates(o);
  
-       hashmap_clear(&o->pack_map);
-       string_list_clear(&o->submodule_source_paths, 0);
+       for (source = o->sources; source; source = source->next)
+               odb_clear_loose_cache(source);
+       o->approximate_object_count_valid = 0;
+       packfile_store_reprepare(o->packfiles);
+       obj_read_unlock();
  }
 +
 +struct odb_transaction *odb_transaction_begin(struct object_database *odb)
 +{
 +      return object_file_transaction_begin(odb->sources);
 +}
 +
 +void odb_transaction_commit(struct odb_transaction *transaction)
 +{
 +      object_file_transaction_commit(transaction);
 +}
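
`odb_reprepare()` above takes over from the old `reprepare_packed_git()`: under `obj_read_lock()` it reloads alternates, clears the per-source loose-object caches, invalidates the approximate object count, and re-scans packs via `packfile_store_reprepare()`. A hedged sketch of the classic caller pattern this serves (the helper name and the flags value 0 are assumptions):

/*
 * Sketch: retry a lookup after reloading object sources, for the
 * case where a concurrent writer added a pack behind our back.
 */
static int has_object_with_retry(struct repository *repo,
                                 const struct object_id *oid)
{
        if (odb_has_object(repo->objects, oid, 0))
                return 1;
        odb_reprepare(repo->objects); /* pick up newly appeared packs */
        return odb_has_object(repo->objects, oid, 0);
}

In-tree callers can get a similar packed-storage recheck in a single call via the `HAS_OBJECT_RECHECK_PACKED` flag seen in the object-file.c hunk above.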
diff --cc odb.h
index 9e3dd9d6df9db78ce4388bb322b60fea35f7041f,ab39e3605d541997d8b12fbf14bc9e78def515a5..7a3cfd34e149247491d242119db42d866b4efedb
--- 1/odb.h
--- 2/odb.h
+++ b/odb.h
@@@ -91,8 -90,8 +90,9 @@@ struct odb_source 
  };
  
  struct packed_git;
+ struct packfile_store;
  struct cached_object_entry;
 +struct odb_transaction;
  
  /*
   * The object database encapsulates access to objects in a repository. It
@@@ -192,20 -160,13 +168,26 @@@ struct object_database 
  
  struct object_database *odb_new(struct repository *repo);
  void odb_clear(struct object_database *o);
+ /*
+  * Clear caches, reload alternates and then reload object sources so that new
+  * objects may become accessible.
+  */
+ void odb_reprepare(struct object_database *o);
  
 +/*
 + * Starts an ODB transaction. Subsequent objects are written to the transaction
 + * and not committed until odb_transaction_commit() is invoked on the
 + * transaction. If the ODB already has a pending transaction, NULL is returned.
 + */
 +struct odb_transaction *odb_transaction_begin(struct object_database *odb);
 +
 +/*
 + * Commits an ODB transaction making the written objects visible. If the
 + * specified transaction is NULL, the function is a no-op.
 + */
 +void odb_transaction_commit(struct odb_transaction *transaction);
 +
  /*
   * Find source by its object directory path. Returns a `NULL` pointer in case
   * the source could not be found.
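
The begin/commit contract documented above composes across call chains: an inner begin while a transaction is pending returns NULL, and committing NULL is a no-op, so only the outermost owner actually flushes. A hedged sketch of that rule (both helpers are illustrative):

static void inner_step(struct object_database *odb)
{
        /* NULL here whenever a transaction is already pending */
        struct odb_transaction *tx = odb_transaction_begin(odb);
        /* ... write objects; they join the pending transaction ... */
        odb_transaction_commit(tx); /* no-op for NULL */
}

static void outer_step(struct object_database *odb)
{
        struct odb_transaction *tx = odb_transaction_begin(odb);
        inner_step(odb);
        odb_transaction_commit(tx); /* the single flush point */
}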