git.ipfire.org Git - thirdparty/git.git/commitdiff
bulk-checkin: require transaction for index_blob_bulk_checkin()
author Justin Tobler <jltobler@gmail.com>
Fri, 22 Aug 2025 21:34:59 +0000 (16:34 -0500)
committer Junio C Hamano <gitster@pobox.com>
Mon, 25 Aug 2025 16:48:13 +0000 (09:48 -0700)
The bulk-checkin subsystem provides a mechanism to write blobs directly
to a packfile via `index_blob_bulk_checkin()`. If there is an ongoing
transaction when invoked, objects written via this function are stored
in the same packfile. The packfile is not flushed until the transaction
itself is flushed. If there is no transaction, the single object is
written to a packfile and immediately flushed. This complicates
`index_blob_bulk_checkin()` as it cannot reliably use the provided
transaction to get the associated repository.

Update `index_blob_bulk_checkin()` to assume that a valid transaction is
always provided. Callers are now expected to ensure a transaction is set
up beforehand. With this simplification, `deflate_blob_bulk_checkin()`
is no longer needed as a standalone internal function and is combined
with `index_blob_bulk_checkin()`. The single call site in
`object-file.c:index_fd()` is updated accordingly. Due to how
`{begin,end}_odb_transaction()` handles nested transactions, a new
transaction is only created and committed if there is not already an
ongoing transaction.

Signed-off-by: Justin Tobler <jltobler@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
bulk-checkin.c
bulk-checkin.h
object-file.c

index 53a20a2d92fd77a18fa7de3b46487228726ea9f6..542d8125a863e2cc3e193a4f9a58972fcf7f2359 100644 (file)
@@ -254,11 +254,11 @@ static void prepare_to_stream(struct bulk_checkin_packfile *state,
                die_errno("unable to write pack header");
 }
 
-static int deflate_blob_to_pack(struct bulk_checkin_packfile *state,
-                               struct object_id *result_oid,
-                               int fd, size_t size,
-                               const char *path, unsigned flags)
+int index_blob_bulk_checkin(struct odb_transaction *transaction,
+                           struct object_id *result_oid, int fd, size_t size,
+                           const char *path, unsigned flags)
 {
+       struct bulk_checkin_packfile *state = &transaction->packfile;
        off_t seekback, already_hashed_to;
        struct git_hash_ctx ctx;
        unsigned char obuf[16384];
@@ -361,25 +361,6 @@ void fsync_loose_object_bulk_checkin(struct odb_transaction *transaction,
        }
 }
 
-int index_blob_bulk_checkin(struct odb_transaction *transaction,
-                           struct object_id *oid, int fd, size_t size,
-                           const char *path, unsigned flags)
-{
-       int status;
-
-       if (transaction) {
-               status = deflate_blob_to_pack(&transaction->packfile, oid, fd,
-                                             size, path, flags);
-       } else {
-               struct bulk_checkin_packfile state = { 0 };
-
-               status = deflate_blob_to_pack(&state, oid, fd, size, path, flags);
-               flush_bulk_checkin_packfile(&state);
-       }
-
-       return status;
-}
-
 struct odb_transaction *begin_odb_transaction(struct object_database *odb)
 {
        if (!odb->transaction) {
index 16254ce6a704f6e66e5e83298306f0a14716e3f3..ac8887f476b496ba11a36e325bf34277b6a2c476 100644 (file)
@@ -14,8 +14,11 @@ void fsync_loose_object_bulk_checkin(struct odb_transaction *transaction,
                                     int fd, const char *filename);
 
 /*
- * This creates one packfile per large blob unless bulk-checkin
- * machinery is "plugged".
+ * This writes the specified object to a packfile. Objects written here
+ * during the same transaction are written to the same packfile. The
+ * packfile is not flushed until the transaction is flushed. The caller
+ * is expected to ensure a valid transaction is setup for objects to be
+ * recorded to.
  *
  * This also bypasses the usual "convert-to-git" dance, and that is on
  * purpose. We could write a streaming version of the converting
index 1740aa2b2e35f3161d1cae583743317e5f5d101c..bc15af424509490720c399b7f29fc19e7867a65c 100644 (file)
@@ -1253,19 +1253,26 @@ int index_fd(struct index_state *istate, struct object_id *oid,
         * Call xsize_t() only when needed to avoid potentially unnecessary
         * die() for large files.
         */
-       if (type == OBJ_BLOB && path && would_convert_to_git_filter_fd(istate, path))
+       if (type == OBJ_BLOB && path && would_convert_to_git_filter_fd(istate, path)) {
                ret = index_stream_convert_blob(istate, oid, fd, path, flags);
-       else if (!S_ISREG(st->st_mode))
+       } else if (!S_ISREG(st->st_mode)) {
                ret = index_pipe(istate, oid, fd, type, path, flags);
-       else if ((st->st_size >= 0 && (size_t) st->st_size <= repo_settings_get_big_file_threshold(istate->repo)) ||
-                type != OBJ_BLOB ||
-                (path && would_convert_to_git(istate, path)))
+       } else if ((st->st_size >= 0 &&
+                   (size_t)st->st_size <= repo_settings_get_big_file_threshold(istate->repo)) ||
+                  type != OBJ_BLOB ||
+                  (path && would_convert_to_git(istate, path))) {
                ret = index_core(istate, oid, fd, xsize_t(st->st_size),
                                 type, path, flags);
-       else
-               ret = index_blob_bulk_checkin(the_repository->objects->transaction,
+       } else {
+               struct odb_transaction *transaction;
+
+               transaction = begin_odb_transaction(the_repository->objects);
+               ret = index_blob_bulk_checkin(transaction,
                                              oid, fd, xsize_t(st->st_size),
                                              path, flags);
+               end_odb_transaction(transaction);
+       }
+
        close(fd);
        return ret;
 }