]> git.ipfire.org Git - thirdparty/git.git/commitdiff
core.fsync: introduce granular fsync control infrastructure
authorNeeraj Singh <neerajsi@microsoft.com>
Thu, 10 Mar 2022 22:43:21 +0000 (22:43 +0000)
committerJunio C Hamano <gitster@pobox.com>
Thu, 10 Mar 2022 23:10:22 +0000 (15:10 -0800)
This commit introduces the infrastructure for the core.fsync
configuration knob. The repository components we want to sync
are identified by flags so that we can turn on or off syncing
for specific components.

If core.fsyncObjectFiles is set and the core.fsync configuration
also includes FSYNC_COMPONENT_LOOSE_OBJECT, we will fsync any
loose objects. This picks the strictest data integrity behavior
if core.fsync and core.fsyncObjectFiles are set to conflicting values.

This change introduces the currently unused fsync_component
helper, which will be used by a later patch that adds fsyncing to
the refs backend.

Actual configuration and documentation of the fsync components
list are in other patches in the series to separate review of
the underlying mechanism from the policy of how it's configured.

Helped-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Neeraj Singh <neerajsi@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
15 files changed:
builtin/fast-import.c
builtin/index-pack.c
builtin/pack-objects.c
bulk-checkin.c
cache.h
commit-graph.c
csum-file.c
csum-file.h
environment.c
midx.c
object-file.c
pack-bitmap-write.c
pack-write.c
read-cache.c
write-or-die.c

index b7105fcad9bae67c93c8087a8206ec962793a4ca..f2c036a89556e876b564097edd23ac97fedeab56 100644 (file)
@@ -865,7 +865,7 @@ static void end_packfile(void)
                struct tag *t;
 
                close_pack_windows(pack_data);
-               finalize_hashfile(pack_file, cur_pack_oid.hash, 0);
+               finalize_hashfile(pack_file, cur_pack_oid.hash, FSYNC_COMPONENT_PACK, 0);
                fixup_pack_header_footer(pack_data->pack_fd, pack_data->hash,
                                         pack_data->pack_name, object_count,
                                         cur_pack_oid.hash, pack_size);
index c45273de3b124942553647ae0f216b4e6a3bb3d1..c5f12f14df5d76055bb21a4994eacdd3c2ff1dff 100644 (file)
@@ -1290,7 +1290,7 @@ static void conclude_pack(int fix_thin_pack, const char *curr_pack, unsigned cha
                            nr_objects - nr_objects_initial);
                stop_progress_msg(&progress, msg.buf);
                strbuf_release(&msg);
-               finalize_hashfile(f, tail_hash, 0);
+               finalize_hashfile(f, tail_hash, FSYNC_COMPONENT_PACK, 0);
                hashcpy(read_hash, pack_hash);
                fixup_pack_header_footer(output_fd, pack_hash,
                                         curr_pack, nr_objects,
@@ -1512,7 +1512,7 @@ static void final(const char *final_pack_name, const char *curr_pack_name,
        if (!from_stdin) {
                close(input_fd);
        } else {
-               fsync_or_die(output_fd, curr_pack_name);
+               fsync_component_or_die(FSYNC_COMPONENT_PACK, output_fd, curr_pack_name);
                err = close(output_fd);
                if (err)
                        die_errno(_("error while closing pack file"));
index 178e611f09ddcddd9f5652cf25db258877e04d34..c14fee8e99f56b9512264b62beedbcf208929f8d 100644 (file)
@@ -1199,16 +1199,26 @@ static void write_pack_file(void)
                        display_progress(progress_state, written);
                }
 
-               /*
-                * Did we write the wrong # entries in the header?
-                * If so, rewrite it like in fast-import
-                */
                if (pack_to_stdout) {
-                       finalize_hashfile(f, hash, CSUM_HASH_IN_STREAM | CSUM_CLOSE);
+                       /*
+                        * We never fsync when writing to stdout since we may
+                        * not be writing to an actual pack file. For instance,
+                        * the upload-pack code passes a pipe here. Calling
+                        * fsync on a pipe results in unnecessary
+                        * synchronization with the reader on some platforms.
+                        */
+                       finalize_hashfile(f, hash, FSYNC_COMPONENT_NONE,
+                                         CSUM_HASH_IN_STREAM | CSUM_CLOSE);
                } else if (nr_written == nr_remaining) {
-                       finalize_hashfile(f, hash, CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE);
+                       finalize_hashfile(f, hash, FSYNC_COMPONENT_PACK,
+                                         CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE);
                } else {
-                       int fd = finalize_hashfile(f, hash, 0);
+                       /*
+                        * If we wrote the wrong number of entries in the
+                        * header, rewrite it like in fast-import.
+                        */
+
+                       int fd = finalize_hashfile(f, hash, FSYNC_COMPONENT_PACK, 0);
                        fixup_pack_header_footer(fd, hash, pack_tmp_name,
                                                 nr_written, hash, offset);
                        close(fd);
index 8785b2ac80699255ae8abd0801922ba025235309..a2cf9dcbc8d4a20289fa237a613c10c1226b9786 100644 (file)
@@ -53,9 +53,10 @@ static void finish_bulk_checkin(struct bulk_checkin_state *state)
                unlink(state->pack_tmp_name);
                goto clear_exit;
        } else if (state->nr_written == 1) {
-               finalize_hashfile(state->f, hash, CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE);
+               finalize_hashfile(state->f, hash, FSYNC_COMPONENT_PACK,
+                                 CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE);
        } else {
-               int fd = finalize_hashfile(state->f, hash, 0);
+               int fd = finalize_hashfile(state->f, hash, FSYNC_COMPONENT_PACK, 0);
                fixup_pack_header_footer(fd, hash, state->pack_tmp_name,
                                         state->nr_written, hash,
                                         state->offset);
diff --git a/cache.h b/cache.h
index 82f0194a3dd5b52017d3f720de12ae9347b7aaa3..7ac1959258d5b02e82dec11939ee3fdf3c02a206 100644 (file)
--- a/cache.h
+++ b/cache.h
@@ -993,6 +993,27 @@ void reset_shared_repository(void);
 extern int read_replace_refs;
 extern char *git_replace_ref_base;
 
+/*
+ * These values are used to help identify parts of a repository to fsync.
+ * FSYNC_COMPONENT_NONE identifies data that will not be a persistent part of the
+ * repository and so shouldn't be fsynced.
+ */
+enum fsync_component {
+       FSYNC_COMPONENT_NONE,
+       FSYNC_COMPONENT_LOOSE_OBJECT            = 1 << 0,
+       FSYNC_COMPONENT_PACK                    = 1 << 1,
+       FSYNC_COMPONENT_PACK_METADATA           = 1 << 2,
+       FSYNC_COMPONENT_COMMIT_GRAPH            = 1 << 3,
+};
+
+#define FSYNC_COMPONENTS_DEFAULT (FSYNC_COMPONENT_PACK | \
+                                 FSYNC_COMPONENT_PACK_METADATA | \
+                                 FSYNC_COMPONENT_COMMIT_GRAPH)
+
+/*
+ * A bitmask indicating which components of the repo should be fsynced.
+ */
+extern enum fsync_component fsync_components;
 extern int fsync_object_files;
 extern int use_fsync;
 
@@ -1707,6 +1728,8 @@ int copy_file_with_time(const char *dst, const char *src, int mode);
 
 void write_or_die(int fd, const void *buf, size_t count);
 void fsync_or_die(int fd, const char *);
+int fsync_component(enum fsync_component component, int fd);
+void fsync_component_or_die(enum fsync_component component, int fd, const char *msg);
 
 ssize_t read_in_full(int fd, void *buf, size_t count);
 ssize_t write_in_full(int fd, const void *buf, size_t count);
index 265c010122e8edefc141f1dec8f506078bbbee19..64897f57d9f2ebc9503994c34eb3ced714352fda 100644 (file)
@@ -1942,7 +1942,8 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx)
        }
 
        close_commit_graph(ctx->r->objects);
-       finalize_hashfile(f, file_hash, CSUM_HASH_IN_STREAM | CSUM_FSYNC);
+       finalize_hashfile(f, file_hash, FSYNC_COMPONENT_COMMIT_GRAPH,
+                         CSUM_HASH_IN_STREAM | CSUM_FSYNC);
        free_chunkfile(cf);
 
        if (ctx->split) {
index 26e8a6df44e9415ffe02fc612ec045c6fa32b032..59ef3398ca2b01676055fff40b80ac7896f87ffc 100644 (file)
@@ -58,7 +58,8 @@ static void free_hashfile(struct hashfile *f)
        free(f);
 }
 
-int finalize_hashfile(struct hashfile *f, unsigned char *result, unsigned int flags)
+int finalize_hashfile(struct hashfile *f, unsigned char *result,
+                     enum fsync_component component, unsigned int flags)
 {
        int fd;
 
@@ -69,7 +70,7 @@ int finalize_hashfile(struct hashfile *f, unsigned char *result, unsigned int fl
        if (flags & CSUM_HASH_IN_STREAM)
                flush(f, f->buffer, the_hash_algo->rawsz);
        if (flags & CSUM_FSYNC)
-               fsync_or_die(f->fd, f->name);
+               fsync_component_or_die(component, f->fd, f->name);
        if (flags & CSUM_CLOSE) {
                if (close(f->fd))
                        die_errno("%s: sha1 file error on close", f->name);
index 291215b34eb12bee5583913ad93388c1f9175c0f..0d29f528fbcb51df13eac225c0e2cc120198bd3f 100644 (file)
@@ -1,6 +1,7 @@
 #ifndef CSUM_FILE_H
 #define CSUM_FILE_H
 
+#include "cache.h"
 #include "hash.h"
 
 struct progress;
@@ -38,7 +39,7 @@ int hashfile_truncate(struct hashfile *, struct hashfile_checkpoint *);
 struct hashfile *hashfd(int fd, const char *name);
 struct hashfile *hashfd_check(const char *name);
 struct hashfile *hashfd_throughput(int fd, const char *name, struct progress *tp);
-int finalize_hashfile(struct hashfile *, unsigned char *, unsigned int);
+int finalize_hashfile(struct hashfile *, unsigned char *, enum fsync_component, unsigned int);
 void hashwrite(struct hashfile *, const void *, unsigned int);
 void hashflush(struct hashfile *f);
 void crc32_begin(struct hashfile *);
index 3e3620d759fbaefc89067b650e58faf47ced64f4..36ca5fb2e778ed75dff295133827e14bbb80444d 100644 (file)
@@ -45,6 +45,7 @@ int pack_compression_level = Z_DEFAULT_COMPRESSION;
 int fsync_object_files;
 int use_fsync = -1;
 enum fsync_method fsync_method = FSYNC_METHOD_DEFAULT;
+enum fsync_component fsync_components = FSYNC_COMPONENTS_DEFAULT;
 size_t packed_git_window_size = DEFAULT_PACKED_GIT_WINDOW_SIZE;
 size_t packed_git_limit = DEFAULT_PACKED_GIT_LIMIT;
 size_t delta_base_cache_limit = 96 * 1024 * 1024;
diff --git a/midx.c b/midx.c
index 865170bad05a04f15d051fc871529022467dc6f8..107365d2114ce2945d4221ff853df97829b22d05 100644 (file)
--- a/midx.c
+++ b/midx.c
@@ -1438,7 +1438,8 @@ static int write_midx_internal(const char *object_dir,
        write_midx_header(f, get_num_chunks(cf), ctx.nr - dropped_packs);
        write_chunkfile(cf, &ctx);
 
-       finalize_hashfile(f, midx_hash, CSUM_FSYNC | CSUM_HASH_IN_STREAM);
+       finalize_hashfile(f, midx_hash, FSYNC_COMPONENT_PACK_METADATA,
+                         CSUM_FSYNC | CSUM_HASH_IN_STREAM);
        free_chunkfile(cf);
 
        if (flags & MIDX_WRITE_REV_INDEX &&
index 03bd6a3baf3ed35f8636b0318f165c4017455f59..e3f0bf27ff13e3fadc7b859b859c259f8c3c9231 100644 (file)
@@ -1849,11 +1849,16 @@ int hash_object_file(const struct git_hash_algo *algo, const void *buf,
 /* Finalize a file on disk, and close it. */
 static void close_loose_object(int fd)
 {
-       if (!the_repository->objects->odb->will_destroy) {
-               if (fsync_object_files)
-                       fsync_or_die(fd, "loose object file");
-       }
+       if (the_repository->objects->odb->will_destroy)
+               goto out;
 
+       if (fsync_object_files > 0)
+               fsync_or_die(fd, "loose object file");
+       else
+               fsync_component_or_die(FSYNC_COMPONENT_LOOSE_OBJECT, fd,
+                                      "loose object file");
+
+out:
        if (close(fd) != 0)
                die_errno(_("error when closing loose object file"));
 }
index cab3eaa2acd141d306eeb820e6278561555a33cd..cf681547f2e626b324fd5bcc9c33fbec8f449d81 100644 (file)
@@ -719,7 +719,8 @@ void bitmap_writer_finish(struct pack_idx_entry **index,
        if (options & BITMAP_OPT_HASH_CACHE)
                write_hash_cache(f, index, index_nr);
 
-       finalize_hashfile(f, NULL, CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE);
+       finalize_hashfile(f, NULL, FSYNC_COMPONENT_PACK_METADATA,
+                         CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE);
 
        if (adjust_shared_perm(tmp_file.buf))
                die_errno("unable to make temporary bitmap file readable");
index a5846f3a346dd032c11cf047af8fc65d57c726e0..51812cb1299222cbcbc976ef2b8304f7e25bfbff 100644 (file)
@@ -159,9 +159,9 @@ const char *write_idx_file(const char *index_name, struct pack_idx_entry **objec
        }
 
        hashwrite(f, sha1, the_hash_algo->rawsz);
-       finalize_hashfile(f, NULL, CSUM_HASH_IN_STREAM | CSUM_CLOSE |
-                                   ((opts->flags & WRITE_IDX_VERIFY)
-                                   ? 0 : CSUM_FSYNC));
+       finalize_hashfile(f, NULL, FSYNC_COMPONENT_PACK_METADATA,
+                         CSUM_HASH_IN_STREAM | CSUM_CLOSE |
+                         ((opts->flags & WRITE_IDX_VERIFY) ? 0 : CSUM_FSYNC));
        return index_name;
 }
 
@@ -281,8 +281,9 @@ const char *write_rev_file_order(const char *rev_name,
        if (rev_name && adjust_shared_perm(rev_name) < 0)
                die(_("failed to make %s readable"), rev_name);
 
-       finalize_hashfile(f, NULL, CSUM_HASH_IN_STREAM | CSUM_CLOSE |
-                                   ((flags & WRITE_IDX_VERIFY) ? 0 : CSUM_FSYNC));
+       finalize_hashfile(f, NULL, FSYNC_COMPONENT_PACK_METADATA,
+                         CSUM_HASH_IN_STREAM | CSUM_CLOSE |
+                         ((flags & WRITE_IDX_VERIFY) ? 0 : CSUM_FSYNC));
 
        return rev_name;
 }
@@ -390,7 +391,7 @@ void fixup_pack_header_footer(int pack_fd,
                the_hash_algo->final_fn(partial_pack_hash, &old_hash_ctx);
        the_hash_algo->final_fn(new_pack_hash, &new_hash_ctx);
        write_or_die(pack_fd, new_pack_hash, the_hash_algo->rawsz);
-       fsync_or_die(pack_fd, pack_name);
+       fsync_component_or_die(FSYNC_COMPONENT_PACK, pack_fd, pack_name);
 }
 
 char *index_pack_lockfile(int ip_out, int *is_well_formed)
index 79b9b99ebf7d65b6663014deaadea12a039db29d..df869691fd4fd00514eef3650cf234d6b3d276ee 100644 (file)
@@ -3089,7 +3089,7 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile,
                        return -1;
        }
 
-       finalize_hashfile(f, istate->oid.hash, CSUM_HASH_IN_STREAM);
+       finalize_hashfile(f, istate->oid.hash, FSYNC_COMPONENT_NONE, CSUM_HASH_IN_STREAM);
        if (close_tempfile_gently(tempfile)) {
                error(_("could not close '%s'"), get_tempfile_path(tempfile));
                return -1;
index 9faa5f9f5635e10d2620a2f5f548962b254767f3..c4fd91b5b4308aa13cc8e2407df1e826470f54dd 100644 (file)
@@ -56,21 +56,39 @@ void fprintf_or_die(FILE *f, const char *fmt, ...)
        }
 }
 
-void fsync_or_die(int fd, const char *msg)
+static int maybe_fsync(int fd)
 {
        if (use_fsync < 0)
                use_fsync = git_env_bool("GIT_TEST_FSYNC", 1);
        if (!use_fsync)
-               return;
+               return 0;
 
        if (fsync_method == FSYNC_METHOD_WRITEOUT_ONLY &&
            git_fsync(fd, FSYNC_WRITEOUT_ONLY) >= 0)
-               return;
+               return 0;
+
+       return git_fsync(fd, FSYNC_HARDWARE_FLUSH);
+}
 
-       if (git_fsync(fd, FSYNC_HARDWARE_FLUSH) < 0)
+void fsync_or_die(int fd, const char *msg)
+{
+       if (maybe_fsync(fd) < 0)
                die_errno("fsync error on '%s'", msg);
 }
 
+int fsync_component(enum fsync_component component, int fd)
+{
+       if (fsync_components & component)
+               return maybe_fsync(fd);
+       return 0;
+}
+
+void fsync_component_or_die(enum fsync_component component, int fd, const char *msg)
+{
+       if (fsync_components & component)
+               fsync_or_die(fd, msg);
+}
+
 void write_or_die(int fd, const void *buf, size_t count)
 {
        if (write_in_full(fd, buf, count) < 0) {