git.ipfire.org Git - thirdparty/git.git/commitdiff
pack-objects: use finalize_object_file() to rename pack/idx/etc
author Taylor Blau <me@ttaylorr.com>
Thu, 26 Sep 2024 15:22:41 +0000 (11:22 -0400)
committer Junio C Hamano <gitster@pobox.com>
Fri, 27 Sep 2024 18:27:47 +0000 (11:27 -0700)
In most places that write files to the object database (even packfiles
via index-pack or fast-import), we use finalize_object_file(). This
prefers link()/unlink() over rename(), because it means we will prefer
data that is already in the repository to data that we are newly
writing.

We should do the same thing in pack-objects. Even though we don't think
of it as accepting outside data (and thus not being susceptible to
collision attacks), in theory a determined attacker could present just
the right set of objects to cause an incremental repack to generate
a pack with their desired hash.

This has some test and real-world fallout, as seen in the adjustment to
t5303 below. That test script assumes that we can "fix" corruption by
repacking into a good state, including when the pack generated by that
repack operation collides with a (corrupted) pack with the same hash.
This violates our assumption from the previous adjustments to
finalize_object_file() that if we're moving a new file over an existing
one, then since their checksums match, so too must their contents.

This makes "fixing" corruption like this a more explicit operation,
since the test (and users, who may fix real-life corruption using a
similar technique) must first move the broken contents out of the way.

Note also that we now call adjust_shared_perm() twice. We already call
adjust_shared_perm() in stage_tmp_packfiles(), and now call it again in
finalize_object_file(). This is somewhat wasteful, but cleaning up the
existing calls to adjust_shared_perm() is tricky (because sometimes
we're writing to a tmpfile, and sometimes we're writing directly into
the final destination), so let's tolerate some minor waste until we can
more carefully clean up the now-redundant calls.

Co-authored-by: Jeff King <peff@peff.net>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
pack-write.c
t/t5303-pack-corruption-resilience.sh

index d07f03d0ab060ac3c698adf265c100864849bd76..e5beecd3a4f0a3a7b2eebf446e2527cfd93c7ce7 100644 (file)
@@ -8,6 +8,7 @@
 #include "csum-file.h"
 #include "remote.h"
 #include "chunk-format.h"
+#include "object-file.h"
 #include "pack-mtimes.h"
 #include "pack-objects.h"
 #include "pack-revindex.h"
@@ -527,9 +528,9 @@ static void rename_tmp_packfile(struct strbuf *name_prefix, const char *source,
        size_t name_prefix_len = name_prefix->len;
 
        strbuf_addstr(name_prefix, ext);
-       if (rename(source, name_prefix->buf))
-               die_errno("unable to rename temporary file to '%s'",
-                         name_prefix->buf);
+       if (finalize_object_file(source, name_prefix->buf))
+               die("unable to rename temporary file to '%s'",
+                   name_prefix->buf);
        strbuf_setlen(name_prefix, name_prefix_len);
 }
 
index 61469ef4a681200044f45f0a98f54f39dec2a4c2..e6a43ec9ae3c6df5d5f3537ed40e7b0185eee1dc 100755 (executable)
@@ -44,9 +44,14 @@ create_new_pack() {
 }
 
 do_repack() {
+    for f in $pack.*
+    do
+           mv $f "$(echo $f | sed -e 's/pack-/pack-corrupt-/')" || return 1
+    done &&
     pack=$(printf "$blob_1\n$blob_2\n$blob_3\n" |
           git pack-objects $@ .git/objects/pack/pack) &&
-    pack=".git/objects/pack/pack-${pack}"
+    pack=".git/objects/pack/pack-${pack}" &&
+    rm -f .git/objects/pack/pack-corrupt-*
 }
 
 do_corrupt_object() {