[--revs [--unpacked | --all]] [--keep-pack=<pack-name>]
[--cruft] [--cruft-expiration=<time>]
[--stdout [--filter=<filter-spec>] | <base-name>]
- [--shallow] [--keep-true-parents] [--[no-]sparse] < <object-list>
+ [--shallow] [--keep-true-parents] [--[no-]sparse]
+ [--name-hash-version=<n>] < <object-list>
DESCRIPTION
Restrict delta matches based on "islands". See DELTA ISLANDS
below.
+--name-hash-version=<n>::
+ While performing delta compression, Git groups objects that may be
+ similar based on heuristics using the path to that object. While
+ grouping objects by an exact path match is good for paths with
+ many versions, there are benefits for finding delta pairs across
+ different full paths. Git collects objects by type and then by a
+ "name hash" of the path and then by size, hoping to group objects
+ that will compress well together.
++
+The default name hash version is `1`, which prioritizes hash locality by
+considering the final bytes of the path as providing the maximum magnitude
+to the hash function. This version excels at distinguishing short paths
+and finding renames across directories. However, the hash function depends
+primarily on the final 16 bytes of the path. If there are many paths in
+the repo that have the same final 16 bytes and differ only by parent
+directory, then this name-hash may lead to too many collisions and cause
+poor results. At the moment, this version is required when writing
+reachability bitmap files with `--write-bitmap-index`.
++
+The name hash version `2` has similar locality features as version `1`,
+except it considers each path component separately and overlays the hashes
+with a shift. This still prioritizes the final bytes of the path, but also
+"salts" the lower bits of the hash using the parent directory names. This
+method allows for some of the locality benefits of version `1` while
+breaking most of the collisions from a similarly-named file appearing in
+many different directories. At the moment, this version is not allowed
+when writing reachability bitmap files with `--write-bitmap-index` and it
+will be automatically changed to version `1`.
+
DELTA ISLANDS
-------------
static struct oidmap configured_exclusions;
static struct oidset excluded_by_config;
+static int name_hash_version = 1;
+
+/**
+ * Check whether the name_hash_version chosen by user input is appropriate,
+ * and also validate whether it is compatible with other features.
+ */
+static void validate_name_hash_version(void)
+{
+ if (name_hash_version < 1 || name_hash_version > 2)
+ die(_("invalid --name-hash-version option: %d"), name_hash_version);
+ if (write_bitmap_index && name_hash_version != 1) {
+ warning(_("currently, --write-bitmap-index requires --name-hash-version=1"));
+ name_hash_version = 1;
+ }
+}
+
+static inline uint32_t pack_name_hash_fn(const char *name)
+{
+ switch (name_hash_version) {
+ case 1:
+ return pack_name_hash(name);
+
+ case 2:
+ return pack_name_hash_v2((const unsigned char *)name);
+
+ default:
+ BUG("invalid name-hash version: %d", name_hash_version);
+ }
+}
/*
* stats
return 0;
}
- create_object_entry(oid, type, pack_name_hash(name),
+ create_object_entry(oid, type, pack_name_hash_fn(name),
exclude, name && no_try_delta(name),
found_pack, found_offset);
return 1;
{
struct pbase_tree *it;
size_t cmplen;
- unsigned hash = pack_name_hash(name);
+ unsigned hash = pack_name_hash_fn(name);
if (!num_preferred_base || check_pbase_path(hash))
return;
* here using a now in order to perhaps improve the delta selection
* process.
*/
- oe->hash = pack_name_hash(name);
+ oe->hash = pack_name_hash_fn(name);
oe->no_try_delta = name && no_try_delta(name);
stdin_packs_hints_nr++;
entry = packlist_find(&to_pack, oid);
if (entry) {
if (name) {
- entry->hash = pack_name_hash(name);
+ entry->hash = pack_name_hash_fn(name);
entry->no_try_delta = no_try_delta(name);
}
} else {
return;
}
- entry = create_object_entry(oid, type, pack_name_hash(name),
+ entry = create_object_entry(oid, type, pack_name_hash_fn(name),
0, name && no_try_delta(name),
pack, offset);
}
if (!(bitmap_git = prepare_bitmap_walk(revs, 0)))
return -1;
+ /*
+ * For now, force the name-hash version to be 1 since that
+ * is the version implied by the bitmap format. Later, the
+ * format can include this version explicitly in its format,
+ * allowing readers to know the version that was used during
+ * the bitmap write.
+ */
+ name_hash_version = 1;
+
if (pack_options_allow_reuse())
reuse_partial_packfile_from_bitmap(bitmap_git,
&reuse_packfiles,
OPT_STRING_LIST(0, "uri-protocol", &uri_protocols,
N_("protocol"),
N_("exclude any configured uploadpack.blobpackfileuri with this protocol")),
+ OPT_INTEGER(0, "name-hash-version", &name_hash_version,
+ N_("use the specified name-hash function to group similar objects")),
OPT_END(),
};
if (pack_to_stdout || !rev_list_all)
write_bitmap_index = 0;
+ validate_name_hash_version();
+
if (use_delta_islands)
strvec_push(&rp, "--topo-order");
'
done
+test_expect_success 'valid and invalid --name-hash-versions' '
+ # Valid values are hard to verify other than "do not fail".
+ # Performance tests will be more valuable to validate these versions.
+ for value in 1 2
+ do
+ git pack-objects base --all --name-hash-version=$value || return 1
+ done &&
+
+ # Invalid values have clear post-conditions.
+ for value in -1 0 3
+ do
+ test_must_fail git pack-objects base --all --name-hash-version=$value 2>err &&
+ test_grep "invalid --name-hash-version option" err || return 1
+ done
+'
+
+# The following test is not necessarily a permanent choice, but since we do not
+# have a "name hash version" bit in the .bitmap file format, we cannot write the
+# hash values into the .bitmap file without risking breakage later.
+#
+# TODO: Make these compatible in the future and replace this test with the
+# expected behavior when both are specified.
+test_expect_success '--name-hash-version=2 and --write-bitmap-index are incompatible' '
+ git pack-objects base --all --name-hash-version=2 --write-bitmap-index 2>err &&
+ test_grep "currently, --write-bitmap-index requires --name-hash-version=1" err &&
+
+ # --stdout option silently removes --write-bitmap-index
+ git pack-objects --stdout --all --name-hash-version=2 --write-bitmap-index >out 2>err &&
+ ! test_grep "currently, --write-bitmap-index requires --name-hash-version=1" err
+'
+
test_done