From: Junio C Hamano Date: Wed, 23 May 2018 05:38:19 +0000 (+0900) Subject: Merge branch 'nd/pack-objects-pack-struct' X-Git-Tag: v2.18.0-rc0~49 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=ad635e82d600e1b725a2e65d69114140db6bc876;p=thirdparty%2Fgit.git Merge branch 'nd/pack-objects-pack-struct' "git pack-objects" needs to allocate tons of "struct object_entry" while doing its work, and shrinking its size helps the performance quite a bit. * nd/pack-objects-pack-struct: ci: exercise the whole test suite with uncommon code in pack-objects pack-objects: reorder members to shrink struct object_entry pack-objects: shrink delta_size field in struct object_entry pack-objects: shrink size field in struct object_entry pack-objects: clarify the use of object_entry::size pack-objects: don't check size when the object is bad pack-objects: shrink z_delta_size field in struct object_entry pack-objects: refer to delta objects by index instead of pointer pack-objects: move in_pack out of struct object_entry pack-objects: move in_pack_pos out of struct object_entry pack-objects: use bitfield for object_entry::depth pack-objects: use bitfield for object_entry::dfs_state pack-objects: turn type and in_pack_type to bitfields pack-objects: a bit of document about struct object_entry read-cache.c: make $GIT_TEST_SPLIT_INDEX boolean --- ad635e82d600e1b725a2e65d69114140db6bc876 diff --cc builtin/pack-objects.c index 4ce6a93281,88d2bb8153..8552d7e42e --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@@ -30,8 -30,19 +30,20 @@@ #include "list.h" #include "packfile.h" #include "object-store.h" +#include "dir.h" + #define IN_PACK(obj) oe_in_pack(&to_pack, obj) + #define SIZE(obj) oe_size(&to_pack, obj) + #define SET_SIZE(obj,size) oe_set_size(&to_pack, obj, size) + #define DELTA_SIZE(obj) oe_delta_size(&to_pack, obj) + #define DELTA(obj) oe_delta(&to_pack, obj) + #define DELTA_CHILD(obj) oe_delta_child(&to_pack, obj) + #define DELTA_SIBLING(obj) oe_delta_sibling(&to_pack, obj) + #define SET_DELTA(obj, val) oe_set_delta(&to_pack, obj, val) + #define SET_DELTA_SIZE(obj, val) oe_set_delta_size(&to_pack, obj, val) + #define SET_DELTA_CHILD(obj, val) oe_set_delta_child(&to_pack, obj, val) + #define SET_DELTA_SIBLING(obj, val) oe_set_delta_sibling(&to_pack, obj, val) + static const char *pack_usage[] = { N_("git pack-objects --stdout [...] [< | < ]"), N_("git pack-objects [...] [< | < ]"), @@@ -1521,14 -1544,17 +1549,18 @@@ static void check_object(struct object_ unuse_pack(&w_curs); } - entry->type = oid_object_info(the_repository, &entry->idx.oid, - &entry->size); - /* - * The error condition is checked in prepare_pack(). This is - * to permit a missing preferred base object to be ignored - * as a preferred base. Doing so can result in a larger - * pack file, but the transfer will still take place. - */ - oe_set_type(entry, oid_object_info(&entry->idx.oid, &canonical_size)); ++ oe_set_type(entry, ++ oid_object_info(the_repository, &entry->idx.oid, &canonical_size)); + if (entry->type_valid) { + SET_SIZE(entry, canonical_size); + } else { + /* + * Bad object type is checked in prepare_pack(). This is + * to permit a missing preferred base object to be ignored + * as a preferred base. Doing so can result in a larger + * pack file, but the transfer will still take place. + */ + } } static int pack_offset_sort(const void *_a, const void *_b) @@@ -1563,31 -1591,36 +1597,37 @@@ */ static void drop_reused_delta(struct object_entry *entry) { - struct object_entry **p = &entry->delta->delta_child; + unsigned *idx = &to_pack.objects[entry->delta_idx - 1].delta_child_idx; struct object_info oi = OBJECT_INFO_INIT; + enum object_type type; + unsigned long size; + + while (*idx) { + struct object_entry *oe = &to_pack.objects[*idx - 1]; - while (*p) { - if (*p == entry) - *p = (*p)->delta_sibling; + if (oe == entry) + *idx = oe->delta_sibling_idx; else - p = &(*p)->delta_sibling; + idx = &oe->delta_sibling_idx; } - entry->delta = NULL; + SET_DELTA(entry, NULL); entry->depth = 0; - oi.sizep = &entry->size; - oi.typep = &entry->type; - if (packed_object_info(the_repository, entry->in_pack, - entry->in_pack_offset, &oi) < 0) { + oi.sizep = &size; + oi.typep = &type; - if (packed_object_info(IN_PACK(entry), entry->in_pack_offset, &oi) < 0) { ++ if (packed_object_info(the_repository, IN_PACK(entry), entry->in_pack_offset, &oi) < 0) { /* * We failed to get the info from this pack for some reason; * fall back to sha1_object_info, which may find another copy. - * And if that fails, the error will be recorded in entry->type + * And if that fails, the error will be recorded in oe_type(entry) * and dealt with in prepare_pack(). */ - entry->type = oid_object_info(the_repository, &entry->idx.oid, - &entry->size); - oe_set_type(entry, oid_object_info(&entry->idx.oid, &size)); ++ oe_set_type(entry, ++ oid_object_info(the_repository, &entry->idx.oid, &size)); + } else { + oe_set_type(entry, type); } + SET_SIZE(entry, size); } /* @@@ -1731,11 -1760,10 +1771,12 @@@ static void get_object_details(void for (i = 0; i < to_pack.nr_objects; i++) { struct object_entry *entry = sorted_by_offset[i]; check_object(entry); - if (big_file_threshold < entry->size) + if (entry->type_valid && + oe_size_greater_than(&to_pack, entry, big_file_threshold)) entry->no_try_delta = 1; + display_progress(progress_state, i + 1); } + stop_progress(&progress_state); /* * This must happen in a second pass, since we rely on the delta @@@ -1828,6 -1860,46 +1873,46 @@@ static pthread_mutex_t progress_mutex #endif + /* + * Return the size of the object without doing any delta + * reconstruction (so non-deltas are true object sizes, but deltas + * return the size of the delta data). + */ + unsigned long oe_get_size_slow(struct packing_data *pack, + const struct object_entry *e) + { + struct packed_git *p; + struct pack_window *w_curs; + unsigned char *buf; + enum object_type type; + unsigned long used, avail, size; + + if (e->type_ != OBJ_OFS_DELTA && e->type_ != OBJ_REF_DELTA) { + read_lock(); - if (oid_object_info(&e->idx.oid, &size) < 0) ++ if (oid_object_info(the_repository, &e->idx.oid, &size) < 0) + die(_("unable to get size of %s"), + oid_to_hex(&e->idx.oid)); + read_unlock(); + return size; + } + + p = oe_in_pack(pack, e); + if (!p) + BUG("when e->type is a delta, it must belong to a pack"); + + read_lock(); + w_curs = NULL; + buf = use_pack(p, &w_curs, e->in_pack_offset, &avail); + used = unpack_object_header_buffer(buf, avail, &type, &size); + if (used == 0) + die(_("unable to parse object header of %s"), + oid_to_hex(&e->idx.oid)); + + unuse_pack(&w_curs); + read_unlock(); + return size; + } + static int try_delta(struct unpacked *trg, struct unpacked *src, unsigned max_depth, unsigned long *mem_usage) { @@@ -3217,8 -3283,10 +3328,10 @@@ int cmd_pack_objects(int argc, const ch } } + prepare_packing_data(&to_pack); + if (progress) - progress_state = start_progress(_("Counting objects"), 0); + progress_state = start_progress(_("Enumerating objects"), 0); if (!use_internal_rev_list) read_object_list_from_stdin(); else { diff --cc cache.h index f436744363,543bb42ce8..6dedf3c4f9 --- a/cache.h +++ b/cache.h @@@ -373,11 -373,8 +373,13 @@@ extern void free_name_hash(struct index #define read_blob_data_from_cache(path, sz) read_blob_data_from_index(&the_index, (path), (sz)) #endif + #define TYPE_BITS 3 + +/* + * Values in this enum (except those outside the 3 bit range) are part + * of pack file format. See Documentation/technical/pack-format.txt + * for more information. + */ enum object_type { OBJ_BAD = -1, OBJ_NONE = 0, diff --cc object-store.h index 485b819f00,71d4087867..d683112fd7 --- a/object-store.h +++ b/object-store.h @@@ -71,9 -69,9 +71,10 @@@ struct packed_git int index_version; time_t mtime; int pack_fd; + int index; /* for builtin/pack-objects.c */ unsigned pack_local:1, pack_keep:1, + pack_keep_in_core:1, freshened:1, do_not_close:1, pack_promisor:1; diff --cc pack-objects.h index af4f46c026,e5456c6c89..edf74dabdd --- a/pack-objects.h +++ b/pack-objects.h @@@ -1,26 -1,103 +1,105 @@@ #ifndef PACK_OBJECTS_H #define PACK_OBJECTS_H + #include "object-store.h" + +#define DEFAULT_DELTA_CACHE_SIZE (256 * 1024 * 1024) + + #define OE_DFS_STATE_BITS 2 + #define OE_DEPTH_BITS 12 + #define OE_IN_PACK_BITS 10 + #define OE_Z_DELTA_BITS 20 + /* + * Note that oe_set_size() becomes expensive when the given size is + * above this limit. Don't lower it too much. + */ + #define OE_SIZE_BITS 31 + #define OE_DELTA_SIZE_BITS 20 + + /* + * State flags for depth-first search used for analyzing delta cycles. + * + * The depth is measured in delta-links to the base (so if A is a delta + * against B, then A has a depth of 1, and B a depth of 0). + */ + enum dfs_state { + DFS_NONE = 0, + DFS_ACTIVE, + DFS_DONE, + DFS_NUM_STATES + }; + + /* + * The size of struct nearly determines pack-objects's memory + * consumption. This struct is packed tight for that reason. When you + * add or reorder something in this struct, think a bit about this. + * + * basic object info + * ----------------- + * idx.oid is filled up before delta searching starts. idx.crc32 is + * only valid after the object is written out and will be used for + * generating the index. idx.offset will be both gradually set and + * used in writing phase (base objects get offset first, then deltas + * refer to them) + * + * "size" is the uncompressed object size. Compressed size of the raw + * data for an object in a pack is not stored anywhere but is computed + * and made available when reverse .idx is made. Note that when a + * delta is reused, "size" is the uncompressed _delta_ size, not the + * canonical one after the delta has been applied. + * + * "hash" contains a path name hash which is used for sorting the + * delta list and also during delta searching. Once prepare_pack() + * returns it's no longer needed. + * + * source pack info + * ---------------- + * The (in_pack, in_pack_offset) tuple contains the location of the + * object in the source pack. in_pack_header_size allows quickly + * skipping the header and going straight to the zlib stream. + * + * "type" and "in_pack_type" both describe object type. in_pack_type + * may contain a delta type, while type is always the canonical type. + * + * deltas + * ------ + * Delta links (delta, delta_child and delta_sibling) are created to + * reflect that delta graph from the source pack then updated or added + * during delta searching phase when we find better deltas. + * + * delta_child and delta_sibling are last needed in + * compute_write_order(). "delta" and "delta_size" must remain valid + * at object writing phase in case the delta is not cached. + * + * If a delta is cached in memory and is compressed, delta_data points + * to the data and z_delta_size contains the compressed size. If it's + * uncompressed [1], z_delta_size must be zero. delta_size is always + * the uncompressed size and must be valid even if the delta is not + * cached. + * + * [1] during try_delta phase we don't bother with compressing because + * the delta could be quickly replaced with a better one. + */ struct object_entry { struct pack_idx_entry idx; - unsigned long size; /* uncompressed size */ - struct packed_git *in_pack; /* already in pack */ - off_t in_pack_offset; - struct object_entry *delta; /* delta base object */ - struct object_entry *delta_child; /* deltified objects who bases me */ - struct object_entry *delta_sibling; /* other deltified objects who - * uses the same base as me - */ void *delta_data; /* cached delta (uncompressed) */ - unsigned long delta_size; /* delta data size (uncompressed) */ - unsigned long z_delta_size; /* delta data size (compressed) */ - enum object_type type; - enum object_type in_pack_type; /* could be delta */ + off_t in_pack_offset; uint32_t hash; /* name hint hash */ - unsigned int in_pack_pos; - unsigned char in_pack_header_size; + unsigned size_:OE_SIZE_BITS; + unsigned size_valid:1; + uint32_t delta_idx; /* delta base object */ + uint32_t delta_child_idx; /* deltified objects who bases me */ + uint32_t delta_sibling_idx; /* other deltified objects who + * uses the same base as me + */ + unsigned delta_size_:OE_DELTA_SIZE_BITS; /* delta data size (uncompressed) */ + unsigned delta_size_valid:1; + unsigned in_pack_idx:OE_IN_PACK_BITS; /* already in pack */ + unsigned z_delta_size:OE_Z_DELTA_BITS; + unsigned type_valid:1; + unsigned type_:TYPE_BITS; + unsigned no_try_delta:1; + unsigned in_pack_type:TYPE_BITS; /* could be delta */ unsigned preferred_base:1; /* * we do not pack this, but is available * to be used as the base object to delta