struct object_entry
{
struct object_entry *next;
- enum object_type type;
unsigned long offset;
+ unsigned type : TYPE_BITS;
unsigned char sha1[20];
};
{
void *data;
unsigned long len;
+ unsigned long offset;
unsigned int depth;
- int no_free;
- unsigned char sha1[20];
+ unsigned no_free:1;
};
struct mem_pool
static unsigned long alloc_count;
static unsigned long branch_count;
static unsigned long branch_load_count;
-static unsigned long remap_count;
static unsigned long object_count;
-static unsigned long duplicate_count;
static unsigned long marks_set_count;
-static unsigned long object_count_by_type[9];
-static unsigned long duplicate_count_by_type[9];
-static unsigned long delta_count_by_type[9];
+static unsigned long object_count_by_type[1 << TYPE_BITS];
+static unsigned long duplicate_count_by_type[1 << TYPE_BITS];
+static unsigned long delta_count_by_type[1 << TYPE_BITS];
/* Memory pools */
static size_t mem_pool_alloc = 2*1024*1024 - sizeof(struct mem_pool);
static struct atom_str **atom_table;
/* The .pack file being generated */
+static const char *base_name;
+static unsigned int pack_count;
+static char *pack_name;
+static char *idx_name;
+static struct packed_git *pack_data;
static int pack_fd;
static unsigned long pack_size;
static unsigned char pack_sha1[20];
-static unsigned char* pack_base;
-static unsigned long pack_moff;
-static unsigned long pack_mlen = 128*1024*1024;
-static unsigned long page_size;
/* Table of objects we've written. */
static unsigned int object_entry_alloc = 5000;
static FILE* branch_log;
-static void alloc_objects(int cnt)
+static void alloc_objects(unsigned int cnt)
{
struct object_entry_pool *b;
}
}
+/*
+ * Begin a new pack file.
+ *
+ * Increments pack_count, derives the .pack and .idx file names from
+ * base_name plus the zero-padded pack number, creates the pack file
+ * (failing if it already exists), allocates the packed_git record that
+ * gfi_unpack_entry() reads objects back through, and writes a provisional
+ * pack header.  The entry count in that header is written as 0 here and
+ * rewritten by fixup_header_footer() once the real count is known.
+ *
+ * Resets pack_size to the header size and object_count to 0.
+ * Dies if the pack file cannot be created.
+ */
+static void start_packfile()
+{
+	struct pack_header hdr;
+	size_t name_sz;
+
+	pack_count++;
+
+	/*
+	 * "%5.5u" prints at least five digits but more once pack_count
+	 * passes 99999, so size the buffers for any unsigned int:
+	 * 3 decimal digits per byte is a safe upper bound, and
+	 * sizeof(".pack") covers the longer suffix plus the NUL.
+	 */
+	name_sz = strlen(base_name) + 3 * sizeof(pack_count) + sizeof(".pack");
+	pack_name = xmalloc(name_sz);
+	idx_name = xmalloc(name_sz);
+	snprintf(pack_name, name_sz, "%s%5.5u.pack", base_name, pack_count);
+	snprintf(idx_name, name_sz, "%s%5.5u.idx", base_name, pack_count);
+
+	/* O_EXCL: never overwrite an existing pack of the same name. */
+	pack_fd = open(pack_name, O_RDWR|O_CREAT|O_EXCL, 0666);
+	if (pack_fd < 0)
+		die("Can't create %s: %s", pack_name, strerror(errno));
+
+	/* The pack name is stored in the tail of the packed_git allocation. */
+	pack_data = xcalloc(1, sizeof(*pack_data) + strlen(pack_name) + 2);
+	strcpy(pack_data->pack_name, pack_name);
+	pack_data->pack_fd = pack_fd;
+
+	hdr.hdr_signature = htonl(PACK_SIGNATURE);
+	hdr.hdr_version = htonl(2);
+	hdr.hdr_entries = 0;
+
+	write_or_die(pack_fd, &hdr, sizeof(hdr));
+	pack_size = sizeof(hdr);
+	object_count = 0;
+}
+
+/*
+ * Finalize the pack file open on pack_fd.
+ *
+ * Rewinds to the start, rewrites the 4-byte entry count that follows the
+ * first 8 header bytes with object_count (network byte order), hashes the
+ * whole file content with SHA-1, and appends the digest as the trailer.
+ * The digest is left in pack_sha1 for write_index() to record.
+ *
+ * Dies if the seek or a read fails.
+ */
+static void fixup_header_footer()
+{
+	SHA_CTX c;
+	char hdr[8];
+	unsigned int cnt;
+	char *buf;
+
+	if (lseek(pack_fd, 0, SEEK_SET) != 0)
+		die("Failed seeking to start: %s", strerror(errno));
+
+	SHA1_Init(&c);
+	/* First 8 bytes: presumably the signature and version words. */
+	yread(pack_fd, hdr, 8);
+	SHA1_Update(&c, hdr, 8);
+
+	/*
+	 * The count field is exactly 4 bytes on disk.  Hold it in an
+	 * unsigned int (as write_index() does for offsets) so that the
+	 * 4 bytes hashed and written are the value itself; with an
+	 * unsigned long they would be the zero high half on big-endian
+	 * LP64 systems.
+	 */
+	cnt = htonl(object_count);
+	SHA1_Update(&c, &cnt, 4);
+	write_or_die(pack_fd, &cnt, 4);
+
+	/* Hash everything after the header, up to end of file. */
+	buf = xmalloc(128 * 1024);
+	for (;;) {
+		/* Signed, so a negative error return is distinguishable. */
+		ssize_t n = xread(pack_fd, buf, 128 * 1024);
+		if (n < 0)
+			die("Failed reading pack: %s", strerror(errno));
+		if (!n)
+			break;
+		SHA1_Update(&c, buf, n);
+	}
+	free(buf);
+
+	/* The file position is now at EOF, so this appends the trailer. */
+	SHA1_Final(pack_sha1, &c);
+	write_or_die(pack_fd, pack_sha1, sizeof(pack_sha1));
+}
+
+/*
+ * qsort() comparator for an array of struct object_entry pointers.
+ * Each argument points at one array slot; the entries they refer to
+ * are ordered by hashcmp() of their sha1 fields.
+ */
+static int oecmp (const void *a_, const void *b_)
+{
+	struct object_entry * const *lhs = a_;
+	struct object_entry * const *rhs = b_;
+
+	return hashcmp((*lhs)->sha1, (*rhs)->sha1);
+}
+
+/*
+ * Write the pack index to the file named idx_name.
+ *
+ * Layout as emitted here: a 256-entry fan-out table, then one record per
+ * object (4-byte offset in network byte order followed by the object's
+ * sha1), then pack_sha1.  sha1close() finishes the file.
+ */
+static void write_index(const char *idx_name)
+{
+	struct sha1file *f;
+	struct object_entry **idx, **cur, **end;
+	struct object_entry_pool *pool;
+	unsigned int fanout[256];
+	int bucket;
+
+	/* Gather a pointer to every pooled entry, then sort them with oecmp. */
+	idx = xmalloc(object_count * sizeof(struct object_entry*));
+	cur = idx;
+	for (pool = blocks; pool; pool = pool->next_pool) {
+		struct object_entry *e;
+
+		for (e = pool->entries; e != pool->next_free; e++)
+			*cur++ = e;
+	}
+	end = idx + object_count;
+	qsort(idx, object_count, sizeof(struct object_entry*), oecmp);
+
+	/*
+	 * Walk the sorted table once.  After skipping every entry whose
+	 * first sha1 byte equals the bucket number, the cursor position is
+	 * the running count stored (network byte order) for that bucket.
+	 */
+	cur = idx;
+	for (bucket = 0; bucket < 256; bucket++) {
+		while (cur < end && (*cur)->sha1[0] == bucket)
+			cur++;
+		fanout[bucket] = htonl(cur - idx);
+	}
+
+	f = sha1create("%s", idx_name);
+	sha1write(f, fanout, sizeof(fanout));
+	for (cur = idx; cur != end; cur++) {
+		unsigned int offset = htonl((*cur)->offset);
+
+		sha1write(f, &offset, 4);
+		sha1write(f, (*cur)->sha1, sizeof((*cur)->sha1));
+	}
+	sha1write(f, pack_sha1, sizeof(pack_sha1));
+	sha1close(f, NULL, 1);
+	free(idx);
+}
+
+/*
+ * Finish the current pack: fix up its header and trailer, close it,
+ * write the matching index, and release the per-pack name buffers and
+ * packed_git record allocated by start_packfile().
+ *
+ * Dies if closing the pack file reports an error.
+ */
+static void end_packfile()
+{
+	fixup_header_footer();
+
+	/* close() can be the first place a deferred write error surfaces. */
+	if (close(pack_fd))
+		die("Failed closing %s: %s", pack_name, strerror(errno));
+	pack_fd = -1;
+
+	write_index(idx_name);
+
+	free(pack_name);
+	free(idx_name);
+	free(pack_data);
+
+	/*
+	 * These are file-scope pointers; clear them so a later use
+	 * (gfi_unpack_entry() dereferences pack_data) cannot touch
+	 * freed memory.
+	 */
+	pack_name = NULL;
+	idx_name = NULL;
+	pack_data = NULL;
+}
+
static size_t encode_header(
enum object_type type,
size_t size,
if (mark)
insert_mark(mark, e);
if (e->offset) {
- duplicate_count++;
duplicate_count_by_type[type]++;
return 1;
}
deflateInit(&s, zlib_compression_level);
if (delta) {
+ unsigned long ofs = e->offset - last->offset;
+ unsigned pos = sizeof(hdr) - 1;
+
delta_count_by_type[type]++;
last->depth++;
s.next_in = delta;
s.avail_in = deltalen;
- hdrlen = encode_header(OBJ_REF_DELTA, deltalen, hdr);
+
+ hdrlen = encode_header(OBJ_OFS_DELTA, deltalen, hdr);
write_or_die(pack_fd, hdr, hdrlen);
- write_or_die(pack_fd, last->sha1, sizeof(sha1));
- pack_size += hdrlen + sizeof(sha1);
+ pack_size += hdrlen;
+
+ hdr[pos] = ofs & 127;
+ while (ofs >>= 7)
+ hdr[--pos] = 128 | (--ofs & 127);
+ write_or_die(pack_fd, hdr + pos, sizeof(hdr) - pos);
+ pack_size += sizeof(hdr) - pos;
} else {
if (last)
last->depth = 0;
if (last->data && !last->no_free)
free(last->data);
last->data = dat;
+ last->offset = e->offset;
last->len = datlen;
- hashcpy(last->sha1, sha1);
}
return 0;
}
-static unsigned char* map_pack(unsigned long offset, unsigned int *left)
-{
- if (offset >= pack_size)
- die("object offset outside of pack file");
- if (!pack_base
- || offset < pack_moff
- || (offset + 20) >= (pack_moff + pack_mlen)) {
- if (pack_base)
- munmap(pack_base, pack_mlen);
- pack_moff = (offset / page_size) * page_size;
- pack_base = mmap(NULL,pack_mlen,PROT_READ,MAP_SHARED,
- pack_fd,pack_moff);
- if (pack_base == MAP_FAILED)
- die("Failed to map generated pack: %s", strerror(errno));
- remap_count++;
- }
- offset -= pack_moff;
- if (left)
- *left = pack_mlen - offset;
- return pack_base + offset;
-}
-
-static unsigned long unpack_object_header(unsigned long offset,
- enum object_type *type,
- unsigned long *sizep)
-{
- unsigned shift;
- unsigned char c;
- unsigned long size;
-
- c = *map_pack(offset++, NULL);
- *type = (c >> 4) & 7;
- size = c & 15;
- shift = 4;
- while (c & 0x80) {
- c = *map_pack(offset++, NULL);
- size += (c & 0x7f) << shift;
- shift += 7;
- }
- *sizep = size;
- return offset;
-}
-
-static void *unpack_non_delta_entry(unsigned long o, unsigned long sz)
+static void *gfi_unpack_entry(unsigned long ofs, unsigned long *sizep)
{
- z_stream stream;
- unsigned char *result;
-
- result = xmalloc(sz + 1);
- result[sz] = 0;
-
- memset(&stream, 0, sizeof(stream));
- stream.next_in = map_pack(o, &stream.avail_in);
- stream.next_out = result;
- stream.avail_out = sz;
-
- inflateInit(&stream);
- for (;;) {
- int st = inflate(&stream, Z_FINISH);
- if (st == Z_STREAM_END)
- break;
- if (st == Z_OK || st == Z_BUF_ERROR) {
- o = stream.next_in - pack_base + pack_moff;
- stream.next_in = map_pack(o, &stream.avail_in);
- continue;
- }
- die("Error %i from zlib during inflate.", st);
- }
- inflateEnd(&stream);
- if (stream.total_out != sz)
- die("Error after inflate: sizes mismatch");
- return result;
-}
-
-static void *gfi_unpack_entry(unsigned long offset,
- unsigned long *sizep,
- unsigned int *delta_depth);
-
-static void *unpack_delta_entry(unsigned long offset,
- unsigned long delta_size,
- unsigned long *sizep,
- unsigned int *delta_depth)
-{
- struct object_entry *base_oe;
- unsigned char *base_sha1;
- void *delta_data, *base, *result;
- unsigned long base_size, result_size;
-
- base_sha1 = map_pack(offset, NULL);
- base_oe = find_object(base_sha1);
- if (!base_oe)
- die("I'm broken; I can't find a base I know must be here.");
- base = gfi_unpack_entry(base_oe->offset, &base_size, delta_depth);
- delta_data = unpack_non_delta_entry(offset + 20, delta_size);
- result = patch_delta(base, base_size,
- delta_data, delta_size,
- &result_size);
- if (!result)
- die("failed to apply delta");
- free(delta_data);
- free(base);
- *sizep = result_size;
- (*delta_depth)++;
- return result;
-}
-
-static void *gfi_unpack_entry(unsigned long offset,
- unsigned long *sizep,
- unsigned int *delta_depth)
-{
- unsigned long size;
- enum object_type kind;
-
- offset = unpack_object_header(offset, &kind, &size);
- switch (kind) {
- case OBJ_REF_DELTA:
- return unpack_delta_entry(offset, size, sizep, delta_depth);
- case OBJ_COMMIT:
- case OBJ_TREE:
- case OBJ_BLOB:
- case OBJ_TAG:
- *sizep = size;
- *delta_depth = 0;
- return unpack_non_delta_entry(offset, size);
- default:
- die("I created an object I can't read!");
- }
+ char type[20];
+ pack_data->pack_size = pack_size + 20;
+ return unpack_entry(pack_data, ofs, type, sizep);
}
static const char *get_mode(const char *str, unsigned int *modep)
if (myoe) {
if (myoe->type != OBJ_TREE)
die("Not a tree: %s", sha1_to_hex(sha1));
- buf = gfi_unpack_entry(myoe->offset, &size, &t->delta_depth);
+ t->delta_depth = 0;
+ buf = gfi_unpack_entry(myoe->offset, &size);
} else {
char type[20];
buf = read_sha1_file(sha1, type, &size);
unsigned int i, j, del;
unsigned long new_len;
struct last_object lo;
+ struct object_entry *le;
if (!is_null_sha1(root->versions[1].sha1))
return;
store_tree(t->entries[i]);
}
- if (!S_ISDIR(root->versions[0].mode)
- || is_null_sha1(root->versions[0].sha1)
- || !find_object(root->versions[0].sha1)) {
+ le = find_object(root->versions[0].sha1);
+ if (!S_ISDIR(root->versions[0].mode) || !le) {
lo.data = NULL;
lo.depth = 0;
} else {
mktree(t, 0, &lo.len, &old_tree);
lo.data = old_tree.buffer;
+ lo.offset = le->offset;
lo.depth = t->delta_depth;
lo.no_free = 1;
- hashcpy(lo.sha1, root->versions[0].sha1);
}
mktree(t, 1, &new_len, &new_tree);
return 1;
}
-static void init_pack_header()
-{
- struct pack_header hdr;
-
- hdr.hdr_signature = htonl(PACK_SIGNATURE);
- hdr.hdr_version = htonl(2);
- hdr.hdr_entries = 0;
-
- write_or_die(pack_fd, &hdr, sizeof(hdr));
- pack_size = sizeof(hdr);
-}
-
-static void fixup_header_footer()
-{
- SHA_CTX c;
- char hdr[8];
- unsigned long cnt;
- char *buf;
- size_t n;
-
- if (lseek(pack_fd, 0, SEEK_SET) != 0)
- die("Failed seeking to start: %s", strerror(errno));
-
- SHA1_Init(&c);
- yread(pack_fd, hdr, 8);
- SHA1_Update(&c, hdr, 8);
-
- cnt = htonl(object_count);
- SHA1_Update(&c, &cnt, 4);
- write_or_die(pack_fd, &cnt, 4);
-
- buf = xmalloc(128 * 1024);
- for (;;) {
- n = xread(pack_fd, buf, 128 * 1024);
- if (n <= 0)
- break;
- SHA1_Update(&c, buf, n);
- }
- free(buf);
-
- SHA1_Final(pack_sha1, &c);
- write_or_die(pack_fd, pack_sha1, sizeof(pack_sha1));
-}
-
-static int oecmp (const void *_a, const void *_b)
-{
- struct object_entry *a = *((struct object_entry**)_a);
- struct object_entry *b = *((struct object_entry**)_b);
- return hashcmp(a->sha1, b->sha1);
-}
-
-static void write_index(const char *idx_name)
-{
- struct sha1file *f;
- struct object_entry **idx, **c, **last;
- struct object_entry *e;
- struct object_entry_pool *o;
- unsigned int array[256];
- int i;
-
- /* Build the sorted table of object IDs. */
- idx = xmalloc(object_count * sizeof(struct object_entry*));
- c = idx;
- for (o = blocks; o; o = o->next_pool)
- for (e = o->entries; e != o->next_free; e++)
- *c++ = e;
- last = idx + object_count;
- qsort(idx, object_count, sizeof(struct object_entry*), oecmp);
-
- /* Generate the fan-out array. */
- c = idx;
- for (i = 0; i < 256; i++) {
- struct object_entry **next = c;;
- while (next < last) {
- if ((*next)->sha1[0] != i)
- break;
- next++;
- }
- array[i] = htonl(next - idx);
- c = next;
- }
-
- f = sha1create("%s", idx_name);
- sha1write(f, array, 256 * sizeof(int));
- for (c = idx; c != last; c++) {
- unsigned int offset = htonl((*c)->offset);
- sha1write(f, &offset, 4);
- sha1write(f, (*c)->sha1, sizeof((*c)->sha1));
- }
- sha1write(f, pack_sha1, sizeof(pack_sha1));
- sha1close(f, NULL, 1);
- free(idx);
-}
-
static void dump_branches()
{
static const char *msg = "fast-import";
unsigned long idnum = strtoul(from + 1, NULL, 10);
struct object_entry *oe = find_mark(idnum);
unsigned long size;
- unsigned int depth;
char *buf;
if (oe->type != OBJ_COMMIT)
die("Mark :%lu not a commit", idnum);
hashcpy(b->sha1, oe->sha1);
- buf = gfi_unpack_entry(oe->offset, &size, &depth);
+ buf = gfi_unpack_entry(oe->offset, &size);
if (!buf || size < 46)
die("Not a valid commit: %s", from);
if (memcmp("tree ", buf, 5)
int main(int argc, const char **argv)
{
- const char *base_name;
int i;
unsigned long est_obj_cnt = object_entry_alloc;
- char *pack_name;
- char *idx_name;
- struct stat sb;
+ unsigned long duplicate_count;
setup_ident();
git_config(git_default_config);
- page_size = getpagesize();
for (i = 1; i < argc; i++) {
const char *a = argv[i];
usage(fast_import_usage);
base_name = argv[i];
- pack_name = xmalloc(strlen(base_name) + 6);
- sprintf(pack_name, "%s.pack", base_name);
- idx_name = xmalloc(strlen(base_name) + 5);
- sprintf(idx_name, "%s.idx", base_name);
-
- pack_fd = open(pack_name, O_RDWR|O_CREAT|O_EXCL, 0666);
- if (pack_fd < 0)
- die("Can't create %s: %s", pack_name, strerror(errno));
-
- init_pack_header();
alloc_objects(est_obj_cnt);
strbuf_init(&command_buf);
avail_tree_table = xcalloc(avail_tree_table_sz, sizeof(struct avail_tree_content*));
marks = pool_calloc(1, sizeof(struct mark_set));
+ start_packfile();
for (;;) {
read_next_command();
if (command_buf.eof)
else
die("Unsupported command: %s", command_buf.buf);
}
+ end_packfile();
- fixup_header_footer();
- close(pack_fd);
- write_index(idx_name);
dump_branches();
dump_tags();
dump_marks();
if (branch_log)
fclose(branch_log);
+	/*
+	 * Total duplicates across all object types.  The local is declared
+	 * without an initializer, so it must be zeroed before accumulating.
+	 */
+	duplicate_count = 0;
+	for (i = 0; i < ARRAY_SIZE(duplicate_count_by_type); i++)
+		duplicate_count += duplicate_count_by_type[i];
+
fprintf(stderr, "%s statistics:\n", argv[0]);
fprintf(stderr, "---------------------------------------------------------------------\n");
fprintf(stderr, "Alloc'd objects: %10lu (%10lu overflow )\n", alloc_count, alloc_count - est_obj_cnt);
fprintf(stderr, "Memory total: %10lu KiB\n", (total_allocd + alloc_count*sizeof(struct object_entry))/1024);
fprintf(stderr, " pools: %10lu KiB\n", total_allocd/1024);
fprintf(stderr, " objects: %10lu KiB\n", (alloc_count*sizeof(struct object_entry))/1024);
- fprintf(stderr, "Pack remaps: %10lu\n", remap_count);
- stat(pack_name, &sb);
- fprintf(stderr, "Pack size: %10lu KiB\n", (unsigned long)(sb.st_size/1024));
- stat(idx_name, &sb);
- fprintf(stderr, "Index size: %10lu KiB\n", (unsigned long)(sb.st_size/1024));
fprintf(stderr, "---------------------------------------------------------------------\n");
-
fprintf(stderr, "\n");
return 0;