unsigned char sha1[20];
};
-/* Stats and misc. counters */
+/* Configured limits on output */
static unsigned long max_depth = 10;
-static unsigned long alloc_count;
+static unsigned long max_packsize = -1;
+static uintmax_t max_objects = -1;
+
+/* Stats and misc. counters */
+static uintmax_t alloc_count;
+static uintmax_t object_count;
+static uintmax_t marks_set_count;
+static uintmax_t object_count_by_type[1 << TYPE_BITS];
+static uintmax_t duplicate_count_by_type[1 << TYPE_BITS];
+static uintmax_t delta_count_by_type[1 << TYPE_BITS];
static unsigned long branch_count;
static unsigned long branch_load_count;
-static unsigned long object_count;
-static unsigned long marks_set_count;
-static unsigned long object_count_by_type[1 << TYPE_BITS];
-static unsigned long duplicate_count_by_type[1 << TYPE_BITS];
-static unsigned long delta_count_by_type[1 << TYPE_BITS];
/* Memory pools */
static size_t mem_pool_alloc = 2*1024*1024 - sizeof(struct mem_pool);
/* Input stream parsing */
static struct strbuf command_buf;
-static unsigned long next_mark;
+static uintmax_t next_mark;
static struct dbuf new_data;
static FILE* branch_log;
alloc_objects(object_entry_alloc);
e = blocks->next_free++;
- e->pack_id = pack_id;
hashcpy(e->sha1, sha1);
return e;
}
b->buffer = xmalloc(b->capacity);
}
-static void insert_mark(unsigned long idnum, struct object_entry *oe)
+static void insert_mark(uintmax_t idnum, struct object_entry *oe)
{
struct mark_set *s = marks;
while ((idnum >> s->shift) >= 1024) {
marks = s;
}
while (s->shift) {
- unsigned long i = idnum >> s->shift;
+ uintmax_t i = idnum >> s->shift;
idnum -= i << s->shift;
if (!s->data.sets[i]) {
s->data.sets[i] = pool_calloc(1, sizeof(struct mark_set));
s->data.marked[idnum] = oe;
}
-static struct object_entry* find_mark(unsigned long idnum)
+static struct object_entry* find_mark(uintmax_t idnum)
{
- unsigned long orig_idnum = idnum;
+ uintmax_t orig_idnum = idnum;
struct mark_set *s = marks;
struct object_entry *oe = NULL;
if ((idnum >> s->shift) < 1024) {
while (s && s->shift) {
- unsigned long i = idnum >> s->shift;
+ uintmax_t i = idnum >> s->shift;
idnum -= i << s->shift;
s = s->data.sets[i];
}
oe = s->data.marked[idnum];
}
if (!oe)
- die("mark :%lu not declared", orig_idnum);
+ die("mark :%ju not declared", orig_idnum);
return oe;
}
idx = xmalloc(object_count * sizeof(struct object_entry*));
c = idx;
for (o = blocks; o; o = o->next_pool)
- for (e = o->entries; e != o->next_free; e++)
+ for (e = o->next_free; e-- != o->entries;)
if (pack_id == e->pack_id)
*c++ = e;
last = idx + object_count;
+ if (c != last)
+ die("internal consistency error creating the index");
qsort(idx, object_count, sizeof(struct object_entry*), oecmp);
/* Generate the fan-out array. */
if (object_count) {
fixup_header_footer();
write_index(idx_name);
+ fprintf(stdout, "%s\n", old_p->pack_name);
+ fflush(stdout);
/* Register the packfile with core git's machinary. */
new_p = add_packed_git(idx_name, strlen(idx_name), 1);
last_blob.depth = 0;
}
+static void checkpoint()
+{
+ end_packfile();
+ start_packfile();
+}
+
static size_t encode_header(
enum object_type type,
size_t size,
size_t datlen,
struct last_object *last,
unsigned char *sha1out,
- unsigned long mark)
+ uintmax_t mark)
{
void *out, *delta;
struct object_entry *e;
duplicate_count_by_type[type]++;
return 1;
}
- e->type = type;
- e->offset = pack_size;
- object_count++;
- object_count_by_type[type]++;
- if (last && last->data && last->depth < max_depth)
+ if (last && last->data && last->depth < max_depth) {
delta = diff_delta(last->data, last->len,
dat, datlen,
&deltalen, 0);
- else
- delta = 0;
+ if (delta && deltalen >= datlen) {
+ free(delta);
+ delta = NULL;
+ }
+ } else
+ delta = NULL;
memset(&s, 0, sizeof(s));
deflateInit(&s, zlib_compression_level);
+ if (delta) {
+ s.next_in = delta;
+ s.avail_in = deltalen;
+ } else {
+ s.next_in = dat;
+ s.avail_in = datlen;
+ }
+ s.avail_out = deflateBound(&s, s.avail_in);
+ s.next_out = out = xmalloc(s.avail_out);
+ while (deflate(&s, Z_FINISH) == Z_OK)
+ /* nothing */;
+ deflateEnd(&s);
+
+ /* Determine if we should auto-checkpoint. */
+ if ((object_count + 1) > max_objects
+ || (object_count + 1) < object_count
+ || (pack_size + 60 + s.total_out) > max_packsize
+ || (pack_size + 60 + s.total_out) < pack_size) {
+
+ /* This new object needs to *not* have the current pack_id. */
+ e->pack_id = pack_id + 1;
+ checkpoint();
+
+ /* We cannot carry a delta into the new pack. */
+ if (delta) {
+ free(delta);
+ delta = NULL;
+
+ memset(&s, 0, sizeof(s));
+ deflateInit(&s, zlib_compression_level);
+ s.next_in = dat;
+ s.avail_in = datlen;
+ s.avail_out = deflateBound(&s, s.avail_in);
+ s.next_out = out = xrealloc(out, s.avail_out);
+ while (deflate(&s, Z_FINISH) == Z_OK)
+ /* nothing */;
+ deflateEnd(&s);
+ }
+ }
+
+ e->type = type;
+ e->pack_id = pack_id;
+ e->offset = pack_size;
+ object_count++;
+ object_count_by_type[type]++;
if (delta) {
unsigned long ofs = e->offset - last->offset;
delta_count_by_type[type]++;
last->depth++;
- s.next_in = delta;
- s.avail_in = deltalen;
hdrlen = encode_header(OBJ_OFS_DELTA, deltalen, hdr);
write_or_die(pack_fd, hdr, hdrlen);
} else {
if (last)
last->depth = 0;
- s.next_in = dat;
- s.avail_in = datlen;
hdrlen = encode_header(type, datlen, hdr);
write_or_die(pack_fd, hdr, hdrlen);
pack_size += hdrlen;
}
- s.avail_out = deflateBound(&s, s.avail_in);
- s.next_out = out = xmalloc(s.avail_out);
- while (deflate(&s, Z_FINISH) == Z_OK)
- /* nothing */;
- deflateEnd(&s);
-
write_or_die(pack_fd, out, s.total_out);
pack_size += s.total_out;
}
static void dump_marks_helper(FILE *f,
- unsigned long base,
+ uintmax_t base,
struct mark_set *m)
{
- int k;
+ uintmax_t k;
if (m->shift) {
for (k = 0; k < 1024; k++) {
if (m->data.sets[k])
} else {
for (k = 0; k < 1024; k++) {
if (m->data.marked[k])
- fprintf(f, ":%lu %s\n", base + k,
+ fprintf(f, ":%ju %s\n", base + k,
sha1_to_hex(m->data.marked[k]->sha1));
}
}
static void cmd_mark()
{
if (!strncmp("mark :", command_buf.buf, 6)) {
- next_mark = strtoul(command_buf.buf + 6, NULL, 10);
+ next_mark = strtoumax(command_buf.buf + 6, NULL, 10);
read_next_command();
}
else
if (*p == ':') {
char *x;
- oe = find_mark(strtoul(p + 1, &x, 10));
+ oe = find_mark(strtoumax(p + 1, &x, 10));
hashcpy(sha1, oe->sha1);
p = x;
} else {
hashcpy(b->branch_tree.versions[0].sha1, t);
hashcpy(b->branch_tree.versions[1].sha1, t);
} else if (*from == ':') {
- unsigned long idnum = strtoul(from + 1, NULL, 10);
+ uintmax_t idnum = strtoumax(from + 1, NULL, 10);
struct object_entry *oe = find_mark(idnum);
unsigned long size;
char *buf;
if (oe->type != OBJ_COMMIT)
- die("Mark :%lu not a commit", idnum);
+ die("Mark :%ju not a commit", idnum);
hashcpy(b->sha1, oe->sha1);
buf = gfi_unpack_entry(oe, &size);
if (!buf || size < 46)
if (s)
hashcpy(n->sha1, s->sha1);
else if (*from == ':') {
- unsigned long idnum = strtoul(from + 1, NULL, 10);
+ uintmax_t idnum = strtoumax(from + 1, NULL, 10);
struct object_entry *oe = find_mark(idnum);
if (oe->type != OBJ_COMMIT)
- die("Mark :%lu not a commit", idnum);
+ die("Mark :%ju not a commit", idnum);
hashcpy(n->sha1, oe->sha1);
} else if (get_sha1(from, n->sha1))
die("Invalid ref name or SHA1 expression: %s", from);
fputc('"', branch_log);
} else
fprintf(branch_log, "%s", b->name);
- fprintf(branch_log," :%lu %s\n",next_mark,sha1_to_hex(b->sha1));
+ fprintf(branch_log," :%ju %s\n",next_mark,sha1_to_hex(b->sha1));
}
}
void *msg;
size_t msglen;
struct tag *t;
- unsigned long from_mark = 0;
+ uintmax_t from_mark = 0;
unsigned char sha1[20];
/* Obtain the new tag name from the rest of our command */
if (s) {
hashcpy(sha1, s->sha1);
} else if (*from == ':') {
- from_mark = strtoul(from + 1, NULL, 10);
+ from_mark = strtoumax(from + 1, NULL, 10);
struct object_entry *oe = find_mark(from_mark);
if (oe->type != OBJ_COMMIT)
- die("Mark :%lu not a commit", from_mark);
+ die("Mark :%ju not a commit", from_mark);
hashcpy(sha1, oe->sha1);
} else if (!get_sha1(from, sha1)) {
unsigned long size;
fputc('"', branch_log);
} else
fprintf(branch_log, "%s", t->name);
- fprintf(branch_log," :%lu %s\n",from_mark,sha1_to_hex(t->sha1));
+ fprintf(branch_log," :%ju %s\n",from_mark,sha1_to_hex(t->sha1));
}
}
static void cmd_checkpoint()
{
- if (object_count) {
- end_packfile();
- start_packfile();
- }
+ if (object_count)
+ checkpoint();
read_next_command();
}
int main(int argc, const char **argv)
{
int i;
- unsigned long est_obj_cnt = object_entry_alloc;
- unsigned long duplicate_count;
+ uintmax_t est_obj_cnt = object_entry_alloc;
+ uintmax_t duplicate_count;
setup_ident();
git_config(git_default_config);
if (*a != '-' || !strcmp(a, "--"))
break;
else if (!strncmp(a, "--objects=", 10))
- est_obj_cnt = strtoul(a + 10, NULL, 0);
+ est_obj_cnt = strtoumax(a + 10, NULL, 0);
+ else if (!strncmp(a, "--max-objects-per-pack=", 23))
+ max_objects = strtoumax(a + 23, NULL, 0);
+ else if (!strncmp(a, "--max-pack-size=", 16))
+ max_packsize = strtoumax(a + 16, NULL, 0) * 1024 * 1024;
else if (!strncmp(a, "--depth=", 8))
max_depth = strtoul(a + 8, NULL, 0);
else if (!strncmp(a, "--active-branches=", 18))
if (branch_log)
fclose(branch_log);
+ duplicate_count = 0;
for (i = 0; i < ARRAY_SIZE(duplicate_count_by_type); i++)
duplicate_count += duplicate_count_by_type[i];
fprintf(stderr, "%s statistics:\n", argv[0]);
fprintf(stderr, "---------------------------------------------------------------------\n");
- fprintf(stderr, "Alloc'd objects: %10lu (%10lu overflow )\n", alloc_count, alloc_count - est_obj_cnt);
- fprintf(stderr, "Total objects: %10lu (%10lu duplicates )\n", object_count, duplicate_count);
- fprintf(stderr, " blobs : %10lu (%10lu duplicates %10lu deltas)\n", object_count_by_type[OBJ_BLOB], duplicate_count_by_type[OBJ_BLOB], delta_count_by_type[OBJ_BLOB]);
- fprintf(stderr, " trees : %10lu (%10lu duplicates %10lu deltas)\n", object_count_by_type[OBJ_TREE], duplicate_count_by_type[OBJ_TREE], delta_count_by_type[OBJ_TREE]);
- fprintf(stderr, " commits: %10lu (%10lu duplicates %10lu deltas)\n", object_count_by_type[OBJ_COMMIT], duplicate_count_by_type[OBJ_COMMIT], delta_count_by_type[OBJ_COMMIT]);
- fprintf(stderr, " tags : %10lu (%10lu duplicates %10lu deltas)\n", object_count_by_type[OBJ_TAG], duplicate_count_by_type[OBJ_TAG], delta_count_by_type[OBJ_TAG]);
+ fprintf(stderr, "Alloc'd objects: %10ju (%10ju overflow )\n", alloc_count, alloc_count - est_obj_cnt);
+ fprintf(stderr, "Total objects: %10ju (%10ju duplicates )\n", object_count, duplicate_count);
+ fprintf(stderr, " blobs : %10ju (%10ju duplicates %10ju deltas)\n", object_count_by_type[OBJ_BLOB], duplicate_count_by_type[OBJ_BLOB], delta_count_by_type[OBJ_BLOB]);
+ fprintf(stderr, " trees : %10ju (%10ju duplicates %10ju deltas)\n", object_count_by_type[OBJ_TREE], duplicate_count_by_type[OBJ_TREE], delta_count_by_type[OBJ_TREE]);
+ fprintf(stderr, " commits: %10ju (%10ju duplicates %10ju deltas)\n", object_count_by_type[OBJ_COMMIT], duplicate_count_by_type[OBJ_COMMIT], delta_count_by_type[OBJ_COMMIT]);
+ fprintf(stderr, " tags : %10ju (%10ju duplicates %10ju deltas)\n", object_count_by_type[OBJ_TAG], duplicate_count_by_type[OBJ_TAG], delta_count_by_type[OBJ_TAG]);
fprintf(stderr, "Total branches: %10lu (%10lu loads )\n", branch_count, branch_load_count);
- fprintf(stderr, " marks: %10u (%10lu unique )\n", (1 << marks->shift) * 1024, marks_set_count);
+ fprintf(stderr, " marks: %10ju (%10ju unique )\n", (((uintmax_t)1) << marks->shift) * 1024, marks_set_count);
fprintf(stderr, " atoms: %10u\n", atom_cnt);
- fprintf(stderr, "Memory total: %10lu KiB\n", (total_allocd + alloc_count*sizeof(struct object_entry))/1024);
+ fprintf(stderr, "Memory total: %10ju KiB\n", (total_allocd + alloc_count*sizeof(struct object_entry))/1024);
fprintf(stderr, " pools: %10lu KiB\n", total_allocd/1024);
- fprintf(stderr, " objects: %10lu KiB\n", (alloc_count*sizeof(struct object_entry))/1024);
+ fprintf(stderr, " objects: %10ju KiB\n", (alloc_count*sizeof(struct object_entry))/1024);
fprintf(stderr, "---------------------------------------------------------------------\n");
pack_report();
fprintf(stderr, "---------------------------------------------------------------------\n");