git.ipfire.org Git - thirdparty/git.git/blobdiff - fast-import.c
Remove unnecessary duplicate_count in fast-import.
[thirdparty/git.git] / fast-import.c
index 492a8594bf6509051b0c2ae418216ab1b2150b3e..12127168bd91355813c7c76ad9caee859f757be8 100644 (file)
@@ -110,8 +110,8 @@ Format of STDIN stream:
 struct object_entry
 {
         struct object_entry *next;
-       enum object_type type;
         unsigned long offset;
+       unsigned type : TYPE_BITS;      /* object type, stored in a TYPE_BITS-wide bit-field */
         unsigned char sha1[20];
 };
 
@@ -136,9 +136,9 @@ struct last_object
 {
        void *data;
        unsigned long len;
+       unsigned long offset;
        unsigned int depth;
-       int no_free;
-       unsigned char sha1[20];
+       unsigned no_free:1;
 };
 
 struct mem_pool
@@ -216,13 +216,11 @@ static unsigned long max_depth = 10;
 static unsigned long alloc_count;
 static unsigned long branch_count;
 static unsigned long branch_load_count;
-static unsigned long remap_count;
 static unsigned long object_count;
-static unsigned long duplicate_count;
 static unsigned long marks_set_count;
-static unsigned long object_count_by_type[9];
-static unsigned long duplicate_count_by_type[9];
-static unsigned long delta_count_by_type[9];
+static unsigned long object_count_by_type[1 << TYPE_BITS];
+static unsigned long duplicate_count_by_type[1 << TYPE_BITS];
+static unsigned long delta_count_by_type[1 << TYPE_BITS];
 
 /* Memory pools */
 static size_t mem_pool_alloc = 2*1024*1024 - sizeof(struct mem_pool);
@@ -235,13 +233,14 @@ static unsigned int atom_cnt;
 static struct atom_str **atom_table;
 
 /* The .pack file being generated */
+static const char *base_name;
+static unsigned int pack_count;
+static char *pack_name;
+static char *idx_name;
+static struct packed_git *pack_data;
 static int pack_fd;
 static unsigned long pack_size;
 static unsigned char pack_sha1[20];
-static unsigned char* pack_base;
-static unsigned long pack_moff;
-static unsigned long pack_mlen = 128*1024*1024;
-static unsigned long page_size;
 
 /* Table of objects we've written. */
 static unsigned int object_entry_alloc = 5000;
@@ -279,7 +278,7 @@ static struct dbuf new_data;
 static FILE* branch_log;
 
 
-static void alloc_objects(int cnt)
+static void alloc_objects(unsigned int cnt)
 {
        struct object_entry_pool *b;
 
@@ -596,6 +595,124 @@ static void yread(int fd, void *buffer, size_t length)
        }
 }
 
+/* Open the next numbered pack file and write a provisional pack header. */
+static void start_packfile()
+{
+       struct pack_header hdr;
+       /* base_name + pack number (<= 3 digits per byte) + ".pack" + NUL */
+       size_t name_sz = strlen(base_name) + 3 * sizeof(pack_count) + 6;
+
+       pack_count++;
+       pack_name = xmalloc(name_sz);
+       idx_name = xmalloc(name_sz);
+       snprintf(pack_name, name_sz, "%s%5.5u.pack", base_name, pack_count);
+       snprintf(idx_name, name_sz, "%s%5.5u.idx", base_name, pack_count);
+       pack_fd = open(pack_name, O_RDWR|O_CREAT|O_EXCL, 0666);
+       if (pack_fd < 0)
+               die("Can't create %s: %s", pack_name, strerror(errno));
+       pack_data = xcalloc(1, sizeof(*pack_data) + strlen(pack_name) + 2);
+       strcpy(pack_data->pack_name, pack_name);
+       pack_data->pack_fd = pack_fd;
+       /* hdr_entries is a placeholder; fixup_header_footer() overwrites it. */
+       hdr.hdr_signature = htonl(PACK_SIGNATURE);
+       hdr.hdr_version = htonl(2);
+       hdr.hdr_entries = 0;
+       write_or_die(pack_fd, &hdr, sizeof(hdr));
+       pack_size = sizeof(hdr);
+       object_count = 0;
+}
+
+/* Patch the real object count into the pack header and append the pack SHA-1. */
+static void fixup_header_footer()
+{
+       SHA_CTX c;
+       char hdr[8];
+       unsigned int cnt;       /* htonl() result; 4 bytes are hashed and written */
+       char *buf;
+       ssize_t n;
+
+       if (lseek(pack_fd, 0, SEEK_SET) != 0)
+               die("Failed seeking to start: %s", strerror(errno));
+       SHA1_Init(&c);
+       yread(pack_fd, hdr, 8);
+       SHA1_Update(&c, hdr, 8);
+
+       /* Overwrite the entry count in place, hashing the new value. */
+       cnt = htonl(object_count);
+       SHA1_Update(&c, &cnt, 4);
+       write_or_die(pack_fd, &cnt, 4);
+
+       /* Hash the rest of the file; a failed read must not be hashed as data. */
+       buf = xmalloc(128 * 1024);
+       while ((n = xread(pack_fd, buf, 128 * 1024)) > 0)
+               SHA1_Update(&c, buf, n);
+       if (n < 0)
+               die("Failed reading pack: %s", strerror(errno));
+       free(buf);
+       SHA1_Final(pack_sha1, &c);
+       write_or_die(pack_fd, pack_sha1, sizeof(pack_sha1));
+}
+
+/* qsort() comparator: orders two object_entry pointers by hashcmp() of their SHA-1s. */
+static int oecmp (const void *a_, const void *b_)
+{
+       struct object_entry *const *lhs = a_, *const *rhs = b_;
+       return hashcmp((*lhs)->sha1, (*rhs)->sha1);
+}
+
+/* Write the pack index to idx_name: fan-out table, sorted entries, pack SHA-1. */
+static void write_index(const char *idx_name)
+{
+       struct object_entry_pool *pool;
+       struct object_entry **table, **cur, **end, *oe;
+       struct sha1file *out;
+       unsigned int fanout[256];
+       int byte;
+
+       /* Collect a pointer to every allocated entry, then sort them with oecmp. */
+       table = xmalloc(object_count * sizeof(struct object_entry*));
+       end = table + object_count;
+       cur = table;
+       for (pool = blocks; pool; pool = pool->next_pool)
+               for (oe = pool->entries; oe != pool->next_free; oe++)
+                       *cur++ = oe;
+       qsort(table, object_count, sizeof(struct object_entry*), oecmp);
+
+       /*
+        * Fan-out table: after skipping the run of entries whose first
+        * SHA-1 byte equals N, slot N records how many entries precede cur.
+        */
+       cur = table;
+       for (byte = 0; byte < 256; byte++) {
+               while (cur < end && (*cur)->sha1[0] == byte)
+                       cur++;
+               fanout[byte] = htonl(cur - table);
+       }
+
+       /* Emit the fan-out, one (offset, SHA-1) record per entry, then pack_sha1. */
+       out = sha1create("%s", idx_name);
+       sha1write(out, fanout, 256 * sizeof(int));
+       for (cur = table; cur != end; cur++) {
+               unsigned int be_offset = htonl((*cur)->offset);
+               sha1write(out, &be_offset, 4);
+               sha1write(out, (*cur)->sha1, sizeof((*cur)->sha1));
+       }
+       sha1write(out, pack_sha1, sizeof(pack_sha1));
+       sha1close(out, NULL, 1);
+       free(table);
+}
+
+/* Finalize the current pack, write its index, and free what start_packfile() allocated. */
+static void end_packfile()
+{
+       fixup_header_footer();
+       close(pack_fd);
+       write_index(idx_name);
+       free(pack_name);
+       free(idx_name);
+       free(pack_data);
+}
+
 static size_t encode_header(
        enum object_type type,
        size_t size,
@@ -647,7 +764,6 @@ static int store_object(
        if (mark)
                insert_mark(mark, e);
        if (e->offset) {
-               duplicate_count++;
                duplicate_count_by_type[type]++;
                return 1;
        }
@@ -667,14 +783,23 @@ static int store_object(
        deflateInit(&s, zlib_compression_level);
 
        if (delta) {
+               unsigned long ofs = e->offset - last->offset;
+               unsigned pos = sizeof(hdr) - 1;
+
                delta_count_by_type[type]++;
                last->depth++;
                s.next_in = delta;
                s.avail_in = deltalen;
-               hdrlen = encode_header(OBJ_REF_DELTA, deltalen, hdr);
+
+               hdrlen = encode_header(OBJ_OFS_DELTA, deltalen, hdr);
                write_or_die(pack_fd, hdr, hdrlen);
-               write_or_die(pack_fd, last->sha1, sizeof(sha1));
-               pack_size += hdrlen + sizeof(sha1);
+               pack_size += hdrlen;
+
+               hdr[pos] = ofs & 127;
+               while (ofs >>= 7)
+                       hdr[--pos] = 128 | (--ofs & 127);
+               write_or_die(pack_fd, hdr + pos, sizeof(hdr) - pos);
+               pack_size += sizeof(hdr) - pos;
        } else {
                if (last)
                        last->depth = 0;
@@ -701,139 +826,17 @@ static int store_object(
                if (last->data && !last->no_free)
                        free(last->data);
                last->data = dat;
+               last->offset = e->offset;
                last->len = datlen;
-               hashcpy(last->sha1, sha1);
        }
        return 0;
 }
 
-static unsigned char* map_pack(unsigned long offset, unsigned int *left)
-{
-       if (offset >= pack_size)
-               die("object offset outside of pack file");
-       if (!pack_base
-                       || offset < pack_moff
-                       || (offset + 20) >= (pack_moff + pack_mlen)) {
-               if (pack_base)
-                       munmap(pack_base, pack_mlen);
-               pack_moff = (offset / page_size) * page_size;
-               pack_base = mmap(NULL,pack_mlen,PROT_READ,MAP_SHARED,
-                       pack_fd,pack_moff);
-               if (pack_base == MAP_FAILED)
-                       die("Failed to map generated pack: %s", strerror(errno));
-               remap_count++;
-       }
-       offset -= pack_moff;
-       if (left)
-               *left = pack_mlen - offset;
-       return pack_base + offset;
-}
-
-static unsigned long unpack_object_header(unsigned long offset,
-       enum object_type *type,
-       unsigned long *sizep)
-{
-       unsigned shift;
-       unsigned char c;
-       unsigned long size;
-
-       c = *map_pack(offset++, NULL);
-       *type = (c >> 4) & 7;
-       size = c & 15;
-       shift = 4;
-       while (c & 0x80) {
-               c = *map_pack(offset++, NULL);
-               size += (c & 0x7f) << shift;
-               shift += 7;
-       }
-       *sizep = size;
-       return offset;
-}
-
-static void *unpack_non_delta_entry(unsigned long o, unsigned long sz)
+static void *gfi_unpack_entry(unsigned long ofs, unsigned long *sizep)
 {
-       z_stream stream;
-       unsigned char *result;
-
-       result = xmalloc(sz + 1);
-       result[sz] = 0;
-
-       memset(&stream, 0, sizeof(stream));
-       stream.next_in = map_pack(o, &stream.avail_in);
-       stream.next_out = result;
-       stream.avail_out = sz;
-
-       inflateInit(&stream);
-       for (;;) {
-               int st = inflate(&stream, Z_FINISH);
-               if (st == Z_STREAM_END)
-                       break;
-               if (st == Z_OK || st == Z_BUF_ERROR) {
-                       o = stream.next_in - pack_base + pack_moff;
-                       stream.next_in = map_pack(o, &stream.avail_in);
-                       continue;
-               }
-               die("Error %i from zlib during inflate.", st);
-       }
-       inflateEnd(&stream);
-       if (stream.total_out != sz)
-               die("Error after inflate: sizes mismatch");
-       return result;
-}
-
-static void *gfi_unpack_entry(unsigned long offset,
-       unsigned long *sizep,
-       unsigned int *delta_depth);
-
-static void *unpack_delta_entry(unsigned long offset,
-       unsigned long delta_size,
-       unsigned long *sizep,
-       unsigned int *delta_depth)
-{
-       struct object_entry *base_oe;
-       unsigned char *base_sha1;
-       void *delta_data, *base, *result;
-       unsigned long base_size, result_size;
-
-       base_sha1 = map_pack(offset, NULL);
-       base_oe = find_object(base_sha1);
-       if (!base_oe)
-               die("I'm broken; I can't find a base I know must be here.");
-       base = gfi_unpack_entry(base_oe->offset, &base_size, delta_depth);
-       delta_data = unpack_non_delta_entry(offset + 20, delta_size);
-       result = patch_delta(base, base_size,
-                            delta_data, delta_size,
-                            &result_size);
-       if (!result)
-               die("failed to apply delta");
-       free(delta_data);
-       free(base);
-       *sizep = result_size;
-       (*delta_depth)++;
-       return result;
-}
-
-static void *gfi_unpack_entry(unsigned long offset,
-       unsigned long *sizep,
-       unsigned int *delta_depth)
-{
-       unsigned long size;
-       enum object_type kind;
-
-       offset = unpack_object_header(offset, &kind, &size);
-       switch (kind) {
-       case OBJ_REF_DELTA:
-               return unpack_delta_entry(offset, size, sizep, delta_depth);
-       case OBJ_COMMIT:
-       case OBJ_TREE:
-       case OBJ_BLOB:
-       case OBJ_TAG:
-               *sizep = size;
-               *delta_depth = 0;
-               return unpack_non_delta_entry(offset, size);
-       default:
-               die("I created an object I can't read!");
-       }
+       char type[20];
+       pack_data->pack_size = pack_size + 20;
+       return unpack_entry(pack_data, ofs, type, sizep);
 }
 
 static const char *get_mode(const char *str, unsigned int *modep)
@@ -867,7 +870,8 @@ static void load_tree(struct tree_entry *root)
        if (myoe) {
                if (myoe->type != OBJ_TREE)
                        die("Not a tree: %s", sha1_to_hex(sha1));
-               buf = gfi_unpack_entry(myoe->offset, &size, &t->delta_depth);
+               t->delta_depth = 0;
+               buf = gfi_unpack_entry(myoe->offset, &size);
        } else {
                char type[20];
                buf = read_sha1_file(sha1, type, &size);
@@ -956,6 +960,7 @@ static void store_tree(struct tree_entry *root)
        unsigned int i, j, del;
        unsigned long new_len;
        struct last_object lo;
+       struct object_entry *le;
 
        if (!is_null_sha1(root->versions[1].sha1))
                return;
@@ -965,17 +970,16 @@ static void store_tree(struct tree_entry *root)
                        store_tree(t->entries[i]);
        }
 
-       if (!S_ISDIR(root->versions[0].mode)
-                       || is_null_sha1(root->versions[0].sha1)
-                       || !find_object(root->versions[0].sha1)) {
+       le = find_object(root->versions[0].sha1);
+       if (!S_ISDIR(root->versions[0].mode) || !le) {
                lo.data = NULL;
                lo.depth = 0;
        } else {
                mktree(t, 0, &lo.len, &old_tree);
                lo.data = old_tree.buffer;
+               lo.offset = le->offset;
                lo.depth = t->delta_depth;
                lo.no_free = 1;
-               hashcpy(lo.sha1, root->versions[0].sha1);
        }
 
        mktree(t, 1, &new_len, &new_tree);
@@ -1109,100 +1113,6 @@ del_entry:
        return 1;
 }
 
-static void init_pack_header()
-{
-       struct pack_header hdr;
-
-       hdr.hdr_signature = htonl(PACK_SIGNATURE);
-       hdr.hdr_version = htonl(2);
-       hdr.hdr_entries = 0;
-
-       write_or_die(pack_fd, &hdr, sizeof(hdr));
-       pack_size = sizeof(hdr);
-}
-
-static void fixup_header_footer()
-{
-       SHA_CTX c;
-       char hdr[8];
-       unsigned long cnt;
-       char *buf;
-       size_t n;
-
-       if (lseek(pack_fd, 0, SEEK_SET) != 0)
-               die("Failed seeking to start: %s", strerror(errno));
-
-       SHA1_Init(&c);
-       yread(pack_fd, hdr, 8);
-       SHA1_Update(&c, hdr, 8);
-
-       cnt = htonl(object_count);
-       SHA1_Update(&c, &cnt, 4);
-       write_or_die(pack_fd, &cnt, 4);
-
-       buf = xmalloc(128 * 1024);
-       for (;;) {
-               n = xread(pack_fd, buf, 128 * 1024);
-               if (n <= 0)
-                       break;
-               SHA1_Update(&c, buf, n);
-       }
-       free(buf);
-
-       SHA1_Final(pack_sha1, &c);
-       write_or_die(pack_fd, pack_sha1, sizeof(pack_sha1));
-}
-
-static int oecmp (const void *_a, const void *_b)
-{
-       struct object_entry *a = *((struct object_entry**)_a);
-       struct object_entry *b = *((struct object_entry**)_b);
-       return hashcmp(a->sha1, b->sha1);
-}
-
-static void write_index(const char *idx_name)
-{
-       struct sha1file *f;
-       struct object_entry **idx, **c, **last;
-       struct object_entry *e;
-       struct object_entry_pool *o;
-       unsigned int array[256];
-       int i;
-
-       /* Build the sorted table of object IDs. */
-       idx = xmalloc(object_count * sizeof(struct object_entry*));
-       c = idx;
-       for (o = blocks; o; o = o->next_pool)
-               for (e = o->entries; e != o->next_free; e++)
-                       *c++ = e;
-       last = idx + object_count;
-       qsort(idx, object_count, sizeof(struct object_entry*), oecmp);
-
-       /* Generate the fan-out array. */
-       c = idx;
-       for (i = 0; i < 256; i++) {
-               struct object_entry **next = c;;
-               while (next < last) {
-                       if ((*next)->sha1[0] != i)
-                               break;
-                       next++;
-               }
-               array[i] = htonl(next - idx);
-               c = next;
-       }
-
-       f = sha1create("%s", idx_name);
-       sha1write(f, array, 256 * sizeof(int));
-       for (c = idx; c != last; c++) {
-               unsigned int offset = htonl((*c)->offset);
-               sha1write(f, &offset, 4);
-               sha1write(f, (*c)->sha1, sizeof((*c)->sha1));
-       }
-       sha1write(f, pack_sha1, sizeof(pack_sha1));
-       sha1close(f, NULL, 1);
-       free(idx);
-}
-
 static void dump_branches()
 {
        static const char *msg = "fast-import";
@@ -1471,12 +1381,11 @@ static void cmd_from(struct branch *b)
                unsigned long idnum = strtoul(from + 1, NULL, 10);
                struct object_entry *oe = find_mark(idnum);
                unsigned long size;
-               unsigned int depth;
                char *buf;
                if (oe->type != OBJ_COMMIT)
                        die("Mark :%lu not a commit", idnum);
                hashcpy(b->sha1, oe->sha1);
-               buf = gfi_unpack_entry(oe->offset, &size, &depth);
+               buf = gfi_unpack_entry(oe->offset, &size);
                if (!buf || size < 46)
                        die("Not a valid commit: %s", from);
                if (memcmp("tree ", buf, 5)
@@ -1809,16 +1718,12 @@ static const char fast_import_usage[] =
 
 int main(int argc, const char **argv)
 {
-       const char *base_name;
        int i;
        unsigned long est_obj_cnt = object_entry_alloc;
-       char *pack_name;
-       char *idx_name;
-       struct stat sb;
+       unsigned long duplicate_count = 0;      /* summed from duplicate_count_by_type[] before the statistics are printed */
 
        setup_ident();
        git_config(git_default_config);
-       page_size = getpagesize();
 
        for (i = 1; i < argc; i++) {
                const char *a = argv[i];
@@ -1845,16 +1750,6 @@ int main(int argc, const char **argv)
                usage(fast_import_usage);
        base_name = argv[i];
 
-       pack_name = xmalloc(strlen(base_name) + 6);
-       sprintf(pack_name, "%s.pack", base_name);
-       idx_name = xmalloc(strlen(base_name) + 5);
-       sprintf(idx_name, "%s.idx", base_name);
-
-       pack_fd = open(pack_name, O_RDWR|O_CREAT|O_EXCL, 0666);
-       if (pack_fd < 0)
-               die("Can't create %s: %s", pack_name, strerror(errno));
-
-       init_pack_header();
        alloc_objects(est_obj_cnt);
        strbuf_init(&command_buf);
 
@@ -1863,6 +1758,7 @@ int main(int argc, const char **argv)
        avail_tree_table = xcalloc(avail_tree_table_sz, sizeof(struct avail_tree_content*));
        marks = pool_calloc(1, sizeof(struct mark_set));
 
+       start_packfile();
        for (;;) {
                read_next_command();
                if (command_buf.eof)
@@ -1878,16 +1774,17 @@ int main(int argc, const char **argv)
                else
                        die("Unsupported command: %s", command_buf.buf);
        }
+       end_packfile();
 
-       fixup_header_footer();
-       close(pack_fd);
-       write_index(idx_name);
        dump_branches();
        dump_tags();
        dump_marks();
        if (branch_log)
                fclose(branch_log);
 
+       for (i = 0; i < ARRAY_SIZE(duplicate_count_by_type); i++)
+               duplicate_count += duplicate_count_by_type[i];
+
        fprintf(stderr, "%s statistics:\n", argv[0]);
        fprintf(stderr, "---------------------------------------------------------------------\n");
        fprintf(stderr, "Alloc'd objects: %10lu (%10lu overflow  )\n", alloc_count, alloc_count - est_obj_cnt);
@@ -1902,13 +1799,7 @@ int main(int argc, const char **argv)
        fprintf(stderr, "Memory total:    %10lu KiB\n", (total_allocd + alloc_count*sizeof(struct object_entry))/1024);
        fprintf(stderr, "       pools:    %10lu KiB\n", total_allocd/1024);
        fprintf(stderr, "     objects:    %10lu KiB\n", (alloc_count*sizeof(struct object_entry))/1024);
-       fprintf(stderr, "Pack remaps:     %10lu\n", remap_count);
-       stat(pack_name, &sb);
-       fprintf(stderr, "Pack size:       %10lu KiB\n", (unsigned long)(sb.st_size/1024));
-       stat(idx_name, &sb);
-       fprintf(stderr, "Index size:      %10lu KiB\n", (unsigned long)(sb.st_size/1024));
        fprintf(stderr, "---------------------------------------------------------------------\n");
-
        fprintf(stderr, "\n");
 
        return 0;