]> git.ipfire.org Git - thirdparty/git.git/blame - fast-import.c
Implemented branch handling and basic tree support in fast-import.
[thirdparty/git.git] / fast-import.c
CommitLineData
db5e523f
SP
1#include "builtin.h"
2#include "cache.h"
3#include "object.h"
4#include "blob.h"
5#include "delta.h"
6#include "pack.h"
7#include "csum-file.h"
8
27d6d290
SP
9struct object_entry
10{
11 struct object_entry *next;
12 unsigned long offset;
13 unsigned char sha1[20];
14};
15
16struct object_entry_block
17{
18 struct object_entry_block *next_block;
19 struct object_entry *next_free;
20 struct object_entry *end;
ac47a738 21 struct object_entry entries[FLEX_ARRAY]; /* more */
27d6d290
SP
22};
23
ac47a738
SP
24struct last_object
25{
26 void *data;
6bb5b329
SP
27 unsigned int len;
28 unsigned int depth;
ac47a738
SP
29 unsigned char sha1[20];
30};
31
6bb5b329
SP
32struct tree;
33struct tree_entry
34{
35 struct tree *tree;
36 mode_t mode;
37 unsigned char sha1[20];
38 char name[FLEX_ARRAY]; /* more */
39};
40
41struct tree
42{
43 struct last_object last_tree;
44 unsigned long entry_count;
45 struct tree_entry **entries;
46};
47
48struct branch
49{
50 struct branch *next_branch;
51 struct tree_entry tree;
52 unsigned char sha1[20];
53 const char *name;
54};
55
ac47a738 56/* Stats and misc. counters. */
db5e523f 57static int max_depth = 10;
27d6d290 58static unsigned long alloc_count;
6bb5b329 59static unsigned long branch_count;
db5e523f 60static unsigned long object_count;
8bcce301 61static unsigned long duplicate_count;
6143f064
SP
62static unsigned long object_count_by_type[9];
63static unsigned long duplicate_count_by_type[9];
ac47a738
SP
64
65/* The .pack file */
66static int pack_fd;
67static unsigned long pack_offset;
68static unsigned char pack_sha1[20];
69
70/* Table of objects we've written. */
27d6d290 71struct object_entry_block *blocks;
ac47a738
SP
72struct object_entry *object_table[1 << 16];
73
74/* Our last blob */
75struct last_object last_blob;
8bcce301 76
6bb5b329
SP
77/* Branch data */
78struct branch *branches;
79struct branch *current_branch;
80
27d6d290 81static void alloc_objects(int cnt)
8bcce301 82{
27d6d290
SP
83 struct object_entry_block *b;
84
85 b = xmalloc(sizeof(struct object_entry_block)
86 + cnt * sizeof(struct object_entry));
87 b->next_block = blocks;
88 b->next_free = b->entries;
89 b->end = b->entries + cnt;
90 blocks = b;
91 alloc_count += cnt;
92}
8bcce301 93
27d6d290 94static struct object_entry* new_object(unsigned char *sha1)
8bcce301 95{
27d6d290 96 struct object_entry *e;
8bcce301 97
27d6d290
SP
98 if (blocks->next_free == blocks->end)
99 alloc_objects(1000);
8bcce301 100
27d6d290
SP
101 e = blocks->next_free++;
102 memcpy(e->sha1, sha1, sizeof(e->sha1));
103 return e;
8bcce301
SP
104}
105
106static struct object_entry* insert_object(unsigned char *sha1)
107{
108 unsigned int h = sha1[0] << 8 | sha1[1];
27d6d290 109 struct object_entry *e = object_table[h];
8bcce301
SP
110 struct object_entry *p = 0;
111
112 while (e) {
113 if (!memcmp(sha1, e->sha1, sizeof(e->sha1)))
114 return e;
115 p = e;
116 e = e->next;
117 }
118
119 e = new_object(sha1);
120 e->next = 0;
121 e->offset = 0;
122 if (p)
123 p->next = e;
124 else
27d6d290 125 object_table[h] = e;
8bcce301
SP
126 return e;
127}
db5e523f
SP
128
129static ssize_t yread(int fd, void *buffer, size_t length)
130{
131 ssize_t ret = 0;
132 while (ret < length) {
133 ssize_t size = xread(fd, (char *) buffer + ret, length - ret);
134 if (size < 0) {
135 return size;
136 }
137 if (size == 0) {
138 return ret;
139 }
140 ret += size;
141 }
142 return ret;
143}
144
145static ssize_t ywrite(int fd, void *buffer, size_t length)
146{
147 ssize_t ret = 0;
148 while (ret < length) {
149 ssize_t size = xwrite(fd, (char *) buffer + ret, length - ret);
150 if (size < 0) {
151 return size;
152 }
153 if (size == 0) {
154 return ret;
155 }
156 ret += size;
157 }
158 return ret;
159}
160
6bb5b329
SP
161static const char* read_string()
162{
163 static char sn[PATH_MAX];
164 unsigned long slen;
165
166 if (yread(0, &slen, 4) != 4)
167 die("Can't obtain string");
168 if (!slen)
169 return 0;
170 if (slen > (PATH_MAX - 1))
171 die("Can't handle excessive string length %lu", slen);
172
173 if (yread(0, sn, slen) != slen)
174 die("Can't obtain string of length %lu", slen);
175 sn[slen] = 0;
176 return sn;
177}
178
179static const char* read_required_string()
180{
181 const char *r = read_string();
182 if (!r)
183 die("Expected string command parameter, didn't find one");
184 return r;
185}
186
ac47a738
SP
187static unsigned long encode_header(
188 enum object_type type,
189 unsigned long size,
190 unsigned char *hdr)
db5e523f
SP
191{
192 int n = 1;
193 unsigned char c;
194
195 if (type < OBJ_COMMIT || type > OBJ_DELTA)
196 die("bad type %d", type);
197
198 c = (type << 4) | (size & 15);
199 size >>= 4;
200 while (size) {
201 *hdr++ = c | 0x80;
202 c = size & 0x7f;
203 size >>= 7;
204 n++;
205 }
206 *hdr = c;
207 return n;
208}
209
ac47a738
SP
210static int store_object(
211 enum object_type type,
212 void *dat,
213 unsigned long datlen,
6bb5b329
SP
214 struct last_object *last,
215 unsigned char *sha1out)
db5e523f 216{
db5e523f 217 void *out, *delta;
ac47a738
SP
218 struct object_entry *e;
219 unsigned char hdr[96];
220 unsigned char sha1[20];
db5e523f 221 unsigned long hdrlen, deltalen;
ac47a738
SP
222 SHA_CTX c;
223 z_stream s;
224
225 hdrlen = sprintf((char*)hdr,"%s %lu",type_names[type],datlen) + 1;
226 SHA1_Init(&c);
227 SHA1_Update(&c, hdr, hdrlen);
228 SHA1_Update(&c, dat, datlen);
229 SHA1_Final(sha1, &c);
6bb5b329
SP
230 if (sha1out)
231 memcpy(sha1out, sha1, sizeof(sha1));
ac47a738
SP
232
233 e = insert_object(sha1);
234 if (e->offset) {
235 duplicate_count++;
6143f064 236 duplicate_count_by_type[type]++;
ac47a738
SP
237 return 0;
238 }
239 e->offset = pack_offset;
240 object_count++;
6143f064 241 object_count_by_type[type]++;
db5e523f 242
ac47a738
SP
243 if (last->data && last->depth < max_depth)
244 delta = diff_delta(last->data, last->len,
db5e523f
SP
245 dat, datlen,
246 &deltalen, 0);
ac47a738 247 else
db5e523f
SP
248 delta = 0;
249
250 memset(&s, 0, sizeof(s));
251 deflateInit(&s, zlib_compression_level);
252
253 if (delta) {
ac47a738 254 last->depth++;
db5e523f
SP
255 s.next_in = delta;
256 s.avail_in = deltalen;
257 hdrlen = encode_header(OBJ_DELTA, deltalen, hdr);
ac47a738 258 if (ywrite(pack_fd, hdr, hdrlen) != hdrlen)
db5e523f 259 die("Can't write object header: %s", strerror(errno));
ac47a738 260 if (ywrite(pack_fd, last->sha1, sizeof(sha1)) != sizeof(sha1))
db5e523f 261 die("Can't write object base: %s", strerror(errno));
ac47a738 262 pack_offset += hdrlen + sizeof(sha1);
db5e523f 263 } else {
ac47a738 264 last->depth = 0;
db5e523f
SP
265 s.next_in = dat;
266 s.avail_in = datlen;
ac47a738
SP
267 hdrlen = encode_header(type, datlen, hdr);
268 if (ywrite(pack_fd, hdr, hdrlen) != hdrlen)
db5e523f 269 die("Can't write object header: %s", strerror(errno));
ac47a738 270 pack_offset += hdrlen;
db5e523f
SP
271 }
272
273 s.avail_out = deflateBound(&s, s.avail_in);
274 s.next_out = out = xmalloc(s.avail_out);
275 while (deflate(&s, Z_FINISH) == Z_OK)
276 /* nothing */;
277 deflateEnd(&s);
278
ac47a738 279 if (ywrite(pack_fd, out, s.total_out) != s.total_out)
db5e523f 280 die("Failed writing compressed data %s", strerror(errno));
ac47a738 281 pack_offset += s.total_out;
db5e523f
SP
282
283 free(out);
284 if (delta)
285 free(delta);
ac47a738
SP
286 if (last->data)
287 free(last->data);
288 last->data = dat;
289 last->len = datlen;
290 memcpy(last->sha1, sha1, sizeof(sha1));
291 return 1;
db5e523f
SP
292}
293
8bcce301 294static void init_pack_header()
db5e523f
SP
295{
296 const char* magic = "PACK";
6143f064 297 unsigned long version = 3;
db5e523f
SP
298 unsigned long zero = 0;
299
300 version = htonl(version);
301
ac47a738 302 if (ywrite(pack_fd, (char*)magic, 4) != 4)
db5e523f 303 die("Can't write pack magic: %s", strerror(errno));
ac47a738 304 if (ywrite(pack_fd, &version, 4) != 4)
db5e523f 305 die("Can't write pack version: %s", strerror(errno));
ac47a738 306 if (ywrite(pack_fd, &zero, 4) != 4)
db5e523f 307 die("Can't write 0 object count: %s", strerror(errno));
ac47a738 308 pack_offset = 4 * 3;
db5e523f
SP
309}
310
8bcce301 311static void fixup_header_footer()
db5e523f
SP
312{
313 SHA_CTX c;
314 char hdr[8];
db5e523f
SP
315 unsigned long cnt;
316 char *buf;
317 size_t n;
318
ac47a738 319 if (lseek(pack_fd, 0, SEEK_SET) != 0)
db5e523f
SP
320 die("Failed seeking to start: %s", strerror(errno));
321
322 SHA1_Init(&c);
ac47a738 323 if (yread(pack_fd, hdr, 8) != 8)
db5e523f
SP
324 die("Failed reading header: %s", strerror(errno));
325 SHA1_Update(&c, hdr, 8);
326
db5e523f
SP
327 cnt = htonl(object_count);
328 SHA1_Update(&c, &cnt, 4);
ac47a738 329 if (ywrite(pack_fd, &cnt, 4) != 4)
db5e523f
SP
330 die("Failed writing object count: %s", strerror(errno));
331
332 buf = xmalloc(128 * 1024);
333 for (;;) {
ac47a738 334 n = xread(pack_fd, buf, 128 * 1024);
db5e523f
SP
335 if (n <= 0)
336 break;
337 SHA1_Update(&c, buf, n);
338 }
339 free(buf);
340
ac47a738
SP
341 SHA1_Final(pack_sha1, &c);
342 if (ywrite(pack_fd, pack_sha1, sizeof(pack_sha1)) != sizeof(pack_sha1))
db5e523f
SP
343 die("Failed writing pack checksum: %s", strerror(errno));
344}
345
8bcce301 346static int oecmp (const void *_a, const void *_b)
db5e523f 347{
8bcce301
SP
348 struct object_entry *a = *((struct object_entry**)_a);
349 struct object_entry *b = *((struct object_entry**)_b);
350 return memcmp(a->sha1, b->sha1, sizeof(a->sha1));
351}
352
353static void write_index(const char *idx_name)
354{
355 struct sha1file *f;
356 struct object_entry **idx, **c, **last;
357 struct object_entry *e;
27d6d290 358 struct object_entry_block *o;
8bcce301
SP
359 unsigned int array[256];
360 int i;
361
362 /* Build the sorted table of object IDs. */
363 idx = xmalloc(object_count * sizeof(struct object_entry*));
364 c = idx;
27d6d290
SP
365 for (o = blocks; o; o = o->next_block)
366 for (e = o->entries; e != o->next_free; e++)
367 *c++ = e;
8bcce301
SP
368 last = idx + object_count;
369 qsort(idx, object_count, sizeof(struct object_entry*), oecmp);
370
371 /* Generate the fan-out array. */
372 c = idx;
373 for (i = 0; i < 256; i++) {
374 struct object_entry **next = c;;
375 while (next < last) {
376 if ((*next)->sha1[0] != i)
377 break;
378 next++;
379 }
380 array[i] = htonl(next - idx);
381 c = next;
382 }
383
384 f = sha1create("%s", idx_name);
385 sha1write(f, array, 256 * sizeof(int));
386 for (c = idx; c != last; c++) {
387 unsigned int offset = htonl((*c)->offset);
388 sha1write(f, &offset, 4);
389 sha1write(f, (*c)->sha1, sizeof((*c)->sha1));
390 }
ac47a738 391 sha1write(f, pack_sha1, sizeof(pack_sha1));
8bcce301
SP
392 sha1close(f, NULL, 1);
393 free(idx);
394}
395
6143f064
SP
396static void new_blob()
397{
398 unsigned long datlen;
399 void *dat;
400
401 if (yread(0, &datlen, 4) != 4)
402 die("Can't obtain blob length");
403
404 dat = xmalloc(datlen);
405 if (yread(0, dat, datlen) != datlen)
406 die("Con't obtain %lu bytes of blob data", datlen);
407
6bb5b329 408 if (!store_object(OBJ_BLOB, dat, datlen, &last_blob, 0))
6143f064
SP
409 free(dat);
410}
411
6bb5b329
SP
412static struct branch* lookup_branch(const char *name)
413{
414 struct branch *b;
415 for (b = branches; b; b = b->next_branch) {
416 if (!strcmp(name, b->name))
417 return b;
418 }
419 die("No branch named '%s' has been declared", name);
420}
421
422static struct tree* deep_copy_tree (struct tree *t)
423{
424 struct tree *r = xmalloc(sizeof(struct tree));
425 unsigned long i;
426
427 if (t->last_tree.data) {
428 r->last_tree.data = xmalloc(t->last_tree.len);
429 r->last_tree.len = t->last_tree.len;
430 r->last_tree.depth = t->last_tree.depth;
431 memcpy(r->last_tree.data, t->last_tree.data, t->last_tree.len);
432 memcpy(r->last_tree.sha1, t->last_tree.sha1, sizeof(t->last_tree.sha1));
433 }
434
435 r->entry_count = t->entry_count;
436 r->entries = xmalloc(t->entry_count * sizeof(struct tree_entry*));
437 for (i = 0; i < t->entry_count; i++) {
438 struct tree_entry *a = t->entries[i];
439 struct tree_entry *b;
440
441 b = xmalloc(sizeof(struct tree_entry) + strlen(a->name) + 1);
442 b->tree = a->tree ? deep_copy_tree(a->tree) : 0;
443 b->mode = a->mode;
444 memcpy(b->sha1, a->sha1, sizeof(a->sha1));
445 strcpy(b->name, a->name);
446 r->entries[i] = b;
447 }
448
449 return r;
450}
451
452static void store_tree (struct tree_entry *e)
453{
454 struct tree *t = e->tree;
455 unsigned long maxlen, i;
456 char *buf, *c;
457
458 if (memcmp(null_sha1, e->sha1, sizeof(e->sha1)))
459 return;
460
461 maxlen = t->entry_count * 32;
462 for (i = 0; i < t->entry_count; i++)
463 maxlen += strlen(t->entries[i]->name);
464
465 buf = c = xmalloc(maxlen);
466 for (i = 0; i < t->entry_count; i++) {
467 struct tree_entry *e = t->entries[i];
468 c += sprintf(c, "%o %s", e->mode, e->name) + 1;
469 if (e->tree)
470 store_tree(e);
471 memcpy(c, e->sha1, sizeof(e->sha1));
472 c += sizeof(e->sha1);
473 }
474
475 if (!store_object(OBJ_TREE, buf, c - buf, &t->last_tree, e->sha1))
476 free(buf);
477}
478
479static void new_branch()
480{
481 struct branch *nb = xcalloc(1, sizeof(struct branch));
482 const char *source_name;
483
484 nb->name = strdup(read_required_string());
485 source_name = read_string();
486 if (source_name) {
487 struct branch *sb = lookup_branch(source_name);
488 nb->tree.tree = deep_copy_tree(sb->tree.tree);
489 memcpy(nb->tree.sha1, sb->tree.sha1, sizeof(sb->tree.sha1));
490 memcpy(nb->sha1, sb->sha1, sizeof(sb->sha1));
491 } else {
492 nb->tree.tree = xcalloc(1, sizeof(struct tree));
493 nb->tree.tree->entries = xmalloc(8*sizeof(struct tree_entry*));
494 }
495 nb->next_branch = branches;
496 branches = nb;
497 branch_count++;
498}
499
500static void set_branch()
501{
502 current_branch = lookup_branch(read_required_string());
503}
504
505static void commit()
506{
507 store_tree(&current_branch->tree);
508}
509
8bcce301
SP
510int main(int argc, const char **argv)
511{
512 const char *base_name = argv[1];
513 int est_obj_cnt = atoi(argv[2]);
514 char *pack_name;
515 char *idx_name;
6143f064 516 struct stat sb;
8bcce301
SP
517
518 pack_name = xmalloc(strlen(base_name) + 6);
519 sprintf(pack_name, "%s.pack", base_name);
520 idx_name = xmalloc(strlen(base_name) + 5);
521 sprintf(idx_name, "%s.idx", base_name);
522
ac47a738
SP
523 pack_fd = open(pack_name, O_RDWR|O_CREAT|O_EXCL, 0666);
524 if (pack_fd < 0)
6143f064 525 die("Can't create %s: %s", pack_name, strerror(errno));
8bcce301 526
27d6d290 527 alloc_objects(est_obj_cnt);
db5e523f
SP
528 init_pack_header();
529 for (;;) {
6143f064
SP
530 unsigned long cmd;
531 if (yread(0, &cmd, 4) != 4)
db5e523f
SP
532 break;
533
6143f064 534 switch (cmd) {
6bb5b329
SP
535 case 'blob': new_blob(); break;
536 case 'newb': new_branch(); break;
537 case 'setb': set_branch(); break;
538 case 'comt': commit(); break;
6143f064
SP
539 default:
540 die("Invalid command %lu", cmd);
541 }
db5e523f
SP
542 }
543 fixup_header_footer();
ac47a738 544 close(pack_fd);
8bcce301
SP
545 write_index(idx_name);
546
6143f064
SP
547 fprintf(stderr, "%s statistics:\n", argv[0]);
548 fprintf(stderr, "---------------------------------------------------\n");
549 fprintf(stderr, "Alloc'd objects: %10lu (%10lu overflow )\n", alloc_count, alloc_count - est_obj_cnt);
550 fprintf(stderr, "Total objects: %10lu (%10lu duplicates)\n", object_count, duplicate_count);
551 fprintf(stderr, " blobs : %10lu (%10lu duplicates)\n", object_count_by_type[OBJ_BLOB], duplicate_count_by_type[OBJ_BLOB]);
552 fprintf(stderr, " trees : %10lu (%10lu duplicates)\n", object_count_by_type[OBJ_TREE], duplicate_count_by_type[OBJ_TREE]);
553 fprintf(stderr, " commits: %10lu (%10lu duplicates)\n", object_count_by_type[OBJ_COMMIT], duplicate_count_by_type[OBJ_COMMIT]);
554 fprintf(stderr, " tags : %10lu (%10lu duplicates)\n", object_count_by_type[OBJ_TAG], duplicate_count_by_type[OBJ_TAG]);
6bb5b329 555 fprintf(stderr, "Total branches: %10lu\n", branch_count);
6143f064
SP
556 fprintf(stderr, "---------------------------------------------------\n");
557
558 stat(pack_name, &sb);
559 fprintf(stderr, "Pack size: %10lu KiB\n", (unsigned long)(sb.st_size/1024));
560 stat(idx_name, &sb);
561 fprintf(stderr, "Index size: %10lu KiB\n", (unsigned long)(sb.st_size/1024));
562
563 fprintf(stderr, "\n");
db5e523f
SP
564
565 return 0;
566}