]> git.ipfire.org Git - thirdparty/git.git/blame - fast-import.c
Added automatic index generation to fast-import.
[thirdparty/git.git] / fast-import.c
CommitLineData
db5e523f
SP
1#include "builtin.h"
2#include "cache.h"
3#include "object.h"
4#include "blob.h"
5#include "delta.h"
6#include "pack.h"
7#include "csum-file.h"
8
9static int max_depth = 10;
10static unsigned long object_count;
8bcce301
SP
11static unsigned long duplicate_count;
12static unsigned long packoff;
13static unsigned long overflow_count;
db5e523f
SP
14static int packfd;
15static int current_depth;
16static void *lastdat;
17static unsigned long lastdatlen;
18static unsigned char lastsha1[20];
8bcce301
SP
19static unsigned char packsha1[20];
20
21struct object_entry
22{
23 struct object_entry *next;
24 unsigned long offset;
25 unsigned char sha1[20];
26};
27
28struct overflow_object_entry
29{
30 struct overflow_object_entry *next;
31 struct object_entry oe;
32};
33
34struct object_entry *pool_start;
35struct object_entry *pool_next;
36struct object_entry *pool_end;
37struct overflow_object_entry *overflow;
38struct object_entry *table[1 << 16];
39
40static struct object_entry* new_object(unsigned char *sha1)
41{
42 if (pool_next != pool_end) {
43 struct object_entry *e = pool_next++;
44 memcpy(e->sha1, sha1, sizeof(e->sha1));
45 return e;
46 } else {
47 struct overflow_object_entry *e;
48
49 e = xmalloc(sizeof(struct overflow_object_entry));
50 e->next = overflow;
51 memcpy(e->oe.sha1, sha1, sizeof(e->oe.sha1));
52 overflow = e;
53 overflow_count++;
54 return &e->oe;
55 }
56}
57
58static struct object_entry* insert_object(unsigned char *sha1)
59{
60 unsigned int h = sha1[0] << 8 | sha1[1];
61 struct object_entry *e = table[h];
62 struct object_entry *p = 0;
63
64 while (e) {
65 if (!memcmp(sha1, e->sha1, sizeof(e->sha1)))
66 return e;
67 p = e;
68 e = e->next;
69 }
70
71 e = new_object(sha1);
72 e->next = 0;
73 e->offset = 0;
74 if (p)
75 p->next = e;
76 else
77 table[h] = e;
78 return e;
79}
db5e523f
SP
80
81static ssize_t yread(int fd, void *buffer, size_t length)
82{
83 ssize_t ret = 0;
84 while (ret < length) {
85 ssize_t size = xread(fd, (char *) buffer + ret, length - ret);
86 if (size < 0) {
87 return size;
88 }
89 if (size == 0) {
90 return ret;
91 }
92 ret += size;
93 }
94 return ret;
95}
96
97static ssize_t ywrite(int fd, void *buffer, size_t length)
98{
99 ssize_t ret = 0;
100 while (ret < length) {
101 ssize_t size = xwrite(fd, (char *) buffer + ret, length - ret);
102 if (size < 0) {
103 return size;
104 }
105 if (size == 0) {
106 return ret;
107 }
108 ret += size;
109 }
110 return ret;
111}
112
113static unsigned long encode_header(enum object_type type, unsigned long size, unsigned char *hdr)
114{
115 int n = 1;
116 unsigned char c;
117
118 if (type < OBJ_COMMIT || type > OBJ_DELTA)
119 die("bad type %d", type);
120
121 c = (type << 4) | (size & 15);
122 size >>= 4;
123 while (size) {
124 *hdr++ = c | 0x80;
125 c = size & 0x7f;
126 size >>= 7;
127 n++;
128 }
129 *hdr = c;
130 return n;
131}
132
8bcce301 133static void write_blob(void *dat, unsigned long datlen)
db5e523f
SP
134{
135 z_stream s;
136 void *out, *delta;
137 unsigned char hdr[64];
138 unsigned long hdrlen, deltalen;
139
140 if (lastdat && current_depth < max_depth) {
141 delta = diff_delta(lastdat, lastdatlen,
142 dat, datlen,
143 &deltalen, 0);
144 } else
145 delta = 0;
146
147 memset(&s, 0, sizeof(s));
148 deflateInit(&s, zlib_compression_level);
149
150 if (delta) {
151 current_depth++;
152 s.next_in = delta;
153 s.avail_in = deltalen;
154 hdrlen = encode_header(OBJ_DELTA, deltalen, hdr);
155 if (ywrite(packfd, hdr, hdrlen) != hdrlen)
156 die("Can't write object header: %s", strerror(errno));
157 if (ywrite(packfd, lastsha1, sizeof(lastsha1)) != sizeof(lastsha1))
158 die("Can't write object base: %s", strerror(errno));
8bcce301 159 packoff += hdrlen + sizeof(lastsha1);
db5e523f
SP
160 } else {
161 current_depth = 0;
162 s.next_in = dat;
163 s.avail_in = datlen;
164 hdrlen = encode_header(OBJ_BLOB, datlen, hdr);
165 if (ywrite(packfd, hdr, hdrlen) != hdrlen)
166 die("Can't write object header: %s", strerror(errno));
8bcce301 167 packoff += hdrlen;
db5e523f
SP
168 }
169
170 s.avail_out = deflateBound(&s, s.avail_in);
171 s.next_out = out = xmalloc(s.avail_out);
172 while (deflate(&s, Z_FINISH) == Z_OK)
173 /* nothing */;
174 deflateEnd(&s);
175
176 if (ywrite(packfd, out, s.total_out) != s.total_out)
177 die("Failed writing compressed data %s", strerror(errno));
8bcce301 178 packoff += s.total_out;
db5e523f
SP
179
180 free(out);
181 if (delta)
182 free(delta);
183}
184
8bcce301 185static void init_pack_header()
db5e523f
SP
186{
187 const char* magic = "PACK";
188 unsigned long version = 2;
189 unsigned long zero = 0;
190
191 version = htonl(version);
192
193 if (ywrite(packfd, (char*)magic, 4) != 4)
194 die("Can't write pack magic: %s", strerror(errno));
195 if (ywrite(packfd, &version, 4) != 4)
196 die("Can't write pack version: %s", strerror(errno));
197 if (ywrite(packfd, &zero, 4) != 4)
198 die("Can't write 0 object count: %s", strerror(errno));
8bcce301 199 packoff = 4 * 3;
db5e523f
SP
200}
201
8bcce301 202static void fixup_header_footer()
db5e523f
SP
203{
204 SHA_CTX c;
205 char hdr[8];
db5e523f
SP
206 unsigned long cnt;
207 char *buf;
208 size_t n;
209
210 if (lseek(packfd, 0, SEEK_SET) != 0)
211 die("Failed seeking to start: %s", strerror(errno));
212
213 SHA1_Init(&c);
214 if (yread(packfd, hdr, 8) != 8)
215 die("Failed reading header: %s", strerror(errno));
216 SHA1_Update(&c, hdr, 8);
217
db5e523f
SP
218 cnt = htonl(object_count);
219 SHA1_Update(&c, &cnt, 4);
220 if (ywrite(packfd, &cnt, 4) != 4)
221 die("Failed writing object count: %s", strerror(errno));
222
223 buf = xmalloc(128 * 1024);
224 for (;;) {
225 n = xread(packfd, buf, 128 * 1024);
226 if (n <= 0)
227 break;
228 SHA1_Update(&c, buf, n);
229 }
230 free(buf);
231
8bcce301
SP
232 SHA1_Final(packsha1, &c);
233 if (ywrite(packfd, packsha1, sizeof(packsha1)) != sizeof(packsha1))
db5e523f
SP
234 die("Failed writing pack checksum: %s", strerror(errno));
235}
236
8bcce301 237static int oecmp (const void *_a, const void *_b)
db5e523f 238{
8bcce301
SP
239 struct object_entry *a = *((struct object_entry**)_a);
240 struct object_entry *b = *((struct object_entry**)_b);
241 return memcmp(a->sha1, b->sha1, sizeof(a->sha1));
242}
243
244static void write_index(const char *idx_name)
245{
246 struct sha1file *f;
247 struct object_entry **idx, **c, **last;
248 struct object_entry *e;
249 struct overflow_object_entry *o;
250 unsigned int array[256];
251 int i;
252
253 /* Build the sorted table of object IDs. */
254 idx = xmalloc(object_count * sizeof(struct object_entry*));
255 c = idx;
256 for (e = pool_start; e != pool_next; e++)
257 *c++ = e;
258 for (o = overflow; o; o = o->next)
259 *c++ = &o->oe;
260 last = idx + object_count;
261 qsort(idx, object_count, sizeof(struct object_entry*), oecmp);
262
263 /* Generate the fan-out array. */
264 c = idx;
265 for (i = 0; i < 256; i++) {
266 struct object_entry **next = c;;
267 while (next < last) {
268 if ((*next)->sha1[0] != i)
269 break;
270 next++;
271 }
272 array[i] = htonl(next - idx);
273 c = next;
274 }
275
276 f = sha1create("%s", idx_name);
277 sha1write(f, array, 256 * sizeof(int));
278 for (c = idx; c != last; c++) {
279 unsigned int offset = htonl((*c)->offset);
280 sha1write(f, &offset, 4);
281 sha1write(f, (*c)->sha1, sizeof((*c)->sha1));
282 }
283 sha1write(f, packsha1, sizeof(packsha1));
284 sha1close(f, NULL, 1);
285 free(idx);
286}
287
288int main(int argc, const char **argv)
289{
290 const char *base_name = argv[1];
291 int est_obj_cnt = atoi(argv[2]);
292 char *pack_name;
293 char *idx_name;
294
295 pack_name = xmalloc(strlen(base_name) + 6);
296 sprintf(pack_name, "%s.pack", base_name);
297 idx_name = xmalloc(strlen(base_name) + 5);
298 sprintf(idx_name, "%s.idx", base_name);
299
300 packfd = open(pack_name, O_RDWR|O_CREAT|O_TRUNC, 0666);
db5e523f 301 if (packfd < 0)
8bcce301
SP
302 die("Can't create pack file %s: %s", pack_name, strerror(errno));
303
304 pool_start = xmalloc(est_obj_cnt * sizeof(struct object_entry));
305 pool_next = pool_start;
306 pool_end = pool_start + est_obj_cnt;
db5e523f
SP
307
308 init_pack_header();
309 for (;;) {
310 unsigned long datlen;
311 int hdrlen;
312 void *dat;
313 char hdr[128];
314 unsigned char sha1[20];
315 SHA_CTX c;
8bcce301 316 struct object_entry *e;
db5e523f
SP
317
318 if (yread(0, &datlen, 4) != 4)
8bcce301 319
db5e523f
SP
320 break;
321
322 dat = xmalloc(datlen);
323 if (yread(0, dat, datlen) != datlen)
324 break;
325
326 hdrlen = sprintf(hdr, "blob %lu", datlen) + 1;
327 SHA1_Init(&c);
328 SHA1_Update(&c, hdr, hdrlen);
329 SHA1_Update(&c, dat, datlen);
330 SHA1_Final(sha1, &c);
331
8bcce301
SP
332 e = insert_object(sha1);
333 if (!e->offset) {
334 e->offset = packoff;
335 write_blob(dat, datlen);
336 object_count++;
337 printf("%s\n", sha1_to_hex(sha1));
338 fflush(stdout);
db5e523f 339
8bcce301
SP
340 if (lastdat)
341 free(lastdat);
342 lastdat = dat;
343 lastdatlen = datlen;
344 memcpy(lastsha1, sha1, sizeof(sha1));
345 } else {
346 duplicate_count++;
347 free(dat);
348 }
db5e523f
SP
349 }
350 fixup_header_footer();
351 close(packfd);
8bcce301
SP
352 write_index(idx_name);
353
354 fprintf(stderr, "%lu objects, %lu duplicates, %lu pool overflow\n",
355 object_count, duplicate_count, overflow_count);
db5e523f
SP
356
357 return 0;
358}