]> git.ipfire.org Git - thirdparty/git.git/blob - sha1_file.c
Move offset_1st_component() to path.c
[thirdparty/git.git] / sha1_file.c
1 /*
2 * GIT - The information manager from hell
3 *
4 * Copyright (C) Linus Torvalds, 2005
5 *
6 * This handles basic git sha1 object files - packing, unpacking,
7 * creation etc.
8 */
9 #include "cache.h"
10 #include "delta.h"
11 #include "pack.h"
12 #include "blob.h"
13 #include "commit.h"
14 #include "tag.h"
15 #include "tree.h"
16 #include "refs.h"
17 #include "pack-revindex.h"
18 #include "sha1-lookup.h"
19
/*
 * Fallback for systems whose headers do not define O_NOATIME:
 * on Linux i386/PPC supply the raw flag value, everywhere else
 * define it as 0 so that OR-ing it into open() flags is a no-op.
 */
#ifndef O_NOATIME
#if defined(__linux__) && (defined(__i386__) || defined(__PPC__))
#define O_NOATIME 01000000
#else
#define O_NOATIME 0
#endif
#endif
27
/*
 * printf support for size_t values: SZ_FMT is the conversion to use
 * (without the leading '%') and sz_fmt() converts its argument to the
 * type that conversion expects.  When C99's "%zu" is unavailable
 * (NO_C99_FORMAT), fall back to unsigned long and "%lu".
 */
#ifndef NO_C99_FORMAT
#define SZ_FMT "zu"
static size_t sz_fmt(size_t value)
{
	return value;
}
#else
#define SZ_FMT "lu"
static unsigned long sz_fmt(size_t value)
{
	return (unsigned long)value;
}
#endif
35
/* A 20-byte SHA-1 with every byte zero (file-scope object, so zero-initialized). */
const unsigned char null_sha1[20];
37
/*
 * Lookup table indexed by an unsigned character value: yields the
 * numeric value (0-15) of a hexadecimal digit, accepting both upper
 * and lower case letters, and -1 for every other byte.
 */
const signed char hexval_table[256] = {
	/* 00-0f */
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 10-1f */
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 20-2f */
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 30-3f: '0'..'9' */
	 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1,
	/* 40-4f: 'A'..'F' */
	-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 50-5f */
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 60-6f: 'a'..'f' */
	-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 70-7f */
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 80-8f */
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 90-9f */
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* a0-af */
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* b0-bf */
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* c0-cf */
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* d0-df */
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* e0-ef */
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* f0-ff */
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
};
72
/*
 * Parse 40 hexadecimal digits at "hex" into the 20-byte binary "sha1".
 *
 * Returns 0 on success and -1 as soon as a character that is not a
 * hex digit is met.  Bytes of "sha1" written before the failure are
 * left as they are.
 */
int get_sha1_hex(const char *hex, unsigned char *sha1)
{
	int i;
	for (i = 0; i < 20; i++) {
		unsigned int val;
		/*
		 * A NUL in hex[1] is rejected by the check on val
		 * below, but if hex[0] is the terminating NUL we must
		 * stop here so that hex[1] (one past the end of the
		 * string) is never read.
		 */
		if (!hex[0])
			return -1;
		val = (hexval(hex[0]) << 4) | hexval(hex[1]);
		if (val & ~0xff)
			return -1;
		*sha1++ = val;
		hex += 2;
	}
	return 0;
}
85
/*
 * Create every missing directory leading up to (but not including)
 * the last component of "path".  The string is edited in place while
 * working (each '/' is temporarily replaced by NUL) and is restored
 * before returning.
 *
 * Returns:
 *    0  on success
 *   -1  if mkdir() failed
 *   -2  if adjust_shared_perm() failed on a newly created directory
 *   -3  if a leading component exists but is not a directory
 */
int safe_create_leading_directories(char *path)
{
	char *pos = path + offset_1st_component(path);
	struct stat st;

	while (pos) {
		pos = strchr(pos, '/');
		if (!pos)
			break;
		/* collapse a run of slashes; stop if nothing follows it */
		while (*++pos == '/')
			;
		if (!*pos)
			break;
		*--pos = '\0';
		if (!stat(path, &st)) {
			/* path exists */
			if (!S_ISDIR(st.st_mode)) {
				*pos = '/';
				return -3;
			}
		}
		else if (mkdir(path, 0777)) {
			/*
			 * Somebody else may have created the directory
			 * between our stat() and mkdir(); that is not
			 * an error as long as it really is a directory.
			 */
			if (errno != EEXIST ||
			    stat(path, &st) || !S_ISDIR(st.st_mode)) {
				*pos = '/';
				return -1;
			}
		}
		else if (adjust_shared_perm(path)) {
			*pos = '/';
			return -2;
		}
		*pos++ = '/';
	}
	return 0;
}
119
/*
 * Variant of safe_create_leading_directories() for callers that only
 * hold a const path: the work is done on a private heap copy, which
 * is released before returning the callee's result.
 */
int safe_create_leading_directories_const(const char *path)
{
	char *scratch = xstrdup(path);
	int status = safe_create_leading_directories(scratch);

	free(scratch);
	return status;
}
128
/*
 * Format a 20-byte SHA-1 as 40 lowercase hex digits plus NUL.
 *
 * The result lives in one of four static buffers used in rotation,
 * so up to four results may be alive at once (e.g. as arguments of a
 * single printf); a fifth call overwrites the oldest one.
 */
char *sha1_to_hex(const unsigned char *sha1)
{
	static unsigned int bufno;
	static char hexbuffer[4][50];
	static const char hex[] = "0123456789abcdef";
	char *buffer, *buf;
	int i;

	/*
	 * Keep the rotating index bounded rather than incrementing a
	 * signed counter forever, which would eventually overflow.
	 */
	bufno = (bufno + 1) % 4;
	buffer = buf = hexbuffer[bufno];

	for (i = 0; i < 20; i++) {
		unsigned int val = *sha1++;
		*buf++ = hex[val >> 4];
		*buf++ = hex[val & 0xf];
	}
	*buf = '\0';

	return buffer;
}
146
/*
 * Write the 40 hex digits of "sha1" into "pathbuf" using the loose
 * object layout "xx/yyyy...": the first byte goes to positions 0-1,
 * position 2 is skipped (the caller has put a '/' there), and the
 * remaining 19 bytes go to positions 3-40.  No NUL is written; the
 * caller's buffer must already be terminated.
 */
static void fill_sha1_path(char *pathbuf, const unsigned char *sha1)
{
	/* read-only digit table, like the other hex formatters in this file */
	static const char hex[] = "0123456789abcdef";
	int i;

	for (i = 0; i < 20; i++) {
		unsigned int val = sha1[i];
		char *pos = pathbuf + i*2 + (i > 0);
		*pos++ = hex[val >> 4];
		*pos = hex[val & 0xf];
	}
}
158
/*
 * Return the path "<object directory>/xx/yyyy..." of the loose object
 * file for "sha1".
 *
 * NOTE! The result is a single buffer that is allocated on the first
 * call and overwritten by every later call; xstrdup() it if you need
 * to keep the filename.
 *
 * Also note that this is the location used for creating the object.
 * When reading, the object may instead be found in an alternate
 * object directory if it is absent from the primary one.
 */
char *sha1_file_name(const unsigned char *sha1)
{
	static char *name, *base;

	if (base == NULL) {
		const char *objdir = get_object_directory();
		int dirlen = strlen(objdir);

		base = xmalloc(dirlen + 60);
		memcpy(base, objdir, dirlen);
		/* zero the tail so the path is always NUL-terminated */
		memset(base + dirlen, 0, 60);
		base[dirlen] = '/';
		base[dirlen + 3] = '/';
		name = base + dirlen + 1;
	}
	fill_sha1_path(name, sha1);
	return base;
}
186
/*
 * Shared worker for sha1_pack_name() and sha1_pack_index_name().
 *
 * On first use, allocates "*base" holding
 * "<object directory>/pack/pack-<40 placeholder digits>.<which>" and
 * points "*name" at the first placeholder digit.  Every call then
 * overwrites the 40 digits with the hex form of "sha1" and returns
 * "*base".  The caller owns the two static pointers it passes in.
 */
static char *sha1_get_pack_name(const unsigned char *sha1,
				char **name, char **base, const char *which)
{
	static const char hex[] = "0123456789abcdef";
	char *out;
	int i;

	if (*base == NULL) {
		const char *objdir = get_object_directory();
		int dirlen = strlen(objdir);

		*base = xmalloc(dirlen + 60);
		sprintf(*base, "%s/pack/pack-1234567890123456789012345678901234567890.%s",
			objdir, which);
		/* skip the directory and the 11 characters of "/pack/pack-" */
		*name = *base + dirlen + 11;
	}

	out = *name;
	for (i = 0; i < 20; i++) {
		unsigned int byte = sha1[i];

		out[2 * i] = hex[byte >> 4];
		out[2 * i + 1] = hex[byte & 0xf];
	}

	return *base;
}
213
/*
 * Return the path of the ".pack" file named after "sha1".  The result
 * is a buffer private to this function that is reused on every call.
 */
char *sha1_pack_name(const unsigned char *sha1)
{
	static char *hex_pos, *full_path;

	return sha1_get_pack_name(sha1, &hex_pos, &full_path, "pack");
}
220
/*
 * Return the path of the ".idx" file named after "sha1".  The result
 * is a buffer private to this function that is reused on every call.
 */
char *sha1_pack_index_name(const unsigned char *sha1)
{
	static char *hex_pos, *full_path;

	return sha1_get_pack_name(sha1, &hex_pos, &full_path, "idx");
}
227
/* Head of the linked list of registered alternate object databases. */
struct alternate_object_database *alt_odb_list;
/*
 * Points at the "next" link where the following alternate will be
 * appended.  NULL until prepare_alt_odb() has run, which is how that
 * function knows whether the list has been set up yet.
 */
static struct alternate_object_database **alt_odb_tail;

/* Defined below; declared here because link_alt_odb_entry() recurses into it. */
static void read_info_alternates(const char * alternates, int depth);
232
233 /*
234 * Prepare alternate object database registry.
235 *
236 * The variable alt_odb_list points at the list of struct
237 * alternate_object_database. The elements on this list come from
238 * non-empty elements from colon separated ALTERNATE_DB_ENVIRONMENT
239 * environment variable, and $GIT_OBJECT_DIRECTORY/info/alternates,
240 * whose contents is similar to that environment variable but can be
241 * LF separated. Its base points at a statically allocated buffer that
242 * contains "/the/directory/corresponding/to/.git/objects/...", while
243 * its name points just after the slash at the end of ".git/objects/"
244 * in the example above, and has enough space to hold 40-byte hex
245 * SHA1, an extra slash for the first level indirection, and the
246 * terminating NUL.
247 */
248 static int link_alt_odb_entry(const char * entry, int len, const char * relative_base, int depth)
249 {
250 const char *objdir = get_object_directory();
251 struct alternate_object_database *ent;
252 struct alternate_object_database *alt;
253 /* 43 = 40-byte + 2 '/' + terminating NUL */
254 int pfxlen = len;
255 int entlen = pfxlen + 43;
256 int base_len = -1;
257
258 if (!is_absolute_path(entry) && relative_base) {
259 /* Relative alt-odb */
260 if (base_len < 0)
261 base_len = strlen(relative_base) + 1;
262 entlen += base_len;
263 pfxlen += base_len;
264 }
265 ent = xmalloc(sizeof(*ent) + entlen);
266
267 if (!is_absolute_path(entry) && relative_base) {
268 memcpy(ent->base, relative_base, base_len - 1);
269 ent->base[base_len - 1] = '/';
270 memcpy(ent->base + base_len, entry, len);
271 }
272 else
273 memcpy(ent->base, entry, pfxlen);
274
275 ent->name = ent->base + pfxlen + 1;
276 ent->base[pfxlen + 3] = '/';
277 ent->base[pfxlen] = ent->base[entlen-1] = 0;
278
279 /* Detect cases where alternate disappeared */
280 if (!is_directory(ent->base)) {
281 error("object directory %s does not exist; "
282 "check .git/objects/info/alternates.",
283 ent->base);
284 free(ent);
285 return -1;
286 }
287
288 /* Prevent the common mistake of listing the same
289 * thing twice, or object directory itself.
290 */
291 for (alt = alt_odb_list; alt; alt = alt->next) {
292 if (!memcmp(ent->base, alt->base, pfxlen)) {
293 free(ent);
294 return -1;
295 }
296 }
297 if (!memcmp(ent->base, objdir, pfxlen)) {
298 free(ent);
299 return -1;
300 }
301
302 /* add the alternate entry */
303 *alt_odb_tail = ent;
304 alt_odb_tail = &(ent->next);
305 ent->next = NULL;
306
307 /* recursively add alternates */
308 read_info_alternates(ent->base, depth + 1);
309
310 ent->base[pfxlen] = '/';
311
312 return 0;
313 }
314
/*
 * Split the text in [alt, ep) at "sep" characters and register every
 * non-empty element as an alternate object directory.
 *
 * Elements starting with '#' are skipped as comments.  Relative
 * elements are only honoured at depth 0; deeper ones are reported and
 * ignored.  Nothing is registered once "depth" exceeds 5.
 * "relative_base" is handed to link_alt_odb_entry() and used in
 * messages.  The text need not be NUL-terminated.
 */
static void link_alt_odb_entries(const char *alt, const char *ep, int sep,
				 const char *relative_base, int depth)
{
	const char *cp, *last;

	if (depth > 5) {
		error("%s: ignoring alternate object stores, nesting too deep.",
				relative_base);
		return;
	}

	last = alt;
	while (last < ep) {
		cp = last;
		if (cp < ep && *cp == '#') {
			/* comment: skip to just past the next separator */
			while (cp < ep && *cp != sep)
				cp++;
			last = cp + 1;
			continue;
		}
		while (cp < ep && *cp != sep)
			cp++;
		if (last != cp) {
			if (!is_absolute_path(last) && depth) {
				/*
				 * The element ends at cp, not at a NUL, so
				 * bound what is printed: a plain %s would
				 * run into the following entries and maybe
				 * past the end of the buffer.
				 */
				error("%s: ignoring relative alternate object store %.*s",
						relative_base, (int)(cp - last), last);
			} else {
				link_alt_odb_entry(last, cp - last,
						relative_base, depth);
			}
		}
		while (cp < ep && *cp == sep)
			cp++;
		last = cp;
	}
}
351
352 static void read_info_alternates(const char * relative_base, int depth)
353 {
354 char *map;
355 size_t mapsz;
356 struct stat st;
357 const char alt_file_name[] = "info/alternates";
358 /* Given that relative_base is no longer than PATH_MAX,
359 ensure that "path" has enough space to append "/", the
360 file name, "info/alternates", and a trailing NUL. */
361 char path[PATH_MAX + 1 + sizeof alt_file_name];
362 int fd;
363
364 sprintf(path, "%s/%s", relative_base, alt_file_name);
365 fd = open(path, O_RDONLY);
366 if (fd < 0)
367 return;
368 if (fstat(fd, &st) || (st.st_size == 0)) {
369 close(fd);
370 return;
371 }
372 mapsz = xsize_t(st.st_size);
373 map = xmmap(NULL, mapsz, PROT_READ, MAP_PRIVATE, fd, 0);
374 close(fd);
375
376 link_alt_odb_entries(map, map + mapsz, '\n', relative_base, depth);
377
378 munmap(map, mapsz);
379 }
380
381 void add_to_alternates_file(const char *reference)
382 {
383 struct lock_file *lock = xcalloc(1, sizeof(struct lock_file));
384 int fd = hold_lock_file_for_append(lock, git_path("objects/info/alternates"), LOCK_DIE_ON_ERROR);
385 char *alt = mkpath("%s/objects\n", reference);
386 write_or_die(fd, alt, strlen(alt));
387 if (commit_lock_file(lock))
388 die("could not close alternates file");
389 if (alt_odb_tail)
390 link_alt_odb_entries(alt, alt + strlen(alt), '\n', NULL, 0);
391 }
392
393 void foreach_alt_odb(alt_odb_fn fn, void *cb)
394 {
395 struct alternate_object_database *ent;
396
397 prepare_alt_odb();
398 for (ent = alt_odb_list; ent; ent = ent->next)
399 if (fn(ent, cb))
400 return;
401 }
402
403 void prepare_alt_odb(void)
404 {
405 const char *alt;
406
407 if (alt_odb_tail)
408 return;
409
410 alt = getenv(ALTERNATE_DB_ENVIRONMENT);
411 if (!alt) alt = "";
412
413 alt_odb_tail = &alt_odb_list;
414 link_alt_odb_entries(alt, alt + strlen(alt), PATH_SEP, NULL, 0);
415
416 read_info_alternates(get_object_directory(), 0);
417 }
418
/* Return 1 if a loose object file for "sha1" exists in our own object directory, else 0. */
static int has_loose_object_local(const unsigned char *sha1)
{
	const char *path = sha1_file_name(sha1);

	return access(path, F_OK) == 0;
}
424
425 int has_loose_object_nonlocal(const unsigned char *sha1)
426 {
427 struct alternate_object_database *alt;
428 prepare_alt_odb();
429 for (alt = alt_odb_list; alt; alt = alt->next) {
430 fill_sha1_path(alt->name, sha1);
431 if (!access(alt->base, F_OK))
432 return 1;
433 }
434 return 0;
435 }
436
/* Return 1 if "sha1" exists as a loose object locally or in an alternate, else 0. */
static int has_loose_object(const unsigned char *sha1)
{
	if (has_loose_object_local(sha1))
		return 1;
	return has_loose_object_nonlocal(sha1);
}
442
/* Counter handed out as the "last_used" stamp each time a window is picked up. */
static unsigned int pack_used_ctr;
/* Number of pack windows mapped so far. */
static unsigned int pack_mmap_calls;
/* Highest value pack_open_windows has reached. */
static unsigned int peak_pack_open_windows;
/* Number of pack windows currently mapped. */
static unsigned int pack_open_windows;
/* Highest value pack_mapped has reached. */
static size_t peak_pack_mapped;
/* Total bytes of pack data currently mapped through windows. */
static size_t pack_mapped;
/* Head of the linked list of known packs. */
struct packed_git *packed_git;
450
451 void pack_report(void)
452 {
453 fprintf(stderr,
454 "pack_report: getpagesize() = %10" SZ_FMT "\n"
455 "pack_report: core.packedGitWindowSize = %10" SZ_FMT "\n"
456 "pack_report: core.packedGitLimit = %10" SZ_FMT "\n",
457 sz_fmt(getpagesize()),
458 sz_fmt(packed_git_window_size),
459 sz_fmt(packed_git_limit));
460 fprintf(stderr,
461 "pack_report: pack_used_ctr = %10u\n"
462 "pack_report: pack_mmap_calls = %10u\n"
463 "pack_report: pack_open_windows = %10u / %10u\n"
464 "pack_report: pack_mapped = "
465 "%10" SZ_FMT " / %10" SZ_FMT "\n",
466 pack_used_ctr,
467 pack_mmap_calls,
468 pack_open_windows, peak_pack_open_windows,
469 sz_fmt(pack_mapped), sz_fmt(peak_pack_mapped));
470 }
471
472 static int check_packed_git_idx(const char *path, struct packed_git *p)
473 {
474 void *idx_map;
475 struct pack_idx_header *hdr;
476 size_t idx_size;
477 uint32_t version, nr, i, *index;
478 int fd = open(path, O_RDONLY);
479 struct stat st;
480
481 if (fd < 0)
482 return -1;
483 if (fstat(fd, &st)) {
484 close(fd);
485 return -1;
486 }
487 idx_size = xsize_t(st.st_size);
488 if (idx_size < 4 * 256 + 20 + 20) {
489 close(fd);
490 return error("index file %s is too small", path);
491 }
492 idx_map = xmmap(NULL, idx_size, PROT_READ, MAP_PRIVATE, fd, 0);
493 close(fd);
494
495 hdr = idx_map;
496 if (hdr->idx_signature == htonl(PACK_IDX_SIGNATURE)) {
497 version = ntohl(hdr->idx_version);
498 if (version < 2 || version > 2) {
499 munmap(idx_map, idx_size);
500 return error("index file %s is version %"PRIu32
501 " and is not supported by this binary"
502 " (try upgrading GIT to a newer version)",
503 path, version);
504 }
505 } else
506 version = 1;
507
508 nr = 0;
509 index = idx_map;
510 if (version > 1)
511 index += 2; /* skip index header */
512 for (i = 0; i < 256; i++) {
513 uint32_t n = ntohl(index[i]);
514 if (n < nr) {
515 munmap(idx_map, idx_size);
516 return error("non-monotonic index %s", path);
517 }
518 nr = n;
519 }
520
521 if (version == 1) {
522 /*
523 * Total size:
524 * - 256 index entries 4 bytes each
525 * - 24-byte entries * nr (20-byte sha1 + 4-byte offset)
526 * - 20-byte SHA1 of the packfile
527 * - 20-byte SHA1 file checksum
528 */
529 if (idx_size != 4*256 + nr * 24 + 20 + 20) {
530 munmap(idx_map, idx_size);
531 return error("wrong index v1 file size in %s", path);
532 }
533 } else if (version == 2) {
534 /*
535 * Minimum size:
536 * - 8 bytes of header
537 * - 256 index entries 4 bytes each
538 * - 20-byte sha1 entry * nr
539 * - 4-byte crc entry * nr
540 * - 4-byte offset entry * nr
541 * - 20-byte SHA1 of the packfile
542 * - 20-byte SHA1 file checksum
543 * And after the 4-byte offset table might be a
544 * variable sized table containing 8-byte entries
545 * for offsets larger than 2^31.
546 */
547 unsigned long min_size = 8 + 4*256 + nr*(20 + 4 + 4) + 20 + 20;
548 unsigned long max_size = min_size;
549 if (nr)
550 max_size += (nr - 1)*8;
551 if (idx_size < min_size || idx_size > max_size) {
552 munmap(idx_map, idx_size);
553 return error("wrong index v2 file size in %s", path);
554 }
555 if (idx_size != min_size &&
556 /*
557 * make sure we can deal with large pack offsets.
558 * 31-bit signed offset won't be enough, neither
559 * 32-bit unsigned one will be.
560 */
561 (sizeof(off_t) <= 4)) {
562 munmap(idx_map, idx_size);
563 return error("pack too large for current definition of off_t in %s", path);
564 }
565 }
566
567 p->index_version = version;
568 p->index_data = idx_map;
569 p->index_size = idx_size;
570 p->num_objects = nr;
571 return 0;
572 }
573
574 int open_pack_index(struct packed_git *p)
575 {
576 char *idx_name;
577 int ret;
578
579 if (p->index_data)
580 return 0;
581
582 idx_name = xstrdup(p->pack_name);
583 strcpy(idx_name + strlen(idx_name) - strlen(".pack"), ".idx");
584 ret = check_packed_git_idx(idx_name, p);
585 free(idx_name);
586 return ret;
587 }
588
589 static void scan_windows(struct packed_git *p,
590 struct packed_git **lru_p,
591 struct pack_window **lru_w,
592 struct pack_window **lru_l)
593 {
594 struct pack_window *w, *w_l;
595
596 for (w_l = NULL, w = p->windows; w; w = w->next) {
597 if (!w->inuse_cnt) {
598 if (!*lru_w || w->last_used < (*lru_w)->last_used) {
599 *lru_p = p;
600 *lru_w = w;
601 *lru_l = w_l;
602 }
603 }
604 w_l = w;
605 }
606 }
607
608 static int unuse_one_window(struct packed_git *current, int keep_fd)
609 {
610 struct packed_git *p, *lru_p = NULL;
611 struct pack_window *lru_w = NULL, *lru_l = NULL;
612
613 if (current)
614 scan_windows(current, &lru_p, &lru_w, &lru_l);
615 for (p = packed_git; p; p = p->next)
616 scan_windows(p, &lru_p, &lru_w, &lru_l);
617 if (lru_p) {
618 munmap(lru_w->base, lru_w->len);
619 pack_mapped -= lru_w->len;
620 if (lru_l)
621 lru_l->next = lru_w->next;
622 else {
623 lru_p->windows = lru_w->next;
624 if (!lru_p->windows && lru_p->pack_fd != keep_fd) {
625 close(lru_p->pack_fd);
626 lru_p->pack_fd = -1;
627 }
628 }
629 free(lru_w);
630 pack_open_windows--;
631 return 1;
632 }
633 return 0;
634 }
635
636 void release_pack_memory(size_t need, int fd)
637 {
638 size_t cur = pack_mapped;
639 while (need >= (cur - pack_mapped) && unuse_one_window(NULL, fd))
640 ; /* nothing */
641 }
642
643 void close_pack_windows(struct packed_git *p)
644 {
645 while (p->windows) {
646 struct pack_window *w = p->windows;
647
648 if (w->inuse_cnt)
649 die("pack '%s' still has open windows to it",
650 p->pack_name);
651 munmap(w->base, w->len);
652 pack_mapped -= w->len;
653 pack_open_windows--;
654 p->windows = w->next;
655 free(w);
656 }
657 }
658
659 void unuse_pack(struct pack_window **w_cursor)
660 {
661 struct pack_window *w = *w_cursor;
662 if (w) {
663 w->inuse_cnt--;
664 *w_cursor = NULL;
665 }
666 }
667
668 /*
669 * This is used by git-repack in case a newly created pack happens to
670 * contain the same set of objects as an existing one. In that case
671 * the resulting file might be different even if its name would be the
672 * same. It is best to close any reference to the old pack before it is
673 * replaced on disk. Of course no index pointers nor windows for given pack
674 * must subsist at this point. If ever objects from this pack are requested
675 * again, the new version of the pack will be reinitialized through
676 * reprepare_packed_git().
677 */
678 void free_pack_by_name(const char *pack_name)
679 {
680 struct packed_git *p, **pp = &packed_git;
681
682 while (*pp) {
683 p = *pp;
684 if (strcmp(pack_name, p->pack_name) == 0) {
685 clear_delta_base_cache();
686 close_pack_windows(p);
687 if (p->pack_fd != -1)
688 close(p->pack_fd);
689 if (p->index_data)
690 munmap((void *)p->index_data, p->index_size);
691 free(p->bad_object_sha1);
692 *pp = p->next;
693 free(p);
694 return;
695 }
696 pp = &p->next;
697 }
698 }
699
700 /*
701 * Do not call this directly as this leaks p->pack_fd on error return;
702 * call open_packed_git() instead.
703 */
704 static int open_packed_git_1(struct packed_git *p)
705 {
706 struct stat st;
707 struct pack_header hdr;
708 unsigned char sha1[20];
709 unsigned char *idx_sha1;
710 long fd_flag;
711
712 if (!p->index_data && open_pack_index(p))
713 return error("packfile %s index unavailable", p->pack_name);
714
715 p->pack_fd = open(p->pack_name, O_RDONLY);
716 while (p->pack_fd < 0 && errno == EMFILE && unuse_one_window(p, -1))
717 p->pack_fd = open(p->pack_name, O_RDONLY);
718 if (p->pack_fd < 0 || fstat(p->pack_fd, &st))
719 return -1;
720
721 /* If we created the struct before we had the pack we lack size. */
722 if (!p->pack_size) {
723 if (!S_ISREG(st.st_mode))
724 return error("packfile %s not a regular file", p->pack_name);
725 p->pack_size = st.st_size;
726 } else if (p->pack_size != st.st_size)
727 return error("packfile %s size changed", p->pack_name);
728
729 /* We leave these file descriptors open with sliding mmap;
730 * there is no point keeping them open across exec(), though.
731 */
732 fd_flag = fcntl(p->pack_fd, F_GETFD, 0);
733 if (fd_flag < 0)
734 return error("cannot determine file descriptor flags");
735 fd_flag |= FD_CLOEXEC;
736 if (fcntl(p->pack_fd, F_SETFD, fd_flag) == -1)
737 return error("cannot set FD_CLOEXEC");
738
739 /* Verify we recognize this pack file format. */
740 if (read_in_full(p->pack_fd, &hdr, sizeof(hdr)) != sizeof(hdr))
741 return error("file %s is far too short to be a packfile", p->pack_name);
742 if (hdr.hdr_signature != htonl(PACK_SIGNATURE))
743 return error("file %s is not a GIT packfile", p->pack_name);
744 if (!pack_version_ok(hdr.hdr_version))
745 return error("packfile %s is version %"PRIu32" and not"
746 " supported (try upgrading GIT to a newer version)",
747 p->pack_name, ntohl(hdr.hdr_version));
748
749 /* Verify the pack matches its index. */
750 if (p->num_objects != ntohl(hdr.hdr_entries))
751 return error("packfile %s claims to have %"PRIu32" objects"
752 " while index indicates %"PRIu32" objects",
753 p->pack_name, ntohl(hdr.hdr_entries),
754 p->num_objects);
755 if (lseek(p->pack_fd, p->pack_size - sizeof(sha1), SEEK_SET) == -1)
756 return error("end of packfile %s is unavailable", p->pack_name);
757 if (read_in_full(p->pack_fd, sha1, sizeof(sha1)) != sizeof(sha1))
758 return error("packfile %s signature is unavailable", p->pack_name);
759 idx_sha1 = ((unsigned char *)p->index_data) + p->index_size - 40;
760 if (hashcmp(sha1, idx_sha1))
761 return error("packfile %s does not match index", p->pack_name);
762 return 0;
763 }
764
765 static int open_packed_git(struct packed_git *p)
766 {
767 if (!open_packed_git_1(p))
768 return 0;
769 if (p->pack_fd != -1) {
770 close(p->pack_fd);
771 p->pack_fd = -1;
772 }
773 return -1;
774 }
775
776 static int in_window(struct pack_window *win, off_t offset)
777 {
778 /* We must promise at least 20 bytes (one hash) after the
779 * offset is available from this window, otherwise the offset
780 * is not actually in this window and a different window (which
781 * has that one hash excess) must be used. This is to support
782 * the object header and delta base parsing routines below.
783 */
784 off_t win_off = win->offset;
785 return win_off <= offset
786 && (offset + 20) <= (win_off + win->len);
787 }
788
789 unsigned char *use_pack(struct packed_git *p,
790 struct pack_window **w_cursor,
791 off_t offset,
792 unsigned int *left)
793 {
794 struct pack_window *win = *w_cursor;
795
796 if (p->pack_fd == -1 && open_packed_git(p))
797 die("packfile %s cannot be accessed", p->pack_name);
798
799 /* Since packfiles end in a hash of their content and it's
800 * pointless to ask for an offset into the middle of that
801 * hash, and the in_window function above wouldn't match
802 * don't allow an offset too close to the end of the file.
803 */
804 if (offset > (p->pack_size - 20))
805 die("offset beyond end of packfile (truncated pack?)");
806
807 if (!win || !in_window(win, offset)) {
808 if (win)
809 win->inuse_cnt--;
810 for (win = p->windows; win; win = win->next) {
811 if (in_window(win, offset))
812 break;
813 }
814 if (!win) {
815 size_t window_align = packed_git_window_size / 2;
816 off_t len;
817 win = xcalloc(1, sizeof(*win));
818 win->offset = (offset / window_align) * window_align;
819 len = p->pack_size - win->offset;
820 if (len > packed_git_window_size)
821 len = packed_git_window_size;
822 win->len = (size_t)len;
823 pack_mapped += win->len;
824 while (packed_git_limit < pack_mapped
825 && unuse_one_window(p, p->pack_fd))
826 ; /* nothing */
827 win->base = xmmap(NULL, win->len,
828 PROT_READ, MAP_PRIVATE,
829 p->pack_fd, win->offset);
830 if (win->base == MAP_FAILED)
831 die("packfile %s cannot be mapped: %s",
832 p->pack_name,
833 strerror(errno));
834 pack_mmap_calls++;
835 pack_open_windows++;
836 if (pack_mapped > peak_pack_mapped)
837 peak_pack_mapped = pack_mapped;
838 if (pack_open_windows > peak_pack_open_windows)
839 peak_pack_open_windows = pack_open_windows;
840 win->next = p->windows;
841 p->windows = win;
842 }
843 }
844 if (win != *w_cursor) {
845 win->last_used = pack_used_ctr++;
846 win->inuse_cnt++;
847 *w_cursor = win;
848 }
849 offset -= win->offset;
850 if (left)
851 *left = win->len - xsize_t(offset);
852 return win->base + offset;
853 }
854
855 static struct packed_git *alloc_packed_git(int extra)
856 {
857 struct packed_git *p = xmalloc(sizeof(*p) + extra);
858 memset(p, 0, sizeof(*p));
859 p->pack_fd = -1;
860 return p;
861 }
862
863 struct packed_git *add_packed_git(const char *path, int path_len, int local)
864 {
865 struct stat st;
866 struct packed_git *p = alloc_packed_git(path_len + 2);
867
868 /*
869 * Make sure a corresponding .pack file exists and that
870 * the index looks sane.
871 */
872 path_len -= strlen(".idx");
873 if (path_len < 1) {
874 free(p);
875 return NULL;
876 }
877 memcpy(p->pack_name, path, path_len);
878
879 strcpy(p->pack_name + path_len, ".keep");
880 if (!access(p->pack_name, F_OK))
881 p->pack_keep = 1;
882
883 strcpy(p->pack_name + path_len, ".pack");
884 if (stat(p->pack_name, &st) || !S_ISREG(st.st_mode)) {
885 free(p);
886 return NULL;
887 }
888
889 /* ok, it looks sane as far as we can check without
890 * actually mapping the pack file.
891 */
892 p->pack_size = st.st_size;
893 p->pack_local = local;
894 p->mtime = st.st_mtime;
895 if (path_len < 40 || get_sha1_hex(path + path_len - 40, p->sha1))
896 hashclr(p->sha1);
897 return p;
898 }
899
900 struct packed_git *parse_pack_index(unsigned char *sha1)
901 {
902 const char *idx_path = sha1_pack_index_name(sha1);
903 const char *path = sha1_pack_name(sha1);
904 struct packed_git *p = alloc_packed_git(strlen(path) + 1);
905
906 strcpy(p->pack_name, path);
907 hashcpy(p->sha1, sha1);
908 if (check_packed_git_idx(idx_path, p)) {
909 free(p);
910 return NULL;
911 }
912
913 return p;
914 }
915
916 void install_packed_git(struct packed_git *pack)
917 {
918 pack->next = packed_git;
919 packed_git = pack;
920 }
921
922 static void prepare_packed_git_one(char *objdir, int local)
923 {
924 /* Ensure that this buffer is large enough so that we can
925 append "/pack/" without clobbering the stack even if
926 strlen(objdir) were PATH_MAX. */
927 char path[PATH_MAX + 1 + 4 + 1 + 1];
928 int len;
929 DIR *dir;
930 struct dirent *de;
931
932 sprintf(path, "%s/pack", objdir);
933 len = strlen(path);
934 dir = opendir(path);
935 while (!dir && errno == EMFILE && unuse_one_window(packed_git, -1))
936 dir = opendir(path);
937 if (!dir) {
938 if (errno != ENOENT)
939 error("unable to open object pack directory: %s: %s",
940 path, strerror(errno));
941 return;
942 }
943 path[len++] = '/';
944 while ((de = readdir(dir)) != NULL) {
945 int namelen = strlen(de->d_name);
946 struct packed_git *p;
947
948 if (!has_extension(de->d_name, ".idx"))
949 continue;
950
951 if (len + namelen + 1 > sizeof(path))
952 continue;
953
954 /* Don't reopen a pack we already have. */
955 strcpy(path + len, de->d_name);
956 for (p = packed_git; p; p = p->next) {
957 if (!memcmp(path, p->pack_name, len + namelen - 4))
958 break;
959 }
960 if (p)
961 continue;
962 /* See if it really is a valid .idx file with corresponding
963 * .pack file that we can map.
964 */
965 p = add_packed_git(path, len + namelen, local);
966 if (!p)
967 continue;
968 install_packed_git(p);
969 }
970 closedir(dir);
971 }
972
973 static int sort_pack(const void *a_, const void *b_)
974 {
975 struct packed_git *a = *((struct packed_git **)a_);
976 struct packed_git *b = *((struct packed_git **)b_);
977 int st;
978
979 /*
980 * Local packs tend to contain objects specific to our
981 * variant of the project than remote ones. In addition,
982 * remote ones could be on a network mounted filesystem.
983 * Favor local ones for these reasons.
984 */
985 st = a->pack_local - b->pack_local;
986 if (st)
987 return -st;
988
989 /*
990 * Younger packs tend to contain more recent objects,
991 * and more recent objects tend to get accessed more
992 * often.
993 */
994 if (a->mtime < b->mtime)
995 return 1;
996 else if (a->mtime == b->mtime)
997 return 0;
998 return -1;
999 }
1000
1001 static void rearrange_packed_git(void)
1002 {
1003 struct packed_git **ary, *p;
1004 int i, n;
1005
1006 for (n = 0, p = packed_git; p; p = p->next)
1007 n++;
1008 if (n < 2)
1009 return;
1010
1011 /* prepare an array of packed_git for easier sorting */
1012 ary = xcalloc(n, sizeof(struct packed_git *));
1013 for (n = 0, p = packed_git; p; p = p->next)
1014 ary[n++] = p;
1015
1016 qsort(ary, n, sizeof(struct packed_git *), sort_pack);
1017
1018 /* link them back again */
1019 for (i = 0; i < n - 1; i++)
1020 ary[i]->next = ary[i + 1];
1021 ary[n - 1]->next = NULL;
1022 packed_git = ary[0];
1023
1024 free(ary);
1025 }
1026
1027 static int prepare_packed_git_run_once = 0;
1028 void prepare_packed_git(void)
1029 {
1030 struct alternate_object_database *alt;
1031
1032 if (prepare_packed_git_run_once)
1033 return;
1034 prepare_packed_git_one(get_object_directory(), 1);
1035 prepare_alt_odb();
1036 for (alt = alt_odb_list; alt; alt = alt->next) {
1037 alt->name[-1] = 0;
1038 prepare_packed_git_one(alt->base, 0);
1039 alt->name[-1] = '/';
1040 }
1041 rearrange_packed_git();
1042 prepare_packed_git_run_once = 1;
1043 }
1044
1045 void reprepare_packed_git(void)
1046 {
1047 discard_revindex();
1048 prepare_packed_git_run_once = 0;
1049 prepare_packed_git();
1050 }
1051
1052 static void mark_bad_packed_object(struct packed_git *p,
1053 const unsigned char *sha1)
1054 {
1055 unsigned i;
1056 for (i = 0; i < p->num_bad_objects; i++)
1057 if (!hashcmp(sha1, p->bad_object_sha1 + 20 * i))
1058 return;
1059 p->bad_object_sha1 = xrealloc(p->bad_object_sha1, 20 * (p->num_bad_objects + 1));
1060 hashcpy(p->bad_object_sha1 + 20 * p->num_bad_objects, sha1);
1061 p->num_bad_objects++;
1062 }
1063
1064 static int has_packed_and_bad(const unsigned char *sha1)
1065 {
1066 struct packed_git *p;
1067 unsigned i;
1068
1069 for (p = packed_git; p; p = p->next)
1070 for (i = 0; i < p->num_bad_objects; i++)
1071 if (!hashcmp(sha1, p->bad_object_sha1 + 20 * i))
1072 return 1;
1073 return 0;
1074 }
1075
/*
 * Hash the object contents "map" ("size" bytes, of the given type)
 * and compare the result with "sha1".  Returns 0 when they match and
 * -1 when they do not.
 */
int check_sha1_signature(const unsigned char *sha1, void *map, unsigned long size, const char *type)
{
	unsigned char computed[20];

	hash_sha1_file(map, size, type, computed);
	if (hashcmp(sha1, computed))
		return -1;
	return 0;
}
1082
1083 static int git_open_noatime(const char *name)
1084 {
1085 static int sha1_file_open_flag = O_NOATIME;
1086 int fd = open(name, O_RDONLY | sha1_file_open_flag);
1087
1088 /* Might the failure be due to O_NOATIME? */
1089 if (fd < 0 && errno != ENOENT && sha1_file_open_flag) {
1090 fd = open(name, O_RDONLY);
1091 if (fd >= 0)
1092 sha1_file_open_flag = 0;
1093 }
1094 return fd;
1095 }
1096
1097 static int open_sha1_file(const unsigned char *sha1)
1098 {
1099 int fd;
1100 char *name = sha1_file_name(sha1);
1101 struct alternate_object_database *alt;
1102
1103 fd = git_open_noatime(name);
1104 if (fd >= 0)
1105 return fd;
1106
1107 prepare_alt_odb();
1108 errno = ENOENT;
1109 for (alt = alt_odb_list; alt; alt = alt->next) {
1110 name = alt->name;
1111 fill_sha1_path(name, sha1);
1112 fd = git_open_noatime(alt->base);
1113 if (fd >= 0)
1114 return fd;
1115 }
1116 return -1;
1117 }
1118
/*
 * Map the loose object file for "sha1" read-only and store its size
 * in "*size".  Returns the mapping, or NULL if the file could not be
 * opened or stat'ed (in which case "*size" is not written).
 */
static void *map_sha1_file(const unsigned char *sha1, unsigned long *size)
{
	struct stat st;
	void *map = NULL;
	int fd = open_sha1_file(sha1);

	if (fd < 0)
		return NULL;

	if (fstat(fd, &st) == 0) {
		*size = xsize_t(st.st_size);
		map = xmmap(NULL, *size, PROT_READ, MAP_PRIVATE, fd, 0);
	}
	/* the mapping, if any, stays valid after the descriptor is closed */
	close(fd);
	return map;
}
1137
/*
 * Decide whether "map" starts with a zlib stream header.
 *
 * A zlib-compressed buffer begins with the byte 0x78 (15-bit window
 * size, deflated), and its first 16-bit big-endian word is evenly
 * divisible by 31.  Returns 1 when both hold, 0 otherwise.  The first
 * two bytes of map are always read.
 */
static int legacy_loose_object(unsigned char *map)
{
	unsigned int first_word = (map[0] << 8) + map[1];

	return map[0] == 0x78 && (first_word % 31) == 0;
}
1153
1154 unsigned long unpack_object_header_buffer(const unsigned char *buf,
1155 unsigned long len, enum object_type *type, unsigned long *sizep)
1156 {
1157 unsigned shift;
1158 unsigned long size, c;
1159 unsigned long used = 0;
1160
1161 c = buf[used++];
1162 *type = (c >> 4) & 7;
1163 size = c & 15;
1164 shift = 4;
1165 while (c & 0x80) {
1166 if (len <= used || bitsizeof(long) <= shift) {
1167 error("bad object header");
1168 return 0;
1169 }
1170 c = buf[used++];
1171 size += (c & 0x7f) << shift;
1172 shift += 7;
1173 }
1174 *sizep = size;
1175 return used;
1176 }
1177
1178 static int unpack_sha1_header(z_stream *stream, unsigned char *map, unsigned long mapsize, void *buffer, unsigned long bufsiz)
1179 {
1180 unsigned long size, used;
1181 static const char valid_loose_object_type[8] = {
1182 0, /* OBJ_EXT */
1183 1, 1, 1, 1, /* "commit", "tree", "blob", "tag" */
1184 0, /* "delta" and others are invalid in a loose object */
1185 };
1186 enum object_type type;
1187
1188 /* Get the data stream */
1189 memset(stream, 0, sizeof(*stream));
1190 stream->next_in = map;
1191 stream->avail_in = mapsize;
1192 stream->next_out = buffer;
1193 stream->avail_out = bufsiz;
1194
1195 if (legacy_loose_object(map)) {
1196 git_inflate_init(stream);
1197 return git_inflate(stream, 0);
1198 }
1199
1200
1201 /*
1202 * There used to be a second loose object header format which
1203 * was meant to mimic the in-pack format, allowing for direct
1204 * copy of the object data. This format turned up not to be
1205 * really worth it and we don't write it any longer. But we
1206 * can still read it.
1207 */
1208 used = unpack_object_header_buffer(map, mapsize, &type, &size);
1209 if (!used || !valid_loose_object_type[type])
1210 return -1;
1211 map += used;
1212 mapsize -= used;
1213
1214 /* Set up the stream for the rest.. */
1215 stream->next_in = map;
1216 stream->avail_in = mapsize;
1217 git_inflate_init(stream);
1218
1219 /* And generate the fake traditional header */
1220 stream->total_out = 1 + snprintf(buffer, bufsiz, "%s %lu",
1221 typename(type), size);
1222 return 0;
1223 }
1224
1225 static void *unpack_sha1_rest(z_stream *stream, void *buffer, unsigned long size, const unsigned char *sha1)
1226 {
1227 int bytes = strlen(buffer) + 1;
1228 unsigned char *buf = xmallocz(size);
1229 unsigned long n;
1230 int status = Z_OK;
1231
1232 n = stream->total_out - bytes;
1233 if (n > size)
1234 n = size;
1235 memcpy(buf, (char *) buffer + bytes, n);
1236 bytes = n;
1237 if (bytes <= size) {
1238 /*
1239 * The above condition must be (bytes <= size), not
1240 * (bytes < size). In other words, even though we
1241 * expect no more output and set avail_out to zer0,
1242 * the input zlib stream may have bytes that express
1243 * "this concludes the stream", and we *do* want to
1244 * eat that input.
1245 *
1246 * Otherwise we would not be able to test that we
1247 * consumed all the input to reach the expected size;
1248 * we also want to check that zlib tells us that all
1249 * went well with status == Z_STREAM_END at the end.
1250 */
1251 stream->next_out = buf + bytes;
1252 stream->avail_out = size - bytes;
1253 while (status == Z_OK)
1254 status = git_inflate(stream, Z_FINISH);
1255 }
1256 if (status == Z_STREAM_END && !stream->avail_in) {
1257 git_inflate_end(stream);
1258 return buf;
1259 }
1260
1261 if (status < 0)
1262 error("corrupt loose object '%s'", sha1_to_hex(sha1));
1263 else if (stream->avail_in)
1264 error("garbage at end of loose object '%s'",
1265 sha1_to_hex(sha1));
1266 free(buf);
1267 return NULL;
1268 }
1269
/*
 * Parse a loose object header of the form "<type> <size>\0" by hand;
 * sscanf() would be far too permissive for what we want to check.
 *
 * Stores the decoded size in *sizep (this happens even when the
 * terminating NUL check below fails) and returns the result of
 * type_from_string() for the type word, or -1 when the header is not
 * in the strict format.
 */
static int parse_sha1_header(const char *hdr, unsigned long *sizep)
{
	char type[10];
	unsigned long size;
	int len = 0;

	/*
	 * The type can be at most ten bytes (including the
	 * terminating '\0' that we add), and is followed by
	 * a space.
	 */
	while (*hdr != ' ') {
		type[len++] = *hdr++;
		if (len >= sizeof(type))
			return -1;
	}
	hdr++;	/* step over the separating space */
	type[len] = 0;

	/*
	 * The length must follow immediately, and be in canonical
	 * decimal format (ie "010" is not valid): a leading zero is
	 * only accepted as the whole number.
	 */
	size = *hdr++ - '0';
	if (size > 9)
		return -1;
	if (size) {
		unsigned long digit;

		while ((digit = *hdr - '0') <= 9) {
			hdr++;
			size = size * 10 + digit;
		}
	}
	*sizep = size;

	/* The length must be followed by a zero byte. */
	if (*hdr)
		return -1;
	return type_from_string(type);
}
1320
1321 static void *unpack_sha1_file(void *map, unsigned long mapsize, enum object_type *type, unsigned long *size, const unsigned char *sha1)
1322 {
1323 int ret;
1324 z_stream stream;
1325 char hdr[8192];
1326
1327 ret = unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr));
1328 if (ret < Z_OK || (*type = parse_sha1_header(hdr, size)) < 0)
1329 return NULL;
1330
1331 return unpack_sha1_rest(&stream, hdr, *size, sha1);
1332 }
1333
1334 unsigned long get_size_from_delta(struct packed_git *p,
1335 struct pack_window **w_curs,
1336 off_t curpos)
1337 {
1338 const unsigned char *data;
1339 unsigned char delta_head[20], *in;
1340 z_stream stream;
1341 int st;
1342
1343 memset(&stream, 0, sizeof(stream));
1344 stream.next_out = delta_head;
1345 stream.avail_out = sizeof(delta_head);
1346
1347 git_inflate_init(&stream);
1348 do {
1349 in = use_pack(p, w_curs, curpos, &stream.avail_in);
1350 stream.next_in = in;
1351 st = git_inflate(&stream, Z_FINISH);
1352 curpos += stream.next_in - in;
1353 } while ((st == Z_OK || st == Z_BUF_ERROR) &&
1354 stream.total_out < sizeof(delta_head));
1355 git_inflate_end(&stream);
1356 if ((st != Z_STREAM_END) && stream.total_out != sizeof(delta_head)) {
1357 error("delta data unpack-initial failed");
1358 return 0;
1359 }
1360
1361 /* Examine the initial part of the delta to figure out
1362 * the result size.
1363 */
1364 data = delta_head;
1365
1366 /* ignore base size */
1367 get_delta_hdr_size(&data, delta_head+sizeof(delta_head));
1368
1369 /* Read the result size */
1370 return get_delta_hdr_size(&data, delta_head+sizeof(delta_head));
1371 }
1372
1373 static off_t get_delta_base(struct packed_git *p,
1374 struct pack_window **w_curs,
1375 off_t *curpos,
1376 enum object_type type,
1377 off_t delta_obj_offset)
1378 {
1379 unsigned char *base_info = use_pack(p, w_curs, *curpos, NULL);
1380 off_t base_offset;
1381
1382 /* use_pack() assured us we have [base_info, base_info + 20)
1383 * as a range that we can look at without walking off the
1384 * end of the mapped window. Its actually the hash size
1385 * that is assured. An OFS_DELTA longer than the hash size
1386 * is stupid, as then a REF_DELTA would be smaller to store.
1387 */
1388 if (type == OBJ_OFS_DELTA) {
1389 unsigned used = 0;
1390 unsigned char c = base_info[used++];
1391 base_offset = c & 127;
1392 while (c & 128) {
1393 base_offset += 1;
1394 if (!base_offset || MSB(base_offset, 7))
1395 return 0; /* overflow */
1396 c = base_info[used++];
1397 base_offset = (base_offset << 7) + (c & 127);
1398 }
1399 base_offset = delta_obj_offset - base_offset;
1400 if (base_offset <= 0 || base_offset >= delta_obj_offset)
1401 return 0; /* out of bound */
1402 *curpos += used;
1403 } else if (type == OBJ_REF_DELTA) {
1404 /* The base entry _must_ be in the same pack */
1405 base_offset = find_pack_entry_one(base_info, p);
1406 *curpos += 20;
1407 } else
1408 die("I am totally screwed");
1409 return base_offset;
1410 }
1411
1412 /* forward declaration for a mutually recursive function */
1413 static int packed_object_info(struct packed_git *p, off_t offset,
1414 unsigned long *sizep);
1415
1416 static int packed_delta_info(struct packed_git *p,
1417 struct pack_window **w_curs,
1418 off_t curpos,
1419 enum object_type type,
1420 off_t obj_offset,
1421 unsigned long *sizep)
1422 {
1423 off_t base_offset;
1424
1425 base_offset = get_delta_base(p, w_curs, &curpos, type, obj_offset);
1426 if (!base_offset)
1427 return OBJ_BAD;
1428 type = packed_object_info(p, base_offset, NULL);
1429 if (type <= OBJ_NONE) {
1430 struct revindex_entry *revidx;
1431 const unsigned char *base_sha1;
1432 revidx = find_pack_revindex(p, base_offset);
1433 if (!revidx)
1434 return OBJ_BAD;
1435 base_sha1 = nth_packed_object_sha1(p, revidx->nr);
1436 mark_bad_packed_object(p, base_sha1);
1437 type = sha1_object_info(base_sha1, NULL);
1438 if (type <= OBJ_NONE)
1439 return OBJ_BAD;
1440 }
1441
1442 /* We choose to only get the type of the base object and
1443 * ignore potentially corrupt pack file that expects the delta
1444 * based on a base with a wrong size. This saves tons of
1445 * inflate() calls.
1446 */
1447 if (sizep) {
1448 *sizep = get_size_from_delta(p, w_curs, curpos);
1449 if (*sizep == 0)
1450 type = OBJ_BAD;
1451 }
1452
1453 return type;
1454 }
1455
1456 static int unpack_object_header(struct packed_git *p,
1457 struct pack_window **w_curs,
1458 off_t *curpos,
1459 unsigned long *sizep)
1460 {
1461 unsigned char *base;
1462 unsigned int left;
1463 unsigned long used;
1464 enum object_type type;
1465
1466 /* use_pack() assures us we have [base, base + 20) available
1467 * as a range that we can look at at. (Its actually the hash
1468 * size that is assured.) With our object header encoding
1469 * the maximum deflated object size is 2^137, which is just
1470 * insane, so we know won't exceed what we have been given.
1471 */
1472 base = use_pack(p, w_curs, *curpos, &left);
1473 used = unpack_object_header_buffer(base, left, &type, sizep);
1474 if (!used) {
1475 type = OBJ_BAD;
1476 } else
1477 *curpos += used;
1478
1479 return type;
1480 }
1481
1482 const char *packed_object_info_detail(struct packed_git *p,
1483 off_t obj_offset,
1484 unsigned long *size,
1485 unsigned long *store_size,
1486 unsigned int *delta_chain_length,
1487 unsigned char *base_sha1)
1488 {
1489 struct pack_window *w_curs = NULL;
1490 off_t curpos;
1491 unsigned long dummy;
1492 unsigned char *next_sha1;
1493 enum object_type type;
1494 struct revindex_entry *revidx;
1495
1496 *delta_chain_length = 0;
1497 curpos = obj_offset;
1498 type = unpack_object_header(p, &w_curs, &curpos, size);
1499
1500 revidx = find_pack_revindex(p, obj_offset);
1501 *store_size = revidx[1].offset - obj_offset;
1502
1503 for (;;) {
1504 switch (type) {
1505 default:
1506 die("pack %s contains unknown object type %d",
1507 p->pack_name, type);
1508 case OBJ_COMMIT:
1509 case OBJ_TREE:
1510 case OBJ_BLOB:
1511 case OBJ_TAG:
1512 unuse_pack(&w_curs);
1513 return typename(type);
1514 case OBJ_OFS_DELTA:
1515 obj_offset = get_delta_base(p, &w_curs, &curpos, type, obj_offset);
1516 if (!obj_offset)
1517 die("pack %s contains bad delta base reference of type %s",
1518 p->pack_name, typename(type));
1519 if (*delta_chain_length == 0) {
1520 revidx = find_pack_revindex(p, obj_offset);
1521 hashcpy(base_sha1, nth_packed_object_sha1(p, revidx->nr));
1522 }
1523 break;
1524 case OBJ_REF_DELTA:
1525 next_sha1 = use_pack(p, &w_curs, curpos, NULL);
1526 if (*delta_chain_length == 0)
1527 hashcpy(base_sha1, next_sha1);
1528 obj_offset = find_pack_entry_one(next_sha1, p);
1529 break;
1530 }
1531 (*delta_chain_length)++;
1532 curpos = obj_offset;
1533 type = unpack_object_header(p, &w_curs, &curpos, &dummy);
1534 }
1535 }
1536
1537 static int packed_object_info(struct packed_git *p, off_t obj_offset,
1538 unsigned long *sizep)
1539 {
1540 struct pack_window *w_curs = NULL;
1541 unsigned long size;
1542 off_t curpos = obj_offset;
1543 enum object_type type;
1544
1545 type = unpack_object_header(p, &w_curs, &curpos, &size);
1546
1547 switch (type) {
1548 case OBJ_OFS_DELTA:
1549 case OBJ_REF_DELTA:
1550 type = packed_delta_info(p, &w_curs, curpos,
1551 type, obj_offset, sizep);
1552 break;
1553 case OBJ_COMMIT:
1554 case OBJ_TREE:
1555 case OBJ_BLOB:
1556 case OBJ_TAG:
1557 if (sizep)
1558 *sizep = size;
1559 break;
1560 default:
1561 error("unknown object type %i at offset %"PRIuMAX" in %s",
1562 type, (uintmax_t)obj_offset, p->pack_name);
1563 type = OBJ_BAD;
1564 }
1565 unuse_pack(&w_curs);
1566 return type;
1567 }
1568
1569 static void *unpack_compressed_entry(struct packed_git *p,
1570 struct pack_window **w_curs,
1571 off_t curpos,
1572 unsigned long size)
1573 {
1574 int st;
1575 z_stream stream;
1576 unsigned char *buffer, *in;
1577
1578 buffer = xmallocz(size);
1579 memset(&stream, 0, sizeof(stream));
1580 stream.next_out = buffer;
1581 stream.avail_out = size + 1;
1582
1583 git_inflate_init(&stream);
1584 do {
1585 in = use_pack(p, w_curs, curpos, &stream.avail_in);
1586 stream.next_in = in;
1587 st = git_inflate(&stream, Z_FINISH);
1588 if (!stream.avail_out)
1589 break; /* the payload is larger than it should be */
1590 curpos += stream.next_in - in;
1591 } while (st == Z_OK || st == Z_BUF_ERROR);
1592 git_inflate_end(&stream);
1593 if ((st != Z_STREAM_END) || stream.total_out != size) {
1594 free(buffer);
1595 return NULL;
1596 }
1597
1598 return buffer;
1599 }
1600
1601 #define MAX_DELTA_CACHE (256)
1602
1603 static size_t delta_base_cached;
1604
1605 static struct delta_base_cache_lru_list {
1606 struct delta_base_cache_lru_list *prev;
1607 struct delta_base_cache_lru_list *next;
1608 } delta_base_cache_lru = { &delta_base_cache_lru, &delta_base_cache_lru };
1609
1610 static struct delta_base_cache_entry {
1611 struct delta_base_cache_lru_list lru;
1612 void *data;
1613 struct packed_git *p;
1614 off_t base_offset;
1615 unsigned long size;
1616 enum object_type type;
1617 } delta_base_cache[MAX_DELTA_CACHE];
1618
1619 static unsigned long pack_entry_hash(struct packed_git *p, off_t base_offset)
1620 {
1621 unsigned long hash;
1622
1623 hash = (unsigned long)p + (unsigned long)base_offset;
1624 hash += (hash >> 8) + (hash >> 16);
1625 return hash % MAX_DELTA_CACHE;
1626 }
1627
1628 static void *cache_or_unpack_entry(struct packed_git *p, off_t base_offset,
1629 unsigned long *base_size, enum object_type *type, int keep_cache)
1630 {
1631 void *ret;
1632 unsigned long hash = pack_entry_hash(p, base_offset);
1633 struct delta_base_cache_entry *ent = delta_base_cache + hash;
1634
1635 ret = ent->data;
1636 if (!ret || ent->p != p || ent->base_offset != base_offset)
1637 return unpack_entry(p, base_offset, type, base_size);
1638
1639 if (!keep_cache) {
1640 ent->data = NULL;
1641 ent->lru.next->prev = ent->lru.prev;
1642 ent->lru.prev->next = ent->lru.next;
1643 delta_base_cached -= ent->size;
1644 } else {
1645 ret = xmemdupz(ent->data, ent->size);
1646 }
1647 *type = ent->type;
1648 *base_size = ent->size;
1649 return ret;
1650 }
1651
1652 static inline void release_delta_base_cache(struct delta_base_cache_entry *ent)
1653 {
1654 if (ent->data) {
1655 free(ent->data);
1656 ent->data = NULL;
1657 ent->lru.next->prev = ent->lru.prev;
1658 ent->lru.prev->next = ent->lru.next;
1659 delta_base_cached -= ent->size;
1660 }
1661 }
1662
1663 void clear_delta_base_cache(void)
1664 {
1665 unsigned long p;
1666 for (p = 0; p < MAX_DELTA_CACHE; p++)
1667 release_delta_base_cache(&delta_base_cache[p]);
1668 }
1669
1670 static void add_delta_base_cache(struct packed_git *p, off_t base_offset,
1671 void *base, unsigned long base_size, enum object_type type)
1672 {
1673 unsigned long hash = pack_entry_hash(p, base_offset);
1674 struct delta_base_cache_entry *ent = delta_base_cache + hash;
1675 struct delta_base_cache_lru_list *lru;
1676
1677 release_delta_base_cache(ent);
1678 delta_base_cached += base_size;
1679
1680 for (lru = delta_base_cache_lru.next;
1681 delta_base_cached > delta_base_cache_limit
1682 && lru != &delta_base_cache_lru;
1683 lru = lru->next) {
1684 struct delta_base_cache_entry *f = (void *)lru;
1685 if (f->type == OBJ_BLOB)
1686 release_delta_base_cache(f);
1687 }
1688 for (lru = delta_base_cache_lru.next;
1689 delta_base_cached > delta_base_cache_limit
1690 && lru != &delta_base_cache_lru;
1691 lru = lru->next) {
1692 struct delta_base_cache_entry *f = (void *)lru;
1693 release_delta_base_cache(f);
1694 }
1695
1696 ent->p = p;
1697 ent->base_offset = base_offset;
1698 ent->type = type;
1699 ent->data = base;
1700 ent->size = base_size;
1701 ent->lru.next = &delta_base_cache_lru;
1702 ent->lru.prev = delta_base_cache_lru.prev;
1703 delta_base_cache_lru.prev->next = &ent->lru;
1704 delta_base_cache_lru.prev = &ent->lru;
1705 }
1706
1707 static void *read_object(const unsigned char *sha1, enum object_type *type,
1708 unsigned long *size);
1709
1710 static void *unpack_delta_entry(struct packed_git *p,
1711 struct pack_window **w_curs,
1712 off_t curpos,
1713 unsigned long delta_size,
1714 off_t obj_offset,
1715 enum object_type *type,
1716 unsigned long *sizep)
1717 {
1718 void *delta_data, *result, *base;
1719 unsigned long base_size;
1720 off_t base_offset;
1721
1722 base_offset = get_delta_base(p, w_curs, &curpos, *type, obj_offset);
1723 if (!base_offset) {
1724 error("failed to validate delta base reference "
1725 "at offset %"PRIuMAX" from %s",
1726 (uintmax_t)curpos, p->pack_name);
1727 return NULL;
1728 }
1729 unuse_pack(w_curs);
1730 base = cache_or_unpack_entry(p, base_offset, &base_size, type, 0);
1731 if (!base) {
1732 /*
1733 * We're probably in deep shit, but let's try to fetch
1734 * the required base anyway from another pack or loose.
1735 * This is costly but should happen only in the presence
1736 * of a corrupted pack, and is better than failing outright.
1737 */
1738 struct revindex_entry *revidx;
1739 const unsigned char *base_sha1;
1740 revidx = find_pack_revindex(p, base_offset);
1741 if (!revidx)
1742 return NULL;
1743 base_sha1 = nth_packed_object_sha1(p, revidx->nr);
1744 error("failed to read delta base object %s"
1745 " at offset %"PRIuMAX" from %s",
1746 sha1_to_hex(base_sha1), (uintmax_t)base_offset,
1747 p->pack_name);
1748 mark_bad_packed_object(p, base_sha1);
1749 base = read_object(base_sha1, type, &base_size);
1750 if (!base)
1751 return NULL;
1752 }
1753
1754 delta_data = unpack_compressed_entry(p, w_curs, curpos, delta_size);
1755 if (!delta_data) {
1756 error("failed to unpack compressed delta "
1757 "at offset %"PRIuMAX" from %s",
1758 (uintmax_t)curpos, p->pack_name);
1759 free(base);
1760 return NULL;
1761 }
1762 result = patch_delta(base, base_size,
1763 delta_data, delta_size,
1764 sizep);
1765 if (!result)
1766 die("failed to apply delta");
1767 free(delta_data);
1768 add_delta_base_cache(p, base_offset, base, base_size, *type);
1769 return result;
1770 }
1771
1772 int do_check_packed_object_crc;
1773
1774 void *unpack_entry(struct packed_git *p, off_t obj_offset,
1775 enum object_type *type, unsigned long *sizep)
1776 {
1777 struct pack_window *w_curs = NULL;
1778 off_t curpos = obj_offset;
1779 void *data;
1780
1781 if (do_check_packed_object_crc && p->index_version > 1) {
1782 struct revindex_entry *revidx = find_pack_revindex(p, obj_offset);
1783 unsigned long len = revidx[1].offset - obj_offset;
1784 if (check_pack_crc(p, &w_curs, obj_offset, len, revidx->nr)) {
1785 const unsigned char *sha1 =
1786 nth_packed_object_sha1(p, revidx->nr);
1787 error("bad packed object CRC for %s",
1788 sha1_to_hex(sha1));
1789 mark_bad_packed_object(p, sha1);
1790 unuse_pack(&w_curs);
1791 return NULL;
1792 }
1793 }
1794
1795 *type = unpack_object_header(p, &w_curs, &curpos, sizep);
1796 switch (*type) {
1797 case OBJ_OFS_DELTA:
1798 case OBJ_REF_DELTA:
1799 data = unpack_delta_entry(p, &w_curs, curpos, *sizep,
1800 obj_offset, type, sizep);
1801 break;
1802 case OBJ_COMMIT:
1803 case OBJ_TREE:
1804 case OBJ_BLOB:
1805 case OBJ_TAG:
1806 data = unpack_compressed_entry(p, &w_curs, curpos, *sizep);
1807 break;
1808 default:
1809 data = NULL;
1810 error("unknown object type %i at offset %"PRIuMAX" in %s",
1811 *type, (uintmax_t)obj_offset, p->pack_name);
1812 }
1813 unuse_pack(&w_curs);
1814 return data;
1815 }
1816
1817 const unsigned char *nth_packed_object_sha1(struct packed_git *p,
1818 uint32_t n)
1819 {
1820 const unsigned char *index = p->index_data;
1821 if (!index) {
1822 if (open_pack_index(p))
1823 return NULL;
1824 index = p->index_data;
1825 }
1826 if (n >= p->num_objects)
1827 return NULL;
1828 index += 4 * 256;
1829 if (p->index_version == 1) {
1830 return index + 24 * n + 4;
1831 } else {
1832 index += 8;
1833 return index + 20 * n;
1834 }
1835 }
1836
1837 off_t nth_packed_object_offset(const struct packed_git *p, uint32_t n)
1838 {
1839 const unsigned char *index = p->index_data;
1840 index += 4 * 256;
1841 if (p->index_version == 1) {
1842 return ntohl(*((uint32_t *)(index + 24 * n)));
1843 } else {
1844 uint32_t off;
1845 index += 8 + p->num_objects * (20 + 4);
1846 off = ntohl(*((uint32_t *)(index + 4 * n)));
1847 if (!(off & 0x80000000))
1848 return off;
1849 index += p->num_objects * 4 + (off & 0x7fffffff) * 8;
1850 return (((uint64_t)ntohl(*((uint32_t *)(index + 0)))) << 32) |
1851 ntohl(*((uint32_t *)(index + 4)));
1852 }
1853 }
1854
1855 off_t find_pack_entry_one(const unsigned char *sha1,
1856 struct packed_git *p)
1857 {
1858 const uint32_t *level1_ofs = p->index_data;
1859 const unsigned char *index = p->index_data;
1860 unsigned hi, lo, stride;
1861 static int use_lookup = -1;
1862 static int debug_lookup = -1;
1863
1864 if (debug_lookup < 0)
1865 debug_lookup = !!getenv("GIT_DEBUG_LOOKUP");
1866
1867 if (!index) {
1868 if (open_pack_index(p))
1869 return 0;
1870 level1_ofs = p->index_data;
1871 index = p->index_data;
1872 }
1873 if (p->index_version > 1) {
1874 level1_ofs += 2;
1875 index += 8;
1876 }
1877 index += 4 * 256;
1878 hi = ntohl(level1_ofs[*sha1]);
1879 lo = ((*sha1 == 0x0) ? 0 : ntohl(level1_ofs[*sha1 - 1]));
1880 if (p->index_version > 1) {
1881 stride = 20;
1882 } else {
1883 stride = 24;
1884 index += 4;
1885 }
1886
1887 if (debug_lookup)
1888 printf("%02x%02x%02x... lo %u hi %u nr %"PRIu32"\n",
1889 sha1[0], sha1[1], sha1[2], lo, hi, p->num_objects);
1890
1891 if (use_lookup < 0)
1892 use_lookup = !!getenv("GIT_USE_LOOKUP");
1893 if (use_lookup) {
1894 int pos = sha1_entry_pos(index, stride, 0,
1895 lo, hi, p->num_objects, sha1);
1896 if (pos < 0)
1897 return 0;
1898 return nth_packed_object_offset(p, pos);
1899 }
1900
1901 do {
1902 unsigned mi = (lo + hi) / 2;
1903 int cmp = hashcmp(index + mi * stride, sha1);
1904
1905 if (debug_lookup)
1906 printf("lo %u hi %u rg %u mi %u\n",
1907 lo, hi, hi - lo, mi);
1908 if (!cmp)
1909 return nth_packed_object_offset(p, mi);
1910 if (cmp > 0)
1911 hi = mi;
1912 else
1913 lo = mi+1;
1914 } while (lo < hi);
1915 return 0;
1916 }
1917
1918 static int find_pack_entry(const unsigned char *sha1, struct pack_entry *e)
1919 {
1920 static struct packed_git *last_found = (void *)1;
1921 struct packed_git *p;
1922 off_t offset;
1923
1924 prepare_packed_git();
1925 if (!packed_git)
1926 return 0;
1927 p = (last_found == (void *)1) ? packed_git : last_found;
1928
1929 do {
1930 if (p->num_bad_objects) {
1931 unsigned i;
1932 for (i = 0; i < p->num_bad_objects; i++)
1933 if (!hashcmp(sha1, p->bad_object_sha1 + 20 * i))
1934 goto next;
1935 }
1936
1937 offset = find_pack_entry_one(sha1, p);
1938 if (offset) {
1939 /*
1940 * We are about to tell the caller where they can
1941 * locate the requested object. We better make
1942 * sure the packfile is still here and can be
1943 * accessed before supplying that answer, as
1944 * it may have been deleted since the index
1945 * was loaded!
1946 */
1947 if (p->pack_fd == -1 && open_packed_git(p)) {
1948 error("packfile %s cannot be accessed", p->pack_name);
1949 goto next;
1950 }
1951 e->offset = offset;
1952 e->p = p;
1953 hashcpy(e->sha1, sha1);
1954 last_found = p;
1955 return 1;
1956 }
1957
1958 next:
1959 if (p == last_found)
1960 p = packed_git;
1961 else
1962 p = p->next;
1963 if (p == last_found)
1964 p = p->next;
1965 } while (p);
1966 return 0;
1967 }
1968
1969 struct packed_git *find_sha1_pack(const unsigned char *sha1,
1970 struct packed_git *packs)
1971 {
1972 struct packed_git *p;
1973
1974 for (p = packs; p; p = p->next) {
1975 if (find_pack_entry_one(sha1, p))
1976 return p;
1977 }
1978 return NULL;
1979
1980 }
1981
1982 static int sha1_loose_object_info(const unsigned char *sha1, unsigned long *sizep)
1983 {
1984 int status;
1985 unsigned long mapsize, size;
1986 void *map;
1987 z_stream stream;
1988 char hdr[32];
1989
1990 map = map_sha1_file(sha1, &mapsize);
1991 if (!map)
1992 return error("unable to find %s", sha1_to_hex(sha1));
1993 if (unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0)
1994 status = error("unable to unpack %s header",
1995 sha1_to_hex(sha1));
1996 else if ((status = parse_sha1_header(hdr, &size)) < 0)
1997 status = error("unable to parse %s header", sha1_to_hex(sha1));
1998 else if (sizep)
1999 *sizep = size;
2000 git_inflate_end(&stream);
2001 munmap(map, mapsize);
2002 return status;
2003 }
2004
2005 int sha1_object_info(const unsigned char *sha1, unsigned long *sizep)
2006 {
2007 struct pack_entry e;
2008 int status;
2009
2010 if (!find_pack_entry(sha1, &e)) {
2011 /* Most likely it's a loose object. */
2012 status = sha1_loose_object_info(sha1, sizep);
2013 if (status >= 0)
2014 return status;
2015
2016 /* Not a loose object; someone else may have just packed it. */
2017 reprepare_packed_git();
2018 if (!find_pack_entry(sha1, &e))
2019 return status;
2020 }
2021
2022 status = packed_object_info(e.p, e.offset, sizep);
2023 if (status < 0) {
2024 mark_bad_packed_object(e.p, sha1);
2025 status = sha1_object_info(sha1, sizep);
2026 }
2027
2028 return status;
2029 }
2030
2031 static void *read_packed_sha1(const unsigned char *sha1,
2032 enum object_type *type, unsigned long *size)
2033 {
2034 struct pack_entry e;
2035 void *data;
2036
2037 if (!find_pack_entry(sha1, &e))
2038 return NULL;
2039 data = cache_or_unpack_entry(e.p, e.offset, size, type, 1);
2040 if (!data) {
2041 /*
2042 * We're probably in deep shit, but let's try to fetch
2043 * the required object anyway from another pack or loose.
2044 * This should happen only in the presence of a corrupted
2045 * pack, and is better than failing outright.
2046 */
2047 error("failed to read object %s at offset %"PRIuMAX" from %s",
2048 sha1_to_hex(sha1), (uintmax_t)e.offset, e.p->pack_name);
2049 mark_bad_packed_object(e.p, sha1);
2050 data = read_object(sha1, type, size);
2051 }
2052 return data;
2053 }
2054
2055 /*
2056 * This is meant to hold a *small* number of objects that you would
2057 * want read_sha1_file() to be able to return, but yet you do not want
2058 * to write them into the object store (e.g. a browse-only
2059 * application).
2060 */
2061 static struct cached_object {
2062 unsigned char sha1[20];
2063 enum object_type type;
2064 void *buf;
2065 unsigned long size;
2066 } *cached_objects;
2067 static int cached_object_nr, cached_object_alloc;
2068
2069 static struct cached_object empty_tree = {
2070 EMPTY_TREE_SHA1_BIN,
2071 OBJ_TREE,
2072 "",
2073 0
2074 };
2075
2076 static struct cached_object *find_cached_object(const unsigned char *sha1)
2077 {
2078 int i;
2079 struct cached_object *co = cached_objects;
2080
2081 for (i = 0; i < cached_object_nr; i++, co++) {
2082 if (!hashcmp(co->sha1, sha1))
2083 return co;
2084 }
2085 if (!hashcmp(sha1, empty_tree.sha1))
2086 return &empty_tree;
2087 return NULL;
2088 }
2089
2090 int pretend_sha1_file(void *buf, unsigned long len, enum object_type type,
2091 unsigned char *sha1)
2092 {
2093 struct cached_object *co;
2094
2095 hash_sha1_file(buf, len, typename(type), sha1);
2096 if (has_sha1_file(sha1) || find_cached_object(sha1))
2097 return 0;
2098 if (cached_object_alloc <= cached_object_nr) {
2099 cached_object_alloc = alloc_nr(cached_object_alloc);
2100 cached_objects = xrealloc(cached_objects,
2101 sizeof(*cached_objects) *
2102 cached_object_alloc);
2103 }
2104 co = &cached_objects[cached_object_nr++];
2105 co->size = len;
2106 co->type = type;
2107 co->buf = xmalloc(len);
2108 memcpy(co->buf, buf, len);
2109 hashcpy(co->sha1, sha1);
2110 return 0;
2111 }
2112
2113 static void *read_object(const unsigned char *sha1, enum object_type *type,
2114 unsigned long *size)
2115 {
2116 unsigned long mapsize;
2117 void *map, *buf;
2118 struct cached_object *co;
2119
2120 co = find_cached_object(sha1);
2121 if (co) {
2122 *type = co->type;
2123 *size = co->size;
2124 return xmemdupz(co->buf, co->size);
2125 }
2126
2127 buf = read_packed_sha1(sha1, type, size);
2128 if (buf)
2129 return buf;
2130 map = map_sha1_file(sha1, &mapsize);
2131 if (map) {
2132 buf = unpack_sha1_file(map, mapsize, type, size, sha1);
2133 munmap(map, mapsize);
2134 return buf;
2135 }
2136 reprepare_packed_git();
2137 return read_packed_sha1(sha1, type, size);
2138 }
2139
/*
 * Read object sha1 after applying lookup_replace_object() to it.
 *
 * The name actually read is stored in *replacement when that pointer
 * is non-NULL.  Returns the object data with *type and *size filled
 * in, or NULL when the object is simply absent.  Dies when a
 * replacement object cannot be read, or when the object exists
 * (loose, or marked bad in a pack) but could not be read.
 */
void *read_sha1_file_repl(const unsigned char *sha1,
			  enum object_type *type,
			  unsigned long *size,
			  const unsigned char **replacement)
{
	const unsigned char *repl = lookup_replace_object(sha1);
	void *data = read_object(repl, type, size);

	if (!data) {
		/* die if we replaced an object with one that does not exist */
		if (repl != sha1)
			die("replacement %s not found for %s",
			    sha1_to_hex(repl), sha1_to_hex(sha1));

		/* legacy behavior is to die on corrupted objects */
		if (has_loose_object(repl) || has_packed_and_bad(repl))
			die("object %s is corrupted", sha1_to_hex(repl));
	}

	if (replacement)
		*replacement = repl;

	return data;
}
2162
2163 void *read_object_with_reference(const unsigned char *sha1,
2164 const char *required_type_name,
2165 unsigned long *size,
2166 unsigned char *actual_sha1_return)
2167 {
2168 enum object_type type, required_type;
2169 void *buffer;
2170 unsigned long isize;
2171 unsigned char actual_sha1[20];
2172
2173 required_type = type_from_string(required_type_name);
2174 hashcpy(actual_sha1, sha1);
2175 while (1) {
2176 int ref_length = -1;
2177 const char *ref_type = NULL;
2178
2179 buffer = read_sha1_file(actual_sha1, &type, &isize);
2180 if (!buffer)
2181 return NULL;
2182 if (type == required_type) {
2183 *size = isize;
2184 if (actual_sha1_return)
2185 hashcpy(actual_sha1_return, actual_sha1);
2186 return buffer;
2187 }
2188 /* Handle references */
2189 else if (type == OBJ_COMMIT)
2190 ref_type = "tree ";
2191 else if (type == OBJ_TAG)
2192 ref_type = "object ";
2193 else {
2194 free(buffer);
2195 return NULL;
2196 }
2197 ref_length = strlen(ref_type);
2198
2199 if (ref_length + 40 > isize ||
2200 memcmp(buffer, ref_type, ref_length) ||
2201 get_sha1_hex((char *) buffer + ref_length, actual_sha1)) {
2202 free(buffer);
2203 return NULL;
2204 }
2205 free(buffer);
2206 /* Now we have the ID of the referred-to object in
2207 * actual_sha1. Check again. */
2208 }
2209 }
2210
2211 static void write_sha1_file_prepare(const void *buf, unsigned long len,
2212 const char *type, unsigned char *sha1,
2213 char *hdr, int *hdrlen)
2214 {
2215 git_SHA_CTX c;
2216
2217 /* Generate the header */
2218 *hdrlen = sprintf(hdr, "%s %lu", type, len)+1;
2219
2220 /* Sha1.. */
2221 git_SHA1_Init(&c);
2222 git_SHA1_Update(&c, hdr, *hdrlen);
2223 git_SHA1_Update(&c, buf, len);
2224 git_SHA1_Final(sha1, &c);
2225 }
2226
2227 /*
2228 * Move the just written object into its final resting place.
2229 * NEEDSWORK: this should be renamed to finalize_temp_file() as
2230 * "moving" is only a part of what it does, when no patch between
2231 * master to pu changes the call sites of this function.
2232 */
2233 int move_temp_to_file(const char *tmpfile, const char *filename)
2234 {
2235 int ret = 0;
2236
2237 if (object_creation_mode == OBJECT_CREATION_USES_RENAMES)
2238 goto try_rename;
2239 else if (link(tmpfile, filename))
2240 ret = errno;
2241
2242 /*
2243 * Coda hack - coda doesn't like cross-directory links,
2244 * so we fall back to a rename, which will mean that it
2245 * won't be able to check collisions, but that's not a
2246 * big deal.
2247 *
2248 * The same holds for FAT formatted media.
2249 *
2250 * When this succeeds, we just return. We have nothing
2251 * left to unlink.
2252 */
2253 if (ret && ret != EEXIST) {
2254 try_rename:
2255 if (!rename(tmpfile, filename))
2256 goto out;
2257 ret = errno;
2258 }
2259 unlink_or_warn(tmpfile);
2260 if (ret) {
2261 if (ret != EEXIST) {
2262 return error("unable to write sha1 filename %s: %s\n", filename, strerror(ret));
2263 }
2264 /* FIXME!!! Collision check here ? */
2265 }
2266
2267 out:
2268 if (set_shared_perm(filename, (S_IFREG|0444)))
2269 return error("unable to set permission to '%s'", filename);
2270 return 0;
2271 }
2272
/*
 * Write all len bytes at buf to fd via write_in_full().
 * Returns 0 on success; on failure reports the errno text through
 * error() and returns its value.
 */
static int write_buffer(int fd, const void *buf, size_t len)
{
	if (write_in_full(fd, buf, len) >= 0)
		return 0;
	return error("file write error (%s)", strerror(errno));
}
2279
/*
 * Compute into sha1 the object name that the len bytes at buf would
 * have as an object of the given type, without writing anything.
 * Always returns 0.
 */
int hash_sha1_file(const void *buf, unsigned long len, const char *type,
		   unsigned char *sha1)
{
	/* The header itself is not needed here, only the resulting hash */
	char header[32];
	int header_len;

	write_sha1_file_prepare(buf, len, type, sha1, header, &header_len);
	return 0;
}
2288
2289 /* Finalize a file on disk, and close it. */
2290 static void close_sha1_file(int fd)
2291 {
2292 if (fsync_object_files)
2293 fsync_or_die(fd, "sha1 file");
2294 if (close(fd) != 0)
2295 die_errno("error when closing sha1 file");
2296 }
2297
/*
 * Length of the directory part of filename, counting the final '/'.
 * Returns 0 when filename contains no '/' at all.
 */
static inline int directory_size(const char *filename)
{
	const char *last_slash = strrchr(filename, '/');

	return last_slash ? last_slash - filename + 1 : 0;
}
2306
/*
 * This creates a temporary file in the same directory as the final
 * 'filename'
 *
 * We want to avoid cross-directory filename renames, because those
 * can have problems on various filesystems (FAT, NFS, Coda).
 *
 * The temporary name is built in 'buffer' (of 'bufsiz' bytes) as the
 * directory part of 'filename' followed by "tmp_obj_XXXXXX" and handed
 * to mkstemp().  If that fails with ENOENT the directory is created and
 * mkstemp() is tried once more.
 *
 * Returns the descriptor from mkstemp(), or -1 with errno set; errno is
 * ENAMETOOLONG when the name would not fit in 'buffer'.
 */
static int create_tmpfile(char *buffer, size_t bufsiz, const char *filename)
{
	int fd, dirlen = directory_size(filename);

	if (dirlen + 20 > bufsiz) {
		errno = ENAMETOOLONG;
		return -1;
	}
	memcpy(buffer, filename, dirlen);
	strcpy(buffer + dirlen, "tmp_obj_XXXXXX");
	fd = mkstemp(buffer);
	if (fd < 0 && dirlen && errno == ENOENT) {
		/* Make sure the directory exists */
		memcpy(buffer, filename, dirlen);
		buffer[dirlen-1] = 0;
		/*
		 * EEXIST is fine: somebody else created the directory
		 * between our mkstemp() and mkdir(), which is exactly
		 * the state we want.
		 */
		if (mkdir(buffer, 0777) && errno != EEXIST)
			return -1;
		if (adjust_shared_perm(buffer))
			return -1;

		/* Try again */
		strcpy(buffer + dirlen - 1, "/tmp_obj_XXXXXX");
		fd = mkstemp(buffer);
	}
	return fd;
}
2338
2339 static int write_loose_object(const unsigned char *sha1, char *hdr, int hdrlen,
2340 void *buf, unsigned long len, time_t mtime)
2341 {
2342 int fd, ret;
2343 size_t size;
2344 unsigned char *compressed;
2345 z_stream stream;
2346 char *filename;
2347 static char tmpfile[PATH_MAX];
2348
2349 filename = sha1_file_name(sha1);
2350 fd = create_tmpfile(tmpfile, sizeof(tmpfile), filename);
2351 while (fd < 0 && errno == EMFILE && unuse_one_window(packed_git, -1))
2352 fd = create_tmpfile(tmpfile, sizeof(tmpfile), filename);
2353 if (fd < 0) {
2354 if (errno == EACCES)
2355 return error("insufficient permission for adding an object to repository database %s\n", get_object_directory());
2356 else
2357 return error("unable to create temporary sha1 filename %s: %s\n", tmpfile, strerror(errno));
2358 }
2359
2360 /* Set it up */
2361 memset(&stream, 0, sizeof(stream));
2362 deflateInit(&stream, zlib_compression_level);
2363 size = 8 + deflateBound(&stream, len+hdrlen);
2364 compressed = xmalloc(size);
2365
2366 /* Compress it */
2367 stream.next_out = compressed;
2368 stream.avail_out = size;
2369
2370 /* First header.. */
2371 stream.next_in = (unsigned char *)hdr;
2372 stream.avail_in = hdrlen;
2373 while (deflate(&stream, 0) == Z_OK)
2374 /* nothing */;
2375
2376 /* Then the data itself.. */
2377 stream.next_in = buf;
2378 stream.avail_in = len;
2379 ret = deflate(&stream, Z_FINISH);
2380 if (ret != Z_STREAM_END)
2381 die("unable to deflate new object %s (%d)", sha1_to_hex(sha1), ret);
2382
2383 ret = deflateEnd(&stream);
2384 if (ret != Z_OK)
2385 die("deflateEnd on object %s failed (%d)", sha1_to_hex(sha1), ret);
2386
2387 size = stream.total_out;
2388
2389 if (write_buffer(fd, compressed, size) < 0)
2390 die("unable to write sha1 file");
2391 close_sha1_file(fd);
2392 free(compressed);
2393
2394 if (mtime) {
2395 struct utimbuf utb;
2396 utb.actime = mtime;
2397 utb.modtime = mtime;
2398 if (utime(tmpfile, &utb) < 0)
2399 warning("failed utime() on %s: %s",
2400 tmpfile, strerror(errno));
2401 }
2402
2403 return move_temp_to_file(tmpfile, filename);
2404 }
2405
/*
 * Compute the object name for the len bytes at buf as an object of the
 * given type, copy it to returnsha1 when that is non-NULL, and write the
 * object out as a loose object unless has_sha1_file() reports that it
 * already exists.
 *
 * Returns 0 when the object already exists, otherwise the result of
 * write_loose_object().
 */
int write_sha1_file(void *buf, unsigned long len, const char *type, unsigned char *returnsha1)
{
	unsigned char name[20];
	char header[32];
	int header_len;

	write_sha1_file_prepare(buf, len, type, name, header, &header_len);
	if (returnsha1)
		hashcpy(returnsha1, name);

	/*
	 * Normally if we have it in the pack then we do not bother writing
	 * it out into .git/objects/??/?{38} file.
	 */
	return has_sha1_file(name)
		? 0
		: write_loose_object(name, header, header_len, buf, len, 0);
}
2422
2423 int force_object_loose(const unsigned char *sha1, time_t mtime)
2424 {
2425 void *buf;
2426 unsigned long len;
2427 enum object_type type;
2428 char hdr[32];
2429 int hdrlen;
2430 int ret;
2431
2432 if (has_loose_object(sha1))
2433 return 0;
2434 buf = read_packed_sha1(sha1, &type, &len);
2435 if (!buf)
2436 return error("cannot read sha1_file for %s", sha1_to_hex(sha1));
2437 hdrlen = sprintf(hdr, "%s %lu", typename(type), len) + 1;
2438 ret = write_loose_object(sha1, hdr, hdrlen, buf, len, mtime);
2439 free(buf);
2440
2441 return ret;
2442 }
2443
/*
 * Returns 1 when stat() succeeds on the path given by
 * sha1_pack_index_name(sha1), 0 otherwise.
 */
int has_pack_index(const unsigned char *sha1)
{
	struct stat st;

	return stat(sha1_pack_index_name(sha1), &st) ? 0 : 1;
}
2451
/*
 * Returns 1 when stat() succeeds on the path given by
 * sha1_pack_name(sha1), 0 otherwise.
 */
int has_pack_file(const unsigned char *sha1)
{
	struct stat st;

	return stat(sha1_pack_name(sha1), &st) ? 0 : 1;
}
2459
2460 int has_sha1_pack(const unsigned char *sha1)
2461 {
2462 struct pack_entry e;
2463 return find_pack_entry(sha1, &e);
2464 }
2465
2466 int has_sha1_file(const unsigned char *sha1)
2467 {
2468 struct pack_entry e;
2469
2470 if (find_pack_entry(sha1, &e))
2471 return 1;
2472 return has_loose_object(sha1);
2473 }
2474
2475 static int index_mem(unsigned char *sha1, void *buf, size_t size,
2476 int write_object, enum object_type type, const char *path)
2477 {
2478 int ret, re_allocated = 0;
2479
2480 if (!type)
2481 type = OBJ_BLOB;
2482
2483 /*
2484 * Convert blobs to git internal format
2485 */
2486 if ((type == OBJ_BLOB) && path) {
2487 struct strbuf nbuf = STRBUF_INIT;
2488 if (convert_to_git(path, buf, size, &nbuf,
2489 write_object ? safe_crlf : 0)) {
2490 buf = strbuf_detach(&nbuf, &size);
2491 re_allocated = 1;
2492 }
2493 }
2494
2495 if (write_object)
2496 ret = write_sha1_file(buf, size, typename(type), sha1);
2497 else
2498 ret = hash_sha1_file(buf, size, typename(type), sha1);
2499 if (re_allocated)
2500 free(buf);
2501 return ret;
2502 }
2503
2504 int index_fd(unsigned char *sha1, int fd, struct stat *st, int write_object,
2505 enum object_type type, const char *path)
2506 {
2507 int ret;
2508 size_t size = xsize_t(st->st_size);
2509
2510 if (!S_ISREG(st->st_mode)) {
2511 struct strbuf sbuf = STRBUF_INIT;
2512 if (strbuf_read(&sbuf, fd, 4096) >= 0)
2513 ret = index_mem(sha1, sbuf.buf, sbuf.len, write_object,
2514 type, path);
2515 else
2516 ret = -1;
2517 strbuf_release(&sbuf);
2518 } else if (size) {
2519 void *buf = xmmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
2520 ret = index_mem(sha1, buf, size, write_object, type, path);
2521 munmap(buf, size);
2522 } else
2523 ret = index_mem(sha1, NULL, size, write_object, type, path);
2524 close(fd);
2525 return ret;
2526 }
2527
2528 int index_path(unsigned char *sha1, const char *path, struct stat *st, int write_object)
2529 {
2530 int fd;
2531 struct strbuf sb = STRBUF_INIT;
2532
2533 switch (st->st_mode & S_IFMT) {
2534 case S_IFREG:
2535 fd = open(path, O_RDONLY);
2536 if (fd < 0)
2537 return error("open(\"%s\"): %s", path,
2538 strerror(errno));
2539 if (index_fd(sha1, fd, st, write_object, OBJ_BLOB, path) < 0)
2540 return error("%s: failed to insert into database",
2541 path);
2542 break;
2543 case S_IFLNK:
2544 if (strbuf_readlink(&sb, path, st->st_size)) {
2545 char *errstr = strerror(errno);
2546 return error("readlink(\"%s\"): %s", path,
2547 errstr);
2548 }
2549 if (!write_object)
2550 hash_sha1_file(sb.buf, sb.len, blob_type, sha1);
2551 else if (write_sha1_file(sb.buf, sb.len, blob_type, sha1))
2552 return error("%s: failed to insert into database",
2553 path);
2554 strbuf_release(&sb);
2555 break;
2556 case S_IFDIR:
2557 return resolve_gitlink_ref(path, "HEAD", sha1);
2558 default:
2559 return error("%s: unsupported file type", path);
2560 }
2561 return 0;
2562 }
2563
2564 int read_pack_header(int fd, struct pack_header *header)
2565 {
2566 if (read_in_full(fd, header, sizeof(*header)) < sizeof(*header))
2567 /* "eof before pack header was fully read" */
2568 return PH_ERROR_EOF;
2569
2570 if (header->hdr_signature != htonl(PACK_SIGNATURE))
2571 /* "protocol error (pack signature mismatch detected)" */
2572 return PH_ERROR_PACK_SIGNATURE;
2573 if (!pack_version_ok(header->hdr_version))
2574 /* "protocol error (pack version unsupported)" */
2575 return PH_ERROR_PROTOCOL;
2576 return 0;
2577 }