]> git.ipfire.org Git - thirdparty/git.git/blob - sha1_file.c
pack: move pack name-related functions
[thirdparty/git.git] / sha1_file.c
1 /*
2 * GIT - The information manager from hell
3 *
4 * Copyright (C) Linus Torvalds, 2005
5 *
6 * This handles basic git sha1 object files - packing, unpacking,
7 * creation etc.
8 */
9 #include "cache.h"
10 #include "config.h"
11 #include "string-list.h"
12 #include "lockfile.h"
13 #include "delta.h"
14 #include "pack.h"
15 #include "blob.h"
16 #include "commit.h"
17 #include "run-command.h"
18 #include "tag.h"
19 #include "tree.h"
20 #include "tree-walk.h"
21 #include "refs.h"
22 #include "pack-revindex.h"
23 #include "sha1-lookup.h"
24 #include "bulk-checkin.h"
25 #include "streaming.h"
26 #include "dir.h"
27 #include "mru.h"
28 #include "list.h"
29 #include "mergesort.h"
30 #include "quote.h"
31 #include "packfile.h"
32
/* printf conversion used for values that went through sz_fmt(). */
#define SZ_FMT PRIuMAX

/* Widen a size_t to uintmax_t so that it can be printed with SZ_FMT. */
static inline uintmax_t sz_fmt(size_t s)
{
	uintmax_t widened = s;

	return widened;
}
35
/* No initializer: as an object with static storage it is all zero bytes. */
const unsigned char null_sha1[20];
/* struct object_id counterpart of null_sha1; likewise zero-initialized. */
const struct object_id null_oid;
/* Object id initialized from EMPTY_TREE_SHA1_BIN_LITERAL. */
const struct object_id empty_tree_oid = {
	EMPTY_TREE_SHA1_BIN_LITERAL
};
/* Object id initialized from EMPTY_BLOB_SHA1_BIN_LITERAL. */
const struct object_id empty_blob_oid = {
	EMPTY_BLOB_SHA1_BIN_LITERAL
};
44
45 /*
46 * This is meant to hold a *small* number of objects that you would
47 * want read_sha1_file() to be able to return, but yet you do not want
48 * to write them into the object store (e.g. a browse-only
49 * application).
50 */
static struct cached_object {
	/* Key that find_cached_object() compares with hashcmp(). */
	unsigned char sha1[20];
	enum object_type type;
	/* Presumably the object contents and their length -- confirm with users. */
	void *buf;
	unsigned long size;
} *cached_objects;
/*
 * cached_object_nr is the number of entries find_cached_object() scans;
 * cached_object_alloc is presumably the allocated capacity (not used in
 * the code visible here).
 */
static int cached_object_nr, cached_object_alloc;

/*
 * Built-in entry for the empty tree (zero-length contents, OBJ_TREE).
 * find_cached_object() falls back to it when no entry of
 * cached_objects matches.
 */
static struct cached_object empty_tree = {
	EMPTY_TREE_SHA1_BIN_LITERAL,
	OBJ_TREE,
	"",
	0
};
65
66 static struct cached_object *find_cached_object(const unsigned char *sha1)
67 {
68 int i;
69 struct cached_object *co = cached_objects;
70
71 for (i = 0; i < cached_object_nr; i++, co++) {
72 if (!hashcmp(co->sha1, sha1))
73 return co;
74 }
75 if (!hashcmp(sha1, empty_tree.sha1))
76 return &empty_tree;
77 return NULL;
78 }
79
80 int mkdir_in_gitdir(const char *path)
81 {
82 if (mkdir(path, 0777)) {
83 int saved_errno = errno;
84 struct stat st;
85 struct strbuf sb = STRBUF_INIT;
86
87 if (errno != EEXIST)
88 return -1;
89 /*
90 * Are we looking at a path in a symlinked worktree
91 * whose original repository does not yet have it?
92 * e.g. .git/rr-cache pointing at its original
93 * repository in which the user hasn't performed any
94 * conflict resolution yet?
95 */
96 if (lstat(path, &st) || !S_ISLNK(st.st_mode) ||
97 strbuf_readlink(&sb, path, st.st_size) ||
98 !is_absolute_path(sb.buf) ||
99 mkdir(sb.buf, 0777)) {
100 strbuf_release(&sb);
101 errno = saved_errno;
102 return -1;
103 }
104 strbuf_release(&sb);
105 }
106 return adjust_shared_perm(path);
107 }
108
109 enum scld_error safe_create_leading_directories(char *path)
110 {
111 char *next_component = path + offset_1st_component(path);
112 enum scld_error ret = SCLD_OK;
113
114 while (ret == SCLD_OK && next_component) {
115 struct stat st;
116 char *slash = next_component, slash_character;
117
118 while (*slash && !is_dir_sep(*slash))
119 slash++;
120
121 if (!*slash)
122 break;
123
124 next_component = slash + 1;
125 while (is_dir_sep(*next_component))
126 next_component++;
127 if (!*next_component)
128 break;
129
130 slash_character = *slash;
131 *slash = '\0';
132 if (!stat(path, &st)) {
133 /* path exists */
134 if (!S_ISDIR(st.st_mode)) {
135 errno = ENOTDIR;
136 ret = SCLD_EXISTS;
137 }
138 } else if (mkdir(path, 0777)) {
139 if (errno == EEXIST &&
140 !stat(path, &st) && S_ISDIR(st.st_mode))
141 ; /* somebody created it since we checked */
142 else if (errno == ENOENT)
143 /*
144 * Either mkdir() failed because
145 * somebody just pruned the containing
146 * directory, or stat() failed because
147 * the file that was in our way was
148 * just removed. Either way, inform
149 * the caller that it might be worth
150 * trying again:
151 */
152 ret = SCLD_VANISHED;
153 else
154 ret = SCLD_FAILED;
155 } else if (adjust_shared_perm(path)) {
156 ret = SCLD_PERMS;
157 }
158 *slash = slash_character;
159 }
160 return ret;
161 }
162
163 enum scld_error safe_create_leading_directories_const(const char *path)
164 {
165 int save_errno;
166 /* path points to cache entries, so xstrdup before messing with it */
167 char *buf = xstrdup(path);
168 enum scld_error result = safe_create_leading_directories(buf);
169
170 save_errno = errno;
171 free(buf);
172 errno = save_errno;
173 return result;
174 }
175
176 int raceproof_create_file(const char *path, create_file_fn fn, void *cb)
177 {
178 /*
179 * The number of times we will try to remove empty directories
180 * in the way of path. This is only 1 because if another
181 * process is racily creating directories that conflict with
182 * us, we don't want to fight against them.
183 */
184 int remove_directories_remaining = 1;
185
186 /*
187 * The number of times that we will try to create the
188 * directories containing path. We are willing to attempt this
189 * more than once, because another process could be trying to
190 * clean up empty directories at the same time as we are
191 * trying to create them.
192 */
193 int create_directories_remaining = 3;
194
195 /* A scratch copy of path, filled lazily if we need it: */
196 struct strbuf path_copy = STRBUF_INIT;
197
198 int ret, save_errno;
199
200 /* Sanity check: */
201 assert(*path);
202
203 retry_fn:
204 ret = fn(path, cb);
205 save_errno = errno;
206 if (!ret)
207 goto out;
208
209 if (errno == EISDIR && remove_directories_remaining-- > 0) {
210 /*
211 * A directory is in the way. Maybe it is empty; try
212 * to remove it:
213 */
214 if (!path_copy.len)
215 strbuf_addstr(&path_copy, path);
216
217 if (!remove_dir_recursively(&path_copy, REMOVE_DIR_EMPTY_ONLY))
218 goto retry_fn;
219 } else if (errno == ENOENT && create_directories_remaining-- > 0) {
220 /*
221 * Maybe the containing directory didn't exist, or
222 * maybe it was just deleted by a process that is
223 * racing with us to clean up empty directories. Try
224 * to create it:
225 */
226 enum scld_error scld_result;
227
228 if (!path_copy.len)
229 strbuf_addstr(&path_copy, path);
230
231 do {
232 scld_result = safe_create_leading_directories(path_copy.buf);
233 if (scld_result == SCLD_OK)
234 goto retry_fn;
235 } while (scld_result == SCLD_VANISHED && create_directories_remaining-- > 0);
236 }
237
238 out:
239 strbuf_release(&path_copy);
240 errno = save_errno;
241 return ret;
242 }
243
/*
 * Append the loose-object path fragment for sha1 to buf: the 20 bytes
 * as lowercase hex, with a '/' after the first byte ("xx/" followed by
 * the remaining 38 hex digits).
 */
static void fill_sha1_path(struct strbuf *buf, const unsigned char *sha1)
{
	static const char digits[] = "0123456789abcdef";
	int pos;

	/* The first byte is followed by a '/'. */
	strbuf_addch(buf, digits[sha1[0] >> 4]);
	strbuf_addch(buf, digits[sha1[0] & 0xf]);
	strbuf_addch(buf, '/');

	for (pos = 1; pos < 20; pos++) {
		unsigned int byte = sha1[pos];

		strbuf_addch(buf, digits[byte >> 4]);
		strbuf_addch(buf, digits[byte & 0xf]);
	}
}
256
257 const char *sha1_file_name(const unsigned char *sha1)
258 {
259 static struct strbuf buf = STRBUF_INIT;
260
261 strbuf_reset(&buf);
262 strbuf_addf(&buf, "%s/", get_object_directory());
263
264 fill_sha1_path(&buf, sha1);
265 return buf.buf;
266 }
267
268 struct strbuf *alt_scratch_buf(struct alternate_object_database *alt)
269 {
270 strbuf_setlen(&alt->scratch, alt->base_len);
271 return &alt->scratch;
272 }
273
274 static const char *alt_sha1_path(struct alternate_object_database *alt,
275 const unsigned char *sha1)
276 {
277 struct strbuf *buf = alt_scratch_buf(alt);
278 fill_sha1_path(buf, sha1);
279 return buf->buf;
280 }
281
/* Head of the singly linked list of alternate object databases. */
struct alternate_object_database *alt_odb_list;
/*
 * Points at the `next` slot where the following entry gets linked in.
 * It stays NULL until prepare_alt_odb() has run, which is how that
 * function detects that the list is already initialized.
 */
static struct alternate_object_database **alt_odb_tail;
284
285 /*
286 * Return non-zero iff the path is usable as an alternate object database.
287 */
288 static int alt_odb_usable(struct strbuf *path, const char *normalized_objdir)
289 {
290 struct alternate_object_database *alt;
291
292 /* Detect cases where alternate disappeared */
293 if (!is_directory(path->buf)) {
294 error("object directory %s does not exist; "
295 "check .git/objects/info/alternates.",
296 path->buf);
297 return 0;
298 }
299
300 /*
301 * Prevent the common mistake of listing the same
302 * thing twice, or object directory itself.
303 */
304 for (alt = alt_odb_list; alt; alt = alt->next) {
305 if (!fspathcmp(path->buf, alt->path))
306 return 0;
307 }
308 if (!fspathcmp(path->buf, normalized_objdir))
309 return 0;
310
311 return 1;
312 }
313
/*
 * Prepare alternate object database registry.
 *
 * The variable alt_odb_list points at the list of struct
 * alternate_object_database.  The elements on this list come from
 * non-empty elements from colon separated ALTERNATE_DB_ENVIRONMENT
 * environment variable, and $GIT_OBJECT_DIRECTORY/info/alternates,
 * whose contents are similar to that environment variable but can be
 * LF separated.  Its base points at a statically allocated buffer that
 * contains "/the/directory/corresponding/to/.git/objects/...", while
 * its name points just after the slash at the end of ".git/objects/"
 * in the example above, and has enough space to hold 40-byte hex
 * SHA1, an extra slash for the first level indirection, and the
 * terminating NUL.
 */
329 static void read_info_alternates(const char * relative_base, int depth);
330 static int link_alt_odb_entry(const char *entry, const char *relative_base,
331 int depth, const char *normalized_objdir)
332 {
333 struct alternate_object_database *ent;
334 struct strbuf pathbuf = STRBUF_INIT;
335
336 if (!is_absolute_path(entry) && relative_base) {
337 strbuf_realpath(&pathbuf, relative_base, 1);
338 strbuf_addch(&pathbuf, '/');
339 }
340 strbuf_addstr(&pathbuf, entry);
341
342 if (strbuf_normalize_path(&pathbuf) < 0 && relative_base) {
343 error("unable to normalize alternate object path: %s",
344 pathbuf.buf);
345 strbuf_release(&pathbuf);
346 return -1;
347 }
348
349 /*
350 * The trailing slash after the directory name is given by
351 * this function at the end. Remove duplicates.
352 */
353 while (pathbuf.len && pathbuf.buf[pathbuf.len - 1] == '/')
354 strbuf_setlen(&pathbuf, pathbuf.len - 1);
355
356 if (!alt_odb_usable(&pathbuf, normalized_objdir)) {
357 strbuf_release(&pathbuf);
358 return -1;
359 }
360
361 ent = alloc_alt_odb(pathbuf.buf);
362
363 /* add the alternate entry */
364 *alt_odb_tail = ent;
365 alt_odb_tail = &(ent->next);
366 ent->next = NULL;
367
368 /* recursively add alternates */
369 read_info_alternates(pathbuf.buf, depth + 1);
370
371 strbuf_release(&pathbuf);
372 return 0;
373 }
374
/*
 * Parse one entry of an alternates list starting at `string`.
 *
 * `out` is reset and then receives the entry: nothing for a comment
 * (a line starting with '#'), the unquoted text for a well-formed
 * C-style quoted path, or the raw text up to `sep` otherwise.  Broken
 * quoting (e.g. an entry that does not end with a quote) is treated
 * as a normal, unquoted path.
 *
 * Returns a pointer just past the entry, also skipping the byte that
 * follows it unless that is the terminating NUL.
 */
static const char *parse_alt_odb_entry(const char *string,
				       int sep,
				       struct strbuf *out)
{
	const char *rest;
	int is_comment = (*string == '#');

	strbuf_reset(out);

	/*
	 * On success unquote_c_style() has copied the data for us and
	 * set "rest"; every other case consumes up to the next separator.
	 */
	if (is_comment || *string != '"' ||
	    unquote_c_style(out, string, &rest)) {
		rest = strchrnul(string, sep);
		if (!is_comment)
			strbuf_add(out, string, rest - string);
	}

	return *rest ? rest + 1 : rest;
}
403
404 static void link_alt_odb_entries(const char *alt, int len, int sep,
405 const char *relative_base, int depth)
406 {
407 struct strbuf objdirbuf = STRBUF_INIT;
408 struct strbuf entry = STRBUF_INIT;
409
410 if (depth > 5) {
411 error("%s: ignoring alternate object stores, nesting too deep.",
412 relative_base);
413 return;
414 }
415
416 strbuf_add_absolute_path(&objdirbuf, get_object_directory());
417 if (strbuf_normalize_path(&objdirbuf) < 0)
418 die("unable to normalize object directory: %s",
419 objdirbuf.buf);
420
421 while (*alt) {
422 alt = parse_alt_odb_entry(alt, sep, &entry);
423 if (!entry.len)
424 continue;
425 link_alt_odb_entry(entry.buf, relative_base, depth, objdirbuf.buf);
426 }
427 strbuf_release(&entry);
428 strbuf_release(&objdirbuf);
429 }
430
431 static void read_info_alternates(const char * relative_base, int depth)
432 {
433 char *map;
434 size_t mapsz;
435 struct stat st;
436 char *path;
437 int fd;
438
439 path = xstrfmt("%s/info/alternates", relative_base);
440 fd = git_open(path);
441 free(path);
442 if (fd < 0)
443 return;
444 if (fstat(fd, &st) || (st.st_size == 0)) {
445 close(fd);
446 return;
447 }
448 mapsz = xsize_t(st.st_size);
449 map = xmmap(NULL, mapsz, PROT_READ, MAP_PRIVATE, fd, 0);
450 close(fd);
451
452 link_alt_odb_entries(map, mapsz, '\n', relative_base, depth);
453
454 munmap(map, mapsz);
455 }
456
457 struct alternate_object_database *alloc_alt_odb(const char *dir)
458 {
459 struct alternate_object_database *ent;
460
461 FLEX_ALLOC_STR(ent, path, dir);
462 strbuf_init(&ent->scratch, 0);
463 strbuf_addf(&ent->scratch, "%s/", dir);
464 ent->base_len = ent->scratch.len;
465
466 return ent;
467 }
468
469 void add_to_alternates_file(const char *reference)
470 {
471 struct lock_file *lock = xcalloc(1, sizeof(struct lock_file));
472 char *alts = git_pathdup("objects/info/alternates");
473 FILE *in, *out;
474
475 hold_lock_file_for_update(lock, alts, LOCK_DIE_ON_ERROR);
476 out = fdopen_lock_file(lock, "w");
477 if (!out)
478 die_errno("unable to fdopen alternates lockfile");
479
480 in = fopen(alts, "r");
481 if (in) {
482 struct strbuf line = STRBUF_INIT;
483 int found = 0;
484
485 while (strbuf_getline(&line, in) != EOF) {
486 if (!strcmp(reference, line.buf)) {
487 found = 1;
488 break;
489 }
490 fprintf_or_die(out, "%s\n", line.buf);
491 }
492
493 strbuf_release(&line);
494 fclose(in);
495
496 if (found) {
497 rollback_lock_file(lock);
498 lock = NULL;
499 }
500 }
501 else if (errno != ENOENT)
502 die_errno("unable to read alternates file");
503
504 if (lock) {
505 fprintf_or_die(out, "%s\n", reference);
506 if (commit_lock_file(lock))
507 die_errno("unable to move new alternates file into place");
508 if (alt_odb_tail)
509 link_alt_odb_entries(reference, strlen(reference), '\n', NULL, 0);
510 }
511 free(alts);
512 }
513
/*
 * Add `reference` to the in-memory list of alternates only; nothing
 * is written to disk.
 */
void add_to_alternates_memory(const char *reference)
{
	int reference_len;

	/*
	 * The alternates must be initialized first, or else our entry
	 * may be overwritten when they are.
	 */
	prepare_alt_odb();

	reference_len = strlen(reference);
	link_alt_odb_entries(reference, reference_len, '\n', NULL, 0);
}
524
525 /*
526 * Compute the exact path an alternate is at and returns it. In case of
527 * error NULL is returned and the human readable error is added to `err`
528 * `path` may be relative and should point to $GITDIR.
529 * `err` must not be null.
530 */
531 char *compute_alternate_path(const char *path, struct strbuf *err)
532 {
533 char *ref_git = NULL;
534 const char *repo, *ref_git_s;
535 int seen_error = 0;
536
537 ref_git_s = real_path_if_valid(path);
538 if (!ref_git_s) {
539 seen_error = 1;
540 strbuf_addf(err, _("path '%s' does not exist"), path);
541 goto out;
542 } else
543 /*
544 * Beware: read_gitfile(), real_path() and mkpath()
545 * return static buffer
546 */
547 ref_git = xstrdup(ref_git_s);
548
549 repo = read_gitfile(ref_git);
550 if (!repo)
551 repo = read_gitfile(mkpath("%s/.git", ref_git));
552 if (repo) {
553 free(ref_git);
554 ref_git = xstrdup(repo);
555 }
556
557 if (!repo && is_directory(mkpath("%s/.git/objects", ref_git))) {
558 char *ref_git_git = mkpathdup("%s/.git", ref_git);
559 free(ref_git);
560 ref_git = ref_git_git;
561 } else if (!is_directory(mkpath("%s/objects", ref_git))) {
562 struct strbuf sb = STRBUF_INIT;
563 seen_error = 1;
564 if (get_common_dir(&sb, ref_git)) {
565 strbuf_addf(err,
566 _("reference repository '%s' as a linked "
567 "checkout is not supported yet."),
568 path);
569 goto out;
570 }
571
572 strbuf_addf(err, _("reference repository '%s' is not a "
573 "local repository."), path);
574 goto out;
575 }
576
577 if (!access(mkpath("%s/shallow", ref_git), F_OK)) {
578 strbuf_addf(err, _("reference repository '%s' is shallow"),
579 path);
580 seen_error = 1;
581 goto out;
582 }
583
584 if (!access(mkpath("%s/info/grafts", ref_git), F_OK)) {
585 strbuf_addf(err,
586 _("reference repository '%s' is grafted"),
587 path);
588 seen_error = 1;
589 goto out;
590 }
591
592 out:
593 if (seen_error) {
594 FREE_AND_NULL(ref_git);
595 }
596
597 return ref_git;
598 }
599
600 int foreach_alt_odb(alt_odb_fn fn, void *cb)
601 {
602 struct alternate_object_database *ent;
603 int r = 0;
604
605 prepare_alt_odb();
606 for (ent = alt_odb_list; ent; ent = ent->next) {
607 r = fn(ent, cb);
608 if (r)
609 break;
610 }
611 return r;
612 }
613
614 void prepare_alt_odb(void)
615 {
616 const char *alt;
617
618 if (alt_odb_tail)
619 return;
620
621 alt = getenv(ALTERNATE_DB_ENVIRONMENT);
622 if (!alt) alt = "";
623
624 alt_odb_tail = &alt_odb_list;
625 link_alt_odb_entries(alt, strlen(alt), PATH_SEP, NULL, 0);
626
627 read_info_alternates(get_object_directory(), 0);
628 }
629
/*
 * Set both the access and the modification time of fn to the current
 * time.  Returns 1 if we have successfully freshened the file, 0
 * otherwise.
 */
static int freshen_file(const char *fn)
{
	struct utimbuf stamp;
	time_t now = time(NULL);

	stamp.actime = now;
	stamp.modtime = now;
	return utime(fn, &stamp) == 0;
}
637
/*
 * All of the check_and_freshen functions return 1 if the file exists and was
 * freshened (if freshening was requested), 0 otherwise. If they return
 * 0, you should not assume that it is safe to skip a write of the object (it
 * either does not exist on disk, or has a stale mtime and may be subject to
 * pruning).
 */
int check_and_freshen_file(const char *fn, int freshen)
{
	if (access(fn, F_OK) != 0)
		return 0;	/* not there at all */
	if (!freshen)
		return 1;	/* existence was all that was asked for */
	return freshen_file(fn) ? 1 : 0;
}
653
/* check_and_freshen_file() applied to the loose object file in our own object directory. */
static int check_and_freshen_local(const unsigned char *sha1, int freshen)
{
	const char *loose_path = sha1_file_name(sha1);

	return check_and_freshen_file(loose_path, freshen);
}
658
659 static int check_and_freshen_nonlocal(const unsigned char *sha1, int freshen)
660 {
661 struct alternate_object_database *alt;
662 prepare_alt_odb();
663 for (alt = alt_odb_list; alt; alt = alt->next) {
664 const char *path = alt_sha1_path(alt, sha1);
665 if (check_and_freshen_file(path, freshen))
666 return 1;
667 }
668 return 0;
669 }
670
/*
 * Look for the loose object locally first and in the alternates only
 * if it is not found (or could not be freshened) there.
 */
static int check_and_freshen(const unsigned char *sha1, int freshen)
{
	if (check_and_freshen_local(sha1, freshen))
		return 1;
	return check_and_freshen_nonlocal(sha1, freshen) != 0;
}
676
/* Return 1 if some alternate has sha1 as a loose object; nothing is freshened. */
int has_loose_object_nonlocal(const unsigned char *sha1)
{
	const int freshen = 0;

	return check_and_freshen_nonlocal(sha1, freshen);
}
681
/* Return 1 if sha1 exists as a loose object locally or in an alternate; nothing is freshened. */
static int has_loose_object(const unsigned char *sha1)
{
	const int freshen = 0;

	return check_and_freshen(sha1, freshen);
}
686
/* Counters that pack_report() prints; not modified in the code visible here. */
static unsigned int pack_used_ctr;
static unsigned int pack_mmap_calls;
static unsigned int peak_pack_open_windows;
/* Number of pack windows currently mapped; decremented as windows are unmapped. */
static unsigned int pack_open_windows;
/* Number of pack file descriptors currently open. */
static unsigned int pack_open_fds;
/* Limit on pack_open_fds; computed on first use by open_packed_git_1(). */
static unsigned int pack_max_fds;
static size_t peak_pack_mapped;
/* Total bytes of pack windows currently mapped. */
static size_t pack_mapped;
/* Head of the list of known packs (linked through ->next). */
struct packed_git *packed_git;

static struct mru packed_git_mru_storage;
struct mru *packed_git_mru = &packed_git_mru_storage;
699
700 void pack_report(void)
701 {
702 fprintf(stderr,
703 "pack_report: getpagesize() = %10" SZ_FMT "\n"
704 "pack_report: core.packedGitWindowSize = %10" SZ_FMT "\n"
705 "pack_report: core.packedGitLimit = %10" SZ_FMT "\n",
706 sz_fmt(getpagesize()),
707 sz_fmt(packed_git_window_size),
708 sz_fmt(packed_git_limit));
709 fprintf(stderr,
710 "pack_report: pack_used_ctr = %10u\n"
711 "pack_report: pack_mmap_calls = %10u\n"
712 "pack_report: pack_open_windows = %10u / %10u\n"
713 "pack_report: pack_mapped = "
714 "%10" SZ_FMT " / %10" SZ_FMT "\n",
715 pack_used_ctr,
716 pack_mmap_calls,
717 pack_open_windows, peak_pack_open_windows,
718 sz_fmt(pack_mapped), sz_fmt(peak_pack_mapped));
719 }
720
721 /*
722 * Open and mmap the index file at path, perform a couple of
723 * consistency checks, then record its information to p. Return 0 on
724 * success.
725 */
726 static int check_packed_git_idx(const char *path, struct packed_git *p)
727 {
728 void *idx_map;
729 struct pack_idx_header *hdr;
730 size_t idx_size;
731 uint32_t version, nr, i, *index;
732 int fd = git_open(path);
733 struct stat st;
734
735 if (fd < 0)
736 return -1;
737 if (fstat(fd, &st)) {
738 close(fd);
739 return -1;
740 }
741 idx_size = xsize_t(st.st_size);
742 if (idx_size < 4 * 256 + 20 + 20) {
743 close(fd);
744 return error("index file %s is too small", path);
745 }
746 idx_map = xmmap(NULL, idx_size, PROT_READ, MAP_PRIVATE, fd, 0);
747 close(fd);
748
749 hdr = idx_map;
750 if (hdr->idx_signature == htonl(PACK_IDX_SIGNATURE)) {
751 version = ntohl(hdr->idx_version);
752 if (version < 2 || version > 2) {
753 munmap(idx_map, idx_size);
754 return error("index file %s is version %"PRIu32
755 " and is not supported by this binary"
756 " (try upgrading GIT to a newer version)",
757 path, version);
758 }
759 } else
760 version = 1;
761
762 nr = 0;
763 index = idx_map;
764 if (version > 1)
765 index += 2; /* skip index header */
766 for (i = 0; i < 256; i++) {
767 uint32_t n = ntohl(index[i]);
768 if (n < nr) {
769 munmap(idx_map, idx_size);
770 return error("non-monotonic index %s", path);
771 }
772 nr = n;
773 }
774
775 if (version == 1) {
776 /*
777 * Total size:
778 * - 256 index entries 4 bytes each
779 * - 24-byte entries * nr (20-byte sha1 + 4-byte offset)
780 * - 20-byte SHA1 of the packfile
781 * - 20-byte SHA1 file checksum
782 */
783 if (idx_size != 4*256 + nr * 24 + 20 + 20) {
784 munmap(idx_map, idx_size);
785 return error("wrong index v1 file size in %s", path);
786 }
787 } else if (version == 2) {
788 /*
789 * Minimum size:
790 * - 8 bytes of header
791 * - 256 index entries 4 bytes each
792 * - 20-byte sha1 entry * nr
793 * - 4-byte crc entry * nr
794 * - 4-byte offset entry * nr
795 * - 20-byte SHA1 of the packfile
796 * - 20-byte SHA1 file checksum
797 * And after the 4-byte offset table might be a
798 * variable sized table containing 8-byte entries
799 * for offsets larger than 2^31.
800 */
801 unsigned long min_size = 8 + 4*256 + nr*(20 + 4 + 4) + 20 + 20;
802 unsigned long max_size = min_size;
803 if (nr)
804 max_size += (nr - 1)*8;
805 if (idx_size < min_size || idx_size > max_size) {
806 munmap(idx_map, idx_size);
807 return error("wrong index v2 file size in %s", path);
808 }
809 if (idx_size != min_size &&
810 /*
811 * make sure we can deal with large pack offsets.
812 * 31-bit signed offset won't be enough, neither
813 * 32-bit unsigned one will be.
814 */
815 (sizeof(off_t) <= 4)) {
816 munmap(idx_map, idx_size);
817 return error("pack too large for current definition of off_t in %s", path);
818 }
819 }
820
821 p->index_version = version;
822 p->index_data = idx_map;
823 p->index_size = idx_size;
824 p->num_objects = nr;
825 return 0;
826 }
827
828 int open_pack_index(struct packed_git *p)
829 {
830 char *idx_name;
831 size_t len;
832 int ret;
833
834 if (p->index_data)
835 return 0;
836
837 if (!strip_suffix(p->pack_name, ".pack", &len))
838 die("BUG: pack_name does not end in .pack");
839 idx_name = xstrfmt("%.*s.idx", (int)len, p->pack_name);
840 ret = check_packed_git_idx(idx_name, p);
841 free(idx_name);
842 return ret;
843 }
844
845 static void scan_windows(struct packed_git *p,
846 struct packed_git **lru_p,
847 struct pack_window **lru_w,
848 struct pack_window **lru_l)
849 {
850 struct pack_window *w, *w_l;
851
852 for (w_l = NULL, w = p->windows; w; w = w->next) {
853 if (!w->inuse_cnt) {
854 if (!*lru_w || w->last_used < (*lru_w)->last_used) {
855 *lru_p = p;
856 *lru_w = w;
857 *lru_l = w_l;
858 }
859 }
860 w_l = w;
861 }
862 }
863
864 static int unuse_one_window(struct packed_git *current)
865 {
866 struct packed_git *p, *lru_p = NULL;
867 struct pack_window *lru_w = NULL, *lru_l = NULL;
868
869 if (current)
870 scan_windows(current, &lru_p, &lru_w, &lru_l);
871 for (p = packed_git; p; p = p->next)
872 scan_windows(p, &lru_p, &lru_w, &lru_l);
873 if (lru_p) {
874 munmap(lru_w->base, lru_w->len);
875 pack_mapped -= lru_w->len;
876 if (lru_l)
877 lru_l->next = lru_w->next;
878 else
879 lru_p->windows = lru_w->next;
880 free(lru_w);
881 pack_open_windows--;
882 return 1;
883 }
884 return 0;
885 }
886
887 void release_pack_memory(size_t need)
888 {
889 size_t cur = pack_mapped;
890 while (need >= (cur - pack_mapped) && unuse_one_window(NULL))
891 ; /* nothing */
892 }
893
/*
 * Die if `length` exceeds the mmap size limit.
 *
 * The limit is taken from the GIT_MMAP_LIMIT environment variable the
 * first time it is needed; a value of 0 means SIZE_MAX.
 */
static void mmap_limit_check(size_t length)
{
	static size_t limit = 0;

	if (!limit) {
		size_t configured = git_env_ulong("GIT_MMAP_LIMIT", 0);

		limit = configured ? configured : SIZE_MAX;
	}

	if (length <= limit)
		return;
	die("attempting to mmap %"PRIuMAX" over limit %"PRIuMAX,
	    (uintmax_t)length, (uintmax_t)limit);
}
906
/*
 * mmap() that retries once after releasing pack windows.
 *
 * Dies (through mmap_limit_check()) when length is over the limit.
 * When the first attempt fails, NULL is returned for a zero length;
 * otherwise pack memory is released and the result of a second
 * attempt is returned, which may still be MAP_FAILED.
 */
void *xmmap_gently(void *start, size_t length,
		  int prot, int flags, int fd, off_t offset)
{
	void *mapping;

	mmap_limit_check(length);

	mapping = mmap(start, length, prot, flags, fd, offset);
	if (mapping != MAP_FAILED)
		return mapping;
	if (!length)
		return NULL;

	release_pack_memory(length);
	return mmap(start, length, prot, flags, fd, offset);
}
922
/* Same as xmmap_gently(), but dies instead of returning MAP_FAILED. */
void *xmmap(void *start, size_t length,
	int prot, int flags, int fd, off_t offset)
{
	void *mapping = xmmap_gently(start, length, prot, flags, fd, offset);

	if (mapping != MAP_FAILED)
		return mapping;
	die_errno("mmap failed");
	return mapping;
}
931
932 void close_pack_windows(struct packed_git *p)
933 {
934 while (p->windows) {
935 struct pack_window *w = p->windows;
936
937 if (w->inuse_cnt)
938 die("pack '%s' still has open windows to it",
939 p->pack_name);
940 munmap(w->base, w->len);
941 pack_mapped -= w->len;
942 pack_open_windows--;
943 p->windows = w->next;
944 free(w);
945 }
946 }
947
948 static int close_pack_fd(struct packed_git *p)
949 {
950 if (p->pack_fd < 0)
951 return 0;
952
953 close(p->pack_fd);
954 pack_open_fds--;
955 p->pack_fd = -1;
956
957 return 1;
958 }
959
/* Release everything held open for pack p: its windows, its descriptor, then its index. */
static void close_pack(struct packed_git *p)
{
	close_pack_windows(p);
	(void)close_pack_fd(p);
	close_pack_index(p);
}
966
967 void close_all_packs(void)
968 {
969 struct packed_git *p;
970
971 for (p = packed_git; p; p = p->next)
972 if (p->do_not_close)
973 die("BUG: want to close pack marked 'do-not-close'");
974 else
975 close_pack(p);
976 }
977
978
979 /*
980 * The LRU pack is the one with the oldest MRU window, preferring packs
981 * with no used windows, or the oldest mtime if it has no windows allocated.
982 */
983 static void find_lru_pack(struct packed_git *p, struct packed_git **lru_p, struct pack_window **mru_w, int *accept_windows_inuse)
984 {
985 struct pack_window *w, *this_mru_w;
986 int has_windows_inuse = 0;
987
988 /*
989 * Reject this pack if it has windows and the previously selected
990 * one does not. If this pack does not have windows, reject
991 * it if the pack file is newer than the previously selected one.
992 */
993 if (*lru_p && !*mru_w && (p->windows || p->mtime > (*lru_p)->mtime))
994 return;
995
996 for (w = this_mru_w = p->windows; w; w = w->next) {
997 /*
998 * Reject this pack if any of its windows are in use,
999 * but the previously selected pack did not have any
1000 * inuse windows. Otherwise, record that this pack
1001 * has windows in use.
1002 */
1003 if (w->inuse_cnt) {
1004 if (*accept_windows_inuse)
1005 has_windows_inuse = 1;
1006 else
1007 return;
1008 }
1009
1010 if (w->last_used > this_mru_w->last_used)
1011 this_mru_w = w;
1012
1013 /*
1014 * Reject this pack if it has windows that have been
1015 * used more recently than the previously selected pack.
1016 * If the previously selected pack had windows inuse and
1017 * we have not encountered a window in this pack that is
1018 * inuse, skip this check since we prefer a pack with no
1019 * inuse windows to one that has inuse windows.
1020 */
1021 if (*mru_w && *accept_windows_inuse == has_windows_inuse &&
1022 this_mru_w->last_used > (*mru_w)->last_used)
1023 return;
1024 }
1025
1026 /*
1027 * Select this pack.
1028 */
1029 *mru_w = this_mru_w;
1030 *lru_p = p;
1031 *accept_windows_inuse = has_windows_inuse;
1032 }
1033
1034 static int close_one_pack(void)
1035 {
1036 struct packed_git *p, *lru_p = NULL;
1037 struct pack_window *mru_w = NULL;
1038 int accept_windows_inuse = 1;
1039
1040 for (p = packed_git; p; p = p->next) {
1041 if (p->pack_fd == -1)
1042 continue;
1043 find_lru_pack(p, &lru_p, &mru_w, &accept_windows_inuse);
1044 }
1045
1046 if (lru_p)
1047 return close_pack_fd(lru_p);
1048
1049 return 0;
1050 }
1051
1052 void unuse_pack(struct pack_window **w_cursor)
1053 {
1054 struct pack_window *w = *w_cursor;
1055 if (w) {
1056 w->inuse_cnt--;
1057 *w_cursor = NULL;
1058 }
1059 }
1060
1061 void close_pack_index(struct packed_git *p)
1062 {
1063 if (p->index_data) {
1064 munmap((void *)p->index_data, p->index_size);
1065 p->index_data = NULL;
1066 }
1067 }
1068
/*
 * Return the limit on the number of open file descriptors, using the
 * first source that is available and yields an answer: getrlimit(),
 * then sysconf(), then OPEN_MAX.  Returns 1 when none of them does.
 */
static unsigned int get_max_fd_limit(void)
{
#ifdef RLIMIT_NOFILE
	{
		struct rlimit nofile;

		if (getrlimit(RLIMIT_NOFILE, &nofile) == 0)
			return nofile.rlim_cur;
	}
#endif

#ifdef _SC_OPEN_MAX
	{
		long sc_limit = sysconf(_SC_OPEN_MAX);

		if (sc_limit > 0)
			return sc_limit;
		/*
		 * Otherwise, we got -1 for one of the two
		 * reasons:
		 *
		 * (1) sysconf() did not understand _SC_OPEN_MAX
		 *     and signaled an error with -1; or
		 * (2) sysconf() said there is no limit.
		 *
		 * We _could_ clear errno before calling sysconf() to
		 * tell these two cases apart and return a huge number
		 * in the latter case to let the caller cap it to a
		 * value that is not so selfish, but letting the
		 * fallback OPEN_MAX codepath take care of these cases
		 * is a lot simpler.
		 */
	}
#endif

#ifdef OPEN_MAX
	return OPEN_MAX;
#else
	return 1; /* see the caller ;-) */
#endif
}
1109
1110 /*
1111 * Do not call this directly as this leaks p->pack_fd on error return;
1112 * call open_packed_git() instead.
1113 */
1114 static int open_packed_git_1(struct packed_git *p)
1115 {
1116 struct stat st;
1117 struct pack_header hdr;
1118 unsigned char sha1[20];
1119 unsigned char *idx_sha1;
1120 long fd_flag;
1121
1122 if (!p->index_data && open_pack_index(p))
1123 return error("packfile %s index unavailable", p->pack_name);
1124
1125 if (!pack_max_fds) {
1126 unsigned int max_fds = get_max_fd_limit();
1127
1128 /* Save 3 for stdin/stdout/stderr, 22 for work */
1129 if (25 < max_fds)
1130 pack_max_fds = max_fds - 25;
1131 else
1132 pack_max_fds = 1;
1133 }
1134
1135 while (pack_max_fds <= pack_open_fds && close_one_pack())
1136 ; /* nothing */
1137
1138 p->pack_fd = git_open(p->pack_name);
1139 if (p->pack_fd < 0 || fstat(p->pack_fd, &st))
1140 return -1;
1141 pack_open_fds++;
1142
1143 /* If we created the struct before we had the pack we lack size. */
1144 if (!p->pack_size) {
1145 if (!S_ISREG(st.st_mode))
1146 return error("packfile %s not a regular file", p->pack_name);
1147 p->pack_size = st.st_size;
1148 } else if (p->pack_size != st.st_size)
1149 return error("packfile %s size changed", p->pack_name);
1150
1151 /* We leave these file descriptors open with sliding mmap;
1152 * there is no point keeping them open across exec(), though.
1153 */
1154 fd_flag = fcntl(p->pack_fd, F_GETFD, 0);
1155 if (fd_flag < 0)
1156 return error("cannot determine file descriptor flags");
1157 fd_flag |= FD_CLOEXEC;
1158 if (fcntl(p->pack_fd, F_SETFD, fd_flag) == -1)
1159 return error("cannot set FD_CLOEXEC");
1160
1161 /* Verify we recognize this pack file format. */
1162 if (read_in_full(p->pack_fd, &hdr, sizeof(hdr)) != sizeof(hdr))
1163 return error("file %s is far too short to be a packfile", p->pack_name);
1164 if (hdr.hdr_signature != htonl(PACK_SIGNATURE))
1165 return error("file %s is not a GIT packfile", p->pack_name);
1166 if (!pack_version_ok(hdr.hdr_version))
1167 return error("packfile %s is version %"PRIu32" and not"
1168 " supported (try upgrading GIT to a newer version)",
1169 p->pack_name, ntohl(hdr.hdr_version));
1170
1171 /* Verify the pack matches its index. */
1172 if (p->num_objects != ntohl(hdr.hdr_entries))
1173 return error("packfile %s claims to have %"PRIu32" objects"
1174 " while index indicates %"PRIu32" objects",
1175 p->pack_name, ntohl(hdr.hdr_entries),
1176 p->num_objects);
1177 if (lseek(p->pack_fd, p->pack_size - sizeof(sha1), SEEK_SET) == -1)
1178 return error("end of packfile %s is unavailable", p->pack_name);
1179 if (read_in_full(p->pack_fd, sha1, sizeof(sha1)) != sizeof(sha1))
1180 return error("packfile %s signature is unavailable", p->pack_name);
1181 idx_sha1 = ((unsigned char *)p->index_data) + p->index_size - 40;
1182 if (hashcmp(sha1, idx_sha1))
1183 return error("packfile %s does not match index", p->pack_name);
1184 return 0;
1185 }
1186
/*
 * Open the packfile behind "p" via open_packed_git_1().  When that
 * fails, close_pack_fd() is called on "p" before the failure is
 * reported.  Returns 0 on success and -1 on failure.
 */
static int open_packed_git(struct packed_git *p)
{
	int failed = open_packed_git_1(p);

	if (failed) {
		close_pack_fd(p);
		return -1;
	}
	return 0;
}
1194
1195 static int in_window(struct pack_window *win, off_t offset)
1196 {
1197 /* We must promise at least 20 bytes (one hash) after the
1198 * offset is available from this window, otherwise the offset
1199 * is not actually in this window and a different window (which
1200 * has that one hash excess) must be used. This is to support
1201 * the object header and delta base parsing routines below.
1202 */
1203 off_t win_off = win->offset;
1204 return win_off <= offset
1205 && (offset + 20) <= (win_off + win->len);
1206 }
1207
1208 unsigned char *use_pack(struct packed_git *p,
1209 struct pack_window **w_cursor,
1210 off_t offset,
1211 unsigned long *left)
1212 {
1213 struct pack_window *win = *w_cursor;
1214
1215 /* Since packfiles end in a hash of their content and it's
1216 * pointless to ask for an offset into the middle of that
1217 * hash, and the in_window function above wouldn't match
1218 * don't allow an offset too close to the end of the file.
1219 */
1220 if (!p->pack_size && p->pack_fd == -1 && open_packed_git(p))
1221 die("packfile %s cannot be accessed", p->pack_name);
1222 if (offset > (p->pack_size - 20))
1223 die("offset beyond end of packfile (truncated pack?)");
1224 if (offset < 0)
1225 die(_("offset before end of packfile (broken .idx?)"));
1226
1227 if (!win || !in_window(win, offset)) {
1228 if (win)
1229 win->inuse_cnt--;
1230 for (win = p->windows; win; win = win->next) {
1231 if (in_window(win, offset))
1232 break;
1233 }
1234 if (!win) {
1235 size_t window_align = packed_git_window_size / 2;
1236 off_t len;
1237
1238 if (p->pack_fd == -1 && open_packed_git(p))
1239 die("packfile %s cannot be accessed", p->pack_name);
1240
1241 win = xcalloc(1, sizeof(*win));
1242 win->offset = (offset / window_align) * window_align;
1243 len = p->pack_size - win->offset;
1244 if (len > packed_git_window_size)
1245 len = packed_git_window_size;
1246 win->len = (size_t)len;
1247 pack_mapped += win->len;
1248 while (packed_git_limit < pack_mapped
1249 && unuse_one_window(p))
1250 ; /* nothing */
1251 win->base = xmmap(NULL, win->len,
1252 PROT_READ, MAP_PRIVATE,
1253 p->pack_fd, win->offset);
1254 if (win->base == MAP_FAILED)
1255 die_errno("packfile %s cannot be mapped",
1256 p->pack_name);
1257 if (!win->offset && win->len == p->pack_size
1258 && !p->do_not_close)
1259 close_pack_fd(p);
1260 pack_mmap_calls++;
1261 pack_open_windows++;
1262 if (pack_mapped > peak_pack_mapped)
1263 peak_pack_mapped = pack_mapped;
1264 if (pack_open_windows > peak_pack_open_windows)
1265 peak_pack_open_windows = pack_open_windows;
1266 win->next = p->windows;
1267 p->windows = win;
1268 }
1269 }
1270 if (win != *w_cursor) {
1271 win->last_used = pack_used_ctr++;
1272 win->inuse_cnt++;
1273 *w_cursor = win;
1274 }
1275 offset -= win->offset;
1276 if (left)
1277 *left = win->len - xsize_t(offset);
1278 return win->base + offset;
1279 }
1280
1281 static struct packed_git *alloc_packed_git(int extra)
1282 {
1283 struct packed_git *p = xmalloc(st_add(sizeof(*p), extra));
1284 memset(p, 0, sizeof(*p));
1285 p->pack_fd = -1;
1286 return p;
1287 }
1288
/*
 * Callback handed to set_try_to_free_routine() by add_packed_git();
 * it simply forwards the requested size to release_pack_memory().
 */
static void try_to_free_pack_memory(size_t size)
{
	release_pack_memory(size);
}
1293
1294 struct packed_git *add_packed_git(const char *path, size_t path_len, int local)
1295 {
1296 static int have_set_try_to_free_routine;
1297 struct stat st;
1298 size_t alloc;
1299 struct packed_git *p;
1300
1301 if (!have_set_try_to_free_routine) {
1302 have_set_try_to_free_routine = 1;
1303 set_try_to_free_routine(try_to_free_pack_memory);
1304 }
1305
1306 /*
1307 * Make sure a corresponding .pack file exists and that
1308 * the index looks sane.
1309 */
1310 if (!strip_suffix_mem(path, &path_len, ".idx"))
1311 return NULL;
1312
1313 /*
1314 * ".pack" is long enough to hold any suffix we're adding (and
1315 * the use xsnprintf double-checks that)
1316 */
1317 alloc = st_add3(path_len, strlen(".pack"), 1);
1318 p = alloc_packed_git(alloc);
1319 memcpy(p->pack_name, path, path_len);
1320
1321 xsnprintf(p->pack_name + path_len, alloc - path_len, ".keep");
1322 if (!access(p->pack_name, F_OK))
1323 p->pack_keep = 1;
1324
1325 xsnprintf(p->pack_name + path_len, alloc - path_len, ".pack");
1326 if (stat(p->pack_name, &st) || !S_ISREG(st.st_mode)) {
1327 free(p);
1328 return NULL;
1329 }
1330
1331 /* ok, it looks sane as far as we can check without
1332 * actually mapping the pack file.
1333 */
1334 p->pack_size = st.st_size;
1335 p->pack_local = local;
1336 p->mtime = st.st_mtime;
1337 if (path_len < 40 || get_sha1_hex(path + path_len - 40, p->sha1))
1338 hashclr(p->sha1);
1339 return p;
1340 }
1341
1342 struct packed_git *parse_pack_index(unsigned char *sha1, const char *idx_path)
1343 {
1344 const char *path = sha1_pack_name(sha1);
1345 size_t alloc = st_add(strlen(path), 1);
1346 struct packed_git *p = alloc_packed_git(alloc);
1347
1348 memcpy(p->pack_name, path, alloc); /* includes NUL */
1349 hashcpy(p->sha1, sha1);
1350 if (check_packed_git_idx(idx_path, p)) {
1351 free(p);
1352 return NULL;
1353 }
1354
1355 return p;
1356 }
1357
1358 void install_packed_git(struct packed_git *pack)
1359 {
1360 if (pack->pack_fd != -1)
1361 pack_open_fds++;
1362
1363 pack->next = packed_git;
1364 packed_git = pack;
1365 }
1366
1367 void (*report_garbage)(unsigned seen_bits, const char *path);
1368
1369 static void report_helper(const struct string_list *list,
1370 int seen_bits, int first, int last)
1371 {
1372 if (seen_bits == (PACKDIR_FILE_PACK|PACKDIR_FILE_IDX))
1373 return;
1374
1375 for (; first < last; first++)
1376 report_garbage(seen_bits, list->items[first].string);
1377 }
1378
1379 static void report_pack_garbage(struct string_list *list)
1380 {
1381 int i, baselen = -1, first = 0, seen_bits = 0;
1382
1383 if (!report_garbage)
1384 return;
1385
1386 string_list_sort(list);
1387
1388 for (i = 0; i < list->nr; i++) {
1389 const char *path = list->items[i].string;
1390 if (baselen != -1 &&
1391 strncmp(path, list->items[first].string, baselen)) {
1392 report_helper(list, seen_bits, first, i);
1393 baselen = -1;
1394 seen_bits = 0;
1395 }
1396 if (baselen == -1) {
1397 const char *dot = strrchr(path, '.');
1398 if (!dot) {
1399 report_garbage(PACKDIR_FILE_GARBAGE, path);
1400 continue;
1401 }
1402 baselen = dot - path + 1;
1403 first = i;
1404 }
1405 if (!strcmp(path + baselen, "pack"))
1406 seen_bits |= 1;
1407 else if (!strcmp(path + baselen, "idx"))
1408 seen_bits |= 2;
1409 }
1410 report_helper(list, seen_bits, first, list->nr);
1411 }
1412
1413 static void prepare_packed_git_one(char *objdir, int local)
1414 {
1415 struct strbuf path = STRBUF_INIT;
1416 size_t dirnamelen;
1417 DIR *dir;
1418 struct dirent *de;
1419 struct string_list garbage = STRING_LIST_INIT_DUP;
1420
1421 strbuf_addstr(&path, objdir);
1422 strbuf_addstr(&path, "/pack");
1423 dir = opendir(path.buf);
1424 if (!dir) {
1425 if (errno != ENOENT)
1426 error_errno("unable to open object pack directory: %s",
1427 path.buf);
1428 strbuf_release(&path);
1429 return;
1430 }
1431 strbuf_addch(&path, '/');
1432 dirnamelen = path.len;
1433 while ((de = readdir(dir)) != NULL) {
1434 struct packed_git *p;
1435 size_t base_len;
1436
1437 if (is_dot_or_dotdot(de->d_name))
1438 continue;
1439
1440 strbuf_setlen(&path, dirnamelen);
1441 strbuf_addstr(&path, de->d_name);
1442
1443 base_len = path.len;
1444 if (strip_suffix_mem(path.buf, &base_len, ".idx")) {
1445 /* Don't reopen a pack we already have. */
1446 for (p = packed_git; p; p = p->next) {
1447 size_t len;
1448 if (strip_suffix(p->pack_name, ".pack", &len) &&
1449 len == base_len &&
1450 !memcmp(p->pack_name, path.buf, len))
1451 break;
1452 }
1453 if (p == NULL &&
1454 /*
1455 * See if it really is a valid .idx file with
1456 * corresponding .pack file that we can map.
1457 */
1458 (p = add_packed_git(path.buf, path.len, local)) != NULL)
1459 install_packed_git(p);
1460 }
1461
1462 if (!report_garbage)
1463 continue;
1464
1465 if (ends_with(de->d_name, ".idx") ||
1466 ends_with(de->d_name, ".pack") ||
1467 ends_with(de->d_name, ".bitmap") ||
1468 ends_with(de->d_name, ".keep"))
1469 string_list_append(&garbage, path.buf);
1470 else
1471 report_garbage(PACKDIR_FILE_GARBAGE, path.buf);
1472 }
1473 closedir(dir);
1474 report_pack_garbage(&garbage);
1475 string_list_clear(&garbage, 0);
1476 strbuf_release(&path);
1477 }
1478
1479 static int approximate_object_count_valid;
1480
1481 /*
1482 * Give a fast, rough count of the number of objects in the repository. This
1483 * ignores loose objects completely. If you have a lot of them, then either
1484 * you should repack because your performance will be awful, or they are
1485 * all unreachable objects about to be pruned, in which case they're not really
1486 * interesting as a measure of repo size in the first place.
1487 */
1488 unsigned long approximate_object_count(void)
1489 {
1490 static unsigned long count;
1491 if (!approximate_object_count_valid) {
1492 struct packed_git *p;
1493
1494 prepare_packed_git();
1495 count = 0;
1496 for (p = packed_git; p; p = p->next) {
1497 if (open_pack_index(p))
1498 continue;
1499 count += p->num_objects;
1500 }
1501 }
1502 return count;
1503 }
1504
1505 static void *get_next_packed_git(const void *p)
1506 {
1507 return ((const struct packed_git *)p)->next;
1508 }
1509
1510 static void set_next_packed_git(void *p, void *next)
1511 {
1512 ((struct packed_git *)p)->next = next;
1513 }
1514
1515 static int sort_pack(const void *a_, const void *b_)
1516 {
1517 const struct packed_git *a = a_;
1518 const struct packed_git *b = b_;
1519 int st;
1520
1521 /*
1522 * Local packs tend to contain objects specific to our
1523 * variant of the project than remote ones. In addition,
1524 * remote ones could be on a network mounted filesystem.
1525 * Favor local ones for these reasons.
1526 */
1527 st = a->pack_local - b->pack_local;
1528 if (st)
1529 return -st;
1530
1531 /*
1532 * Younger packs tend to contain more recent objects,
1533 * and more recent objects tend to get accessed more
1534 * often.
1535 */
1536 if (a->mtime < b->mtime)
1537 return 1;
1538 else if (a->mtime == b->mtime)
1539 return 0;
1540 return -1;
1541 }
1542
1543 static void rearrange_packed_git(void)
1544 {
1545 packed_git = llist_mergesort(packed_git, get_next_packed_git,
1546 set_next_packed_git, sort_pack);
1547 }
1548
1549 static void prepare_packed_git_mru(void)
1550 {
1551 struct packed_git *p;
1552
1553 mru_clear(packed_git_mru);
1554 for (p = packed_git; p; p = p->next)
1555 mru_append(packed_git_mru, p);
1556 }
1557
1558 static int prepare_packed_git_run_once = 0;
1559 void prepare_packed_git(void)
1560 {
1561 struct alternate_object_database *alt;
1562
1563 if (prepare_packed_git_run_once)
1564 return;
1565 prepare_packed_git_one(get_object_directory(), 1);
1566 prepare_alt_odb();
1567 for (alt = alt_odb_list; alt; alt = alt->next)
1568 prepare_packed_git_one(alt->path, 0);
1569 rearrange_packed_git();
1570 prepare_packed_git_mru();
1571 prepare_packed_git_run_once = 1;
1572 }
1573
1574 void reprepare_packed_git(void)
1575 {
1576 approximate_object_count_valid = 0;
1577 prepare_packed_git_run_once = 0;
1578 prepare_packed_git();
1579 }
1580
1581 static void mark_bad_packed_object(struct packed_git *p,
1582 const unsigned char *sha1)
1583 {
1584 unsigned i;
1585 for (i = 0; i < p->num_bad_objects; i++)
1586 if (!hashcmp(sha1, p->bad_object_sha1 + GIT_SHA1_RAWSZ * i))
1587 return;
1588 p->bad_object_sha1 = xrealloc(p->bad_object_sha1,
1589 st_mult(GIT_MAX_RAWSZ,
1590 st_add(p->num_bad_objects, 1)));
1591 hashcpy(p->bad_object_sha1 + GIT_SHA1_RAWSZ * p->num_bad_objects, sha1);
1592 p->num_bad_objects++;
1593 }
1594
1595 static const struct packed_git *has_packed_and_bad(const unsigned char *sha1)
1596 {
1597 struct packed_git *p;
1598 unsigned i;
1599
1600 for (p = packed_git; p; p = p->next)
1601 for (i = 0; i < p->num_bad_objects; i++)
1602 if (!hashcmp(sha1, p->bad_object_sha1 + 20 * i))
1603 return p;
1604 return NULL;
1605 }
1606
1607 /*
1608 * With an in-core object data in "map", rehash it to make sure the
1609 * object name actually matches "sha1" to detect object corruption.
1610 * With "map" == NULL, try reading the object named with "sha1" using
1611 * the streaming interface and rehash it to do the same.
1612 */
1613 int check_sha1_signature(const unsigned char *sha1, void *map,
1614 unsigned long size, const char *type)
1615 {
1616 unsigned char real_sha1[20];
1617 enum object_type obj_type;
1618 struct git_istream *st;
1619 git_SHA_CTX c;
1620 char hdr[32];
1621 int hdrlen;
1622
1623 if (map) {
1624 hash_sha1_file(map, size, type, real_sha1);
1625 return hashcmp(sha1, real_sha1) ? -1 : 0;
1626 }
1627
1628 st = open_istream(sha1, &obj_type, &size, NULL);
1629 if (!st)
1630 return -1;
1631
1632 /* Generate the header */
1633 hdrlen = xsnprintf(hdr, sizeof(hdr), "%s %lu", typename(obj_type), size) + 1;
1634
1635 /* Sha1.. */
1636 git_SHA1_Init(&c);
1637 git_SHA1_Update(&c, hdr, hdrlen);
1638 for (;;) {
1639 char buf[1024 * 16];
1640 ssize_t readlen = read_istream(st, buf, sizeof(buf));
1641
1642 if (readlen < 0) {
1643 close_istream(st);
1644 return -1;
1645 }
1646 if (!readlen)
1647 break;
1648 git_SHA1_Update(&c, buf, readlen);
1649 }
1650 git_SHA1_Final(real_sha1, &c);
1651 close_istream(st);
1652 return hashcmp(sha1, real_sha1) ? -1 : 0;
1653 }
1654
1655 int git_open_cloexec(const char *name, int flags)
1656 {
1657 int fd;
1658 static int o_cloexec = O_CLOEXEC;
1659
1660 fd = open(name, flags | o_cloexec);
1661 if ((o_cloexec & O_CLOEXEC) && fd < 0 && errno == EINVAL) {
1662 /* Try again w/o O_CLOEXEC: the kernel might not support it */
1663 o_cloexec &= ~O_CLOEXEC;
1664 fd = open(name, flags | o_cloexec);
1665 }
1666
1667 #if defined(F_GETFD) && defined(F_SETFD) && defined(FD_CLOEXEC)
1668 {
1669 static int fd_cloexec = FD_CLOEXEC;
1670
1671 if (!o_cloexec && 0 <= fd && fd_cloexec) {
1672 /* Opened w/o O_CLOEXEC? try with fcntl(2) to add it */
1673 int flags = fcntl(fd, F_GETFD);
1674 if (fcntl(fd, F_SETFD, flags | fd_cloexec))
1675 fd_cloexec = 0;
1676 }
1677 }
1678 #endif
1679 return fd;
1680 }
1681
1682 /*
1683 * Find "sha1" as a loose object in the local repository or in an alternate.
1684 * Returns 0 on success, negative on failure.
1685 *
1686 * The "path" out-parameter will give the path of the object we found (if any).
1687 * Note that it may point to static storage and is only valid until another
1688 * call to sha1_file_name(), etc.
1689 */
1690 static int stat_sha1_file(const unsigned char *sha1, struct stat *st,
1691 const char **path)
1692 {
1693 struct alternate_object_database *alt;
1694
1695 *path = sha1_file_name(sha1);
1696 if (!lstat(*path, st))
1697 return 0;
1698
1699 prepare_alt_odb();
1700 errno = ENOENT;
1701 for (alt = alt_odb_list; alt; alt = alt->next) {
1702 *path = alt_sha1_path(alt, sha1);
1703 if (!lstat(*path, st))
1704 return 0;
1705 }
1706
1707 return -1;
1708 }
1709
1710 /*
1711 * Like stat_sha1_file(), but actually open the object and return the
1712 * descriptor. See the caveats on the "path" parameter above.
1713 */
1714 static int open_sha1_file(const unsigned char *sha1, const char **path)
1715 {
1716 int fd;
1717 struct alternate_object_database *alt;
1718 int most_interesting_errno;
1719
1720 *path = sha1_file_name(sha1);
1721 fd = git_open(*path);
1722 if (fd >= 0)
1723 return fd;
1724 most_interesting_errno = errno;
1725
1726 prepare_alt_odb();
1727 for (alt = alt_odb_list; alt; alt = alt->next) {
1728 *path = alt_sha1_path(alt, sha1);
1729 fd = git_open(*path);
1730 if (fd >= 0)
1731 return fd;
1732 if (most_interesting_errno == ENOENT)
1733 most_interesting_errno = errno;
1734 }
1735 errno = most_interesting_errno;
1736 return -1;
1737 }
1738
/*
 * Map the loose object at "path" if it is not NULL, or the path found by
 * searching for a loose object named "sha1".
 *
 * On success the mapping is returned and "*size" holds the file size.
 * NULL is returned when the file cannot be opened or fstat()ed, or
 * when it is empty (which is also reported with error()).  The file
 * descriptor is closed on every path before returning.
 */
static void *map_sha1_file_1(const char *path,
			     const unsigned char *sha1,
			     unsigned long *size)
{
	void *map = NULL;
	struct stat st;
	int fd;

	if (path)
		fd = git_open(path);
	else
		fd = open_sha1_file(sha1, &path);
	if (fd < 0)
		return NULL;

	if (!fstat(fd, &st)) {
		*size = xsize_t(st.st_size);
		if (*size)
			map = xmmap(NULL, *size, PROT_READ, MAP_PRIVATE, fd, 0);
		else
			/* mmap() is forbidden on empty files */
			error("object file %s is empty", path);
	}
	/*
	 * Close the descriptor on the empty-file path as well; returning
	 * early there used to leak it.
	 */
	close(fd);
	return map;
}
1771
/*
 * Map the loose object named "sha1", letting map_sha1_file_1() locate
 * its path.  "*size" receives the size of the mapping.
 */
void *map_sha1_file(const unsigned char *sha1, unsigned long *size)
{
	const char *no_path = NULL;

	return map_sha1_file_1(no_path, sha1, size);
}
1776
1777 unsigned long unpack_object_header_buffer(const unsigned char *buf,
1778 unsigned long len, enum object_type *type, unsigned long *sizep)
1779 {
1780 unsigned shift;
1781 unsigned long size, c;
1782 unsigned long used = 0;
1783
1784 c = buf[used++];
1785 *type = (c >> 4) & 7;
1786 size = c & 15;
1787 shift = 4;
1788 while (c & 0x80) {
1789 if (len <= used || bitsizeof(long) <= shift) {
1790 error("bad object header");
1791 size = used = 0;
1792 break;
1793 }
1794 c = buf[used++];
1795 size += (c & 0x7f) << shift;
1796 shift += 7;
1797 }
1798 *sizep = size;
1799 return used;
1800 }
1801
1802 static int unpack_sha1_short_header(git_zstream *stream,
1803 unsigned char *map, unsigned long mapsize,
1804 void *buffer, unsigned long bufsiz)
1805 {
1806 /* Get the data stream */
1807 memset(stream, 0, sizeof(*stream));
1808 stream->next_in = map;
1809 stream->avail_in = mapsize;
1810 stream->next_out = buffer;
1811 stream->avail_out = bufsiz;
1812
1813 git_inflate_init(stream);
1814 return git_inflate(stream, 0);
1815 }
1816
1817 int unpack_sha1_header(git_zstream *stream,
1818 unsigned char *map, unsigned long mapsize,
1819 void *buffer, unsigned long bufsiz)
1820 {
1821 int status = unpack_sha1_short_header(stream, map, mapsize,
1822 buffer, bufsiz);
1823
1824 if (status < Z_OK)
1825 return status;
1826
1827 /* Make sure we have the terminating NUL */
1828 if (!memchr(buffer, '\0', stream->next_out - (unsigned char *)buffer))
1829 return -1;
1830 return 0;
1831 }
1832
1833 static int unpack_sha1_header_to_strbuf(git_zstream *stream, unsigned char *map,
1834 unsigned long mapsize, void *buffer,
1835 unsigned long bufsiz, struct strbuf *header)
1836 {
1837 int status;
1838
1839 status = unpack_sha1_short_header(stream, map, mapsize, buffer, bufsiz);
1840 if (status < Z_OK)
1841 return -1;
1842
1843 /*
1844 * Check if entire header is unpacked in the first iteration.
1845 */
1846 if (memchr(buffer, '\0', stream->next_out - (unsigned char *)buffer))
1847 return 0;
1848
1849 /*
1850 * buffer[0..bufsiz] was not large enough. Copy the partial
1851 * result out to header, and then append the result of further
1852 * reading the stream.
1853 */
1854 strbuf_add(header, buffer, stream->next_out - (unsigned char *)buffer);
1855 stream->next_out = buffer;
1856 stream->avail_out = bufsiz;
1857
1858 do {
1859 status = git_inflate(stream, 0);
1860 strbuf_add(header, buffer, stream->next_out - (unsigned char *)buffer);
1861 if (memchr(buffer, '\0', stream->next_out - (unsigned char *)buffer))
1862 return 0;
1863 stream->next_out = buffer;
1864 stream->avail_out = bufsiz;
1865 } while (status != Z_STREAM_END);
1866 return -1;
1867 }
1868
1869 static void *unpack_sha1_rest(git_zstream *stream, void *buffer, unsigned long size, const unsigned char *sha1)
1870 {
1871 int bytes = strlen(buffer) + 1;
1872 unsigned char *buf = xmallocz(size);
1873 unsigned long n;
1874 int status = Z_OK;
1875
1876 n = stream->total_out - bytes;
1877 if (n > size)
1878 n = size;
1879 memcpy(buf, (char *) buffer + bytes, n);
1880 bytes = n;
1881 if (bytes <= size) {
1882 /*
1883 * The above condition must be (bytes <= size), not
1884 * (bytes < size). In other words, even though we
1885 * expect no more output and set avail_out to zero,
1886 * the input zlib stream may have bytes that express
1887 * "this concludes the stream", and we *do* want to
1888 * eat that input.
1889 *
1890 * Otherwise we would not be able to test that we
1891 * consumed all the input to reach the expected size;
1892 * we also want to check that zlib tells us that all
1893 * went well with status == Z_STREAM_END at the end.
1894 */
1895 stream->next_out = buf + bytes;
1896 stream->avail_out = size - bytes;
1897 while (status == Z_OK)
1898 status = git_inflate(stream, Z_FINISH);
1899 }
1900 if (status == Z_STREAM_END && !stream->avail_in) {
1901 git_inflate_end(stream);
1902 return buf;
1903 }
1904
1905 if (status < 0)
1906 error("corrupt loose object '%s'", sha1_to_hex(sha1));
1907 else if (stream->avail_in)
1908 error("garbage at end of loose object '%s'",
1909 sha1_to_hex(sha1));
1910 free(buf);
1911 return NULL;
1912 }
1913
1914 /*
1915 * We used to just use "sscanf()", but that's actually way
1916 * too permissive for what we want to check. So do an anal
1917 * object header parse by hand.
1918 */
1919 static int parse_sha1_header_extended(const char *hdr, struct object_info *oi,
1920 unsigned int flags)
1921 {
1922 const char *type_buf = hdr;
1923 unsigned long size;
1924 int type, type_len = 0;
1925
1926 /*
1927 * The type can be of any size but is followed by
1928 * a space.
1929 */
1930 for (;;) {
1931 char c = *hdr++;
1932 if (!c)
1933 return -1;
1934 if (c == ' ')
1935 break;
1936 type_len++;
1937 }
1938
1939 type = type_from_string_gently(type_buf, type_len, 1);
1940 if (oi->typename)
1941 strbuf_add(oi->typename, type_buf, type_len);
1942 /*
1943 * Set type to 0 if its an unknown object and
1944 * we're obtaining the type using '--allow-unknown-type'
1945 * option.
1946 */
1947 if ((flags & OBJECT_INFO_ALLOW_UNKNOWN_TYPE) && (type < 0))
1948 type = 0;
1949 else if (type < 0)
1950 die("invalid object type");
1951 if (oi->typep)
1952 *oi->typep = type;
1953
1954 /*
1955 * The length must follow immediately, and be in canonical
1956 * decimal format (ie "010" is not valid).
1957 */
1958 size = *hdr++ - '0';
1959 if (size > 9)
1960 return -1;
1961 if (size) {
1962 for (;;) {
1963 unsigned long c = *hdr - '0';
1964 if (c > 9)
1965 break;
1966 hdr++;
1967 size = size * 10 + c;
1968 }
1969 }
1970
1971 if (oi->sizep)
1972 *oi->sizep = size;
1973
1974 /*
1975 * The length must be followed by a zero byte
1976 */
1977 return *hdr ? -1 : type;
1978 }
1979
1980 int parse_sha1_header(const char *hdr, unsigned long *sizep)
1981 {
1982 struct object_info oi = OBJECT_INFO_INIT;
1983
1984 oi.sizep = sizep;
1985 return parse_sha1_header_extended(hdr, &oi, 0);
1986 }
1987
1988 unsigned long get_size_from_delta(struct packed_git *p,
1989 struct pack_window **w_curs,
1990 off_t curpos)
1991 {
1992 const unsigned char *data;
1993 unsigned char delta_head[20], *in;
1994 git_zstream stream;
1995 int st;
1996
1997 memset(&stream, 0, sizeof(stream));
1998 stream.next_out = delta_head;
1999 stream.avail_out = sizeof(delta_head);
2000
2001 git_inflate_init(&stream);
2002 do {
2003 in = use_pack(p, w_curs, curpos, &stream.avail_in);
2004 stream.next_in = in;
2005 st = git_inflate(&stream, Z_FINISH);
2006 curpos += stream.next_in - in;
2007 } while ((st == Z_OK || st == Z_BUF_ERROR) &&
2008 stream.total_out < sizeof(delta_head));
2009 git_inflate_end(&stream);
2010 if ((st != Z_STREAM_END) && stream.total_out != sizeof(delta_head)) {
2011 error("delta data unpack-initial failed");
2012 return 0;
2013 }
2014
2015 /* Examine the initial part of the delta to figure out
2016 * the result size.
2017 */
2018 data = delta_head;
2019
2020 /* ignore base size */
2021 get_delta_hdr_size(&data, delta_head+sizeof(delta_head));
2022
2023 /* Read the result size */
2024 return get_delta_hdr_size(&data, delta_head+sizeof(delta_head));
2025 }
2026
2027 static off_t get_delta_base(struct packed_git *p,
2028 struct pack_window **w_curs,
2029 off_t *curpos,
2030 enum object_type type,
2031 off_t delta_obj_offset)
2032 {
2033 unsigned char *base_info = use_pack(p, w_curs, *curpos, NULL);
2034 off_t base_offset;
2035
2036 /* use_pack() assured us we have [base_info, base_info + 20)
2037 * as a range that we can look at without walking off the
2038 * end of the mapped window. Its actually the hash size
2039 * that is assured. An OFS_DELTA longer than the hash size
2040 * is stupid, as then a REF_DELTA would be smaller to store.
2041 */
2042 if (type == OBJ_OFS_DELTA) {
2043 unsigned used = 0;
2044 unsigned char c = base_info[used++];
2045 base_offset = c & 127;
2046 while (c & 128) {
2047 base_offset += 1;
2048 if (!base_offset || MSB(base_offset, 7))
2049 return 0; /* overflow */
2050 c = base_info[used++];
2051 base_offset = (base_offset << 7) + (c & 127);
2052 }
2053 base_offset = delta_obj_offset - base_offset;
2054 if (base_offset <= 0 || base_offset >= delta_obj_offset)
2055 return 0; /* out of bound */
2056 *curpos += used;
2057 } else if (type == OBJ_REF_DELTA) {
2058 /* The base entry _must_ be in the same pack */
2059 base_offset = find_pack_entry_one(base_info, p);
2060 *curpos += 20;
2061 } else
2062 die("I am totally screwed");
2063 return base_offset;
2064 }
2065
2066 /*
2067 * Like get_delta_base above, but we return the sha1 instead of the pack
2068 * offset. This means it is cheaper for REF deltas (we do not have to do
2069 * the final object lookup), but more expensive for OFS deltas (we
2070 * have to load the revidx to convert the offset back into a sha1).
2071 */
2072 static const unsigned char *get_delta_base_sha1(struct packed_git *p,
2073 struct pack_window **w_curs,
2074 off_t curpos,
2075 enum object_type type,
2076 off_t delta_obj_offset)
2077 {
2078 if (type == OBJ_REF_DELTA) {
2079 unsigned char *base = use_pack(p, w_curs, curpos, NULL);
2080 return base;
2081 } else if (type == OBJ_OFS_DELTA) {
2082 struct revindex_entry *revidx;
2083 off_t base_offset = get_delta_base(p, w_curs, &curpos,
2084 type, delta_obj_offset);
2085
2086 if (!base_offset)
2087 return NULL;
2088
2089 revidx = find_pack_revindex(p, base_offset);
2090 if (!revidx)
2091 return NULL;
2092
2093 return nth_packed_object_sha1(p, revidx->nr);
2094 } else
2095 return NULL;
2096 }
2097
2098 int unpack_object_header(struct packed_git *p,
2099 struct pack_window **w_curs,
2100 off_t *curpos,
2101 unsigned long *sizep)
2102 {
2103 unsigned char *base;
2104 unsigned long left;
2105 unsigned long used;
2106 enum object_type type;
2107
2108 /* use_pack() assures us we have [base, base + 20) available
2109 * as a range that we can look at. (Its actually the hash
2110 * size that is assured.) With our object header encoding
2111 * the maximum deflated object size is 2^137, which is just
2112 * insane, so we know won't exceed what we have been given.
2113 */
2114 base = use_pack(p, w_curs, *curpos, &left);
2115 used = unpack_object_header_buffer(base, left, &type, sizep);
2116 if (!used) {
2117 type = OBJ_BAD;
2118 } else
2119 *curpos += used;
2120
2121 return type;
2122 }
2123
2124 static int retry_bad_packed_offset(struct packed_git *p, off_t obj_offset)
2125 {
2126 int type;
2127 struct revindex_entry *revidx;
2128 const unsigned char *sha1;
2129 revidx = find_pack_revindex(p, obj_offset);
2130 if (!revidx)
2131 return OBJ_BAD;
2132 sha1 = nth_packed_object_sha1(p, revidx->nr);
2133 mark_bad_packed_object(p, sha1);
2134 type = sha1_object_info(sha1, NULL);
2135 if (type <= OBJ_NONE)
2136 return OBJ_BAD;
2137 return type;
2138 }
2139
2140 #define POI_STACK_PREALLOC 64
2141
2142 static enum object_type packed_to_object_type(struct packed_git *p,
2143 off_t obj_offset,
2144 enum object_type type,
2145 struct pack_window **w_curs,
2146 off_t curpos)
2147 {
2148 off_t small_poi_stack[POI_STACK_PREALLOC];
2149 off_t *poi_stack = small_poi_stack;
2150 int poi_stack_nr = 0, poi_stack_alloc = POI_STACK_PREALLOC;
2151
2152 while (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {
2153 off_t base_offset;
2154 unsigned long size;
2155 /* Push the object we're going to leave behind */
2156 if (poi_stack_nr >= poi_stack_alloc && poi_stack == small_poi_stack) {
2157 poi_stack_alloc = alloc_nr(poi_stack_nr);
2158 ALLOC_ARRAY(poi_stack, poi_stack_alloc);
2159 memcpy(poi_stack, small_poi_stack, sizeof(off_t)*poi_stack_nr);
2160 } else {
2161 ALLOC_GROW(poi_stack, poi_stack_nr+1, poi_stack_alloc);
2162 }
2163 poi_stack[poi_stack_nr++] = obj_offset;
2164 /* If parsing the base offset fails, just unwind */
2165 base_offset = get_delta_base(p, w_curs, &curpos, type, obj_offset);
2166 if (!base_offset)
2167 goto unwind;
2168 curpos = obj_offset = base_offset;
2169 type = unpack_object_header(p, w_curs, &curpos, &size);
2170 if (type <= OBJ_NONE) {
2171 /* If getting the base itself fails, we first
2172 * retry the base, otherwise unwind */
2173 type = retry_bad_packed_offset(p, base_offset);
2174 if (type > OBJ_NONE)
2175 goto out;
2176 goto unwind;
2177 }
2178 }
2179
2180 switch (type) {
2181 case OBJ_BAD:
2182 case OBJ_COMMIT:
2183 case OBJ_TREE:
2184 case OBJ_BLOB:
2185 case OBJ_TAG:
2186 break;
2187 default:
2188 error("unknown object type %i at offset %"PRIuMAX" in %s",
2189 type, (uintmax_t)obj_offset, p->pack_name);
2190 type = OBJ_BAD;
2191 }
2192
2193 out:
2194 if (poi_stack != small_poi_stack)
2195 free(poi_stack);
2196 return type;
2197
2198 unwind:
2199 while (poi_stack_nr) {
2200 obj_offset = poi_stack[--poi_stack_nr];
2201 type = retry_bad_packed_offset(p, obj_offset);
2202 if (type > OBJ_NONE)
2203 goto out;
2204 }
2205 type = OBJ_BAD;
2206 goto out;
2207 }
2208
2209 static struct hashmap delta_base_cache;
2210 static size_t delta_base_cached;
2211
2212 static LIST_HEAD(delta_base_cache_lru);
2213
2214 struct delta_base_cache_key {
2215 struct packed_git *p;
2216 off_t base_offset;
2217 };
2218
2219 struct delta_base_cache_entry {
2220 struct hashmap hash;
2221 struct delta_base_cache_key key;
2222 struct list_head lru;
2223 void *data;
2224 unsigned long size;
2225 enum object_type type;
2226 };
2227
/*
 * Hash value for the delta base cache, mixing the pack pointer with
 * the base offset.
 */
static unsigned int pack_entry_hash(struct packed_git *p, off_t base_offset)
{
	unsigned int from_pack = (unsigned int)(intptr_t)p;
	unsigned int from_offset = (unsigned int)base_offset;
	unsigned int hash = from_pack + from_offset;

	hash += (hash >> 8) + (hash >> 16);
	return hash;
}
2236
2237 static struct delta_base_cache_entry *
2238 get_delta_base_cache_entry(struct packed_git *p, off_t base_offset)
2239 {
2240 struct hashmap_entry entry;
2241 struct delta_base_cache_key key;
2242
2243 if (!delta_base_cache.cmpfn)
2244 return NULL;
2245
2246 hashmap_entry_init(&entry, pack_entry_hash(p, base_offset));
2247 key.p = p;
2248 key.base_offset = base_offset;
2249 return hashmap_get(&delta_base_cache, &entry, &key);
2250 }
2251
2252 static int delta_base_cache_key_eq(const struct delta_base_cache_key *a,
2253 const struct delta_base_cache_key *b)
2254 {
2255 return a->p == b->p && a->base_offset == b->base_offset;
2256 }
2257
2258 static int delta_base_cache_hash_cmp(const void *unused_cmp_data,
2259 const void *va, const void *vb,
2260 const void *vkey)
2261 {
2262 const struct delta_base_cache_entry *a = va, *b = vb;
2263 const struct delta_base_cache_key *key = vkey;
2264 if (key)
2265 return !delta_base_cache_key_eq(&a->key, key);
2266 else
2267 return !delta_base_cache_key_eq(&a->key, &b->key);
2268 }
2269
/* Return 1 if the base at "base_offset" in pack "p" is cached, else 0. */
static int in_delta_base_cache(struct packed_git *p, off_t base_offset)
{
	struct delta_base_cache_entry *ent;

	ent = get_delta_base_cache_entry(p, base_offset);
	return ent ? 1 : 0;
}
2274
2275 /*
2276 * Remove the entry from the cache, but do _not_ free the associated
2277 * entry data. The caller takes ownership of the "data" buffer, and
2278 * should copy out any fields it wants before detaching.
2279 */
2280 static void detach_delta_base_cache_entry(struct delta_base_cache_entry *ent)
2281 {
2282 hashmap_remove(&delta_base_cache, ent, &ent->key);
2283 list_del(&ent->lru);
2284 delta_base_cached -= ent->size;
2285 free(ent);
2286 }
2287
2288 static void *cache_or_unpack_entry(struct packed_git *p, off_t base_offset,
2289 unsigned long *base_size, enum object_type *type)
2290 {
2291 struct delta_base_cache_entry *ent;
2292
2293 ent = get_delta_base_cache_entry(p, base_offset);
2294 if (!ent)
2295 return unpack_entry(p, base_offset, type, base_size);
2296
2297 if (type)
2298 *type = ent->type;
2299 if (base_size)
2300 *base_size = ent->size;
2301 return xmemdupz(ent->data, ent->size);
2302 }
2303
2304 static inline void release_delta_base_cache(struct delta_base_cache_entry *ent)
2305 {
2306 free(ent->data);
2307 detach_delta_base_cache_entry(ent);
2308 }
2309
2310 void clear_delta_base_cache(void)
2311 {
2312 struct list_head *lru, *tmp;
2313 list_for_each_safe(lru, tmp, &delta_base_cache_lru) {
2314 struct delta_base_cache_entry *entry =
2315 list_entry(lru, struct delta_base_cache_entry, lru);
2316 release_delta_base_cache(entry);
2317 }
2318 }
2319
2320 static void add_delta_base_cache(struct packed_git *p, off_t base_offset,
2321 void *base, unsigned long base_size, enum object_type type)
2322 {
2323 struct delta_base_cache_entry *ent = xmalloc(sizeof(*ent));
2324 struct list_head *lru, *tmp;
2325
2326 delta_base_cached += base_size;
2327
2328 list_for_each_safe(lru, tmp, &delta_base_cache_lru) {
2329 struct delta_base_cache_entry *f =
2330 list_entry(lru, struct delta_base_cache_entry, lru);
2331 if (delta_base_cached <= delta_base_cache_limit)
2332 break;
2333 release_delta_base_cache(f);
2334 }
2335
2336 ent->key.p = p;
2337 ent->key.base_offset = base_offset;
2338 ent->type = type;
2339 ent->data = base;
2340 ent->size = base_size;
2341 list_add_tail(&ent->lru, &delta_base_cache_lru);
2342
2343 if (!delta_base_cache.cmpfn)
2344 hashmap_init(&delta_base_cache, delta_base_cache_hash_cmp, NULL, 0);
2345 hashmap_entry_init(ent, pack_entry_hash(p, base_offset));
2346 hashmap_add(&delta_base_cache, ent);
2347 }
2348
/*
 * Collect the information requested through "oi" about the object
 * stored at "obj_offset" in pack "p".
 *
 * Only the "oi" output fields that are non-NULL are filled in.  When
 * everything requested could be obtained, oi->whence is set to
 * OI_DBCACHED or OI_PACKED.
 *
 * Returns the type obtained for the entry (when the contents were not
 * requested this is the type read from the entry header, which may be
 * one of the delta types), or OBJ_BAD when a requested piece of
 * information could not be read.
 */
2349 int packed_object_info(struct packed_git *p, off_t obj_offset,
2350 struct object_info *oi)
2351 {
2352 struct pack_window *w_curs = NULL;
2353 unsigned long size;
2354 off_t curpos = obj_offset;
2355 enum object_type type;
2356
2357 /*
2358 * We always get the representation type, but only convert it to
2359 * a "real" type later if the caller is interested.
2360 */
2361 if (oi->contentp) {
2362 *oi->contentp = cache_or_unpack_entry(p, obj_offset, oi->sizep,
2363 &type);
2364 if (!*oi->contentp)
2365 type = OBJ_BAD;
2366 } else {
2367 type = unpack_object_header(p, &w_curs, &curpos, &size);
2368 }
2369
/* Size requested without contents: take it from the header or the delta. */
2370 if (!oi->contentp && oi->sizep) {
2371 if (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {
2372 off_t tmp_pos = curpos;
2373 off_t base_offset = get_delta_base(p, &w_curs, &tmp_pos,
2374 type, obj_offset);
2375 if (!base_offset) {
2376 type = OBJ_BAD;
2377 goto out;
2378 }
2379 *oi->sizep = get_size_from_delta(p, &w_curs, tmp_pos);
2380 if (*oi->sizep == 0) {
2381 type = OBJ_BAD;
2382 goto out;
2383 }
2384 } else {
2385 *oi->sizep = size;
2386 }
2387 }
2388
/* On-disk size: distance from this entry to the following revindex entry. */
2389 if (oi->disk_sizep) {
2390 struct revindex_entry *revidx = find_pack_revindex(p, obj_offset);
2391 *oi->disk_sizep = revidx[1].offset - obj_offset;
2392 }
2393
2394 if (oi->typep || oi->typename) {
2395 enum object_type ptot;
2396 ptot = packed_to_object_type(p, obj_offset, type, &w_curs,
2397 curpos);
2398 if (oi->typep)
2399 *oi->typep = ptot;
2400 if (oi->typename) {
2401 const char *tn = typename(ptot);
2402 if (tn)
2403 strbuf_addstr(oi->typename, tn);
2404 }
2405 if (ptot < 0) {
2406 type = OBJ_BAD;
2407 goto out;
2408 }
2409 }
2410
/* Report the delta base, or an all-zero hash for a non-delta entry. */
2411 if (oi->delta_base_sha1) {
2412 if (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {
2413 const unsigned char *base;
2414
2415 base = get_delta_base_sha1(p, &w_curs, curpos,
2416 type, obj_offset);
2417 if (!base) {
2418 type = OBJ_BAD;
2419 goto out;
2420 }
2421
2422 hashcpy(oi->delta_base_sha1, base);
2423 } else
2424 hashclr(oi->delta_base_sha1);
2425 }
2426
2427 oi->whence = in_delta_base_cache(p, obj_offset) ? OI_DBCACHED :
2428 OI_PACKED;
2429
2430 out:
2431 unuse_pack(&w_curs);
2432 return type;
2433 }
2434
2435 static void *unpack_compressed_entry(struct packed_git *p,
2436 struct pack_window **w_curs,
2437 off_t curpos,
2438 unsigned long size)
2439 {
2440 int st;
2441 git_zstream stream;
2442 unsigned char *buffer, *in;
2443
2444 buffer = xmallocz_gently(size);
2445 if (!buffer)
2446 return NULL;
2447 memset(&stream, 0, sizeof(stream));
2448 stream.next_out = buffer;
2449 stream.avail_out = size + 1;
2450
2451 git_inflate_init(&stream);
2452 do {
2453 in = use_pack(p, w_curs, curpos, &stream.avail_in);
2454 stream.next_in = in;
2455 st = git_inflate(&stream, Z_FINISH);
2456 if (!stream.avail_out)
2457 break; /* the payload is larger than it should be */
2458 curpos += stream.next_in - in;
2459 } while (st == Z_OK || st == Z_BUF_ERROR);
2460 git_inflate_end(&stream);
2461 if ((st != Z_STREAM_END) || stream.total_out != size) {
2462 free(buffer);
2463 return NULL;
2464 }
2465
2466 return buffer;
2467 }
2468
/* Forward declaration: unpack_entry() below calls this; defined later in the file. */
2469 static void *read_object(const unsigned char *sha1, enum object_type *type,
2470 unsigned long *size);
2471
2472 static void write_pack_access_log(struct packed_git *p, off_t obj_offset)
2473 {
2474 static struct trace_key pack_access = TRACE_KEY_INIT(PACK_ACCESS);
2475 trace_printf_key(&pack_access, "%s %"PRIuMAX"\n",
2476 p->pack_name, (uintmax_t)obj_offset);
2477 }
2478
/*
 * When non-zero, unpack_entry() checks the CRC of each packed entry
 * it visits (for packs with index_version > 1) before reading it.
 */
2479 int do_check_packed_object_crc;
2480
/* Number of delta-chain levels unpack_entry() can track in its on-stack array. */
2481 #define UNPACK_ENTRY_STACK_PREALLOC 64
/* One pending delta recorded by unpack_entry() while walking down to the base. */
2482 struct unpack_entry_stack_ent {
/* offset of the delta entry itself */
2483 off_t obj_offset;
/* read position after the entry header and base reference */
2484 off_t curpos;
/* size taken from the delta entry's header */
2485 unsigned long size;
2486 };
2487
/*
 * Reconstruct the object stored at "obj_offset" in pack "p",
 * resolving any chain of deltas it is built upon.
 *
 * Works in three phases: walk down the delta chain, remembering each
 * delta on a stack, until a non-delta base (or a cached object) is
 * reached; obtain that base; then apply the remembered deltas in
 * reverse order.  Intermediate bases are put into the delta base
 * cache along the way.
 *
 * Returns the object contents, or NULL if they could not be
 * reconstructed.  "final_type" and "final_size", when non-NULL,
 * receive the type and size (they are left untouched when a CRC
 * failure makes the function bail out early).
 */
2488 void *unpack_entry(struct packed_git *p, off_t obj_offset,
2489 enum object_type *final_type, unsigned long *final_size)
2490 {
2491 struct pack_window *w_curs = NULL;
2492 off_t curpos = obj_offset;
2493 void *data = NULL;
2494 unsigned long size;
2495 enum object_type type;
2496 struct unpack_entry_stack_ent small_delta_stack[UNPACK_ENTRY_STACK_PREALLOC];
2497 struct unpack_entry_stack_ent *delta_stack = small_delta_stack;
2498 int delta_stack_nr = 0, delta_stack_alloc = UNPACK_ENTRY_STACK_PREALLOC;
2499 int base_from_cache = 0;
2500
2501 write_pack_access_log(p, obj_offset);
2502
2503 /* PHASE 1: drill down to the innermost base object */
2504 for (;;) {
2505 off_t base_offset;
2506 int i;
2507 struct delta_base_cache_entry *ent;
2508
/* A cached object ends the walk; its data is taken over from the cache. */
2509 ent = get_delta_base_cache_entry(p, curpos);
2510 if (ent) {
2511 type = ent->type;
2512 data = ent->data;
2513 size = ent->size;
2514 detach_delta_base_cache_entry(ent);
2515 base_from_cache = 1;
2516 break;
2517 }
2518
2519 if (do_check_packed_object_crc && p->index_version > 1) {
2520 struct revindex_entry *revidx = find_pack_revindex(p, obj_offset);
2521 off_t len = revidx[1].offset - obj_offset;
2522 if (check_pack_crc(p, &w_curs, obj_offset, len, revidx->nr)) {
2523 const unsigned char *sha1 =
2524 nth_packed_object_sha1(p, revidx->nr);
2525 error("bad packed object CRC for %s",
2526 sha1_to_hex(sha1));
2527 mark_bad_packed_object(p, sha1);
2528 data = NULL;
2529 goto out;
2530 }
2531 }
2532
2533 type = unpack_object_header(p, &w_curs, &curpos, &size);
2534 if (type != OBJ_OFS_DELTA && type != OBJ_REF_DELTA)
2535 break;
2536
2537 base_offset = get_delta_base(p, &w_curs, &curpos, type, obj_offset);
2538 if (!base_offset) {
2539 error("failed to validate delta base reference "
2540 "at offset %"PRIuMAX" from %s",
2541 (uintmax_t)curpos, p->pack_name);
2542 /* bail to phase 2, in hopes of recovery */
2543 data = NULL;
2544 break;
2545 }
2546
2547 /* push object, proceed to base */
/*
 * The first time the on-stack array is full, switch to a heap
 * array and copy the entries over; otherwise grow as needed.
 */
2548 if (delta_stack_nr >= delta_stack_alloc
2549 && delta_stack == small_delta_stack) {
2550 delta_stack_alloc = alloc_nr(delta_stack_nr);
2551 ALLOC_ARRAY(delta_stack, delta_stack_alloc);
2552 memcpy(delta_stack, small_delta_stack,
2553 sizeof(*delta_stack)*delta_stack_nr);
2554 } else {
2555 ALLOC_GROW(delta_stack, delta_stack_nr+1, delta_stack_alloc);
2556 }
2557 i = delta_stack_nr++;
2558 delta_stack[i].obj_offset = obj_offset;
2559 delta_stack[i].curpos = curpos;
2560 delta_stack[i].size = size;
2561
2562 curpos = obj_offset = base_offset;
2563 }
2564
2565 /* PHASE 2: handle the base */
2566 switch (type) {
2567 case OBJ_OFS_DELTA:
2568 case OBJ_REF_DELTA:
/* Only reachable with data == NULL, via the failed-base-reference break above. */
2569 if (data)
2570 die("BUG: unpack_entry: left loop at a valid delta");
2571 break;
2572 case OBJ_COMMIT:
2573 case OBJ_TREE:
2574 case OBJ_BLOB:
2575 case OBJ_TAG:
2576 if (!base_from_cache)
2577 data = unpack_compressed_entry(p, &w_curs, curpos, size);
2578 break;
2579 default:
2580 data = NULL;
2581 error("unknown object type %i at offset %"PRIuMAX" in %s",
2582 type, (uintmax_t)obj_offset, p->pack_name);
2583 }
2584
2585 /* PHASE 3: apply deltas in order */
2586
2587 /* invariants:
2588 * 'data' holds the base data, or NULL if there was corruption
2589 */
2590 while (delta_stack_nr) {
2591 void *delta_data;
2592 void *base = data;
2593 void *external_base = NULL;
2594 unsigned long delta_size, base_size = size;
2595 int i;
2596
2597 data = NULL;
2598
/* The cache stores the "base" pointer itself, so "base" is not freed below. */
2599 if (base)
2600 add_delta_base_cache(p, obj_offset, base, base_size, type);
2601
2602 if (!base) {
2603 /*
2604 * We're probably in deep shit, but let's try to fetch
2605 * the required base anyway from another pack or loose.
2606 * This is costly but should happen only in the presence
2607 * of a corrupted pack, and is better than failing outright.
2608 */
2609 struct revindex_entry *revidx;
2610 const unsigned char *base_sha1;
2611 revidx = find_pack_revindex(p, obj_offset);
2612 if (revidx) {
2613 base_sha1 = nth_packed_object_sha1(p, revidx->nr);
2614 error("failed to read delta base object %s"
2615 " at offset %"PRIuMAX" from %s",
2616 sha1_to_hex(base_sha1), (uintmax_t)obj_offset,
2617 p->pack_name);
2618 mark_bad_packed_object(p, base_sha1);
2619 base = read_object(base_sha1, &type, &base_size);
2620 external_base = base;
2621 }
2622 }
2623
/* Pop the next delta to apply on top of "base". */
2624 i = --delta_stack_nr;
2625 obj_offset = delta_stack[i].obj_offset;
2626 curpos = delta_stack[i].curpos;
2627 delta_size = delta_stack[i].size;
2628
2629 if (!base)
2630 continue;
2631
2632 delta_data = unpack_compressed_entry(p, &w_curs, curpos, delta_size);
2633
2634 if (!delta_data) {
2635 error("failed to unpack compressed delta "
2636 "at offset %"PRIuMAX" from %s",
2637 (uintmax_t)curpos, p->pack_name);
2638 data = NULL;
2639 free(external_base);
2640 continue;
2641 }
2642
2643 data = patch_delta(base, base_size,
2644 delta_data, delta_size,
2645 &size);
2646
2647 /*
2648 * We could not apply the delta; warn the user, but keep going.
2649 * Our failure will be noticed either in the next iteration of
2650 * the loop, or if this is the final delta, in the caller when
2651 * we return NULL. Those code paths will take care of making
2652 * a more explicit warning and retrying with another copy of
2653 * the object.
2654 */
2655 if (!data)
2656 error("failed to apply delta");
2657
2658 free(delta_data);
2659 free(external_base);
2660 }
2661
2662 if (final_type)
2663 *final_type = type;
2664 if (final_size)
2665 *final_size = size;
2666
2667 out:
2668 unuse_pack(&w_curs);
2669
2670 if (delta_stack != small_delta_stack)
2671 free(delta_stack);
2672
2673 return data;
2674 }
2675
2676 const unsigned char *nth_packed_object_sha1(struct packed_git *p,
2677 uint32_t n)
2678 {
2679 const unsigned char *index = p->index_data;
2680 if (!index) {
2681 if (open_pack_index(p))
2682 return NULL;
2683 index = p->index_data;
2684 }
2685 if (n >= p->num_objects)
2686 return NULL;
2687 index += 4 * 256;
2688 if (p->index_version == 1) {
2689 return index + 24 * n + 4;
2690 } else {
2691 index += 8;
2692 return index + 20 * n;
2693 }
2694 }
2695
2696 const struct object_id *nth_packed_object_oid(struct object_id *oid,
2697 struct packed_git *p,
2698 uint32_t n)
2699 {
2700 const unsigned char *hash = nth_packed_object_sha1(p, n);
2701 if (!hash)
2702 return NULL;
2703 hashcpy(oid->hash, hash);
2704 return oid;
2705 }
2706
2707 void check_pack_index_ptr(const struct packed_git *p, const void *vptr)
2708 {
2709 const unsigned char *ptr = vptr;
2710 const unsigned char *start = p->index_data;
2711 const unsigned char *end = start + p->index_size;
2712 if (ptr < start)
2713 die(_("offset before start of pack index for %s (corrupt index?)"),
2714 p->pack_name);
2715 /* No need to check for underflow; .idx files must be at least 8 bytes */
2716 if (ptr >= end - 8)
2717 die(_("offset beyond end of pack index for %s (truncated index?)"),
2718 p->pack_name);
2719 }
2720
2721 off_t nth_packed_object_offset(const struct packed_git *p, uint32_t n)
2722 {
2723 const unsigned char *index = p->index_data;
2724 index += 4 * 256;
2725 if (p->index_version == 1) {
2726 return ntohl(*((uint32_t *)(index + 24 * n)));
2727 } else {
2728 uint32_t off;
2729 index += 8 + p->num_objects * (20 + 4);
2730 off = ntohl(*((uint32_t *)(index + 4 * n)));
2731 if (!(off & 0x80000000))
2732 return off;
2733 index += p->num_objects * 4 + (off & 0x7fffffff) * 8;
2734 check_pack_index_ptr(p, index);
2735 return (((uint64_t)ntohl(*((uint32_t *)(index + 0)))) << 32) |
2736 ntohl(*((uint32_t *)(index + 4)));
2737 }
2738 }
2739
2740 off_t find_pack_entry_one(const unsigned char *sha1,
2741 struct packed_git *p)
2742 {
2743 const uint32_t *level1_ofs = p->index_data;
2744 const unsigned char *index = p->index_data;
2745 unsigned hi, lo, stride;
2746 static int debug_lookup = -1;
2747
2748 if (debug_lookup < 0)
2749 debug_lookup = !!getenv("GIT_DEBUG_LOOKUP");
2750
2751 if (!index) {
2752 if (open_pack_index(p))
2753 return 0;
2754 level1_ofs = p->index_data;
2755 index = p->index_data;
2756 }
2757 if (p->index_version > 1) {
2758 level1_ofs += 2;
2759 index += 8;
2760 }
2761 index += 4 * 256;
2762 hi = ntohl(level1_ofs[*sha1]);
2763 lo = ((*sha1 == 0x0) ? 0 : ntohl(level1_ofs[*sha1 - 1]));
2764 if (p->index_version > 1) {
2765 stride = 20;
2766 } else {
2767 stride = 24;
2768 index += 4;
2769 }
2770
2771 if (debug_lookup)
2772 printf("%02x%02x%02x... lo %u hi %u nr %"PRIu32"\n",
2773 sha1[0], sha1[1], sha1[2], lo, hi, p->num_objects);
2774
2775 while (lo < hi) {
2776 unsigned mi = (lo + hi) / 2;
2777 int cmp = hashcmp(index + mi * stride, sha1);
2778
2779 if (debug_lookup)
2780 printf("lo %u hi %u rg %u mi %u\n",
2781 lo, hi, hi - lo, mi);
2782 if (!cmp)
2783 return nth_packed_object_offset(p, mi);
2784 if (cmp > 0)
2785 hi = mi;
2786 else
2787 lo = mi+1;
2788 }
2789 return 0;
2790 }
2791
2792 int is_pack_valid(struct packed_git *p)
2793 {
2794 /* An already open pack is known to be valid. */
2795 if (p->pack_fd != -1)
2796 return 1;
2797
2798 /* If the pack has one window completely covering the
2799 * file size, the pack is known to be valid even if
2800 * the descriptor is not currently open.
2801 */
2802 if (p->windows) {
2803 struct pack_window *w = p->windows;
2804
2805 if (!w->offset && w->len == p->pack_size)
2806 return 1;
2807 }
2808
2809 /* Force the pack to open to prove its valid. */
2810 return !open_packed_git(p);
2811 }
2812
2813 static int fill_pack_entry(const unsigned char *sha1,
2814 struct pack_entry *e,
2815 struct packed_git *p)
2816 {
2817 off_t offset;
2818
2819 if (p->num_bad_objects) {
2820 unsigned i;
2821 for (i = 0; i < p->num_bad_objects; i++)
2822 if (!hashcmp(sha1, p->bad_object_sha1 + 20 * i))
2823 return 0;
2824 }
2825
2826 offset = find_pack_entry_one(sha1, p);
2827 if (!offset)
2828 return 0;
2829
2830 /*
2831 * We are about to tell the caller where they can locate the
2832 * requested object. We better make sure the packfile is
2833 * still here and can be accessed before supplying that
2834 * answer, as it may have been deleted since the index was
2835 * loaded!
2836 */
2837 if (!is_pack_valid(p))
2838 return 0;
2839 e->offset = offset;
2840 e->p = p;
2841 hashcpy(e->sha1, sha1);
2842 return 1;
2843 }
2844
2845 /*
2846 * Iff a pack file contains the object named by sha1, return true and
2847 * store its location to e.
2848 */
2849 static int find_pack_entry(const unsigned char *sha1, struct pack_entry *e)
2850 {
2851 struct mru_entry *p;
2852
2853 prepare_packed_git();
2854 if (!packed_git)
2855 return 0;
2856
2857 for (p = packed_git_mru->head; p; p = p->next) {
2858 if (fill_pack_entry(sha1, e, p->item)) {
2859 mru_mark(packed_git_mru, p);
2860 return 1;
2861 }
2862 }
2863 return 0;
2864 }
2865
2866 struct packed_git *find_sha1_pack(const unsigned char *sha1,
2867 struct packed_git *packs)
2868 {
2869 struct packed_git *p;
2870
2871 for (p = packs; p; p = p->next) {
2872 if (find_pack_entry_one(sha1, p))
2873 return p;
2874 }
2875 return NULL;
2876
2877 }
2878
/*
 * Fill in the fields requested through "oi" for the loose object
 * named by "sha1".
 *
 * Returns 0 on success and a negative value when the object file
 * cannot be found or mapped, or when its header cannot be unpacked or
 * parsed.  With OBJECT_INFO_ALLOW_UNKNOWN_TYPE in "flags" the header
 * is unpacked into a strbuf instead of the fixed-size buffer.
 */
2879 static int sha1_loose_object_info(const unsigned char *sha1,
2880 struct object_info *oi,
2881 int flags)
2882 {
2883 int status = 0;
2884 unsigned long mapsize;
2885 void *map;
2886 git_zstream stream;
2887 char hdr[32];
2888 struct strbuf hdrbuf = STRBUF_INIT;
2889 unsigned long size_scratch;
2890
2891 if (oi->delta_base_sha1)
2892 hashclr(oi->delta_base_sha1);
2893
2894 /*
2895 * If we don't care about type or size, then we don't
2896 * need to look inside the object at all. Note that we
2897 * do not optimize out the stat call, even if the
2898 * caller doesn't care about the disk-size, since our
2899 * return value implicitly indicates whether the
2900 * object even exists.
2901 */
2902 if (!oi->typep && !oi->typename && !oi->sizep && !oi->contentp) {
2903 const char *path;
2904 struct stat st;
2905 if (stat_sha1_file(sha1, &st, &path) < 0)
2906 return -1;
2907 if (oi->disk_sizep)
2908 *oi->disk_sizep = st.st_size;
2909 return 0;
2910 }
2911
2912 map = map_sha1_file(sha1, &mapsize);
2913 if (!map)
2914 return -1;
2915
/*
 * Point sizep at a scratch variable while parsing so that it can be
 * dereferenced below; it is reset to NULL before returning.
 */
2916 if (!oi->sizep)
2917 oi->sizep = &size_scratch;
2918
2919 if (oi->disk_sizep)
2920 *oi->disk_sizep = mapsize;
2921 if ((flags & OBJECT_INFO_ALLOW_UNKNOWN_TYPE)) {
2922 if (unpack_sha1_header_to_strbuf(&stream, map, mapsize, hdr, sizeof(hdr), &hdrbuf) < 0)
2923 status = error("unable to unpack %s header with --allow-unknown-type",
2924 sha1_to_hex(sha1));
2925 } else if (unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0)
2926 status = error("unable to unpack %s header",
2927 sha1_to_hex(sha1));
2928 if (status < 0)
2929 ; /* Do nothing */
2930 else if (hdrbuf.len) {
2931 if ((status = parse_sha1_header_extended(hdrbuf.buf, oi, flags)) < 0)
2932 status = error("unable to parse %s header with --allow-unknown-type",
2933 sha1_to_hex(sha1));
2934 } else if ((status = parse_sha1_header_extended(hdr, oi, flags)) < 0)
2935 status = error("unable to parse %s header", sha1_to_hex(sha1));
2936
/* Either hand the stream on to read the contents, or just end it here. */
2937 if (status >= 0 && oi->contentp)
2938 *oi->contentp = unpack_sha1_rest(&stream, hdr,
2939 *oi->sizep, sha1);
2940 else
2941 git_inflate_end(&stream);
2942
2943 munmap(map, mapsize);
2944 if (status && oi->typep)
2945 *oi->typep = status;
2946 if (oi->sizep == &size_scratch)
2947 oi->sizep = NULL;
2948 strbuf_release(&hdrbuf);
2949 oi->whence = OI_LOOSE;
2950 return (status < 0) ? status : 0;
2951 }
2952
/*
 * Look up the object named by "sha1" and fill in whichever fields of
 * "oi" the caller asked for; "oi" may be NULL when only existence
 * matters.
 *
 * Sources are consulted in this order: the in-memory cached objects
 * (unless OBJECT_INFO_SKIP_CACHED), the packs, the loose object
 * store, and finally the packs again after re-scanning them (unless
 * OBJECT_INFO_QUICK).  With OBJECT_INFO_LOOKUP_REPLACE the name is
 * first mapped through lookup_replace_object().
 *
 * Returns 0 when the object was found, -1 otherwise.
 */
2953 int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi, unsigned flags)
2954 {
2955 static struct object_info blank_oi = OBJECT_INFO_INIT;
2956 struct pack_entry e;
2957 int rtype;
2958 const unsigned char *real = (flags & OBJECT_INFO_LOOKUP_REPLACE) ?
2959 lookup_replace_object(sha1) :
2960 sha1;
2961
/* Substitute an all-empty request so the code below need not check for NULL. */
2962 if (!oi)
2963 oi = &blank_oi;
2964
2965 if (!(flags & OBJECT_INFO_SKIP_CACHED)) {
2966 struct cached_object *co = find_cached_object(real);
2967 if (co) {
2968 if (oi->typep)
2969 *(oi->typep) = co->type;
2970 if (oi->sizep)
2971 *(oi->sizep) = co->size;
2972 if (oi->disk_sizep)
2973 *(oi->disk_sizep) = 0;
2974 if (oi->delta_base_sha1)
2975 hashclr(oi->delta_base_sha1);
2976 if (oi->typename)
2977 strbuf_addstr(oi->typename, typename(co->type));
2978 if (oi->contentp)
2979 *oi->contentp = xmemdupz(co->buf, co->size);
2980 oi->whence = OI_CACHED;
2981 return 0;
2982 }
2983 }
2984
2985 if (!find_pack_entry(real, &e)) {
2986 /* Most likely it's a loose object. */
2987 if (!sha1_loose_object_info(real, oi, flags))
2988 return 0;
2989
2990 /* Not a loose object; someone else may have just packed it. */
2991 if (flags & OBJECT_INFO_QUICK) {
2992 return -1;
2993 } else {
2994 reprepare_packed_git();
2995 if (!find_pack_entry(real, &e))
2996 return -1;
2997 }
2998 }
2999
3000 if (oi == &blank_oi)
3001 /*
3002 * We know that the caller doesn't actually need the
3003 * information below, so return early.
3004 */
3005 return 0;
3006
3007 rtype = packed_object_info(e.p, e.offset, oi);
/*
 * A bad packed copy is marked as such and the whole lookup is retried
 * (with flags 0), so that another copy can be found if there is one.
 */
3008 if (rtype < 0) {
3009 mark_bad_packed_object(e.p, real);
3010 return sha1_object_info_extended(real, oi, 0);
3011 } else if (oi->whence == OI_PACKED) {
3012 oi->u.packed.offset = e.offset;
3013 oi->u.packed.pack = e.p;
3014 oi->u.packed.is_delta = (rtype == OBJ_REF_DELTA ||
3015 rtype == OBJ_OFS_DELTA);
3016 }
3017
3018 return 0;
3019 }
3020
3021 /* returns enum object_type or negative */
3022 int sha1_object_info(const unsigned char *sha1, unsigned long *sizep)
3023 {
3024 enum object_type type;
3025 struct object_info oi = OBJECT_INFO_INIT;
3026
3027 oi.typep = &type;
3028 oi.sizep = sizep;
3029 if (sha1_object_info_extended(sha1, &oi,
3030 OBJECT_INFO_LOOKUP_REPLACE) < 0)
3031 return -1;
3032 return type;
3033 }
3034
3035 int pretend_sha1_file(void *buf, unsigned long len, enum object_type type,
3036 unsigned char *sha1)
3037 {
3038 struct cached_object *co;
3039
3040 hash_sha1_file(buf, len, typename(type), sha1);
3041 if (has_sha1_file(sha1) || find_cached_object(sha1))
3042 return 0;
3043 ALLOC_GROW(cached_objects, cached_object_nr + 1, cached_object_alloc);
3044 co = &cached_objects[cached_object_nr++];
3045 co->size = len;
3046 co->type = type;
3047 co->buf = xmalloc(len);
3048 memcpy(co->buf, buf, len);
3049 hashcpy(co->sha1, sha1);
3050 return 0;
3051 }
3052
3053 static void *read_object(const unsigned char *sha1, enum object_type *type,
3054 unsigned long *size)
3055 {
3056 struct object_info oi = OBJECT_INFO_INIT;
3057 void *content;
3058 oi.typep = type;
3059 oi.sizep = size;
3060 oi.contentp = &content;
3061
3062 if (sha1_object_info_extended(sha1, &oi, 0) < 0)
3063 return NULL;
3064 return content;
3065 }
3066
3067 /*
3068 * This function dies on corrupt objects; the callers who want to
3069 * deal with them should arrange to call read_object() and give error
3070 * messages themselves.
3071 */
3072 void *read_sha1_file_extended(const unsigned char *sha1,
3073 enum object_type *type,
3074 unsigned long *size,
3075 int lookup_replace)
3076 {
3077 void *data;
3078 const struct packed_git *p;
3079 const char *path;
3080 struct stat st;
3081 const unsigned char *repl = lookup_replace ? lookup_replace_object(sha1)
3082 : sha1;
3083
3084 errno = 0;
3085 data = read_object(repl, type, size);
3086 if (data)
3087 return data;
3088
3089 if (errno && errno != ENOENT)
3090 die_errno("failed to read object %s", sha1_to_hex(sha1));
3091
3092 /* die if we replaced an object with one that does not exist */
3093 if (repl != sha1)
3094 die("replacement %s not found for %s",
3095 sha1_to_hex(repl), sha1_to_hex(sha1));
3096
3097 if (!stat_sha1_file(repl, &st, &path))
3098 die("loose object %s (stored in %s) is corrupt",
3099 sha1_to_hex(repl), path);
3100
3101 if ((p = has_packed_and_bad(repl)) != NULL)
3102 die("packed object %s (stored in %s) is corrupt",
3103 sha1_to_hex(repl), p->pack_name);
3104
3105 return NULL;
3106 }
3107
3108 void *read_object_with_reference(const unsigned char *sha1,
3109 const char *required_type_name,
3110 unsigned long *size,
3111 unsigned char *actual_sha1_return)
3112 {
3113 enum object_type type, required_type;
3114 void *buffer;
3115 unsigned long isize;
3116 unsigned char actual_sha1[20];
3117
3118 required_type = type_from_string(required_type_name);
3119 hashcpy(actual_sha1, sha1);
3120 while (1) {
3121 int ref_length = -1;
3122 const char *ref_type = NULL;
3123
3124 buffer = read_sha1_file(actual_sha1, &type, &isize);
3125 if (!buffer)
3126 return NULL;
3127 if (type == required_type) {
3128 *size = isize;
3129 if (actual_sha1_return)
3130 hashcpy(actual_sha1_return, actual_sha1);
3131 return buffer;
3132 }
3133 /* Handle references */
3134 else if (type == OBJ_COMMIT)
3135 ref_type = "tree ";
3136 else if (type == OBJ_TAG)
3137 ref_type = "object ";
3138 else {
3139 free(buffer);
3140 return NULL;
3141 }
3142 ref_length = strlen(ref_type);
3143
3144 if (ref_length + 40 > isize ||
3145 memcmp(buffer, ref_type, ref_length) ||
3146 get_sha1_hex((char *) buffer + ref_length, actual_sha1)) {
3147 free(buffer);
3148 return NULL;
3149 }
3150 free(buffer);
3151 /* Now we have the ID of the referred-to object in
3152 * actual_sha1. Check again. */
3153 }
3154 }
3155
3156 static void write_sha1_file_prepare(const void *buf, unsigned long len,
3157 const char *type, unsigned char *sha1,
3158 char *hdr, int *hdrlen)
3159 {
3160 git_SHA_CTX c;
3161
3162 /* Generate the header */
3163 *hdrlen = xsnprintf(hdr, *hdrlen, "%s %lu", type, len)+1;
3164
3165 /* Sha1.. */
3166 git_SHA1_Init(&c);
3167 git_SHA1_Update(&c, hdr, *hdrlen);
3168 git_SHA1_Update(&c, buf, len);
3169 git_SHA1_Final(sha1, &c);
3170 }
3171
3172 /*
3173 * Move the just written object into its final resting place.
 *
 * Unless object_creation_mode asks for renames, the file is first
 * hard-linked to "filename"; rename() is the fallback when link()
 * fails for a reason other than EEXIST.  Returns 0 on success (an
 * already existing "filename" counts as success) and the result of
 * error()/error_errno() otherwise.
3174 */
3175 int finalize_object_file(const char *tmpfile, const char *filename)
3176 {
3177 int ret = 0;
3178
3179 if (object_creation_mode == OBJECT_CREATION_USES_RENAMES)
3180 goto try_rename;
3181 else if (link(tmpfile, filename))
3182 ret = errno;
3183
3184 /*
3185 * Coda hack - coda doesn't like cross-directory links,
3186 * so we fall back to a rename, which will mean that it
3187 * won't be able to check collisions, but that's not a
3188 * big deal.
3189 *
3190 * The same holds for FAT formatted media.
3191 *
3192 * When this succeeds, we just return. We have nothing
3193 * left to unlink.
3194 */
3195 if (ret && ret != EEXIST) {
3196 try_rename:
3197 if (!rename(tmpfile, filename))
3198 goto out;
3199 ret = errno;
3200 }
/* Reached after a successful link(), EEXIST, or a failed rename(). */
3201 unlink_or_warn(tmpfile);
3202 if (ret) {
3203 if (ret != EEXIST) {
3204 return error_errno("unable to write sha1 filename %s", filename);
3205 }
3206 /* FIXME!!! Collision check here ? */
3207 }
3208
3209 out:
3210 if (adjust_shared_perm(filename))
3211 return error("unable to set permission to '%s'", filename);
3212 return 0;
3213 }
3214
/*
 * Write all "len" bytes of "buf" to "fd".  Returns 0 on success, or
 * the result of error_errno() after reporting a write failure.
 */
static int write_buffer(int fd, const void *buf, size_t len)
{
	if (write_in_full(fd, buf, len) >= 0)
		return 0;
	return error_errno("file write error");
}
3221
/*
 * Compute into "sha1" the name an object of the given type with
 * contents "buf" (of length "len") would have, without writing
 * anything.  Always returns 0.
 */
int hash_sha1_file(const void *buf, unsigned long len, const char *type,
		   unsigned char *sha1)
{
	char header[32];
	int header_len = sizeof(header);

	write_sha1_file_prepare(buf, len, type, sha1, header, &header_len);
	return 0;
}
3230
3231 /* Finalize a file on disk, and close it. */
3232 static void close_sha1_file(int fd)
3233 {
3234 if (fsync_object_files)
3235 fsync_or_die(fd, "sha1 file");
3236 if (close(fd) != 0)
3237 die_errno("error when closing sha1 file");
3238 }
3239
/*
 * Length of the directory part of "filename", including the final
 * '/'; 0 when the name contains no '/'.
 */
static inline int directory_size(const char *filename)
{
	const char *last_slash = strrchr(filename, '/');

	return last_slash ? (int)(last_slash - filename + 1) : 0;
}
3248
3249 /*
3250 * This creates a temporary file in the same directory as the final
3251 * 'filename'
3252 *
3253 * We want to avoid cross-directory filename renames, because those
3254 * can have problems on various filesystems (FAT, NFS, Coda).
3255 */
3256 static int create_tmpfile(struct strbuf *tmp, const char *filename)
3257 {
3258 int fd, dirlen = directory_size(filename);
3259
3260 strbuf_reset(tmp);
3261 strbuf_add(tmp, filename, dirlen);
3262 strbuf_addstr(tmp, "tmp_obj_XXXXXX");
3263 fd = git_mkstemp_mode(tmp->buf, 0444);
3264 if (fd < 0 && dirlen && errno == ENOENT) {
3265 /*
3266 * Make sure the directory exists; note that the contents
3267 * of the buffer are undefined after mkstemp returns an
3268 * error, so we have to rewrite the whole buffer from
3269 * scratch.
3270 */
3271 strbuf_reset(tmp);
3272 strbuf_add(tmp, filename, dirlen - 1);
3273 if (mkdir(tmp->buf, 0777) && errno != EEXIST)
3274 return -1;
3275 if (adjust_shared_perm(tmp->buf))
3276 return -1;
3277
3278 /* Try again */
3279 strbuf_addstr(tmp, "/tmp_obj_XXXXXX");
3280 fd = git_mkstemp_mode(tmp->buf, 0444);
3281 }
3282 return fd;
3283 }
3284
/*
 * Write a loose object file for "sha1": deflate the header "hdr"
 * ("hdrlen" bytes) followed by "buf" ("len" bytes) into a temporary
 * file next to the object's final path, then move it into place with
 * finalize_object_file().
 *
 * The data is hashed again while it is being compressed, and the
 * function dies if the result does not match "sha1".  A non-zero
 * "mtime" is applied to the temporary file before it is moved.
 *
 * Returns the result of finalize_object_file(), or of error() when
 * the temporary file cannot be created; dies on write or deflate
 * failures.
 */
3285 static int write_loose_object(const unsigned char *sha1, char *hdr, int hdrlen,
3286 const void *buf, unsigned long len, time_t mtime)
3287 {
3288 int fd, ret;
3289 unsigned char compressed[4096];
3290 git_zstream stream;
3291 git_SHA_CTX c;
3292 unsigned char parano_sha1[20];
3293 static struct strbuf tmp_file = STRBUF_INIT;
3294 const char *filename = sha1_file_name(sha1);
3295
3296 fd = create_tmpfile(&tmp_file, filename);
3297 if (fd < 0) {
3298 if (errno == EACCES)
3299 return error("insufficient permission for adding an object to repository database %s", get_object_directory());
3300 else
3301 return error_errno("unable to create temporary file");
3302 }
3303
3304 /* Set it up */
3305 git_deflate_init(&stream, zlib_compression_level);
3306 stream.next_out = compressed;
3307 stream.avail_out = sizeof(compressed);
3308 git_SHA1_Init(&c);
3309
3310 /* First header.. */
3311 stream.next_in = (unsigned char *)hdr;
3312 stream.avail_in = hdrlen;
3313 while (git_deflate(&stream, 0) == Z_OK)
3314 ; /* nothing */
3315 git_SHA1_Update(&c, hdr, hdrlen);
3316
3317 /* Then the data itself.. */
3318 stream.next_in = (void *)buf;
3319 stream.avail_in = len;
3320 do {
3321 unsigned char *in0 = stream.next_in;
3322 ret = git_deflate(&stream, Z_FINISH);
/* Hash exactly the input bytes consumed by this deflate call. */
3323 git_SHA1_Update(&c, in0, stream.next_in - in0);
3324 if (write_buffer(fd, compressed, stream.next_out - compressed) < 0)
3325 die("unable to write sha1 file");
3326 stream.next_out = compressed;
3327 stream.avail_out = sizeof(compressed);
3328 } while (ret == Z_OK);
3329
3330 if (ret != Z_STREAM_END)
3331 die("unable to deflate new object %s (%d)", sha1_to_hex(sha1), ret);
3332 ret = git_deflate_end_gently(&stream);
3333 if (ret != Z_OK)
3334 die("deflateEnd on object %s failed (%d)", sha1_to_hex(sha1), ret);
3335 git_SHA1_Final(parano_sha1, &c);
3336 if (hashcmp(sha1, parano_sha1) != 0)
3337 die("confused by unstable object source data for %s", sha1_to_hex(sha1));
3338
3339 close_sha1_file(fd);
3340
3341 if (mtime) {
3342 struct utimbuf utb;
3343 utb.actime = mtime;
3344 utb.modtime = mtime;
3345 if (utime(tmp_file.buf, &utb) < 0)
3346 warning_errno("failed utime() on %s", tmp_file.buf);
3347 }
3348
3349 return finalize_object_file(tmp_file.buf, filename);
3350 }
3351
/* Thin wrapper: forwards to check_and_freshen() with 1 as its second argument. */
static int freshen_loose_object(const unsigned char *sha1)
{
	int found = check_and_freshen(sha1, 1);

	return found;
}
3356
3357 static int freshen_packed_object(const unsigned char *sha1)
3358 {
3359 struct pack_entry e;
3360 if (!find_pack_entry(sha1, &e))
3361 return 0;
3362 if (e.p->freshened)
3363 return 1;
3364 if (!freshen_file(e.p->pack_name))
3365 return 0;
3366 e.p->freshened = 1;
3367 return 1;
3368 }
3369
/*
 * Compute the name of the object with the given contents and type
 * into "sha1" and make sure the object exists in the object store.
 * Returns 0 on success, or the result of write_loose_object().
 */
int write_sha1_file(const void *buf, unsigned long len, const char *type, unsigned char *sha1)
{
	char header[32];
	int header_len = sizeof(header);

	write_sha1_file_prepare(buf, len, type, sha1, header, &header_len);

	/*
	 * Normally if we already have it, packed or loose, we do not
	 * bother writing it out into a .git/objects/??/?{38} file.
	 */
	if (!freshen_packed_object(sha1) && !freshen_loose_object(sha1))
		return write_loose_object(sha1, header, header_len, buf, len, 0);
	return 0;
}
3383
3384 int hash_sha1_file_literally(const void *buf, unsigned long len, const char *type,
3385 unsigned char *sha1, unsigned flags)
3386 {
3387 char *header;
3388 int hdrlen, status = 0;
3389
3390 /* type string, SP, %lu of the length plus NUL must fit this */
3391 hdrlen = strlen(type) + 32;
3392 header = xmalloc(hdrlen);
3393 write_sha1_file_prepare(buf, len, type, sha1, header, &hdrlen);
3394
3395 if (!(flags & HASH_WRITE_OBJECT))
3396 goto cleanup;
3397 if (freshen_packed_object(sha1) || freshen_loose_object(sha1))
3398 goto cleanup;
3399 status = write_loose_object(sha1, header, hdrlen, buf, len, 0);
3400
3401 cleanup:
3402 free(header);
3403 return status;
3404 }
3405
3406 int force_object_loose(const unsigned char *sha1, time_t mtime)
3407 {
3408 void *buf;
3409 unsigned long len;
3410 enum object_type type;
3411 char hdr[32];
3412 int hdrlen;
3413 int ret;
3414
3415 if (has_loose_object(sha1))
3416 return 0;
3417 buf = read_object(sha1, &type, &len);
3418 if (!buf)
3419 return error("cannot read sha1_file for %s", sha1_to_hex(sha1));
3420 hdrlen = xsnprintf(hdr, sizeof(hdr), "%s %lu", typename(type), len) + 1;
3421 ret = write_loose_object(sha1, hdr, hdrlen, buf, len, mtime);
3422 free(buf);
3423
3424 return ret;
3425 }
3426
/*
 * Returns 1 when stat() succeeds on the path produced by
 * sha1_pack_index_name(sha1), 0 otherwise.
 */
int has_pack_index(const unsigned char *sha1)
{
	struct stat st;

	return stat(sha1_pack_index_name(sha1), &st) ? 0 : 1;
}
3434
3435 int has_sha1_pack(const unsigned char *sha1)
3436 {
3437 struct pack_entry e;
3438 return find_pack_entry(sha1, &e);
3439 }
3440
3441 int has_sha1_file_with_flags(const unsigned char *sha1, int flags)
3442 {
3443 if (!startup_info->have_repository)
3444 return 0;
3445 return sha1_object_info_extended(sha1, NULL,
3446 flags | OBJECT_INFO_SKIP_CACHED) >= 0;
3447 }
3448
3449 int has_object_file(const struct object_id *oid)
3450 {
3451 return has_sha1_file(oid->hash);
3452 }
3453
3454 int has_object_file_with_flags(const struct object_id *oid, int flags)
3455 {
3456 return has_sha1_file_with_flags(oid->hash, flags);
3457 }
3458
3459 static void check_tree(const void *buf, size_t size)
3460 {
3461 struct tree_desc desc;
3462 struct name_entry entry;
3463
3464 init_tree_desc(&desc, buf, size);
3465 while (tree_entry(&desc, &entry))
3466 /* do nothing
3467 * tree_entry() will die() on malformed entries */
3468 ;
3469 }
3470
3471 static void check_commit(const void *buf, size_t size)
3472 {
3473 struct commit c;
3474 memset(&c, 0, sizeof(c));
3475 if (parse_commit_buffer(&c, buf, size))
3476 die("corrupt commit");
3477 }
3478
3479 static void check_tag(const void *buf, size_t size)
3480 {
3481 struct tag t;
3482 memset(&t, 0, sizeof(t));
3483 if (parse_tag_buffer(&t, buf, size))
3484 die("corrupt tag");
3485 }
3486
3487 static int index_mem(unsigned char *sha1, void *buf, size_t size,
3488 enum object_type type,
3489 const char *path, unsigned flags)
3490 {
3491 int ret, re_allocated = 0;
3492 int write_object = flags & HASH_WRITE_OBJECT;
3493
3494 if (!type)
3495 type = OBJ_BLOB;
3496
3497 /*
3498 * Convert blobs to git internal format
3499 */
3500 if ((type == OBJ_BLOB) && path) {
3501 struct strbuf nbuf = STRBUF_INIT;
3502 if (convert_to_git(&the_index, path, buf, size, &nbuf,
3503 write_object ? safe_crlf : SAFE_CRLF_FALSE)) {
3504 buf = strbuf_detach(&nbuf, &size);
3505 re_allocated = 1;
3506 }
3507 }
3508 if (flags & HASH_FORMAT_CHECK) {
3509 if (type == OBJ_TREE)
3510 check_tree(buf, size);
3511 if (type == OBJ_COMMIT)
3512 check_commit(buf, size);
3513 if (type == OBJ_TAG)
3514 check_tag(buf, size);
3515 }
3516
3517 if (write_object)
3518 ret = write_sha1_file(buf, size, typename(type), sha1);
3519 else
3520 ret = hash_sha1_file(buf, size, typename(type), sha1);
3521 if (re_allocated)
3522 free(buf);
3523 return ret;
3524 }
3525
3526 static int index_stream_convert_blob(unsigned char *sha1, int fd,
3527 const char *path, unsigned flags)
3528 {
3529 int ret;
3530 const int write_object = flags & HASH_WRITE_OBJECT;
3531 struct strbuf sbuf = STRBUF_INIT;
3532
3533 assert(path);
3534 assert(would_convert_to_git_filter_fd(path));
3535
3536 convert_to_git_filter_fd(&the_index, path, fd, &sbuf,
3537 write_object ? safe_crlf : SAFE_CRLF_FALSE);
3538
3539 if (write_object)
3540 ret = write_sha1_file(sbuf.buf, sbuf.len, typename(OBJ_BLOB),
3541 sha1);
3542 else
3543 ret = hash_sha1_file(sbuf.buf, sbuf.len, typename(OBJ_BLOB),
3544 sha1);
3545 strbuf_release(&sbuf);
3546 return ret;
3547 }
3548
3549 static int index_pipe(unsigned char *sha1, int fd, enum object_type type,
3550 const char *path, unsigned flags)
3551 {
3552 struct strbuf sbuf = STRBUF_INIT;
3553 int ret;
3554
3555 if (strbuf_read(&sbuf, fd, 4096) >= 0)
3556 ret = index_mem(sha1, sbuf.buf, sbuf.len, type, path, flags);
3557 else
3558 ret = -1;
3559 strbuf_release(&sbuf);
3560 return ret;
3561 }
3562
3563 #define SMALL_FILE_SIZE (32*1024)
3564
3565 static int index_core(unsigned char *sha1, int fd, size_t size,
3566 enum object_type type, const char *path,
3567 unsigned flags)
3568 {
3569 int ret;
3570
3571 if (!size) {
3572 ret = index_mem(sha1, "", size, type, path, flags);
3573 } else if (size <= SMALL_FILE_SIZE) {
3574 char *buf = xmalloc(size);
3575 if (size == read_in_full(fd, buf, size))
3576 ret = index_mem(sha1, buf, size, type, path, flags);
3577 else
3578 ret = error_errno("short read");
3579 free(buf);
3580 } else {
3581 void *buf = xmmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
3582 ret = index_mem(sha1, buf, size, type, path, flags);
3583 munmap(buf, size);
3584 }
3585 return ret;
3586 }
3587
3588 /*
3589 * This creates one packfile per large blob unless bulk-checkin
3590 * machinery is "plugged".
3591 *
3592 * This also bypasses the usual "convert-to-git" dance, and that is on
3593 * purpose. We could write a streaming version of the converting
3594 * functions and insert that before feeding the data to fast-import
3595 * (or equivalent in-core API described above). However, that is
3596 * somewhat complicated, as we do not know the size of the filter
3597 * result, which we need to know beforehand when writing a git object.
3598 * Since the primary motivation for trying to stream from the working
3599 * tree file and to avoid mmaping it in core is to deal with large
3600 * binary blobs, they generally do not want to get any conversion, and
3601 * callers should avoid this code path when filters are requested.
3602 */
3603 static int index_stream(unsigned char *sha1, int fd, size_t size,
3604 enum object_type type, const char *path,
3605 unsigned flags)
3606 {
3607 return index_bulk_checkin(sha1, fd, size, type, path, flags);
3608 }
3609
3610 int index_fd(unsigned char *sha1, int fd, struct stat *st,
3611 enum object_type type, const char *path, unsigned flags)
3612 {
3613 int ret;
3614
3615 /*
3616 * Call xsize_t() only when needed to avoid potentially unnecessary
3617 * die() for large files.
3618 */
3619 if (type == OBJ_BLOB && path && would_convert_to_git_filter_fd(path))
3620 ret = index_stream_convert_blob(sha1, fd, path, flags);
3621 else if (!S_ISREG(st->st_mode))
3622 ret = index_pipe(sha1, fd, type, path, flags);
3623 else if (st->st_size <= big_file_threshold || type != OBJ_BLOB ||
3624 (path && would_convert_to_git(&the_index, path)))
3625 ret = index_core(sha1, fd, xsize_t(st->st_size), type, path,
3626 flags);
3627 else
3628 ret = index_stream(sha1, fd, xsize_t(st->st_size), type, path,
3629 flags);
3630 close(fd);
3631 return ret;
3632 }
3633
3634 int index_path(unsigned char *sha1, const char *path, struct stat *st, unsigned flags)
3635 {
3636 int fd;
3637 struct strbuf sb = STRBUF_INIT;
3638
3639 switch (st->st_mode & S_IFMT) {
3640 case S_IFREG:
3641 fd = open(path, O_RDONLY);
3642 if (fd < 0)
3643 return error_errno("open(\"%s\")", path);
3644 if (index_fd(sha1, fd, st, OBJ_BLOB, path, flags) < 0)
3645 return error("%s: failed to insert into database",
3646 path);
3647 break;
3648 case S_IFLNK:
3649 if (strbuf_readlink(&sb, path, st->st_size))
3650 return error_errno("readlink(\"%s\")", path);
3651 if (!(flags & HASH_WRITE_OBJECT))
3652 hash_sha1_file(sb.buf, sb.len, blob_type, sha1);
3653 else if (write_sha1_file(sb.buf, sb.len, blob_type, sha1))
3654 return error("%s: failed to insert into database",
3655 path);
3656 strbuf_release(&sb);
3657 break;
3658 case S_IFDIR:
3659 return resolve_gitlink_ref(path, "HEAD", sha1);
3660 default:
3661 return error("%s: unsupported file type", path);
3662 }
3663 return 0;
3664 }
3665
3666 int read_pack_header(int fd, struct pack_header *header)
3667 {
3668 if (read_in_full(fd, header, sizeof(*header)) < sizeof(*header))
3669 /* "eof before pack header was fully read" */
3670 return PH_ERROR_EOF;
3671
3672 if (header->hdr_signature != htonl(PACK_SIGNATURE))
3673 /* "protocol error (pack signature mismatch detected)" */
3674 return PH_ERROR_PACK_SIGNATURE;
3675 if (!pack_version_ok(header->hdr_version))
3676 /* "protocol error (pack version unsupported)" */
3677 return PH_ERROR_PROTOCOL;
3678 return 0;
3679 }
3680
3681 void assert_sha1_type(const unsigned char *sha1, enum object_type expect)
3682 {
3683 enum object_type type = sha1_object_info(sha1, NULL);
3684 if (type < 0)
3685 die("%s is not a valid object", sha1_to_hex(sha1));
3686 if (type != expect)
3687 die("%s is not a valid '%s' object", sha1_to_hex(sha1),
3688 typename(expect));
3689 }
3690
3691 int for_each_file_in_obj_subdir(unsigned int subdir_nr,
3692 struct strbuf *path,
3693 each_loose_object_fn obj_cb,
3694 each_loose_cruft_fn cruft_cb,
3695 each_loose_subdir_fn subdir_cb,
3696 void *data)
3697 {
3698 size_t origlen, baselen;
3699 DIR *dir;
3700 struct dirent *de;
3701 int r = 0;
3702
3703 if (subdir_nr > 0xff)
3704 BUG("invalid loose object subdirectory: %x", subdir_nr);
3705
3706 origlen = path->len;
3707 strbuf_complete(path, '/');
3708 strbuf_addf(path, "%02x", subdir_nr);
3709 baselen = path->len;
3710
3711 dir = opendir(path->buf);
3712 if (!dir) {
3713 if (errno != ENOENT)
3714 r = error_errno("unable to open %s", path->buf);
3715 strbuf_setlen(path, origlen);
3716 return r;
3717 }
3718
3719 while ((de = readdir(dir))) {
3720 if (is_dot_or_dotdot(de->d_name))
3721 continue;
3722
3723 strbuf_setlen(path, baselen);
3724 strbuf_addf(path, "/%s", de->d_name);
3725
3726 if (strlen(de->d_name) == GIT_SHA1_HEXSZ - 2) {
3727 char hex[GIT_MAX_HEXSZ+1];
3728 struct object_id oid;
3729
3730 xsnprintf(hex, sizeof(hex), "%02x%s",
3731 subdir_nr, de->d_name);
3732 if (!get_oid_hex(hex, &oid)) {
3733 if (obj_cb) {
3734 r = obj_cb(&oid, path->buf, data);
3735 if (r)
3736 break;
3737 }
3738 continue;
3739 }
3740 }
3741
3742 if (cruft_cb) {
3743 r = cruft_cb(de->d_name, path->buf, data);
3744 if (r)
3745 break;
3746 }
3747 }
3748 closedir(dir);
3749
3750 strbuf_setlen(path, baselen);
3751 if (!r && subdir_cb)
3752 r = subdir_cb(subdir_nr, path->buf, data);
3753
3754 strbuf_setlen(path, origlen);
3755
3756 return r;
3757 }
3758
3759 int for_each_loose_file_in_objdir_buf(struct strbuf *path,
3760 each_loose_object_fn obj_cb,
3761 each_loose_cruft_fn cruft_cb,
3762 each_loose_subdir_fn subdir_cb,
3763 void *data)
3764 {
3765 int r = 0;
3766 int i;
3767
3768 for (i = 0; i < 256; i++) {
3769 r = for_each_file_in_obj_subdir(i, path, obj_cb, cruft_cb,
3770 subdir_cb, data);
3771 if (r)
3772 break;
3773 }
3774
3775 return r;
3776 }
3777
3778 int for_each_loose_file_in_objdir(const char *path,
3779 each_loose_object_fn obj_cb,
3780 each_loose_cruft_fn cruft_cb,
3781 each_loose_subdir_fn subdir_cb,
3782 void *data)
3783 {
3784 struct strbuf buf = STRBUF_INIT;
3785 int r;
3786
3787 strbuf_addstr(&buf, path);
3788 r = for_each_loose_file_in_objdir_buf(&buf, obj_cb, cruft_cb,
3789 subdir_cb, data);
3790 strbuf_release(&buf);
3791
3792 return r;
3793 }
3794
3795 struct loose_alt_odb_data {
3796 each_loose_object_fn *cb;
3797 void *data;
3798 };
3799
3800 static int loose_from_alt_odb(struct alternate_object_database *alt,
3801 void *vdata)
3802 {
3803 struct loose_alt_odb_data *data = vdata;
3804 struct strbuf buf = STRBUF_INIT;
3805 int r;
3806
3807 strbuf_addstr(&buf, alt->path);
3808 r = for_each_loose_file_in_objdir_buf(&buf,
3809 data->cb, NULL, NULL,
3810 data->data);
3811 strbuf_release(&buf);
3812 return r;
3813 }
3814
3815 int for_each_loose_object(each_loose_object_fn cb, void *data, unsigned flags)
3816 {
3817 struct loose_alt_odb_data alt;
3818 int r;
3819
3820 r = for_each_loose_file_in_objdir(get_object_directory(),
3821 cb, NULL, NULL, data);
3822 if (r)
3823 return r;
3824
3825 if (flags & FOR_EACH_OBJECT_LOCAL_ONLY)
3826 return 0;
3827
3828 alt.cb = cb;
3829 alt.data = data;
3830 return foreach_alt_odb(loose_from_alt_odb, &alt);
3831 }
3832
3833 static int for_each_object_in_pack(struct packed_git *p, each_packed_object_fn cb, void *data)
3834 {
3835 uint32_t i;
3836 int r = 0;
3837
3838 for (i = 0; i < p->num_objects; i++) {
3839 struct object_id oid;
3840
3841 if (!nth_packed_object_oid(&oid, p, i))
3842 return error("unable to get sha1 of object %u in %s",
3843 i, p->pack_name);
3844
3845 r = cb(&oid, p, i, data);
3846 if (r)
3847 break;
3848 }
3849 return r;
3850 }
3851
3852 int for_each_packed_object(each_packed_object_fn cb, void *data, unsigned flags)
3853 {
3854 struct packed_git *p;
3855 int r = 0;
3856 int pack_errors = 0;
3857
3858 prepare_packed_git();
3859 for (p = packed_git; p; p = p->next) {
3860 if ((flags & FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local)
3861 continue;
3862 if (open_pack_index(p)) {
3863 pack_errors = 1;
3864 continue;
3865 }
3866 r = for_each_object_in_pack(p, cb, data);
3867 if (r)
3868 break;
3869 }
3870 return r ? r : pack_errors;
3871 }
3872
3873 static int check_stream_sha1(git_zstream *stream,
3874 const char *hdr,
3875 unsigned long size,
3876 const char *path,
3877 const unsigned char *expected_sha1)
3878 {
3879 git_SHA_CTX c;
3880 unsigned char real_sha1[GIT_MAX_RAWSZ];
3881 unsigned char buf[4096];
3882 unsigned long total_read;
3883 int status = Z_OK;
3884
3885 git_SHA1_Init(&c);
3886 git_SHA1_Update(&c, hdr, stream->total_out);
3887
3888 /*
3889 * We already read some bytes into hdr, but the ones up to the NUL
3890 * do not count against the object's content size.
3891 */
3892 total_read = stream->total_out - strlen(hdr) - 1;
3893
3894 /*
3895 * This size comparison must be "<=" to read the final zlib packets;
3896 * see the comment in unpack_sha1_rest for details.
3897 */
3898 while (total_read <= size &&
3899 (status == Z_OK || status == Z_BUF_ERROR)) {
3900 stream->next_out = buf;
3901 stream->avail_out = sizeof(buf);
3902 if (size - total_read < stream->avail_out)
3903 stream->avail_out = size - total_read;
3904 status = git_inflate(stream, Z_FINISH);
3905 git_SHA1_Update(&c, buf, stream->next_out - buf);
3906 total_read += stream->next_out - buf;
3907 }
3908 git_inflate_end(stream);
3909
3910 if (status != Z_STREAM_END) {
3911 error("corrupt loose object '%s'", sha1_to_hex(expected_sha1));
3912 return -1;
3913 }
3914 if (stream->avail_in) {
3915 error("garbage at end of loose object '%s'",
3916 sha1_to_hex(expected_sha1));
3917 return -1;
3918 }
3919
3920 git_SHA1_Final(real_sha1, &c);
3921 if (hashcmp(expected_sha1, real_sha1)) {
3922 error("sha1 mismatch for %s (expected %s)", path,
3923 sha1_to_hex(expected_sha1));
3924 return -1;
3925 }
3926
3927 return 0;
3928 }
3929
3930 int read_loose_object(const char *path,
3931 const unsigned char *expected_sha1,
3932 enum object_type *type,
3933 unsigned long *size,
3934 void **contents)
3935 {
3936 int ret = -1;
3937 void *map = NULL;
3938 unsigned long mapsize;
3939 git_zstream stream;
3940 char hdr[32];
3941
3942 *contents = NULL;
3943
3944 map = map_sha1_file_1(path, NULL, &mapsize);
3945 if (!map) {
3946 error_errno("unable to mmap %s", path);
3947 goto out;
3948 }
3949
3950 if (unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0) {
3951 error("unable to unpack header of %s", path);
3952 goto out;
3953 }
3954
3955 *type = parse_sha1_header(hdr, size);
3956 if (*type < 0) {
3957 error("unable to parse header of %s", path);
3958 git_inflate_end(&stream);
3959 goto out;
3960 }
3961
3962 if (*type == OBJ_BLOB) {
3963 if (check_stream_sha1(&stream, hdr, *size, path, expected_sha1) < 0)
3964 goto out;
3965 } else {
3966 *contents = unpack_sha1_rest(&stream, hdr, *size, expected_sha1);
3967 if (!*contents) {
3968 error("unable to unpack contents of %s", path);
3969 git_inflate_end(&stream);
3970 goto out;
3971 }
3972 if (check_sha1_signature(expected_sha1, *contents,
3973 *size, typename(*type))) {
3974 error("sha1 mismatch for %s (expected %s)", path,
3975 sha1_to_hex(expected_sha1));
3976 free(*contents);
3977 goto out;
3978 }
3979 }
3980
3981 ret = 0; /* everything checks out */
3982
3983 out:
3984 if (map)
3985 munmap(map, mapsize);
3986 return ret;
3987 }