]> git.ipfire.org Git - thirdparty/git.git/blame - fsck.c
hex.h: move some hex-related declarations from cache.h
[thirdparty/git.git] / fsck.c
CommitLineData
36bf1958
EN
1#include "git-compat-util.h"
2#include "alloc.h"
cbd53a21 3#include "object-store.h"
109cd76d 4#include "repository.h"
355885d5 5#include "object.h"
27ab4784 6#include "attr.h"
355885d5
MK
7#include "blob.h"
8#include "tree.h"
9#include "tree-walk.h"
10#include "commit.h"
11#include "tag.h"
12#include "fsck.h"
cec097be 13#include "refs.h"
a2b26ffb 14#include "url.h"
a18fcc9f 15#include "utf8.h"
7b35efd7 16#include "decorate.h"
159e7b08 17#include "oidset.h"
27387444 18#include "packfile.h"
ed8b10f6
JK
19#include "submodule-config.h"
20#include "config.h"
07259e74 21#include "credential.h"
3ac68a93 22#include "help.h"
159e7b08 23
f417eed8 24#define STR(x) #x
a4a9cc19 25#define MSG_ID(id, msg_type) { STR(id), NULL, NULL, FSCK_##msg_type },
c99ba492 26static struct {
f417eed8
JS
27 const char *id_string;
28 const char *downcased;
a4a9cc19 29 const char *camelcased;
1b32b59f 30 enum fsck_msg_type msg_type;
c99ba492 31} msg_id_info[FSCK_MSG_MAX + 1] = {
901f2f67 32 FOREACH_FSCK_MSG_ID(MSG_ID)
a4a9cc19 33 { NULL, NULL, NULL, -1 }
c99ba492
JS
34};
35#undef MSG_ID
b5495024 36#undef STR
c99ba492 37
a46baac6 38static void prepare_msg_ids(void)
f417eed8
JS
39{
40 int i;
41
a46baac6
NTND
42 if (msg_id_info[0].downcased)
43 return;
44
45 /* convert id_string to lower case, without underscores. */
46 for (i = 0; i < FSCK_MSG_MAX; i++) {
47 const char *p = msg_id_info[i].id_string;
48 int len = strlen(p);
49 char *q = xmalloc(len);
50
51 msg_id_info[i].downcased = q;
52 while (*p)
53 if (*p == '_')
54 p++;
55 else
56 *(q)++ = tolower(*(p)++);
57 *q = '\0';
a4a9cc19
NTND
58
59 p = msg_id_info[i].id_string;
60 q = xmalloc(len);
61 msg_id_info[i].camelcased = q;
62 while (*p) {
63 if (*p == '_') {
64 p++;
65 if (*p)
66 *q++ = *p++;
67 } else {
68 *q++ = tolower(*p++);
69 }
f417eed8 70 }
a4a9cc19 71 *q = '\0';
f417eed8 72 }
a46baac6
NTND
73}
74
75static int parse_msg_id(const char *text)
76{
77 int i;
78
79 prepare_msg_ids();
f417eed8
JS
80
81 for (i = 0; i < FSCK_MSG_MAX; i++)
82 if (!strcmp(text, msg_id_info[i].downcased))
83 return i;
84
85 return -1;
86}
87
3ac68a93
NTND
88void list_config_fsck_msg_ids(struct string_list *list, const char *prefix)
89{
90 int i;
91
92 prepare_msg_ids();
93
3ac68a93 94 for (i = 0; i < FSCK_MSG_MAX; i++)
a4a9cc19 95 list_config_item(list, prefix, msg_id_info[i].camelcased);
3ac68a93
NTND
96}
97
1b32b59f 98static enum fsck_msg_type fsck_msg_type(enum fsck_msg_id msg_id,
c99ba492
JS
99 struct fsck_options *options)
100{
0282f4dc
JS
101 assert(msg_id >= 0 && msg_id < FSCK_MSG_MAX);
102
e35d65a7 103 if (!options->msg_type) {
1b32b59f 104 enum fsck_msg_type msg_type = msg_id_info[msg_id].msg_type;
e35d65a7 105
0282f4dc
JS
106 if (options->strict && msg_type == FSCK_WARN)
107 msg_type = FSCK_ERROR;
e35d65a7 108 return msg_type;
0282f4dc 109 }
c99ba492 110
e35d65a7 111 return options->msg_type[msg_id];
c99ba492
JS
112}
113
1b32b59f 114static enum fsck_msg_type parse_msg_type(const char *str)
0282f4dc
JS
115{
116 if (!strcmp(str, "error"))
117 return FSCK_ERROR;
118 else if (!strcmp(str, "warn"))
119 return FSCK_WARN;
efaba7cc
JS
120 else if (!strcmp(str, "ignore"))
121 return FSCK_IGNORE;
0282f4dc
JS
122 else
123 die("Unknown fsck message type: '%s'", str);
124}
125
5d477a33
JS
/*
 * Return 0 when "msg_id" is not a known (downcased) message ID.
 * Otherwise run "msg_type" through parse_msg_type(), which die()s on
 * an unknown severity keyword, and return 1.
 */
int is_valid_msg_type(const char *msg_id, const char *msg_type)
{
	int known_id = parse_msg_id(msg_id) >= 0;

	if (known_id)
		parse_msg_type(msg_type);
	return known_id;
}
133
53692df2
ÆAB
134void fsck_set_msg_type_from_ids(struct fsck_options *options,
135 enum fsck_msg_id msg_id,
136 enum fsck_msg_type msg_type)
137{
138 if (!options->msg_type) {
139 int i;
140 enum fsck_msg_type *severity;
141 ALLOC_ARRAY(severity, FSCK_MSG_MAX);
142 for (i = 0; i < FSCK_MSG_MAX; i++)
143 severity[i] = fsck_msg_type(i, options);
144 options->msg_type = severity;
145 }
146
147 options->msg_type[msg_id] = msg_type;
148}
149
0282f4dc 150void fsck_set_msg_type(struct fsck_options *options,
f1abc2d0 151 const char *msg_id_str, const char *msg_type_str)
0282f4dc 152{
1b32b59f 153 int msg_id = parse_msg_id(msg_id_str);
c72da1a2 154 enum fsck_msg_type msg_type = parse_msg_type(msg_type_str);
0282f4dc 155
f1abc2d0
ÆAB
156 if (msg_id < 0)
157 die("Unhandled message id: %s", msg_id_str);
0282f4dc 158
f1abc2d0
ÆAB
159 if (msg_type != FSCK_ERROR && msg_id_info[msg_id].msg_type == FSCK_FATAL)
160 die("Cannot demote %s to %s", msg_id_str, msg_type_str);
f50c4407 161
53692df2 162 fsck_set_msg_type_from_ids(options, msg_id, msg_type);
0282f4dc
JS
163}
164
165void fsck_set_msg_types(struct fsck_options *options, const char *values)
166{
167 char *buf = xstrdup(values), *to_free = buf;
168 int done = 0;
169
170 while (!done) {
171 int len = strcspn(buf, " ,|"), equal;
172
173 done = !buf[len];
174 if (!len) {
175 buf++;
176 continue;
177 }
178 buf[len] = '\0';
179
180 for (equal = 0;
181 equal < len && buf[equal] != '=' && buf[equal] != ':';
182 equal++)
183 buf[equal] = tolower(buf[equal]);
184 buf[equal] = '\0';
185
cd94c6f9
JS
186 if (!strcmp(buf, "skiplist")) {
187 if (equal == len)
188 die("skiplist requires a path");
24eb33eb 189 oidset_parse_file(&options->skiplist, buf + equal + 1);
cd94c6f9
JS
190 buf += len + 1;
191 continue;
192 }
193
0282f4dc
JS
194 if (equal == len)
195 die("Missing '=': '%s'", buf);
355885d5 196
0282f4dc
JS
197 fsck_set_msg_type(options, buf, buf + equal + 1);
198 buf += len + 1;
199 }
200 free(to_free);
201}
202
f5979376
JK
203static int object_on_skiplist(struct fsck_options *opts,
204 const struct object_id *oid)
fb162877 205{
f5979376 206 return opts && oid && oidset_contains(&opts->skiplist, oid);
fb162877
RJ
207}
208
38370253
JK
209__attribute__((format (printf, 5, 6)))
210static int report(struct fsck_options *options,
211 const struct object_id *oid, enum object_type object_type,
35af754b 212 enum fsck_msg_id msg_id, const char *fmt, ...)
c99ba492
JS
213{
214 va_list ap;
215 struct strbuf sb = STRBUF_INIT;
1b32b59f
ÆAB
216 enum fsck_msg_type msg_type = fsck_msg_type(msg_id, options);
217 int result;
c99ba492 218
efaba7cc
JS
219 if (msg_type == FSCK_IGNORE)
220 return 0;
221
38370253 222 if (object_on_skiplist(options, oid))
cd94c6f9
JS
223 return 0;
224
f50c4407
JS
225 if (msg_type == FSCK_FATAL)
226 msg_type = FSCK_ERROR;
f27d05b1
JS
227 else if (msg_type == FSCK_INFO)
228 msg_type = FSCK_WARN;
f50c4407 229
034a7b7b 230 prepare_msg_ids();
35af754b 231 strbuf_addf(&sb, "%s: ", msg_id_info[msg_id].camelcased);
71ab8fa8 232
c99ba492
JS
233 va_start(ap, fmt);
234 strbuf_vaddf(&sb, fmt, ap);
38370253 235 result = options->error_func(options, oid, object_type,
394d5d31 236 msg_type, msg_id, sb.buf);
c99ba492
JS
237 strbuf_release(&sb);
238 va_end(ap);
239
240 return result;
241}
242
a59cfb32
JK
243void fsck_enable_object_names(struct fsck_options *options)
244{
245 if (!options->object_names)
73390290 246 options->object_names = kh_init_oid_map();
a59cfb32
JK
247}
248
73390290
JK
249const char *fsck_get_object_name(struct fsck_options *options,
250 const struct object_id *oid)
7b35efd7 251{
73390290 252 khiter_t pos;
7b35efd7
JS
253 if (!options->object_names)
254 return NULL;
73390290
JK
255 pos = kh_get_oid_map(options->object_names, *oid);
256 if (pos >= kh_end(options->object_names))
257 return NULL;
258 return kh_value(options->object_names, pos);
7b35efd7
JS
259}
260
73390290
JK
261void fsck_put_object_name(struct fsck_options *options,
262 const struct object_id *oid,
a59cfb32 263 const char *fmt, ...)
7b35efd7
JS
264{
265 va_list ap;
266 struct strbuf buf = STRBUF_INIT;
73390290
JK
267 khiter_t pos;
268 int hashret;
7b35efd7
JS
269
270 if (!options->object_names)
271 return;
73390290
JK
272
273 pos = kh_put_oid_map(options->object_names, *oid, &hashret);
274 if (!hashret)
7b35efd7
JS
275 return;
276 va_start(ap, fmt);
277 strbuf_vaddf(&buf, fmt, ap);
73390290 278 kh_value(options->object_names, pos) = strbuf_detach(&buf, NULL);
7b35efd7
JS
279 va_end(ap);
280}
281
a59cfb32 282const char *fsck_describe_object(struct fsck_options *options,
73390290 283 const struct object_id *oid)
90cf590f 284{
a59cfb32
JK
285 static struct strbuf bufs[] = {
286 STRBUF_INIT, STRBUF_INIT, STRBUF_INIT, STRBUF_INIT
287 };
288 static int b = 0;
289 struct strbuf *buf;
73390290 290 const char *name = fsck_get_object_name(options, oid);
a59cfb32
JK
291
292 buf = bufs + b;
293 b = (b + 1) % ARRAY_SIZE(bufs);
294 strbuf_reset(buf);
73390290 295 strbuf_addstr(buf, oid_to_hex(oid));
a59cfb32
JK
296 if (name)
297 strbuf_addf(buf, " (%s)", name);
90cf590f 298
a59cfb32 299 return buf->buf;
90cf590f
JS
300}
301
22410549 302static int fsck_walk_tree(struct tree *tree, void *data, struct fsck_options *options)
355885d5
MK
303{
304 struct tree_desc desc;
305 struct name_entry entry;
306 int res = 0;
7b35efd7 307 const char *name;
355885d5
MK
308
309 if (parse_tree(tree))
310 return -1;
311
73390290 312 name = fsck_get_object_name(options, &tree->object.oid);
ec18b10b 313 if (init_tree_desc_gently(&desc, tree->buffer, tree->size, 0))
8354fa3d
DT
314 return -1;
315 while (tree_entry_gently(&desc, &entry)) {
7b35efd7 316 struct object *obj;
355885d5
MK
317 int result;
318
319 if (S_ISGITLINK(entry.mode))
320 continue;
7b35efd7
JS
321
322 if (S_ISDIR(entry.mode)) {
ea82b2a0 323 obj = (struct object *)lookup_tree(the_repository, &entry.oid);
2720f6db 324 if (name && obj)
73390290 325 fsck_put_object_name(options, &entry.oid, "%s%s/",
a59cfb32 326 name, entry.path);
7b35efd7
JS
327 result = options->walk(obj, OBJ_TREE, data, options);
328 }
329 else if (S_ISREG(entry.mode) || S_ISLNK(entry.mode)) {
ea82b2a0 330 obj = (struct object *)lookup_blob(the_repository, &entry.oid);
2720f6db 331 if (name && obj)
73390290 332 fsck_put_object_name(options, &entry.oid, "%s%s",
a59cfb32 333 name, entry.path);
7b35efd7
JS
334 result = options->walk(obj, OBJ_BLOB, data, options);
335 }
355885d5 336 else {
82247e9b 337 result = error("in tree %s: entry %s has bad mode %.6o",
73390290 338 fsck_describe_object(options, &tree->object.oid),
a59cfb32 339 entry.path, entry.mode);
355885d5
MK
340 }
341 if (result < 0)
342 return result;
343 if (!res)
344 res = result;
345 }
346 return res;
347}
348
22410549 349static int fsck_walk_commit(struct commit *commit, void *data, struct fsck_options *options)
355885d5 350{
7b35efd7 351 int counter = 0, generation = 0, name_prefix_len = 0;
355885d5
MK
352 struct commit_list *parents;
353 int res;
354 int result;
7b35efd7 355 const char *name;
355885d5
MK
356
357 if (parse_commit(commit))
358 return -1;
359
73390290 360 name = fsck_get_object_name(options, &commit->object.oid);
7b35efd7 361 if (name)
73390290 362 fsck_put_object_name(options, get_commit_tree_oid(commit),
a59cfb32 363 "%s:", name);
7b35efd7 364
2e27bd77
DS
365 result = options->walk((struct object *)get_commit_tree(commit),
366 OBJ_TREE, data, options);
355885d5
MK
367 if (result < 0)
368 return result;
369 res = result;
370
371 parents = commit->parents;
7b35efd7
JS
372 if (name && parents) {
373 int len = strlen(name), power;
374
375 if (len && name[len - 1] == '^') {
376 generation = 1;
377 name_prefix_len = len - 1;
378 }
379 else { /* parse ~<generation> suffix */
380 for (generation = 0, power = 1;
381 len && isdigit(name[len - 1]);
382 power *= 10)
383 generation += power * (name[--len] - '0');
384 if (power > 1 && len && name[len - 1] == '~')
385 name_prefix_len = len - 1;
e89f8936
JS
386 else {
387 /* Maybe a non-first parent, e.g. HEAD^2 */
388 generation = 0;
389 name_prefix_len = len;
390 }
7b35efd7
JS
391 }
392 }
393
355885d5 394 while (parents) {
7b35efd7 395 if (name) {
73390290 396 struct object_id *oid = &parents->item->object.oid;
7b35efd7 397
b84c7838 398 if (counter++)
73390290 399 fsck_put_object_name(options, oid, "%s^%d",
a59cfb32 400 name, counter);
7b35efd7 401 else if (generation > 0)
73390290 402 fsck_put_object_name(options, oid, "%.*s~%d",
a59cfb32
JK
403 name_prefix_len, name,
404 generation + 1);
7b35efd7 405 else
73390290 406 fsck_put_object_name(options, oid, "%s^", name);
7b35efd7 407 }
22410549 408 result = options->walk((struct object *)parents->item, OBJ_COMMIT, data, options);
355885d5
MK
409 if (result < 0)
410 return result;
411 if (!res)
412 res = result;
413 parents = parents->next;
414 }
415 return res;
416}
417
22410549 418static int fsck_walk_tag(struct tag *tag, void *data, struct fsck_options *options)
355885d5 419{
73390290 420 const char *name = fsck_get_object_name(options, &tag->object.oid);
7b35efd7 421
355885d5
MK
422 if (parse_tag(tag))
423 return -1;
7b35efd7 424 if (name)
73390290 425 fsck_put_object_name(options, &tag->tagged->oid, "%s", name);
22410549 426 return options->walk(tag->tagged, OBJ_ANY, data, options);
355885d5
MK
427}
428
22410549 429int fsck_walk(struct object *obj, void *data, struct fsck_options *options)
355885d5
MK
430{
431 if (!obj)
432 return -1;
a2b22854
JK
433
434 if (obj->type == OBJ_NONE)
109cd76d 435 parse_object(the_repository, &obj->oid);
a2b22854 436
355885d5
MK
437 switch (obj->type) {
438 case OBJ_BLOB:
439 return 0;
440 case OBJ_TREE:
22410549 441 return fsck_walk_tree((struct tree *)obj, data, options);
355885d5 442 case OBJ_COMMIT:
22410549 443 return fsck_walk_commit((struct commit *)obj, data, options);
355885d5 444 case OBJ_TAG:
22410549 445 return fsck_walk_tag((struct tag *)obj, data, options);
355885d5 446 default:
a59cfb32 447 error("Unknown object type for %s",
73390290 448 fsck_describe_object(options, &obj->oid));
355885d5
MK
449 return -1;
450 }
451}
ba002f3b 452
9068cfb2
RS
/*
 * A growable stack of borrowed name pointers; the stack does not copy
 * or free the strings themselves.
 */
struct name_stack {
	const char **names;	/* the stacked pointers */
	size_t nr;		/* number of entries in use */
	size_t alloc;		/* allocation size, as tracked by ALLOC_GROW() */
};
457
458static void name_stack_push(struct name_stack *stack, const char *name)
459{
460 ALLOC_GROW(stack->names, stack->nr + 1, stack->alloc);
461 stack->names[stack->nr++] = name;
462}
463
464static const char *name_stack_pop(struct name_stack *stack)
465{
466 return stack->nr ? stack->names[--stack->nr] : NULL;
467}
468
469static void name_stack_clear(struct name_stack *stack)
470{
471 FREE_AND_NULL(stack->names);
472 stack->nr = stack->alloc = 0;
473}
474
ba002f3b
MK
/*
 * Tree entries are sorted in _path_ order: a directory entry is
 * ordered as though its name ended in a slash.
 *
 * A directory called "a" is therefore ordered _after_ a file called
 * "a.c", because "a/" sorts after "a.c".
 *
 * verify_ordered() returns 0 for a correctly ordered pair of entries,
 * or one of the negative codes below.
 */
#define TREE_UNORDERED (-1)	/* the two entries are out of order */
#define TREE_HAS_DUPS  (-2)	/* the two entries share a name */
485
9068cfb2
RS
/*
 * Return 1 if "c" is a non-NUL byte that sorts strictly before '/',
 * and 0 otherwise.
 */
static int is_less_than_slash(unsigned char c)
{
	return c != '\0' && c < '/';
}
490
491static int verify_ordered(unsigned mode1, const char *name1,
492 unsigned mode2, const char *name2,
493 struct name_stack *candidates)
ba002f3b
MK
494{
495 int len1 = strlen(name1);
496 int len2 = strlen(name2);
497 int len = len1 < len2 ? len1 : len2;
498 unsigned char c1, c2;
499 int cmp;
500
501 cmp = memcmp(name1, name2, len);
502 if (cmp < 0)
503 return 0;
504 if (cmp > 0)
505 return TREE_UNORDERED;
506
507 /*
508 * Ok, the first <len> characters are the same.
509 * Now we need to order the next one, but turn
510 * a '\0' into a '/' for a directory entry.
511 */
512 c1 = name1[len];
513 c2 = name2[len];
514 if (!c1 && !c2)
515 /*
516 * git-write-tree used to write out a nonsense tree that has
517 * entries with the same name, one blob and one tree. Make
518 * sure we do not have duplicate entries.
519 */
520 return TREE_HAS_DUPS;
521 if (!c1 && S_ISDIR(mode1))
522 c1 = '/';
523 if (!c2 && S_ISDIR(mode2))
524 c2 = '/';
9068cfb2
RS
525
526 /*
527 * There can be non-consecutive duplicates due to the implicitly
86715592 528 * added slash, e.g.:
9068cfb2
RS
529 *
530 * foo
531 * foo.bar
532 * foo.bar.baz
533 * foo.bar/
534 * foo/
535 *
536 * Record non-directory candidates (like "foo" and "foo.bar" in
537 * the example) on a stack and check directory candidates (like
538 * foo/" and "foo.bar/") against that stack.
539 */
540 if (!c1 && is_less_than_slash(c2)) {
541 name_stack_push(candidates, name1);
542 } else if (c2 == '/' && is_less_than_slash(c1)) {
543 for (;;) {
544 const char *p;
545 const char *f_name = name_stack_pop(candidates);
546
547 if (!f_name)
548 break;
549 if (!skip_prefix(name2, f_name, &p))
fe747043 550 continue;
9068cfb2
RS
551 if (!*p)
552 return TREE_HAS_DUPS;
553 if (is_less_than_slash(*p)) {
554 name_stack_push(candidates, f_name);
555 break;
556 }
557 }
558 }
559
ba002f3b
MK
560 return c1 < c2 ? 0 : TREE_UNORDERED;
561}
562
9e1947cb 563static int fsck_tree(const struct object_id *tree_oid,
23a173a7
JK
564 const char *buffer, unsigned long size,
565 struct fsck_options *options)
ba002f3b 566{
8354fa3d 567 int retval = 0;
c479d14a 568 int has_null_sha1 = 0;
ba002f3b
MK
569 int has_full_path = 0;
570 int has_empty_name = 0;
5d34a435
JK
571 int has_dot = 0;
572 int has_dotdot = 0;
5c17f512 573 int has_dotgit = 0;
ba002f3b
MK
574 int has_zero_pad = 0;
575 int has_bad_modes = 0;
576 int has_dup_entries = 0;
577 int not_properly_sorted = 0;
578 struct tree_desc desc;
579 unsigned o_mode;
580 const char *o_name;
9068cfb2 581 struct name_stack df_dup_candidates = { NULL };
ba002f3b 582
53602a93 583 if (init_tree_desc_gently(&desc, buffer, size, TREE_DESC_RAW_MODES)) {
0282f679
JK
584 retval += report(options, tree_oid, OBJ_TREE,
585 FSCK_MSG_BAD_TREE,
586 "cannot be parsed as a tree");
8354fa3d
DT
587 return retval;
588 }
ba002f3b
MK
589
590 o_mode = 0;
591 o_name = NULL;
ba002f3b
MK
592
593 while (desc.size) {
5ec1e728 594 unsigned short mode;
288a74bc 595 const char *name, *backslash;
9e1947cb 596 const struct object_id *entry_oid;
ba002f3b 597
9e1947cb 598 entry_oid = tree_entry_extract(&desc, &name, &mode);
ba002f3b 599
9e1947cb 600 has_null_sha1 |= is_null_oid(entry_oid);
effd12ec
HS
601 has_full_path |= !!strchr(name, '/');
602 has_empty_name |= !*name;
603 has_dot |= !strcmp(name, ".");
604 has_dotdot |= !strcmp(name, "..");
ed9c3220 605 has_dotgit |= is_hfs_dotgit(name) || is_ntfs_dotgit(name);
ba002f3b 606 has_zero_pad |= *(char *)desc.buffer == '0';
159e7b08 607
b7b1fca1
JK
608 if (is_hfs_dotgitmodules(name) || is_ntfs_dotgitmodules(name)) {
609 if (!S_ISLNK(mode))
9e1947cb
JK
610 oidset_insert(&options->gitmodules_found,
611 entry_oid);
b7b1fca1 612 else
38370253 613 retval += report(options,
9e1947cb 614 tree_oid, OBJ_TREE,
b7b1fca1
JK
615 FSCK_MSG_GITMODULES_SYMLINK,
616 ".gitmodules is a symbolic link");
617 }
159e7b08 618
f8587c31 619 if (is_hfs_dotgitattributes(name) || is_ntfs_dotgitattributes(name)) {
27ab4784
PS
620 if (!S_ISLNK(mode))
621 oidset_insert(&options->gitattributes_found,
622 entry_oid);
623 else
f8587c31
PS
624 retval += report(options, tree_oid, OBJ_TREE,
625 FSCK_MSG_GITATTRIBUTES_SYMLINK,
626 ".gitattributes is a symlink");
627 }
628
bb6832d5
JK
629 if (S_ISLNK(mode)) {
630 if (is_hfs_dotgitignore(name) ||
631 is_ntfs_dotgitignore(name))
632 retval += report(options, tree_oid, OBJ_TREE,
633 FSCK_MSG_GITIGNORE_SYMLINK,
634 ".gitignore is a symlink");
bb6832d5
JK
635 if (is_hfs_dotmailmap(name) ||
636 is_ntfs_dotmailmap(name))
637 retval += report(options, tree_oid, OBJ_TREE,
638 FSCK_MSG_MAILMAP_SYMLINK,
639 ".mailmap is a symlink");
640 }
641
288a74bc
JS
642 if ((backslash = strchr(name, '\\'))) {
643 while (backslash) {
644 backslash++;
645 has_dotgit |= is_ntfs_dotgit(backslash);
bdfef049
JS
646 if (is_ntfs_dotgitmodules(backslash)) {
647 if (!S_ISLNK(mode))
9e1947cb
JK
648 oidset_insert(&options->gitmodules_found,
649 entry_oid);
bdfef049 650 else
9e1947cb 651 retval += report(options, tree_oid, OBJ_TREE,
bdfef049
JS
652 FSCK_MSG_GITMODULES_SYMLINK,
653 ".gitmodules is a symbolic link");
654 }
288a74bc
JS
655 backslash = strchr(backslash, '\\');
656 }
657 }
658
8354fa3d 659 if (update_tree_entry_gently(&desc)) {
0282f679
JK
660 retval += report(options, tree_oid, OBJ_TREE,
661 FSCK_MSG_BAD_TREE,
662 "cannot be parsed as a tree");
8354fa3d
DT
663 break;
664 }
ba002f3b
MK
665
666 switch (mode) {
667 /*
668 * Standard modes..
669 */
670 case S_IFREG | 0755:
671 case S_IFREG | 0644:
672 case S_IFLNK:
673 case S_IFDIR:
674 case S_IFGITLINK:
675 break;
676 /*
677 * This is nonstandard, but we had a few of these
678 * early on when we honored the full set of mode
679 * bits..
680 */
681 case S_IFREG | 0664:
22410549 682 if (!options->strict)
ba002f3b 683 break;
1cf01a34 684 /* fallthrough */
ba002f3b
MK
685 default:
686 has_bad_modes = 1;
687 }
688
689 if (o_name) {
9068cfb2
RS
690 switch (verify_ordered(o_mode, o_name, mode, name,
691 &df_dup_candidates)) {
ba002f3b
MK
692 case TREE_UNORDERED:
693 not_properly_sorted = 1;
694 break;
695 case TREE_HAS_DUPS:
696 has_dup_entries = 1;
697 break;
698 default:
699 break;
700 }
701 }
702
703 o_mode = mode;
704 o_name = name;
ba002f3b
MK
705 }
706
9068cfb2
RS
707 name_stack_clear(&df_dup_candidates);
708
c479d14a 709 if (has_null_sha1)
0282f679
JK
710 retval += report(options, tree_oid, OBJ_TREE,
711 FSCK_MSG_NULL_SHA1,
712 "contains entries pointing to null sha1");
ba002f3b 713 if (has_full_path)
0282f679
JK
714 retval += report(options, tree_oid, OBJ_TREE,
715 FSCK_MSG_FULL_PATHNAME,
716 "contains full pathnames");
ba002f3b 717 if (has_empty_name)
0282f679
JK
718 retval += report(options, tree_oid, OBJ_TREE,
719 FSCK_MSG_EMPTY_NAME,
720 "contains empty pathname");
5d34a435 721 if (has_dot)
0282f679
JK
722 retval += report(options, tree_oid, OBJ_TREE,
723 FSCK_MSG_HAS_DOT,
724 "contains '.'");
5d34a435 725 if (has_dotdot)
0282f679
JK
726 retval += report(options, tree_oid, OBJ_TREE,
727 FSCK_MSG_HAS_DOTDOT,
728 "contains '..'");
5c17f512 729 if (has_dotgit)
0282f679
JK
730 retval += report(options, tree_oid, OBJ_TREE,
731 FSCK_MSG_HAS_DOTGIT,
732 "contains '.git'");
ba002f3b 733 if (has_zero_pad)
0282f679
JK
734 retval += report(options, tree_oid, OBJ_TREE,
735 FSCK_MSG_ZERO_PADDED_FILEMODE,
736 "contains zero-padded file modes");
ba002f3b 737 if (has_bad_modes)
0282f679
JK
738 retval += report(options, tree_oid, OBJ_TREE,
739 FSCK_MSG_BAD_FILEMODE,
740 "contains bad file modes");
ba002f3b 741 if (has_dup_entries)
0282f679
JK
742 retval += report(options, tree_oid, OBJ_TREE,
743 FSCK_MSG_DUPLICATE_ENTRIES,
744 "contains duplicate file entries");
ba002f3b 745 if (not_properly_sorted)
0282f679
JK
746 retval += report(options, tree_oid, OBJ_TREE,
747 FSCK_MSG_TREE_NOT_SORTED,
748 "not properly sorted");
ba002f3b
MK
749 return retval;
750}
751
8e430903
JK
752/*
753 * Confirm that the headers of a commit or tag object end in a reasonable way,
754 * either with the usual "\n\n" separator, or at least with a trailing newline
755 * on the final header line.
756 *
757 * This property is important for the memory safety of our callers. It allows
758 * them to scan the buffer linewise without constantly checking the remaining
759 * size as long as:
760 *
761 * - they check that there are bytes left in the buffer at the start of any
762 * line (i.e., that the last newline they saw was not the final one we
763 * found here)
764 *
765 * - any intra-line scanning they do will stop at a newline, which will worst
766 * case hit the newline we found here as the end-of-header. This makes it
767 * OK for them to use helpers like parse_oid_hex(), or even skip_prefix().
768 */
84d18c0b 769static int verify_headers(const void *data, unsigned long size,
cc579000
JK
770 const struct object_id *oid, enum object_type type,
771 struct fsck_options *options)
4d0d8975
JS
772{
773 const char *buffer = (const char *)data;
774 unsigned long i;
775
776 for (i = 0; i < size; i++) {
777 switch (buffer[i]) {
778 case '\0':
cc579000 779 return report(options, oid, type,
c99ba492
JS
780 FSCK_MSG_NUL_IN_HEADER,
781 "unterminated header: NUL at offset %ld", i);
4d0d8975
JS
782 case '\n':
783 if (i + 1 < size && buffer[i + 1] == '\n')
784 return 0;
785 }
786 }
787
84d18c0b
JH
788 /*
789 * We did not find double-LF that separates the header
790 * and the body. Not having a body is not a crime but
791 * we do want to see the terminating LF for the last header
792 * line.
793 */
794 if (size && buffer[size - 1] == '\n')
795 return 0;
796
cc579000 797 return report(options, oid, type,
c99ba492 798 FSCK_MSG_UNTERMINATED_HEADER, "unterminated header");
4d0d8975
JS
799}
800
78543993
JK
801static int fsck_ident(const char **ident,
802 const struct object_id *oid, enum object_type type,
803 struct fsck_options *options)
daae1922 804{
e6826e33 805 const char *p = *ident;
d4b8de04
JK
806 char *end;
807
e6826e33
JS
808 *ident = strchrnul(*ident, '\n');
809 if (**ident == '\n')
810 (*ident)++;
811
812 if (*p == '<')
78543993 813 return report(options, oid, type, FSCK_MSG_MISSING_NAME_BEFORE_EMAIL, "invalid author/committer line - missing space before email");
e6826e33
JS
814 p += strcspn(p, "<>\n");
815 if (*p == '>')
78543993 816 return report(options, oid, type, FSCK_MSG_BAD_NAME, "invalid author/committer line - bad name");
e6826e33 817 if (*p != '<')
78543993 818 return report(options, oid, type, FSCK_MSG_MISSING_EMAIL, "invalid author/committer line - missing email");
e6826e33 819 if (p[-1] != ' ')
78543993 820 return report(options, oid, type, FSCK_MSG_MISSING_SPACE_BEFORE_EMAIL, "invalid author/committer line - missing space before email");
e6826e33
JS
821 p++;
822 p += strcspn(p, "<>\n");
823 if (*p != '>')
78543993 824 return report(options, oid, type, FSCK_MSG_BAD_EMAIL, "invalid author/committer line - bad email");
e6826e33
JS
825 p++;
826 if (*p != ' ')
78543993 827 return report(options, oid, type, FSCK_MSG_MISSING_SPACE_BEFORE_DATE, "invalid author/committer line - missing space before date");
e6826e33 828 p++;
8e430903
JK
829 /*
830 * Our timestamp parser is based on the C strto*() functions, which
831 * will happily eat whitespace, including the newline that is supposed
832 * to prevent us walking past the end of the buffer. So do our own
833 * scan, skipping linear whitespace but not newlines, and then
834 * confirming we found a digit. We _could_ be even more strict here,
835 * as we really expect only a single space, but since we have
836 * traditionally allowed extra whitespace, we'll continue to do so.
837 */
838 while (*p == ' ' || *p == '\t')
839 p++;
840 if (!isdigit(*p))
841 return report(options, oid, type, FSCK_MSG_BAD_DATE,
842 "invalid author/committer line - bad date");
e6826e33 843 if (*p == '0' && p[1] != ' ')
78543993 844 return report(options, oid, type, FSCK_MSG_ZERO_PADDED_DATE, "invalid author/committer line - zero-padded date");
1aeb7e75 845 if (date_overflows(parse_timestamp(p, &end, 10)))
78543993 846 return report(options, oid, type, FSCK_MSG_BAD_DATE_OVERFLOW, "invalid author/committer line - date causes integer overflow");
e6826e33 847 if ((end == p || *end != ' '))
78543993 848 return report(options, oid, type, FSCK_MSG_BAD_DATE, "invalid author/committer line - bad date");
e6826e33
JS
849 p = end + 1;
850 if ((*p != '+' && *p != '-') ||
851 !isdigit(p[1]) ||
852 !isdigit(p[2]) ||
853 !isdigit(p[3]) ||
854 !isdigit(p[4]) ||
855 (p[5] != '\n'))
78543993 856 return report(options, oid, type, FSCK_MSG_BAD_TIMEZONE, "invalid author/committer line - bad time zone");
e6826e33 857 p += 6;
daae1922
JN
858 return 0;
859}
860
c5b4269b
JK
861static int fsck_commit(const struct object_id *oid,
862 const char *buffer, unsigned long size,
863 struct fsck_options *options)
ba002f3b 864{
f648ee70 865 struct object_id tree_oid, parent_oid;
ec652315 866 unsigned author_count;
daae1922 867 int err;
6d2d780f 868 const char *buffer_begin = buffer;
8e430903 869 const char *buffer_end = buffer + size;
c54f5ca9 870 const char *p;
ba002f3b 871
8e430903
JK
872 /*
873 * We _must_ stop parsing immediately if this reports failure, as the
874 * memory safety of the rest of the function depends on it. See the
875 * comment above the definition of verify_headers() for more details.
876 */
c5b4269b 877 if (verify_headers(buffer, size, oid, OBJ_COMMIT, options))
4d0d8975
JS
878 return -1;
879
8e430903 880 if (buffer >= buffer_end || !skip_prefix(buffer, "tree ", &buffer))
c5b4269b 881 return report(options, oid, OBJ_COMMIT, FSCK_MSG_MISSING_TREE, "invalid format - expected 'tree' line");
c54f5ca9 882 if (parse_oid_hex(buffer, &tree_oid, &p) || *p != '\n') {
c5b4269b 883 err = report(options, oid, OBJ_COMMIT, FSCK_MSG_BAD_TREE_SHA1, "invalid 'tree' line format - bad sha1");
b3584761
JS
884 if (err)
885 return err;
886 }
c54f5ca9 887 buffer = p + 1;
8e430903 888 while (buffer < buffer_end && skip_prefix(buffer, "parent ", &buffer)) {
f648ee70 889 if (parse_oid_hex(buffer, &parent_oid, &p) || *p != '\n') {
c5b4269b 890 err = report(options, oid, OBJ_COMMIT, FSCK_MSG_BAD_PARENT_SHA1, "invalid 'parent' line format - bad sha1");
b3584761
JS
891 if (err)
892 return err;
893 }
c54f5ca9 894 buffer = p + 1;
ba002f3b 895 }
c9ad147f 896 author_count = 0;
8e430903 897 while (buffer < buffer_end && skip_prefix(buffer, "author ", &buffer)) {
c9ad147f 898 author_count++;
c5b4269b 899 err = fsck_ident(&buffer, oid, OBJ_COMMIT, options);
c9ad147f
JS
900 if (err)
901 return err;
ba002f3b 902 }
c9ad147f 903 if (author_count < 1)
c5b4269b 904 err = report(options, oid, OBJ_COMMIT, FSCK_MSG_MISSING_AUTHOR, "invalid format - expected 'author' line");
c9ad147f 905 else if (author_count > 1)
c5b4269b 906 err = report(options, oid, OBJ_COMMIT, FSCK_MSG_MULTIPLE_AUTHORS, "invalid format - multiple 'author' lines");
daae1922
JN
907 if (err)
908 return err;
8e430903 909 if (buffer >= buffer_end || !skip_prefix(buffer, "committer ", &buffer))
c5b4269b
JK
910 return report(options, oid, OBJ_COMMIT, FSCK_MSG_MISSING_COMMITTER, "invalid format - expected 'committer' line");
911 err = fsck_ident(&buffer, oid, OBJ_COMMIT, options);
daae1922
JN
912 if (err)
913 return err;
6d2d780f 914 if (memchr(buffer_begin, '\0', size)) {
c5b4269b 915 err = report(options, oid, OBJ_COMMIT, FSCK_MSG_NUL_IN_COMMIT,
6d2d780f
JH
916 "NUL byte in the commit object body");
917 if (err)
918 return err;
919 }
ba002f3b
MK
920 return 0;
921}
922
103fb6d4 923static int fsck_tag(const struct object_id *oid, const char *buffer,
2175a0c6 924 unsigned long size, struct fsck_options *options)
cec097be 925{
f648ee70 926 struct object_id tagged_oid;
acf9de4c
ÆAB
927 int tagged_type;
928 return fsck_tag_standalone(oid, buffer, size, options, &tagged_oid,
929 &tagged_type);
930}
931
932int fsck_tag_standalone(const struct object_id *oid, const char *buffer,
933 unsigned long size, struct fsck_options *options,
934 struct object_id *tagged_oid,
935 int *tagged_type)
936{
cec097be 937 int ret = 0;
23a173a7 938 char *eol;
cec097be 939 struct strbuf sb = STRBUF_INIT;
8e430903 940 const char *buffer_end = buffer + size;
c54f5ca9 941 const char *p;
cec097be 942
8e430903
JK
943 /*
944 * We _must_ stop parsing immediately if this reports failure, as the
945 * memory safety of the rest of the function depends on it. See the
946 * comment above the definition of verify_headers() for more details.
947 */
103fb6d4 948 ret = verify_headers(buffer, size, oid, OBJ_TAG, options);
8a272f29 949 if (ret)
cec097be
JS
950 goto done;
951
8e430903 952 if (buffer >= buffer_end || !skip_prefix(buffer, "object ", &buffer)) {
103fb6d4 953 ret = report(options, oid, OBJ_TAG, FSCK_MSG_MISSING_OBJECT, "invalid format - expected 'object' line");
cec097be
JS
954 goto done;
955 }
acf9de4c 956 if (parse_oid_hex(buffer, tagged_oid, &p) || *p != '\n') {
103fb6d4 957 ret = report(options, oid, OBJ_TAG, FSCK_MSG_BAD_OBJECT_SHA1, "invalid 'object' line format - bad sha1");
7d7d5b05
JS
958 if (ret)
959 goto done;
cec097be 960 }
c54f5ca9 961 buffer = p + 1;
cec097be 962
8e430903 963 if (buffer >= buffer_end || !skip_prefix(buffer, "type ", &buffer)) {
103fb6d4 964 ret = report(options, oid, OBJ_TAG, FSCK_MSG_MISSING_TYPE_ENTRY, "invalid format - expected 'type' line");
cec097be
JS
965 goto done;
966 }
8e430903 967 eol = memchr(buffer, '\n', buffer_end - buffer);
cec097be 968 if (!eol) {
103fb6d4 969 ret = report(options, oid, OBJ_TAG, FSCK_MSG_MISSING_TYPE, "invalid format - unexpected end after 'type' line");
cec097be
JS
970 goto done;
971 }
acf9de4c
ÆAB
972 *tagged_type = type_from_string_gently(buffer, eol - buffer, 1);
973 if (*tagged_type < 0)
103fb6d4 974 ret = report(options, oid, OBJ_TAG, FSCK_MSG_BAD_TYPE, "invalid 'type' value");
cec097be
JS
975 if (ret)
976 goto done;
977 buffer = eol + 1;
978
8e430903 979 if (buffer >= buffer_end || !skip_prefix(buffer, "tag ", &buffer)) {
103fb6d4 980 ret = report(options, oid, OBJ_TAG, FSCK_MSG_MISSING_TAG_ENTRY, "invalid format - expected 'tag' line");
cec097be
JS
981 goto done;
982 }
8e430903 983 eol = memchr(buffer, '\n', buffer_end - buffer);
cec097be 984 if (!eol) {
103fb6d4 985 ret = report(options, oid, OBJ_TAG, FSCK_MSG_MISSING_TAG, "invalid format - unexpected end after 'type' line");
cec097be
JS
986 goto done;
987 }
988 strbuf_addf(&sb, "refs/tags/%.*s", (int)(eol - buffer), buffer);
f27d05b1 989 if (check_refname_format(sb.buf, 0)) {
103fb6d4 990 ret = report(options, oid, OBJ_TAG,
38370253
JK
991 FSCK_MSG_BAD_TAG_NAME,
992 "invalid 'tag' name: %.*s",
993 (int)(eol - buffer), buffer);
f27d05b1
JS
994 if (ret)
995 goto done;
996 }
cec097be
JS
997 buffer = eol + 1;
998
8e430903 999 if (buffer >= buffer_end || !skip_prefix(buffer, "tagger ", &buffer)) {
cec097be 1000 /* early tags do not contain 'tagger' lines; warn only */
103fb6d4 1001 ret = report(options, oid, OBJ_TAG, FSCK_MSG_MISSING_TAGGER_ENTRY, "invalid format - expected 'tagger' line");
f27d05b1
JS
1002 if (ret)
1003 goto done;
1004 }
cec097be 1005 else
103fb6d4 1006 ret = fsck_ident(&buffer, oid, OBJ_TAG, options);
cec097be 1007
8e430903 1008 if (buffer < buffer_end && !starts_with(buffer, "\n")) {
acf9de4c
ÆAB
1009 /*
1010 * The verify_headers() check will allow
1011 * e.g. "[...]tagger <tagger>\nsome
1012 * garbage\n\nmessage" to pass, thinking "some
1013 * garbage" could be a custom header. E.g. "mktag"
1014 * doesn't want any unknown headers.
1015 */
1016 ret = report(options, oid, OBJ_TAG, FSCK_MSG_EXTRA_HEADER_ENTRY, "invalid format - extra header(s) after 'tagger'");
1017 if (ret)
1018 goto done;
1019 }
1020
cec097be
JS
1021done:
1022 strbuf_release(&sb);
cec097be
JS
1023 return ret;
1024}
1025
9fd512c8 1026static int starts_with_dot_slash(const char *const path)
a2b26ffb 1027{
9fd512c8
ÆAB
1028 return path_match_flags(path, PATH_MATCH_STARTS_WITH_DOT_SLASH |
1029 PATH_MATCH_XPLATFORM);
a2b26ffb
JN
1030}
1031
9fd512c8 1032static int starts_with_dot_dot_slash(const char *const path)
a2b26ffb 1033{
9fd512c8
ÆAB
1034 return path_match_flags(path, PATH_MATCH_STARTS_WITH_DOT_DOT_SLASH |
1035 PATH_MATCH_XPLATFORM);
a2b26ffb
JN
1036}
1037
/*
 * A submodule URL is treated as relative when it starts with "./" or
 * "../".  Returns 1 if so, 0 otherwise.
 */
static int submodule_url_is_relative(const char *url)
{
	if (starts_with_dot_slash(url))
		return 1;
	return starts_with_dot_dot_slash(url) != 0;
}
1042
/*
 * Count how many directory components a relative submodule URL would
 * chop from the remote URL it is resolved against, i.e. the number of
 * "../" components at the start of "url".
 *
 * Leading "./" components are skipped without being counted.
 *
 * Returns that count and stores in *out a pointer to the first
 * character of url that follows all leading "./" and "../" components.
 */
static int count_leading_dotdots(const char *url, const char **out)
{
	int chopped = 0;

	for (;;) {
		if (starts_with_dot_dot_slash(url)) {
			chopped++;
			url += strlen("../");
		} else if (starts_with_dot_slash(url)) {
			url += strlen("./");
		} else {
			break;
		}
	}

	*out = url;
	return chopped;
}
/*
 * Decide whether the transport for "url" is implemented by
 * git-remote-curl.
 *
 * Returns 1 and stores in *out the URL that git-remote-curl would be
 * given; returns 0 and leaves *out alone otherwise.
 *
 * Examples:
 *   http::https://example.com/repo.git -> 1, https://example.com/repo.git
 *   https://example.com/repo.git -> 1, https://example.com/repo.git
 *   git://example.com/repo.git -> 0
 *
 * Used when checking for previously exploitable bugs that required a
 * submodule URL to reach git-remote-curl.
 */
static int url_to_curl_url(const char *url, const char **out)
{
	/*
	 * Case-aliases, "http.exe" and the like are deliberately not
	 * handled: in the default configuration is_transport_allowed
	 * keeps URLs with such schemes from being cloned automatically.
	 */
	static const char *const helper_prefixes[] = {
		"http::", "https::", "ftp::", "ftps::", NULL
	};
	static const char *const plain_schemes[] = {
		"http://", "https://", "ftp://", "ftps://", NULL
	};
	const char *const *it;

	/* "<helper>::<url>": the helper receives what follows the "::". */
	for (it = helper_prefixes; *it; it++)
		if (skip_prefix(url, *it, out))
			return 1;

	/* Plain scheme: the helper receives the URL unchanged. */
	for (it = plain_schemes; *it; it++) {
		if (starts_with(url, *it)) {
			*out = url;
			return 1;
		}
	}

	return 0;
}
1109
07259e74
JK
1110static int check_submodule_url(const char *url)
1111{
a2b26ffb 1112 const char *curl_url;
07259e74
JK
1113
1114 if (looks_like_command_line_option(url))
1115 return -1;
1116
6aed5673 1117 if (submodule_url_is_relative(url) || starts_with(url, "git://")) {
c44088ec
JN
1118 char *decoded;
1119 const char *next;
1120 int has_nl;
1121
a2b26ffb
JN
1122 /*
1123 * This could be appended to an http URL and url-decoded;
1124 * check for malicious characters.
1125 */
c44088ec
JN
1126 decoded = url_decode(url);
1127 has_nl = !!strchr(decoded, '\n');
1128
a2b26ffb
JN
1129 free(decoded);
1130 if (has_nl)
1131 return -1;
c44088ec
JN
1132
1133 /*
1134 * URLs which escape their root via "../" can overwrite
1135 * the host field and previous components, resolving to
1a3609e4
JN
1136 * URLs like https::example.com/submodule.git and
1137 * https:///example.com/submodule.git that were
c44088ec
JN
1138 * susceptible to CVE-2020-11008.
1139 */
1140 if (count_leading_dotdots(url, &next) > 0 &&
1a3609e4 1141 (*next == ':' || *next == '/'))
c44088ec 1142 return -1;
a2b26ffb
JN
1143 }
1144
1145 else if (url_to_curl_url(url, &curl_url)) {
1146 struct credential c = CREDENTIAL_INIT;
1a3609e4
JN
1147 int ret = 0;
1148 if (credential_from_url_gently(&c, curl_url, 1) ||
1149 !*c.host)
1150 ret = -1;
a2b26ffb
JN
1151 credential_clear(&c);
1152 return ret;
1153 }
1154
1155 return 0;
07259e74
JK
1156}
1157
/* State handed to fsck_gitmodules_fn() through the config callback. */
struct fsck_gitmodules_data {
	/* id of the .gitmodules blob being checked; used in reports */
	const struct object_id *oid;
	/* fsck options passed on to report() */
	struct fsck_options *options;
	/* bitwise OR of every report() result seen so far */
	int ret;
};
1163
1164static int fsck_gitmodules_fn(const char *var, const char *value, void *vdata)
1165{
1166 struct fsck_gitmodules_data *data = vdata;
1167 const char *subsection, *key;
f5914f4b 1168 size_t subsection_len;
ed8b10f6
JK
1169 char *name;
1170
1171 if (parse_config_key(var, "submodule", &subsection, &subsection_len, &key) < 0 ||
1172 !subsection)
1173 return 0;
1174
1175 name = xmemdupz(subsection, subsection_len);
1176 if (check_submodule_name(name) < 0)
38370253 1177 data->ret |= report(data->options,
6da40b22 1178 data->oid, OBJ_BLOB,
ed8b10f6
JK
1179 FSCK_MSG_GITMODULES_NAME,
1180 "disallowed submodule name: %s",
1181 name);
a124133e 1182 if (!strcmp(key, "url") && value &&
07259e74 1183 check_submodule_url(value) < 0)
38370253 1184 data->ret |= report(data->options,
6da40b22 1185 data->oid, OBJ_BLOB,
a124133e
JK
1186 FSCK_MSG_GITMODULES_URL,
1187 "disallowed submodule url: %s",
1188 value);
1a7fd1fb
JK
1189 if (!strcmp(key, "path") && value &&
1190 looks_like_command_line_option(value))
38370253 1191 data->ret |= report(data->options,
6da40b22 1192 data->oid, OBJ_BLOB,
1a7fd1fb
JK
1193 FSCK_MSG_GITMODULES_PATH,
1194 "disallowed submodule path: %s",
1195 value);
bb92255e
JN
1196 if (!strcmp(key, "update") && value &&
1197 parse_submodule_update_type(value) == SM_UPDATE_COMMAND)
7034cd09 1198 data->ret |= report(data->options, data->oid, OBJ_BLOB,
bb92255e
JN
1199 FSCK_MSG_GITMODULES_UPDATE,
1200 "disallowed submodule update setting: %s",
1201 value);
ed8b10f6
JK
1202 free(name);
1203
1204 return 0;
1205}
1206
6da40b22 1207static int fsck_blob(const struct object_id *oid, const char *buf,
7ac4f3a0
JK
1208 unsigned long size, struct fsck_options *options)
1209{
bb3a9265 1210 int ret = 0;
ed8b10f6 1211
6da40b22 1212 if (object_on_skiplist(options, oid))
fb162877
RJ
1213 return 0;
1214
bb3a9265
PS
1215 if (oidset_contains(&options->gitmodules_found, oid)) {
1216 struct config_options config_opts = { 0 };
1217 struct fsck_gitmodules_data data;
1218
1219 oidset_insert(&options->gitmodules_done, oid);
1220
1221 if (!buf) {
1222 /*
1223 * A missing buffer here is a sign that the caller found the
1224 * blob too gigantic to load into memory. Let's just consider
1225 * that an error.
1226 */
1227 return report(options, oid, OBJ_BLOB,
1228 FSCK_MSG_GITMODULES_LARGE,
1229 ".gitmodules too large to parse");
1230 }
1231
1232 data.oid = oid;
1233 data.options = options;
1234 data.ret = 0;
1235 config_opts.error_action = CONFIG_ERROR_SILENT;
1236 if (git_config_from_mem(fsck_gitmodules_fn, CONFIG_ORIGIN_BLOB,
1237 ".gitmodules", buf, size, &data, &config_opts))
1238 data.ret |= report(options, oid, OBJ_BLOB,
1239 FSCK_MSG_GITMODULES_PARSE,
1240 "could not parse gitmodules blob");
1241 ret |= data.ret;
ed8b10f6
JK
1242 }
1243
27ab4784
PS
1244 if (oidset_contains(&options->gitattributes_found, oid)) {
1245 const char *ptr;
1246
1247 oidset_insert(&options->gitattributes_done, oid);
1248
1249 if (!buf || size > ATTR_MAX_FILE_SIZE) {
1250 /*
1251 * A missing buffer here is a sign that the caller found the
1252 * blob too gigantic to load into memory. Let's just consider
1253 * that an error.
1254 */
1255 return report(options, oid, OBJ_BLOB,
1256 FSCK_MSG_GITATTRIBUTES_LARGE,
1257 ".gitattributes too large to parse");
1258 }
1259
1260 for (ptr = buf; *ptr; ) {
1261 const char *eol = strchrnul(ptr, '\n');
1262 if (eol - ptr >= ATTR_MAX_LINE_LENGTH) {
1263 ret |= report(options, oid, OBJ_BLOB,
1264 FSCK_MSG_GITATTRIBUTES_LINE_LENGTH,
1265 ".gitattributes has too long lines to parse");
1266 break;
1267 }
1268
1269 ptr = *eol ? eol + 1 : eol;
1270 }
1271 }
1272
bb3a9265 1273 return ret;
7ac4f3a0
JK
1274}
1275
90a398bb 1276int fsck_object(struct object *obj, void *data, unsigned long size,
22410549 1277 struct fsck_options *options)
ba002f3b
MK
1278{
1279 if (!obj)
38370253 1280 return report(options, NULL, OBJ_NONE, FSCK_MSG_BAD_OBJECT_SHA1, "no valid object to fsck");
ba002f3b 1281
35ff327e
JK
1282 return fsck_buffer(&obj->oid, obj->type, data, size, options);
1283}
1284
1285int fsck_buffer(const struct object_id *oid, enum object_type type,
1286 void *data, unsigned long size,
1287 struct fsck_options *options)
1288{
1289 if (type == OBJ_BLOB)
1290 return fsck_blob(oid, data, size, options);
1291 if (type == OBJ_TREE)
1292 return fsck_tree(oid, data, size, options);
1293 if (type == OBJ_COMMIT)
1294 return fsck_commit(oid, data, size, options);
1295 if (type == OBJ_TAG)
1296 return fsck_tag(oid, data, size, options);
1297
1298 return report(options, oid, type,
38370253
JK
1299 FSCK_MSG_UNKNOWN_TYPE,
1300 "unknown type '%d' (internal fsck error)",
35ff327e 1301 type);
ba002f3b 1302}
d6ffc8d7 1303
1cd772cc 1304int fsck_error_function(struct fsck_options *o,
5afc4b1d
JK
1305 const struct object_id *oid,
1306 enum object_type object_type,
394d5d31
ÆAB
1307 enum fsck_msg_type msg_type,
1308 enum fsck_msg_id msg_id,
1309 const char *message)
d6ffc8d7 1310{
0282f4dc 1311 if (msg_type == FSCK_WARN) {
5afc4b1d 1312 warning("object %s: %s", fsck_describe_object(o, oid), message);
0282f4dc
JS
1313 return 0;
1314 }
5afc4b1d 1315 error("object %s: %s", fsck_describe_object(o, oid), message);
d6ffc8d7
MK
1316 return 1;
1317}
159e7b08 1318
a59a8c68
PS
1319static int fsck_blobs(struct oidset *blobs_found, struct oidset *blobs_done,
1320 enum fsck_msg_id msg_missing, enum fsck_msg_id msg_type,
1321 struct fsck_options *options, const char *blob_type)
159e7b08
JK
1322{
1323 int ret = 0;
1324 struct oidset_iter iter;
1325 const struct object_id *oid;
1326
a59a8c68 1327 oidset_iter_init(blobs_found, &iter);
159e7b08 1328 while ((oid = oidset_iter_next(&iter))) {
159e7b08
JK
1329 enum object_type type;
1330 unsigned long size;
1331 char *buf;
1332
a59a8c68 1333 if (oidset_contains(blobs_done, oid))
159e7b08
JK
1334 continue;
1335
7913f53b 1336 buf = read_object_file(oid, &type, &size);
159e7b08 1337 if (!buf) {
b8b00f16 1338 if (is_promisor_object(oid))
27387444 1339 continue;
38370253 1340 ret |= report(options,
a59a8c68
PS
1341 oid, OBJ_BLOB, msg_missing,
1342 "unable to read %s blob", blob_type);
159e7b08
JK
1343 continue;
1344 }
1345
1346 if (type == OBJ_BLOB)
b8b00f16 1347 ret |= fsck_blob(oid, buf, size, options);
159e7b08 1348 else
a59a8c68
PS
1349 ret |= report(options, oid, type, msg_type,
1350 "non-blob found at %s", blob_type);
159e7b08
JK
1351 free(buf);
1352 }
1353
a59a8c68
PS
1354 oidset_clear(blobs_found);
1355 oidset_clear(blobs_done);
1356
1357 return ret;
1358}
1359
1360int fsck_finish(struct fsck_options *options)
1361{
1362 int ret = 0;
1363
1364 ret |= fsck_blobs(&options->gitmodules_found, &options->gitmodules_done,
1365 FSCK_MSG_GITMODULES_MISSING, FSCK_MSG_GITMODULES_BLOB,
1366 options, ".gitmodules");
27ab4784
PS
1367 ret |= fsck_blobs(&options->gitattributes_found, &options->gitattributes_done,
1368 FSCK_MSG_GITATTRIBUTES_MISSING, FSCK_MSG_GITATTRIBUTES_BLOB,
1369 options, ".gitattributes");
159e7b08 1370
159e7b08
JK
1371 return ret;
1372}
1f3299fd 1373
fb79f5bf 1374int git_fsck_config(const char *var, const char *value, void *cb)
1f3299fd 1375{
fb79f5bf 1376 struct fsck_options *options = cb;
1f3299fd
ÆAB
1377 if (strcmp(var, "fsck.skiplist") == 0) {
1378 const char *path;
1379 struct strbuf sb = STRBUF_INIT;
1380
1381 if (git_config_pathname(&path, var, value))
1382 return 1;
1383 strbuf_addf(&sb, "skiplist=%s", path);
1384 free((char *)path);
1385 fsck_set_msg_types(options, sb.buf);
1386 strbuf_release(&sb);
1387 return 0;
1388 }
1389
1390 if (skip_prefix(var, "fsck.", &var)) {
1391 fsck_set_msg_type(options, var, value);
1392 return 0;
1393 }
1394
1395 return git_default_config(var, value, cb);
1396}
3745e269
ÆAB
1397
1398/*
1399 * Custom error callbacks that are used in more than one place.
1400 */
1401
1402int fsck_error_cb_print_missing_gitmodules(struct fsck_options *o,
1403 const struct object_id *oid,
1404 enum object_type object_type,
1405 enum fsck_msg_type msg_type,
1406 enum fsck_msg_id msg_id,
1407 const char *message)
1408{
1409 if (msg_id == FSCK_MSG_GITMODULES_MISSING) {
1410 puts(oid_to_hex(oid));
1411 return 0;
1412 }
1413 return fsck_error_function(o, oid, object_type, msg_type, msg_id, message);
1414}