]> git.ipfire.org Git - thirdparty/git.git/blame - fsck.c
log-tree: replace include of revision.h with simple forward declaration
[thirdparty/git.git] / fsck.c
CommitLineData
d812c3b6 1#include "git-compat-util.h"
36bf1958 2#include "alloc.h"
d4a4f929 3#include "date.h"
41771fa4 4#include "hex.h"
cbd53a21 5#include "object-store.h"
109cd76d 6#include "repository.h"
355885d5 7#include "object.h"
27ab4784 8#include "attr.h"
355885d5
MK
9#include "blob.h"
10#include "tree.h"
11#include "tree-walk.h"
12#include "commit.h"
13#include "tag.h"
14#include "fsck.h"
cec097be 15#include "refs.h"
a2b26ffb 16#include "url.h"
a18fcc9f 17#include "utf8.h"
7b35efd7 18#include "decorate.h"
159e7b08 19#include "oidset.h"
27387444 20#include "packfile.h"
ed8b10f6
JK
21#include "submodule-config.h"
22#include "config.h"
07259e74 23#include "credential.h"
3ac68a93 24#include "help.h"
159e7b08 25
f417eed8 26#define STR(x) #x
a4a9cc19 27#define MSG_ID(id, msg_type) { STR(id), NULL, NULL, FSCK_##msg_type },
c99ba492 28static struct {
f417eed8
JS
29 const char *id_string;
30 const char *downcased;
a4a9cc19 31 const char *camelcased;
1b32b59f 32 enum fsck_msg_type msg_type;
c99ba492 33} msg_id_info[FSCK_MSG_MAX + 1] = {
901f2f67 34 FOREACH_FSCK_MSG_ID(MSG_ID)
a4a9cc19 35 { NULL, NULL, NULL, -1 }
c99ba492
JS
36};
37#undef MSG_ID
b5495024 38#undef STR
c99ba492 39
a46baac6 40static void prepare_msg_ids(void)
f417eed8
JS
41{
42 int i;
43
a46baac6
NTND
44 if (msg_id_info[0].downcased)
45 return;
46
47 /* convert id_string to lower case, without underscores. */
48 for (i = 0; i < FSCK_MSG_MAX; i++) {
49 const char *p = msg_id_info[i].id_string;
50 int len = strlen(p);
51 char *q = xmalloc(len);
52
53 msg_id_info[i].downcased = q;
54 while (*p)
55 if (*p == '_')
56 p++;
57 else
58 *(q)++ = tolower(*(p)++);
59 *q = '\0';
a4a9cc19
NTND
60
61 p = msg_id_info[i].id_string;
62 q = xmalloc(len);
63 msg_id_info[i].camelcased = q;
64 while (*p) {
65 if (*p == '_') {
66 p++;
67 if (*p)
68 *q++ = *p++;
69 } else {
70 *q++ = tolower(*p++);
71 }
f417eed8 72 }
a4a9cc19 73 *q = '\0';
f417eed8 74 }
a46baac6
NTND
75}
76
77static int parse_msg_id(const char *text)
78{
79 int i;
80
81 prepare_msg_ids();
f417eed8
JS
82
83 for (i = 0; i < FSCK_MSG_MAX; i++)
84 if (!strcmp(text, msg_id_info[i].downcased))
85 return i;
86
87 return -1;
88}
89
3ac68a93
NTND
90void list_config_fsck_msg_ids(struct string_list *list, const char *prefix)
91{
92 int i;
93
94 prepare_msg_ids();
95
3ac68a93 96 for (i = 0; i < FSCK_MSG_MAX; i++)
a4a9cc19 97 list_config_item(list, prefix, msg_id_info[i].camelcased);
3ac68a93
NTND
98}
99
1b32b59f 100static enum fsck_msg_type fsck_msg_type(enum fsck_msg_id msg_id,
c99ba492
JS
101 struct fsck_options *options)
102{
0282f4dc
JS
103 assert(msg_id >= 0 && msg_id < FSCK_MSG_MAX);
104
e35d65a7 105 if (!options->msg_type) {
1b32b59f 106 enum fsck_msg_type msg_type = msg_id_info[msg_id].msg_type;
e35d65a7 107
0282f4dc
JS
108 if (options->strict && msg_type == FSCK_WARN)
109 msg_type = FSCK_ERROR;
e35d65a7 110 return msg_type;
0282f4dc 111 }
c99ba492 112
e35d65a7 113 return options->msg_type[msg_id];
c99ba492
JS
114}
115
1b32b59f 116static enum fsck_msg_type parse_msg_type(const char *str)
0282f4dc
JS
117{
118 if (!strcmp(str, "error"))
119 return FSCK_ERROR;
120 else if (!strcmp(str, "warn"))
121 return FSCK_WARN;
efaba7cc
JS
122 else if (!strcmp(str, "ignore"))
123 return FSCK_IGNORE;
0282f4dc
JS
124 else
125 die("Unknown fsck message type: '%s'", str);
126}
127
5d477a33
JS
/*
 * Return 1 if msg_id names a known message ID, 0 otherwise.  For a
 * known ID the severity string is run through parse_msg_type() as
 * well, which calls die() on an unknown severity instead of returning.
 */
int is_valid_msg_type(const char *msg_id, const char *msg_type)
{
	int known_id = parse_msg_id(msg_id) >= 0;

	if (known_id)
		parse_msg_type(msg_type);
	return known_id;
}
135
53692df2
ÆAB
136void fsck_set_msg_type_from_ids(struct fsck_options *options,
137 enum fsck_msg_id msg_id,
138 enum fsck_msg_type msg_type)
139{
140 if (!options->msg_type) {
141 int i;
142 enum fsck_msg_type *severity;
143 ALLOC_ARRAY(severity, FSCK_MSG_MAX);
144 for (i = 0; i < FSCK_MSG_MAX; i++)
145 severity[i] = fsck_msg_type(i, options);
146 options->msg_type = severity;
147 }
148
149 options->msg_type[msg_id] = msg_type;
150}
151
0282f4dc 152void fsck_set_msg_type(struct fsck_options *options,
f1abc2d0 153 const char *msg_id_str, const char *msg_type_str)
0282f4dc 154{
1b32b59f 155 int msg_id = parse_msg_id(msg_id_str);
c72da1a2 156 enum fsck_msg_type msg_type = parse_msg_type(msg_type_str);
0282f4dc 157
f1abc2d0
ÆAB
158 if (msg_id < 0)
159 die("Unhandled message id: %s", msg_id_str);
0282f4dc 160
f1abc2d0
ÆAB
161 if (msg_type != FSCK_ERROR && msg_id_info[msg_id].msg_type == FSCK_FATAL)
162 die("Cannot demote %s to %s", msg_id_str, msg_type_str);
f50c4407 163
53692df2 164 fsck_set_msg_type_from_ids(options, msg_id, msg_type);
0282f4dc
JS
165}
166
167void fsck_set_msg_types(struct fsck_options *options, const char *values)
168{
169 char *buf = xstrdup(values), *to_free = buf;
170 int done = 0;
171
172 while (!done) {
173 int len = strcspn(buf, " ,|"), equal;
174
175 done = !buf[len];
176 if (!len) {
177 buf++;
178 continue;
179 }
180 buf[len] = '\0';
181
182 for (equal = 0;
183 equal < len && buf[equal] != '=' && buf[equal] != ':';
184 equal++)
185 buf[equal] = tolower(buf[equal]);
186 buf[equal] = '\0';
187
cd94c6f9
JS
188 if (!strcmp(buf, "skiplist")) {
189 if (equal == len)
190 die("skiplist requires a path");
24eb33eb 191 oidset_parse_file(&options->skiplist, buf + equal + 1);
cd94c6f9
JS
192 buf += len + 1;
193 continue;
194 }
195
0282f4dc
JS
196 if (equal == len)
197 die("Missing '=': '%s'", buf);
355885d5 198
0282f4dc
JS
199 fsck_set_msg_type(options, buf, buf + equal + 1);
200 buf += len + 1;
201 }
202 free(to_free);
203}
204
f5979376
JK
205static int object_on_skiplist(struct fsck_options *opts,
206 const struct object_id *oid)
fb162877 207{
f5979376 208 return opts && oid && oidset_contains(&opts->skiplist, oid);
fb162877
RJ
209}
210
38370253
JK
211__attribute__((format (printf, 5, 6)))
212static int report(struct fsck_options *options,
213 const struct object_id *oid, enum object_type object_type,
35af754b 214 enum fsck_msg_id msg_id, const char *fmt, ...)
c99ba492
JS
215{
216 va_list ap;
217 struct strbuf sb = STRBUF_INIT;
1b32b59f
ÆAB
218 enum fsck_msg_type msg_type = fsck_msg_type(msg_id, options);
219 int result;
c99ba492 220
efaba7cc
JS
221 if (msg_type == FSCK_IGNORE)
222 return 0;
223
38370253 224 if (object_on_skiplist(options, oid))
cd94c6f9
JS
225 return 0;
226
f50c4407
JS
227 if (msg_type == FSCK_FATAL)
228 msg_type = FSCK_ERROR;
f27d05b1
JS
229 else if (msg_type == FSCK_INFO)
230 msg_type = FSCK_WARN;
f50c4407 231
034a7b7b 232 prepare_msg_ids();
35af754b 233 strbuf_addf(&sb, "%s: ", msg_id_info[msg_id].camelcased);
71ab8fa8 234
c99ba492
JS
235 va_start(ap, fmt);
236 strbuf_vaddf(&sb, fmt, ap);
38370253 237 result = options->error_func(options, oid, object_type,
394d5d31 238 msg_type, msg_id, sb.buf);
c99ba492
JS
239 strbuf_release(&sb);
240 va_end(ap);
241
242 return result;
243}
244
a59cfb32
JK
245void fsck_enable_object_names(struct fsck_options *options)
246{
247 if (!options->object_names)
73390290 248 options->object_names = kh_init_oid_map();
a59cfb32
JK
249}
250
73390290
JK
251const char *fsck_get_object_name(struct fsck_options *options,
252 const struct object_id *oid)
7b35efd7 253{
73390290 254 khiter_t pos;
7b35efd7
JS
255 if (!options->object_names)
256 return NULL;
73390290
JK
257 pos = kh_get_oid_map(options->object_names, *oid);
258 if (pos >= kh_end(options->object_names))
259 return NULL;
260 return kh_value(options->object_names, pos);
7b35efd7
JS
261}
262
73390290
JK
263void fsck_put_object_name(struct fsck_options *options,
264 const struct object_id *oid,
a59cfb32 265 const char *fmt, ...)
7b35efd7
JS
266{
267 va_list ap;
268 struct strbuf buf = STRBUF_INIT;
73390290
JK
269 khiter_t pos;
270 int hashret;
7b35efd7
JS
271
272 if (!options->object_names)
273 return;
73390290
JK
274
275 pos = kh_put_oid_map(options->object_names, *oid, &hashret);
276 if (!hashret)
7b35efd7
JS
277 return;
278 va_start(ap, fmt);
279 strbuf_vaddf(&buf, fmt, ap);
73390290 280 kh_value(options->object_names, pos) = strbuf_detach(&buf, NULL);
7b35efd7
JS
281 va_end(ap);
282}
283
a59cfb32 284const char *fsck_describe_object(struct fsck_options *options,
73390290 285 const struct object_id *oid)
90cf590f 286{
a59cfb32
JK
287 static struct strbuf bufs[] = {
288 STRBUF_INIT, STRBUF_INIT, STRBUF_INIT, STRBUF_INIT
289 };
290 static int b = 0;
291 struct strbuf *buf;
73390290 292 const char *name = fsck_get_object_name(options, oid);
a59cfb32
JK
293
294 buf = bufs + b;
295 b = (b + 1) % ARRAY_SIZE(bufs);
296 strbuf_reset(buf);
73390290 297 strbuf_addstr(buf, oid_to_hex(oid));
a59cfb32
JK
298 if (name)
299 strbuf_addf(buf, " (%s)", name);
90cf590f 300
a59cfb32 301 return buf->buf;
90cf590f
JS
302}
303
22410549 304static int fsck_walk_tree(struct tree *tree, void *data, struct fsck_options *options)
355885d5
MK
305{
306 struct tree_desc desc;
307 struct name_entry entry;
308 int res = 0;
7b35efd7 309 const char *name;
355885d5
MK
310
311 if (parse_tree(tree))
312 return -1;
313
73390290 314 name = fsck_get_object_name(options, &tree->object.oid);
ec18b10b 315 if (init_tree_desc_gently(&desc, tree->buffer, tree->size, 0))
8354fa3d
DT
316 return -1;
317 while (tree_entry_gently(&desc, &entry)) {
7b35efd7 318 struct object *obj;
355885d5
MK
319 int result;
320
321 if (S_ISGITLINK(entry.mode))
322 continue;
7b35efd7
JS
323
324 if (S_ISDIR(entry.mode)) {
ea82b2a0 325 obj = (struct object *)lookup_tree(the_repository, &entry.oid);
2720f6db 326 if (name && obj)
73390290 327 fsck_put_object_name(options, &entry.oid, "%s%s/",
a59cfb32 328 name, entry.path);
7b35efd7
JS
329 result = options->walk(obj, OBJ_TREE, data, options);
330 }
331 else if (S_ISREG(entry.mode) || S_ISLNK(entry.mode)) {
ea82b2a0 332 obj = (struct object *)lookup_blob(the_repository, &entry.oid);
2720f6db 333 if (name && obj)
73390290 334 fsck_put_object_name(options, &entry.oid, "%s%s",
a59cfb32 335 name, entry.path);
7b35efd7
JS
336 result = options->walk(obj, OBJ_BLOB, data, options);
337 }
355885d5 338 else {
82247e9b 339 result = error("in tree %s: entry %s has bad mode %.6o",
73390290 340 fsck_describe_object(options, &tree->object.oid),
a59cfb32 341 entry.path, entry.mode);
355885d5
MK
342 }
343 if (result < 0)
344 return result;
345 if (!res)
346 res = result;
347 }
348 return res;
349}
350
22410549 351static int fsck_walk_commit(struct commit *commit, void *data, struct fsck_options *options)
355885d5 352{
7b35efd7 353 int counter = 0, generation = 0, name_prefix_len = 0;
355885d5
MK
354 struct commit_list *parents;
355 int res;
356 int result;
7b35efd7 357 const char *name;
355885d5 358
ecb5091f 359 if (repo_parse_commit(the_repository, commit))
355885d5
MK
360 return -1;
361
73390290 362 name = fsck_get_object_name(options, &commit->object.oid);
7b35efd7 363 if (name)
73390290 364 fsck_put_object_name(options, get_commit_tree_oid(commit),
a59cfb32 365 "%s:", name);
7b35efd7 366
ecb5091f 367 result = options->walk((struct object *) repo_get_commit_tree(the_repository, commit),
2e27bd77 368 OBJ_TREE, data, options);
355885d5
MK
369 if (result < 0)
370 return result;
371 res = result;
372
373 parents = commit->parents;
7b35efd7
JS
374 if (name && parents) {
375 int len = strlen(name), power;
376
377 if (len && name[len - 1] == '^') {
378 generation = 1;
379 name_prefix_len = len - 1;
380 }
381 else { /* parse ~<generation> suffix */
382 for (generation = 0, power = 1;
383 len && isdigit(name[len - 1]);
384 power *= 10)
385 generation += power * (name[--len] - '0');
386 if (power > 1 && len && name[len - 1] == '~')
387 name_prefix_len = len - 1;
e89f8936
JS
388 else {
389 /* Maybe a non-first parent, e.g. HEAD^2 */
390 generation = 0;
391 name_prefix_len = len;
392 }
7b35efd7
JS
393 }
394 }
395
355885d5 396 while (parents) {
7b35efd7 397 if (name) {
73390290 398 struct object_id *oid = &parents->item->object.oid;
7b35efd7 399
b84c7838 400 if (counter++)
73390290 401 fsck_put_object_name(options, oid, "%s^%d",
a59cfb32 402 name, counter);
7b35efd7 403 else if (generation > 0)
73390290 404 fsck_put_object_name(options, oid, "%.*s~%d",
a59cfb32
JK
405 name_prefix_len, name,
406 generation + 1);
7b35efd7 407 else
73390290 408 fsck_put_object_name(options, oid, "%s^", name);
7b35efd7 409 }
22410549 410 result = options->walk((struct object *)parents->item, OBJ_COMMIT, data, options);
355885d5
MK
411 if (result < 0)
412 return result;
413 if (!res)
414 res = result;
415 parents = parents->next;
416 }
417 return res;
418}
419
22410549 420static int fsck_walk_tag(struct tag *tag, void *data, struct fsck_options *options)
355885d5 421{
73390290 422 const char *name = fsck_get_object_name(options, &tag->object.oid);
7b35efd7 423
355885d5
MK
424 if (parse_tag(tag))
425 return -1;
7b35efd7 426 if (name)
73390290 427 fsck_put_object_name(options, &tag->tagged->oid, "%s", name);
22410549 428 return options->walk(tag->tagged, OBJ_ANY, data, options);
355885d5
MK
429}
430
22410549 431int fsck_walk(struct object *obj, void *data, struct fsck_options *options)
355885d5
MK
432{
433 if (!obj)
434 return -1;
a2b22854
JK
435
436 if (obj->type == OBJ_NONE)
109cd76d 437 parse_object(the_repository, &obj->oid);
a2b22854 438
355885d5
MK
439 switch (obj->type) {
440 case OBJ_BLOB:
441 return 0;
442 case OBJ_TREE:
22410549 443 return fsck_walk_tree((struct tree *)obj, data, options);
355885d5 444 case OBJ_COMMIT:
22410549 445 return fsck_walk_commit((struct commit *)obj, data, options);
355885d5 446 case OBJ_TAG:
22410549 447 return fsck_walk_tag((struct tag *)obj, data, options);
355885d5 448 default:
a59cfb32 449 error("Unknown object type for %s",
73390290 450 fsck_describe_object(options, &obj->oid));
355885d5
MK
451 return -1;
452 }
453}
ba002f3b 454
9068cfb2
RS
/*
 * A growable stack of borrowed strings: only the pointers are stored,
 * the strings themselves are neither copied nor freed.
 */
struct name_stack {
	const char **names;
	size_t nr, alloc;
};

/* Push "name" on top of the stack, growing the array if necessary. */
static void name_stack_push(struct name_stack *stack, const char *name)
{
	ALLOC_GROW(stack->names, stack->nr + 1, stack->alloc);
	stack->names[stack->nr] = name;
	stack->nr++;
}

/* Remove and return the top of the stack, or NULL if it is empty. */
static const char *name_stack_pop(struct name_stack *stack)
{
	if (!stack->nr)
		return NULL;
	stack->nr--;
	return stack->names[stack->nr];
}

/* Release the array and reset the stack to its empty state. */
static void name_stack_clear(struct name_stack *stack)
{
	FREE_AND_NULL(stack->names);
	stack->nr = 0;
	stack->alloc = 0;
}
476
ba002f3b
MK
477/*
478 * The entries in a tree are ordered in the _path_ order,
479 * which means that a directory entry is ordered by adding
480 * a slash to the end of it.
481 *
482 * So a directory called "a" is ordered _after_ a file
483 * called "a.c", because "a/" sorts after "a.c".
484 */
485#define TREE_UNORDERED (-1)
486#define TREE_HAS_DUPS (-2)
487
9068cfb2
RS
/*
 * Return 1 if "c" is a non-NUL character that sorts before '/',
 * 0 otherwise.
 */
static int is_less_than_slash(unsigned char c)
{
	if (c == '\0')
		return 0;
	return c < '/';
}
492
493static int verify_ordered(unsigned mode1, const char *name1,
494 unsigned mode2, const char *name2,
495 struct name_stack *candidates)
ba002f3b
MK
496{
497 int len1 = strlen(name1);
498 int len2 = strlen(name2);
499 int len = len1 < len2 ? len1 : len2;
500 unsigned char c1, c2;
501 int cmp;
502
503 cmp = memcmp(name1, name2, len);
504 if (cmp < 0)
505 return 0;
506 if (cmp > 0)
507 return TREE_UNORDERED;
508
509 /*
510 * Ok, the first <len> characters are the same.
511 * Now we need to order the next one, but turn
512 * a '\0' into a '/' for a directory entry.
513 */
514 c1 = name1[len];
515 c2 = name2[len];
516 if (!c1 && !c2)
517 /*
518 * git-write-tree used to write out a nonsense tree that has
519 * entries with the same name, one blob and one tree. Make
520 * sure we do not have duplicate entries.
521 */
522 return TREE_HAS_DUPS;
523 if (!c1 && S_ISDIR(mode1))
524 c1 = '/';
525 if (!c2 && S_ISDIR(mode2))
526 c2 = '/';
9068cfb2
RS
527
528 /*
529 * There can be non-consecutive duplicates due to the implicitly
86715592 530 * added slash, e.g.:
9068cfb2
RS
531 *
532 * foo
533 * foo.bar
534 * foo.bar.baz
535 * foo.bar/
536 * foo/
537 *
538 * Record non-directory candidates (like "foo" and "foo.bar" in
539 * the example) on a stack and check directory candidates (like
540 * foo/" and "foo.bar/") against that stack.
541 */
542 if (!c1 && is_less_than_slash(c2)) {
543 name_stack_push(candidates, name1);
544 } else if (c2 == '/' && is_less_than_slash(c1)) {
545 for (;;) {
546 const char *p;
547 const char *f_name = name_stack_pop(candidates);
548
549 if (!f_name)
550 break;
551 if (!skip_prefix(name2, f_name, &p))
fe747043 552 continue;
9068cfb2
RS
553 if (!*p)
554 return TREE_HAS_DUPS;
555 if (is_less_than_slash(*p)) {
556 name_stack_push(candidates, f_name);
557 break;
558 }
559 }
560 }
561
ba002f3b
MK
562 return c1 < c2 ? 0 : TREE_UNORDERED;
563}
564
9e1947cb 565static int fsck_tree(const struct object_id *tree_oid,
23a173a7
JK
566 const char *buffer, unsigned long size,
567 struct fsck_options *options)
ba002f3b 568{
8354fa3d 569 int retval = 0;
c479d14a 570 int has_null_sha1 = 0;
ba002f3b
MK
571 int has_full_path = 0;
572 int has_empty_name = 0;
5d34a435
JK
573 int has_dot = 0;
574 int has_dotdot = 0;
5c17f512 575 int has_dotgit = 0;
ba002f3b
MK
576 int has_zero_pad = 0;
577 int has_bad_modes = 0;
578 int has_dup_entries = 0;
579 int not_properly_sorted = 0;
580 struct tree_desc desc;
581 unsigned o_mode;
582 const char *o_name;
9068cfb2 583 struct name_stack df_dup_candidates = { NULL };
ba002f3b 584
53602a93 585 if (init_tree_desc_gently(&desc, buffer, size, TREE_DESC_RAW_MODES)) {
0282f679
JK
586 retval += report(options, tree_oid, OBJ_TREE,
587 FSCK_MSG_BAD_TREE,
588 "cannot be parsed as a tree");
8354fa3d
DT
589 return retval;
590 }
ba002f3b
MK
591
592 o_mode = 0;
593 o_name = NULL;
ba002f3b
MK
594
595 while (desc.size) {
5ec1e728 596 unsigned short mode;
288a74bc 597 const char *name, *backslash;
9e1947cb 598 const struct object_id *entry_oid;
ba002f3b 599
9e1947cb 600 entry_oid = tree_entry_extract(&desc, &name, &mode);
ba002f3b 601
9e1947cb 602 has_null_sha1 |= is_null_oid(entry_oid);
effd12ec
HS
603 has_full_path |= !!strchr(name, '/');
604 has_empty_name |= !*name;
605 has_dot |= !strcmp(name, ".");
606 has_dotdot |= !strcmp(name, "..");
ed9c3220 607 has_dotgit |= is_hfs_dotgit(name) || is_ntfs_dotgit(name);
ba002f3b 608 has_zero_pad |= *(char *)desc.buffer == '0';
159e7b08 609
b7b1fca1
JK
610 if (is_hfs_dotgitmodules(name) || is_ntfs_dotgitmodules(name)) {
611 if (!S_ISLNK(mode))
9e1947cb
JK
612 oidset_insert(&options->gitmodules_found,
613 entry_oid);
b7b1fca1 614 else
38370253 615 retval += report(options,
9e1947cb 616 tree_oid, OBJ_TREE,
b7b1fca1
JK
617 FSCK_MSG_GITMODULES_SYMLINK,
618 ".gitmodules is a symbolic link");
619 }
159e7b08 620
f8587c31 621 if (is_hfs_dotgitattributes(name) || is_ntfs_dotgitattributes(name)) {
27ab4784
PS
622 if (!S_ISLNK(mode))
623 oidset_insert(&options->gitattributes_found,
624 entry_oid);
625 else
f8587c31
PS
626 retval += report(options, tree_oid, OBJ_TREE,
627 FSCK_MSG_GITATTRIBUTES_SYMLINK,
628 ".gitattributes is a symlink");
629 }
630
bb6832d5
JK
631 if (S_ISLNK(mode)) {
632 if (is_hfs_dotgitignore(name) ||
633 is_ntfs_dotgitignore(name))
634 retval += report(options, tree_oid, OBJ_TREE,
635 FSCK_MSG_GITIGNORE_SYMLINK,
636 ".gitignore is a symlink");
bb6832d5
JK
637 if (is_hfs_dotmailmap(name) ||
638 is_ntfs_dotmailmap(name))
639 retval += report(options, tree_oid, OBJ_TREE,
640 FSCK_MSG_MAILMAP_SYMLINK,
641 ".mailmap is a symlink");
642 }
643
288a74bc
JS
644 if ((backslash = strchr(name, '\\'))) {
645 while (backslash) {
646 backslash++;
647 has_dotgit |= is_ntfs_dotgit(backslash);
bdfef049
JS
648 if (is_ntfs_dotgitmodules(backslash)) {
649 if (!S_ISLNK(mode))
9e1947cb
JK
650 oidset_insert(&options->gitmodules_found,
651 entry_oid);
bdfef049 652 else
9e1947cb 653 retval += report(options, tree_oid, OBJ_TREE,
bdfef049
JS
654 FSCK_MSG_GITMODULES_SYMLINK,
655 ".gitmodules is a symbolic link");
656 }
288a74bc
JS
657 backslash = strchr(backslash, '\\');
658 }
659 }
660
8354fa3d 661 if (update_tree_entry_gently(&desc)) {
0282f679
JK
662 retval += report(options, tree_oid, OBJ_TREE,
663 FSCK_MSG_BAD_TREE,
664 "cannot be parsed as a tree");
8354fa3d
DT
665 break;
666 }
ba002f3b
MK
667
668 switch (mode) {
669 /*
670 * Standard modes..
671 */
672 case S_IFREG | 0755:
673 case S_IFREG | 0644:
674 case S_IFLNK:
675 case S_IFDIR:
676 case S_IFGITLINK:
677 break;
678 /*
679 * This is nonstandard, but we had a few of these
680 * early on when we honored the full set of mode
681 * bits..
682 */
683 case S_IFREG | 0664:
22410549 684 if (!options->strict)
ba002f3b 685 break;
1cf01a34 686 /* fallthrough */
ba002f3b
MK
687 default:
688 has_bad_modes = 1;
689 }
690
691 if (o_name) {
9068cfb2
RS
692 switch (verify_ordered(o_mode, o_name, mode, name,
693 &df_dup_candidates)) {
ba002f3b
MK
694 case TREE_UNORDERED:
695 not_properly_sorted = 1;
696 break;
697 case TREE_HAS_DUPS:
698 has_dup_entries = 1;
699 break;
700 default:
701 break;
702 }
703 }
704
705 o_mode = mode;
706 o_name = name;
ba002f3b
MK
707 }
708
9068cfb2
RS
709 name_stack_clear(&df_dup_candidates);
710
c479d14a 711 if (has_null_sha1)
0282f679
JK
712 retval += report(options, tree_oid, OBJ_TREE,
713 FSCK_MSG_NULL_SHA1,
714 "contains entries pointing to null sha1");
ba002f3b 715 if (has_full_path)
0282f679
JK
716 retval += report(options, tree_oid, OBJ_TREE,
717 FSCK_MSG_FULL_PATHNAME,
718 "contains full pathnames");
ba002f3b 719 if (has_empty_name)
0282f679
JK
720 retval += report(options, tree_oid, OBJ_TREE,
721 FSCK_MSG_EMPTY_NAME,
722 "contains empty pathname");
5d34a435 723 if (has_dot)
0282f679
JK
724 retval += report(options, tree_oid, OBJ_TREE,
725 FSCK_MSG_HAS_DOT,
726 "contains '.'");
5d34a435 727 if (has_dotdot)
0282f679
JK
728 retval += report(options, tree_oid, OBJ_TREE,
729 FSCK_MSG_HAS_DOTDOT,
730 "contains '..'");
5c17f512 731 if (has_dotgit)
0282f679
JK
732 retval += report(options, tree_oid, OBJ_TREE,
733 FSCK_MSG_HAS_DOTGIT,
734 "contains '.git'");
ba002f3b 735 if (has_zero_pad)
0282f679
JK
736 retval += report(options, tree_oid, OBJ_TREE,
737 FSCK_MSG_ZERO_PADDED_FILEMODE,
738 "contains zero-padded file modes");
ba002f3b 739 if (has_bad_modes)
0282f679
JK
740 retval += report(options, tree_oid, OBJ_TREE,
741 FSCK_MSG_BAD_FILEMODE,
742 "contains bad file modes");
ba002f3b 743 if (has_dup_entries)
0282f679
JK
744 retval += report(options, tree_oid, OBJ_TREE,
745 FSCK_MSG_DUPLICATE_ENTRIES,
746 "contains duplicate file entries");
ba002f3b 747 if (not_properly_sorted)
0282f679
JK
748 retval += report(options, tree_oid, OBJ_TREE,
749 FSCK_MSG_TREE_NOT_SORTED,
750 "not properly sorted");
ba002f3b
MK
751 return retval;
752}
753
8e430903
JK
754/*
755 * Confirm that the headers of a commit or tag object end in a reasonable way,
756 * either with the usual "\n\n" separator, or at least with a trailing newline
757 * on the final header line.
758 *
759 * This property is important for the memory safety of our callers. It allows
760 * them to scan the buffer linewise without constantly checking the remaining
761 * size as long as:
762 *
763 * - they check that there are bytes left in the buffer at the start of any
764 * line (i.e., that the last newline they saw was not the final one we
765 * found here)
766 *
767 * - any intra-line scanning they do will stop at a newline, which will worst
768 * case hit the newline we found here as the end-of-header. This makes it
769 * OK for them to use helpers like parse_oid_hex(), or even skip_prefix().
770 */
84d18c0b 771static int verify_headers(const void *data, unsigned long size,
cc579000
JK
772 const struct object_id *oid, enum object_type type,
773 struct fsck_options *options)
4d0d8975
JS
774{
775 const char *buffer = (const char *)data;
776 unsigned long i;
777
778 for (i = 0; i < size; i++) {
779 switch (buffer[i]) {
780 case '\0':
cc579000 781 return report(options, oid, type,
c99ba492
JS
782 FSCK_MSG_NUL_IN_HEADER,
783 "unterminated header: NUL at offset %ld", i);
4d0d8975
JS
784 case '\n':
785 if (i + 1 < size && buffer[i + 1] == '\n')
786 return 0;
787 }
788 }
789
84d18c0b
JH
790 /*
791 * We did not find double-LF that separates the header
792 * and the body. Not having a body is not a crime but
793 * we do want to see the terminating LF for the last header
794 * line.
795 */
796 if (size && buffer[size - 1] == '\n')
797 return 0;
798
cc579000 799 return report(options, oid, type,
c99ba492 800 FSCK_MSG_UNTERMINATED_HEADER, "unterminated header");
4d0d8975
JS
801}
802
78543993
JK
803static int fsck_ident(const char **ident,
804 const struct object_id *oid, enum object_type type,
805 struct fsck_options *options)
daae1922 806{
e6826e33 807 const char *p = *ident;
d4b8de04
JK
808 char *end;
809
e6826e33
JS
810 *ident = strchrnul(*ident, '\n');
811 if (**ident == '\n')
812 (*ident)++;
813
814 if (*p == '<')
78543993 815 return report(options, oid, type, FSCK_MSG_MISSING_NAME_BEFORE_EMAIL, "invalid author/committer line - missing space before email");
e6826e33
JS
816 p += strcspn(p, "<>\n");
817 if (*p == '>')
78543993 818 return report(options, oid, type, FSCK_MSG_BAD_NAME, "invalid author/committer line - bad name");
e6826e33 819 if (*p != '<')
78543993 820 return report(options, oid, type, FSCK_MSG_MISSING_EMAIL, "invalid author/committer line - missing email");
e6826e33 821 if (p[-1] != ' ')
78543993 822 return report(options, oid, type, FSCK_MSG_MISSING_SPACE_BEFORE_EMAIL, "invalid author/committer line - missing space before email");
e6826e33
JS
823 p++;
824 p += strcspn(p, "<>\n");
825 if (*p != '>')
78543993 826 return report(options, oid, type, FSCK_MSG_BAD_EMAIL, "invalid author/committer line - bad email");
e6826e33
JS
827 p++;
828 if (*p != ' ')
78543993 829 return report(options, oid, type, FSCK_MSG_MISSING_SPACE_BEFORE_DATE, "invalid author/committer line - missing space before date");
e6826e33 830 p++;
8e430903
JK
831 /*
832 * Our timestamp parser is based on the C strto*() functions, which
833 * will happily eat whitespace, including the newline that is supposed
834 * to prevent us walking past the end of the buffer. So do our own
835 * scan, skipping linear whitespace but not newlines, and then
836 * confirming we found a digit. We _could_ be even more strict here,
837 * as we really expect only a single space, but since we have
838 * traditionally allowed extra whitespace, we'll continue to do so.
839 */
840 while (*p == ' ' || *p == '\t')
841 p++;
842 if (!isdigit(*p))
843 return report(options, oid, type, FSCK_MSG_BAD_DATE,
844 "invalid author/committer line - bad date");
e6826e33 845 if (*p == '0' && p[1] != ' ')
78543993 846 return report(options, oid, type, FSCK_MSG_ZERO_PADDED_DATE, "invalid author/committer line - zero-padded date");
1aeb7e75 847 if (date_overflows(parse_timestamp(p, &end, 10)))
78543993 848 return report(options, oid, type, FSCK_MSG_BAD_DATE_OVERFLOW, "invalid author/committer line - date causes integer overflow");
e6826e33 849 if ((end == p || *end != ' '))
78543993 850 return report(options, oid, type, FSCK_MSG_BAD_DATE, "invalid author/committer line - bad date");
e6826e33
JS
851 p = end + 1;
852 if ((*p != '+' && *p != '-') ||
853 !isdigit(p[1]) ||
854 !isdigit(p[2]) ||
855 !isdigit(p[3]) ||
856 !isdigit(p[4]) ||
857 (p[5] != '\n'))
78543993 858 return report(options, oid, type, FSCK_MSG_BAD_TIMEZONE, "invalid author/committer line - bad time zone");
e6826e33 859 p += 6;
daae1922
JN
860 return 0;
861}
862
c5b4269b
JK
863static int fsck_commit(const struct object_id *oid,
864 const char *buffer, unsigned long size,
865 struct fsck_options *options)
ba002f3b 866{
f648ee70 867 struct object_id tree_oid, parent_oid;
ec652315 868 unsigned author_count;
daae1922 869 int err;
6d2d780f 870 const char *buffer_begin = buffer;
8e430903 871 const char *buffer_end = buffer + size;
c54f5ca9 872 const char *p;
ba002f3b 873
8e430903
JK
874 /*
875 * We _must_ stop parsing immediately if this reports failure, as the
876 * memory safety of the rest of the function depends on it. See the
877 * comment above the definition of verify_headers() for more details.
878 */
c5b4269b 879 if (verify_headers(buffer, size, oid, OBJ_COMMIT, options))
4d0d8975
JS
880 return -1;
881
8e430903 882 if (buffer >= buffer_end || !skip_prefix(buffer, "tree ", &buffer))
c5b4269b 883 return report(options, oid, OBJ_COMMIT, FSCK_MSG_MISSING_TREE, "invalid format - expected 'tree' line");
c54f5ca9 884 if (parse_oid_hex(buffer, &tree_oid, &p) || *p != '\n') {
c5b4269b 885 err = report(options, oid, OBJ_COMMIT, FSCK_MSG_BAD_TREE_SHA1, "invalid 'tree' line format - bad sha1");
b3584761
JS
886 if (err)
887 return err;
888 }
c54f5ca9 889 buffer = p + 1;
8e430903 890 while (buffer < buffer_end && skip_prefix(buffer, "parent ", &buffer)) {
f648ee70 891 if (parse_oid_hex(buffer, &parent_oid, &p) || *p != '\n') {
c5b4269b 892 err = report(options, oid, OBJ_COMMIT, FSCK_MSG_BAD_PARENT_SHA1, "invalid 'parent' line format - bad sha1");
b3584761
JS
893 if (err)
894 return err;
895 }
c54f5ca9 896 buffer = p + 1;
ba002f3b 897 }
c9ad147f 898 author_count = 0;
8e430903 899 while (buffer < buffer_end && skip_prefix(buffer, "author ", &buffer)) {
c9ad147f 900 author_count++;
c5b4269b 901 err = fsck_ident(&buffer, oid, OBJ_COMMIT, options);
c9ad147f
JS
902 if (err)
903 return err;
ba002f3b 904 }
c9ad147f 905 if (author_count < 1)
c5b4269b 906 err = report(options, oid, OBJ_COMMIT, FSCK_MSG_MISSING_AUTHOR, "invalid format - expected 'author' line");
c9ad147f 907 else if (author_count > 1)
c5b4269b 908 err = report(options, oid, OBJ_COMMIT, FSCK_MSG_MULTIPLE_AUTHORS, "invalid format - multiple 'author' lines");
daae1922
JN
909 if (err)
910 return err;
8e430903 911 if (buffer >= buffer_end || !skip_prefix(buffer, "committer ", &buffer))
c5b4269b
JK
912 return report(options, oid, OBJ_COMMIT, FSCK_MSG_MISSING_COMMITTER, "invalid format - expected 'committer' line");
913 err = fsck_ident(&buffer, oid, OBJ_COMMIT, options);
daae1922
JN
914 if (err)
915 return err;
6d2d780f 916 if (memchr(buffer_begin, '\0', size)) {
c5b4269b 917 err = report(options, oid, OBJ_COMMIT, FSCK_MSG_NUL_IN_COMMIT,
6d2d780f
JH
918 "NUL byte in the commit object body");
919 if (err)
920 return err;
921 }
ba002f3b
MK
922 return 0;
923}
924
103fb6d4 925static int fsck_tag(const struct object_id *oid, const char *buffer,
2175a0c6 926 unsigned long size, struct fsck_options *options)
cec097be 927{
f648ee70 928 struct object_id tagged_oid;
acf9de4c
ÆAB
929 int tagged_type;
930 return fsck_tag_standalone(oid, buffer, size, options, &tagged_oid,
931 &tagged_type);
932}
933
934int fsck_tag_standalone(const struct object_id *oid, const char *buffer,
935 unsigned long size, struct fsck_options *options,
936 struct object_id *tagged_oid,
937 int *tagged_type)
938{
cec097be 939 int ret = 0;
23a173a7 940 char *eol;
cec097be 941 struct strbuf sb = STRBUF_INIT;
8e430903 942 const char *buffer_end = buffer + size;
c54f5ca9 943 const char *p;
cec097be 944
8e430903
JK
945 /*
946 * We _must_ stop parsing immediately if this reports failure, as the
947 * memory safety of the rest of the function depends on it. See the
948 * comment above the definition of verify_headers() for more details.
949 */
103fb6d4 950 ret = verify_headers(buffer, size, oid, OBJ_TAG, options);
8a272f29 951 if (ret)
cec097be
JS
952 goto done;
953
8e430903 954 if (buffer >= buffer_end || !skip_prefix(buffer, "object ", &buffer)) {
103fb6d4 955 ret = report(options, oid, OBJ_TAG, FSCK_MSG_MISSING_OBJECT, "invalid format - expected 'object' line");
cec097be
JS
956 goto done;
957 }
acf9de4c 958 if (parse_oid_hex(buffer, tagged_oid, &p) || *p != '\n') {
103fb6d4 959 ret = report(options, oid, OBJ_TAG, FSCK_MSG_BAD_OBJECT_SHA1, "invalid 'object' line format - bad sha1");
7d7d5b05
JS
960 if (ret)
961 goto done;
cec097be 962 }
c54f5ca9 963 buffer = p + 1;
cec097be 964
8e430903 965 if (buffer >= buffer_end || !skip_prefix(buffer, "type ", &buffer)) {
103fb6d4 966 ret = report(options, oid, OBJ_TAG, FSCK_MSG_MISSING_TYPE_ENTRY, "invalid format - expected 'type' line");
cec097be
JS
967 goto done;
968 }
8e430903 969 eol = memchr(buffer, '\n', buffer_end - buffer);
cec097be 970 if (!eol) {
103fb6d4 971 ret = report(options, oid, OBJ_TAG, FSCK_MSG_MISSING_TYPE, "invalid format - unexpected end after 'type' line");
cec097be
JS
972 goto done;
973 }
acf9de4c
ÆAB
974 *tagged_type = type_from_string_gently(buffer, eol - buffer, 1);
975 if (*tagged_type < 0)
103fb6d4 976 ret = report(options, oid, OBJ_TAG, FSCK_MSG_BAD_TYPE, "invalid 'type' value");
cec097be
JS
977 if (ret)
978 goto done;
979 buffer = eol + 1;
980
8e430903 981 if (buffer >= buffer_end || !skip_prefix(buffer, "tag ", &buffer)) {
103fb6d4 982 ret = report(options, oid, OBJ_TAG, FSCK_MSG_MISSING_TAG_ENTRY, "invalid format - expected 'tag' line");
cec097be
JS
983 goto done;
984 }
8e430903 985 eol = memchr(buffer, '\n', buffer_end - buffer);
cec097be 986 if (!eol) {
103fb6d4 987 ret = report(options, oid, OBJ_TAG, FSCK_MSG_MISSING_TAG, "invalid format - unexpected end after 'type' line");
cec097be
JS
988 goto done;
989 }
990 strbuf_addf(&sb, "refs/tags/%.*s", (int)(eol - buffer), buffer);
f27d05b1 991 if (check_refname_format(sb.buf, 0)) {
103fb6d4 992 ret = report(options, oid, OBJ_TAG,
38370253
JK
993 FSCK_MSG_BAD_TAG_NAME,
994 "invalid 'tag' name: %.*s",
995 (int)(eol - buffer), buffer);
f27d05b1
JS
996 if (ret)
997 goto done;
998 }
cec097be
JS
999 buffer = eol + 1;
1000
8e430903 1001 if (buffer >= buffer_end || !skip_prefix(buffer, "tagger ", &buffer)) {
cec097be 1002 /* early tags do not contain 'tagger' lines; warn only */
103fb6d4 1003 ret = report(options, oid, OBJ_TAG, FSCK_MSG_MISSING_TAGGER_ENTRY, "invalid format - expected 'tagger' line");
f27d05b1
JS
1004 if (ret)
1005 goto done;
1006 }
cec097be 1007 else
103fb6d4 1008 ret = fsck_ident(&buffer, oid, OBJ_TAG, options);
cec097be 1009
8e430903 1010 if (buffer < buffer_end && !starts_with(buffer, "\n")) {
acf9de4c
ÆAB
1011 /*
1012 * The verify_headers() check will allow
1013 * e.g. "[...]tagger <tagger>\nsome
1014 * garbage\n\nmessage" to pass, thinking "some
1015 * garbage" could be a custom header. E.g. "mktag"
1016 * doesn't want any unknown headers.
1017 */
1018 ret = report(options, oid, OBJ_TAG, FSCK_MSG_EXTRA_HEADER_ENTRY, "invalid format - extra header(s) after 'tagger'");
1019 if (ret)
1020 goto done;
1021 }
1022
cec097be
JS
1023done:
1024 strbuf_release(&sb);
cec097be
JS
1025 return ret;
1026}
1027
9fd512c8 1028static int starts_with_dot_slash(const char *const path)
a2b26ffb 1029{
9fd512c8
ÆAB
1030 return path_match_flags(path, PATH_MATCH_STARTS_WITH_DOT_SLASH |
1031 PATH_MATCH_XPLATFORM);
a2b26ffb
JN
1032}
1033
9fd512c8 1034static int starts_with_dot_dot_slash(const char *const path)
a2b26ffb 1035{
9fd512c8
ÆAB
1036 return path_match_flags(path, PATH_MATCH_STARTS_WITH_DOT_DOT_SLASH |
1037 PATH_MATCH_XPLATFORM);
a2b26ffb
JN
1038}
1039
/*
 * A submodule URL counts as relative when it begins with a "./" or a
 * "../" component. Returns 1 if so, 0 otherwise.
 */
static int submodule_url_is_relative(const char *url)
{
	if (starts_with_dot_slash(url))
		return 1;
	return starts_with_dot_dot_slash(url) ? 1 : 0;
}
1044
c44088ec
JN
/*
 * Work out how many directory components a relative submodule URL asks
 * to have removed from the remote_url it will be resolved against, i.e.
 * how many "../" components it starts with.
 *
 * Leading "./" components are skipped over without being counted.
 *
 * The count is returned; "out" receives a pointer to the first character
 * of url that follows every leading "./" and "../" component.
 */
static int count_leading_dotdots(const char *url, const char **out)
{
	int dotdots = 0;

	for (;;) {
		if (starts_with_dot_dot_slash(url)) {
			url += strlen("../");
			dotdots++;
		} else if (starts_with_dot_slash(url)) {
			url += strlen("./");
		} else {
			break;
		}
	}

	*out = url;
	return dotdots;
}
a2b26ffb
JN
/*
 * Decide whether a URL's transport is one implemented by git-remote-curl.
 *
 * When it is, 1 is returned and "out" is set to the URL that
 * git-remote-curl would be given. When it is not, 0 is returned and
 * "out" is not written.
 *
 * Examples:
 *   http::https://example.com/repo.git -> 1, https://example.com/repo.git
 *   https://example.com/repo.git -> 1, https://example.com/repo.git
 *   git://example.com/repo.git -> 0
 *
 * This exists to check for previously exploitable bugs that required a
 * submodule URL to be passed to git-remote-curl.
 */
static int url_to_curl_url(const char *url, const char **out)
{
	/*
	 * Case-aliases, "http.exe" and the like need no handling here:
	 * in the default configuration is_transport_allowed prevents
	 * URLs with those schemes from being cloned automatically.
	 */
	static const char *const helper_prefixes[] = {
		"http::", "https::", "ftp::", "ftps::"
	};
	static const char *const plain_schemes[] = {
		"http://", "https://", "ftp://", "ftps://"
	};
	size_t i;

	/* "<helper>::<url>": the helper is handed what follows the "::". */
	for (i = 0; i < sizeof(helper_prefixes) / sizeof(helper_prefixes[0]); i++) {
		if (skip_prefix(url, helper_prefixes[i], out))
			return 1;
	}

	/* A plain curl-handled scheme: the URL is passed on as it is. */
	for (i = 0; i < sizeof(plain_schemes) / sizeof(plain_schemes[0]); i++) {
		if (starts_with(url, plain_schemes[i])) {
			*out = url;
			return 1;
		}
	}

	return 0;
}
1111
07259e74
JK
1112static int check_submodule_url(const char *url)
1113{
a2b26ffb 1114 const char *curl_url;
07259e74
JK
1115
1116 if (looks_like_command_line_option(url))
1117 return -1;
1118
6aed5673 1119 if (submodule_url_is_relative(url) || starts_with(url, "git://")) {
c44088ec
JN
1120 char *decoded;
1121 const char *next;
1122 int has_nl;
1123
a2b26ffb
JN
1124 /*
1125 * This could be appended to an http URL and url-decoded;
1126 * check for malicious characters.
1127 */
c44088ec
JN
1128 decoded = url_decode(url);
1129 has_nl = !!strchr(decoded, '\n');
1130
a2b26ffb
JN
1131 free(decoded);
1132 if (has_nl)
1133 return -1;
c44088ec
JN
1134
1135 /*
1136 * URLs which escape their root via "../" can overwrite
1137 * the host field and previous components, resolving to
1a3609e4
JN
1138 * URLs like https::example.com/submodule.git and
1139 * https:///example.com/submodule.git that were
c44088ec
JN
1140 * susceptible to CVE-2020-11008.
1141 */
1142 if (count_leading_dotdots(url, &next) > 0 &&
1a3609e4 1143 (*next == ':' || *next == '/'))
c44088ec 1144 return -1;
a2b26ffb
JN
1145 }
1146
1147 else if (url_to_curl_url(url, &curl_url)) {
1148 struct credential c = CREDENTIAL_INIT;
1a3609e4
JN
1149 int ret = 0;
1150 if (credential_from_url_gently(&c, curl_url, 1) ||
1151 !*c.host)
1152 ret = -1;
a2b26ffb
JN
1153 credential_clear(&c);
1154 return ret;
1155 }
1156
1157 return 0;
07259e74
JK
1158}
1159
/*
 * State handed to fsck_gitmodules_fn() through its "vdata" argument
 * while a .gitmodules blob is being parsed.
 */
struct fsck_gitmodules_data {
	/* id of the blob under inspection; passed to every report() */
	const struct object_id *oid;

	/* fsck options passed to every report() */
	struct fsck_options *options;

	/* results of all report() calls made so far, OR-ed together */
	int ret;
};
1165
1166static int fsck_gitmodules_fn(const char *var, const char *value, void *vdata)
1167{
1168 struct fsck_gitmodules_data *data = vdata;
1169 const char *subsection, *key;
f5914f4b 1170 size_t subsection_len;
ed8b10f6
JK
1171 char *name;
1172
1173 if (parse_config_key(var, "submodule", &subsection, &subsection_len, &key) < 0 ||
1174 !subsection)
1175 return 0;
1176
1177 name = xmemdupz(subsection, subsection_len);
1178 if (check_submodule_name(name) < 0)
38370253 1179 data->ret |= report(data->options,
6da40b22 1180 data->oid, OBJ_BLOB,
ed8b10f6
JK
1181 FSCK_MSG_GITMODULES_NAME,
1182 "disallowed submodule name: %s",
1183 name);
a124133e 1184 if (!strcmp(key, "url") && value &&
07259e74 1185 check_submodule_url(value) < 0)
38370253 1186 data->ret |= report(data->options,
6da40b22 1187 data->oid, OBJ_BLOB,
a124133e
JK
1188 FSCK_MSG_GITMODULES_URL,
1189 "disallowed submodule url: %s",
1190 value);
1a7fd1fb
JK
1191 if (!strcmp(key, "path") && value &&
1192 looks_like_command_line_option(value))
38370253 1193 data->ret |= report(data->options,
6da40b22 1194 data->oid, OBJ_BLOB,
1a7fd1fb
JK
1195 FSCK_MSG_GITMODULES_PATH,
1196 "disallowed submodule path: %s",
1197 value);
bb92255e
JN
1198 if (!strcmp(key, "update") && value &&
1199 parse_submodule_update_type(value) == SM_UPDATE_COMMAND)
7034cd09 1200 data->ret |= report(data->options, data->oid, OBJ_BLOB,
bb92255e
JN
1201 FSCK_MSG_GITMODULES_UPDATE,
1202 "disallowed submodule update setting: %s",
1203 value);
ed8b10f6
JK
1204 free(name);
1205
1206 return 0;
1207}
1208
6da40b22 1209static int fsck_blob(const struct object_id *oid, const char *buf,
7ac4f3a0
JK
1210 unsigned long size, struct fsck_options *options)
1211{
bb3a9265 1212 int ret = 0;
ed8b10f6 1213
6da40b22 1214 if (object_on_skiplist(options, oid))
fb162877
RJ
1215 return 0;
1216
bb3a9265
PS
1217 if (oidset_contains(&options->gitmodules_found, oid)) {
1218 struct config_options config_opts = { 0 };
1219 struct fsck_gitmodules_data data;
1220
1221 oidset_insert(&options->gitmodules_done, oid);
1222
1223 if (!buf) {
1224 /*
1225 * A missing buffer here is a sign that the caller found the
1226 * blob too gigantic to load into memory. Let's just consider
1227 * that an error.
1228 */
1229 return report(options, oid, OBJ_BLOB,
1230 FSCK_MSG_GITMODULES_LARGE,
1231 ".gitmodules too large to parse");
1232 }
1233
1234 data.oid = oid;
1235 data.options = options;
1236 data.ret = 0;
1237 config_opts.error_action = CONFIG_ERROR_SILENT;
1238 if (git_config_from_mem(fsck_gitmodules_fn, CONFIG_ORIGIN_BLOB,
1239 ".gitmodules", buf, size, &data, &config_opts))
1240 data.ret |= report(options, oid, OBJ_BLOB,
1241 FSCK_MSG_GITMODULES_PARSE,
1242 "could not parse gitmodules blob");
1243 ret |= data.ret;
ed8b10f6
JK
1244 }
1245
27ab4784
PS
1246 if (oidset_contains(&options->gitattributes_found, oid)) {
1247 const char *ptr;
1248
1249 oidset_insert(&options->gitattributes_done, oid);
1250
1251 if (!buf || size > ATTR_MAX_FILE_SIZE) {
1252 /*
1253 * A missing buffer here is a sign that the caller found the
1254 * blob too gigantic to load into memory. Let's just consider
1255 * that an error.
1256 */
1257 return report(options, oid, OBJ_BLOB,
1258 FSCK_MSG_GITATTRIBUTES_LARGE,
1259 ".gitattributes too large to parse");
1260 }
1261
1262 for (ptr = buf; *ptr; ) {
1263 const char *eol = strchrnul(ptr, '\n');
1264 if (eol - ptr >= ATTR_MAX_LINE_LENGTH) {
1265 ret |= report(options, oid, OBJ_BLOB,
1266 FSCK_MSG_GITATTRIBUTES_LINE_LENGTH,
1267 ".gitattributes has too long lines to parse");
1268 break;
1269 }
1270
1271 ptr = *eol ? eol + 1 : eol;
1272 }
1273 }
1274
bb3a9265 1275 return ret;
7ac4f3a0
JK
1276}
1277
90a398bb 1278int fsck_object(struct object *obj, void *data, unsigned long size,
22410549 1279 struct fsck_options *options)
ba002f3b
MK
1280{
1281 if (!obj)
38370253 1282 return report(options, NULL, OBJ_NONE, FSCK_MSG_BAD_OBJECT_SHA1, "no valid object to fsck");
ba002f3b 1283
35ff327e
JK
1284 return fsck_buffer(&obj->oid, obj->type, data, size, options);
1285}
1286
1287int fsck_buffer(const struct object_id *oid, enum object_type type,
1288 void *data, unsigned long size,
1289 struct fsck_options *options)
1290{
1291 if (type == OBJ_BLOB)
1292 return fsck_blob(oid, data, size, options);
1293 if (type == OBJ_TREE)
1294 return fsck_tree(oid, data, size, options);
1295 if (type == OBJ_COMMIT)
1296 return fsck_commit(oid, data, size, options);
1297 if (type == OBJ_TAG)
1298 return fsck_tag(oid, data, size, options);
1299
1300 return report(options, oid, type,
38370253
JK
1301 FSCK_MSG_UNKNOWN_TYPE,
1302 "unknown type '%d' (internal fsck error)",
35ff327e 1303 type);
ba002f3b 1304}
d6ffc8d7 1305
1cd772cc 1306int fsck_error_function(struct fsck_options *o,
5afc4b1d
JK
1307 const struct object_id *oid,
1308 enum object_type object_type,
394d5d31
ÆAB
1309 enum fsck_msg_type msg_type,
1310 enum fsck_msg_id msg_id,
1311 const char *message)
d6ffc8d7 1312{
0282f4dc 1313 if (msg_type == FSCK_WARN) {
5afc4b1d 1314 warning("object %s: %s", fsck_describe_object(o, oid), message);
0282f4dc
JS
1315 return 0;
1316 }
5afc4b1d 1317 error("object %s: %s", fsck_describe_object(o, oid), message);
d6ffc8d7
MK
1318 return 1;
1319}
159e7b08 1320
a59a8c68
PS
1321static int fsck_blobs(struct oidset *blobs_found, struct oidset *blobs_done,
1322 enum fsck_msg_id msg_missing, enum fsck_msg_id msg_type,
1323 struct fsck_options *options, const char *blob_type)
159e7b08
JK
1324{
1325 int ret = 0;
1326 struct oidset_iter iter;
1327 const struct object_id *oid;
1328
a59a8c68 1329 oidset_iter_init(blobs_found, &iter);
159e7b08 1330 while ((oid = oidset_iter_next(&iter))) {
159e7b08
JK
1331 enum object_type type;
1332 unsigned long size;
1333 char *buf;
1334
a59a8c68 1335 if (oidset_contains(blobs_done, oid))
159e7b08
JK
1336 continue;
1337
bc726bd0 1338 buf = repo_read_object_file(the_repository, oid, &type, &size);
159e7b08 1339 if (!buf) {
b8b00f16 1340 if (is_promisor_object(oid))
27387444 1341 continue;
38370253 1342 ret |= report(options,
a59a8c68
PS
1343 oid, OBJ_BLOB, msg_missing,
1344 "unable to read %s blob", blob_type);
159e7b08
JK
1345 continue;
1346 }
1347
1348 if (type == OBJ_BLOB)
b8b00f16 1349 ret |= fsck_blob(oid, buf, size, options);
159e7b08 1350 else
a59a8c68
PS
1351 ret |= report(options, oid, type, msg_type,
1352 "non-blob found at %s", blob_type);
159e7b08
JK
1353 free(buf);
1354 }
1355
a59a8c68
PS
1356 oidset_clear(blobs_found);
1357 oidset_clear(blobs_done);
1358
1359 return ret;
1360}
1361
1362int fsck_finish(struct fsck_options *options)
1363{
1364 int ret = 0;
1365
1366 ret |= fsck_blobs(&options->gitmodules_found, &options->gitmodules_done,
1367 FSCK_MSG_GITMODULES_MISSING, FSCK_MSG_GITMODULES_BLOB,
1368 options, ".gitmodules");
27ab4784
PS
1369 ret |= fsck_blobs(&options->gitattributes_found, &options->gitattributes_done,
1370 FSCK_MSG_GITATTRIBUTES_MISSING, FSCK_MSG_GITATTRIBUTES_BLOB,
1371 options, ".gitattributes");
159e7b08 1372
159e7b08
JK
1373 return ret;
1374}
1f3299fd 1375
fb79f5bf 1376int git_fsck_config(const char *var, const char *value, void *cb)
1f3299fd 1377{
fb79f5bf 1378 struct fsck_options *options = cb;
1f3299fd
ÆAB
1379 if (strcmp(var, "fsck.skiplist") == 0) {
1380 const char *path;
1381 struct strbuf sb = STRBUF_INIT;
1382
1383 if (git_config_pathname(&path, var, value))
1384 return 1;
1385 strbuf_addf(&sb, "skiplist=%s", path);
1386 free((char *)path);
1387 fsck_set_msg_types(options, sb.buf);
1388 strbuf_release(&sb);
1389 return 0;
1390 }
1391
1392 if (skip_prefix(var, "fsck.", &var)) {
1393 fsck_set_msg_type(options, var, value);
1394 return 0;
1395 }
1396
1397 return git_default_config(var, value, cb);
1398}
3745e269
ÆAB
1399
1400/*
1401 * Custom error callbacks that are used in more than one place.
1402 */
1403
1404int fsck_error_cb_print_missing_gitmodules(struct fsck_options *o,
1405 const struct object_id *oid,
1406 enum object_type object_type,
1407 enum fsck_msg_type msg_type,
1408 enum fsck_msg_id msg_id,
1409 const char *message)
1410{
1411 if (msg_id == FSCK_MSG_GITMODULES_MISSING) {
1412 puts(oid_to_hex(oid));
1413 return 0;
1414 }
1415 return fsck_error_function(o, oid, object_type, msg_type, msg_id, message);
1416}