]> git.ipfire.org Git - thirdparty/git.git/commitdiff
ref: add more strict checks for regular refs
authorshejialuo <shejialuo@gmail.com>
Wed, 20 Nov 2024 11:51:49 +0000 (19:51 +0800)
committerJunio C Hamano <gitster@pobox.com>
Wed, 20 Nov 2024 23:21:33 +0000 (08:21 +0900)
We have already used "parse_loose_ref_contents" function to check
whether the ref content is valid in files backend. However, by
using "parse_loose_ref_contents", we allow the ref's content to end with
garbage or without a newline.

Even though we never create such loose refs ourselves, we have accepted
such loose refs. So, it is entirely possible that some third-party tools
may rely on such loose refs being valid. We should not report an error
fsck message at current. We should notify the users about such
"curiously formatted" loose refs so that adequate care is taken before
we decide to tighten the rules in the future.

And it's not suitable either to report a warn fsck message to the user.
We don't yet want the "--strict" flag that controls this bit to end up
generating errors for such weirdly-formatted reference contents, as we
first want to assess whether this retroactive tightening will cause
issues for any tools out there. It may cause compatibility issues which
may break the repository. So, we add the following two fsck infos to
represent the situation where the ref content ends without newline or
has trailing garbages:

1. refMissingNewline(INFO): A loose ref that does not end with
   newline(LF).
2. trailingRefContent(INFO): A loose ref has trailing content.

It might appear that we can't provide the user with any warnings by
using FSCK_INFO. However, in "fsck.c::fsck_vreport", we will convert
FSCK_INFO to FSCK_WARN and we can still warn the user about these
situations when using "git refs verify" without introducing
compatibility issues.

Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Documentation/fsck-msgids.txt
fsck.h
refs.c
refs/files-backend.c
refs/refs-internal.h
t/t0602-reffiles-fsck.sh

index 22c385ea22ea70940a367d32b57388dc6560322b..6db0eaa84a6709f449af573916afcc9ec8130d08 100644 (file)
 `nullSha1`::
        (WARN) Tree contains entries pointing to a null sha1.
 
+`refMissingNewline`::
+       (INFO) A loose ref that does not end with newline(LF). As
+       valid implementations of Git never created such a loose ref
+       file, it may become an error in the future. Report to the
+       git@vger.kernel.org mailing list if you see this error, as
+       we need to know what tools created such a file.
+
+`trailingRefContent`::
+       (INFO) A loose ref has trailing content. As valid implementations
+       of Git never created such a loose ref file, it may become an
+       error in the future. Report to the git@vger.kernel.org mailing
+       list if you see this error, as we need to know what tools
+       created such a file.
+
 `treeNotSorted`::
        (ERROR) A tree is not properly sorted.
 
diff --git a/fsck.h b/fsck.h
index 0d99a8791142646794eae78dc4b5b6721dc5a676..b85072df57620356f01678de7fae51a9b1c6f662 100644 (file)
--- a/fsck.h
+++ b/fsck.h
@@ -85,6 +85,8 @@ enum fsck_msg_type {
        FUNC(MAILMAP_SYMLINK, INFO) \
        FUNC(BAD_TAG_NAME, INFO) \
        FUNC(MISSING_TAGGER_ENTRY, INFO) \
+       FUNC(REF_MISSING_NEWLINE, INFO) \
+       FUNC(TRAILING_REF_CONTENT, INFO) \
        /* ignored (elevated when requested) */ \
        FUNC(EXTRA_HEADER_ENTRY, IGNORE)
 
diff --git a/refs.c b/refs.c
index 395a17273ccb014311dc41032a31389187ce7b4b..f88b32a633b78385a0031641d39ba6c37ae7e1ed 100644 (file)
--- a/refs.c
+++ b/refs.c
@@ -1789,7 +1789,7 @@ static int refs_read_special_head(struct ref_store *ref_store,
        }
 
        result = parse_loose_ref_contents(ref_store->repo->hash_algo, content.buf,
-                                         oid, referent, type, failure_errno);
+                                         oid, referent, type, NULL, failure_errno);
 
 done:
        strbuf_release(&full_path);
index 9f300a7d3c91822281a985fcb3e545812b873ed7..3d4d6124206da5b5115e1e00c4a3f164e728a0b1 100644 (file)
@@ -569,7 +569,7 @@ stat_ref:
        buf = sb_contents.buf;
 
        ret = parse_loose_ref_contents(ref_store->repo->hash_algo, buf,
-                                      oid, referent, type, &myerr);
+                                      oid, referent, type, NULL, &myerr);
 
 out:
        if (ret && !myerr)
@@ -606,7 +606,7 @@ static int files_read_symbolic_ref(struct ref_store *ref_store, const char *refn
 int parse_loose_ref_contents(const struct git_hash_algo *algop,
                             const char *buf, struct object_id *oid,
                             struct strbuf *referent, unsigned int *type,
-                            int *failure_errno)
+                            const char **trailing, int *failure_errno)
 {
        const char *p;
        if (skip_prefix(buf, "ref:", &buf)) {
@@ -628,6 +628,10 @@ int parse_loose_ref_contents(const struct git_hash_algo *algop,
                *failure_errno = EINVAL;
                return -1;
        }
+
+       if (trailing)
+               *trailing = p;
+
        return 0;
 }
 
@@ -3513,6 +3517,7 @@ static int files_fsck_refs_content(struct ref_store *ref_store,
        struct strbuf ref_content = STRBUF_INIT;
        struct strbuf referent = STRBUF_INIT;
        struct fsck_ref_report report = { 0 };
+       const char *trailing = NULL;
        unsigned int type = 0;
        int failure_errno = 0;
        struct object_id oid;
@@ -3537,7 +3542,7 @@ static int files_fsck_refs_content(struct ref_store *ref_store,
 
        if (parse_loose_ref_contents(ref_store->repo->hash_algo,
                                     ref_content.buf, &oid, &referent,
-                                    &type, &failure_errno)) {
+                                    &type, &trailing, &failure_errno)) {
                strbuf_rtrim(&ref_content);
                ret = fsck_report_ref(o, &report,
                                      FSCK_MSG_BAD_REF_CONTENT,
@@ -3545,6 +3550,21 @@ static int files_fsck_refs_content(struct ref_store *ref_store,
                goto cleanup;
        }
 
+       if (!(type & REF_ISSYMREF)) {
+               if (!*trailing) {
+                       ret = fsck_report_ref(o, &report,
+                                             FSCK_MSG_REF_MISSING_NEWLINE,
+                                             "misses LF at the end");
+                       goto cleanup;
+               }
+               if (*trailing != '\n' || *(trailing + 1)) {
+                       ret = fsck_report_ref(o, &report,
+                                             FSCK_MSG_TRAILING_REF_CONTENT,
+                                             "has trailing garbage: '%s'", trailing);
+                       goto cleanup;
+               }
+       }
+
 cleanup:
        strbuf_release(&ref_content);
        strbuf_release(&referent);
index 037d7991cdd8d09c3ad51003b4abc39fee9c89d4..125f1fe735d2c3b8c64258373170cb4d9cc56914 100644 (file)
@@ -716,7 +716,7 @@ struct ref_store {
 int parse_loose_ref_contents(const struct git_hash_algo *algop,
                             const char *buf, struct object_id *oid,
                             struct strbuf *referent, unsigned int *type,
-                            int *failure_errno);
+                            const char **trailing, int *failure_errno);
 
 /*
  * Fill in the generic part of refs and add it to our collection of
index 162370077bb6a896e8cf020e28d04033317cd5a4..33e7a390ad76c8c7eeed73c22d10741cba04ddbb 100755 (executable)
@@ -189,7 +189,48 @@ test_expect_success 'regular ref content should be checked (individual)' '
                EOF
                rm $branch_dir_prefix/a/b/branch-bad &&
                test_cmp expect err || return 1
-       done
+       done &&
+
+       printf "%s" "$(git rev-parse main)" >$branch_dir_prefix/branch-no-newline &&
+       git refs verify 2>err &&
+       cat >expect <<-EOF &&
+       warning: refs/heads/branch-no-newline: refMissingNewline: misses LF at the end
+       EOF
+       rm $branch_dir_prefix/branch-no-newline &&
+       test_cmp expect err &&
+
+       for trailing_content in " garbage" "    more garbage"
+       do
+               printf "%s" "$(git rev-parse main)$trailing_content" >$branch_dir_prefix/branch-garbage &&
+               git refs verify 2>err &&
+               cat >expect <<-EOF &&
+               warning: refs/heads/branch-garbage: trailingRefContent: has trailing garbage: '\''$trailing_content'\''
+               EOF
+               rm $branch_dir_prefix/branch-garbage &&
+               test_cmp expect err || return 1
+       done &&
+
+       printf "%s\n\n\n" "$(git rev-parse main)" >$branch_dir_prefix/branch-garbage-special &&
+       git refs verify 2>err &&
+       cat >expect <<-EOF &&
+       warning: refs/heads/branch-garbage-special: trailingRefContent: has trailing garbage: '\''
+
+
+       '\''
+       EOF
+       rm $branch_dir_prefix/branch-garbage-special &&
+       test_cmp expect err &&
+
+       printf "%s\n\n\n  garbage" "$(git rev-parse main)" >$branch_dir_prefix/branch-garbage-special &&
+       git refs verify 2>err &&
+       cat >expect <<-EOF &&
+       warning: refs/heads/branch-garbage-special: trailingRefContent: has trailing garbage: '\''
+
+
+         garbage'\''
+       EOF
+       rm $branch_dir_prefix/branch-garbage-special &&
+       test_cmp expect err
 '
 
 test_expect_success 'regular ref content should be checked (aggregate)' '
@@ -207,12 +248,16 @@ test_expect_success 'regular ref content should be checked (aggregate)' '
        printf "%s" $bad_content_1 >$tag_dir_prefix/tag-bad-1 &&
        printf "%s" $bad_content_2 >$tag_dir_prefix/tag-bad-2 &&
        printf "%s" $bad_content_3 >$branch_dir_prefix/a/b/branch-bad &&
+       printf "%s" "$(git rev-parse main)" >$branch_dir_prefix/branch-no-newline &&
+       printf "%s garbage" "$(git rev-parse main)" >$branch_dir_prefix/branch-garbage &&
 
        test_must_fail git refs verify 2>err &&
        cat >expect <<-EOF &&
        error: refs/heads/a/b/branch-bad: badRefContent: $bad_content_3
        error: refs/tags/tag-bad-1: badRefContent: $bad_content_1
        error: refs/tags/tag-bad-2: badRefContent: $bad_content_2
+       warning: refs/heads/branch-garbage: trailingRefContent: has trailing garbage: '\'' garbage'\''
+       warning: refs/heads/branch-no-newline: refMissingNewline: misses LF at the end
        EOF
        sort err >sorted_err &&
        test_cmp expect sorted_err
@@ -260,7 +305,15 @@ test_expect_success 'ref content checks should work with worktrees' '
                EOF
                rm $worktree2_refdir_prefix/bad-branch-2 &&
                test_cmp expect err || return 1
-       done
+       done &&
+
+       printf "%s" "$(git rev-parse HEAD)" >$worktree1_refdir_prefix/branch-no-newline &&
+       git refs verify 2>err &&
+       cat >expect <<-EOF &&
+       warning: worktrees/worktree-1/refs/worktree/branch-no-newline: refMissingNewline: misses LF at the end
+       EOF
+       rm $worktree1_refdir_prefix/branch-no-newline &&
+       test_cmp expect err
 '
 
 test_done