]> git.ipfire.org Git - thirdparty/git.git/commitdiff
ref: add basic symref content check for files backend
authorshejialuo <shejialuo@gmail.com>
Wed, 20 Nov 2024 11:52:00 +0000 (19:52 +0800)
committerJunio C Hamano <gitster@pobox.com>
Wed, 20 Nov 2024 23:21:33 +0000 (08:21 +0900)
We have code that checks regular ref contents, but we do not yet check
the contents of symbolic refs. By using "parse_loose_ref_content" for
symbolic refs, we will get the information of the "referent".

We do not need to check the "referent" by opening the file. This is
because if "referent" exists in the file system, we will eventually
check its correctness by inspecting every file in the "refs" directory.
If the "referent" does not exist in the filesystem, this is OK as it is
seen as the dangling symref.

So we just need to check the "referent" string content. A regular ref
could be accepted as a textual symref if it begins with "ref:", followed
by zero or more whitespaces, followed by the full refname, followed only
by whitespace characters. However, we always write a single SP after
"ref:" and a single LF after the refname. It may seem that we should
report a fsck error message when the "referent" does not apply above
rules and we should not be so aggressive because third-party
reimplementations of Git may have taken advantage of the looser syntax.
Put it more specific, we accept the following contents:

1. "ref: refs/heads/master   "
2. "ref: refs/heads/master   \n  \n"
3. "ref: refs/heads/master\n\n"

When introducing the regular ref content checks, we created two fsck
infos "refMissingNewline" and "trailingRefContent" which exactly
represents above situations. So we will reuse these two fsck messages to
write checks to info the user about these situations.

But we do not allow any other trailing garbage. The followings are bad
symref contents which will be reported as fsck error by "git-fsck(1)".

1. "ref: refs/heads/master garbage\n"
2. "ref: refs/heads/master \n\n\n garbage  "

And we introduce a new "badReferentName(ERROR)" fsck message to report
above errors by using "is_root_ref" and "check_refname_format" to check
the "referent". Since both "is_root_ref" and "check_refname_format"
don't work with whitespaces, we use the trimmed version of "referent"
with these functions.

In order to add checks, we will do the following things:

1. Record the untrimmed length "orig_len" and untrimmed last byte
   "orig_last_byte".
2. Use "strbuf_rtrim" to trim the whitespaces or newlines to make sure
   "is_root_ref" and "check_refname_format" won't be failed by them.
3. Use "orig_len" and "orig_last_byte" to check whether the "referent"
   misses '\n' at the end or it has trailing whitespaces or newlines.

Mentored-by: Patrick Steinhardt <ps@pks.im>
Mentored-by: Karthik Nayak <karthik.188@gmail.com>
Signed-off-by: shejialuo <shejialuo@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Documentation/fsck-msgids.txt
fsck.h
refs/files-backend.c
t/t0602-reffiles-fsck.sh

index 6db0eaa84a6709f449af573916afcc9ec8130d08..dcea05edfc62ba9268bf843247867c5784344084 100644 (file)
@@ -28,6 +28,9 @@
 `badRefName`::
        (ERROR) A ref has an invalid format.
 
+`badReferentName`::
+       (ERROR) The referent name of a symref is invalid.
+
 `badTagName`::
        (INFO) A tag has an invalid format.
 
diff --git a/fsck.h b/fsck.h
index b85072df57620356f01678de7fae51a9b1c6f662..5227dfdef2c20d87da46857260b15af856509542 100644 (file)
--- a/fsck.h
+++ b/fsck.h
@@ -34,6 +34,7 @@ enum fsck_msg_type {
        FUNC(BAD_REF_CONTENT, ERROR) \
        FUNC(BAD_REF_FILETYPE, ERROR) \
        FUNC(BAD_REF_NAME, ERROR) \
+       FUNC(BAD_REFERENT_NAME, ERROR) \
        FUNC(BAD_TIMEZONE, ERROR) \
        FUNC(BAD_TREE, ERROR) \
        FUNC(BAD_TREE_SHA1, ERROR) \
index 3d4d6124206da5b5115e1e00c4a3f164e728a0b1..f4342e3f3e4fb435420c1a2f0798b7448baac65a 100644 (file)
@@ -3509,6 +3509,43 @@ typedef int (*files_fsck_refs_fn)(struct ref_store *ref_store,
                                  const char *refname,
                                  struct dir_iterator *iter);
 
+static int files_fsck_symref_target(struct fsck_options *o,
+                                   struct fsck_ref_report *report,
+                                   struct strbuf *referent)
+{
+       char orig_last_byte;
+       size_t orig_len;
+       int ret = 0;
+
+       orig_len = referent->len;
+       orig_last_byte = referent->buf[orig_len - 1];
+       strbuf_rtrim(referent);
+
+       if (!is_root_ref(referent->buf) &&
+           check_refname_format(referent->buf, 0)) {
+               ret = fsck_report_ref(o, report,
+                                     FSCK_MSG_BAD_REFERENT_NAME,
+                                     "points to invalid refname '%s'", referent->buf);
+               goto out;
+       }
+
+       if (referent->len == orig_len ||
+           (referent->len < orig_len && orig_last_byte != '\n')) {
+               ret = fsck_report_ref(o, report,
+                                     FSCK_MSG_REF_MISSING_NEWLINE,
+                                     "misses LF at the end");
+       }
+
+       if (referent->len != orig_len && referent->len != orig_len - 1) {
+               ret = fsck_report_ref(o, report,
+                                     FSCK_MSG_TRAILING_REF_CONTENT,
+                                     "has trailing whitespaces or newlines");
+       }
+
+out:
+       return ret;
+}
+
 static int files_fsck_refs_content(struct ref_store *ref_store,
                                   struct fsck_options *o,
                                   const char *target_name,
@@ -3563,6 +3600,9 @@ static int files_fsck_refs_content(struct ref_store *ref_store,
                                              "has trailing garbage: '%s'", trailing);
                        goto cleanup;
                }
+       } else {
+               ret = files_fsck_symref_target(o, &report, &referent);
+               goto cleanup;
        }
 
 cleanup:
index 33e7a390ad76c8c7eeed73c22d10741cba04ddbb..ee1e5f2864133a3c9c4285681db39d48b21dd5e6 100755 (executable)
@@ -263,6 +263,109 @@ test_expect_success 'regular ref content should be checked (aggregate)' '
        test_cmp expect sorted_err
 '
 
+test_expect_success 'textual symref content should be checked (individual)' '
+       test_when_finished "rm -rf repo" &&
+       git init repo &&
+       branch_dir_prefix=.git/refs/heads &&
+       cd repo &&
+       test_commit default &&
+       mkdir -p "$branch_dir_prefix/a/b" &&
+
+       for good_referent in "refs/heads/branch" "HEAD"
+       do
+               printf "ref: %s\n" $good_referent >$branch_dir_prefix/branch-good &&
+               git refs verify 2>err &&
+               rm $branch_dir_prefix/branch-good &&
+               test_must_be_empty err || return 1
+       done &&
+
+       for bad_referent in "refs/heads/.branch" "refs/heads/~branch" "refs/heads/?branch"
+       do
+               printf "ref: %s\n" $bad_referent >$branch_dir_prefix/branch-bad &&
+               test_must_fail git refs verify 2>err &&
+               cat >expect <<-EOF &&
+               error: refs/heads/branch-bad: badReferentName: points to invalid refname '\''$bad_referent'\''
+               EOF
+               rm $branch_dir_prefix/branch-bad &&
+               test_cmp expect err || return 1
+       done &&
+
+       printf "ref: refs/heads/branch" >$branch_dir_prefix/branch-no-newline &&
+       git refs verify 2>err &&
+       cat >expect <<-EOF &&
+       warning: refs/heads/branch-no-newline: refMissingNewline: misses LF at the end
+       EOF
+       rm $branch_dir_prefix/branch-no-newline &&
+       test_cmp expect err &&
+
+       printf "ref: refs/heads/branch     " >$branch_dir_prefix/a/b/branch-trailing-1 &&
+       git refs verify 2>err &&
+       cat >expect <<-EOF &&
+       warning: refs/heads/a/b/branch-trailing-1: refMissingNewline: misses LF at the end
+       warning: refs/heads/a/b/branch-trailing-1: trailingRefContent: has trailing whitespaces or newlines
+       EOF
+       rm $branch_dir_prefix/a/b/branch-trailing-1 &&
+       test_cmp expect err &&
+
+       printf "ref: refs/heads/branch\n\n" >$branch_dir_prefix/a/b/branch-trailing-2 &&
+       git refs verify 2>err &&
+       cat >expect <<-EOF &&
+       warning: refs/heads/a/b/branch-trailing-2: trailingRefContent: has trailing whitespaces or newlines
+       EOF
+       rm $branch_dir_prefix/a/b/branch-trailing-2 &&
+       test_cmp expect err &&
+
+       printf "ref: refs/heads/branch \n" >$branch_dir_prefix/a/b/branch-trailing-3 &&
+       git refs verify 2>err &&
+       cat >expect <<-EOF &&
+       warning: refs/heads/a/b/branch-trailing-3: trailingRefContent: has trailing whitespaces or newlines
+       EOF
+       rm $branch_dir_prefix/a/b/branch-trailing-3 &&
+       test_cmp expect err &&
+
+       printf "ref: refs/heads/branch \n  " >$branch_dir_prefix/a/b/branch-complicated &&
+       git refs verify 2>err &&
+       cat >expect <<-EOF &&
+       warning: refs/heads/a/b/branch-complicated: refMissingNewline: misses LF at the end
+       warning: refs/heads/a/b/branch-complicated: trailingRefContent: has trailing whitespaces or newlines
+       EOF
+       rm $branch_dir_prefix/a/b/branch-complicated &&
+       test_cmp expect err
+'
+
+test_expect_success 'textual symref content should be checked (aggregate)' '
+       test_when_finished "rm -rf repo" &&
+       git init repo &&
+       branch_dir_prefix=.git/refs/heads &&
+       tag_dir_prefix=.git/refs/tags &&
+       cd repo &&
+       test_commit default &&
+       mkdir -p "$branch_dir_prefix/a/b" &&
+
+       printf "ref: refs/heads/branch\n" >$branch_dir_prefix/branch-good &&
+       printf "ref: HEAD\n" >$branch_dir_prefix/branch-head &&
+       printf "ref: refs/heads/branch" >$branch_dir_prefix/branch-no-newline-1 &&
+       printf "ref: refs/heads/branch     " >$branch_dir_prefix/a/b/branch-trailing-1 &&
+       printf "ref: refs/heads/branch\n\n" >$branch_dir_prefix/a/b/branch-trailing-2 &&
+       printf "ref: refs/heads/branch \n" >$branch_dir_prefix/a/b/branch-trailing-3 &&
+       printf "ref: refs/heads/branch \n  " >$branch_dir_prefix/a/b/branch-complicated &&
+       printf "ref: refs/heads/.branch\n" >$branch_dir_prefix/branch-bad-1 &&
+
+       test_must_fail git refs verify 2>err &&
+       cat >expect <<-EOF &&
+       error: refs/heads/branch-bad-1: badReferentName: points to invalid refname '\''refs/heads/.branch'\''
+       warning: refs/heads/a/b/branch-complicated: refMissingNewline: misses LF at the end
+       warning: refs/heads/a/b/branch-complicated: trailingRefContent: has trailing whitespaces or newlines
+       warning: refs/heads/a/b/branch-trailing-1: refMissingNewline: misses LF at the end
+       warning: refs/heads/a/b/branch-trailing-1: trailingRefContent: has trailing whitespaces or newlines
+       warning: refs/heads/a/b/branch-trailing-2: trailingRefContent: has trailing whitespaces or newlines
+       warning: refs/heads/a/b/branch-trailing-3: trailingRefContent: has trailing whitespaces or newlines
+       warning: refs/heads/branch-no-newline-1: refMissingNewline: misses LF at the end
+       EOF
+       sort err >sorted_err &&
+       test_cmp expect sorted_err
+'
+
 test_expect_success 'ref content checks should work with worktrees' '
        test_when_finished "rm -rf repo" &&
        git init repo &&
@@ -313,6 +416,14 @@ test_expect_success 'ref content checks should work with worktrees' '
        warning: worktrees/worktree-1/refs/worktree/branch-no-newline: refMissingNewline: misses LF at the end
        EOF
        rm $worktree1_refdir_prefix/branch-no-newline &&
+       test_cmp expect err &&
+
+       printf "%s garbage" "$(git rev-parse HEAD)" >$worktree1_refdir_prefix/branch-garbage &&
+       git refs verify 2>err &&
+       cat >expect <<-EOF &&
+       warning: worktrees/worktree-1/refs/worktree/branch-garbage: trailingRefContent: has trailing garbage: '\'' garbage'\''
+       EOF
+       rm $worktree1_refdir_prefix/branch-garbage &&
        test_cmp expect err
 '