From: Junio C Hamano Date: Tue, 18 Feb 2025 23:30:31 +0000 (-0800) Subject: Merge branch 'ds/backfill' X-Git-Tag: v2.49.0-rc0~24 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=e565f3755342caf1d21e22359eaf09ec11d8c0ae;p=thirdparty%2Fgit.git Merge branch 'ds/backfill' Lazy-loading missing files in a blobless clone on demand is costly as it tends to be one-blob-at-a-time. "git backfill" is introduced to help bulk-download necessary files beforehand. * ds/backfill: backfill: assume --sparse when sparse-checkout is enabled backfill: add --sparse option backfill: add --min-batch-size=<n> option backfill: basic functionality and tests backfill: add builtin boilerplate --- e565f3755342caf1d21e22359eaf09ec11d8c0ae diff --cc Documentation/git-backfill.adoc index 0000000000,95623051f7..95623051f7 mode 000000,100644..100644 --- a/Documentation/git-backfill.adoc +++ b/Documentation/git-backfill.adoc diff --cc Documentation/meson.build index ead8e48213,5e9e3e19c5..1129ce4c85 --- a/Documentation/meson.build +++ b/Documentation/meson.build @@@ -1,207 -1,208 +1,208 @@@ manpages = { # Category 1. 
- 'git-add.txt' : 1, - 'git-am.txt' : 1, - 'git-annotate.txt' : 1, - 'git-apply.txt' : 1, - 'git-archimport.txt' : 1, - 'git-archive.txt' : 1, - 'git-backfill.txt' : 1, - 'git-bisect.txt' : 1, - 'git-blame.txt' : 1, - 'git-branch.txt' : 1, - 'git-bugreport.txt' : 1, - 'git-bundle.txt' : 1, - 'git-cat-file.txt' : 1, - 'git-check-attr.txt' : 1, - 'git-check-ignore.txt' : 1, - 'git-check-mailmap.txt' : 1, - 'git-checkout-index.txt' : 1, - 'git-checkout.txt' : 1, - 'git-check-ref-format.txt' : 1, - 'git-cherry-pick.txt' : 1, - 'git-cherry.txt' : 1, - 'git-citool.txt' : 1, - 'git-clean.txt' : 1, - 'git-clone.txt' : 1, - 'git-column.txt' : 1, - 'git-commit-graph.txt' : 1, - 'git-commit-tree.txt' : 1, - 'git-commit.txt' : 1, - 'git-config.txt' : 1, - 'git-count-objects.txt' : 1, - 'git-credential-cache--daemon.txt' : 1, - 'git-credential-cache.txt' : 1, - 'git-credential-store.txt' : 1, - 'git-credential.txt' : 1, - 'git-cvsexportcommit.txt' : 1, - 'git-cvsimport.txt' : 1, - 'git-cvsserver.txt' : 1, - 'git-daemon.txt' : 1, - 'git-describe.txt' : 1, - 'git-diagnose.txt' : 1, - 'git-diff-files.txt' : 1, - 'git-diff-index.txt' : 1, - 'git-difftool.txt' : 1, - 'git-diff-tree.txt' : 1, - 'git-diff.txt' : 1, - 'git-fast-export.txt' : 1, - 'git-fast-import.txt' : 1, - 'git-fetch-pack.txt' : 1, - 'git-fetch.txt' : 1, - 'git-filter-branch.txt' : 1, - 'git-fmt-merge-msg.txt' : 1, - 'git-for-each-ref.txt' : 1, - 'git-for-each-repo.txt' : 1, - 'git-format-patch.txt' : 1, - 'git-fsck-objects.txt' : 1, - 'git-fsck.txt' : 1, - 'git-fsmonitor--daemon.txt' : 1, - 'git-gc.txt' : 1, - 'git-get-tar-commit-id.txt' : 1, - 'git-grep.txt' : 1, - 'git-gui.txt' : 1, - 'git-hash-object.txt' : 1, - 'git-help.txt' : 1, - 'git-hook.txt' : 1, - 'git-http-backend.txt' : 1, - 'git-http-fetch.txt' : 1, - 'git-http-push.txt' : 1, - 'git-imap-send.txt' : 1, - 'git-index-pack.txt' : 1, - 'git-init-db.txt' : 1, - 'git-init.txt' : 1, - 'git-instaweb.txt' : 1, - 'git-interpret-trailers.txt' : 1, - 'git-log.txt' 
: 1, - 'git-ls-files.txt' : 1, - 'git-ls-remote.txt' : 1, - 'git-ls-tree.txt' : 1, - 'git-mailinfo.txt' : 1, - 'git-mailsplit.txt' : 1, - 'git-maintenance.txt' : 1, - 'git-merge-base.txt' : 1, - 'git-merge-file.txt' : 1, - 'git-merge-index.txt' : 1, - 'git-merge-one-file.txt' : 1, - 'git-mergetool--lib.txt' : 1, - 'git-mergetool.txt' : 1, - 'git-merge-tree.txt' : 1, - 'git-merge.txt' : 1, - 'git-mktag.txt' : 1, - 'git-mktree.txt' : 1, - 'git-multi-pack-index.txt' : 1, - 'git-mv.txt' : 1, - 'git-name-rev.txt' : 1, - 'git-notes.txt' : 1, - 'git-p4.txt' : 1, - 'git-pack-objects.txt' : 1, - 'git-pack-redundant.txt' : 1, - 'git-pack-refs.txt' : 1, - 'git-patch-id.txt' : 1, - 'git-prune-packed.txt' : 1, - 'git-prune.txt' : 1, - 'git-pull.txt' : 1, - 'git-push.txt' : 1, - 'git-quiltimport.txt' : 1, - 'git-range-diff.txt' : 1, - 'git-read-tree.txt' : 1, - 'git-rebase.txt' : 1, - 'git-receive-pack.txt' : 1, - 'git-reflog.txt' : 1, - 'git-refs.txt' : 1, - 'git-remote-ext.txt' : 1, - 'git-remote-fd.txt' : 1, - 'git-remote.txt' : 1, - 'git-repack.txt' : 1, - 'git-replace.txt' : 1, - 'git-replay.txt' : 1, - 'git-request-pull.txt' : 1, - 'git-rerere.txt' : 1, - 'git-reset.txt' : 1, - 'git-restore.txt' : 1, - 'git-revert.txt' : 1, - 'git-rev-list.txt' : 1, - 'git-rev-parse.txt' : 1, - 'git-rm.txt' : 1, - 'git-send-email.txt' : 1, - 'git-send-pack.txt' : 1, - 'git-shell.txt' : 1, - 'git-sh-i18n--envsubst.txt' : 1, - 'git-sh-i18n.txt' : 1, - 'git-shortlog.txt' : 1, - 'git-show-branch.txt' : 1, - 'git-show-index.txt' : 1, - 'git-show-ref.txt' : 1, - 'git-show.txt' : 1, - 'git-sh-setup.txt' : 1, - 'git-sparse-checkout.txt' : 1, - 'git-stage.txt' : 1, - 'git-stash.txt' : 1, - 'git-status.txt' : 1, - 'git-stripspace.txt' : 1, - 'git-submodule.txt' : 1, - 'git-svn.txt' : 1, - 'git-switch.txt' : 1, - 'git-symbolic-ref.txt' : 1, - 'git-tag.txt' : 1, - 'git-unpack-file.txt' : 1, - 'git-unpack-objects.txt' : 1, - 'git-update-index.txt' : 1, - 'git-update-ref.txt' : 1, - 
'git-update-server-info.txt' : 1, - 'git-upload-archive.txt' : 1, - 'git-upload-pack.txt' : 1, - 'git-var.txt' : 1, - 'git-verify-commit.txt' : 1, - 'git-verify-pack.txt' : 1, - 'git-verify-tag.txt' : 1, - 'git-version.txt' : 1, - 'git-web--browse.txt' : 1, - 'git-whatchanged.txt' : 1, - 'git-worktree.txt' : 1, - 'git-write-tree.txt' : 1, - 'git.txt' : 1, - 'gitk.txt' : 1, - 'gitweb.txt' : 1, - 'scalar.txt' : 1, + 'git-add.adoc' : 1, + 'git-am.adoc' : 1, + 'git-annotate.adoc' : 1, + 'git-apply.adoc' : 1, + 'git-archimport.adoc' : 1, + 'git-archive.adoc' : 1, ++ 'git-backfill.adoc' : 1, + 'git-bisect.adoc' : 1, + 'git-blame.adoc' : 1, + 'git-branch.adoc' : 1, + 'git-bugreport.adoc' : 1, + 'git-bundle.adoc' : 1, + 'git-cat-file.adoc' : 1, + 'git-check-attr.adoc' : 1, + 'git-check-ignore.adoc' : 1, + 'git-check-mailmap.adoc' : 1, + 'git-checkout-index.adoc' : 1, + 'git-checkout.adoc' : 1, + 'git-check-ref-format.adoc' : 1, + 'git-cherry-pick.adoc' : 1, + 'git-cherry.adoc' : 1, + 'git-citool.adoc' : 1, + 'git-clean.adoc' : 1, + 'git-clone.adoc' : 1, + 'git-column.adoc' : 1, + 'git-commit-graph.adoc' : 1, + 'git-commit-tree.adoc' : 1, + 'git-commit.adoc' : 1, + 'git-config.adoc' : 1, + 'git-count-objects.adoc' : 1, + 'git-credential-cache--daemon.adoc' : 1, + 'git-credential-cache.adoc' : 1, + 'git-credential-store.adoc' : 1, + 'git-credential.adoc' : 1, + 'git-cvsexportcommit.adoc' : 1, + 'git-cvsimport.adoc' : 1, + 'git-cvsserver.adoc' : 1, + 'git-daemon.adoc' : 1, + 'git-describe.adoc' : 1, + 'git-diagnose.adoc' : 1, + 'git-diff-files.adoc' : 1, + 'git-diff-index.adoc' : 1, + 'git-difftool.adoc' : 1, + 'git-diff-tree.adoc' : 1, + 'git-diff.adoc' : 1, + 'git-fast-export.adoc' : 1, + 'git-fast-import.adoc' : 1, + 'git-fetch-pack.adoc' : 1, + 'git-fetch.adoc' : 1, + 'git-filter-branch.adoc' : 1, + 'git-fmt-merge-msg.adoc' : 1, + 'git-for-each-ref.adoc' : 1, + 'git-for-each-repo.adoc' : 1, + 'git-format-patch.adoc' : 1, + 'git-fsck-objects.adoc' : 1, + 'git-fsck.adoc' : 
1, + 'git-fsmonitor--daemon.adoc' : 1, + 'git-gc.adoc' : 1, + 'git-get-tar-commit-id.adoc' : 1, + 'git-grep.adoc' : 1, + 'git-gui.adoc' : 1, + 'git-hash-object.adoc' : 1, + 'git-help.adoc' : 1, + 'git-hook.adoc' : 1, + 'git-http-backend.adoc' : 1, + 'git-http-fetch.adoc' : 1, + 'git-http-push.adoc' : 1, + 'git-imap-send.adoc' : 1, + 'git-index-pack.adoc' : 1, + 'git-init-db.adoc' : 1, + 'git-init.adoc' : 1, + 'git-instaweb.adoc' : 1, + 'git-interpret-trailers.adoc' : 1, + 'git-log.adoc' : 1, + 'git-ls-files.adoc' : 1, + 'git-ls-remote.adoc' : 1, + 'git-ls-tree.adoc' : 1, + 'git-mailinfo.adoc' : 1, + 'git-mailsplit.adoc' : 1, + 'git-maintenance.adoc' : 1, + 'git-merge-base.adoc' : 1, + 'git-merge-file.adoc' : 1, + 'git-merge-index.adoc' : 1, + 'git-merge-one-file.adoc' : 1, + 'git-mergetool--lib.adoc' : 1, + 'git-mergetool.adoc' : 1, + 'git-merge-tree.adoc' : 1, + 'git-merge.adoc' : 1, + 'git-mktag.adoc' : 1, + 'git-mktree.adoc' : 1, + 'git-multi-pack-index.adoc' : 1, + 'git-mv.adoc' : 1, + 'git-name-rev.adoc' : 1, + 'git-notes.adoc' : 1, + 'git-p4.adoc' : 1, + 'git-pack-objects.adoc' : 1, + 'git-pack-redundant.adoc' : 1, + 'git-pack-refs.adoc' : 1, + 'git-patch-id.adoc' : 1, + 'git-prune-packed.adoc' : 1, + 'git-prune.adoc' : 1, + 'git-pull.adoc' : 1, + 'git-push.adoc' : 1, + 'git-quiltimport.adoc' : 1, + 'git-range-diff.adoc' : 1, + 'git-read-tree.adoc' : 1, + 'git-rebase.adoc' : 1, + 'git-receive-pack.adoc' : 1, + 'git-reflog.adoc' : 1, + 'git-refs.adoc' : 1, + 'git-remote-ext.adoc' : 1, + 'git-remote-fd.adoc' : 1, + 'git-remote.adoc' : 1, + 'git-repack.adoc' : 1, + 'git-replace.adoc' : 1, + 'git-replay.adoc' : 1, + 'git-request-pull.adoc' : 1, + 'git-rerere.adoc' : 1, + 'git-reset.adoc' : 1, + 'git-restore.adoc' : 1, + 'git-revert.adoc' : 1, + 'git-rev-list.adoc' : 1, + 'git-rev-parse.adoc' : 1, + 'git-rm.adoc' : 1, + 'git-send-email.adoc' : 1, + 'git-send-pack.adoc' : 1, + 'git-shell.adoc' : 1, + 'git-sh-i18n--envsubst.adoc' : 1, + 'git-sh-i18n.adoc' : 1, + 
'git-shortlog.adoc' : 1, + 'git-show-branch.adoc' : 1, + 'git-show-index.adoc' : 1, + 'git-show-ref.adoc' : 1, + 'git-show.adoc' : 1, + 'git-sh-setup.adoc' : 1, + 'git-sparse-checkout.adoc' : 1, + 'git-stage.adoc' : 1, + 'git-stash.adoc' : 1, + 'git-status.adoc' : 1, + 'git-stripspace.adoc' : 1, + 'git-submodule.adoc' : 1, + 'git-svn.adoc' : 1, + 'git-switch.adoc' : 1, + 'git-symbolic-ref.adoc' : 1, + 'git-tag.adoc' : 1, + 'git-unpack-file.adoc' : 1, + 'git-unpack-objects.adoc' : 1, + 'git-update-index.adoc' : 1, + 'git-update-ref.adoc' : 1, + 'git-update-server-info.adoc' : 1, + 'git-upload-archive.adoc' : 1, + 'git-upload-pack.adoc' : 1, + 'git-var.adoc' : 1, + 'git-verify-commit.adoc' : 1, + 'git-verify-pack.adoc' : 1, + 'git-verify-tag.adoc' : 1, + 'git-version.adoc' : 1, + 'git-web--browse.adoc' : 1, + 'git-whatchanged.adoc' : 1, + 'git-worktree.adoc' : 1, + 'git-write-tree.adoc' : 1, + 'git.adoc' : 1, + 'gitk.adoc' : 1, + 'gitweb.adoc' : 1, + 'scalar.adoc' : 1, # Category 5. - 'gitattributes.txt' : 5, - 'gitformat-bundle.txt' : 5, - 'gitformat-chunk.txt' : 5, - 'gitformat-commit-graph.txt' : 5, - 'gitformat-index.txt' : 5, - 'gitformat-pack.txt' : 5, - 'gitformat-signature.txt' : 5, - 'githooks.txt' : 5, - 'gitignore.txt' : 5, - 'gitmailmap.txt' : 5, - 'gitmodules.txt' : 5, - 'gitprotocol-capabilities.txt' : 5, - 'gitprotocol-common.txt' : 5, - 'gitprotocol-http.txt' : 5, - 'gitprotocol-pack.txt' : 5, - 'gitprotocol-v2.txt' : 5, - 'gitrepository-layout.txt' : 5, - 'gitweb.conf.txt' : 5, + 'gitattributes.adoc' : 5, + 'gitformat-bundle.adoc' : 5, + 'gitformat-chunk.adoc' : 5, + 'gitformat-commit-graph.adoc' : 5, + 'gitformat-index.adoc' : 5, + 'gitformat-pack.adoc' : 5, + 'gitformat-signature.adoc' : 5, + 'githooks.adoc' : 5, + 'gitignore.adoc' : 5, + 'gitmailmap.adoc' : 5, + 'gitmodules.adoc' : 5, + 'gitprotocol-capabilities.adoc' : 5, + 'gitprotocol-common.adoc' : 5, + 'gitprotocol-http.adoc' : 5, + 'gitprotocol-pack.adoc' : 5, + 'gitprotocol-v2.adoc' : 5, + 
'gitrepository-layout.adoc' : 5, + 'gitweb.conf.adoc' : 5, # Category 7. - 'gitcli.txt' : 7, - 'gitcore-tutorial.txt' : 7, - 'gitcredentials.txt' : 7, - 'gitcvs-migration.txt' : 7, - 'gitdiffcore.txt' : 7, - 'giteveryday.txt' : 7, - 'gitfaq.txt' : 7, - 'gitglossary.txt' : 7, - 'gitpacking.txt' : 7, - 'gitnamespaces.txt' : 7, - 'gitremote-helpers.txt' : 7, - 'gitrevisions.txt' : 7, - 'gitsubmodules.txt' : 7, - 'gittutorial-2.txt' : 7, - 'gittutorial.txt' : 7, - 'gitworkflows.txt' : 7, + 'gitcli.adoc' : 7, + 'gitcore-tutorial.adoc' : 7, + 'gitcredentials.adoc' : 7, + 'gitcvs-migration.adoc' : 7, + 'gitdiffcore.adoc' : 7, + 'giteveryday.adoc' : 7, + 'gitfaq.adoc' : 7, + 'gitglossary.adoc' : 7, + 'gitpacking.adoc' : 7, + 'gitnamespaces.adoc' : 7, + 'gitremote-helpers.adoc' : 7, + 'gitrevisions.adoc' : 7, + 'gitsubmodules.adoc' : 7, + 'gittutorial-2.adoc' : 7, + 'gittutorial.adoc' : 7, + 'gitworkflows.adoc' : 7, } docs_backend = get_option('docs_backend') diff --cc builtin/backfill.c index 0000000000,d7ee84692f..33e1ea2f84 mode 000000,100644..100644 --- a/builtin/backfill.c +++ b/builtin/backfill.c @@@ -1,0 -1,146 +1,147 @@@ + /* We need this macro to access core_apply_sparse_checkout */ + #define USE_THE_REPOSITORY_VARIABLE + + #include "builtin.h" + #include "git-compat-util.h" + #include "config.h" + #include "parse-options.h" + #include "repository.h" + #include "commit.h" + #include "dir.h" + #include "environment.h" + #include "hex.h" + #include "tree.h" + #include "tree-walk.h" + #include "object.h" + #include "object-store-ll.h" + #include "oid-array.h" + #include "oidset.h" + #include "promisor-remote.h" + #include "strmap.h" + #include "string-list.h" + #include "revision.h" + #include "trace2.h" + #include "progress.h" + #include "packfile.h" + #include "path-walk.h" + + static const char * const builtin_backfill_usage[] = { + N_("git backfill [--min-batch-size=] [--[no-]sparse]"), + NULL + }; + + struct backfill_context { + struct repository *repo; + struct 
oid_array current_batch; + size_t min_batch_size; + int sparse; + }; + + static void backfill_context_clear(struct backfill_context *ctx) + { + oid_array_clear(&ctx->current_batch); + } + + static void download_batch(struct backfill_context *ctx) + { + promisor_remote_get_direct(ctx->repo, + ctx->current_batch.oid, + ctx->current_batch.nr); + oid_array_clear(&ctx->current_batch); + + /* + * We likely have a new packfile. Add it to the packed list to + * avoid possible duplicate downloads of the same objects. + */ + reprepare_packed_git(ctx->repo); + } + + static int fill_missing_blobs(const char *path UNUSED, + struct oid_array *list, + enum object_type type, + void *data) + { + struct backfill_context *ctx = data; + + if (type != OBJ_BLOB) + return 0; + + for (size_t i = 0; i < list->nr; i++) { + if (!has_object(ctx->repo, &list->oid[i], + OBJECT_INFO_FOR_PREFETCH)) + oid_array_append(&ctx->current_batch, &list->oid[i]); + } + + if (ctx->current_batch.nr >= ctx->min_batch_size) + download_batch(ctx); + + return 0; + } + + static int do_backfill(struct backfill_context *ctx) + { + struct rev_info revs; + struct path_walk_info info = PATH_WALK_INFO_INIT; + int ret; + + if (ctx->sparse) { + CALLOC_ARRAY(info.pl, 1); + if (get_sparse_checkout_patterns(info.pl)) { + path_walk_info_clear(&info); + return error(_("problem loading sparse-checkout")); + } + } + + repo_init_revisions(ctx->repo, &revs, ""); + handle_revision_arg("HEAD", &revs, 0, 0); + + info.blobs = 1; + info.tags = info.commits = info.trees = 0; + + info.revs = &revs; + info.path_fn = fill_missing_blobs; + info.path_fn_data = ctx; + + ret = walk_objects_by_path(&info); + + /* Download the objects that did not fill a batch. 
*/ + if (!ret) + download_batch(ctx); + + path_walk_info_clear(&info); + release_revisions(&revs); + return ret; + } + + int cmd_backfill(int argc, const char **argv, const char *prefix, struct repository *repo) + { + int result; + struct backfill_context ctx = { + .repo = repo, + .current_batch = OID_ARRAY_INIT, + .min_batch_size = 50000, + .sparse = 0, + }; + struct option options[] = { + OPT_INTEGER(0, "min-batch-size", &ctx.min_batch_size, + N_("Minimum number of objects to request at a time")), + OPT_BOOL(0, "sparse", &ctx.sparse, + N_("Restrict the missing objects to the current sparse-checkout")), + OPT_END(), + }; + - show_usage_if_asked(argc, argv, builtin_backfill_usage[0]); ++ show_usage_with_options_if_asked(argc, argv, ++ builtin_backfill_usage, options); + + argc = parse_options(argc, argv, prefix, options, builtin_backfill_usage, + 0); + + repo_config(repo, git_default_config, NULL); + + if (ctx.sparse < 0) + ctx.sparse = core_apply_sparse_checkout; + + result = do_backfill(&ctx); + backfill_context_clear(&ctx); + return result; + } diff --cc t/meson.build index a03ebc81fd,af53e8ee58..780939d49f --- a/t/meson.build +++ b/t/meson.build @@@ -721,7 -721,7 +721,8 @@@ integration_tests = 't5617-clone-submodules-remote.sh', 't5618-alternate-refs.sh', 't5619-clone-local-ambiguous-transport.sh', + 't5620-backfill.sh', + 't5621-clone-revision.sh', 't5700-protocol-v1.sh', 't5701-git-serve.sh', 't5702-protocol-v2.sh',