#include "parse-options.h"
#include "object-store.h"
#include "commit-reach.h"
+#include "shallow.h"
static int config_update_recurse_submodules = RECURSE_SUBMODULES_OFF;
static int initialized_fetch_ref_tips;
static struct string_list added_submodule_odb_paths = STRING_LIST_INIT_NODUP;
-/* TODO: remove this function, use repo_submodule_init instead. */
-int add_submodule_odb(const char *path)
-{
- struct strbuf objects_directory = STRBUF_INIT;
- int ret = 0;
-
- ret = strbuf_git_path_submodule(&objects_directory, path, "objects/");
- if (ret)
- goto done;
- if (!is_directory(objects_directory.buf)) {
- ret = -1;
- goto done;
- }
- string_list_insert(&added_submodule_odb_paths,
- strbuf_detach(&objects_directory, NULL));
-done:
- strbuf_release(&objects_directory);
- return ret;
-}
-
void add_submodule_odb_by_path(const char *path)
{
string_list_insert(&added_submodule_odb_paths, xstrdup(path));
/*
* Holds relevant information for a changed submodule. Used as the .util
- * member of the changed submodule string_list_item.
+ * member of the changed submodule name string_list_item.
+ *
+ * (super_oid, path) allows the submodule config to be read from _some_
+ * .gitmodules file. We store this information the first time we find a
+ * superproject commit that points to the submodule, but this is
+ * arbitrary - we can choose any (super_oid, path) that matches the
+ * submodule's name.
+ *
+ * NEEDSWORK: Storing an arbitrary commit is undesirable because we can't
+ * guarantee that we're reading the commit that the user would expect. A better
+ * scheme would be to just fetch a submodule by its name. This requires two
+ * steps:
+ * - Create a function that behaves like repo_submodule_init(), but accepts a
+ * submodule name instead of treeish_name and path. This should be easy
+ * because repo_submodule_init() internally uses the submodule's name.
+ *
+ * - Replace most instances of 'struct submodule' (which is the .gitmodules
+ * config) with just the submodule name. This is OK because we expect
+ * submodule settings to be stored in .git/config (via "git submodule init"),
+ * not .gitmodules. This also lets us delete get_non_gitmodules_submodule(),
+ * which constructs a bogus 'struct submodule' for the sake of giving a
+ * placeholder name to a gitlink.
*/
struct changed_submodule_data {
+ /*
+ * The first superproject commit in the rev walk that points to
+ * the submodule.
+ */
+ const struct object_id *super_oid;
+ /*
+ * Path to the submodule in the superproject commit referenced
+ * by 'super_oid'.
+ */
+ char *path;
/* The submodule commits that have changed in the rev walk. */
struct oid_array new_commits;
};
static void changed_submodule_data_clear(struct changed_submodule_data *cs_data)
{
oid_array_clear(&cs_data->new_commits);
+ free(cs_data->path);
}
static void collect_changed_submodules_cb(struct diff_queue_struct *q,
continue;
item = string_list_insert(changed, name);
- if (!item->util)
+ if (item->util)
+ cs_data = item->util;
+ else {
item->util = xcalloc(1, sizeof(struct changed_submodule_data));
- cs_data = item->util;
+ cs_data = item->util;
+ cs_data->super_oid = commit_oid;
+ cs_data->path = xstrdup(p->two->path);
+ }
oid_array_append(&cs_data->new_commits, &p->two->oid);
}
}
if (repo_submodule_init(&subrepo, cb->repo, cb->path, cb->super_oid)) {
cb->result = 0;
- goto cleanup;
+ /* subrepo failed to init, so don't clean it up. */
+ return 0;
}
type = oid_object_info(&subrepo, oid, NULL);
.super_oid = super_oid
};
- /*
- * Perform a cheap, but incorrect check for the existence of 'commits'.
- * This is done by adding the submodule's object store to the in-core
- * object store, and then querying for each commit's existence. If we
- * do not have the commit object anywhere, there is no chance we have
- * it in the object store of the correct submodule and have it
- * reachable from a ref, so we can fail early without spawning rev-list
- * which is expensive.
- */
- if (add_submodule_odb(path))
- return 0;
-
oid_array_for_each_unique(commits, check_has_commit, &has_commit);
if (has_commit.result) {
oid_array_append(&ref_tips_after_fetch, oid);
}
+/*
+ * Returns 1 if there is at least one submodule gitdir in
+ * $GIT_DIR/modules and 0 otherwise. This follows
+ * submodule_name_to_gitdir(), which looks for submodules in
+ * $GIT_DIR/modules, not $GIT_COMMON_DIR.
+ *
+ * A submodule can be moved to $GIT_DIR/modules manually by running "git
+ * submodule absorbgitdirs", or it may be initialized there by "git
+ * submodule update".
+ */
+static int repo_has_absorbed_submodules(struct repository *r)
+{
+ int ret;
+ struct strbuf buf = STRBUF_INIT;
+
+ strbuf_repo_git_path(&buf, r, "modules/");
+ ret = file_exists(buf.buf) && !is_empty_dir(buf.buf);
+ strbuf_release(&buf);
+ return ret;
+}
+
static void calculate_changed_submodule_paths(struct repository *r,
struct string_list *changed_submodule_names)
{
struct strvec argv = STRVEC_INIT;
struct string_list_item *name;
- /* No need to check if there are no submodules configured */
- if (!submodule_from_path(r, NULL, NULL))
+ /* No need to check if no submodules would be fetched */
+ if (!submodule_from_path(r, NULL, NULL) &&
+ !repo_has_absorbed_submodules(r))
return;
strvec_push(&argv, "--"); /* argv[0] program name */
}
struct submodule_parallel_fetch {
- int count;
+ /*
+ * The index of the last index entry processed by
+ * get_fetch_task_from_index().
+ */
+ int index_count;
+ /*
+ * The index of the last string_list entry processed by
+ * get_fetch_task_from_changed().
+ */
+ int changed_count;
struct strvec args;
struct repository *r;
const char *prefix;
int quiet;
int result;
+ /*
+ * Names of submodules that have new commits. Generated by
+ * walking the newly fetched superproject commits.
+ */
struct string_list changed_submodule_names;
+ /*
+ * Names of submodules that have already been processed. Lets us
+ * avoid fetching the same submodule more than once.
+ */
+ struct string_list seen_submodule_names;
/* Pending fetches by OIDs */
struct fetch_task **oid_fetch_tasks;
#define SPF_INIT { \
.args = STRVEC_INIT, \
.changed_submodule_names = STRING_LIST_INIT_DUP, \
+ .seen_submodule_names = STRING_LIST_INIT_DUP, \
.submodules_with_errors = STRBUF_INIT, \
}
const struct submodule *sub;
unsigned free_sub : 1; /* Do we need to free the submodule? */
const char *default_argv; /* The default fetch mode. */
+ struct strvec git_args; /* Args for the child git process. */
struct oid_array *commits; /* Ensure these commits are fetched */
};
return (const struct submodule *) ret;
}
-static struct fetch_task *fetch_task_create(struct repository *r,
- const char *path,
- const struct object_id *treeish_name)
-{
- struct fetch_task *task = xmalloc(sizeof(*task));
- memset(task, 0, sizeof(*task));
-
- task->sub = submodule_from_path(r, treeish_name, path);
- if (!task->sub) {
- /*
- * No entry in .gitmodules? Technically not a submodule,
- * but historically we supported repositories that happen to be
- * in-place where a gitlink is. Keep supporting them.
- */
- task->sub = get_non_gitmodules_submodule(path);
- if (!task->sub) {
- free(task);
- return NULL;
- }
-
- task->free_sub = 1;
- }
-
- return task;
-}
-
static void fetch_task_release(struct fetch_task *p)
{
if (p->free_sub)
if (p->repo)
repo_clear(p->repo);
FREE_AND_NULL(p->repo);
+
+ strvec_clear(&p->git_args);
}
static struct repository *get_submodule_repo_for(struct repository *r,
return ret;
}
+static struct fetch_task *fetch_task_create(struct submodule_parallel_fetch *spf,
+ const char *path,
+ const struct object_id *treeish_name)
+{
+ struct fetch_task *task = xmalloc(sizeof(*task));
+ memset(task, 0, sizeof(*task));
+
+ task->sub = submodule_from_path(spf->r, treeish_name, path);
+
+ if (!task->sub) {
+ /*
+ * No entry in .gitmodules? Technically not a submodule,
+ * but historically we supported repositories that happen to be
+ * in-place where a gitlink is. Keep supporting them.
+ */
+ task->sub = get_non_gitmodules_submodule(path);
+ if (!task->sub)
+ goto cleanup;
+
+ task->free_sub = 1;
+ }
+
+ if (string_list_lookup(&spf->seen_submodule_names, task->sub->name))
+ goto cleanup;
+
+ switch (get_fetch_recurse_config(task->sub, spf))
+ {
+ default:
+ case RECURSE_SUBMODULES_DEFAULT:
+ case RECURSE_SUBMODULES_ON_DEMAND:
+ if (!task->sub ||
+ !string_list_lookup(
+ &spf->changed_submodule_names,
+ task->sub->name))
+ goto cleanup;
+ task->default_argv = "on-demand";
+ break;
+ case RECURSE_SUBMODULES_ON:
+ task->default_argv = "yes";
+ break;
+ case RECURSE_SUBMODULES_OFF:
+ goto cleanup;
+ }
+
+ task->repo = get_submodule_repo_for(spf->r, path, treeish_name);
+
+ return task;
+
+ cleanup:
+ fetch_task_release(task);
+ free(task);
+ return NULL;
+}
+
static struct fetch_task *
-get_fetch_task(struct submodule_parallel_fetch *spf, struct strbuf *err)
+get_fetch_task_from_index(struct submodule_parallel_fetch *spf,
+ struct strbuf *err)
{
- for (; spf->count < spf->r->index->cache_nr; spf->count++) {
- const struct cache_entry *ce = spf->r->index->cache[spf->count];
+ for (; spf->index_count < spf->r->index->cache_nr; spf->index_count++) {
+ const struct cache_entry *ce =
+ spf->r->index->cache[spf->index_count];
struct fetch_task *task;
if (!S_ISGITLINK(ce->ce_mode))
continue;
- task = fetch_task_create(spf->r, ce->name, null_oid());
+ task = fetch_task_create(spf, ce->name, null_oid());
if (!task)
continue;
- switch (get_fetch_recurse_config(task->sub, spf))
- {
- default:
- case RECURSE_SUBMODULES_DEFAULT:
- case RECURSE_SUBMODULES_ON_DEMAND:
- if (!task->sub ||
- !string_list_lookup(
- &spf->changed_submodule_names,
- task->sub->name))
- continue;
- task->default_argv = "on-demand";
- break;
- case RECURSE_SUBMODULES_ON:
- task->default_argv = "yes";
- break;
- case RECURSE_SUBMODULES_OFF:
- continue;
- }
-
- task->repo = get_submodule_repo_for(spf->r, task->sub->path, null_oid());
if (task->repo) {
if (!spf->quiet)
strbuf_addf(err, _("Fetching submodule %s%s\n"),
spf->prefix, ce->name);
- spf->count++;
+ spf->index_count++;
return task;
} else {
struct strbuf empty_submodule_path = STRBUF_INIT;
return NULL;
}
+static struct fetch_task *
+get_fetch_task_from_changed(struct submodule_parallel_fetch *spf,
+ struct strbuf *err)
+{
+ for (; spf->changed_count < spf->changed_submodule_names.nr;
+ spf->changed_count++) {
+ struct string_list_item item =
+ spf->changed_submodule_names.items[spf->changed_count];
+ struct changed_submodule_data *cs_data = item.util;
+ struct fetch_task *task;
+
+ if (!is_tree_submodule_active(spf->r, cs_data->super_oid,cs_data->path))
+ continue;
+
+ task = fetch_task_create(spf, cs_data->path,
+ cs_data->super_oid);
+ if (!task)
+ continue;
+
+ if (!task->repo) {
+ strbuf_addf(err, _("Could not access submodule '%s' at commit %s\n"),
+ cs_data->path,
+ find_unique_abbrev(cs_data->super_oid, DEFAULT_ABBREV));
+
+ fetch_task_release(task);
+ free(task);
+ continue;
+ }
+
+ if (!spf->quiet)
+ strbuf_addf(err,
+ _("Fetching submodule %s%s at commit %s\n"),
+ spf->prefix, task->sub->path,
+ find_unique_abbrev(cs_data->super_oid,
+ DEFAULT_ABBREV));
+
+ spf->changed_count++;
+ /*
+ * NEEDSWORK: Submodules set/unset a value for
+ * core.worktree when they are populated/unpopulated by
+ * "git checkout" (and similar commands, see
+ * submodule_move_head() and
+ * connect_work_tree_and_git_dir()), but if the
+ * submodule is unpopulated in another way (e.g. "git
+ * rm", "rm -r"), core.worktree will still be set even
+ * though the directory doesn't exist, and the child
+ * process will crash while trying to chdir into the
+ * nonexistent directory.
+ *
+ * In this case, we know that the submodule has no
+ * working tree, so we can work around this by
+ * setting "--work-tree=." (--bare does not work because
+ * worktree settings take precedence over bare-ness).
+ * However, this is not necessarily true in other cases,
+ * so a generalized solution is still necessary.
+ *
+ * Possible solutions:
+ * - teach "git [add|rm]" to unset core.worktree and
+ * discourage users from removing submodules without
+ * using a Git command.
+ * - teach submodule child processes to ignore stale
+ * core.worktree values.
+ */
+ strvec_push(&task->git_args, "--work-tree=.");
+ return task;
+ }
+ return NULL;
+}
+
static int get_next_submodule(struct child_process *cp, struct strbuf *err,
void *data, void **task_cb)
{
struct submodule_parallel_fetch *spf = data;
- struct fetch_task *task = get_fetch_task(spf, err);
+ struct fetch_task *task =
+ get_fetch_task_from_index(spf, err);
+ if (!task)
+ task = get_fetch_task_from_changed(spf, err);
if (task) {
struct strbuf submodule_prefix = STRBUF_INIT;
prepare_submodule_repo_env_in_gitdir(&cp->env_array);
cp->git_cmd = 1;
strvec_init(&cp->args);
+ if (task->git_args.nr)
+ strvec_pushv(&cp->args, task->git_args.v);
strvec_pushv(&cp->args, spf->args.v);
strvec_push(&cp->args, task->default_argv);
strvec_push(&cp->args, "--submodule-prefix");
*task_cb = task;
strbuf_release(&submodule_prefix);
+ string_list_insert(&spf->seen_submodule_names, task->sub->name);
return 1;
}
return 0;
}
-int fetch_populated_submodules(struct repository *r,
- const struct strvec *options,
- const char *prefix, int command_line_option,
- int default_option,
- int quiet, int max_parallel_jobs)
+int fetch_submodules(struct repository *r,
+ const struct strvec *options,
+ const char *prefix, int command_line_option,
+ int default_option,
+ int quiet, int max_parallel_jobs)
{
int i;
struct submodule_parallel_fetch spf = SPF_INIT;