]> git.ipfire.org Git - thirdparty/git.git/blobdiff - submodule.c
Merge branch 'gc/branch-recurse-submodules-fix'
[thirdparty/git.git] / submodule.c
index b36ef267527eb0f936a2f49a07aec2c5beb8c501..86c8f0f89dbdb0e82cb18054392a884c70a71811 100644 (file)
@@ -22,6 +22,7 @@
 #include "parse-options.h"
 #include "object-store.h"
 #include "commit-reach.h"
+#include "shallow.h"
 
 static int config_update_recurse_submodules = RECURSE_SUBMODULES_OFF;
 static int initialized_fetch_ref_tips;
@@ -167,26 +168,6 @@ void stage_updated_gitmodules(struct index_state *istate)
 
 static struct string_list added_submodule_odb_paths = STRING_LIST_INIT_NODUP;
 
-/* TODO: remove this function, use repo_submodule_init instead. */
-int add_submodule_odb(const char *path)
-{
-       struct strbuf objects_directory = STRBUF_INIT;
-       int ret = 0;
-
-       ret = strbuf_git_path_submodule(&objects_directory, path, "objects/");
-       if (ret)
-               goto done;
-       if (!is_directory(objects_directory.buf)) {
-               ret = -1;
-               goto done;
-       }
-       string_list_insert(&added_submodule_odb_paths,
-                          strbuf_detach(&objects_directory, NULL));
-done:
-       strbuf_release(&objects_directory);
-       return ret;
-}
-
 void add_submodule_odb_by_path(const char *path)
 {
        string_list_insert(&added_submodule_odb_paths, xstrdup(path));
@@ -808,9 +789,40 @@ static const char *default_name_or_path(const char *path_or_name)
 
 /*
  * Holds relevant information for a changed submodule. Used as the .util
- * member of the changed submodule string_list_item.
+ * member of the changed submodule name string_list_item.
+ *
+ * (super_oid, path) allows the submodule config to be read from _some_
+ * .gitmodules file. We store this information the first time we find a
+ * superproject commit that points to the submodule, but this is
+ * arbitrary - we can choose any (super_oid, path) that matches the
+ * submodule's name.
+ *
+ * NEEDSWORK: Storing an arbitrary commit is undesirable because we can't
+ * guarantee that we're reading the commit that the user would expect. A better
+ * scheme would be to just fetch a submodule by its name. This requires two
+ * steps:
+ * - Create a function that behaves like repo_submodule_init(), but accepts a
+ *   submodule name instead of treeish_name and path. This should be easy
+ *   because repo_submodule_init() internally uses the submodule's name.
+ *
+ * - Replace most instances of 'struct submodule' (which is the .gitmodules
+ *   config) with just the submodule name. This is OK because we expect
+ *   submodule settings to be stored in .git/config (via "git submodule init"),
+ *   not .gitmodules. This also lets us delete get_non_gitmodules_submodule(),
+ *   which constructs a bogus 'struct submodule' for the sake of giving a
+ *   placeholder name to a gitlink.
  */
 struct changed_submodule_data {
+       /*
+        * The first superproject commit in the rev walk that points to
+        * the submodule.
+        */
+       const struct object_id *super_oid;
+       /*
+        * Path to the submodule in the superproject commit referenced
+        * by 'super_oid'.
+        */
+       char *path;
        /* The submodule commits that have changed in the rev walk. */
        struct oid_array new_commits;
 };
@@ -818,6 +830,7 @@ struct changed_submodule_data {
 static void changed_submodule_data_clear(struct changed_submodule_data *cs_data)
 {
        oid_array_clear(&cs_data->new_commits);
+       free(cs_data->path);
 }
 
 static void collect_changed_submodules_cb(struct diff_queue_struct *q,
@@ -862,9 +875,14 @@ static void collect_changed_submodules_cb(struct diff_queue_struct *q,
                        continue;
 
                item = string_list_insert(changed, name);
-               if (!item->util)
+               if (item->util)
+                       cs_data = item->util;
+               else {
                        item->util = xcalloc(1, sizeof(struct changed_submodule_data));
-               cs_data = item->util;
+                       cs_data = item->util;
+                       cs_data->super_oid = commit_oid;
+                       cs_data->path = xstrdup(p->two->path);
+               }
                oid_array_append(&cs_data->new_commits, &p->two->oid);
        }
 }
@@ -949,7 +967,8 @@ static int check_has_commit(const struct object_id *oid, void *data)
 
        if (repo_submodule_init(&subrepo, cb->repo, cb->path, cb->super_oid)) {
                cb->result = 0;
-               goto cleanup;
+               /* subrepo failed to init, so don't clean it up. */
+               return 0;
        }
 
        type = oid_object_info(&subrepo, oid, NULL);
@@ -985,18 +1004,6 @@ static int submodule_has_commits(struct repository *r,
                .super_oid = super_oid
        };
 
-       /*
-        * Perform a cheap, but incorrect check for the existence of 'commits'.
-        * This is done by adding the submodule's object store to the in-core
-        * object store, and then querying for each commit's existence.  If we
-        * do not have the commit object anywhere, there is no chance we have
-        * it in the object store of the correct submodule and have it
-        * reachable from a ref, so we can fail early without spawning rev-list
-        * which is expensive.
-        */
-       if (add_submodule_odb(path))
-               return 0;
-
        oid_array_for_each_unique(commits, check_has_commit, &has_commit);
 
        if (has_commit.result) {
@@ -1253,14 +1260,36 @@ void check_for_new_submodule_commits(struct object_id *oid)
        oid_array_append(&ref_tips_after_fetch, oid);
 }
 
+/*
+ * Returns 1 if there is at least one submodule gitdir in
+ * $GIT_DIR/modules and 0 otherwise. This follows
+ * submodule_name_to_gitdir(), which looks for submodules in
+ * $GIT_DIR/modules, not $GIT_COMMON_DIR.
+ *
+ * A submodule can be moved to $GIT_DIR/modules manually by running "git
+ * submodule absorbgitdirs", or it may be initialized there by "git
+ * submodule update".
+ */
+static int repo_has_absorbed_submodules(struct repository *r)
+{
+       int ret;
+       struct strbuf buf = STRBUF_INIT;
+
+       strbuf_repo_git_path(&buf, r, "modules/");
+       ret = file_exists(buf.buf) && !is_empty_dir(buf.buf);
+       strbuf_release(&buf);
+       return ret;
+}
+
 static void calculate_changed_submodule_paths(struct repository *r,
                struct string_list *changed_submodule_names)
 {
        struct strvec argv = STRVEC_INIT;
        struct string_list_item *name;
 
-       /* No need to check if there are no submodules configured */
-       if (!submodule_from_path(r, NULL, NULL))
+       /* No need to check if no submodules would be fetched */
+       if (!submodule_from_path(r, NULL, NULL) &&
+           !repo_has_absorbed_submodules(r))
                return;
 
        strvec_push(&argv, "--"); /* argv[0] program name */
@@ -1333,7 +1362,16 @@ int submodule_touches_in_range(struct repository *r,
 }
 
 struct submodule_parallel_fetch {
-       int count;
+       /*
+        * The index of the last index entry processed by
+        * get_fetch_task_from_index().
+        */
+       int index_count;
+       /*
+        * The index of the last string_list entry processed by
+        * get_fetch_task_from_changed().
+        */
+       int changed_count;
        struct strvec args;
        struct repository *r;
        const char *prefix;
@@ -1342,7 +1380,16 @@ struct submodule_parallel_fetch {
        int quiet;
        int result;
 
+       /*
+        * Names of submodules that have new commits. Generated by
+        * walking the newly fetched superproject commits.
+        */
        struct string_list changed_submodule_names;
+       /*
+        * Names of submodules that have already been processed. Lets us
+        * avoid fetching the same submodule more than once.
+        */
+       struct string_list seen_submodule_names;
 
        /* Pending fetches by OIDs */
        struct fetch_task **oid_fetch_tasks;
@@ -1353,6 +1400,7 @@ struct submodule_parallel_fetch {
 #define SPF_INIT { \
        .args = STRVEC_INIT, \
        .changed_submodule_names = STRING_LIST_INIT_DUP, \
+       .seen_submodule_names = STRING_LIST_INIT_DUP, \
        .submodules_with_errors = STRBUF_INIT, \
 }
 
@@ -1390,6 +1438,7 @@ struct fetch_task {
        const struct submodule *sub;
        unsigned free_sub : 1; /* Do we need to free the submodule? */
        const char *default_argv; /* The default fetch mode. */
+       struct strvec git_args; /* Args for the child git process. */
 
        struct oid_array *commits; /* Ensure these commits are fetched */
 };
@@ -1425,6 +1474,8 @@ static void fetch_task_release(struct fetch_task *p)
        if (p->repo)
                repo_clear(p->repo);
        FREE_AND_NULL(p->repo);
+
+       strvec_clear(&p->git_args);
 }
 
 static struct repository *get_submodule_repo_for(struct repository *r,
@@ -1463,6 +1514,9 @@ static struct fetch_task *fetch_task_create(struct submodule_parallel_fetch *spf
                task->free_sub = 1;
        }
 
+       if (string_list_lookup(&spf->seen_submodule_names, task->sub->name))
+               goto cleanup;
+
        switch (get_fetch_recurse_config(task->sub, spf))
        {
        default:
@@ -1493,10 +1547,12 @@ static struct fetch_task *fetch_task_create(struct submodule_parallel_fetch *spf
 }
 
 static struct fetch_task *
-get_fetch_task(struct submodule_parallel_fetch *spf, struct strbuf *err)
+get_fetch_task_from_index(struct submodule_parallel_fetch *spf,
+                         struct strbuf *err)
 {
-       for (; spf->count < spf->r->index->cache_nr; spf->count++) {
-               const struct cache_entry *ce = spf->r->index->cache[spf->count];
+       for (; spf->index_count < spf->r->index->cache_nr; spf->index_count++) {
+               const struct cache_entry *ce =
+                       spf->r->index->cache[spf->index_count];
                struct fetch_task *task;
 
                if (!S_ISGITLINK(ce->ce_mode))
@@ -1511,7 +1567,7 @@ get_fetch_task(struct submodule_parallel_fetch *spf, struct strbuf *err)
                                strbuf_addf(err, _("Fetching submodule %s%s\n"),
                                            spf->prefix, ce->name);
 
-                       spf->count++;
+                       spf->index_count++;
                        return task;
                } else {
                        struct strbuf empty_submodule_path = STRBUF_INIT;
@@ -1539,11 +1595,83 @@ get_fetch_task(struct submodule_parallel_fetch *spf, struct strbuf *err)
        return NULL;
 }
 
+static struct fetch_task *
+get_fetch_task_from_changed(struct submodule_parallel_fetch *spf,
+                           struct strbuf *err)
+{
+       for (; spf->changed_count < spf->changed_submodule_names.nr;
+            spf->changed_count++) {
+               struct string_list_item item =
+                       spf->changed_submodule_names.items[spf->changed_count];
+               struct changed_submodule_data *cs_data = item.util;
+               struct fetch_task *task;
+
+               if (!is_tree_submodule_active(spf->r, cs_data->super_oid,cs_data->path))
+                       continue;
+
+               task = fetch_task_create(spf, cs_data->path,
+                                        cs_data->super_oid);
+               if (!task)
+                       continue;
+
+               if (!task->repo) {
+                       strbuf_addf(err, _("Could not access submodule '%s' at commit %s\n"),
+                                   cs_data->path,
+                                   find_unique_abbrev(cs_data->super_oid, DEFAULT_ABBREV));
+
+                       fetch_task_release(task);
+                       free(task);
+                       continue;
+               }
+
+               if (!spf->quiet)
+                       strbuf_addf(err,
+                                   _("Fetching submodule %s%s at commit %s\n"),
+                                   spf->prefix, task->sub->path,
+                                   find_unique_abbrev(cs_data->super_oid,
+                                                      DEFAULT_ABBREV));
+
+               spf->changed_count++;
+               /*
+                * NEEDSWORK: Submodules set/unset a value for
+                * core.worktree when they are populated/unpopulated by
+                * "git checkout" (and similar commands, see
+                * submodule_move_head() and
+                * connect_work_tree_and_git_dir()), but if the
+                * submodule is unpopulated in another way (e.g. "git
+                * rm", "rm -r"), core.worktree will still be set even
+                * though the directory doesn't exist, and the child
+                * process will crash while trying to chdir into the
+                * nonexistent directory.
+                *
+                * In this case, we know that the submodule has no
+                * working tree, so we can work around this by
+                * setting "--work-tree=." (--bare does not work because
+                * worktree settings take precedence over bare-ness).
+                * However, this is not necessarily true in other cases,
+                * so a generalized solution is still necessary.
+                *
+                * Possible solutions:
+                * - teach "git [add|rm]" to unset core.worktree and
+                *   discourage users from removing submodules without
+                *   using a Git command.
+                * - teach submodule child processes to ignore stale
+                *   core.worktree values.
+                */
+               strvec_push(&task->git_args, "--work-tree=.");
+               return task;
+       }
+       return NULL;
+}
+
 static int get_next_submodule(struct child_process *cp, struct strbuf *err,
                              void *data, void **task_cb)
 {
        struct submodule_parallel_fetch *spf = data;
-       struct fetch_task *task = get_fetch_task(spf, err);
+       struct fetch_task *task =
+               get_fetch_task_from_index(spf, err);
+       if (!task)
+               task = get_fetch_task_from_changed(spf, err);
 
        if (task) {
                struct strbuf submodule_prefix = STRBUF_INIT;
@@ -1553,6 +1681,8 @@ static int get_next_submodule(struct child_process *cp, struct strbuf *err,
                prepare_submodule_repo_env_in_gitdir(&cp->env_array);
                cp->git_cmd = 1;
                strvec_init(&cp->args);
+               if (task->git_args.nr)
+                       strvec_pushv(&cp->args, task->git_args.v);
                strvec_pushv(&cp->args, spf->args.v);
                strvec_push(&cp->args, task->default_argv);
                strvec_push(&cp->args, "--submodule-prefix");
@@ -1564,6 +1694,7 @@ static int get_next_submodule(struct child_process *cp, struct strbuf *err,
                *task_cb = task;
 
                strbuf_release(&submodule_prefix);
+               string_list_insert(&spf->seen_submodule_names, task->sub->name);
                return 1;
        }
 
@@ -1678,11 +1809,11 @@ out:
        return 0;
 }
 
-int fetch_populated_submodules(struct repository *r,
-                              const struct strvec *options,
-                              const char *prefix, int command_line_option,
-                              int default_option,
-                              int quiet, int max_parallel_jobs)
+int fetch_submodules(struct repository *r,
+                    const struct strvec *options,
+                    const char *prefix, int command_line_option,
+                    int default_option,
+                    int quiet, int max_parallel_jobs)
 {
        int i;
        struct submodule_parallel_fetch spf = SPF_INIT;