]> git.ipfire.org Git - thirdparty/git.git/blobdiff - dir.c
Merge branch 'jt/t5500-unflake'
[thirdparty/git.git] / dir.c
diff --git a/dir.c b/dir.c
index 61f559f98008afe61e748fb1821fb45e0bec913f..d97e9558489d3bf622673b1f5b90053154ea9033 100644 (file)
--- a/dir.c
+++ b/dir.c
@@ -2,8 +2,6 @@
  * This handles recursive filename detection with exclude
  * files, index knowledge etc..
  *
- * See Documentation/technical/api-directory-listing.txt
- *
  * Copyright (C) Linus Torvalds, 2005-2006
  *              Junio Hamano, 2005-2006
  */
@@ -43,7 +41,8 @@ struct cached_dir {
        int nr_files;
        int nr_dirs;
 
-       struct dirent *de;
+       const char *d_name;
+       int d_type;
        const char *file;
        struct untracked_cache_dir *ucd;
 };
@@ -52,8 +51,8 @@ static enum path_treatment read_directory_recursive(struct dir_struct *dir,
        struct index_state *istate, const char *path, int len,
        struct untracked_cache_dir *untracked,
        int check_only, int stop_at_first_file, const struct pathspec *pathspec);
-static int get_dtype(struct dirent *de, struct index_state *istate,
-                    const char *path, int len);
+static int resolve_dtype(int dtype, struct index_state *istate,
+                        const char *path, int len);
 
 int count_slashes(const char *s)
 {
@@ -373,12 +372,19 @@ static int match_pathspec_item(const struct index_state *istate,
                    !ps_strncmp(item, match, name, namelen))
                        return MATCHED_RECURSIVELY_LEADING_PATHSPEC;
 
-               /* name" doesn't match up to the first wild character */
+               /* name doesn't match up to the first wild character */
                if (item->nowildcard_len < item->len &&
                    ps_strncmp(item, match, name,
                               item->nowildcard_len - prefix))
                        return 0;
 
+               /*
+                * name has no wildcard, and it didn't match as a leading
+                * pathspec so return.
+                */
+               if (item->nowildcard_len == item->len)
+                       return 0;
+
                /*
                 * Here is where we would perform a wildmatch to check if
                 * "name" can be matched as a directory (or a prefix) against
@@ -611,6 +617,230 @@ void parse_path_pattern(const char **pattern,
        *patternlen = len;
 }
 
+int pl_hashmap_cmp(const void *unused_cmp_data,
+                  const struct hashmap_entry *a,
+                  const struct hashmap_entry *b,
+                  const void *key)
+{
+       const struct pattern_entry *ee1 =
+                       container_of(a, struct pattern_entry, ent);
+       const struct pattern_entry *ee2 =
+                       container_of(b, struct pattern_entry, ent);
+
+       size_t min_len = ee1->patternlen <= ee2->patternlen
+                        ? ee1->patternlen
+                        : ee2->patternlen;
+
+       if (ignore_case)
+               return strncasecmp(ee1->pattern, ee2->pattern, min_len);
+       return strncmp(ee1->pattern, ee2->pattern, min_len);
+}
+
+static char *dup_and_filter_pattern(const char *pattern)
+{
+       char *set, *read;
+       size_t count  = 0;
+       char *result = xstrdup(pattern);
+
+       set = result;
+       read = result;
+
+       while (*read) {
+               /* skip escape characters (once) */
+               if (*read == '\\')
+                       read++;
+
+               *set = *read;
+
+               set++;
+               read++;
+               count++;
+       }
+       *set = 0;
+
+       if (count > 2 &&
+           *(set - 1) == '*' &&
+           *(set - 2) == '/')
+               *(set - 2) = 0;
+
+       return result;
+}
+
+static void add_pattern_to_hashsets(struct pattern_list *pl, struct path_pattern *given)
+{
+       struct pattern_entry *translated;
+       char *truncated;
+       char *data = NULL;
+       const char *prev, *cur, *next;
+
+       if (!pl->use_cone_patterns)
+               return;
+
+       if (given->flags & PATTERN_FLAG_NEGATIVE &&
+           given->flags & PATTERN_FLAG_MUSTBEDIR &&
+           !strcmp(given->pattern, "/*")) {
+               pl->full_cone = 0;
+               return;
+       }
+
+       if (!given->flags && !strcmp(given->pattern, "/*")) {
+               pl->full_cone = 1;
+               return;
+       }
+
+       if (given->patternlen < 2 ||
+           *given->pattern == '*' ||
+           strstr(given->pattern, "**")) {
+               /* Not a cone pattern. */
+               warning(_("unrecognized pattern: '%s'"), given->pattern);
+               goto clear_hashmaps;
+       }
+
+       prev = given->pattern;
+       cur = given->pattern + 1;
+       next = given->pattern + 2;
+
+       while (*cur) {
+               /* Watch for glob characters '*', '\', '[', '?' */
+               if (!is_glob_special(*cur))
+                       goto increment;
+
+               /* But only if *prev != '\\' */
+               if (*prev == '\\')
+                       goto increment;
+
+               /* But allow the initial '\' */
+               if (*cur == '\\' &&
+                   is_glob_special(*next))
+                       goto increment;
+
+               /* But a trailing '/' then '*' is fine */
+               if (*prev == '/' &&
+                   *cur == '*' &&
+                   *next == 0)
+                       goto increment;
+
+               /* Not a cone pattern. */
+               warning(_("unrecognized pattern: '%s'"), given->pattern);
+               goto clear_hashmaps;
+
+       increment:
+               prev++;
+               cur++;
+               next++;
+       }
+
+       if (given->patternlen > 2 &&
+           !strcmp(given->pattern + given->patternlen - 2, "/*")) {
+               if (!(given->flags & PATTERN_FLAG_NEGATIVE)) {
+                       /* Not a cone pattern. */
+                       warning(_("unrecognized pattern: '%s'"), given->pattern);
+                       goto clear_hashmaps;
+               }
+
+               truncated = dup_and_filter_pattern(given->pattern);
+
+               translated = xmalloc(sizeof(struct pattern_entry));
+               translated->pattern = truncated;
+               translated->patternlen = given->patternlen - 2;
+               hashmap_entry_init(&translated->ent,
+                                  ignore_case ?
+                                  strihash(translated->pattern) :
+                                  strhash(translated->pattern));
+
+               if (!hashmap_get_entry(&pl->recursive_hashmap,
+                                      translated, ent, NULL)) {
+                       /* We did not see the "parent" included */
+                       warning(_("unrecognized negative pattern: '%s'"),
+                               given->pattern);
+                       free(truncated);
+                       free(translated);
+                       goto clear_hashmaps;
+               }
+
+               hashmap_add(&pl->parent_hashmap, &translated->ent);
+               hashmap_remove(&pl->recursive_hashmap, &translated->ent, &data);
+               free(data);
+               return;
+       }
+
+       if (given->flags & PATTERN_FLAG_NEGATIVE) {
+               warning(_("unrecognized negative pattern: '%s'"),
+                       given->pattern);
+               goto clear_hashmaps;
+       }
+
+       translated = xmalloc(sizeof(struct pattern_entry));
+
+       translated->pattern = dup_and_filter_pattern(given->pattern);
+       translated->patternlen = given->patternlen;
+       hashmap_entry_init(&translated->ent,
+                          ignore_case ?
+                          strihash(translated->pattern) :
+                          strhash(translated->pattern));
+
+       hashmap_add(&pl->recursive_hashmap, &translated->ent);
+
+       if (hashmap_get_entry(&pl->parent_hashmap, translated, ent, NULL)) {
+               /* we already included this at the parent level */
+               warning(_("your sparse-checkout file may have issues: pattern '%s' is repeated"),
+                       given->pattern);
+               hashmap_remove(&pl->parent_hashmap, &translated->ent, &data);
+               free(data);
+               free(translated);
+       }
+
+       return;
+
+clear_hashmaps:
+       warning(_("disabling cone pattern matching"));
+       hashmap_free_entries(&pl->parent_hashmap, struct pattern_entry, ent);
+       hashmap_free_entries(&pl->recursive_hashmap, struct pattern_entry, ent);
+       pl->use_cone_patterns = 0;
+}
+
+static int hashmap_contains_path(struct hashmap *map,
+                                struct strbuf *pattern)
+{
+       struct pattern_entry p;
+
+       /* Check straight mapping */
+       p.pattern = pattern->buf;
+       p.patternlen = pattern->len;
+       hashmap_entry_init(&p.ent,
+                          ignore_case ?
+                          strihash(p.pattern) :
+                          strhash(p.pattern));
+       return !!hashmap_get_entry(map, &p, ent, NULL);
+}
+
+int hashmap_contains_parent(struct hashmap *map,
+                           const char *path,
+                           struct strbuf *buffer)
+{
+       char *slash_pos;
+
+       strbuf_setlen(buffer, 0);
+
+       if (path[0] != '/')
+               strbuf_addch(buffer, '/');
+
+       strbuf_addstr(buffer, path);
+
+       slash_pos = strrchr(buffer->buf, '/');
+
+       while (slash_pos > buffer->buf) {
+               strbuf_setlen(buffer, slash_pos - buffer->buf);
+
+               if (hashmap_contains_path(map, buffer))
+                       return 1;
+
+               slash_pos = strrchr(buffer->buf, '/');
+       }
+
+       return 0;
+}
+
 void add_pattern(const char *string, const char *base,
                 int baselen, struct pattern_list *pl, int srcpos)
 {
@@ -635,6 +865,8 @@ void add_pattern(const char *string, const char *base,
        ALLOC_GROW(pl->patterns, pl->nr + 1, pl->alloc);
        pl->patterns[pl->nr++] = pattern;
        pattern->pl = pl;
+
+       add_pattern_to_hashsets(pl, pattern);
 }
 
 static int read_skip_worktree_file_from_index(const struct index_state *istate,
@@ -842,8 +1074,8 @@ static int add_patterns(const char *fname, const char *base, int baselen,
                                oidcpy(&oid_stat->oid,
                                       &istate->cache[pos]->oid);
                        else
-                               hash_object_file(buf, size, "blob",
-                                                &oid_stat->oid);
+                               hash_object_file(the_hash_algo, buf, size,
+                                                "blob", &oid_stat->oid);
                        fill_stat_data(&oid_stat->stat, &st);
                        oid_stat->valid = 1;
                }
@@ -860,6 +1092,9 @@ static int add_patterns_from_buffer(char *buf, size_t size,
        int i, lineno = 1;
        char *entry;
 
+       hashmap_init(&pl->recursive_hashmap, pl_hashmap_cmp, NULL, 0);
+       hashmap_init(&pl->parent_hashmap, pl_hashmap_cmp, NULL, 0);
+
        pl->filebuf = buf;
 
        if (skip_utf8_bom(&buf, size))
@@ -1052,8 +1287,7 @@ static struct path_pattern *last_matching_pattern_from_list(const char *pathname
                int prefix = pattern->nowildcardlen;
 
                if (pattern->flags & PATTERN_FLAG_MUSTBEDIR) {
-                       if (*dtype == DT_UNKNOWN)
-                               *dtype = get_dtype(NULL, istate, pathname, pathlen);
+                       *dtype = resolve_dtype(*dtype, istate, pathname, pathlen);
                        if (*dtype != DT_DIR)
                                continue;
                }
@@ -1096,16 +1330,58 @@ enum pattern_match_result path_matches_pattern_list(
                                struct index_state *istate)
 {
        struct path_pattern *pattern;
-       pattern = last_matching_pattern_from_list(pathname, pathlen, basename,
-                                                 dtype, pl, istate);
-       if (pattern) {
-               if (pattern->flags & PATTERN_FLAG_NEGATIVE)
-                       return NOT_MATCHED;
-               else
-                       return MATCHED;
+       struct strbuf parent_pathname = STRBUF_INIT;
+       int result = NOT_MATCHED;
+       const char *slash_pos;
+
+       if (!pl->use_cone_patterns) {
+               pattern = last_matching_pattern_from_list(pathname, pathlen, basename,
+                                                       dtype, pl, istate);
+               if (pattern) {
+                       if (pattern->flags & PATTERN_FLAG_NEGATIVE)
+                               return NOT_MATCHED;
+                       else
+                               return MATCHED;
+               }
+
+               return UNDECIDED;
        }
 
-       return UNDECIDED;
+       if (pl->full_cone)
+               return MATCHED;
+
+       strbuf_addch(&parent_pathname, '/');
+       strbuf_add(&parent_pathname, pathname, pathlen);
+
+       if (hashmap_contains_path(&pl->recursive_hashmap,
+                                 &parent_pathname)) {
+               result = MATCHED_RECURSIVE;
+               goto done;
+       }
+
+       slash_pos = strrchr(parent_pathname.buf, '/');
+
+       if (slash_pos == parent_pathname.buf) {
+               /* include every file in root */
+               result = MATCHED;
+               goto done;
+       }
+
+       strbuf_setlen(&parent_pathname, slash_pos - parent_pathname.buf);
+
+       if (hashmap_contains_path(&pl->parent_hashmap, &parent_pathname)) {
+               result = MATCHED;
+               goto done;
+       }
+
+       if (hashmap_contains_parent(&pl->recursive_hashmap,
+                                   pathname,
+                                   &parent_pathname))
+               result = MATCHED_RECURSIVE;
+
+done:
+       strbuf_release(&parent_pathname);
+       return result;
 }
 
 static struct path_pattern *last_matching_pattern_from_lists(
@@ -1451,33 +1727,59 @@ static enum exist_status directory_exists_in_index(struct index_state *istate,
 static enum path_treatment treat_directory(struct dir_struct *dir,
        struct index_state *istate,
        struct untracked_cache_dir *untracked,
-       const char *dirname, int len, int baselen, int exclude,
+       const char *dirname, int len, int baselen, int excluded,
        const struct pathspec *pathspec)
 {
+       /*
+        * WARNING: From this function, you can return path_recurse or you
+        *          can call read_directory_recursive() (or neither), but
+        *          you CAN'T DO BOTH.
+        */
+       enum path_treatment state;
+       int matches_how = 0;
+       int nested_repo = 0, check_only, stop_early;
+       int old_ignored_nr, old_untracked_nr;
        /* The "len-1" is to strip the final '/' */
-       switch (directory_exists_in_index(istate, dirname, len-1)) {
-       case index_directory:
-               return path_recurse;
+       enum exist_status status = directory_exists_in_index(istate, dirname, len-1);
 
-       case index_gitdir:
+       if (status == index_directory)
+               return path_recurse;
+       if (status == index_gitdir)
                return path_none;
+       if (status != index_nonexistent)
+               BUG("Unhandled value for directory_exists_in_index: %d\n", status);
 
-       case index_nonexistent:
-               if (dir->flags & DIR_SKIP_NESTED_GIT) {
-                       int nested_repo;
-                       struct strbuf sb = STRBUF_INIT;
-                       strbuf_addstr(&sb, dirname);
-                       nested_repo = is_nonbare_repository_dir(&sb);
-                       strbuf_release(&sb);
-                       if (nested_repo)
-                               return path_none;
-               }
+       /*
+        * We don't want to descend into paths that don't match the necessary
+        * patterns.  Clearly, if we don't have a pathspec, then we can't check
+        * for matching patterns.  Also, if (excluded) then we know we matched
+        * the exclusion patterns so as an optimization we can skip checking
+        * for matching patterns.
+        */
+       if (pathspec && !excluded) {
+               matches_how = do_match_pathspec(istate, pathspec, dirname, len,
+                                               0 /* prefix */, NULL /* seen */,
+                                               DO_MATCH_LEADING_PATHSPEC);
+               if (!matches_how)
+                       return path_none;
+       }
 
-               if (dir->flags & DIR_SHOW_OTHER_DIRECTORIES)
-                       break;
-               if (exclude &&
-                       (dir->flags & DIR_SHOW_IGNORED_TOO) &&
-                       (dir->flags & DIR_SHOW_IGNORED_TOO_MODE_MATCHING)) {
+
+       if ((dir->flags & DIR_SKIP_NESTED_GIT) ||
+               !(dir->flags & DIR_NO_GITLINKS)) {
+               struct strbuf sb = STRBUF_INIT;
+               strbuf_addstr(&sb, dirname);
+               nested_repo = is_nonbare_repository_dir(&sb);
+               strbuf_release(&sb);
+       }
+       if (nested_repo)
+               return ((dir->flags & DIR_SKIP_NESTED_GIT) ? path_none :
+                       (excluded ? path_excluded : path_untracked));
+
+       if (!(dir->flags & DIR_SHOW_OTHER_DIRECTORIES)) {
+               if (excluded &&
+                   (dir->flags & DIR_SHOW_IGNORED_TOO) &&
+                   (dir->flags & DIR_SHOW_IGNORED_TOO_MODE_MATCHING)) {
 
                        /*
                         * This is an excluded directory and we are
@@ -1499,30 +1801,139 @@ static enum path_treatment treat_directory(struct dir_struct *dir,
 
                        return path_none;
                }
-               if (!(dir->flags & DIR_NO_GITLINKS)) {
-                       struct strbuf sb = STRBUF_INIT;
-                       strbuf_addstr(&sb, dirname);
-                       if (is_nonbare_repository_dir(&sb))
-                               return exclude ? path_excluded : path_untracked;
-                       strbuf_release(&sb);
-               }
                return path_recurse;
        }
 
        /* This is the "show_other_directories" case */
 
-       if (!(dir->flags & DIR_HIDE_EMPTY_DIRECTORIES))
-               return exclude ? path_excluded : path_untracked;
+       /*
+        * If we have a pathspec which could match something _below_ this
+        * directory (e.g. when checking 'subdir/' having a pathspec like
+        * 'subdir/some/deep/path/file' or 'subdir/widget-*.c'), then we
+        * need to recurse.
+        */
+       if (matches_how == MATCHED_RECURSIVELY_LEADING_PATHSPEC)
+               return path_recurse;
+
+       /*
+        * Other than the path_recurse case immediately above, we only need
+        * to recurse into untracked/ignored directories if either of the
+        * following bits is set:
+        *   - DIR_SHOW_IGNORED_TOO (because then we need to determine if
+        *                           there are ignored directories below)
+        *   - DIR_HIDE_EMPTY_DIRECTORIES (because we have to determine if
+        *                                 the directory is empty)
+        */
+       if (!(dir->flags & (DIR_SHOW_IGNORED_TOO | DIR_HIDE_EMPTY_DIRECTORIES)))
+               return excluded ? path_excluded : path_untracked;
+
+       /*
+        * ...and even if DIR_SHOW_IGNORED_TOO is set, we can still avoid
+        * recursing into ignored directories if the path is excluded and
+        * DIR_SHOW_IGNORED_TOO_MODE_MATCHING is also set.
+        */
+       if (excluded &&
+           (dir->flags & DIR_SHOW_IGNORED_TOO) &&
+           (dir->flags & DIR_SHOW_IGNORED_TOO_MODE_MATCHING))
+               return path_excluded;
+
+       /*
+        * If we have we don't want to know the all the paths under an
+        * untracked or ignored directory, we still need to go into the
+        * directory to determine if it is empty (because an empty directory
+        * should be path_none instead of path_excluded or path_untracked).
+        */
+       check_only = ((dir->flags & DIR_HIDE_EMPTY_DIRECTORIES) &&
+                     !(dir->flags & DIR_SHOW_IGNORED_TOO));
 
+       /*
+        * However, there's another optimization possible as a subset of
+        * check_only, based on the cases we have to consider:
+        *   A) Directory matches no exclude patterns:
+        *     * Directory is empty => path_none
+        *     * Directory has an untracked file under it => path_untracked
+        *     * Directory has only ignored files under it => path_excluded
+        *   B) Directory matches an exclude pattern:
+        *     * Directory is empty => path_none
+        *     * Directory has an untracked file under it => path_excluded
+        *     * Directory has only ignored files under it => path_excluded
+        * In case A, we can exit as soon as we've found an untracked
+        * file but otherwise have to walk all files.  In case B, though,
+        * we can stop at the first file we find under the directory.
+        */
+       stop_early = check_only && excluded;
+
+       /*
+        * If /every/ file within an untracked directory is ignored, then
+        * we want to treat the directory as ignored (for e.g. status
+        * --porcelain), without listing the individual ignored files
+        * underneath.  To do so, we'll save the current ignored_nr, and
+        * pop all the ones added after it if it turns out the entire
+        * directory is ignored.  Also, when DIR_SHOW_IGNORED_TOO and
+        * !DIR_KEEP_UNTRACKED_CONTENTS then we don't want to show
+        * untracked paths so will need to pop all those off the last
+        * after we traverse.
+        */
+       old_ignored_nr = dir->ignored_nr;
+       old_untracked_nr = dir->nr;
+
+       /* Actually recurse into dirname now, we'll fixup the state later. */
        untracked = lookup_untracked(dir->untracked, untracked,
                                     dirname + baselen, len - baselen);
+       state = read_directory_recursive(dir, istate, dirname, len, untracked,
+                                        check_only, stop_early, pathspec);
+
+       /* There are a variety of reasons we may need to fixup the state... */
+       if (state == path_excluded) {
+               /* state == path_excluded implies all paths under
+                * dirname were ignored...
+                *
+                * if running e.g. `git status --porcelain --ignored=matching`,
+                * then we want to see the subpaths that are ignored.
+                *
+                * if running e.g. just `git status --porcelain`, then
+                * we just want the directory itself to be listed as ignored
+                * and not the individual paths underneath.
+                */
+               int want_ignored_subpaths =
+                       ((dir->flags & DIR_SHOW_IGNORED_TOO) &&
+                        (dir->flags & DIR_SHOW_IGNORED_TOO_MODE_MATCHING));
+
+               if (want_ignored_subpaths) {
+                       /*
+                        * with --ignored=matching, we want the subpaths
+                        * INSTEAD of the directory itself.
+                        */
+                       state = path_none;
+               } else {
+                       int i;
+                       for (i = old_ignored_nr + 1; i<dir->ignored_nr; ++i)
+                               FREE_AND_NULL(dir->ignored[i]);
+                       dir->ignored_nr = old_ignored_nr;
+               }
+       }
 
        /*
-        * If this is an excluded directory, then we only need to check if
-        * the directory contains any files.
+        * We may need to ignore some of the untracked paths we found while
+        * traversing subdirectories.
         */
-       return read_directory_recursive(dir, istate, dirname, len,
-                                       untracked, 1, exclude, pathspec);
+       if ((dir->flags & DIR_SHOW_IGNORED_TOO) &&
+           !(dir->flags & DIR_KEEP_UNTRACKED_CONTENTS)) {
+               int i;
+               for (i = old_untracked_nr + 1; i<dir->nr; ++i)
+                       FREE_AND_NULL(dir->entries[i]);
+               dir->nr = old_untracked_nr;
+       }
+
+       /*
+        * If there is nothing under the current directory and we are not
+        * hiding empty directories, then we need to report on the
+        * untracked or ignored status of the directory itself.
+        */
+       if (state == path_none && !(dir->flags & DIR_HIDE_EMPTY_DIRECTORIES))
+               state = excluded ? path_excluded : path_untracked;
+
+       return state;
 }
 
 /*
@@ -1641,10 +2052,9 @@ static int get_index_dtype(struct index_state *istate,
        return DT_UNKNOWN;
 }
 
-static int get_dtype(struct dirent *de, struct index_state *istate,
-                    const char *path, int len)
+static int resolve_dtype(int dtype, struct index_state *istate,
+                        const char *path, int len)
 {
-       int dtype = de ? DTYPE(de) : DT_UNKNOWN;
        struct stat st;
 
        if (dtype != DT_UNKNOWN)
@@ -1663,86 +2073,6 @@ static int get_dtype(struct dirent *de, struct index_state *istate,
        return dtype;
 }
 
-static enum path_treatment treat_one_path(struct dir_struct *dir,
-                                         struct untracked_cache_dir *untracked,
-                                         struct index_state *istate,
-                                         struct strbuf *path,
-                                         int baselen,
-                                         const struct pathspec *pathspec,
-                                         int dtype, struct dirent *de)
-{
-       int exclude;
-       int has_path_in_index = !!index_file_exists(istate, path->buf, path->len, ignore_case);
-       enum path_treatment path_treatment;
-
-       if (dtype == DT_UNKNOWN)
-               dtype = get_dtype(de, istate, path->buf, path->len);
-
-       /* Always exclude indexed files */
-       if (dtype != DT_DIR && has_path_in_index)
-               return path_none;
-
-       /*
-        * When we are looking at a directory P in the working tree,
-        * there are three cases:
-        *
-        * (1) P exists in the index.  Everything inside the directory P in
-        * the working tree needs to go when P is checked out from the
-        * index.
-        *
-        * (2) P does not exist in the index, but there is P/Q in the index.
-        * We know P will stay a directory when we check out the contents
-        * of the index, but we do not know yet if there is a directory
-        * P/Q in the working tree to be killed, so we need to recurse.
-        *
-        * (3) P does not exist in the index, and there is no P/Q in the index
-        * to require P to be a directory, either.  Only in this case, we
-        * know that everything inside P will not be killed without
-        * recursing.
-        */
-       if ((dir->flags & DIR_COLLECT_KILLED_ONLY) &&
-           (dtype == DT_DIR) &&
-           !has_path_in_index &&
-           (directory_exists_in_index(istate, path->buf, path->len) == index_nonexistent))
-               return path_none;
-
-       exclude = is_excluded(dir, istate, path->buf, &dtype);
-
-       /*
-        * Excluded? If we don't explicitly want to show
-        * ignored files, ignore it
-        */
-       if (exclude && !(dir->flags & (DIR_SHOW_IGNORED|DIR_SHOW_IGNORED_TOO)))
-               return path_excluded;
-
-       switch (dtype) {
-       default:
-               return path_none;
-       case DT_DIR:
-               strbuf_addch(path, '/');
-               path_treatment = treat_directory(dir, istate, untracked,
-                                                path->buf, path->len,
-                                                baselen, exclude, pathspec);
-               /*
-                * If 1) we only want to return directories that
-                * match an exclude pattern and 2) this directory does
-                * not match an exclude pattern but all of its
-                * contents are excluded, then indicate that we should
-                * recurse into this directory (instead of marking the
-                * directory itself as an ignored path).
-                */
-               if (!exclude &&
-                   path_treatment == path_excluded &&
-                   (dir->flags & DIR_SHOW_IGNORED_TOO) &&
-                   (dir->flags & DIR_SHOW_IGNORED_TOO_MODE_MATCHING))
-                       return path_recurse;
-               return path_treatment;
-       case DT_REG:
-       case DT_LNK:
-               return exclude ? path_excluded : path_untracked;
-       }
-}
-
 static enum path_treatment treat_path_fast(struct dir_struct *dir,
                                           struct untracked_cache_dir *untracked,
                                           struct cached_dir *cdir,
@@ -1751,6 +2081,11 @@ static enum path_treatment treat_path_fast(struct dir_struct *dir,
                                           int baselen,
                                           const struct pathspec *pathspec)
 {
+       /*
+        * WARNING: From this function, you can return path_recurse or you
+        *          can call read_directory_recursive() (or neither), but
+        *          you CAN'T DO BOTH.
+        */
        strbuf_setlen(path, baselen);
        if (!cdir->ucd) {
                strbuf_addstr(path, cdir->file);
@@ -1784,21 +2119,84 @@ static enum path_treatment treat_path(struct dir_struct *dir,
                                      int baselen,
                                      const struct pathspec *pathspec)
 {
-       int dtype;
-       struct dirent *de = cdir->de;
+       int has_path_in_index, dtype, excluded;
 
-       if (!de)
+       if (!cdir->d_name)
                return treat_path_fast(dir, untracked, cdir, istate, path,
                                       baselen, pathspec);
-       if (is_dot_or_dotdot(de->d_name) || !fspathcmp(de->d_name, ".git"))
+       if (is_dot_or_dotdot(cdir->d_name) || !fspathcmp(cdir->d_name, ".git"))
                return path_none;
        strbuf_setlen(path, baselen);
-       strbuf_addstr(path, de->d_name);
+       strbuf_addstr(path, cdir->d_name);
        if (simplify_away(path->buf, path->len, pathspec))
                return path_none;
 
-       dtype = DTYPE(de);
-       return treat_one_path(dir, untracked, istate, path, baselen, pathspec, dtype, de);
+       dtype = resolve_dtype(cdir->d_type, istate, path->buf, path->len);
+
+       /* Always exclude indexed files */
+       has_path_in_index = !!index_file_exists(istate, path->buf, path->len,
+                                               ignore_case);
+       if (dtype != DT_DIR && has_path_in_index)
+               return path_none;
+
+       /*
+        * When we are looking at a directory P in the working tree,
+        * there are three cases:
+        *
+        * (1) P exists in the index.  Everything inside the directory P in
+        * the working tree needs to go when P is checked out from the
+        * index.
+        *
+        * (2) P does not exist in the index, but there is P/Q in the index.
+        * We know P will stay a directory when we check out the contents
+        * of the index, but we do not know yet if there is a directory
+        * P/Q in the working tree to be killed, so we need to recurse.
+        *
+        * (3) P does not exist in the index, and there is no P/Q in the index
+        * to require P to be a directory, either.  Only in this case, we
+        * know that everything inside P will not be killed without
+        * recursing.
+        */
+       if ((dir->flags & DIR_COLLECT_KILLED_ONLY) &&
+           (dtype == DT_DIR) &&
+           !has_path_in_index &&
+           (directory_exists_in_index(istate, path->buf, path->len) == index_nonexistent))
+               return path_none;
+
+       excluded = is_excluded(dir, istate, path->buf, &dtype);
+
+       /*
+        * Excluded? If we don't explicitly want to show
+        * ignored files, ignore it
+        */
+       if (excluded && !(dir->flags & (DIR_SHOW_IGNORED|DIR_SHOW_IGNORED_TOO)))
+               return path_excluded;
+
+       switch (dtype) {
+       default:
+               return path_none;
+       case DT_DIR:
+               /*
+                * WARNING: Do not ignore/amend the return value from
+                * treat_directory(), and especially do not change it to return
+                * path_recurse as that can cause exponential slowdown.
+                * Instead, modify treat_directory() to return the right value.
+                */
+               strbuf_addch(path, '/');
+               return treat_directory(dir, istate, untracked,
+                                      path->buf, path->len,
+                                      baselen, excluded, pathspec);
+       case DT_REG:
+       case DT_LNK:
+               if (excluded)
+                       return path_excluded;
+               if (pathspec &&
+                   !do_match_pathspec(istate, pathspec, path->buf, path->len,
+                                      0 /* prefix */, NULL /* seen */,
+                                      0 /* flags */))
+                       return path_none;
+               return path_untracked;
+       }
 }
 
 static void add_untracked(struct untracked_cache_dir *dir, const char *name)
@@ -1886,10 +2284,17 @@ static int open_cached_dir(struct cached_dir *cdir,
 
 static int read_cached_dir(struct cached_dir *cdir)
 {
+       struct dirent *de;
+
        if (cdir->fdir) {
-               cdir->de = readdir(cdir->fdir);
-               if (!cdir->de)
+               de = readdir(cdir->fdir);
+               if (!de) {
+                       cdir->d_name = NULL;
+                       cdir->d_type = DT_UNKNOWN;
                        return -1;
+               }
+               cdir->d_name = de->d_name;
+               cdir->d_type = DTYPE(de);
                return 0;
        }
        while (cdir->nr_dirs < cdir->untracked->dirs_nr) {
@@ -1925,6 +2330,40 @@ static void close_cached_dir(struct cached_dir *cdir)
        }
 }
 
+static void add_path_to_appropriate_result_list(struct dir_struct *dir,
+       struct untracked_cache_dir *untracked,
+       struct cached_dir *cdir,
+       struct index_state *istate,
+       struct strbuf *path,
+       int baselen,
+       const struct pathspec *pathspec,
+       enum path_treatment state)
+{
+       /* add the path to the appropriate result list */
+       switch (state) {
+       case path_excluded:
+               if (dir->flags & DIR_SHOW_IGNORED)
+                       dir_add_name(dir, istate, path->buf, path->len);
+               else if ((dir->flags & DIR_SHOW_IGNORED_TOO) ||
+                       ((dir->flags & DIR_COLLECT_IGNORED) &&
+                       exclude_matches_pathspec(path->buf, path->len,
+                                                pathspec)))
+                       dir_add_ignored(dir, istate, path->buf, path->len);
+               break;
+
+       case path_untracked:
+               if (dir->flags & DIR_SHOW_IGNORED)
+                       break;
+               dir_add_name(dir, istate, path->buf, path->len);
+               if (cdir->fdir)
+                       add_untracked(untracked, path->buf + baselen);
+               break;
+
+       default:
+               break;
+       }
+}
+
 /*
  * Read a directory tree. We currently ignore anything but
  * directories, regular files and symlinks. That's because git
@@ -1937,7 +2376,7 @@ static void close_cached_dir(struct cached_dir *cdir)
  * If 'stop_at_first_file' is specified, 'path_excluded' is returned
  * to signal that a file was found. This is the least significant value that
  * indicates that a file was encountered that does not depend on the order of
- * whether an untracked or exluded path was encountered first.
+ * whether an untracked or excluded path was encountered first.
  *
  * Returns the most significant path_treatment value encountered in the scan.
  * If 'stop_at_first_file' is specified, `path_excluded` is the most
@@ -1949,6 +2388,11 @@ static enum path_treatment read_directory_recursive(struct dir_struct *dir,
        struct untracked_cache_dir *untracked, int check_only,
        int stop_at_first_file, const struct pathspec *pathspec)
 {
+       /*
+        * WARNING: Do NOT recurse unless path_recurse is returned from
+        *          treat_path().  Recursing on any other return value
+        *          can result in exponential slowdown.
+        */
        struct cached_dir cdir;
        enum path_treatment state, subdir_state, dir_state = path_none;
        struct strbuf path = STRBUF_INIT;
@@ -1970,13 +2414,7 @@ static enum path_treatment read_directory_recursive(struct dir_struct *dir,
                        dir_state = state;
 
                /* recurse into subdir if instructed by treat_path */
-               if ((state == path_recurse) ||
-                       ((state == path_untracked) &&
-                        (get_dtype(cdir.de, istate, path.buf, path.len) == DT_DIR) &&
-                        ((dir->flags & DIR_SHOW_IGNORED_TOO) ||
-                         (pathspec &&
-                          do_match_pathspec(istate, pathspec, path.buf, path.len,
-                                            baselen, NULL, DO_MATCH_LEADING_PATHSPEC) == MATCHED_RECURSIVELY_LEADING_PATHSPEC)))) {
+               if (state == path_recurse) {
                        struct untracked_cache_dir *ud;
                        ud = lookup_untracked(dir->untracked, untracked,
                                              path.buf + baselen,
@@ -2024,33 +2462,13 @@ static enum path_treatment read_directory_recursive(struct dir_struct *dir,
                                        add_untracked(untracked, path.buf + baselen);
                                break;
                        }
-                       /* skip the dir_add_* part */
+                       /* skip the add_path_to_appropriate_result_list() */
                        continue;
                }
 
-               /* add the path to the appropriate result list */
-               switch (state) {
-               case path_excluded:
-                       if (dir->flags & DIR_SHOW_IGNORED)
-                               dir_add_name(dir, istate, path.buf, path.len);
-                       else if ((dir->flags & DIR_SHOW_IGNORED_TOO) ||
-                               ((dir->flags & DIR_COLLECT_IGNORED) &&
-                               exclude_matches_pathspec(path.buf, path.len,
-                                                        pathspec)))
-                               dir_add_ignored(dir, istate, path.buf, path.len);
-                       break;
-
-               case path_untracked:
-                       if (dir->flags & DIR_SHOW_IGNORED)
-                               break;
-                       dir_add_name(dir, istate, path.buf, path.len);
-                       if (cdir.fdir)
-                               add_untracked(untracked, path.buf + baselen);
-                       break;
-
-               default:
-                       break;
-               }
+               add_path_to_appropriate_result_list(dir, untracked, &cdir,
+                                                   istate, &path, baselen,
+                                                   pathspec, state);
        }
        close_cached_dir(&cdir);
  out:
@@ -2081,40 +2499,69 @@ static int treat_leading_path(struct dir_struct *dir,
                              const struct pathspec *pathspec)
 {
        struct strbuf sb = STRBUF_INIT;
-       int baselen, rc = 0;
+       struct strbuf subdir = STRBUF_INIT;
+       int prevlen, baselen;
        const char *cp;
-       int old_flags = dir->flags;
+       struct cached_dir cdir;
+       enum path_treatment state = path_none;
+
+       /*
+        * For each directory component of path, we are going to check whether
+        * that path is relevant given the pathspec.  For example, if path is
+        *    foo/bar/baz/
+        * then we will ask treat_path() whether we should go into foo, then
+        * whether we should go into bar, then whether baz is relevant.
+        * Checking each is important because e.g. if path is
+        *    .git/info/
+        * then we need to check .git to know we shouldn't traverse it.
+        * If the return from treat_path() is:
+        *    * path_none, for any path, we return false.
+        *    * path_recurse, for all path components, we return true
+        *    * <anything else> for some intermediate component, we make sure
+        *        to add that path to the relevant list but return false
+        *        signifying that we shouldn't recurse into it.
+        */
 
        while (len && path[len - 1] == '/')
                len--;
        if (!len)
                return 1;
+
+       memset(&cdir, 0, sizeof(cdir));
+       cdir.d_type = DT_DIR;
        baselen = 0;
-       dir->flags &= ~DIR_SHOW_OTHER_DIRECTORIES;
+       prevlen = 0;
        while (1) {
-               cp = path + baselen + !!baselen;
+               prevlen = baselen + !!baselen;
+               cp = path + prevlen;
                cp = memchr(cp, '/', path + len - cp);
                if (!cp)
                        baselen = len;
                else
                        baselen = cp - path;
-               strbuf_setlen(&sb, 0);
+               strbuf_reset(&sb);
                strbuf_add(&sb, path, baselen);
                if (!is_directory(sb.buf))
                        break;
-               if (simplify_away(sb.buf, sb.len, pathspec))
-                       break;
-               if (treat_one_path(dir, NULL, istate, &sb, baselen, pathspec,
-                                  DT_DIR, NULL) == path_none)
+               strbuf_reset(&sb);
+               strbuf_add(&sb, path, prevlen);
+               strbuf_reset(&subdir);
+               strbuf_add(&subdir, path+prevlen, baselen-prevlen);
+               cdir.d_name = subdir.buf;
+               state = treat_path(dir, NULL, &cdir, istate, &sb, prevlen, pathspec);
+
+               if (state != path_recurse)
                        break; /* do not recurse into it */
-               if (len <= baselen) {
-                       rc = 1;
+               if (len <= baselen)
                        break; /* finished checking */
-               }
        }
+       add_path_to_appropriate_result_list(dir, NULL, &cdir, istate,
+                                           &sb, baselen, pathspec,
+                                           state);
+
+       strbuf_release(&subdir);
        strbuf_release(&sb);
-       dir->flags = old_flags;
-       return rc;
+       return state == path_recurse;
 }
 
 static const char *get_ident_string(void)
@@ -2301,28 +2748,6 @@ int read_directory(struct dir_struct *dir, struct index_state *istate,
        QSORT(dir->entries, dir->nr, cmp_dir_entry);
        QSORT(dir->ignored, dir->ignored_nr, cmp_dir_entry);
 
-       /*
-        * If DIR_SHOW_IGNORED_TOO is set, read_directory_recursive() will
-        * also pick up untracked contents of untracked dirs; by default
-        * we discard these, but given DIR_KEEP_UNTRACKED_CONTENTS we do not.
-        */
-       if ((dir->flags & DIR_SHOW_IGNORED_TOO) &&
-                    !(dir->flags & DIR_KEEP_UNTRACKED_CONTENTS)) {
-               int i, j;
-
-               /* remove from dir->entries untracked contents of untracked dirs */
-               for (i = j = 0; j < dir->nr; j++) {
-                       if (i &&
-                           check_dir_entry_contains(dir->entries[i - 1], dir->entries[j])) {
-                               FREE_AND_NULL(dir->entries[j]);
-                       } else {
-                               dir->entries[i++] = dir->entries[j];
-                       }
-               }
-
-               dir->nr = i;
-       }
-
        trace_performance_leave("read directory %.*s", len, path);
        if (dir->untracked) {
                static int force_untracked_cache = -1;
@@ -2489,7 +2914,7 @@ static int remove_dir_recurse(struct strbuf *path, int flag, int *kept_up)
                                 * wanted anyway
                                 */
                                continue;
-                       /* fall thru */
+                       /* fall through */
                } else if (S_ISDIR(st.st_mode)) {
                        if (!remove_dir_recurse(path, flag, &kept_down))
                                continue; /* happy */