git.ipfire.org Git - thirdparty/git.git/blobdiff - dir.c
Merge branch 'jt/t5500-unflake'
[thirdparty/git.git] / dir.c
diff --git a/dir.c b/dir.c
index 0ffb1b3302452c2cce0bdaa55e5259d9be168b63..d97e9558489d3bf622673b1f5b90053154ea9033 100644 (file)
--- a/dir.c
+++ b/dir.c
@@ -1727,36 +1727,59 @@ static enum exist_status directory_exists_in_index(struct index_state *istate,
 static enum path_treatment treat_directory(struct dir_struct *dir,
        struct index_state *istate,
        struct untracked_cache_dir *untracked,
-       const char *dirname, int len, int baselen, int exclude,
+       const char *dirname, int len, int baselen, int excluded,
        const struct pathspec *pathspec)
 {
-       int nested_repo = 0;
-
+       /*
+        * WARNING: From this function, you can return path_recurse or you
+        *          can call read_directory_recursive() (or neither), but
+        *          you CAN'T DO BOTH.
+        */
+       enum path_treatment state;
+       int matches_how = 0;
+       int nested_repo = 0, check_only, stop_early;
+       int old_ignored_nr, old_untracked_nr;
        /* The "len-1" is to strip the final '/' */
-       switch (directory_exists_in_index(istate, dirname, len-1)) {
-       case index_directory:
-               return path_recurse;
+       enum exist_status status = directory_exists_in_index(istate, dirname, len-1);
 
-       case index_gitdir:
+       if (status == index_directory)
+               return path_recurse;
+       if (status == index_gitdir)
                return path_none;
+       if (status != index_nonexistent)
+               BUG("Unhandled value for directory_exists_in_index: %d\n", status);
 
-       case index_nonexistent:
-               if ((dir->flags & DIR_SKIP_NESTED_GIT) ||
-                   !(dir->flags & DIR_NO_GITLINKS)) {
-                       struct strbuf sb = STRBUF_INIT;
-                       strbuf_addstr(&sb, dirname);
-                       nested_repo = is_nonbare_repository_dir(&sb);
-                       strbuf_release(&sb);
-               }
-               if (nested_repo)
-                       return ((dir->flags & DIR_SKIP_NESTED_GIT) ? path_none :
-                               (exclude ? path_excluded : path_untracked));
+       /*
+        * We don't want to descend into paths that don't match the necessary
+        * patterns.  Clearly, if we don't have a pathspec, then we can't check
+        * for matching patterns.  Also, if (excluded) then we know we matched
+        * the exclusion patterns so as an optimization we can skip checking
+        * for matching patterns.
+        */
+       if (pathspec && !excluded) {
+               matches_how = do_match_pathspec(istate, pathspec, dirname, len,
+                                               0 /* prefix */, NULL /* seen */,
+                                               DO_MATCH_LEADING_PATHSPEC);
+               if (!matches_how)
+                       return path_none;
+       }
 
-               if (dir->flags & DIR_SHOW_OTHER_DIRECTORIES)
-                       break;
-               if (exclude &&
-                       (dir->flags & DIR_SHOW_IGNORED_TOO) &&
-                       (dir->flags & DIR_SHOW_IGNORED_TOO_MODE_MATCHING)) {
+
+       if ((dir->flags & DIR_SKIP_NESTED_GIT) ||
+               !(dir->flags & DIR_NO_GITLINKS)) {
+               struct strbuf sb = STRBUF_INIT;
+               strbuf_addstr(&sb, dirname);
+               nested_repo = is_nonbare_repository_dir(&sb);
+               strbuf_release(&sb);
+       }
+       if (nested_repo)
+               return ((dir->flags & DIR_SKIP_NESTED_GIT) ? path_none :
+                       (excluded ? path_excluded : path_untracked));
+
+       if (!(dir->flags & DIR_SHOW_OTHER_DIRECTORIES)) {
+               if (excluded &&
+                   (dir->flags & DIR_SHOW_IGNORED_TOO) &&
+                   (dir->flags & DIR_SHOW_IGNORED_TOO_MODE_MATCHING)) {
 
                        /*
                         * This is an excluded directory and we are
@@ -1783,18 +1806,134 @@ static enum path_treatment treat_directory(struct dir_struct *dir,
 
        /* This is the "show_other_directories" case */
 
-       if (!(dir->flags & DIR_HIDE_EMPTY_DIRECTORIES))
-               return exclude ? path_excluded : path_untracked;
+       /*
+        * If we have a pathspec which could match something _below_ this
+        * directory (e.g. when checking 'subdir/' having a pathspec like
+        * 'subdir/some/deep/path/file' or 'subdir/widget-*.c'), then we
+        * need to recurse.
+        */
+       if (matches_how == MATCHED_RECURSIVELY_LEADING_PATHSPEC)
+               return path_recurse;
+
+       /*
+        * Other than the path_recurse case immediately above, we only need
+        * to recurse into untracked/ignored directories if either of the
+        * following bits is set:
+        *   - DIR_SHOW_IGNORED_TOO (because then we need to determine if
+        *                           there are ignored directories below)
+        *   - DIR_HIDE_EMPTY_DIRECTORIES (because we have to determine if
+        *                                 the directory is empty)
+        */
+       if (!(dir->flags & (DIR_SHOW_IGNORED_TOO | DIR_HIDE_EMPTY_DIRECTORIES)))
+               return excluded ? path_excluded : path_untracked;
 
+       /*
+        * ...and even if DIR_SHOW_IGNORED_TOO is set, we can still avoid
+        * recursing into ignored directories if the path is excluded and
+        * DIR_SHOW_IGNORED_TOO_MODE_MATCHING is also set.
+        */
+       if (excluded &&
+           (dir->flags & DIR_SHOW_IGNORED_TOO) &&
+           (dir->flags & DIR_SHOW_IGNORED_TOO_MODE_MATCHING))
+               return path_excluded;
+
+       /*
+        * Even if we don't want to know all the paths under an
+        * untracked or ignored directory, we still need to go into the
+        * directory to determine if it is empty (because an empty directory
+        * should be path_none instead of path_excluded or path_untracked).
+        */
+       check_only = ((dir->flags & DIR_HIDE_EMPTY_DIRECTORIES) &&
+                     !(dir->flags & DIR_SHOW_IGNORED_TOO));
+
+       /*
+        * However, there's another optimization possible as a subset of
+        * check_only, based on the cases we have to consider:
+        *   A) Directory matches no exclude patterns:
+        *     * Directory is empty => path_none
+        *     * Directory has an untracked file under it => path_untracked
+        *     * Directory has only ignored files under it => path_excluded
+        *   B) Directory matches an exclude pattern:
+        *     * Directory is empty => path_none
+        *     * Directory has an untracked file under it => path_excluded
+        *     * Directory has only ignored files under it => path_excluded
+        * In case A, we can exit as soon as we've found an untracked
+        * file but otherwise have to walk all files.  In case B, though,
+        * we can stop at the first file we find under the directory.
+        */
+       stop_early = check_only && excluded;
+
+       /*
+        * If /every/ file within an untracked directory is ignored, then
+        * we want to treat the directory as ignored (for e.g. status
+        * --porcelain), without listing the individual ignored files
+        * underneath.  To do so, we'll save the current ignored_nr, and
+        * pop all the ones added after it if it turns out the entire
+        * directory is ignored.  Also, when DIR_SHOW_IGNORED_TOO and
+        * !DIR_KEEP_UNTRACKED_CONTENTS then we don't want to show
+        * untracked paths, so we will need to pop all those off the list
+        * after we traverse.
+        */
+       old_ignored_nr = dir->ignored_nr;
+       old_untracked_nr = dir->nr;
+
+       /* Actually recurse into dirname now; we'll fix up the state later. */
        untracked = lookup_untracked(dir->untracked, untracked,
                                     dirname + baselen, len - baselen);
+       state = read_directory_recursive(dir, istate, dirname, len, untracked,
+                                        check_only, stop_early, pathspec);
+
+       /* There are a variety of reasons we may need to fix up the state... */
+       if (state == path_excluded) {
+               /* state == path_excluded implies all paths under
+                * dirname were ignored...
+                *
+                * if running e.g. `git status --porcelain --ignored=matching`,
+                * then we want to see the subpaths that are ignored.
+                *
+                * if running e.g. just `git status --porcelain`, then
+                * we just want the directory itself to be listed as ignored
+                * and not the individual paths underneath.
+                */
+               int want_ignored_subpaths =
+                       ((dir->flags & DIR_SHOW_IGNORED_TOO) &&
+                        (dir->flags & DIR_SHOW_IGNORED_TOO_MODE_MATCHING));
+
+               if (want_ignored_subpaths) {
+                       /*
+                        * with --ignored=matching, we want the subpaths
+                        * INSTEAD of the directory itself.
+                        */
+                       state = path_none;
+               } else {
+                       int i;
+                       for (i = old_ignored_nr + 1; i<dir->ignored_nr; ++i)
+                               FREE_AND_NULL(dir->ignored[i]);
+                       dir->ignored_nr = old_ignored_nr;
+               }
+       }
 
        /*
-        * If this is an excluded directory, then we only need to check if
-        * the directory contains any files.
+        * We may need to ignore some of the untracked paths we found while
+        * traversing subdirectories.
         */
-       return read_directory_recursive(dir, istate, dirname, len,
-                                       untracked, 1, exclude, pathspec);
+       if ((dir->flags & DIR_SHOW_IGNORED_TOO) &&
+           !(dir->flags & DIR_KEEP_UNTRACKED_CONTENTS)) {
+               int i;
+               for (i = old_untracked_nr + 1; i<dir->nr; ++i)
+                       FREE_AND_NULL(dir->entries[i]);
+               dir->nr = old_untracked_nr;
+       }
+
+       /*
+        * If there is nothing under the current directory and we are not
+        * hiding empty directories, then we need to report on the
+        * untracked or ignored status of the directory itself.
+        */
+       if (state == path_none && !(dir->flags & DIR_HIDE_EMPTY_DIRECTORIES))
+               state = excluded ? path_excluded : path_untracked;
+
+       return state;
 }
 
 /*
@@ -1934,85 +2073,6 @@ static int resolve_dtype(int dtype, struct index_state *istate,
        return dtype;
 }
 
-static enum path_treatment treat_one_path(struct dir_struct *dir,
-                                         struct untracked_cache_dir *untracked,
-                                         struct index_state *istate,
-                                         struct strbuf *path,
-                                         int baselen,
-                                         const struct pathspec *pathspec,
-                                         int dtype)
-{
-       int exclude;
-       int has_path_in_index = !!index_file_exists(istate, path->buf, path->len, ignore_case);
-       enum path_treatment path_treatment;
-
-       dtype = resolve_dtype(dtype, istate, path->buf, path->len);
-
-       /* Always exclude indexed files */
-       if (dtype != DT_DIR && has_path_in_index)
-               return path_none;
-
-       /*
-        * When we are looking at a directory P in the working tree,
-        * there are three cases:
-        *
-        * (1) P exists in the index.  Everything inside the directory P in
-        * the working tree needs to go when P is checked out from the
-        * index.
-        *
-        * (2) P does not exist in the index, but there is P/Q in the index.
-        * We know P will stay a directory when we check out the contents
-        * of the index, but we do not know yet if there is a directory
-        * P/Q in the working tree to be killed, so we need to recurse.
-        *
-        * (3) P does not exist in the index, and there is no P/Q in the index
-        * to require P to be a directory, either.  Only in this case, we
-        * know that everything inside P will not be killed without
-        * recursing.
-        */
-       if ((dir->flags & DIR_COLLECT_KILLED_ONLY) &&
-           (dtype == DT_DIR) &&
-           !has_path_in_index &&
-           (directory_exists_in_index(istate, path->buf, path->len) == index_nonexistent))
-               return path_none;
-
-       exclude = is_excluded(dir, istate, path->buf, &dtype);
-
-       /*
-        * Excluded? If we don't explicitly want to show
-        * ignored files, ignore it
-        */
-       if (exclude && !(dir->flags & (DIR_SHOW_IGNORED|DIR_SHOW_IGNORED_TOO)))
-               return path_excluded;
-
-       switch (dtype) {
-       default:
-               return path_none;
-       case DT_DIR:
-               strbuf_addch(path, '/');
-               path_treatment = treat_directory(dir, istate, untracked,
-                                                path->buf, path->len,
-                                                baselen, exclude, pathspec);
-               /*
-                * If 1) we only want to return directories that
-                * match an exclude pattern and 2) this directory does
-                * not match an exclude pattern but all of its
-                * contents are excluded, then indicate that we should
-                * recurse into this directory (instead of marking the
-                * directory itself as an ignored path).
-                */
-               if (!exclude &&
-                   path_treatment == path_excluded &&
-                   (dir->flags & DIR_SHOW_IGNORED_TOO) &&
-                   (dir->flags & DIR_SHOW_IGNORED_TOO_MODE_MATCHING))
-                       return path_recurse;
-               return path_treatment;
-       case DT_REG:
-       case DT_LNK:
-               return exclude ? path_excluded : path_untracked;
-       }
-}
-
 static enum path_treatment treat_path_fast(struct dir_struct *dir,
                                           struct untracked_cache_dir *untracked,
                                           struct cached_dir *cdir,
@@ -2021,6 +2081,11 @@ static enum path_treatment treat_path_fast(struct dir_struct *dir,
                                           int baselen,
                                           const struct pathspec *pathspec)
 {
+       /*
+        * WARNING: From this function, you can return path_recurse or you
+        *          can call read_directory_recursive() (or neither), but
+        *          you CAN'T DO BOTH.
+        */
        strbuf_setlen(path, baselen);
        if (!cdir->ucd) {
                strbuf_addstr(path, cdir->file);
@@ -2054,6 +2119,8 @@ static enum path_treatment treat_path(struct dir_struct *dir,
                                      int baselen,
                                      const struct pathspec *pathspec)
 {
+       int has_path_in_index, dtype, excluded;
+
        if (!cdir->d_name)
                return treat_path_fast(dir, untracked, cdir, istate, path,
                                       baselen, pathspec);
@@ -2064,8 +2131,72 @@ static enum path_treatment treat_path(struct dir_struct *dir,
        if (simplify_away(path->buf, path->len, pathspec))
                return path_none;
 
-       return treat_one_path(dir, untracked, istate, path, baselen, pathspec,
-                             cdir->d_type);
+       dtype = resolve_dtype(cdir->d_type, istate, path->buf, path->len);
+
+       /* Always exclude indexed files */
+       has_path_in_index = !!index_file_exists(istate, path->buf, path->len,
+                                               ignore_case);
+       if (dtype != DT_DIR && has_path_in_index)
+               return path_none;
+
+       /*
+        * When we are looking at a directory P in the working tree,
+        * there are three cases:
+        *
+        * (1) P exists in the index.  Everything inside the directory P in
+        * the working tree needs to go when P is checked out from the
+        * index.
+        *
+        * (2) P does not exist in the index, but there is P/Q in the index.
+        * We know P will stay a directory when we check out the contents
+        * of the index, but we do not know yet if there is a directory
+        * P/Q in the working tree to be killed, so we need to recurse.
+        *
+        * (3) P does not exist in the index, and there is no P/Q in the index
+        * to require P to be a directory, either.  Only in this case, we
+        * know that everything inside P will not be killed without
+        * recursing.
+        */
+       if ((dir->flags & DIR_COLLECT_KILLED_ONLY) &&
+           (dtype == DT_DIR) &&
+           !has_path_in_index &&
+           (directory_exists_in_index(istate, path->buf, path->len) == index_nonexistent))
+               return path_none;
+
+       excluded = is_excluded(dir, istate, path->buf, &dtype);
+
+       /*
+        * Excluded? If we don't explicitly want to show
+        * ignored files, ignore it
+        */
+       if (excluded && !(dir->flags & (DIR_SHOW_IGNORED|DIR_SHOW_IGNORED_TOO)))
+               return path_excluded;
+
+       switch (dtype) {
+       default:
+               return path_none;
+       case DT_DIR:
+               /*
+                * WARNING: Do not ignore/amend the return value from
+                * treat_directory(), and especially do not change it to return
+                * path_recurse as that can cause exponential slowdown.
+                * Instead, modify treat_directory() to return the right value.
+                */
+               strbuf_addch(path, '/');
+               return treat_directory(dir, istate, untracked,
+                                      path->buf, path->len,
+                                      baselen, excluded, pathspec);
+       case DT_REG:
+       case DT_LNK:
+               if (excluded)
+                       return path_excluded;
+               if (pathspec &&
+                   !do_match_pathspec(istate, pathspec, path->buf, path->len,
+                                      0 /* prefix */, NULL /* seen */,
+                                      0 /* flags */))
+                       return path_none;
+               return path_untracked;
+       }
 }
 
 static void add_untracked(struct untracked_cache_dir *dir, const char *name)
@@ -2245,7 +2376,7 @@ static void add_path_to_appropriate_result_list(struct dir_struct *dir,
  * If 'stop_at_first_file' is specified, 'path_excluded' is returned
  * to signal that a file was found. This is the least significant value that
  * indicates that a file was encountered that does not depend on the order of
- * whether an untracked or exluded path was encountered first.
+ * whether an untracked or excluded path was encountered first.
  *
  * Returns the most significant path_treatment value encountered in the scan.
  * If 'stop_at_first_file' is specified, `path_excluded` is the most
@@ -2258,14 +2389,10 @@ static enum path_treatment read_directory_recursive(struct dir_struct *dir,
        int stop_at_first_file, const struct pathspec *pathspec)
 {
        /*
-        * WARNING WARNING WARNING:
-        *
-        * Any updates to the traversal logic here may need corresponding
-        * updates in treat_leading_path().  See the commit message for the
-        * commit adding this warning as well as the commit preceding it
-        * for details.
+        * WARNING: Do NOT recurse unless path_recurse is returned from
+        *          treat_path().  Recursing on any other return value
+        *          can result in exponential slowdown.
         */
-
        struct cached_dir cdir;
        enum path_treatment state, subdir_state, dir_state = path_none;
        struct strbuf path = STRBUF_INIT;
@@ -2287,13 +2414,7 @@ static enum path_treatment read_directory_recursive(struct dir_struct *dir,
                        dir_state = state;
 
                /* recurse into subdir if instructed by treat_path */
-               if ((state == path_recurse) ||
-                       ((state == path_untracked) &&
-                        (resolve_dtype(cdir.d_type, istate, path.buf, path.len) == DT_DIR) &&
-                        ((dir->flags & DIR_SHOW_IGNORED_TOO) ||
-                         (pathspec &&
-                          do_match_pathspec(istate, pathspec, path.buf, path.len,
-                                            baselen, NULL, DO_MATCH_LEADING_PATHSPEC) == MATCHED_RECURSIVELY_LEADING_PATHSPEC)))) {
+               if (state == path_recurse) {
                        struct untracked_cache_dir *ud;
                        ud = lookup_untracked(dir->untracked, untracked,
                                              path.buf + baselen,
@@ -2341,7 +2462,7 @@ static enum path_treatment read_directory_recursive(struct dir_struct *dir,
                                        add_untracked(untracked, path.buf + baselen);
                                break;
                        }
-                       /* skip the dir_add_* part */
+                       /* skip the add_path_to_appropriate_result_list() call */
                        continue;
                }
 
@@ -2377,15 +2498,6 @@ static int treat_leading_path(struct dir_struct *dir,
                              const char *path, int len,
                              const struct pathspec *pathspec)
 {
-       /*
-        * WARNING WARNING WARNING:
-        *
-        * Any updates to the traversal logic here may need corresponding
-        * updates in read_directory_recursive().  See 777b420347 (dir:
-        * synchronize treat_leading_path() and read_directory_recursive(),
-        * 2019-12-19) and its parent commit for details.
-        */
-
        struct strbuf sb = STRBUF_INIT;
        struct strbuf subdir = STRBUF_INIT;
        int prevlen, baselen;
@@ -2436,23 +2548,7 @@ static int treat_leading_path(struct dir_struct *dir,
                strbuf_reset(&subdir);
                strbuf_add(&subdir, path+prevlen, baselen-prevlen);
                cdir.d_name = subdir.buf;
-               state = treat_path(dir, NULL, &cdir, istate, &sb, prevlen,
-                                   pathspec);
-               if (state == path_untracked &&
-                   resolve_dtype(cdir.d_type, istate, sb.buf, sb.len) == DT_DIR &&
-                   (dir->flags & DIR_SHOW_IGNORED_TOO ||
-                    do_match_pathspec(istate, pathspec, sb.buf, sb.len,
-                                      baselen, NULL, DO_MATCH_LEADING_PATHSPEC) == MATCHED_RECURSIVELY_LEADING_PATHSPEC)) {
-                       if (!match_pathspec(istate, pathspec, sb.buf, sb.len,
-                                           0 /* prefix */, NULL,
-                                           0 /* do NOT special case dirs */))
-                               state = path_none;
-                       add_path_to_appropriate_result_list(dir, NULL, &cdir,
-                                                           istate,
-                                                           &sb, baselen,
-                                                           pathspec, state);
-                       state = path_recurse;
-               }
+               state = treat_path(dir, NULL, &cdir, istate, &sb, prevlen, pathspec);
 
                if (state != path_recurse)
                        break; /* do not recurse into it */
@@ -2652,28 +2748,6 @@ int read_directory(struct dir_struct *dir, struct index_state *istate,
        QSORT(dir->entries, dir->nr, cmp_dir_entry);
        QSORT(dir->ignored, dir->ignored_nr, cmp_dir_entry);
 
-       /*
-        * If DIR_SHOW_IGNORED_TOO is set, read_directory_recursive() will
-        * also pick up untracked contents of untracked dirs; by default
-        * we discard these, but given DIR_KEEP_UNTRACKED_CONTENTS we do not.
-        */
-       if ((dir->flags & DIR_SHOW_IGNORED_TOO) &&
-                    !(dir->flags & DIR_KEEP_UNTRACKED_CONTENTS)) {
-               int i, j;
-
-               /* remove from dir->entries untracked contents of untracked dirs */
-               for (i = j = 0; j < dir->nr; j++) {
-                       if (i &&
-                           check_dir_entry_contains(dir->entries[i - 1], dir->entries[j])) {
-                               FREE_AND_NULL(dir->entries[j]);
-                       } else {
-                               dir->entries[i++] = dir->entries[j];
-                       }
-               }
-
-               dir->nr = i;
-       }
-
        trace_performance_leave("read directory %.*s", len, path);
        if (dir->untracked) {
                static int force_untracked_cache = -1;