git.ipfire.org Git - thirdparty/git.git/blobdiff - dir.c
Merge branch 'ma/diff-doc-clarify-regexp-example' into maint
[thirdparty/git.git] / dir.c
diff --git a/dir.c b/dir.c
index 9047373f6b5ae08d369c8ed78021b4c43c98b008..b460211e6149707b813d9354154233272cdfd87d 100644 (file)
--- a/dir.c
+++ b/dir.c
@@ -2,8 +2,6 @@
  * This handles recursive filename detection with exclude
  * files, index knowledge etc..
  *
- * See Documentation/technical/api-directory-listing.txt
- *
  * Copyright (C) Linus Torvalds, 2005-2006
  *              Junio Hamano, 2005-2006
  */
@@ -43,7 +41,8 @@ struct cached_dir {
        int nr_files;
        int nr_dirs;
 
-       struct dirent *de;
+       const char *d_name;
+       int d_type;
        const char *file;
        struct untracked_cache_dir *ucd;
 };
@@ -52,8 +51,8 @@ static enum path_treatment read_directory_recursive(struct dir_struct *dir,
        struct index_state *istate, const char *path, int len,
        struct untracked_cache_dir *untracked,
        int check_only, int stop_at_first_file, const struct pathspec *pathspec);
-static int get_dtype(struct dirent *de, struct index_state *istate,
-                    const char *path, int len);
+static int resolve_dtype(int dtype, struct index_state *istate,
+                        const char *path, int len);
 
 int count_slashes(const char *s)
 {
@@ -373,12 +372,19 @@ static int match_pathspec_item(const struct index_state *istate,
                    !ps_strncmp(item, match, name, namelen))
                        return MATCHED_RECURSIVELY_LEADING_PATHSPEC;
 
-               /* name" doesn't match up to the first wild character */
+               /* name doesn't match up to the first wild character */
                if (item->nowildcard_len < item->len &&
                    ps_strncmp(item, match, name,
                               item->nowildcard_len - prefix))
                        return 0;
 
+               /*
+                * name has no wildcard, and it didn't match as a leading
+                * pathspec so return.
+                */
+               if (item->nowildcard_len == item->len)
+                       return 0;
+
                /*
                 * Here is where we would perform a wildmatch to check if
                 * "name" can be matched as a directory (or a prefix) against
@@ -611,6 +617,159 @@ void parse_path_pattern(const char **pattern,
        *patternlen = len;
 }
 
+int pl_hashmap_cmp(const void *unused_cmp_data,
+                  const struct hashmap_entry *a,
+                  const struct hashmap_entry *b,
+                  const void *key)
+{
+       const struct pattern_entry *ee1 =
+                       container_of(a, struct pattern_entry, ent);
+       const struct pattern_entry *ee2 =
+                       container_of(b, struct pattern_entry, ent);
+       /* Comparator for pattern_entry hashmaps: returns 0 when the two patterns compare equal; unused_cmp_data and key are never read. */
+       size_t min_len = ee1->patternlen <= ee2->patternlen
+                        ? ee1->patternlen
+                        : ee2->patternlen;
+       /* NOTE(review): only min_len bytes are compared, so a pattern compares equal to any longer pattern it is a prefix of -- confirm this is intended. */
+       if (ignore_case)
+               return strncasecmp(ee1->pattern, ee2->pattern, min_len);
+       return strncmp(ee1->pattern, ee2->pattern, min_len);
+}
+
+static void add_pattern_to_hashsets(struct pattern_list *pl, struct path_pattern *given)
+{
+       struct pattern_entry *translated;
+       char *truncated;
+       char *data = NULL;
+       /* Cone mode only: fold "given" into pl's recursive/parent hashmaps, or turn cone mode off when it does not fit. */
+       if (!pl->use_cone_patterns)
+               return;
+       /* The pattern that is exactly slash-star: negative + must-be-dir clears full_cone, no flags at all sets it. */
+       if (given->flags & PATTERN_FLAG_NEGATIVE &&
+           given->flags & PATTERN_FLAG_MUSTBEDIR &&
+           !strcmp(given->pattern, "/*")) {
+               pl->full_cone = 0;
+               return;
+       }
+
+       if (!given->flags && !strcmp(given->pattern, "/*")) {
+               pl->full_cone = 1;
+               return;
+       }
+       /* A longer pattern ending in slash-star must be negative and must name a directory already in recursive_hashmap. */
+       if (given->patternlen > 2 &&
+           !strcmp(given->pattern + given->patternlen - 2, "/*")) {
+               if (!(given->flags & PATTERN_FLAG_NEGATIVE)) {
+                       /* Not a cone pattern. */
+                       pl->use_cone_patterns = 0;
+                       warning(_("unrecognized pattern: '%s'"), given->pattern);
+                       goto clear_hashmaps;
+               }
+               /* The lookup key is the pattern with its trailing slash-star cut off. */
+               truncated = xstrdup(given->pattern);
+               truncated[given->patternlen - 2] = 0;
+
+               translated = xmalloc(sizeof(struct pattern_entry));
+               translated->pattern = truncated;
+               translated->patternlen = given->patternlen - 2;
+               hashmap_entry_init(&translated->ent,
+                                  ignore_case ?
+                                  strihash(translated->pattern) :
+                                  strhash(translated->pattern));
+
+               if (!hashmap_get_entry(&pl->recursive_hashmap,
+                                      translated, ent, NULL)) {
+                       /* We did not see the "parent" included */
+                       warning(_("unrecognized negative pattern: '%s'"),
+                               given->pattern);
+                       free(truncated);
+                       free(translated);
+                       goto clear_hashmaps;
+               }
+               /* Move the directory from the recursive set to the parent set. */
+               hashmap_add(&pl->parent_hashmap, &translated->ent);
+               hashmap_remove(&pl->recursive_hashmap, &translated->ent, &data); /* NOTE(review): assumes the removed entry comes back through &data; if not, free(data) is a no-op and the entry leaks -- confirm */
+               free(data);
+               return;
+       }
+       /* Any other negative pattern is rejected and cone mode is disabled. */
+       if (given->flags & PATTERN_FLAG_NEGATIVE) {
+               warning(_("unrecognized negative pattern: '%s'"),
+                       given->pattern);
+               goto clear_hashmaps;
+       }
+       /* Everything else is copied and linked into recursive_hashmap. */
+       translated = xmalloc(sizeof(struct pattern_entry));
+
+       translated->pattern = xstrdup(given->pattern);
+       translated->patternlen = given->patternlen;
+       hashmap_entry_init(&translated->ent,
+                          ignore_case ?
+                          strihash(translated->pattern) :
+                          strhash(translated->pattern));
+
+       hashmap_add(&pl->recursive_hashmap, &translated->ent);
+
+       if (hashmap_get_entry(&pl->parent_hashmap, translated, ent, NULL)) {
+               /* we already included this at the parent level */
+               warning(_("your sparse-checkout file may have issues: pattern '%s' is repeated"),
+                       given->pattern);
+               /* "translated" is already linked into recursive_hashmap, so freeing it here would */
+               /* leave a dangling entry; give up on cone mode and let clear_hashmaps release it. */
+               goto clear_hashmaps;
+       }
+
+       return;
+
+clear_hashmaps:
+       warning(_("disabling cone pattern matching"));
+       hashmap_free_entries(&pl->parent_hashmap, struct pattern_entry, ent);
+       hashmap_free_entries(&pl->recursive_hashmap, struct pattern_entry, ent);
+       pl->use_cone_patterns = 0;
+}
+
+static int hashmap_contains_path(struct hashmap *map,
+                                struct strbuf *pattern)
+{
+       struct pattern_entry p;
+       /* Returns 1 if hashmap_get_entry() finds an entry in map for pattern's current contents, 0 otherwise. */
+       /* Build a lookup key on the stack; p.pattern points at pattern->buf, nothing is copied. */
+       p.pattern = pattern->buf;
+       p.patternlen = pattern->len;
+       hashmap_entry_init(&p.ent, /* strihash() is used instead of strhash() when ignore_case is set */
+                          ignore_case ?
+                          strihash(p.pattern) :
+                          strhash(p.pattern));
+       return !!hashmap_get_entry(map, &p, ent, NULL); /* !! collapses the entry pointer to 0 or 1 */
+}
+
+int hashmap_contains_parent(struct hashmap *map,
+                           const char *path,
+                           struct strbuf *buffer)
+{
+       char *slash_pos;
+       /* Returns 1 if some proper ancestor directory of path is found in map, else 0; buffer is scratch space and is overwritten. */
+       strbuf_setlen(buffer, 0);
+       /* Copy path into buffer, prepending a slash when path does not already start with one. */
+       if (path[0] != '/')
+               strbuf_addch(buffer, '/');
+
+       strbuf_addstr(buffer, path);
+
+       slash_pos = strrchr(buffer->buf, '/');
+       /* Cut off the last component before every lookup (so path itself is never looked up); stop when only the leading slash is left. */
+       while (slash_pos > buffer->buf) {
+               strbuf_setlen(buffer, slash_pos - buffer->buf);
+
+               if (hashmap_contains_path(map, buffer))
+                       return 1;
+
+               slash_pos = strrchr(buffer->buf, '/');
+       }
+
+       return 0;
+}
+
 void add_pattern(const char *string, const char *base,
                 int baselen, struct pattern_list *pl, int srcpos)
 {
@@ -635,6 +794,8 @@ void add_pattern(const char *string, const char *base,
        ALLOC_GROW(pl->patterns, pl->nr + 1, pl->alloc);
        pl->patterns[pl->nr++] = pattern;
        pattern->pl = pl;
+
+       add_pattern_to_hashsets(pl, pattern);
 }
 
 static int read_skip_worktree_file_from_index(const struct index_state *istate,
@@ -860,6 +1021,9 @@ static int add_patterns_from_buffer(char *buf, size_t size,
        int i, lineno = 1;
        char *entry;
 
+       hashmap_init(&pl->recursive_hashmap, pl_hashmap_cmp, NULL, 0);
+       hashmap_init(&pl->parent_hashmap, pl_hashmap_cmp, NULL, 0);
+
        pl->filebuf = buf;
 
        if (skip_utf8_bom(&buf, size))
@@ -1052,8 +1216,7 @@ static struct path_pattern *last_matching_pattern_from_list(const char *pathname
                int prefix = pattern->nowildcardlen;
 
                if (pattern->flags & PATTERN_FLAG_MUSTBEDIR) {
-                       if (*dtype == DT_UNKNOWN)
-                               *dtype = get_dtype(NULL, istate, pathname, pathlen);
+                       *dtype = resolve_dtype(*dtype, istate, pathname, pathlen);
                        if (*dtype != DT_DIR)
                                continue;
                }
@@ -1096,16 +1259,58 @@ enum pattern_match_result path_matches_pattern_list(
                                struct index_state *istate)
 {
        struct path_pattern *pattern;
-       pattern = last_matching_pattern_from_list(pathname, pathlen, basename,
-                                                 dtype, pl, istate);
-       if (pattern) {
-               if (pattern->flags & PATTERN_FLAG_NEGATIVE)
-                       return NOT_MATCHED;
-               else
-                       return MATCHED;
+       struct strbuf parent_pathname = STRBUF_INIT;
+       int result = NOT_MATCHED;
+       const char *slash_pos;
+
+       if (!pl->use_cone_patterns) {
+               pattern = last_matching_pattern_from_list(pathname, pathlen, basename,
+                                                       dtype, pl, istate);
+               if (pattern) {
+                       if (pattern->flags & PATTERN_FLAG_NEGATIVE)
+                               return NOT_MATCHED;
+                       else
+                               return MATCHED;
+               }
+
+               return UNDECIDED;
        }
 
-       return UNDECIDED;
+       if (pl->full_cone)
+               return MATCHED;
+
+       strbuf_addch(&parent_pathname, '/');
+       strbuf_add(&parent_pathname, pathname, pathlen);
+
+       if (hashmap_contains_path(&pl->recursive_hashmap,
+                                 &parent_pathname)) {
+               result = MATCHED_RECURSIVE;
+               goto done;
+       }
+
+       slash_pos = strrchr(parent_pathname.buf, '/');
+
+       if (slash_pos == parent_pathname.buf) {
+               /* include every file in root */
+               result = MATCHED;
+               goto done;
+       }
+
+       strbuf_setlen(&parent_pathname, slash_pos - parent_pathname.buf);
+
+       if (hashmap_contains_path(&pl->parent_hashmap, &parent_pathname)) {
+               result = MATCHED;
+               goto done;
+       }
+
+       if (hashmap_contains_parent(&pl->recursive_hashmap,
+                                   pathname,
+                                   &parent_pathname))
+               result = MATCHED_RECURSIVE;
+
+done:
+       strbuf_release(&parent_pathname);
+       return result;
 }
 
 static struct path_pattern *last_matching_pattern_from_lists(
@@ -1454,6 +1659,8 @@ static enum path_treatment treat_directory(struct dir_struct *dir,
        const char *dirname, int len, int baselen, int exclude,
        const struct pathspec *pathspec)
 {
+       int nested_repo = 0;
+
        /* The "len-1" is to strip the final '/' */
        switch (directory_exists_in_index(istate, dirname, len-1)) {
        case index_directory:
@@ -1463,15 +1670,16 @@ static enum path_treatment treat_directory(struct dir_struct *dir,
                return path_none;
 
        case index_nonexistent:
-               if (dir->flags & DIR_SKIP_NESTED_GIT) {
-                       int nested_repo;
+               if ((dir->flags & DIR_SKIP_NESTED_GIT) ||
+                   !(dir->flags & DIR_NO_GITLINKS)) {
                        struct strbuf sb = STRBUF_INIT;
                        strbuf_addstr(&sb, dirname);
                        nested_repo = is_nonbare_repository_dir(&sb);
                        strbuf_release(&sb);
-                       if (nested_repo)
-                               return path_none;
                }
+               if (nested_repo)
+                       return ((dir->flags & DIR_SKIP_NESTED_GIT) ? path_none :
+                               (exclude ? path_excluded : path_untracked));
 
                if (dir->flags & DIR_SHOW_OTHER_DIRECTORIES)
                        break;
@@ -1499,13 +1707,6 @@ static enum path_treatment treat_directory(struct dir_struct *dir,
 
                        return path_none;
                }
-               if (!(dir->flags & DIR_NO_GITLINKS)) {
-                       struct strbuf sb = STRBUF_INIT;
-                       strbuf_addstr(&sb, dirname);
-                       if (is_nonbare_repository_dir(&sb))
-                               return exclude ? path_excluded : path_untracked;
-                       strbuf_release(&sb);
-               }
                return path_recurse;
        }
 
@@ -1641,10 +1842,9 @@ static int get_index_dtype(struct index_state *istate,
        return DT_UNKNOWN;
 }
 
-static int get_dtype(struct dirent *de, struct index_state *istate,
-                    const char *path, int len)
+static int resolve_dtype(int dtype, struct index_state *istate,
+                        const char *path, int len)
 {
-       int dtype = de ? DTYPE(de) : DT_UNKNOWN;
        struct stat st;
 
        if (dtype != DT_UNKNOWN)
@@ -1669,14 +1869,13 @@ static enum path_treatment treat_one_path(struct dir_struct *dir,
                                          struct strbuf *path,
                                          int baselen,
                                          const struct pathspec *pathspec,
-                                         int dtype, struct dirent *de)
+                                         int dtype)
 {
        int exclude;
        int has_path_in_index = !!index_file_exists(istate, path->buf, path->len, ignore_case);
        enum path_treatment path_treatment;
 
-       if (dtype == DT_UNKNOWN)
-               dtype = get_dtype(de, istate, path->buf, path->len);
+       dtype = resolve_dtype(dtype, istate, path->buf, path->len);
 
        /* Always exclude indexed files */
        if (dtype != DT_DIR && has_path_in_index)
@@ -1784,21 +1983,18 @@ static enum path_treatment treat_path(struct dir_struct *dir,
                                      int baselen,
                                      const struct pathspec *pathspec)
 {
-       int dtype;
-       struct dirent *de = cdir->de;
-
-       if (!de)
+       if (!cdir->d_name)
                return treat_path_fast(dir, untracked, cdir, istate, path,
                                       baselen, pathspec);
-       if (is_dot_or_dotdot(de->d_name) || !fspathcmp(de->d_name, ".git"))
+       if (is_dot_or_dotdot(cdir->d_name) || !fspathcmp(cdir->d_name, ".git"))
                return path_none;
        strbuf_setlen(path, baselen);
-       strbuf_addstr(path, de->d_name);
+       strbuf_addstr(path, cdir->d_name);
        if (simplify_away(path->buf, path->len, pathspec))
                return path_none;
 
-       dtype = DTYPE(de);
-       return treat_one_path(dir, untracked, istate, path, baselen, pathspec, dtype, de);
+       return treat_one_path(dir, untracked, istate, path, baselen, pathspec,
+                             cdir->d_type);
 }
 
 static void add_untracked(struct untracked_cache_dir *dir, const char *name)
@@ -1886,10 +2082,17 @@ static int open_cached_dir(struct cached_dir *cdir,
 
 static int read_cached_dir(struct cached_dir *cdir)
 {
+       struct dirent *de;
+
        if (cdir->fdir) {
-               cdir->de = readdir(cdir->fdir);
-               if (!cdir->de)
+               de = readdir(cdir->fdir);
+               if (!de) {
+                       cdir->d_name = NULL;
+                       cdir->d_type = DT_UNKNOWN;
                        return -1;
+               }
+               cdir->d_name = de->d_name;
+               cdir->d_type = DTYPE(de);
                return 0;
        }
        while (cdir->nr_dirs < cdir->untracked->dirs_nr) {
@@ -1925,6 +2128,40 @@ static void close_cached_dir(struct cached_dir *cdir)
        }
 }
 
+static void add_path_to_appropriate_result_list(struct dir_struct *dir,
+       struct untracked_cache_dir *untracked,
+       struct cached_dir *cdir,
+       struct index_state *istate,
+       struct strbuf *path,
+       int baselen,
+       const struct pathspec *pathspec,
+       enum path_treatment state)
+{
+       /* Hand path to dir_add_name() or dir_add_ignored() according to state and dir->flags; any other state adds nothing. */
+       switch (state) {
+       case path_excluded:
+               if (dir->flags & DIR_SHOW_IGNORED) /* with this flag, excluded paths go through dir_add_name() */
+                       dir_add_name(dir, istate, path->buf, path->len);
+               else if ((dir->flags & DIR_SHOW_IGNORED_TOO) ||
+                       ((dir->flags & DIR_COLLECT_IGNORED) &&
+                       exclude_matches_pathspec(path->buf, path->len,
+                                                pathspec)))
+                       dir_add_ignored(dir, istate, path->buf, path->len);
+               break;
+
+       case path_untracked:
+               if (dir->flags & DIR_SHOW_IGNORED) /* untracked paths are not added when DIR_SHOW_IGNORED is set */
+                       break;
+               dir_add_name(dir, istate, path->buf, path->len);
+               if (cdir->fdir) /* add_untracked() gets the part of path after baselen, and only when cdir->fdir is set */
+                       add_untracked(untracked, path->buf + baselen);
+               break;
+
+       default: /* every other state: nothing to add */
+               break;
+       }
+}
+
 /*
  * Read a directory tree. We currently ignore anything but
  * directories, regular files and symlinks. That's because git
@@ -1949,6 +2186,15 @@ static enum path_treatment read_directory_recursive(struct dir_struct *dir,
        struct untracked_cache_dir *untracked, int check_only,
        int stop_at_first_file, const struct pathspec *pathspec)
 {
+       /*
+        * WARNING WARNING WARNING:
+        *
+        * Any updates to the traversal logic here may need corresponding
+        * updates in treat_leading_path().  See the commit message for the
+        * commit adding this warning as well as the commit preceding it
+        * for details.
+        */
+
        struct cached_dir cdir;
        enum path_treatment state, subdir_state, dir_state = path_none;
        struct strbuf path = STRBUF_INIT;
@@ -1972,7 +2218,7 @@ static enum path_treatment read_directory_recursive(struct dir_struct *dir,
                /* recurse into subdir if instructed by treat_path */
                if ((state == path_recurse) ||
                        ((state == path_untracked) &&
-                        (get_dtype(cdir.de, istate, path.buf, path.len) == DT_DIR) &&
+                        (resolve_dtype(cdir.d_type, istate, path.buf, path.len) == DT_DIR) &&
                         ((dir->flags & DIR_SHOW_IGNORED_TOO) ||
                          (pathspec &&
                           do_match_pathspec(istate, pathspec, path.buf, path.len,
@@ -2028,29 +2274,9 @@ static enum path_treatment read_directory_recursive(struct dir_struct *dir,
                        continue;
                }
 
-               /* add the path to the appropriate result list */
-               switch (state) {
-               case path_excluded:
-                       if (dir->flags & DIR_SHOW_IGNORED)
-                               dir_add_name(dir, istate, path.buf, path.len);
-                       else if ((dir->flags & DIR_SHOW_IGNORED_TOO) ||
-                               ((dir->flags & DIR_COLLECT_IGNORED) &&
-                               exclude_matches_pathspec(path.buf, path.len,
-                                                        pathspec)))
-                               dir_add_ignored(dir, istate, path.buf, path.len);
-                       break;
-
-               case path_untracked:
-                       if (dir->flags & DIR_SHOW_IGNORED)
-                               break;
-                       dir_add_name(dir, istate, path.buf, path.len);
-                       if (cdir.fdir)
-                               add_untracked(untracked, path.buf + baselen);
-                       break;
-
-               default:
-                       break;
-               }
+               add_path_to_appropriate_result_list(dir, untracked, &cdir,
+                                                   istate, &path, baselen,
+                                                   pathspec, state);
        }
        close_cached_dir(&cdir);
  out:
@@ -2080,41 +2306,95 @@ static int treat_leading_path(struct dir_struct *dir,
                              const char *path, int len,
                              const struct pathspec *pathspec)
 {
+       /*
+        * WARNING WARNING WARNING:
+        *
+        * Any updates to the traversal logic here may need corresponding
+        * updates in read_directory_recursive().  See 777b420347 (dir:
+        * synchronize treat_leading_path() and read_directory_recursive(),
+        * 2019-12-19) and its parent commit for details.
+        */
+
        struct strbuf sb = STRBUF_INIT;
-       int baselen, rc = 0;
+       struct strbuf subdir = STRBUF_INIT;
+       int prevlen, baselen;
        const char *cp;
-       int old_flags = dir->flags;
+       struct cached_dir cdir;
+       enum path_treatment state = path_none;
+
+       /*
+        * For each directory component of path, we are going to check whether
+        * that path is relevant given the pathspec.  For example, if path is
+        *    foo/bar/baz/
+        * then we will ask treat_path() whether we should go into foo, then
+        * whether we should go into bar, then whether baz is relevant.
+        * Checking each is important because e.g. if path is
+        *    .git/info/
+        * then we need to check .git to know we shouldn't traverse it.
+        * If the return from treat_path() is:
+        *    * path_none, for any path, we return false.
+        *    * path_recurse, for all path components, we return true
+        *    * <anything else> for some intermediate component, we make sure
+        *        to add that path to the relevant list but return false
+        *        signifying that we shouldn't recurse into it.
+        */
 
        while (len && path[len - 1] == '/')
                len--;
        if (!len)
                return 1;
+
+       memset(&cdir, 0, sizeof(cdir));
+       cdir.d_type = DT_DIR;
        baselen = 0;
-       dir->flags &= ~DIR_SHOW_OTHER_DIRECTORIES;
+       prevlen = 0;
        while (1) {
-               cp = path + baselen + !!baselen;
+               prevlen = baselen + !!baselen;
+               cp = path + prevlen;
                cp = memchr(cp, '/', path + len - cp);
                if (!cp)
                        baselen = len;
                else
                        baselen = cp - path;
-               strbuf_setlen(&sb, 0);
+               strbuf_reset(&sb);
                strbuf_add(&sb, path, baselen);
                if (!is_directory(sb.buf))
                        break;
-               if (simplify_away(sb.buf, sb.len, pathspec))
-                       break;
-               if (treat_one_path(dir, NULL, istate, &sb, baselen, pathspec,
-                                  DT_DIR, NULL) == path_none)
+               strbuf_reset(&sb);
+               strbuf_add(&sb, path, prevlen);
+               strbuf_reset(&subdir);
+               strbuf_add(&subdir, path+prevlen, baselen-prevlen);
+               cdir.d_name = subdir.buf;
+               state = treat_path(dir, NULL, &cdir, istate, &sb, prevlen,
+                                   pathspec);
+               if (state == path_untracked &&
+                   resolve_dtype(cdir.d_type, istate, sb.buf, sb.len) == DT_DIR &&
+                   (dir->flags & DIR_SHOW_IGNORED_TOO ||
+                    do_match_pathspec(istate, pathspec, sb.buf, sb.len,
+                                      baselen, NULL, DO_MATCH_LEADING_PATHSPEC) == MATCHED_RECURSIVELY_LEADING_PATHSPEC)) {
+                       if (!match_pathspec(istate, pathspec, sb.buf, sb.len,
+                                           0 /* prefix */, NULL,
+                                           0 /* do NOT special case dirs */))
+                               state = path_none;
+                       add_path_to_appropriate_result_list(dir, NULL, &cdir,
+                                                           istate,
+                                                           &sb, baselen,
+                                                           pathspec, state);
+                       state = path_recurse;
+               }
+
+               if (state != path_recurse)
                        break; /* do not recurse into it */
-               if (len <= baselen) {
-                       rc = 1;
+               if (len <= baselen)
                        break; /* finished checking */
-               }
        }
+       add_path_to_appropriate_result_list(dir, NULL, &cdir, istate,
+                                           &sb, baselen, pathspec,
+                                           state);
+
+       strbuf_release(&subdir);
        strbuf_release(&sb);
-       dir->flags = old_flags;
-       return rc;
+       return state == path_recurse;
 }
 
 static const char *get_ident_string(void)