]> git.ipfire.org Git - thirdparty/git.git/blobdiff - dir.c
Merge branch 'jt/sha1-file-remove-oi-skip-cached' into maint
[thirdparty/git.git] / dir.c
diff --git a/dir.c b/dir.c
index 9047373f6b5ae08d369c8ed78021b4c43c98b008..7d255227b130d7729747fe1a86c8efc04eee9f7e 100644 (file)
--- a/dir.c
+++ b/dir.c
@@ -2,8 +2,6 @@
  * This handles recursive filename detection with exclude
  * files, index knowledge etc..
  *
- * See Documentation/technical/api-directory-listing.txt
- *
  * Copyright (C) Linus Torvalds, 2005-2006
  *              Junio Hamano, 2005-2006
  */
@@ -373,12 +371,19 @@ static int match_pathspec_item(const struct index_state *istate,
                    !ps_strncmp(item, match, name, namelen))
                        return MATCHED_RECURSIVELY_LEADING_PATHSPEC;
 
-               /* name" doesn't match up to the first wild character */
+               /* name doesn't match up to the first wild character */
                if (item->nowildcard_len < item->len &&
                    ps_strncmp(item, match, name,
                               item->nowildcard_len - prefix))
                        return 0;
 
+               /*
+                * name has no wildcard, and it didn't match as a leading
+                * pathspec so return.
+                */
+               if (item->nowildcard_len == item->len)
+                       return 0;
+
                /*
                 * Here is where we would perform a wildmatch to check if
                 * "name" can be matched as a directory (or a prefix) against
@@ -611,6 +616,159 @@ void parse_path_pattern(const char **pattern,
        *patternlen = len;
 }
 
+int pl_hashmap_cmp(const void *unused_cmp_data,
+                  const struct hashmap_entry *a,
+                  const struct hashmap_entry *b,
+                  const void *key)
+{
+       const struct pattern_entry *ee1 =
+                       container_of(a, struct pattern_entry, ent);
+       const struct pattern_entry *ee2 =
+                       container_of(b, struct pattern_entry, ent);
+
+       size_t min_len = ee1->patternlen <= ee2->patternlen
+                        ? ee1->patternlen
+                        : ee2->patternlen;
+
+       if (ignore_case)
+               return strncasecmp(ee1->pattern, ee2->pattern, min_len);
+       return strncmp(ee1->pattern, ee2->pattern, min_len);
+}
+
+static void add_pattern_to_hashsets(struct pattern_list *pl, struct path_pattern *given)
+{
+       struct pattern_entry *translated;
+       char *truncated;
+       char *data = NULL;
+
+       if (!pl->use_cone_patterns)
+               return;
+
+       if (given->flags & PATTERN_FLAG_NEGATIVE &&
+           given->flags & PATTERN_FLAG_MUSTBEDIR &&
+           !strcmp(given->pattern, "/*")) {
+               pl->full_cone = 0;
+               return;
+       }
+
+       if (!given->flags && !strcmp(given->pattern, "/*")) {
+               pl->full_cone = 1;
+               return;
+       }
+
+       if (given->patternlen > 2 &&
+           !strcmp(given->pattern + given->patternlen - 2, "/*")) {
+               if (!(given->flags & PATTERN_FLAG_NEGATIVE)) {
+                       /* Not a cone pattern. */
+                       pl->use_cone_patterns = 0;
+                       warning(_("unrecognized pattern: '%s'"), given->pattern);
+                       goto clear_hashmaps;
+               }
+
+               truncated = xstrdup(given->pattern);
+               truncated[given->patternlen - 2] = 0;
+
+               translated = xmalloc(sizeof(struct pattern_entry));
+               translated->pattern = truncated;
+               translated->patternlen = given->patternlen - 2;
+               hashmap_entry_init(&translated->ent,
+                                  ignore_case ?
+                                  strihash(translated->pattern) :
+                                  strhash(translated->pattern));
+
+               if (!hashmap_get_entry(&pl->recursive_hashmap,
+                                      translated, ent, NULL)) {
+                       /* We did not see the "parent" included */
+                       warning(_("unrecognized negative pattern: '%s'"),
+                               given->pattern);
+                       free(truncated);
+                       free(translated);
+                       goto clear_hashmaps;
+               }
+
+               hashmap_add(&pl->parent_hashmap, &translated->ent);
+               hashmap_remove(&pl->recursive_hashmap, &translated->ent, &data);
+               free(data);
+               return;
+       }
+
+       if (given->flags & PATTERN_FLAG_NEGATIVE) {
+               warning(_("unrecognized negative pattern: '%s'"),
+                       given->pattern);
+               goto clear_hashmaps;
+       }
+
+       translated = xmalloc(sizeof(struct pattern_entry));
+
+       translated->pattern = xstrdup(given->pattern);
+       translated->patternlen = given->patternlen;
+       hashmap_entry_init(&translated->ent,
+                          ignore_case ?
+                          strihash(translated->pattern) :
+                          strhash(translated->pattern));
+
+       hashmap_add(&pl->recursive_hashmap, &translated->ent);
+
+       if (hashmap_get_entry(&pl->parent_hashmap, translated, ent, NULL)) {
+               /* we already included this at the parent level */
+               warning(_("your sparse-checkout file may have issues: pattern '%s' is repeated"),
+                       given->pattern);
+               hashmap_remove(&pl->parent_hashmap, &translated->ent, &data);
+               free(data);
+               free(translated);
+       }
+
+       return;
+
+clear_hashmaps:
+       warning(_("disabling cone pattern matching"));
+       hashmap_free_entries(&pl->parent_hashmap, struct pattern_entry, ent);
+       hashmap_free_entries(&pl->recursive_hashmap, struct pattern_entry, ent);
+       pl->use_cone_patterns = 0;
+}
+
+static int hashmap_contains_path(struct hashmap *map,
+                                struct strbuf *pattern)
+{
+       struct pattern_entry p;
+
+       /* Check straight mapping */
+       p.pattern = pattern->buf;
+       p.patternlen = pattern->len;
+       hashmap_entry_init(&p.ent,
+                          ignore_case ?
+                          strihash(p.pattern) :
+                          strhash(p.pattern));
+       return !!hashmap_get_entry(map, &p, ent, NULL);
+}
+
+int hashmap_contains_parent(struct hashmap *map,
+                           const char *path,
+                           struct strbuf *buffer)
+{
+       char *slash_pos;
+
+       strbuf_setlen(buffer, 0);
+
+       if (path[0] != '/')
+               strbuf_addch(buffer, '/');
+
+       strbuf_addstr(buffer, path);
+
+       slash_pos = strrchr(buffer->buf, '/');
+
+       while (slash_pos > buffer->buf) {
+               strbuf_setlen(buffer, slash_pos - buffer->buf);
+
+               if (hashmap_contains_path(map, buffer))
+                       return 1;
+
+               slash_pos = strrchr(buffer->buf, '/');
+       }
+
+       return 0;
+}
+
 void add_pattern(const char *string, const char *base,
                 int baselen, struct pattern_list *pl, int srcpos)
 {
@@ -635,6 +793,8 @@ void add_pattern(const char *string, const char *base,
        ALLOC_GROW(pl->patterns, pl->nr + 1, pl->alloc);
        pl->patterns[pl->nr++] = pattern;
        pattern->pl = pl;
+
+       add_pattern_to_hashsets(pl, pattern);
 }
 
 static int read_skip_worktree_file_from_index(const struct index_state *istate,
@@ -860,6 +1020,9 @@ static int add_patterns_from_buffer(char *buf, size_t size,
        int i, lineno = 1;
        char *entry;
 
+       hashmap_init(&pl->recursive_hashmap, pl_hashmap_cmp, NULL, 0);
+       hashmap_init(&pl->parent_hashmap, pl_hashmap_cmp, NULL, 0);
+
        pl->filebuf = buf;
 
        if (skip_utf8_bom(&buf, size))
@@ -1096,16 +1259,58 @@ enum pattern_match_result path_matches_pattern_list(
                                struct index_state *istate)
 {
        struct path_pattern *pattern;
-       pattern = last_matching_pattern_from_list(pathname, pathlen, basename,
-                                                 dtype, pl, istate);
-       if (pattern) {
-               if (pattern->flags & PATTERN_FLAG_NEGATIVE)
-                       return NOT_MATCHED;
-               else
-                       return MATCHED;
+       struct strbuf parent_pathname = STRBUF_INIT;
+       int result = NOT_MATCHED;
+       const char *slash_pos;
+
+       if (!pl->use_cone_patterns) {
+               pattern = last_matching_pattern_from_list(pathname, pathlen, basename,
+                                                       dtype, pl, istate);
+               if (pattern) {
+                       if (pattern->flags & PATTERN_FLAG_NEGATIVE)
+                               return NOT_MATCHED;
+                       else
+                               return MATCHED;
+               }
+
+               return UNDECIDED;
+       }
+
+       if (pl->full_cone)
+               return MATCHED;
+
+       strbuf_addch(&parent_pathname, '/');
+       strbuf_add(&parent_pathname, pathname, pathlen);
+
+       if (hashmap_contains_path(&pl->recursive_hashmap,
+                                 &parent_pathname)) {
+               result = MATCHED_RECURSIVE;
+               goto done;
+       }
+
+       slash_pos = strrchr(parent_pathname.buf, '/');
+
+       if (slash_pos == parent_pathname.buf) {
+               /* include every file in root */
+               result = MATCHED;
+               goto done;
+       }
+
+       strbuf_setlen(&parent_pathname, slash_pos - parent_pathname.buf);
+
+       if (hashmap_contains_path(&pl->parent_hashmap, &parent_pathname)) {
+               result = MATCHED;
+               goto done;
        }
 
-       return UNDECIDED;
+       if (hashmap_contains_parent(&pl->recursive_hashmap,
+                                   pathname,
+                                   &parent_pathname))
+               result = MATCHED_RECURSIVE;
+
+done:
+       strbuf_release(&parent_pathname);
+       return result;
 }
 
 static struct path_pattern *last_matching_pattern_from_lists(
@@ -1454,6 +1659,8 @@ static enum path_treatment treat_directory(struct dir_struct *dir,
        const char *dirname, int len, int baselen, int exclude,
        const struct pathspec *pathspec)
 {
+       int nested_repo = 0;
+
        /* The "len-1" is to strip the final '/' */
        switch (directory_exists_in_index(istate, dirname, len-1)) {
        case index_directory:
@@ -1463,15 +1670,16 @@ static enum path_treatment treat_directory(struct dir_struct *dir,
                return path_none;
 
        case index_nonexistent:
-               if (dir->flags & DIR_SKIP_NESTED_GIT) {
-                       int nested_repo;
+               if ((dir->flags & DIR_SKIP_NESTED_GIT) ||
+                   !(dir->flags & DIR_NO_GITLINKS)) {
                        struct strbuf sb = STRBUF_INIT;
                        strbuf_addstr(&sb, dirname);
                        nested_repo = is_nonbare_repository_dir(&sb);
                        strbuf_release(&sb);
-                       if (nested_repo)
-                               return path_none;
                }
+               if (nested_repo)
+                       return ((dir->flags & DIR_SKIP_NESTED_GIT) ? path_none :
+                               (exclude ? path_excluded : path_untracked));
 
                if (dir->flags & DIR_SHOW_OTHER_DIRECTORIES)
                        break;
@@ -1499,13 +1707,6 @@ static enum path_treatment treat_directory(struct dir_struct *dir,
 
                        return path_none;
                }
-               if (!(dir->flags & DIR_NO_GITLINKS)) {
-                       struct strbuf sb = STRBUF_INIT;
-                       strbuf_addstr(&sb, dirname);
-                       if (is_nonbare_repository_dir(&sb))
-                               return exclude ? path_excluded : path_untracked;
-                       strbuf_release(&sb);
-               }
                return path_recurse;
        }
 
@@ -1925,6 +2126,40 @@ static void close_cached_dir(struct cached_dir *cdir)
        }
 }
 
+static void add_path_to_appropriate_result_list(struct dir_struct *dir,
+       struct untracked_cache_dir *untracked,
+       struct cached_dir *cdir,
+       struct index_state *istate,
+       struct strbuf *path,
+       int baselen,
+       const struct pathspec *pathspec,
+       enum path_treatment state)
+{
+       /* add the path to the appropriate result list */
+       switch (state) {
+       case path_excluded:
+               if (dir->flags & DIR_SHOW_IGNORED)
+                       dir_add_name(dir, istate, path->buf, path->len);
+               else if ((dir->flags & DIR_SHOW_IGNORED_TOO) ||
+                       ((dir->flags & DIR_COLLECT_IGNORED) &&
+                       exclude_matches_pathspec(path->buf, path->len,
+                                                pathspec)))
+                       dir_add_ignored(dir, istate, path->buf, path->len);
+               break;
+
+       case path_untracked:
+               if (dir->flags & DIR_SHOW_IGNORED)
+                       break;
+               dir_add_name(dir, istate, path->buf, path->len);
+               if (cdir->fdir)
+                       add_untracked(untracked, path->buf + baselen);
+               break;
+
+       default:
+               break;
+       }
+}
+
 /*
  * Read a directory tree. We currently ignore anything but
  * directories, regular files and symlinks. That's because git
@@ -1949,6 +2184,15 @@ static enum path_treatment read_directory_recursive(struct dir_struct *dir,
        struct untracked_cache_dir *untracked, int check_only,
        int stop_at_first_file, const struct pathspec *pathspec)
 {
+       /*
+        * WARNING WARNING WARNING:
+        *
+        * Any updates to the traversal logic here may need corresponding
+        * updates in treat_leading_path().  See the commit message for the
+        * commit adding this warning as well as the commit preceding it
+        * for details.
+        */
+
        struct cached_dir cdir;
        enum path_treatment state, subdir_state, dir_state = path_none;
        struct strbuf path = STRBUF_INIT;
@@ -2028,29 +2272,9 @@ static enum path_treatment read_directory_recursive(struct dir_struct *dir,
                        continue;
                }
 
-               /* add the path to the appropriate result list */
-               switch (state) {
-               case path_excluded:
-                       if (dir->flags & DIR_SHOW_IGNORED)
-                               dir_add_name(dir, istate, path.buf, path.len);
-                       else if ((dir->flags & DIR_SHOW_IGNORED_TOO) ||
-                               ((dir->flags & DIR_COLLECT_IGNORED) &&
-                               exclude_matches_pathspec(path.buf, path.len,
-                                                        pathspec)))
-                               dir_add_ignored(dir, istate, path.buf, path.len);
-                       break;
-
-               case path_untracked:
-                       if (dir->flags & DIR_SHOW_IGNORED)
-                               break;
-                       dir_add_name(dir, istate, path.buf, path.len);
-                       if (cdir.fdir)
-                               add_untracked(untracked, path.buf + baselen);
-                       break;
-
-               default:
-                       break;
-               }
+               add_path_to_appropriate_result_list(dir, untracked, &cdir,
+                                                   istate, &path, baselen,
+                                                   pathspec, state);
        }
        close_cached_dir(&cdir);
  out:
@@ -2080,41 +2304,104 @@ static int treat_leading_path(struct dir_struct *dir,
                              const char *path, int len,
                              const struct pathspec *pathspec)
 {
+       /*
+        * WARNING WARNING WARNING:
+        *
+        * Any updates to the traversal logic here may need corresponding
+        * updates in treat_leading_path().  See the commit message for the
+        * commit adding this warning as well as the commit preceding it
+        * for details.
+        */
+
        struct strbuf sb = STRBUF_INIT;
-       int baselen, rc = 0;
+       int prevlen, baselen;
        const char *cp;
-       int old_flags = dir->flags;
+       struct cached_dir cdir;
+       struct dirent *de;
+       enum path_treatment state = path_none;
+
+       /*
+        * For each directory component of path, we are going to check whether
+        * that path is relevant given the pathspec.  For example, if path is
+        *    foo/bar/baz/
+        * then we will ask treat_path() whether we should go into foo, then
+        * whether we should go into bar, then whether baz is relevant.
+        * Checking each is important because e.g. if path is
+        *    .git/info/
+        * then we need to check .git to know we shouldn't traverse it.
+        * If the return from treat_path() is:
+        *    * path_none, for any path, we return false.
+        *    * path_recurse, for all path components, we return true
+        *    * <anything else> for some intermediate component, we make sure
+        *        to add that path to the relevant list but return false
+        *        signifying that we shouldn't recurse into it.
+        */
 
        while (len && path[len - 1] == '/')
                len--;
        if (!len)
                return 1;
+
+       /*
+        * We need a manufactured dirent with sufficient space to store a
+        * leading directory component of path in its d_name.  Here, we
+        * assume that the dirent's d_name is either declared as
+        *    char d_name[BIG_ENOUGH]
+        * or that it is declared at the end of the struct as
+        *    char d_name[]
+        * For either case, padding with len+1 bytes at the end will ensure
+        * sufficient storage space.
+        */
+       de = xcalloc(1, st_add3(sizeof(struct dirent), len, 1));
+       memset(&cdir, 0, sizeof(cdir));
+       cdir.de = de;
+#if defined(DT_UNKNOWN) && !defined(NO_D_TYPE_IN_DIRENT)
+       de->d_type = DT_DIR;
+#endif
        baselen = 0;
-       dir->flags &= ~DIR_SHOW_OTHER_DIRECTORIES;
+       prevlen = 0;
        while (1) {
-               cp = path + baselen + !!baselen;
+               prevlen = baselen + !!baselen;
+               cp = path + prevlen;
                cp = memchr(cp, '/', path + len - cp);
                if (!cp)
                        baselen = len;
                else
                        baselen = cp - path;
-               strbuf_setlen(&sb, 0);
+               strbuf_reset(&sb);
                strbuf_add(&sb, path, baselen);
                if (!is_directory(sb.buf))
                        break;
-               if (simplify_away(sb.buf, sb.len, pathspec))
-                       break;
-               if (treat_one_path(dir, NULL, istate, &sb, baselen, pathspec,
-                                  DT_DIR, NULL) == path_none)
+               strbuf_reset(&sb);
+               strbuf_add(&sb, path, prevlen);
+               memcpy(de->d_name, path+prevlen, baselen-prevlen);
+               de->d_name[baselen-prevlen] = '\0';
+               state = treat_path(dir, NULL, &cdir, istate, &sb, prevlen,
+                                   pathspec);
+               if (state == path_untracked &&
+                   get_dtype(cdir.de, istate, sb.buf, sb.len) == DT_DIR &&
+                   (dir->flags & DIR_SHOW_IGNORED_TOO ||
+                    do_match_pathspec(istate, pathspec, sb.buf, sb.len,
+                                      baselen, NULL, DO_MATCH_LEADING_PATHSPEC) == MATCHED_RECURSIVELY_LEADING_PATHSPEC)) {
+                       add_path_to_appropriate_result_list(dir, NULL, &cdir,
+                                                           istate,
+                                                           &sb, baselen,
+                                                           pathspec, state);
+                       state = path_recurse;
+               }
+
+               if (state != path_recurse)
                        break; /* do not recurse into it */
-               if (len <= baselen) {
-                       rc = 1;
+               if (len <= baselen)
                        break; /* finished checking */
-               }
        }
+       add_path_to_appropriate_result_list(dir, NULL, &cdir, istate,
+                                           &sb, baselen, pathspec,
+                                           state);
+
+       free(de);
        strbuf_release(&sb);
-       dir->flags = old_flags;
-       return rc;
+       return state == path_recurse;
 }
 
 static const char *get_ident_string(void)