]> git.ipfire.org Git - thirdparty/git.git/commitdiff
sparse-checkout: respect core.ignoreCase in cone mode
author Derrick Stolee <dstolee@microsoft.com>
Fri, 13 Dec 2019 18:09:53 +0000 (18:09 +0000)
committer Junio C Hamano <gitster@pobox.com>
Fri, 13 Dec 2019 20:01:02 +0000 (12:01 -0800)
When a user uses the sparse-checkout feature in cone mode, they
add patterns using "git sparse-checkout set <dir1> <dir2> ..."
or by using "--stdin" to provide the directories line-by-line over
stdin. This behaviour naturally looks a lot like the way a user
would type "git add <dir1> <dir2> ..."

If core.ignoreCase is enabled, then "git add" will match the input
using a case-insensitive match. Do the same for the sparse-checkout
feature.

Perform case-insensitive checks while updating the skip-worktree
bits during unpack_trees(). This is done by changing the hash
algorithm and hashmap comparison methods to optionally use case-
insensitive methods.

When this is enabled, there is a small performance cost in the
hashing algorithm. To tease out the worst possible case, the
following was run on a repo with a deep directory structure:

git ls-tree -d -r --name-only HEAD |
git sparse-checkout set --stdin

The 'set' command was timed with core.ignoreCase disabled or
enabled. For the repo with a deep directory structure, the numbers were

core.ignoreCase=false: 62s
core.ignoreCase=true:  74s (+19.3%)

For reproducibility, the equivalent test on the Linux kernel
repository had these numbers:

core.ignoreCase=false: 3.1s
core.ignoreCase=true:  3.6s (+16%)

Now, this is not an entirely fair comparison, as most users
will define their sparse cone using more shallow directories,
and the performance improvement from eb42feca97 ("unpack-trees:
hash less in cone mode" 2019-11-21) can remove most of the
hash cost. For a more realistic test, drop the "-r" from the
ls-tree command to store only the first-level directories.
In that case, the Linux kernel repository takes 0.2-0.25s in
each case, and the deep repository takes one second, plus or
minus 0.05s, in each case.

Thus, we _can_ demonstrate a cost to this change, but it is
unlikely to matter to any reasonable sparse-checkout cone.

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Documentation/git-sparse-checkout.txt
builtin/sparse-checkout.c
dir.c
t/t1091-sparse-checkout-builtin.sh

index b97528567330f50a91d1c86f72f6b4df991d848c..9c3c66cc37b3a49be9f9466cbe2d1cf7486ffb8c 100644 (file)
@@ -150,6 +150,11 @@ expecting patterns of these types. Git will warn if the patterns do not match.
 If the patterns do match the expected format, then Git will use faster hash-
 based algorithms to compute inclusion in the sparse-checkout.
 
+If `core.ignoreCase=true`, then the pattern-matching algorithm will use a
+case-insensitive check. This corrects for case mismatched filenames in the
+'git sparse-checkout set' command to reflect the expected cone in the working
+directory.
+
 SEE ALSO
 --------
 
index a542d617a5fe25b2ec0d7b80c4dc172387ef7302..5d62f7a66d9b345b09fea90d0f0376df3161cfcb 100644 (file)
@@ -313,7 +313,10 @@ static void insert_recursive_pattern(struct pattern_list *pl, struct strbuf *pat
        struct pattern_entry *e = xmalloc(sizeof(*e));
        e->patternlen = path->len;
        e->pattern = strbuf_detach(path, NULL);
-       hashmap_entry_init(&e->ent, memhash(e->pattern, e->patternlen));
+       hashmap_entry_init(&e->ent,
+                          ignore_case ?
+                          strihash(e->pattern) :
+                          strhash(e->pattern));
 
        hashmap_add(&pl->recursive_hashmap, &e->ent);
 
@@ -329,7 +332,10 @@ static void insert_recursive_pattern(struct pattern_list *pl, struct strbuf *pat
                e = xmalloc(sizeof(struct pattern_entry));
                e->patternlen = newlen;
                e->pattern = xstrndup(oldpattern, newlen);
-               hashmap_entry_init(&e->ent, memhash(e->pattern, e->patternlen));
+               hashmap_entry_init(&e->ent,
+                                  ignore_case ?
+                                  strihash(e->pattern) :
+                                  strhash(e->pattern));
 
                if (!hashmap_get_entry(&pl->parent_hashmap, e, ent, NULL))
                        hashmap_add(&pl->parent_hashmap, &e->ent);
diff --git a/dir.c b/dir.c
index 2ef92a50a046062f4584143de6138deb9a8a0db7..22d08e61c296a0ca2676c705422b9b50c292650c 100644 (file)
--- a/dir.c
+++ b/dir.c
@@ -625,6 +625,8 @@ int pl_hashmap_cmp(const void *unused_cmp_data,
                         ? ee1->patternlen
                         : ee2->patternlen;
 
+       if (ignore_case)
+               return strncasecmp(ee1->pattern, ee2->pattern, min_len);
        return strncmp(ee1->pattern, ee2->pattern, min_len);
 }
 
@@ -665,7 +667,9 @@ static void add_pattern_to_hashsets(struct pattern_list *pl, struct path_pattern
                translated->pattern = truncated;
                translated->patternlen = given->patternlen - 2;
                hashmap_entry_init(&translated->ent,
-                                  memhash(translated->pattern, translated->patternlen));
+                                  ignore_case ?
+                                  strihash(translated->pattern) :
+                                  strhash(translated->pattern));
 
                if (!hashmap_get_entry(&pl->recursive_hashmap,
                                       translated, ent, NULL)) {
@@ -694,7 +698,9 @@ static void add_pattern_to_hashsets(struct pattern_list *pl, struct path_pattern
        translated->pattern = xstrdup(given->pattern);
        translated->patternlen = given->patternlen;
        hashmap_entry_init(&translated->ent,
-                          memhash(translated->pattern, translated->patternlen));
+                          ignore_case ?
+                          strihash(translated->pattern) :
+                          strhash(translated->pattern));
 
        hashmap_add(&pl->recursive_hashmap, &translated->ent);
 
@@ -724,7 +730,10 @@ static int hashmap_contains_path(struct hashmap *map,
        /* Check straight mapping */
        p.pattern = pattern->buf;
        p.patternlen = pattern->len;
-       hashmap_entry_init(&p.ent, memhash(p.pattern, p.patternlen));
+       hashmap_entry_init(&p.ent,
+                          ignore_case ?
+                          strihash(p.pattern) :
+                          strhash(p.pattern));
        return !!hashmap_get_entry(map, &p, ent, NULL);
 }
 
index d5e289252633d9090b4ef5f0caaff7101bb785f2..cee98a1c8a80d2ba8be2e5fef57b43b14fc56869 100755 (executable)
@@ -304,4 +304,21 @@ test_expect_success 'sparse-checkout (init|set|disable) fails with dirty status'
        git -C dirty sparse-checkout disable
 '
 
+test_expect_success 'cone mode: set with core.ignoreCase=true' '
+       git -C repo sparse-checkout init --cone &&
+       git -C repo -c core.ignoreCase=true sparse-checkout set folder1 &&
+       cat >expect <<-EOF &&
+               /*
+               !/*/
+               /folder1/
+       EOF
+       test_cmp expect repo/.git/info/sparse-checkout &&
+       ls repo >dir &&
+       cat >expect <<-EOF &&
+               a
+               folder1
+       EOF
+       test_cmp expect dir
+'
+
 test_done