]>
Commit | Line | Data |
---|---|---|
96872bc2 LT |
1 | /* |
2 | * name-hash.c | |
3 | * | |
4 | * Hashing names in the index state | |
5 | * | |
6 | * Copyright (C) 2008 Linus Torvalds | |
7 | */ | |
8 | #define NO_THE_INDEX_COMPATIBILITY_MACROS | |
9 | #include "cache.h" | |
10 | ||
2092678c | 11 | struct dir_entry { |
e05881a4 | 12 | struct hashmap_entry ent; |
2092678c KB |
13 | struct dir_entry *parent; |
14 | struct cache_entry *ce; | |
15 | int nr; | |
16 | unsigned int namelen; | |
17 | }; | |
18 | ||
e05881a4 KB |
19 | static int dir_entry_cmp(const struct dir_entry *e1, |
20 | const struct dir_entry *e2, const char *name) | |
21 | { | |
22 | return e1->namelen != e2->namelen || strncasecmp(e1->ce->name, | |
23 | name ? name : e2->ce->name, e1->namelen); | |
24 | } | |
25 | ||
2092678c KB |
26 | static struct dir_entry *find_dir_entry(struct index_state *istate, |
27 | const char *name, unsigned int namelen) | |
28 | { | |
e05881a4 KB |
29 | struct dir_entry key; |
30 | hashmap_entry_init(&key, memihash(name, namelen)); | |
31 | key.namelen = namelen; | |
32 | return hashmap_get(&istate->dir_hash, &key, name); | |
2092678c KB |
33 | } |
34 | ||
35 | static struct dir_entry *hash_dir_entry(struct index_state *istate, | |
36 | struct cache_entry *ce, int namelen) | |
5102c617 JJ |
37 | { |
38 | /* | |
39 | * Throw each directory component in the hash for quick lookup | |
d28eec26 | 40 | * during a git status. Directory components are stored without their |
5102c617 | 41 | * closing slash. Despite submodules being a directory, they never |
d28eec26 | 42 | * reach this point, because they are stored |
2092678c | 43 | * in index_state.name_hash (as ordinary cache_entries). |
5102c617 | 44 | * |
2092678c KB |
45 | * Note that the cache_entry stored with the dir_entry merely |
46 | * supplies the name of the directory (up to dir_entry.namelen). We | |
47 | * track the number of 'active' files in a directory in dir_entry.nr, | |
48 | * so we can tell if the directory is still relevant, e.g. for git | |
49 | * status. However, if cache_entries are removed, we cannot pinpoint | |
50 | * an exact cache_entry that's still active. It is very possible that | |
51 | * multiple dir_entries point to the same cache_entry. | |
5102c617 | 52 | */ |
2092678c KB |
53 | struct dir_entry *dir; |
54 | ||
55 | /* get length of parent directory */ | |
56 | while (namelen > 0 && !is_dir_sep(ce->name[namelen - 1])) | |
57 | namelen--; | |
58 | if (namelen <= 0) | |
59 | return NULL; | |
d28eec26 | 60 | namelen--; |
2092678c KB |
61 | |
62 | /* lookup existing entry for that directory */ | |
63 | dir = find_dir_entry(istate, ce->name, namelen); | |
64 | if (!dir) { | |
65 | /* not found, create it and add to hash table */ | |
2092678c | 66 | dir = xcalloc(1, sizeof(struct dir_entry)); |
e05881a4 | 67 | hashmap_entry_init(dir, memihash(ce->name, namelen)); |
2092678c KB |
68 | dir->namelen = namelen; |
69 | dir->ce = ce; | |
e05881a4 | 70 | hashmap_add(&istate->dir_hash, dir); |
2092678c KB |
71 | |
72 | /* recursively add missing parent directories */ | |
d28eec26 | 73 | dir->parent = hash_dir_entry(istate, ce, namelen); |
5102c617 | 74 | } |
2092678c KB |
75 | return dir; |
76 | } | |
77 | ||
78 | static void add_dir_entry(struct index_state *istate, struct cache_entry *ce) | |
79 | { | |
80 | /* Add reference to the directory entry (and parents if 0). */ | |
81 | struct dir_entry *dir = hash_dir_entry(istate, ce, ce_namelen(ce)); | |
82 | while (dir && !(dir->nr++)) | |
83 | dir = dir->parent; | |
84 | } | |
85 | ||
86 | static void remove_dir_entry(struct index_state *istate, struct cache_entry *ce) | |
87 | { | |
88 | /* | |
89 | * Release reference to the directory entry (and parents if 0). | |
90 | * | |
91 | * Note: we do not remove / free the entry because there's no | |
92 | * hash.[ch]::remove_hash and dir->next may point to other entries | |
93 | * that are still valid, so we must not free the memory. | |
94 | */ | |
95 | struct dir_entry *dir = hash_dir_entry(istate, ce, ce_namelen(ce)); | |
96 | while (dir && dir->nr && !(--dir->nr)) | |
97 | dir = dir->parent; | |
5102c617 JJ |
98 | } |
99 | ||
96872bc2 LT |
100 | static void hash_index_entry(struct index_state *istate, struct cache_entry *ce) |
101 | { | |
102 | void **pos; | |
103 | unsigned int hash; | |
104 | ||
105 | if (ce->ce_flags & CE_HASHED) | |
106 | return; | |
107 | ce->ce_flags |= CE_HASHED; | |
2092678c | 108 | ce->next = NULL; |
e05881a4 | 109 | hash = memihash(ce->name, ce_namelen(ce)); |
96872bc2 LT |
110 | pos = insert_hash(hash, ce, &istate->name_hash); |
111 | if (pos) { | |
112 | ce->next = *pos; | |
113 | *pos = ce; | |
114 | } | |
5102c617 | 115 | |
2092678c KB |
116 | if (ignore_case && !(ce->ce_flags & CE_UNHASHED)) |
117 | add_dir_entry(istate, ce); | |
96872bc2 LT |
118 | } |
119 | ||
120 | static void lazy_init_name_hash(struct index_state *istate) | |
121 | { | |
122 | int nr; | |
123 | ||
124 | if (istate->name_hash_initialized) | |
125 | return; | |
c7359281 NTND |
126 | if (istate->cache_nr) |
127 | preallocate_hash(&istate->name_hash, istate->cache_nr); | |
e05881a4 | 128 | hashmap_init(&istate->dir_hash, (hashmap_cmp_fn) dir_entry_cmp, 0); |
96872bc2 LT |
129 | for (nr = 0; nr < istate->cache_nr; nr++) |
130 | hash_index_entry(istate, istate->cache[nr]); | |
131 | istate->name_hash_initialized = 1; | |
132 | } | |
133 | ||
134 | void add_name_hash(struct index_state *istate, struct cache_entry *ce) | |
135 | { | |
2092678c KB |
136 | /* if already hashed, add reference to directory entries */ |
137 | if (ignore_case && (ce->ce_flags & CE_STATE_MASK) == CE_STATE_MASK) | |
138 | add_dir_entry(istate, ce); | |
139 | ||
96872bc2 LT |
140 | ce->ce_flags &= ~CE_UNHASHED; |
141 | if (istate->name_hash_initialized) | |
142 | hash_index_entry(istate, ce); | |
143 | } | |
144 | ||
2092678c KB |
145 | /* |
146 | * We don't actually *remove* it, we can just mark it invalid so that | |
147 | * we won't find it in lookups. | |
148 | * | |
149 | * Not only would we have to search the lists (simple enough), but | |
150 | * we'd also have to rehash other hash buckets in case this makes the | |
151 | * hash bucket empty (common). So it's much better to just mark | |
152 | * it. | |
153 | */ | |
154 | void remove_name_hash(struct index_state *istate, struct cache_entry *ce) | |
155 | { | |
156 | /* if already hashed, release reference to directory entries */ | |
157 | if (ignore_case && (ce->ce_flags & CE_STATE_MASK) == CE_HASHED) | |
158 | remove_dir_entry(istate, ce); | |
159 | ||
160 | ce->ce_flags |= CE_UNHASHED; | |
161 | } | |
162 | ||
cd2fef59 LT |
163 | static int slow_same_name(const char *name1, int len1, const char *name2, int len2) |
164 | { | |
165 | if (len1 != len2) | |
166 | return 0; | |
167 | ||
168 | while (len1) { | |
169 | unsigned char c1 = *name1++; | |
170 | unsigned char c2 = *name2++; | |
171 | len1--; | |
172 | if (c1 != c2) { | |
173 | c1 = toupper(c1); | |
174 | c2 = toupper(c2); | |
175 | if (c1 != c2) | |
176 | return 0; | |
177 | } | |
178 | } | |
179 | return 1; | |
180 | } | |
181 | ||
182 | static int same_name(const struct cache_entry *ce, const char *name, int namelen, int icase) | |
183 | { | |
184 | int len = ce_namelen(ce); | |
185 | ||
186 | /* | |
187 | * Always do exact compare, even if we want a case-ignoring comparison; | |
188 | * we do the quick exact one first, because it will be the common case. | |
189 | */ | |
190 | if (len == namelen && !cache_name_compare(name, namelen, ce->name, len)) | |
191 | return 1; | |
192 | ||
5102c617 JJ |
193 | if (!icase) |
194 | return 0; | |
195 | ||
2092678c | 196 | return slow_same_name(name, namelen, ce->name, len); |
cd2fef59 LT |
197 | } |
198 | ||
db5360f3 ES |
199 | struct cache_entry *index_dir_exists(struct index_state *istate, const char *name, int namelen) |
200 | { | |
201 | struct cache_entry *ce; | |
202 | struct dir_entry *dir; | |
203 | ||
204 | lazy_init_name_hash(istate); | |
205 | dir = find_dir_entry(istate, name, namelen); | |
206 | if (dir && dir->nr) | |
207 | return dir->ce; | |
208 | ||
209 | /* | |
210 | * It might be a submodule. Unlike plain directories, which are stored | |
211 | * in the dir-hash, submodules are stored in the name-hash, so check | |
212 | * there, as well. | |
213 | */ | |
d28eec26 | 214 | ce = index_file_exists(istate, name, namelen, 1); |
db5360f3 ES |
215 | if (ce && S_ISGITLINK(ce->ce_mode)) |
216 | return ce; | |
217 | ||
218 | return NULL; | |
219 | } | |
220 | ||
221 | struct cache_entry *index_file_exists(struct index_state *istate, const char *name, int namelen, int icase) | |
96872bc2 | 222 | { |
e05881a4 | 223 | unsigned int hash = memihash(name, namelen); |
96872bc2 LT |
224 | struct cache_entry *ce; |
225 | ||
226 | lazy_init_name_hash(istate); | |
227 | ce = lookup_hash(hash, &istate->name_hash); | |
228 | ||
229 | while (ce) { | |
230 | if (!(ce->ce_flags & CE_UNHASHED)) { | |
cd2fef59 | 231 | if (same_name(ce, name, namelen, icase)) |
df292c79 | 232 | return ce; |
96872bc2 | 233 | } |
2092678c | 234 | ce = ce->next; |
96872bc2 | 235 | } |
df292c79 | 236 | return NULL; |
96872bc2 | 237 | } |
2092678c | 238 | |
db5360f3 ES |
239 | struct cache_entry *index_name_exists(struct index_state *istate, const char *name, int namelen, int icase) |
240 | { | |
241 | if (namelen > 0 && name[namelen - 1] == '/') | |
d28eec26 | 242 | return index_dir_exists(istate, name, namelen - 1); |
db5360f3 ES |
243 | return index_file_exists(istate, name, namelen, icase); |
244 | } | |
245 | ||
2092678c KB |
246 | void free_name_hash(struct index_state *istate) |
247 | { | |
248 | if (!istate->name_hash_initialized) | |
249 | return; | |
250 | istate->name_hash_initialized = 0; | |
2092678c KB |
251 | |
252 | free_hash(&istate->name_hash); | |
e05881a4 | 253 | hashmap_free(&istate->dir_hash, 1); |
2092678c | 254 | } |