]>
Commit | Line | Data |
---|---|---|
96872bc2 LT |
1 | /* |
2 | * name-hash.c | |
3 | * | |
4 | * Hashing names in the index state | |
5 | * | |
6 | * Copyright (C) 2008 Linus Torvalds | |
7 | */ | |
8 | #define NO_THE_INDEX_COMPATIBILITY_MACROS | |
9 | #include "cache.h" | |
10 | ||
cd2fef59 LT |
11 | /* |
12 | * This removes bit 5 if bit 6 is set. | |
13 | * | |
14 | * That will make US-ASCII characters hash to their upper-case | |
15 | * equivalent. We could easily do this one whole word at a time, | |
16 | * but that's for future worries. | |
17 | */ | |
18 | static inline unsigned char icase_hash(unsigned char c) | |
19 | { | |
20 | return c & ~((c & 0x40) >> 1); | |
21 | } | |
22 | ||
96872bc2 LT |
23 | static unsigned int hash_name(const char *name, int namelen) |
24 | { | |
25 | unsigned int hash = 0x123; | |
26 | ||
c19387e7 | 27 | while (namelen--) { |
96872bc2 | 28 | unsigned char c = *name++; |
cd2fef59 | 29 | c = icase_hash(c); |
96872bc2 | 30 | hash = hash*101 + c; |
c19387e7 | 31 | } |
96872bc2 LT |
32 | return hash; |
33 | } | |
34 | ||
2092678c KB |
35 | struct dir_entry { |
36 | struct dir_entry *next; | |
37 | struct dir_entry *parent; | |
38 | struct cache_entry *ce; | |
39 | int nr; | |
40 | unsigned int namelen; | |
41 | }; | |
42 | ||
43 | static struct dir_entry *find_dir_entry(struct index_state *istate, | |
44 | const char *name, unsigned int namelen) | |
45 | { | |
46 | unsigned int hash = hash_name(name, namelen); | |
47 | struct dir_entry *dir; | |
48 | ||
49 | for (dir = lookup_hash(hash, &istate->dir_hash); dir; dir = dir->next) | |
50 | if (dir->namelen == namelen && | |
51 | !strncasecmp(dir->ce->name, name, namelen)) | |
52 | return dir; | |
53 | return NULL; | |
54 | } | |
55 | ||
56 | static struct dir_entry *hash_dir_entry(struct index_state *istate, | |
57 | struct cache_entry *ce, int namelen) | |
5102c617 JJ |
58 | { |
59 | /* | |
60 | * Throw each directory component in the hash for quick lookup | |
d28eec26 | 61 | * during a git status. Directory components are stored without their |
5102c617 | 62 | * closing slash. Despite submodules being a directory, they never |
d28eec26 | 63 | * reach this point, because they are stored |
2092678c | 64 | * in index_state.name_hash (as ordinary cache_entries). |
5102c617 | 65 | * |
2092678c KB |
66 | * Note that the cache_entry stored with the dir_entry merely |
67 | * supplies the name of the directory (up to dir_entry.namelen). We | |
68 | * track the number of 'active' files in a directory in dir_entry.nr, | |
69 | * so we can tell if the directory is still relevant, e.g. for git | |
70 | * status. However, if cache_entries are removed, we cannot pinpoint | |
71 | * an exact cache_entry that's still active. It is very possible that | |
72 | * multiple dir_entries point to the same cache_entry. | |
5102c617 | 73 | */ |
2092678c KB |
74 | struct dir_entry *dir; |
75 | ||
76 | /* get length of parent directory */ | |
77 | while (namelen > 0 && !is_dir_sep(ce->name[namelen - 1])) | |
78 | namelen--; | |
79 | if (namelen <= 0) | |
80 | return NULL; | |
d28eec26 | 81 | namelen--; |
2092678c KB |
82 | |
83 | /* lookup existing entry for that directory */ | |
84 | dir = find_dir_entry(istate, ce->name, namelen); | |
85 | if (!dir) { | |
86 | /* not found, create it and add to hash table */ | |
87 | void **pdir; | |
88 | unsigned int hash = hash_name(ce->name, namelen); | |
5102c617 | 89 | |
2092678c KB |
90 | dir = xcalloc(1, sizeof(struct dir_entry)); |
91 | dir->namelen = namelen; | |
92 | dir->ce = ce; | |
93 | ||
94 | pdir = insert_hash(hash, dir, &istate->dir_hash); | |
95 | if (pdir) { | |
96 | dir->next = *pdir; | |
97 | *pdir = dir; | |
5102c617 | 98 | } |
2092678c KB |
99 | |
100 | /* recursively add missing parent directories */ | |
d28eec26 | 101 | dir->parent = hash_dir_entry(istate, ce, namelen); |
5102c617 | 102 | } |
2092678c KB |
103 | return dir; |
104 | } | |
105 | ||
106 | static void add_dir_entry(struct index_state *istate, struct cache_entry *ce) | |
107 | { | |
108 | /* Add reference to the directory entry (and parents if 0). */ | |
109 | struct dir_entry *dir = hash_dir_entry(istate, ce, ce_namelen(ce)); | |
110 | while (dir && !(dir->nr++)) | |
111 | dir = dir->parent; | |
112 | } | |
113 | ||
114 | static void remove_dir_entry(struct index_state *istate, struct cache_entry *ce) | |
115 | { | |
116 | /* | |
117 | * Release reference to the directory entry (and parents if 0). | |
118 | * | |
119 | * Note: we do not remove / free the entry because there's no | |
120 | * hash.[ch]::remove_hash and dir->next may point to other entries | |
121 | * that are still valid, so we must not free the memory. | |
122 | */ | |
123 | struct dir_entry *dir = hash_dir_entry(istate, ce, ce_namelen(ce)); | |
124 | while (dir && dir->nr && !(--dir->nr)) | |
125 | dir = dir->parent; | |
5102c617 JJ |
126 | } |
127 | ||
96872bc2 LT |
128 | static void hash_index_entry(struct index_state *istate, struct cache_entry *ce) |
129 | { | |
130 | void **pos; | |
131 | unsigned int hash; | |
132 | ||
133 | if (ce->ce_flags & CE_HASHED) | |
134 | return; | |
135 | ce->ce_flags |= CE_HASHED; | |
2092678c | 136 | ce->next = NULL; |
96872bc2 LT |
137 | hash = hash_name(ce->name, ce_namelen(ce)); |
138 | pos = insert_hash(hash, ce, &istate->name_hash); | |
139 | if (pos) { | |
140 | ce->next = *pos; | |
141 | *pos = ce; | |
142 | } | |
5102c617 | 143 | |
2092678c KB |
144 | if (ignore_case && !(ce->ce_flags & CE_UNHASHED)) |
145 | add_dir_entry(istate, ce); | |
96872bc2 LT |
146 | } |
147 | ||
148 | static void lazy_init_name_hash(struct index_state *istate) | |
149 | { | |
150 | int nr; | |
151 | ||
152 | if (istate->name_hash_initialized) | |
153 | return; | |
c7359281 NTND |
154 | if (istate->cache_nr) |
155 | preallocate_hash(&istate->name_hash, istate->cache_nr); | |
96872bc2 LT |
156 | for (nr = 0; nr < istate->cache_nr; nr++) |
157 | hash_index_entry(istate, istate->cache[nr]); | |
158 | istate->name_hash_initialized = 1; | |
159 | } | |
160 | ||
161 | void add_name_hash(struct index_state *istate, struct cache_entry *ce) | |
162 | { | |
2092678c KB |
163 | /* if already hashed, add reference to directory entries */ |
164 | if (ignore_case && (ce->ce_flags & CE_STATE_MASK) == CE_STATE_MASK) | |
165 | add_dir_entry(istate, ce); | |
166 | ||
96872bc2 LT |
167 | ce->ce_flags &= ~CE_UNHASHED; |
168 | if (istate->name_hash_initialized) | |
169 | hash_index_entry(istate, ce); | |
170 | } | |
171 | ||
2092678c KB |
172 | /* |
173 | * We don't actually *remove* it, we can just mark it invalid so that | |
174 | * we won't find it in lookups. | |
175 | * | |
176 | * Not only would we have to search the lists (simple enough), but | |
177 | * we'd also have to rehash other hash buckets in case this makes the | |
178 | * hash bucket empty (common). So it's much better to just mark | |
179 | * it. | |
180 | */ | |
181 | void remove_name_hash(struct index_state *istate, struct cache_entry *ce) | |
182 | { | |
183 | /* if already hashed, release reference to directory entries */ | |
184 | if (ignore_case && (ce->ce_flags & CE_STATE_MASK) == CE_HASHED) | |
185 | remove_dir_entry(istate, ce); | |
186 | ||
187 | ce->ce_flags |= CE_UNHASHED; | |
188 | } | |
189 | ||
cd2fef59 LT |
190 | static int slow_same_name(const char *name1, int len1, const char *name2, int len2) |
191 | { | |
192 | if (len1 != len2) | |
193 | return 0; | |
194 | ||
195 | while (len1) { | |
196 | unsigned char c1 = *name1++; | |
197 | unsigned char c2 = *name2++; | |
198 | len1--; | |
199 | if (c1 != c2) { | |
200 | c1 = toupper(c1); | |
201 | c2 = toupper(c2); | |
202 | if (c1 != c2) | |
203 | return 0; | |
204 | } | |
205 | } | |
206 | return 1; | |
207 | } | |
208 | ||
209 | static int same_name(const struct cache_entry *ce, const char *name, int namelen, int icase) | |
210 | { | |
211 | int len = ce_namelen(ce); | |
212 | ||
213 | /* | |
214 | * Always do exact compare, even if we want a case-ignoring comparison; | |
215 | * we do the quick exact one first, because it will be the common case. | |
216 | */ | |
217 | if (len == namelen && !cache_name_compare(name, namelen, ce->name, len)) | |
218 | return 1; | |
219 | ||
5102c617 JJ |
220 | if (!icase) |
221 | return 0; | |
222 | ||
2092678c | 223 | return slow_same_name(name, namelen, ce->name, len); |
cd2fef59 LT |
224 | } |
225 | ||
db5360f3 ES |
226 | struct cache_entry *index_dir_exists(struct index_state *istate, const char *name, int namelen) |
227 | { | |
228 | struct cache_entry *ce; | |
229 | struct dir_entry *dir; | |
230 | ||
231 | lazy_init_name_hash(istate); | |
232 | dir = find_dir_entry(istate, name, namelen); | |
233 | if (dir && dir->nr) | |
234 | return dir->ce; | |
235 | ||
236 | /* | |
237 | * It might be a submodule. Unlike plain directories, which are stored | |
238 | * in the dir-hash, submodules are stored in the name-hash, so check | |
239 | * there, as well. | |
240 | */ | |
d28eec26 | 241 | ce = index_file_exists(istate, name, namelen, 1); |
db5360f3 ES |
242 | if (ce && S_ISGITLINK(ce->ce_mode)) |
243 | return ce; | |
244 | ||
245 | return NULL; | |
246 | } | |
247 | ||
248 | struct cache_entry *index_file_exists(struct index_state *istate, const char *name, int namelen, int icase) | |
96872bc2 LT |
249 | { |
250 | unsigned int hash = hash_name(name, namelen); | |
251 | struct cache_entry *ce; | |
252 | ||
253 | lazy_init_name_hash(istate); | |
254 | ce = lookup_hash(hash, &istate->name_hash); | |
255 | ||
256 | while (ce) { | |
257 | if (!(ce->ce_flags & CE_UNHASHED)) { | |
cd2fef59 | 258 | if (same_name(ce, name, namelen, icase)) |
df292c79 | 259 | return ce; |
96872bc2 | 260 | } |
2092678c | 261 | ce = ce->next; |
96872bc2 | 262 | } |
df292c79 | 263 | return NULL; |
96872bc2 | 264 | } |
2092678c | 265 | |
db5360f3 ES |
266 | struct cache_entry *index_name_exists(struct index_state *istate, const char *name, int namelen, int icase) |
267 | { | |
268 | if (namelen > 0 && name[namelen - 1] == '/') | |
d28eec26 | 269 | return index_dir_exists(istate, name, namelen - 1); |
db5360f3 ES |
270 | return index_file_exists(istate, name, namelen, icase); |
271 | } | |
272 | ||
2092678c KB |
273 | static int free_dir_entry(void *entry, void *unused) |
274 | { | |
275 | struct dir_entry *dir = entry; | |
276 | while (dir) { | |
277 | struct dir_entry *next = dir->next; | |
278 | free(dir); | |
279 | dir = next; | |
280 | } | |
281 | return 0; | |
282 | } | |
283 | ||
284 | void free_name_hash(struct index_state *istate) | |
285 | { | |
286 | if (!istate->name_hash_initialized) | |
287 | return; | |
288 | istate->name_hash_initialized = 0; | |
289 | if (ignore_case) | |
290 | /* free directory entries */ | |
291 | for_each_hash(&istate->dir_hash, free_dir_entry, NULL); | |
292 | ||
293 | free_hash(&istate->name_hash); | |
294 | free_hash(&istate->dir_hash); | |
295 | } |