]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | * name-hash.c | |
3 | * | |
4 | * Hashing names in the index state | |
5 | * | |
6 | * Copyright (C) 2008 Linus Torvalds | |
7 | */ | |
8 | #define NO_THE_INDEX_COMPATIBILITY_MACROS | |
9 | #include "cache.h" | |
10 | ||
11 | /* | |
12 | * This removes bit 5 if bit 6 is set. | |
13 | * | |
14 | * That will make US-ASCII characters hash to their upper-case | |
15 | * equivalent. We could easily do this one whole word at a time, | |
16 | * but that's for future worries. | |
17 | */ | |
18 | static inline unsigned char icase_hash(unsigned char c) | |
19 | { | |
20 | return c & ~((c & 0x40) >> 1); | |
21 | } | |
22 | ||
23 | static unsigned int hash_name(const char *name, int namelen) | |
24 | { | |
25 | unsigned int hash = 0x123; | |
26 | ||
27 | while (namelen--) { | |
28 | unsigned char c = *name++; | |
29 | c = icase_hash(c); | |
30 | hash = hash*101 + c; | |
31 | } | |
32 | return hash; | |
33 | } | |
34 | ||
35 | struct dir_entry { | |
36 | struct dir_entry *next; | |
37 | struct dir_entry *parent; | |
38 | struct cache_entry *ce; | |
39 | int nr; | |
40 | unsigned int namelen; | |
41 | }; | |
42 | ||
43 | static struct dir_entry *find_dir_entry(struct index_state *istate, | |
44 | const char *name, unsigned int namelen) | |
45 | { | |
46 | unsigned int hash = hash_name(name, namelen); | |
47 | struct dir_entry *dir; | |
48 | ||
49 | for (dir = lookup_hash(hash, &istate->dir_hash); dir; dir = dir->next) | |
50 | if (dir->namelen == namelen && | |
51 | !strncasecmp(dir->ce->name, name, namelen)) | |
52 | return dir; | |
53 | return NULL; | |
54 | } | |
55 | ||
56 | static struct dir_entry *hash_dir_entry(struct index_state *istate, | |
57 | struct cache_entry *ce, int namelen) | |
58 | { | |
59 | /* | |
60 | * Throw each directory component in the hash for quick lookup | |
61 | * during a git status. Directory components are stored with their | |
62 | * closing slash. Despite submodules being a directory, they never | |
63 | * reach this point, because they are stored without a closing slash | |
64 | * in index_state.name_hash (as ordinary cache_entries). | |
65 | * | |
66 | * Note that the cache_entry stored with the dir_entry merely | |
67 | * supplies the name of the directory (up to dir_entry.namelen). We | |
68 | * track the number of 'active' files in a directory in dir_entry.nr, | |
69 | * so we can tell if the directory is still relevant, e.g. for git | |
70 | * status. However, if cache_entries are removed, we cannot pinpoint | |
71 | * an exact cache_entry that's still active. It is very possible that | |
72 | * multiple dir_entries point to the same cache_entry. | |
73 | */ | |
74 | struct dir_entry *dir; | |
75 | ||
76 | /* get length of parent directory */ | |
77 | while (namelen > 0 && !is_dir_sep(ce->name[namelen - 1])) | |
78 | namelen--; | |
79 | if (namelen <= 0) | |
80 | return NULL; | |
81 | ||
82 | /* lookup existing entry for that directory */ | |
83 | dir = find_dir_entry(istate, ce->name, namelen); | |
84 | if (!dir) { | |
85 | /* not found, create it and add to hash table */ | |
86 | void **pdir; | |
87 | unsigned int hash = hash_name(ce->name, namelen); | |
88 | ||
89 | dir = xcalloc(1, sizeof(struct dir_entry)); | |
90 | dir->namelen = namelen; | |
91 | dir->ce = ce; | |
92 | ||
93 | pdir = insert_hash(hash, dir, &istate->dir_hash); | |
94 | if (pdir) { | |
95 | dir->next = *pdir; | |
96 | *pdir = dir; | |
97 | } | |
98 | ||
99 | /* recursively add missing parent directories */ | |
100 | dir->parent = hash_dir_entry(istate, ce, namelen - 1); | |
101 | } | |
102 | return dir; | |
103 | } | |
104 | ||
105 | static void add_dir_entry(struct index_state *istate, struct cache_entry *ce) | |
106 | { | |
107 | /* Add reference to the directory entry (and parents if 0). */ | |
108 | struct dir_entry *dir = hash_dir_entry(istate, ce, ce_namelen(ce)); | |
109 | while (dir && !(dir->nr++)) | |
110 | dir = dir->parent; | |
111 | } | |
112 | ||
113 | static void remove_dir_entry(struct index_state *istate, struct cache_entry *ce) | |
114 | { | |
115 | /* | |
116 | * Release reference to the directory entry (and parents if 0). | |
117 | * | |
118 | * Note: we do not remove / free the entry because there's no | |
119 | * hash.[ch]::remove_hash and dir->next may point to other entries | |
120 | * that are still valid, so we must not free the memory. | |
121 | */ | |
122 | struct dir_entry *dir = hash_dir_entry(istate, ce, ce_namelen(ce)); | |
123 | while (dir && dir->nr && !(--dir->nr)) | |
124 | dir = dir->parent; | |
125 | } | |
126 | ||
127 | static void hash_index_entry(struct index_state *istate, struct cache_entry *ce) | |
128 | { | |
129 | void **pos; | |
130 | unsigned int hash; | |
131 | ||
132 | if (ce->ce_flags & CE_HASHED) | |
133 | return; | |
134 | ce->ce_flags |= CE_HASHED; | |
135 | ce->next = NULL; | |
136 | hash = hash_name(ce->name, ce_namelen(ce)); | |
137 | pos = insert_hash(hash, ce, &istate->name_hash); | |
138 | if (pos) { | |
139 | ce->next = *pos; | |
140 | *pos = ce; | |
141 | } | |
142 | ||
143 | if (ignore_case && !(ce->ce_flags & CE_UNHASHED)) | |
144 | add_dir_entry(istate, ce); | |
145 | } | |
146 | ||
147 | static void lazy_init_name_hash(struct index_state *istate) | |
148 | { | |
149 | int nr; | |
150 | ||
151 | if (istate->name_hash_initialized) | |
152 | return; | |
153 | if (istate->cache_nr) | |
154 | preallocate_hash(&istate->name_hash, istate->cache_nr); | |
155 | for (nr = 0; nr < istate->cache_nr; nr++) | |
156 | hash_index_entry(istate, istate->cache[nr]); | |
157 | istate->name_hash_initialized = 1; | |
158 | } | |
159 | ||
160 | void add_name_hash(struct index_state *istate, struct cache_entry *ce) | |
161 | { | |
162 | /* if already hashed, add reference to directory entries */ | |
163 | if (ignore_case && (ce->ce_flags & CE_STATE_MASK) == CE_STATE_MASK) | |
164 | add_dir_entry(istate, ce); | |
165 | ||
166 | ce->ce_flags &= ~CE_UNHASHED; | |
167 | if (istate->name_hash_initialized) | |
168 | hash_index_entry(istate, ce); | |
169 | } | |
170 | ||
171 | /* | |
172 | * We don't actually *remove* it, we can just mark it invalid so that | |
173 | * we won't find it in lookups. | |
174 | * | |
175 | * Not only would we have to search the lists (simple enough), but | |
176 | * we'd also have to rehash other hash buckets in case this makes the | |
177 | * hash bucket empty (common). So it's much better to just mark | |
178 | * it. | |
179 | */ | |
180 | void remove_name_hash(struct index_state *istate, struct cache_entry *ce) | |
181 | { | |
182 | /* if already hashed, release reference to directory entries */ | |
183 | if (ignore_case && (ce->ce_flags & CE_STATE_MASK) == CE_HASHED) | |
184 | remove_dir_entry(istate, ce); | |
185 | ||
186 | ce->ce_flags |= CE_UNHASHED; | |
187 | } | |
188 | ||
189 | static int slow_same_name(const char *name1, int len1, const char *name2, int len2) | |
190 | { | |
191 | if (len1 != len2) | |
192 | return 0; | |
193 | ||
194 | while (len1) { | |
195 | unsigned char c1 = *name1++; | |
196 | unsigned char c2 = *name2++; | |
197 | len1--; | |
198 | if (c1 != c2) { | |
199 | c1 = toupper(c1); | |
200 | c2 = toupper(c2); | |
201 | if (c1 != c2) | |
202 | return 0; | |
203 | } | |
204 | } | |
205 | return 1; | |
206 | } | |
207 | ||
208 | static int same_name(const struct cache_entry *ce, const char *name, int namelen, int icase) | |
209 | { | |
210 | int len = ce_namelen(ce); | |
211 | ||
212 | /* | |
213 | * Always do exact compare, even if we want a case-ignoring comparison; | |
214 | * we do the quick exact one first, because it will be the common case. | |
215 | */ | |
216 | if (len == namelen && !cache_name_compare(name, namelen, ce->name, len)) | |
217 | return 1; | |
218 | ||
219 | if (!icase) | |
220 | return 0; | |
221 | ||
222 | return slow_same_name(name, namelen, ce->name, len); | |
223 | } | |
224 | ||
225 | struct cache_entry *index_name_exists(struct index_state *istate, const char *name, int namelen, int icase) | |
226 | { | |
227 | unsigned int hash = hash_name(name, namelen); | |
228 | struct cache_entry *ce; | |
229 | ||
230 | lazy_init_name_hash(istate); | |
231 | ce = lookup_hash(hash, &istate->name_hash); | |
232 | ||
233 | while (ce) { | |
234 | if (!(ce->ce_flags & CE_UNHASHED)) { | |
235 | if (same_name(ce, name, namelen, icase)) | |
236 | return ce; | |
237 | } | |
238 | ce = ce->next; | |
239 | } | |
240 | ||
241 | /* | |
242 | * When looking for a directory (trailing '/'), it might be a | |
243 | * submodule or a directory. Despite submodules being directories, | |
244 | * they are stored in the name hash without a closing slash. | |
245 | * When ignore_case is 1, directories are stored in a separate hash | |
246 | * table *with* their closing slash. | |
247 | * | |
248 | * The side effect of this storage technique is we have need to | |
249 | * lookup the directory in a separate hash table, and if not found | |
250 | * remove the slash from name and perform the lookup again without | |
251 | * the slash. If a match is made, S_ISGITLINK(ce->mode) will be | |
252 | * true. | |
253 | */ | |
254 | if (icase && name[namelen - 1] == '/') { | |
255 | struct dir_entry *dir = find_dir_entry(istate, name, namelen); | |
256 | if (dir && dir->nr) | |
257 | return dir->ce; | |
258 | ||
259 | ce = index_name_exists(istate, name, namelen - 1, icase); | |
260 | if (ce && S_ISGITLINK(ce->ce_mode)) | |
261 | return ce; | |
262 | } | |
263 | return NULL; | |
264 | } | |
265 | ||
266 | static int free_dir_entry(void *entry, void *unused) | |
267 | { | |
268 | struct dir_entry *dir = entry; | |
269 | while (dir) { | |
270 | struct dir_entry *next = dir->next; | |
271 | free(dir); | |
272 | dir = next; | |
273 | } | |
274 | return 0; | |
275 | } | |
276 | ||
277 | void free_name_hash(struct index_state *istate) | |
278 | { | |
279 | if (!istate->name_hash_initialized) | |
280 | return; | |
281 | istate->name_hash_initialized = 0; | |
282 | if (ignore_case) | |
283 | /* free directory entries */ | |
284 | for_each_hash(&istate->dir_hash, free_dir_entry, NULL); | |
285 | ||
286 | free_hash(&istate->name_hash); | |
287 | free_hash(&istate->dir_hash); | |
288 | } |