]>
Commit | Line | Data |
---|---|---|
96872bc2 LT |
1 | /* |
2 | * name-hash.c | |
3 | * | |
4 | * Hashing names in the index state | |
5 | * | |
6 | * Copyright (C) 2008 Linus Torvalds | |
7 | */ | |
8 | #define NO_THE_INDEX_COMPATIBILITY_MACROS | |
9 | #include "cache.h" | |
10 | ||
2092678c | 11 | struct dir_entry { |
e05881a4 | 12 | struct hashmap_entry ent; |
2092678c KB |
13 | struct dir_entry *parent; |
14 | struct cache_entry *ce; | |
15 | int nr; | |
16 | unsigned int namelen; | |
17 | }; | |
18 | ||
e05881a4 KB |
19 | static int dir_entry_cmp(const struct dir_entry *e1, |
20 | const struct dir_entry *e2, const char *name) | |
21 | { | |
22 | return e1->namelen != e2->namelen || strncasecmp(e1->ce->name, | |
23 | name ? name : e2->ce->name, e1->namelen); | |
24 | } | |
25 | ||
2092678c KB |
26 | static struct dir_entry *find_dir_entry(struct index_state *istate, |
27 | const char *name, unsigned int namelen) | |
28 | { | |
e05881a4 KB |
29 | struct dir_entry key; |
30 | hashmap_entry_init(&key, memihash(name, namelen)); | |
31 | key.namelen = namelen; | |
32 | return hashmap_get(&istate->dir_hash, &key, name); | |
2092678c KB |
33 | } |
34 | ||
35 | static struct dir_entry *hash_dir_entry(struct index_state *istate, | |
36 | struct cache_entry *ce, int namelen) | |
5102c617 JJ |
37 | { |
38 | /* | |
39 | * Throw each directory component in the hash for quick lookup | |
d28eec26 | 40 | * during a git status. Directory components are stored without their |
5102c617 | 41 | * closing slash. Despite submodules being a directory, they never |
d28eec26 | 42 | * reach this point, because they are stored |
2092678c | 43 | * in index_state.name_hash (as ordinary cache_entries). |
5102c617 | 44 | * |
2092678c KB |
45 | * Note that the cache_entry stored with the dir_entry merely |
46 | * supplies the name of the directory (up to dir_entry.namelen). We | |
47 | * track the number of 'active' files in a directory in dir_entry.nr, | |
48 | * so we can tell if the directory is still relevant, e.g. for git | |
49 | * status. However, if cache_entries are removed, we cannot pinpoint | |
50 | * an exact cache_entry that's still active. It is very possible that | |
51 | * multiple dir_entries point to the same cache_entry. | |
5102c617 | 52 | */ |
2092678c KB |
53 | struct dir_entry *dir; |
54 | ||
55 | /* get length of parent directory */ | |
56 | while (namelen > 0 && !is_dir_sep(ce->name[namelen - 1])) | |
57 | namelen--; | |
58 | if (namelen <= 0) | |
59 | return NULL; | |
d28eec26 | 60 | namelen--; |
2092678c KB |
61 | |
62 | /* lookup existing entry for that directory */ | |
63 | dir = find_dir_entry(istate, ce->name, namelen); | |
64 | if (!dir) { | |
65 | /* not found, create it and add to hash table */ | |
2092678c | 66 | dir = xcalloc(1, sizeof(struct dir_entry)); |
e05881a4 | 67 | hashmap_entry_init(dir, memihash(ce->name, namelen)); |
2092678c KB |
68 | dir->namelen = namelen; |
69 | dir->ce = ce; | |
e05881a4 | 70 | hashmap_add(&istate->dir_hash, dir); |
2092678c KB |
71 | |
72 | /* recursively add missing parent directories */ | |
d28eec26 | 73 | dir->parent = hash_dir_entry(istate, ce, namelen); |
5102c617 | 74 | } |
2092678c KB |
75 | return dir; |
76 | } | |
77 | ||
78 | static void add_dir_entry(struct index_state *istate, struct cache_entry *ce) | |
79 | { | |
80 | /* Add reference to the directory entry (and parents if 0). */ | |
81 | struct dir_entry *dir = hash_dir_entry(istate, ce, ce_namelen(ce)); | |
82 | while (dir && !(dir->nr++)) | |
83 | dir = dir->parent; | |
84 | } | |
85 | ||
86 | static void remove_dir_entry(struct index_state *istate, struct cache_entry *ce) | |
87 | { | |
88 | /* | |
1c8cca19 KB |
89 | * Release reference to the directory entry. If 0, remove and continue |
90 | * with parent directory. | |
2092678c KB |
91 | */ |
92 | struct dir_entry *dir = hash_dir_entry(istate, ce, ce_namelen(ce)); | |
1c8cca19 KB |
93 | while (dir && !(--dir->nr)) { |
94 | struct dir_entry *parent = dir->parent; | |
95 | hashmap_remove(&istate->dir_hash, dir, NULL); | |
96 | free(dir); | |
97 | dir = parent; | |
98 | } | |
5102c617 JJ |
99 | } |
100 | ||
96872bc2 LT |
101 | static void hash_index_entry(struct index_state *istate, struct cache_entry *ce) |
102 | { | |
103 | void **pos; | |
104 | unsigned int hash; | |
105 | ||
106 | if (ce->ce_flags & CE_HASHED) | |
107 | return; | |
108 | ce->ce_flags |= CE_HASHED; | |
2092678c | 109 | ce->next = NULL; |
e05881a4 | 110 | hash = memihash(ce->name, ce_namelen(ce)); |
96872bc2 LT |
111 | pos = insert_hash(hash, ce, &istate->name_hash); |
112 | if (pos) { | |
113 | ce->next = *pos; | |
114 | *pos = ce; | |
115 | } | |
5102c617 | 116 | |
2092678c KB |
117 | if (ignore_case && !(ce->ce_flags & CE_UNHASHED)) |
118 | add_dir_entry(istate, ce); | |
96872bc2 LT |
119 | } |
120 | ||
121 | static void lazy_init_name_hash(struct index_state *istate) | |
122 | { | |
123 | int nr; | |
124 | ||
125 | if (istate->name_hash_initialized) | |
126 | return; | |
c7359281 NTND |
127 | if (istate->cache_nr) |
128 | preallocate_hash(&istate->name_hash, istate->cache_nr); | |
e05881a4 | 129 | hashmap_init(&istate->dir_hash, (hashmap_cmp_fn) dir_entry_cmp, 0); |
96872bc2 LT |
130 | for (nr = 0; nr < istate->cache_nr; nr++) |
131 | hash_index_entry(istate, istate->cache[nr]); | |
132 | istate->name_hash_initialized = 1; | |
133 | } | |
134 | ||
135 | void add_name_hash(struct index_state *istate, struct cache_entry *ce) | |
136 | { | |
2092678c KB |
137 | /* if already hashed, add reference to directory entries */ |
138 | if (ignore_case && (ce->ce_flags & CE_STATE_MASK) == CE_STATE_MASK) | |
139 | add_dir_entry(istate, ce); | |
140 | ||
96872bc2 LT |
141 | ce->ce_flags &= ~CE_UNHASHED; |
142 | if (istate->name_hash_initialized) | |
143 | hash_index_entry(istate, ce); | |
144 | } | |
145 | ||
2092678c KB |
146 | /* |
147 | * We don't actually *remove* it, we can just mark it invalid so that | |
148 | * we won't find it in lookups. | |
149 | * | |
150 | * Not only would we have to search the lists (simple enough), but | |
151 | * we'd also have to rehash other hash buckets in case this makes the | |
152 | * hash bucket empty (common). So it's much better to just mark | |
153 | * it. | |
154 | */ | |
155 | void remove_name_hash(struct index_state *istate, struct cache_entry *ce) | |
156 | { | |
157 | /* if already hashed, release reference to directory entries */ | |
158 | if (ignore_case && (ce->ce_flags & CE_STATE_MASK) == CE_HASHED) | |
159 | remove_dir_entry(istate, ce); | |
160 | ||
161 | ce->ce_flags |= CE_UNHASHED; | |
162 | } | |
163 | ||
cd2fef59 LT |
164 | static int slow_same_name(const char *name1, int len1, const char *name2, int len2) |
165 | { | |
166 | if (len1 != len2) | |
167 | return 0; | |
168 | ||
169 | while (len1) { | |
170 | unsigned char c1 = *name1++; | |
171 | unsigned char c2 = *name2++; | |
172 | len1--; | |
173 | if (c1 != c2) { | |
174 | c1 = toupper(c1); | |
175 | c2 = toupper(c2); | |
176 | if (c1 != c2) | |
177 | return 0; | |
178 | } | |
179 | } | |
180 | return 1; | |
181 | } | |
182 | ||
183 | static int same_name(const struct cache_entry *ce, const char *name, int namelen, int icase) | |
184 | { | |
185 | int len = ce_namelen(ce); | |
186 | ||
187 | /* | |
188 | * Always do exact compare, even if we want a case-ignoring comparison; | |
189 | * we do the quick exact one first, because it will be the common case. | |
190 | */ | |
191 | if (len == namelen && !cache_name_compare(name, namelen, ce->name, len)) | |
192 | return 1; | |
193 | ||
5102c617 JJ |
194 | if (!icase) |
195 | return 0; | |
196 | ||
2092678c | 197 | return slow_same_name(name, namelen, ce->name, len); |
cd2fef59 LT |
198 | } |
199 | ||
db5360f3 ES |
200 | struct cache_entry *index_dir_exists(struct index_state *istate, const char *name, int namelen) |
201 | { | |
202 | struct cache_entry *ce; | |
203 | struct dir_entry *dir; | |
204 | ||
205 | lazy_init_name_hash(istate); | |
206 | dir = find_dir_entry(istate, name, namelen); | |
207 | if (dir && dir->nr) | |
208 | return dir->ce; | |
209 | ||
210 | /* | |
211 | * It might be a submodule. Unlike plain directories, which are stored | |
212 | * in the dir-hash, submodules are stored in the name-hash, so check | |
213 | * there, as well. | |
214 | */ | |
d28eec26 | 215 | ce = index_file_exists(istate, name, namelen, 1); |
db5360f3 ES |
216 | if (ce && S_ISGITLINK(ce->ce_mode)) |
217 | return ce; | |
218 | ||
219 | return NULL; | |
220 | } | |
221 | ||
222 | struct cache_entry *index_file_exists(struct index_state *istate, const char *name, int namelen, int icase) | |
96872bc2 | 223 | { |
e05881a4 | 224 | unsigned int hash = memihash(name, namelen); |
96872bc2 LT |
225 | struct cache_entry *ce; |
226 | ||
227 | lazy_init_name_hash(istate); | |
228 | ce = lookup_hash(hash, &istate->name_hash); | |
229 | ||
230 | while (ce) { | |
231 | if (!(ce->ce_flags & CE_UNHASHED)) { | |
cd2fef59 | 232 | if (same_name(ce, name, namelen, icase)) |
df292c79 | 233 | return ce; |
96872bc2 | 234 | } |
2092678c | 235 | ce = ce->next; |
96872bc2 | 236 | } |
df292c79 | 237 | return NULL; |
96872bc2 | 238 | } |
2092678c | 239 | |
db5360f3 ES |
240 | struct cache_entry *index_name_exists(struct index_state *istate, const char *name, int namelen, int icase) |
241 | { | |
242 | if (namelen > 0 && name[namelen - 1] == '/') | |
d28eec26 | 243 | return index_dir_exists(istate, name, namelen - 1); |
db5360f3 ES |
244 | return index_file_exists(istate, name, namelen, icase); |
245 | } | |
246 | ||
2092678c KB |
247 | void free_name_hash(struct index_state *istate) |
248 | { | |
249 | if (!istate->name_hash_initialized) | |
250 | return; | |
251 | istate->name_hash_initialized = 0; | |
2092678c KB |
252 | |
253 | free_hash(&istate->name_hash); | |
e05881a4 | 254 | hashmap_free(&istate->dir_hash, 1); |
2092678c | 255 | } |