]>
Commit | Line | Data |
---|---|---|
213d9883 OL |
1 | // Copyright (C) 2020 Joel Rosdahl and other contributors |
2 | // | |
3 | // See doc/AUTHORS.adoc for a complete list of contributors. | |
4 | // | |
5 | // This program is free software; you can redistribute it and/or modify it | |
6 | // under the terms of the GNU General Public License as published by the Free | |
7 | // Software Foundation; either version 3 of the License, or (at your option) | |
8 | // any later version. | |
9 | // | |
10 | // This program is distributed in the hope that it will be useful, but WITHOUT | |
11 | // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
12 | // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | |
13 | // more details. | |
14 | // | |
15 | // You should have received a copy of the GNU General Public License along with | |
16 | // this program; if not, write to the Free Software Foundation, Inc., 51 | |
17 | // Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
18 | ||
19 | #include "InodeCache.hpp" | |
20 | ||
2354adc2 JR |
21 | #include "Config.hpp" |
22 | #include "Fd.hpp" | |
23 | #include "Finalizer.hpp" | |
a5021452 | 24 | #include "Hash.hpp" |
05acfffb | 25 | #include "Logging.hpp" |
2354adc2 | 26 | #include "Stat.hpp" |
53b35a42 | 27 | #include "TemporaryFile.hpp" |
2354adc2 | 28 | #include "Util.hpp" |
2354adc2 JR |
29 | |
30 | #include <atomic> | |
31 | #include <libgen.h> | |
32 | #include <sys/mman.h> | |
33 | #include <type_traits> | |
213d9883 | 34 | |
213d9883 | 35 | // The inode cache resides on a file that is mapped into shared memory by |
88699012 JR |
36 | // running processes. It is implemented as a two level structure, where the top |
37 | // level is a hash table consisting of buckets. Each bucket contains entries | |
213d9883 OL |
38 | // that are sorted in LRU order. Entries map from keys representing files to |
39 | // cached hash results. | |
40 | // | |
41 | // Concurrent access is guarded by a mutex in each bucket. | |
42 | // | |
43 | // Current cache size is fixed and the given constants are considered large | |
44 | // enough for most projects. The size could be made configurable if there is a | |
45 | // demand for it. | |
213d9883 | 46 | |
88699012 JR |
47 | namespace { |
48 | ||
49 | // The version number corresponds to the format of the cache entries and to | |
50 | // semantics of the key fields. | |
51 | // | |
52 | // Note: The key is hashed using the main hash algorithm, so the version number | |
53 | // does not need to be incremented if said algorithm is changed (except if the | |
54 | // digest size changes since that affects the entry format). | |
55 | const uint32_t k_version = 1; | |
56 | ||
57 | // Note: Increment the version number if constants affecting storage size are | |
58 | // changed. | |
213d9883 OL |
59 | const uint32_t k_num_buckets = 32 * 1024; |
60 | const uint32_t k_num_entries = 4; | |
61 | ||
0a189c2c | 62 | static_assert(Digest::size() == 20, |
213d9883 | 63 | "Increment version number if size of digest is changed."); |
0a189c2c JR |
64 | static_assert(std::is_trivially_copyable<Digest>::value, |
65 | "Digest is expected to be trivially copyable."); | |
213d9883 OL |
66 | |
67 | static_assert( | |
68 | static_cast<int>(InodeCache::ContentType::binary) == 0, | |
69 | "Numeric value is part of key, increment version number if changed."); | |
70 | static_assert( | |
71 | static_cast<int>(InodeCache::ContentType::code) == 1, | |
72 | "Numeric value is part of key, increment version number if changed."); | |
73 | static_assert( | |
74 | static_cast<int>(InodeCache::ContentType::code_with_sloppy_time_macros) == 2, | |
75 | "Numeric value is part of key, increment version number if changed."); | |
76 | static_assert( | |
77 | static_cast<int>(InodeCache::ContentType::precompiled_header) == 3, | |
78 | "Numeric value is part of key, increment version number if changed."); | |
79 | ||
80 | } // namespace | |
81 | ||
82 | struct InodeCache::Key | |
83 | { | |
84 | ContentType type; | |
85 | dev_t st_dev; | |
86 | ino_t st_ino; | |
87 | mode_t st_mode; | |
2354adc2 | 88 | #ifdef HAVE_STRUCT_STAT_ST_MTIM |
213d9883 | 89 | timespec st_mtim; |
2354adc2 | 90 | #else |
213d9883 | 91 | time_t st_mtim; |
2354adc2 JR |
92 | #endif |
93 | #ifdef HAVE_STRUCT_STAT_ST_CTIM | |
213d9883 | 94 | timespec st_ctim; // Included for sanity checking. |
2354adc2 | 95 | #else |
213d9883 | 96 | time_t st_ctim; // Included for sanity checking. |
2354adc2 | 97 | #endif |
213d9883 OL |
98 | off_t st_size; // Included for sanity checking. |
99 | bool sloppy_time_macros; | |
100 | }; | |
101 | ||
102 | struct InodeCache::Entry | |
103 | { | |
0a189c2c JR |
104 | Digest key_digest; // Hashed key |
105 | Digest file_digest; // Cached file hash | |
213d9883 OL |
106 | int return_value; // Cached return value |
107 | }; | |
108 | ||
109 | struct InodeCache::Bucket | |
110 | { | |
111 | pthread_mutex_t mt; | |
112 | Entry entries[k_num_entries]; | |
113 | }; | |
114 | ||
115 | struct InodeCache::SharedRegion | |
116 | { | |
117 | uint32_t version; | |
118 | std::atomic<int64_t> hits; | |
119 | std::atomic<int64_t> misses; | |
120 | std::atomic<int64_t> errors; | |
121 | Bucket buckets[k_num_buckets]; | |
122 | }; | |
123 | ||
124 | bool | |
125 | InodeCache::mmap_file(const std::string& inode_cache_file) | |
126 | { | |
127 | if (m_sr) { | |
128 | munmap(m_sr, sizeof(SharedRegion)); | |
129 | m_sr = nullptr; | |
130 | } | |
e071bcfd JR |
131 | Fd fd(open(inode_cache_file.c_str(), O_RDWR)); |
132 | if (!fd) { | |
213d9883 OL |
133 | cc_log("Failed to open inode cache %s: %s", |
134 | inode_cache_file.c_str(), | |
135 | strerror(errno)); | |
136 | return false; | |
137 | } | |
138 | bool is_nfs; | |
e071bcfd | 139 | if (Util::is_nfs_fd(*fd, &is_nfs) == 0 && is_nfs) { |
213d9883 OL |
140 | cc_log( |
141 | "Inode cache not supported because the cache file is located on nfs: %s", | |
142 | inode_cache_file.c_str()); | |
213d9883 OL |
143 | return false; |
144 | } | |
145 | SharedRegion* sr = reinterpret_cast<SharedRegion*>(mmap( | |
e071bcfd JR |
146 | nullptr, sizeof(SharedRegion), PROT_READ | PROT_WRITE, MAP_SHARED, *fd, 0)); |
147 | fd.close(); | |
213d9883 OL |
148 | if (sr == reinterpret_cast<void*>(-1)) { |
149 | cc_log("Failed to mmap %s: %s", inode_cache_file.c_str(), strerror(errno)); | |
150 | return false; | |
151 | } | |
152 | // Drop the file from disk if the found version is not matching. This will | |
153 | // allow a new file to be generated. | |
154 | if (sr->version != k_version) { | |
155 | cc_log( | |
77bf7bc3 JR |
156 | "Dropping inode cache because found version %u does not match expected" |
157 | " version %u", | |
213d9883 OL |
158 | sr->version, |
159 | k_version); | |
160 | munmap(sr, sizeof(SharedRegion)); | |
161 | unlink(inode_cache_file.c_str()); | |
162 | return false; | |
163 | } | |
164 | m_sr = sr; | |
165 | if (m_config.debug()) { | |
166 | cc_log("inode cache file loaded: %s", inode_cache_file.c_str()); | |
167 | } | |
168 | return true; | |
169 | } | |
170 | ||
171 | bool | |
2292fef1 JR |
172 | InodeCache::hash_inode(const std::string& path, |
173 | ContentType type, | |
174 | Digest& digest) | |
213d9883 OL |
175 | { |
176 | Stat stat = Stat::stat(path); | |
177 | if (!stat) { | |
2292fef1 JR |
178 | cc_log( |
179 | "Could not stat %s: %s", path.c_str(), strerror(stat.error_number())); | |
213d9883 OL |
180 | return false; |
181 | } | |
182 | ||
183 | Key key; | |
184 | memset(&key, 0, sizeof(Key)); | |
185 | key.type = type; | |
186 | key.st_dev = stat.device(); | |
187 | key.st_ino = stat.inode(); | |
188 | key.st_mode = stat.mode(); | |
2354adc2 | 189 | #ifdef HAVE_STRUCT_STAT_ST_MTIM |
213d9883 | 190 | key.st_mtim = stat.mtim(); |
2354adc2 | 191 | #else |
213d9883 | 192 | key.st_mtim = stat.mtime(); |
2354adc2 JR |
193 | #endif |
194 | #ifdef HAVE_STRUCT_STAT_ST_CTIM | |
213d9883 | 195 | key.st_ctim = stat.ctim(); |
2354adc2 | 196 | #else |
213d9883 | 197 | key.st_ctim = stat.ctime(); |
2354adc2 | 198 | #endif |
213d9883 OL |
199 | key.st_size = stat.size(); |
200 | ||
a5021452 JR |
201 | Hash hash; |
202 | hash.hash(&key, sizeof(Key)); | |
203 | digest = hash.digest(); | |
213d9883 OL |
204 | return true; |
205 | } | |
206 | ||
207 | InodeCache::Bucket* | |
208 | InodeCache::acquire_bucket(uint32_t index) | |
209 | { | |
210 | Bucket* bucket = &m_sr->buckets[index]; | |
211 | int err = pthread_mutex_lock(&bucket->mt); | |
2354adc2 | 212 | #ifdef PTHREAD_MUTEX_ROBUST |
213d9883 | 213 | if (err == EOWNERDEAD) { |
77bf7bc3 | 214 | if (m_config.debug()) { |
213d9883 | 215 | ++m_sr->errors; |
77bf7bc3 | 216 | } |
213d9883 OL |
217 | err = pthread_mutex_consistent(&bucket->mt); |
218 | if (err) { | |
219 | cc_log( | |
220 | "Can't consolidate stale mutex at index %u: %s", index, strerror(err)); | |
221 | cc_log("Consider removing the inode cache file if the problem persists"); | |
222 | return nullptr; | |
223 | } | |
224 | cc_log("Wiping bucket at index %u because of stale mutex", index); | |
225 | memset(bucket->entries, 0, sizeof(Bucket::entries)); | |
226 | } else { | |
2354adc2 | 227 | #endif |
213d9883 OL |
228 | if (err) { |
229 | cc_log("Failed to lock mutex at index %u: %s", index, strerror(err)); | |
230 | cc_log("Consider removing the inode cache file if problem persists"); | |
231 | ++m_sr->errors; | |
232 | return nullptr; | |
233 | } | |
2354adc2 | 234 | #ifdef PTHREAD_MUTEX_ROBUST |
213d9883 | 235 | } |
2354adc2 | 236 | #endif |
213d9883 OL |
237 | return bucket; |
238 | } | |
239 | ||
240 | InodeCache::Bucket* | |
0a189c2c | 241 | InodeCache::acquire_bucket(const Digest& key_digest) |
213d9883 OL |
242 | { |
243 | uint32_t hash; | |
0a189c2c | 244 | Util::big_endian_to_int(key_digest.bytes(), hash); |
213d9883 OL |
245 | return acquire_bucket(hash % k_num_buckets); |
246 | } | |
247 | ||
248 | void | |
249 | InodeCache::release_bucket(Bucket* bucket) | |
250 | { | |
251 | pthread_mutex_unlock(&bucket->mt); | |
252 | } | |
253 | ||
254 | bool | |
255 | InodeCache::create_new_file(const std::string& filename) | |
256 | { | |
257 | cc_log("Creating a new inode cache"); | |
258 | ||
259 | // Create the new file to a temporary name to prevent other processes from | |
260 | // mapping it before it is fully initialized. | |
53b35a42 | 261 | TemporaryFile tmp_file(filename); |
e071bcfd | 262 | |
53b35a42 | 263 | Finalizer temp_file_remover([&] { unlink(tmp_file.path.c_str()); }); |
e071bcfd | 264 | |
213d9883 | 265 | bool is_nfs; |
53b35a42 | 266 | if (Util::is_nfs_fd(*tmp_file.fd, &is_nfs) == 0 && is_nfs) { |
213d9883 | 267 | cc_log( |
77bf7bc3 JR |
268 | "Inode cache not supported because the cache file would be located on" |
269 | " nfs: %s", | |
213d9883 | 270 | filename.c_str()); |
213d9883 OL |
271 | return false; |
272 | } | |
53b35a42 | 273 | int err = Util::fallocate(*tmp_file.fd, sizeof(SharedRegion)); |
213d9883 OL |
274 | if (err) { |
275 | cc_log("Failed to allocate file space for inode cache: %s", strerror(err)); | |
213d9883 OL |
276 | return false; |
277 | } | |
278 | SharedRegion* sr = | |
279 | reinterpret_cast<SharedRegion*>(mmap(nullptr, | |
280 | sizeof(SharedRegion), | |
281 | PROT_READ | PROT_WRITE, | |
282 | MAP_SHARED, | |
53b35a42 | 283 | *tmp_file.fd, |
213d9883 OL |
284 | 0)); |
285 | if (sr == reinterpret_cast<void*>(-1)) { | |
286 | cc_log("Failed to mmap new inode cache: %s", strerror(errno)); | |
213d9883 OL |
287 | return false; |
288 | } | |
289 | ||
290 | // Initialize new shared region. | |
291 | sr->version = k_version; | |
292 | pthread_mutexattr_t mattr; | |
293 | pthread_mutexattr_init(&mattr); | |
294 | pthread_mutexattr_setpshared(&mattr, PTHREAD_PROCESS_SHARED); | |
2354adc2 | 295 | #ifdef PTHREAD_MUTEX_ROBUST |
213d9883 | 296 | pthread_mutexattr_setrobust(&mattr, PTHREAD_MUTEX_ROBUST); |
2354adc2 | 297 | #endif |
62a4755c JR |
298 | for (auto& bucket : sr->buckets) { |
299 | pthread_mutex_init(&bucket.mt, &mattr); | |
213d9883 OL |
300 | } |
301 | ||
302 | munmap(sr, sizeof(SharedRegion)); | |
53b35a42 | 303 | tmp_file.fd.close(); |
213d9883 OL |
304 | |
305 | // link() will fail silently if a file with the same name already exists. | |
306 | // This will be the case if two processes try to create a new file | |
307 | // simultaneously. Thus close the current file handle and reopen a new one, | |
308 | // which will make us use the first created file even if we didn't win the | |
309 | // race. | |
53b35a42 | 310 | if (link(tmp_file.path.c_str(), filename.c_str()) != 0) { |
213d9883 | 311 | cc_log("Failed to link new inode cache: %s", strerror(errno)); |
213d9883 OL |
312 | return false; |
313 | } | |
314 | ||
213d9883 OL |
315 | return true; |
316 | } | |
317 | ||
318 | bool | |
319 | InodeCache::initialize() | |
320 | { | |
321 | if (m_failed || !m_config.inode_cache()) { | |
322 | return false; | |
323 | } | |
324 | ||
325 | if (m_sr) { | |
326 | return true; | |
327 | } | |
328 | ||
329 | std::string filename = get_file(); | |
330 | if (m_sr || mmap_file(filename)) { | |
331 | return true; | |
332 | } | |
333 | ||
334 | // Try to create a new cache if we failed to map an existing file. | |
335 | create_new_file(filename); | |
336 | ||
337 | // Concurrent processes could try to create new files simultaneously and the | |
338 | // file that actually landed on disk will be from the process that won the | |
339 | // race. Thus we try to open the file from disk instead of reusing the file | |
340 | // handle to the file we just created. | |
341 | if (mmap_file(filename)) { | |
342 | return true; | |
343 | } | |
344 | ||
345 | m_failed = true; | |
346 | return false; | |
347 | } | |
348 | ||
77bf7bc3 | 349 | InodeCache::InodeCache(const Config& config) : m_config(config) |
213d9883 OL |
350 | { |
351 | } | |
352 | ||
353 | InodeCache::~InodeCache() | |
354 | { | |
355 | if (m_sr) { | |
356 | munmap(m_sr, sizeof(SharedRegion)); | |
357 | } | |
358 | } | |
359 | ||
360 | bool | |
2292fef1 | 361 | InodeCache::get(const std::string& path, |
213d9883 | 362 | ContentType type, |
0a189c2c | 363 | Digest& file_digest, |
213d9883 OL |
364 | int* return_value) |
365 | { | |
366 | if (!initialize()) { | |
367 | return false; | |
368 | } | |
369 | ||
0a189c2c JR |
370 | Digest key_digest; |
371 | if (!hash_inode(path, type, key_digest)) { | |
213d9883 OL |
372 | return false; |
373 | } | |
374 | ||
375 | Bucket* bucket = acquire_bucket(key_digest); | |
376 | ||
377 | if (!bucket) { | |
378 | return false; | |
379 | } | |
380 | ||
381 | bool found = false; | |
382 | ||
383 | for (uint32_t i = 0; i < k_num_entries; ++i) { | |
0a189c2c | 384 | if (bucket->entries[i].key_digest == key_digest) { |
213d9883 OL |
385 | if (i > 0) { |
386 | Entry tmp = bucket->entries[i]; | |
387 | memmove(&bucket->entries[1], &bucket->entries[0], sizeof(Entry) * i); | |
388 | bucket->entries[0] = tmp; | |
389 | } | |
390 | ||
0a189c2c | 391 | file_digest = bucket->entries[0].file_digest; |
213d9883 OL |
392 | if (return_value) { |
393 | *return_value = bucket->entries[0].return_value; | |
394 | } | |
395 | found = true; | |
396 | break; | |
397 | } | |
398 | } | |
399 | release_bucket(bucket); | |
400 | ||
2292fef1 | 401 | cc_log("inode cache %s: %s", found ? "hit" : "miss", path.c_str()); |
213d9883 OL |
402 | |
403 | if (m_config.debug()) { | |
404 | if (found) { | |
405 | ++m_sr->hits; | |
406 | } else { | |
407 | ++m_sr->misses; | |
408 | } | |
409 | cc_log( | |
410 | "accumulated stats for inode cache: hits=%ld, misses=%ld, errors=%ld", | |
411 | static_cast<long>(m_sr->hits.load()), | |
412 | static_cast<long>(m_sr->misses.load()), | |
413 | static_cast<long>(m_sr->errors.load())); | |
414 | } | |
415 | return found; | |
416 | } | |
417 | ||
418 | bool | |
2292fef1 | 419 | InodeCache::put(const std::string& path, |
213d9883 | 420 | ContentType type, |
0a189c2c | 421 | const Digest& file_digest, |
213d9883 OL |
422 | int return_value) |
423 | { | |
424 | if (!initialize()) { | |
425 | return false; | |
426 | } | |
427 | ||
0a189c2c JR |
428 | Digest key_digest; |
429 | if (!hash_inode(path, type, key_digest)) { | |
213d9883 OL |
430 | return false; |
431 | } | |
432 | ||
433 | Bucket* bucket = acquire_bucket(key_digest); | |
434 | ||
435 | if (!bucket) { | |
436 | return false; | |
437 | } | |
438 | ||
439 | memmove(&bucket->entries[1], | |
440 | &bucket->entries[0], | |
441 | sizeof(Entry) * (k_num_entries - 1)); | |
442 | ||
443 | bucket->entries[0].key_digest = key_digest; | |
444 | bucket->entries[0].file_digest = file_digest; | |
445 | bucket->entries[0].return_value = return_value; | |
446 | ||
447 | release_bucket(bucket); | |
448 | ||
2292fef1 | 449 | cc_log("inode cache insert: %s", path.c_str()); |
213d9883 OL |
450 | |
451 | return true; | |
452 | } | |
453 | ||
454 | bool | |
455 | InodeCache::drop() | |
456 | { | |
457 | std::string file = get_file(); | |
458 | if (unlink(file.c_str()) != 0) { | |
459 | return false; | |
460 | } | |
461 | if (m_sr) { | |
462 | munmap(m_sr, sizeof(SharedRegion)); | |
463 | m_sr = nullptr; | |
464 | } | |
465 | return true; | |
466 | } | |
467 | ||
468 | std::string | |
469 | InodeCache::get_file() | |
470 | { | |
471 | return fmt::format("{}/inode-cache.v{}", m_config.temporary_dir(), k_version); | |
472 | } | |
473 | ||
474 | int64_t | |
475 | InodeCache::get_hits() | |
476 | { | |
477 | return initialize() ? m_sr->hits.load() : -1; | |
478 | } | |
479 | ||
480 | int64_t | |
481 | InodeCache::get_misses() | |
482 | { | |
483 | return initialize() ? m_sr->misses.load() : -1; | |
484 | } | |
485 | ||
486 | int64_t | |
487 | InodeCache::get_errors() | |
488 | { | |
489 | return initialize() ? m_sr->errors.load() : -1; | |
490 | } |