]> git.ipfire.org Git - thirdparty/ccache.git/blob - src/InodeCache.cpp
feat: Port the inode cache to Windows (#1388)
[thirdparty/ccache.git] / src / InodeCache.cpp
1 // Copyright (C) 2020-2023 Joel Rosdahl and other contributors
2 //
3 // See doc/AUTHORS.adoc for a complete list of contributors.
4 //
5 // This program is free software; you can redistribute it and/or modify it
6 // under the terms of the GNU General Public License as published by the Free
7 // Software Foundation; either version 3 of the License, or (at your option)
8 // any later version.
9 //
10 // This program is distributed in the hope that it will be useful, but WITHOUT
11 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 // more details.
14 //
15 // You should have received a copy of the GNU General Public License along with
16 // this program; if not, write to the Free Software Foundation, Inc., 51
17 // Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
19 #include "InodeCache.hpp"
20
21 #include "Config.hpp"
22 #include "Hash.hpp"
23 #include "Util.hpp"
24
25 #include <util/DirEntry.hpp>
26 #include <util/Fd.hpp>
27 #include <util/Finalizer.hpp>
28 #include <util/PathString.hpp>
29 #include <util/TemporaryFile.hpp>
30 #include <util/conversion.hpp>
31 #include <util/file.hpp>
32 #include <util/fmtmacros.hpp>
33 #include <util/logging.hpp>
34
35 #include <fcntl.h>
36
37 #ifndef _WIN32
38 # include <libgen.h>
39 # include <sched.h>
40 # include <unistd.h>
41 #endif
42
43 #ifdef HAVE_LINUX_FS_H
44 # include <linux/magic.h>
45 # include <sys/statfs.h>
46 #elif defined(HAVE_STRUCT_STATFS_F_FSTYPENAME)
47 # include <sys/mount.h>
48 # include <sys/param.h>
49 #endif
50
51 #include <atomic>
52 #include <type_traits>
53 #include <vector>
54
55 // The inode cache resides on a file that is mapped into shared memory by
56 // running processes. It is implemented as a two level structure, where the top
57 // level is a hash table consisting of buckets. Each bucket contains entries
58 // that are sorted in LRU order. Entries map from keys representing files to
59 // cached hash results.
60 //
61 // Concurrent access is guarded by a mutex in each bucket.
62 //
63 // Current cache size is fixed and the given constants are considered large
64 // enough for most projects. The size could be made configurable if there is a
65 // demand for it.
66
67 namespace {
68
69 // The version number corresponds to the format of the cache entries and to
70 // semantics of the key fields.
71 //
72 // Note: The key is hashed using the main hash algorithm, so the version number
73 // does not need to be incremented if said algorithm is changed (except if the
74 // digest size changes since that affects the entry format).
75 const uint32_t k_version = 2;
76
77 // Note: Increment the version number if constants affecting storage size are
78 // changed.
79 const uint32_t k_num_buckets = 32 * 1024;
80 const uint32_t k_num_entries = 4;
81
82 // Maximum time the spin lock loop will try before giving up.
83 const auto k_max_lock_duration = util::Duration(5);
84
85 // The memory-mapped file may reside on a filesystem with compression. Memory
86 // accesses to the file risk crashing if such a filesystem gets full, so stop
87 // using the inode cache well before this happens.
88 const uint64_t k_min_fs_mib_left = 100; // 100 MiB
89
90 // How long a filesystem space check is valid before we make a new one.
91 const util::Duration k_fs_space_check_valid_duration(1);
92
93 static_assert(std::tuple_size<Hash::Digest>() == 20,
94 "Increment version number if size of digest is changed.");
95 static_assert(std::is_trivially_copyable<Hash::Digest>::value,
96 "Digest is expected to be trivially copyable.");
97
98 static_assert(
99 static_cast<int>(InodeCache::ContentType::raw) == 0,
100 "Numeric value is part of key, increment version number if changed.");
101 static_assert(
102 static_cast<int>(InodeCache::ContentType::checked_for_temporal_macros) == 1,
103 "Numeric value is part of key, increment version number if changed.");
104
105 bool
106 fd_is_on_known_to_work_file_system(int fd)
107 {
108 #ifndef _WIN32
109 bool known_to_work = false;
110 struct statfs buf;
111 if (fstatfs(fd, &buf) != 0) {
112 LOG("fstatfs failed: {}", strerror(errno));
113 } else {
114 # ifdef HAVE_LINUX_FS_H
115 // statfs's f_type field is a signed 32-bit integer on some platforms. Large
116 // values therefore cause narrowing warnings, so cast the value to a large
117 // unsigned type.
118 const auto f_type = static_cast<uintmax_t>(buf.f_type);
119 switch (f_type) {
120 // Is a filesystem you know works with the inode cache missing in this
121 // list? Please submit an issue or pull request to the ccache project.
122 case 0x9123683e: // BTRFS_SUPER_MAGIC
123 case 0xef53: // EXT2_SUPER_MAGIC
124 case 0x01021994: // TMPFS_MAGIC
125 case 0x58465342: // XFS_SUPER_MAGIC
126 known_to_work = true;
127 break;
128 default:
129 LOG("Filesystem type 0x{:x} not known to work for the inode cache",
130 f_type);
131 }
132 # elif defined(HAVE_STRUCT_STATFS_F_FSTYPENAME) // macOS X and some BSDs
133 static const std::vector<std::string> known_to_work_filesystems = {
134 // Is a filesystem you know works with the inode cache missing in this
135 // list? Please submit an issue or pull request to the ccache project.
136 "apfs",
137 "tmpfs",
138 "ufs",
139 "xfs",
140 "zfs",
141 };
142 if (std::find(known_to_work_filesystems.begin(),
143 known_to_work_filesystems.end(),
144 buf.f_fstypename)
145 != known_to_work_filesystems.end()) {
146 known_to_work = true;
147 } else {
148 LOG("Filesystem type {} not known to work for the inode cache",
149 buf.f_fstypename);
150 }
151 # else
152 # error Inconsistency: INODE_CACHE_SUPPORTED is set but we should not get here
153 # endif
154 }
155 return known_to_work;
156 #else
157 HANDLE file = (HANDLE)_get_osfhandle(fd);
158 if (file == INVALID_HANDLE_VALUE) {
159 return false;
160 }
161
162 // Try to get information about remote protocol for this file.
163 // If the call succeeds, this is a remote file.
164 // If the call fails with invalid parameter error, consider that it is a local
165 // file
166 FILE_REMOTE_PROTOCOL_INFO infos;
167 if (GetFileInformationByHandleEx(
168 file, FileRemoteProtocolInfo, &infos, sizeof(infos))) {
169 return false;
170 }
171
172 if (GetLastError() == ERROR_INVALID_PARAMETER) {
173 return true;
174 }
175
176 return false;
177 #endif
178 }
179
180 bool
181 spin_lock(std::atomic<pid_t>& owner_pid, const pid_t self_pid)
182 {
183 pid_t prev_pid = 0;
184 pid_t lock_pid = 0;
185 bool reset_timer = false;
186 util::TimePoint lock_time;
187 while (true) {
188 for (int i = 0; i < 10000; ++i) {
189 lock_pid = owner_pid.load(std::memory_order_relaxed);
190 if (lock_pid == 0
191 && owner_pid.compare_exchange_weak(
192 lock_pid, self_pid, std::memory_order_acquire)) {
193 return true;
194 }
195
196 if (prev_pid != lock_pid) {
197 // Check for changing PID here so ABA locking is detected with better
198 // probability.
199 prev_pid = lock_pid;
200 reset_timer = true;
201 }
202 std::this_thread::yield();
203 }
204 // If everything is OK, we should never hit this.
205 if (reset_timer) {
206 lock_time = util::TimePoint::now();
207 reset_timer = false;
208 } else if (util::TimePoint::now() - lock_time > k_max_lock_duration) {
209 return false;
210 }
211 }
212 }
213
214 void
215 spin_unlock(std::atomic<pid_t>& owner_pid)
216 {
217 owner_pid.store(0, std::memory_order_release);
218 }
219
220 } // namespace
221
222 struct InodeCache::Key
223 {
224 ContentType type;
225 dev_t st_dev;
226 ino_t st_ino;
227 mode_t st_mode;
228 timespec st_mtim;
229 timespec st_ctim; // Included for sanity checking.
230 off_t st_size; // Included for sanity checking.
231 };
232
233 struct InodeCache::Entry
234 {
235 Hash::Digest key_digest; // Hashed key
236 Hash::Digest file_digest; // Cached file hash
237 int return_value; // Cached return value
238 };
239
240 struct InodeCache::Bucket
241 {
242 std::atomic<pid_t> owner_pid;
243 Entry entries[k_num_entries];
244 };
245
246 struct InodeCache::SharedRegion
247 {
248 uint32_t version;
249 std::atomic<int64_t> hits;
250 std::atomic<int64_t> misses;
251 std::atomic<int64_t> errors;
252 Bucket buckets[k_num_buckets];
253 };
254
255 bool
256 InodeCache::mmap_file(const std::string& inode_cache_file)
257 {
258 m_sr = nullptr;
259 m_map.unmap();
260 m_fd = util::Fd(open(inode_cache_file.c_str(), O_RDWR));
261 if (!m_fd) {
262 LOG("Failed to open inode cache {}: {}", inode_cache_file, strerror(errno));
263 return false;
264 }
265 if (!fd_is_on_known_to_work_file_system(*m_fd)) {
266 return false;
267 }
268
269 auto map = util::MemoryMap::map(*m_fd, sizeof(SharedRegion));
270 if (!map) {
271 LOG("Failed to map inode cache file {}: {}", inode_cache_file, map.error());
272 return false;
273 }
274
275 SharedRegion* sr = reinterpret_cast<SharedRegion*>(map->ptr());
276
277 // Drop the file from disk if the found version is not matching. This will
278 // allow a new file to be generated.
279 if (sr->version != k_version) {
280 LOG(
281 "Dropping inode cache because found version {} does not match expected"
282 " version {}",
283 sr->version,
284 k_version);
285 map->unmap();
286 m_fd.close();
287 unlink(inode_cache_file.c_str());
288 return false;
289 }
290 m_map = std::move(*map);
291 m_sr = sr;
292 if (m_config.debug()) {
293 LOG("Inode cache file loaded: {}", inode_cache_file);
294 }
295 return true;
296 }
297
298 bool
299 InodeCache::hash_inode(const std::string& path,
300 ContentType type,
301 Hash::Digest& digest)
302 {
303 util::DirEntry de(path);
304 if (!de.exists()) {
305 LOG("Could not stat {}: {}", path, strerror(de.error_number()));
306 return false;
307 }
308
309 // See comment for InodeCache::InodeCache why this check is done.
310 auto now = util::TimePoint::now();
311 if (now - de.ctime() < m_min_age || now - de.mtime() < m_min_age) {
312 LOG("Too new ctime or mtime of {}, not considering for inode cache", path);
313 return false;
314 }
315
316 Key key;
317 memset(&key, 0, sizeof(Key));
318 key.type = type;
319 key.st_dev = de.device();
320 key.st_ino = de.inode();
321 key.st_mode = de.mode();
322 // Note: Manually copying sec and nsec of mtime and ctime to prevent copying
323 // the padding bytes
324 auto mtime = de.mtime().to_timespec();
325 key.st_mtim.tv_sec = mtime.tv_sec;
326 key.st_mtim.tv_nsec = mtime.tv_nsec;
327 auto ctime = de.ctime().to_timespec();
328 key.st_ctim.tv_sec = ctime.tv_sec;
329 key.st_ctim.tv_nsec = ctime.tv_nsec;
330 key.st_size = de.size();
331
332 Hash hash;
333 hash.hash(nonstd::span<const uint8_t>(reinterpret_cast<const uint8_t*>(&key),
334 sizeof(key)));
335 digest = hash.digest();
336 return true;
337 }
338
339 bool
340 InodeCache::with_bucket(const Hash::Digest& key_digest,
341 const BucketHandler& bucket_handler)
342 {
343 uint32_t hash;
344 util::big_endian_to_int(key_digest.data(), hash);
345 const uint32_t index = hash % k_num_buckets;
346 Bucket* bucket = &m_sr->buckets[index];
347 bool acquired_lock = spin_lock(bucket->owner_pid, m_self_pid);
348 while (!acquired_lock) {
349 LOG("Dropping inode cache file because of stale mutex at index {}", index);
350 if (!drop() || !initialize()) {
351 return false;
352 }
353 if (m_config.debug()) {
354 ++m_sr->errors;
355 }
356 bucket = &m_sr->buckets[index];
357 acquired_lock = spin_lock(bucket->owner_pid, m_self_pid);
358 }
359 try {
360 bucket_handler(bucket);
361 } catch (...) {
362 spin_unlock(bucket->owner_pid);
363 throw;
364 }
365 spin_unlock(bucket->owner_pid);
366 return true;
367 }
368
369 bool
370 InodeCache::create_new_file(const std::string& filename)
371 {
372 // Create the new file to a temporary name to prevent other processes from
373 // mapping it before it is fully initialized.
374 auto tmp_file = util::TemporaryFile::create(filename);
375 if (!tmp_file) {
376 LOG("Failed to created inode cache file: {}", tmp_file.error());
377 return false;
378 }
379
380 util::Finalizer temp_file_remover(
381 [&] { unlink(util::PathString(tmp_file->path).c_str()); });
382
383 if (!fd_is_on_known_to_work_file_system(*tmp_file->fd)) {
384 return false;
385 }
386
387 if (auto result = util::fallocate(*tmp_file->fd, sizeof(SharedRegion));
388 !result) {
389 LOG("Failed to allocate file space for inode cache: {}", result.error());
390 return false;
391 }
392
393 auto map = util::MemoryMap::map(*tmp_file->fd, sizeof(SharedRegion));
394 if (!map) {
395 LOG("Failed to mmap new inode cache: {}", map.error());
396 return false;
397 }
398
399 SharedRegion* sr = reinterpret_cast<SharedRegion*>(map->ptr());
400
401 // Initialize new shared region.
402 sr->version = k_version;
403 for (auto& bucket : sr->buckets) {
404 bucket.owner_pid = 0;
405 memset(bucket.entries, 0, sizeof(Bucket::entries));
406 }
407
408 sr = nullptr;
409 map->unmap();
410 tmp_file->fd.close();
411
412 #ifndef _WIN32
413 // link() will fail silently if a file with the same name already exists.
414 // This will be the case if two processes try to create a new file
415 // simultaneously. Thus close the current file handle and reopen a new one,
416 // which will make us use the first created file even if we didn't win the
417 // race.
418 if (link(tmp_file->path.c_str(), filename.c_str()) != 0) {
419 LOG("Failed to link new inode cache: {}", strerror(errno));
420 return false;
421 }
422 #else
423 if (MoveFileA(util::PathString(tmp_file->path).c_str(), filename.c_str())
424 == 0) {
425 unsigned error = GetLastError();
426 if (error == ERROR_FILE_EXISTS) {
427 // Not an error, another process won the race. Remove the file we just
428 // created
429 DeleteFileA(util::PathString(tmp_file->path).c_str());
430 LOG("Another process created inode cache {}", filename);
431 return true;
432 } else {
433 LOG("Failed to move new inode cache: {}", error);
434 return false;
435 }
436 }
437 #endif
438
439 LOG("Created a new inode cache {}", filename);
440 return true;
441 }
442
443 bool
444 InodeCache::initialize()
445 {
446 if (m_failed || !m_config.inode_cache()) {
447 return false;
448 }
449
450 if (m_fd) {
451 auto now = util::TimePoint::now();
452 if (now > m_last_fs_space_check + k_fs_space_check_valid_duration) {
453 m_last_fs_space_check = now;
454
455 uint64_t free_space = 0;
456 #ifndef _WIN32
457 struct statfs buf;
458 if (fstatfs(*m_fd, &buf) != 0) {
459 LOG("fstatfs failed: {}", strerror(errno));
460 return false;
461 }
462 free_space = static_cast<uint64_t>(buf.f_bavail) * 512;
463 #else
464 ULARGE_INTEGER free_space_for_user{};
465
466 if (GetDiskFreeSpaceExA(m_config.temporary_dir().c_str(),
467 &free_space_for_user,
468 nullptr,
469 nullptr)
470 == 0) {
471 LOG("GetDiskFreeSpaceExA failed: {}", GetLastError());
472 return false;
473 }
474 free_space = free_space_for_user.QuadPart;
475 #endif
476 if (free_space < k_min_fs_mib_left * 1024 * 1024) {
477 LOG("Filesystem has less than {} MiB free space, not using inode cache",
478 k_min_fs_mib_left);
479 return false;
480 }
481 }
482 }
483
484 if (m_sr) {
485 return true;
486 }
487
488 std::string filename = get_file();
489 if (m_sr || mmap_file(filename)) {
490 return true;
491 }
492
493 // Try to create a new cache if we failed to map an existing file.
494 create_new_file(filename);
495
496 // Concurrent processes could try to create new files simultaneously and the
497 // file that actually landed on disk will be from the process that won the
498 // race. Thus we try to open the file from disk instead of reusing the file
499 // handle to the file we just created.
500 if (mmap_file(filename)) {
501 return true;
502 }
503
504 m_failed = true;
505 return false;
506 }
507
508 InodeCache::InodeCache(const Config& config, util::Duration min_age)
509 : m_config(config),
510 // CCACHE_DISABLE_INODE_CACHE_MIN_AGE is only for testing purposes; see
511 // test/suites/inode_cache.bash.
512 m_min_age(getenv("CCACHE_DISABLE_INODE_CACHE_MIN_AGE") ? util::Duration(0)
513 : min_age),
514 m_self_pid(getpid())
515 {
516 }
517
518 InodeCache::~InodeCache()
519 {
520 if (m_sr) {
521 LOG("Accumulated stats for inode cache: hits={}, misses={}, errors={}",
522 m_sr->hits.load(),
523 m_sr->misses.load(),
524 m_sr->errors.load());
525 }
526 }
527
528 bool
529 InodeCache::available(int fd)
530 {
531 return fd_is_on_known_to_work_file_system(fd);
532 }
533
534 std::optional<std::pair<HashSourceCodeResult, Hash::Digest>>
535 InodeCache::get(const std::string& path, ContentType type)
536 {
537 if (!initialize()) {
538 return std::nullopt;
539 }
540
541 Hash::Digest key_digest;
542 if (!hash_inode(path, type, key_digest)) {
543 return std::nullopt;
544 }
545
546 std::optional<HashSourceCodeResult> result;
547 Hash::Digest file_digest;
548 const bool success = with_bucket(key_digest, [&](const auto bucket) {
549 for (uint32_t i = 0; i < k_num_entries; ++i) {
550 if (bucket->entries[i].key_digest == key_digest) {
551 if (i > 0) {
552 Entry tmp = bucket->entries[i];
553 memmove(&bucket->entries[1], &bucket->entries[0], sizeof(Entry) * i);
554 bucket->entries[0] = tmp;
555 }
556
557 file_digest = bucket->entries[0].file_digest;
558 result =
559 HashSourceCodeResult::from_bitmask(bucket->entries[0].return_value);
560 break;
561 }
562 }
563 });
564 if (!success) {
565 return std::nullopt;
566 }
567
568 if (m_config.debug()) {
569 LOG("Inode cache {}: {}", result ? "hit" : "miss", path);
570 if (result) {
571 ++m_sr->hits;
572 } else {
573 ++m_sr->misses;
574 }
575 }
576 if (result) {
577 return std::make_pair(*result, file_digest);
578 } else {
579 return std::nullopt;
580 }
581 }
582
583 bool
584 InodeCache::put(const std::string& path,
585 ContentType type,
586 const Hash::Digest& file_digest,
587 HashSourceCodeResult return_value)
588 {
589 if (!initialize()) {
590 return false;
591 }
592
593 Hash::Digest key_digest;
594 if (!hash_inode(path, type, key_digest)) {
595 return false;
596 }
597
598 const bool success = with_bucket(key_digest, [&](const auto bucket) {
599 memmove(&bucket->entries[1],
600 &bucket->entries[0],
601 sizeof(Entry) * (k_num_entries - 1));
602
603 bucket->entries[0].key_digest = key_digest;
604 bucket->entries[0].file_digest = file_digest;
605 bucket->entries[0].return_value = return_value.to_bitmask();
606 });
607
608 if (!success) {
609 return false;
610 }
611
612 if (m_config.debug()) {
613 LOG("Inode cache insert: {}", path);
614 }
615 return true;
616 }
617
618 bool
619 InodeCache::drop()
620 {
621 m_sr = nullptr;
622 m_map.unmap();
623 m_fd.close();
624 std::string file = get_file();
625 if (unlink(file.c_str()) != 0 && errno != ENOENT) {
626 return false;
627 }
628 LOG("Dropped inode cache {}", file);
629 return true;
630 }
631
632 std::string
633 InodeCache::get_file()
634 {
635 const uint8_t arch_bits = 8 * sizeof(void*);
636 return FMT(
637 "{}/inode-cache-{}.v{}", m_config.temporary_dir(), arch_bits, k_version);
638 }
639
640 int64_t
641 InodeCache::get_hits()
642 {
643 return initialize() ? m_sr->hits.load() : -1;
644 }
645
646 int64_t
647 InodeCache::get_misses()
648 {
649 return initialize() ? m_sr->misses.load() : -1;
650 }
651
652 int64_t
653 InodeCache::get_errors()
654 {
655 return initialize() ? m_sr->errors.load() : -1;
656 }