1 // Copyright (C) 2020-2023 Joel Rosdahl and other contributors
3 // See doc/AUTHORS.adoc for a complete list of contributors.
5 // This program is free software; you can redistribute it and/or modify it
6 // under the terms of the GNU General Public License as published by the Free
7 // Software Foundation; either version 3 of the License, or (at your option)
10 // This program is distributed in the hope that it will be useful, but WITHOUT
11 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 // You should have received a copy of the GNU General Public License along with
16 // this program; if not, write to the Free Software Foundation, Inc., 51
17 // Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 #include "InodeCache.hpp"
25 #include <util/DirEntry.hpp>
26 #include <util/Fd.hpp>
27 #include <util/Finalizer.hpp>
28 #include <util/PathString.hpp>
29 #include <util/TemporaryFile.hpp>
30 #include <util/conversion.hpp>
31 #include <util/file.hpp>
32 #include <util/fmtmacros.hpp>
33 #include <util/logging.hpp>
43 #ifdef HAVE_LINUX_FS_H
44 # include <linux/magic.h>
45 # include <sys/statfs.h>
46 #elif defined(HAVE_STRUCT_STATFS_F_FSTYPENAME)
47 # include <sys/mount.h>
48 # include <sys/param.h>
52 #include <type_traits>
55 // The inode cache resides on a file that is mapped into shared memory by
56 // running processes. It is implemented as a two level structure, where the top
57 // level is a hash table consisting of buckets. Each bucket contains entries
58 // that are sorted in LRU order. Entries map from keys representing files to
59 // cached hash results.
61 // Concurrent access is guarded by a mutex in each bucket.
63 // Current cache size is fixed and the given constants are considered large
64 // enough for most projects. The size could be made configurable if there is a
69 // The version number corresponds to the format of the cache entries and to
70 // semantics of the key fields.
72 // Note: The key is hashed using the main hash algorithm, so the version number
73 // does not need to be incremented if said algorithm is changed (except if the
74 // digest size changes since that affects the entry format).
75 const uint32_t k_version
= 2;
77 // Note: Increment the version number if constants affecting storage size are
79 const uint32_t k_num_buckets
= 32 * 1024;
80 const uint32_t k_num_entries
= 4;
82 // Maximum time the spin lock loop will try before giving up.
83 const auto k_max_lock_duration
= util::Duration(5);
85 // The memory-mapped file may reside on a filesystem with compression. Memory
86 // accesses to the file risk crashing if such a filesystem gets full, so stop
87 // using the inode cache well before this happens.
88 const uint64_t k_min_fs_mib_left
= 100; // 100 MiB
90 // How long a filesystem space check is valid before we make a new one.
91 const util::Duration
k_fs_space_check_valid_duration(1);
93 static_assert(std::tuple_size
<Hash::Digest
>() == 20,
94 "Increment version number if size of digest is changed.");
95 static_assert(std::is_trivially_copyable
<Hash::Digest
>::value
,
96 "Digest is expected to be trivially copyable.");
99 static_cast<int>(InodeCache::ContentType::raw
) == 0,
100 "Numeric value is part of key, increment version number if changed.");
102 static_cast<int>(InodeCache::ContentType::checked_for_temporal_macros
) == 1,
103 "Numeric value is part of key, increment version number if changed.");
106 fd_is_on_known_to_work_file_system(int fd
)
109 bool known_to_work
= false;
111 if (fstatfs(fd
, &buf
) != 0) {
112 LOG("fstatfs failed: {}", strerror(errno
));
114 # ifdef HAVE_LINUX_FS_H
115 // statfs's f_type field is a signed 32-bit integer on some platforms. Large
116 // values therefore cause narrowing warnings, so cast the value to a large
118 const auto f_type
= static_cast<uintmax_t>(buf
.f_type
);
120 // Is a filesystem you know works with the inode cache missing in this
121 // list? Please submit an issue or pull request to the ccache project.
122 case 0x9123683e: // BTRFS_SUPER_MAGIC
123 case 0xef53: // EXT2_SUPER_MAGIC
124 case 0x01021994: // TMPFS_MAGIC
125 case 0x58465342: // XFS_SUPER_MAGIC
126 known_to_work
= true;
129 LOG("Filesystem type 0x{:x} not known to work for the inode cache",
132 # elif defined(HAVE_STRUCT_STATFS_F_FSTYPENAME) // macOS X and some BSDs
133 static const std::vector
<std::string
> known_to_work_filesystems
= {
134 // Is a filesystem you know works with the inode cache missing in this
135 // list? Please submit an issue or pull request to the ccache project.
142 if (std::find(known_to_work_filesystems
.begin(),
143 known_to_work_filesystems
.end(),
145 != known_to_work_filesystems
.end()) {
146 known_to_work
= true;
148 LOG("Filesystem type {} not known to work for the inode cache",
152 # error Inconsistency: INODE_CACHE_SUPPORTED is set but we should not get here
155 return known_to_work
;
157 HANDLE file
= (HANDLE
)_get_osfhandle(fd
);
158 if (file
== INVALID_HANDLE_VALUE
) {
162 // Try to get information about remote protocol for this file.
163 // If the call succeeds, this is a remote file.
164 // If the call fails with invalid parameter error, consider that it is a local
166 FILE_REMOTE_PROTOCOL_INFO infos
;
167 if (GetFileInformationByHandleEx(
168 file
, FileRemoteProtocolInfo
, &infos
, sizeof(infos
))) {
172 if (GetLastError() == ERROR_INVALID_PARAMETER
) {
181 spin_lock(std::atomic
<pid_t
>& owner_pid
, const pid_t self_pid
)
185 bool reset_timer
= false;
186 util::TimePoint lock_time
;
188 for (int i
= 0; i
< 10000; ++i
) {
189 lock_pid
= owner_pid
.load(std::memory_order_relaxed
);
191 && owner_pid
.compare_exchange_weak(
192 lock_pid
, self_pid
, std::memory_order_acquire
)) {
196 if (prev_pid
!= lock_pid
) {
197 // Check for changing PID here so ABA locking is detected with better
202 std::this_thread::yield();
204 // If everything is OK, we should never hit this.
206 lock_time
= util::TimePoint::now();
208 } else if (util::TimePoint::now() - lock_time
> k_max_lock_duration
) {
215 spin_unlock(std::atomic
<pid_t
>& owner_pid
)
217 owner_pid
.store(0, std::memory_order_release
);
222 struct InodeCache::Key
229 timespec st_ctim
; // Included for sanity checking.
230 off_t st_size
; // Included for sanity checking.
233 struct InodeCache::Entry
235 Hash::Digest key_digest
; // Hashed key
236 Hash::Digest file_digest
; // Cached file hash
237 int return_value
; // Cached return value
240 struct InodeCache::Bucket
242 std::atomic
<pid_t
> owner_pid
;
243 Entry entries
[k_num_entries
];
246 struct InodeCache::SharedRegion
249 std::atomic
<int64_t> hits
;
250 std::atomic
<int64_t> misses
;
251 std::atomic
<int64_t> errors
;
252 Bucket buckets
[k_num_buckets
];
256 InodeCache::mmap_file(const std::string
& inode_cache_file
)
260 m_fd
= util::Fd(open(inode_cache_file
.c_str(), O_RDWR
));
262 LOG("Failed to open inode cache {}: {}", inode_cache_file
, strerror(errno
));
265 if (!fd_is_on_known_to_work_file_system(*m_fd
)) {
269 auto map
= util::MemoryMap::map(*m_fd
, sizeof(SharedRegion
));
271 LOG("Failed to map inode cache file {}: {}", inode_cache_file
, map
.error());
275 SharedRegion
* sr
= reinterpret_cast<SharedRegion
*>(map
->ptr());
277 // Drop the file from disk if the found version is not matching. This will
278 // allow a new file to be generated.
279 if (sr
->version
!= k_version
) {
281 "Dropping inode cache because found version {} does not match expected"
287 unlink(inode_cache_file
.c_str());
290 m_map
= std::move(*map
);
292 if (m_config
.debug()) {
293 LOG("Inode cache file loaded: {}", inode_cache_file
);
299 InodeCache::hash_inode(const std::string
& path
,
301 Hash::Digest
& digest
)
303 util::DirEntry
de(path
);
305 LOG("Could not stat {}: {}", path
, strerror(de
.error_number()));
309 // See comment for InodeCache::InodeCache why this check is done.
310 auto now
= util::TimePoint::now();
311 if (now
- de
.ctime() < m_min_age
|| now
- de
.mtime() < m_min_age
) {
312 LOG("Too new ctime or mtime of {}, not considering for inode cache", path
);
317 memset(&key
, 0, sizeof(Key
));
319 key
.st_dev
= de
.device();
320 key
.st_ino
= de
.inode();
321 key
.st_mode
= de
.mode();
322 // Note: Manually copying sec and nsec of mtime and ctime to prevent copying
324 auto mtime
= de
.mtime().to_timespec();
325 key
.st_mtim
.tv_sec
= mtime
.tv_sec
;
326 key
.st_mtim
.tv_nsec
= mtime
.tv_nsec
;
327 auto ctime
= de
.ctime().to_timespec();
328 key
.st_ctim
.tv_sec
= ctime
.tv_sec
;
329 key
.st_ctim
.tv_nsec
= ctime
.tv_nsec
;
330 key
.st_size
= de
.size();
333 hash
.hash(nonstd::span
<const uint8_t>(reinterpret_cast<const uint8_t*>(&key
),
335 digest
= hash
.digest();
340 InodeCache::with_bucket(const Hash::Digest
& key_digest
,
341 const BucketHandler
& bucket_handler
)
344 util::big_endian_to_int(key_digest
.data(), hash
);
345 const uint32_t index
= hash
% k_num_buckets
;
346 Bucket
* bucket
= &m_sr
->buckets
[index
];
347 bool acquired_lock
= spin_lock(bucket
->owner_pid
, m_self_pid
);
348 while (!acquired_lock
) {
349 LOG("Dropping inode cache file because of stale mutex at index {}", index
);
350 if (!drop() || !initialize()) {
353 if (m_config
.debug()) {
356 bucket
= &m_sr
->buckets
[index
];
357 acquired_lock
= spin_lock(bucket
->owner_pid
, m_self_pid
);
360 bucket_handler(bucket
);
362 spin_unlock(bucket
->owner_pid
);
365 spin_unlock(bucket
->owner_pid
);
370 InodeCache::create_new_file(const std::string
& filename
)
372 // Create the new file to a temporary name to prevent other processes from
373 // mapping it before it is fully initialized.
374 auto tmp_file
= util::TemporaryFile::create(filename
);
376 LOG("Failed to created inode cache file: {}", tmp_file
.error());
380 util::Finalizer
temp_file_remover(
381 [&] { unlink(util::PathString(tmp_file
->path
).c_str()); });
383 if (!fd_is_on_known_to_work_file_system(*tmp_file
->fd
)) {
387 if (auto result
= util::fallocate(*tmp_file
->fd
, sizeof(SharedRegion
));
389 LOG("Failed to allocate file space for inode cache: {}", result
.error());
393 auto map
= util::MemoryMap::map(*tmp_file
->fd
, sizeof(SharedRegion
));
395 LOG("Failed to mmap new inode cache: {}", map
.error());
399 SharedRegion
* sr
= reinterpret_cast<SharedRegion
*>(map
->ptr());
401 // Initialize new shared region.
402 sr
->version
= k_version
;
403 for (auto& bucket
: sr
->buckets
) {
404 bucket
.owner_pid
= 0;
405 memset(bucket
.entries
, 0, sizeof(Bucket::entries
));
410 tmp_file
->fd
.close();
413 // link() will fail silently if a file with the same name already exists.
414 // This will be the case if two processes try to create a new file
415 // simultaneously. Thus close the current file handle and reopen a new one,
416 // which will make us use the first created file even if we didn't win the
418 if (link(tmp_file
->path
.c_str(), filename
.c_str()) != 0) {
419 LOG("Failed to link new inode cache: {}", strerror(errno
));
423 if (MoveFileA(util::PathString(tmp_file
->path
).c_str(), filename
.c_str())
425 unsigned error
= GetLastError();
426 if (error
== ERROR_FILE_EXISTS
) {
427 // Not an error, another process won the race. Remove the file we just
429 DeleteFileA(util::PathString(tmp_file
->path
).c_str());
430 LOG("Another process created inode cache {}", filename
);
433 LOG("Failed to move new inode cache: {}", error
);
439 LOG("Created a new inode cache {}", filename
);
444 InodeCache::initialize()
446 if (m_failed
|| !m_config
.inode_cache()) {
451 auto now
= util::TimePoint::now();
452 if (now
> m_last_fs_space_check
+ k_fs_space_check_valid_duration
) {
453 m_last_fs_space_check
= now
;
455 uint64_t free_space
= 0;
458 if (fstatfs(*m_fd
, &buf
) != 0) {
459 LOG("fstatfs failed: {}", strerror(errno
));
462 free_space
= static_cast<uint64_t>(buf
.f_bavail
) * 512;
464 ULARGE_INTEGER free_space_for_user
{};
466 if (GetDiskFreeSpaceExA(m_config
.temporary_dir().c_str(),
467 &free_space_for_user
,
471 LOG("GetDiskFreeSpaceExA failed: {}", GetLastError());
474 free_space
= free_space_for_user
.QuadPart
;
476 if (free_space
< k_min_fs_mib_left
* 1024 * 1024) {
477 LOG("Filesystem has less than {} MiB free space, not using inode cache",
488 std::string filename
= get_file();
489 if (m_sr
|| mmap_file(filename
)) {
493 // Try to create a new cache if we failed to map an existing file.
494 create_new_file(filename
);
496 // Concurrent processes could try to create new files simultaneously and the
497 // file that actually landed on disk will be from the process that won the
498 // race. Thus we try to open the file from disk instead of reusing the file
499 // handle to the file we just created.
500 if (mmap_file(filename
)) {
508 InodeCache::InodeCache(const Config
& config
, util::Duration min_age
)
510 // CCACHE_DISABLE_INODE_CACHE_MIN_AGE is only for testing purposes; see
511 // test/suites/inode_cache.bash.
512 m_min_age(getenv("CCACHE_DISABLE_INODE_CACHE_MIN_AGE") ? util::Duration(0)
518 InodeCache::~InodeCache()
521 LOG("Accumulated stats for inode cache: hits={}, misses={}, errors={}",
524 m_sr
->errors
.load());
529 InodeCache::available(int fd
)
531 return fd_is_on_known_to_work_file_system(fd
);
534 std::optional
<std::pair
<HashSourceCodeResult
, Hash::Digest
>>
535 InodeCache::get(const std::string
& path
, ContentType type
)
541 Hash::Digest key_digest
;
542 if (!hash_inode(path
, type
, key_digest
)) {
546 std::optional
<HashSourceCodeResult
> result
;
547 Hash::Digest file_digest
;
548 const bool success
= with_bucket(key_digest
, [&](const auto bucket
) {
549 for (uint32_t i
= 0; i
< k_num_entries
; ++i
) {
550 if (bucket
->entries
[i
].key_digest
== key_digest
) {
552 Entry tmp
= bucket
->entries
[i
];
553 memmove(&bucket
->entries
[1], &bucket
->entries
[0], sizeof(Entry
) * i
);
554 bucket
->entries
[0] = tmp
;
557 file_digest
= bucket
->entries
[0].file_digest
;
559 HashSourceCodeResult::from_bitmask(bucket
->entries
[0].return_value
);
568 if (m_config
.debug()) {
569 LOG("Inode cache {}: {}", result
? "hit" : "miss", path
);
577 return std::make_pair(*result
, file_digest
);
584 InodeCache::put(const std::string
& path
,
586 const Hash::Digest
& file_digest
,
587 HashSourceCodeResult return_value
)
593 Hash::Digest key_digest
;
594 if (!hash_inode(path
, type
, key_digest
)) {
598 const bool success
= with_bucket(key_digest
, [&](const auto bucket
) {
599 memmove(&bucket
->entries
[1],
601 sizeof(Entry
) * (k_num_entries
- 1));
603 bucket
->entries
[0].key_digest
= key_digest
;
604 bucket
->entries
[0].file_digest
= file_digest
;
605 bucket
->entries
[0].return_value
= return_value
.to_bitmask();
612 if (m_config
.debug()) {
613 LOG("Inode cache insert: {}", path
);
624 std::string file
= get_file();
625 if (unlink(file
.c_str()) != 0 && errno
!= ENOENT
) {
628 LOG("Dropped inode cache {}", file
);
633 InodeCache::get_file()
635 const uint8_t arch_bits
= 8 * sizeof(void*);
637 "{}/inode-cache-{}.v{}", m_config
.temporary_dir(), arch_bits
, k_version
);
641 InodeCache::get_hits()
643 return initialize() ? m_sr
->hits
.load() : -1;
647 InodeCache::get_misses()
649 return initialize() ? m_sr
->misses
.load() : -1;
653 InodeCache::get_errors()
655 return initialize() ? m_sr
->errors
.load() : -1;