From 4b8f8249452c8c367a1605e87959f59d5595e766 Mon Sep 17 00:00:00 2001 From: Joel Rosdahl Date: Sat, 7 Aug 2021 09:00:17 +0200 Subject: [PATCH] feat(storage): Add support for cache sharding on secondary storage (#912) This adds support for a shards attribute with a comma-separated list of names for sharding (partitioning) the cache entries using Rendezvous hashing, typically to spread the cache over a server cluster. When set, the storage URL must contain an asterisk (*), which will be replaced by one of the shard names to form a real URL. A shard name can optionally have an appended weight within parentheses to indicate how much of the key space should be associated with that shard. A shard with weight w will contain w/S of the cache, where S is the sum of all shard weights. A weight could for instance be set to represent the available memory for a memory cache on a specific server. The default weight is 1. For example, redis://cache-*.example.com|shards=a(3),b(1),c(1.5) will put 55% (3/5.5) of the cache on redis://cache-a.example.com, 18% (1/5.5) on redis://cache-b.example.com and 27% (1.5/5.5) on redis://cache-c.example.com. --- doc/MANUAL.adoc | 25 +++ src/storage/Storage.cpp | 182 ++++++++++++++++----- src/storage/secondary/SecondaryStorage.cpp | 2 +- test/suites/secondary_file.bash | 13 ++ 4 files changed, 178 insertions(+), 44 deletions(-) diff --git a/doc/MANUAL.adoc b/doc/MANUAL.adoc index c4b7b09f3..6bc4651b7 100644 --- a/doc/MANUAL.adoc +++ b/doc/MANUAL.adoc @@ -893,6 +893,28 @@ Optional attributes available for all secondary storage backends: * *read-only*: If *true*, only read from this backend, don't write. The default is *false*. +* *shards*: A comma-separated list of names for sharding (partitioning) the + cache entries using + https://en.wikipedia.org/wiki/Rendezvous_hashing[Rendezvous hashing], + typically to spread the cache over a server cluster. When set, the storage URL + must contain an asterisk (`+*+`), which will be replaced by one of the shard + names to form a real URL. A shard name can optionally have an appended weight + within parentheses to indicate how much of the key space should be associated + with that shard. A shard with weight *w* will contain *w*/*S* of the cache, + where *S* is the sum of all shard weights. A weight could for instance be set + to represent the available memory for a memory cache on a specific server. The + default weight is *1*. ++ +Examples: ++ +-- +* `+redis://cache-*.example.com|shards=a(3),b(1),c(1.5)+` will put 55% (3/5.5) + of the cache on `+redis://cache-a.example.com+`, 18% (1/5.5) on + `+redis://cache-b.example.com+` and 27% (1.5/5.5) on + `+redis://cache-c.example.com+`. +* `+http://example.com/*|shards=alpha,beta+` will put 50% of the cache on + `+http://example.com/alpha+` and 50% on `+http://example.com/beta+`. +-- These are the available backends: @@ -977,6 +999,9 @@ https://redis.io/topics/lru-cache[configure LRU eviction]. TIP: See https://ccache.dev/howto/redis-storage.html[How to set up Redis storage] for hints on setting up a Redis server for use with ccache. +TIP: You can set up a cluster of Redis servers using the `shards` attribute +described in _<>_. + Examples: * `+redis://localhost+` diff --git a/src/storage/Storage.cpp b/src/storage/Storage.cpp index 7bd7d439c..8d531a3ea 100644 --- a/src/storage/Storage.cpp +++ b/src/storage/Storage.cpp @@ -18,6 +18,7 @@ #include "Storage.hpp" +#include #include #include #include @@ -39,6 +40,7 @@ #include #include +#include #include #include @@ -67,19 +69,33 @@ get_features() return util::join(features, " "); } +struct SecondaryStorageShardConfig +{ + std::string name; + double weight; +}; + struct SecondaryStorageConfig { + std::vector shards; secondary::SecondaryStorage::Backend::Params params; bool read_only = false; }; +struct SecondaryStorageBackendEntry +{ + Url url; // With expanded "*". + std::string url_for_logging; // With expanded "*". + std::unique_ptr impl; + bool failed = false; +}; + struct SecondaryStorageEntry { SecondaryStorageConfig config; - std::string url_for_logging; + std::string url_for_logging; // With unexpanded "*". std::shared_ptr storage; - std::unique_ptr backend; - bool failed = false; + std::vector backends; }; static std::string @@ -126,10 +142,37 @@ parse_storage_config(const nonstd::string_view entry) util::value_or_throw(util::percent_decode(raw_value)); if (key == "read-only" && value == "true") { result.read_only = true; + } else if (key == "shards") { + const auto url_str = result.params.url.str(); + if (url_str.find('*') == std::string::npos) { + throw core::Error(R"(Missing "*" in URL when using shards: "{}")", + url_str); + } + for (const auto& shard : util::Tokenizer(value, ",")) { + double weight = 1.0; + nonstd::string_view name; + const auto lp_pos = shard.find('('); + if (lp_pos != nonstd::string_view::npos) { + if (shard.back() != ')') { + throw core::Error("Invalid shard name: \"{}\"", shard); + } + weight = + util::value_or_throw(util::parse_double(std::string( + shard.substr(lp_pos + 1, shard.length() - lp_pos - 2)))); + if (weight < 0.0) { + throw core::Error("Invalid shard weight: \"{}\"", weight); + } + name = shard.substr(0, lp_pos); + } else { + name = shard; + } + + result.shards.push_back({std::string(name), weight}); + } } - result.params.attributes.emplace_back( - secondary::SecondaryStorage::Backend::Attribute{ - std::string(key), value, std::string(raw_value)}); + + result.params.attributes.push_back( + {std::string(key), value, std::string(raw_value)}); } return result; @@ -229,9 +272,7 @@ Storage::put(const Digest& key, const bool should_put_in_secondary_storage = std::any_of(m_secondary_storages.begin(), m_secondary_storages.end(), - [](const auto& entry) { - return !entry->failed && !entry->config.read_only; - }); + [](const auto& entry) { return !entry->config.read_only; }); if (should_put_in_secondary_storage) { std::string value; try { @@ -278,68 +319,119 @@ Storage::add_secondary_storages() throw core::Error("unknown secondary storage URL: {}", url_for_logging.str()); } - m_secondary_storages.emplace_back( - std::make_unique(SecondaryStorageEntry{ - config, url_for_logging.str(), storage, {}, false})); + m_secondary_storages.push_back(std::make_unique( + SecondaryStorageEntry{config, url_for_logging.str(), storage, {}})); } } static void mark_backend_as_failed( - SecondaryStorageEntry& entry, + SecondaryStorageBackendEntry& backend_entry, const secondary::SecondaryStorage::Backend::Failure failure) { // The backend is expected to log details about the error. - entry.failed = true; + backend_entry.failed = true; (void)failure; // TODO: Update statistics. } -static bool -backend_is_available(SecondaryStorageEntry& entry, - nonstd::string_view operation_description, - const bool for_writing) +static double +to_half_open_unit_interval(uint64_t value) { - if (for_writing && entry.config.read_only) { - LOG("Not {} {} since it is read-only", - operation_description, - entry.url_for_logging); - return false; + constexpr uint8_t double_significand_bits = 53; + constexpr uint64_t denominator = 1ULL << double_significand_bits; + constexpr uint64_t mask = denominator - 1; + return static_cast(value & mask) / denominator; +} + +static Url +get_shard_url(const Digest& key, + const std::string& url, + const std::vector& shards) +{ + ASSERT(!shards.empty()); + + // This is the "weighted rendezvous hashing" algorithm. + double highest_score = -1.0; + std::string best_shard; + for (const auto& shard_config : shards) { + Checksum checksum; + checksum.update(key.bytes(), key.size()); + checksum.update(shard_config.name.data(), shard_config.name.length()); + const double score = to_half_open_unit_interval(checksum.digest()); + ASSERT(score >= 0.0 && score < 1.0); + const double weighted_score = + score == 0.0 ? 0.0 : shard_config.weight / -std::log(score); + if (weighted_score > highest_score) { + best_shard = shard_config.name; + highest_score = weighted_score; + } } - if (entry.failed) { - LOG("Not {} {} since it failed earlier", + return util::replace_first(url, "*", best_shard); +} + +static SecondaryStorageBackendEntry* +get_backend(SecondaryStorageEntry& entry, + const Digest& key, + nonstd::string_view operation_description, + const bool for_writing) +{ + if (for_writing && entry.config.read_only) { + LOG("Not {} {} since it is read-only", operation_description, entry.url_for_logging); - return false; + return nullptr; } - if (!entry.backend) { + const auto shard_url = + entry.config.shards.empty() + ? entry.config.params.url + : get_shard_url(key, entry.config.params.url.str(), entry.config.shards); + auto backend = + std::find_if(entry.backends.begin(), + entry.backends.end(), + [&](const auto& x) { return x.url.str() == shard_url.str(); }); + + if (backend == entry.backends.end()) { + auto shard_url_for_logging = shard_url; + shard_url_for_logging.user_info(""); + entry.backends.push_back( + {shard_url, shard_url_for_logging.str(), {}, false}); + auto shard_params = entry.config.params; + shard_params.url = shard_url; try { - entry.backend = entry.storage->create_backend(entry.config.params); + entry.backends.back().impl = entry.storage->create_backend(shard_params); } catch (const secondary::SecondaryStorage::Backend::Failed& e) { LOG("Failed to construct backend for {}{}", entry.url_for_logging, nonstd::string_view(e.what()).empty() ? "" : FMT(": {}", e.what())); - mark_backend_as_failed(entry, e.failure()); + mark_backend_as_failed(entry.backends.back(), e.failure()); } + return &entry.backends.back(); + } else if (backend->failed) { + LOG("Not {} {} since it failed earlier", + operation_description, + entry.url_for_logging); + return nullptr; + } else { + return &*backend; } - - return static_cast(entry.backend); } nonstd::optional Storage::get_from_secondary_storage(const Digest& key) { for (const auto& entry : m_secondary_storages) { - if (!backend_is_available(*entry, "getting from", false)) { + auto backend = get_backend(*entry, key, "getting from", false); + if (!backend) { continue; } Timer timer; - const auto result = entry->backend->get(key); + const auto result = backend->impl->get(key); const auto ms = timer.measure_ms(); if (!result) { - mark_backend_as_failed(*entry, result.error()); + mark_backend_as_failed(*backend, result.error()); continue; } @@ -347,12 +439,14 @@ Storage::get_from_secondary_storage(const Digest& key) if (value) { LOG("Retrieved {} from {} ({:.2f} ms)", key.to_string(), - entry->url_for_logging, + backend->url_for_logging, ms); return *value; } else { - LOG( - "No {} in {} ({:.2f} ms)", key.to_string(), entry->url_for_logging, ms); + LOG("No {} in {} ({:.2f} ms)", + key.to_string(), + backend->url_for_logging, + ms); } } @@ -363,16 +457,17 @@ void Storage::put_in_secondary_storage(const Digest& key, const std::string& value) { for (const auto& entry : m_secondary_storages) { - if (!backend_is_available(*entry, "putting in", true)) { + auto backend = get_backend(*entry, key, "putting in", true); + if (!backend) { continue; } Timer timer; - const auto result = entry->backend->put(key, value); + const auto result = backend->impl->put(key, value); const auto ms = timer.measure_ms(); if (!result) { // The backend is expected to log details about the error. - mark_backend_as_failed(*entry, result.error()); + mark_backend_as_failed(*backend, result.error()); continue; } @@ -389,15 +484,16 @@ void Storage::remove_from_secondary_storage(const Digest& key) { for (const auto& entry : m_secondary_storages) { - if (!backend_is_available(*entry, "removing from", true)) { + auto backend = get_backend(*entry, key, "removing from", true); + if (!backend) { continue; } Timer timer; - const auto result = entry->backend->remove(key); + const auto result = backend->impl->remove(key); const auto ms = timer.measure_ms(); if (!result) { - mark_backend_as_failed(*entry, result.error()); + mark_backend_as_failed(*backend, result.error()); continue; } diff --git a/src/storage/secondary/SecondaryStorage.cpp b/src/storage/secondary/SecondaryStorage.cpp index be1c5e545..41b3d38d3 100644 --- a/src/storage/secondary/SecondaryStorage.cpp +++ b/src/storage/secondary/SecondaryStorage.cpp @@ -27,7 +27,7 @@ namespace secondary { bool SecondaryStorage::Backend::is_framework_attribute(const std::string& name) { - return name == "read-only"; + return name == "read-only" || name == "shards"; } std::chrono::milliseconds diff --git a/test/suites/secondary_file.bash b/test/suites/secondary_file.bash index 90cf8adc8..adc455b5e 100644 --- a/test/suites/secondary_file.bash +++ b/test/suites/secondary_file.bash @@ -113,4 +113,17 @@ SUITE_secondary_file() { $CCACHE_COMPILE -c test.c expect_perm secondary drwxrwxrwx expect_perm secondary/CACHEDIR.TAG -rw-rw-rw- + + # ------------------------------------------------------------------------- + TEST "Sharding" + + CCACHE_SECONDARY_STORAGE="file://$PWD/secondary/*|shards=a,b(2)" + + $CCACHE_COMPILE -c test.c + expect_stat 'cache hit (direct)' 0 + expect_stat 'cache miss' 1 + expect_stat 'files in cache' 2 + if [ ! -d secondary/a ] && [ ! -d secondary/b ]; then + test_failed "Expected secondary/a or secondary/b to exist" + fi } -- 2.47.2