From: Joel Rosdahl Date: Fri, 15 Dec 2023 19:01:49 +0000 (+0100) Subject: fix: Generalize expansion of remote storage URLs with sharding X-Git-Tag: v4.9~10 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=829f93c48f21;p=thirdparty%2Fccache.git fix: Generalize expansion of remote storage URLs with sharding Fixes #1321. --- diff --git a/src/storage/Storage.cpp b/src/storage/Storage.cpp index a40ef10f7..fb72e1763 100644 --- a/src/storage/Storage.cpp +++ b/src/storage/Storage.cpp @@ -74,31 +74,43 @@ get_features() return util::join(features, " "); } +// Representation of one shard configuration. struct RemoteStorageShardConfig { std::string name; double weight; + Url url; // Cache of URL with expanded "*" }; +// Representation of one entry in the remote_storage config option. struct RemoteStorageConfig { + // Raw URL with unexpanded "*". + std::string url_str; + + // "shard" attribute. std::vector shards; - remote::RemoteStorage::Backend::Params params; + + // "read-only" attribute. bool read_only = false; + + // Other attributes. + std::vector attributes; }; +// An instantiated remote storage backend. struct RemoteStorageBackendEntry { - Url url; // With expanded "*". - std::string url_for_logging; // With expanded "*". + Url url; // With expanded "*" + std::string url_for_logging; // With expanded "*" std::unique_ptr impl; bool failed = false; }; +// An instantiated remote storage. struct RemoteStorageEntry { RemoteStorageConfig config; - std::string url_for_logging; // With unexpanded "*". std::shared_ptr storage; std::vector backends; }; @@ -106,25 +118,28 @@ struct RemoteStorageEntry static std::string to_string(const RemoteStorageConfig& entry) { - std::string result = entry.params.url.str(); - for (const auto& attr : entry.params.attributes) { + std::string result = entry.url_str; + for (const auto& attr : entry.attributes) { result += FMT("|{}={}", attr.key, attr.raw_value); } return result; } -static Url +static tl::expected url_from_string(const std::string& url_string) { // The Url class is parsing the URL object lazily. Check if the URL is valid // now to avoid exceptions later. + Url url(url_string); try { - Url url(url_string); - std::ignore = url.str(); - return url; + std::ignore = url.scheme(); } catch (const std::exception& e) { - throw core::Error(FMT("Cannot parse URL {}: {}", url_string, e.what())); + return tl::unexpected(FMT("Cannot parse URL {}: {}", url_string, e.what())); + } + if (url.scheme().empty()) { + return tl::unexpected(FMT("URL scheme must not be empty: {}", url_string)); } + return url; } static RemoteStorageConfig @@ -139,12 +154,8 @@ parse_storage_config(const std::string_view entry) } RemoteStorageConfig result; - const auto url_str = std::string(parts[0]); - result.params.url = url_from_string(url_str); - - if (result.params.url.scheme().empty()) { - throw core::Error(FMT("URL scheme must not be empty: {}", entry)); - } + result.url_str = std::string(parts[0]); + const auto& url_str = result.url_str; for (size_t i = 1; i < parts.size(); ++i) { if (parts[i].empty()) { @@ -157,10 +168,16 @@ parse_storage_config(const std::string_view entry) if (key == "read-only") { result.read_only = (value == "true"); } else if (key == "shards") { - if (url_str.find('*') == std::string::npos) { + const auto asterisk_count = + std::count(url_str.begin(), url_str.end(), '*'); + if (asterisk_count == 0) { throw core::Error( FMT(R"(Missing "*" in URL when using shards: "{}")", url_str)); + } else if (asterisk_count > 1) { + throw core::Error( + FMT(R"(Multiple "*" in URL when using shards: "{}")", url_str)); } + std::string scheme; for (const auto& shard : util::Tokenizer(value, ",")) { double weight = 1.0; std::string_view name; @@ -180,14 +197,28 @@ parse_storage_config(const std::string_view entry) name = shard; } - result.shards.push_back({std::string(name), weight}); + Url url = util::value_or_throw( + url_from_string(util::replace_first(url_str, "*", name))); + if (!scheme.empty() && url.scheme() != scheme) { + throw core::Error(FMT("Scheme {} different from {} in {}", + url.scheme(), + scheme, + url_str)); + } + result.shards.push_back({std::string(name), weight, url}); } } - result.params.attributes.push_back( + result.attributes.push_back( {std::string(key), value, std::string(raw_value)}); } + // No shards => save the single URL as the sole shard. + if (result.shards.empty()) { + result.shards.push_back( + {"", 0.0, util::value_or_throw(url_from_string(url_str))}); + } + return result; } @@ -202,9 +233,9 @@ parse_storage_configs(const std::string_view& configs) } static std::shared_ptr -get_storage(const Url& url) +get_storage(const std::string& scheme) { - const auto it = k_remote_storage_implementations.find(url.scheme()); + const auto it = k_remote_storage_implementations.find(scheme); if (it != k_remote_storage_implementations.end()) { return it->second; } else { @@ -289,39 +320,46 @@ Storage::has_remote_storage() const return !m_remote_storages.empty(); } +static std::string +get_redacted_url_str_for_logging(const Url& url) +{ + Url redacted_url(url); + if (!url.user_info().empty()) { + redacted_url.user_info(k_redacted_password); + } + return redacted_url.str(); +} + std::string Storage::get_remote_storage_config_for_logging() const { auto configs = parse_storage_configs(m_config.remote_storage()); for (auto& config : configs) { - const auto storage = get_storage(config.params.url); - if (storage) { - storage->redact_secrets(config.params); - } + const auto url = url_from_string(config.url_str); + if (url) { + const auto storage = get_storage(url->scheme()); + if (storage) { + config.url_str = get_redacted_url_str_for_logging(*url); + storage->redact_secrets(config.attributes); + } + } // else: unexpanded URL is not a proper URL, not much we can do } return util::join(configs, " "); } -static void -redact_url_for_logging(Url& url_for_logging) -{ - url_for_logging.user_info(""); -} - void Storage::add_remote_storages() { const auto configs = parse_storage_configs(m_config.remote_storage()); for (const auto& config : configs) { - auto url_for_logging = config.params.url; - redact_url_for_logging(url_for_logging); - const auto storage = get_storage(config.params.url); + ASSERT(!config.shards.empty()); + const std::string scheme = config.shards.front().url.scheme(); + const auto storage = get_storage(scheme); if (!storage) { - throw core::Error( - FMT("unknown remote storage URL: {}", url_for_logging.str())); + throw core::Error(FMT("unknown remote storage scheme: {}", scheme)); } m_remote_storages.push_back(std::make_unique( - RemoteStorageEntry{config, url_for_logging.str(), storage, {}})); + RemoteStorageEntry{config, storage, {}})); } } @@ -349,14 +387,17 @@ to_half_open_unit_interval(uint64_t value) static Url get_shard_url(const Hash::Digest& key, - const std::string& url, const std::vector& shards) { ASSERT(!shards.empty()); + if (shards.size() == 1) { + return shards.front().url; + } + // This is the "weighted rendezvous hashing" algorithm. double highest_score = -1.0; - std::string best_shard; + Url best_shard_url; for (const auto& shard_config : shards) { util::XXH3_64 hash; hash.update(key.data(), key.size()); @@ -366,12 +407,12 @@ get_shard_url(const Hash::Digest& key, const double weighted_score = score == 0.0 ? 0.0 : shard_config.weight / -std::log(score); if (weighted_score > highest_score) { - best_shard = shard_config.name; + best_shard_url = shard_config.url; highest_score = weighted_score; } } - return url_from_string(util::replace_first(url, "*", best_shard)); + return best_shard_url; } RemoteStorageBackendEntry* @@ -381,33 +422,28 @@ Storage::get_backend(RemoteStorageEntry& entry, const bool for_writing) { if (for_writing && entry.config.read_only) { - LOG("Not {} {} since it is read-only", + LOG("Not {} {} storage since it is read-only", operation_description, - entry.url_for_logging); + entry.config.shards.front().url.scheme()); return nullptr; } - const auto shard_url = - entry.config.shards.empty() - ? entry.config.params.url - : get_shard_url(key, entry.config.params.url.str(), entry.config.shards); + const auto shard_url = get_shard_url(key, entry.config.shards); + const auto url_str_for_logging = + get_redacted_url_str_for_logging(shard_url.str()); auto backend = std::find_if(entry.backends.begin(), entry.backends.end(), [&](const auto& x) { return x.url.str() == shard_url.str(); }); if (backend == entry.backends.end()) { - auto shard_url_for_logging = shard_url; - redact_url_for_logging(shard_url_for_logging); - entry.backends.push_back( - {shard_url, shard_url_for_logging.str(), {}, false}); - auto shard_params = entry.config.params; - shard_params.url = shard_url; + entry.backends.push_back({shard_url, url_str_for_logging, {}, false}); try { - entry.backends.back().impl = entry.storage->create_backend(shard_params); + entry.backends.back().impl = + entry.storage->create_backend(shard_url, entry.config.attributes); } catch (const remote::RemoteStorage::Backend::Failed& e) { LOG("Failed to construct backend for {}{}", - entry.url_for_logging, + url_str_for_logging, std::string_view(e.what()).empty() ? "" : FMT(": {}", e.what())); mark_backend_as_failed(entry.backends.back(), e.failure()); return nullptr; @@ -416,7 +452,7 @@ Storage::get_backend(RemoteStorageEntry& entry, } else if (backend->failed) { LOG("Not {} {} since it failed earlier", operation_description, - entry.url_for_logging); + url_str_for_logging); return nullptr; } else { return &*backend; diff --git a/src/storage/Storage.hpp b/src/storage/Storage.hpp index 3a63ffe94..793662f80 100644 --- a/src/storage/Storage.hpp +++ b/src/storage/Storage.hpp @@ -35,6 +35,8 @@ namespace storage { +constexpr auto k_redacted_password = "********"; + std::string get_features(); struct RemoteStorageBackendEntry; diff --git a/src/storage/remote/FileStorage.cpp b/src/storage/remote/FileStorage.cpp index 80cf62099..548d63e5d 100644 --- a/src/storage/remote/FileStorage.cpp +++ b/src/storage/remote/FileStorage.cpp @@ -47,7 +47,8 @@ namespace { class FileStorageBackend : public RemoteStorage::Backend { public: - FileStorageBackend(const Params& params); + FileStorageBackend(const Url& url, + const std::vector& attributes); tl::expected, Failure> get(const Hash::Digest& key) override; @@ -69,13 +70,14 @@ private: std::string get_entry_path(const Hash::Digest& key) const; }; -FileStorageBackend::FileStorageBackend(const Params& params) +FileStorageBackend::FileStorageBackend( + const Url& url, const std::vector& attributes) { - ASSERT(params.url.scheme() == "file"); + ASSERT(url.scheme() == "file"); - const auto& host = params.url.host(); + const auto& host = url.host(); #ifdef _WIN32 - m_dir = util::replace_all(params.url.path(), "/", "\\"); + m_dir = util::replace_all(url.path(), "/", "\\"); if (m_dir.length() >= 3 && m_dir[0] == '\\' && m_dir[2] == ':') { // \X:\foo\bar -> X:\foo\bar according to RFC 8089 appendix E.2. m_dir = m_dir.substr(1); @@ -88,12 +90,12 @@ FileStorageBackend::FileStorageBackend(const Params& params) throw core::Fatal( FMT("invalid file URL \"{}\": specifying a host other than localhost is" " not supported", - params.url.str())); + url.str())); } - m_dir = params.url.path(); + m_dir = url.path(); #endif - for (const auto& attr : params.attributes) { + for (const auto& attr : attributes) { if (attr.key == "layout") { if (attr.value == "flat") { m_layout = Layout::flat; @@ -205,9 +207,10 @@ FileStorageBackend::get_entry_path(const Hash::Digest& key) const } // namespace std::unique_ptr -FileStorage::create_backend(const Backend::Params& params) const +FileStorage::create_backend( + const Url& url, const std::vector& attributes) const { - return std::make_unique(params); + return std::make_unique(url, attributes); } } // namespace storage::remote diff --git a/src/storage/remote/FileStorage.hpp b/src/storage/remote/FileStorage.hpp index ae72cfdd1..c1d72961a 100644 --- a/src/storage/remote/FileStorage.hpp +++ b/src/storage/remote/FileStorage.hpp @@ -1,4 +1,4 @@ -// Copyright (C) 2021-2022 Joel Rosdahl and other contributors +// Copyright (C) 2021-2023 Joel Rosdahl and other contributors // // See doc/AUTHORS.adoc for a complete list of contributors. // @@ -25,8 +25,9 @@ namespace storage::remote { class FileStorage : public RemoteStorage { public: - std::unique_ptr - create_backend(const Backend::Params& params) const override; + std::unique_ptr create_backend( + const Url& url, + const std::vector& attributes) const override; }; } // namespace storage::remote diff --git a/src/storage/remote/HttpStorage.cpp b/src/storage/remote/HttpStorage.cpp index 37ddc237a..0778b96de 100644 --- a/src/storage/remote/HttpStorage.cpp +++ b/src/storage/remote/HttpStorage.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -40,7 +41,8 @@ namespace { class HttpStorageBackend : public RemoteStorage::Backend { public: - HttpStorageBackend(const Params& params); + HttpStorageBackend(const Url& url, + const std::vector& attributes); tl::expected, Failure> get(const Hash::Digest& key) override; @@ -103,15 +105,16 @@ failure_from_httplib_error(httplib::Error error) : RemoteStorage::Backend::Failure::error; } -HttpStorageBackend::HttpStorageBackend(const Params& params) - : m_url_path(get_url_path(params.url)), - m_http_client(get_url(params.url)) +HttpStorageBackend::HttpStorageBackend( + const Url& url, const std::vector& attributes) + : m_url_path(get_url_path(url)), + m_http_client(get_url(url)) { - if (!params.url.user_info().empty()) { - const auto [user, password] = util::split_once(params.url.user_info(), ':'); + if (!url.user_info().empty()) { + const auto [user, password] = util::split_once(url.user_info(), ':'); if (!password) { throw core::Fatal(FMT("Expected username:password in URL but got \"{}\"", - params.url.user_info())); + url.user_info())); } m_http_client.set_basic_auth(std::string(user), std::string(*password)); } @@ -124,7 +127,7 @@ HttpStorageBackend::HttpStorageBackend(const Params& params) auto connect_timeout = k_default_connect_timeout; auto operation_timeout = k_default_operation_timeout; - for (const auto& attr : params.attributes) { + for (const auto& attr : attributes) { if (attr.key == "bearer-token") { m_http_client.set_bearer_token_auth(attr.value); } else if (attr.key == "connect-timeout") { @@ -284,27 +287,22 @@ HttpStorageBackend::get_entry_path(const Hash::Digest& key) const } // namespace std::unique_ptr -HttpStorage::create_backend(const Backend::Params& params) const +HttpStorage::create_backend( + const Url& url, const std::vector& attributes) const { - return std::make_unique(params); + return std::make_unique(url, attributes); } void -HttpStorage::redact_secrets(Backend::Params& params) const +HttpStorage::redact_secrets(std::vector& attributes) const { - auto& url = params.url; - const auto [user, password] = util::split_once(url.user_info(), ':'); - if (password) { - url.user_info(FMT("{}:{}", user, k_redacted_password)); - } - auto bearer_token_attribute = - std::find_if(params.attributes.begin(), - params.attributes.end(), - [&](const auto& attr) { return attr.key == "bearer-token"; }); - if (bearer_token_attribute != params.attributes.end()) { - bearer_token_attribute->value = k_redacted_password; - bearer_token_attribute->raw_value = k_redacted_password; + std::find_if(attributes.begin(), attributes.end(), [&](const auto& attr) { + return attr.key == "bearer-token"; + }); + if (bearer_token_attribute != attributes.end()) { + bearer_token_attribute->value = storage::k_redacted_password; + bearer_token_attribute->raw_value = storage::k_redacted_password; } } diff --git a/src/storage/remote/HttpStorage.hpp b/src/storage/remote/HttpStorage.hpp index dbe45c9d8..9b1aaf4b5 100644 --- a/src/storage/remote/HttpStorage.hpp +++ b/src/storage/remote/HttpStorage.hpp @@ -25,10 +25,12 @@ namespace storage::remote { class HttpStorage : public RemoteStorage { public: - std::unique_ptr - create_backend(const Backend::Params& params) const override; + std::unique_ptr create_backend( + const Url& url, + const std::vector& attributes) const override; - void redact_secrets(Backend::Params& params) const override; + void + redact_secrets(std::vector& attributes) const override; }; } // namespace storage::remote diff --git a/src/storage/remote/RedisStorage.cpp b/src/storage/remote/RedisStorage.cpp index 2b3401e69..299963d52 100644 --- a/src/storage/remote/RedisStorage.cpp +++ b/src/storage/remote/RedisStorage.cpp @@ -20,6 +20,7 @@ #include #include +#include #include #include #include @@ -64,7 +65,8 @@ const uint32_t DEFAULT_PORT = 6379; class RedisStorageBackend : public RemoteStorage::Backend { public: - RedisStorageBackend(const RemoteStorage::Backend::Params& params); + RedisStorageBackend(const Url& url, + const std::vector& attributes); tl::expected, Failure> get(const Hash::Digest& key) override; @@ -112,25 +114,26 @@ split_user_info(const std::string& user_info) } } -RedisStorageBackend::RedisStorageBackend(const Params& params) +RedisStorageBackend::RedisStorageBackend( + const Url& url, + const std::vector& attributes) : m_prefix("ccache"), // TODO: attribute m_context(nullptr, redisFree) { - const auto& url = params.url; ASSERT(url.scheme() == "redis" || url.scheme() == "redis+unix"); - if (url.scheme() == "redis+unix" && !params.url.host().empty() - && params.url.host() != "localhost") { + if (url.scheme() == "redis+unix" && !url.host().empty() + && url.host() != "localhost") { throw core::Fatal( FMT("invalid file path \"{}\": specifying a host other than localhost is" " not supported", - params.url.str(), - params.url.host())); + url.str(), + url.host())); } auto connect_timeout = k_default_connect_timeout; auto operation_timeout = k_default_operation_timeout; - for (const auto& attr : params.attributes) { + for (const auto& attr : attributes) { if (attr.key == "connect-timeout") { connect_timeout = parse_timeout_attribute(attr.value); } else if (attr.key == "operation-timeout") { @@ -313,12 +316,12 @@ RedisStorageBackend::authenticate(const Url& url) if (password) { if (user) { // redis://user:password@host - LOG("Redis AUTH {} {}", *user, k_redacted_password); + LOG("Redis AUTH {} {}", *user, storage::k_redacted_password); util::value_or_throw( redis_command("AUTH %s %s", user->c_str(), password->c_str())); } else { // redis://password@host - LOG("Redis AUTH {}", k_redacted_password); + LOG("Redis AUTH {}", storage::k_redacted_password); util::value_or_throw(redis_command("AUTH %s", password->c_str())); } } @@ -353,25 +356,10 @@ RedisStorageBackend::get_key_string(const Hash::Digest& digest) const } // namespace std::unique_ptr -RedisStorage::create_backend(const Backend::Params& params) const +RedisStorage::create_backend( + const Url& url, const std::vector& attributes) const { - return std::make_unique(params); -} - -void -RedisStorage::redact_secrets(Backend::Params& params) const -{ - auto& url = params.url; - const auto [user, password] = split_user_info(url.user_info()); - if (password) { - if (user) { - // redis://user:password@host - url.user_info(FMT("{}:{}", *user, k_redacted_password)); - } else { - // redis://password@host - url.user_info(k_redacted_password); - } - } + return std::make_unique(url, attributes); } } // namespace storage::remote diff --git a/src/storage/remote/RedisStorage.hpp b/src/storage/remote/RedisStorage.hpp index 530cb431c..11e21a366 100644 --- a/src/storage/remote/RedisStorage.hpp +++ b/src/storage/remote/RedisStorage.hpp @@ -1,4 +1,4 @@ -// Copyright (C) 2021-2022 Joel Rosdahl and other contributors +// Copyright (C) 2021-2023 Joel Rosdahl and other contributors // // See doc/AUTHORS.adoc for a complete list of contributors. // @@ -25,10 +25,9 @@ namespace storage::remote { class RedisStorage : public RemoteStorage { public: - std::unique_ptr - create_backend(const Backend::Params& params) const override; - - void redact_secrets(Backend::Params& params) const override; + std::unique_ptr create_backend( + const Url& url, + const std::vector& attributes) const override; }; } // namespace storage::remote diff --git a/src/storage/remote/RemoteStorage.hpp b/src/storage/remote/RemoteStorage.hpp index d71f60ba1..9c14d6313 100644 --- a/src/storage/remote/RemoteStorage.hpp +++ b/src/storage/remote/RemoteStorage.hpp @@ -34,7 +34,6 @@ namespace storage::remote { -constexpr auto k_redacted_password = "********"; const auto k_default_connect_timeout = std::chrono::milliseconds{100}; const auto k_default_operation_timeout = std::chrono::milliseconds{10000}; @@ -52,12 +51,6 @@ public: std::string raw_value; // Value part, not percent-decoded. }; - struct Params - { - Url url; - std::vector attributes; - }; - enum class Failure { error, // Operation error, e.g. bad parameters or failed connection. timeout, // Timeout, e.g. due to slow network or server. @@ -110,16 +103,19 @@ public: // `core::Fatal` on fatal configuration error or `Backend::Failed` on // connection error or timeout. virtual std::unique_ptr - create_backend(const Backend::Params& parameters) const = 0; + create_backend(const Url& url, + const std::vector& attributes) const = 0; - // Redact secrets in backend parameters, if any. - virtual void redact_secrets(Backend::Params& parameters) const; + // Redact secrets in backend attributes, if any. + virtual void + redact_secrets(std::vector& attributes) const; }; // --- Inline implementations --- inline void -RemoteStorage::redact_secrets(RemoteStorage::Backend::Params& /*config*/) const +RemoteStorage::redact_secrets( + std::vector& /*attributes*/) const { } diff --git a/test/suites/remote_file.bash b/test/suites/remote_file.bash index e64afda51..629ef4532 100644 --- a/test/suites/remote_file.bash +++ b/test/suites/remote_file.bash @@ -316,6 +316,19 @@ SUITE_remote_file() { test_failed "Expected remote/a or remote/b to exist" fi + $CCACHE -Cz >/dev/null + rm -rf remote + + CCACHE_REMOTE_STORAGE="*|shards=file://$PWD/remote/a,file://$PWD/remote/b" + + $CCACHE_COMPILE -c test.c + expect_stat direct_cache_hit 0 + expect_stat cache_miss 1 + expect_stat files_in_cache 2 + if [ ! -d remote/a ] && [ ! -d remote/b ]; then + test_failed "Expected remote/a or remote/b to exist" + fi + # ------------------------------------------------------------------------- TEST "Reshare" diff --git a/test/suites/remote_http.bash b/test/suites/remote_http.bash index 608448f88..33fc49ecc 100644 --- a/test/suites/remote_http.bash +++ b/test/suites/remote_http.bash @@ -187,7 +187,30 @@ if $RUN_WIN_XFAIL; then expect_not_contains test.o.*.ccache-log secret123 expect_contains test.o.*.ccache-log "status code: 401" fi + # ------------------------------------------------------------------------- + TEST "Port sharding" + + start_http_server 12780 remote + export CCACHE_REMOTE_STORAGE="http://localhost:*|shards=12780" + + $CCACHE_COMPILE -c test.c + expect_stat direct_cache_hit 0 + expect_stat cache_miss 1 + expect_stat files_in_cache 2 + expect_file_count 2 '*' remote # result + manifest + subdirs=$(find remote -type d | wc -l) + if [ "${subdirs}" -lt 2 ]; then # "remote" itself counts as one + test_failed "Expected subdirectories in remote" + fi + + $CCACHE_COMPILE -c test.c + expect_stat direct_cache_hit 1 + expect_stat cache_miss 1 + expect_stat files_in_cache 2 + expect_file_count 2 '*' remote # result + manifest + + # ------------------------------------------------------------------------- TEST "IPv6 address" if maybe_start_ipv6_http_server 12780 remote; then