]> git.ipfire.org Git - thirdparty/ccache.git/blame - src/storage/Storage.cpp
fix: Detect bad remote storage URLs gracefully again
[thirdparty/ccache.git] / src / storage / Storage.cpp
CommitLineData
69def064 1// Copyright (C) 2021-2024 Joel Rosdahl and other contributors
c7c0837a
JR
2//
3// See doc/AUTHORS.adoc for a complete list of contributors.
4//
5// This program is free software; you can redistribute it and/or modify it
6// under the terms of the GNU General Public License as published by the Free
7// Software Foundation; either version 3 of the License, or (at your option)
8// any later version.
9//
10// This program is distributed in the hope that it will be useful, but WITHOUT
11// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13// more details.
14//
15// You should have received a copy of the GNU General Public License along with
16// this program; if not, write to the Free Software Foundation, Inc., 51
17// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
19#include "Storage.hpp"
20
2b3368cf 21#include <Config.hpp>
923b71c2 22#include <MiniTrace.hpp>
2b3368cf 23#include <Util.hpp>
0e4e4b63 24#include <core/CacheEntry.hpp>
1f6144f8 25#include <core/Statistic.hpp>
eb266c99 26#include <core/exceptions.hpp>
0cd6f70b
JR
27#include <storage/remote/FileStorage.hpp>
28#include <storage/remote/HttpStorage.hpp>
e4d64b4b 29#include <util/assertions.hpp>
ef96a84a 30#include <util/fmtmacros.hpp>
13cb56cd 31#include <util/logging.hpp>
1ef63482 32#ifdef HAVE_REDIS_STORAGE_BACKEND
0cd6f70b 33# include <storage/remote/RedisStorage.hpp>
1ef63482 34#endif
1db166ec 35#include <util/Bytes.hpp>
ea326a9c 36#include <util/Timer.hpp>
2b3368cf 37#include <util/Tokenizer.hpp>
a2206d36 38#include <util/XXH3_64.hpp>
87338466 39#include <util/expected.hpp>
6288b53c 40#include <util/file.hpp>
303c82fe 41#include <util/string.hpp>
2b3368cf 42
456986fc
GJ
43#include <third_party/url.hpp>
44
4b8f8249 45#include <cmath>
d663cb51
JR
46#include <memory>
47#include <string>
87338466 48#include <unordered_map>
01e72eaa 49#include <vector>
2b3368cf 50
c7c0837a
JR
51namespace storage {
52
87338466 53const std::unordered_map<std::string /*scheme*/,
0cd6f70b
JR
54 std::shared_ptr<remote::RemoteStorage>>
55 k_remote_storage_implementations = {
56 {"file", std::make_shared<remote::FileStorage>()},
57 {"http", std::make_shared<remote::HttpStorage>()},
87338466 58#ifdef HAVE_REDIS_STORAGE_BACKEND
0cd6f70b
JR
59 {"redis", std::make_shared<remote::RedisStorage>()},
60 {"redis+unix", std::make_shared<remote::RedisStorage>()},
87338466
JR
61#endif
62};
63
01e72eaa
GJ
64std::string
65get_features()
66{
67 std::vector<std::string> features;
0cd6f70b
JR
68 features.reserve(k_remote_storage_implementations.size());
69 std::transform(k_remote_storage_implementations.begin(),
70 k_remote_storage_implementations.end(),
01e72eaa
GJ
71 std::back_inserter(features),
72 [](auto& entry) { return FMT("{}-storage", entry.first); });
73 std::sort(features.begin(), features.end());
74 return util::join(features, " ");
75}
76
829f93c4 77// Representation of one shard configuration.
0cd6f70b 78struct RemoteStorageShardConfig
4b8f8249
JR
79{
80 std::string name;
81 double weight;
829f93c4 82 Url url; // Cache of URL with expanded "*"
4b8f8249
JR
83};
84
829f93c4 85// Representation of one entry in the remote_storage config option.
0cd6f70b 86struct RemoteStorageConfig
87338466 87{
829f93c4
JR
88 // Raw URL with unexpanded "*".
89 std::string url_str;
90
91 // "shard" attribute.
0cd6f70b 92 std::vector<RemoteStorageShardConfig> shards;
829f93c4
JR
93
94 // "read-only" attribute.
87338466 95 bool read_only = false;
829f93c4
JR
96
97 // Other attributes.
98 std::vector<remote::RemoteStorage::Backend::Attribute> attributes;
87338466
JR
99};
100
829f93c4 101// An instantiated remote storage backend.
0cd6f70b 102struct RemoteStorageBackendEntry
4b8f8249 103{
829f93c4
JR
104 Url url; // With expanded "*"
105 std::string url_for_logging; // With expanded "*"
0cd6f70b 106 std::unique_ptr<remote::RemoteStorage::Backend> impl;
4b8f8249
JR
107 bool failed = false;
108};
109
829f93c4 110// An instantiated remote storage.
0cd6f70b 111struct RemoteStorageEntry
87338466 112{
0cd6f70b 113 RemoteStorageConfig config;
0cd6f70b
JR
114 std::shared_ptr<remote::RemoteStorage> storage;
115 std::vector<RemoteStorageBackendEntry> backends;
87338466
JR
116};
117
118static std::string
0cd6f70b 119to_string(const RemoteStorageConfig& entry)
87338466 120{
829f93c4
JR
121 std::string result = entry.url_str;
122 for (const auto& attr : entry.attributes) {
87338466
JR
123 result += FMT("|{}={}", attr.key, attr.raw_value);
124 }
125 return result;
126}
127
829f93c4 128static tl::expected<Url, std::string>
8d47ee7e
JR
129url_from_string(const std::string& url_string)
130{
131 // The Url class is parsing the URL object lazily. Check if the URL is valid
132 // now to avoid exceptions later.
829f93c4 133 Url url(url_string);
8d47ee7e 134 try {
69def064 135 std::ignore = url.str();
8d47ee7e 136 } catch (const std::exception& e) {
829f93c4
JR
137 return tl::unexpected(FMT("Cannot parse URL {}: {}", url_string, e.what()));
138 }
139 if (url.scheme().empty()) {
140 return tl::unexpected(FMT("URL scheme must not be empty: {}", url_string));
8d47ee7e 141 }
829f93c4 142 return url;
8d47ee7e
JR
143}
144
0cd6f70b 145static RemoteStorageConfig
60005c83 146parse_storage_config(const std::string_view entry)
87338466
JR
147{
148 const auto parts =
32970d78 149 util::split_into_views(entry, "|", util::Tokenizer::Mode::include_empty);
87338466
JR
150
151 if (parts.empty() || parts.front().empty()) {
a4ab84f9 152 throw core::Error(
0cd6f70b 153 FMT("remote storage config must provide a URL: {}", entry));
87338466
JR
154 }
155
0cd6f70b 156 RemoteStorageConfig result;
829f93c4
JR
157 result.url_str = std::string(parts[0]);
158 const auto& url_str = result.url_str;
87338466
JR
159
160 for (size_t i = 1; i < parts.size(); ++i) {
161 if (parts[i].empty()) {
162 continue;
163 }
4812396d
JR
164 const auto [key, right_hand_side] = util::split_once(parts[i], '=');
165 const auto& raw_value = right_hand_side.value_or("true");
87338466 166 const auto value =
eb266c99 167 util::value_or_throw<core::Error>(util::percent_decode(raw_value));
1924ad69
JR
168 if (key == "read-only") {
169 result.read_only = (value == "true");
4b8f8249 170 } else if (key == "shards") {
829f93c4
JR
171 const auto asterisk_count =
172 std::count(url_str.begin(), url_str.end(), '*');
173 if (asterisk_count == 0) {
a4ab84f9
JR
174 throw core::Error(
175 FMT(R"(Missing "*" in URL when using shards: "{}")", url_str));
829f93c4
JR
176 } else if (asterisk_count > 1) {
177 throw core::Error(
178 FMT(R"(Multiple "*" in URL when using shards: "{}")", url_str));
4b8f8249 179 }
829f93c4 180 std::string scheme;
4b8f8249
JR
181 for (const auto& shard : util::Tokenizer(value, ",")) {
182 double weight = 1.0;
60005c83 183 std::string_view name;
4b8f8249 184 const auto lp_pos = shard.find('(');
60005c83 185 if (lp_pos != std::string_view::npos) {
4b8f8249 186 if (shard.back() != ')') {
a4ab84f9 187 throw core::Error(FMT("Invalid shard name: \"{}\"", shard));
4b8f8249
JR
188 }
189 weight =
190 util::value_or_throw<core::Error>(util::parse_double(std::string(
191 shard.substr(lp_pos + 1, shard.length() - lp_pos - 2))));
192 if (weight < 0.0) {
a4ab84f9 193 throw core::Error(FMT("Invalid shard weight: \"{}\"", weight));
4b8f8249
JR
194 }
195 name = shard.substr(0, lp_pos);
196 } else {
197 name = shard;
198 }
199
829f93c4
JR
200 Url url = util::value_or_throw<core::Error>(
201 url_from_string(util::replace_first(url_str, "*", name)));
202 if (!scheme.empty() && url.scheme() != scheme) {
203 throw core::Error(FMT("Scheme {} different from {} in {}",
204 url.scheme(),
205 scheme,
206 url_str));
207 }
208 result.shards.push_back({std::string(name), weight, url});
4b8f8249 209 }
87338466 210 }
4b8f8249 211
829f93c4 212 result.attributes.push_back(
4b8f8249 213 {std::string(key), value, std::string(raw_value)});
87338466
JR
214 }
215
829f93c4
JR
216 // No shards => save the single URL as the sole shard.
217 if (result.shards.empty()) {
218 result.shards.push_back(
219 {"", 0.0, util::value_or_throw<core::Error>(url_from_string(url_str))});
220 }
221
87338466
JR
222 return result;
223}
224
0cd6f70b 225static std::vector<RemoteStorageConfig>
60005c83 226parse_storage_configs(const std::string_view& configs)
87338466 227{
0cd6f70b 228 std::vector<RemoteStorageConfig> result;
87338466
JR
229 for (const auto& config : util::Tokenizer(configs, " ")) {
230 result.push_back(parse_storage_config(config));
231 }
232 return result;
233}
234
0cd6f70b 235static std::shared_ptr<remote::RemoteStorage>
829f93c4 236get_storage(const std::string& scheme)
87338466 237{
829f93c4 238 const auto it = k_remote_storage_implementations.find(scheme);
0cd6f70b 239 if (it != k_remote_storage_implementations.end()) {
87338466
JR
240 return it->second;
241 } else {
242 return {};
243 }
244}
245
0cd6f70b 246Storage::Storage(const Config& config) : local(config), m_config(config)
2b3368cf
JR
247{
248}
249
d663cb51 250// Define the destructor in the implementation file to avoid having to declare
0cd6f70b 251// RemoteStorageEntry and its constituents in the header file.
f965ecb6 252Storage::~Storage() = default;
c7c0837a
JR
253
254void
255Storage::initialize()
256{
0cd6f70b 257 add_remote_storages();
c7c0837a
JR
258}
259
260void
261Storage::finalize()
262{
0cd6f70b 263 local.finalize();
c7c0837a
JR
264}
265
0db6d06d 266void
0e4e4b63 267Storage::get(const Hash::Hash::Digest& key,
b30ef4b4 268 const core::CacheEntryType type,
0db6d06d 269 const EntryReceiver& entry_receiver)
c7c0837a 270{
923b71c2
JR
271 MTR_SCOPE("storage", "get");
272
d5782080
JR
273 if (!m_config.remote_only()) {
274 auto value = local.get(key, type);
d5782080
JR
275 if (value) {
276 if (m_config.reshare()) {
277 put_in_remote_storage(key, *value, true);
278 }
279 if (entry_receiver(std::move(*value))) {
280 return;
281 }
9af44de3 282 }
b30ef4b4 283 }
9af44de3 284
2ea3416d 285 get_from_remote_storage(key, type, [&](util::Bytes&& data) {
d5782080
JR
286 if (!m_config.remote_only()) {
287 local.put(key, type, data, true);
288 }
0db6d06d
JR
289 return entry_receiver(std::move(data));
290 });
c7c0837a
JR
291}
292
d663cb51 293void
0e4e4b63 294Storage::put(const Hash::Digest& key,
c7c0837a 295 const core::CacheEntryType type,
d663cb51 296 nonstd::span<const uint8_t> value)
c7c0837a 297{
923b71c2
JR
298 MTR_SCOPE("storage", "put");
299
d5782080
JR
300 if (!m_config.remote_only()) {
301 local.put(key, type, value);
302 }
0cd6f70b 303 put_in_remote_storage(key, value, false);
c7c0837a
JR
304}
305
306void
0e4e4b63 307Storage::remove(const Hash::Digest& key, const core::CacheEntryType type)
c7c0837a 308{
923b71c2
JR
309 MTR_SCOPE("storage", "remove");
310
d5782080
JR
311 if (!m_config.remote_only()) {
312 local.remove(key, type);
313 }
0cd6f70b 314 remove_from_remote_storage(key);
87338466 315}
2b3368cf 316
182eeb9f 317bool
0cd6f70b 318Storage::has_remote_storage() const
182eeb9f 319{
0cd6f70b 320 return !m_remote_storages.empty();
182eeb9f
JR
321}
322
829f93c4
JR
323static std::string
324get_redacted_url_str_for_logging(const Url& url)
325{
326 Url redacted_url(url);
327 if (!url.user_info().empty()) {
328 redacted_url.user_info(k_redacted_password);
329 }
330 return redacted_url.str();
331}
332
87338466 333std::string
0cd6f70b 334Storage::get_remote_storage_config_for_logging() const
87338466 335{
0cd6f70b 336 auto configs = parse_storage_configs(m_config.remote_storage());
87338466 337 for (auto& config : configs) {
829f93c4
JR
338 const auto url = url_from_string(config.url_str);
339 if (url) {
340 const auto storage = get_storage(url->scheme());
341 if (storage) {
342 config.url_str = get_redacted_url_str_for_logging(*url);
343 storage->redact_secrets(config.attributes);
344 }
345 } // else: unexpanded URL is not a proper URL, not much we can do
87338466
JR
346 }
347 return util::join(configs, " ");
348}
2b3368cf 349
87338466 350void
0cd6f70b 351Storage::add_remote_storages()
87338466 352{
0cd6f70b 353 const auto configs = parse_storage_configs(m_config.remote_storage());
87338466 354 for (const auto& config : configs) {
829f93c4
JR
355 ASSERT(!config.shards.empty());
356 const std::string scheme = config.shards.front().url.scheme();
357 const auto storage = get_storage(scheme);
87338466 358 if (!storage) {
829f93c4 359 throw core::Error(FMT("unknown remote storage scheme: {}", scheme));
2b3368cf 360 }
0cd6f70b 361 m_remote_storages.push_back(std::make_unique<RemoteStorageEntry>(
829f93c4 362 RemoteStorageEntry{config, storage, {}}));
2b3368cf
JR
363 }
364}
365
1f6144f8
JR
366void
367Storage::mark_backend_as_failed(
0cd6f70b
JR
368 RemoteStorageBackendEntry& backend_entry,
369 const remote::RemoteStorage::Backend::Failure failure)
2b3368cf 370{
87338466 371 // The backend is expected to log details about the error.
4b8f8249 372 backend_entry.failed = true;
0cd6f70b
JR
373 local.increment_statistic(
374 failure == remote::RemoteStorage::Backend::Failure::timeout
375 ? core::Statistic::remote_storage_timeout
376 : core::Statistic::remote_storage_error);
87338466 377}
2b3368cf 378
4b8f8249
JR
// Map value to a double in the half-open interval [0, 1).
//
// Only the 53 least significant bits of value are used. That is the width of
// a double's significand, so the integer converts to double exactly and the
// quotient is always strictly less than 1.
static double
to_half_open_unit_interval(uint64_t value)
{
  constexpr int significand_bits = 53;
  constexpr uint64_t scale = uint64_t{1} << significand_bits;
  const uint64_t low_bits = value % scale;
  return static_cast<double>(low_bits) / static_cast<double>(scale);
}
387
388static Url
0e4e4b63 389get_shard_url(const Hash::Digest& key,
0cd6f70b 390 const std::vector<RemoteStorageShardConfig>& shards)
4b8f8249
JR
391{
392 ASSERT(!shards.empty());
393
829f93c4
JR
394 if (shards.size() == 1) {
395 return shards.front().url;
396 }
397
4b8f8249
JR
398 // This is the "weighted rendezvous hashing" algorithm.
399 double highest_score = -1.0;
829f93c4 400 Url best_shard_url;
4b8f8249 401 for (const auto& shard_config : shards) {
e83ac28d 402 util::XXH3_64 hash;
0e4e4b63 403 hash.update(key.data(), key.size());
e83ac28d
JR
404 hash.update(shard_config.name.data(), shard_config.name.length());
405 const double score = to_half_open_unit_interval(hash.digest());
4b8f8249
JR
406 ASSERT(score >= 0.0 && score < 1.0);
407 const double weighted_score =
408 score == 0.0 ? 0.0 : shard_config.weight / -std::log(score);
409 if (weighted_score > highest_score) {
829f93c4 410 best_shard_url = shard_config.url;
4b8f8249
JR
411 highest_score = weighted_score;
412 }
87338466 413 }
456986fc 414
829f93c4 415 return best_shard_url;
4b8f8249
JR
416}
417
0cd6f70b
JR
418RemoteStorageBackendEntry*
419Storage::get_backend(RemoteStorageEntry& entry,
0e4e4b63 420 const Hash::Digest& key,
60005c83 421 const std::string_view operation_description,
1f6144f8 422 const bool for_writing)
4b8f8249
JR
423{
424 if (for_writing && entry.config.read_only) {
829f93c4 425 LOG("Not {} {} storage since it is read-only",
87338466 426 operation_description,
829f93c4 427 entry.config.shards.front().url.scheme());
4b8f8249 428 return nullptr;
456986fc 429 }
2b3368cf 430
829f93c4
JR
431 const auto shard_url = get_shard_url(key, entry.config.shards);
432 const auto url_str_for_logging =
433 get_redacted_url_str_for_logging(shard_url.str());
4b8f8249
JR
434 auto backend =
435 std::find_if(entry.backends.begin(),
436 entry.backends.end(),
437 [&](const auto& x) { return x.url.str() == shard_url.str(); });
438
439 if (backend == entry.backends.end()) {
829f93c4 440 entry.backends.push_back({shard_url, url_str_for_logging, {}, false});
17273f22 441 try {
829f93c4
JR
442 entry.backends.back().impl =
443 entry.storage->create_backend(shard_url, entry.config.attributes);
0cd6f70b 444 } catch (const remote::RemoteStorage::Backend::Failed& e) {
17273f22 445 LOG("Failed to construct backend for {}{}",
829f93c4 446 url_str_for_logging,
60005c83 447 std::string_view(e.what()).empty() ? "" : FMT(": {}", e.what()));
4b8f8249 448 mark_backend_as_failed(entry.backends.back(), e.failure());
5cdeeb31 449 return nullptr;
17273f22 450 }
4b8f8249
JR
451 return &entry.backends.back();
452 } else if (backend->failed) {
453 LOG("Not {} {} since it failed earlier",
454 operation_description,
829f93c4 455 url_str_for_logging);
4b8f8249
JR
456 return nullptr;
457 } else {
458 return &*backend;
456986fc 459 }
87338466 460}
2b3368cf 461
0db6d06d 462void
0e4e4b63 463Storage::get_from_remote_storage(const Hash::Digest& key,
2ea3416d 464 const core::CacheEntryType type,
0cd6f70b 465 const EntryReceiver& entry_receiver)
87338466 466{
0cd6f70b 467 MTR_SCOPE("remote_storage", "get");
923b71c2 468
0cd6f70b 469 for (const auto& entry : m_remote_storages) {
4b8f8249
JR
470 auto backend = get_backend(*entry, key, "getting from", false);
471 if (!backend) {
456986fc
GJ
472 continue;
473 }
87338466 474
ea326a9c 475 Timer timer;
0db6d06d 476 auto result = backend->impl->get(key);
ea326a9c 477 const auto ms = timer.measure_ms();
87338466 478 if (!result) {
4b8f8249 479 mark_backend_as_failed(*backend, result.error());
87338466 480 continue;
2b3368cf 481 }
87338466 482
0db6d06d 483 auto& value = *result;
87338466 484 if (value) {
ea326a9c 485 LOG("Retrieved {} from {} ({:.2f} ms)",
0e4e4b63 486 util::format_digest(key),
4b8f8249 487 backend->url_for_logging,
ea326a9c 488 ms);
2ea3416d
JR
489 local.increment_statistic(core::Statistic::remote_storage_read_hit);
490 if (type == core::CacheEntryType::result) {
491 local.increment_statistic(core::Statistic::remote_storage_hit);
492 }
0db6d06d
JR
493 if (entry_receiver(std::move(*value))) {
494 return;
495 }
2b3368cf 496 } else {
4b8f8249 497 LOG("No {} in {} ({:.2f} ms)",
0e4e4b63 498 util::format_digest(key),
4b8f8249
JR
499 backend->url_for_logging,
500 ms);
2ea3416d 501 local.increment_statistic(core::Statistic::remote_storage_read_miss);
2b3368cf
JR
502 }
503 }
2b3368cf
JR
504}
505
87338466 506void
0e4e4b63 507Storage::put_in_remote_storage(const Hash::Digest& key,
0cd6f70b
JR
508 nonstd::span<const uint8_t> value,
509 bool only_if_missing)
2b3368cf 510{
0cd6f70b 511 MTR_SCOPE("remote_storage", "put");
923b71c2 512
4798634d 513 if (!core::CacheEntry::Header(value).self_contained) {
0cd6f70b 514 LOG("Not putting {} in remote storage since it's not self-contained",
0e4e4b63 515 util::format_digest(key));
4798634d
JR
516 return;
517 }
518
0cd6f70b 519 for (const auto& entry : m_remote_storages) {
4b8f8249
JR
520 auto backend = get_backend(*entry, key, "putting in", true);
521 if (!backend) {
87338466
JR
522 continue;
523 }
f8789e2f 524
ea326a9c 525 Timer timer;
9af44de3 526 const auto result = backend->impl->put(key, value, only_if_missing);
ea326a9c 527 const auto ms = timer.measure_ms();
87338466
JR
528 if (!result) {
529 // The backend is expected to log details about the error.
4b8f8249 530 mark_backend_as_failed(*backend, result.error());
87338466
JR
531 continue;
532 }
1ef63482 533
87338466 534 const bool stored = *result;
ea326a9c 535 LOG("{} {} in {} ({:.2f} ms)",
9af44de3 536 stored ? "Stored" : "Did not have to store",
0e4e4b63 537 util::format_digest(key),
ee1314f8 538 backend->url_for_logging,
ea326a9c 539 ms);
2ea3416d 540 local.increment_statistic(core::Statistic::remote_storage_write);
87338466 541 }
2b3368cf
JR
542}
543
544void
0e4e4b63 545Storage::remove_from_remote_storage(const Hash::Digest& key)
2b3368cf 546{
0cd6f70b 547 MTR_SCOPE("remote_storage", "remove");
923b71c2 548
0cd6f70b 549 for (const auto& entry : m_remote_storages) {
4b8f8249
JR
550 auto backend = get_backend(*entry, key, "removing from", true);
551 if (!backend) {
87338466
JR
552 continue;
553 }
554
ea326a9c 555 Timer timer;
4b8f8249 556 const auto result = backend->impl->remove(key);
ea326a9c 557 const auto ms = timer.measure_ms();
87338466 558 if (!result) {
4b8f8249 559 mark_backend_as_failed(*backend, result.error());
87338466
JR
560 continue;
561 }
562
563 const bool removed = *result;
564 if (removed) {
ea326a9c 565 LOG("Removed {} from {} ({:.2f} ms)",
0e4e4b63 566 util::format_digest(key),
ee1314f8 567 backend->url_for_logging,
ea326a9c 568 ms);
87338466 569 } else {
ea326a9c 570 LOG("No {} to remove from {} ({:.2f} ms)",
0e4e4b63 571 util::format_digest(key),
ee1314f8 572 backend->url_for_logging,
ea326a9c 573 ms);
2b3368cf 574 }
2ea3416d
JR
575
576 local.increment_statistic(core::Statistic::remote_storage_write);
2b3368cf 577 }
c7c0837a
JR
578}
579
580} // namespace storage