From 7e26a34c3eab0ed5f0d5d9af30bbd6ef577d24b0 Mon Sep 17 00:00:00 2001 From: Joel Rosdahl Date: Fri, 7 Jul 2023 10:29:40 +0200 Subject: [PATCH] refactor: Move format_base16/format_base32hex to util --- src/Digest.hpp | 10 +++++----- src/Hash.cpp | 5 +++-- src/Util.cpp | 27 --------------------------- src/Util.hpp | 8 -------- src/core/CacheEntry.cpp | 7 +++---- src/core/mainoptions.cpp | 3 +-- src/storage/remote/HttpStorage.cpp | 2 +- src/storage/remote/RedisStorage.cpp | 2 ++ src/util/string.cpp | 27 +++++++++++++++++++++++++++ src/util/string.hpp | 8 ++++++++ unittest/test_Util.cpp | 24 ------------------------ unittest/test_util_XXH3_128.cpp | 10 +++++----- unittest/test_util_string.cpp | 24 ++++++++++++++++++++++++ 13 files changed, 79 insertions(+), 78 deletions(-) diff --git a/src/Digest.hpp b/src/Digest.hpp index 189b53769..6ab8b9764 100644 --- a/src/Digest.hpp +++ b/src/Digest.hpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020-2021 Joel Rosdahl and other contributors +// Copyright (C) 2020-2023 Joel Rosdahl and other contributors // // See doc/AUTHORS.adoc for a complete list of contributors. // @@ -18,7 +18,7 @@ #pragma once -#include "Util.hpp" +#include #include "third_party/fmt/core.h" @@ -76,9 +76,9 @@ Digest::to_string() const // allow for up to four uniform cache levels. The rest are encoded as // lowercase base32hex digits without padding characters. const size_t base16_bytes = 2; - return Util::format_base16(m_bytes, base16_bytes) - + Util::format_base32hex(m_bytes + base16_bytes, - size() - base16_bytes); + return util::format_base16({m_bytes, base16_bytes}) + + util::format_base32hex( + {m_bytes + base16_bytes, size() - base16_bytes}); } inline bool diff --git a/src/Hash.cpp b/src/Hash.cpp index 442369e91..be1c16bf4 100644 --- a/src/Hash.cpp +++ b/src/Hash.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2020-2022 Joel Rosdahl and other contributors +// Copyright (C) 2020-2023 Joel Rosdahl and other contributors // // See doc/AUTHORS.adoc for a complete list of contributors. // @@ -24,6 +24,7 @@ #include #include +#include #include #include @@ -84,7 +85,7 @@ Hash::hash(const void* data, size_t size, HashType hash_type) switch (hash_type) { case HashType::binary: add_debug_text( - Util::format_base16(static_cast(data), size)); + util::format_base16({static_cast(data), size})); break; case HashType::text: diff --git a/src/Util.cpp b/src/Util.cpp index e00502fa5..3f769cf55 100644 --- a/src/Util.cpp +++ b/src/Util.cpp @@ -37,10 +37,6 @@ #include // NOLINT: PATH_MAX is defined in limits.h -extern "C" { -#include "third_party/base32hex.h" -} - #ifdef HAVE_DIRENT_H # include #endif @@ -543,29 +539,6 @@ format_argv_for_logging(const char* const* argv) return result; } -std::string -format_base16(const uint8_t* data, size_t size) -{ - static const char digits[] = "0123456789abcdef"; - std::string result; - result.resize(2 * size); - for (size_t i = 0; i < size; ++i) { - result[i * 2] = digits[data[i] >> 4]; - result[i * 2 + 1] = digits[data[i] & 0xF]; - } - return result; -} - -std::string -format_base32hex(const uint8_t* data, size_t size) -{ - const size_t bytes_to_reserve = size * 8 / 5 + 1; - std::string result(bytes_to_reserve, 0); - const size_t actual_size = base32hex(&result[0], data, size); - result.resize(actual_size); - return result; -} - void ensure_dir_exists(std::string_view dir) { diff --git a/src/Util.hpp b/src/Util.hpp index d1a4796fc..13e8bd2c2 100644 --- a/src/Util.hpp +++ b/src/Util.hpp @@ -135,14 +135,6 @@ int fallocate(int fd, long new_size); // not intended to be machine parsable. `argv` must be terminated by a nullptr. std::string format_argv_for_logging(const char* const* argv); -// Format a hexadecimal string representing `size` bytes of `data`. The returned -// string will be `2 * size` long. -std::string format_base16(const uint8_t* data, size_t size); - -// Format a lowercase base32hex string representing `size` bytes of `data`. No -// padding characters will be added. -std::string format_base32hex(const uint8_t* data, size_t size); - // Return current working directory (CWD) as returned from getcwd(3) (i.e., // normalized path without symlink parts). Returns the empty string on error. std::string get_actual_cwd(); diff --git a/src/core/CacheEntry.cpp b/src/core/CacheEntry.cpp index fb1c1e3c1..26595edcc 100644 --- a/src/core/CacheEntry.cpp +++ b/src/core/CacheEntry.cpp @@ -226,10 +226,9 @@ CacheEntry::verify_checksum() const const auto actual = checksum.digest(); if (actual != m_checksum) { - throw core::Error( - FMT("Incorrect checksum (actual {}, expected {})", - Util::format_base16(actual.data(), actual.size()), - Util::format_base16(m_checksum.data(), m_checksum.size()))); + throw core::Error(FMT("Incorrect checksum (actual {}, expected {})", + util::format_base16(actual), + util::format_base16(m_checksum))); } } diff --git a/src/core/mainoptions.cpp b/src/core/mainoptions.cpp index 2789876ff..f9cd2cf80 100644 --- a/src/core/mainoptions.cpp +++ b/src/core/mainoptions.cpp @@ -582,8 +582,7 @@ process_main_options(int argc, const char* const* argv) checksum.update({data, size}); }); const auto digest = checksum.digest(); - PRINT( - stdout, "{}\n", Util::format_base16(digest.data(), digest.size())); + PRINT(stdout, "{}\n", util::format_base16(digest)); } else { PRINT(stderr, "Error: Failed to checksum {}\n", arg); } diff --git a/src/storage/remote/HttpStorage.cpp b/src/storage/remote/HttpStorage.cpp index e055de156..3512a4365 100644 --- a/src/storage/remote/HttpStorage.cpp +++ b/src/storage/remote/HttpStorage.cpp @@ -259,7 +259,7 @@ HttpStorageBackend::get_entry_path(const Digest& key) const // Mimic hex representation of a SHA256 hash value. const auto sha256_hex_size = 64; static_assert(Digest::size() == 20, "Update below if digest size changes"); - std::string hex_digits = Util::format_base16(key.bytes(), key.size()); + std::string hex_digits = util::format_base16({key.bytes(), key.size()}); hex_digits.append(hex_digits.data(), sha256_hex_size - hex_digits.size()); LOG("Translated key {} to Bazel layout ac/{}", key.to_string(), hex_digits); return FMT("{}ac/{}", m_url_path, hex_digits); diff --git a/src/storage/remote/RedisStorage.cpp b/src/storage/remote/RedisStorage.cpp index 5021b6b11..34a57484b 100644 --- a/src/storage/remote/RedisStorage.cpp +++ b/src/storage/remote/RedisStorage.cpp @@ -26,6 +26,8 @@ #include #include +#include + // Ignore "ISO C++ forbids flexible array member ‘buf’" warning from -Wpedantic. #ifdef __GNUC__ # pragma GCC diagnostic push diff --git a/src/util/string.cpp b/src/util/string.cpp index 48d4b8b0f..1f90934d4 100644 --- a/src/util/string.cpp +++ b/src/util/string.cpp @@ -21,12 +21,39 @@ #include #include +extern "C" { +#include +} + #include #include #include namespace util { +std::string +format_base16(nonstd::span data) +{ + static const char digits[] = "0123456789abcdef"; + std::string result; + result.resize(2 * data.size()); + for (size_t i = 0; i < data.size(); ++i) { + result[i * 2] = digits[data[i] >> 4]; + result[i * 2 + 1] = digits[data[i] & 0xF]; + } + return result; +} + +std::string +format_base32hex(nonstd::span data) +{ + const size_t bytes_to_reserve = data.size() * 8 / 5 + 1; + std::string result(bytes_to_reserve, 0); + const size_t actual_size = base32hex(&result[0], data.data(), data.size()); + result.resize(actual_size); + return result; +} + std::string format_human_readable_diff(int64_t diff, SizeUnitPrefixType prefix_type) { diff --git a/src/util/string.hpp b/src/util/string.hpp index 6e1fcf4d7..c8e17c0dc 100644 --- a/src/util/string.hpp +++ b/src/util/string.hpp @@ -41,6 +41,14 @@ enum class SizeUnitPrefixType { binary, decimal }; // Return true if `suffix` is a suffix of `string`. bool ends_with(std::string_view string, std::string_view suffix); +// Format a hexadecimal string representing `data`. The returned string will be +// `2 * data.size()` long. +std::string format_base16(nonstd::span data); + +// Format a lowercase base32hex string representing `data`. No padding +// characters will be added. +std::string format_base32hex(nonstd::span data); + // Format `diff` as a human-readable string. std::string format_human_readable_diff(int64_t diff, SizeUnitPrefixType prefix_type); diff --git a/unittest/test_Util.cpp b/unittest/test_Util.cpp index ae320edf3..3907d0d52 100644 --- a/unittest/test_Util.cpp +++ b/unittest/test_Util.cpp @@ -234,30 +234,6 @@ TEST_CASE("Util::format_argv_for_logging") CHECK(Util::format_argv_for_logging(argv_2) == "foo bar"); } -TEST_CASE("Util::format_base16") -{ - uint8_t none[] = ""; - uint8_t text[4] = "foo"; // incl. NUL - uint8_t data[4] = {0, 1, 2, 3}; - - CHECK(Util::format_base16(none, 0) == ""); - CHECK(Util::format_base16(text, sizeof(text)) == "666f6f00"); - CHECK(Util::format_base16(data, sizeof(data)) == "00010203"); -} - -TEST_CASE("Util::format_base32hex") -{ - // Test vectors (without padding) from RFC 4648. - const uint8_t input[] = {'f', 'o', 'o', 'b', 'a', 'r'}; - CHECK(Util::format_base32hex(input, 0) == ""); - CHECK(Util::format_base32hex(input, 1) == "co"); - CHECK(Util::format_base32hex(input, 2) == "cpng"); - CHECK(Util::format_base32hex(input, 3) == "cpnmu"); - CHECK(Util::format_base32hex(input, 4) == "cpnmuog"); - CHECK(Util::format_base32hex(input, 5) == "cpnmuoj1"); - CHECK(Util::format_base32hex(input, 6) == "cpnmuoj1e8"); -} - TEST_CASE("Util::get_extension") { CHECK(Util::get_extension("") == ""); diff --git a/unittest/test_util_XXH3_128.cpp b/unittest/test_util_XXH3_128.cpp index 23b411880..a940069d7 100644 --- a/unittest/test_util_XXH3_128.cpp +++ b/unittest/test_util_XXH3_128.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2011-2022 Joel Rosdahl and other contributors +// Copyright (C) 2011-2023 Joel Rosdahl and other contributors // // See doc/AUTHORS.adoc for a complete list of contributors. // @@ -27,22 +27,22 @@ TEST_CASE("util::XXH3_128") { util::XXH3_128 checksum; auto digest = checksum.digest(); - CHECK(Util::format_base16(digest.data(), 16) + CHECK(util::format_base16({digest.data(), 16}) == "99aa06d3014798d86001c324468d497f"); checksum.update(util::to_span("foo")); digest = checksum.digest(); - CHECK(Util::format_base16(digest.data(), 16) + CHECK(util::format_base16({digest.data(), 16}) == "79aef92e83454121ab6e5f64077e7d8a"); checksum.update(util::to_span("t")); digest = checksum.digest(); - CHECK(Util::format_base16(digest.data(), 16) + CHECK(util::format_base16({digest.data(), 16}) == "e6045075b5bf1ae7a3e4c87775e6c97f"); checksum.reset(); digest = checksum.digest(); - CHECK(Util::format_base16(digest.data(), 16) + CHECK(util::format_base16({digest.data(), 16}) == "99aa06d3014798d86001c324468d497f"); } diff --git a/unittest/test_util_string.cpp b/unittest/test_util_string.cpp index d6afeab7f..371020480 100644 --- a/unittest/test_util_string.cpp +++ b/unittest/test_util_string.cpp @@ -39,6 +39,30 @@ operator==(std::pair> left, TEST_SUITE_BEGIN("util"); +TEST_CASE("util::format_base16") +{ + uint8_t none[] = ""; + uint8_t text[4] = "foo"; // incl. NUL + uint8_t data[4] = {0, 1, 2, 3}; + + CHECK(util::format_base16({none, 0}) == ""); + CHECK(util::format_base16({text, sizeof(text)}) == "666f6f00"); + CHECK(util::format_base16({data, sizeof(data)}) == "00010203"); +} + +TEST_CASE("util::format_base32hex") +{ + // Test vectors (without padding) from RFC 4648. + const uint8_t input[] = {'f', 'o', 'o', 'b', 'a', 'r'}; + CHECK(util::format_base32hex({input, 0}) == ""); + CHECK(util::format_base32hex({input, 1}) == "co"); + CHECK(util::format_base32hex({input, 2}) == "cpng"); + CHECK(util::format_base32hex({input, 3}) == "cpnmu"); + CHECK(util::format_base32hex({input, 4}) == "cpnmuog"); + CHECK(util::format_base32hex({input, 5}) == "cpnmuoj1"); + CHECK(util::format_base32hex({input, 6}) == "cpnmuoj1e8"); +} + TEST_CASE("util::ends_with") { CHECK(util::ends_with("", "")); -- 2.47.2