Reducing file lengths should be beneficial since it reduces the number
of needed system calls when scanning many files in the cache. The effect
is very small but there is no real downside. See also
b16001a67f4389956ef6e7ccf7d8023684b57119.
Base32 is chosen since the encoding algorithm is very simple compared to
e.g. base36. Base64 cannot be used since the encoded digest string is
used in filenames and the cache directory may be located on a case
insensitive filesystem. The base32hex variant is chosen instead of the
other base32 variants since it feels more natural and there are no
visual ambiguity issues.
The first two bytes are encoded as base16 to maintain compatibility with
the cleanup algorithm in older ccache versions and to allow for up to
four uniform cache levels.
inline std::string
Digest::to_string() const
{
- return Util::format_base16(m_bytes, size());
+ // The first two bytes are encoded as four lowercase base16 digits to maintain
+ // compatibility with the cleanup algorithm in older ccache versions and to
+ // allow for up to four uniform cache levels. The rest are encoded as
+ // lowercase base32hex digits without padding characters.
+ const size_t base16_bytes = 2;
+ return Util::format_base16(m_bytes, base16_bytes)
+ + Util::format_base32hex(m_bytes + base16_bytes,
+ size() - base16_bytes);
}
inline bool
#include "Logging.hpp"
#include "TemporaryFile.hpp"
+extern "C" {
+#include "third_party/base32hex.h"
+}
+
#include <algorithm>
#include <fstream>
return result;
}
+std::string
+format_base32hex(const uint8_t* data, size_t size)
+{
+ const size_t bytes_to_reserve = size * 8 / 5 + 1;
+ std::string result(bytes_to_reserve, 0);
+ const size_t actual_size = base32hex(&result[0], data, size);
+ result.resize(actual_size);
+ return result;
+}
+
std::string
format_human_readable_size(uint64_t size)
{
// string will be `2 * size` long.
std::string format_base16(const uint8_t* data, size_t size);
+// Format a lowercase base32hex string representing `size` bytes of `data`. No
+// padding characters will be added.
+std::string format_base32hex(const uint8_t* data, size_t size);
+
// Format `size` as a human-readable string.
std::string format_human_readable_size(uint64_t size);
$CCACHE --hash-file /dev/null > hash.out
printf "a" | $CCACHE --hash-file - >> hash.out
- hash_0='af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9'
- hash_1='17762fddd969a453925d65717ac3eea21320b66b'
+ hash_0='af1396svbud1kqg40jfa6reciicrpcisi'
+ hash_1='17765vetiqd4ae95qpbhfb1ut8gj42r6m'
if grep "$hash_0" hash.out >/dev/null 2>&1 && \
grep "$hash_1" hash.out >/dev/null 2>&1; then
manifest=`find $CCACHE_DIR -name '*M'`
$CCACHE --dump-manifest $manifest >manifest.dump
- checksum_test1_h='b7271c414e35d190304ccbb02cdce8aaa391497e'
- checksum_test2_h='24f1184b3644bd65db35d8de74fbe468757a4200'
- checksum_test3_h='56a66d1ef7bfe44154ecd084e395a1c8d55bb3a1'
+ checksum_test1_h='b7273h0ksdehi0o4pitg5jeehal3i54ns'
+ checksum_test2_h='24f1315jch5tcndjbm6uejtu8q3lf9100'
+ checksum_test3_h='56a6dkffffv485aepk44seaq3i6lbepq2'
if grep "Hash: $checksum_test1_h" manifest.dump >/dev/null 2>&1 && \
grep "Hash: $checksum_test2_h" manifest.dump >/dev/null 2>&1 && \
{
SUBCASE("initial state")
{
- CHECK(Hash().digest().to_string()
- == "af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9");
+ CHECK(Hash().digest().to_string() == "af1396svbud1kqg40jfa6reciicrpcisi");
}
SUBCASE("empty string")
{
CHECK(Hash().hash("").digest().to_string()
- == "af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9");
+ == "af1396svbud1kqg40jfa6reciicrpcisi");
}
SUBCASE("a")
{
CHECK(Hash().hash("a").digest().to_string()
- == "17762fddd969a453925d65717ac3eea21320b66b");
+ == "17765vetiqd4ae95qpbhfb1ut8gj42r6m");
}
SUBCASE("message digest")
{
CHECK(Hash().hash("message digest").digest().to_string()
- == "7bc2a2eeb95ddbf9b7ecf6adcb76b453091c58dc");
+ == "7bc2kbnbinerv6ruptldpdrb8ko93hcdo");
}
SUBCASE("long string")
"123456789012345678901234567890123456789012345678901234567890"
"12345678901234567890";
CHECK(Hash().hash(long_string).digest().to_string()
- == "f263acf51621980b9c8de5da4a17d314984e05ab");
+ == "f263ljqhc8co1ee8rpeq98bt654o9o2qm");
}
}
h.hash("message");
h.digest();
h.hash(" digest");
- CHECK(h.digest().to_string() == "7bc2a2eeb95ddbf9b7ecf6adcb76b453091c58dc");
+ CHECK(h.digest().to_string() == "7bc2kbnbinerv6ruptldpdrb8ko93hcdo");
}
TEST_CASE("Hash::digest should be idempotent")
{
Hash h;
- CHECK(h.digest().to_string() == "af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9");
- CHECK(h.digest().to_string() == "af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9");
+ CHECK(h.digest().to_string() == "af1396svbud1kqg40jfa6reciicrpcisi");
+ CHECK(h.digest().to_string() == "af1396svbud1kqg40jfa6reciicrpcisi");
}
TEST_CASE("Digest::bytes")
CHECK(Util::format_base16(data, sizeof(data)) == "00010203");
}
+TEST_CASE("Util::format_base32hex")
+{
+ // Test vectors (without padding) from RFC 4648.
+ const uint8_t input[] = {'f', 'o', 'o', 'b', 'a', 'r'};
+ CHECK(Util::format_base32hex(input, 0) == "");
+ CHECK(Util::format_base32hex(input, 1) == "co");
+ CHECK(Util::format_base32hex(input, 2) == "cpng");
+ CHECK(Util::format_base32hex(input, 3) == "cpnmu");
+ CHECK(Util::format_base32hex(input, 4) == "cpnmuog");
+ CHECK(Util::format_base32hex(input, 5) == "cpnmuoj1");
+ CHECK(Util::format_base32hex(input, 6) == "cpnmuoj1e8");
+}
+
TEST_CASE("Util::format_human_readable_size")
{
CHECK(Util::format_human_readable_size(0) == "0.0 kB");