From ac91433916dea68a369b8c9a2abd3fc9328cf10c Mon Sep 17 00:00:00 2001 From: =?utf8?q?P=C3=A1draig=20Brady?=
Date: Tue, 7 Oct 2025 14:38:49 +0100 Subject: [PATCH] maint: cksum: document a base64/hex parsing ambiguity with untagged * src/digest.c (split_3): Mention that the ambiguity of misinterpreting base64 characters as hex is not a practical consideration. Also add examples of both tagged formats, which makes it easier to interpret the parsing logic. --- src/digest.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/digest.c b/src/digest.c index d2e6e212aa..58b9c9ec1f 100644 --- a/src/digest.c +++ b/src/digest.c @@ -868,6 +868,10 @@ split_3 (char *s, size_t s_len, } #endif + /* Try to parse BSD or OpenSSL tagged format. I.e.: + openssl: MD5(f)= d41d8cd98f00b204e9800998ecf8427e + bsd: MD5 (f) = d41d8cd98f00b204e9800998ecf8427e */ + size_t parse_offset = i; algo_name_len = strlen (DIGEST_TYPE_STRING); if (STREQ_LEN (s + i, DIGEST_TYPE_STRING, algo_name_len)) @@ -942,7 +946,14 @@ split_3 (char *s, size_t s_len, ; # if HASH_ALGO_CKSUM /* Check the number of base64 characters. This works because the hexadecimal - character set is a subset of the base64 character set. */ + character set is a subset of the base64 character set. + Note there is the ambiguity that all characters may be hex when they + are actually base64 encoded, so the following could be confused: + cksum -a sha2 -l 384 --base64 --untagged + cksum -a sha2 -l 256 --untagged + Similarly for sha3 and blake2b. + However at this length the chances are exceedingly rare (1 in 480R), + and smaller blake2b lengths aren't practical for verification anyway. */ size_t digest_base64_bytes = digest_hex_bytes; size_t trailing_equals = 0; for (; isubase64 (*hp); ++hp, ++digest_base64_bytes) -- 2.47.3