From: Pádraig Brady
Date: Tue, 7 Oct 2025 13:38:49 +0000 (+0100)
Subject: maint: cksum: document a base64/hex parsing ambiguity with untagged
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=ac91433916dea68a369b8c9a2abd3fc9328cf10c;p=thirdparty%2Fcoreutils.git

maint: cksum: document a base64/hex parsing ambiguity with untagged

* src/digest.c (split_3): Mention the ambiguity in misinterpreting
base64 characters as hex is not a practical consideration.
Also add an example of both tagged formats which makes it
easier to interpret the parsing logic.
---

diff --git a/src/digest.c b/src/digest.c
index d2e6e212aa..58b9c9ec1f 100644
--- a/src/digest.c
+++ b/src/digest.c
@@ -868,6 +868,10 @@ split_3 (char *s, size_t s_len,
     }
 #endif
 
+  /* Try to parse BSD or OpenSSL tagged format. I.e.:
+     openssl: MD5(f)= d41d8cd98f00b204e9800998ecf8427e
+     bsd: MD5 (f) = d41d8cd98f00b204e9800998ecf8427e */
+
   size_t parse_offset = i;
   algo_name_len = strlen (DIGEST_TYPE_STRING);
   if (STREQ_LEN (s + i, DIGEST_TYPE_STRING, algo_name_len))
@@ -942,7 +946,14 @@ split_3 (char *s, size_t s_len,
     ;
 # if HASH_ALGO_CKSUM
   /* Check the number of base64 characters. This works because the hexadecimal
-     character set is a subset of the base64 character set. */
+     character set is a subset of the base64 character set.
+     Note there is the ambiguity that all characters are hex when they
+     are actually base64 encoded, which could be ambiguous with:
+       cksum -a sha2 -l 384 --base64 --untagged
+       cksum -a sha2 -l 256 --untagged
+     Similarly for sha3 and blake2b.
+     However at this length the chances are exceedingly rare (1 in 480R),
+     and smaller blake2b lengths aren't practical for verification anyway. */
   size_t digest_base64_bytes = digest_hex_bytes;
   size_t trailing_equals = 0;
   for (; isubase64 (*hp); ++hp, ++digest_base64_bytes)