From ac91433916dea68a369b8c9a2abd3fc9328cf10c Mon Sep 17 00:00:00 2001
From: =?utf8?q?P=C3=A1draig=20Brady?= <P@draigBrady.com>
Date: Tue, 7 Oct 2025 14:38:49 +0100
Subject: [PATCH] maint: cksum: document a base64/hex parsing ambiguity with
 untagged

* src/digest.c (split_3): Mention that the ambiguity of misinterpreting
base64 characters as hex is not a practical consideration.
Also add an example of both tagged formats which makes it
easier to interpret the parsing logic.
---
 src/digest.c | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/src/digest.c b/src/digest.c
index d2e6e212aa..58b9c9ec1f 100644
--- a/src/digest.c
+++ b/src/digest.c
@@ -868,6 +868,10 @@ split_3 (char *s, size_t s_len,
     }
 #endif
 
+  /* Try to parse BSD or OpenSSL tagged format.  E.g.:
+     openssl: MD5(f)= d41d8cd98f00b204e9800998ecf8427e
+     bsd:     MD5 (f) = d41d8cd98f00b204e9800998ecf8427e  */
+
   size_t parse_offset = i;
   algo_name_len = strlen (DIGEST_TYPE_STRING);
   if (STREQ_LEN (s + i, DIGEST_TYPE_STRING, algo_name_len))
@@ -942,7 +946,14 @@ split_3 (char *s, size_t s_len,
     ;
 # if HASH_ALGO_CKSUM
   /* Check the number of base64 characters.  This works because the hexadecimal
-     character set is a subset of the base64 character set.  */
+     character set is a subset of the base64 character set.
+     Note a base64 digest comprised solely of hex characters is
+     indistinguishable from a hex digest of the same length, as with:
+        cksum -a sha2 -l 384 --base64 --untagged
+        cksum -a sha2 -l 256 --untagged
+     Similarly for sha3 and blake2b.
+     However at this length the chance is negligible (1 in 480R, ~4.8e29),
+     and smaller blake2b lengths aren't practical for verification anyway.  */
   size_t digest_base64_bytes = digest_hex_bytes;
   size_t trailing_equals = 0;
   for (; isubase64 (*hp); ++hp, ++digest_base64_bytes)
-- 
2.47.3