From: Pádraig Brady <P@draigBrady.com>
Date: Mon, 8 Sep 2025 16:40:00 +0000 (+0100)
Subject: maint: basenc: refactor all encodings to use finalize
X-Git-Tag: v9.8~62
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=4141ae3e4970dbc206b56366b0581f04fa87dfc1;p=thirdparty%2Fcoreutils.git

maint: basenc: refactor all encodings to use finalize

Finalize was required for base58, but it is a more general mechanism
that simplifies the logic for all encodings.

* src/basenc.c (do_decode): Always call base_decode_ctx_finalize(),
rather than the awkward double loop at end of buffer.
* tests/basenc/basenc.pl: Add basenc finalization tests.
---

diff --git a/src/basenc.c b/src/basenc.c
index 090404ee4b..5976b1aa68 100644
--- a/src/basenc.c
+++ b/src/basenc.c
@@ -222,6 +222,7 @@ static_assert (DEC_BLOCKSIZE % 40 == 0); /* Complete encoded blocks are used. */
 # define base_decode_context base32_decode_context
 # define base_decode_ctx_init base32_decode_ctx_init
 # define base_decode_ctx base32_decode_ctx
+# define base_decode_ctx_finalize decode_ctx_finalize
 # define isubase isubase32
 #elif BASE_TYPE == 64
 # define BASE_LENGTH BASE64_LENGTH
@@ -238,6 +239,7 @@ static_assert (DEC_BLOCKSIZE % 12 == 0); /* Complete encoded blocks are used. */
 # define base_decode_context base64_decode_context
 # define base_decode_ctx_init base64_decode_ctx_init
 # define base_decode_ctx base64_decode_ctx
+# define base_decode_ctx_finalize decode_ctx_finalize
 # define isubase isubase64
 #elif BASE_TYPE == 42
 
@@ -316,10 +318,68 @@ static bool (*base_encode_ctx) (struct base_encode_context *ctx,
 static bool (*base_encode_ctx_finalize) (struct base_encode_context *ctx,
                                          char *restrict *out, idx_t *outlen);
 
+static bool
+no_padding (MAYBE_UNUSED struct base_decode_context *ctx)
+{
+  return false;  /* Never report CTX as ending in padding.  */
+}
+#endif
+
+#if BASE_TYPE == 42  /* has_padding is a pointer assigned at run time.  */
+static bool (*has_padding) (struct base_decode_context *ctx);
+
+static bool  /* True if CTX's base64 buffer is non-empty and ends in '='.  */
+base64_ctx_has_padding (struct base_decode_context *ctx)
+{
+  return ctx->i && ctx->ctx.base64.buf[ctx->i - 1] == '=';
+}
+
+static bool  /* True if CTX's base32 buffer is non-empty and ends in '='.  */
+base32_ctx_has_padding (struct base_decode_context *ctx)
+{
+  return ctx->i && ctx->ctx.base32.buf[ctx->i - 1] == '=';
+}
+#else  /* BASE_TYPE != 42 */
+static bool  /* True if CTX's buffer is non-empty and ends in '='.  */
+has_padding (struct base_decode_context *ctx)
+{
+  return ctx->i && ctx->buf[ctx->i - 1] == '=';
+}
 #endif
 
 
+/* Process any data pending in CTX, supplying '=' padding if required but
+   absent, and update *OUTLEN.  Return TRUE on success, FALSE on failure.  */
+
+static bool
+decode_ctx_finalize (struct base_decode_context *ctx,
+                     char *restrict *out, idx_t *outlen)
+{
+  if (ctx->i == 0)
+    {
+      *outlen = 0;  /* Nothing is pending, so nothing is output.  */
+      return true;
+    }
+
+  /* Auto-pad the input if needed, then flush the context.  */
+  char padbuf[8] ATTRIBUTE_NONSTRING = "========";
+  idx_t auto_padding = REQUIRED_PADDING (ctx->i);
+  idx_t n = *outlen;  /* In/out length argument for base_decode_ctx.  */
+  bool result;
+
+  if (auto_padding && ! has_padding (ctx))
+    {
+      affirm (auto_padding <= sizeof (padbuf));
+      result = base_decode_ctx (ctx, padbuf, auto_padding, *out, &n);
+    }
+  else  /* Nothing to add: pass zero new input bytes.  */
+    {
+      result = base_decode_ctx (ctx, "", 0, *out, &n);
+    }
 
+  *outlen = n;
+  return result;
+}
 
 #if BASE_TYPE == 42
 
@@ -1446,22 +1506,6 @@ do_encode (FILE *in, char const *infile, FILE *out, idx_t wrap_column)
   finish_and_exit (in, infile);
 }
 
-/* Returns TRUE if BUF of length LEN
-   ends with a '=' character.
-   Trailing '\n' characters are ignored.  */
-ATTRIBUTE_PURE
-static bool
-has_padding (char const *buf, size_t len)
-{
-  while (len--)
-    {
-      if (buf[len] == '\n')
-        continue;
-      return buf[len] == '=';
-    }
-  return false;
-}
-
 static _Noreturn void
 do_decode (FILE *in, char const *infile, FILE *out, bool ignore_garbage)
 {
@@ -1469,7 +1513,6 @@ do_decode (FILE *in, char const *infile, FILE *out, bool ignore_garbage)
   idx_t sum;
   struct base_decode_context ctx;
 
-  char padbuf[8] ATTRIBUTE_NONSTRING = "========";
   inbuf = xmalloc (BASE_LENGTH (DEC_BLOCKSIZE));
   outbuf = xmalloc (DEC_BLOCKSIZE);
 
@@ -1507,55 +1550,27 @@ do_decode (FILE *in, char const *infile, FILE *out, bool ignore_garbage)
         }
       while (sum < BASE_LENGTH (DEC_BLOCKSIZE) && !feof (in));
 
-      /* The following "loop" is usually iterated just once.
-         However, when it processes the final input buffer, we want
-         to iterate it one additional time, but with an indicator
-         telling it to flush what is in CTX.  */
-      for (int k = 0; k < 1 + !!feof (in); k++)
+      while (sum || feof (in))
         {
-          if (k == 1)
-            {
-              if (ctx.i == 0)
-                break;
-
-              /* auto pad input (at eof).  */
-              idx_t auto_padding = REQUIRED_PADDING (ctx.i);
-              if (auto_padding && ! has_padding (inbuf, sum))
-                {
-                  affirm (auto_padding <= sizeof (padbuf));
-                  IF_LINT (free (inbuf));
-                  sum = auto_padding;
-                  inbuf = padbuf;
-                }
-              else
-                sum = 0;  /* process ctx buffer only */
-            }
           idx_t n = DEC_BLOCKSIZE;
-          ok = base_decode_ctx (&ctx, inbuf, sum, outbuf, &n);
+          if (sum)
+            ok = base_decode_ctx (&ctx, inbuf, sum, outbuf, &n);
+          else
+            ok = base_decode_ctx_finalize (&ctx, &outbuf, &n);
 
           if (fwrite (outbuf, 1, n, out) < n)
             write_error ();
 
-          if (!ok)
+          if (! ok)
             error (EXIT_FAILURE, 0, _("invalid input"));
+
+          if (sum == 0)
+            break;
+          sum = 0;
         }
     }
   while (!feof (in));
 
-#if BASE_TYPE == 42
-  if (base_decode_ctx_finalize)
-    {
-      idx_t outlen = DEC_BLOCKSIZE;
-      bool ok = base_decode_ctx_finalize (&ctx, &outbuf, &outlen);
-
-      if (fwrite (outbuf, 1, outlen, out) < outlen)
-        write_error ();
-
-      if (!ok)
-        error (EXIT_FAILURE, 0, _("invalid input"));
-    }
-#endif
-
   finish_and_exit (in, infile);
 }
 
@@ -1631,11 +1646,16 @@ main (int argc, char **argv)
       }
 
 #if BASE_TYPE == 42
+  required_padding = no_required_padding;
+  has_padding = no_padding;
+  base_decode_ctx_finalize = decode_ctx_finalize;
+
   switch (base_type)
     {
     case BASE64_OPTION:
       base_length = base64_length_wrapper;
       required_padding = base64_required_padding;
+      has_padding = base64_ctx_has_padding;
       isubase = isubase64;
       base_encode = base64_encode;
       base_decode_ctx_init = base64_decode_ctx_init_wrapper;
@@ -1645,6 +1665,7 @@ main (int argc, char **argv)
     case BASE64URL_OPTION:
       base_length = base64_length_wrapper;
       required_padding = base64_required_padding;
+      has_padding = base64_ctx_has_padding;
       isubase = isubase64url;
       base_encode = base64url_encode;
       base_decode_ctx_init = base64url_decode_ctx_init_wrapper;
@@ -1654,6 +1675,7 @@ main (int argc, char **argv)
     case BASE32_OPTION:
       base_length = base32_length_wrapper;
       required_padding = base32_required_padding;
+      has_padding = base32_ctx_has_padding;
       isubase = isubase32;
       base_encode = base32_encode;
       base_decode_ctx_init = base32_decode_ctx_init_wrapper;
@@ -1663,6 +1685,7 @@ main (int argc, char **argv)
     case BASE32HEX_OPTION:
       base_length = base32_length_wrapper;
       required_padding = base32_required_padding;
+      has_padding = base32_ctx_has_padding;
       isubase = isubase32hex;
       base_encode = base32hex_encode;
       base_decode_ctx_init = base32hex_decode_ctx_init_wrapper;
@@ -1671,7 +1694,6 @@ main (int argc, char **argv)
 
     case BASE16_OPTION:
       base_length = base16_length;
-      required_padding = no_required_padding;
       isubase = isubase16;
       base_encode = base16_encode;
       base_decode_ctx_init = base16_decode_ctx_init;
@@ -1680,7 +1702,6 @@ main (int argc, char **argv)
 
     case BASE2MSBF_OPTION:
       base_length = base2_length;
-      required_padding = no_required_padding;
       isubase = isubase2;
       base_encode = base2msbf_encode;
       base_decode_ctx_init = base2_decode_ctx_init;
@@ -1689,7 +1710,6 @@ main (int argc, char **argv)
 
     case BASE2LSBF_OPTION:
       base_length = base2_length;
-      required_padding = no_required_padding;
       isubase = isubase2;
       base_encode = base2lsbf_encode;
       base_decode_ctx_init = base2_decode_ctx_init;
@@ -1698,7 +1718,6 @@ main (int argc, char **argv)
 
     case Z85_OPTION:
       base_length = z85_length;
-      required_padding = no_required_padding;
       isubase = isuz85;
       base_encode = z85_encode;
       base_decode_ctx_init = z85_decode_ctx_init;
@@ -1707,7 +1726,6 @@ main (int argc, char **argv)
 
     case BASE58_OPTION:
       base_length = base58_length;
-      required_padding = no_required_padding;
       isubase = isubase58;
       base_encode_ctx_init = base58_encode_ctx_init;
       base_encode_ctx = base58_encode_ctx;
diff --git a/tests/basenc/basenc.pl b/tests/basenc/basenc.pl
index 0d71cf1d9d..4cda3bd146 100755
--- a/tests/basenc/basenc.pl
+++ b/tests/basenc/basenc.pl
@@ -336,6 +336,35 @@ my @Tests =
  # Disallow arbitrary whitespace
  ['b58_inval_6', '--base58 -d',    {IN=>$base58_out." "}, {EXIT=>1},
   {ERR=>"$prog: invalid input\n"}],
+
+ # Base32 auto-padding tests, including input split by a newline
+ ['b32_paddec1', '--base32 --decode', {IN=>'MFRGG'}, {OUT=>"abc"}],
+ ['b32_paddec2', '--base32 --decode', {IN=>'MFRGG==='}, {OUT=>"abc"}],
+ ['b32_paddec3', '--base32 --decode', {IN=>'MFRGGZDFMFRGG'}, {OUT=>"abcdeabc"}],
+ ['b32_paddec4', '--base32 -d', {IN=>"MFRGGZDF\nMFRGG"}, {OUT=>"abcdeabc"}],
+
+ # Base32 bad decode tests - stray '=' after a complete block
+ ['b32_baddecode1', '--base32 --decode', {IN=>'MFRGGZDF='}, {OUT=>"abcde"},
+  {ERR_SUBST => 's/.*: invalid input//'}, {ERR => "\n"}, {EXIT => 1}],
+ ['b32_baddecode2', '--base32 --decode', {IN=>"MFRGGZDF=\n"}, {OUT=>"abcde"},
+  {ERR_SUBST => 's/.*: invalid input//'}, {ERR => "\n"}, {EXIT => 1}],
+
+ # Base32hex partial padding
+ ['b32h_paddec1', '--base32hex --decode', {IN=>'C5H66'}, {OUT=>"abc"}],
+ ['b32h_paddec2', '--base32hex --decode', {IN=>'C5H66==='}, {OUT=>"abc"}],
+
+ # Test auto-padding boundary conditions
+ ['ctx_auto_pad1', '--base64 --decode', {IN=>'QQ'}, {OUT=>"A"}],
+ ['ctx_auto_pad2', '--base64 --decode', {IN=>'QWI'}, {OUT=>"Ab"}],
+ ['ctx_auto_pad3', '--base32 --decode', {IN=>'IE'}, {OUT=>"A"}],
+ ['ctx_auto_pad4', '--base32 --decode', {IN=>'IFBA'}, {OUT=>"AB"}],
+ ['ctx_auto_pad5', '--base32 --decode', {IN=>'IFBEG'}, {OUT=>"ABC"}],
+
+ # Padded block, then a newline, then an unpadded or padded block
+ ['ctx_mixed_pad1', '--base64 --decode', {IN=>"QWI=\nQQ"}, {OUT=>"AbA"}],
+ ['ctx_mixed_pad2', '--base64 --decode', {IN=>"QWI=\nQWI="}, {OUT=>"AbAb"}],
+ ['ctx_mixed_pad3', '--base32 --decode', {IN=>"IFBA====\nIE"}, {OUT=>"ABA"}],
+ ['ctx_mixed_pad4', '--base32 -d', {IN=>"IFBA====\nIFBA===="}, {OUT=>"ABAB"}],
 );
 
 # Prepend the command line argument and append a newline to end