From: Pádraig Brady
Date: Mon, 8 Sep 2025 16:40:00 +0000 (+0100) Subject: maint: basenc: refactor all encodings to use finalize X-Git-Tag: v9.8~62 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=4141ae3e4970dbc206b56366b0581f04fa87dfc1;p=thirdparty%2Fcoreutils.git maint: basenc: refactor all encodings to use finalize Finalize was required for base58, but it's a more general mechanism which simplifies the logic for all encodings * src/basenc.c (do_decode): Always call base_decode_ctx_finalize(), rather than the awkward double loop at end of buffer. * tests/basenc/basenc.pl: Add basenc finalization tests. --- diff --git a/src/basenc.c b/src/basenc.c index 090404ee4b..5976b1aa68 100644 --- a/src/basenc.c +++ b/src/basenc.c @@ -222,6 +222,7 @@ static_assert (DEC_BLOCKSIZE % 40 == 0); /* Complete encoded blocks are used. */ # define base_decode_context base32_decode_context # define base_decode_ctx_init base32_decode_ctx_init # define base_decode_ctx base32_decode_ctx +# define base_decode_ctx_finalize decode_ctx_finalize # define isubase isubase32 #elif BASE_TYPE == 64 # define BASE_LENGTH BASE64_LENGTH @@ -238,6 +239,7 @@ static_assert (DEC_BLOCKSIZE % 12 == 0); /* Complete encoded blocks are used. */ # define base_decode_context base64_decode_context # define base_decode_ctx_init base64_decode_ctx_init # define base_decode_ctx base64_decode_ctx +# define base_decode_ctx_finalize decode_ctx_finalize # define isubase isubase64 #elif BASE_TYPE == 42 @@ -316,10 +318,68 @@ static bool (*base_encode_ctx) (struct base_encode_context *ctx, static bool (*base_encode_ctx_finalize) (struct base_encode_context *ctx, char *restrict *out, idx_t *outlen); +static bool +no_padding (MAYBE_UNUSED struct base_decode_context *ctx) +{ + return false; +} +#endif + +#if BASE_TYPE == 42 +static bool (*has_padding) (struct base_decode_context *ctx); + +static bool +base64_ctx_has_padding (struct base_decode_context *ctx) +{ + return ctx->i && ctx->ctx.base64.buf[ctx->i - 1] == '='; +} + +static bool +base32_ctx_has_padding (struct base_decode_context *ctx) +{ + return ctx->i && ctx->ctx.base32.buf[ctx->i - 1] == '='; +} +#else +static bool +has_padding (struct base_decode_context *ctx) +{ + return ctx->i && ctx->buf[ctx->i - 1] == '='; +} #endif +/* Process any pending data in CTX, while auto padding if appropriate. + Return TRUE on success, FALSE on failure. */ + +static bool +decode_ctx_finalize (struct base_decode_context *ctx, + char *restrict *out, idx_t *outlen) +{ + if (ctx->i == 0) + { + *outlen = 0; + return true; + } + + /* Auto-pad input and flush the context */ + char padbuf[8] ATTRIBUTE_NONSTRING = "========"; + idx_t auto_padding = REQUIRED_PADDING (ctx->i); + idx_t n = *outlen; + bool result; + + if (auto_padding && ! has_padding (ctx)) + { + affirm (auto_padding <= sizeof (padbuf)); + result = base_decode_ctx (ctx, padbuf, auto_padding, *out, &n); + } + else + { + result = base_decode_ctx (ctx, "", 0, *out, &n); + } + *outlen = n; + return result; +} #if BASE_TYPE == 42 @@ -1446,22 +1506,6 @@ do_encode (FILE *in, char const *infile, FILE *out, idx_t wrap_column) finish_and_exit (in, infile); } -/* Returns TRUE if BUF of length LEN - ends with a '=' character. - Trailing '\n' characters are ignored. */ -ATTRIBUTE_PURE -static bool -has_padding (char const *buf, size_t len) -{ - while (len--) - { - if (buf[len] == '\n') - continue; - return buf[len] == '='; - } - return false; -} - static _Noreturn void do_decode (FILE *in, char const *infile, FILE *out, bool ignore_garbage) { @@ -1469,7 +1513,6 @@ do_decode (FILE *in, char const *infile, FILE *out, bool ignore_garbage) idx_t sum; struct base_decode_context ctx; - char padbuf[8] ATTRIBUTE_NONSTRING = "========"; inbuf = xmalloc (BASE_LENGTH (DEC_BLOCKSIZE)); outbuf = xmalloc (DEC_BLOCKSIZE); @@ -1507,55 +1550,27 @@ do_decode (FILE *in, char const *infile, FILE *out, bool ignore_garbage) } while (sum < BASE_LENGTH (DEC_BLOCKSIZE) && !feof (in)); - /* The following "loop" is usually iterated just once. - However, when it processes the final input buffer, we want - to iterate it one additional time, but with an indicator - telling it to flush what is in CTX. */ - for (int k = 0; k < 1 + !!feof (in); k++) + while (sum || feof (in)) { - if (k == 1) - { - if (ctx.i == 0) - break; - - /* auto pad input (at eof). */ - idx_t auto_padding = REQUIRED_PADDING (ctx.i); - if (auto_padding && ! has_padding (inbuf, sum)) - { - affirm (auto_padding <= sizeof (padbuf)); - IF_LINT (free (inbuf)); - sum = auto_padding; - inbuf = padbuf; - } - else - sum = 0; /* process ctx buffer only */ - } idx_t n = DEC_BLOCKSIZE; - ok = base_decode_ctx (&ctx, inbuf, sum, outbuf, &n); + if (sum) + ok = base_decode_ctx (&ctx, inbuf, sum, outbuf, &n); + else + ok = base_decode_ctx_finalize (&ctx, &outbuf, &n); if (fwrite (outbuf, 1, n, out) < n) write_error (); - if (!ok) + if (! ok) error (EXIT_FAILURE, 0, _("invalid input")); + + if (sum == 0) + break; + sum = 0; } } while (!feof (in)); -#if BASE_TYPE == 42 - if (base_decode_ctx_finalize) - { - idx_t outlen = DEC_BLOCKSIZE; - bool ok = base_decode_ctx_finalize (&ctx, &outbuf, &outlen); - - if (fwrite (outbuf, 1, outlen, out) < outlen) - write_error (); - - if (!ok) - error (EXIT_FAILURE, 0, _("invalid input")); - } -#endif - finish_and_exit (in, infile); } @@ -1631,11 +1646,16 @@ main (int argc, char **argv) } #if BASE_TYPE == 42 + required_padding = no_required_padding; + has_padding = no_padding; + base_decode_ctx_finalize = decode_ctx_finalize; + switch (base_type) { case BASE64_OPTION: base_length = base64_length_wrapper; required_padding = base64_required_padding; + has_padding = base64_ctx_has_padding; isubase = isubase64; base_encode = base64_encode; base_decode_ctx_init = base64_decode_ctx_init_wrapper; @@ -1645,6 +1665,7 @@ main (int argc, char **argv) case BASE64URL_OPTION: base_length = base64_length_wrapper; required_padding = base64_required_padding; + has_padding = base64_ctx_has_padding; isubase = isubase64url; base_encode = base64url_encode; base_decode_ctx_init = base64url_decode_ctx_init_wrapper; @@ -1654,6 +1675,7 @@ main (int argc, char **argv) case BASE32_OPTION: base_length = base32_length_wrapper; required_padding = base32_required_padding; + has_padding = base32_ctx_has_padding; isubase = isubase32; base_encode = base32_encode; base_decode_ctx_init = base32_decode_ctx_init_wrapper; @@ -1663,6 +1685,7 @@ main (int argc, char **argv) case BASE32HEX_OPTION: base_length = base32_length_wrapper; required_padding = base32_required_padding; + has_padding = base32_ctx_has_padding; isubase = isubase32hex; base_encode = base32hex_encode; base_decode_ctx_init = base32hex_decode_ctx_init_wrapper; @@ -1671,7 +1694,6 @@ main (int argc, char **argv) case BASE16_OPTION: base_length = base16_length; - required_padding = no_required_padding; isubase = isubase16; base_encode = base16_encode; base_decode_ctx_init = base16_decode_ctx_init; @@ -1680,7 +1702,6 @@ main (int argc, char **argv) case BASE2MSBF_OPTION: base_length = base2_length; - required_padding = no_required_padding; isubase = isubase2; base_encode = base2msbf_encode; base_decode_ctx_init = base2_decode_ctx_init; @@ -1689,7 +1710,6 @@ main (int argc, char **argv) case BASE2LSBF_OPTION: base_length = base2_length; - required_padding = no_required_padding; isubase = isubase2; base_encode = base2lsbf_encode; base_decode_ctx_init = base2_decode_ctx_init; @@ -1698,7 +1718,6 @@ main (int argc, char **argv) case Z85_OPTION: base_length = z85_length; - required_padding = no_required_padding; isubase = isuz85; base_encode = z85_encode; base_decode_ctx_init = z85_decode_ctx_init; @@ -1707,7 +1726,6 @@ main (int argc, char **argv) case BASE58_OPTION: base_length = base58_length; - required_padding = no_required_padding; isubase = isubase58; base_encode_ctx_init = base58_encode_ctx_init; base_encode_ctx = base58_encode_ctx; diff --git a/tests/basenc/basenc.pl b/tests/basenc/basenc.pl index 0d71cf1d9d..4cda3bd146 100755 --- a/tests/basenc/basenc.pl +++ b/tests/basenc/basenc.pl @@ -336,6 +336,35 @@ my @Tests = # Disallow arbitrary whitespace ['b58_inval_6', '--base58 -d', {IN=>$base58_out." "}, {EXIT=>1}, {ERR=>"$prog: invalid input\n"}], + + # Base32 partial padding with newlines tests + ['b32_paddec1', '--base32 --decode', {IN=>'MFRGG'}, {OUT=>"abc"}], + ['b32_paddec2', '--base32 --decode', {IN=>'MFRGG==='}, {OUT=>"abc"}], + ['b32_paddec3', '--base32 --decode', {IN=>'MFRGGZDFMFRGG'}, {OUT=>"abcdeabc"}], + ['b32_paddec4', '--base32 -d', {IN=>"MFRGGZDF\nMFRGG"}, {OUT=>"abcdeabc"}], + + # Base32 bad decode tests - partial padding with newlines + ['b32_baddecode1', '--base32 --decode', {IN=>'MFRGGZDF='}, {OUT=>"abcde"}, + {ERR_SUBST => 's/.*: invalid input//'}, {ERR => "\n"}, {EXIT => 1}], + ['b32_baddecode2', '--base32 --decode', {IN=>"MFRGGZDF=\n"}, {OUT=>"abcde"}, + {ERR_SUBST => 's/.*: invalid input//'}, {ERR => "\n"}, {EXIT => 1}], + + # Base32hex partial padding + ['b32h_paddec1', '--base32hex --decode', {IN=>'C5H66'}, {OUT=>"abc"}], + ['b32h_paddec2', '--base32hex --decode', {IN=>'C5H66==='}, {OUT=>"abc"}], + + # Test auto-padding boundary conditions + ['ctx_auto_pad1', '--base64 --decode', {IN=>'QQ'}, {OUT=>"A"}], + ['ctx_auto_pad2', '--base64 --decode', {IN=>'QWI'}, {OUT=>"Ab"}], + ['ctx_auto_pad3', '--base32 --decode', {IN=>'IE'}, {OUT=>"A"}], + ['ctx_auto_pad4', '--base32 --decode', {IN=>'IFBA'}, {OUT=>"AB"}], + ['ctx_auto_pad5', '--base32 --decode', {IN=>'IFBEG'}, {OUT=>"ABC"}], + + # Mixed padding scenarios with newlines at various positions + ['ctx_mixed_pad1', '--base64 --decode', {IN=>"QWI=\nQQ"}, {OUT=>"AbA"}], + ['ctx_mixed_pad2', '--base64 --decode', {IN=>"QWI=\nQWI="}, {OUT=>"AbAb"}], + ['ctx_mixed_pad3', '--base32 --decode', {IN=>"IFBA====\nIE"}, {OUT=>"ABA"}], + ['ctx_mixed_pad4', '--base32 -d', {IN=>"IFBA====\nIFBA===="}, {OUT=>"ABAB"}], ); # Prepend the command line argument and append a newline to end