From: Lasse Collin Date: Fri, 23 May 2025 11:38:49 +0000 (+0300) Subject: liblzma: Add a new .xz decoder flag LZMA_TELL_BLOCK_END X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=f6df1e8d7bd647855a7df8a7136a5410b15c0c07;p=thirdparty%2Fxz.git liblzma: Add a new .xz decoder flag LZMA_TELL_BLOCK_END It makes lzma_code() return LZMA_BLOCK_END after every .xz Block. It can be useful in special cases where the boundaries of Blocks have a meaning to the application. For example, if using the seekable decoder, one can seek by Block number and then decode exactly one Block by detecting LZMA_BLOCK_END. Even if one knows the uncompressed size of the Block, the benefit of waiting for LZMA_BLOCK_END is that then one knows that the integrity check has been verified. A decoder might have provided all output from the Block, but if the last input bytes of the Block haven't been provided to the decoder, it cannot have verified the integrity check. --- diff --git a/src/liblzma/api/lzma/base.h b/src/liblzma/api/lzma/base.h index 12ed45ba..2f414bea 100644 --- a/src/liblzma/api/lzma/base.h +++ b/src/liblzma/api/lzma/base.h @@ -70,6 +70,17 @@ typedef enum { * output bytes should be picked from strm->next_out. */ + LZMA_BLOCK_END = 14, + /**< + * \brief End of .xz Block was reached + * + * This can only be returned if the LZMA_TELL_BLOCK_END flag + * was used when a .xz decoder was initialized. It indicates + * that end of a .xz Block was successfully reached (including + * verifying its integrity check). Other than providing this + * information, this can be handled like LZMA_OK. + */ + LZMA_NO_CHECK = 2, /**< * \brief Input stream has no integrity check diff --git a/src/liblzma/api/lzma/container.h b/src/liblzma/api/lzma/container.h index 0008e8e7..f6f8d193 100644 --- a/src/liblzma/api/lzma/container.h +++ b/src/liblzma/api/lzma/container.h @@ -76,6 +76,7 @@ typedef struct { * - LZMA_IGNORE_CHECK * - LZMA_CONCATENATED * - LZMA_FAIL_FAST + * - LZMA_TELL_BLOCK_END */ uint32_t flags; @@ -718,6 +719,16 @@ extern LZMA_API(lzma_ret) lzma_microlzma_encoder( #define LZMA_FAIL_FAST UINT32_C(0x20) +/** + * This flag makes lzma_code() return LZMA_BLOCK_END at the end of every .xz + * Block when decoding a .xz file. Other than providing this information, + * LZMA_BLOCK_END can be handled like LZMA_OK. + * + * \since 5.9.1alpha + */ +#define LZMA_TELL_BLOCK_END UINT32_C(0x40) + + /** * \brief Initialize .xz Stream decoder * @@ -731,7 +742,8 @@ extern LZMA_API(lzma_ret) lzma_microlzma_encoder( * \param flags Bitwise-or of zero or more of the decoder flags: * LZMA_TELL_NO_CHECK, LZMA_TELL_UNSUPPORTED_CHECK, * LZMA_TELL_ANY_CHECK, LZMA_IGNORE_CHECK, - * LZMA_CONCATENATED, LZMA_FAIL_FAST + * LZMA_CONCATENATED, LZMA_FAIL_FAST, + * LZMA_TELL_BLOCK_END * * \return Possible lzma_ret values: * - LZMA_OK: Initialization was successful. @@ -872,7 +884,8 @@ extern LZMA_API(lzma_ret) lzma_stream_decoder_mt( * \param flags Bitwise-or of zero or more of the decoder flags: * LZMA_TELL_NO_CHECK, LZMA_TELL_UNSUPPORTED_CHECK, * LZMA_TELL_ANY_CHECK, LZMA_IGNORE_CHECK, - * LZMA_CONCATENATED, LZMA_FAIL_FAST + * LZMA_CONCATENATED, LZMA_FAIL_FAST, + * LZMA_TELL_BLOCK_END * * \return Possible lzma_ret values: * - LZMA_OK: Initialization was successful. @@ -953,9 +966,9 @@ extern LZMA_API(lzma_ret) lzma_alone_decoder( * although only LZMA_CONCATENATED and (in very rare * cases) LZMA_IGNORE_CHECK are actually useful. * LZMA_TELL_NO_CHECK, LZMA_TELL_UNSUPPORTED_CHECK, - * and LZMA_FAIL_FAST do nothing. LZMA_TELL_ANY_CHECK - * is supported for consistency only as CRC32 is - * always used in the .lz format. + * LZMA_FAIL_FAST, and LZMA_TELL_BLOCK_END do nothing. + * LZMA_TELL_ANY_CHECK is supported for consistency + * only as CRC32 is always used in the .lz format. * * \return Possible lzma_ret values: * - LZMA_OK: Initialization was successful. @@ -979,7 +992,8 @@ extern LZMA_API(lzma_ret) lzma_lzip_decoder( * LZMA_TELL_NO_CHECK, LZMA_TELL_UNSUPPORTED_CHECK, * LZMA_IGNORE_CHECK, LZMA_CONCATENATED, * LZMA_FAIL_FAST. Note that LZMA_TELL_ANY_CHECK - * is not allowed and will return LZMA_PROG_ERROR. + * and LZMA_TELL_BLOCK_END are not allowed and will + * return LZMA_PROG_ERROR. * \param allocator lzma_allocator for custom allocator functions. * Set to NULL to use malloc() and free(). * \param in Beginning of the input buffer diff --git a/src/liblzma/common/common.c b/src/liblzma/common/common.c index 04318694..f06c9c03 100644 --- a/src/liblzma/common/common.c +++ b/src/liblzma/common/common.c @@ -365,6 +365,7 @@ lzma_code(lzma_stream *strm, lzma_action action) FALLTHROUGH; + case LZMA_BLOCK_END: case LZMA_NO_CHECK: case LZMA_UNSUPPORTED_CHECK: case LZMA_GET_CHECK: diff --git a/src/liblzma/common/common.h b/src/liblzma/common/common.h index f2a5d97f..63d7cc10 100644 --- a/src/liblzma/common/common.h +++ b/src/liblzma/common/common.h @@ -155,7 +155,8 @@ | LZMA_TELL_ANY_CHECK \ | LZMA_IGNORE_CHECK \ | LZMA_CONCATENATED \ - | LZMA_FAIL_FAST ) + | LZMA_FAIL_FAST \ + | LZMA_TELL_BLOCK_END ) /// Largest valid lzma_action value as unsigned integer. diff --git a/src/liblzma/common/seekable_decoder.c b/src/liblzma/common/seekable_decoder.c index f1eaf04b..289e01fb 100644 --- a/src/liblzma/common/seekable_decoder.c +++ b/src/liblzma/common/seekable_decoder.c @@ -39,6 +39,10 @@ typedef struct { /// and verifying the integrity check. bool ignore_check; + /// If true, LZMA_BLOCK_END is returned every time we finish + /// decoding a Block. + bool tell_block_end; + /// Pointer to lzma_stream.seek_pos: /// - This is written to pass the target input seek position to /// the application when we return LZMA_SEEK_NEEDED. @@ -119,9 +123,13 @@ seekable_seek_output(lzma_seekable_coder *coder, if (action == LZMA_SEEK_TO_OFFSET) { // Don't call locate if we already have the right Block. // - // NOTE: SEQ_ITER_NEXT_BLOCK means that we are seeking - // right after the decoder initialization, and thus - // the iterator doesn't contain valid data yet. + // NOTE: SEQ_ITER_NEXT_BLOCK means that we are + // (1) seeking right after the decoder initialization, + // and thus the iterator doesn't contain valid + // data yet; or + // (2) seeking right after returning LZMA_BLOCK_END, + // and thus the desired target offset cannot + // be in the current Block. if (coder->sequence != SEQ_ITER_NEXT_BLOCK && coder->cur_out_offset <= target && target @@ -422,6 +430,12 @@ seekable_decode(void *coder_ptr, const lzma_allocator *allocator, // Block decoder, it has verified that they match the Index; // we don't need to check them here. coder->sequence = SEQ_ITER_NEXT_BLOCK; + + // If the flag LZMA_TELL_BLOCK_END was used, + // return LZMA_BLOCK_END now. + if (coder->tell_block_end) + return LZMA_BLOCK_END; + break; } @@ -478,7 +492,7 @@ seekable_decoder_init( return LZMA_PROG_ERROR; // Not many flags are supported. - if (flags & ~LZMA_IGNORE_CHECK) + if (flags & ~(LZMA_IGNORE_CHECK | LZMA_TELL_BLOCK_END)) return LZMA_OPTIONS_ERROR; lzma_seekable_coder *coder = next->coder; @@ -501,6 +515,7 @@ seekable_decoder_init( coder->memusage = LZMA_MEMUSAGE_BASE; coder->ignore_check = (flags & LZMA_IGNORE_CHECK) != 0; + coder->tell_block_end = (flags & LZMA_TELL_BLOCK_END) != 0; lzma_index_iter_init(&coder->iter, idx); coder->external_seek_pos = seek_pos; diff --git a/src/liblzma/common/stream_buffer_decoder.c b/src/liblzma/common/stream_buffer_decoder.c index c4f91fb4..ffbd3eaf 100644 --- a/src/liblzma/common/stream_buffer_decoder.c +++ b/src/liblzma/common/stream_buffer_decoder.c @@ -26,7 +26,7 @@ lzma_stream_buffer_decode(uint64_t *memlimit, uint32_t flags, return LZMA_PROG_ERROR; // Catch flags that are not allowed in buffer-to-buffer decoding. - if (flags & LZMA_TELL_ANY_CHECK) + if (flags & (LZMA_TELL_ANY_CHECK | LZMA_TELL_BLOCK_END)) return LZMA_PROG_ERROR; // Initialize the Stream decoder. diff --git a/src/liblzma/common/stream_decoder.c b/src/liblzma/common/stream_decoder.c index 94004b74..a721e9d2 100644 --- a/src/liblzma/common/stream_decoder.c +++ b/src/liblzma/common/stream_decoder.c @@ -60,6 +60,10 @@ typedef struct { /// and verifying the integrity check. bool ignore_check; + /// If true, LZMA_BLOCK_END is returned every time we finish + /// decoding a Block. + bool tell_block_end; + /// If true, we will decode concatenated Streams that possibly have /// Stream Padding between or after them. LZMA_STREAM_END is returned /// once the application isn't giving us any new input (LZMA_FINISH), @@ -271,6 +275,10 @@ stream_decode(void *coder_ptr, const lzma_allocator *allocator, coder->block_options.uncompressed_size)); coder->sequence = SEQ_BLOCK_HEADER; + + if (coder->tell_block_end) + return LZMA_BLOCK_END; + break; } @@ -450,6 +458,7 @@ lzma_stream_decoder_init( = (flags & LZMA_TELL_UNSUPPORTED_CHECK) != 0; coder->tell_any_check = (flags & LZMA_TELL_ANY_CHECK) != 0; coder->ignore_check = (flags & LZMA_IGNORE_CHECK) != 0; + coder->tell_block_end = (flags & LZMA_TELL_BLOCK_END) != 0; coder->concatenated = (flags & LZMA_CONCATENATED) != 0; coder->first_stream = true; diff --git a/src/liblzma/common/stream_decoder_mt.c b/src/liblzma/common/stream_decoder_mt.c index 271f9b07..bab36613 100644 --- a/src/liblzma/common/stream_decoder_mt.c +++ b/src/liblzma/common/stream_decoder_mt.c @@ -287,6 +287,10 @@ struct lzma_stream_coder { /// and verifying the integrity check. bool ignore_check; + /// If true, LZMA_BLOCK_END is returned every time we finish + /// decoding a Block. + bool tell_block_end; + /// If true, we will decode concatenated Streams that possibly have /// Stream Padding between or after them. LZMA_STREAM_END is returned /// once the application isn't giving us any new input (LZMA_FINISH), @@ -676,12 +680,30 @@ read_output_and_wait(struct lzma_stream_coder *coder, // thread of the next Block (if it is still // running) to start telling the main thread // when new output is available. - if (ret == LZMA_STREAM_END) + if (ret == LZMA_STREAM_END) { lzma_outq_enable_partial_output( &coder->outq, &worker_enable_partial_update); - // Loop until a Block wasn't finished. + // If LZMA_TELL_BLOCK_END was used, + // return LZMA_BLOCK_END. + // + // Also set out_was_filled = true even + // though the out buffer might not + // have become full; LZMA_BLOCK_END + // is a kind of "no more output was + // possible" situation, and thus we + // don't want waiting_allowed to be + // true on the next call to + // stream_decode_mt(). + if (coder->tell_block_end) { + coder->out_was_filled = true; + ret = LZMA_BLOCK_END; + } + } + + // Unless LZMA_TELL_BLOCK_END was used, + // loop until a Block wasn't finished. // It's important to loop around even if // *out_pos == out_size because there could // be an empty Block that will return @@ -690,7 +712,8 @@ read_output_and_wait(struct lzma_stream_coder *coder, } while (ret == LZMA_STREAM_END); // Check if lzma_outq_read reported an error from - // the Block decoder. + // the Block decoder or if we are returning + // LZMA_BLOCK_END. if (ret != LZMA_OK) break; @@ -863,7 +886,7 @@ read_output_and_wait(struct lzma_stream_coder *coder, // If we are returning an error, then the application cannot get // more output from us and thus keeping the threads running is // useless and waste of CPU time. - if (ret != LZMA_OK && ret != LZMA_TIMED_OUT) + if (ret != LZMA_OK && ret != LZMA_TIMED_OUT && ret != LZMA_BLOCK_END) threads_stop(coder); return ret; @@ -1639,6 +1662,10 @@ stream_decode_mt(void *coder_ptr, const lzma_allocator *allocator, coder->block_options.uncompressed_size)); coder->sequence = SEQ_BLOCK_HEADER; + + if (coder->tell_block_end) + return LZMA_BLOCK_END; + break; } @@ -1987,6 +2014,7 @@ stream_decoder_mt_init(lzma_next_coder *next, const lzma_allocator *allocator, = (options->flags & LZMA_TELL_UNSUPPORTED_CHECK) != 0; coder->tell_any_check = (options->flags & LZMA_TELL_ANY_CHECK) != 0; coder->ignore_check = (options->flags & LZMA_IGNORE_CHECK) != 0; + coder->tell_block_end = (options->flags & LZMA_TELL_BLOCK_END) != 0; coder->concatenated = (options->flags & LZMA_CONCATENATED) != 0; coder->fail_fast = (options->flags & LZMA_FAIL_FAST) != 0;