git.ipfire.org Git - thirdparty/xz.git/commitdiff
liblzma: Add a new .xz decoder flag LZMA_TELL_BLOCK_END
author Lasse Collin <lasse.collin@tukaani.org>
Fri, 23 May 2025 11:38:49 +0000 (14:38 +0300)
committer Lasse Collin <lasse.collin@tukaani.org>
Fri, 23 May 2025 11:38:49 +0000 (14:38 +0300)
It makes lzma_code() return LZMA_BLOCK_END after every .xz Block.
It can be useful in special cases where the boundaries of Blocks
have a meaning to the application. For example, if using the
seekable decoder, one can seek by Block number and then decode
exactly one Block by detecting LZMA_BLOCK_END.

Even if one knows the uncompressed size of the Block, the benefit
of waiting for LZMA_BLOCK_END is that then one knows that the
integrity check has been verified. A decoder might have provided all
output from the Block, but if the last input bytes of the Block
haven't been provided to the decoder, it cannot have verified the
integrity check.

src/liblzma/api/lzma/base.h
src/liblzma/api/lzma/container.h
src/liblzma/common/common.c
src/liblzma/common/common.h
src/liblzma/common/seekable_decoder.c
src/liblzma/common/stream_buffer_decoder.c
src/liblzma/common/stream_decoder.c
src/liblzma/common/stream_decoder_mt.c

index 12ed45ba996805c900e4230f6af910b83e20ef88..2f414bead1ac7f10790f6b5ec1e13975ae81719d 100644 (file)
@@ -70,6 +70,17 @@ typedef enum {
                 * output bytes should be picked from strm->next_out.
                 */
 
+       LZMA_BLOCK_END          = 14,
+               /**<
+                * \brief       End of .xz Block was reached
+                *
+                * This can only be returned if the LZMA_TELL_BLOCK_END flag
+                * was used when a .xz decoder was initialized. It indicates
+                * that end of a .xz Block was successfully reached (including
+                * verifying its integrity check). Other than providing this
+                * information, this can be handled like LZMA_OK.
+                */
+
        LZMA_NO_CHECK           = 2,
                /**<
                 * \brief       Input stream has no integrity check
index 0008e8e774fb6e514cd663e577abc6848b23a680..f6f8d1930ca33f8a78b645f20b4e73676d882027 100644 (file)
@@ -76,6 +76,7 @@ typedef struct {
         * - LZMA_IGNORE_CHECK
         * - LZMA_CONCATENATED
         * - LZMA_FAIL_FAST
+        * - LZMA_TELL_BLOCK_END
         */
        uint32_t flags;
 
@@ -718,6 +719,16 @@ extern LZMA_API(lzma_ret) lzma_microlzma_encoder(
 #define LZMA_FAIL_FAST                  UINT32_C(0x20)
 
 
+/**
+ * This flag makes lzma_code() return LZMA_BLOCK_END at the end of every .xz
+ * Block when decoding a .xz file. Other than providing this information,
+ * LZMA_BLOCK_END can be handled like LZMA_OK.
+ *
+ * \since       5.9.1alpha
+ */
+#define LZMA_TELL_BLOCK_END             UINT32_C(0x40)
+
+
 /**
  * \brief       Initialize .xz Stream decoder
  *
@@ -731,7 +742,8 @@ extern LZMA_API(lzma_ret) lzma_microlzma_encoder(
  * \param       flags       Bitwise-or of zero or more of the decoder flags:
  *                          LZMA_TELL_NO_CHECK, LZMA_TELL_UNSUPPORTED_CHECK,
  *                          LZMA_TELL_ANY_CHECK, LZMA_IGNORE_CHECK,
- *                          LZMA_CONCATENATED, LZMA_FAIL_FAST
+ *                          LZMA_CONCATENATED, LZMA_FAIL_FAST,
+ *                          LZMA_TELL_BLOCK_END
  *
  * \return      Possible lzma_ret values:
  *              - LZMA_OK: Initialization was successful.
@@ -872,7 +884,8 @@ extern LZMA_API(lzma_ret) lzma_stream_decoder_mt(
  * \param       flags       Bitwise-or of zero or more of the decoder flags:
  *                          LZMA_TELL_NO_CHECK, LZMA_TELL_UNSUPPORTED_CHECK,
  *                          LZMA_TELL_ANY_CHECK, LZMA_IGNORE_CHECK,
- *                          LZMA_CONCATENATED, LZMA_FAIL_FAST
+ *                          LZMA_CONCATENATED, LZMA_FAIL_FAST,
+ *                          LZMA_TELL_BLOCK_END
  *
  * \return      Possible lzma_ret values:
  *              - LZMA_OK: Initialization was successful.
@@ -953,9 +966,9 @@ extern LZMA_API(lzma_ret) lzma_alone_decoder(
  *                          although only LZMA_CONCATENATED and (in very rare
  *                          cases) LZMA_IGNORE_CHECK are actually useful.
  *                          LZMA_TELL_NO_CHECK, LZMA_TELL_UNSUPPORTED_CHECK,
- *                          and LZMA_FAIL_FAST do nothing. LZMA_TELL_ANY_CHECK
- *                          is supported for consistency only as CRC32 is
- *                          always used in the .lz format.
+ *                          LZMA_FAIL_FAST, and LZMA_TELL_BLOCK_END do nothing.
+ *                          LZMA_TELL_ANY_CHECK is supported for consistency
+ *                          only as CRC32 is always used in the .lz format.
  *
  * \return      Possible lzma_ret values:
  *              - LZMA_OK: Initialization was successful.
@@ -979,7 +992,8 @@ extern LZMA_API(lzma_ret) lzma_lzip_decoder(
  *                          LZMA_TELL_NO_CHECK, LZMA_TELL_UNSUPPORTED_CHECK,
  *                          LZMA_IGNORE_CHECK, LZMA_CONCATENATED,
  *                          LZMA_FAIL_FAST. Note that LZMA_TELL_ANY_CHECK
- *                          is not allowed and will return LZMA_PROG_ERROR.
+ *                          and LZMA_TELL_BLOCK_END are not allowed and will
+ *                          return LZMA_PROG_ERROR.
  * \param       allocator   lzma_allocator for custom allocator functions.
  *                          Set to NULL to use malloc() and free().
  * \param       in          Beginning of the input buffer
index 04318694a1a859dd2c14f146a507ebe0ea20267b..f06c9c038244c64dcec09d82842f34c744ad0cce 100644 (file)
@@ -365,6 +365,7 @@ lzma_code(lzma_stream *strm, lzma_action action)
 
                FALLTHROUGH;
 
+       case LZMA_BLOCK_END:
        case LZMA_NO_CHECK:
        case LZMA_UNSUPPORTED_CHECK:
        case LZMA_GET_CHECK:
index f2a5d97f1d95f3512ecb9790a4267e53312ac637..63d7cc108b3f5e3e83ead0416694a6aa45a7ae11 100644 (file)
        | LZMA_TELL_ANY_CHECK \
        | LZMA_IGNORE_CHECK \
        | LZMA_CONCATENATED \
-       | LZMA_FAIL_FAST )
+       | LZMA_FAIL_FAST \
+       | LZMA_TELL_BLOCK_END )
 
 
 /// Largest valid lzma_action value as unsigned integer.
index f1eaf04b8d88774ab95b5e4395a6e4127c2eb4b2..289e01fb2d8a03f23f99066dc77f84570a9b9f3a 100644 (file)
@@ -39,6 +39,10 @@ typedef struct {
        /// and verifying the integrity check.
        bool ignore_check;
 
+       /// If true, LZMA_BLOCK_END is returned every time we finish
+       /// decoding a Block.
+       bool tell_block_end;
+
        /// Pointer to lzma_stream.seek_pos:
        ///   - This is written to pass the target input seek position to
        ///     the application when we return LZMA_SEEK_NEEDED.
@@ -119,9 +123,13 @@ seekable_seek_output(lzma_seekable_coder *coder,
        if (action == LZMA_SEEK_TO_OFFSET) {
                // Don't call locate if we already have the right Block.
                //
-               // NOTE: SEQ_ITER_NEXT_BLOCK means that we are seeking
-               // right after the decoder initialization, and thus
-               // the iterator doesn't contain valid data yet.
+               // NOTE: SEQ_ITER_NEXT_BLOCK means that we are
+               //       (1) seeking right after the decoder initialization,
+               //           and thus the iterator doesn't contain valid
+               //           data yet; or
+               //       (2) seeking right after returning LZMA_BLOCK_END,
+               //           and thus the desired target offset cannot
+               //           be in the current Block.
                if (coder->sequence != SEQ_ITER_NEXT_BLOCK
                                && coder->cur_out_offset <= target
                                && target
@@ -422,6 +430,12 @@ seekable_decode(void *coder_ptr, const lzma_allocator *allocator,
                // Block decoder, it has verified that they match the Index;
                // we don't need to check them here.
                coder->sequence = SEQ_ITER_NEXT_BLOCK;
+
+               // If the flag LZMA_TELL_BLOCK_END was used,
+               // return LZMA_BLOCK_END now.
+               if (coder->tell_block_end)
+                       return LZMA_BLOCK_END;
+
                break;
        }
 
@@ -478,7 +492,7 @@ seekable_decoder_init(
                return LZMA_PROG_ERROR;
 
        // Not many flags are supported.
-       if (flags & ~LZMA_IGNORE_CHECK)
+       if (flags & ~(LZMA_IGNORE_CHECK | LZMA_TELL_BLOCK_END))
                return LZMA_OPTIONS_ERROR;
 
        lzma_seekable_coder *coder = next->coder;
@@ -501,6 +515,7 @@ seekable_decoder_init(
        coder->memusage = LZMA_MEMUSAGE_BASE;
 
        coder->ignore_check = (flags & LZMA_IGNORE_CHECK) != 0;
+       coder->tell_block_end = (flags & LZMA_TELL_BLOCK_END) != 0;
 
        lzma_index_iter_init(&coder->iter, idx);
        coder->external_seek_pos = seek_pos;
index c4f91fb498393fa89db51b5dd1fa87c5ef1c6c16..ffbd3eaf9bbb7505f53028a75f058a35e46811e8 100644 (file)
@@ -26,7 +26,7 @@ lzma_stream_buffer_decode(uint64_t *memlimit, uint32_t flags,
                return LZMA_PROG_ERROR;
 
        // Catch flags that are not allowed in buffer-to-buffer decoding.
-       if (flags & LZMA_TELL_ANY_CHECK)
+       if (flags & (LZMA_TELL_ANY_CHECK | LZMA_TELL_BLOCK_END))
                return LZMA_PROG_ERROR;
 
        // Initialize the Stream decoder.
index 94004b74a165915c947b572beb8089b58d2dd024..a721e9d2eb770cf5609aa62b090c6420f6c59eac 100644 (file)
@@ -60,6 +60,10 @@ typedef struct {
        /// and verifying the integrity check.
        bool ignore_check;
 
+       /// If true, LZMA_BLOCK_END is returned every time we finish
+       /// decoding a Block.
+       bool tell_block_end;
+
        /// If true, we will decode concatenated Streams that possibly have
        /// Stream Padding between or after them. LZMA_STREAM_END is returned
        /// once the application isn't giving us any new input (LZMA_FINISH),
@@ -271,6 +275,10 @@ stream_decode(void *coder_ptr, const lzma_allocator *allocator,
                                coder->block_options.uncompressed_size));
 
                coder->sequence = SEQ_BLOCK_HEADER;
+
+               if (coder->tell_block_end)
+                       return LZMA_BLOCK_END;
+
                break;
        }
 
@@ -450,6 +458,7 @@ lzma_stream_decoder_init(
                        = (flags & LZMA_TELL_UNSUPPORTED_CHECK) != 0;
        coder->tell_any_check = (flags & LZMA_TELL_ANY_CHECK) != 0;
        coder->ignore_check = (flags & LZMA_IGNORE_CHECK) != 0;
+       coder->tell_block_end = (flags & LZMA_TELL_BLOCK_END) != 0;
        coder->concatenated = (flags & LZMA_CONCATENATED) != 0;
        coder->first_stream = true;
 
index 271f9b07c4b821b7e9dd5c50536ee1765b0ac77b..bab3661356493681470500a905d719c962405564 100644 (file)
@@ -287,6 +287,10 @@ struct lzma_stream_coder {
        /// and verifying the integrity check.
        bool ignore_check;
 
+       /// If true, LZMA_BLOCK_END is returned every time we finish
+       /// decoding a Block.
+       bool tell_block_end;
+
        /// If true, we will decode concatenated Streams that possibly have
        /// Stream Padding between or after them. LZMA_STREAM_END is returned
        /// once the application isn't giving us any new input (LZMA_FINISH),
@@ -676,12 +680,30 @@ read_output_and_wait(struct lzma_stream_coder *coder,
                                // thread of the next Block (if it is still
                                // running) to start telling the main thread
                                // when new output is available.
-                               if (ret == LZMA_STREAM_END)
+                               if (ret == LZMA_STREAM_END) {
                                        lzma_outq_enable_partial_output(
                                                &coder->outq,
                                                &worker_enable_partial_update);
 
-                               // Loop until a Block wasn't finished.
+                                       // If LZMA_TELL_BLOCK_END was used,
+                                       // return LZMA_BLOCK_END.
+                                       //
+                                       // Also set out_was_filled = true even
+                                       // though the out buffer might not
+                                       // have become full; LZMA_BLOCK_END
+                                       // is a kind of "no more output was
+                                       // possible" situation, and thus we
+                                       // don't want waiting_allowed to be
+                                       // true on the next call to
+                                       // stream_decode_mt().
+                                       if (coder->tell_block_end) {
+                                               coder->out_was_filled = true;
+                                               ret = LZMA_BLOCK_END;
+                                       }
+                               }
+
+                               // Unless LZMA_TELL_BLOCK_END was used,
+                               // loop until a Block wasn't finished.
                                // It's important to loop around even if
                                // *out_pos == out_size because there could
                                // be an empty Block that will return
@@ -690,7 +712,8 @@ read_output_and_wait(struct lzma_stream_coder *coder,
                        } while (ret == LZMA_STREAM_END);
 
                        // Check if lzma_outq_read reported an error from
-                       // the Block decoder.
+                       // the Block decoder or if we are returning
+                       // LZMA_BLOCK_END.
                        if (ret != LZMA_OK)
                                break;
 
@@ -863,7 +886,7 @@ read_output_and_wait(struct lzma_stream_coder *coder,
        // If we are returning an error, then the application cannot get
        // more output from us and thus keeping the threads running is
        // useless and waste of CPU time.
-       if (ret != LZMA_OK && ret != LZMA_TIMED_OUT)
+       if (ret != LZMA_OK && ret != LZMA_TIMED_OUT && ret != LZMA_BLOCK_END)
                threads_stop(coder);
 
        return ret;
@@ -1639,6 +1662,10 @@ stream_decode_mt(void *coder_ptr, const lzma_allocator *allocator,
                                coder->block_options.uncompressed_size));
 
                coder->sequence = SEQ_BLOCK_HEADER;
+
+               if (coder->tell_block_end)
+                       return LZMA_BLOCK_END;
+
                break;
        }
 
@@ -1987,6 +2014,7 @@ stream_decoder_mt_init(lzma_next_coder *next, const lzma_allocator *allocator,
                        = (options->flags & LZMA_TELL_UNSUPPORTED_CHECK) != 0;
        coder->tell_any_check = (options->flags & LZMA_TELL_ANY_CHECK) != 0;
        coder->ignore_check = (options->flags & LZMA_IGNORE_CHECK) != 0;
+       coder->tell_block_end = (options->flags & LZMA_TELL_BLOCK_END) != 0;
        coder->concatenated = (options->flags & LZMA_CONCATENATED) != 0;
        coder->fail_fast = (options->flags & LZMA_FAIL_FAST) != 0;