--- /dev/null
+// SPDX-License-Identifier: 0BSD
+
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file seekable_decoder.c
+/// \brief Decodes .xz files and supports seeking
+//
+// Author: Lasse Collin
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "block_decoder.h"
+#include "index.h"
+
+
+typedef struct {
+ enum {
+ SEQ_ITER_NEXT_BLOCK,
+ SEQ_BLOCK_HEADER,
+ SEQ_BLOCK_INIT,
+ SEQ_BLOCK_SKIP,
+ SEQ_BLOCK_OUTPUT,
+ } sequence;
+
+ /// Block decoder
+ lzma_next_coder next;
+
+ /// Block options decoded by the Block Header decoder and used by
+ /// the Block decoder.
+ lzma_block block_options;
+
+ /// Memory usage limit
+ uint64_t memlimit;
+
+ /// Amount of memory actually needed (only an estimate)
+ uint64_t memusage;
+
+ /// If true, we will tell the Block decoder to skip calculating
+ /// and verifying the integrity check.
+ bool ignore_check;
+
+ /// Pointer to lzma_stream.seek_pos:
+ /// - This is written to pass the target input seek position to
+ /// the application when we return LZMA_SEEK_NEEDED.
+ /// - An application-provided value is read from this when action
+ /// is LZMA_SEEK_TO_OFFSET or LZMA_SEEK_TO_BLOCK.
+ uint64_t *external_seek_pos;
+
+ /// Iterator to the application-provided lzma_index.
+ /// The application must keep the lzma_index available
+ /// as long as it's using the seekable .xz decoder.
+ lzma_index_iter iter;
+
+ /// Current read position in the input file
+ uint64_t cur_in_offset;
+
+ /// Current uncompressed position in the file
+ uint64_t cur_out_offset;
+
+ /// Number of uncompressed bytes to skip in SEQ_BLOCK_SKIP
+ /// to reach the requested target offset.
+ uint64_t skip_out;
+
+ /// Write position in buffer[]
+ size_t buffer_pos;
+
+ /// Buffer to hold Block Header in SEQ_BLOCK_HEADER, and
+ /// uncompressed data to be discarded in SEQ_BLOCK_SKIP.
+ uint8_t buffer[my_max(LZMA_BLOCK_HEADER_SIZE_MAX, 4096)];
+} lzma_seekable_coder;
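+
+
+// Illustrative summary of how coder->sequence normally advances (the
+// authoritative logic is in seekable_decode() below):
+//
+//     SEQ_ITER_NEXT_BLOCK -> SEQ_BLOCK_HEADER -> SEQ_BLOCK_INIT
+//             -> SEQ_BLOCK_SKIP -> SEQ_BLOCK_OUTPUT -> SEQ_ITER_NEXT_BLOCK
+//
+// The seek actions may jump back to SEQ_BLOCK_HEADER (when a different
+// Block is needed) or to SEQ_BLOCK_SKIP (when the target is later in the
+// Block that is currently being decoded).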
+
+
+/// \brief Seek input position to the beginning of a Block
+///
+/// \return LZMA_OK if it was enough to adjust *in_pos.
+/// LZMA_SEEK_NEEDED if input seeking is required.
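+///
+/// Worked example (made-up numbers): if cur_in_offset is 100, the next
+/// Block begins at compressed offset 130, and the caller still has 50
+/// unread bytes in in[], it's enough to advance *in_pos by 30. If the
+/// Block began at offset 500 instead, *external_seek_pos would be set
+/// to 500 and LZMA_SEEK_NEEDED would be returned.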
+static lzma_ret
+seekable_seek_input(lzma_seekable_coder *coder,
+ size_t *restrict in_pos, size_t in_size)
+{
+ const uint64_t old_off = coder->cur_in_offset;
+ const uint64_t new_off = coder->iter.block.compressed_file_offset;
+ coder->cur_in_offset = new_off;
+
+ // When starting to decode a Block Header, buffer_pos starts at 0.
+ // We can get here with any coder->sequence, so we always reset
+ // buffer_pos here.
+ coder->buffer_pos = 0;
+ coder->sequence = SEQ_BLOCK_HEADER;
+
+ if (new_off >= old_off && new_off - old_off <= in_size - *in_pos) {
+ // The new input offset is in the current input buffer
+ // or it's the very next byte after it. There's no
+ // need to seek; it's enough to advance *in_pos.
+ *in_pos += new_off - old_off;
+ return LZMA_OK;
+ }
+
+ // The new input offset isn't available in the current
+ // input buffer. We need to seek.
+ *coder->external_seek_pos = new_off;
+ return LZMA_SEEK_NEEDED;
+}
+
+
+/// \brief Seek to the requested uncompressed offset or Block number
+///
+/// \return LZMA_OK if no input seeking is needed. LZMA_SEEK_NEEDED if
+/// input seeking is required. LZMA_SEEK_ERROR if the target
+/// offset or Block doesn't exist in the .xz file.
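+///
+/// Worked example (made-up numbers): if LZMA_SEEK_TO_OFFSET targets
+/// uncompressed offset 5000 and the located Block covers the uncompressed
+/// range [4096, 8192), decoding restarts from the beginning of that Block:
+/// cur_out_offset is set to 4096 and skip_out becomes 904, so the first
+/// 904 decoded bytes are discarded in SEQ_BLOCK_SKIP.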
+static lzma_ret
+seekable_seek_output(lzma_seekable_coder *coder,
+ size_t *restrict in_pos, size_t in_size, lzma_action action)
+{
+ assert(action == LZMA_SEEK_TO_OFFSET || action == LZMA_SEEK_TO_BLOCK);
+
+ const uint64_t target = *coder->external_seek_pos;
+
+ if (action == LZMA_SEEK_TO_OFFSET) {
+ // Don't call locate if we already have the right Block.
+ //
+ // NOTE: SEQ_ITER_NEXT_BLOCK means that we are seeking
+ // right after the decoder initialization, and thus
+ // the iterator doesn't contain valid data yet.
+ if (coder->sequence != SEQ_ITER_NEXT_BLOCK
+ && coder->cur_out_offset <= target
+ && target
+ < coder->iter.block.uncompressed_file_offset
+ + coder->iter.block.uncompressed_size) {
+ // The new offset is within the current Block.
+ // We might need to skip output bytes to reach it.
+ coder->skip_out = target - coder->cur_out_offset;
+
+ // We might not have decoded the Block Header yet,
+ // so only change coder->sequence when it's already
+ // past SEQ_BLOCK_SKIP.
+ if (coder->sequence == SEQ_BLOCK_OUTPUT)
+ coder->sequence = SEQ_BLOCK_SKIP;
+
+ return LZMA_OK;
+ }
+
+ if (lzma_index_iter_locate(&coder->iter, target))
+ return LZMA_SEEK_ERROR;
+
+ assert(coder->iter.block.uncompressed_file_offset <= target);
+
+ // The decoding will start at the beginning of the Block.
+ // Determine how many bytes we need to ignore until
+ // we reach the uncompressed target offset.
+ coder->skip_out = target
+ - coder->iter.block.uncompressed_file_offset;
+
+ } else {
+ assert(action == LZMA_SEEK_TO_BLOCK);
+
+ // Check if we are already at the beginning of
+ // the target Block.
+ if (coder->sequence != SEQ_ITER_NEXT_BLOCK
+ && target == coder->iter.block.number_in_file
+ && coder->cur_out_offset == coder->iter.block
+ .uncompressed_file_offset) {
+ // skip_out could be non-zero due to a previous seek.
+ // Reset it so that we won't skip any bytes from
+ // the beginning of the Block.
+ coder->skip_out = 0;
+ return LZMA_OK;
+ }
+
+ if (lzma_index_iter_locate_block(&coder->iter, target))
+ return LZMA_SEEK_ERROR;
+
+ assert(coder->iter.block.number_in_file == target);
+
+ // When seeking to the beginning of a Block, no bytes are
+ // skipped from the beginning of the Block.
+ //
+ // NOTE: Don't do this earlier because we don't want to
+ // change the decoder state on LZMA_SEEK_ERROR.
+ coder->skip_out = 0;
+ }
+
+ // If we get here, on the next lzma_code() call, the input and
+ // output offsets will be at the beginning of the Block.
+ //
+ // If LZMA_SEEK_TO_OFFSET was used, cur_out_offset might be
+ // incremented in SEQ_BLOCK_SKIP to reach the target offset.
+ coder->cur_out_offset = coder->iter.block.uncompressed_file_offset;
+
+ // Adjust *in_pos or tell the application to seek the input.
+ return seekable_seek_input(coder, in_pos, in_size);
+}
+
+
+static lzma_ret
+seekable_decode(void *coder_ptr, const lzma_allocator *allocator,
+ const uint8_t *restrict in, size_t *restrict in_pos,
+ size_t in_size, uint8_t *restrict out,
+ size_t *restrict out_pos, size_t out_size, lzma_action action)
+{
+ lzma_seekable_coder *coder = coder_ptr;
+
+ if (action == LZMA_SEEK_TO_OFFSET || action == LZMA_SEEK_TO_BLOCK) {
+ // LZMA_SEEK_NEEDED isn't an error, but since it's not LZMA_OK
+ // either, this macro still does the right thing and returns it.
+ return_if_error(
+ seekable_seek_output(coder, in_pos, in_size, action));
+ }
+
+ // When decoding the actual Block, the Block decoder may be able to
+ // produce more output even if we don't give it any new input.
+ while (true)
+ switch (coder->sequence) {
+ case SEQ_ITER_NEXT_BLOCK: {
+ if (lzma_index_iter_next(&coder->iter,
+ LZMA_INDEX_ITER_BLOCK)) {
+ // There are no more Blocks: either every Block
+ // has been decoded or the .xz file has none.
+ return LZMA_STREAM_END;
+ }
+
+ return_if_error(seekable_seek_input(coder, in_pos, in_size));
+
+ // seekable_seek_input() always sets this.
+ assert(coder->sequence == SEQ_BLOCK_HEADER);
+ FALLTHROUGH;
+ }
+
+ case SEQ_BLOCK_HEADER: {
+ if (*in_pos >= in_size)
+ return LZMA_OK;
+
+ if (coder->buffer_pos == 0) {
+ // Check for an unexpected Index Indicator.
+ if (in[*in_pos] == INDEX_INDICATOR) {
+ // Consume the Index Indicator. cur_in_offset
+ // doesn't matter because one cannot recover
+ // from LZMA_DATA_ERROR by seeking.
+ ++*in_pos;
+ return LZMA_DATA_ERROR;
+ }
+
+ // Calculate the size of the Block Header. Note that the
+ // Block Header decoder wants to see this byte too, so
+ // don't advance *in_pos.
+ coder->block_options.header_size
+ = lzma_block_header_size_decode(
+ in[*in_pos]);
+ }
+
+ // Copy the Block Header to the internal buffer.
+ const size_t in_start = *in_pos;
+ lzma_bufcpy(in, in_pos, in_size,
+ coder->buffer, &coder->buffer_pos,
+ coder->block_options.header_size);
+ coder->cur_in_offset += *in_pos - in_start;
+
+ // Return if we didn't get the whole Block Header yet.
+ if (coder->buffer_pos < coder->block_options.header_size)
+ return LZMA_OK;
+
+ coder->sequence = SEQ_BLOCK_INIT;
+ FALLTHROUGH;
+ }
+
+ case SEQ_BLOCK_INIT: {
+ // Checking memusage and doing the initialization needs
+ // its own sequence point because we need to be able to
+ // retry if we return LZMA_MEMLIMIT_ERROR.
+
+ // Version 1 is needed to support the .ignore_check option.
+ coder->block_options.version = 1;
+
+ // Copy the Check type from Stream Header.
+ coder->block_options.check = coder->iter.stream.flags->check;
+
+ // Set up a buffer to hold the filter chain. Block Header
+ // decoder will initialize all members of this array so
+ // we don't need to do it here.
+ lzma_filter filters[LZMA_FILTERS_MAX + 1];
+ coder->block_options.filters = filters;
+
+ // Decode the Block Header.
+ return_if_error(lzma_block_header_decode(&coder->block_options,
+ allocator, coder->buffer));
+
+ // lzma_block_header_decode() has allocated the filter options in
+ // filters[]. Track errors with this variable and free the options
+ // after initializing the Block decoder or if an error occurs.
+ lzma_ret ret = LZMA_OK;
+
+ // If Compressed Size and/or Uncompressed Size are present,
+ // verify that they match the Index.
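+ // (Unpadded Size covers the Block Header, Compressed Data, and
+ // Check fields but excludes Block Padding, so Compressed Size
+ // equals Unpadded Size minus the Block Header size and the size
+ // of the Check.)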
+ if (coder->block_options.compressed_size != LZMA_VLI_UNKNOWN
+ && coder->block_options.compressed_size
+ != coder->iter.block.unpadded_size
+ - lzma_check_size(coder->block_options.check)
+ - coder->block_options.header_size)
+ ret = LZMA_DATA_ERROR;
+
+ if (coder->block_options.uncompressed_size != LZMA_VLI_UNKNOWN
+ && coder->block_options.uncompressed_size
+ != coder->iter.block.uncompressed_size)
+ ret = LZMA_DATA_ERROR;
+
+ // Check the memory usage limit.
+ if (ret == LZMA_OK) {
+ const uint64_t memusage
+ = lzma_raw_decoder_memusage(filters);
+ if (memusage == UINT64_MAX) {
+ // One or more unknown Filter IDs.
+ ret = LZMA_OPTIONS_ERROR;
+ } else {
+ // The filter chain is valid. We don't want
+ // lzma_memusage() to return UINT64_MAX in
+ // case of invalid filter chain, so we
+ // set coder->memusage only now.
+ coder->memusage = memusage;
+
+ if (memusage > coder->memlimit) {
+ // The chain would use too much memory.
+ ret = LZMA_MEMLIMIT_ERROR;
+ }
+ }
+ }
+
+ if (ret == LZMA_OK) {
+ // The sizes (if present) in Block Header match
+ // the Index and the memory usage is low enough.
+ //
+ // Set Compressed Size and Uncompressed Size from
+ // Index so that Block decoder will verify them.
+ coder->block_options.compressed_size
+ = coder->iter.block.unpadded_size
+ - lzma_check_size(coder->block_options.check)
+ - coder->block_options.header_size;
+
+ coder->block_options.uncompressed_size
+ = coder->iter.block.uncompressed_size;
+
+ // If LZMA_IGNORE_CHECK was used, this flag needs
+ // to be set. It has to be set after
+ // lzma_block_header_decode() because
+ // it always resets this to false.
+ coder->block_options.ignore_check
+ = coder->ignore_check;
+
+ // Initialize the Block decoder.
+ ret = lzma_block_decoder_init(&coder->next, allocator,
+ &coder->block_options);
+ }
+
+ // Free the allocated filter options since they are needed
+ // only to initialize the Block decoder.
+ lzma_filters_free(filters, allocator);
+ coder->block_options.filters = NULL;
+
+ // Return if the checks above or the Block decoder initialization failed.
+ if (ret != LZMA_OK)
+ return ret;
+
+ coder->sequence = SEQ_BLOCK_SKIP;
+ FALLTHROUGH;
+ }
+
+ case SEQ_BLOCK_SKIP: {
+ // Decode and throw away output until we reach
+ // the requested uncompressed offset.
+ while (coder->skip_out > 0) {
+ size_t skip_out_pos = 0;
+ const size_t skip_out_size = my_min(
+ sizeof(coder->buffer),
+ coder->skip_out);
+
+ const size_t in_start = *in_pos;
+ const lzma_ret ret = coder->next.code(
+ coder->next.coder, allocator,
+ in, in_pos, in_size,
+ coder->buffer, &skip_out_pos,
+ skip_out_size, LZMA_RUN);
+
+ coder->skip_out -= skip_out_pos;
+ coder->cur_in_offset += *in_pos - in_start;
+ coder->cur_out_offset += skip_out_pos;
+
+ if (ret != LZMA_OK) {
+ // If we reach the end of the Block here, it
+ // means that the application-provided
+ // lzma_index doesn't match the .xz file contents.
+ return ret == LZMA_STREAM_END
+ ? LZMA_DATA_ERROR : ret;
+ }
+
+ if (skip_out_pos < skip_out_size) {
+ // We need more input to produce more output.
+ assert(*in_pos == in_size);
+ return LZMA_OK;
+ }
+ }
+
+ coder->sequence = SEQ_BLOCK_OUTPUT;
+ FALLTHROUGH;
+ }
+
+ case SEQ_BLOCK_OUTPUT: {
+ // Decode and pass the data to the application.
+ const size_t in_start = *in_pos;
+ const size_t out_start = *out_pos;
+
+ const lzma_ret ret = coder->next.code(
+ coder->next.coder, allocator,
+ in, in_pos, in_size, out, out_pos, out_size,
+ LZMA_RUN);
+
+ coder->cur_in_offset += *in_pos - in_start;
+ coder->cur_out_offset += *out_pos - out_start;
+
+ if (ret != LZMA_STREAM_END)
+ return ret;
+
+ // Block decoded successfully. Because we told the expected
+ // values of Compressed Size and Uncompressed Size to the
+ // Block decoder, it has verified that they match the Index;
+ // we don't need to check them here.
+ coder->sequence = SEQ_ITER_NEXT_BLOCK;
+ break;
+ }
+
+ default:
+ assert(0);
+ return LZMA_PROG_ERROR;
+ }
+
+ // Never reached
+}
+
+
+static void
+seekable_decoder_end(void *coder_ptr, const lzma_allocator *allocator)
+{
+ lzma_seekable_coder *coder = coder_ptr;
+ lzma_next_end(&coder->next, allocator);
+ lzma_free(coder, allocator);
+ return;
+}
+
+
+static lzma_ret
+seekable_decoder_memconfig(void *coder_ptr, uint64_t *memusage,
+ uint64_t *old_memlimit, uint64_t new_memlimit)
+{
+ lzma_seekable_coder *coder = coder_ptr;
+
+ *memusage = coder->memusage;
+ *old_memlimit = coder->memlimit;
+
+ if (new_memlimit != 0) {
+ if (new_memlimit < coder->memusage)
+ return LZMA_MEMLIMIT_ERROR;
+
+ coder->memlimit = new_memlimit;
+ }
+
+ return LZMA_OK;
+}
+
+
+static lzma_ret
+seekable_decoder_init(
+ lzma_next_coder *next, const lzma_allocator *allocator,
+ uint64_t memlimit, uint32_t flags,
+ const lzma_index *idx, uint64_t *seek_pos)
+{
+ lzma_next_coder_init(&seekable_decoder_init, next, allocator);
+
+ assert(seek_pos != NULL);
+
+ if (idx == NULL)
+ return LZMA_PROG_ERROR;
+
+ // LZMA_IGNORE_CHECK is the only supported flag.
+ if (flags & ~LZMA_IGNORE_CHECK)
+ return LZMA_OPTIONS_ERROR;
+
+ lzma_seekable_coder *coder = next->coder;
+ if (coder == NULL) {
+ coder = lzma_alloc(sizeof(lzma_seekable_coder), allocator);
+ if (coder == NULL)
+ return LZMA_MEM_ERROR;
+
+ next->coder = coder;
+ next->code = &seekable_decode;
+ next->end = &seekable_decoder_end;
+ next->memconfig = &seekable_decoder_memconfig;
+
+ coder->next = LZMA_NEXT_CODER_INIT;
+ }
+
+ coder->sequence = SEQ_ITER_NEXT_BLOCK;
+
+ coder->memlimit = my_max(1, memlimit);
+ coder->memusage = LZMA_MEMUSAGE_BASE;
+
+ coder->ignore_check = (flags & LZMA_IGNORE_CHECK) != 0;
+
+ lzma_index_iter_init(&coder->iter, idx);
+ coder->external_seek_pos = seek_pos;
+
+ coder->cur_in_offset = 0;
+ coder->cur_out_offset = 0;
+
+ // Plain decoding without seeking must not skip any output, so this
+ // has to be initialized here; the seek actions set it as needed.
+ coder->skip_out = 0;
+
+ return LZMA_OK;
+}
+
+
+extern LZMA_API(lzma_ret)
+lzma_seekable_decoder(lzma_stream *strm, uint64_t memlimit, uint32_t flags,
+ const lzma_index *idx)
+{
+ lzma_next_strm_init(seekable_decoder_init, strm, memlimit, flags,
+ idx, &strm->seek_pos);
+
+ // We allow LZMA_FINISH in addition to LZMA_RUN in case it's
+ // convenient in some applications. lzma_code() is able to handle
+ // the LZMA_FINISH + LZMA_SEEK_NEEDED combination and also allows
+ // switching from LZMA_FINISH to LZMA_SEEK_TO_OFFSET or
+ // LZMA_SEEK_TO_BLOCK.
+ strm->internal->supported_actions[LZMA_RUN] = true;
+ strm->internal->supported_actions[LZMA_FINISH] = true;
+ strm->internal->supported_actions[LZMA_SEEK_TO_OFFSET] = true;
+ strm->internal->supported_actions[LZMA_SEEK_TO_BLOCK] = true;
+
+ return LZMA_OK;
+}
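+
+
+// What follows is an illustrative usage sketch in a comment, not part of
+// the library itself. It shows one way an application might drive this
+// decoder. The names file, inbuf, outbuf, idx, target, my_read(), and
+// my_seek() are hypothetical application-side helpers, and the lzma_index
+// is assumed to have been decoded earlier (for example with
+// lzma_file_info_decoder()). Error handling for truncated input is
+// omitted for brevity.
+//
+//     lzma_stream strm = LZMA_STREAM_INIT;
+//     if (lzma_seekable_decoder(&strm, UINT64_MAX, 0, idx) != LZMA_OK)
+//             return false;
+//
+//     // Request decoding to start from an uncompressed offset.
+//     strm.seek_pos = target;
+//     lzma_action action = LZMA_SEEK_TO_OFFSET;
+//
+//     while (true) {
+//             strm.next_out = outbuf;
+//             strm.avail_out = sizeof(outbuf);
+//
+//             if (strm.avail_in == 0) {
+//                     strm.next_in = inbuf;
+//                     strm.avail_in = my_read(file, inbuf, sizeof(inbuf));
+//             }
+//
+//             const lzma_ret ret = lzma_code(&strm, action);
+//             action = LZMA_RUN;
+//
+//             // Consume outbuf[0 .. sizeof(outbuf) - strm.avail_out - 1].
+//
+//             if (ret == LZMA_SEEK_NEEDED) {
+//                     // The decoder wants the next input bytes to come
+//                     // from strm.seek_pos, so the old input is stale.
+//                     my_seek(file, strm.seek_pos);
+//                     strm.avail_in = 0;
+//             } else if (ret == LZMA_STREAM_END) {
+//                     break;
+//             } else if (ret != LZMA_OK) {
+//                     return false;
+//             }
+//     }
+//
+// A real application would typically stop once it has read as much
+// uncompressed data as it needs instead of decoding to LZMA_STREAM_END.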