size_t ZSTD_get_decompressed_size(const void *src, size_t src_len);
/******* UTILITY MACROS AND TYPES *********************************************/
-// Specification recommends supporting at least 8MB. The maximum possible value
-// is 1.875TB, but this implementation limits it to 512MB to avoid allocating
-// too much memory.
-#define MAX_WINDOW_SIZE ((size_t)512 * 1024 * 1024)
// Max block size decompressed size is 128 KB and literal blocks must be smaller
// than that
#define MAX_LITERALS_SIZE ((size_t)128 * 1024)
/// file. They implement low-level functionality needed for the higher level
/// decompression functions.
-/*** CIRCULAR BUFFER ******************/
-/// A standard circular buffer, used to facilitate back reference commands
-typedef struct {
- u8 *ptr;
- size_t idx, last_flush, size;
-} cbuf_t;
-
-/// Initialize a circular buffer
-static void cbuf_init(cbuf_t *buf, size_t size);
-static void cbuf_free(cbuf_t *buf);
-
-/// Copies up to `src_len` bytes from `src` into the buffer, stopping if it
-/// would need to flush.
-/// Returns the total amount of data copied.
-static size_t cbuf_write_data(cbuf_t *buf, const u8 *src, size_t src_len);
-/// Copies `len` bytes from `offset` back in the buffer, stopping if it would
-/// need to flush.
-/// Returns the number of bytes copied.
-static size_t cbuf_copy_offset(cbuf_t *buf, size_t offset, size_t len);
-/// Writes up to `len` copies of `byte`, stopping if would need to flush.
-/// Returns the number of bytes copied.
-static size_t cbuf_repeat_byte(cbuf_t *buf, u8 byte, size_t len);
-
-/// The `full` versions of the above functions write the full amount requested,
-/// flushing to `out` when necessary.
-/// They return the number of bytes flushed to `out`, if any.
-static size_t cbuf_write_data_full(cbuf_t *buf, const u8 *src, size_t src_len,
- u8 *out, size_t out_len);
-static size_t cbuf_copy_offset_full(cbuf_t *buf, size_t offset, size_t len,
- u8 *out, size_t out_len);
-static size_t cbuf_repeat_byte_full(cbuf_t *buf, u8 byte, size_t len, u8 *out,
- size_t out_len);
-
-/// Flushes any unflushed data to `dst`
-static size_t cbuf_flush(cbuf_t *buf, u8 *dst, size_t dst_len);
-/*** END CIRCULAR BUFFER **************/
-
/*** BITSTREAM OPERATIONS *************/
/// Read `num` bits (up to 64) from `src + offset`, where `offset` is in bits
static inline u64 read_bits_LE(const u8 *src, int num, size_t offset);
// offset too large to be correct
size_t current_total_output;
- // A sliding window of the past `window_size` bytes decoded
- cbuf_t window;
+ const u8 *dict_content;
+ size_t dict_content_len;
// Entropy encoding tables so they can be repeated by future blocks instead
- // of
- // retransmitting
+ // of retransmitting
HUF_dtable literals_dtable;
FSE_dtable ll_dtable;
FSE_dtable ml_dtable;
context->previous_offsets[2] = 4;
context->previous_offsets[3] = 8;
- {
- // Allocate the window buffer
- size_t buffer_size;
- if (context->header.single_segment_flag) {
- buffer_size = context->header.frame_content_size +
- (dict ? dict->content_size : 0);
- } else {
- buffer_size = context->header.window_size;
- }
-
- if (buffer_size > MAX_WINDOW_SIZE) {
- ERROR("Requested window size too large");
- }
- cbuf_init(&context->window, buffer_size);
- }
-
// Apply details from the dict if it exists
frame_context_apply_dict(context, dict);
}
FSE_free_dtable(&context->ml_dtable);
FSE_free_dtable(&context->of_dtable);
- cbuf_free(&context->window);
-
memset(context, 0, sizeof(frame_context_t));
}
header->frame_content_size = 0;
}
+ if (single_segment_flag) {
+ // In this case the effective window size is frame_content_size. This
+ // impacts sequence decoding, as we need to determine whether or not to
+ // fall back to the dictionary on large offsets
+ header->window_size = header->frame_content_size;
+ }
+
header->header_size = header_size;
}
ERROR("Wrong/no dictionary provided");
}
- // Write the dict data in, and then flush to NULL so it's not sent to the
- // output stream
- cbuf_write_data_full(&ctx->window, dict->content, dict->content_size, NULL,
- -1);
- cbuf_flush(&ctx->window, NULL, -1);
- ctx->current_total_output = dict->content_size;
+ // Copy the pointer in so we can reference it in sequence execution
+ ctx->dict_content = dict->content;
+ ctx->dict_content_len = dict->content_size;
// If it's a formatted dict copy the precomputed tables in so they can
// be used in the table repeat modes
OUT_SIZE();
}
- // Write the raw data into the window buffer
- size_t written =
- cbuf_write_data_full(&ctx->window, streams->src, block_len,
- streams->dst, streams->dst_len);
+ // Copy the raw data into the output
+ memcpy(streams->dst, streams->src, block_len);
+
streams->src += block_len;
streams->src_len -= block_len;
- streams->dst += written;
- streams->dst_len -= written;
+ streams->dst += block_len;
+ streams->dst_len -= block_len;
+
+ ctx->current_total_output += block_len;
break;
}
case 1: {
OUT_SIZE();
}
- // Write streams->src[0] into the buffer block_len times
- size_t written =
- cbuf_repeat_byte_full(&ctx->window, streams->src[0], block_len,
- streams->dst, streams->dst_len);
- streams->dst += written;
- streams->dst_len -= written;
+ // Copy `block_len` copies of `streams->src[0]` to the output
+ memset(streams->dst, streams->src[0], block_len);
+
+ streams->dst += block_len;
+ streams->dst_len -= block_len;
streams->src += 1;
streams->src_len -= 1;
+
+ ctx->current_total_output += block_len;
break;
}
case 2:
}
} while (!last_block);
- // Flush out anything left in the window buffer to the destination stream
- size_t written = cbuf_flush(&ctx->window, streams->dst, streams->dst_len);
- streams->dst += written;
- streams->dst_len -= written;
-
if (ctx->header.content_checksum_flag) {
// This program does not support checking the checksum, so skip over it
// if it's present
CORRUPTION();
}
- {
- // Copy literals to the buffer
- size_t written =
- cbuf_write_data_full(&ctx->window, literals, seq.literal_length,
- streams->dst, streams->dst_len);
+ if (streams->dst_len < seq.literal_length + seq.match_length) {
+ OUT_SIZE();
+ }
+ // Copy literals to output
+ memcpy(streams->dst, literals, seq.literal_length);
- literals += seq.literal_length;
- literals_len -= seq.literal_length;
+ literals += seq.literal_length;
+ literals_len -= seq.literal_length;
- streams->dst += written;
- streams->dst_len -= written;
+ streams->dst += seq.literal_length;
+ streams->dst_len -= seq.literal_length;
- total_output += seq.literal_length;
- }
+ total_output += seq.literal_length;
size_t offset;
offset_hist[1] = offset;
}
- if (offset > total_output) {
- CORRUPTION();
- }
+ size_t match_length = seq.match_length;
+ if (total_output <= ctx->header.window_size) {
+ // In this case offset might go back into the dictionary
+ if (offset > total_output + ctx->dict_content_len) {
+ // The offset goes beyond even the dictionary
+ CORRUPTION();
+ }
- {
- // Do the offset copy operation
- size_t written =
- cbuf_copy_offset_full(&ctx->window, offset, seq.match_length,
- streams->dst, streams->dst_len);
+ if (offset > total_output) {
+ const size_t dict_copy =
+ MIN(offset - total_output, match_length);
+ const size_t dict_offset =
+ ctx->dict_content_len - (offset - total_output);
+ for (size_t i = 0; i < dict_copy; i++) {
+ *streams->dst++ = ctx->dict_content[dict_offset + i];
+ }
+ match_length -= dict_copy;
+ }
+ }
- streams->dst += written;
- streams->dst_len -= written;
- total_output += seq.match_length;
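+ // We must copy byte by byte because the match length might be larger
+ // than the offset
+ // e.g. if the output so far was "abc", a command with offset=3 and
+ // match_length=6 would produce "abcabcabc" as the new output
+ // NOTE(review): offset is only range-checked above while total_output
+ // <= window_size; confirm whether a bound is also needed past that point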
+ for (size_t i = 0; i < match_length; i++) {
+ *streams->dst = *(streams->dst - offset);
+ streams->dst++;
}
- }
- {
- // Copy any leftover literal bytes
- size_t written =
- cbuf_write_data_full(&ctx->window, literals, literals_len,
- streams->dst, streams->dst_len);
- streams->dst += written;
- streams->dst_len -= written;
+ streams->dst_len -= seq.match_length;
+ total_output += seq.match_length;
+ }
- total_output += literals_len;
+ if (streams->dst_len < literals_len) {
+ OUT_SIZE();
}
+ // Copy any leftover literals
+ memcpy(streams->dst, literals, literals_len);
+ streams->dst += literals_len;
+ streams->dst_len -= literals_len;
- ctx->current_total_output = total_output;
+ total_output += literals_len;
- return total_output;
+ ctx->current_total_output = total_output;
}
/******* END SEQUENCE EXECUTION ***********************************************/