From: Nick Terrell Date: Thu, 15 Mar 2018 00:26:31 +0000 (-0700) Subject: Expose reference external sequence API X-Git-Tag: v1.3.4~1^2~25^2~2 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=a271399c978a2224976b5a00e83ea22d812efc15;p=thirdparty%2Fzstd.git Expose reference external sequence API Summary: * Expose the reference external sequences API for zstdmt. Allows external sequences of any length, which get split when necessary. * Reset the LDM window when the context is reset. * Store the maximum number of LDM sequences. * Sequence generation now returns the number of last literals. * Fix sequence generation to not throw out the last literals when blocks of more than 1 MB are encountered. Expose reference external sequence API * Expose the reference external sequences API for zstdmt. * Allows external sequences of any length, which get split when necessary. * Reset the LDM window when the context is reset. * Store the maximum number of LDM sequences. * Sequence generation now returns the number of last literals. * Fix sequence generation to not throw out the last literals when blocks of more than 1 MB are encountered. 
Test Plan: * CI * Test the zstdmt ldm integration stacked on top of this diff Reviewers: cyan Differential Revision: https://phabricator.intern.facebook.com/D7283968 Tasks: T25664120 --- a271399c978a2224976b5a00e83ea22d812efc15 diff --cc lib/compress/zstd_ldm.c index d75cdf5ae,1565687f9..fa395ed7d --- a/lib/compress/zstd_ldm.c +++ b/lib/compress/zstd_ldm.c @@@ -473,31 -472,26 +473,34 @@@ size_t ZSTD_ldm_generateSequences { U32 const maxDist = 1U << params->windowLog; BYTE const* const istart = (BYTE const*)src; + BYTE const* const iend = istart + srcSize; size_t const kMaxChunkSize = 1 << 20; size_t const nbChunks = (srcSize / kMaxChunkSize) + ((srcSize % kMaxChunkSize) != 0); - size_t nbSeq = 0; size_t chunk; + size_t leftoverSize = 0; assert(ZSTD_CHUNKSIZE_MAX >= kMaxChunkSize); /* Check that ZSTD_window_update() has been called for this chunk prior * to passing it to this function. */ assert(ldmState->window.nextSrc >= (BYTE const*)src + srcSize); - for (chunk = 0; chunk < nbChunks; ++chunk) { + /* The input could be very large (in zstdmt), so it must be broken up into + * chunks to enforce the maximum distance and handle overflow correction. + */ + assert(sequences->pos <= sequences->size); + assert(sequences->size <= sequences->capacity); + for (chunk = 0; chunk < nbChunks && sequences->size < sequences->capacity; ++chunk) { - size_t const chunkStart = chunk * kMaxChunkSize; - size_t const chunkEnd = MIN(chunkStart + kMaxChunkSize, srcSize); + BYTE const* const chunkStart = istart + chunk * kMaxChunkSize; + size_t const remaining = (size_t)(iend - chunkStart); + BYTE const *const chunkEnd = + (remaining < kMaxChunkSize) ? iend : chunkStart + kMaxChunkSize; size_t const chunkSize = chunkEnd - chunkStart; + size_t newLeftoverSize; + size_t const prevSize = sequences->size; - assert(chunkStart < srcSize); + assert(chunkStart < iend); + /* 1. Perform overflow correction if necessary. 
*/ - if (ZSTD_window_needOverflowCorrection(ldmState->window)) { + if (ZSTD_window_needOverflowCorrection(ldmState->window, chunkEnd)) { U32 const ldmHSize = 1U << params->hashLog; U32 const correction = ZSTD_window_correctOverflow( &ldmState->window, /* cycleLog */ 0, maxDist, src); @@@ -511,30 -503,15 +514,28 @@@ * * Try invalidation after the sequence generation and test the * the offset against maxDist directly. */ - ZSTD_window_enforceMaxDist(&ldmState->window, istart + chunkEnd, - maxDist); + ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist); - nbSeq += ZSTD_ldm_generateSequences_internal( - ldmState, sequences + nbSeq, params, chunkStart, chunkSize, - extDict); + /* 3. Generate the sequences for the chunk, and get newLeftoverSize. */ + newLeftoverSize = ZSTD_ldm_generateSequences_internal( - ldmState, sequences, params, istart + chunkStart, - chunkSize); ++ ldmState, sequences, params, chunkStart, chunkSize); + if (ZSTD_isError(newLeftoverSize)) + return newLeftoverSize; + /* 4. We add the leftover literals from previous iterations to the first + * newly generated sequence, or add the `newLeftoverSize` if none are + * generated. + */ + /* Prepend the leftover literals from the last call */ + if (prevSize < sequences->size) { + sequences->seq[prevSize].litLength += (U32)leftoverSize; + leftoverSize = newLeftoverSize; + } else { + assert(newLeftoverSize == chunkSize); + leftoverSize += chunkSize; + } } - return nbSeq; + return 0; } -#if 0 /** * If the sequence length is longer than remaining then the sequence is split * between this block and the next.