git.ipfire.org Git - thirdparty/zstd.git/commitdiff
Move ldm functions to their own file
author: Stella Lau <laus@fb.com>
Thu, 7 Sep 2017 00:56:01 +0000 (17:56 -0700)
committer: Stella Lau <laus@fb.com>
Thu, 7 Sep 2017 01:09:26 +0000 (18:09 -0700)
13 files changed:
build/VS2008/fullbench/fullbench.vcproj
build/VS2008/fuzzer/fuzzer.vcproj
build/VS2008/zstd/zstd.vcproj
build/VS2008/zstdlib/zstdlib.vcproj
build/VS2010/fullbench/fullbench.vcxproj
build/VS2010/fuzzer/fuzzer.vcxproj
build/VS2010/libzstd-dll/libzstd-dll.vcxproj
build/VS2010/libzstd/libzstd.vcxproj
build/VS2010/zstd/zstd.vcxproj
build/cmake/lib/CMakeLists.txt
lib/compress/zstd_compress.c
lib/compress/zstd_ldm.c [new file with mode: 0644]
lib/compress/zstd_ldm.h [new file with mode: 0644]

index 05ec5ca06a56a00fda77f4b309a47ce71600ca5f..715ea2579f848cc637ce73bf65c057a77dc953d5 100644 (file)
                        <File
                                RelativePath="..\..\..\lib\compress\zstd_opt.c"
                                >
+      </File>
+                       <File
+                               RelativePath="..\..\..\lib\compress\zstd_ldm.c"
+                               >
                        </File>
                </Filter>
                <Filter
                        <File
                                RelativePath="..\..\..\lib\compress\zstd_opt.h"
                                >
+      </File>
+                       <File
+                               RelativePath="..\..\..\lib\compress\zstd_ldm.h"
+                               >
                        </File>
                </Filter>
        </Files>
index 700dd7ebdd35107775da82fbd150f84dbfb4f962..1421619a15af72d047db3cee1a3a4645eea947e7 100644 (file)
                        <File
                                RelativePath="..\..\..\lib\compress\zstd_opt.c"
                                >
+      </File>
+                       <File
+                               RelativePath="..\..\..\lib\compress\zstd_ldm.c"
+                               >
                        </File>
                </Filter>
                <Filter
                        <File
                                RelativePath="..\..\..\lib\compress\zstd_opt.h"
                                >
+      </File>
+                       <File
+                               RelativePath="..\..\..\lib\compress\zstd_ldm.h"
+                               >
                        </File>
                </Filter>
        </Files>
index 86dd3a254e2b0dded38ef9b2de56aaedac123e4e..dbd211c06ae753cc8d8c61414ed2ea2462c81eaa 100644 (file)
                        <File
                                RelativePath="..\..\..\lib\compress\zstd_opt.c"
                                >
+      </File>
+                       <File
+                               RelativePath="..\..\..\lib\compress\zstd_ldm.c"
+                               >
                        </File>
                </Filter>
                <Filter
                        <File
                                RelativePath="..\..\..\lib\compress\zstd_opt.h"
                                >
+      </File>
+                       <File
+                               RelativePath="..\..\..\lib\compress\zstd_ldm.h"
+                               >
                        </File>
                </Filter>
        </Files>
index ac8f896c35cd77a62dea409667ff4f3b3237ff11..340a4cd89e1beb27e9aea82dc5737ba9ca189394 100644 (file)
                        <File
                                RelativePath="..\..\..\lib\compress\zstd_opt.c"
                                >
+      </File>
+                       <File
+                               RelativePath="..\..\..\lib\compress\zstd_ldm.c"
+                               >
                        </File>
                        <File
                                RelativePath="..\..\..\lib\decompress\zstd_decompress.c"
                        <File
                                RelativePath="..\..\..\lib\compress\zstd_opt.h"
                                >
+      </File>
+                       <File
+                               RelativePath="..\..\..\lib\compress\zstd_ldm.h"
+                               >
                        </File>
                        <File
                                RelativePath="..\..\..\lib\common\zstd_static.h"
index 25c81988e3fb6db73ec38d65b02e6d9d3df616c0..d0cbcae9852e7a9c29a427bb780a4deb42198f99 100644 (file)
     <ClCompile Include="..\..\..\lib\compress\zstd_double_fast.c" />
     <ClCompile Include="..\..\..\lib\compress\zstd_lazy.c" />
     <ClCompile Include="..\..\..\lib\compress\zstd_opt.c" />
+    <ClCompile Include="..\..\..\lib\compress\zstd_ldm.c" />
     <ClCompile Include="..\..\..\lib\decompress\huf_decompress.c" />
     <ClCompile Include="..\..\..\lib\decompress\zstd_decompress.c" />
     <ClCompile Include="..\..\..\programs\datagen.c" />
     <ClInclude Include="..\..\..\lib\compress\zstd_double_fast.h" />
     <ClInclude Include="..\..\..\lib\compress\zstd_lazy.h" />
     <ClInclude Include="..\..\..\lib\compress\zstd_opt.h" />
+    <ClInclude Include="..\..\..\lib\compress\zstd_ldm.h" />
     <ClInclude Include="..\..\..\lib\compress\zstdmt_compress.h" />
     <ClInclude Include="..\..\..\lib\legacy\zstd_legacy.h" />
     <ClInclude Include="..\..\..\programs\datagen.h" />
index 9f00899da1359e79e20b2897ed11dc8cac744884..6fe3272094ecf5ab1980986783207dccdb95a277 100644 (file)
     <ClCompile Include="..\..\..\lib\compress\zstd_double_fast.c" />
     <ClCompile Include="..\..\..\lib\compress\zstd_lazy.c" />
     <ClCompile Include="..\..\..\lib\compress\zstd_opt.c" />
+    <ClCompile Include="..\..\..\lib\compress\zstd_ldm.c" />
     <ClCompile Include="..\..\..\lib\compress\zstdmt_compress.c" />
     <ClCompile Include="..\..\..\lib\decompress\huf_decompress.c" />
     <ClCompile Include="..\..\..\lib\decompress\zstd_decompress.c" />
     <ClInclude Include="..\..\..\lib\compress\zstd_double_fast.h" />
     <ClInclude Include="..\..\..\lib\compress\zstd_lazy.h" />
     <ClInclude Include="..\..\..\lib\compress\zstd_opt.h" />
+    <ClInclude Include="..\..\..\lib\compress\zstd_ldm.h" />
     <ClInclude Include="..\..\..\lib\compress\zstdmt_compress.h" />
     <ClInclude Include="..\..\..\lib\dictBuilder\divsufsort.h" />
     <ClInclude Include="..\..\..\lib\dictBuilder\zdict.h" />
index 0a4be69df4ad96dbd827b23bf52c9ef4e598c5e8..2d04c6935c4873b86e48d24e1b276855f6cc2e54 100644 (file)
@@ -33,6 +33,7 @@
     <ClCompile Include="..\..\..\lib\compress\zstd_double_fast.c" />
     <ClCompile Include="..\..\..\lib\compress\zstd_lazy.c" />
     <ClCompile Include="..\..\..\lib\compress\zstd_opt.c" />
+    <ClCompile Include="..\..\..\lib\compress\zstd_ldm.c" />
     <ClCompile Include="..\..\..\lib\compress\zstdmt_compress.c" />
     <ClCompile Include="..\..\..\lib\decompress\huf_decompress.c" />
     <ClCompile Include="..\..\..\lib\decompress\zstd_decompress.c" />
@@ -76,6 +77,7 @@
     <ClInclude Include="..\..\..\lib\compress\zstd_double_fast.h" />
     <ClInclude Include="..\..\..\lib\compress\zstd_lazy.h" />
     <ClInclude Include="..\..\..\lib\compress\zstd_opt.h" />
+    <ClInclude Include="..\..\..\lib\compress\zstd_ldm.h" />
     <ClInclude Include="..\..\..\lib\compress\zstdmt_compress.h" />
   </ItemGroup>
   <ItemGroup>
index 51b8406771d3d53a5057377ac0ae95ff989fd0af..c01a5d1795aeabd26d12f200b0a6f2dcdf4ef7b3 100644 (file)
@@ -33,6 +33,7 @@
     <ClCompile Include="..\..\..\lib\compress\zstd_double_fast.c" />
     <ClCompile Include="..\..\..\lib\compress\zstd_lazy.c" />
     <ClCompile Include="..\..\..\lib\compress\zstd_opt.c" />
+    <ClCompile Include="..\..\..\lib\compress\zstd_ldm.c" />
     <ClCompile Include="..\..\..\lib\compress\zstdmt_compress.c" />
     <ClCompile Include="..\..\..\lib\decompress\huf_decompress.c" />
     <ClCompile Include="..\..\..\lib\decompress\zstd_decompress.c" />
@@ -76,6 +77,7 @@
     <ClInclude Include="..\..\..\lib\compress\zstd_double_fast.h" />
     <ClInclude Include="..\..\..\lib\compress\zstd_lazy.h" />
     <ClInclude Include="..\..\..\lib\compress\zstd_opt.h" />
+    <ClInclude Include="..\..\..\lib\compress\zstd_ldm.h" />
     <ClInclude Include="..\..\..\lib\compress\zstdmt_compress.h" />
   </ItemGroup>
   <PropertyGroup Label="Globals">
index 90470180d564982d8cfb4abb59665a28eda75439..ace343465c006da750dbcfbbe2693dc2b981f48e 100644 (file)
@@ -34,6 +34,7 @@
     <ClCompile Include="..\..\..\lib\compress\zstd_double_fast.c" />
     <ClCompile Include="..\..\..\lib\compress\zstd_lazy.c" />
     <ClCompile Include="..\..\..\lib\compress\zstd_opt.c" />
+    <ClCompile Include="..\..\..\lib\compress\zstd_ldm.c" />
     <ClCompile Include="..\..\..\lib\decompress\huf_decompress.c" />
     <ClCompile Include="..\..\..\lib\decompress\zstd_decompress.c" />
     <ClCompile Include="..\..\..\lib\dictBuilder\cover.c" />
@@ -69,6 +70,7 @@
     <ClInclude Include="..\..\..\lib\compress\zstd_double_fast.h" />
     <ClInclude Include="..\..\..\lib\compress\zstd_lazy.h" />
     <ClInclude Include="..\..\..\lib\compress\zstd_opt.h" />
+    <ClInclude Include="..\..\..\lib\compress\zstd_ldm.h" />
     <ClInclude Include="..\..\..\lib\legacy\zstd_legacy.h" />
     <ClInclude Include="..\..\..\lib\legacy\zstd_v01.h" />
     <ClInclude Include="..\..\..\lib\legacy\zstd_v02.h" />
index f4b7e37537cb323860223faa4ac67272e6c3ae9f..f5d2eff923cfb19d9e47ac24eef19364a6553667 100644 (file)
@@ -42,6 +42,7 @@ SET(Sources
         ${LIBRARY_DIR}/compress/zstd_double_fast.c
         ${LIBRARY_DIR}/compress/zstd_lazy.c
         ${LIBRARY_DIR}/compress/zstd_opt.c
+        ${LIBRARY_DIR}/compress/zstd_ldm.c
         ${LIBRARY_DIR}/decompress/huf_decompress.c
         ${LIBRARY_DIR}/decompress/zstd_decompress.c
         ${LIBRARY_DIR}/dictBuilder/cover.c
@@ -67,6 +68,7 @@ SET(Headers
         ${LIBRARY_DIR}/compress/zstd_double_fast.h
         ${LIBRARY_DIR}/compress/zstd_lazy.h
         ${LIBRARY_DIR}/compress/zstd_opt.h
+        ${LIBRARY_DIR}/compress/zstd_ldm.h
         ${LIBRARY_DIR}/compress/zstdmt_compress.h
         ${LIBRARY_DIR}/dictBuilder/zdict.h
         ${LIBRARY_DIR}/deprecated/zbuff.h)
index 3abc1e91355d3d024711cef0fd3d7be2f045e020..5ea00b8e2a4a34cdcff40d69b4c319d4c2754a86 100644 (file)
 #include "zstd_double_fast.h"
 #include "zstd_lazy.h"
 #include "zstd_opt.h"
-
-
-#define LDM_BUCKET_SIZE_LOG 3
-#define LDM_MIN_MATCH_LENGTH 64
-#define LDM_WINDOW_LOG 27
-#define LDM_HASH_LOG 20
-#define LDM_HASH_CHAR_OFFSET 10
-#define LDM_HASHEVERYLOG_NOTSET 9999
+#include "zstd_ldm.h"
 
 
 /*-*************************************
@@ -135,33 +128,6 @@ size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx)
            + cctx->outBuffSize + cctx->inBuffSize
            + ZSTDMT_sizeof_CCtx(cctx->mtctx);
 }
-#if 0
-static void ZSTD_debugPrintCCtxParams(ZSTD_CCtx_params* params)
-{
-    DEBUGLOG(2, "======CCtxParams======");
-    DEBUGLOG(2, "cParams: %u %u %u %u %u %u %u",
-             params->cParams.windowLog,
-             params->cParams.chainLog,
-             params->cParams.hashLog,
-             params->cParams.searchLog,
-             params->cParams.searchLength,
-             params->cParams.targetLength,
-             params->cParams.strategy);
-    DEBUGLOG(2, "fParams: %u %u %u",
-             params->fParams.contentSizeFlag,
-             params->fParams.checksumFlag,
-             params->fParams.noDictIDFlag);
-    DEBUGLOG(2, "cLevel, forceWindow: %u %u",
-             params->compressionLevel,
-             params->forceWindow);
-    DEBUGLOG(2, "ldm: %u %u %u %u %u",
-             params->ldmParams.enableLdm,
-             params->ldmParams.hashLog,
-             params->ldmParams.bucketSizeLog,
-             params->ldmParams.minMatchLength,
-             params->ldmParams.hashEveryLog);
-}
-#endif
 
 size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs)
 {
@@ -274,17 +240,6 @@ size_t ZSTDMT_CCtxParam_setMTCtxParameter(
     ZSTD_CCtx_params* params, ZSTDMT_parameter parameter, unsigned value);
 size_t ZSTDMT_initializeCCtxParameters(ZSTD_CCtx_params* params, unsigned nbThreads);
 
-static size_t ZSTD_ldm_initializeParameters(ldmParams_t* params, U32 enableLdm)
-{
-    ZSTD_STATIC_ASSERT(LDM_BUCKET_SIZE_LOG <= ZSTD_LDM_BUCKETSIZELOG_MAX);
-    params->enableLdm = enableLdm>0;
-    params->hashLog = LDM_HASH_LOG;
-    params->bucketSizeLog = LDM_BUCKET_SIZE_LOG;
-    params->minMatchLength = LDM_MIN_MATCH_LENGTH;
-    params->hashEveryLog = LDM_HASHEVERYLOG_NOTSET;
-    return 0;
-}
-
 size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned value)
 {
     if (cctx->streamStage != zcss_init) return ERROR(stage_wrong);
@@ -454,7 +409,7 @@ size_t ZSTD_CCtxParam_setParameter(
     case ZSTD_p_enableLongDistanceMatching :
         if (value != 0) {
             ZSTD_cLevelToCCtxParams(params);
-            params->cParams.windowLog = LDM_WINDOW_LOG;
+            params->cParams.windowLog = ZSTD_LDM_WINDOW_LOG;
         }
         return ZSTD_ldm_initializeParameters(&params->ldmParams, value);
 
@@ -689,15 +644,6 @@ ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, u
     return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize);
 }
 
-/* Estimate the space needed for long distance matching tables. */
-static size_t ZSTD_ldm_getTableSize(U32 hashLog, U32 bucketSizeLog) {
-    size_t const ldmHSize = ((size_t)1) << hashLog;
-    size_t const ldmBucketSizeLog = MIN(bucketSizeLog, hashLog);
-    size_t const ldmBucketSize =
-        ((size_t)1) << (hashLog - ldmBucketSizeLog);
-    return ldmBucketSize + (ldmHSize * (sizeof(ldmEntry_t)));
-}
-
 size_t ZSTD_estimateCCtxSize_advanced_usingCCtxParams(const ZSTD_CCtx_params* params)
 {
     /* Estimate CCtx size is supported for single-threaded compression only. */
@@ -832,8 +778,6 @@ static size_t ZSTD_continueCCtx(ZSTD_CCtx* cctx, ZSTD_CCtx_params params, U64 pl
 typedef enum { ZSTDcrp_continue, ZSTDcrp_noMemset } ZSTD_compResetPolicy_e;
 typedef enum { ZSTDb_not_buffered, ZSTDb_buffered } ZSTD_buffered_policy_e;
 
-static U64 ZSTD_ldm_getHashPower(U32 minMatchLength);
-
 /*! ZSTD_resetCCtx_internal() :
     note : `params` are assumed fully validated at this stage */
 static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
@@ -847,7 +791,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
         if (ZSTD_equivalentParams(params, zc->appliedParams)) {
             DEBUGLOG(5, "ZSTD_equivalentParams()==1");
             assert(!(params.ldmParams.enableLdm &&
-                     params.ldmParams.hashEveryLog == LDM_HASHEVERYLOG_NOTSET));
+                     params.ldmParams.hashEveryLog == ZSTD_LDM_HASHEVERYLOG_NOTSET));
             zc->entropy->hufCTable_repeatMode = HUF_repeat_none;
             zc->entropy->offcode_repeatMode = FSE_repeat_none;
             zc->entropy->matchlength_repeatMode = FSE_repeat_none;
@@ -857,13 +801,8 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
 
     if (params.ldmParams.enableLdm) {
         /* Adjust long distance matching parameters */
-        if (params.ldmParams.hashEveryLog == LDM_HASHEVERYLOG_NOTSET) {
-            params.ldmParams.hashEveryLog =
-                    params.cParams.windowLog < params.ldmParams.hashLog ?
-                    0 : params.cParams.windowLog - params.ldmParams.hashLog;
-        }
-        params.ldmParams.bucketSizeLog =
-                MIN(params.ldmParams.bucketSizeLog, params.ldmParams.hashLog);
+        ZSTD_ldm_adjustParameters(&params.ldmParams, params.cParams.windowLog);
+        assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog);
         zc->ldmState.hashPower =
                 ZSTD_ldm_getHashPower(params.ldmParams.minMatchLength);
     }
@@ -994,7 +933,6 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
             size_t const ldmBucketSize =
                   ((size_t)1) << (params.ldmParams.hashLog -
                                   params.ldmParams.bucketSizeLog);
-            assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog);
             memset(ptr, 0, ldmBucketSize);
             zc->ldmState.bucketOffsets = (BYTE*)ptr;
             ptr = zc->ldmState.bucketOffsets + ldmBucketSize;
@@ -1553,9 +1491,10 @@ MEM_STATIC size_t ZSTD_compressSequences(seqStore_t* seqStorePtr,
 }
 
 /* ZSTD_selectBlockCompressor() :
+ * Not static, but internal use only (used by long distance matcher)
  * assumption : strat is a valid strategy */
 typedef size_t (*ZSTD_blockCompressor) (ZSTD_CCtx* ctx, const void* src, size_t srcSize);
-static ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict)
+ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict)
 {
     static const ZSTD_blockCompressor blockCompressor[2][(unsigned)ZSTD_btultra+1] = {
         { ZSTD_compressBlock_fast  /* default for 0 */,
@@ -1574,676 +1513,6 @@ static ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int
     return blockCompressor[extDict!=0][(U32)strat];
 }
 
-/*-*************************************
-*  Long distance matching
-***************************************/
-
-/** ZSTD_ldm_getSmallHash() :
- *  numBits should be <= 32
- *  If numBits==0, returns 0.
- *  @return : the most significant numBits of value. */
-static U32 ZSTD_ldm_getSmallHash(U64 value, U32 numBits)
-{
-    assert(numBits <= 32);
-    return numBits == 0 ? 0 : (U32)(value >> (64 - numBits));
-}
-
-/** ZSTD_ldm_getChecksum() :
- *  numBitsToDiscard should be <= 32
- *  @return : the next most significant 32 bits after numBitsToDiscard */
-static U32 ZSTD_ldm_getChecksum(U64 hash, U32 numBitsToDiscard)
-{
-    assert(numBitsToDiscard <= 32);
-    return (hash >> (64 - 32 - numBitsToDiscard)) & 0xFFFFFFFF;
-}
-
-/** ZSTD_ldm_getTag() ;
- *  Given the hash, returns the most significant numTagBits bits
- *  after (32 + hbits) bits.
- *
- *  If there are not enough bits remaining, return the last
- *  numTagBits bits. */
-static U32 ZSTD_ldm_getTag(U64 hash, U32 hbits, U32 numTagBits)
-{
-    assert(numTagBits <= 32 && hbits <= 32);
-    if (32 - hbits < numTagBits) {
-        return hash & ((1 << numTagBits) - 1);
-    } else {
-        return (hash >> (32 - hbits - numTagBits)) & ((1 << numTagBits) - 1);
-    }
-}
-
-/** ZSTD_ldm_getBucket() :
- *  Returns a pointer to the start of the bucket associated with hash. */
-static ldmEntry_t* ZSTD_ldm_getBucket(
-        ldmState_t* ldmState, size_t hash, ldmParams_t const ldmParams)
-{
-    return ldmState->hashTable + (hash << ldmParams.bucketSizeLog);
-}
-
-/** ZSTD_ldm_insertEntry() :
- *  Insert the entry with corresponding hash into the hash table */
-static void ZSTD_ldm_insertEntry(ldmState_t* ldmState,
-                                 size_t const hash, const ldmEntry_t entry,
-                                 ldmParams_t const ldmParams)
-{
-    BYTE* const bucketOffsets = ldmState->bucketOffsets;
-    *(ZSTD_ldm_getBucket(ldmState, hash, ldmParams) + bucketOffsets[hash]) = entry;
-    bucketOffsets[hash]++;
-    bucketOffsets[hash] &= (1 << ldmParams.bucketSizeLog) - 1;
-}
-
-/** ZSTD_ldm_makeEntryAndInsertByTag() :
- *
- *  Gets the small hash, checksum, and tag from the rollingHash.
- *
- *  If the tag matches (1 << ldmParams.hashEveryLog)-1, then
- *  creates an ldmEntry from the offset, and inserts it into the hash table.
- *
- *  hBits is the length of the small hash, which is the most significant hBits
- *  of rollingHash. The checksum is the next 32 most significant bits, followed
- *  by ldmParams.hashEveryLog bits that make up the tag. */
-static void ZSTD_ldm_makeEntryAndInsertByTag(ldmState_t* ldmState,
-                                             U64 const rollingHash,
-                                             U32 const hBits,
-                                             U32 const offset,
-                                             ldmParams_t const ldmParams)
-{
-    U32 const tag = ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashEveryLog);
-    U32 const tagMask = (1 << ldmParams.hashEveryLog) - 1;
-    if (tag == tagMask) {
-        U32 const hash = ZSTD_ldm_getSmallHash(rollingHash, hBits);
-        U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);
-        ldmEntry_t entry;
-        entry.offset = offset;
-        entry.checksum = checksum;
-        ZSTD_ldm_insertEntry(ldmState, hash, entry, ldmParams);
-    }
-}
-
-/** ZSTD_ldm_getRollingHash() :
- *  Get a 64-bit hash using the first len bytes from buf.
- *
- *  Giving bytes s = s_1, s_2, ... s_k, the hash is defined to be
- *  H(s) = s_1*(a^(k-1)) + s_2*(a^(k-2)) + ... + s_k*(a^0)
- *
- *  where the constant a is defined to be prime8bytes.
- *
- *  The implementation adds an offset to each byte, so
- *  H(s) = (s_1 + HASH_CHAR_OFFSET)*(a^(k-1)) + ... */
-static U64 ZSTD_ldm_getRollingHash(const BYTE* buf, U32 len)
-{
-    U64 ret = 0;
-    U32 i;
-    for (i = 0; i < len; i++) {
-        ret *= prime8bytes;
-        ret += buf[i] + LDM_HASH_CHAR_OFFSET;
-    }
-    return ret;
-}
-
-/** ZSTD_ldm_ipow() :
- *  Return base^exp. */
-static U64 ZSTD_ldm_ipow(U64 base, U64 exp)
-{
-    U64 ret = 1;
-    while (exp) {
-        if (exp & 1) { ret *= base; }
-        exp >>= 1;
-        base *= base;
-    }
-    return ret;
-}
-
-static U64 ZSTD_ldm_getHashPower(U32 minMatchLength) {
-    assert(minMatchLength >= ZSTD_LDM_MINMATCH_MIN);
-    return ZSTD_ldm_ipow(prime8bytes, minMatchLength - 1);
-}
-
-/** ZSTD_ldm_updateHash() :
- *  Updates hash by removing toRemove and adding toAdd. */
-static U64 ZSTD_ldm_updateHash(U64 hash, BYTE toRemove, BYTE toAdd, U64 hashPower)
-{
-    hash -= ((toRemove + LDM_HASH_CHAR_OFFSET) * hashPower);
-    hash *= prime8bytes;
-    hash += toAdd + LDM_HASH_CHAR_OFFSET;
-    return hash;
-}
-
-/** ZSTD_ldm_countBackwardsMatch() :
- *  Returns the number of bytes that match backwards before pIn and pMatch.
- *
- *  We count only bytes where pMatch >= pBase and pIn >= pAnchor. */
-static size_t ZSTD_ldm_countBackwardsMatch(
-            const BYTE* pIn, const BYTE* pAnchor,
-            const BYTE* pMatch, const BYTE* pBase)
-{
-    size_t matchLength = 0;
-    while (pIn > pAnchor && pMatch > pBase && pIn[-1] == pMatch[-1]) {
-        pIn--;
-        pMatch--;
-        matchLength++;
-    }
-    return matchLength;
-}
-
-/** ZSTD_ldm_fillFastTables() :
- *
- *  Fills the relevant tables for the ZSTD_fast and ZSTD_dfast strategies.
- *  This is similar to ZSTD_loadDictionaryContent.
- *
- *  The tables for the other strategies are filled within their
- *  block compressors. */
-static size_t ZSTD_ldm_fillFastTables(ZSTD_CCtx* zc, const void* end)
-{
-    const BYTE* const iend = (const BYTE*)end;
-    const U32 mls = zc->appliedParams.cParams.searchLength;
-
-    switch(zc->appliedParams.cParams.strategy)
-    {
-    case ZSTD_fast:
-        ZSTD_fillHashTable(zc, iend, mls);
-        zc->nextToUpdate = (U32)(iend - zc->base);
-        break;
-
-    case ZSTD_dfast:
-        ZSTD_fillDoubleHashTable(zc, iend, mls);
-        zc->nextToUpdate = (U32)(iend - zc->base);
-        break;
-
-    case ZSTD_greedy:
-    case ZSTD_lazy:
-    case ZSTD_lazy2:
-    case ZSTD_btlazy2:
-    case ZSTD_btopt:
-    case ZSTD_btultra:
-        break;
-    default:
-        assert(0);  /* not possible : not a valid strategy id */
-    }
-
-    return 0;
-}
-
-/** ZSTD_ldm_fillLdmHashTable() :
- *
- *  Fills hashTable from (lastHashed + 1) to iend (non-inclusive).
- *  lastHash is the rolling hash that corresponds to lastHashed.
- *
- *  Returns the rolling hash corresponding to position iend-1. */
-static U64 ZSTD_ldm_fillLdmHashTable(ldmState_t* state,
-                                     U64 lastHash, const BYTE* lastHashed,
-                                     const BYTE* iend, const BYTE* base,
-                                     U32 hBits, ldmParams_t const ldmParams)
-{
-    U64 rollingHash = lastHash;
-    const BYTE* cur = lastHashed + 1;
-
-    while (cur < iend) {
-        rollingHash = ZSTD_ldm_updateHash(rollingHash, cur[-1],
-                                          cur[ldmParams.minMatchLength-1],
-                                          state->hashPower);
-        ZSTD_ldm_makeEntryAndInsertByTag(state,
-                                         rollingHash, hBits,
-                                         (U32)(cur - base), ldmParams);
-        ++cur;
-    }
-    return rollingHash;
-}
-
-
-/** ZSTD_ldm_limitTableUpdate() :
- *
- *  Sets cctx->nextToUpdate to a position corresponding closer to anchor
- *  if it is far way
- *  (after a long match, only update tables a limited amount). */
-static void ZSTD_ldm_limitTableUpdate(ZSTD_CCtx* cctx, const BYTE* anchor)
-{
-    U32 const current = (U32)(anchor - cctx->base);
-    if (current > cctx->nextToUpdate + 1024) {
-        cctx->nextToUpdate =
-            current - MIN(512, current - cctx->nextToUpdate - 1024);
-    }
-}
-
-/** ZSTD_compressBlock_ldm_generic() :
- *
- *  This is a block compressor intended for long distance matching.
- *
- *  The function searches for matches of length at least
- *  ldmParams.minMatchLength using a hash table in cctx->ldmState.
- *  Matches can be at a distance of up to cParams.windowLog.
- *
- *  Upon finding a match, the unmatched literals are compressed using a
- *  ZSTD_blockCompressor (depending on the strategy in the compression
- *  parameters), which stores the matched sequences. The "long distance"
- *  match is then stored with the remaining literals from the
- *  ZSTD_blockCompressor. */
-FORCE_INLINE_TEMPLATE
-size_t ZSTD_compressBlock_ldm_generic(ZSTD_CCtx* cctx,
-                                      const void* src, size_t srcSize)
-{
-    ldmState_t* const ldmState = &(cctx->ldmState);
-    const ldmParams_t ldmParams = cctx->appliedParams.ldmParams;
-    const U64 hashPower = ldmState->hashPower;
-    const U32 hBits = ldmParams.hashLog - ldmParams.bucketSizeLog;
-    const U32 ldmBucketSize = (1 << ldmParams.bucketSizeLog);
-    const U32 ldmTagMask = (1 << ldmParams.hashEveryLog) - 1;
-    seqStore_t* const seqStorePtr = &(cctx->seqStore);
-    const BYTE* const base = cctx->base;
-    const BYTE* const istart = (const BYTE*)src;
-    const BYTE* ip = istart;
-    const BYTE* anchor = istart;
-    const U32   lowestIndex = cctx->dictLimit;
-    const BYTE* const lowest = base + lowestIndex;
-    const BYTE* const iend = istart + srcSize;
-    const BYTE* const ilimit = iend - ldmParams.minMatchLength;
-
-    const ZSTD_blockCompressor blockCompressor =
-        ZSTD_selectBlockCompressor(cctx->appliedParams.cParams.strategy, 0);
-    U32* const repToConfirm = seqStorePtr->repToConfirm;
-    U32 savedRep[ZSTD_REP_NUM];
-    U64 rollingHash = 0;
-    const BYTE* lastHashed = NULL;
-    size_t i, lastLiterals;
-
-    /* Save seqStorePtr->rep and copy repToConfirm */
-    for (i = 0; i < ZSTD_REP_NUM; i++)
-        savedRep[i] = repToConfirm[i] = seqStorePtr->rep[i];
-
-    /* Main Search Loop */
-    while (ip < ilimit) {   /* < instead of <=, because repcode check at (ip+1) */
-        size_t mLength;
-        U32 const current = (U32)(ip - base);
-        size_t forwardMatchLength = 0, backwardMatchLength = 0;
-        ldmEntry_t* bestEntry = NULL;
-        if (ip != istart) {
-            rollingHash = ZSTD_ldm_updateHash(rollingHash, lastHashed[0],
-                                              lastHashed[ldmParams.minMatchLength],
-                                              hashPower);
-        } else {
-            rollingHash = ZSTD_ldm_getRollingHash(ip, ldmParams.minMatchLength);
-        }
-        lastHashed = ip;
-
-        /* Do not insert and do not look for a match */
-        if (ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashEveryLog) !=
-                ldmTagMask) {
-           ip++;
-           continue;
-        }
-
-        /* Get the best entry and compute the match lengths */
-        {
-            ldmEntry_t* const bucket =
-                ZSTD_ldm_getBucket(ldmState,
-                                   ZSTD_ldm_getSmallHash(rollingHash, hBits),
-                                   ldmParams);
-            ldmEntry_t* cur;
-            size_t bestMatchLength = 0;
-            U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);
-
-            for (cur = bucket; cur < bucket + ldmBucketSize; ++cur) {
-                const BYTE* const pMatch = cur->offset + base;
-                size_t curForwardMatchLength, curBackwardMatchLength,
-                       curTotalMatchLength;
-                if (cur->checksum != checksum || cur->offset <= lowestIndex) {
-                    continue;
-                }
-
-                curForwardMatchLength = ZSTD_count(ip, pMatch, iend);
-                if (curForwardMatchLength < ldmParams.minMatchLength) {
-                    continue;
-                }
-                curBackwardMatchLength = ZSTD_ldm_countBackwardsMatch(
-                                             ip, anchor, pMatch, lowest);
-                curTotalMatchLength = curForwardMatchLength +
-                                      curBackwardMatchLength;
-
-                if (curTotalMatchLength > bestMatchLength) {
-                    bestMatchLength = curTotalMatchLength;
-                    forwardMatchLength = curForwardMatchLength;
-                    backwardMatchLength = curBackwardMatchLength;
-                    bestEntry = cur;
-                }
-            }
-        }
-
-        /* No match found -- continue searching */
-        if (bestEntry == NULL) {
-            ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash,
-                                             hBits, current,
-                                             ldmParams);
-            ip++;
-            continue;
-        }
-
-        /* Match found */
-        mLength = forwardMatchLength + backwardMatchLength;
-        ip -= backwardMatchLength;
-
-        /* Call the block compressor on the remaining literals */
-        {
-            U32 const matchIndex = bestEntry->offset;
-            const BYTE* const match = base + matchIndex - backwardMatchLength;
-            U32 const offset = (U32)(ip - match);
-
-            /* Overwrite rep codes */
-            for (i = 0; i < ZSTD_REP_NUM; i++)
-                seqStorePtr->rep[i] = repToConfirm[i];
-
-            /* Fill tables for block compressor */
-            ZSTD_ldm_limitTableUpdate(cctx, anchor);
-            ZSTD_ldm_fillFastTables(cctx, anchor);
-
-            /* Call block compressor and get remaining literals */
-            lastLiterals = blockCompressor(cctx, anchor, ip - anchor);
-            cctx->nextToUpdate = (U32)(ip - base);
-
-            /* Update repToConfirm with the new offset */
-            for (i = ZSTD_REP_NUM - 1; i > 0; i--)
-                repToConfirm[i] = repToConfirm[i-1];
-            repToConfirm[0] = offset;
-
-            /* Store the sequence with the leftover literals */
-            ZSTD_storeSeq(seqStorePtr, lastLiterals, ip - lastLiterals,
-                          offset + ZSTD_REP_MOVE, mLength - MINMATCH);
-        }
-
-        /* Insert the current entry into the hash table */
-        ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits,
-                                         (U32)(lastHashed - base),
-                                         ldmParams);
-
-        assert(ip + backwardMatchLength == lastHashed);
-
-        /* Fill the hash table from lastHashed+1 to ip+mLength*/
-        /* Heuristic: don't need to fill the entire table at end of block */
-        if (ip + mLength < ilimit) {
-            rollingHash = ZSTD_ldm_fillLdmHashTable(
-                              ldmState, rollingHash, lastHashed,
-                              ip + mLength, base, hBits, ldmParams);
-            lastHashed = ip + mLength - 1;
-        }
-        ip += mLength;
-        anchor = ip;
-        /* Check immediate repcode */
-        while ( (ip < ilimit)
-             && ( (repToConfirm[1] > 0) && (repToConfirm[1] <= (U32)(ip-lowest))
-             && (MEM_read32(ip) == MEM_read32(ip - repToConfirm[1])) )) {
-
-            size_t const rLength = ZSTD_count(ip+4, ip+4-repToConfirm[1],
-                                              iend) + 4;
-            /* Swap repToConfirm[1] <=> repToConfirm[0] */
-            {
-                U32 const tmpOff = repToConfirm[1];
-                repToConfirm[1] = repToConfirm[0];
-                repToConfirm[0] = tmpOff;
-            }
-
-            ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, rLength-MINMATCH);
-
-            /* Fill the  hash table from lastHashed+1 to ip+rLength*/
-            if (ip + rLength < ilimit) {
-                rollingHash = ZSTD_ldm_fillLdmHashTable(
-                                ldmState, rollingHash, lastHashed,
-                                ip + rLength, base, hBits, ldmParams);
-                lastHashed = ip + rLength - 1;
-            }
-            ip += rLength;
-            anchor = ip;
-        }
-    }
-
-    /* Overwrite rep */
-    for (i = 0; i < ZSTD_REP_NUM; i++)
-        seqStorePtr->rep[i] = repToConfirm[i];
-
-    ZSTD_ldm_limitTableUpdate(cctx, anchor);
-    ZSTD_ldm_fillFastTables(cctx, anchor);
-
-    lastLiterals = blockCompressor(cctx, anchor, iend - anchor);
-    cctx->nextToUpdate = (U32)(iend - base);
-
-    /* Restore seqStorePtr->rep */
-    for (i = 0; i < ZSTD_REP_NUM; i++)
-        seqStorePtr->rep[i] = savedRep[i];
-
-    /* Return the last literals size */
-    return lastLiterals;
-}
-
-static size_t ZSTD_compressBlock_ldm(ZSTD_CCtx* ctx,
-                                     const void* src, size_t srcSize)
-{
-    return ZSTD_compressBlock_ldm_generic(ctx, src, srcSize);
-}
-
-static size_t ZSTD_compressBlock_ldm_extDict_generic(
-                                 ZSTD_CCtx* ctx,
-                                 const void* src, size_t srcSize)
-{
-    ldmState_t* const ldmState = &(ctx->ldmState);
-    const ldmParams_t ldmParams = ctx->appliedParams.ldmParams;
-    const U64 hashPower = ldmState->hashPower;
-    const U32 hBits = ldmParams.hashLog - ldmParams.bucketSizeLog;
-    const U32 ldmBucketSize = (1 << ldmParams.bucketSizeLog);
-    const U32 ldmTagMask = (1 << ldmParams.hashEveryLog) - 1;
-    seqStore_t* const seqStorePtr = &(ctx->seqStore);
-    const BYTE* const base = ctx->base;
-    const BYTE* const dictBase = ctx->dictBase;
-    const BYTE* const istart = (const BYTE*)src;
-    const BYTE* ip = istart;
-    const BYTE* anchor = istart;
-    const U32   lowestIndex = ctx->lowLimit;
-    const BYTE* const dictStart = dictBase + lowestIndex;
-    const U32   dictLimit = ctx->dictLimit;
-    const BYTE* const lowPrefixPtr = base + dictLimit;
-    const BYTE* const dictEnd = dictBase + dictLimit;
-    const BYTE* const iend = istart + srcSize;
-    const BYTE* const ilimit = iend - ldmParams.minMatchLength;
-
-    const ZSTD_blockCompressor blockCompressor =
-        ZSTD_selectBlockCompressor(ctx->appliedParams.cParams.strategy, 1);
-    U32* const repToConfirm = seqStorePtr->repToConfirm;
-    U32 savedRep[ZSTD_REP_NUM];
-    U64 rollingHash = 0;
-    const BYTE* lastHashed = NULL;
-    size_t i, lastLiterals;
-
-    /* Save seqStorePtr->rep and copy repToConfirm */
-    for (i = 0; i < ZSTD_REP_NUM; i++) {
-        savedRep[i] = repToConfirm[i] = seqStorePtr->rep[i];
-    }
-
-    /* Search Loop */
-    while (ip < ilimit) {  /* < instead of <=, because (ip+1) */
-        size_t mLength;
-        const U32 current = (U32)(ip-base);
-        size_t forwardMatchLength = 0, backwardMatchLength = 0;
-        ldmEntry_t* bestEntry = NULL;
-        if (ip != istart) {
-          rollingHash = ZSTD_ldm_updateHash(rollingHash, lastHashed[0],
-                                       lastHashed[ldmParams.minMatchLength],
-                                       hashPower);
-        } else {
-            rollingHash = ZSTD_ldm_getRollingHash(ip, ldmParams.minMatchLength);
-        }
-        lastHashed = ip;
-
-        if (ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashEveryLog) !=
-                ldmTagMask) {
-            /* Don't insert and don't look for a match */
-           ip++;
-           continue;
-        }
-
-        /* Get the best entry and compute the match lengths */
-        {
-            ldmEntry_t* const bucket =
-                ZSTD_ldm_getBucket(ldmState,
-                                   ZSTD_ldm_getSmallHash(rollingHash, hBits),
-                                   ldmParams);
-            ldmEntry_t* cur;
-            size_t bestMatchLength = 0;
-            U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);
-
-            for (cur = bucket; cur < bucket + ldmBucketSize; ++cur) {
-                const BYTE* const curMatchBase =
-                    cur->offset < dictLimit ? dictBase : base;
-                const BYTE* const pMatch = curMatchBase + cur->offset;
-                const BYTE* const matchEnd =
-                    cur->offset < dictLimit ? dictEnd : iend;
-                const BYTE* const lowMatchPtr =
-                    cur->offset < dictLimit ? dictStart : lowPrefixPtr;
-                size_t curForwardMatchLength, curBackwardMatchLength,
-                       curTotalMatchLength;
-
-                if (cur->checksum != checksum || cur->offset <= lowestIndex) {
-                    continue;
-                }
-
-                curForwardMatchLength = ZSTD_count_2segments(
-                                            ip, pMatch, iend,
-                                            matchEnd, lowPrefixPtr);
-                if (curForwardMatchLength < ldmParams.minMatchLength) {
-                    continue;
-                }
-                curBackwardMatchLength = ZSTD_ldm_countBackwardsMatch(
-                                             ip, anchor, pMatch, lowMatchPtr);
-                curTotalMatchLength = curForwardMatchLength +
-                                      curBackwardMatchLength;
-
-                if (curTotalMatchLength > bestMatchLength) {
-                    bestMatchLength = curTotalMatchLength;
-                    forwardMatchLength = curForwardMatchLength;
-                    backwardMatchLength = curBackwardMatchLength;
-                    bestEntry = cur;
-                }
-            }
-        }
-
-        /* No match found -- continue searching */
-        if (bestEntry == NULL) {
-            ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits,
-                                             (U32)(lastHashed - base),
-                                             ldmParams);
-            ip++;
-            continue;
-        }
-
-        /* Match found */
-        mLength = forwardMatchLength + backwardMatchLength;
-        ip -= backwardMatchLength;
-
-        /* Call the block compressor on the remaining literals */
-        {
-            /* ip = current - backwardMatchLength
-             * The match is at (bestEntry->offset - backwardMatchLength) */
-            U32 const matchIndex = bestEntry->offset;
-            U32 const offset = current - matchIndex;
-
-            /* Overwrite rep codes */
-            for (i = 0; i < ZSTD_REP_NUM; i++)
-                seqStorePtr->rep[i] = repToConfirm[i];
-
-            /* Fill the hash table for the block compressor */
-            ZSTD_ldm_limitTableUpdate(ctx, anchor);
-            ZSTD_ldm_fillFastTables(ctx, anchor);
-
-            /* Call block compressor and get remaining literals  */
-            lastLiterals = blockCompressor(ctx, anchor, ip - anchor);
-            ctx->nextToUpdate = (U32)(ip - base);
-
-            /* Update repToConfirm with the new offset */
-            for (i = ZSTD_REP_NUM - 1; i > 0; i--)
-                repToConfirm[i] = repToConfirm[i-1];
-            repToConfirm[0] = offset;
-
-            /* Store the sequence with the leftover literals */
-            ZSTD_storeSeq(seqStorePtr, lastLiterals, ip - lastLiterals,
-                          offset + ZSTD_REP_MOVE, mLength - MINMATCH);
-        }
-
-        /* Insert the current entry into the hash table */
-        ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits,
-                                         (U32)(lastHashed - base),
-                                         ldmParams);
-
-        /* Fill the hash table from lastHashed+1 to ip+mLength */
-        assert(ip + backwardMatchLength == lastHashed);
-        if (ip + mLength < ilimit) {
-            rollingHash = ZSTD_ldm_fillLdmHashTable(
-                              ldmState, rollingHash, lastHashed,
-                              ip + mLength, base, hBits,
-                              ldmParams);
-            lastHashed = ip + mLength - 1;
-        }
-        ip += mLength;
-        anchor = ip;
-
-        /* check immediate repcode */
-        while (ip < ilimit) {
-            U32 const current2 = (U32)(ip-base);
-            U32 const repIndex2 = current2 - repToConfirm[1];
-            const BYTE* repMatch2 = repIndex2 < dictLimit ?
-                                    dictBase + repIndex2 : base + repIndex2;
-            if ( (((U32)((dictLimit-1) - repIndex2) >= 3) &
-                        (repIndex2 > lowestIndex))  /* intentional overflow */
-               && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
-                const BYTE* const repEnd2 = repIndex2 < dictLimit ?
-                                            dictEnd : iend;
-                size_t const repLength2 =
-                        ZSTD_count_2segments(ip+4, repMatch2+4, iend,
-                                             repEnd2, lowPrefixPtr) + 4;
-
-                U32 tmpOffset = repToConfirm[1];
-                repToConfirm[1] = repToConfirm[0];
-                repToConfirm[0] = tmpOffset;
-
-                ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, repLength2-MINMATCH);
-
-                /* Fill the  hash table from lastHashed+1 to ip+repLength2*/
-                if (ip + repLength2 < ilimit) {
-                    rollingHash = ZSTD_ldm_fillLdmHashTable(
-                                      ldmState, rollingHash, lastHashed,
-                                      ip + repLength2, base, hBits,
-                                      ldmParams);
-                    lastHashed = ip + repLength2 - 1;
-                }
-                ip += repLength2;
-                anchor = ip;
-                continue;
-            }
-            break;
-        }
-    }
-
-    /* Overwrite rep */
-    for (i = 0; i < ZSTD_REP_NUM; i++)
-        seqStorePtr->rep[i] = repToConfirm[i];
-
-    ZSTD_ldm_limitTableUpdate(ctx, anchor);
-    ZSTD_ldm_fillFastTables(ctx, anchor);
-
-    /* Call the block compressor one last time on the last literals */
-    lastLiterals = blockCompressor(ctx, anchor, iend - anchor);
-    ctx->nextToUpdate = (U32)(iend - base);
-
-    /* Restore seqStorePtr->rep */
-    for (i = 0; i < ZSTD_REP_NUM; i++)
-        seqStorePtr->rep[i] = savedRep[i];
-
-    /* Return the last literals size */
-    return lastLiterals;
-}
-
-static size_t ZSTD_compressBlock_ldm_extDict(ZSTD_CCtx* ctx,
-                                             const void* src, size_t srcSize)
-{
-    return ZSTD_compressBlock_ldm_extDict_generic(ctx, src, srcSize);
-}
-
 static void ZSTD_storeLastLiterals(seqStore_t* seqStorePtr,
                                    const BYTE* anchor, size_t lastLLSize)
 {
diff --git a/lib/compress/zstd_ldm.c b/lib/compress/zstd_ldm.c
new file mode 100644 (file)
index 0000000..4b6d087
--- /dev/null
@@ -0,0 +1,702 @@
+/*
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ */
+
+#include "zstd_ldm.h"
+
+#include "zstd_fast.h"          /* ZSTD_fillHashTable() */
+#include "zstd_double_fast.h"   /* ZSTD_fillDoubleHashTable() */
+
+/* Default long-distance-matching settings, applied to an ldmParams_t by
+ * ZSTD_ldm_initializeParameters() :
+ * bucketSizeLog, minMatchLength and hashLog respectively. */
+#define LDM_BUCKET_SIZE_LOG 3
+#define LDM_MIN_MATCH_LENGTH 64
+#define LDM_HASH_LOG 20
+/* Constant added to every byte value before it is mixed into the
+ * rolling hash (see ZSTD_ldm_getRollingHash / ZSTD_ldm_updateHash). */
+#define LDM_HASH_CHAR_OFFSET 10
+
+/** ZSTD_ldm_initializeParameters() :
+ *  Loads the compile-time LDM defaults into `params` :
+ *  hashLog, bucketSizeLog and minMatchLength take their LDM_* values,
+ *  hashEveryLog is flagged as "not set", and enableLdm is normalized
+ *  to 0 or 1 from the `enableLdm` argument.
+ *  @return : 0 (always). */
+size_t ZSTD_ldm_initializeParameters(ldmParams_t* params, U32 enableLdm)
+{
+    ZSTD_STATIC_ASSERT(LDM_BUCKET_SIZE_LOG <= ZSTD_LDM_BUCKETSIZELOG_MAX);
+    params->hashEveryLog = ZSTD_LDM_HASHEVERYLOG_NOTSET;
+    params->minMatchLength = LDM_MIN_MATCH_LENGTH;
+    params->bucketSizeLog = LDM_BUCKET_SIZE_LOG;
+    params->hashLog = LDM_HASH_LOG;
+    params->enableLdm = (enableLdm != 0);
+    return 0;
+}
+
+/** ZSTD_ldm_adjustParameters() :
+ *  Finalizes `params` once windowLog is known.
+ *  - If hashEveryLog is still "not set", it becomes (windowLog - hashLog),
+ *    or 0 when windowLog is smaller than hashLog.
+ *  - bucketSizeLog is capped so that it never exceeds hashLog. */
+void ZSTD_ldm_adjustParameters(ldmParams_t* params, U32 windowLog)
+{
+    if (params->hashEveryLog == ZSTD_LDM_HASHEVERYLOG_NOTSET) {
+        if (windowLog < params->hashLog) {
+            params->hashEveryLog = 0;
+        } else {
+            params->hashEveryLog = windowLog - params->hashLog;
+        }
+    }
+    params->bucketSizeLog = MIN(params->bucketSizeLog, params->hashLog);
+}
+
+/** ZSTD_ldm_getTableSize() :
+ *  @return : (1 << hashLog) * sizeof(ldmEntry_t)
+ *            + (1 << (hashLog - min(bucketSizeLog, hashLog))).
+ *  The second term is one unit per bucket; presumably it accounts for the
+ *  BYTE-sized bucketOffsets array -- confirm against the allocation site. */
+size_t ZSTD_ldm_getTableSize(U32 hashLog, U32 bucketSizeLog) {
+    size_t const effectiveBucketLog = MIN(bucketSizeLog, hashLog);
+    size_t const entriesBytes =
+        (((size_t)1) << hashLog) * (sizeof(ldmEntry_t));
+    size_t const perBucketBytes =
+        ((size_t)1) << (hashLog - effectiveBucketLog);
+    return perBucketBytes + entriesBytes;
+}
+
+/** ZSTD_ldm_getSmallHash() :
+ *  Extracts the top numBits bits of a 64-bit value.
+ *  numBits must be <= 32.
+ *  @return : the most significant numBits of value, or 0 if numBits==0. */
+static U32 ZSTD_ldm_getSmallHash(U64 value, U32 numBits)
+{
+    assert(numBits <= 32);
+    if (numBits == 0) {
+        /* handled apart : the general formula would shift by 64 */
+        return 0;
+    }
+    return (U32)(value >> (64 - numBits));
+}
+
+/** ZSTD_ldm_getChecksum() :
+ *  Skips the numBitsToDiscard most significant bits of hash
+ *  (numBitsToDiscard must be <= 32) and keeps the 32 bits that follow.
+ *  @return : those 32 bits. */
+static U32 ZSTD_ldm_getChecksum(U64 hash, U32 numBitsToDiscard)
+{
+    U32 const shift = 32 - numBitsToDiscard;   /* == 64 - 32 - numBitsToDiscard */
+    assert(numBitsToDiscard <= 32);
+    return (U32)((hash >> shift) & 0xFFFFFFFF);
+}
+
+/** ZSTD_ldm_getTag() :
+ *  Given the hash, returns the most significant numTagBits bits
+ *  after (32 + hbits) bits.
+ *
+ *  If there are not enough bits remaining, return the last
+ *  numTagBits bits.
+ *
+ *  Both hbits and numTagBits must be <= 32.
+ *  The mask is built in 64-bit unsigned arithmetic : `1 << numTagBits`
+ *  on a plain int is undefined as soon as numTagBits >= 31, although
+ *  values up to 32 are accepted here. */
+static U32 ZSTD_ldm_getTag(U64 hash, U32 hbits, U32 numTagBits)
+{
+    U64 const tagMask = ((U64)1 << numTagBits) - 1;
+    assert(numTagBits <= 32 && hbits <= 32);
+    if (32 - hbits < numTagBits) {
+        /* fewer than numTagBits bits left below the checksum :
+         * fall back to the lowest numTagBits bits of the hash */
+        return (U32)(hash & tagMask);
+    } else {
+        return (U32)((hash >> (32 - hbits - numTagBits)) & tagMask);
+    }
+}
+
+/** ZSTD_ldm_getBucket() :
+ *  Buckets are stored back to back in ldmState->hashTable, each one
+ *  spanning (1 << bucketSizeLog) entries.
+ *  @return : pointer to the first entry of the bucket selected by hash. */
+static ldmEntry_t* ZSTD_ldm_getBucket(
+        ldmState_t* ldmState, size_t hash, ldmParams_t const ldmParams)
+{
+    size_t const firstSlot = hash << ldmParams.bucketSizeLog;
+    return &ldmState->hashTable[firstSlot];
+}
+
+/** ZSTD_ldm_insertEntry() :
+ *  Writes entry into the bucket selected by hash, at the slot recorded in
+ *  bucketOffsets[hash], then advances that slot index, wrapping around at
+ *  (1 << bucketSizeLog) so the oldest slot gets overwritten next. */
+static void ZSTD_ldm_insertEntry(ldmState_t* ldmState,
+                                 size_t const hash, const ldmEntry_t entry,
+                                 ldmParams_t const ldmParams)
+{
+    BYTE* const cursor = ldmState->bucketOffsets + hash;
+    ldmEntry_t* const bucket = ZSTD_ldm_getBucket(ldmState, hash, ldmParams);
+    bucket[*cursor] = entry;
+    *cursor = (BYTE)((*cursor + 1) & ((1 << ldmParams.bucketSizeLog) - 1));
+}
+
+/** ZSTD_ldm_makeEntryAndInsertByTag() :
+ *
+ *  Splits rollingHash into three fields :
+ *  - the small hash : its hBits most significant bits (bucket selector),
+ *  - the checksum   : the following 32 bits,
+ *  - the tag        : ldmParams.hashEveryLog bits taken after those.
+ *
+ *  Only when the tag equals (1 << ldmParams.hashEveryLog)-1 is an ldmEntry
+ *  {offset, checksum} built and inserted into the hash table;
+ *  otherwise the call does nothing. */
+static void ZSTD_ldm_makeEntryAndInsertByTag(ldmState_t* ldmState,
+                                             U64 const rollingHash,
+                                             U32 const hBits,
+                                             U32 const offset,
+                                             ldmParams_t const ldmParams)
+{
+    U32 const tagMask = (1 << ldmParams.hashEveryLog) - 1;
+    ldmEntry_t entry;
+
+    if (ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashEveryLog) != tagMask) {
+        return;   /* this position is not sampled */
+    }
+
+    entry.offset = offset;
+    entry.checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);
+    ZSTD_ldm_insertEntry(ldmState,
+                         ZSTD_ldm_getSmallHash(rollingHash, hBits),
+                         entry, ldmParams);
+}
+
+/** ZSTD_ldm_getRollingHash() :
+ *  Computes from scratch the 64-bit polynomial hash of buf[0..len-1].
+ *
+ *  With a = prime8bytes and c = LDM_HASH_CHAR_OFFSET, for bytes
+ *  s_1 ... s_k the result is
+ *  H(s) = (s_1 + c)*(a^(k-1)) + (s_2 + c)*(a^(k-2)) + ... + (s_k + c)*(a^0)
+ *  evaluated in wrapping 64-bit arithmetic (Horner's scheme). */
+static U64 ZSTD_ldm_getRollingHash(const BYTE* buf, U32 len)
+{
+    const BYTE* const bufEnd = buf + len;
+    const BYTE* p;
+    U64 hash = 0;
+    for (p = buf; p < bufEnd; ++p) {
+        hash = (hash * prime8bytes) + (U64)(*p + LDM_HASH_CHAR_OFFSET);
+    }
+    return hash;
+}
+
+/** ZSTD_ldm_ipow() :
+ *  Integer power by repeated squaring, in wrapping 64-bit arithmetic.
+ *  @return : base^exp (1 when exp==0). */
+static U64 ZSTD_ldm_ipow(U64 base, U64 exp)
+{
+    U64 result = 1;
+    U64 square = base;
+    U64 remaining;
+    for (remaining = exp; remaining != 0; remaining >>= 1) {
+        if (remaining & 1) { result *= square; }
+        square *= square;
+    }
+    return result;
+}
+
+/** ZSTD_ldm_getHashPower() :
+ *  @return : prime8bytes^(minMatchLength-1), i.e. the weight carried by the
+ *            oldest byte of a minMatchLength-byte rolling hash window.
+ *  minMatchLength must be >= ZSTD_LDM_MINMATCH_MIN. */
+U64 ZSTD_ldm_getHashPower(U32 minMatchLength) {
+    U32 const exponent = minMatchLength - 1;
+    assert(minMatchLength >= ZSTD_LDM_MINMATCH_MIN);
+    return ZSTD_ldm_ipow(prime8bytes, exponent);
+}
+
+/** ZSTD_ldm_updateHash() :
+ *  Slides the rolling hash window forward by one byte :
+ *  the contribution of toRemove (weighted by hashPower) is subtracted,
+ *  the remainder is multiplied by prime8bytes, and toAdd is appended.
+ *  Both bytes are biased by LDM_HASH_CHAR_OFFSET.
+ *  @return : the updated hash. */
+static U64 ZSTD_ldm_updateHash(U64 hash, BYTE toRemove, BYTE toAdd, U64 hashPower)
+{
+    U64 const leaving = (U64)(toRemove + LDM_HASH_CHAR_OFFSET) * hashPower;
+    U64 const entering = (U64)(toAdd + LDM_HASH_CHAR_OFFSET);
+    return ((hash - leaving) * prime8bytes) + entering;
+}
+
+/** ZSTD_ldm_countBackwardsMatch() :
+ *  Walks backwards from pIn and pMatch in lockstep, comparing the byte
+ *  just before each pointer, and stops at the first mismatch or as soon
+ *  as either pointer reaches its lower bound (pAnchor for pIn, pBase for
+ *  pMatch). No byte below those bounds is ever read.
+ *  @return : the number of bytes that matched. */
+static size_t ZSTD_ldm_countBackwardsMatch(
+            const BYTE* pIn, const BYTE* pAnchor,
+            const BYTE* pMatch, const BYTE* pBase)
+{
+    const BYTE* in = pIn;
+    const BYTE* match = pMatch;
+    for ( ; in > pAnchor && match > pBase; --in, --match) {
+        if (in[-1] != match[-1]) break;
+    }
+    return (size_t)(pIn - in);
+}
+
+/** ZSTD_ldm_fillFastTables() :
+ *
+ *  For the ZSTD_fast and ZSTD_dfast strategies, fills the corresponding
+ *  hash table(s) up to `end`, then moves zc->nextToUpdate to `end`.
+ *  This is similar to ZSTD_loadDictionaryContent.
+ *
+ *  Nothing is done for the other strategies : their tables are filled
+ *  within their block compressors.
+ *
+ *  @return : 0 (always). */
+static size_t ZSTD_ldm_fillFastTables(ZSTD_CCtx* zc, const void* end)
+{
+    const BYTE* const fillEnd = (const BYTE*)end;
+    const U32 minMatch = zc->appliedParams.cParams.searchLength;
+
+    switch(zc->appliedParams.cParams.strategy)
+    {
+    case ZSTD_greedy:
+    case ZSTD_lazy:
+    case ZSTD_lazy2:
+    case ZSTD_btlazy2:
+    case ZSTD_btopt:
+    case ZSTD_btultra:
+        return 0;   /* no table to pre-fill here */
+
+    case ZSTD_fast:
+        ZSTD_fillHashTable(zc, fillEnd, minMatch);
+        break;
+
+    case ZSTD_dfast:
+        ZSTD_fillDoubleHashTable(zc, fillEnd, minMatch);
+        break;
+
+    default:
+        assert(0);  /* not possible : not a valid strategy id */
+        return 0;
+    }
+
+    /* reached only after a fast / dfast fill */
+    zc->nextToUpdate = (U32)(fillEnd - zc->base);
+    return 0;
+}
+
+/** ZSTD_ldm_fillLdmHashTable() :
+ *
+ *  Rolls the hash over every position in [lastHashed + 1, iend) and offers
+ *  each one to the LDM hash table (insertion happens only for positions
+ *  whose tag qualifies, see ZSTD_ldm_makeEntryAndInsertByTag()).
+ *  lastHash must be the rolling hash of the window starting at lastHashed.
+ *
+ *  @return : the rolling hash corresponding to position iend-1
+ *            (lastHash unchanged if the range is empty). */
+static U64 ZSTD_ldm_fillLdmHashTable(ldmState_t* state,
+                                     U64 lastHash, const BYTE* lastHashed,
+                                     const BYTE* iend, const BYTE* base,
+                                     U32 hBits, ldmParams_t const ldmParams)
+{
+    U64 hash = lastHash;
+    const BYTE* pos;
+
+    for (pos = lastHashed + 1; pos < iend; pos++) {
+        /* drop the byte just left behind, take in the window's new last byte */
+        hash = ZSTD_ldm_updateHash(hash, pos[-1],
+                                   pos[ldmParams.minMatchLength-1],
+                                   state->hashPower);
+        ZSTD_ldm_makeEntryAndInsertByTag(state, hash, hBits,
+                                         (U32)(pos - base), ldmParams);
+    }
+    return hash;
+}
+
+
+/** ZSTD_ldm_limitTableUpdate() :
+ *
+ *  When anchor lies more than 1024 positions past cctx->nextToUpdate,
+ *  moves cctx->nextToUpdate forward so that it ends up at most 512
+ *  positions before anchor
+ *  (after a long match, only update tables a limited amount).
+ *  Otherwise cctx->nextToUpdate is left untouched. */
+static void ZSTD_ldm_limitTableUpdate(ZSTD_CCtx* cctx, const BYTE* anchor)
+{
+    U32 const anchorIndex = (U32)(anchor - cctx->base);
+    U32 const pending = cctx->nextToUpdate;
+    if (anchorIndex <= pending + 1024) {
+        return;   /* close enough : nothing to skip */
+    }
+    cctx->nextToUpdate =
+        anchorIndex - MIN(512, anchorIndex - pending - 1024);
+}
+
+/* Function-pointer type of a block compressor : it is handed a context and a
+ * source range, and returns a size_t which the LDM code below stores as the
+ * size of the last literals. */
+typedef size_t (*ZSTD_blockCompressor) (ZSTD_CCtx* ctx, const void* src, size_t srcSize);
+/* defined in zstd_compress.c */
+/* Called below with the applied strategy and extDict = 0 or 1. */
+ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict);
+
+/** ZSTD_compressBlock_ldm_generic() :
+ *  Long-distance-matching pass over one block, single-segment variant :
+ *  match candidates are addressed from `base` and must sit above dictLimit.
+ *
+ *  A rolling hash over minMatchLength bytes is advanced one position at a
+ *  time. Positions whose tag equals ldmTagMask are looked up in the LDM hash
+ *  table; among the bucket entries with a matching checksum, the one giving
+ *  the longest (forward + backward) match is stored as a sequence.
+ *  The bytes between `anchor` and each such match, as well as the tail of
+ *  the block, are handed to the block compressor selected for the applied
+ *  strategy.
+ *
+ *  seqStorePtr->rep is temporarily overwritten with repToConfirm around each
+ *  block compressor call and restored from savedRep before returning.
+ *
+ *  @return : the value returned by the final block compressor call
+ *            (the last literals size). */
+FORCE_INLINE_TEMPLATE
+size_t ZSTD_compressBlock_ldm_generic(ZSTD_CCtx* cctx,
+                                      const void* src, size_t srcSize)
+{
+    ldmState_t* const ldmState = &(cctx->ldmState);
+    const ldmParams_t ldmParams = cctx->appliedParams.ldmParams;
+    const U64 hashPower = ldmState->hashPower;
+    const U32 hBits = ldmParams.hashLog - ldmParams.bucketSizeLog;
+    const U32 ldmBucketSize = (1 << ldmParams.bucketSizeLog);
+    const U32 ldmTagMask = (1 << ldmParams.hashEveryLog) - 1;
+    seqStore_t* const seqStorePtr = &(cctx->seqStore);
+    const BYTE* const base = cctx->base;
+    const BYTE* const istart = (const BYTE*)src;
+    const BYTE* ip = istart;
+    const BYTE* anchor = istart;
+    const U32   lowestIndex = cctx->dictLimit;
+    const BYTE* const lowest = base + lowestIndex;
+    const BYTE* const iend = istart + srcSize;
+    /* last position at which a full minMatchLength window still fits */
+    const BYTE* const ilimit = iend - ldmParams.minMatchLength;
+
+    const ZSTD_blockCompressor blockCompressor =
+        ZSTD_selectBlockCompressor(cctx->appliedParams.cParams.strategy, 0);
+    U32* const repToConfirm = seqStorePtr->repToConfirm;
+    U32 savedRep[ZSTD_REP_NUM];
+    U64 rollingHash = 0;
+    const BYTE* lastHashed = NULL;
+    size_t i, lastLiterals;
+
+    /* Save seqStorePtr->rep and copy repToConfirm */
+    for (i = 0; i < ZSTD_REP_NUM; i++)
+        savedRep[i] = repToConfirm[i] = seqStorePtr->rep[i];
+
+    /* Main Search Loop */
+    while (ip < ilimit) {   /* < instead of <=, because repcode check at (ip+1) */
+        size_t mLength;
+        U32 const current = (U32)(ip - base);
+        size_t forwardMatchLength = 0, backwardMatchLength = 0;
+        ldmEntry_t* bestEntry = NULL;
+        /* First position : hash the window from scratch.
+         * Afterwards : slide the window by one byte from lastHashed.
+         * NOTE(review): the incremental update presumes ip == lastHashed + 1. */
+        if (ip != istart) {
+            rollingHash = ZSTD_ldm_updateHash(rollingHash, lastHashed[0],
+                                              lastHashed[ldmParams.minMatchLength],
+                                              hashPower);
+        } else {
+            rollingHash = ZSTD_ldm_getRollingHash(ip, ldmParams.minMatchLength);
+        }
+        lastHashed = ip;
+
+        /* Do not insert and do not look for a match */
+        if (ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashEveryLog) !=
+                ldmTagMask) {
+           ip++;
+           continue;
+        }
+
+        /* Get the best entry and compute the match lengths */
+        {
+            ldmEntry_t* const bucket =
+                ZSTD_ldm_getBucket(ldmState,
+                                   ZSTD_ldm_getSmallHash(rollingHash, hBits),
+                                   ldmParams);
+            ldmEntry_t* cur;
+            size_t bestMatchLength = 0;
+            U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);
+
+            for (cur = bucket; cur < bucket + ldmBucketSize; ++cur) {
+                const BYTE* const pMatch = cur->offset + base;
+                size_t curForwardMatchLength, curBackwardMatchLength,
+                       curTotalMatchLength;
+                /* Skip entries with a different checksum, or located at or
+                 * below dictLimit */
+                if (cur->checksum != checksum || cur->offset <= lowestIndex) {
+                    continue;
+                }
+
+                /* Verify the candidate on actual bytes */
+                curForwardMatchLength = ZSTD_count(ip, pMatch, iend);
+                if (curForwardMatchLength < ldmParams.minMatchLength) {
+                    continue;
+                }
+                curBackwardMatchLength = ZSTD_ldm_countBackwardsMatch(
+                                             ip, anchor, pMatch, lowest);
+                curTotalMatchLength = curForwardMatchLength +
+                                      curBackwardMatchLength;
+
+                if (curTotalMatchLength > bestMatchLength) {
+                    bestMatchLength = curTotalMatchLength;
+                    forwardMatchLength = curForwardMatchLength;
+                    backwardMatchLength = curBackwardMatchLength;
+                    bestEntry = cur;
+                }
+            }
+        }
+
+        /* No match found -- continue searching */
+        if (bestEntry == NULL) {
+            ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash,
+                                             hBits, current,
+                                             ldmParams);
+            ip++;
+            continue;
+        }
+
+        /* Match found */
+        mLength = forwardMatchLength + backwardMatchLength;
+        /* Rewind ip to the start of the backward-extended match */
+        ip -= backwardMatchLength;
+
+        /* Call the block compressor on the remaining literals */
+        {
+            U32 const matchIndex = bestEntry->offset;
+            const BYTE* const match = base + matchIndex - backwardMatchLength;
+            U32 const offset = (U32)(ip - match);
+
+            /* Overwrite rep codes */
+            for (i = 0; i < ZSTD_REP_NUM; i++)
+                seqStorePtr->rep[i] = repToConfirm[i];
+
+            /* Fill tables for block compressor */
+            ZSTD_ldm_limitTableUpdate(cctx, anchor);
+            ZSTD_ldm_fillFastTables(cctx, anchor);
+
+            /* Call block compressor and get remaining literals */
+            lastLiterals = blockCompressor(cctx, anchor, ip - anchor);
+            cctx->nextToUpdate = (U32)(ip - base);
+
+            /* Update repToConfirm with the new offset */
+            for (i = ZSTD_REP_NUM - 1; i > 0; i--)
+                repToConfirm[i] = repToConfirm[i-1];
+            repToConfirm[0] = offset;
+
+            /* Store the sequence with the leftover literals */
+            ZSTD_storeSeq(seqStorePtr, lastLiterals, ip - lastLiterals,
+                          offset + ZSTD_REP_MOVE, mLength - MINMATCH);
+        }
+
+        /* Insert the current entry into the hash table */
+        ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits,
+                                         (U32)(lastHashed - base),
+                                         ldmParams);
+
+        assert(ip + backwardMatchLength == lastHashed);
+
+        /* Fill the hash table from lastHashed+1 to ip+mLength*/
+        /* Heuristic: don't need to fill the entire table at end of block */
+        if (ip + mLength < ilimit) {
+            rollingHash = ZSTD_ldm_fillLdmHashTable(
+                              ldmState, rollingHash, lastHashed,
+                              ip + mLength, base, hBits, ldmParams);
+            lastHashed = ip + mLength - 1;
+        }
+        ip += mLength;
+        anchor = ip;
+        /* Check immediate repcode */
+        /* Only repToConfirm[1] is tried, and only if it points at or above
+         * `lowest` */
+        while ( (ip < ilimit)
+             && ( (repToConfirm[1] > 0) && (repToConfirm[1] <= (U32)(ip-lowest))
+             && (MEM_read32(ip) == MEM_read32(ip - repToConfirm[1])) )) {
+
+            size_t const rLength = ZSTD_count(ip+4, ip+4-repToConfirm[1],
+                                              iend) + 4;
+            /* Swap repToConfirm[1] <=> repToConfirm[0] */
+            {
+                U32 const tmpOff = repToConfirm[1];
+                repToConfirm[1] = repToConfirm[0];
+                repToConfirm[0] = tmpOff;
+            }
+
+            /* Sequence with no literals and an offset code of 0 */
+            ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, rLength-MINMATCH);
+
+            /* Fill the  hash table from lastHashed+1 to ip+rLength*/
+            if (ip + rLength < ilimit) {
+                rollingHash = ZSTD_ldm_fillLdmHashTable(
+                                ldmState, rollingHash, lastHashed,
+                                ip + rLength, base, hBits, ldmParams);
+                lastHashed = ip + rLength - 1;
+            }
+            ip += rLength;
+            anchor = ip;
+        }
+    }
+
+    /* Overwrite rep */
+    for (i = 0; i < ZSTD_REP_NUM; i++)
+        seqStorePtr->rep[i] = repToConfirm[i];
+
+    ZSTD_ldm_limitTableUpdate(cctx, anchor);
+    ZSTD_ldm_fillFastTables(cctx, anchor);
+
+    /* Call the block compressor one last time, on the bytes after the
+     * last stored match */
+    lastLiterals = blockCompressor(cctx, anchor, iend - anchor);
+    cctx->nextToUpdate = (U32)(iend - base);
+
+    /* Restore seqStorePtr->rep */
+    for (i = 0; i < ZSTD_REP_NUM; i++)
+        seqStorePtr->rep[i] = savedRep[i];
+
+    /* Return the last literals size */
+    return lastLiterals;
+}
+
+/** ZSTD_compressBlock_ldm() :
+ *  Non-inlined entry point around ZSTD_compressBlock_ldm_generic().
+ *  @return : the value produced by the generic implementation
+ *            (the last literals size). */
+size_t ZSTD_compressBlock_ldm(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
+{
+    size_t const lastLiteralsSize =
+        ZSTD_compressBlock_ldm_generic(ctx, src, srcSize);
+    return lastLiteralsSize;
+}
+
+static size_t ZSTD_compressBlock_ldm_extDict_generic(
+                                 ZSTD_CCtx* ctx,
+                                 const void* src, size_t srcSize)
+{
+    ldmState_t* const ldmState = &(ctx->ldmState);
+    const ldmParams_t ldmParams = ctx->appliedParams.ldmParams;
+    const U64 hashPower = ldmState->hashPower;
+    const U32 hBits = ldmParams.hashLog - ldmParams.bucketSizeLog;
+    const U32 ldmBucketSize = (1 << ldmParams.bucketSizeLog);
+    const U32 ldmTagMask = (1 << ldmParams.hashEveryLog) - 1;
+    seqStore_t* const seqStorePtr = &(ctx->seqStore);
+    const BYTE* const base = ctx->base;
+    const BYTE* const dictBase = ctx->dictBase;
+    const BYTE* const istart = (const BYTE*)src;
+    const BYTE* ip = istart;
+    const BYTE* anchor = istart;
+    const U32   lowestIndex = ctx->lowLimit;
+    const BYTE* const dictStart = dictBase + lowestIndex;
+    const U32   dictLimit = ctx->dictLimit;
+    const BYTE* const lowPrefixPtr = base + dictLimit;
+    const BYTE* const dictEnd = dictBase + dictLimit;
+    const BYTE* const iend = istart + srcSize;
+    const BYTE* const ilimit = iend - ldmParams.minMatchLength;
+
+    const ZSTD_blockCompressor blockCompressor =
+        ZSTD_selectBlockCompressor(ctx->appliedParams.cParams.strategy, 1);
+    U32* const repToConfirm = seqStorePtr->repToConfirm;
+    U32 savedRep[ZSTD_REP_NUM];
+    U64 rollingHash = 0;
+    const BYTE* lastHashed = NULL;
+    size_t i, lastLiterals;
+
+    /* Save seqStorePtr->rep and copy repToConfirm */
+    for (i = 0; i < ZSTD_REP_NUM; i++) {
+        savedRep[i] = repToConfirm[i] = seqStorePtr->rep[i];
+    }
+
+    /* Search Loop */
+    while (ip < ilimit) {  /* < instead of <=, because (ip+1) */
+        size_t mLength;
+        const U32 current = (U32)(ip-base);
+        size_t forwardMatchLength = 0, backwardMatchLength = 0;
+        ldmEntry_t* bestEntry = NULL;
+        if (ip != istart) {
+          rollingHash = ZSTD_ldm_updateHash(rollingHash, lastHashed[0],
+                                       lastHashed[ldmParams.minMatchLength],
+                                       hashPower);
+        } else {
+            rollingHash = ZSTD_ldm_getRollingHash(ip, ldmParams.minMatchLength);
+        }
+        lastHashed = ip;
+
+        if (ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashEveryLog) !=
+                ldmTagMask) {
+            /* Don't insert and don't look for a match */
+           ip++;
+           continue;
+        }
+
+        /* Get the best entry and compute the match lengths */
+        {
+            ldmEntry_t* const bucket =
+                ZSTD_ldm_getBucket(ldmState,
+                                   ZSTD_ldm_getSmallHash(rollingHash, hBits),
+                                   ldmParams);
+            ldmEntry_t* cur;
+            size_t bestMatchLength = 0;
+            U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);
+
+            for (cur = bucket; cur < bucket + ldmBucketSize; ++cur) {
+                const BYTE* const curMatchBase =
+                    cur->offset < dictLimit ? dictBase : base;
+                const BYTE* const pMatch = curMatchBase + cur->offset;
+                const BYTE* const matchEnd =
+                    cur->offset < dictLimit ? dictEnd : iend;
+                const BYTE* const lowMatchPtr =
+                    cur->offset < dictLimit ? dictStart : lowPrefixPtr;
+                size_t curForwardMatchLength, curBackwardMatchLength,
+                       curTotalMatchLength;
+
+                if (cur->checksum != checksum || cur->offset <= lowestIndex) {
+                    continue;
+                }
+
+                curForwardMatchLength = ZSTD_count_2segments(
+                                            ip, pMatch, iend,
+                                            matchEnd, lowPrefixPtr);
+                if (curForwardMatchLength < ldmParams.minMatchLength) {
+                    continue;
+                }
+                curBackwardMatchLength = ZSTD_ldm_countBackwardsMatch(
+                                             ip, anchor, pMatch, lowMatchPtr);
+                curTotalMatchLength = curForwardMatchLength +
+                                      curBackwardMatchLength;
+
+                if (curTotalMatchLength > bestMatchLength) {
+                    bestMatchLength = curTotalMatchLength;
+                    forwardMatchLength = curForwardMatchLength;
+                    backwardMatchLength = curBackwardMatchLength;
+                    bestEntry = cur;
+                }
+            }
+        }
+
+        /* No match found -- continue searching */
+        if (bestEntry == NULL) {
+            ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits,
+                                             (U32)(lastHashed - base),
+                                             ldmParams);
+            ip++;
+            continue;
+        }
+
+        /* Match found */
+        mLength = forwardMatchLength + backwardMatchLength;
+        ip -= backwardMatchLength;
+
+        /* Call the block compressor on the remaining literals */
+        {
+            /* ip = current - backwardMatchLength
+             * The match is at (bestEntry->offset - backwardMatchLength) */
+            U32 const matchIndex = bestEntry->offset;
+            U32 const offset = current - matchIndex;
+
+            /* Overwrite rep codes */
+            for (i = 0; i < ZSTD_REP_NUM; i++)
+                seqStorePtr->rep[i] = repToConfirm[i];
+
+            /* Fill the hash table for the block compressor */
+            ZSTD_ldm_limitTableUpdate(ctx, anchor);
+            ZSTD_ldm_fillFastTables(ctx, anchor);
+
+            /* Call block compressor and get remaining literals  */
+            lastLiterals = blockCompressor(ctx, anchor, ip - anchor);
+            ctx->nextToUpdate = (U32)(ip - base);
+
+            /* Update repToConfirm with the new offset */
+            for (i = ZSTD_REP_NUM - 1; i > 0; i--)
+                repToConfirm[i] = repToConfirm[i-1];
+            repToConfirm[0] = offset;
+
+            /* Store the sequence with the leftover literals */
+            ZSTD_storeSeq(seqStorePtr, lastLiterals, ip - lastLiterals,
+                          offset + ZSTD_REP_MOVE, mLength - MINMATCH);
+        }
+
+        /* Insert the current entry into the hash table */
+        ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits,
+                                         (U32)(lastHashed - base),
+                                         ldmParams);
+
+        /* Fill the hash table from lastHashed+1 to ip+mLength */
+        assert(ip + backwardMatchLength == lastHashed);
+        if (ip + mLength < ilimit) {
+            rollingHash = ZSTD_ldm_fillLdmHashTable(
+                              ldmState, rollingHash, lastHashed,
+                              ip + mLength, base, hBits,
+                              ldmParams);
+            lastHashed = ip + mLength - 1;
+        }
+        ip += mLength;
+        anchor = ip;
+
+        /* check immediate repcode */
+        while (ip < ilimit) {
+            U32 const current2 = (U32)(ip-base);
+            U32 const repIndex2 = current2 - repToConfirm[1];
+            const BYTE* repMatch2 = repIndex2 < dictLimit ?
+                                    dictBase + repIndex2 : base + repIndex2;
+            if ( (((U32)((dictLimit-1) - repIndex2) >= 3) &
+                        (repIndex2 > lowestIndex))  /* intentional overflow */
+               && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
+                const BYTE* const repEnd2 = repIndex2 < dictLimit ?
+                                            dictEnd : iend;
+                size_t const repLength2 =
+                        ZSTD_count_2segments(ip+4, repMatch2+4, iend,
+                                             repEnd2, lowPrefixPtr) + 4;
+
+                U32 tmpOffset = repToConfirm[1];
+                repToConfirm[1] = repToConfirm[0];
+                repToConfirm[0] = tmpOffset;
+
+                ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, repLength2-MINMATCH);
+
+                /* Fill the hash table from lastHashed+1 to ip+repLength2 */
+                if (ip + repLength2 < ilimit) {
+                    rollingHash = ZSTD_ldm_fillLdmHashTable(
+                                      ldmState, rollingHash, lastHashed,
+                                      ip + repLength2, base, hBits,
+                                      ldmParams);
+                    lastHashed = ip + repLength2 - 1;
+                }
+                ip += repLength2;
+                anchor = ip;
+                continue;
+            }
+            break;
+        }
+    }
+
+    /* Overwrite rep */
+    for (i = 0; i < ZSTD_REP_NUM; i++)
+        seqStorePtr->rep[i] = repToConfirm[i];
+
+    ZSTD_ldm_limitTableUpdate(ctx, anchor);
+    ZSTD_ldm_fillFastTables(ctx, anchor);
+
+    /* Call the block compressor one last time on the last literals */
+    lastLiterals = blockCompressor(ctx, anchor, iend - anchor);
+    ctx->nextToUpdate = (U32)(iend - base);
+
+    /* Restore seqStorePtr->rep */
+    for (i = 0; i < ZSTD_REP_NUM; i++)
+        seqStorePtr->rep[i] = savedRep[i];
+
+    /* Return the last literals size */
+    return lastLiterals;
+}
+
+size_t ZSTD_compressBlock_ldm_extDict(ZSTD_CCtx* ctx,
+                                      const void* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_ldm_extDict_generic(ctx, src, srcSize);
+}
diff --git a/lib/compress/zstd_ldm.h b/lib/compress/zstd_ldm.h
new file mode 100644 (file)
index 0000000..7a62483
--- /dev/null
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ */
+
+#ifndef ZSTD_LDM_H
+#define ZSTD_LDM_H
+
+#include "zstd_compress.h"
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/*-*************************************
+*  Long distance matching
+***************************************/
+
+#define ZSTD_LDM_WINDOW_LOG 27
+#define ZSTD_LDM_HASHEVERYLOG_NOTSET 9999
+
+/** ZSTD_compressBlock_ldm() :
+ *
+ *  This is a block compressor intended for long distance matching.
+ *
+ *  The function searches for matches of length at least
+ *  ldmParams.minMatchLength using a hash table in cctx->ldmState.
+ *  Matches can be at a distance of up to (1 << cParams.windowLog) bytes.
+ *
+ *  Upon finding a match, the unmatched literals are compressed using a
+ *  ZSTD_blockCompressor (depending on the strategy in the compression
+ *  parameters), which stores the matched sequences. The "long distance"
+ *  match is then stored with the remaining literals from the
+ *  ZSTD_blockCompressor. */
+size_t ZSTD_compressBlock_ldm(ZSTD_CCtx* cctx, const void* src, size_t srcSize);
+size_t ZSTD_compressBlock_ldm_extDict(ZSTD_CCtx* ctx,
+                                      const void* src, size_t srcSize);
+
+/** ZSTD_ldm_initializeParameters() :
+ *  Initialize the long distance matching parameters to their default values. */
+size_t ZSTD_ldm_initializeParameters(ldmParams_t* params, U32 enableLdm);
+
+/** ZSTD_ldm_getTableSize() :
+ *  Estimate the space needed for long distance matching tables. */
+size_t ZSTD_ldm_getTableSize(U32 hashLog, U32 bucketSizeLog);
+
+/** ZSTD_ldm_getHashPower() :
+ *  Return prime8bytes^(minMatchLength-1) */
+U64 ZSTD_ldm_getHashPower(U32 minMatchLength);
+
+/** ZSTD_ldm_adjustParameters() :
+ *  If the params->hashEveryLog is not set, set it to its default value based on
+ *  windowLog and params->hashLog.
+ *
+ *  Ensures that params->bucketSizeLog is <= params->hashLog (setting it to
+ *  params->hashLog if it is not). */
+void ZSTD_ldm_adjustParameters(ldmParams_t* params, U32 windowLog);
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* ZSTD_LDM_H */