]> git.ipfire.org Git - thirdparty/zstd.git/commitdiff
Fixed speed regression
authorYann Collet <yann.collet.73@gmail.com>
Sun, 1 Nov 2015 11:40:22 +0000 (12:40 +0100)
committerYann Collet <yann.collet.73@gmail.com>
Sun, 1 Nov 2015 11:40:22 +0000 (12:40 +0100)
lib/zstdhc.c
programs/paramgrill.c

index 00882076690c4fa2df4098209150d89a4ac1df60..58615f8ccdbe6cf11e5e50a03dce9f71459100d2 100644 (file)
@@ -191,7 +191,7 @@ static size_t ZSTD_HC_hashPtr(const void* p, U32 h, U32 mls)
 *  HC Compression
 ***************************************/
 /* Update chains up to ip (excluded) */
-static U32 ZSTD_HC_insertAndFindFirstIndex (ZSTD_HC_CCtx* zc, const BYTE* ip, U32 mls)
+static U32 ZSTD_HC_insertAndFindFirstIndex  (ZSTD_HC_CCtx* zc, const BYTE* ip, U32 mls)
 {
     U32* const hashTable  = zc->hashTable;
     const U32 hashLog = zc->params.hashLog;
@@ -210,7 +210,7 @@ static U32 ZSTD_HC_insertAndFindFirstIndex (ZSTD_HC_CCtx* zc, const BYTE* ip, U3
     }
 
     zc->nextToUpdate = target;
-    return hashTable[ZSTD_HC_hashPtr(ip, hashLog, mls)];
+       return hashTable[ZSTD_HC_hashPtr(ip, hashLog, mls)];
 }
 
 
@@ -235,7 +235,7 @@ size_t ZSTD_HC_insertAndFindBestMatch (
     size_t ml=0;
 
     /* HC4 match finder */
-    matchIndex = ZSTD_HC_insertAndFindFirstIndex(zc, ip, matchLengthSearch);
+    matchIndex = ZSTD_HC_insertAndFindFirstIndex (zc, ip, matchLengthSearch);
 
     while ((matchIndex>=lowLimit) && (nbAttempts))
     {
@@ -273,7 +273,7 @@ size_t ZSTD_HC_insertAndFindBestMatch (
 }
 
 
-static size_t ZSTD_HC_insertAndFindBestMatch_selectMLS (
+FORCE_INLINE size_t ZSTD_HC_insertAndFindBestMatch_selectMLS (
                         ZSTD_HC_CCtx* zc,   /* Index table will be updated */
                         const BYTE* ip, const BYTE* const iLimit,
                         const BYTE** matchpos,
@@ -289,7 +289,7 @@ static size_t ZSTD_HC_insertAndFindBestMatch_selectMLS (
 }
 
 
-size_t ZSTD_HC_compressBlock_greedy(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+size_t ZSTD_HC_compressBlock_lazy(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
 {
     seqStore_t* seqStorePtr = &(ctx->seqStore);
     const BYTE* const istart = (const BYTE*)src;
@@ -308,49 +308,73 @@ size_t ZSTD_HC_compressBlock_greedy(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstS
     if (((ip-ctx->base) - ctx->dictLimit) < REPCODE_STARTVALUE) ip += REPCODE_STARTVALUE;
 
     /* Match Loop */
-    while (ip < ilimit)
+    while (ip <= ilimit)
     {
-        /* repcode */
+        size_t matchLength;
+        size_t offset;
+        const BYTE* start;
+
+        /* try to find a first match */
         if (MEM_read32(ip) == MEM_read32(ip - offset_2))
         {
-            /* store sequence */
-            size_t matchLength = ZSTD_count(ip+MINMATCH, ip+MINMATCH-offset_2, iend);
-            size_t litLength = ip-anchor;
-            size_t offset = offset_2;
+            /* repcode : we take it*/
+            size_t offtmp = offset_2;
+            size_t litLength = ip - anchor;
+            matchLength = ZSTD_count(ip+MINMATCH, ip+MINMATCH-offset_2, iend);
             offset_2 = offset_1;
-            offset_1 = offset;
+            offset_1 = offtmp;
             ZSTD_storeSeq(seqStorePtr, litLength, anchor, 0, matchLength);
             ip += matchLength+MINMATCH;
             anchor = ip;
             continue;
         }
 
-        offset_2 = offset_1;  /* failed once : necessarily offset_1 now */
+        offset_2 = offset_1;
+        matchLength = ZSTD_HC_insertAndFindBestMatch_selectMLS(ctx, ip, iend, &match, maxSearches, mls);
+        if (!matchLength) { ip++; continue; }
 
-        /* repcode at ip+1 */
-        if (MEM_read32(ip+1) == MEM_read32(ip+1 - offset_1))
-        {
-            size_t matchLength = ZSTD_count(ip+1+MINMATCH, ip+1+MINMATCH-offset_1, iend);
-            size_t litLength = ip+1-anchor;
-            ZSTD_storeSeq(seqStorePtr, litLength, anchor, 0, matchLength);
-            ip += 1+matchLength+MINMATCH;
-            anchor = ip;
-            continue;
-        }
+        /* let's try to find a better solution */
+        offset = ip - match;
+        start = ip;
 
-        /* search */
+        while (ip<ilimit)
         {
-            size_t matchLength = ZSTD_HC_insertAndFindBestMatch_selectMLS(ctx, ip, iend, &match, maxSearches, mls);
-            if (!matchLength) { ip++; continue; }
-            /* store sequence */
+            ip ++;
+            if (MEM_read32(ip) == MEM_read32(ip - offset_1))
             {
-                size_t litLength = ip-anchor;
-                offset_1 = ip-match;
-                ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset_1, matchLength-MINMATCH);
-                ip += matchLength;
-                anchor = ip;
+                size_t ml2 = ZSTD_count(ip+MINMATCH, ip+MINMATCH-offset_1, iend) + MINMATCH;
+                int gain2 = (int)(ml2 * 4);
+                int gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset));
+                if (gain2 > gain1)
+                {
+                    matchLength = ml2, offset = 0, start = ip;
+                    break;
+                }
             }
+            {
+                size_t ml2 = ZSTD_HC_insertAndFindBestMatch_selectMLS(ctx, ip, iend, &match, maxSearches, mls);
+                size_t offset2 = ip - match;
+                int gain2 = (int)(ml2*5 - ZSTD_highbit((U32)offset2));   /* raw approx */
+                int gain1 = (int)(matchLength*5 - ZSTD_highbit((U32)offset));
+                if (gain2 > gain1)
+                {
+                    matchLength = ml2, offset = offset2, start = ip;
+                    continue;   /* search a better one */
+                }
+            }
+
+            break;  /* nothing found : store previous one */
         }
+
+        /* store sequence */
+        {
+            size_t litLength = start - anchor;
+            if (offset) offset_1 = offset;
+            ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, matchLength-MINMATCH);
+            ip = start + matchLength;
+            anchor = ip;
+        }
+
     }
 
     /* Last Literals */
@@ -365,7 +389,8 @@ size_t ZSTD_HC_compressBlock_greedy(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstS
                                   seqStorePtr, srcSize);
 }
 
-size_t ZSTD_HC_compressBlock_lazy(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+
+size_t ZSTD_HC_compressBlock_greedy(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
 {
     seqStore_t* seqStorePtr = &(ctx->seqStore);
     const BYTE* const istart = (const BYTE*)src;
@@ -384,72 +409,49 @@ size_t ZSTD_HC_compressBlock_lazy(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSiz
     if (((ip-ctx->base) - ctx->dictLimit) < REPCODE_STARTVALUE) ip += REPCODE_STARTVALUE;
 
     /* Match Loop */
-    while (ip <= ilimit)
+    while (ip < ilimit)
     {
-        size_t matchLength;
-        size_t offset;
-        const BYTE* start;
-
-        /* try to find a first match */
+        /* repcode */
         if (MEM_read32(ip) == MEM_read32(ip - offset_2))
         {
-            /* repcode : we take it*/
-            size_t offtmp = offset_2;
-            size_t litLength = ip - anchor;
-            matchLength = ZSTD_count(ip+MINMATCH, ip+MINMATCH-offset_2, iend);
+            /* store sequence */
+            size_t matchLength = ZSTD_count(ip+MINMATCH, ip+MINMATCH-offset_2, iend);
+            size_t litLength = ip-anchor;
+            size_t offset = offset_2;
             offset_2 = offset_1;
-            offset_1 = offtmp;
+            offset_1 = offset;
             ZSTD_storeSeq(seqStorePtr, litLength, anchor, 0, matchLength);
             ip += matchLength+MINMATCH;
             anchor = ip;
             continue;
         }
 
-        offset_2 = offset_1;
-        matchLength = ZSTD_HC_insertAndFindBestMatch_selectMLS(ctx, ip, iend, &match, maxSearches, mls);
-        if (!matchLength) { ip++; continue; }
-
-        /* let's try to find a better solution */
-        offset = ip - match;
-        start = ip;
+        offset_2 = offset_1;  /* failed once : necessarily offset_1 now */
 
-        while (ip<ilimit)
+        /* repcode at ip+1 */
+        if (MEM_read32(ip+1) == MEM_read32(ip+1 - offset_1))
         {
-            ip ++;
-            if (MEM_read32(ip) == MEM_read32(ip - offset_1))
-            {
-                size_t ml2 = ZSTD_count(ip+MINMATCH, ip+MINMATCH-offset_1, iend) + MINMATCH;
-                int gain1 = matchLength - (ZSTD_highbit((U32)offset) / 3);
-                if ((int)ml2 > gain1)
-                {
-                    matchLength = ml2, offset = 0, start = ip;
-                    break;
-                }
-            }
-            {
-                size_t ml2 = ZSTD_HC_insertAndFindBestMatch_selectMLS(ctx, ip, iend, &match, maxSearches, mls);
-                size_t offset2 = ip - match;
-                int gain2 = ml2 - (ZSTD_highbit((U32)offset2) / 4);   /* raw approx */
-                int gain1 = matchLength - (ZSTD_highbit((U32)offset) / 4);
-                if (gain2 > gain1)
-                {
-                    matchLength = ml2, offset = offset2, start = ip;
-                    continue;   /* search a better one */
-                }
-            }
-
-            break;  /* nothing found : store previous one */
+            size_t matchLength = ZSTD_count(ip+1+MINMATCH, ip+1+MINMATCH-offset_1, iend);
+            size_t litLength = ip+1-anchor;
+            ZSTD_storeSeq(seqStorePtr, litLength, anchor, 0, matchLength);
+            ip += 1+matchLength+MINMATCH;
+            anchor = ip;
+            continue;
         }
 
-        /* store sequence */
+        /* search */
         {
-            size_t litLength = start - anchor;
-            if (offset) offset_1 = offset;
-            ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, matchLength-MINMATCH);
-            ip = start + matchLength;
-            anchor = ip;
+            size_t matchLength = ZSTD_HC_insertAndFindBestMatch_selectMLS(ctx, ip, iend, &match, maxSearches, mls);
+            if (!matchLength) { ip++; continue; }
+            /* store sequence */
+            {
+                size_t litLength = ip-anchor;
+                offset_1 = ip-match;
+                ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset_1, matchLength-MINMATCH);
+                ip += matchLength;
+                anchor = ip;
+            }
         }
-
     }
 
     /* Last Literals */
@@ -461,15 +463,16 @@ size_t ZSTD_HC_compressBlock_lazy(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSiz
 
     /* Final compression stage */
     return ZSTD_compressSequences((BYTE*)dst, maxDstSize,
+
                                   seqStorePtr, srcSize);
 }
 
 
 size_t ZSTD_HC_compressBlock(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
 {
-    if (ctx->params.strategy==ZSTD_HC_greedy)
-        return ZSTD_HC_compressBlock_greedy(ctx, dst, maxDstSize, src, srcSize);
-    return ZSTD_HC_compressBlock_lazy(ctx, dst, maxDstSize, src, srcSize);
+       if (ctx->params.strategy == ZSTD_HC_greedy)
+       return ZSTD_HC_compressBlock_greedy(ctx, dst, maxDstSize, src, srcSize);
+       return ZSTD_HC_compressBlock_lazy(ctx, dst, maxDstSize, src, srcSize);
 }
 
 
@@ -483,10 +486,17 @@ static size_t ZSTD_HC_compress_generic (ZSTD_HC_CCtx* ctxPtr,
     BYTE* const ostart = (BYTE*)dst;
     BYTE* op = ostart;
     BYTE* const oend = op + maxDstSize;
+    size_t (*blockCompressor) (ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize);
+
+    if (ctxPtr->params.strategy == ZSTD_HC_greedy)
+               blockCompressor = ZSTD_HC_compressBlock_greedy;
+       else
+               blockCompressor = ZSTD_HC_compressBlock_lazy;
+
 
     while (remaining > blockSize)
     {
-        size_t cSize = ZSTD_HC_compressBlock(ctxPtr, op+3, oend-op, ip, blockSize);
+        size_t cSize = blockCompressor(ctxPtr, op+3, oend-op, ip, blockSize);
 
         if (cSize == 0)
         {
@@ -509,7 +519,7 @@ static size_t ZSTD_HC_compress_generic (ZSTD_HC_CCtx* ctxPtr,
 
     /* last block */
     {
-        size_t cSize = ZSTD_HC_compressBlock(ctxPtr, op+3, oend-op, ip, remaining);
+        size_t cSize = blockCompressor(ctxPtr, op+3, oend-op, ip, remaining);
 
         if (cSize == 0)
         {
index e0dc6970fdc87fa4c708f6545f9c302460c95106..acbb1650af424df0ebb542af408cada0b4c24bab 100644 (file)
@@ -122,7 +122,8 @@ static U32 g_rand = 1;
 static U32 g_singleRun = 0;
 static U32 g_target = 0;
 static U32 g_noSeed = 0;
-static ZSTD_HC_parameters g_params = { 0, 0, 0, 0, 0, 0 };
+static const ZSTD_HC_parameters* g_seedParams = ZSTD_HC_defaultParameters;
+static ZSTD_HC_parameters g_params = { 0, 0, 0, 0, 0, ZSTD_HC_greedy };
 
 void BMK_SetNbIterations(int nbLoops)
 {
@@ -623,9 +624,9 @@ static void playAround(FILE* f, winnerInfo_t* winners,
             case 9:
                 p.searchLength--; break;
             case 10:
-                p.strategy++; break;
+                p.strategy = ZSTD_HC_lazy; break;
             case 11:
-                p.strategy--; break;
+                p.strategy = ZSTD_HC_greedy; break;
             }
         }
 
@@ -674,7 +675,7 @@ static void BMK_selectRandomStart(
         p.searchLog  = FUZ_rand(&g_rand) % (ZSTD_HC_SEARCHLOG_MAX+1 - ZSTD_HC_SEARCHLOG_MIN) + ZSTD_HC_SEARCHLOG_MIN;
         p.windowLog  = FUZ_rand(&g_rand) % (ZSTD_HC_WINDOWLOG_MAX+1 - ZSTD_HC_WINDOWLOG_MIN) + ZSTD_HC_WINDOWLOG_MIN;
         p.searchLength=FUZ_rand(&g_rand) % (ZSTD_HC_SEARCHLENGTH_MAX+1 - ZSTD_HC_SEARCHLENGTH_MIN) + ZSTD_HC_SEARCHLENGTH_MIN;
-        p.strategy   = FUZ_rand(&g_rand) & 1;
+        p.strategy   = (ZSTD_HC_strategy) (FUZ_rand(&g_rand) & 1);
         playAround(f, winners, p, srcBuffer, srcSize, ctx);
     }
     else
@@ -682,8 +683,6 @@ static void BMK_selectRandomStart(
 }
 
 
-static const ZSTD_HC_parameters* g_seedParams = ZSTD_HC_defaultParameters;
-
 static void BMK_benchMem(void* srcBuffer, size_t srcSize)
 {
     ZSTD_HC_CCtx* ctx = ZSTD_HC_createCCtx();
@@ -968,12 +967,20 @@ int main(int argc, char** argv)
                             while ((*argument>= '0') && (*argument<='9'))
                                 g_params.searchLog *= 10, g_params.searchLog += *argument++ - '0';
                             continue;
-                        case 'l':
+                        case 'l':  /* search length */
                             g_params.searchLength = 0;
                             argument++;
                             while ((*argument>= '0') && (*argument<='9'))
                                 g_params.searchLength *= 10, g_params.searchLength += *argument++ - '0';
                             continue;
+                        case 't':  /* strategy */
+                            g_params.strategy = ZSTD_HC_greedy;
+                            argument++;
+                            while ((*argument>= '0') && (*argument<='9'))
+                            {
+                                if (*argument++) g_params.strategy = ZSTD_HC_lazy;
+                            }
+                            continue;
                         case 'L':
                             {
                                 int cLevel = 0;