git.ipfire.org Git - thirdparty/zstd.git/commitdiff
[libzstd] Increase granularity of FSECTable repeat mode
author Nick Terrell <terrelln@fb.com>
Thu, 13 Jul 2017 19:45:39 +0000 (12:45 -0700)
committer Nick Terrell <terrelln@fb.com>
Thu, 13 Jul 2017 19:45:39 +0000 (12:45 -0700)
lib/common/fse.h
lib/common/zstd_internal.h
lib/compress/zstd_compress.c

index 54ac98b1cb95d5dcc4aea05ed6ace51e991d36ff..1c44f8375078350bcb5c4b3be307baf9d89ca615 100644 (file)
@@ -384,6 +384,11 @@ size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue);
 size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, FSE_DTable* workSpace, unsigned maxLog);
 /**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DTABLE_SIZE_U32(maxLog)` */
 
+typedef enum {
+   FSE_repeat_none,  /**< Cannot use the previous table */
+   FSE_repeat_check, /**< Can use the previous table but it must be checked */
+   FSE_repeat_valid  /**< Can use the previous table and it is assumed to be valid */
+ } FSE_repeat;
 
 /* *****************************************
 *  FSE symbol compression API
index 42e5e7b5d9a5ebffc2f0eb12512703428dad2e85..f3779e8448a0906a5e78d0a0ba0f59183ae3a975 100644 (file)
@@ -271,13 +271,15 @@ typedef struct {
 } seqStore_t;
 
 typedef struct {
-    HUF_repeat hufCTable_repeatMode;
     U32 hufCTable[HUF_CTABLE_SIZE_U32(255)];
-    U32 fseCTables_ready;
     FSE_CTable offcodeCTable[FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
     FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)];
     FSE_CTable litlengthCTable[FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)];
     U32 workspace[HUF_WORKSPACE_SIZE_U32];
+    HUF_repeat hufCTable_repeatMode;
+    FSE_repeat offcode_repeatMode;
+    FSE_repeat matchlength_repeatMode;
+    FSE_repeat litlength_repeatMode;
 } ZSTD_entropyCTables_t;
 
 const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx);
index b362a19192ec171549d5d6fd157f5d2b62e9667b..dfcb0266194297611afdb2c4e95b740ec3fc59e0 100644 (file)
@@ -616,8 +616,10 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
     if (crp == ZSTDcrp_continue) {
         if (ZSTD_equivalentParams(params.cParams, zc->appliedParams.cParams)) {
             DEBUGLOG(5, "ZSTD_equivalentParams()==1");
-            zc->entropy->fseCTables_ready = 0;
             zc->entropy->hufCTable_repeatMode = HUF_repeat_none;
+            zc->entropy->offcode_repeatMode = FSE_repeat_none;
+            zc->entropy->matchlength_repeatMode = FSE_repeat_none;
+            zc->entropy->litlength_repeatMode = FSE_repeat_none;
             return ZSTD_continueCCtx(zc, params, pledgedSrcSize);
     }   }
 
@@ -681,8 +683,10 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
         zc->stage = ZSTDcs_init;
         zc->dictID = 0;
         zc->loadedDictEnd = 0;
-        zc->entropy->fseCTables_ready = 0;
         zc->entropy->hufCTable_repeatMode = HUF_repeat_none;
+        zc->entropy->offcode_repeatMode = FSE_repeat_none;
+        zc->entropy->matchlength_repeatMode = FSE_repeat_none;
+        zc->entropy->litlength_repeatMode = FSE_repeat_none;
         zc->nextToUpdate = 1;
         zc->nextSrc = NULL;
         zc->base = NULL;
@@ -1067,11 +1071,13 @@ MEM_STATIC size_t ZSTD_compressSequences (ZSTD_CCtx* zc,
             *op++ = llCodeTable[0];
             FSE_buildCTable_rle(CTable_LitLength, (BYTE)max);
             LLtype = set_rle;
-        } else if ((zc->entropy->fseCTables_ready) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
+            zc->entropy->litlength_repeatMode = FSE_repeat_check;
+        } else if ((zc->entropy->litlength_repeatMode == FSE_repeat_valid) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
             LLtype = set_repeat;
         } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (LL_defaultNormLog-1)))) {
             FSE_buildCTable_wksp(CTable_LitLength, LL_defaultNorm, MaxLL, LL_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));
             LLtype = set_basic;
+            zc->entropy->litlength_repeatMode = FSE_repeat_valid;
         } else {
             size_t nbSeq_1 = nbSeq;
             const U32 tableLog = FSE_optimalTableLog(LLFSELog, nbSeq, max);
@@ -1082,6 +1088,7 @@ MEM_STATIC size_t ZSTD_compressSequences (ZSTD_CCtx* zc,
               op += NCountSize; }
             FSE_buildCTable_wksp(CTable_LitLength, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer));
             LLtype = set_compressed;
+            zc->entropy->litlength_repeatMode = FSE_repeat_check;
     }   }
 
     /* CTable for Offsets */
@@ -1091,11 +1098,13 @@ MEM_STATIC size_t ZSTD_compressSequences (ZSTD_CCtx* zc,
             *op++ = ofCodeTable[0];
             FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max);
             Offtype = set_rle;
-        } else if ((zc->entropy->fseCTables_ready) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
+            zc->entropy->offcode_repeatMode = FSE_repeat_check;
+        } else if ((zc->entropy->offcode_repeatMode == FSE_repeat_valid) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
             Offtype = set_repeat;
         } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (OF_defaultNormLog-1)))) {
             FSE_buildCTable_wksp(CTable_OffsetBits, OF_defaultNorm, MaxOff, OF_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));
             Offtype = set_basic;
+            zc->entropy->offcode_repeatMode = FSE_repeat_valid;
         } else {
             size_t nbSeq_1 = nbSeq;
             const U32 tableLog = FSE_optimalTableLog(OffFSELog, nbSeq, max);
@@ -1106,6 +1115,7 @@ MEM_STATIC size_t ZSTD_compressSequences (ZSTD_CCtx* zc,
               op += NCountSize; }
             FSE_buildCTable_wksp(CTable_OffsetBits, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer));
             Offtype = set_compressed;
+            zc->entropy->offcode_repeatMode = FSE_repeat_check;
     }   }
 
     /* CTable for MatchLengths */
@@ -1115,11 +1125,13 @@ MEM_STATIC size_t ZSTD_compressSequences (ZSTD_CCtx* zc,
             *op++ = *mlCodeTable;
             FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max);
             MLtype = set_rle;
-        } else if ((zc->entropy->fseCTables_ready) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
+            zc->entropy->matchlength_repeatMode = FSE_repeat_check;
+        } else if ((zc->entropy->matchlength_repeatMode == FSE_repeat_valid) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
             MLtype = set_repeat;
         } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (ML_defaultNormLog-1)))) {
             FSE_buildCTable_wksp(CTable_MatchLength, ML_defaultNorm, MaxML, ML_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));
             MLtype = set_basic;
+            zc->entropy->matchlength_repeatMode = FSE_repeat_valid;
         } else {
             size_t nbSeq_1 = nbSeq;
             const U32 tableLog = FSE_optimalTableLog(MLFSELog, nbSeq, max);
@@ -1130,10 +1142,10 @@ MEM_STATIC size_t ZSTD_compressSequences (ZSTD_CCtx* zc,
               op += NCountSize; }
             FSE_buildCTable_wksp(CTable_MatchLength, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer));
             MLtype = set_compressed;
+            zc->entropy->matchlength_repeatMode = FSE_repeat_check;
     }   }
 
     *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));
-    zc->entropy->fseCTables_ready = 0;
 
     /* Encoding Sequences */
     {   BIT_CStream_t blockStream;
@@ -1213,6 +1225,9 @@ _check_compressibility:
         size_t const maxCSize = srcSize - minGain;
         if ((size_t)(op-ostart) >= maxCSize) {
             zc->entropy->hufCTable_repeatMode = HUF_repeat_none;
+            zc->entropy->offcode_repeatMode = FSE_repeat_none;
+            zc->entropy->matchlength_repeatMode = FSE_repeat_none;
+            zc->entropy->litlength_repeatMode = FSE_repeat_none;
             return 0;
     }   }
 
@@ -3124,8 +3139,10 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_CCtx* cctx, const void* dict, size_t
                 if (cctx->rep[u] > dictContentSize) return ERROR(dictionary_corrupted);
         }   }
 
-        cctx->entropy->fseCTables_ready = 1;
         cctx->entropy->hufCTable_repeatMode = HUF_repeat_valid;
+        cctx->entropy->offcode_repeatMode = FSE_repeat_valid;
+        cctx->entropy->matchlength_repeatMode = FSE_repeat_valid;
+        cctx->entropy->litlength_repeatMode = FSE_repeat_valid;
         return ZSTD_loadDictionaryContent(cctx, dictPtr, dictContentSize);
     }
 }