/* dynamic splitting has a CPU cost for analysis;
 * due to that cost, it's only used at higher compression levels */
if (strat >= ZSTD_btopt)
- return ZSTD_splitBlock(src, blockSizeMax, split_lvl3, cctx->tmpWorkspace, cctx->tmpWkspSize);
+ return ZSTD_splitBlock(src, blockSizeMax, 3, cctx->tmpWorkspace, cctx->tmpWkspSize);
if (strat >= ZSTD_lazy2)
- return ZSTD_splitBlock(src, blockSizeMax, split_lvl2, cctx->tmpWorkspace, cctx->tmpWkspSize);
+ return ZSTD_splitBlock(src, blockSizeMax, 2, cctx->tmpWorkspace, cctx->tmpWkspSize);
if (strat >= ZSTD_greedy)
- return ZSTD_splitBlock(src, blockSizeMax, split_lvl1, cctx->tmpWorkspace, cctx->tmpWkspSize);
+ return ZSTD_splitBlock(src, blockSizeMax, 1, cctx->tmpWorkspace, cctx->tmpWkspSize);
+ if (strat >= ZSTD_dfast)
+ return ZSTD_splitBlock(src, blockSizeMax, 0, cctx->tmpWorkspace, cctx->tmpWkspSize);
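/* Illustrative mapping (editor note, not part of the patch), following the
 * ZSTD_strategy ordering fast < dfast < greedy < lazy < lazy2 < btlazy2 <
 * btopt < btultra < btultra2:
 *   btopt, btultra, btultra2  -> split level 3
 *   lazy2, btlazy2            -> split level 2
 *   greedy, lazy              -> split level 1
 *   dfast                     -> split level 0
 *   fast falls through to the blind split below. */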
/* blind split strategy
* heuristic value, tested as being "generally better".
 * no CPU cost, but can over-split homogeneous data. */
#define HASHMASK (HASHTABLESIZE - 1)
#define KNUTH 0x9e3779b9
+/* for hashLog > 8, hash 2 bytes.
+ * for hashLog == 8, just take the byte, no hashing.
+ * The speed of this method relies on compile-time constant propagation */
FORCE_INLINE_TEMPLATE unsigned hash2(const void *p, unsigned hashLog)
{
+ assert(hashLog >= 8);
+ if (hashLog == 8) return (U32)((const BYTE*)p)[0];
assert(hashLog <= HASHLOG_MAX);
return (U32)(MEM_read16(p)) * KNUTH >> (32 - hashLog);
}
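/* Worked example (illustrative): with hashLog == 10, the 16-bit value read
 * at p is multiplied by the golden-ratio constant 0x9e3779b9 and the top
 * 10 bits are kept, yielding a bucket in [0, 1023]; with hashLog == 8, the
 * first byte itself is the bucket, in [0, 255]. */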
ZSTD_GEN_RECORD_FINGERPRINT(1, 10)
ZSTD_GEN_RECORD_FINGERPRINT(5, 10)
ZSTD_GEN_RECORD_FINGERPRINT(11, 9)
+ZSTD_GEN_RECORD_FINGERPRINT(43, 8)
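/* Assumption (inferred from hashParams[] below, not confirmed by this
 * excerpt): each instantiation pairs a sampling stride with a hashLog,
 * i.e. (1,10), (5,10), (11,9), (43,8). The new (43,8) recorder for level 0
 * samples roughly 1 byte in 43 into a 256-bucket histogram, trading
 * fingerprint accuracy for speed. */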
static U64 abs64(S64 s64) { return (U64)((s64 < 0) ? -s64 : s64); }
#define CHUNKSIZE (8 << 10)
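/* 8 KB analysis chunks: a full 128 KB block is scanned as 16 chunks */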
static size_t ZSTD_splitBlock_byChunks(const void* blockStart, size_t blockSize,
- ZSTD_SplitBlock_strategy_e splitStrat,
+ int level,
void* workspace, size_t wkspSize)
{
static const RecordEvents_f records_fs[] = {
- FP_RECORD(11), FP_RECORD(5), FP_RECORD(1)
+ FP_RECORD(43), FP_RECORD(11), FP_RECORD(5), FP_RECORD(1)
};
- static const unsigned hashParams[] = { 9, 10, 10 };
- const RecordEvents_f record_f = (assert(splitStrat<=split_lvl3), records_fs[splitStrat]);
+ static const unsigned hashParams[] = { 8, 9, 10, 10 };
+ const RecordEvents_f record_f = (assert(0<=level && level<=3), records_fs[level]);
FPStats* const fpstats = (FPStats*)workspace;
const char* p = (const char*)blockStart;
int penalty = THRESHOLD_PENALTY;
size_t pos;
record_f(&fpstats->pastEvents, p, CHUNKSIZE);
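/* scan remaining chunks: compare each new chunk's fingerprint against the
 * accumulated history; a large enough divergence marks a split point at
 * the current chunk boundary */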
for (pos = CHUNKSIZE; pos <= blockSize - CHUNKSIZE; pos += CHUNKSIZE) {
record_f(&fpstats->newEvents, p + pos, CHUNKSIZE);
- if (compareFingerprints(&fpstats->pastEvents, &fpstats->newEvents, penalty, hashParams[splitStrat])) {
+ if (compareFingerprints(&fpstats->pastEvents, &fpstats->newEvents, penalty, hashParams[level])) {
return pos;
} else {
mergeEvents(&fpstats->pastEvents, &fpstats->newEvents);
}
}
return blockSize;   /* no split point detected: keep the full block */
}
size_t ZSTD_splitBlock(const void* blockStart, size_t blockSize,
- ZSTD_SplitBlock_strategy_e splitStrat,
+ int level,
void* workspace, size_t wkspSize)
{
- assert(splitStrat <= split_lvl3);
- return ZSTD_splitBlock_byChunks(blockStart, blockSize, splitStrat, workspace, wkspSize);
+ assert(0<=level && level<=3);
+ return ZSTD_splitBlock_byChunks(blockStart, blockSize, level, workspace, wkspSize);
}
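/* A minimal usage sketch (editor illustration; demo_split and its error
 * handling are assumptions, not part of zstd). Per the header notes below,
 * blockSize is expected to be a full 128 KB block. */
#include <stdlib.h>

static size_t demo_split(const void* block, size_t blockSize)
{
    /* workspace must be 8-byte aligned and >= ZSTD_SLIPBLOCK_WORKSPACESIZE;
     * malloc's alignment guarantee suffices */
    void* const wksp = malloc(ZSTD_SLIPBLOCK_WORKSPACESIZE);
    size_t split;
    if (wksp == NULL) return blockSize;  /* no workspace: keep the full block */
    split = ZSTD_splitBlock(block, blockSize, 2 /* level */, wksp, ZSTD_SLIPBLOCK_WORKSPACESIZE);
    free(wksp);
    return split;  /* first detected boundary, or blockSize if none */
}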
extern "C" {
#endif
-typedef enum { split_lvl1, split_lvl2, split_lvl3 } ZSTD_SplitBlock_strategy_e;
-
#define ZSTD_SLIPBLOCK_WORKSPACESIZE 8208
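/* Editor note (assumption): 8208 presumably covers two fingerprint tables,
 * each 1024 U32 counters plus one size_t event count on 64-bit targets:
 * 2 x (1024*4 + 8) = 8208 bytes. */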
-/* note:
+/* @level must be a value between 0 and 3.
+ * higher levels spend more energy to find block boundaries.
 * @workspace must be aligned on an 8-byte boundary
 * @wkspSize must be >= ZSTD_SLIPBLOCK_WORKSPACESIZE
 * note:
 * currently, this function only accepts full 128 KB blocks;
 * this could be extended to smaller sizes in the future.
*/
size_t ZSTD_splitBlock(const void* blockStart, size_t blockSize,
- ZSTD_SplitBlock_strategy_e splitStrat,
+ int level,
void* workspace, size_t wkspSize);
#if defined (__cplusplus)