[test] Add large dict/data --patch-from test

author Nick Terrell <terrelln@fb.com>

Wed, 5 May 2021 00:09:32 +0000 (17:09 -0700)

committer Nick Terrell <terrelln@fb.com>

Wed, 5 May 2021 00:31:32 +0000 (17:31 -0700)
author Nick Terrell <terrelln@fb.com>
Wed, 5 May 2021 00:09:32 +0000 (17:09 -0700)
committer Nick Terrell <terrelln@fb.com>
Wed, 5 May 2021 00:31:32 +0000 (17:31 -0700)
diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c

index 14b284af0d4d1f821eb7503dc1e61957169f1957..6e18b68e5be1f383d63c158b19d2293dd278c69b 100644 (file)
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@@ -4121,6 +4121,7 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
  
      /* Assert that the ms params match the params we're being given */
      ZSTD_assertEqualCParams(params->cParams, ms->cParams);
+
      if (srcSize > ZSTD_CHUNKSIZE_MAX) {
          /* Allow the dictionary to set indices up to exactly ZSTD_CURRENT_MAX.
           * Dictionaries right at the edge will immediately trigger overflow
@@ -4153,7 +4154,7 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
      ZSTD_overflowCorrectIfNeeded(ms, ws, params, ip, iend);
  
      if (loadLdmDict)
-        ZSTD_ldm_fillHashTable(ls, (const BYTE*)src, (const BYTE*)src + srcSize, &params->ldmParams);
+        ZSTD_ldm_fillHashTable(ls, ip, iend, &params->ldmParams);
  
      switch(params->cParams.strategy)
      {
@@ -4167,22 +4168,20 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
      case ZSTD_greedy:
      case ZSTD_lazy:
      case ZSTD_lazy2:
-        if (srcSize >= HASH_READ_SIZE) {
-            if (ms->dedicatedDictSearch) {
-                assert(ms->chainTable != NULL);
-                ZSTD_dedicatedDictSearch_lazy_loadDictionary(ms, iend-HASH_READ_SIZE);
+        assert(srcSize >= HASH_READ_SIZE);
+        if (ms->dedicatedDictSearch) {
+            assert(ms->chainTable != NULL);
+            ZSTD_dedicatedDictSearch_lazy_loadDictionary(ms, iend-HASH_READ_SIZE);
+        } else {
+            assert(params->useRowMatchFinder != ZSTD_urm_auto);
+            if (params->useRowMatchFinder == ZSTD_urm_enableRowMatchFinder) {
+                size_t const tagTableSize = ((size_t)1 << params->cParams.hashLog) * sizeof(U16);
+                ZSTD_memset(ms->tagTable, 0, tagTableSize);
+                ZSTD_row_update(ms, iend-HASH_READ_SIZE);
+                DEBUGLOG(4, "Using row-based hash table for lazy dict");
              } else {
-                assert(params->useRowMatchFinder != ZSTD_urm_auto);
-                if (params->useRowMatchFinder == ZSTD_urm_enableRowMatchFinder) {
-                    size_t const tagTableSize = ((size_t)1 << params->cParams.hashLog) * sizeof(U16);
-                    if (ip == src)
-                        ZSTD_memset(ms->tagTable, 0, tagTableSize);
-                    ZSTD_row_update(ms, iend-HASH_READ_SIZE);
-                    DEBUGLOG(4, "Using row-based hash table for lazy dict");
-                } else {
-                    ZSTD_insertAndFindFirstIndex(ms, iend-HASH_READ_SIZE);
-                    DEBUGLOG(4, "Using chain-based hash table for lazy dict");
-                }
+                ZSTD_insertAndFindFirstIndex(ms, iend-HASH_READ_SIZE);
+                DEBUGLOG(4, "Using chain-based hash table for lazy dict");
              }
          }
          break;
@@ -4191,8 +4190,8 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
      case ZSTD_btopt:
      case ZSTD_btultra:
      case ZSTD_btultra2:
-        if (srcSize >= HASH_READ_SIZE)
-            ZSTD_updateTree(ms, iend-HASH_READ_SIZE, iend);
+        assert(srcSize >= HASH_READ_SIZE);
+        ZSTD_updateTree(ms, iend-HASH_READ_SIZE, iend);
          break;
  
      default:
diff --git a/tests/playTests.sh b/tests/playTests.sh

index 04a90e3c2ee931ff6acffc887a5d60b7e4709226..bfb2e378f4af57124c49424c0e02971d853cdac2 100755 (executable)
--- a/tests/playTests.sh
+++ b/tests/playTests.sh
@@ -1430,6 +1430,14 @@ datagen -g5000000 > tmp_patch
  zstd -15 --patch-from=tmp_dict tmp_patch 2>&1 | grep "long mode automatically triggered"
  rm -rf tmp*
  
+println "\n===> patch-from very large dictionary and file test"
+datagen -g550000000 -P0 > tmp_dict
+datagen -g100000000 -P1 > tmp_patch
+zstd --long=30 -1f --patch-from tmp_dict tmp_patch
+zstd --long=30 -df --patch-from tmp_dict tmp_patch.zst -o tmp_patch_recon
+$DIFF -s tmp_patch_recon tmp_patch
+rm -rf tmp*
+
  println "\n===> patch-from --stream-size test"
  datagen -g1000 -P50 > tmp_dict
  datagen -g1000 -P10 > tmp_patch
author	Nick Terrell <terrelln@fb.com>
	Wed, 5 May 2021 00:09:32 +0000 (17:09 -0700)
committer	Nick Terrell <terrelln@fb.com>
	Wed, 5 May 2021 00:31:32 +0000 (17:31 -0700)
lib/compress/zstd_compress.c		patch | blob | blame | history
tests/playTests.sh		patch | blob | blame | history