+++ /dev/null
-ARG :=
-
-CC ?= gcc
-CFLAGS ?= -O3
-INCLUDES := -I ../randomDictBuilder -I ../../../programs -I ../../../lib/common -I ../../../lib -I ../../../lib/dictBuilder
-
-RANDOM_FILE := ../randomDictBuilder/random.c
-IO_FILE := ../randomDictBuilder/io.c
-
-all: run clean
-
-.PHONY: run
-run: benchmark
- echo "Benchmarking with $(ARG)"
- ./benchmark $(ARG)
-
-.PHONY: test
-test: benchmarkTest clean
-
-.PHONY: benchmarkTest
-benchmarkTest: benchmark test.sh
- sh test.sh
-
-benchmark: benchmark.o io.o random.o libzstd.a
- $(CC) $(CFLAGS) benchmark.o io.o random.o libzstd.a -o benchmark
-
-benchmark.o: benchmark.c
- $(CC) $(CFLAGS) $(INCLUDES) -c benchmark.c
-
-random.o: $(RANDOM_FILE)
- $(CC) $(CFLAGS) $(INCLUDES) -c $(RANDOM_FILE)
-
-io.o: $(IO_FILE)
- $(CC) $(CFLAGS) $(INCLUDES) -c $(IO_FILE)
-
-libzstd.a:
- $(MAKE) -C ../../../lib libzstd.a
- mv ../../../lib/libzstd.a .
-
-.PHONY: clean
-clean:
- rm -f *.o benchmark libzstd.a
- $(MAKE) -C ../../../lib clean
- echo "Cleaning is completed"
+++ /dev/null
-Benchmarking Dictionary Builder
-
-### Permitted Argument:
-Input File/Directory (in=fileName): required; file/directory used to build dictionary; if directory, will operate recursively for files inside directory; can include multiple files/directories, each following "in="
-
-###Running Test:
-make test
-
-###Usage:
-Benchmark given input files: make ARG= followed by permitted arguments
-
-### Examples:
-make ARG="in=../../../lib/dictBuilder in=../../../lib/compress"
-
-###Benchmarking Result:
-- First Cover is optimize cover, second Cover uses optimized d and k from first one.
-- For every f value of fastCover, the first one is optimize fastCover and the second one uses optimized d and k from first one. This is run for accel values from 1 to 10.
-- Fourth column is chosen d and fifth column is chosen k
-
-github:
-NODICT 0.000004 2.999642
-RANDOM 0.024560 8.791189
-LEGACY 0.727109 8.173529
-COVER 40.565676 10.652243 8 1298
-COVER 3.608284 10.652243 8 1298
-FAST f=15 a=1 4.181024 10.570882 8 1154
-FAST f=15 a=1 0.040788 10.570882 8 1154
-FAST f=15 a=2 3.548352 10.574287 6 1970
-FAST f=15 a=2 0.035535 10.574287 6 1970
-FAST f=15 a=3 3.287364 10.613950 6 1010
-FAST f=15 a=3 0.032182 10.613950 6 1010
-FAST f=15 a=4 3.184976 10.573883 6 1058
-FAST f=15 a=4 0.029878 10.573883 6 1058
-FAST f=15 a=5 3.045513 10.580640 8 1154
-FAST f=15 a=5 0.022162 10.580640 8 1154
-FAST f=15 a=6 3.003296 10.583677 6 1010
-FAST f=15 a=6 0.028091 10.583677 6 1010
-FAST f=15 a=7 2.952655 10.622551 6 1106
-FAST f=15 a=7 0.02724 10.622551 6 1106
-FAST f=15 a=8 2.945674 10.614657 6 1010
-FAST f=15 a=8 0.027264 10.614657 6 1010
-FAST f=15 a=9 3.153439 10.564018 8 1154
-FAST f=15 a=9 0.020635 10.564018 8 1154
-FAST f=15 a=10 2.950416 10.511454 6 1010
-FAST f=15 a=10 0.026606 10.511454 6 1010
-FAST f=16 a=1 3.970029 10.681035 8 1154
-FAST f=16 a=1 0.038188 10.681035 8 1154
-FAST f=16 a=2 3.422892 10.484978 6 1874
-FAST f=16 a=2 0.034702 10.484978 6 1874
-FAST f=16 a=3 3.215836 10.632631 8 1154
-FAST f=16 a=3 0.026084 10.632631 8 1154
-FAST f=16 a=4 3.081353 10.626533 6 1106
-FAST f=16 a=4 0.030032 10.626533 6 1106
-FAST f=16 a=5 3.041241 10.545027 8 1922
-FAST f=16 a=5 0.022882 10.545027 8 1922
-FAST f=16 a=6 2.989390 10.638284 6 1874
-FAST f=16 a=6 0.028308 10.638284 6 1874
-FAST f=16 a=7 3.001581 10.797136 6 1106
-FAST f=16 a=7 0.027479 10.797136 6 1106
-FAST f=16 a=8 2.984107 10.658356 8 1058
-FAST f=16 a=8 0.021099 10.658356 8 1058
-FAST f=16 a=9 2.925788 10.523869 6 1010
-FAST f=16 a=9 0.026905 10.523869 6 1010
-FAST f=16 a=10 2.889605 10.745841 6 1874
-FAST f=16 a=10 0.026846 10.745841 6 1874
-FAST f=17 a=1 4.031953 10.672080 8 1202
-FAST f=17 a=1 0.040658 10.672080 8 1202
-FAST f=17 a=2 3.458107 10.589352 8 1106
-FAST f=17 a=2 0.02926 10.589352 8 1106
-FAST f=17 a=3 3.291189 10.662714 8 1154
-FAST f=17 a=3 0.026531 10.662714 8 1154
-FAST f=17 a=4 3.154950 10.549456 8 1346
-FAST f=17 a=4 0.024991 10.549456 8 1346
-FAST f=17 a=5 3.092271 10.541670 6 1202
-FAST f=17 a=5 0.038285 10.541670 6 1202
-FAST f=17 a=6 3.166146 10.729112 6 1874
-FAST f=17 a=6 0.038217 10.729112 6 1874
-FAST f=17 a=7 3.035467 10.810485 6 1106
-FAST f=17 a=7 0.036655 10.810485 6 1106
-FAST f=17 a=8 3.035668 10.530532 6 1058
-FAST f=17 a=8 0.037715 10.530532 6 1058
-FAST f=17 a=9 2.987917 10.589802 8 1922
-FAST f=17 a=9 0.02217 10.589802 8 1922
-FAST f=17 a=10 2.981647 10.722579 8 1106
-FAST f=17 a=10 0.021948 10.722579 8 1106
-FAST f=18 a=1 4.067144 10.634943 8 1154
-FAST f=18 a=1 0.041386 10.634943 8 1154
-FAST f=18 a=2 3.507377 10.546230 6 1970
-FAST f=18 a=2 0.037572 10.546230 6 1970
-FAST f=18 a=3 3.323015 10.648061 8 1154
-FAST f=18 a=3 0.028306 10.648061 8 1154
-FAST f=18 a=4 3.216735 10.705402 6 1010
-FAST f=18 a=4 0.030755 10.705402 6 1010
-FAST f=18 a=5 3.175794 10.588154 8 1874
-FAST f=18 a=5 0.025315 10.588154 8 1874
-FAST f=18 a=6 3.127459 10.751104 8 1106
-FAST f=18 a=6 0.023897 10.751104 8 1106
-FAST f=18 a=7 3.083017 10.780402 6 1106
-FAST f=18 a=7 0.029158 10.780402 6 1106
-FAST f=18 a=8 3.069700 10.547226 8 1346
-FAST f=18 a=8 0.024046 10.547226 8 1346
-FAST f=18 a=9 3.056591 10.674759 6 1010
-FAST f=18 a=9 0.028496 10.674759 6 1010
-FAST f=18 a=10 3.063588 10.737578 8 1106
-FAST f=18 a=10 0.023033 10.737578 8 1106
-FAST f=19 a=1 4.164041 10.650333 8 1154
-FAST f=19 a=1 0.042906 10.650333 8 1154
-FAST f=19 a=2 3.585409 10.577066 6 1058
-FAST f=19 a=2 0.038994 10.577066 6 1058
-FAST f=19 a=3 3.439643 10.639403 8 1154
-FAST f=19 a=3 0.028427 10.639403 8 1154
-FAST f=19 a=4 3.268869 10.554410 8 1298
-FAST f=19 a=4 0.026866 10.554410 8 1298
-FAST f=19 a=5 3.238225 10.615109 6 1010
-FAST f=19 a=5 0.03078 10.615109 6 1010
-FAST f=19 a=6 3.199558 10.609782 6 1874
-FAST f=19 a=6 0.030099 10.609782 6 1874
-FAST f=19 a=7 3.132395 10.794753 6 1106
-FAST f=19 a=7 0.028964 10.794753 6 1106
-FAST f=19 a=8 3.148446 10.554842 8 1298
-FAST f=19 a=8 0.024277 10.554842 8 1298
-FAST f=19 a=9 3.108324 10.668763 6 1010
-FAST f=19 a=9 0.02896 10.668763 6 1010
-FAST f=19 a=10 3.159863 10.757347 8 1106
-FAST f=19 a=10 0.023351 10.757347 8 1106
-FAST f=20 a=1 4.462698 10.661788 8 1154
-FAST f=20 a=1 0.047174 10.661788 8 1154
-FAST f=20 a=2 3.820269 10.678612 6 1106
-FAST f=20 a=2 0.040807 10.678612 6 1106
-FAST f=20 a=3 3.644955 10.648424 8 1154
-FAST f=20 a=3 0.031398 10.648424 8 1154
-FAST f=20 a=4 3.546257 10.559756 8 1298
-FAST f=20 a=4 0.029856 10.559756 8 1298
-FAST f=20 a=5 3.485248 10.646637 6 1010
-FAST f=20 a=5 0.033756 10.646637 6 1010
-FAST f=20 a=6 3.490438 10.775824 8 1106
-FAST f=20 a=6 0.028338 10.775824 8 1106
-FAST f=20 a=7 3.631289 10.801795 6 1106
-FAST f=20 a=7 0.035228 10.801795 6 1106
-FAST f=20 a=8 3.758936 10.545116 8 1346
-FAST f=20 a=8 0.027495 10.545116 8 1346
-FAST f=20 a=9 3.707024 10.677454 6 1010
-FAST f=20 a=9 0.031326 10.677454 6 1010
-FAST f=20 a=10 3.586593 10.756017 8 1106
-FAST f=20 a=10 0.027122 10.756017 8 1106
-FAST f=21 a=1 5.701396 10.655398 8 1154
-FAST f=21 a=1 0.067744 10.655398 8 1154
-FAST f=21 a=2 5.270542 10.650743 6 1106
-FAST f=21 a=2 0.052999 10.650743 6 1106
-FAST f=21 a=3 4.945294 10.652380 8 1154
-FAST f=21 a=3 0.052678 10.652380 8 1154
-FAST f=21 a=4 4.894079 10.543185 8 1298
-FAST f=21 a=4 0.04997 10.543185 8 1298
-FAST f=21 a=5 4.785417 10.630321 6 1010
-FAST f=21 a=5 0.045294 10.630321 6 1010
-FAST f=21 a=6 4.789381 10.664477 6 1874
-FAST f=21 a=6 0.046578 10.664477 6 1874
-FAST f=21 a=7 4.302955 10.805179 6 1106
-FAST f=21 a=7 0.041205 10.805179 6 1106
-FAST f=21 a=8 4.034630 10.551211 8 1298
-FAST f=21 a=8 0.040121 10.551211 8 1298
-FAST f=21 a=9 4.523868 10.799114 6 1010
-FAST f=21 a=9 0.043592 10.799114 6 1010
-FAST f=21 a=10 4.760736 10.750255 8 1106
-FAST f=21 a=10 0.043483 10.750255 8 1106
-FAST f=22 a=1 6.743064 10.640537 8 1154
-FAST f=22 a=1 0.086967 10.640537 8 1154
-FAST f=22 a=2 6.121739 10.626638 6 1970
-FAST f=22 a=2 0.066337 10.626638 6 1970
-FAST f=22 a=3 5.248851 10.640688 8 1154
-FAST f=22 a=3 0.054935 10.640688 8 1154
-FAST f=22 a=4 5.436579 10.588333 8 1298
-FAST f=22 a=4 0.064113 10.588333 8 1298
-FAST f=22 a=5 5.812815 10.652653 6 1010
-FAST f=22 a=5 0.058189 10.652653 6 1010
-FAST f=22 a=6 5.745472 10.666437 6 1874
-FAST f=22 a=6 0.057188 10.666437 6 1874
-FAST f=22 a=7 5.716393 10.806911 6 1106
-FAST f=22 a=7 0.056 10.806911 6 1106
-FAST f=22 a=8 5.698799 10.530784 8 1298
-FAST f=22 a=8 0.0583 10.530784 8 1298
-FAST f=22 a=9 5.710533 10.777391 6 1010
-FAST f=22 a=9 0.054945 10.777391 6 1010
-FAST f=22 a=10 5.685395 10.745023 8 1106
-FAST f=22 a=10 0.056526 10.745023 8 1106
-FAST f=23 a=1 7.836923 10.638828 8 1154
-FAST f=23 a=1 0.099522 10.638828 8 1154
-FAST f=23 a=2 6.627834 10.631061 6 1970
-FAST f=23 a=2 0.066769 10.631061 6 1970
-FAST f=23 a=3 5.602533 10.647288 8 1154
-FAST f=23 a=3 0.064513 10.647288 8 1154
-FAST f=23 a=4 6.005580 10.568747 8 1298
-FAST f=23 a=4 0.062022 10.568747 8 1298
-FAST f=23 a=5 5.481816 10.676921 6 1010
-FAST f=23 a=5 0.058959 10.676921 6 1010
-FAST f=23 a=6 5.460444 10.666194 6 1874
-FAST f=23 a=6 0.057687 10.666194 6 1874
-FAST f=23 a=7 5.659822 10.800377 6 1106
-FAST f=23 a=7 0.06783 10.800377 6 1106
-FAST f=23 a=8 6.826940 10.522167 8 1298
-FAST f=23 a=8 0.070533 10.522167 8 1298
-FAST f=23 a=9 6.804757 10.577799 8 1682
-FAST f=23 a=9 0.069949 10.577799 8 1682
-FAST f=23 a=10 6.774933 10.742093 8 1106
-FAST f=23 a=10 0.068395 10.742093 8 1106
-FAST f=24 a=1 8.444110 10.632783 8 1154
-FAST f=24 a=1 0.094357 10.632783 8 1154
-FAST f=24 a=2 7.289578 10.631061 6 1970
-FAST f=24 a=2 0.098515 10.631061 6 1970
-FAST f=24 a=3 8.619780 10.646289 8 1154
-FAST f=24 a=3 0.098041 10.646289 8 1154
-FAST f=24 a=4 8.508455 10.555199 8 1298
-FAST f=24 a=4 0.093885 10.555199 8 1298
-FAST f=24 a=5 8.471145 10.674363 6 1010
-FAST f=24 a=5 0.088676 10.674363 6 1010
-FAST f=24 a=6 8.426727 10.667228 6 1874
-FAST f=24 a=6 0.087247 10.667228 6 1874
-FAST f=24 a=7 8.356826 10.803027 6 1106
-FAST f=24 a=7 0.085835 10.803027 6 1106
-FAST f=24 a=8 6.756811 10.522049 8 1298
-FAST f=24 a=8 0.07107 10.522049 8 1298
-FAST f=24 a=9 6.548169 10.571882 8 1682
-FAST f=24 a=9 0.0713 10.571882 8 1682
-FAST f=24 a=10 8.238079 10.736453 8 1106
-FAST f=24 a=10 0.07004 10.736453 8 1106
-
-
-hg-commands:
-NODICT 0.000005 2.425276
-RANDOM 0.046332 3.490331
-LEGACY 0.720351 3.911682
-COVER 45.507731 4.132653 8 386
-COVER 1.868810 4.132653 8 386
-FAST f=15 a=1 4.561427 3.866894 8 1202
-FAST f=15 a=1 0.048946 3.866894 8 1202
-FAST f=15 a=2 3.574462 3.892119 8 1538
-FAST f=15 a=2 0.033677 3.892119 8 1538
-FAST f=15 a=3 3.230227 3.888791 6 1346
-FAST f=15 a=3 0.034312 3.888791 6 1346
-FAST f=15 a=4 3.042388 3.899739 8 1010
-FAST f=15 a=4 0.024307 3.899739 8 1010
-FAST f=15 a=5 2.800148 3.896220 8 818
-FAST f=15 a=5 0.022331 3.896220 8 818
-FAST f=15 a=6 2.706518 3.882039 8 578
-FAST f=15 a=6 0.020955 3.882039 8 578
-FAST f=15 a=7 2.701820 3.885430 6 866
-FAST f=15 a=7 0.026074 3.885430 6 866
-FAST f=15 a=8 2.604445 3.906932 8 1826
-FAST f=15 a=8 0.021789 3.906932 8 1826
-FAST f=15 a=9 2.598568 3.870324 6 1682
-FAST f=15 a=9 0.026004 3.870324 6 1682
-FAST f=15 a=10 2.575920 3.920783 8 1442
-FAST f=15 a=10 0.020228 3.920783 8 1442
-FAST f=16 a=1 4.630623 4.001430 8 770
-FAST f=16 a=1 0.047497 4.001430 8 770
-FAST f=16 a=2 3.674721 3.974431 8 1874
-FAST f=16 a=2 0.035761 3.974431 8 1874
-FAST f=16 a=3 3.338384 3.978703 8 1010
-FAST f=16 a=3 0.029436 3.978703 8 1010
-FAST f=16 a=4 3.004412 3.983035 8 1010
-FAST f=16 a=4 0.025744 3.983035 8 1010
-FAST f=16 a=5 2.881892 3.987710 8 770
-FAST f=16 a=5 0.023211 3.987710 8 770
-FAST f=16 a=6 2.807410 3.952717 8 1298
-FAST f=16 a=6 0.023199 3.952717 8 1298
-FAST f=16 a=7 2.819623 3.994627 8 770
-FAST f=16 a=7 0.021806 3.994627 8 770
-FAST f=16 a=8 2.740092 3.954032 8 1826
-FAST f=16 a=8 0.0226 3.954032 8 1826
-FAST f=16 a=9 2.682564 3.969879 6 1442
-FAST f=16 a=9 0.026324 3.969879 6 1442
-FAST f=16 a=10 2.657959 3.969755 8 674
-FAST f=16 a=10 0.020413 3.969755 8 674
-FAST f=17 a=1 4.729228 4.046000 8 530
-FAST f=17 a=1 0.049703 4.046000 8 530
-FAST f=17 a=2 3.764510 3.991519 8 1970
-FAST f=17 a=2 0.038195 3.991519 8 1970
-FAST f=17 a=3 3.416992 4.006296 6 914
-FAST f=17 a=3 0.036244 4.006296 6 914
-FAST f=17 a=4 3.145626 3.979182 8 1970
-FAST f=17 a=4 0.028676 3.979182 8 1970
-FAST f=17 a=5 2.995070 4.050070 8 770
-FAST f=17 a=5 0.025707 4.050070 8 770
-FAST f=17 a=6 2.911833 4.040024 8 770
-FAST f=17 a=6 0.02453 4.040024 8 770
-FAST f=17 a=7 2.894796 4.015884 8 818
-FAST f=17 a=7 0.023956 4.015884 8 818
-FAST f=17 a=8 2.789962 4.039303 8 530
-FAST f=17 a=8 0.023219 4.039303 8 530
-FAST f=17 a=9 2.787625 3.996762 8 1634
-FAST f=17 a=9 0.023651 3.996762 8 1634
-FAST f=17 a=10 2.754796 4.005059 8 1058
-FAST f=17 a=10 0.022537 4.005059 8 1058
-FAST f=18 a=1 4.779117 4.038214 8 242
-FAST f=18 a=1 0.048814 4.038214 8 242
-FAST f=18 a=2 3.829753 4.045768 8 722
-FAST f=18 a=2 0.036541 4.045768 8 722
-FAST f=18 a=3 3.495053 4.021497 8 770
-FAST f=18 a=3 0.032648 4.021497 8 770
-FAST f=18 a=4 3.221395 4.039623 8 770
-FAST f=18 a=4 0.027818 4.039623 8 770
-FAST f=18 a=5 3.059369 4.050414 8 530
-FAST f=18 a=5 0.026296 4.050414 8 530
-FAST f=18 a=6 3.019292 4.010714 6 962
-FAST f=18 a=6 0.031104 4.010714 6 962
-FAST f=18 a=7 2.949322 4.031439 6 770
-FAST f=18 a=7 0.030745 4.031439 6 770
-FAST f=18 a=8 2.876425 4.032088 6 386
-FAST f=18 a=8 0.027407 4.032088 6 386
-FAST f=18 a=9 2.850958 4.053372 8 674
-FAST f=18 a=9 0.023799 4.053372 8 674
-FAST f=18 a=10 2.884352 4.020148 8 1730
-FAST f=18 a=10 0.024401 4.020148 8 1730
-FAST f=19 a=1 4.815669 4.061203 8 674
-FAST f=19 a=1 0.051425 4.061203 8 674
-FAST f=19 a=2 3.951356 4.013822 8 1442
-FAST f=19 a=2 0.039968 4.013822 8 1442
-FAST f=19 a=3 3.554682 4.050425 8 722
-FAST f=19 a=3 0.032725 4.050425 8 722
-FAST f=19 a=4 3.242585 4.054677 8 722
-FAST f=19 a=4 0.028194 4.054677 8 722
-FAST f=19 a=5 3.105909 4.064524 8 818
-FAST f=19 a=5 0.02675 4.064524 8 818
-FAST f=19 a=6 3.059901 4.036857 8 1250
-FAST f=19 a=6 0.026396 4.036857 8 1250
-FAST f=19 a=7 3.016151 4.068234 6 770
-FAST f=19 a=7 0.031501 4.068234 6 770
-FAST f=19 a=8 2.962902 4.077509 8 530
-FAST f=19 a=8 0.023333 4.077509 8 530
-FAST f=19 a=9 2.899607 4.067328 8 530
-FAST f=19 a=9 0.024553 4.067328 8 530
-FAST f=19 a=10 2.950978 4.059901 8 434
-FAST f=19 a=10 0.023852 4.059901 8 434
-FAST f=20 a=1 5.259834 4.027579 8 1634
-FAST f=20 a=1 0.061123 4.027579 8 1634
-FAST f=20 a=2 4.382150 4.025093 8 1634
-FAST f=20 a=2 0.048009 4.025093 8 1634
-FAST f=20 a=3 4.104323 4.060842 8 530
-FAST f=20 a=3 0.040965 4.060842 8 530
-FAST f=20 a=4 3.853340 4.023504 6 914
-FAST f=20 a=4 0.041072 4.023504 6 914
-FAST f=20 a=5 3.728841 4.018089 6 1634
-FAST f=20 a=5 0.037469 4.018089 6 1634
-FAST f=20 a=6 3.683045 4.069138 8 578
-FAST f=20 a=6 0.028011 4.069138 8 578
-FAST f=20 a=7 3.726973 4.063160 8 722
-FAST f=20 a=7 0.028437 4.063160 8 722
-FAST f=20 a=8 3.555073 4.057690 8 386
-FAST f=20 a=8 0.027588 4.057690 8 386
-FAST f=20 a=9 3.551095 4.067253 8 482
-FAST f=20 a=9 0.025976 4.067253 8 482
-FAST f=20 a=10 3.490127 4.068518 8 530
-FAST f=20 a=10 0.025971 4.068518 8 530
-FAST f=21 a=1 7.343816 4.064945 8 770
-FAST f=21 a=1 0.085035 4.064945 8 770
-FAST f=21 a=2 5.930894 4.048206 8 386
-FAST f=21 a=2 0.067349 4.048206 8 386
-FAST f=21 a=3 6.770775 4.063417 8 578
-FAST f=21 a=3 0.077104 4.063417 8 578
-FAST f=21 a=4 6.889409 4.066761 8 626
-FAST f=21 a=4 0.0717 4.066761 8 626
-FAST f=21 a=5 6.714896 4.051813 8 914
-FAST f=21 a=5 0.071026 4.051813 8 914
-FAST f=21 a=6 6.539890 4.047263 8 1922
-FAST f=21 a=6 0.07127 4.047263 8 1922
-FAST f=21 a=7 6.511052 4.068373 8 482
-FAST f=21 a=7 0.065467 4.068373 8 482
-FAST f=21 a=8 6.458788 4.071597 8 482
-FAST f=21 a=8 0.063817 4.071597 8 482
-FAST f=21 a=9 6.377591 4.052905 8 434
-FAST f=21 a=9 0.063112 4.052905 8 434
-FAST f=21 a=10 6.360752 4.047773 8 530
-FAST f=21 a=10 0.063606 4.047773 8 530
-FAST f=22 a=1 10.523471 4.040812 8 962
-FAST f=22 a=1 0.14214 4.040812 8 962
-FAST f=22 a=2 9.454758 4.059396 8 914
-FAST f=22 a=2 0.118343 4.059396 8 914
-FAST f=22 a=3 9.043197 4.043019 8 1922
-FAST f=22 a=3 0.109798 4.043019 8 1922
-FAST f=22 a=4 8.716261 4.044819 8 770
-FAST f=22 a=4 0.099687 4.044819 8 770
-FAST f=22 a=5 8.529472 4.070576 8 530
-FAST f=22 a=5 0.093127 4.070576 8 530
-FAST f=22 a=6 8.424241 4.070565 8 722
-FAST f=22 a=6 0.093703 4.070565 8 722
-FAST f=22 a=7 8.403391 4.070591 8 578
-FAST f=22 a=7 0.089763 4.070591 8 578
-FAST f=22 a=8 8.285221 4.089171 8 530
-FAST f=22 a=8 0.087716 4.089171 8 530
-FAST f=22 a=9 8.282506 4.047470 8 722
-FAST f=22 a=9 0.089773 4.047470 8 722
-FAST f=22 a=10 8.241809 4.064151 8 818
-FAST f=22 a=10 0.090413 4.064151 8 818
-FAST f=23 a=1 12.389208 4.051635 6 530
-FAST f=23 a=1 0.147796 4.051635 6 530
-FAST f=23 a=2 11.300910 4.042835 6 914
-FAST f=23 a=2 0.133178 4.042835 6 914
-FAST f=23 a=3 10.879455 4.047415 8 626
-FAST f=23 a=3 0.129571 4.047415 8 626
-FAST f=23 a=4 10.522718 4.038269 6 914
-FAST f=23 a=4 0.118121 4.038269 6 914
-FAST f=23 a=5 10.348043 4.066884 8 434
-FAST f=23 a=5 0.112098 4.066884 8 434
-FAST f=23 a=6 10.238630 4.048635 8 1010
-FAST f=23 a=6 0.120281 4.048635 8 1010
-FAST f=23 a=7 10.213255 4.061809 8 530
-FAST f=23 a=7 0.1121 4.061809 8 530
-FAST f=23 a=8 10.107879 4.074104 8 818
-FAST f=23 a=8 0.116544 4.074104 8 818
-FAST f=23 a=9 10.063424 4.064811 8 674
-FAST f=23 a=9 0.109045 4.064811 8 674
-FAST f=23 a=10 10.035801 4.054918 8 530
-FAST f=23 a=10 0.108735 4.054918 8 530
-FAST f=24 a=1 14.963878 4.073490 8 722
-FAST f=24 a=1 0.206344 4.073490 8 722
-FAST f=24 a=2 13.833472 4.036100 8 962
-FAST f=24 a=2 0.17486 4.036100 8 962
-FAST f=24 a=3 13.404631 4.026281 6 1106
-FAST f=24 a=3 0.153961 4.026281 6 1106
-FAST f=24 a=4 13.041164 4.065448 8 674
-FAST f=24 a=4 0.155509 4.065448 8 674
-FAST f=24 a=5 12.879412 4.054636 8 674
-FAST f=24 a=5 0.148282 4.054636 8 674
-FAST f=24 a=6 12.773736 4.081376 8 530
-FAST f=24 a=6 0.142563 4.081376 8 530
-FAST f=24 a=7 12.711310 4.059834 8 770
-FAST f=24 a=7 0.149321 4.059834 8 770
-FAST f=24 a=8 12.635459 4.052050 8 1298
-FAST f=24 a=8 0.15095 4.052050 8 1298
-FAST f=24 a=9 12.558104 4.076516 8 722
-FAST f=24 a=9 0.144361 4.076516 8 722
-FAST f=24 a=10 10.661348 4.062137 8 818
-FAST f=24 a=10 0.108232 4.062137 8 818
-
-
-hg-changelog:
-NODICT 0.000017 1.377590
-RANDOM 0.186171 2.097487
-LEGACY 1.670867 2.058907
-COVER 173.561948 2.189685 8 98
-COVER 4.811180 2.189685 8 98
-FAST f=15 a=1 18.685906 2.129682 8 434
-FAST f=15 a=1 0.173376 2.129682 8 434
-FAST f=15 a=2 12.928259 2.131890 8 482
-FAST f=15 a=2 0.102582 2.131890 8 482
-FAST f=15 a=3 11.132343 2.128027 8 386
-FAST f=15 a=3 0.077122 2.128027 8 386
-FAST f=15 a=4 10.120683 2.125797 8 434
-FAST f=15 a=4 0.065175 2.125797 8 434
-FAST f=15 a=5 9.479092 2.127697 8 386
-FAST f=15 a=5 0.057905 2.127697 8 386
-FAST f=15 a=6 9.159523 2.127132 8 1682
-FAST f=15 a=6 0.058604 2.127132 8 1682
-FAST f=15 a=7 8.724003 2.129914 8 434
-FAST f=15 a=7 0.0493 2.129914 8 434
-FAST f=15 a=8 8.595001 2.127137 8 338
-FAST f=15 a=8 0.0474 2.127137 8 338
-FAST f=15 a=9 8.356405 2.125512 8 482
-FAST f=15 a=9 0.046126 2.125512 8 482
-FAST f=15 a=10 8.207111 2.126066 8 338
-FAST f=15 a=10 0.043292 2.126066 8 338
-FAST f=16 a=1 18.464436 2.144040 8 242
-FAST f=16 a=1 0.172156 2.144040 8 242
-FAST f=16 a=2 12.844825 2.148171 8 194
-FAST f=16 a=2 0.099619 2.148171 8 194
-FAST f=16 a=3 11.082568 2.140837 8 290
-FAST f=16 a=3 0.079165 2.140837 8 290
-FAST f=16 a=4 10.066749 2.144405 8 386
-FAST f=16 a=4 0.068411 2.144405 8 386
-FAST f=16 a=5 9.501121 2.140720 8 386
-FAST f=16 a=5 0.061316 2.140720 8 386
-FAST f=16 a=6 9.179332 2.139478 8 386
-FAST f=16 a=6 0.056322 2.139478 8 386
-FAST f=16 a=7 8.849438 2.142412 8 194
-FAST f=16 a=7 0.050493 2.142412 8 194
-FAST f=16 a=8 8.810919 2.143454 8 434
-FAST f=16 a=8 0.051304 2.143454 8 434
-FAST f=16 a=9 8.553900 2.140339 8 194
-FAST f=16 a=9 0.047285 2.140339 8 194
-FAST f=16 a=10 8.398027 2.143130 8 386
-FAST f=16 a=10 0.046386 2.143130 8 386
-FAST f=17 a=1 18.644657 2.157192 8 98
-FAST f=17 a=1 0.173884 2.157192 8 98
-FAST f=17 a=2 13.071242 2.159830 8 146
-FAST f=17 a=2 0.10388 2.159830 8 146
-FAST f=17 a=3 11.332366 2.153654 6 194
-FAST f=17 a=3 0.08983 2.153654 6 194
-FAST f=17 a=4 10.362413 2.156813 8 242
-FAST f=17 a=4 0.070389 2.156813 8 242
-FAST f=17 a=5 9.808159 2.155098 6 338
-FAST f=17 a=5 0.072661 2.155098 6 338
-FAST f=17 a=6 9.451165 2.153845 6 146
-FAST f=17 a=6 0.064959 2.153845 6 146
-FAST f=17 a=7 9.163097 2.155424 6 242
-FAST f=17 a=7 0.064323 2.155424 6 242
-FAST f=17 a=8 9.047276 2.156640 8 242
-FAST f=17 a=8 0.053382 2.156640 8 242
-FAST f=17 a=9 8.807671 2.152396 8 146
-FAST f=17 a=9 0.049617 2.152396 8 146
-FAST f=17 a=10 8.649827 2.152370 8 146
-FAST f=17 a=10 0.047849 2.152370 8 146
-FAST f=18 a=1 18.809502 2.168116 8 98
-FAST f=18 a=1 0.175226 2.168116 8 98
-FAST f=18 a=2 13.756502 2.170870 6 242
-FAST f=18 a=2 0.119507 2.170870 6 242
-FAST f=18 a=3 12.059748 2.163094 6 98
-FAST f=18 a=3 0.093912 2.163094 6 98
-FAST f=18 a=4 11.410294 2.172372 8 98
-FAST f=18 a=4 0.073048 2.172372 8 98
-FAST f=18 a=5 10.560297 2.166388 8 98
-FAST f=18 a=5 0.065136 2.166388 8 98
-FAST f=18 a=6 10.071390 2.162672 8 98
-FAST f=18 a=6 0.059402 2.162672 8 98
-FAST f=18 a=7 10.084214 2.166624 6 194
-FAST f=18 a=7 0.073276 2.166624 6 194
-FAST f=18 a=8 9.953226 2.167454 8 98
-FAST f=18 a=8 0.053659 2.167454 8 98
-FAST f=18 a=9 8.982461 2.161593 6 146
-FAST f=18 a=9 0.05955 2.161593 6 146
-FAST f=18 a=10 8.986092 2.164373 6 242
-FAST f=18 a=10 0.059135 2.164373 6 242
-FAST f=19 a=1 18.908277 2.176021 8 98
-FAST f=19 a=1 0.177316 2.176021 8 98
-FAST f=19 a=2 13.471313 2.176103 8 98
-FAST f=19 a=2 0.106344 2.176103 8 98
-FAST f=19 a=3 11.571406 2.172812 8 98
-FAST f=19 a=3 0.083293 2.172812 8 98
-FAST f=19 a=4 10.632775 2.177770 6 146
-FAST f=19 a=4 0.079864 2.177770 6 146
-FAST f=19 a=5 10.030190 2.175574 6 146
-FAST f=19 a=5 0.07223 2.175574 6 146
-FAST f=19 a=6 9.717818 2.169997 8 98
-FAST f=19 a=6 0.060049 2.169997 8 98
-FAST f=19 a=7 9.397531 2.172770 8 146
-FAST f=19 a=7 0.057188 2.172770 8 146
-FAST f=19 a=8 9.281061 2.175822 8 98
-FAST f=19 a=8 0.053711 2.175822 8 98
-FAST f=19 a=9 9.165242 2.169849 6 146
-FAST f=19 a=9 0.059898 2.169849 6 146
-FAST f=19 a=10 9.048763 2.173394 8 98
-FAST f=19 a=10 0.049757 2.173394 8 98
-FAST f=20 a=1 21.166917 2.183923 6 98
-FAST f=20 a=1 0.205425 2.183923 6 98
-FAST f=20 a=2 15.642753 2.182349 6 98
-FAST f=20 a=2 0.135957 2.182349 6 98
-FAST f=20 a=3 14.053730 2.173544 6 98
-FAST f=20 a=3 0.11266 2.173544 6 98
-FAST f=20 a=4 15.270019 2.183656 8 98
-FAST f=20 a=4 0.107892 2.183656 8 98
-FAST f=20 a=5 15.497927 2.174661 6 98
-FAST f=20 a=5 0.100305 2.174661 6 98
-FAST f=20 a=6 13.973505 2.172391 8 98
-FAST f=20 a=6 0.087565 2.172391 8 98
-FAST f=20 a=7 14.083296 2.172443 8 98
-FAST f=20 a=7 0.078062 2.172443 8 98
-FAST f=20 a=8 12.560048 2.175581 8 98
-FAST f=20 a=8 0.070282 2.175581 8 98
-FAST f=20 a=9 13.078645 2.173975 6 146
-FAST f=20 a=9 0.081041 2.173975 6 146
-FAST f=20 a=10 12.823328 2.177778 8 98
-FAST f=20 a=10 0.074522 2.177778 8 98
-FAST f=21 a=1 29.825370 2.183057 6 98
-FAST f=21 a=1 0.334453 2.183057 6 98
-FAST f=21 a=2 29.476474 2.182752 8 98
-FAST f=21 a=2 0.286602 2.182752 8 98
-FAST f=21 a=3 25.937186 2.175867 8 98
-FAST f=21 a=3 0.17626 2.175867 8 98
-FAST f=21 a=4 20.413865 2.179780 8 98
-FAST f=21 a=4 0.206085 2.179780 8 98
-FAST f=21 a=5 20.541889 2.178328 6 146
-FAST f=21 a=5 0.199157 2.178328 6 146
-FAST f=21 a=6 21.090670 2.174443 6 146
-FAST f=21 a=6 0.190645 2.174443 6 146
-FAST f=21 a=7 20.221569 2.177384 6 146
-FAST f=21 a=7 0.184278 2.177384 6 146
-FAST f=21 a=8 20.322357 2.179456 6 98
-FAST f=21 a=8 0.178458 2.179456 6 98
-FAST f=21 a=9 20.683912 2.174396 6 146
-FAST f=21 a=9 0.190829 2.174396 6 146
-FAST f=21 a=10 20.840865 2.174905 8 98
-FAST f=21 a=10 0.172515 2.174905 8 98
-FAST f=22 a=1 36.822827 2.181612 6 98
-FAST f=22 a=1 0.437389 2.181612 6 98
-FAST f=22 a=2 30.616902 2.183142 8 98
-FAST f=22 a=2 0.324284 2.183142 8 98
-FAST f=22 a=3 28.472482 2.178130 8 98
-FAST f=22 a=3 0.236538 2.178130 8 98
-FAST f=22 a=4 25.847028 2.181878 8 98
-FAST f=22 a=4 0.263744 2.181878 8 98
-FAST f=22 a=5 27.095881 2.180775 8 98
-FAST f=22 a=5 0.24988 2.180775 8 98
-FAST f=22 a=6 25.939172 2.170916 8 98
-FAST f=22 a=6 0.240033 2.170916 8 98
-FAST f=22 a=7 27.064194 2.177849 8 98
-FAST f=22 a=7 0.242383 2.177849 8 98
-FAST f=22 a=8 25.140221 2.178216 8 98
-FAST f=22 a=8 0.237601 2.178216 8 98
-FAST f=22 a=9 25.505283 2.177455 6 146
-FAST f=22 a=9 0.223217 2.177455 6 146
-FAST f=22 a=10 24.529362 2.176705 6 98
-FAST f=22 a=10 0.222876 2.176705 6 98
-FAST f=23 a=1 39.127310 2.183006 6 98
-FAST f=23 a=1 0.417338 2.183006 6 98
-FAST f=23 a=2 32.468161 2.183524 6 98
-FAST f=23 a=2 0.351645 2.183524 6 98
-FAST f=23 a=3 31.577620 2.172604 6 98
-FAST f=23 a=3 0.319659 2.172604 6 98
-FAST f=23 a=4 30.129247 2.183932 6 98
-FAST f=23 a=4 0.307239 2.183932 6 98
-FAST f=23 a=5 29.103376 2.183529 6 146
-FAST f=23 a=5 0.285533 2.183529 6 146
-FAST f=23 a=6 29.776045 2.174367 8 98
-FAST f=23 a=6 0.276846 2.174367 8 98
-FAST f=23 a=7 28.940407 2.178022 6 146
-FAST f=23 a=7 0.274082 2.178022 6 146
-FAST f=23 a=8 29.256009 2.179462 6 98
-FAST f=23 a=8 0.26949 2.179462 6 98
-FAST f=23 a=9 29.347312 2.170407 8 98
-FAST f=23 a=9 0.265034 2.170407 8 98
-FAST f=23 a=10 29.140081 2.171762 8 98
-FAST f=23 a=10 0.259183 2.171762 8 98
-FAST f=24 a=1 44.871179 2.182115 6 98
-FAST f=24 a=1 0.509433 2.182115 6 98
-FAST f=24 a=2 38.694867 2.180549 8 98
-FAST f=24 a=2 0.406695 2.180549 8 98
-FAST f=24 a=3 38.363769 2.172821 8 98
-FAST f=24 a=3 0.359581 2.172821 8 98
-FAST f=24 a=4 36.580797 2.184142 8 98
-FAST f=24 a=4 0.340614 2.184142 8 98
-FAST f=24 a=5 33.125701 2.183301 8 98
-FAST f=24 a=5 0.324874 2.183301 8 98
-FAST f=24 a=6 34.776068 2.173019 6 146
-FAST f=24 a=6 0.340397 2.173019 6 146
-FAST f=24 a=7 34.417625 2.176561 6 146
-FAST f=24 a=7 0.308223 2.176561 6 146
-FAST f=24 a=8 35.470291 2.182161 6 98
-FAST f=24 a=8 0.307724 2.182161 6 98
-FAST f=24 a=9 34.927252 2.172682 6 146
-FAST f=24 a=9 0.300598 2.172682 6 146
-FAST f=24 a=10 33.238355 2.173395 6 98
-FAST f=24 a=10 0.249916 2.173395 6 98
-
-
-hg-manifest:
-NODICT 0.000004 1.866377
-RANDOM 0.696346 2.309436
-LEGACY 7.064527 2.506977
-COVER 876.312865 2.582528 8 434
-COVER 35.684533 2.582528 8 434
-FAST f=15 a=1 76.618201 2.404013 8 1202
-FAST f=15 a=1 0.700722 2.404013 8 1202
-FAST f=15 a=2 49.213058 2.409248 6 1826
-FAST f=15 a=2 0.473393 2.409248 6 1826
-FAST f=15 a=3 41.753197 2.409677 8 1490
-FAST f=15 a=3 0.336848 2.409677 8 1490
-FAST f=15 a=4 38.648295 2.407996 8 1538
-FAST f=15 a=4 0.283952 2.407996 8 1538
-FAST f=15 a=5 36.144936 2.402895 8 1874
-FAST f=15 a=5 0.270128 2.402895 8 1874
-FAST f=15 a=6 35.484675 2.394873 8 1586
-FAST f=15 a=6 0.251637 2.394873 8 1586
-FAST f=15 a=7 34.280599 2.397311 8 1778
-FAST f=15 a=7 0.23984 2.397311 8 1778
-FAST f=15 a=8 32.122572 2.396089 6 1490
-FAST f=15 a=8 0.251508 2.396089 6 1490
-FAST f=15 a=9 29.909842 2.390092 6 1970
-FAST f=15 a=9 0.251233 2.390092 6 1970
-FAST f=15 a=10 30.102938 2.400086 6 1682
-FAST f=15 a=10 0.23688 2.400086 6 1682
-FAST f=16 a=1 67.750401 2.475460 6 1346
-FAST f=16 a=1 0.796035 2.475460 6 1346
-FAST f=16 a=2 52.812027 2.480860 6 1730
-FAST f=16 a=2 0.480384 2.480860 6 1730
-FAST f=16 a=3 44.179259 2.469304 8 1970
-FAST f=16 a=3 0.332657 2.469304 8 1970
-FAST f=16 a=4 37.612728 2.478208 6 1970
-FAST f=16 a=4 0.32498 2.478208 6 1970
-FAST f=16 a=5 35.056222 2.475568 6 1298
-FAST f=16 a=5 0.302824 2.475568 6 1298
-FAST f=16 a=6 34.713012 2.486079 8 1730
-FAST f=16 a=6 0.24755 2.486079 8 1730
-FAST f=16 a=7 33.713687 2.477180 6 1682
-FAST f=16 a=7 0.280358 2.477180 6 1682
-FAST f=16 a=8 31.571412 2.475418 8 1538
-FAST f=16 a=8 0.241241 2.475418 8 1538
-FAST f=16 a=9 31.608069 2.478263 8 1922
-FAST f=16 a=9 0.241764 2.478263 8 1922
-FAST f=16 a=10 31.358002 2.472263 8 1442
-FAST f=16 a=10 0.221661 2.472263 8 1442
-FAST f=17 a=1 66.185775 2.536085 6 1346
-FAST f=17 a=1 0.713549 2.536085 6 1346
-FAST f=17 a=2 50.365000 2.546105 8 1298
-FAST f=17 a=2 0.467846 2.546105 8 1298
-FAST f=17 a=3 42.712843 2.536250 8 1298
-FAST f=17 a=3 0.34047 2.536250 8 1298
-FAST f=17 a=4 39.514227 2.535555 8 1442
-FAST f=17 a=4 0.302989 2.535555 8 1442
-FAST f=17 a=5 35.189292 2.524925 8 1202
-FAST f=17 a=5 0.273451 2.524925 8 1202
-FAST f=17 a=6 35.791683 2.523466 8 1202
-FAST f=17 a=6 0.268261 2.523466 8 1202
-FAST f=17 a=7 37.416136 2.526625 6 1010
-FAST f=17 a=7 0.277558 2.526625 6 1010
-FAST f=17 a=8 37.084707 2.533274 6 1250
-FAST f=17 a=8 0.285104 2.533274 6 1250
-FAST f=17 a=9 34.183814 2.532765 8 1298
-FAST f=17 a=9 0.235133 2.532765 8 1298
-FAST f=17 a=10 31.149235 2.528722 8 1346
-FAST f=17 a=10 0.232679 2.528722 8 1346
-FAST f=18 a=1 72.942176 2.559857 6 386
-FAST f=18 a=1 0.718618 2.559857 6 386
-FAST f=18 a=2 51.690440 2.559572 8 290
-FAST f=18 a=2 0.403978 2.559572 8 290
-FAST f=18 a=3 45.344908 2.561040 8 962
-FAST f=18 a=3 0.357205 2.561040 8 962
-FAST f=18 a=4 39.804522 2.558446 8 1010
-FAST f=18 a=4 0.310526 2.558446 8 1010
-FAST f=18 a=5 38.134888 2.561811 8 626
-FAST f=18 a=5 0.273743 2.561811 8 626
-FAST f=18 a=6 35.091890 2.555518 8 722
-FAST f=18 a=6 0.260135 2.555518 8 722
-FAST f=18 a=7 34.639523 2.562938 8 290
-FAST f=18 a=7 0.234294 2.562938 8 290
-FAST f=18 a=8 36.076431 2.563567 8 1586
-FAST f=18 a=8 0.274075 2.563567 8 1586
-FAST f=18 a=9 36.376433 2.560950 8 722
-FAST f=18 a=9 0.240106 2.560950 8 722
-FAST f=18 a=10 32.624790 2.559340 8 578
-FAST f=18 a=10 0.234704 2.559340 8 578
-FAST f=19 a=1 70.513761 2.572441 8 194
-FAST f=19 a=1 0.726112 2.572441 8 194
-FAST f=19 a=2 59.263032 2.574560 8 482
-FAST f=19 a=2 0.451554 2.574560 8 482
-FAST f=19 a=3 51.509594 2.571546 6 194
-FAST f=19 a=3 0.393014 2.571546 6 194
-FAST f=19 a=4 55.393906 2.573386 8 482
-FAST f=19 a=4 0.38819 2.573386 8 482
-FAST f=19 a=5 43.201736 2.567589 8 674
-FAST f=19 a=5 0.292155 2.567589 8 674
-FAST f=19 a=6 42.911687 2.572666 6 434
-FAST f=19 a=6 0.303988 2.572666 6 434
-FAST f=19 a=7 44.687591 2.573613 6 290
-FAST f=19 a=7 0.308721 2.573613 6 290
-FAST f=19 a=8 37.372868 2.571039 6 194
-FAST f=19 a=8 0.287137 2.571039 6 194
-FAST f=19 a=9 36.074230 2.566473 6 482
-FAST f=19 a=9 0.280721 2.566473 6 482
-FAST f=19 a=10 33.731720 2.570306 8 194
-FAST f=19 a=10 0.224073 2.570306 8 194
-FAST f=20 a=1 79.670634 2.581146 6 290
-FAST f=20 a=1 0.899986 2.581146 6 290
-FAST f=20 a=2 58.827141 2.579782 8 386
-FAST f=20 a=2 0.602288 2.579782 8 386
-FAST f=20 a=3 51.289004 2.579627 8 722
-FAST f=20 a=3 0.446091 2.579627 8 722
-FAST f=20 a=4 47.711068 2.581508 8 722
-FAST f=20 a=4 0.473007 2.581508 8 722
-FAST f=20 a=5 47.402929 2.578062 6 434
-FAST f=20 a=5 0.497131 2.578062 6 434
-FAST f=20 a=6 54.797102 2.577365 8 482
-FAST f=20 a=6 0.515061 2.577365 8 482
-FAST f=20 a=7 51.370877 2.583050 8 386
-FAST f=20 a=7 0.402878 2.583050 8 386
-FAST f=20 a=8 51.437931 2.574875 6 242
-FAST f=20 a=8 0.453094 2.574875 6 242
-FAST f=20 a=9 44.105456 2.576700 6 242
-FAST f=20 a=9 0.456633 2.576700 6 242
-FAST f=20 a=10 44.447580 2.578305 8 338
-FAST f=20 a=10 0.409121 2.578305 8 338
-FAST f=21 a=1 113.031686 2.582449 6 242
-FAST f=21 a=1 1.456971 2.582449 6 242
-FAST f=21 a=2 97.700932 2.582124 8 194
-FAST f=21 a=2 1.072078 2.582124 8 194
-FAST f=21 a=3 96.563648 2.585479 8 434
-FAST f=21 a=3 0.949528 2.585479 8 434
-FAST f=21 a=4 90.597813 2.582366 6 386
-FAST f=21 a=4 0.76944 2.582366 6 386
-FAST f=21 a=5 86.815980 2.579043 8 434
-FAST f=21 a=5 0.858167 2.579043 8 434
-FAST f=21 a=6 91.235820 2.578378 8 530
-FAST f=21 a=6 0.684274 2.578378 8 530
-FAST f=21 a=7 84.392788 2.581243 8 386
-FAST f=21 a=7 0.814386 2.581243 8 386
-FAST f=21 a=8 82.052310 2.582547 8 338
-FAST f=21 a=8 0.822633 2.582547 8 338
-FAST f=21 a=9 74.696074 2.579319 8 194
-FAST f=21 a=9 0.811028 2.579319 8 194
-FAST f=21 a=10 76.211170 2.578766 8 290
-FAST f=21 a=10 0.809715 2.578766 8 290
-FAST f=22 a=1 138.976871 2.580478 8 194
-FAST f=22 a=1 1.748932 2.580478 8 194
-FAST f=22 a=2 120.164097 2.583633 8 386
-FAST f=22 a=2 1.333239 2.583633 8 386
-FAST f=22 a=3 111.986474 2.582566 6 194
-FAST f=22 a=3 1.305734 2.582566 6 194
-FAST f=22 a=4 108.548148 2.583068 6 194
-FAST f=22 a=4 1.314026 2.583068 6 194
-FAST f=22 a=5 103.173017 2.583495 6 290
-FAST f=22 a=5 1.228664 2.583495 6 290
-FAST f=22 a=6 108.421262 2.582349 8 530
-FAST f=22 a=6 1.076773 2.582349 8 530
-FAST f=22 a=7 103.284127 2.581022 8 386
-FAST f=22 a=7 1.112117 2.581022 8 386
-FAST f=22 a=8 96.330279 2.581073 8 290
-FAST f=22 a=8 1.109303 2.581073 8 290
-FAST f=22 a=9 97.651348 2.580075 6 194
-FAST f=22 a=9 0.933032 2.580075 6 194
-FAST f=22 a=10 101.660621 2.584886 8 194
-FAST f=22 a=10 0.796823 2.584886 8 194
-FAST f=23 a=1 159.322978 2.581474 6 242
-FAST f=23 a=1 2.015878 2.581474 6 242
-FAST f=23 a=2 134.331775 2.581619 8 194
-FAST f=23 a=2 1.545845 2.581619 8 194
-FAST f=23 a=3 127.724552 2.579888 6 338
-FAST f=23 a=3 1.444496 2.579888 6 338
-FAST f=23 a=4 126.077675 2.578137 6 242
-FAST f=23 a=4 1.364394 2.578137 6 242
-FAST f=23 a=5 124.914027 2.580843 8 338
-FAST f=23 a=5 1.116059 2.580843 8 338
-FAST f=23 a=6 122.874153 2.577637 6 338
-FAST f=23 a=6 1.164584 2.577637 6 338
-FAST f=23 a=7 123.099257 2.582715 6 386
-FAST f=23 a=7 1.354042 2.582715 6 386
-FAST f=23 a=8 122.026753 2.577681 8 194
-FAST f=23 a=8 1.210966 2.577681 8 194
-FAST f=23 a=9 121.164312 2.584599 6 290
-FAST f=23 a=9 1.174859 2.584599 6 290
-FAST f=23 a=10 117.462222 2.580358 8 194
-FAST f=23 a=10 1.075258 2.580358 8 194
-FAST f=24 a=1 169.539659 2.581642 6 194
-FAST f=24 a=1 1.916804 2.581642 6 194
-FAST f=24 a=2 160.539270 2.580421 6 290
-FAST f=24 a=2 1.71087 2.580421 6 290
-FAST f=24 a=3 155.455874 2.580449 6 242
-FAST f=24 a=3 1.60307 2.580449 6 242
-FAST f=24 a=4 147.630320 2.582953 6 338
-FAST f=24 a=4 1.396364 2.582953 6 338
-FAST f=24 a=5 133.767428 2.580589 6 290
-FAST f=24 a=5 1.19933 2.580589 6 290
-FAST f=24 a=6 146.437535 2.579453 8 194
-FAST f=24 a=6 1.385405 2.579453 8 194
-FAST f=24 a=7 147.227507 2.584155 8 386
-FAST f=24 a=7 1.48942 2.584155 8 386
-FAST f=24 a=8 138.005773 2.584115 8 194
-FAST f=24 a=8 1.352 2.584115 8 194
-FAST f=24 a=9 141.442625 2.582902 8 290
-FAST f=24 a=9 1.39647 2.582902 8 290
-FAST f=24 a=10 142.157446 2.582701 8 434
-FAST f=24 a=10 1.498889 2.582701 8 434
+++ /dev/null
-#include <stdio.h> /* fprintf */
-#include <stdlib.h> /* malloc, free, qsort */
-#include <string.h> /* strcmp, strlen */
-#include <errno.h> /* errno */
-#include <ctype.h>
-#include <time.h>
-#include "random.h"
-#include "dictBuilder.h"
-#include "zstd_internal.h" /* includes zstd.h */
-#include "io.h"
-#include "util.h"
-#include "zdict.h"
-
-
-
-/*-*************************************
-* Console display
-***************************************/
-#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
-#define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); }
-
-static const U64 g_refreshRate = SEC_TO_MICRO / 6;
-static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER;
-
-#define DISPLAYUPDATE(l, ...) { if (displayLevel>=l) { \
- if ((UTIL_clockSpanMicro(g_displayClock) > g_refreshRate) || (displayLevel>=4)) \
- { g_displayClock = UTIL_getTime(); DISPLAY(__VA_ARGS__); \
- if (displayLevel>=4) fflush(stderr); } } }
-
-
-/*-*************************************
-* Exceptions
-***************************************/
-#ifndef DEBUG
-# define DEBUG 0
-#endif
-#define DEBUGOUTPUT(...) if (DEBUG) DISPLAY(__VA_ARGS__);
-#define EXM_THROW(error, ...) \
-{ \
- DEBUGOUTPUT("Error defined at %s, line %i : \n", __FILE__, __LINE__); \
- DISPLAY("Error %i : ", error); \
- DISPLAY(__VA_ARGS__); \
- DISPLAY("\n"); \
- exit(error); \
-}
-
-
-/*-*************************************
-* Constants
-***************************************/
-static const unsigned g_defaultMaxDictSize = 110 KB;
-#define DEFAULT_CLEVEL 3
-#define DEFAULT_DISPLAYLEVEL 2
-
-
-/*-*************************************
-* Struct
-***************************************/
-typedef struct {
- const void* dictBuffer;
- size_t dictSize;
-} dictInfo;
-
-
-/*-*************************************
-* Dictionary related operations
-***************************************/
-/** createDictFromFiles() :
- * Based on type of param given, train dictionary using the corresponding algorithm
- * @return dictInfo containing dictionary buffer and dictionary size
- */
-dictInfo* createDictFromFiles(sampleInfo *info, unsigned maxDictSize,
- ZDICT_random_params_t *randomParams, ZDICT_cover_params_t *coverParams,
- ZDICT_legacy_params_t *legacyParams, ZDICT_fastCover_params_t *fastParams) {
- unsigned const displayLevel = randomParams ? randomParams->zParams.notificationLevel :
- coverParams ? coverParams->zParams.notificationLevel :
- legacyParams ? legacyParams->zParams.notificationLevel :
- fastParams ? fastParams->zParams.notificationLevel :
- DEFAULT_DISPLAYLEVEL; /* no dict */
- void* const dictBuffer = malloc(maxDictSize);
-
- dictInfo* dInfo = NULL;
-
- /* Checks */
- if (!dictBuffer)
- EXM_THROW(12, "not enough memory for trainFromFiles"); /* should not happen */
-
- { size_t dictSize;
- if(randomParams) {
- dictSize = ZDICT_trainFromBuffer_random(dictBuffer, maxDictSize, info->srcBuffer,
- info->samplesSizes, info->nbSamples, *randomParams);
- }else if(coverParams) {
- /* Run the optimize version if either k or d is not provided */
- if (!coverParams->d || !coverParams->k){
- dictSize = ZDICT_optimizeTrainFromBuffer_cover(dictBuffer, maxDictSize, info->srcBuffer,
- info->samplesSizes, info->nbSamples, coverParams);
- } else {
- dictSize = ZDICT_trainFromBuffer_cover(dictBuffer, maxDictSize, info->srcBuffer,
- info->samplesSizes, info->nbSamples, *coverParams);
- }
- } else if(legacyParams) {
- dictSize = ZDICT_trainFromBuffer_legacy(dictBuffer, maxDictSize, info->srcBuffer,
- info->samplesSizes, info->nbSamples, *legacyParams);
- } else if(fastParams) {
- /* Run the optimize version if either k or d is not provided */
- if (!fastParams->d || !fastParams->k) {
- dictSize = ZDICT_optimizeTrainFromBuffer_fastCover(dictBuffer, maxDictSize, info->srcBuffer,
- info->samplesSizes, info->nbSamples, fastParams);
- } else {
- dictSize = ZDICT_trainFromBuffer_fastCover(dictBuffer, maxDictSize, info->srcBuffer,
- info->samplesSizes, info->nbSamples, *fastParams);
- }
- } else {
- dictSize = 0;
- }
- if (ZDICT_isError(dictSize)) {
- DISPLAYLEVEL(1, "dictionary training failed : %s \n", ZDICT_getErrorName(dictSize)); /* should not happen */
- free(dictBuffer);
- return dInfo;
- }
- dInfo = (dictInfo *)malloc(sizeof(dictInfo));
- dInfo->dictBuffer = dictBuffer;
- dInfo->dictSize = dictSize;
- }
- return dInfo;
-}
-
-
-/** compressWithDict() :
- * Compress samples from sample buffer given dictionary stored on dictionary buffer and compression level
- * @return compression ratio
- */
-double compressWithDict(sampleInfo *srcInfo, dictInfo* dInfo, int compressionLevel, int displayLevel) {
- /* Local variables */
- size_t totalCompressedSize = 0;
- size_t totalOriginalSize = 0;
- const unsigned hasDict = dInfo->dictSize > 0 ? 1 : 0;
- double cRatio;
- size_t dstCapacity;
- int i;
-
- /* Pointers */
- ZSTD_CDict *cdict = NULL;
- ZSTD_CCtx* cctx = NULL;
- size_t *offsets = NULL;
- void* dst = NULL;
-
- /* Allocate dst with enough space to compress the maximum sized sample */
- {
- size_t maxSampleSize = 0;
- for (i = 0; i < srcInfo->nbSamples; i++) {
- maxSampleSize = MAX(srcInfo->samplesSizes[i], maxSampleSize);
- }
- dstCapacity = ZSTD_compressBound(maxSampleSize);
- dst = malloc(dstCapacity);
- }
-
- /* Calculate offset for each sample */
- offsets = (size_t *)malloc((srcInfo->nbSamples + 1) * sizeof(size_t));
- offsets[0] = 0;
- for (i = 1; i <= srcInfo->nbSamples; i++) {
- offsets[i] = offsets[i - 1] + srcInfo->samplesSizes[i - 1];
- }
-
- /* Create the cctx */
- cctx = ZSTD_createCCtx();
- if(!cctx || !dst) {
- cRatio = -1;
- goto _cleanup;
- }
-
- /* Create CDict if there's a dictionary stored on buffer */
- if (hasDict) {
- cdict = ZSTD_createCDict(dInfo->dictBuffer, dInfo->dictSize, compressionLevel);
- if(!cdict) {
- cRatio = -1;
- goto _cleanup;
- }
- }
-
- /* Compress each sample and sum their sizes*/
- const BYTE *const samples = (const BYTE *)srcInfo->srcBuffer;
- for (i = 0; i < srcInfo->nbSamples; i++) {
- size_t compressedSize;
- if(hasDict) {
- compressedSize = ZSTD_compress_usingCDict(cctx, dst, dstCapacity, samples + offsets[i], srcInfo->samplesSizes[i], cdict);
- } else {
- compressedSize = ZSTD_compressCCtx(cctx, dst, dstCapacity,samples + offsets[i], srcInfo->samplesSizes[i], compressionLevel);
- }
- if (ZSTD_isError(compressedSize)) {
- cRatio = -1;
- goto _cleanup;
- }
- totalCompressedSize += compressedSize;
- }
-
- /* Sum original sizes */
- for (i = 0; i<srcInfo->nbSamples; i++) {
- totalOriginalSize += srcInfo->samplesSizes[i];
- }
-
- /* Calculate compression ratio */
- DISPLAYLEVEL(2, "original size is %lu\n", totalOriginalSize);
- DISPLAYLEVEL(2, "compressed size is %lu\n", totalCompressedSize);
- cRatio = (double)totalOriginalSize/(double)totalCompressedSize;
-
-_cleanup:
- free(dst);
- free(offsets);
- ZSTD_freeCCtx(cctx);
- ZSTD_freeCDict(cdict);
- return cRatio;
-}
-
-
-/** FreeDictInfo() :
- * Free memory allocated for dictInfo
- */
-void freeDictInfo(dictInfo* info) {
- if (!info) return;
- if (info->dictBuffer) free((void*)(info->dictBuffer));
- free(info);
-}
-
-
-
-/*-********************************************************
- * Benchmarking functions
-**********************************************************/
-/** benchmarkDictBuilder() :
- * Measure how long a dictionary builder takes and compression ratio with the dictionary built
- * @return 0 if benchmark successfully, 1 otherwise
- */
-int benchmarkDictBuilder(sampleInfo *srcInfo, unsigned maxDictSize, ZDICT_random_params_t *randomParam,
- ZDICT_cover_params_t *coverParam, ZDICT_legacy_params_t *legacyParam,
- ZDICT_fastCover_params_t *fastParam) {
- /* Local variables */
- const unsigned displayLevel = randomParam ? randomParam->zParams.notificationLevel :
- coverParam ? coverParam->zParams.notificationLevel :
- legacyParam ? legacyParam->zParams.notificationLevel :
- fastParam ? fastParam->zParams.notificationLevel:
- DEFAULT_DISPLAYLEVEL; /* no dict */
- const char* name = randomParam ? "RANDOM" :
- coverParam ? "COVER" :
- legacyParam ? "LEGACY" :
- fastParam ? "FAST":
- "NODICT"; /* no dict */
- const unsigned cLevel = randomParam ? randomParam->zParams.compressionLevel :
- coverParam ? coverParam->zParams.compressionLevel :
- legacyParam ? legacyParam->zParams.compressionLevel :
- fastParam ? fastParam->zParams.compressionLevel:
- DEFAULT_CLEVEL; /* no dict */
- int result = 0;
-
- /* Calculate speed */
- const UTIL_time_t begin = UTIL_getTime();
- dictInfo* dInfo = createDictFromFiles(srcInfo, maxDictSize, randomParam, coverParam, legacyParam, fastParam);
- const U64 timeMicro = UTIL_clockSpanMicro(begin);
- const double timeSec = timeMicro / (double)SEC_TO_MICRO;
- if (!dInfo) {
- DISPLAYLEVEL(1, "%s does not train successfully\n", name);
- result = 1;
- goto _cleanup;
- }
- DISPLAYLEVEL(1, "%s took %f seconds to execute \n", name, timeSec);
-
- /* Calculate compression ratio */
- const double cRatio = compressWithDict(srcInfo, dInfo, cLevel, displayLevel);
- if (cRatio < 0) {
- DISPLAYLEVEL(1, "Compressing with %s dictionary does not work\n", name);
- result = 1;
- goto _cleanup;
-
- }
- DISPLAYLEVEL(1, "Compression ratio with %s dictionary is %f\n", name, cRatio);
-
-_cleanup:
- freeDictInfo(dInfo);
- return result;
-}
-
-
-
-int main(int argCount, const char* argv[])
-{
- const int displayLevel = DEFAULT_DISPLAYLEVEL;
- const char* programName = argv[0];
- int result = 0;
-
- /* Initialize arguments to default values */
- unsigned k = 200;
- unsigned d = 8;
- unsigned f;
- unsigned accel;
- unsigned i;
- const unsigned cLevel = DEFAULT_CLEVEL;
- const unsigned dictID = 0;
- const unsigned maxDictSize = g_defaultMaxDictSize;
-
- /* Initialize table to store input files */
- const char** filenameTable = (const char**)malloc(argCount * sizeof(const char*));
- unsigned filenameIdx = 0;
-
- char* fileNamesBuf = NULL;
- unsigned fileNamesNb = filenameIdx;
- const int followLinks = 0;
- const char** extendedFileList = NULL;
-
- /* Parse arguments */
- for (i = 1; i < argCount; i++) {
- const char* argument = argv[i];
- if (longCommandWArg(&argument, "in=")) {
- filenameTable[filenameIdx] = argument;
- filenameIdx++;
- continue;
- }
- DISPLAYLEVEL(1, "benchmark: Incorrect parameters\n");
- return 1;
- }
-
- /* Get the list of all files recursively (because followLinks==0)*/
- extendedFileList = UTIL_createFileList(filenameTable, filenameIdx, &fileNamesBuf,
- &fileNamesNb, followLinks);
- if (extendedFileList) {
- unsigned u;
- for (u=0; u<fileNamesNb; u++) DISPLAYLEVEL(4, "%u %s\n", u, extendedFileList[u]);
- free((void*)filenameTable);
- filenameTable = extendedFileList;
- filenameIdx = fileNamesNb;
- }
-
- /* get sampleInfo */
- size_t blockSize = 0;
- sampleInfo* srcInfo= getSampleInfo(filenameTable,
- filenameIdx, blockSize, maxDictSize, displayLevel);
-
- /* set up zParams */
- ZDICT_params_t zParams;
- zParams.compressionLevel = cLevel;
- zParams.notificationLevel = displayLevel;
- zParams.dictID = dictID;
-
- /* with no dict */
- {
- const int noDictResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, NULL, NULL, NULL);
- if(noDictResult) {
- result = 1;
- goto _cleanup;
- }
- }
-
- /* for random */
- {
- ZDICT_random_params_t randomParam;
- randomParam.zParams = zParams;
- randomParam.k = k;
- const int randomResult = benchmarkDictBuilder(srcInfo, maxDictSize, &randomParam, NULL, NULL, NULL);
- DISPLAYLEVEL(2, "k=%u\n", randomParam.k);
- if(randomResult) {
- result = 1;
- goto _cleanup;
- }
- }
-
- /* for legacy */
- {
- ZDICT_legacy_params_t legacyParam;
- legacyParam.zParams = zParams;
- legacyParam.selectivityLevel = 9;
- const int legacyResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, NULL, &legacyParam, NULL);
- DISPLAYLEVEL(2, "selectivityLevel=%u\n", legacyParam.selectivityLevel);
- if(legacyResult) {
- result = 1;
- goto _cleanup;
- }
- }
-
- /* for cover */
- {
- /* for cover (optimizing k and d) */
- ZDICT_cover_params_t coverParam;
- memset(&coverParam, 0, sizeof(coverParam));
- coverParam.zParams = zParams;
- coverParam.splitPoint = 1.0;
- coverParam.steps = 40;
- coverParam.nbThreads = 1;
- const int coverOptResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, &coverParam, NULL, NULL);
- DISPLAYLEVEL(2, "k=%u\nd=%u\nsteps=%u\nsplit=%u\n", coverParam.k, coverParam.d, coverParam.steps, (unsigned)(coverParam.splitPoint * 100));
- if(coverOptResult) {
- result = 1;
- goto _cleanup;
- }
-
- /* for cover (with k and d provided) */
- const int coverResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, &coverParam, NULL, NULL);
- DISPLAYLEVEL(2, "k=%u\nd=%u\nsteps=%u\nsplit=%u\n", coverParam.k, coverParam.d, coverParam.steps, (unsigned)(coverParam.splitPoint * 100));
- if(coverResult) {
- result = 1;
- goto _cleanup;
- }
-
- }
-
- /* for fastCover */
- for (f = 15; f < 25; f++){
- DISPLAYLEVEL(2, "current f is %u\n", f);
- for (accel = 1; accel < 11; accel++) {
- DISPLAYLEVEL(2, "current accel is %u\n", accel);
- /* for fastCover (optimizing k and d) */
- ZDICT_fastCover_params_t fastParam;
- memset(&fastParam, 0, sizeof(fastParam));
- fastParam.zParams = zParams;
- fastParam.f = f;
- fastParam.steps = 40;
- fastParam.nbThreads = 1;
- fastParam.accel = accel;
- const int fastOptResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, NULL, NULL, &fastParam);
- DISPLAYLEVEL(2, "k=%u\nd=%u\nf=%u\nsteps=%u\nsplit=%u\naccel=%u\n", fastParam.k, fastParam.d, fastParam.f, fastParam.steps, (unsigned)(fastParam.splitPoint * 100), fastParam.accel);
- if(fastOptResult) {
- result = 1;
- goto _cleanup;
- }
-
- /* for fastCover (with k and d provided) */
- for (i = 0; i < 5; i++) {
- const int fastResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, NULL, NULL, &fastParam);
- DISPLAYLEVEL(2, "k=%u\nd=%u\nf=%u\nsteps=%u\nsplit=%u\naccel=%u\n", fastParam.k, fastParam.d, fastParam.f, fastParam.steps, (unsigned)(fastParam.splitPoint * 100), fastParam.accel);
- if(fastResult) {
- result = 1;
- goto _cleanup;
- }
- }
- }
- }
-
-
- /* Free allocated memory */
-_cleanup:
- UTIL_freeFileList(extendedFileList, fileNamesBuf);
- freeSampleInfo(srcInfo);
- return result;
-}
+++ /dev/null
-/* ZDICT_trainFromBuffer_legacy() :
- * issue : samplesBuffer need to be followed by a noisy guard band.
- * work around : duplicate the buffer, and add the noise */
-size_t ZDICT_trainFromBuffer_legacy(void* dictBuffer, size_t dictBufferCapacity,
- const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
- ZDICT_legacy_params_t params);
+++ /dev/null
-echo "Benchmark with in=../../lib/common"
-./benchmark in=../../../lib/common
+++ /dev/null
-ARG :=
-
-CC ?= gcc
-CFLAGS ?= -O3 -g
-INCLUDES := -I ../../../programs -I ../randomDictBuilder -I ../../../lib/common -I ../../../lib -I ../../../lib/dictBuilder
-
-IO_FILE := ../randomDictBuilder/io.c
-
-TEST_INPUT := ../../../lib
-TEST_OUTPUT := fastCoverDict
-
-all: main run clean
-
-.PHONY: test
-test: main testrun testshell clean
-
-.PHONY: run
-run:
- echo "Building a fastCover dictionary with given arguments"
- ./main $(ARG)
-
-main: main.o io.o fastCover.o libzstd.a
- $(CC) $(CFLAGS) main.o io.o fastCover.o libzstd.a -o main
-
-main.o: main.c
- $(CC) $(CFLAGS) $(INCLUDES) -c main.c
-
-fastCover.o: fastCover.c
- $(CC) $(CFLAGS) $(INCLUDES) -c fastCover.c
-
-io.o: $(IO_FILE)
- $(CC) $(CFLAGS) $(INCLUDES) -c $(IO_FILE)
-
-libzstd.a:
- $(MAKE) MOREFLAGS=-g -C ../../../lib libzstd.a
- mv ../../../lib/libzstd.a .
-
-.PHONY: testrun
-testrun: main
- echo "Run with $(TEST_INPUT) and $(TEST_OUTPUT) "
- ./main in=$(TEST_INPUT) out=$(TEST_OUTPUT)
- zstd -be3 -D $(TEST_OUTPUT) -r $(TEST_INPUT) -q
- rm -f $(TEST_OUTPUT)
-
-.PHONY: testshell
-testshell: test.sh
- sh test.sh
- echo "Finish running test.sh"
-
-.PHONY: clean
-clean:
- rm -f *.o main libzstd.a
- $(MAKE) -C ../../../lib clean
- echo "Cleaning is completed"
+++ /dev/null
-FastCover Dictionary Builder
-
-### Permitted Arguments:
-Input File/Directory (in=fileName): required; file/directory used to build dictionary; if directory, will operate recursively for files inside directory; can include multiple files/directories, each following "in="
-Output Dictionary (out=dictName): if not provided, default to fastCoverDict
-Dictionary ID (dictID=#): nonnegative number; if not provided, default to 0
-Maximum Dictionary Size (maxdict=#): positive number; in bytes, if not provided, default to 110KB
-Size of Selected Segment (k=#): positive number; in bytes; if not provided, default to 200
-Size of Dmer (d=#): either 6 or 8; if not provided, default to 8
-Number of steps (steps=#): positive number, if not provided, default to 32
-Percentage of samples used for training(split=#): positive number; if not provided, default to 100
-
-
-###Running Test:
-make test
-
-
-###Usage:
-To build a FASTCOVER dictionary with the provided arguments: make ARG= followed by arguments
-If k or d is not provided, the optimize version of FASTCOVER is run.
-
-### Examples:
-make ARG="in=../../../lib/dictBuilder out=dict100 dictID=520"
-make ARG="in=../../../lib/dictBuilder in=../../../lib/compress"
+++ /dev/null
-/*-*************************************
-* Dependencies
-***************************************/
-#include <stdio.h> /* fprintf */
-#include <stdlib.h> /* malloc, free, qsort */
-#include <string.h> /* memset */
-#include <time.h> /* clock */
-#include "mem.h" /* read */
-#include "pool.h"
-#include "threading.h"
-#include "fastCover.h"
-#include "zstd_internal.h" /* includes zstd.h */
-#include "zdict.h"
-
-
-/*-*************************************
-* Constants
-***************************************/
-#define FASTCOVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((U32)-1) : ((U32)1 GB))
-#define FASTCOVER_MAX_F 32
-#define DEFAULT_SPLITPOINT 1.0
-
-/*-*************************************
-* Console display
-***************************************/
-static int g_displayLevel = 2;
-#define DISPLAY(...) \
- { \
- fprintf(stderr, __VA_ARGS__); \
- fflush(stderr); \
- }
-#define LOCALDISPLAYLEVEL(displayLevel, l, ...) \
- if (displayLevel >= l) { \
- DISPLAY(__VA_ARGS__); \
- } /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */
-#define DISPLAYLEVEL(l, ...) LOCALDISPLAYLEVEL(g_displayLevel, l, __VA_ARGS__)
-
-#define LOCALDISPLAYUPDATE(displayLevel, l, ...) \
- if (displayLevel >= l) { \
- if ((clock() - g_time > refreshRate) || (displayLevel >= 4)) { \
- g_time = clock(); \
- DISPLAY(__VA_ARGS__); \
- } \
- }
-#define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__)
-static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100;
-static clock_t g_time = 0;
-
-
-/*-*************************************
-* Hash Functions
-***************************************/
-static const U64 prime6bytes = 227718039650203ULL;
-static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u << (64-48)) * prime6bytes) >> (64-h)) ; }
-static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); }
-
-static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
-static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; }
-static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); }
-
-
-/**
- * Hash the d-byte value pointed to by p and mod 2^f
- */
-static size_t FASTCOVER_hashPtrToIndex(const void* p, U32 h, unsigned d) {
- if (d == 6) {
- return ZSTD_hash6Ptr(p, h) & ((1 << h) - 1);
- }
- return ZSTD_hash8Ptr(p, h) & ((1 << h) - 1);
-}
-
-
-/*-*************************************
-* Context
-***************************************/
-typedef struct {
- const BYTE *samples;
- size_t *offsets;
- const size_t *samplesSizes;
- size_t nbSamples;
- size_t nbTrainSamples;
- size_t nbTestSamples;
- size_t nbDmers;
- U32 *freqs;
- U16 *segmentFreqs;
- unsigned d;
-} FASTCOVER_ctx_t;
-
-
-/*-*************************************
-* Helper functions
-***************************************/
-/**
- * Returns the sum of the sample sizes.
- */
-static size_t FASTCOVER_sum(const size_t *samplesSizes, unsigned nbSamples) {
- size_t sum = 0;
- unsigned i;
- for (i = 0; i < nbSamples; ++i) {
- sum += samplesSizes[i];
- }
- return sum;
-}
-
-
-/*-*************************************
-* fast functions
-***************************************/
-/**
- * A segment is a range in the source as well as the score of the segment.
- */
-typedef struct {
- U32 begin;
- U32 end;
- U32 score;
-} FASTCOVER_segment_t;
-
-
-/**
- * Selects the best segment in an epoch.
- * Segments of are scored according to the function:
- *
- * Let F(d) be the frequency of all dmers with hash value d.
- * Let S_i be hash value of the dmer at position i of segment S which has length k.
- *
- * Score(S) = F(S_1) + F(S_2) + ... + F(S_{k-d+1})
- *
- * Once the dmer with hash value d is in the dictionary we set F(d) = F(d)/2.
- */
-static FASTCOVER_segment_t FASTCOVER_selectSegment(const FASTCOVER_ctx_t *ctx,
- U32 *freqs, U32 begin,U32 end,
- ZDICT_fastCover_params_t parameters) {
- /* Constants */
- const U32 k = parameters.k;
- const U32 d = parameters.d;
- const U32 dmersInK = k - d + 1;
- /* Try each segment (activeSegment) and save the best (bestSegment) */
- FASTCOVER_segment_t bestSegment = {0, 0, 0};
- FASTCOVER_segment_t activeSegment;
- /* Reset the activeDmers in the segment */
- /* The activeSegment starts at the beginning of the epoch. */
- activeSegment.begin = begin;
- activeSegment.end = begin;
- activeSegment.score = 0;
- {
- /* Slide the activeSegment through the whole epoch.
- * Save the best segment in bestSegment.
- */
- while (activeSegment.end < end) {
- /* Get hash value of current dmer */
- const size_t index = FASTCOVER_hashPtrToIndex(ctx->samples + activeSegment.end, parameters.f, ctx->d);
- /* Add frequency of this index to score if this is the first occurrence of index in active segment */
- if (ctx->segmentFreqs[index] == 0) {
- activeSegment.score += freqs[index];
- }
- ctx->segmentFreqs[index] += 1;
- /* Increment end of segment */
- activeSegment.end += 1;
- /* If the window is now too large, drop the first position */
- if (activeSegment.end - activeSegment.begin == dmersInK + 1) {
- /* Get hash value of the dmer to be eliminated from active segment */
- const size_t delIndex = FASTCOVER_hashPtrToIndex(ctx->samples + activeSegment.begin, parameters.f, ctx->d);
- ctx->segmentFreqs[delIndex] -= 1;
- /* Subtract frequency of this index from score if this is the last occurrence of this index in active segment */
- if (ctx->segmentFreqs[delIndex] == 0) {
- activeSegment.score -= freqs[delIndex];
- }
- /* Increment start of segment */
- activeSegment.begin += 1;
- }
- /* If this segment is the best so far save it */
- if (activeSegment.score > bestSegment.score) {
- bestSegment = activeSegment;
- }
- }
- /* Zero out rest of segmentFreqs array */
- while (activeSegment.begin < end) {
- const size_t delIndex = FASTCOVER_hashPtrToIndex(ctx->samples + activeSegment.begin, parameters.f, ctx->d);
- ctx->segmentFreqs[delIndex] -= 1;
- activeSegment.begin += 1;
- }
- }
- {
- /* Trim off the zero frequency head and tail from the segment. */
- U32 newBegin = bestSegment.end;
- U32 newEnd = bestSegment.begin;
- U32 pos;
- for (pos = bestSegment.begin; pos != bestSegment.end; ++pos) {
- const size_t index = FASTCOVER_hashPtrToIndex(ctx->samples + pos, parameters.f, ctx->d);
- U32 freq = freqs[index];
- if (freq != 0) {
- newBegin = MIN(newBegin, pos);
- newEnd = pos + 1;
- }
- }
- bestSegment.begin = newBegin;
- bestSegment.end = newEnd;
- }
- {
- /* Zero the frequency of hash value of each dmer covered by the chosen segment. */
- U32 pos;
- for (pos = bestSegment.begin; pos != bestSegment.end; ++pos) {
- const size_t i = FASTCOVER_hashPtrToIndex(ctx->samples + pos, parameters.f, ctx->d);
- freqs[i] = 0;
- }
- }
- return bestSegment;
-}
-
-/**
- * Check the validity of the parameters.
- * Returns non-zero if the parameters are valid and 0 otherwise.
- */
-static int FASTCOVER_checkParameters(ZDICT_fastCover_params_t parameters,
- size_t maxDictSize) {
- /* k, d, and f are required parameters */
- if (parameters.d == 0 || parameters.k == 0 || parameters.f == 0) {
- return 0;
- }
- /* d has to be 6 or 8 */
- if (parameters.d != 6 && parameters.d != 8) {
- return 0;
- }
- /* 0 < f <= FASTCOVER_MAX_F */
- if (parameters.f > FASTCOVER_MAX_F) {
- return 0;
- }
- /* k <= maxDictSize */
- if (parameters.k > maxDictSize) {
- return 0;
- }
- /* d <= k */
- if (parameters.d > parameters.k) {
- return 0;
- }
- /* 0 < splitPoint <= 1 */
- if (parameters.splitPoint <= 0 || parameters.splitPoint > 1) {
- return 0;
- }
- return 1;
-}
-
-
-/**
- * Clean up a context initialized with `FASTCOVER_ctx_init()`.
- */
-static void FASTCOVER_ctx_destroy(FASTCOVER_ctx_t *ctx) {
- if (!ctx) {
- return;
- }
- if (ctx->segmentFreqs) {
- free(ctx->segmentFreqs);
- ctx->segmentFreqs = NULL;
- }
- if (ctx->freqs) {
- free(ctx->freqs);
- ctx->freqs = NULL;
- }
- if (ctx->offsets) {
- free(ctx->offsets);
- ctx->offsets = NULL;
- }
-}
-
-/**
- * Calculate for frequency of hash value of each dmer in ctx->samples
- */
-static void FASTCOVER_computeFrequency(U32 *freqs, unsigned f, FASTCOVER_ctx_t *ctx){
- size_t start; /* start of current dmer */
- for (unsigned i = 0; i < ctx->nbTrainSamples; i++) {
- size_t currSampleStart = ctx->offsets[i];
- size_t currSampleEnd = ctx->offsets[i+1];
- start = currSampleStart;
- while (start + ctx->d <= currSampleEnd) {
- const size_t dmerIndex = FASTCOVER_hashPtrToIndex(ctx->samples + start, f, ctx->d);
- freqs[dmerIndex]++;
- start++;
- }
- }
-}
-
-/**
- * Prepare a context for dictionary building.
- * The context is only dependent on the parameter `d` and can used multiple
- * times.
- * Returns 1 on success or zero on error.
- * The context must be destroyed with `FASTCOVER_ctx_destroy()`.
- */
-static int FASTCOVER_ctx_init(FASTCOVER_ctx_t *ctx, const void *samplesBuffer,
- const size_t *samplesSizes, unsigned nbSamples,
- unsigned d, double splitPoint, unsigned f) {
- const BYTE *const samples = (const BYTE *)samplesBuffer;
- const size_t totalSamplesSize = FASTCOVER_sum(samplesSizes, nbSamples);
- /* Split samples into testing and training sets */
- const unsigned nbTrainSamples = splitPoint < 1.0 ? (unsigned)((double)nbSamples * splitPoint) : nbSamples;
- const unsigned nbTestSamples = splitPoint < 1.0 ? nbSamples - nbTrainSamples : nbSamples;
- const size_t trainingSamplesSize = splitPoint < 1.0 ? FASTCOVER_sum(samplesSizes, nbTrainSamples) : totalSamplesSize;
- const size_t testSamplesSize = splitPoint < 1.0 ? FASTCOVER_sum(samplesSizes + nbTrainSamples, nbTestSamples) : totalSamplesSize;
- /* Checks */
- if (totalSamplesSize < MAX(d, sizeof(U64)) ||
- totalSamplesSize >= (size_t)FASTCOVER_MAX_SAMPLES_SIZE) {
- DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n",
- (U32)(totalSamplesSize >> 20), (FASTCOVER_MAX_SAMPLES_SIZE >> 20));
- return 0;
- }
- /* Check if there are at least 5 training samples */
- if (nbTrainSamples < 5) {
- DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid.", nbTrainSamples);
- return 0;
- }
- /* Check if there's testing sample */
- if (nbTestSamples < 1) {
- DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.", nbTestSamples);
- return 0;
- }
- /* Zero the context */
- memset(ctx, 0, sizeof(*ctx));
- DISPLAYLEVEL(2, "Training on %u samples of total size %u\n", nbTrainSamples,
- (U32)trainingSamplesSize);
- DISPLAYLEVEL(2, "Testing on %u samples of total size %u\n", nbTestSamples,
- (U32)testSamplesSize);
-
- ctx->samples = samples;
- ctx->samplesSizes = samplesSizes;
- ctx->nbSamples = nbSamples;
- ctx->nbTrainSamples = nbTrainSamples;
- ctx->nbTestSamples = nbTestSamples;
- ctx->nbDmers = trainingSamplesSize - d + 1;
- ctx->d = d;
-
- /* The offsets of each file */
- ctx->offsets = (size_t *)malloc((nbSamples + 1) * sizeof(size_t));
- if (!ctx->offsets) {
- DISPLAYLEVEL(1, "Failed to allocate scratch buffers\n");
- FASTCOVER_ctx_destroy(ctx);
- return 0;
- }
-
- /* Fill offsets from the samplesSizes */
- {
- U32 i;
- ctx->offsets[0] = 0;
- for (i = 1; i <= nbSamples; ++i) {
- ctx->offsets[i] = ctx->offsets[i - 1] + samplesSizes[i - 1];
- }
- }
-
- /* Initialize frequency array of size 2^f */
- ctx->freqs = (U32 *)calloc((1 << f), sizeof(U32));
- ctx->segmentFreqs = (U16 *)calloc((1 << f), sizeof(U16));
- DISPLAYLEVEL(2, "Computing frequencies\n");
- FASTCOVER_computeFrequency(ctx->freqs, f, ctx);
-
- return 1;
-}
-
-
-/**
- * Given the prepared context build the dictionary.
- */
-static size_t FASTCOVER_buildDictionary(const FASTCOVER_ctx_t *ctx, U32 *freqs,
- void *dictBuffer,
- size_t dictBufferCapacity,
- ZDICT_fastCover_params_t parameters){
- BYTE *const dict = (BYTE *)dictBuffer;
- size_t tail = dictBufferCapacity;
- /* Divide the data up into epochs of equal size.
- * We will select at least one segment from each epoch.
- */
- const U32 epochs = MAX(1, (U32)(dictBufferCapacity / parameters.k));
- const U32 epochSize = (U32)(ctx->nbDmers / epochs);
- size_t epoch;
- DISPLAYLEVEL(2, "Breaking content into %u epochs of size %u\n", epochs,
- epochSize);
- /* Loop through the epochs until there are no more segments or the dictionary
- * is full.
- */
- for (epoch = 0; tail > 0; epoch = (epoch + 1) % epochs) {
- const U32 epochBegin = (U32)(epoch * epochSize);
- const U32 epochEnd = epochBegin + epochSize;
- size_t segmentSize;
- /* Select a segment */
- FASTCOVER_segment_t segment = FASTCOVER_selectSegment(
- ctx, freqs, epochBegin, epochEnd, parameters);
-
- /* If the segment covers no dmers, then we are out of content */
- if (segment.score == 0) {
- break;
- }
-
- /* Trim the segment if necessary and if it is too small then we are done */
- segmentSize = MIN(segment.end - segment.begin + parameters.d - 1, tail);
- if (segmentSize < parameters.d) {
- break;
- }
-
- /* We fill the dictionary from the back to allow the best segments to be
- * referenced with the smallest offsets.
- */
- tail -= segmentSize;
- memcpy(dict + tail, ctx->samples + segment.begin, segmentSize);
- DISPLAYUPDATE(
- 2, "\r%u%% ",
- (U32)(((dictBufferCapacity - tail) * 100) / dictBufferCapacity));
- }
- DISPLAYLEVEL(2, "\r%79s\r", "");
- return tail;
-}
-
-
-/**
- * FASTCOVER_best_t is used for two purposes:
- * 1. Synchronizing threads.
- * 2. Saving the best parameters and dictionary.
- *
- * All of the methods except FASTCOVER_best_init() are thread safe if zstd is
- * compiled with multithreaded support.
- */
-typedef struct fast_best_s {
- ZSTD_pthread_mutex_t mutex;
- ZSTD_pthread_cond_t cond;
- size_t liveJobs;
- void *dict;
- size_t dictSize;
- ZDICT_fastCover_params_t parameters;
- size_t compressedSize;
-} FASTCOVER_best_t;
-
-/**
- * Initialize the `FASTCOVER_best_t`.
- */
-static void FASTCOVER_best_init(FASTCOVER_best_t *best) {
- if (best==NULL) return; /* compatible with init on NULL */
- (void)ZSTD_pthread_mutex_init(&best->mutex, NULL);
- (void)ZSTD_pthread_cond_init(&best->cond, NULL);
- best->liveJobs = 0;
- best->dict = NULL;
- best->dictSize = 0;
- best->compressedSize = (size_t)-1;
- memset(&best->parameters, 0, sizeof(best->parameters));
-}
-
-/**
- * Wait until liveJobs == 0.
- */
-static void FASTCOVER_best_wait(FASTCOVER_best_t *best) {
- if (!best) {
- return;
- }
- ZSTD_pthread_mutex_lock(&best->mutex);
- while (best->liveJobs != 0) {
- ZSTD_pthread_cond_wait(&best->cond, &best->mutex);
- }
- ZSTD_pthread_mutex_unlock(&best->mutex);
-}
-
-/**
- * Call FASTCOVER_best_wait() and then destroy the FASTCOVER_best_t.
- */
-static void FASTCOVER_best_destroy(FASTCOVER_best_t *best) {
- if (!best) {
- return;
- }
- FASTCOVER_best_wait(best);
- if (best->dict) {
- free(best->dict);
- }
- ZSTD_pthread_mutex_destroy(&best->mutex);
- ZSTD_pthread_cond_destroy(&best->cond);
-}
-
-/**
- * Called when a thread is about to be launched.
- * Increments liveJobs.
- */
-static void FASTCOVER_best_start(FASTCOVER_best_t *best) {
- if (!best) {
- return;
- }
- ZSTD_pthread_mutex_lock(&best->mutex);
- ++best->liveJobs;
- ZSTD_pthread_mutex_unlock(&best->mutex);
-}
-
-/**
- * Called when a thread finishes executing, both on error or success.
- * Decrements liveJobs and signals any waiting threads if liveJobs == 0.
- * If this dictionary is the best so far save it and its parameters.
- */
-static void FASTCOVER_best_finish(FASTCOVER_best_t *best, size_t compressedSize,
- ZDICT_fastCover_params_t parameters, void *dict,
- size_t dictSize) {
- if (!best) {
- return;
- }
- {
- size_t liveJobs;
- ZSTD_pthread_mutex_lock(&best->mutex);
- --best->liveJobs;
- liveJobs = best->liveJobs;
- /* If the new dictionary is better */
- if (compressedSize < best->compressedSize) {
- /* Allocate space if necessary */
- if (!best->dict || best->dictSize < dictSize) {
- if (best->dict) {
- free(best->dict);
- }
- best->dict = malloc(dictSize);
- if (!best->dict) {
- best->compressedSize = ERROR(GENERIC);
- best->dictSize = 0;
- return;
- }
- }
- /* Save the dictionary, parameters, and size */
- memcpy(best->dict, dict, dictSize);
- best->dictSize = dictSize;
- best->parameters = parameters;
- best->compressedSize = compressedSize;
- }
- ZSTD_pthread_mutex_unlock(&best->mutex);
- if (liveJobs == 0) {
- ZSTD_pthread_cond_broadcast(&best->cond);
- }
- }
-}
-
-/**
- * Parameters for FASTCOVER_tryParameters().
- */
-typedef struct FASTCOVER_tryParameters_data_s {
- const FASTCOVER_ctx_t *ctx;
- FASTCOVER_best_t *best;
- size_t dictBufferCapacity;
- ZDICT_fastCover_params_t parameters;
-} FASTCOVER_tryParameters_data_t;
-
-/**
- * Tries a set of parameters and updates the FASTCOVER_best_t with the results.
- * This function is thread safe if zstd is compiled with multithreaded support.
- * It takes its parameters as an *OWNING* opaque pointer to support threading.
- */
-static void FASTCOVER_tryParameters(void *opaque) {
- /* Save parameters as local variables */
- FASTCOVER_tryParameters_data_t *const data = (FASTCOVER_tryParameters_data_t *)opaque;
- const FASTCOVER_ctx_t *const ctx = data->ctx;
- const ZDICT_fastCover_params_t parameters = data->parameters;
- size_t dictBufferCapacity = data->dictBufferCapacity;
- size_t totalCompressedSize = ERROR(GENERIC);
- /* Allocate space for hash table, dict, and freqs */
- BYTE *const dict = (BYTE * const)malloc(dictBufferCapacity);
- U32 *freqs = (U32*) malloc((1 << parameters.f) * sizeof(U32));
- if (!dict || !freqs) {
- DISPLAYLEVEL(1, "Failed to allocate buffers: out of memory\n");
- goto _cleanup;
- }
- /* Copy the frequencies because we need to modify them */
- memcpy(freqs, ctx->freqs, (1 << parameters.f) * sizeof(U32));
- /* Build the dictionary */
- {
- const size_t tail = FASTCOVER_buildDictionary(ctx, freqs, dict,
- dictBufferCapacity, parameters);
-
- dictBufferCapacity = ZDICT_finalizeDictionary(
- dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
- ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbTrainSamples,
- parameters.zParams);
- if (ZDICT_isError(dictBufferCapacity)) {
- DISPLAYLEVEL(1, "Failed to finalize dictionary\n");
- goto _cleanup;
- }
- }
- /* Check total compressed size */
- {
- /* Pointers */
- ZSTD_CCtx *cctx;
- ZSTD_CDict *cdict;
- void *dst;
- /* Local variables */
- size_t dstCapacity;
- size_t i;
- /* Allocate dst with enough space to compress the maximum sized sample */
- {
- size_t maxSampleSize = 0;
- i = parameters.splitPoint < 1.0 ? ctx->nbTrainSamples : 0;
- for (; i < ctx->nbSamples; ++i) {
- maxSampleSize = MAX(ctx->samplesSizes[i], maxSampleSize);
- }
- dstCapacity = ZSTD_compressBound(maxSampleSize);
- dst = malloc(dstCapacity);
- }
- /* Create the cctx and cdict */
- cctx = ZSTD_createCCtx();
- cdict = ZSTD_createCDict(dict, dictBufferCapacity,
- parameters.zParams.compressionLevel);
- if (!dst || !cctx || !cdict) {
- goto _compressCleanup;
- }
- /* Compress each sample and sum their sizes (or error) */
- totalCompressedSize = dictBufferCapacity;
- i = parameters.splitPoint < 1.0 ? ctx->nbTrainSamples : 0;
- for (; i < ctx->nbSamples; ++i) {
- const size_t size = ZSTD_compress_usingCDict(
- cctx, dst, dstCapacity, ctx->samples + ctx->offsets[i],
- ctx->samplesSizes[i], cdict);
- if (ZSTD_isError(size)) {
- totalCompressedSize = ERROR(GENERIC);
- goto _compressCleanup;
- }
- totalCompressedSize += size;
- }
- _compressCleanup:
- ZSTD_freeCCtx(cctx);
- ZSTD_freeCDict(cdict);
- if (dst) {
- free(dst);
- }
- }
-
-_cleanup:
- FASTCOVER_best_finish(data->best, totalCompressedSize, parameters, dict,
- dictBufferCapacity);
- free(data);
- if (dict) {
- free(dict);
- }
- if (freqs) {
- free(freqs);
- }
-}
-
-ZDICTLIB_API size_t ZDICT_trainFromBuffer_fastCover(
- void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
- const size_t *samplesSizes, unsigned nbSamples, ZDICT_fastCover_params_t parameters) {
- BYTE* const dict = (BYTE*)dictBuffer;
- FASTCOVER_ctx_t ctx;
- parameters.splitPoint = 1.0;
- /* Initialize global data */
- g_displayLevel = parameters.zParams.notificationLevel;
- /* Checks */
- if (!FASTCOVER_checkParameters(parameters, dictBufferCapacity)) {
- DISPLAYLEVEL(1, "FASTCOVER parameters incorrect\n");
- return ERROR(GENERIC);
- }
- if (nbSamples == 0) {
- DISPLAYLEVEL(1, "FASTCOVER must have at least one input file\n");
- return ERROR(GENERIC);
- }
- if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
- DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
- ZDICT_DICTSIZE_MIN);
- return ERROR(dstSize_tooSmall);
- }
- /* Initialize context */
- if (!FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
- parameters.d, parameters.splitPoint, parameters.f)) {
- DISPLAYLEVEL(1, "Failed to initialize context\n");
- return ERROR(GENERIC);
- }
- /* Build the dictionary */
- DISPLAYLEVEL(2, "Building dictionary\n");
- {
- const size_t tail = FASTCOVER_buildDictionary(&ctx, ctx.freqs, dictBuffer,
- dictBufferCapacity, parameters);
-
- const size_t dictionarySize = ZDICT_finalizeDictionary(
- dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
- samplesBuffer, samplesSizes, (unsigned)ctx.nbTrainSamples,
- parameters.zParams);
- if (!ZSTD_isError(dictionarySize)) {
- DISPLAYLEVEL(2, "Constructed dictionary of size %u\n",
- (U32)dictionarySize);
- }
- FASTCOVER_ctx_destroy(&ctx);
- return dictionarySize;
- }
-}
-
-
-
-ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_fastCover(
- void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
- const size_t *samplesSizes, unsigned nbSamples,
- ZDICT_fastCover_params_t *parameters) {
- /* constants */
- const unsigned nbThreads = parameters->nbThreads;
- const double splitPoint =
- parameters->splitPoint <= 0.0 ? DEFAULT_SPLITPOINT : parameters->splitPoint;
- const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d;
- const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d;
- const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k;
- const unsigned kMaxK = parameters->k == 0 ? 2000 : parameters->k;
- const unsigned kSteps = parameters->steps == 0 ? 40 : parameters->steps;
- const unsigned kStepSize = MAX((kMaxK - kMinK) / kSteps, 1);
- const unsigned kIterations =
- (1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize);
- const unsigned f = parameters->f == 0 ? 23 : parameters->f;
-
- /* Local variables */
- const int displayLevel = parameters->zParams.notificationLevel;
- unsigned iteration = 1;
- unsigned d;
- unsigned k;
- FASTCOVER_best_t best;
- POOL_ctx *pool = NULL;
-
- /* Checks */
- if (splitPoint <= 0 || splitPoint > 1) {
- LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect splitPoint\n");
- return ERROR(GENERIC);
- }
- if (kMinK < kMaxD || kMaxK < kMinK) {
- LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect k\n");
- return ERROR(GENERIC);
- }
- if (nbSamples == 0) {
- DISPLAYLEVEL(1, "FASTCOVER must have at least one input file\n");
- return ERROR(GENERIC);
- }
- if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
- DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
- ZDICT_DICTSIZE_MIN);
- return ERROR(dstSize_tooSmall);
- }
- if (nbThreads > 1) {
- pool = POOL_create(nbThreads, 1);
- if (!pool) {
- return ERROR(memory_allocation);
- }
- }
- /* Initialization */
- FASTCOVER_best_init(&best);
- /* Turn down global display level to clean up display at level 2 and below */
- g_displayLevel = displayLevel == 0 ? 0 : displayLevel - 1;
- /* Loop through d first because each new value needs a new context */
- LOCALDISPLAYLEVEL(displayLevel, 2, "Trying %u different sets of parameters\n",
- kIterations);
- for (d = kMinD; d <= kMaxD; d += 2) {
- /* Initialize the context for this value of d */
- FASTCOVER_ctx_t ctx;
- LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d);
- if (!FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint, f)) {
- LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n");
- FASTCOVER_best_destroy(&best);
- POOL_free(pool);
- return ERROR(GENERIC);
- }
- /* Loop through k reusing the same context */
- for (k = kMinK; k <= kMaxK; k += kStepSize) {
- /* Prepare the arguments */
- FASTCOVER_tryParameters_data_t *data = (FASTCOVER_tryParameters_data_t *)malloc(
- sizeof(FASTCOVER_tryParameters_data_t));
- LOCALDISPLAYLEVEL(displayLevel, 3, "k=%u\n", k);
- if (!data) {
- LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to allocate parameters\n");
- FASTCOVER_best_destroy(&best);
- FASTCOVER_ctx_destroy(&ctx);
- POOL_free(pool);
- return ERROR(GENERIC);
- }
- data->ctx = &ctx;
- data->best = &best;
- data->dictBufferCapacity = dictBufferCapacity;
- data->parameters = *parameters;
- data->parameters.k = k;
- data->parameters.d = d;
- data->parameters.f = f;
- data->parameters.splitPoint = splitPoint;
- data->parameters.steps = kSteps;
- data->parameters.zParams.notificationLevel = g_displayLevel;
- /* Check the parameters */
- if (!FASTCOVER_checkParameters(data->parameters, dictBufferCapacity)) {
- DISPLAYLEVEL(1, "fastCover parameters incorrect\n");
- free(data);
- continue;
- }
- /* Call the function and pass ownership of data to it */
- FASTCOVER_best_start(&best);
- if (pool) {
- POOL_add(pool, &FASTCOVER_tryParameters, data);
- } else {
- FASTCOVER_tryParameters(data);
- }
- /* Print status */
- LOCALDISPLAYUPDATE(displayLevel, 2, "\r%u%% ",
- (U32)((iteration * 100) / kIterations));
- ++iteration;
- }
- FASTCOVER_best_wait(&best);
- FASTCOVER_ctx_destroy(&ctx);
- }
- LOCALDISPLAYLEVEL(displayLevel, 2, "\r%79s\r", "");
- /* Fill the output buffer and parameters with output of the best parameters */
- {
- const size_t dictSize = best.dictSize;
- if (ZSTD_isError(best.compressedSize)) {
- const size_t compressedSize = best.compressedSize;
- FASTCOVER_best_destroy(&best);
- POOL_free(pool);
- return compressedSize;
- }
- *parameters = best.parameters;
- memcpy(dictBuffer, best.dict, dictSize);
- FASTCOVER_best_destroy(&best);
- POOL_free(pool);
- return dictSize;
- }
-
-}
+++ /dev/null
-#include <stdio.h> /* fprintf */
-#include <stdlib.h> /* malloc, free, qsort */
-#include <string.h> /* memset */
-#include <time.h> /* clock */
-#include "mem.h" /* read */
-#include "pool.h"
-#include "threading.h"
-#include "zstd_internal.h" /* includes zstd.h */
-#ifndef ZDICT_STATIC_LINKING_ONLY
-#define ZDICT_STATIC_LINKING_ONLY
-#endif
-#include "zdict.h"
-
-
-typedef struct {
- unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */
- unsigned d; /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */
- unsigned f; /* log of size of frequency array */
- unsigned steps; /* Number of steps : Only used for optimization : 0 means default (32) : Higher means more parameters checked */
- unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */
- double splitPoint; /* Percentage of samples used for training: the first nbSamples * splitPoint samples will be used to training, the last nbSamples * (1 - splitPoint) samples will be used for testing, 0 means default (1.0), 1.0 when all samples are used for both training and testing */
- ZDICT_params_t zParams;
-} ZDICT_fastCover_params_t;
-
-
-/*! ZDICT_optimizeTrainFromBuffer_fastCover():
- * Train a dictionary from an array of samples using a modified version of the COVER algorithm.
- * Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
- * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
- * The resulting dictionary will be saved into `dictBuffer`.
- * All of the parameters except for f are optional.
- * If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8, 10, 12, 14, 16}.
- * if steps is zero it defaults to its default value.
- * If k is non-zero then we don't check multiple values of k, otherwise we check steps values in [16, 2048].
- *
- * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
- * or an error code, which can be tested with ZDICT_isError().
- * On success `*parameters` contains the parameters selected.
- */
- ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_fastCover(
- void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
- const size_t *samplesSizes, unsigned nbSamples,
- ZDICT_fastCover_params_t *parameters);
-
-
-/*! ZDICT_trainFromBuffer_fastCover():
- * Train a dictionary from an array of samples using a modified version of the COVER algorithm.
- * Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
- * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
- * The resulting dictionary will be saved into `dictBuffer`.
- * d, k, and f are required.
- * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
- * or an error code, which can be tested with ZDICT_isError().
- */
-ZDICTLIB_API size_t ZDICT_trainFromBuffer_fastCover(
- void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
- const size_t *samplesSizes, unsigned nbSamples, ZDICT_fastCover_params_t parameters);
+++ /dev/null
-#include <stdio.h> /* fprintf */
-#include <stdlib.h> /* malloc, free, qsort */
-#include <string.h> /* strcmp, strlen */
-#include <errno.h> /* errno */
-#include <ctype.h>
-#include "fastCover.h"
-#include "io.h"
-#include "util.h"
-#include "zdict.h"
-
-
-/*-*************************************
-* Console display
-***************************************/
-#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
-#define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); }
-
-static const U64 g_refreshRate = SEC_TO_MICRO / 6;
-static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER;
-
-#define DISPLAYUPDATE(l, ...) { if (displayLevel>=l) { \
- if ((UTIL_clockSpanMicro(g_displayClock) > g_refreshRate) || (displayLevel>=4)) \
- { g_displayClock = UTIL_getTime(); DISPLAY(__VA_ARGS__); \
- if (displayLevel>=4) fflush(stderr); } } }
-
-
-/*-*************************************
-* Exceptions
-***************************************/
-#ifndef DEBUG
-# define DEBUG 0
-#endif
-#define DEBUGOUTPUT(...) if (DEBUG) DISPLAY(__VA_ARGS__);
-#define EXM_THROW(error, ...) \
-{ \
- DEBUGOUTPUT("Error defined at %s, line %i : \n", __FILE__, __LINE__); \
- DISPLAY("Error %i : ", error); \
- DISPLAY(__VA_ARGS__); \
- DISPLAY("\n"); \
- exit(error); \
-}
-
-
-/*-*************************************
-* Constants
-***************************************/
-static const unsigned g_defaultMaxDictSize = 110 KB;
-#define DEFAULT_CLEVEL 3
-
-
-/*-*************************************
-* FASTCOVER
-***************************************/
-int FASTCOVER_trainFromFiles(const char* dictFileName, sampleInfo *info,
- unsigned maxDictSize,
- ZDICT_fastCover_params_t *params) {
- unsigned const displayLevel = params->zParams.notificationLevel;
- void* const dictBuffer = malloc(maxDictSize);
-
- int result = 0;
-
- /* Checks */
- if (!dictBuffer)
- EXM_THROW(12, "not enough memory for trainFromFiles"); /* should not happen */
-
- { size_t dictSize;
- /* Run the optimize version if either k or d is not provided */
- if (!params->d || !params->k) {
- dictSize = ZDICT_optimizeTrainFromBuffer_fastCover(dictBuffer, maxDictSize, info->srcBuffer,
- info->samplesSizes, info->nbSamples, params);
- } else {
- dictSize = ZDICT_trainFromBuffer_fastCover(dictBuffer, maxDictSize, info->srcBuffer,
- info->samplesSizes, info->nbSamples, *params);
- }
- DISPLAYLEVEL(2, "k=%u\nd=%u\nf=%u\nsteps=%u\nsplit=%u\n", params->k, params->d, params->f, params->steps, (unsigned)(params->splitPoint*100));
- if (ZDICT_isError(dictSize)) {
- DISPLAYLEVEL(1, "dictionary training failed : %s \n", ZDICT_getErrorName(dictSize)); /* should not happen */
- result = 1;
- goto _done;
- }
- /* save dict */
- DISPLAYLEVEL(2, "Save dictionary of size %u into file %s \n", (U32)dictSize, dictFileName);
- saveDict(dictFileName, dictBuffer, dictSize);
- }
-
- /* clean up */
-_done:
- free(dictBuffer);
- return result;
-}
-
-
-
-int main(int argCount, const char* argv[])
-{
- int displayLevel = 2;
- const char* programName = argv[0];
- int operationResult = 0;
-
- /* Initialize arguments to default values */
- unsigned k = 0;
- unsigned d = 0;
- unsigned f = 23;
- unsigned steps = 32;
- unsigned nbThreads = 1;
- unsigned split = 100;
- const char* outputFile = "fastCoverDict";
- unsigned dictID = 0;
- unsigned maxDictSize = g_defaultMaxDictSize;
-
- /* Initialize table to store input files */
- const char** filenameTable = (const char**)malloc(argCount * sizeof(const char*));
- unsigned filenameIdx = 0;
-
- char* fileNamesBuf = NULL;
- unsigned fileNamesNb = filenameIdx;
- int followLinks = 0; /* follow directory recursively */
- const char** extendedFileList = NULL;
-
- /* Parse arguments */
- for (int i = 1; i < argCount; i++) {
- const char* argument = argv[i];
- if (longCommandWArg(&argument, "k=")) { k = readU32FromChar(&argument); continue; }
- if (longCommandWArg(&argument, "d=")) { d = readU32FromChar(&argument); continue; }
- if (longCommandWArg(&argument, "f=")) { f = readU32FromChar(&argument); continue; }
- if (longCommandWArg(&argument, "steps=")) { steps = readU32FromChar(&argument); continue; }
- if (longCommandWArg(&argument, "split=")) { split = readU32FromChar(&argument); continue; }
- if (longCommandWArg(&argument, "dictID=")) { dictID = readU32FromChar(&argument); continue; }
- if (longCommandWArg(&argument, "maxdict=")) { maxDictSize = readU32FromChar(&argument); continue; }
- if (longCommandWArg(&argument, "in=")) {
- filenameTable[filenameIdx] = argument;
- filenameIdx++;
- continue;
- }
- if (longCommandWArg(&argument, "out=")) {
- outputFile = argument;
- continue;
- }
- DISPLAYLEVEL(1, "Incorrect parameters\n");
- operationResult = 1;
- return operationResult;
- }
-
- /* Get the list of all files recursively (because followLinks==0)*/
- extendedFileList = UTIL_createFileList(filenameTable, filenameIdx, &fileNamesBuf,
- &fileNamesNb, followLinks);
- if (extendedFileList) {
- unsigned u;
- for (u=0; u<fileNamesNb; u++) DISPLAYLEVEL(4, "%u %s\n", u, extendedFileList[u]);
- free((void*)filenameTable);
- filenameTable = extendedFileList;
- filenameIdx = fileNamesNb;
- }
-
- size_t blockSize = 0;
-
- /* Set up zParams */
- ZDICT_params_t zParams;
- zParams.compressionLevel = DEFAULT_CLEVEL;
- zParams.notificationLevel = displayLevel;
- zParams.dictID = dictID;
-
- /* Set up fastCover params */
- ZDICT_fastCover_params_t params;
- params.zParams = zParams;
- params.k = k;
- params.d = d;
- params.f = f;
- params.steps = steps;
- params.nbThreads = nbThreads;
- params.splitPoint = (double)split/100;
-
- /* Build dictionary */
- sampleInfo* info = getSampleInfo(filenameTable,
- filenameIdx, blockSize, maxDictSize, zParams.notificationLevel);
- operationResult = FASTCOVER_trainFromFiles(outputFile, info, maxDictSize, ¶ms);
-
- /* Free allocated memory */
- UTIL_freeFileList(extendedFileList, fileNamesBuf);
- freeSampleInfo(info);
-
- return operationResult;
-}
+++ /dev/null
-echo "Building fastCover dictionary with in=../../lib/common f=20 out=dict1"
-./main in=../../../lib/common f=20 out=dict1
-zstd -be3 -D dict1 -r ../../../lib/common -q
-echo "Building fastCover dictionary with in=../../lib/common k=500 d=6 f=24 out=dict2 dictID=100 maxdict=140000"
-./main in=../../../lib/common k=500 d=6 f=24 out=dict2 dictID=100 maxdict=140000
-zstd -be3 -D dict2 -r ../../../lib/common -q
-echo "Building fastCover dictionary with 2 sample sources"
-./main in=../../../lib/common in=../../../lib/compress out=dict3
-zstd -be3 -D dict3 -r ../../../lib/common -q
-echo "Removing dict1 dict2 dict3"
-rm -f dict1 dict2 dict3
-
-echo "Testing with invalid parameters, should fail"
-! ./main in=../../../lib/common r=10
-! ./main in=../../../lib/common d=10
+++ /dev/null
-ARG :=
-
-CC ?= gcc
-CFLAGS ?= -O3
-INCLUDES := -I ../../../programs -I ../../../lib/common -I ../../../lib -I ../../../lib/dictBuilder
-
-TEST_INPUT := ../../../lib
-TEST_OUTPUT := randomDict
-
-all: main run clean
-
-.PHONY: test
-test: main testrun testshell clean
-
-.PHONY: run
-run:
- echo "Building a random dictionary with given arguments"
- ./main $(ARG)
-
-main: main.o io.o random.o libzstd.a
- $(CC) $(CFLAGS) main.o io.o random.o libzstd.a -o main
-
-main.o: main.c
- $(CC) $(CFLAGS) $(INCLUDES) -c main.c
-
-random.o: random.c
- $(CC) $(CFLAGS) $(INCLUDES) -c random.c
-
-io.o: io.c
- $(CC) $(CFLAGS) $(INCLUDES) -c io.c
-
-libzstd.a:
- $(MAKE) -C ../../../lib libzstd.a
- mv ../../../lib/libzstd.a .
-
-.PHONY: testrun
-testrun: main
- echo "Run with $(TEST_INPUT) and $(TEST_OUTPUT) "
- ./main in=$(TEST_INPUT) out=$(TEST_OUTPUT)
- zstd -be3 -D $(TEST_OUTPUT) -r $(TEST_INPUT) -q
- rm -f $(TEST_OUTPUT)
-
-.PHONY: testshell
-testshell: test.sh
- sh test.sh
- echo "Finish running test.sh"
-
-.PHONY: clean
-clean:
- rm -f *.o main libzstd.a
- $(MAKE) -C ../../../lib clean
- echo "Cleaning is completed"
+++ /dev/null
-Random Dictionary Builder
-
-### Permitted Arguments:
-Input File/Directory (in=fileName): required; file/directory used to build dictionary; if directory, will operate recursively for files inside directory; can include multiple files/directories, each following "in="
-Output Dictionary (out=dictName): if not provided, default to defaultDict
-Dictionary ID (dictID=#): nonnegative number; if not provided, default to 0
-Maximum Dictionary Size (maxdict=#): positive number; in bytes, if not provided, default to 110KB
-Size of Randomly Selected Segment (k=#): positive number; in bytes; if not provided, default to 200
-
-###Running Test:
-make test
-
-
-###Usage:
-To build a random dictionary with the provided arguments: make ARG= followed by arguments
-
-
-### Examples:
-make ARG="in=../../../lib/dictBuilder out=dict100 dictID=520"
-make ARG="in=../../../lib/dictBuilder in=../../../lib/compress"
+++ /dev/null
-#include <stdio.h> /* fprintf */
-#include <stdlib.h> /* malloc, free, qsort */
-#include <string.h> /* strcmp, strlen */
-#include <errno.h> /* errno */
-#include <ctype.h>
-#include "io.h"
-#include "fileio.h" /* stdinmark, stdoutmark, ZSTD_EXTENSION */
-#include "platform.h" /* Large Files support */
-#include "util.h"
-#include "zdict.h"
-
-/*-*************************************
-* Console display
-***************************************/
-#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
-#define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); }
-
-static const U64 g_refreshRate = SEC_TO_MICRO / 6;
-static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER;
-
-#define DISPLAYUPDATE(l, ...) { if (displayLevel>=l) { \
- if ((UTIL_clockSpanMicro(g_displayClock) > g_refreshRate) || (displayLevel>=4)) \
- { g_displayClock = UTIL_getTime(); DISPLAY(__VA_ARGS__); \
- if (displayLevel>=4) fflush(stderr); } } }
-
-/*-*************************************
-* Exceptions
-***************************************/
-#ifndef DEBUG
-# define DEBUG 0
-#endif
-#define DEBUGOUTPUT(...) if (DEBUG) DISPLAY(__VA_ARGS__);
-#define EXM_THROW(error, ...) \
-{ \
- DEBUGOUTPUT("Error defined at %s, line %i : \n", __FILE__, __LINE__); \
- DISPLAY("Error %i : ", error); \
- DISPLAY(__VA_ARGS__); \
- DISPLAY("\n"); \
- exit(error); \
-}
-
-
-/*-*************************************
-* Constants
-***************************************/
-
-#define SAMPLESIZE_MAX (128 KB)
-#define RANDOM_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((U32)-1) : ((U32)1 GB))
-#define RANDOM_MEMMULT 9
-static const size_t g_maxMemory = (sizeof(size_t) == 4) ?
- (2 GB - 64 MB) : ((size_t)(512 MB) << sizeof(size_t));
-
-#define NOISELENGTH 32
-
-
-/*-*************************************
-* Commandline related functions
-***************************************/
-unsigned readU32FromChar(const char** stringPtr){
- const char errorMsg[] = "error: numeric value too large";
- unsigned result = 0;
- while ((**stringPtr >='0') && (**stringPtr <='9')) {
- unsigned const max = (((unsigned)(-1)) / 10) - 1;
- if (result > max) exit(1);
- result *= 10, result += **stringPtr - '0', (*stringPtr)++ ;
- }
- if ((**stringPtr=='K') || (**stringPtr=='M')) {
- unsigned const maxK = ((unsigned)(-1)) >> 10;
- if (result > maxK) exit(1);
- result <<= 10;
- if (**stringPtr=='M') {
- if (result > maxK) exit(1);
- result <<= 10;
- }
- (*stringPtr)++; /* skip `K` or `M` */
- if (**stringPtr=='i') (*stringPtr)++;
- if (**stringPtr=='B') (*stringPtr)++;
- }
- return result;
-}
-
-unsigned longCommandWArg(const char** stringPtr, const char* longCommand){
- size_t const comSize = strlen(longCommand);
- int const result = !strncmp(*stringPtr, longCommand, comSize);
- if (result) *stringPtr += comSize;
- return result;
-}
-
-
-/* ********************************************************
-* File related operations
-**********************************************************/
-/** loadFiles() :
- * load samples from files listed in fileNamesTable into buffer.
- * works even if buffer is too small to load all samples.
- * Also provides the size of each sample into sampleSizes table
- * which must be sized correctly, using DiB_fileStats().
- * @return : nb of samples effectively loaded into `buffer`
- * *bufferSizePtr is modified, it provides the amount data loaded within buffer.
- * sampleSizes is filled with the size of each sample.
- */
-static unsigned loadFiles(void* buffer, size_t* bufferSizePtr, size_t* sampleSizes,
- unsigned sstSize, const char** fileNamesTable, unsigned nbFiles,
- size_t targetChunkSize, unsigned displayLevel) {
- char* const buff = (char*)buffer;
- size_t pos = 0;
- unsigned nbLoadedChunks = 0, fileIndex;
-
- for (fileIndex=0; fileIndex<nbFiles; fileIndex++) {
- const char* const fileName = fileNamesTable[fileIndex];
- unsigned long long const fs64 = UTIL_getFileSize(fileName);
- unsigned long long remainingToLoad = (fs64 == UTIL_FILESIZE_UNKNOWN) ? 0 : fs64;
- U32 const nbChunks = targetChunkSize ? (U32)((fs64 + (targetChunkSize-1)) / targetChunkSize) : 1;
- U64 const chunkSize = targetChunkSize ? MIN(targetChunkSize, fs64) : fs64;
- size_t const maxChunkSize = (size_t)MIN(chunkSize, SAMPLESIZE_MAX);
- U32 cnb;
- FILE* const f = fopen(fileName, "rb");
- if (f==NULL) EXM_THROW(10, "zstd: dictBuilder: %s %s ", fileName, strerror(errno));
- DISPLAYUPDATE(2, "Loading %s... \r", fileName);
- for (cnb=0; cnb<nbChunks; cnb++) {
- size_t const toLoad = (size_t)MIN(maxChunkSize, remainingToLoad);
- if (toLoad > *bufferSizePtr-pos) break;
- { size_t const readSize = fread(buff+pos, 1, toLoad, f);
- if (readSize != toLoad) EXM_THROW(11, "Pb reading %s", fileName);
- pos += readSize;
- sampleSizes[nbLoadedChunks++] = toLoad;
- remainingToLoad -= targetChunkSize;
- if (nbLoadedChunks == sstSize) { /* no more space left in sampleSizes table */
- fileIndex = nbFiles; /* stop there */
- break;
- }
- if (toLoad < targetChunkSize) {
- fseek(f, (long)(targetChunkSize - toLoad), SEEK_CUR);
- } } }
- fclose(f);
- }
- DISPLAYLEVEL(2, "\r%79s\r", "");
- *bufferSizePtr = pos;
- DISPLAYLEVEL(4, "loaded : %u KB \n", (U32)(pos >> 10))
- return nbLoadedChunks;
-}
-
-#define rotl32(x,r) ((x << r) | (x >> (32 - r)))
-static U32 getRand(U32* src)
-{
- static const U32 prime1 = 2654435761U;
- static const U32 prime2 = 2246822519U;
- U32 rand32 = *src;
- rand32 *= prime1;
- rand32 ^= prime2;
- rand32 = rotl32(rand32, 13);
- *src = rand32;
- return rand32 >> 5;
-}
-
-/* shuffle() :
- * shuffle a table of file names in a semi-random way
- * It improves dictionary quality by reducing "locality" impact, so if sample set is very large,
- * it will load random elements from it, instead of just the first ones. */
-static void shuffle(const char** fileNamesTable, unsigned nbFiles) {
- U32 seed = 0xFD2FB528;
- unsigned i;
- for (i = nbFiles - 1; i > 0; --i) {
- unsigned const j = getRand(&seed) % (i + 1);
- const char* const tmp = fileNamesTable[j];
- fileNamesTable[j] = fileNamesTable[i];
- fileNamesTable[i] = tmp;
- }
-}
-
-
-/*-********************************************************
-* Dictionary training functions
-**********************************************************/
-size_t findMaxMem(unsigned long long requiredMem) {
- size_t const step = 8 MB;
- void* testmem = NULL;
-
- requiredMem = (((requiredMem >> 23) + 1) << 23);
- requiredMem += step;
- if (requiredMem > g_maxMemory) requiredMem = g_maxMemory;
-
- while (!testmem) {
- testmem = malloc((size_t)requiredMem);
- requiredMem -= step;
- }
-
- free(testmem);
- return (size_t)requiredMem;
-}
-
-void saveDict(const char* dictFileName,
- const void* buff, size_t buffSize) {
- FILE* const f = fopen(dictFileName, "wb");
- if (f==NULL) EXM_THROW(3, "cannot open %s ", dictFileName);
-
- { size_t const n = fwrite(buff, 1, buffSize, f);
- if (n!=buffSize) EXM_THROW(4, "%s : write error", dictFileName) }
-
- { size_t const n = (size_t)fclose(f);
- if (n!=0) EXM_THROW(5, "%s : flush error", dictFileName) }
-}
-
-/*! getFileStats() :
- * Given a list of files, and a chunkSize (0 == no chunk, whole files)
- * provides the amount of data to be loaded and the resulting nb of samples.
- * This is useful primarily for allocation purpose => sample buffer, and sample sizes table.
- */
-static fileStats getFileStats(const char** fileNamesTable, unsigned nbFiles,
- size_t chunkSize, unsigned displayLevel) {
- fileStats fs;
- unsigned n;
- memset(&fs, 0, sizeof(fs));
- for (n=0; n<nbFiles; n++) {
- U64 const fileSize = UTIL_getFileSize(fileNamesTable[n]);
- U64 const srcSize = (fileSize == UTIL_FILESIZE_UNKNOWN) ? 0 : fileSize;
- U32 const nbSamples = (U32)(chunkSize ? (srcSize + (chunkSize-1)) / chunkSize : 1);
- U64 const chunkToLoad = chunkSize ? MIN(chunkSize, srcSize) : srcSize;
- size_t const cappedChunkSize = (size_t)MIN(chunkToLoad, SAMPLESIZE_MAX);
- fs.totalSizeToLoad += cappedChunkSize * nbSamples;
- fs.oneSampleTooLarge |= (chunkSize > 2*SAMPLESIZE_MAX);
- fs.nbSamples += nbSamples;
- }
- DISPLAYLEVEL(4, "Preparing to load : %u KB \n", (U32)(fs.totalSizeToLoad >> 10));
- return fs;
-}
-
-
-
-
-sampleInfo* getSampleInfo(const char** fileNamesTable, unsigned nbFiles, size_t chunkSize,
- unsigned maxDictSize, const unsigned displayLevel) {
- fileStats const fs = getFileStats(fileNamesTable, nbFiles, chunkSize, displayLevel);
- size_t* const sampleSizes = (size_t*)malloc(fs.nbSamples * sizeof(size_t));
- size_t const memMult = RANDOM_MEMMULT;
- size_t const maxMem = findMaxMem(fs.totalSizeToLoad * memMult) / memMult;
- size_t loadedSize = (size_t) MIN ((unsigned long long)maxMem, fs.totalSizeToLoad);
- void* const srcBuffer = malloc(loadedSize+NOISELENGTH);
-
- /* Checks */
- if ((!sampleSizes) || (!srcBuffer))
- EXM_THROW(12, "not enough memory for trainFromFiles"); /* should not happen */
- if (fs.oneSampleTooLarge) {
- DISPLAYLEVEL(2, "! Warning : some sample(s) are very large \n");
- DISPLAYLEVEL(2, "! Note that dictionary is only useful for small samples. \n");
- DISPLAYLEVEL(2, "! As a consequence, only the first %u bytes of each sample are loaded \n", SAMPLESIZE_MAX);
- }
- if (fs.nbSamples < 5) {
- DISPLAYLEVEL(2, "! Warning : nb of samples too low for proper processing ! \n");
- DISPLAYLEVEL(2, "! Please provide _one file per sample_. \n");
- DISPLAYLEVEL(2, "! Alternatively, split files into fixed-size blocks representative of samples, with -B# \n");
- EXM_THROW(14, "nb of samples too low"); /* we now clearly forbid this case */
- }
- if (fs.totalSizeToLoad < (unsigned long long)(8 * maxDictSize)) {
- DISPLAYLEVEL(2, "! Warning : data size of samples too small for target dictionary size \n");
- DISPLAYLEVEL(2, "! Samples should be about 100x larger than target dictionary size \n");
- }
-
- /* init */
- if (loadedSize < fs.totalSizeToLoad)
- DISPLAYLEVEL(1, "Not enough memory; training on %u MB only...\n", (unsigned)(loadedSize >> 20));
-
- /* Load input buffer */
- DISPLAYLEVEL(3, "Shuffling input files\n");
- shuffle(fileNamesTable, nbFiles);
- nbFiles = loadFiles(srcBuffer, &loadedSize, sampleSizes, fs.nbSamples,
- fileNamesTable, nbFiles, chunkSize, displayLevel);
-
- sampleInfo *info = (sampleInfo *)malloc(sizeof(sampleInfo));
-
- info->nbSamples = fs.nbSamples;
- info->samplesSizes = sampleSizes;
- info->srcBuffer = srcBuffer;
-
- return info;
-}
-
-
-void freeSampleInfo(sampleInfo *info) {
- if (!info) return;
- if (info->samplesSizes) free((void*)(info->samplesSizes));
- if (info->srcBuffer) free((void*)(info->srcBuffer));
- free(info);
-}
+++ /dev/null
-#include <stdio.h> /* fprintf */
-#include <stdlib.h> /* malloc, free, qsort */
-#include <string.h> /* strcmp, strlen */
-#include <errno.h> /* errno */
-#include <ctype.h>
-#include "zstd_internal.h" /* includes zstd.h */
-#include "fileio.h" /* stdinmark, stdoutmark, ZSTD_EXTENSION */
-#include "platform.h" /* Large Files support */
-#include "util.h"
-#include "zdict.h"
-
-
-/*-*************************************
-* Structs
-***************************************/
-typedef struct {
- U64 totalSizeToLoad;
- unsigned oneSampleTooLarge;
- unsigned nbSamples;
-} fileStats;
-
-typedef struct {
- const void* srcBuffer;
- const size_t *samplesSizes;
- size_t nbSamples;
-}sampleInfo;
-
-
-
-/*! getSampleInfo():
- * Load from input files and add samples to buffer
- * @return: a sampleInfo struct containing infomation about buffer where samples are stored,
- * size of each sample, and total number of samples
- */
-sampleInfo* getSampleInfo(const char** fileNamesTable, unsigned nbFiles, size_t chunkSize,
- unsigned maxDictSize, const unsigned displayLevel);
-
-
-
-/*! freeSampleInfo():
- * Free memory allocated for info
- */
-void freeSampleInfo(sampleInfo *info);
-
-
-
-/*! saveDict():
- * Save data stored on buff to dictFileName
- */
-void saveDict(const char* dictFileName, const void* buff, size_t buffSize);
-
-
-unsigned readU32FromChar(const char** stringPtr);
-
-/** longCommandWArg() :
- * check if *stringPtr is the same as longCommand.
- * If yes, @return 1 and advances *stringPtr to the position which immediately follows longCommand.
- * @return 0 and doesn't modify *stringPtr otherwise.
- */
-unsigned longCommandWArg(const char** stringPtr, const char* longCommand);
+++ /dev/null
-#include <stdio.h> /* fprintf */
-#include <stdlib.h> /* malloc, free, qsort */
-#include <string.h> /* strcmp, strlen */
-#include <errno.h> /* errno */
-#include <ctype.h>
-#include "random.h"
-#include "io.h"
-#include "util.h"
-#include "zdict.h"
-
-
-/*-*************************************
-* Console display
-***************************************/
-#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
-#define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); }
-
-static const U64 g_refreshRate = SEC_TO_MICRO / 6;
-static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER;
-
-#define DISPLAYUPDATE(l, ...) { if (displayLevel>=l) { \
- if ((UTIL_clockSpanMicro(g_displayClock) > g_refreshRate) || (displayLevel>=4)) \
- { g_displayClock = UTIL_getTime(); DISPLAY(__VA_ARGS__); \
- if (displayLevel>=4) fflush(stderr); } } }
-
-
-/*-*************************************
-* Exceptions
-***************************************/
-#ifndef DEBUG
-# define DEBUG 0
-#endif
-#define DEBUGOUTPUT(...) if (DEBUG) DISPLAY(__VA_ARGS__);
-#define EXM_THROW(error, ...) \
-{ \
- DEBUGOUTPUT("Error defined at %s, line %i : \n", __FILE__, __LINE__); \
- DISPLAY("Error %i : ", error); \
- DISPLAY(__VA_ARGS__); \
- DISPLAY("\n"); \
- exit(error); \
-}
-
-
-/*-*************************************
-* Constants
-***************************************/
-static const unsigned g_defaultMaxDictSize = 110 KB;
-#define DEFAULT_CLEVEL 3
-#define DEFAULT_k 200
-#define DEFAULT_OUTPUTFILE "defaultDict"
-#define DEFAULT_DICTID 0
-
-
-
-/*-*************************************
-* RANDOM
-***************************************/
-int RANDOM_trainFromFiles(const char* dictFileName, sampleInfo *info,
- unsigned maxDictSize,
- ZDICT_random_params_t *params) {
- unsigned const displayLevel = params->zParams.notificationLevel;
- void* const dictBuffer = malloc(maxDictSize);
-
- int result = 0;
-
- /* Checks */
- if (!dictBuffer)
- EXM_THROW(12, "not enough memory for trainFromFiles"); /* should not happen */
-
- { size_t dictSize;
- dictSize = ZDICT_trainFromBuffer_random(dictBuffer, maxDictSize, info->srcBuffer,
- info->samplesSizes, info->nbSamples, *params);
- DISPLAYLEVEL(2, "k=%u\n", params->k);
- if (ZDICT_isError(dictSize)) {
- DISPLAYLEVEL(1, "dictionary training failed : %s \n", ZDICT_getErrorName(dictSize)); /* should not happen */
- result = 1;
- goto _done;
- }
- /* save dict */
- DISPLAYLEVEL(2, "Save dictionary of size %u into file %s \n", (U32)dictSize, dictFileName);
- saveDict(dictFileName, dictBuffer, dictSize);
- }
-
- /* clean up */
-_done:
- free(dictBuffer);
- return result;
-}
-
-
-
-int main(int argCount, const char* argv[])
-{
- int displayLevel = 2;
- const char* programName = argv[0];
- int operationResult = 0;
-
- /* Initialize arguments to default values */
- unsigned k = DEFAULT_k;
- const char* outputFile = DEFAULT_OUTPUTFILE;
- unsigned dictID = DEFAULT_DICTID;
- unsigned maxDictSize = g_defaultMaxDictSize;
-
- /* Initialize table to store input files */
- const char** filenameTable = (const char**)malloc(argCount * sizeof(const char*));
- unsigned filenameIdx = 0;
-
- /* Parse arguments */
- for (int i = 1; i < argCount; i++) {
- const char* argument = argv[i];
- if (longCommandWArg(&argument, "k=")) { k = readU32FromChar(&argument); continue; }
- if (longCommandWArg(&argument, "dictID=")) { dictID = readU32FromChar(&argument); continue; }
- if (longCommandWArg(&argument, "maxdict=")) { maxDictSize = readU32FromChar(&argument); continue; }
- if (longCommandWArg(&argument, "in=")) {
- filenameTable[filenameIdx] = argument;
- filenameIdx++;
- continue;
- }
- if (longCommandWArg(&argument, "out=")) {
- outputFile = argument;
- continue;
- }
- DISPLAYLEVEL(1, "Incorrect parameters\n");
- operationResult = 1;
- return operationResult;
- }
-
- char* fileNamesBuf = NULL;
- unsigned fileNamesNb = filenameIdx;
- int followLinks = 0; /* follow directory recursively */
- const char** extendedFileList = NULL;
- extendedFileList = UTIL_createFileList(filenameTable, filenameIdx, &fileNamesBuf,
- &fileNamesNb, followLinks);
- if (extendedFileList) {
- unsigned u;
- for (u=0; u<fileNamesNb; u++) DISPLAYLEVEL(4, "%u %s\n", u, extendedFileList[u]);
- free((void*)filenameTable);
- filenameTable = extendedFileList;
- filenameIdx = fileNamesNb;
- }
-
- size_t blockSize = 0;
-
- ZDICT_random_params_t params;
- ZDICT_params_t zParams;
- zParams.compressionLevel = DEFAULT_CLEVEL;
- zParams.notificationLevel = displayLevel;
- zParams.dictID = dictID;
- params.zParams = zParams;
- params.k = k;
-
- sampleInfo* info = getSampleInfo(filenameTable,
- filenameIdx, blockSize, maxDictSize, zParams.notificationLevel);
- operationResult = RANDOM_trainFromFiles(outputFile, info, maxDictSize, ¶ms);
-
- /* Free allocated memory */
- UTIL_freeFileList(extendedFileList, fileNamesBuf);
- freeSampleInfo(info);
-
- return operationResult;
-}
+++ /dev/null
-/*-*************************************
-* Dependencies
-***************************************/
-#include <stdio.h> /* fprintf */
-#include <stdlib.h> /* malloc, free, qsort */
-#include <string.h> /* memset */
-#include <time.h> /* clock */
-#include "random.h"
-#include "util.h" /* UTIL_getFileSize, UTIL_getTotalFileSize */
-#ifndef ZDICT_STATIC_LINKING_ONLY
-#define ZDICT_STATIC_LINKING_ONLY
-#endif
-#include "zdict.h"
-
-/*-*************************************
-* Console display
-***************************************/
-#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
-#define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); }
-
-#define LOCALDISPLAYUPDATE(displayLevel, l, ...) \
- if (displayLevel >= l) { \
- if ((clock() - g_time > refreshRate) || (displayLevel >= 4)) { \
- g_time = clock(); \
- DISPLAY(__VA_ARGS__); \
- } \
- }
-#define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(displayLevel, l, __VA_ARGS__)
-static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100;
-static clock_t g_time = 0;
-
-
-
-/* ********************************************************
-* Random Dictionary Builder
-**********************************************************/
-/**
- * Returns the sum of the sample sizes.
- */
-static size_t RANDOM_sum(const size_t *samplesSizes, unsigned nbSamples) {
- size_t sum = 0;
- unsigned i;
- for (i = 0; i < nbSamples; ++i) {
- sum += samplesSizes[i];
- }
- return sum;
-}
-
-
-/**
- * A segment is an inclusive range in the source.
- */
-typedef struct {
- U32 begin;
- U32 end;
-} RANDOM_segment_t;
-
-
-/**
- * Selects a random segment from totalSamplesSize - k + 1 possible segments
- */
-static RANDOM_segment_t RANDOM_selectSegment(const size_t totalSamplesSize,
- ZDICT_random_params_t parameters) {
- const U32 k = parameters.k;
- RANDOM_segment_t segment;
- unsigned index;
-
- /* Randomly generate a number from 0 to sampleSizes - k */
- index = rand()%(totalSamplesSize - k + 1);
-
- /* inclusive */
- segment.begin = index;
- segment.end = index + k - 1;
-
- return segment;
-}
-
-
-/**
- * Check the validity of the parameters.
- * Returns non-zero if the parameters are valid and 0 otherwise.
- */
-static int RANDOM_checkParameters(ZDICT_random_params_t parameters,
- size_t maxDictSize) {
- /* k is a required parameter */
- if (parameters.k == 0) {
- return 0;
- }
- /* k <= maxDictSize */
- if (parameters.k > maxDictSize) {
- return 0;
- }
- return 1;
-}
-
-
-/**
- * Given the prepared context build the dictionary.
- */
-static size_t RANDOM_buildDictionary(const size_t totalSamplesSize, const BYTE *samples,
- void *dictBuffer, size_t dictBufferCapacity,
- ZDICT_random_params_t parameters) {
- BYTE *const dict = (BYTE *)dictBuffer;
- size_t tail = dictBufferCapacity;
- const int displayLevel = parameters.zParams.notificationLevel;
- while (tail > 0) {
-
- /* Select a segment */
- RANDOM_segment_t segment = RANDOM_selectSegment(totalSamplesSize, parameters);
-
- size_t segmentSize;
- segmentSize = MIN(segment.end - segment.begin + 1, tail);
-
- tail -= segmentSize;
- memcpy(dict + tail, samples + segment.begin, segmentSize);
- DISPLAYUPDATE(
- 2, "\r%u%% ",
- (U32)(((dictBufferCapacity - tail) * 100) / dictBufferCapacity));
- }
-
- return tail;
-}
-
-
-
-
-ZDICTLIB_API size_t ZDICT_trainFromBuffer_random(
- void *dictBuffer, size_t dictBufferCapacity,
- const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
- ZDICT_random_params_t parameters) {
- const int displayLevel = parameters.zParams.notificationLevel;
- BYTE* const dict = (BYTE*)dictBuffer;
- /* Checks */
- if (!RANDOM_checkParameters(parameters, dictBufferCapacity)) {
- DISPLAYLEVEL(1, "k is incorrect\n");
- return ERROR(GENERIC);
- }
- if (nbSamples == 0) {
- DISPLAYLEVEL(1, "Random must have at least one input file\n");
- return ERROR(GENERIC);
- }
- if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
- DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
- ZDICT_DICTSIZE_MIN);
- return ERROR(dstSize_tooSmall);
- }
- const size_t totalSamplesSize = RANDOM_sum(samplesSizes, nbSamples);
- const BYTE *const samples = (const BYTE *)samplesBuffer;
-
- DISPLAYLEVEL(2, "Building dictionary\n");
- {
- const size_t tail = RANDOM_buildDictionary(totalSamplesSize, samples,
- dictBuffer, dictBufferCapacity, parameters);
- const size_t dictSize = ZDICT_finalizeDictionary(
- dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
- samplesBuffer, samplesSizes, nbSamples, parameters.zParams);
- if (!ZSTD_isError(dictSize)) {
- DISPLAYLEVEL(2, "Constructed dictionary of size %u\n",
- (U32)dictSize);
- }
- return dictSize;
- }
-}
+++ /dev/null
-#include <stdio.h> /* fprintf */
-#include <stdlib.h> /* malloc, free, qsort */
-#include <string.h> /* memset */
-#include <time.h> /* clock */
-#include "zstd_internal.h" /* includes zstd.h */
-#ifndef ZDICT_STATIC_LINKING_ONLY
-#define ZDICT_STATIC_LINKING_ONLY
-#endif
-#include "zdict.h"
-
-
-
-typedef struct {
- unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+]; Default to 200 */
- ZDICT_params_t zParams;
-} ZDICT_random_params_t;
-
-
-/*! ZDICT_trainFromBuffer_random():
- * Train a dictionary from an array of samples.
- * Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
- * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
- * The resulting dictionary will be saved into `dictBuffer`.
- * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
- * or an error code, which can be tested with ZDICT_isError().
- */
-ZDICTLIB_API size_t ZDICT_trainFromBuffer_random( void *dictBuffer, size_t dictBufferCapacity,
- const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
- ZDICT_random_params_t parameters);
+++ /dev/null
-echo "Building random dictionary with in=../../lib/common k=200 out=dict1"
-./main in=../../../lib/common k=200 out=dict1
-zstd -be3 -D dict1 -r ../../../lib/common -q
-echo "Building random dictionary with in=../../lib/common k=500 out=dict2 dictID=100 maxdict=140000"
-./main in=../../../lib/common k=500 out=dict2 dictID=100 maxdict=140000
-zstd -be3 -D dict2 -r ../../../lib/common -q
-echo "Building random dictionary with 2 sample sources"
-./main in=../../../lib/common in=../../../lib/compress out=dict3
-zstd -be3 -D dict3 -r ../../../lib/common -q
-echo "Removing dict1 dict2 dict3"
-rm -f dict1 dict2 dict3
-
-echo "Testing with invalid parameters, should fail"
-! ./main r=10