make-all:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@755da8c3cf115ac066823e79a1e1788f8940201b # tag=v3
- name: make all
run: make all
DEVNULLRIGHTS: 1
READFROMBLOCKDEVICE: 1
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@755da8c3cf115ac066823e79a1e1788f8940201b # tag=v3
- name: make test
run: make test
make-test-osx:
runs-on: macos-latest
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@755da8c3cf115ac066823e79a1e1788f8940201b # tag=v3
- name: OS-X test
run: make test # make -C lib all doesn't work because it's not a tty
no-intrinsics-fuzztest:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@755da8c3cf115ac066823e79a1e1788f8940201b # tag=v3
- name: no intrinsics fuzztest
run: MOREFLAGS="-DZSTD_NO_INTRINSICS" make -C tests fuzztest
tsan-zstreamtest:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@755da8c3cf115ac066823e79a1e1788f8940201b # tag=v3
- name: thread sanitizer zstreamtest
run: CC=clang ZSTREAM_TESTTIME=-T3mn make tsan-test-zstream
ubsan-zstreamtest:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@755da8c3cf115ac066823e79a1e1788f8940201b # tag=v3
- name: undefined behavior sanitizer zstreamtest
run: CC=clang make uasan-test-zstream
tsan-fuzztest:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@755da8c3cf115ac066823e79a1e1788f8940201b # tag=v3
- name: thread sanitizer fuzztest
run: CC=clang make tsan-fuzztest
gcc-8-asan-ubsan-testzstd:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@755da8c3cf115ac066823e79a1e1788f8940201b # tag=v3
- name: gcc-8 + ASan + UBSan + Test Zstd
# See https://askubuntu.com/a/1428822
run: |
clang-asan-ubsan-testzstd:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@755da8c3cf115ac066823e79a1e1788f8940201b # tag=v3
- name: clang + ASan + UBSan + Test Zstd
run: CC=clang make -j uasan-test-zstd </dev/null V=1
gcc-asan-ubsan-testzstd-32bit:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@755da8c3cf115ac066823e79a1e1788f8940201b # tag=v3
- name: ASan + UBSan + Test Zstd, 32bit mode
run: |
sudo apt-get -qqq update
gcc-8-asan-ubsan-fuzz:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@755da8c3cf115ac066823e79a1e1788f8940201b # tag=v3
- name: gcc-8 + ASan + UBSan + Fuzz Test
# See https://askubuntu.com/a/1428822
run: |
clang-asan-ubsan-fuzz:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@755da8c3cf115ac066823e79a1e1788f8940201b # tag=v3
- name: clang + ASan + UBSan + Fuzz Test
run: CC=clang FUZZER_FLAGS="--long-tests" make clean uasan-fuzztest
gcc-asan-ubsan-fuzz32:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@755da8c3cf115ac066823e79a1e1788f8940201b # tag=v3
- name: ASan + UBSan + Fuzz Test 32bit
run: |
sudo apt-get -qqq update
clang-asan-ubsan-fuzz32:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@755da8c3cf115ac066823e79a1e1788f8940201b # tag=v3
- name: clang + ASan + UBSan + Fuzz Test 32bit
run: |
sudo apt-get -qqq update
asan-ubsan-regression:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@755da8c3cf115ac066823e79a1e1788f8940201b # tag=v3
- name: ASan + UBSan + Regression Test
run: make -j uasanregressiontest
clang-ubsan-regression:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@755da8c3cf115ac066823e79a1e1788f8940201b # tag=v3
- name: clang + ASan + UBSan + Regression Test
run: CC=clang make -j uasanregressiontest
msan-regression:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@755da8c3cf115ac066823e79a1e1788f8940201b # tag=v3
- name: MSan + Regression Test
run: make -j msanregressiontest
clang-msan-fuzz:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@755da8c3cf115ac066823e79a1e1788f8940201b # tag=v3
- name: clang + MSan + Fuzz Test
run: |
sudo apt-get -qqq update
clang-msan-testzstd:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@755da8c3cf115ac066823e79a1e1788f8940201b # tag=v3
- name: clang + MSan + Test Zstd
run: |
sudo apt-get update
armfuzz:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@755da8c3cf115ac066823e79a1e1788f8940201b # tag=v3
- name: Qemu ARM emulation + Fuzz Test
run: |
sudo apt-get -qqq update
valgrind-fuzz-test:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@755da8c3cf115ac066823e79a1e1788f8940201b # tag=v3
- name: valgrind + fuzz test stack mode # ~ 7mn
shell: 'script -q -e -c "bash {0}"'
run: |
{ compiler: gcc, platform: x64, action: test, script: ""},
]
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@755da8c3cf115ac066823e79a1e1788f8940201b # tag=v3
- name: Mingw long test
run: |
$env:PATH_ORIGINAL = $env:PATH
dry-run: false
sanitizer: ${{ matrix.sanitizer }}
- name: Upload Crash
- uses: actions/upload-artifact@v1
+ uses: actions/upload-artifact@83fd05a356d7e2593de66fc9913b3002723633cb # tag=v3.1.1
if: failure() && steps.build.outcome == 'success'
with:
name: ${{ matrix.sanitizer }}-artifacts
linux-kernel:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@755da8c3cf115ac066823e79a1e1788f8940201b # tag=v3
- name: linux kernel, library + build + test
run: make -C contrib/linux-kernel test CFLAGS="-Werror -Wunused-const-variable -Wunused-but-set-variable"
benchmarking:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@755da8c3cf115ac066823e79a1e1788f8940201b # tag=v3
- name: make benchmarking
run: make benchmarking
check-32bit: # designed to catch https://github.com/facebook/zstd/issues/2428
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@755da8c3cf115ac066823e79a1e1788f8940201b # tag=v3
- name: make check on 32-bit
run: |
sudo apt update
check-x32:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@755da8c3cf115ac066823e79a1e1788f8940201b # tag=v3
- name: make check on x32 ABI # https://en.wikipedia.org/wiki/X32_ABI
env:
CHECK_CONSTRAINED_MEM: true
gcc-7-libzstd:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@755da8c3cf115ac066823e79a1e1788f8940201b # tag=v3
- name: gcc-7 + libzstdmt compilation
# See https://askubuntu.com/a/1428822
run: |
cmake-build-and-test-check:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@755da8c3cf115ac066823e79a1e1788f8940201b # tag=v3
- name: cmake build and test check
run: |
FUZZERTEST=-T1mn ZSTREAM_TESTTIME=-T1mn make cmakebuild
cpp-gnu90-c99-compatibility:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@755da8c3cf115ac066823e79a1e1788f8940201b # tag=v3
- name: C++, gnu90 and c99 compatibility
run: |
make cxxtest
mingw-cross-compilation:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@755da8c3cf115ac066823e79a1e1788f8940201b # tag=v3
- name: mingw cross-compilation
run: |
# sudo update-alternatives --set x86_64-w64-mingw32-g++ /usr/bin/x86_64-w64-mingw32-g++-posix; (doesn't work)
armbuild:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@755da8c3cf115ac066823e79a1e1788f8940201b # tag=v3
- name: ARM Build Test
run: |
sudo apt-get -qqq update
bourne-shell:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v2
+ - uses: actions/checkout@755da8c3cf115ac066823e79a1e1788f8940201b # tag=v3
- name: Bourne shell compatibility (shellcheck)
run: |
wget https://github.com/koalaman/shellcheck/releases/download/v0.7.1/shellcheck-v0.7.1.linux.x86_64.tar.xz
zlib-wrapper:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@755da8c3cf115ac066823e79a1e1788f8940201b # tag=v3
- name: zlib wrapper test
run: |
sudo apt-get -qqq update
lz4-threadpool-libs:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@755da8c3cf115ac066823e79a1e1788f8940201b # tag=v3
- name: LZ4, thread pool, and libs build tests
run: |
make lz4install
gcc-make-tests-32bit:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@755da8c3cf115ac066823e79a1e1788f8940201b # tag=v3
- name: Make all, 32bit mode
run: |
sudo apt-get -qqq update
gcc-8-make:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@755da8c3cf115ac066823e79a1e1788f8940201b # tag=v3
- name: gcc-8 build
# See https://askubuntu.com/a/1428822
run: |
implicit-fall-through:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@755da8c3cf115ac066823e79a1e1788f8940201b # tag=v3
- name: -Wimplicit-fallthrough build
run: |
make clean
make clean
CC=clang MOREFLAGS="-Werror -Wimplicit-fallthrough -O0" make -C lib -j libzstd.a ZSTD_LEGACY_SUPPORT=0
+ meson-linux:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@755da8c3cf115ac066823e79a1e1788f8940201b # tag=v3
+ - name: Install packages
+ run: |
+ sudo apt-get update
+ sudo apt-get -y install build-essential python3-pip ninja-build liblz4-dev
+ pip install --pre meson
+ - name: Build with Meson
+ run: |
+ meson setup \
+ --buildtype=debugoptimized \
+ -Db_lundef=false \
+ -Dauto_features=enabled \
+ -Dbin_programs=true \
+ -Dbin_tests=true \
+ -Dbin_contrib=true \
+ -Ddefault_library=both \
+ build/meson builddir
+ ninja -C builddir/
+ meson test -C builddir/ --print-errorlogs
+ meson install -C builddir --destdir staging/
+
+ meson-windows:
+ runs-on: windows-latest
+ steps:
+ - uses: actions/checkout@755da8c3cf115ac066823e79a1e1788f8940201b # tag=v3
+ - name: Install packages
+ run: pip install --pre meson
+ - name: Initialize the MSVC dev command prompt
+ uses: ilammy/msvc-dev-cmd@7315a94840631165970262a99c72cfb48a65d25d
+ - name: Configure with Meson
+ run: |
+ meson setup build/meson/ builddir -Dbin_tests=true -Dbin_programs=true -Dbin_contrib=true
+ - name: Build with Meson
+ run: |
+ ninja -C builddir/
+ - name: Test with Meson
+ run: |
+ meson test -C builddir/ --print-errorlogs
+ - name: Install with Meson
+ run: |
+ meson install -C builddir --destdir staging/
+
cmake-visual-2019:
runs-on: windows-2019
strategy:
flags: "-A Win32"
- generator: "MinGW Makefiles"
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@755da8c3cf115ac066823e79a1e1788f8940201b # tag=v3
- name: Add MSBuild to PATH
- uses: microsoft/setup-msbuild@v1.1.3
+ uses: microsoft/setup-msbuild@34cfbaee7f672c76950673338facd8a73f637506 # tag=v1.1.3
- name: Build
working-directory: ${{env.GITHUB_WORKSPACE}}
run: |
platform: [x64, Win32]
configuration: [Debug, Release]
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@755da8c3cf115ac066823e79a1e1788f8940201b # tag=v3
- name: Add MSBuild to PATH
- uses: microsoft/setup-msbuild@v1.1.3
+ uses: microsoft/setup-msbuild@34cfbaee7f672c76950673338facd8a73f637506 # tag=v1.1.3
- name: Build
working-directory: ${{env.GITHUB_WORKSPACE}}
# See https://docs.microsoft.com/visualstudio/msbuild/msbuild-command-line-reference
# platform: [x64, Win32]
# configuration: [Debug, Release]
# steps:
-# - uses: actions/checkout@v3
+# - uses: actions/checkout@755da8c3cf115ac066823e79a1e1788f8940201b # tag=v3
# - name: Add MSBuild to PATH
-# uses: microsoft/setup-msbuild@v1.1.3
+# uses: microsoft/setup-msbuild@34cfbaee7f672c76950673338facd8a73f637506 # tag=v1.1.3
# - name: Build
# working-directory: ${{env.GITHUB_WORKSPACE}}
# run: >
# msbuild "build\VS2010\zstd.sln" /m /verbosity:minimal /property:PlatformToolset=v140
# /t:Clean,Build /p:Platform=${{matrix.platform}} /p:Configuration=${{matrix.configuration}}
+ # This tests that we don't accidentally grow the size too much.
+ # If the size grows intentionally, you can raise these numbers.
+ # But we do need to think about binary size, since it is a concern.
+ libzstd-size:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@755da8c3cf115ac066823e79a1e1788f8940201b # tag=v3
+ - name: libzstd size test
+ run: |
+ make clean && make -j -C lib libzstd && ./tests/check_size.py lib/libzstd.so 1100000
+ make clean && make -j -C lib libzstd ZSTD_LIB_COMPRESSION=0 ZSTD_LIB_DICTBUILDER=0 && ./tests/check_size.py lib/libzstd.so 400000
+ make clean && make -j -C lib libzstd ZSTD_LIB_MINIFY=1 && ./tests/check_size.py lib/libzstd.so 300000
+ make clean && make -j -C lib libzstd ZSTD_LIB_MINIFY=1 ZSTD_LIB_COMPRESSION=0 ZSTD_LIB_DICTBUILDER=0 && ./tests/check_size.py lib/libzstd.so 80000
+
minimal-decompressor-macros:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@755da8c3cf115ac066823e79a1e1788f8940201b # tag=v3
- name: minimal decompressor macros
run: |
make clean && make -j all ZSTD_LIB_MINIFY=1 MOREFLAGS="-Werror"
dynamic-bmi2:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@755da8c3cf115ac066823e79a1e1788f8940201b # tag=v3
- name: dynamic bmi2 tests
run: |
make clean && make -j check MOREFLAGS="-O0 -Werror -mbmi2"
test-variants:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@755da8c3cf115ac066823e79a1e1788f8940201b # tag=v3
- name: make all variants & validate
run: |
make -j -C programs allVariants MOREFLAGS=-O0
XCC: ${{ matrix.xcc }}
XEMU: ${{ matrix.xemu }}
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@755da8c3cf115ac066823e79a1e1788f8940201b # tag=v3
- name: apt update & install
run: |
sudo apt-get update
{ compiler: clang, platform: x64, script: "CFLAGS='--target=x86_64-w64-mingw32 -Werror -Wconversion -Wno-sign-conversion' make -j allzstd V=1"},
]
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@755da8c3cf115ac066823e79a1e1788f8940201b # tag=v3
- name: Mingw short test
run: |
ECHO "Building ${{matrix.compiler}} ${{matrix.platform}}"
platform: [x64, Win32]
configuration: [Release]
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@755da8c3cf115ac066823e79a1e1788f8940201b # tag=v3
- name: Add MSBuild to PATH
- uses: microsoft/setup-msbuild@v1.1.3
+ uses: microsoft/setup-msbuild@34cfbaee7f672c76950673338facd8a73f637506 # tag=v1.1.3
- name: Build and run tests
working-directory: ${{env.GITHUB_WORKSPACE}}
env:
intel-cet-compatibility:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@755da8c3cf115ac066823e79a1e1788f8940201b # tag=v3
- name: Build Zstd
run: |
make -j zstd V=1
container:
image: debian:testing
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@755da8c3cf115ac066823e79a1e1788f8940201b # tag=v3
- name: Install dependencies
run: |
apt -y update
cc -Wall -Wextra -Wpedantic -Werror -o simple examples/simple_compression.c $(pkg-config --cflags --libs libzstd)
./simple LICENSE
+ versions-compatibility:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@755da8c3cf115ac066823e79a1e1788f8940201b # tag=v3
+ - name: Versions Compatibility Test
+ run: |
+ make -C tests versionsTest
-# This test currently fails on Github Actions specifically.
-# Possible reason : TTY emulation.
-# Note that the same test works fine locally and on travisCI.
-# This will have to be fixed before transferring the test to GA.
-# versions-compatibility:
-# runs-on: ubuntu-latest
-# steps:
-# - uses: actions/checkout@v3
-# - name: Versions Compatibility Test
-# run: |
-# make -C tests versionsTest
+ clangbuild:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@755da8c3cf115ac066823e79a1e1788f8940201b # tag=v3
+ - name: make clangbuild
+ run: |
+ make clangbuild
# For reference : icc tests
# sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main"
# sudo apt-get update
# sudo apt-get install -y intel-basekit intel-hpckit
-# - uses: actions/checkout@v3
+# - uses: actions/checkout@755da8c3cf115ac066823e79a1e1788f8940201b # tag=v3
# - name: make check
# run: |
# make CC=/opt/intel/oneapi/compiler/latest/linux/bin/intel64/icc check
steps:
- name: Checkout
- uses: actions/checkout@v3
+ uses: actions/checkout@755da8c3cf115ac066823e79a1e1788f8940201b # tag=v3
- name: Archive
env:
steps:
- name: "Checkout code"
- uses: actions/checkout@a12a3943b4bdde767164f792f33f40b04645d846 # tag=v3.0.0
+ uses: actions/checkout@755da8c3cf115ac066823e79a1e1788f8940201b # tag=v3
with:
persist-credentials: false
- name: "Run analysis"
- uses: ossf/scorecard-action@99c53751e09b9529366343771cc321ec74e9bd3d # tag=v2.0.6
+ uses: ossf/scorecard-action@937ffa90d79c7d720498178154ad4c7ba1e4ad8c # tag=v2.1.0
with:
results_file: results.sarif
results_format: sarif
# Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF
# format to the repository Actions tab.
- name: "Upload artifact"
- uses: actions/upload-artifact@6673cd052c4cd6fcf4b4e6e60ea986c889389535 # tag=v3.0.0
+ uses: actions/upload-artifact@83fd05a356d7e2593de66fc9913b3002723633cb # tag=v3.1.1
with:
name: SARIF file
path: results.sarif
# Upload the results to GitHub's code scanning dashboard.
- name: "Upload to code-scanning"
- uses: github/codeql-action/upload-sarif@5f532563584d71fdef14ee64d17bafb34f751ce5 # tag=v1.0.26
+ uses: github/codeql-action/upload-sarif@959cbb7472c4d4ad70cdfe6f4976053fe48ab394 # tag=v2.1.37
with:
sarif_file: results.sarif
For Zstandard software
-Copyright (c) 2016-present, Facebook, Inc. All rights reserved.
+Copyright (c) Meta Platforms, Inc. and affiliates. All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
- * Neither the name Facebook nor the names of its contributors may be used to
- endorse or promote products derived from this software without specific
- prior written permission.
+ * Neither the name Facebook, nor Meta, nor the names of its contributors may
+ be used to endorse or promote products derived from this software without
+ specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# ################################################################
-# Copyright (c) 2015-2021, Yann Collet, Facebook, Inc.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under both the BSD-style license (found in the
$(MAKE) -C contrib/seekable_format/examples all
$(MAKE) -C contrib/seekable_format/tests test
$(MAKE) -C contrib/largeNbDicts all
+ $(MAKE) -C contrib/externalMatchfinder all
cd build/single_file_libs/ ; ./build_decoder_test.sh
cd build/single_file_libs/ ; ./build_library_test.sh
$(Q)$(MAKE) -C contrib/seekable_format/examples $@ > $(VOID)
$(Q)$(MAKE) -C contrib/seekable_format/tests $@ > $(VOID)
$(Q)$(MAKE) -C contrib/largeNbDicts $@ > $(VOID)
+ $(Q)$(MAKE) -C contrib/externalMatchfinder $@ > $(VOID)
$(Q)$(RM) zstd$(EXT) zstdmt$(EXT) tmp*
$(Q)$(RM) -r lz4
@echo Cleaning completed
BEGIN
BLOCK "040904B0"
BEGIN
- VALUE "CompanyName", "Yann Collet, Facebook, Inc."
+ VALUE "CompanyName", "Meta Platforms, Inc."
VALUE "FileDescription", "Zstandard - Fast and efficient compression algorithm"
VALUE "FileVersion", ZSTD_VERSION_STRING
VALUE "InternalName", "libzstd.dll"
- VALUE "LegalCopyright", "Copyright (c) 2013-present, Yann Collet, Facebook, Inc."
+ VALUE "LegalCopyright", "Copyright (c) Meta Platforms, Inc. and affiliates."
VALUE "OriginalFilename", "libzstd.dll"
VALUE "ProductName", "Zstandard"
VALUE "ProductVersion", ZSTD_VERSION_STRING
BEGIN
BLOCK "040904B0"
BEGIN
- VALUE "CompanyName", "Yann Collet, Facebook, Inc."
+ VALUE "CompanyName", "Meta Platforms, Inc."
VALUE "FileDescription", "Zstandard - Fast and efficient compression algorithm"
VALUE "FileVersion", ZSTD_VERSION_STRING
VALUE "InternalName", "zstd.exe"
- VALUE "LegalCopyright", "Copyright (c) 2013-present, Yann Collet, Facebook, Inc."
+ VALUE "LegalCopyright", "Copyright (c) Meta Platforms, Inc. and affiliates."
VALUE "OriginalFilename", "zstd.exe"
VALUE "ProductName", "Zstandard"
VALUE "ProductVersion", ZSTD_VERSION_STRING
# ################################################################
-# Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under both the BSD-style license (found in the
# ################################################################
cmake_minimum_required(VERSION 2.8.12 FATAL_ERROR)
-
-# As of 2018-12-26 ZSTD has been validated to build with cmake version 3.13.2 new policies.
-# Set and use the newest cmake policies that are validated to work
-set(ZSTD_MAX_VALIDATED_CMAKE_MAJOR_VERSION "3")
+
+# As of 2018-12-26 ZSTD has been validated to build with cmake version 3.13.2 new policies.
+# Set and use the newest cmake policies that are validated to work
+set(ZSTD_MAX_VALIDATED_CMAKE_MAJOR_VERSION "3")
set(ZSTD_MAX_VALIDATED_CMAKE_MINOR_VERSION "13") #Policies never changed at PATCH level
if("${CMAKE_MAJOR_VERSION}" LESS 3)
- set(ZSTD_CMAKE_POLICY_VERSION "${CMAKE_VERSION}")
-elseif( "${ZSTD_MAX_VALIDATED_CMAKE_MAJOR_VERSION}" EQUAL "${CMAKE_MAJOR_VERSION}" AND
+ set(ZSTD_CMAKE_POLICY_VERSION "${CMAKE_VERSION}")
+elseif( "${ZSTD_MAX_VALIDATED_CMAKE_MAJOR_VERSION}" EQUAL "${CMAKE_MAJOR_VERSION}" AND
"${ZSTD_MAX_VALIDATED_CMAKE_MINOR_VERSION}" GREATER "${CMAKE_MINOR_VERSION}")
- set(ZSTD_CMAKE_POLICY_VERSION "${CMAKE_VERSION}")
-else()
- set(ZSTD_CMAKE_POLICY_VERSION "${ZSTD_MAX_VALIDATED_CMAKE_MAJOR_VERSION}.${ZSTD_MAX_VALIDATED_CMAKE_MINOR_VERSION}.0")
+ set(ZSTD_CMAKE_POLICY_VERSION "${CMAKE_VERSION}")
+else()
+ set(ZSTD_CMAKE_POLICY_VERSION "${ZSTD_MAX_VALIDATED_CMAKE_MAJOR_VERSION}.${ZSTD_MAX_VALIDATED_CMAKE_MINOR_VERSION}.0")
endif()
cmake_policy(VERSION ${ZSTD_CMAKE_POLICY_VERSION})
set(PROJECT_VERSION_PATCH ${zstd_VERSION_PATCH})
set(PROJECT_VERSION "${zstd_VERSION_MAJOR}.${zstd_VERSION_MINOR}.${zstd_VERSION_PATCH}")
enable_language(C) # Main library is in C
+ enable_language(ASM) # And ASM
enable_language(CXX) # Testing contributed code also utilizes CXX
else()
project(zstd
VERSION "${zstd_VERSION_MAJOR}.${zstd_VERSION_MINOR}.${zstd_VERSION_PATCH}"
LANGUAGES C # Main library is in C
+ ASM # And ASM
CXX # Testing contributed code also utilizes CXX
)
endif()
include(CheckCXXCompilerFlag)
include(CheckCCompilerFlag)
+include(CheckLinkerFlag)
-function(EnableCompilerFlag _flag _C _CXX)
+function(EnableCompilerFlag _flag _C _CXX _LD)
string(REGEX REPLACE "\\+" "PLUS" varname "${_flag}")
string(REGEX REPLACE "[^A-Za-z0-9]+" "_" varname "${varname}")
string(REGEX REPLACE "^_+" "" varname "${varname}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${_flag}" PARENT_SCOPE)
endif ()
endif ()
+ if (_LD)
+ CHECK_LINKER_FLAG(C ${_flag} LD_FLAG_${varname})
+ if (LD_FLAG_${varname})
+ set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${_flag}" PARENT_SCOPE)
+ set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${_flag}" PARENT_SCOPE)
+ endif ()
+ endif ()
endfunction()
macro(ADD_ZSTD_COMPILATION_FLAGS)
# EnableCompilerFlag("-std=c99" true false) # Set C compilation to c99 standard
if (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND MSVC)
# clang-cl normally maps -Wall to -Weverything.
- EnableCompilerFlag("/clang:-Wall" true true)
+ EnableCompilerFlag("/clang:-Wall" true true false)
else ()
- EnableCompilerFlag("-Wall" true true)
+ EnableCompilerFlag("-Wall" true true false)
endif ()
- EnableCompilerFlag("-Wextra" true true)
- EnableCompilerFlag("-Wundef" true true)
- EnableCompilerFlag("-Wshadow" true true)
- EnableCompilerFlag("-Wcast-align" true true)
- EnableCompilerFlag("-Wcast-qual" true true)
- EnableCompilerFlag("-Wstrict-prototypes" true false)
+ EnableCompilerFlag("-Wextra" true true false)
+ EnableCompilerFlag("-Wundef" true true false)
+ EnableCompilerFlag("-Wshadow" true true false)
+ EnableCompilerFlag("-Wcast-align" true true false)
+ EnableCompilerFlag("-Wcast-qual" true true false)
+ EnableCompilerFlag("-Wstrict-prototypes" true false false)
# Enable asserts in Debug mode
if (CMAKE_BUILD_TYPE MATCHES "Debug")
- EnableCompilerFlag("-DDEBUGLEVEL=1" true true)
+ EnableCompilerFlag("-DDEBUGLEVEL=1" true true false)
endif ()
+ # Add noexecstack flags
+ # LDFLAGS
+ EnableCompilerFlag("-z noexecstack" false false true)
+ # CFLAGS & CXXFLAGS
+ EnableCompilerFlag("-Qunused-arguments" true true false)
+ EnableCompilerFlag("-Wa,--noexecstack" true true false)
elseif (MSVC) # Add specific compilation flags for Windows Visual
set(ACTIVATE_MULTITHREADED_COMPILATION "ON" CACHE BOOL "activate multi-threaded compilation (/MP flag)")
if (CMAKE_GENERATOR MATCHES "Visual Studio" AND ACTIVATE_MULTITHREADED_COMPILATION)
- EnableCompilerFlag("/MP" true true)
+ EnableCompilerFlag("/MP" true true false)
endif ()
# UNICODE SUPPORT
- EnableCompilerFlag("/D_UNICODE" true true)
- EnableCompilerFlag("/DUNICODE" true true)
+ EnableCompilerFlag("/D_UNICODE" true true false)
+ EnableCompilerFlag("/DUNICODE" true true false)
# Enable asserts in Debug mode
if (CMAKE_BUILD_TYPE MATCHES "Debug")
- EnableCompilerFlag("/DDEBUGLEVEL=1" true true)
+ EnableCompilerFlag("/DDEBUGLEVEL=1" true true false)
endif ()
endif ()
# ################################################################
-# Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under both the BSD-style license (found in the
# ################################################################
-# Copyright (c) 2015-present, Yann Collet, Facebook, Inc.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under both the BSD-style license (found in the
# ################################################################
-# Copyright (c) 2016-present, Facebook, Inc.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under both the BSD-style license (found in the
# ################################################################
-# Copyright (c) 2015-present, Yann Collet, Facebook, Inc.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under both the BSD-style license (found in the
set(PlatformDependResources ${MSVC_RESOURCE_DIR}/libzstd-dll.rc)
endif ()
+# Explicitly set the language to C for all files, including ASM files.
+# Our assembly expects to be compiled by a C compiler, and is only enabled for
+# __GNUC__ compatible compilers. Otherwise all the ASM code is disabled by
+# macros.
+set_source_files_properties(${Sources} PROPERTIES LANGUAGE C)
+
# Split project to static and shared libraries build
set(library_targets)
if (ZSTD_BUILD_SHARED)
# ################################################################
-# Copyright (c) 2015-present, Yann Collet, Facebook, Inc.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under both the BSD-style license (found in the
# ################################################################
# zstd - Makefile
-# Copyright (C) Yann Collet 2014-present
+# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# BSD license
#
# zstreamtest
#
-add_executable(zstreamtest ${PROGRAMS_DIR}/datagen.c ${PROGRAMS_DIR}/util.c ${PROGRAMS_DIR}/timefn.c ${TESTS_DIR}/seqgen.c ${TESTS_DIR}/zstreamtest.c)
+add_executable(zstreamtest ${PROGRAMS_DIR}/datagen.c ${PROGRAMS_DIR}/util.c ${PROGRAMS_DIR}/timefn.c ${TESTS_DIR}/seqgen.c ${TESTS_DIR}/zstreamtest.c ${TESTS_DIR}/external_matchfinder.c)
if (NOT MSVC)
target_compile_options(zstreamtest PRIVATE "-Wno-deprecated-declarations")
endif()
AddTestFlagsOption(ZSTD_PLAYTESTS_FLAGS "$ENV{PLAYTESTS_FLAGS}"
"Semicolon-separated list of flags to pass to the playTests.sh test")
add_test(NAME playTests COMMAND sh -c "\"${TESTS_DIR}/playTests.sh\" ${ZSTD_PLAYTESTS_FLAGS}")
-if (ZSTD_BUILD_PROGRAMS)
+find_program(UNAME uname) # Run script only in unix shell environments
+if (ZSTD_BUILD_PROGRAMS AND UNAME)
set_property(TEST playTests APPEND PROPERTY ENVIRONMENT
"ZSTD_BIN=$<TARGET_FILE:zstd>"
"DATAGEN_BIN=$<TARGET_FILE:datagen>"
)
else()
- message(STATUS "Disabling playTests.sh test because ZSTD_BUILD_PROGRAMS is not enabled")
+ message(STATUS "Disabling playTests.sh test because requirements not met")
set_tests_properties(playTests PROPERTIES DISABLED YES)
endif()
FUZZERTEST = '-T200s'
ZSTREAM_TESTTIME = '-T90s'
DECODECORPUS_TESTTIME = '-T30'
-ZSTDRTTEST = ['--test-large-data']
# =============================================================================
# Executables
dependencies: [ testcommon_dep, thread_dep ],
install: false)
-zstreamtest_sources = [join_paths(zstd_rootdir, 'tests/seqgen.c'),
- join_paths(zstd_rootdir, 'tests/zstreamtest.c')]
+zstreamtest_sources = [
+ join_paths(zstd_rootdir, 'tests/seqgen.c'),
+ join_paths(zstd_rootdir, 'tests/zstreamtest.c'),
+ join_paths(zstd_rootdir, 'tests/external_matchfinder.c')]
zstreamtest = executable('zstreamtest',
zstreamtest_sources,
include_directories: test_includes,
# =============================================================================
if tests_supported_oses.contains(host_machine_os)
- valgrind_prog = find_program('valgrind', ['/usr/bin/valgrind'], required: true)
+ valgrind_prog = find_program('valgrind', ['/usr/bin/valgrind'], required: false)
valgrindTest_py = files('valgrindTest.py')
- test('valgrindTest',
- valgrindTest_py,
- args: [valgrind_prog.path(), zstd, datagen, fuzzer, fullbench],
- depends: [zstd, datagen, fuzzer, fullbench],
- timeout: 600) # Timeout should work on HDD drive
+ if valgrind_prog.found()
+ test('valgrindTest',
+ valgrindTest_py,
+ args: [valgrind_prog.path(), zstd, datagen, fuzzer, fullbench],
+ depends: [zstd, datagen, fuzzer, fullbench],
+ timeout: 600) # Timeout should work on HDD drive
+ endif
endif
if host_machine_os != os_windows
playTests_sh = find_program(join_paths(zstd_rootdir, 'tests/playTests.sh'), required: true)
- test('test-zstd',
- playTests_sh,
- args: ZSTDRTTEST,
- env: ['ZSTD_BIN=' + zstd.full_path(), 'DATAGEN_BIN=./datagen'],
- depends: [datagen],
- workdir: meson.current_build_dir(),
- timeout: 2800) # Timeout should work on HDD drive
+
+ # add slow tests only if the meson version is new enough to support
+ # test setups with default-excluded suites
+ if meson.version().version_compare('>=0.57.0')
+ matrix = {'fast': [], 'slow': ['--test-large-data']}
+ else
+ matrix = {'fast': []}
+ endif
+
+ foreach suite, opt: matrix
+ test('test-zstd-'+suite,
+ playTests_sh,
+ args: opt,
+ env: ['ZSTD_BIN=' + zstd.full_path(), 'DATAGEN_BIN=./datagen'],
+ depends: [datagen],
+ suite: suite,
+ workdir: meson.current_build_dir(),
+ timeout: 2800) # Timeout should work on HDD drive
+ endforeach
endif
test('test-fullbench-1',
args: ['-t', DECODECORPUS_TESTTIME],
timeout: 60)
test('test-poolTests', poolTests) # should be fast
+
+if meson.version().version_compare('>=0.57.0')
+ add_test_setup('fast',
+ is_default: true,
+ exclude_suites: ['slow'])
+ add_test_setup('slow',
+ exclude_suites: ['fast'])
+endif
* \endcode
*/
/*
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* \endcode
*/
/*
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
# ################################################################
-# Copyright (c) 2019-present, Facebook, Inc.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) 2019-present, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
--- /dev/null
+# build artifacts
+externalMatchfinder
--- /dev/null
+# ################################################################
+# Copyright (c) Yann Collet, Meta Platforms, Inc.
+# All rights reserved.
+#
+# This source code is licensed under both the BSD-style license (found in the
+# LICENSE file in the root directory of this source tree) and the GPLv2 (found
+# in the COPYING file in the root directory of this source tree).
+# ################################################################
+
+PROGDIR = ../../programs
+LIBDIR = ../../lib
+
+LIBZSTD = $(LIBDIR)/libzstd.a
+
+CPPFLAGS+= -I$(LIBDIR) -I$(LIBDIR)/compress -I$(LIBDIR)/common
+
+CFLAGS ?= -O3
+CFLAGS += -std=gnu99
+DEBUGFLAGS= -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
+ -Wstrict-aliasing=1 -Wswitch-enum \
+ -Wstrict-prototypes -Wundef -Wpointer-arith \
+ -Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \
+ -Wredundant-decls
+CFLAGS += $(DEBUGFLAGS) $(MOREFLAGS)
+
+default: externalMatchfinder
+
+all: externalMatchfinder
+
+externalMatchfinder: matchfinder.c main.c $(LIBZSTD)
+ $(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@
+
+.PHONY: $(LIBZSTD)
+$(LIBZSTD):
+ $(MAKE) -C $(LIBDIR) libzstd.a CFLAGS="$(CFLAGS)"
+
+clean:
+ $(RM) *.o
+ $(MAKE) -C $(LIBDIR) clean > /dev/null
+ $(RM) externalMatchfinder
--- /dev/null
+externalMatchfinder
+=====================
+
+`externalMatchfinder` is a test tool for the external matchfinder API.
+It demonstrates how to use the API to perform a simple round-trip test.
+
+A sample matchfinder is provided in matchfinder.c, but the user can swap
+this out with a different one if desired. The sample matchfinder implements
+LZ compression with a 1K-entry hash table. Dictionary compression is not currently supported.
+
+Command line :
+```
+externalMatchfinder filename
+```
--- /dev/null
+/*
+ * Copyright (c) Yann Collet, Meta Platforms, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+#define ZSTD_STATIC_LINKING_ONLY
+#include "zstd.h"
+#include "zstd_errors.h"
+#include "matchfinder.h" // simpleExternalMatchFinder
+
+#define CHECK(res) \
+do { \
+ if (ZSTD_isError(res)) { \
+ printf("ERROR: %s\n", ZSTD_getErrorName(res)); \
+ return 1; \
+ } \
+} while (0) \
+
+int main(int argc, char *argv[]) {
+ if (argc != 2) {
+ printf("Usage: exampleMatchfinder <file>\n");
+ return 1;
+ }
+
+ ZSTD_CCtx* const zc = ZSTD_createCCtx();
+
+ int simpleExternalMatchState = 0xdeadbeef;
+
+ // Here is the crucial bit of code!
+ ZSTD_registerExternalMatchFinder(
+ zc,
+ &simpleExternalMatchState,
+ simpleExternalMatchFinder
+ );
+
+ {
+ size_t const res = ZSTD_CCtx_setParameter(zc, ZSTD_c_enableMatchFinderFallback, 1);
+ CHECK(res);
+ }
+
+ FILE *f = fopen(argv[1], "rb");
+ assert(f);
+ {
+ int const ret = fseek(f, 0, SEEK_END);
+ assert(ret == 0);
+ }
+ size_t const srcSize = ftell(f);
+ {
+ int const ret = fseek(f, 0, SEEK_SET);
+ assert(ret == 0);
+ }
+
+ char* const src = malloc(srcSize + 1);
+ assert(src);
+ {
+ size_t const ret = fread(src, srcSize, 1, f);
+ assert(ret == 1);
+ int const ret2 = fclose(f);
+ assert(ret2 == 0);
+ }
+
+ size_t const dstSize = ZSTD_compressBound(srcSize);
+ char* const dst = malloc(dstSize);
+ assert(dst);
+
+ size_t const cSize = ZSTD_compress2(zc, dst, dstSize, src, srcSize);
+ CHECK(cSize);
+
+ char* const val = malloc(srcSize);
+ assert(val);
+
+ {
+ size_t const res = ZSTD_decompress(val, srcSize, dst, cSize);
+ CHECK(res);
+ }
+
+ if (memcmp(src, val, srcSize) == 0) {
+ printf("Compression and decompression were successful!\n");
+ printf("Original size: %lu\n", srcSize);
+ printf("Compressed size: %lu\n", cSize);
+ } else {
+ printf("ERROR: input and validation buffers don't match!\n");
+ for (size_t i = 0; i < srcSize; i++) {
+ if (src[i] != val[i]) {
+ printf("First bad index: %zu\n", i);
+ break;
+ }
+ }
+ return 1;
+ }
+
+ ZSTD_freeCCtx(zc);
+ free(src);
+ free(dst);
+ free(val);
+ return 0;
+}
--- /dev/null
+/*
+ * Copyright (c) Yann Collet, Meta Platforms, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#include "zstd_compress_internal.h"
+#include "matchfinder.h"
+
+#define HSIZE 1024
+static U32 const HLOG = 10;
+static U32 const MLS = 4;
+static U32 const BADIDX = 0xffffffff;
+
+size_t simpleExternalMatchFinder(
+ void* externalMatchState,
+ ZSTD_Sequence* outSeqs, size_t outSeqsCapacity,
+ const void* src, size_t srcSize,
+ const void* dict, size_t dictSize,
+ int compressionLevel,
+ size_t windowSize
+) {
+ const BYTE* const istart = (const BYTE*)src;
+ const BYTE* const iend = istart + srcSize;
+ const BYTE* ip = istart;
+ const BYTE* anchor = istart;
+ size_t seqCount = 0;
+ U32 hashTable[HSIZE];
+
+ (void)externalMatchState;
+ (void)dict;
+ (void)dictSize;
+ (void)outSeqsCapacity;
+ (void)compressionLevel;
+
+ { int i;
+ for (i=0; i < HSIZE; i++) {
+ hashTable[i] = BADIDX;
+ } }
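+ /* Greedy single-pass parse: hash MLS bytes at each position, probe the
+  * single slot holding the previous position with the same hash, and emit
+  * a sequence whenever that candidate matches within the window. */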
+
+ while (ip + MLS < iend) {
+ size_t const hash = ZSTD_hashPtr(ip, HLOG, MLS);
+ U32 const matchIndex = hashTable[hash];
+ hashTable[hash] = (U32)(ip - istart);
+
+ if (matchIndex != BADIDX) {
+ const BYTE* const match = istart + matchIndex;
+ U32 const matchLen = (U32)ZSTD_count(ip, match, iend);
+ if (matchLen >= ZSTD_MINMATCH_MIN) {
+ U32 const litLen = (U32)(ip - anchor);
+ U32 const offset = (U32)(ip - match);
+ ZSTD_Sequence const seq = {
+ offset, litLen, matchLen, 0
+ };
+
+ /* Note: it's crucial to stay within the window size! */
+ if (offset <= windowSize) {
+ outSeqs[seqCount++] = seq;
+ ip += matchLen;
+ anchor = ip;
+ continue;
+ }
+ }
+ }
+
+ ip++;
+ }
+
+ { ZSTD_Sequence const finalSeq = {
+ 0, (U32)(iend - anchor), 0, 0
+ };
+ outSeqs[seqCount++] = finalSeq;
+ }
+
+ return seqCount;
+}
--- /dev/null
+/*
+ * Copyright (c) Yann Collet, Meta Platforms, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef MATCHFINDER_H
+#define MATCHFINDER_H
+
+#define ZSTD_STATIC_LINKING_ONLY
+#include "zstd.h"
+
+size_t simpleExternalMatchFinder(
+ void* externalMatchState,
+ ZSTD_Sequence* outSeqs, size_t outSeqsCapacity,
+ const void* src, size_t srcSize,
+ const void* dict, size_t dictSize,
+ int compressionLevel,
+ size_t windowSize
+);
+
+#endif
#!/usr/bin/env python3
# ################################################################
-# Copyright (c) 2021-2021, Facebook, Inc.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under both the BSD-style license (found in the
# ################################################################
-# Copyright (c) 2016-present, Facebook, Inc.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) 2016-present, Przemyslaw Skibinski, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
# ################################################################
-# Copyright (c) 2018-present, Yann Collet, Facebook, Inc.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) 2018-present, Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
CONTROL(cTotalSizeNoDict != 0);
DISPLAYLEVEL(3, "compressing at level %u without dictionary : Ratio=%.2f (%u bytes) \n",
clevel,
- (double)totalSrcSlicesSize / cTotalSizeNoDict, (unsigned)cTotalSizeNoDict);
+ (double)totalSrcSlicesSize / (double)cTotalSizeNoDict, (unsigned)cTotalSizeNoDict);
size_t* const cSizes = malloc(nbBlocks * sizeof(size_t));
CONTROL(cSizes != NULL);
CONTROL(cTotalSize != 0);
DISPLAYLEVEL(3, "compressed using a %u bytes dictionary : Ratio=%.2f (%u bytes) \n",
(unsigned)dictBuffer.size,
- (double)totalSrcSlicesSize / cTotalSize, (unsigned)cTotalSize);
+ (double)totalSrcSlicesSize / (double)cTotalSize, (unsigned)cTotalSize);
/* now dstSlices contain the real compressed size of each block, instead of the maximum capacity */
shrinkSizes(dstSlices, cSizes);
# ################################################################
-# Copyright (c) Facebook, Inc.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under both the BSD-style license (found in the
-UZSTD_MULTITHREAD \
-U_MSC_VER \
-U_WIN32 \
- -RZSTDLIB_VISIBILITY= \
- -RZSTDERRORLIB_VISIBILITY= \
+ -RZSTDLIB_VISIBLE= \
+ -RZSTDERRORLIB_VISIBLE= \
-RZSTD_FALLTHROUGH=fallthrough \
-DZSTD_HAVE_WEAK_SYMBOLS=0 \
-DZSTD_TRACE=0 \
/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
/*
- * Copyright (c) Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
# SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
# ################################################################
-# Copyright (c) Facebook, Inc.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under both the BSD-style license (found in the
/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
# ################################################################
-# Copyright (c) Facebook, Inc.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) 2016-2021, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) 2016-2021, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) 2016-2021, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) 2016-2021, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) 2016-2021, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) 2016-2021, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) 2016-2021, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) 2016-2021, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) 2016-2021, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) 2016-2021, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
/*
- * Copyright (c) Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
/*
- * Copyright (c) Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
/*
- * Copyright (c) Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
/*
- * Copyright (c) Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) 2016-present, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) 2016-present, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
# ################################################################
-# Copyright (c) 2016-present, Facebook, Inc.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) 2016-present, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) 2016-present, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) 2016-present, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
#include "Pzstd.h"
#include "SkippableFrame.h"
#include "utils/FileSystem.h"
+#include "utils/Portability.h"
#include "utils/Range.h"
#include "utils/ScopeGuard.h"
#include "utils/ThreadPool.h"
#include "utils/WorkQueue.h"
+#include <algorithm>
#include <chrono>
#include <cinttypes>
#include <cstddef>
const ZSTD_parameters ¶ms) {
(void)size;
(void)numThreads;
+ // Not validated to work correctly for window logs > 23.
+ // It will definitely fail if 1 << (windowLog + 2) is >= 4GB because
+ // the skippable frame can only store sizes up to 4GB.
+ assert(params.cParams.windowLog <= 23);
return size_t{1} << (params.cParams.windowLog + 2);
}
// start writing before compression is done because we need to know the
// compressed size.
// Wait for the compressed size to be available and write skippable frame
- SkippableFrame frame(out->size());
+ assert(uint64_t(out->size()) < uint64_t(1) << 32);
+ SkippableFrame frame(uint32_t(out->size()));
if (!writeData(frame.data(), outputFd)) {
errorHolder.setError("Failed to write output");
return bytesWritten;
/*
- * Copyright (c) 2016-present, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) 2016-present, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) 2016-present, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) 2016-present, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) 2016-present, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) 2016-present, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) 2016-present, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) 2016-present, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) 2016-present, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) 2016-present, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
*/
#pragma once
+#include "utils/Portability.h"
#include "utils/Range.h"
#include <sys/stat.h>
#include <cerrno>
#include <cstdint>
+#include <limits>
#include <system_error>
// A small subset of `std::filesystem`.
std::error_code& ec) noexcept {
auto stat = status(path, ec);
if (ec) {
- return -1;
+ return std::numeric_limits<uintmax_t>::max();
}
if (!is_regular_file(stat)) {
ec.assign(ENOTSUP, std::generic_category());
- return -1;
+ return std::numeric_limits<uintmax_t>::max();
}
ec.clear();
return stat.st_size;
/*
- * Copyright (c) 2016-present, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
--- /dev/null
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ */
+
+#pragma once
+
+#include <algorithm>
+
+// Required for Windows, which defines min/max macros, but we want the std:: versions.
+#undef min
+#undef max
/*
- * Copyright (c) 2016-present, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
#pragma once
#include "utils/Likely.h"
+#include "utils/Portability.h"
+#include <algorithm>
#include <cstddef>
#include <cstring>
#include <stdexcept>
/*
- * Copyright (c) 2016-present, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) 2016-present, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) 2016-present, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) 2016-present, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) 2016-present, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) 2016-present, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) 2016-present, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) 2016-present, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) 2016-present, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) 2016-present, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
# ################################################################
-# Copyright (c) 2019-present, Facebook, Inc.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) 2016-2021, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
# ################################################################
-# Copyright (c) 2017-present, Facebook, Inc.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) 2017-present, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) 2017-present, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) 2017-present, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) 2017-present, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) 2017-present, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
# ################################################################
-# Copyright (c) 2017-present, Facebook, Inc.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under both the BSD-style license (found in the
### Notices
-Copyright (c) 2017-present Facebook, Inc.
+Copyright (c) Meta Platforms, Inc. and affiliates.
Permission is granted to copy and distribute this document
for any purpose and without charge,
/*
- * Copyright (c) 2017-present, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) 2017-present, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
# ################################################################
-# Copyright (c) 2018-present, Yann Collet, Facebook, Inc.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under both the BSD-style license (found in the
# ################################################################
-# Copyright (c) Yann Collet, Facebook, Inc.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
### Notices
-Copyright (c) 2016-2021 Yann Collet, Facebook, Inc.
+Copyright (c) Meta Platforms, Inc. and affiliates.
Permission is granted to copy and distribute this document
for any purpose and without charge,
or they can be decoded on the flow during [Sequence Execution].
Literals can be stored uncompressed or compressed using Huffman prefix codes.
-When compressed, an optional tree description can be present,
+When compressed, a tree description may optionally be present,
followed by 1 or 4 streams.
| `Literals_Section_Header` | [`Huffman_Tree_Description`] | [jumpTable] | Stream1 | [Stream2] | [Stream3] | [Stream4] |
`Regenerated_Size = (Literals_Section_Header[0]>>4) + (Literals_Section_Header[1]<<4) + (Literals_Section_Header[2]<<12)`
Only Stream1 is present for these cases.
-Note : it's allowed to represent a short value (for example `13`)
+Note : it's allowed to represent a short value (for example `27`)
using a long format, even if it's less efficient.
__`Size_Format` for `Compressed_Literals_Block` and `Treeless_Literals_Block`__ :
Both `Regenerated_Size` and `Compressed_Size` use 10 bits (0-1023).
`Literals_Section_Header` uses 3 bytes.
- `Size_Format` == 01 : 4 streams.
- Both `Regenerated_Size` and `Compressed_Size` use 10 bits (0-1023).
+ Both `Regenerated_Size` and `Compressed_Size` use 10 bits (6-1023).
`Literals_Section_Header` uses 3 bytes.
- `Size_Format` == 10 : 4 streams.
- Both `Regenerated_Size` and `Compressed_Size` use 14 bits (0-16383).
+ Both `Regenerated_Size` and `Compressed_Size` use 14 bits (6-16383).
`Literals_Section_Header` uses 4 bytes.
- `Size_Format` == 11 : 4 streams.
- Both `Regenerated_Size` and `Compressed_Size` use 18 bits (0-262143).
+ Both `Regenerated_Size` and `Compressed_Size` use 18 bits (6-262143).
`Literals_Section_Header` uses 5 bytes.
Both `Compressed_Size` and `Regenerated_Size` fields follow __little-endian__ convention.
Note: `Compressed_Size` __includes__ the size of the Huffman Tree description
_when_ it is present.
+The 4 streams mode is faster to decompress than the 1 stream mode,
+by exploiting instruction-level parallelism.
+But it's also more expensive,
+costing on average ~7.3 bytes more than the 1 stream mode, mostly from the jump table.
+
+In general, use the 4 streams mode when there are more literals to decode,
+to favor higher decompression speeds.
+Beyond 1KB, the 4 streams mode is compulsory anyway.
+
+Note that the 4 streams mode requires a minimum of 6 bytes.
+That's a technical minimum; it's not recommended to employ the 4 streams mode
+for such a small quantity, as that would be wasteful.
+A more practical lower bound would be around ~256 bytes.
+
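For concreteness, here is a minimal, illustrative sketch (not part of the reference decoder; the helper and struct names are invented for this example) of extracting both sizes from a 3-byte `Literals_Section_Header` when `Size_Format` == 01, following the little-endian field layout described above. The 4-byte and 5-byte formats follow the same pattern with 14-bit and 18-bit fields.

```c
#include <stdint.h>

typedef struct { uint32_t regeneratedSize; uint32_t compressedSize; } LitSizes;

/* 3-byte header, little-endian bit layout:
 * bits 0-1: Literals_Block_Type, bits 2-3: Size_Format (== 01 here),
 * bits 4-13: Regenerated_Size, bits 14-23: Compressed_Size */
static LitSizes parseLitHeader3(const uint8_t hdr[3])
{
    uint32_t const h = (uint32_t)hdr[0]
                     | ((uint32_t)hdr[1] << 8)
                     | ((uint32_t)hdr[2] << 16);
    LitSizes s;
    s.regeneratedSize = (h >> 4)  & 0x3FF;   /* 10 bits */
    s.compressedSize  = (h >> 14) & 0x3FF;   /* 10 bits */
    return s;
}
```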
#### Raw Literals Block
The data in Stream1 is `Regenerated_Size` bytes long,
it contains the raw literals data to be used during [Sequence Execution].
# ################################################################
-# Copyright (c) Yann Collet, Facebook, Inc.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Martin Liska, SUSE, Facebook, Inc.
+ * Copyright (c) Martin Liska, SUSE, Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
# ################################################################
-# Copyright (c) Yann Collet, Facebook, Inc.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under both the BSD-style license (found in the
`ZSTD_DCtx` decompression contexts,
but might also result in a small decompression speed cost.
+- The C compiler macros `ZSTDLIB_VISIBLE`, `ZSTDERRORLIB_VISIBLE` and `ZDICTLIB_VISIBLE`
+ can be overridden to control the visibility of zstd's API. Additionally,
+ `ZSTDLIB_STATIC_API` and `ZDICTLIB_STATIC_API` can be overridden to control the visibility
+ of zstd's static API. Specifically, it can be set to `ZSTDLIB_HIDDEN` to hide the symbols
+ from the shared library. These macros default to `ZSTDLIB_VISIBILITY`,
+  `ZSTDERRORLIB_VISIBILITY`, and `ZDICTLIB_VISIBILITY` if unset, for backwards compatibility
+ with the old macro names.
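As a hedged illustration of the note above (the macro names come from the note; the exact build integration is left to the user), the static-only API could be hidden from a shared-library build by pre-defining the override, typically via the compiler command line:

```c
/* Sketch: hide the static-only API from the shared library's exported symbols.
 * In a real build this define would normally be passed as a compiler flag,
 * e.g. CPPFLAGS="-DZSTDLIB_STATIC_API=ZSTDLIB_HIDDEN", when compiling libzstd. */
#define ZSTDLIB_STATIC_API ZSTDLIB_HIDDEN
#define ZSTD_STATIC_LINKING_ONLY   /* expose the static API declarations */
#include "zstd.h"
```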
#### Windows : using MinGW+MSYS to create DLL
/*
- * Copyright (c) Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/* ******************************************************************
* bitstream
* Part of FSE library
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
*
* You can contact the author at :
* - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/* ******************************************************************
* debug
* Part of FSE library
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
*
* You can contact the author at :
* - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
/* ******************************************************************
* debug
* Part of FSE library
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
*
* You can contact the author at :
* - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
/* ******************************************************************
* Common functions of New Generation Entropy library
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
*
* You can contact the author at :
* - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
case PREFIX(frameParameter_windowTooLarge): return "Frame requires too much memory for decoding";
case PREFIX(corruption_detected): return "Data corruption detected";
case PREFIX(checksum_wrong): return "Restored data doesn't match checksum";
+ case PREFIX(literals_headerWrong): return "Header of Literals' block doesn't respect format specification";
case PREFIX(parameter_unsupported): return "Unsupported parameter";
+ case PREFIX(parameter_combination_unsupported): return "Unsupported combination of parameters";
case PREFIX(parameter_outOfBound): return "Parameter is out of bound";
case PREFIX(init_missing): return "Context should be init first";
case PREFIX(memory_allocation): return "Allocation error : not enough memory";
case PREFIX(seekableIO): return "An I/O error occurred when reading/seeking";
case PREFIX(dstBuffer_wrong): return "Destination buffer is wrong";
case PREFIX(srcBuffer_wrong): return "Source buffer is wrong";
+ case PREFIX(externalMatchFinder_failed): return "External matchfinder returned an error code";
case PREFIX(maxCode):
default: return notErrorCode;
}
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/* ******************************************************************
* FSE : Finite State Entropy codec
* Public Prototypes declaration
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
*
* You can contact the author at :
* - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
/* ******************************************************************
* FSE : Finite State Entropy decoder
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
*
* You can contact the author at :
* - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
/* ******************************************************************
* huff0 huffman codec,
* part of Finite State Entropy library
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
*
* You can contact the author at :
* - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
unsigned maxSymbolValue, unsigned tableLog);
/** HUF_compress4X_wksp() :
- * Same as HUF_compress2(), but uses externally allocated `workSpace`.
- * `workspace` must be at least as large as HUF_WORKSPACE_SIZE */
+ * Same as HUF_compress2(), but uses externally allocated @workSpace.
+ * @workSpace's size, aka @wkspSize, must be >= HUF_WORKSPACE_SIZE
+ * @srcSize must be >= 6
+ */
#define HUF_WORKSPACE_SIZE ((8 << 10) + 512 /* sorting scratch space */)
#define HUF_WORKSPACE_SIZE_U64 (HUF_WORKSPACE_SIZE / sizeof(U64))
HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity,
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/* Join all of the threads */
{ size_t i;
for (i = 0; i < ctx->threadCapacity; ++i) {
- ZSTD_pthread_join(ctx->threads[i], NULL); /* note : could fail */
+ ZSTD_pthread_join(ctx->threads[i]); /* note : could fail */
} }
}
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/* === Implementation === */
+typedef struct {
+ void* (*start_routine)(void*);
+ void* arg;
+ int initialized;
+ ZSTD_pthread_cond_t initialized_cond;
+ ZSTD_pthread_mutex_t initialized_mutex;
+} ZSTD_thread_params_t;
+
static unsigned __stdcall worker(void *arg)
{
- ZSTD_pthread_t* const thread = (ZSTD_pthread_t*) arg;
- thread->arg = thread->start_routine(thread->arg);
+ void* (*start_routine)(void*);
+ void* thread_arg;
+
+    /* Initialize thread_arg and start_routine and signal the main thread that we don't need it
+ * to wait any longer.
+ */
+ {
+ ZSTD_thread_params_t* thread_param = (ZSTD_thread_params_t*)arg;
+ thread_arg = thread_param->arg;
+ start_routine = thread_param->start_routine;
+
+ /* Signal main thread that we are running and do not depend on its memory anymore */
+ ZSTD_pthread_mutex_lock(&thread_param->initialized_mutex);
+ thread_param->initialized = 1;
+ ZSTD_pthread_cond_signal(&thread_param->initialized_cond);
+ ZSTD_pthread_mutex_unlock(&thread_param->initialized_mutex);
+ }
+
+ start_routine(thread_arg);
+
return 0;
}
int ZSTD_pthread_create(ZSTD_pthread_t* thread, const void* unused,
void* (*start_routine) (void*), void* arg)
{
+ ZSTD_thread_params_t thread_param;
(void)unused;
- thread->arg = arg;
- thread->start_routine = start_routine;
- thread->handle = (HANDLE) _beginthreadex(NULL, 0, worker, thread, 0, NULL);
- if (!thread->handle)
+ thread_param.start_routine = start_routine;
+ thread_param.arg = arg;
+ thread_param.initialized = 0;
+ *thread = NULL;
+
+ /* Setup thread initialization synchronization */
+ if(ZSTD_pthread_cond_init(&thread_param.initialized_cond, NULL)) {
+ /* Should never happen on Windows */
+ return -1;
+ }
+ if(ZSTD_pthread_mutex_init(&thread_param.initialized_mutex, NULL)) {
+ /* Should never happen on Windows */
+ ZSTD_pthread_cond_destroy(&thread_param.initialized_cond);
+ return -1;
+ }
+
+ /* Spawn thread */
+ *thread = (HANDLE)_beginthreadex(NULL, 0, worker, &thread_param, 0, NULL);
+    if (!*thread) {
+ ZSTD_pthread_mutex_destroy(&thread_param.initialized_mutex);
+ ZSTD_pthread_cond_destroy(&thread_param.initialized_cond);
return errno;
- else
- return 0;
+ }
+
+ /* Wait for thread to be initialized */
+ ZSTD_pthread_mutex_lock(&thread_param.initialized_mutex);
+ while(!thread_param.initialized) {
+ ZSTD_pthread_cond_wait(&thread_param.initialized_cond, &thread_param.initialized_mutex);
+ }
+ ZSTD_pthread_mutex_unlock(&thread_param.initialized_mutex);
+ ZSTD_pthread_mutex_destroy(&thread_param.initialized_mutex);
+ ZSTD_pthread_cond_destroy(&thread_param.initialized_cond);
+
+ return 0;
}
-int ZSTD_pthread_join(ZSTD_pthread_t thread, void **value_ptr)
+int ZSTD_pthread_join(ZSTD_pthread_t thread)
{
DWORD result;
- if (!thread.handle) return 0;
+ if (!thread) return 0;
- result = WaitForSingleObject(thread.handle, INFINITE);
- CloseHandle(thread.handle);
+ result = WaitForSingleObject(thread, INFINITE);
+ CloseHandle(thread);
switch (result) {
case WAIT_OBJECT_0:
- if (value_ptr) *value_ptr = thread.arg;
return 0;
case WAIT_ABANDONED:
return EINVAL;
#define ZSTD_pthread_cond_broadcast(a) WakeAllConditionVariable((a))
/* ZSTD_pthread_create() and ZSTD_pthread_join() */
-typedef struct {
- HANDLE handle;
- void* (*start_routine)(void*);
- void* arg;
-} ZSTD_pthread_t;
+typedef HANDLE ZSTD_pthread_t;
int ZSTD_pthread_create(ZSTD_pthread_t* thread, const void* unused,
void* (*start_routine) (void*), void* arg);
-int ZSTD_pthread_join(ZSTD_pthread_t thread, void** value_ptr);
+int ZSTD_pthread_join(ZSTD_pthread_t thread);
/**
* add here more wrappers as required
#define ZSTD_pthread_t pthread_t
#define ZSTD_pthread_create(a, b, c, d) pthread_create((a), (b), (c), (d))
-#define ZSTD_pthread_join(a, b) pthread_join((a),(b))
+#define ZSTD_pthread_join(a) pthread_join((a),NULL)
#else /* DEBUGLEVEL >= 1 */
#define ZSTD_pthread_t pthread_t
#define ZSTD_pthread_create(a, b, c, d) pthread_create((a), (b), (c), (d))
-#define ZSTD_pthread_join(a, b) pthread_join((a),(b))
+#define ZSTD_pthread_join(a) pthread_join((a),NULL)
#endif
/*
* xxHash - Fast Hash algorithm
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
*
* You can contact the author at :
* - xxHash homepage: https://cyan4973.github.io/xxHash/
/*
* xxHash - Fast Hash algorithm
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
*
* You can contact the author at :
* - xxHash homepage: https://cyan4973.github.io/xxHash/
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
#define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */
#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */) /* for a non-null block */
+#define MIN_LITERALS_FOR_4_STREAMS 6
typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingType_e;
/*
- * Copyright (c) Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/* ******************************************************************
* FSE : Finite State Entropy encoder
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
*
* You can contact the author at :
* - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
/* ******************************************************************
* hist : Histogram functions
* part of Finite State Entropy project
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
*
* You can contact the author at :
* - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
/* ******************************************************************
* hist : Histogram functions
* part of Finite State Entropy project
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
*
* You can contact the author at :
* - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
/* ******************************************************************
* Huffman encoder, part of New Generation Entropy library
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
*
* You can contact the author at :
* - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
/* renorm totalCost from 2^largestBits to 2^targetNbBits
* note : totalCost is necessarily a multiple of baseCost */
- assert((totalCost & (baseCost - 1)) == 0);
+ assert(((U32)totalCost & (baseCost - 1)) == 0);
totalCost >>= (largestBits - targetNbBits);
assert(totalCost > 0);
return minBitsSymbols;
}
-unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, void* workSpace, size_t wkspSize, HUF_CElt* table, const unsigned* count, HUF_depth_mode depthMode)
+unsigned HUF_optimalTableLog(
+ unsigned maxTableLog,
+ size_t srcSize,
+ unsigned maxSymbolValue,
+ void* workSpace, size_t wkspSize,
+ HUF_CElt* table,
+ const unsigned* count,
+ HUF_depth_mode depthMode)
{
- unsigned optLog = FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1);
assert(srcSize > 1); /* Not supported, RLE should be used instead */
+ assert(wkspSize >= sizeof(HUF_buildCTable_wksp_tables));
- if (depthMode == HUF_depth_optimal) { /** Test valid depths and return optimal **/
- BYTE* dst = (BYTE*)workSpace + sizeof(HUF_WriteCTableWksp);
+ if (depthMode != HUF_depth_optimal) {
+ /* cheap evaluation, based on FSE */
+ return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1);
+ }
+
+ { BYTE* dst = (BYTE*)workSpace + sizeof(HUF_WriteCTableWksp);
size_t dstSize = wkspSize - sizeof(HUF_WriteCTableWksp);
- size_t optSize = ((size_t) ~0);
- unsigned huffLog;
size_t maxBits, hSize, newSize;
const unsigned symbolCardinality = HUF_cardinality(count, maxSymbolValue);
+ const unsigned minTableLog = HUF_minTableLog(symbolCardinality);
+ size_t optSize = ((size_t) ~0) - 1;
+ unsigned optLog = maxTableLog, optLogGuess;
- if (wkspSize < sizeof(HUF_buildCTable_wksp_tables)) return optLog;
+ DEBUGLOG(6, "HUF_optimalTableLog: probing huf depth (srcSize=%zu)", srcSize);
- for (huffLog = HUF_minTableLog(symbolCardinality); huffLog <= maxTableLog; huffLog++) {
- maxBits = HUF_buildCTable_wksp(table, count,
- maxSymbolValue, huffLog,
- workSpace, wkspSize);
+ /* Search until size increases */
+ for (optLogGuess = minTableLog; optLogGuess <= maxTableLog; optLogGuess++) {
+ DEBUGLOG(7, "checking for huffLog=%u", optLogGuess);
+ maxBits = HUF_buildCTable_wksp(table, count, maxSymbolValue, optLogGuess, workSpace, wkspSize);
if (ERR_isError(maxBits)) continue;
- hSize = HUF_writeCTable_wksp(dst, dstSize, table, maxSymbolValue, (U32)maxBits,
- workSpace, wkspSize);
+ if (maxBits < optLogGuess && optLogGuess > minTableLog) break;
+
+ hSize = HUF_writeCTable_wksp(dst, dstSize, table, maxSymbolValue, (U32)maxBits, workSpace, wkspSize);
+
if (ERR_isError(hSize)) continue;
newSize = HUF_estimateCompressedSize(table, count, maxSymbolValue) + hSize;
+ if (newSize > optSize + 1) {
+ break;
+ }
+
if (newSize < optSize) {
optSize = newSize;
- optLog = huffLog;
+ optLog = optLogGuess;
}
}
+ assert(optLog <= HUF_TABLELOG_MAX);
+ return optLog;
}
- assert(optLog <= HUF_TABLELOG_MAX);
- return optLog;
}
/* HUF_compress_internal() :
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* Helper functions
***************************************/
/* ZSTD_compressBound()
- * Note that the result from this function is only compatible with the "normal"
- * full-block strategy.
- * When there are a lot of small blocks due to frequent flush in streaming mode
- * the overhead of headers can make the compressed data to be larger than the
- * return value of ZSTD_compressBound().
+ * Note that the result from this function is only valid for
+ * the one-pass compression functions.
+ * When employing the streaming mode,
+ * if flushes are frequently altering the size of blocks,
+ * the overhead from block headers can make the compressed data larger
+ * than the return value of ZSTD_compressBound().
*/
size_t ZSTD_compressBound(size_t srcSize) {
- return ZSTD_COMPRESSBOUND(srcSize);
+ size_t const r = ZSTD_COMPRESSBOUND(srcSize);
+ if (r==0) return ERROR(srcSize_wrong);
+ return r;
}
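A brief illustrative caller (not from this patch; it assumes linking against libzstd) shows why the change matters: since `ZSTD_compressBound()` can now return an error code for an overly large `srcSize`, the bound should be checked before it is used to size a one-pass destination buffer.

```c
#include <stdlib.h>
#include <zstd.h>

/* Illustrative one-pass compression helper: sizes dst with ZSTD_compressBound()
 * and treats an error from the bound (srcSize too large) as a failure. */
static void* compressOnce(const void* src, size_t srcSize, size_t* cSize)
{
    size_t const bound = ZSTD_compressBound(srcSize);
    void* dst;
    if (ZSTD_isError(bound)) return NULL;      /* srcSize_wrong : input too large */
    dst = malloc(bound);
    if (dst == NULL) return NULL;
    *cSize = ZSTD_compress(dst, bound, src, srcSize, 3 /* arbitrary level */);
    if (ZSTD_isError(*cSize)) { free(dst); return NULL; }
    return dst;
}
```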
return (cParams->strategy >= ZSTD_btopt && cParams->windowLog >= 27) ? ZSTD_ps_enable : ZSTD_ps_disable;
}
+/* Enables validation for external sequences in debug builds. */
+static int ZSTD_resolveExternalSequenceValidation(int mode) {
+#if defined(DEBUGLEVEL) && (DEBUGLEVEL>=2)
+ (void)mode;
+ return 1;
+#else
+ return mode;
+#endif
+}
+
/* Returns 1 if compression parameters are such that CDict hashtable and chaintable indices are tagged.
* If so, the tags need to be removed in ZSTD_resetCCtx_byCopyingCDict. */
static int ZSTD_CDictIndicesAreTagged(const ZSTD_compressionParameters* const cParams) {
}
cctxParams.useBlockSplitter = ZSTD_resolveBlockSplitterMode(cctxParams.useBlockSplitter, &cParams);
cctxParams.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams.useRowMatchFinder, &cParams);
+ cctxParams.validateSequences = ZSTD_resolveExternalSequenceValidation(cctxParams.validateSequences);
assert(!ZSTD_checkCParams(cParams));
return cctxParams;
}
#define ZSTD_NO_CLEVEL 0
/**
- * Initializes the cctxParams from params and compressionLevel.
+ * Initializes `cctxParams` from `params` and `compressionLevel`.
* @param compressionLevel If params are derived from a compression level then that compression level, otherwise ZSTD_NO_CLEVEL.
*/
-static void ZSTD_CCtxParams_init_internal(ZSTD_CCtx_params* cctxParams, ZSTD_parameters const* params, int compressionLevel)
+static void
+ZSTD_CCtxParams_init_internal(ZSTD_CCtx_params* cctxParams,
+ const ZSTD_parameters* params,
+ int compressionLevel)
{
assert(!ZSTD_checkCParams(params->cParams));
ZSTD_memset(cctxParams, 0, sizeof(*cctxParams));
cctxParams->useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams->useRowMatchFinder, &params->cParams);
cctxParams->useBlockSplitter = ZSTD_resolveBlockSplitterMode(cctxParams->useBlockSplitter, &params->cParams);
cctxParams->ldmParams.enableLdm = ZSTD_resolveEnableLdm(cctxParams->ldmParams.enableLdm, &params->cParams);
+ cctxParams->validateSequences = ZSTD_resolveExternalSequenceValidation(cctxParams->validateSequences);
DEBUGLOG(4, "ZSTD_CCtxParams_init_internal: useRowMatchFinder=%d, useBlockSplitter=%d ldm=%d",
cctxParams->useRowMatchFinder, cctxParams->useBlockSplitter, cctxParams->ldmParams.enableLdm);
}
/**
* Sets cctxParams' cParams and fParams from params, but otherwise leaves them alone.
- * @param param Validated zstd parameters.
+ * @param params Validated zstd parameters.
*/
static void ZSTD_CCtxParams_setZstdParams(
ZSTD_CCtx_params* cctxParams, const ZSTD_parameters* params)
bounds.upperBound = (int)ZSTD_ps_disable;
return bounds;
+ case ZSTD_c_enableMatchFinderFallback:
+ bounds.lowerBound = 0;
+ bounds.upperBound = 1;
+ return bounds;
+
+ case ZSTD_c_maxBlockSize:
+ bounds.lowerBound = 1;
+ bounds.upperBound = ZSTD_BLOCKSIZE_MAX;
+ return bounds;
+
default:
bounds.error = ERROR(parameter_unsupported);
return bounds;
case ZSTD_c_useRowMatchFinder:
case ZSTD_c_deterministicRefPrefix:
case ZSTD_c_prefetchCDictTables:
+ case ZSTD_c_enableMatchFinderFallback:
+ case ZSTD_c_maxBlockSize:
default:
return 0;
}
if (ZSTD_isUpdateAuthorized(param)) {
cctx->cParamsChanged = 1;
} else {
- RETURN_ERROR(stage_wrong, "can only set params in ctx init stage");
+ RETURN_ERROR(stage_wrong, "can only set params in cctx init stage");
} }
switch(param)
case ZSTD_c_useRowMatchFinder:
case ZSTD_c_deterministicRefPrefix:
case ZSTD_c_prefetchCDictTables:
+ case ZSTD_c_enableMatchFinderFallback:
+ case ZSTD_c_maxBlockSize:
break;
default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
case ZSTD_c_forceAttachDict : {
const ZSTD_dictAttachPref_e pref = (ZSTD_dictAttachPref_e)value;
- BOUNDCHECK(ZSTD_c_forceAttachDict, pref);
+ BOUNDCHECK(ZSTD_c_forceAttachDict, (int)pref);
CCtxParams->attachDictPref = pref;
return CCtxParams->attachDictPref;
}
case ZSTD_c_literalCompressionMode : {
const ZSTD_paramSwitch_e lcm = (ZSTD_paramSwitch_e)value;
- BOUNDCHECK(ZSTD_c_literalCompressionMode, lcm);
+ BOUNDCHECK(ZSTD_c_literalCompressionMode, (int)lcm);
CCtxParams->literalCompressionMode = lcm;
return CCtxParams->literalCompressionMode;
}
CCtxParams->prefetchCDictTables = (ZSTD_paramSwitch_e)value;
return CCtxParams->prefetchCDictTables;
+ case ZSTD_c_enableMatchFinderFallback:
+ BOUNDCHECK(ZSTD_c_enableMatchFinderFallback, value);
+ CCtxParams->enableMatchFinderFallback = value;
+ return CCtxParams->enableMatchFinderFallback;
+
+ case ZSTD_c_maxBlockSize:
+ BOUNDCHECK(ZSTD_c_maxBlockSize, value);
+ CCtxParams->maxBlockSize = value;
+        return CCtxParams->maxBlockSize;
+
default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
}
}
case ZSTD_c_prefetchCDictTables:
*value = (int)CCtxParams->prefetchCDictTables;
break;
+ case ZSTD_c_enableMatchFinderFallback:
+ *value = CCtxParams->enableMatchFinderFallback;
+ break;
+ case ZSTD_c_maxBlockSize:
+ *value = (int)CCtxParams->maxBlockSize;
+ break;
default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
}
return 0;
return 0;
}
+size_t ZSTD_CCtx_setCParams(ZSTD_CCtx* cctx, ZSTD_compressionParameters cparams)
+{
+ DEBUGLOG(4, "ZSTD_CCtx_setCParams");
+ assert(cctx != NULL);
+ if (cctx->streamStage != zcss_init) {
+ /* All parameters in @cparams are allowed to be updated during MT compression.
+ * This must be signaled, so that MT compression picks up the changes */
+ cctx->cParamsChanged = 1;
+ }
+ /* only update if parameters are valid */
+ FORWARD_IF_ERROR(ZSTD_checkCParams(cparams), "");
+ cctx->requestedParams.cParams = cparams;
+ return 0;
+}
+
size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize)
{
DEBUGLOG(4, "ZSTD_CCtx_setPledgedSrcSize to %llu bytes", pledgedSrcSize);
RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
"Can't reset parameters only when not in init stage.");
ZSTD_clearAllDicts(cctx);
+ ZSTD_memset(&cctx->externalMatchCtx, 0, sizeof(cctx->externalMatchCtx));
return ZSTD_CCtxParams_reset(&cctx->requestedParams);
}
return 0;
return tableSpace + optSpace + slackSpace + lazyAdditionalSpace;
}
+/* Helper function for calculating memory requirements.
+ * Gives a tighter bound than ZSTD_sequenceBound() by taking minMatch into account. */
+static size_t ZSTD_maxNbSeq(size_t blockSize, unsigned minMatch, int useExternalMatchFinder) {
+ U32 const divider = (minMatch==3 || useExternalMatchFinder) ? 3 : 4;
+ return blockSize / divider;
+}
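To illustrate the helper above: with the largest block size (`ZSTD_BLOCKSIZE_MAX` = 128 KB), `minMatch == 4` caps the sequence table at 131072 / 4 = 32768 entries, while `minMatch == 3` or an external matchfinder raises the cap to 131072 / 3 = 43690 entries.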
+
static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal(
const ZSTD_compressionParameters* cParams,
const ldmParams_t* ldmParams,
const ZSTD_paramSwitch_e useRowMatchFinder,
const size_t buffInSize,
const size_t buffOutSize,
- const U64 pledgedSrcSize)
+ const U64 pledgedSrcSize,
+ int useExternalMatchFinder)
{
size_t const windowSize = (size_t) BOUNDED(1ULL, 1ULL << cParams->windowLog, pledgedSrcSize);
size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize);
- U32 const divider = (cParams->minMatch==3) ? 3 : 4;
- size_t const maxNbSeq = blockSize / divider;
+ size_t const maxNbSeq = ZSTD_maxNbSeq(blockSize, cParams->minMatch, useExternalMatchFinder);
size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize)
+ ZSTD_cwksp_aligned_alloc_size(maxNbSeq * sizeof(seqDef))
+ 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE));
size_t const cctxSpace = isStatic ? ZSTD_cwksp_alloc_size(sizeof(ZSTD_CCtx)) : 0;
+ size_t const maxNbExternalSeq = ZSTD_sequenceBound(blockSize);
+ size_t const externalSeqSpace = useExternalMatchFinder
+ ? ZSTD_cwksp_alloc_size(maxNbExternalSeq * sizeof(ZSTD_Sequence))
+ : 0;
+
size_t const neededSpace =
cctxSpace +
entropySpace +
ldmSeqSpace +
matchStateSize +
tokenSpace +
- bufferSpace;
+ bufferSpace +
+ externalSeqSpace;
DEBUGLOG(5, "estimate workspace : %u", (U32)neededSpace);
return neededSpace;
* be needed. However, we still allocate two 0-sized buffers, which can
* take space under ASAN. */
return ZSTD_estimateCCtxSize_usingCCtxParams_internal(
-        &cParams, &params->ldmParams, 1, useRowMatchFinder, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN);
+        &cParams, &params->ldmParams, 1, useRowMatchFinder, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN, params->useExternalMatchFinder);
}
size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams)
return ZSTD_estimateCCtxSize_usingCCtxParams_internal(
&cParams, &params->ldmParams, 1, useRowMatchFinder, inBuffSize, outBuffSize,
- ZSTD_CONTENTSIZE_UNKNOWN);
+ ZSTD_CONTENTSIZE_UNKNOWN, params->useExternalMatchFinder);
}
}
{ size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params->cParams.windowLog), pledgedSrcSize));
size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize);
- U32 const divider = (params->cParams.minMatch==3) ? 3 : 4;
- size_t const maxNbSeq = blockSize / divider;
+ size_t const maxNbSeq = ZSTD_maxNbSeq(blockSize, params->cParams.minMatch, params->useExternalMatchFinder);
size_t const buffOutSize = (zbuff == ZSTDb_buffered && params->outBufferMode == ZSTD_bm_buffered)
? ZSTD_compressBound(blockSize) + 1
: 0;
size_t const neededSpace =
ZSTD_estimateCCtxSize_usingCCtxParams_internal(
&params->cParams, &params->ldmParams, zc->staticSize != 0, params->useRowMatchFinder,
- buffInSize, buffOutSize, pledgedSrcSize);
+ buffInSize, buffOutSize, pledgedSrcSize, params->useExternalMatchFinder);
int resizeWorkspace;
FORWARD_IF_ERROR(neededSpace, "cctx size estimate failed!");
zc->ldmState.loadedDictEnd = 0;
}
+ /* reserve space for block-level external sequences */
+ if (params->useExternalMatchFinder) {
+ size_t const maxNbExternalSeq = ZSTD_sequenceBound(blockSize);
+ zc->externalMatchCtx.seqBufferCapacity = maxNbExternalSeq;
+ zc->externalMatchCtx.seqBuffer =
+ (ZSTD_Sequence*)ZSTD_cwksp_reserve_aligned(ws, maxNbExternalSeq * sizeof(ZSTD_Sequence));
+ }
+
DEBUGLOG(3, "wksp: finished allocating, %zd bytes remain available", ZSTD_cwksp_available_space(ws));
assert(ZSTD_cwksp_estimated_space_within_bounds(ws, neededSpace, resizeWorkspace));
* entropyWkspSize must be of size at least ENTROPY_WORKSPACE_SIZE - (MaxSeq + 1)*sizeof(U32)
*/
static ZSTD_symbolEncodingTypeStats_t
-ZSTD_buildSequencesStatistics(seqStore_t* seqStorePtr, size_t nbSeq,
- const ZSTD_fseCTables_t* prevEntropy, ZSTD_fseCTables_t* nextEntropy,
- BYTE* dst, const BYTE* const dstEnd,
- ZSTD_strategy strategy, unsigned* countWorkspace,
- void* entropyWorkspace, size_t entropyWkspSize) {
+ZSTD_buildSequencesStatistics(
+ const seqStore_t* seqStorePtr, size_t nbSeq,
+ const ZSTD_fseCTables_t* prevEntropy, ZSTD_fseCTables_t* nextEntropy,
+ BYTE* dst, const BYTE* const dstEnd,
+ ZSTD_strategy strategy, unsigned* countWorkspace,
+ void* entropyWorkspace, size_t entropyWkspSize)
+{
BYTE* const ostart = dst;
const BYTE* const oend = dstEnd;
BYTE* op = ostart;
*/
#define SUSPECT_UNCOMPRESSIBLE_LITERAL_RATIO 20
MEM_STATIC size_t
-ZSTD_entropyCompressSeqStore_internal(seqStore_t* seqStorePtr,
- const ZSTD_entropyCTables_t* prevEntropy,
- ZSTD_entropyCTables_t* nextEntropy,
- const ZSTD_CCtx_params* cctxParams,
- void* dst, size_t dstCapacity,
- void* entropyWorkspace, size_t entropyWkspSize,
- const int bmi2)
+ZSTD_entropyCompressSeqStore_internal(
+ const seqStore_t* seqStorePtr,
+ const ZSTD_entropyCTables_t* prevEntropy,
+ ZSTD_entropyCTables_t* nextEntropy,
+ const ZSTD_CCtx_params* cctxParams,
+ void* dst, size_t dstCapacity,
+ void* entropyWorkspace, size_t entropyWkspSize,
+ const int bmi2)
{
const int longOffsets = cctxParams->cParams.windowLog >= STREAM_ACCUMULATOR_MIN;
ZSTD_strategy const strategy = cctxParams->cParams.strategy;
FSE_CTable* CTable_OffsetBits = nextEntropy->fse.offcodeCTable;
FSE_CTable* CTable_MatchLength = nextEntropy->fse.matchlengthCTable;
const seqDef* const sequences = seqStorePtr->sequencesStart;
- const size_t nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
+ const size_t nbSeq = (size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
const BYTE* const ofCodeTable = seqStorePtr->ofCode;
const BYTE* const llCodeTable = seqStorePtr->llCode;
const BYTE* const mlCodeTable = seqStorePtr->mlCode;
/* Compress literals */
{ const BYTE* const literals = seqStorePtr->litStart;
- size_t const numSequences = seqStorePtr->sequences - seqStorePtr->sequencesStart;
- size_t const numLiterals = seqStorePtr->lit - seqStorePtr->litStart;
+ size_t const numSequences = (size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
+ size_t const numLiterals = (size_t)(seqStorePtr->lit - seqStorePtr->litStart);
/* Base suspicion of uncompressibility on ratio of literals to sequences */
unsigned const suspectUncompressible = (numSequences == 0) || (numLiterals / numSequences >= SUSPECT_UNCOMPRESSIBLE_LITERAL_RATIO);
size_t const litSize = (size_t)(seqStorePtr->lit - literals);
- HUF_depth_mode depthMode = cctxParams->cParams.strategy >= HUF_OPTIMAL_DEPTH_THRESHOLD ? HUF_depth_optimal : HUF_depth_fast;
size_t const cSize = ZSTD_compressLiterals(
- &prevEntropy->huf, &nextEntropy->huf,
- cctxParams->cParams.strategy,
- ZSTD_literalsCompressionIsDisabled(cctxParams),
op, dstCapacity,
literals, litSize,
entropyWorkspace, entropyWkspSize,
- bmi2, suspectUncompressible, depthMode);
+ &prevEntropy->huf, &nextEntropy->huf,
+ cctxParams->cParams.strategy,
+ ZSTD_literalsCompressionIsDisabled(cctxParams),
+ suspectUncompressible, bmi2);
FORWARD_IF_ERROR(cSize, "ZSTD_compressLiterals failed");
assert(cSize <= dstCapacity);
op += cSize;
}
MEM_STATIC size_t
-ZSTD_entropyCompressSeqStore(seqStore_t* seqStorePtr,
- const ZSTD_entropyCTables_t* prevEntropy,
- ZSTD_entropyCTables_t* nextEntropy,
- const ZSTD_CCtx_params* cctxParams,
- void* dst, size_t dstCapacity,
- size_t srcSize,
- void* entropyWorkspace, size_t entropyWkspSize,
- int bmi2)
+ZSTD_entropyCompressSeqStore(
+ const seqStore_t* seqStorePtr,
+ const ZSTD_entropyCTables_t* prevEntropy,
+ ZSTD_entropyCTables_t* nextEntropy,
+ const ZSTD_CCtx_params* cctxParams,
+ void* dst, size_t dstCapacity,
+ size_t srcSize,
+ void* entropyWorkspace, size_t entropyWkspSize,
+ int bmi2)
{
size_t const cSize = ZSTD_entropyCompressSeqStore_internal(
seqStorePtr, prevEntropy, nextEntropy, cctxParams,
if (cSize >= maxCSize) return 0; /* block not compressed */
}
DEBUGLOG(5, "ZSTD_entropyCompressSeqStore() cSize: %zu", cSize);
+    /* libzstd decoders before v1.5.4 are not compatible with compressed blocks of size ZSTD_BLOCKSIZE_MAX exactly.
+ * This restriction is indirectly already fulfilled by respecting ZSTD_minGain() condition above.
+ */
+ assert(cSize < ZSTD_BLOCKSIZE_MAX);
return cSize;
}
ssPtr->longLengthType = ZSTD_llt_none;
}
+/* ZSTD_postProcessExternalMatchFinderResult() :
+ * Validates and post-processes sequences obtained through the external matchfinder API:
+ * - Checks whether nbExternalSeqs represents an error condition.
+ * - Appends a block delimiter to outSeqs if one is not already present.
+ * See zstd.h for context regarding block delimiters.
+ * Returns the number of sequences after post-processing, or an error code. */
+static size_t ZSTD_postProcessExternalMatchFinderResult(
+ ZSTD_Sequence* outSeqs, size_t nbExternalSeqs, size_t outSeqsCapacity, size_t srcSize
+) {
+ RETURN_ERROR_IF(
+ nbExternalSeqs > outSeqsCapacity,
+ externalMatchFinder_failed,
+ "External matchfinder returned error code %lu",
+ (unsigned long)nbExternalSeqs
+ );
+
+ RETURN_ERROR_IF(
+ nbExternalSeqs == 0 && srcSize > 0,
+ externalMatchFinder_failed,
+ "External matchfinder produced zero sequences for a non-empty src buffer!"
+ );
+
+ if (srcSize == 0) {
+ ZSTD_memset(&outSeqs[0], 0, sizeof(ZSTD_Sequence));
+ return 1;
+ }
+
+ {
+ ZSTD_Sequence const lastSeq = outSeqs[nbExternalSeqs - 1];
+
+ /* We can return early if lastSeq is already a block delimiter. */
+ if (lastSeq.offset == 0 && lastSeq.matchLength == 0) {
+ return nbExternalSeqs;
+ }
+
+ /* This error condition is only possible if the external matchfinder
+ * produced an invalid parse, by definition of ZSTD_sequenceBound(). */
+ RETURN_ERROR_IF(
+ nbExternalSeqs == outSeqsCapacity,
+ externalMatchFinder_failed,
+ "nbExternalSeqs == outSeqsCapacity but lastSeq is not a block delimiter!"
+ );
+
+ /* lastSeq is not a block delimiter, so we need to append one. */
+ ZSTD_memset(&outSeqs[nbExternalSeqs], 0, sizeof(ZSTD_Sequence));
+ return nbExternalSeqs + 1;
+ }
+}
+
typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e;
static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
}
if (zc->externSeqStore.pos < zc->externSeqStore.size) {
assert(zc->appliedParams.ldmParams.enableLdm == ZSTD_ps_disable);
+
+ /* External matchfinder + LDM is technically possible, just not implemented yet.
+ * We need to revisit soon and implement it. */
+ RETURN_ERROR_IF(
+ zc->appliedParams.useExternalMatchFinder,
+ parameter_combination_unsupported,
+ "Long-distance matching with external matchfinder enabled is not currently supported."
+ );
+
/* Updates ldmSeqStore.pos */
lastLLSize =
ZSTD_ldm_blockCompress(&zc->externSeqStore,
} else if (zc->appliedParams.ldmParams.enableLdm == ZSTD_ps_enable) {
rawSeqStore_t ldmSeqStore = kNullRawSeqStore;
+ /* External matchfinder + LDM is technically possible, just not implemented yet.
+ * We need to revisit soon and implement it. */
+ RETURN_ERROR_IF(
+ zc->appliedParams.useExternalMatchFinder,
+ parameter_combination_unsupported,
+ "Long-distance matching with external matchfinder enabled is not currently supported."
+ );
+
ldmSeqStore.seq = zc->ldmSequences;
ldmSeqStore.capacity = zc->maxNbLdmSequences;
/* Updates ldmSeqStore.size */
zc->appliedParams.useRowMatchFinder,
src, srcSize);
assert(ldmSeqStore.pos == ldmSeqStore.size);
- } else { /* not long range mode */
+ } else if (zc->appliedParams.useExternalMatchFinder) {
+ assert(
+ zc->externalMatchCtx.seqBufferCapacity >= ZSTD_sequenceBound(srcSize)
+ );
+ assert(zc->externalMatchCtx.mFinder != NULL);
+
+ { U32 const windowSize = (U32)1 << zc->appliedParams.cParams.windowLog;
+
+ size_t const nbExternalSeqs = (zc->externalMatchCtx.mFinder)(
+ zc->externalMatchCtx.mState,
+ zc->externalMatchCtx.seqBuffer,
+ zc->externalMatchCtx.seqBufferCapacity,
+ src, srcSize,
+ NULL, 0, /* dict and dictSize, currently not supported */
+ zc->appliedParams.compressionLevel,
+ windowSize
+ );
+
+ size_t const nbPostProcessedSeqs = ZSTD_postProcessExternalMatchFinderResult(
+ zc->externalMatchCtx.seqBuffer,
+ nbExternalSeqs,
+ zc->externalMatchCtx.seqBufferCapacity,
+ srcSize
+ );
+
+ /* Return early if there is no error, since we don't need to worry about last literals */
+ if (!ZSTD_isError(nbPostProcessedSeqs)) {
+ ZSTD_sequencePosition seqPos = {0,0,0};
+ ZSTD_copySequencesToSeqStoreExplicitBlockDelim(
+ zc, &seqPos, zc->externalMatchCtx.seqBuffer, nbPostProcessedSeqs, src, srcSize
+ );
+ ms->ldmSeqStore = NULL;
+ DEBUGLOG(5, "Copied %lu sequences from external matchfinder to internal seqStore.", (unsigned long)nbExternalSeqs);
+ return ZSTDbss_compress;
+ }
+
+ /* Propagate the error if fallback is disabled */
+ if (!zc->appliedParams.enableMatchFinderFallback) {
+ return nbPostProcessedSeqs;
+ }
+
+ /* Fallback to software matchfinder */
+ { ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy,
+ zc->appliedParams.useRowMatchFinder,
+ dictMode);
+ ms->ldmSeqStore = NULL;
+ DEBUGLOG(
+ 5,
+ "External matchfinder returned error code %lu. Falling back to internal matchfinder.",
+ (unsigned long)nbExternalSeqs
+ );
+ lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize);
+ } }
+ } else { /* not long range mode and no external matchfinder */
ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy,
zc->appliedParams.useRowMatchFinder,
dictMode);
+ assert(zc->externalMatchCtx.mFinder == NULL);
ms->ldmSeqStore = NULL;
lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize);
}
const size_t unrollMask = unrollSize - 1;
const size_t prefixLength = length & unrollMask;
size_t i;
- size_t u;
if (length == 1) return 1;
/* Check if prefix is RLE first before using unrolled loop */
if (prefixLength && ZSTD_count(ip+1, ip, ip+prefixLength) != prefixLength-1) {
return 0;
}
for (i = prefixLength; i != length; i += unrollSize) {
+ size_t u;
for (u = 0; u < unrollSize; u += sizeof(size_t)) {
if (MEM_readST(ip + i + u) != valueST) {
return 0;
- }
- }
- }
+ } } }
return 1;
}
return nbSeqs < 4 && nbLits < 10;
}
-static void ZSTD_blockState_confirmRepcodesAndEntropyTables(ZSTD_blockState_t* const bs)
+static void
+ZSTD_blockState_confirmRepcodesAndEntropyTables(ZSTD_blockState_t* const bs)
{
ZSTD_compressedBlockState_t* const tmp = bs->prevCBlock;
bs->prevCBlock = bs->nextCBlock;
}
/* Writes the block header */
-static void writeBlockHeader(void* op, size_t cSize, size_t blockSize, U32 lastBlock) {
+static void
+writeBlockHeader(void* op, size_t cSize, size_t blockSize, U32 lastBlock)
+{
U32 const cBlockHeader = cSize == 1 ?
lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) :
lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
* Stores literals block type (raw, rle, compressed, repeat) and
* huffman description table to hufMetadata.
* Requires ENTROPY_WORKSPACE_SIZE workspace
- * @return : size of huffman description table or error code */
-static size_t ZSTD_buildBlockEntropyStats_literals(void* const src, size_t srcSize,
- const ZSTD_hufCTables_t* prevHuf,
- ZSTD_hufCTables_t* nextHuf,
- ZSTD_hufCTablesMetadata_t* hufMetadata,
- const int literalsCompressionIsDisabled,
- void* workspace, size_t wkspSize, HUF_depth_mode depthMode)
+ * @return : size of huffman description table, or an error code
+ */
+static size_t
+ZSTD_buildBlockEntropyStats_literals(void* const src, size_t srcSize,
+ const ZSTD_hufCTables_t* prevHuf,
+ ZSTD_hufCTables_t* nextHuf,
+ ZSTD_hufCTablesMetadata_t* hufMetadata,
+ const int literalsCompressionIsDisabled,
+ void* workspace, size_t wkspSize,
+ HUF_depth_mode depthMode)
{
BYTE* const wkspStart = (BYTE*)workspace;
BYTE* const wkspEnd = wkspStart + wkspSize;
/* small ? don't even attempt compression (speed opt) */
#ifndef COMPRESS_LITERALS_SIZE_MIN
-#define COMPRESS_LITERALS_SIZE_MIN 63
+# define COMPRESS_LITERALS_SIZE_MIN 63 /* heuristic */
#endif
{ size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
if (srcSize <= minLitSize) {
DEBUGLOG(5, "set_basic - too small");
hufMetadata->hType = set_basic;
return 0;
- }
- }
+ } }
/* Scan input and build symbol stats */
- { size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)src, srcSize, workspace, wkspSize);
+ { size_t const largest =
+ HIST_count_wksp (countWksp, &maxSymbolValue,
+ (const BYTE*)src, srcSize,
+ workspace, wkspSize);
FORWARD_IF_ERROR(largest, "HIST_count_wksp failed");
if (largest == srcSize) {
+ /* only one literal symbol */
DEBUGLOG(5, "set_rle");
hufMetadata->hType = set_rle;
return 0;
}
if (largest <= (srcSize >> 7)+4) {
+ /* heuristic: likely not compressible */
DEBUGLOG(5, "set_basic - no gain");
hufMetadata->hType = set_basic;
return 0;
- }
- }
+ } }
/* Validate the previous Huffman table */
- if (repeat == HUF_repeat_check && !HUF_validateCTable((HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue)) {
+ if (repeat == HUF_repeat_check
+ && !HUF_validateCTable((HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue)) {
repeat = HUF_repeat_none;
}
nodeWksp, nodeWkspSize);
FORWARD_IF_ERROR(maxBits, "HUF_buildCTable_wksp");
huffLog = (U32)maxBits;
- { /* Build and write the CTable */
- size_t const newCSize = HUF_estimateCompressedSize(
- (HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue);
- size_t const hSize = HUF_writeCTable_wksp(
- hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer),
- (HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog,
- nodeWksp, nodeWkspSize);
- /* Check against repeating the previous CTable */
- if (repeat != HUF_repeat_none) {
- size_t const oldCSize = HUF_estimateCompressedSize(
- (HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue);
- if (oldCSize < srcSize && (oldCSize <= hSize + newCSize || hSize + 12 >= srcSize)) {
- DEBUGLOG(5, "set_repeat - smaller");
- ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
- hufMetadata->hType = set_repeat;
- return 0;
- }
- }
- if (newCSize + hSize >= srcSize) {
- DEBUGLOG(5, "set_basic - no gains");
+ }
+ { /* Build and write the CTable */
+ size_t const newCSize = HUF_estimateCompressedSize(
+ (HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue);
+ size_t const hSize = HUF_writeCTable_wksp(
+ hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer),
+ (HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog,
+ nodeWksp, nodeWkspSize);
+ /* Check against repeating the previous CTable */
+ if (repeat != HUF_repeat_none) {
+ size_t const oldCSize = HUF_estimateCompressedSize(
+ (HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue);
+ if (oldCSize < srcSize && (oldCSize <= hSize + newCSize || hSize + 12 >= srcSize)) {
+ DEBUGLOG(5, "set_repeat - smaller");
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
- hufMetadata->hType = set_basic;
+ hufMetadata->hType = set_repeat;
return 0;
- }
- DEBUGLOG(5, "set_compressed (hSize=%u)", (U32)hSize);
- hufMetadata->hType = set_compressed;
- nextHuf->repeatMode = HUF_repeat_check;
- return hSize;
+ } }
+ if (newCSize + hSize >= srcSize) {
+ DEBUGLOG(5, "set_basic - no gains");
+ ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
+ hufMetadata->hType = set_basic;
+ return 0;
}
+ DEBUGLOG(5, "set_compressed (hSize=%u)", (U32)hSize);
+ hufMetadata->hType = set_compressed;
+ nextHuf->repeatMode = HUF_repeat_check;
+ return hSize;
}
}
* and updates nextEntropy to the appropriate repeatMode.
*/
static ZSTD_symbolEncodingTypeStats_t
-ZSTD_buildDummySequencesStatistics(ZSTD_fseCTables_t* nextEntropy) {
+ZSTD_buildDummySequencesStatistics(ZSTD_fseCTables_t* nextEntropy)
+{
ZSTD_symbolEncodingTypeStats_t stats = {set_basic, set_basic, set_basic, 0, 0};
nextEntropy->litlength_repeatMode = FSE_repeat_none;
nextEntropy->offcode_repeatMode = FSE_repeat_none;
* Builds entropy for the sequences.
* Stores symbol compression modes and fse table to fseMetadata.
* Requires ENTROPY_WORKSPACE_SIZE wksp.
- * @return : size of fse tables or error code */
-static size_t ZSTD_buildBlockEntropyStats_sequences(seqStore_t* seqStorePtr,
- const ZSTD_fseCTables_t* prevEntropy,
- ZSTD_fseCTables_t* nextEntropy,
- const ZSTD_CCtx_params* cctxParams,
- ZSTD_fseCTablesMetadata_t* fseMetadata,
- void* workspace, size_t wkspSize)
+ * @return : size of fse tables or error code */
+static size_t
+ZSTD_buildBlockEntropyStats_sequences(
+ const seqStore_t* seqStorePtr,
+ const ZSTD_fseCTables_t* prevEntropy,
+ ZSTD_fseCTables_t* nextEntropy,
+ const ZSTD_CCtx_params* cctxParams,
+ ZSTD_fseCTablesMetadata_t* fseMetadata,
+ void* workspace, size_t wkspSize)
{
ZSTD_strategy const strategy = cctxParams->cParams.strategy;
- size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
+ size_t const nbSeq = (size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
BYTE* const ostart = fseMetadata->fseTablesBuffer;
BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer);
BYTE* op = ostart;
/** ZSTD_buildBlockEntropyStats() :
* Builds entropy for the block.
* Requires workspace size ENTROPY_WORKSPACE_SIZE
- *
- * @return : 0 on success or error code
+ * @return : 0 on success, or an error code
+ * Note : also employed in superblock
*/
-size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr,
- const ZSTD_entropyCTables_t* prevEntropy,
- ZSTD_entropyCTables_t* nextEntropy,
- const ZSTD_CCtx_params* cctxParams,
- ZSTD_entropyCTablesMetadata_t* entropyMetadata,
- void* workspace, size_t wkspSize)
-{
- size_t const litSize = seqStorePtr->lit - seqStorePtr->litStart;
- HUF_depth_mode depthMode = cctxParams->cParams.strategy >= HUF_OPTIMAL_DEPTH_THRESHOLD ? HUF_depth_optimal : HUF_depth_fast;
+size_t ZSTD_buildBlockEntropyStats(
+ const seqStore_t* seqStorePtr,
+ const ZSTD_entropyCTables_t* prevEntropy,
+ ZSTD_entropyCTables_t* nextEntropy,
+ const ZSTD_CCtx_params* cctxParams,
+ ZSTD_entropyCTablesMetadata_t* entropyMetadata,
+ void* workspace, size_t wkspSize)
+{
+ size_t const litSize = (size_t)(seqStorePtr->lit - seqStorePtr->litStart);
+ int const huf_useOptDepth = (cctxParams->cParams.strategy >= HUF_OPTIMAL_DEPTH_THRESHOLD);
+ HUF_depth_mode const depthMode = huf_useOptDepth ? HUF_depth_optimal : HUF_depth_fast;
entropyMetadata->hufMetadata.hufDesSize =
ZSTD_buildBlockEntropyStats_literals(seqStorePtr->litStart, litSize,
}
/* Returns the size estimate for the literals section (header + content) of a block */
-static size_t ZSTD_estimateBlockSize_literal(const BYTE* literals, size_t litSize,
- const ZSTD_hufCTables_t* huf,
- const ZSTD_hufCTablesMetadata_t* hufMetadata,
- void* workspace, size_t wkspSize,
- int writeEntropy)
+static size_t
+ZSTD_estimateBlockSize_literal(const BYTE* literals, size_t litSize,
+ const ZSTD_hufCTables_t* huf,
+ const ZSTD_hufCTablesMetadata_t* hufMetadata,
+ void* workspace, size_t wkspSize,
+ int writeEntropy)
{
unsigned* const countWksp = (unsigned*)workspace;
unsigned maxSymbolValue = HUF_SYMBOLVALUE_MAX;
}
/* Returns the size estimate for the FSE-compressed symbols (of, ml, ll) of a block */
-static size_t ZSTD_estimateBlockSize_symbolType(symbolEncodingType_e type,
- const BYTE* codeTable, size_t nbSeq, unsigned maxCode,
- const FSE_CTable* fseCTable,
- const U8* additionalBits,
- short const* defaultNorm, U32 defaultNormLog, U32 defaultMax,
- void* workspace, size_t wkspSize)
+static size_t
+ZSTD_estimateBlockSize_symbolType(symbolEncodingType_e type,
+ const BYTE* codeTable, size_t nbSeq, unsigned maxCode,
+ const FSE_CTable* fseCTable,
+ const U8* additionalBits,
+ short const* defaultNorm, U32 defaultNormLog, U32 defaultMax,
+ void* workspace, size_t wkspSize)
{
unsigned* const countWksp = (unsigned*)workspace;
const BYTE* ctp = codeTable;
}
/* Returns the size estimate for the sequences section (header + content) of a block */
-static size_t ZSTD_estimateBlockSize_sequences(const BYTE* ofCodeTable,
- const BYTE* llCodeTable,
- const BYTE* mlCodeTable,
- size_t nbSeq,
- const ZSTD_fseCTables_t* fseTables,
- const ZSTD_fseCTablesMetadata_t* fseMetadata,
- void* workspace, size_t wkspSize,
- int writeEntropy)
+static size_t
+ZSTD_estimateBlockSize_sequences(const BYTE* ofCodeTable,
+ const BYTE* llCodeTable,
+ const BYTE* mlCodeTable,
+ size_t nbSeq,
+ const ZSTD_fseCTables_t* fseTables,
+ const ZSTD_fseCTablesMetadata_t* fseMetadata,
+ void* workspace, size_t wkspSize,
+ int writeEntropy)
{
size_t sequencesSectionHeaderSize = 1 /* seqHead */ + 1 /* min seqSize size */ + (nbSeq >= 128) + (nbSeq >= LONGNBSEQ);
size_t cSeqSizeEstimate = 0;
cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->ofType, ofCodeTable, nbSeq, MaxOff,
- fseTables->offcodeCTable, NULL,
- OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
- workspace, wkspSize);
+ fseTables->offcodeCTable, NULL,
+ OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
+ workspace, wkspSize);
cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->llType, llCodeTable, nbSeq, MaxLL,
- fseTables->litlengthCTable, LL_bits,
- LL_defaultNorm, LL_defaultNormLog, MaxLL,
- workspace, wkspSize);
+ fseTables->litlengthCTable, LL_bits,
+ LL_defaultNorm, LL_defaultNormLog, MaxLL,
+ workspace, wkspSize);
cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->mlType, mlCodeTable, nbSeq, MaxML,
- fseTables->matchlengthCTable, ML_bits,
- ML_defaultNorm, ML_defaultNormLog, MaxML,
- workspace, wkspSize);
+ fseTables->matchlengthCTable, ML_bits,
+ ML_defaultNorm, ML_defaultNormLog, MaxML,
+ workspace, wkspSize);
if (writeEntropy) cSeqSizeEstimate += fseMetadata->fseTablesSize;
return cSeqSizeEstimate + sequencesSectionHeaderSize;
}
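/* Worked example for the header estimate above (derived directly from the
 * expression computing sequencesSectionHeaderSize): the estimated sequences
 * section header is 2 bytes when nbSeq < 128, 3 bytes when
 * 128 <= nbSeq < LONGNBSEQ, and 4 bytes when nbSeq >= LONGNBSEQ,
 * mirroring the variable-length encoding of Number_of_Sequences. */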
/* Returns the size estimate for a given stream of literals, of, ll, ml */
-static size_t ZSTD_estimateBlockSize(const BYTE* literals, size_t litSize,
- const BYTE* ofCodeTable,
- const BYTE* llCodeTable,
- const BYTE* mlCodeTable,
- size_t nbSeq,
- const ZSTD_entropyCTables_t* entropy,
- const ZSTD_entropyCTablesMetadata_t* entropyMetadata,
- void* workspace, size_t wkspSize,
- int writeLitEntropy, int writeSeqEntropy) {
+static size_t
+ZSTD_estimateBlockSize(const BYTE* literals, size_t litSize,
+ const BYTE* ofCodeTable,
+ const BYTE* llCodeTable,
+ const BYTE* mlCodeTable,
+ size_t nbSeq,
+ const ZSTD_entropyCTables_t* entropy,
+ const ZSTD_entropyCTablesMetadata_t* entropyMetadata,
+ void* workspace, size_t wkspSize,
+ int writeLitEntropy, int writeSeqEntropy)
+{
size_t const literalsSize = ZSTD_estimateBlockSize_literal(literals, litSize,
- &entropy->huf, &entropyMetadata->hufMetadata,
- workspace, wkspSize, writeLitEntropy);
+ &entropy->huf, &entropyMetadata->hufMetadata,
+ workspace, wkspSize, writeLitEntropy);
size_t const seqSize = ZSTD_estimateBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable,
- nbSeq, &entropy->fse, &entropyMetadata->fseMetadata,
- workspace, wkspSize, writeSeqEntropy);
+ nbSeq, &entropy->fse, &entropyMetadata->fseMetadata,
+ workspace, wkspSize, writeSeqEntropy);
return seqSize + literalsSize + ZSTD_blockHeaderSize;
}
/* Builds entropy statistics and uses them for blocksize estimation.
*
- * Returns the estimated compressed size of the seqStore, or a zstd error.
+ * @return: estimated compressed size of the seqStore, or a zstd error.
*/
-static size_t ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(seqStore_t* seqStore, ZSTD_CCtx* zc) {
- ZSTD_entropyCTablesMetadata_t* entropyMetadata = &zc->blockSplitCtx.entropyMetadata;
+static size_t
+ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(seqStore_t* seqStore, ZSTD_CCtx* zc)
+{
+ ZSTD_entropyCTablesMetadata_t* const entropyMetadata = &zc->blockSplitCtx.entropyMetadata;
DEBUGLOG(6, "ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize()");
FORWARD_IF_ERROR(ZSTD_buildBlockEntropyStats(seqStore,
&zc->blockState.prevCBlock->entropy,
&zc->blockState.nextCBlock->entropy,
&zc->appliedParams,
entropyMetadata,
- zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */), "");
- return ZSTD_estimateBlockSize(seqStore->litStart, (size_t)(seqStore->lit - seqStore->litStart),
+ zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE), "");
+ return ZSTD_estimateBlockSize(
+ seqStore->litStart, (size_t)(seqStore->lit - seqStore->litStart),
seqStore->ofCode, seqStore->llCode, seqStore->mlCode,
(size_t)(seqStore->sequences - seqStore->sequencesStart),
- &zc->blockState.nextCBlock->entropy, entropyMetadata, zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE,
+ &zc->blockState.nextCBlock->entropy,
+ entropyMetadata,
+ zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE,
(int)(entropyMetadata->hufMetadata.hType == set_compressed), 1);
}
/* Returns literals bytes represented in a seqStore */
-static size_t ZSTD_countSeqStoreLiteralsBytes(const seqStore_t* const seqStore) {
+static size_t ZSTD_countSeqStoreLiteralsBytes(const seqStore_t* const seqStore)
+{
size_t literalsBytes = 0;
- size_t const nbSeqs = seqStore->sequences - seqStore->sequencesStart;
+ size_t const nbSeqs = (size_t)(seqStore->sequences - seqStore->sequencesStart);
size_t i;
for (i = 0; i < nbSeqs; ++i) {
- seqDef seq = seqStore->sequencesStart[i];
+ seqDef const seq = seqStore->sequencesStart[i];
literalsBytes += seq.litLength;
if (i == seqStore->longLengthPos && seqStore->longLengthType == ZSTD_llt_literalLength) {
literalsBytes += 0x10000;
- }
- }
+ } }
return literalsBytes;
}
/* Returns match bytes represented in a seqStore */
-static size_t ZSTD_countSeqStoreMatchBytes(const seqStore_t* const seqStore) {
+static size_t ZSTD_countSeqStoreMatchBytes(const seqStore_t* const seqStore)
+{
size_t matchBytes = 0;
- size_t const nbSeqs = seqStore->sequences - seqStore->sequencesStart;
+ size_t const nbSeqs = (size_t)(seqStore->sequences - seqStore->sequencesStart);
size_t i;
for (i = 0; i < nbSeqs; ++i) {
seqDef seq = seqStore->sequencesStart[i];
matchBytes += seq.mlBase + MINMATCH;
if (i == seqStore->longLengthPos && seqStore->longLengthType == ZSTD_llt_matchLength) {
matchBytes += 0x10000;
- }
- }
+ } }
return matchBytes;
}
U32 const adjustedRepCode = OFFBASE_TO_REPCODE(offBase) - 1 + ll0; /* [ 0 - 3 ] */
assert(OFFBASE_IS_REPCODE(offBase));
if (adjustedRepCode == ZSTD_REP_NUM) {
+ assert(ll0);
/* litlength == 0 and offCode == 2 implies selection of first repcode - 1
* This is only valid if it results in a valid offset value, aka > 0.
* Note : it may happen that `rep[0]==1` in exceptional circumstances.
* 1-3 : repcode 1-3
* 4+ : real_offset+3
*/
-static void ZSTD_seqStore_resolveOffCodes(repcodes_t* const dRepcodes, repcodes_t* const cRepcodes,
- seqStore_t* const seqStore, U32 const nbSeq) {
+static void
+ZSTD_seqStore_resolveOffCodes(repcodes_t* const dRepcodes, repcodes_t* const cRepcodes,
+ const seqStore_t* const seqStore, U32 const nbSeq)
+{
U32 idx = 0;
for (; idx < nbSeq; ++idx) {
seqDef* const seq = seqStore->sequencesStart + idx;
U32 const ll0 = (seq->litLength == 0);
U32 const offBase = seq->offBase;
- assert(seq->offBase > 0);
+ assert(offBase > 0);
if (OFFBASE_IS_REPCODE(offBase)) {
U32 const dRawOffset = ZSTD_resolveRepcodeToRawOffset(dRepcodes->rep, offBase, ll0);
U32 const cRawOffset = ZSTD_resolveRepcodeToRawOffset(cRepcodes->rep, offBase, ll0);
* repcode history.
*/
if (dRawOffset != cRawOffset) {
- seq->offBase = cRawOffset + ZSTD_REP_NUM;
+ seq->offBase = OFFSET_TO_OFFBASE(cRawOffset);
}
}
/* Compression repcode history is always updated with values directly from the unmodified seqStore.
* Returns the total size of that block (including header) or a ZSTD error code.
*/
static size_t
-ZSTD_compressSeqStore_singleBlock(ZSTD_CCtx* zc, seqStore_t* const seqStore,
+ZSTD_compressSeqStore_singleBlock(ZSTD_CCtx* zc,
+ const seqStore_t* const seqStore,
repcodes_t* const dRep, repcodes_t* const cRep,
void* dst, size_t dstCapacity,
- const void* src, size_t srcSize,
+ const void* src, size_t srcSize,
U32 lastBlock, U32 isPartition)
{
const U32 rleMaxLength = 25;
/* Helper function to perform the recursive search for block splits.
* Estimates the cost of seqStore prior to split, and estimates the cost of splitting the sequences in half.
- * If advantageous to split, then we recurse down the two sub-blocks. If not, or if an error occurred in estimation, then
- * we do not recurse.
+ * If advantageous to split, then we recurse down the two sub-blocks.
+ * If not, or if an error occurred in estimation, then we do not recurse.
*
- * Note: The recursion depth is capped by a heuristic minimum number of sequences, defined by MIN_SEQUENCES_BLOCK_SPLITTING.
+ * Note: The recursion depth is capped by a heuristic minimum number of sequences,
+ * defined by MIN_SEQUENCES_BLOCK_SPLITTING.
* In theory, this means the absolute largest recursion depth is 10 == log2(maxNbSeqInBlock/MIN_SEQUENCES_BLOCK_SPLITTING).
* In practice, recursion depth usually doesn't go beyond 4.
*
ZSTD_deriveBlockSplitsHelper(seqStoreSplits* splits, size_t startIdx, size_t endIdx,
ZSTD_CCtx* zc, const seqStore_t* origSeqStore)
{
- seqStore_t* fullSeqStoreChunk = &zc->blockSplitCtx.fullSeqStoreChunk;
- seqStore_t* firstHalfSeqStore = &zc->blockSplitCtx.firstHalfSeqStore;
- seqStore_t* secondHalfSeqStore = &zc->blockSplitCtx.secondHalfSeqStore;
+ seqStore_t* const fullSeqStoreChunk = &zc->blockSplitCtx.fullSeqStoreChunk;
+ seqStore_t* const firstHalfSeqStore = &zc->blockSplitCtx.firstHalfSeqStore;
+ seqStore_t* const secondHalfSeqStore = &zc->blockSplitCtx.secondHalfSeqStore;
size_t estimatedOriginalSize;
size_t estimatedFirstHalfSize;
size_t estimatedSecondHalfSize;
size_t midIdx = (startIdx + endIdx)/2;
+ DEBUGLOG(5, "ZSTD_deriveBlockSplitsHelper: startIdx=%zu endIdx=%zu", startIdx, endIdx);
+ assert(endIdx >= startIdx);
if (endIdx - startIdx < MIN_SEQUENCES_BLOCK_SPLITTING || splits->idx >= ZSTD_MAX_NB_BLOCK_SPLITS) {
- DEBUGLOG(6, "ZSTD_deriveBlockSplitsHelper: Too few sequences");
+ DEBUGLOG(6, "ZSTD_deriveBlockSplitsHelper: Too few sequences (%zu)", endIdx - startIdx);
return;
}
- DEBUGLOG(5, "ZSTD_deriveBlockSplitsHelper: startIdx=%zu endIdx=%zu", startIdx, endIdx);
ZSTD_deriveSeqStoreChunk(fullSeqStoreChunk, origSeqStore, startIdx, endIdx);
ZSTD_deriveSeqStoreChunk(firstHalfSeqStore, origSeqStore, startIdx, midIdx);
ZSTD_deriveSeqStoreChunk(secondHalfSeqStore, origSeqStore, midIdx, endIdx);
}
}
-/* Base recursive function. Populates a table with intra-block partition indices that can improve compression ratio.
+/* Base recursive function.
+ * Populates a table with intra-block partition indices that can improve compression ratio.
*
- * Returns the number of splits made (which equals the size of the partition table - 1).
+ * @return: number of splits made (which equals the size of the partition table - 1).
*/
static size_t ZSTD_deriveBlockSplits(ZSTD_CCtx* zc, U32 partitions[], U32 nbSeq)
{
seqStoreSplits splits = {partitions, 0};
if (nbSeq <= 4) {
- DEBUGLOG(5, "ZSTD_deriveBlockSplits: Too few sequences to split");
+ DEBUGLOG(5, "ZSTD_deriveBlockSplits: Too few sequences to split (%u <= 4)", nbSeq);
/* Refuse to try and split anything with less than 4 sequences */
return 0;
}
* Returns combined size of all blocks (which includes headers), or a ZSTD error code.
*/
static size_t
-ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity,
- const void* src, size_t blockSize, U32 lastBlock, U32 nbSeq)
+ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc,
+ void* dst, size_t dstCapacity,
+ const void* src, size_t blockSize,
+ U32 lastBlock, U32 nbSeq)
{
size_t cSize = 0;
const BYTE* ip = (const BYTE*)src;
BYTE* op = (BYTE*)dst;
size_t i = 0;
size_t srcBytesTotal = 0;
- U32* partitions = zc->blockSplitCtx.partitions; /* size == ZSTD_MAX_NB_BLOCK_SPLITS */
- seqStore_t* nextSeqStore = &zc->blockSplitCtx.nextSeqStore;
- seqStore_t* currSeqStore = &zc->blockSplitCtx.currSeqStore;
- size_t numSplits = ZSTD_deriveBlockSplits(zc, partitions, nbSeq);
+ U32* const partitions = zc->blockSplitCtx.partitions; /* size == ZSTD_MAX_NB_BLOCK_SPLITS */
+ seqStore_t* const nextSeqStore = &zc->blockSplitCtx.nextSeqStore;
+ seqStore_t* const currSeqStore = &zc->blockSplitCtx.currSeqStore;
+ size_t const numSplits = ZSTD_deriveBlockSplits(zc, partitions, nbSeq);
/* If a block is split and some partitions are emitted as RLE/uncompressed, then repcode history
* may become invalid. In order to reconcile potentially invalid repcodes, we keep track of two
ZSTD_memcpy(cRep.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t));
ZSTD_memset(nextSeqStore, 0, sizeof(seqStore_t));
- DEBUGLOG(4, "ZSTD_compressBlock_splitBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)",
+ DEBUGLOG(5, "ZSTD_compressBlock_splitBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)",
(unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit,
(unsigned)zc->blockState.matchState.nextToUpdate);
if (numSplits == 0) {
- size_t cSizeSingleBlock = ZSTD_compressSeqStore_singleBlock(zc, &zc->seqStore,
- &dRep, &cRep,
- op, dstCapacity,
- ip, blockSize,
- lastBlock, 0 /* isPartition */);
+ size_t cSizeSingleBlock =
+ ZSTD_compressSeqStore_singleBlock(zc, &zc->seqStore,
+ &dRep, &cRep,
+ op, dstCapacity,
+ ip, blockSize,
+ lastBlock, 0 /* isPartition */);
FORWARD_IF_ERROR(cSizeSingleBlock, "Compressing single block from splitBlock_internal() failed!");
DEBUGLOG(5, "ZSTD_compressBlock_splitBlock_internal: No splits");
- assert(cSizeSingleBlock <= ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize);
+ assert(zc->blockSize <= ZSTD_BLOCKSIZE_MAX);
+ assert(cSizeSingleBlock <= zc->blockSize + ZSTD_blockHeaderSize);
return cSizeSingleBlock;
}
op, dstCapacity,
ip, srcBytes,
lastBlockEntireSrc, 1 /* isPartition */);
- DEBUGLOG(5, "Estimated size: %zu actual size: %zu", ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(currSeqStore, zc), cSizeChunk);
+ DEBUGLOG(5, "Estimated size: %zu vs %zu : actual size",
+ ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(currSeqStore, zc), cSizeChunk);
FORWARD_IF_ERROR(cSizeChunk, "Compressing chunk failed!");
ip += srcBytes;
dstCapacity -= cSizeChunk;
cSize += cSizeChunk;
*currSeqStore = *nextSeqStore;
- assert(cSizeChunk <= ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize);
+ assert(cSizeChunk <= zc->blockSize + ZSTD_blockHeaderSize);
}
- /* cRep and dRep may have diverged during the compression. If so, we use the dRep repcodes
- * for the next block.
+ /* cRep and dRep may have diverged during the compression.
+ * If so, we use the dRep repcodes for the next block.
*/
ZSTD_memcpy(zc->blockState.prevCBlock->rep, dRep.rep, sizeof(repcodes_t));
return cSize;
void* dst, size_t dstCapacity,
const void* src, size_t srcSize, U32 lastBlock)
{
- const BYTE* ip = (const BYTE*)src;
- BYTE* op = (BYTE*)dst;
U32 nbSeq;
size_t cSize;
DEBUGLOG(4, "ZSTD_compressBlock_splitBlock");
if (bss == ZSTDbss_noCompress) {
if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;
- cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, srcSize, lastBlock);
+ cSize = ZSTD_noCompressBlock(dst, dstCapacity, src, srcSize, lastBlock);
FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed");
DEBUGLOG(4, "ZSTD_compressBlock_splitBlock: Nocompress block");
return cSize;
* * cSize >= blockBound(srcSize): We have expanded the block too much so
* emit an uncompressed block.
*/
- {
- size_t const cSize = ZSTD_compressSuperBlock(zc, dst, dstCapacity, src, srcSize, lastBlock);
+ { size_t const cSize =
+ ZSTD_compressSuperBlock(zc, dst, dstCapacity, src, srcSize, lastBlock);
if (cSize != ERROR(dstSize_tooSmall)) {
- size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, zc->appliedParams.cParams.strategy);
+ size_t const maxCSize =
+ srcSize - ZSTD_minGain(srcSize, zc->appliedParams.cParams.strategy);
FORWARD_IF_ERROR(cSize, "ZSTD_compressSuperBlock failed");
if (cSize != 0 && cSize < maxCSize + ZSTD_blockHeaderSize) {
ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
}
}
}
- }
+ } /* if (bss == ZSTDbss_compress)*/
DEBUGLOG(6, "Resorting to ZSTD_noCompressBlock()");
/* Superblock compression failed, attempt to emit a single no compress block.
* All blocks will be terminated, all input will be consumed.
* Function will issue an error if there is not enough `dstCapacity` to hold the compressed content.
* Frame is supposed already started (header already produced)
-* @return : compressed size, or an error code
+* @return : compressed size, or an error code
*/
static size_t ZSTD_compress_frameChunk(ZSTD_CCtx* cctx,
void* dst, size_t dstCapacity,
MEM_writeLE24(op, cBlockHeader);
cSize += ZSTD_blockHeaderSize;
}
- }
+ } /* if (ZSTD_useTargetCBlockSize(&cctx->appliedParams))*/
ip += blockSize;
params.useBlockSplitter = ZSTD_resolveBlockSplitterMode(params.useBlockSplitter, ¶ms.cParams);
params.ldmParams.enableLdm = ZSTD_resolveEnableLdm(params.ldmParams.enableLdm, ¶ms.cParams);
params.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params.useRowMatchFinder, ¶ms.cParams);
+ params.validateSequences = ZSTD_resolveExternalSequenceValidation(params.validateSequences);
#ifdef ZSTD_MULTITHREAD
if ((cctx->pledgedSrcSizePlusOne-1) <= ZSTDMT_JOBSIZE_MIN) {
}
}
-typedef struct {
- U32 idx; /* Index in array of ZSTD_Sequence */
- U32 posInSequence; /* Position within sequence at idx */
- size_t posInSrc; /* Number of bytes given by sequences provided so far */
-} ZSTD_sequencePosition;
-
/* ZSTD_validateSequence() :
* @offCode : is presumed to follow format required by ZSTD_storeSeq()
* @returns a ZSTD error code if sequence is not valid
return offBase;
}
-/* Returns 0 on success, and a ZSTD_error otherwise. This function scans through an array of
- * ZSTD_Sequence, storing the sequences it finds, until it reaches a block delimiter.
- */
-static size_t
+size_t
ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx,
ZSTD_sequencePosition* seqPos,
const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
return 0;
}
-/* Returns the number of bytes to move the current read position back by.
- * Only non-zero if we ended up splitting a sequence.
- * Otherwise, it may return a ZSTD error if something went wrong.
- *
- * This function will attempt to scan through blockSize bytes
- * represented by the sequences in @inSeqs,
- * storing any (partial) sequences.
- *
- * Occasionally, we may want to change the actual number of bytes we consumed from inSeqs to
- * avoid splitting a match, or to avoid splitting a match such that it would produce a match
- * smaller than MINMATCH. In this case, we return the number of bytes that we didn't read from this block.
- */
-static size_t
+size_t
ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos,
const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
const void* src, size_t blockSize)
if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN;
return ZSTD_getParams_internal(compressionLevel, srcSizeHint, dictSize, ZSTD_cpm_unknown);
}
+
+void ZSTD_registerExternalMatchFinder(
+ ZSTD_CCtx* zc, void* mState,
+ ZSTD_externalMatchFinder_F* mFinder
+) {
+ ZSTD_externalMatchCtx emctx = {
+ mState,
+ mFinder,
+
+ /* seqBuffer is allocated later (from the cwksp) */
+ NULL, /* seqBuffer */
+ 0 /* seqBufferCapacity */
+ };
+ zc->externalMatchCtx = emctx;
+ zc->requestedParams.useExternalMatchFinder = 1;
+}
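/* Hypothetical usage sketch for the registration API above. The names
 * `myState` and `myMatchFinder` are illustrative placeholders, assumed to be
 * an opaque state object and a callback compatible with
 * ZSTD_externalMatchFinder_F; they are not defined in this codebase. */
#if 0
    {   ZSTD_CCtx* const cctx = ZSTD_createCCtx();
        ZSTD_registerExternalMatchFinder(cctx, &myState, myMatchFinder);
        /* The registration stores the (state, finder) pair in
         * cctx->externalMatchCtx and sets requestedParams.useExternalMatchFinder,
         * so subsequent compression calls on this context can route
         * matchfinding through the callback. */
    }
#endif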
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/** ZSTD_buildBlockEntropyStats() :
* Builds entropy for the block.
* @return : 0 on success or error code */
-size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr,
- const ZSTD_entropyCTables_t* prevEntropy,
- ZSTD_entropyCTables_t* nextEntropy,
- const ZSTD_CCtx_params* cctxParams,
- ZSTD_entropyCTablesMetadata_t* entropyMetadata,
- void* workspace, size_t wkspSize);
+size_t ZSTD_buildBlockEntropyStats(
+ const seqStore_t* seqStorePtr,
+ const ZSTD_entropyCTables_t* prevEntropy,
+ ZSTD_entropyCTables_t* nextEntropy,
+ const ZSTD_CCtx_params* cctxParams,
+ ZSTD_entropyCTablesMetadata_t* entropyMetadata,
+ void* workspace, size_t wkspSize);
/*********************************
* Compression internals structs *
size_t capacity; /* The capacity starting from `seq` pointer */
} rawSeqStore_t;
+typedef struct {
+ U32 idx; /* Index in array of ZSTD_Sequence */
+ U32 posInSequence; /* Position within sequence at idx */
+ size_t posInSrc; /* Number of bytes given by sequences provided so far */
+} ZSTD_sequencePosition;
+
UNUSED_ATTR static const rawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0, 0};
typedef struct {
/* Controls prefetching in some dictMatchState matchfinders */
ZSTD_paramSwitch_e prefetchCDictTables;
+
+ /* Controls whether zstd will fall back to an internal matchfinder
+ * if the external matchfinder returns an error code. */
+ int enableMatchFinderFallback;
+
+ /* Indicates whether an external matchfinder has been referenced.
+ * Users can't set this externally.
+ * It is set internally in ZSTD_registerExternalMatchFinder(). */
+ int useExternalMatchFinder;
+
+ /* Adjusts the maximum block size; cannot exceed ZSTD_BLOCKSIZE_MAX. */
+ size_t maxBlockSize;
}; /* typedef'd to ZSTD_CCtx_params within "zstd.h" */
#define COMPRESS_SEQUENCES_WORKSPACE_SIZE (sizeof(unsigned) * (MaxSeq + 2))
ZSTD_entropyCTablesMetadata_t entropyMetadata;
} ZSTD_blockSplitCtx;
+/* Context for block-level external matchfinder API */
+typedef struct {
+ void* mState;
+ ZSTD_externalMatchFinder_F* mFinder;
+ ZSTD_Sequence* seqBuffer;
+ size_t seqBufferCapacity;
+} ZSTD_externalMatchCtx;
+
struct ZSTD_CCtx_s {
ZSTD_compressionStage_e stage;
int cParamsChanged; /* == 1 if cParams(except wlog) or compression level are changed in requestedParams. Triggers transmission of new params to ZSTDMT (if available) then reset to 0. */
/* Workspace for block splitter */
ZSTD_blockSplitCtx blockSplitCtx;
+
+ /* Workspace for external matchfinder */
+ ZSTD_externalMatchCtx externalMatchCtx;
};
typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e;
*/
void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize);
+/* Returns 0 on success, and a ZSTD_error otherwise. This function scans through an array of
+ * ZSTD_Sequence, storing the sequences it finds, until it reaches a block delimiter.
+ * Note that the block delimiter must include the last literals of the block.
+ */
+size_t
+ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx,
+ ZSTD_sequencePosition* seqPos,
+ const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
+ const void* src, size_t blockSize);
+
+/* Returns the number of bytes to move the current read position back by.
+ * Only non-zero if we ended up splitting a sequence.
+ * Otherwise, it may return a ZSTD error if something went wrong.
+ *
+ * This function will attempt to scan through blockSize bytes
+ * represented by the sequences in @inSeqs,
+ * storing any (partial) sequences.
+ *
+ * Occasionally, we may want to change the actual number of bytes we consumed from inSeqs to
+ * avoid splitting a match, or to avoid splitting a match such that it would produce a match
+ * smaller than MINMATCH. In this case, we return the number of bytes that we didn't read from this block.
+ */
+size_t
+ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos,
+ const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
+ const void* src, size_t blockSize);
+
#endif /* ZSTD_COMPRESS_H */
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
return flSize+1;
}
-size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
- ZSTD_hufCTables_t* nextHuf,
- ZSTD_strategy strategy, int disableLiteralCompression,
- void* dst, size_t dstCapacity,
- const void* src, size_t srcSize,
- void* entropyWorkspace, size_t entropyWorkspaceSize,
- const int bmi2,
- unsigned suspectUncompressible, HUF_depth_mode depthMode)
+/* ZSTD_minLiteralsToCompress() :
+ * returns the minimal amount of literals
+ * required for literal compression to even be attempted.
+ * The minimum shrinks as the compression strategy increases,
+ * so stronger strategies attempt compression on smaller inputs.
+ */
+static size_t
+ZSTD_minLiteralsToCompress(ZSTD_strategy strategy, HUF_repeat huf_repeat)
+{
+ assert((int)strategy >= 0);
+ assert((int)strategy <= 9);
+ /* btultra2 : min 8 bytes;
+ * then 2x larger for each successively weaker strategy;
+ * max threshold 64 bytes */
+ { int const shift = MIN(9-strategy, 3);
+ size_t const mintc = (huf_repeat == HUF_repeat_valid) ? 6 : 8 << shift;
+ DEBUGLOG(7, "minLiteralsToCompress = %zu", mintc);
+ return mintc;
+ }
+}
+
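/* Worked thresholds from ZSTD_minLiteralsToCompress() above, assuming
 * huf_repeat != HUF_repeat_valid (pure arithmetic on the expression):
 *   strategy 9 (ZSTD_btultra2) : shift = 0 -> 8 bytes
 *   strategy 8 (ZSTD_btultra)  : shift = 1 -> 16 bytes
 *   strategy 7 (ZSTD_btopt)    : shift = 2 -> 32 bytes
 *   strategy <= 6              : shift = 3 -> 64 bytes
 * With a valid repeated Huffman table (HUF_repeat_valid), the threshold
 * drops to 6 bytes regardless of strategy. */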
+size_t ZSTD_compressLiterals (
+ void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize,
+ void* entropyWorkspace, size_t entropyWorkspaceSize,
+ const ZSTD_hufCTables_t* prevHuf,
+ ZSTD_hufCTables_t* nextHuf,
+ ZSTD_strategy strategy,
+ int disableLiteralCompression,
+ int suspectUncompressible,
+ int bmi2)
{
- size_t const minGain = ZSTD_minGain(srcSize, strategy);
size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB);
BYTE* const ostart = (BYTE*)dst;
U32 singleStream = srcSize < 256;
if (disableLiteralCompression)
return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
- /* small ? don't even attempt compression (speed opt) */
-# define COMPRESS_LITERALS_SIZE_MIN 63
- { size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
- if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
- }
+ /* if too small, don't even attempt compression (speed opt) */
+ if (srcSize < ZSTD_minLiteralsToCompress(strategy, prevHuf->repeatMode))
+ return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
RETURN_ERROR_IF(dstCapacity < lhSize+1, dstSize_tooSmall, "not enough space for compression");
{ HUF_repeat repeat = prevHuf->repeatMode;
int const preferRepeat = (strategy < ZSTD_lazy) ? srcSize <= 1024 : 0;
+ HUF_depth_mode const depthMode = (strategy >= HUF_OPTIMAL_DEPTH_THRESHOLD) ? HUF_depth_optimal : HUF_depth_fast;
typedef size_t (*huf_compress_f)(void*, size_t, const void*, size_t, unsigned, unsigned, void*, size_t, HUF_CElt*, HUF_repeat*, int, int, unsigned, HUF_depth_mode);
huf_compress_f huf_compress;
if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1;
}
}
- if ((cLitSize==0) || (cLitSize >= srcSize - minGain) || ERR_isError(cLitSize)) {
- ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
- return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
- }
+ { size_t const minGain = ZSTD_minGain(srcSize, strategy);
+ if ((cLitSize==0) || (cLitSize >= srcSize - minGain) || ERR_isError(cLitSize)) {
+ ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
+ return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
+ } }
if (cLitSize==1) {
ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
switch(lhSize)
{
case 3: /* 2 - 2 - 10 - 10 */
+ if (!singleStream) assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS);
{ U32 const lhc = hType + ((U32)(!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14);
MEM_writeLE24(ostart, lhc);
break;
}
case 4: /* 2 - 2 - 14 - 14 */
+ assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS);
{ U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18);
MEM_writeLE32(ostart, lhc);
break;
}
case 5: /* 2 - 2 - 18 - 18 */
+ assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS);
{ U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22);
MEM_writeLE32(ostart, lhc);
ostart[4] = (BYTE)(cLitSize >> 10);
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize);
-/* If suspectUncompressible then some sampling checks will be run to potentially skip huffman coding */
-size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
- ZSTD_hufCTables_t* nextHuf,
- ZSTD_strategy strategy, int disableLiteralCompression,
- void* dst, size_t dstCapacity,
+/* ZSTD_compressLiterals():
+ * @entropyWorkspace: must be aligned on 4-byte boundaries
+ * @entropyWorkspaceSize : must be >= HUF_WORKSPACE_SIZE
+ * @suspectUncompressible: sampling checks, to potentially skip huffman coding
+ */
+size_t ZSTD_compressLiterals (void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
void* entropyWorkspace, size_t entropyWorkspaceSize,
- const int bmi2,
- unsigned suspectUncompressible, HUF_depth_mode depthMode);
+ const ZSTD_hufCTables_t* prevHuf,
+ ZSTD_hufCTables_t* nextHuf,
+ ZSTD_strategy strategy, int disableLiteralCompression,
+ int suspectUncompressible,
+ int bmi2);
#endif /* ZSTD_COMPRESS_LITERALS_H */
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
#define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats */
#define ZSTD_MAX_PRICE (1<<30)
-#define ZSTD_PREDEF_THRESHOLD 1024 /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */
+#define ZSTD_PREDEF_THRESHOLD 8 /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */
/*-*************************************
#if 0 /* approximation at bit level (for tests) */
# define BITCOST_ACCURACY 0
# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
-# define WEIGHT(stat, opt) ((void)opt, ZSTD_bitWeight(stat))
+# define WEIGHT(stat, opt) ((void)(opt), ZSTD_bitWeight(stat))
#elif 0 /* fractional bit accuracy (for tests) */
# define BITCOST_ACCURACY 8
# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
-# define WEIGHT(stat,opt) ((void)opt, ZSTD_fracWeight(stat))
+# define WEIGHT(stat,opt) ((void)(opt), ZSTD_fracWeight(stat))
#else /* opt==approx, ultra==accurate */
# define BITCOST_ACCURACY 8
# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
-# define WEIGHT(stat,opt) (opt ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat))
+# define WEIGHT(stat,opt) ((opt) ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat))
#endif
+/* ZSTD_bitWeight() :
+ * provide estimated "cost" of a stat in full bits only */
MEM_STATIC U32 ZSTD_bitWeight(U32 stat)
{
return (ZSTD_highbit32(stat+1) * BITCOST_MULTIPLIER);
}
+/* ZSTD_fracWeight() :
+ * provide fractional-bit "cost" of a stat,
+ * using linear interpolation approximation */
MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat)
{
U32 const stat = rawStat + 1;
U32 const hb = ZSTD_highbit32(stat);
U32 const BWeight = hb * BITCOST_MULTIPLIER;
+ /* FWeight was meant as "Fractional weight",
+ * but it's effectively a value between 1 and 2
+ * (in BITCOST_MULTIPLIER units), expressed in fixed-point arithmetic */
U32 const FWeight = (stat << BITCOST_ACCURACY) >> hb;
U32 const weight = BWeight + FWeight;
assert(hb + BITCOST_ACCURACY < 31);
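    /* Worked example (pure arithmetic): rawStat=5 -> stat=6, hb=2,
     * BWeight = 2*BITCOST_MULTIPLIER, FWeight = 1.5*BITCOST_MULTIPLIER,
     * so weight = 3.5*BITCOST_MULTIPLIER.
     * Callers typically subtract this from a table-wide base price
     * (e.g. offCodeSumBasePrice - WEIGHT(freq)) to approximate a
     * fractional-bit symbol cost. */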
return total;
}
-static U32 ZSTD_downscaleStats(unsigned* table, U32 lastEltIndex, U32 shift)
+typedef enum { base_0possible=0, base_1guaranteed=1 } base_directive_e;
+
+static U32
+ZSTD_downscaleStats(unsigned* table, U32 lastEltIndex, U32 shift, base_directive_e base1)
{
U32 s, sum=0;
- DEBUGLOG(5, "ZSTD_downscaleStats (nbElts=%u, shift=%u)", (unsigned)lastEltIndex+1, (unsigned)shift);
+ DEBUGLOG(5, "ZSTD_downscaleStats (nbElts=%u, shift=%u)",
+ (unsigned)lastEltIndex+1, (unsigned)shift );
assert(shift < 30);
for (s=0; s<lastEltIndex+1; s++) {
- table[s] = 1 + (table[s] >> shift);
- sum += table[s];
+ unsigned const base = base1 ? 1 : (table[s]>0);
+ unsigned const newStat = base + (table[s] >> shift);
+ sum += newStat;
+ table[s] = newStat;
}
return sum;
}
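/* Illustrative effect of the base directive (derived from the loop above):
 * with shift=2, an element with count 10 becomes 1 + (10>>2) = 3 in either mode;
 * an element with count 0 stays 0 under base_0possible,
 * but is bumped to 1 under base_1guaranteed,
 * which guarantees a non-zero frequency for every element. */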
/* ZSTD_scaleStats() :
- * reduce all elements in table is sum too large
+ * reduce all elt frequencies in table if sum too large
* return the resulting sum of elements */
static U32 ZSTD_scaleStats(unsigned* table, U32 lastEltIndex, U32 logTarget)
{
DEBUGLOG(5, "ZSTD_scaleStats (nbElts=%u, target=%u)", (unsigned)lastEltIndex+1, (unsigned)logTarget);
assert(logTarget < 30);
if (factor <= 1) return prevsum;
- return ZSTD_downscaleStats(table, lastEltIndex, ZSTD_highbit32(factor));
+ return ZSTD_downscaleStats(table, lastEltIndex, ZSTD_highbit32(factor), base_1guaranteed);
}
/* ZSTD_rescaleFreqs() :
DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize);
optPtr->priceType = zop_dynamic;
- if (optPtr->litLengthSum == 0) { /* first block : init */
- if (srcSize <= ZSTD_PREDEF_THRESHOLD) { /* heuristic */
- DEBUGLOG(5, "(srcSize <= ZSTD_PREDEF_THRESHOLD) => zop_predef");
+ if (optPtr->litLengthSum == 0) { /* no literals stats collected -> first block assumed -> init */
+
+ /* heuristic: use pre-defined stats for too small inputs */
+ if (srcSize <= ZSTD_PREDEF_THRESHOLD) {
+ DEBUGLOG(5, "srcSize <= %i : use predefined stats", ZSTD_PREDEF_THRESHOLD);
optPtr->priceType = zop_predef;
}
assert(optPtr->symbolCosts != NULL);
if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) {
- /* huffman table presumed generated by dictionary */
+
+ /* huffman stats covering the full value set : table presumed generated by dictionary */
optPtr->priceType = zop_dynamic;
if (compressedLiterals) {
+ /* generate literals statistics from huffman table */
unsigned lit;
assert(optPtr->litFreq != NULL);
optPtr->litSum = 0;
optPtr->offCodeSum += optPtr->offCodeFreq[of];
} }
- } else { /* not a dictionary */
+ } else { /* first block, no dictionary */
assert(optPtr->litFreq != NULL);
if (compressedLiterals) {
+ /* base initial cost of literals on direct frequency within src */
unsigned lit = MaxLit;
HIST_count_simple(optPtr->litFreq, &lit, src, srcSize); /* use raw first block to init statistics */
- optPtr->litSum = ZSTD_downscaleStats(optPtr->litFreq, MaxLit, 8);
+ optPtr->litSum = ZSTD_downscaleStats(optPtr->litFreq, MaxLit, 8, base_0possible);
}
{ unsigned const baseLLfreqs[MaxLL+1] = {
optPtr->offCodeSum = sum_u32(baseOFCfreqs, MaxOff+1);
}
-
}
- } else { /* new block : re-use previous statistics, scaled down */
+ } else { /* new block : scale down accumulated statistics */
if (compressedLiterals)
optPtr->litSum = ZSTD_scaleStats(optPtr->litFreq, MaxLit, 12);
assert(litLength <= ZSTD_BLOCKSIZE_MAX);
if (optPtr->priceType == zop_predef)
return WEIGHT(litLength, optLevel);
- /* We can't compute the litLength price for sizes >= ZSTD_BLOCKSIZE_MAX
- * because it isn't representable in the zstd format. So instead just
- * call it 1 bit more than ZSTD_BLOCKSIZE_MAX - 1. In this case the block
- * would be all literals.
+
+ /* ZSTD_LLcode() can't compute litLength price for sizes >= ZSTD_BLOCKSIZE_MAX
+ * because it isn't representable in the zstd format.
+ * So instead just pretend it would cost 1 bit more than ZSTD_BLOCKSIZE_MAX - 1.
+ * In such a case, the block would be all literals.
*/
if (litLength == ZSTD_BLOCKSIZE_MAX)
return BITCOST_MULTIPLIER + ZSTD_litLengthPrice(ZSTD_BLOCKSIZE_MAX - 1, optPtr, optLevel);
}
/* ZSTD_getMatchPrice() :
- * Provides the cost of the match part (offset + matchLength) of a sequence
+ * Provides the cost of the match part (offset + matchLength) of a sequence.
* Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence.
* @offBase : sumtype, representing an offset or a repcode, and using numeric representation of ZSTD_storeSeq()
* @optLevel: when <2, favors small offset for decompression speed (improved cache efficiency)
U32 const mlBase = matchLength - MINMATCH;
assert(matchLength >= MINMATCH);
- if (optPtr->priceType == zop_predef) /* fixed scheme, do not use statistics */
- return WEIGHT(mlBase, optLevel) + ((16 + offCode) * BITCOST_MULTIPLIER);
+ if (optPtr->priceType == zop_predef) /* fixed scheme, does not use statistics */
+ return WEIGHT(mlBase, optLevel)
+ + ((16 + offCode) * BITCOST_MULTIPLIER); /* emulated offset cost */
/* dynamic statistics */
price = (offCode * BITCOST_MULTIPLIER) + (optPtr->offCodeSumBasePrice - WEIGHT(optPtr->offCodeFreq[offCode], optLevel));
optPtr->litLengthSum++;
}
- /* offset code : expected to follow storeSeq() numeric representation */
+ /* offset code : follows storeSeq() numeric representation */
{ U32 const offCode = ZSTD_highbit32(offBase);
assert(offCode <= MaxOff);
optPtr->offCodeFreq[offCode]++;
/* ZSTD_initStats_ultra():
* make a first compression pass, just to seed stats with more accurate starting values.
* only works on first block, with no dictionary and no ldm.
- * this function cannot error, hence its contract must be respected.
+ * this function cannot error out, its narrow contract must be respected.
*/
static void
ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
ZSTD_compressBlock_opt2(ms, seqStore, tmpRep, src, srcSize, ZSTD_noDict); /* generate stats into ms->opt*/
- /* invalidate first scan from history */
+ /* invalidate first scan from history, only keep entropy stats */
ZSTD_resetSeqStore(seqStore);
ms->window.base -= srcSize;
ms->window.dictLimit += (U32)srcSize;
U32 const curr = (U32)((const BYTE*)src - ms->window.base);
DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize);
- /* 2-pass strategy:
+ /* 2-passes strategy:
* this strategy makes a first pass over first block to collect statistics
- * and seed next round's statistics with it.
- * After 1st pass, function forgets everything, and starts a new block.
+ * in order to seed next round's statistics with it.
+ * After 1st pass, function forgets history, and starts a new block.
* Consequently, this can only work if no data has been previously loaded in tables,
* aka, no dictionary, no prefix, no ldm preprocessing.
* The compression ratio gain is generally small (~0.5% on first block),
- * the cost is 2x cpu time on first block. */
+ * the cost is 2x cpu time on first block. */
assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
if ( (ms->opt.litLengthSum==0) /* first block */
&& (seqStore->sequences == seqStore->sequencesStart) /* no ldm */
&& (ms->window.dictLimit == ms->window.lowLimit) /* no dictionary */
- && (curr == ms->window.dictLimit) /* start of frame, nothing already loaded nor skipped */
- && (srcSize > ZSTD_PREDEF_THRESHOLD)
+ && (curr == ms->window.dictLimit) /* start of frame, nothing already loaded nor skipped */
+ && (srcSize > ZSTD_PREDEF_THRESHOLD) /* input large enough to not employ default stats */
) {
ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize);
}
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/* ******************************************************************
* huff0 huffman decoder,
* part of Finite State Entropy library
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
*
* You can contact the author at :
* - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
static U64 HUF_DEltX1_set4(BYTE symbol, BYTE nbBits) {
U64 D4;
if (MEM_isLittleEndian()) {
- D4 = (symbol << 8) + nbBits;
+ D4 = (U64)((symbol << 8) + nbBits);
} else {
- D4 = symbol + (nbBits << 8);
+ D4 = (U64)(symbol + (nbBits << 8));
}
+ assert(D4 < (1U << 16));
D4 *= 0x0001000100010001ULL;
return D4;
}
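/* Worked example (little-endian case): symbol=0x41, nbBits=5 gives the
 * 16-bit entry 0x4105; multiplying by 0x0001000100010001 replicates it into
 * four identical lanes: D4 = 0x4105410541054105. */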
* rankStart[0] is not filled because there are no entries in the table for
* weight 0.
*/
- {
- int n;
- int nextRankStart = 0;
+ { int n;
+ U32 nextRankStart = 0;
int const unroll = 4;
int const nLimit = (int)nbSymbols - unroll + 1;
for (n=0; n<(int)tableLog+1; n++) {
* We can switch based on the length to a different inner loop which is
* optimized for that particular case.
*/
- {
- U32 w;
- int symbol=wksp->rankVal[0];
- int rankStart=0;
+ { U32 w;
+ int symbol = wksp->rankVal[0];
+ int rankStart = 0;
for (w=1; w<tableLog+1; ++w) {
int const symbolCount = wksp->rankVal[w];
int const length = (1 << w) >> 1;
while (p < pEnd)
HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
- return pEnd-pStart;
+ return (size_t)(pEnd-pStart);
}
FORCE_INLINE_TEMPLATE size_t
return dstSize;
}
+/* HUF_decompress4X1_usingDTable_internal_body():
+ * Conditions :
+ * @dstSize >= 6
+ */
FORCE_INLINE_TEMPLATE size_t
HUF_decompress4X1_usingDTable_internal_body(
void* dst, size_t dstSize,
if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
if (opStart4 > oend) return ERROR(corruption_detected); /* overflow */
+ if (dstSize < 6) return ERROR(corruption_detected); /* stream 4-split doesn't work */
CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
const BYTE* const iend = (const BYTE*)cSrc + 6;
BYTE* const oend = (BYTE*)dst + dstSize;
HUF_DecompressAsmArgs args;
- {
- size_t const ret = HUF_DecompressAsmArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable);
+ { size_t const ret = HUF_DecompressAsmArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable);
FORWARD_IF_ERROR(ret, "Failed to init asm args");
if (ret != 0)
return HUF_decompress4X1_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);
(void)iend;
/* finish bit streams one by one. */
- {
- size_t const segmentSize = (dstSize+3) / 4;
+ { size_t const segmentSize = (dstSize+3) / 4;
BYTE* segmentEnd = (BYTE*)dst;
int i;
for (i = 0; i < 4; ++i) {
/* decoded size */
return dstSize;
}
+
+/* HUF_decompress4X2_usingDTable_internal_body():
+ * Conditions:
+ * @dstSize >= 6
+ */
FORCE_INLINE_TEMPLATE size_t
HUF_decompress4X2_usingDTable_internal_body(
void* dst, size_t dstSize,
DTableDesc const dtd = HUF_getDTableDesc(DTable);
U32 const dtLog = dtd.tableLog;
- if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
- if (opStart4 > oend) return ERROR(corruption_detected); /* overflow */
+ if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
+ if (opStart4 > oend) return ERROR(corruption_detected); /* overflow */
+ if (dstSize < 6) return ERROR(corruption_detected); /* stream 4-split doesn't work */
CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
/*
- * Copyright (c) Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
frameSizeInfo.compressedSize = (size_t)(ip - ipstart);
frameSizeInfo.decompressedBound = (zfh.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN)
? zfh.frameContentSize
- : nbBlocks * zfh.blockSizeMax;
+ : (unsigned long long)nbBlocks * zfh.blockSizeMax;
return frameSizeInfo;
}
}
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
}
RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
+ if (!singleStream)
+ RETURN_ERROR_IF(litSize < MIN_LITERALS_FOR_4_STREAMS, literals_headerWrong,
+ "Not enough literals (%zu) for the 4-streams mode (min %u)",
+ litSize, MIN_LITERALS_FOR_4_STREAMS);
RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, "");
RETURN_ERROR_IF(expectedWriteSize < litSize , dstSize_tooSmall, "");
ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 0);
dctx->litBuffer, litSize, istart+lhSize, litCSize,
dctx->HUFptr, ZSTD_DCtx_get_bmi2(dctx));
} else {
+ assert(litSize >= MIN_LITERALS_FOR_4_STREAMS);
hufSuccess = HUF_decompress4X_usingDTable_bmi2(
dctx->litBuffer, litSize, istart+lhSize, litCSize,
dctx->HUFptr, ZSTD_DCtx_get_bmi2(dctx));
for (i = 8; i < n; i += 8) {
MEM_write64(spread + pos + i, sv);
}
- pos += n;
+ assert(n>=0);
+ pos += (size_t)n;
}
}
/* Now we spread those positions across the table.
* Offsets are long if they are larger than 2^STREAM_ACCUMULATOR_MIN.
* We don't expect that to be the case in 64-bit mode.
* In block mode, window size is not known, so we have to be conservative.
- * (note: but it could be evaluated from current-lowLimit)
+ * (note: it could possibly be evaluated from current-lowLimit)
*/
ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN))));
DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize);
- RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong, "");
+ /* Note : the wording of the specification
+ * allows compressed block to be sized exactly ZSTD_BLOCKSIZE_MAX.
+ * This generally does not happen, as it makes little sense,
+ * since an uncompressed block would feature same size and have no decompression cost.
+ * Also, note that decoders from reference libzstd before v1.5.4
+ * would consider this edge case an error.
+ * As a consequence, avoid generating compressed blocks of size ZSTD_BLOCKSIZE_MAX
+ * for broader compatibility with the deployed ecosystem of zstd decoders */
+ RETURN_ERROR_IF(srcSize > ZSTD_BLOCKSIZE_MAX, srcSize_wrong, "");
/* Decode literals section */
{ size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize, dst, dstCapacity, streaming);
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLevel)
{
- const double ratio = (double)nbDmers / maxDictSize;
+ const double ratio = (double)nbDmers / (double)maxDictSize;
if (ratio >= 10) {
return;
}
return COVER_dictSelectionError(totalCompressedSize);
}
- if (totalCompressedSize <= largestCompressed * regressionTolerance) {
+ if ((double)totalCompressedSize <= (double)largestCompressed * regressionTolerance) {
COVER_dictSelection_t selection = { candidateDictBuffer, dictContentSize, totalCompressedSize };
free(largestDictbuffer);
return selection;
/*
- * Copyright (c) Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
elt = table[u];
/* sort : improve rank */
while ((u>1) && (table[u-1].savings < elt.savings))
- table[u] = table[u-1], u--;
+ table[u] = table[u-1], u--;
table[u] = elt;
return u;
} }
if (solution.length==0) { cursor++; continue; }
ZDICT_insertDictItem(dictList, dictListSize, solution, buffer);
cursor += solution.length;
- DISPLAYUPDATE(2, "\r%4.2f %% \r", (double)cursor / bufferSize * 100);
+ DISPLAYUPDATE(2, "\r%4.2f %% \r", (double)cursor / (double)bufferSize * 100.0);
} }
_cleanup:
# ################################################################
-# Copyright (c) Yann Collet, Facebook, Inc.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
# ################################################################
-# Copyright (c) Yann Collet, Facebook, Inc.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under both the BSD-style license (found in the
# Zstd lib directory
LIBZSTD ?= ./
+# ZSTD_LIB_MINIFY is a helper variable that
+# configures a bunch of other variables to space-optimized defaults.
+ZSTD_LIB_MINIFY ?= 0
+
# Legacy support
-ZSTD_LEGACY_SUPPORT ?= 5
+ifneq ($(ZSTD_LIB_MINIFY), 0)
+ ZSTD_LEGACY_SUPPORT ?= 0
+else
+ ZSTD_LEGACY_SUPPORT ?= 5
+endif
ZSTD_LEGACY_MULTITHREADED_API ?= 0
# Build size optimizations
-HUF_FORCE_DECOMPRESS_X1 ?= 0
-HUF_FORCE_DECOMPRESS_X2 ?= 0
-ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT ?= 0
-ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG ?= 0
-ZSTD_NO_INLINE ?= 0
-ZSTD_STRIP_ERROR_STRINGS ?= 0
+ifneq ($(ZSTD_LIB_MINIFY), 0)
+ HUF_FORCE_DECOMPRESS_X1 ?= 1
+ HUF_FORCE_DECOMPRESS_X2 ?= 0
+ ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT ?= 1
+ ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG ?= 0
+ ZSTD_NO_INLINE ?= 1
+ ZSTD_STRIP_ERROR_STRINGS ?= 1
+else
+ HUF_FORCE_DECOMPRESS_X1 ?= 0
+ HUF_FORCE_DECOMPRESS_X2 ?= 0
+ ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT ?= 0
+ ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG ?= 0
+ ZSTD_NO_INLINE ?= 0
+ ZSTD_STRIP_ERROR_STRINGS ?= 0
+endif
# Assembly support
ZSTD_NO_ASM ?= 0
CCVER := $(shell $(CC) --version)
ZSTD_VERSION?= $(LIBVER)
-# ZSTD_LIB_MINIFY is a helper variable that
-# configures a bunch of other variables to space-optimized defaults.
-ZSTD_LIB_MINIFY ?= 0
ifneq ($(ZSTD_LIB_MINIFY), 0)
HAVE_CC_OZ ?= $(shell echo "" | $(CC) -Oz -x c -c - -o /dev/null 2> /dev/null && echo 1 || echo 0)
- ZSTD_LEGACY_SUPPORT ?= 0
- ZSTD_LIB_DEPRECATED ?= 0
- HUF_FORCE_DECOMPRESS_X1 ?= 1
- ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT ?= 1
- ZSTD_NO_INLINE ?= 1
- ZSTD_STRIP_ERROR_STRINGS ?= 1
ifneq ($(HAVE_CC_OZ), 0)
# Some compilers (clang) support an even more space-optimized setting.
CFLAGS += -Oz
# ZSTD - standard compression algorithm
-# Copyright (C) 2014-2016, Yann Collet, Facebook
+# Copyright (c) Meta Platforms, Inc. and affiliates.
# BSD 2-Clause License (https://opensource.org/licenses/bsd-license.php)
prefix=@PREFIX@
module libzstd [extern_c] {
header "zstd.h"
export *
- config_macros [exhaustive] /* zstd.h */ \
+ config_macros [exhaustive] \
+ /* zstd.h */ \
ZSTD_STATIC_LINKING_ONLY, \
+ ZSTDLIB_VISIBILITY, \
ZSTDLIB_VISIBLE, \
+ ZSTDLIB_HIDDEN, \
ZSTD_DLL_EXPORT, \
ZSTDLIB_STATIC_API, \
ZSTD_DISABLE_DEPRECATE_WARNINGS, \
ZSTD_CLEVEL_DEFAULT, \
- /* zdict.h */ ZDICT_STATIC_LINKING_ONLY, \
+ /* zdict.h */ \
+ ZDICT_STATIC_LINKING_ONLY, \
+ ZDICTLIB_VISIBLE, \
+ ZDICTLIB_HIDDEN, \
ZDICTLIB_VISIBILITY, \
+ ZDICTLIB_STATIC_API, \
ZDICT_DISABLE_DEPRECATE_WARNINGS, \
- /* zstd_errors.h */ ZSTDERRORLIB_VISIBILITY
+ /* zstd_errors.h */ \
+ ZSTDERRORLIB_VISIBLE, \
+ ZSTDERRORLIB_HIDDEN, \
+ ZSTDERRORLIB_VISIBILITY
module dictbuilder [extern_c] {
header "zdict.h"
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* You may select, at your option, one of the above-listed licenses.
*/
-#ifndef DICTBUILDER_H_001
-#define DICTBUILDER_H_001
-
#if defined (__cplusplus)
extern "C" {
#endif
+#ifndef ZSTD_ZDICT_H
+#define ZSTD_ZDICT_H
/*====== Dependencies ======*/
#include <stddef.h> /* size_t */
/* ===== ZDICTLIB_API : control library symbols visibility ===== */
-#ifndef ZDICTLIB_VISIBILITY
-# if defined(__GNUC__) && (__GNUC__ >= 4)
-# define ZDICTLIB_VISIBILITY __attribute__ ((visibility ("default")))
+#ifndef ZDICTLIB_VISIBLE
+ /* Backwards compatibility with old macro name */
+# ifdef ZDICTLIB_VISIBILITY
+# define ZDICTLIB_VISIBLE ZDICTLIB_VISIBILITY
+# elif defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__MINGW32__)
+# define ZDICTLIB_VISIBLE __attribute__ ((visibility ("default")))
+# else
+# define ZDICTLIB_VISIBLE
+# endif
+#endif
+
+#ifndef ZDICTLIB_HIDDEN
+# if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__MINGW32__)
+# define ZDICTLIB_HIDDEN __attribute__ ((visibility ("hidden")))
# else
-# define ZDICTLIB_VISIBILITY
+# define ZDICTLIB_HIDDEN
# endif
#endif
+
#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
-# define ZDICTLIB_API __declspec(dllexport) ZDICTLIB_VISIBILITY
+# define ZDICTLIB_API __declspec(dllexport) ZDICTLIB_VISIBLE
#elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1)
-# define ZDICTLIB_API __declspec(dllimport) ZDICTLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
+# define ZDICTLIB_API __declspec(dllimport) ZDICTLIB_VISIBLE /* Not required, but allows the compiler to generate better code, saving a function pointer load from the IAT and an indirect jump. */
#else
-# define ZDICTLIB_API ZDICTLIB_VISIBILITY
+# define ZDICTLIB_API ZDICTLIB_VISIBLE
#endif
/*******************************************************************************
const size_t* samplesSizes, unsigned nbSamples);
typedef struct {
- int compressionLevel; /*< optimize for a specific zstd compression level; 0 means default */
- unsigned notificationLevel; /*< Write log to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
- unsigned dictID; /*< force dictID value; 0 means auto mode (32-bits random value)
+ int compressionLevel; /**< optimize for a specific zstd compression level; 0 means default */
+ unsigned notificationLevel; /**< Write log to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
+ unsigned dictID; /**< force dictID value; 0 means auto mode (32-bit random value)
* NOTE: The zstd format reserves some dictionary IDs for future use.
* You may use them in private settings, but be warned that they
* may be used by zstd in a public dictionary registry in the future.
ZDICTLIB_API unsigned ZDICT_isError(size_t errorCode);
ZDICTLIB_API const char* ZDICT_getErrorName(size_t errorCode);
+#endif /* ZSTD_ZDICT_H */
+#if defined(ZDICT_STATIC_LINKING_ONLY) && !defined(ZSTD_ZDICT_H_STATIC)
+#define ZSTD_ZDICT_H_STATIC
-#ifdef ZDICT_STATIC_LINKING_ONLY
+/* This can be overridden externally to hide static symbols. */
+#ifndef ZDICTLIB_STATIC_API
+# if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
+# define ZDICTLIB_STATIC_API __declspec(dllexport) ZDICTLIB_VISIBLE
+# elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1)
+# define ZDICTLIB_STATIC_API __declspec(dllimport) ZDICTLIB_VISIBLE
+# else
+# define ZDICTLIB_STATIC_API ZDICTLIB_VISIBLE
+# endif
+#endif
/* ====================================================================================
* The definitions in this section are considered experimental.
 * In general, it's recommended to provide a few thousand samples, though this can vary a lot.
* It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
*/
-ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
+ZDICTLIB_STATIC_API size_t ZDICT_trainFromBuffer_cover(
void *dictBuffer, size_t dictBufferCapacity,
const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
ZDICT_cover_params_t parameters);
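A minimal calling sketch for the COVER trainer declared above, assuming ZDICT_STATIC_LINKING_ONLY is defined before including the header; the k/d values, buffer names, and helper name are illustrative placeholders, not tuned recommendations:

    #define ZDICT_STATIC_LINKING_ONLY
    #include <zdict.h>
    #include <string.h>   /* memset */

    /* Samples are stored back-to-back in samplesBuffer; samplesSizes[i] is the length of sample i. */
    static size_t trainCoverDict(void* dictBuffer, size_t dictCapacity,
                                 const void* samplesBuffer,
                                 const size_t* samplesSizes, unsigned nbSamples)
    {
        ZDICT_cover_params_t params;
        memset(&params, 0, sizeof(params));
        params.k = 200;                       /* segment size (illustrative) */
        params.d = 8;                         /* dmer size (illustrative) */
        params.zParams.compressionLevel = 3;  /* optimize the dictionary for level 3 */
        return ZDICT_trainFromBuffer_cover(dictBuffer, dictCapacity,
                                           samplesBuffer, samplesSizes, nbSamples,
                                           params);  /* check the result with ZDICT_isError() */
    }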
* See ZDICT_trainFromBuffer() for details on failure modes.
 * Note: ZDICT_optimizeTrainFromBuffer_cover() requires about 8 bytes of memory for each input byte, plus an additional 5 bytes of memory per input byte for each thread.
*/
-ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
+ZDICTLIB_STATIC_API size_t ZDICT_optimizeTrainFromBuffer_cover(
void* dictBuffer, size_t dictBufferCapacity,
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
ZDICT_cover_params_t* parameters);
 * In general, it's recommended to provide a few thousand samples, though this can vary a lot.
* It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
*/
-ZDICTLIB_API size_t ZDICT_trainFromBuffer_fastCover(void *dictBuffer,
+ZDICTLIB_STATIC_API size_t ZDICT_trainFromBuffer_fastCover(void *dictBuffer,
size_t dictBufferCapacity, const void *samplesBuffer,
const size_t *samplesSizes, unsigned nbSamples,
ZDICT_fastCover_params_t parameters);
* See ZDICT_trainFromBuffer() for details on failure modes.
* Note: ZDICT_optimizeTrainFromBuffer_fastCover() requires about 6 * 2^f bytes of memory for each thread.
*/
-ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_fastCover(void* dictBuffer,
+ZDICTLIB_STATIC_API size_t ZDICT_optimizeTrainFromBuffer_fastCover(void* dictBuffer,
size_t dictBufferCapacity, const void* samplesBuffer,
const size_t* samplesSizes, unsigned nbSamples,
ZDICT_fastCover_params_t* parameters);
* It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
* Note: ZDICT_trainFromBuffer_legacy() will send notifications into stderr if instructed to, using notificationLevel>0.
*/
-ZDICTLIB_API size_t ZDICT_trainFromBuffer_legacy(
+ZDICTLIB_STATIC_API size_t ZDICT_trainFromBuffer_legacy(
void* dictBuffer, size_t dictBufferCapacity,
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
ZDICT_legacy_params_t parameters);
or _CRT_SECURE_NO_WARNINGS in Visual.
Otherwise, it's also possible to manually define ZDICT_DISABLE_DEPRECATE_WARNINGS */
#ifdef ZDICT_DISABLE_DEPRECATE_WARNINGS
-# define ZDICT_DEPRECATED(message) ZDICTLIB_API /* disable deprecation warnings */
+# define ZDICT_DEPRECATED(message) /* disable deprecation warnings */
#else
# define ZDICT_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
# if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
-# define ZDICT_DEPRECATED(message) [[deprecated(message)]] ZDICTLIB_API
+# define ZDICT_DEPRECATED(message) [[deprecated(message)]]
# elif defined(__clang__) || (ZDICT_GCC_VERSION >= 405)
-# define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated(message)))
+# define ZDICT_DEPRECATED(message) __attribute__((deprecated(message)))
# elif (ZDICT_GCC_VERSION >= 301)
-# define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated))
+# define ZDICT_DEPRECATED(message) __attribute__((deprecated))
# elif defined(_MSC_VER)
-# define ZDICT_DEPRECATED(message) ZDICTLIB_API __declspec(deprecated(message))
+# define ZDICT_DEPRECATED(message) __declspec(deprecated(message))
# else
# pragma message("WARNING: You need to implement ZDICT_DEPRECATED for this compiler")
-# define ZDICT_DEPRECATED(message) ZDICTLIB_API
+# define ZDICT_DEPRECATED(message)
# endif
#endif /* ZDICT_DISABLE_DEPRECATE_WARNINGS */
ZDICT_DEPRECATED("use ZDICT_finalizeDictionary() instead")
+ZDICTLIB_STATIC_API
size_t ZDICT_addEntropyTablesFromBuffer(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples);
-#endif /* ZDICT_STATIC_LINKING_ONLY */
+#endif /* ZSTD_ZDICT_H_STATIC */
#if defined (__cplusplus)
}
#endif
-
-#endif /* DICTBUILDER_H_001 */
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/* ===== ZSTDLIB_API : control library symbols visibility ===== */
#ifndef ZSTDLIB_VISIBLE
-# if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__MINGW32__)
+ /* Backwards compatibility with old macro name */
+# ifdef ZSTDLIB_VISIBILITY
+# define ZSTDLIB_VISIBLE ZSTDLIB_VISIBILITY
+# elif defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__MINGW32__)
# define ZSTDLIB_VISIBLE __attribute__ ((visibility ("default")))
-# define ZSTDLIB_HIDDEN __attribute__ ((visibility ("hidden")))
# else
# define ZSTDLIB_VISIBLE
+# endif
+#endif
+
+#ifndef ZSTDLIB_HIDDEN
+# if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__MINGW32__)
+# define ZSTDLIB_HIDDEN __attribute__ ((visibility ("hidden")))
+# else
# define ZSTDLIB_HIDDEN
# endif
#endif
+
#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
# define ZSTDLIB_API __declspec(dllexport) ZSTDLIB_VISIBLE
#elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1)
/*====== Helper functions ======*/
-#define ZSTD_COMPRESSBOUND(srcSize) ((srcSize) + ((srcSize)>>8) + (((srcSize) < (128<<10)) ? (((128<<10) - (srcSize)) >> 11) /* margin, from 64 to 0 */ : 0)) /* this formula ensures that bound(A) + bound(B) <= bound(A+B) as long as A and B >= 128 KB */
-ZSTDLIB_API size_t ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case single-pass scenario */
+/* ZSTD_compressBound() :
+ * maximum compressed size in worst case single-pass scenario.
+ * When invoking `ZSTD_compress()` or any other one-pass compression function,
+ * it's recommended to provide @dstCapacity >= ZSTD_compressBound(srcSize)
+ * as it eliminates one potential failure scenario,
+ * aka not enough room in dst buffer to write the compressed frame.
+ * Note : ZSTD_compressBound() itself can fail, if @srcSize > ZSTD_MAX_INPUT_SIZE .
+ * In which case, ZSTD_compressBound() will return an error code
+ * which can be tested using ZSTD_isError().
+ *
+ * ZSTD_COMPRESSBOUND() :
+ * same as ZSTD_compressBound(), but as a macro.
+ * It can be used to produce constants, which can be useful for static allocation,
+ * for example to size a statically-sized array on the stack.
+ * Will produce the constant value 0 if srcSize is too large.
+ */
+#define ZSTD_MAX_INPUT_SIZE ((sizeof(size_t)==8) ? 0xFF00FF00FF00FF00LLU : 0xFF00FF00U)
+#define ZSTD_COMPRESSBOUND(srcSize) (((size_t)(srcSize) >= ZSTD_MAX_INPUT_SIZE) ? 0 : (srcSize) + ((srcSize)>>8) + (((srcSize) < (128<<10)) ? (((128<<10) - (srcSize)) >> 11) /* margin, from 64 to 0 */ : 0)) /* this formula ensures that bound(A) + bound(B) <= bound(A+B) as long as A and B >= 128 KB */
+ZSTDLIB_API size_t ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case single-pass scenario */
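A short sketch of both forms described above; the buffer names, helper name, and compression level are illustrative:

    #include <stdlib.h>
    #include <zstd.h>

    /* Compress src into a freshly allocated buffer sized with ZSTD_compressBound(). */
    static void* compressToNewBuffer(const void* src, size_t srcSize, size_t* cSizePtr)
    {
        size_t const bound = ZSTD_compressBound(srcSize);
        if (ZSTD_isError(bound)) return NULL;   /* srcSize > ZSTD_MAX_INPUT_SIZE */
        {   void* const dst = malloc(bound);
            if (dst == NULL) return NULL;
            *cSizePtr = ZSTD_compress(dst, bound, src, srcSize, 3 /* level */);
            if (ZSTD_isError(*cSizePtr)) { free(dst); return NULL; }
            return dst;
        }
    }

    /* The macro form is a constant expression, so it can size a static buffer. */
    static char g_compressScratch[ZSTD_COMPRESSBOUND(4096)];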
+/* ZSTD_isError() :
+ * Most ZSTD_* functions returning a size_t value can be tested for error,
+ * using ZSTD_isError().
+ * @return 1 if error, 0 otherwise
+ */
ZSTDLIB_API unsigned ZSTD_isError(size_t code); /*!< tells if a `size_t` function result is an error code */
ZSTDLIB_API const char* ZSTD_getErrorName(size_t code); /*!< provides readable string from an error code */
ZSTDLIB_API int ZSTD_minCLevel(void); /*!< minimum negative compression level allowed, requires v1.4.0+ */
* ZSTD_c_useBlockSplitter
* ZSTD_c_useRowMatchFinder
* ZSTD_c_prefetchCDictTables
+ * ZSTD_c_enableMatchFinderFallback
+ * ZSTD_c_maxBlockSize
* Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
* note : never ever use experimentalParam? names directly;
* also, the enums values themselves are unstable and can still change.
ZSTD_c_experimentalParam13=1010,
ZSTD_c_experimentalParam14=1011,
ZSTD_c_experimentalParam15=1012,
- ZSTD_c_experimentalParam16=1013
+ ZSTD_c_experimentalParam16=1013,
+ ZSTD_c_experimentalParam17=1014,
+ ZSTD_c_experimentalParam18=1015,
+
} ZSTD_cParameter;
typedef struct {
* They will be used to compress next frame.
* Resetting session never fails.
* - The parameters : changes all parameters back to "default".
- * This removes any reference to any dictionary too.
+ * This also removes any reference to any dictionary or external matchfinder.
* Parameters can only be changed between 2 sessions (i.e. no compression is currently ongoing)
* otherwise the reset fails, and function returns an error value (which can be tested using ZSTD_isError())
* - Both : similar to resetting the session, followed by resetting parameters.
* @result : 0, or an error code (which can be tested with ZSTD_isError()).
* Special: Loading a NULL (or 0-size) dictionary invalidates previous dictionary,
* meaning "return to no-dictionary mode".
- * Note 1 : Dictionary is sticky, it will be used for all future compressed frames.
- * To return to "no-dictionary" situation, load a NULL dictionary (or reset parameters).
+ * Note 1 : Dictionary is sticky, it will be used for all future compressed frames,
+ * until parameters are reset, a new dictionary is loaded, or the dictionary
+ * is explicitly invalidated by loading a NULL dictionary.
* Note 2 : Loading a dictionary involves building tables.
* It's also a CPU consuming operation, with non-negligible impact on latency.
* Tables are dependent on compression parameters, and for this reason,
ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize);
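A compact sketch of the sticky-dictionary behaviour documented above; the buffers and helper name are placeholders:

    #include <zstd.h>

    static size_t compressWithRawDict(ZSTD_CCtx* cctx,
                                      void* dst, size_t dstCapacity,
                                      const void* src, size_t srcSize,
                                      const void* dict, size_t dictSize)
    {
        /* The dictionary is sticky: it applies to this frame and every following one. */
        size_t const r = ZSTD_CCtx_loadDictionary(cctx, dict, dictSize);
        if (ZSTD_isError(r)) return r;
        {   size_t const cSize = ZSTD_compress2(cctx, dst, dstCapacity, src, srcSize);
            ZSTD_CCtx_loadDictionary(cctx, NULL, 0);   /* back to no-dictionary mode */
            return cSize;
        }
    }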
/*! ZSTD_CCtx_refCDict() : Requires v1.4.0+
- * Reference a prepared dictionary, to be used for all next compressed frames.
+ * Reference a prepared dictionary, to be used for all future compressed frames.
* Note that compression parameters are enforced from within CDict,
* and supersede any compression parameter previously set within CCtx.
* The parameters ignored are labelled as "superseded-by-cdict" in the ZSTD_cParameter enum docs.
const void* prefix, size_t prefixSize);
/*! ZSTD_DCtx_loadDictionary() : Requires v1.4.0+
- * Create an internal DDict from dict buffer,
- * to be used to decompress next frames.
- * The dictionary remains valid for all future frames, until explicitly invalidated.
+ * Create an internal DDict from dict buffer, to be used to decompress all future frames.
+ * The dictionary remains valid for all future frames, until explicitly invalidated, or
+ * a new dictionary is loaded.
* @result : 0, or an error code (which can be tested with ZSTD_isError()).
* Special : Adding a NULL (or 0-size) dictionary invalidates any previous dictionary,
* meaning "return to no-dictionary mode".
* The memory for the table is allocated on the first call to refDDict, and can be
* freed with ZSTD_freeDCtx().
*
+ * If called with ZSTD_d_refMultipleDDicts disabled (the default), only one dictionary
+ * will be managed, and referencing a dictionary effectively "discards" any previous one.
+ *
* @result : 0, or an error code (which can be tested with ZSTD_isError()).
- * Note 1 : Currently, only one dictionary can be managed.
- * Referencing a new dictionary effectively "discards" any previous one.
* Special: referencing a NULL DDict means "return to no-dictionary mode".
* Note 2 : DDict is just referenced, its lifetime must outlive its usage from DCtx.
*/
* This function never fails (wide contract) */
ZSTDLIB_STATIC_API ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize);
+/*! ZSTD_CCtx_setCParams() :
+ * Set all parameters provided within @cparams into the working @cctx.
+ * Note : if parameters are modified while compression is ongoing (MT mode only),
+ *        changes to the .windowLog parameter will be ignored.
+ * @return 0 on success, or an error code (can be checked with ZSTD_isError()) */
+ZSTDLIB_STATIC_API size_t ZSTD_CCtx_setCParams(ZSTD_CCtx* cctx, ZSTD_compressionParameters cparams);
+
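A sketch of how the new setter composes with ZSTD_getCParams() from the same static-only section; the level and windowLog override are illustrative:

    #define ZSTD_STATIC_LINKING_ONLY
    #include <zstd.h>

    /* Derive a full parameter set, tweak one field, then apply the whole struct at once. */
    static size_t applyTweakedCParams(ZSTD_CCtx* cctx, unsigned long long estimatedSrcSize)
    {
        ZSTD_compressionParameters cparams = ZSTD_getCParams(19, estimatedSrcSize, 0 /* no dict */);
        cparams.windowLog = 24;                       /* illustrative override */
        return ZSTD_CCtx_setCParams(cctx, cparams);   /* 0 on success, else an error code */
    }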
/*! ZSTD_compress_advanced() :
* Note : this function is now DEPRECATED.
* It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_setParameter() and other parameter setters.
ZSTD_DEPRECATED("use ZSTD_compress2")
ZSTDLIB_STATIC_API
size_t ZSTD_compress_advanced(ZSTD_CCtx* cctx,
- void* dst, size_t dstCapacity,
- const void* src, size_t srcSize,
- const void* dict,size_t dictSize,
- ZSTD_parameters params);
+ void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize,
+ const void* dict,size_t dictSize,
+ ZSTD_parameters params);
/*! ZSTD_compress_usingCDict_advanced() :
* Note : this function is now DEPRECATED.
*/
#define ZSTD_c_prefetchCDictTables ZSTD_c_experimentalParam16
+/* ZSTD_c_enableMatchFinderFallback
+ * Allowed values are 0 (disable) and 1 (enable). The default setting is 0.
+ *
+ * Controls whether zstd will fall back to an internal matchfinder if an
+ * external matchfinder is registered and returns an error code. This fallback is
+ * block-by-block: the internal matchfinder will only be called for blocks where
+ * the external matchfinder returns an error code. Fallback compression will
+ * follow any other cParam settings, such as compression level, the same as in a
+ * normal (fully-internal) compression operation.
+ *
+ * The user is strongly encouraged to read the full external matchfinder API
+ * documentation (below) before setting this parameter. */
+#define ZSTD_c_enableMatchFinderFallback ZSTD_c_experimentalParam17
+
+/* ZSTD_c_maxBlockSize
+ * Sets an upper bound on the block size zstd will use during compression.
+ * Default is ZSTD_BLOCKSIZE_MAX.
+ */
+#define ZSTD_c_maxBlockSize ZSTD_c_experimentalParam18
+
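Both new experimental parameters go through the usual ZSTD_CCtx_setParameter() path once ZSTD_STATIC_LINKING_ONLY is defined; the 64 KiB cap below is purely illustrative and assumes the parameter accepts values up to ZSTD_BLOCKSIZE_MAX (error checking omitted for brevity):

    #define ZSTD_STATIC_LINKING_ONLY
    #include <zstd.h>

    static void configureExperimentalParams(ZSTD_CCtx* cctx)
    {
        /* Fall back to the internal matchfinder when the external one reports an error. */
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableMatchFinderFallback, 1);
        /* Cap block size below the format default (illustrative value). */
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_maxBlockSize, 64 * 1024);
    }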
/*! ZSTD_CCtx_getParameter() :
* Get the requested compression parameter value, selected by enum ZSTD_cParameter,
* and store it into int* value.
ZSTDLIB_STATIC_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel);
ZSTDLIB_STATIC_API size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); /**< note: fails if cdict==NULL */
-ZSTDLIB_STATIC_API
ZSTD_DEPRECATED("This function will likely be removed in a future release. It is misleading and has very limited utility.")
+ZSTDLIB_STATIC_API
size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); /**< note: if pledgedSrcSize is not known, use ZSTD_CONTENTSIZE_UNKNOWN */
ZSTDLIB_STATIC_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
Frame header is extracted from the beginning of compressed frame, so providing only the frame's beginning is enough.
Data fragment must be large enough to ensure successful decoding.
`ZSTD_frameHeaderSize_max` bytes is guaranteed to always be large enough.
- @result : 0 : successful decoding, the `ZSTD_frameHeader` structure is correctly filled.
- >0 : `srcSize` is too small, please provide at least @result bytes on next attempt.
+ result : 0 : successful decoding, the `ZSTD_frameHeader` structure is correctly filled.
+ >0 : `srcSize` is too small, please provide at least result bytes on next attempt.
errorCode, which can be tested using ZSTD_isError().
It fills a ZSTD_frameHeader structure with important information to correctly decode the frame,
The most memory efficient way is to use a round buffer of sufficient size.
Sufficient size is determined by invoking ZSTD_decodingBufferSize_min(),
- which can @return an error code if required value is too large for current system (in 32-bits mode).
+  which can return an error code if the required value is too large for the current system (in 32-bit mode).
In a round buffer methodology, ZSTD_decompressContinue() decompresses each block next to previous one,
up to the moment there is not enough room left in the buffer to guarantee decoding another full block,
which maximum size is provided in `ZSTD_frameHeader` structure, field `blockSizeMax`.
ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' to ZSTD_decompressContinue().
ZSTD_decompressContinue() requires this _exact_ amount of bytes, or it will fail.
- @result of ZSTD_decompressContinue() is the number of bytes regenerated within 'dst' (necessarily <= dstCapacity).
+ result of ZSTD_decompressContinue() is the number of bytes regenerated within 'dst' (necessarily <= dstCapacity).
It can be zero : it just means ZSTD_decompressContinue() has decoded some metadata item.
It can also be an error code, which can be tested with ZSTD_isError().
ZSTDLIB_STATIC_API size_t ZSTD_insertBlock (ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize); /**< insert uncompressed block into `dctx` history. Useful for multi-blocks decompression. */
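The ZSTD_decompressContinue() contract above is easier to follow next to its core loop; this is a minimal single-shot sketch (whole frame into one contiguous dst, no round buffer), with error handling reduced to early returns and all names besides the API's own being illustrative:

    #define ZSTD_STATIC_LINKING_ONLY
    #include <zstd.h>

    /* Feed exactly the number of bytes requested by ZSTD_nextSrcSizeToDecompress()
     * into ZSTD_decompressContinue(), until it requests 0 (frame complete). */
    static size_t decompressFrameBufferless(ZSTD_DCtx* dctx,
                                            void* dst, size_t dstCapacity,
                                            const void* src, size_t srcSize)
    {
        const char* ip = (const char*)src;
        const char* const iend = ip + srcSize;
        char* op = (char*)dst;
        char* const oend = op + dstCapacity;

        {   size_t const beginErr = ZSTD_decompressBegin(dctx);
            if (ZSTD_isError(beginErr)) return beginErr;
        }
        {   size_t next = ZSTD_nextSrcSizeToDecompress(dctx);
            while (next != 0) {
                size_t produced;
                if ((size_t)(iend - ip) < next) return (size_t)-1;   /* truncated input (sketch-level handling) */
                produced = ZSTD_decompressContinue(dctx, op, (size_t)(oend - op), ip, next);
                if (ZSTD_isError(produced)) return produced;
                ip += next;
                op += produced;   /* may be 0 when only metadata was decoded */
                next = ZSTD_nextSrcSizeToDecompress(dctx);
            }
        }
        return (size_t)(op - (char*)dst);   /* total bytes regenerated */
    }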
+/* ********************** EXTERNAL MATCHFINDER API **********************
+ *
+ * *** OVERVIEW ***
+ * This API allows users to replace the zstd internal block-level matchfinder
+ * with an external matchfinder function. Potential applications of the API
+ * include hardware-accelerated matchfinders and matchfinders specialized to
+ * particular types of data.
+ *
+ * See contrib/externalMatchfinder for an example program employing the
+ * external matchfinder API.
+ *
+ * *** USAGE ***
+ * The user is responsible for implementing a function of type
+ * ZSTD_externalMatchFinder_F. For each block, zstd will pass the following
+ * arguments to the user-provided function:
+ *
+ * - externalMatchState: a pointer to a user-managed state for the external
+ * matchfinder.
+ *
+ * - outSeqs, outSeqsCapacity: an output buffer for sequences produced by the
+ * external matchfinder. outSeqsCapacity is guaranteed >=
+ * ZSTD_sequenceBound(srcSize). The memory backing outSeqs is managed by
+ * the CCtx.
+ *
+ * - src, srcSize: an input buffer which the external matchfinder must parse
+ * into sequences. srcSize is guaranteed to be <= ZSTD_BLOCKSIZE_MAX.
+ *
+ * - dict, dictSize: a history buffer, which may be empty, which the external
+ * matchfinder may reference as it produces sequences for the src buffer.
+ * Currently, zstd will always pass dictSize == 0 into external matchfinders,
+ * but this will change in the future.
+ *
+ * - compressionLevel: a signed integer representing the zstd compression level
+ * set by the user for the current operation. The external matchfinder may
+ * choose to use this information to change its compression strategy and
+ * speed/ratio tradeoff. Note: The compression level does not reflect zstd
+ * parameters set through the advanced API.
+ *
+ * - windowSize: a size_t representing the maximum allowed offset for external
+ * sequences. Note that sequence offsets are sometimes allowed to exceed the
+ * windowSize if a dictionary is present, see doc/zstd_compression_format.md
+ * for details.
+ *
+ * The user-provided function shall return a size_t representing the number of
+ * sequences written to outSeqs. This return value will be treated as an error
+ * code if it is greater than outSeqsCapacity. The return value must be non-zero
+ * if srcSize is non-zero. The ZSTD_EXTERNAL_MATCHFINDER_ERROR macro is provided
+ * for convenience, but any value greater than outSeqsCapacity will be treated as
+ * an error code.
+ *
+ * If the user-provided function does not return an error code, the sequences
+ * written to outSeqs must be a valid parse of the src buffer. Data corruption may
+ * occur if the parse is not valid. A parse is defined to be valid if the
+ * following conditions hold:
+ * - The sum of matchLengths and literalLengths is equal to srcSize.
+ * - All sequences in the parse have matchLength != 0, except for the final
+ * sequence. matchLength is not constrained for the final sequence.
+ * - All offsets respect the windowSize parameter as specified in
+ * doc/zstd_compression_format.md.
+ *
+ * zstd will only validate these conditions (and fail compression if they do not
+ * hold) if the ZSTD_c_validateSequences cParam is enabled. Note that sequence
+ * validation has a performance cost.
+ *
+ * If the user-provided function returns an error, zstd will either fall back
+ * to an internal matchfinder or fail the compression operation. The user can
+ * choose between the two behaviors by setting the
+ * ZSTD_c_enableMatchFinderFallback cParam. Fallback compression will follow any
+ * other cParam settings, such as compression level, the same as in a normal
+ * compression operation.
+ *
+ * The user shall instruct zstd to use a particular ZSTD_externalMatchFinder_F
+ * function by calling ZSTD_registerExternalMatchFinder(cctx, externalMatchState,
+ * externalMatchFinder). This setting will persist until the next parameter reset
+ * of the CCtx.
+ *
+ * The externalMatchState must be initialized by the user before calling
+ * ZSTD_registerExternalMatchFinder. The user is responsible for destroying the
+ * externalMatchState.
+ *
+ * *** LIMITATIONS ***
+ * External matchfinders are compatible with all zstd compression APIs. There are
+ * only two limitations.
+ *
+ * First, the ZSTD_c_enableLongDistanceMatching cParam is not supported.
+ * COMPRESSION WILL FAIL if it is enabled and the user tries to compress with an
+ * external matchfinder.
+ * - Note that ZSTD_c_enableLongDistanceMatching is auto-enabled by default in
+ * some cases (see its documentation for details). Users must explicitly set
+ * ZSTD_c_enableLongDistanceMatching to ZSTD_ps_disable in such cases if an
+ * external matchfinder is registered.
+ * - As of this writing, ZSTD_c_enableLongDistanceMatching is disabled by default
+ *     whenever the window size (2^windowLog) is smaller than 128MB, but that's subject to change. Users should
+ * check the docs on ZSTD_c_enableLongDistanceMatching whenever the external
+ * matchfinder API is used in conjunction with advanced settings (like windowLog).
+ *
+ * Second, history buffers are not supported. Concretely, zstd will always pass
+ * dictSize == 0 to the external matchfinder (for now). This has two implications:
+ * - Dictionaries are not supported. Compression will *not* fail if the user
+ * references a dictionary, but the dictionary won't have any effect.
+ * - Stream history is not supported. All compression APIs, including streaming
+ * APIs, work with the external matchfinder, but the external matchfinder won't
+ * receive any history from the previous block. Each block is an independent chunk.
+ *
+ * Long-term, we plan to overcome both limitations. There is no technical blocker to
+ * overcoming them. It is purely a question of engineering effort.
+ */
+
+#define ZSTD_EXTERNAL_MATCHFINDER_ERROR ((size_t)(-1))
+
+typedef size_t ZSTD_externalMatchFinder_F (
+ void* externalMatchState,
+ ZSTD_Sequence* outSeqs, size_t outSeqsCapacity,
+ const void* src, size_t srcSize,
+ const void* dict, size_t dictSize,
+ int compressionLevel,
+ size_t windowSize
+);
+
+/*! ZSTD_registerExternalMatchFinder() :
+ * Instruct zstd to use an external matchfinder function.
+ *
+ * The externalMatchState must be initialized by the caller, and the caller is
+ * responsible for managing its lifetime. This parameter is sticky across
+ * compressions. It will remain set until the user explicitly resets compression
+ * parameters.
+ *
+ * The user is strongly encouraged to read the full API documentation (above)
+ * before calling this function. */
+ZSTDLIB_STATIC_API void
+ZSTD_registerExternalMatchFinder(
+ ZSTD_CCtx* cctx,
+ void* externalMatchState,
+ ZSTD_externalMatchFinder_F* externalMatchFinder
+);
+
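To make the registration flow concrete, a minimal sketch of an external matchfinder that always defers to the internal one (so fallback must be enabled); every name other than the API's own is illustrative, and the state pointer is deliberately unused:

    #define ZSTD_STATIC_LINKING_ONLY
    #include <zstd.h>

    /* Returning the error sentinel hands every block back to zstd's internal matchfinder. */
    static size_t deferringMatchFinder(void* externalMatchState,
                                       ZSTD_Sequence* outSeqs, size_t outSeqsCapacity,
                                       const void* src, size_t srcSize,
                                       const void* dict, size_t dictSize,
                                       int compressionLevel, size_t windowSize)
    {
        (void)externalMatchState; (void)outSeqs; (void)outSeqsCapacity;
        (void)src; (void)srcSize; (void)dict; (void)dictSize;
        (void)compressionLevel; (void)windowSize;
        return ZSTD_EXTERNAL_MATCHFINDER_ERROR;
    }

    static void attachMatchFinder(ZSTD_CCtx* cctx)
    {
        /* Registration is sticky until the next parameter reset of the CCtx. */
        ZSTD_registerExternalMatchFinder(cctx, NULL, deferringMatchFinder);
        /* Without fallback, the error return above would fail the compression. */
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableMatchFinderFallback, 1);
    }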
#endif /* ZSTD_H_ZSTD_STATIC_LINKING_ONLY */
#if defined (__cplusplus)
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/* ===== ZSTDERRORLIB_API : control library symbols visibility ===== */
-#ifndef ZSTDERRORLIB_VISIBILITY
-# if defined(__GNUC__) && (__GNUC__ >= 4)
-# define ZSTDERRORLIB_VISIBILITY __attribute__ ((visibility ("default")))
+#ifndef ZSTDERRORLIB_VISIBLE
+ /* Backwards compatibility with old macro name */
+# ifdef ZSTDERRORLIB_VISIBILITY
+# define ZSTDERRORLIB_VISIBLE ZSTDERRORLIB_VISIBILITY
+# elif defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__MINGW32__)
+# define ZSTDERRORLIB_VISIBLE __attribute__ ((visibility ("default")))
# else
-# define ZSTDERRORLIB_VISIBILITY
+# define ZSTDERRORLIB_VISIBLE
# endif
#endif
+
+#ifndef ZSTDERRORLIB_HIDDEN
+# if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__MINGW32__)
+# define ZSTDERRORLIB_HIDDEN __attribute__ ((visibility ("hidden")))
+# else
+# define ZSTDERRORLIB_HIDDEN
+# endif
+#endif
+
#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
-# define ZSTDERRORLIB_API __declspec(dllexport) ZSTDERRORLIB_VISIBILITY
+# define ZSTDERRORLIB_API __declspec(dllexport) ZSTDERRORLIB_VISIBLE
#elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1)
-# define ZSTDERRORLIB_API __declspec(dllimport) ZSTDERRORLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
+# define ZSTDERRORLIB_API __declspec(dllimport) ZSTDERRORLIB_VISIBLE /* Not required, but allows the compiler to generate better code, saving a function pointer load from the IAT and an indirect jump. */
#else
-# define ZSTDERRORLIB_API ZSTDERRORLIB_VISIBILITY
+# define ZSTDERRORLIB_API ZSTDERRORLIB_VISIBLE
#endif
/*-*********************************************
ZSTD_error_frameParameter_windowTooLarge = 16,
ZSTD_error_corruption_detected = 20,
ZSTD_error_checksum_wrong = 22,
+ ZSTD_error_literals_headerWrong = 24,
ZSTD_error_dictionary_corrupted = 30,
ZSTD_error_dictionary_wrong = 32,
ZSTD_error_dictionaryCreation_failed = 34,
ZSTD_error_parameter_unsupported = 40,
+ ZSTD_error_parameter_combination_unsupported = 41,
ZSTD_error_parameter_outOfBound = 42,
ZSTD_error_tableLog_tooLarge = 44,
ZSTD_error_maxSymbolValue_tooLarge = 46,
ZSTD_error_seekableIO = 102,
ZSTD_error_dstBuffer_wrong = 104,
ZSTD_error_srcBuffer_wrong = 105,
+ ZSTD_error_externalMatchFinder_failed = 106,
ZSTD_error_maxCode = 120 /* never EVER use this value directly, it can change in future versions! Use ZSTD_isError() instead */
} ZSTD_ErrorCode;
# ################################################################
-# Copyright (c) Yann Collet, Facebook, Inc.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
cont->timeSpent_ns += (unsigned long long)loopDuration_ns;
/* estimate nbLoops for next run to last approximately 1 second */
- if (loopDuration_ns > (runBudget_ns / 50)) {
+ if (loopDuration_ns > ((double)runBudget_ns / 50)) {
double const fastestRun_ns = MIN(bestRunTime.nanoSecPerRun, newRunTime.nanoSecPerRun);
- cont->nbLoops = (unsigned)(runBudget_ns / fastestRun_ns) + 1;
+ cont->nbLoops = (unsigned)((double)runBudget_ns / fastestRun_ns) + 1;
} else {
/* previous run was too short : blindly increase workload by x multiplier */
const unsigned multiplier = 10;
cont->nbLoops *= multiplier;
}
- if(loopDuration_ns < runTimeMin_ns) {
+ if(loopDuration_ns < (double)runTimeMin_ns) {
/* don't report results for which benchmark run time was too small : increased risks of rounding errors */
assert(completed == 0);
continue;
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
DISPLAYUPDATE_PROGRESS(
"\rRead : %u MB ==> %.2f%% ",
(unsigned)(inFileSize>>20),
- (double)outFileSize/inFileSize*100)
+ (double)outFileSize/(double)inFileSize*100)
} else {
DISPLAYUPDATE_PROGRESS(
"\rRead : %u / %u MB ==> %.2f%% ",
(unsigned)(inFileSize>>20), (unsigned)(srcFileSize>>20),
- (double)outFileSize/inFileSize*100);
+ (double)outFileSize/(double)inFileSize*100);
} }
while (1) {
if (srcFileSize == UTIL_FILESIZE_UNKNOWN)
DISPLAYUPDATE_PROGRESS("\rRead : %u MB ==> %.2f%%",
(unsigned)(inFileSize>>20),
- (double)outFileSize/inFileSize*100)
+ (double)outFileSize/(double)inFileSize*100)
else
DISPLAYUPDATE_PROGRESS("\rRead : %u / %u MB ==> %.2f%%",
(unsigned)(inFileSize>>20), (unsigned)(srcFileSize>>20),
- (double)outFileSize/inFileSize*100);
+ (double)outFileSize/(double)inFileSize*100);
if (ret == LZMA_STREAM_END) break;
}
if (srcFileSize == UTIL_FILESIZE_UNKNOWN) {
DISPLAYUPDATE_PROGRESS("\rRead : %u MB ==> %.2f%%",
(unsigned)(inFileSize>>20),
- (double)outFileSize/inFileSize*100)
+ (double)outFileSize/(double)inFileSize*100)
} else {
DISPLAYUPDATE_PROGRESS("\rRead : %u / %u MB ==> %.2f%%",
(unsigned)(inFileSize>>20), (unsigned)(srcFileSize>>20),
- (double)outFileSize/inFileSize*100);
+ (double)outFileSize/(double)inFileSize*100);
}
/* Write Block */
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
#define UTIL_DISPLAY(...) fprintf(stderr, __VA_ARGS__)
#define UTIL_DISPLAYLEVEL(l, ...) { if (g_utilDisplayLevel>=l) { UTIL_DISPLAY(__VA_ARGS__); } }
+static int g_traceDepth = 0;
+int g_traceFileStat = 0;
+
+#define UTIL_TRACE_CALL(...) \
+ { \
+ if (g_traceFileStat) { \
+ UTIL_DISPLAY("Trace:FileStat: %*s> ", g_traceDepth, ""); \
+ UTIL_DISPLAY(__VA_ARGS__); \
+ UTIL_DISPLAY("\n"); \
+ ++g_traceDepth; \
+ } \
+ }
+
+#define UTIL_TRACE_RET(ret) \
+ { \
+ if (g_traceFileStat) { \
+ --g_traceDepth; \
+ UTIL_DISPLAY("Trace:FileStat: %*s< %d\n", g_traceDepth, "", (ret)); \
+ } \
+ }
+
/* A modified version of realloc().
* If UTIL_realloc() fails the original block is freed.
*/
* Functions
***************************************/
+void UTIL_traceFileStat(void)
+{
+ g_traceFileStat = 1;
+}
+
int UTIL_stat(const char* filename, stat_t* statbuf)
{
+ int ret;
+ UTIL_TRACE_CALL("UTIL_stat(%s)", filename);
#if defined(_MSC_VER)
- return !_stat64(filename, statbuf);
+ ret = !_stat64(filename, statbuf);
#elif defined(__MINGW32__) && defined (__MSVCRT__)
- return !_stati64(filename, statbuf);
+ ret = !_stati64(filename, statbuf);
#else
- return !stat(filename, statbuf);
+ ret = !stat(filename, statbuf);
#endif
+ UTIL_TRACE_RET(ret);
+ return ret;
}
int UTIL_isRegularFile(const char* infilename)
{
stat_t statbuf;
- return UTIL_stat(infilename, &statbuf) && UTIL_isRegularFileStat(&statbuf);
+ int ret;
+ UTIL_TRACE_CALL("UTIL_isRegularFile(%s)", infilename);
+ ret = UTIL_stat(infilename, &statbuf) && UTIL_isRegularFileStat(&statbuf);
+ UTIL_TRACE_RET(ret);
+ return ret;
}
int UTIL_isRegularFileStat(const stat_t* statbuf)
int UTIL_chmod(char const* filename, const stat_t* statbuf, mode_t permissions)
{
stat_t localStatBuf;
+ UTIL_TRACE_CALL("UTIL_chmod(%s, %u)", filename, (unsigned)permissions);
if (statbuf == NULL) {
- if (!UTIL_stat(filename, &localStatBuf)) return 0;
+ if (!UTIL_stat(filename, &localStatBuf)) {
+ UTIL_TRACE_RET(0);
+ return 0;
+ }
statbuf = &localStatBuf;
}
- if (!UTIL_isRegularFileStat(statbuf)) return 0; /* pretend success, but don't change anything */
- return chmod(filename, permissions);
+ if (!UTIL_isRegularFileStat(statbuf)) {
+ UTIL_TRACE_RET(0);
+ return 0; /* pretend success, but don't change anything */
+ }
+ UTIL_TRACE_CALL("chmod");
+ {
+ int const ret = chmod(filename, permissions);
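+        /* two RETs below: the first closes the nested "chmod" trace, the second closes UTIL_chmod's own trace */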
+ UTIL_TRACE_RET(ret);
+ UTIL_TRACE_RET(ret);
+ return ret;
+ }
}
/* set access and modification times */
int UTIL_utime(const char* filename, const stat_t *statbuf)
{
int ret;
+ UTIL_TRACE_CALL("UTIL_utime(%s)", filename);
/* We check that st_mtime is a macro here in order to give us confidence
* that struct stat has a struct timespec st_mtim member. We need this
* check because there are some platforms that claim to be POSIX 2008
* compliant but which do not have st_mtim... */
#if (PLATFORM_POSIX_VERSION >= 200809L) && defined(st_mtime)
- /* (atime, mtime) */
- struct timespec timebuf[2] = { {0, UTIME_NOW} };
- timebuf[1] = statbuf->st_mtim;
- ret = utimensat(AT_FDCWD, filename, timebuf, 0);
+ {
+ /* (atime, mtime) */
+ struct timespec timebuf[2] = { {0, UTIME_NOW} };
+ timebuf[1] = statbuf->st_mtim;
+ ret = utimensat(AT_FDCWD, filename, timebuf, 0);
+ }
#else
- struct utimbuf timebuf;
- timebuf.actime = time(NULL);
- timebuf.modtime = statbuf->st_mtime;
- ret = utime(filename, &timebuf);
+ {
+ struct utimbuf timebuf;
+ timebuf.actime = time(NULL);
+ timebuf.modtime = statbuf->st_mtime;
+ ret = utime(filename, &timebuf);
+ }
#endif
errno = 0;
+ UTIL_TRACE_RET(ret);
return ret;
}
int UTIL_setFileStat(const char *filename, const stat_t *statbuf)
{
int res = 0;
-
stat_t curStatBuf;
- if (!UTIL_stat(filename, &curStatBuf) || !UTIL_isRegularFileStat(&curStatBuf))
+ UTIL_TRACE_CALL("UTIL_setFileStat(%s)", filename);
+
+ if (!UTIL_stat(filename, &curStatBuf) || !UTIL_isRegularFileStat(&curStatBuf)) {
+ UTIL_TRACE_RET(-1);
return -1;
+ }
/* set access and modification times */
res += UTIL_utime(filename, statbuf);
res += UTIL_chmod(filename, &curStatBuf, statbuf->st_mode & 07777); /* Copy file permissions */
errno = 0;
+ UTIL_TRACE_RET(-res);
return -res; /* number of errors is returned */
}
int UTIL_isDirectory(const char* infilename)
{
stat_t statbuf;
- return UTIL_stat(infilename, &statbuf) && UTIL_isDirectoryStat(&statbuf);
+ int ret;
+ UTIL_TRACE_CALL("UTIL_isDirectory(%s)", infilename);
+ ret = UTIL_stat(infilename, &statbuf) && UTIL_isDirectoryStat(&statbuf);
+ UTIL_TRACE_RET(ret);
+ return ret;
}
int UTIL_isDirectoryStat(const stat_t* statbuf)
int UTIL_isSameFile(const char* fName1, const char* fName2)
{
+ int ret;
assert(fName1 != NULL); assert(fName2 != NULL);
+ UTIL_TRACE_CALL("UTIL_isSameFile(%s, %s)", fName1, fName2);
#if defined(_MSC_VER) || defined(_WIN32)
/* note : Visual does not support file identification by inode.
* inode does not work on Windows, even with a posix layer, like msys2.
* The following work-around is limited to detecting exact name repetition only,
* aka `filename` is considered different from `subdir/../filename` */
- return !strcmp(fName1, fName2);
+ ret = !strcmp(fName1, fName2);
#else
{ stat_t file1Stat;
stat_t file2Stat;
- return UTIL_stat(fName1, &file1Stat)
+ ret = UTIL_stat(fName1, &file1Stat)
&& UTIL_stat(fName2, &file2Stat)
&& (file1Stat.st_dev == file2Stat.st_dev)
&& (file1Stat.st_ino == file2Stat.st_ino);
}
#endif
+ UTIL_TRACE_RET(ret);
+ return ret;
}
/* UTIL_isFIFO : distinguish named pipes */
int UTIL_isFIFO(const char* infilename)
{
+ UTIL_TRACE_CALL("UTIL_isFIFO(%s)", infilename);
/* macro guards, as defined in : https://linux.die.net/man/2/lstat */
#if PLATFORM_POSIX_VERSION >= 200112L
- stat_t statbuf;
- if (UTIL_stat(infilename, &statbuf) && UTIL_isFIFOStat(&statbuf)) return 1;
+ {
+ stat_t statbuf;
+ if (UTIL_stat(infilename, &statbuf) && UTIL_isFIFOStat(&statbuf)) {
+ UTIL_TRACE_RET(1);
+ return 1;
+ }
+ }
#endif
(void)infilename;
+ UTIL_TRACE_RET(0);
return 0;
}
int UTIL_isLink(const char* infilename)
{
+ UTIL_TRACE_CALL("UTIL_isLink(%s)", infilename);
/* macro guards, as defined in : https://linux.die.net/man/2/lstat */
#if PLATFORM_POSIX_VERSION >= 200112L
- stat_t statbuf;
- int const r = lstat(infilename, &statbuf);
- if (!r && S_ISLNK(statbuf.st_mode)) return 1;
+ {
+ stat_t statbuf;
+ int const r = lstat(infilename, &statbuf);
+ if (!r && S_ISLNK(statbuf.st_mode)) {
+ UTIL_TRACE_RET(1);
+ return 1;
+ }
+ }
#endif
(void)infilename;
+ UTIL_TRACE_RET(0);
return 0;
}
int UTIL_isConsole(FILE* file)
{
+ int ret;
+ UTIL_TRACE_CALL("UTIL_isConsole(%d)", fileno(file));
if (file == stdin && g_fakeStdinIsConsole)
- return 1;
- if (file == stderr && g_fakeStderrIsConsole)
- return 1;
- if (file == stdout && g_fakeStdoutIsConsole)
- return 1;
- return IS_CONSOLE(file);
+ ret = 1;
+ else if (file == stderr && g_fakeStderrIsConsole)
+ ret = 1;
+ else if (file == stdout && g_fakeStdoutIsConsole)
+ ret = 1;
+ else
+ ret = IS_CONSOLE(file);
+ UTIL_TRACE_RET(ret);
+ return ret;
}
void UTIL_fakeStdinIsConsole(void)
U64 UTIL_getFileSize(const char* infilename)
{
stat_t statbuf;
- if (!UTIL_stat(infilename, &statbuf)) return UTIL_FILESIZE_UNKNOWN;
- return UTIL_getFileSizeStat(&statbuf);
+ UTIL_TRACE_CALL("UTIL_getFileSize(%s)", infilename);
+ if (!UTIL_stat(infilename, &statbuf)) {
+ UTIL_TRACE_RET(-1);
+ return UTIL_FILESIZE_UNKNOWN;
+ }
+ {
+ U64 const size = UTIL_getFileSizeStat(&statbuf);
+ UTIL_TRACE_RET((int)size);
+ return size;
+ }
}
U64 UTIL_getFileSizeStat(const stat_t* statbuf)
{
U64 total = 0;
unsigned n;
+ UTIL_TRACE_CALL("UTIL_getTotalFileSize(%u)", nbFiles);
for (n=0; n<nbFiles; n++) {
U64 const size = UTIL_getFileSize(fileNamesTable[n]);
- if (size == UTIL_FILESIZE_UNKNOWN) return UTIL_FILESIZE_UNKNOWN;
+ if (size == UTIL_FILESIZE_UNKNOWN) {
+ UTIL_TRACE_RET(-1);
+ return UTIL_FILESIZE_UNKNOWN;
+ }
total += size;
}
+ UTIL_TRACE_RET((int)total);
return total;
}
/*
- * Copyright (c) Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
void UTIL_fakeStdoutIsConsole(void);
void UTIL_fakeStderrIsConsole(void);
+/**
+ * Emit traces for functions that read, or modify file metadata.
+ */
+void UTIL_traceFileStat(void);
+
#define UTIL_FILESIZE_UNKNOWN ((U64)(-1))
U64 UTIL_getFileSize(const char* infilename);
U64 UTIL_getTotalFileSize(const char* const * fileNamesTable, unsigned nbFiles);
/*! UTIL_expandFNT() :
* read names from @fnt, and expand those corresponding to directories
* update @fnt, now containing only file names,
- * @return : 0 in case of success, 1 if error
* note : in case of error, @fnt[0] is NULL
*/
void UTIL_expandFNT(FileNamesTable** fnt, int followLinks);
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
BEGIN
BLOCK "040904B0"
BEGIN
- VALUE "CompanyName", "Yann Collet, Facebook, Inc."
+ VALUE "CompanyName", "Meta Platforms, Inc."
VALUE "FileDescription", "Zstandard - Fast and efficient compression algorithm"
VALUE "FileVersion", ZSTD_VERSION_STRING
VALUE "InternalName", "zstd.exe"
- VALUE "LegalCopyright", "Copyright (c) 2013-present, Yann Collet, Facebook, Inc."
+ VALUE "LegalCopyright", "Copyright (c) Meta Platforms, Inc. and affiliates."
VALUE "OriginalFilename", "zstd.exe"
VALUE "ProductName", "Zstandard"
VALUE "ProductVersion", ZSTD_VERSION_STRING
.
-.TH "ZSTD" "1" "August 2022" "zstd 1.5.3" "User Commands"
+.TH "ZSTD" "1" "December 2022" "zstd 1.5.3" "User Commands"
.
.SH "NAME"
\fBzstd\fR \- zstd, zstdmt, unzstd, zstdcat \- Compress or decompress \.zst files
\fBzstdcat\fR is equivalent to \fBzstd \-dcf\fR
.
.SH "DESCRIPTION"
-\fBzstd\fR is a fast lossless compression algorithm and data compression tool, with command line syntax similar to \fBgzip (1)\fR and \fBxz (1)\fR\. It is based on the \fBLZ77\fR family, with further FSE & huff0 entropy stages\. \fBzstd\fR offers highly configurable compression speed, from fast modes at > 200 MB/s per core, to strong modes with excellent compression ratios\. It also features a very fast decoder, with speeds > 500 MB/s per core\.
+\fBzstd\fR is a fast lossless compression algorithm and data compression tool, with command line syntax similar to \fBgzip\fR(1) and \fBxz\fR(1)\. It is based on the \fBLZ77\fR family, with further FSE & huff0 entropy stages\. \fBzstd\fR offers highly configurable compression speed, from fast modes at > 200 MB/s per core, to strong modes with excellent compression ratios\. It also features a very fast decoder, with speeds > 500 MB/s per core\.
.
.P
-\fBzstd\fR command line syntax is generally similar to gzip, but features the following differences :
+\fBzstd\fR command line syntax is generally similar to gzip, but features the following differences:
.
.IP "\(bu" 4
Source files are preserved by default\. It\'s possible to remove them automatically by using the \fB\-\-rm\fR command\.
.IP "\(bu" 4
\fBzstd\fR does not accept input from console, though it does accept \fBstdin\fR when it\'s not the console\.
.
+.IP "\(bu" 4
+\fBzstd\fR does not store the input\'s filename or attributes, only its contents\.
+.
.IP "" 0
.
.P
-\fBzstd\fR processes each \fIfile\fR according to the selected operation mode\. If no \fIfiles\fR are given or \fIfile\fR is \fB\-\fR, \fBzstd\fR reads from standard input and writes the processed data to standard output\. \fBzstd\fR will refuse to write compressed data to standard output if it is a terminal : it will display an error message and skip the \fIfile\fR\. Similarly, \fBzstd\fR will refuse to read compressed data from standard input if it is a terminal\.
+\fBzstd\fR processes each \fIfile\fR according to the selected operation mode\. If no \fIfiles\fR are given or \fIfile\fR is \fB\-\fR, \fBzstd\fR reads from standard input and writes the processed data to standard output\. \fBzstd\fR will refuse to write compressed data to standard output if it is a terminal: it will display an error message and skip the file\. Similarly, \fBzstd\fR will refuse to read compressed data from standard input if it is a terminal\.
.
.P
Unless \fB\-\-stdout\fR or \fB\-o\fR is specified, \fIfiles\fR are written to a new file whose name is derived from the source \fIfile\fR name:
.
.IP "" 0
.
-.SS "Concatenation with \.zst files"
+.SS "Concatenation with \.zst Files"
It is possible to concatenate multiple \fB\.zst\fR files\. \fBzstd\fR will decompress such an agglomerated file as if it were a single \fB\.zst\fR file\.
.
.SH "OPTIONS"
.
-.SS "Integer suffixes and special values"
+.SS "Integer Suffixes and Special Values"
In most places where an integer argument is expected, an optional suffix is supported to easily indicate large integers\. There must be no space between the integer and the suffix\.
.
.TP
\fBMiB\fR
Multiply the integer by 1,048,576 (2^20)\. \fBMi\fR, \fBM\fR, and \fBMB\fR are accepted as synonyms for \fBMiB\fR\.
.
-.SS "Operation mode"
+.SS "Operation Mode"
If multiple operation mode options are given, the last one takes effect\.
.
.TP
.
.TP
\fB\-b#\fR
-Benchmark file(s) using compression level #
+Benchmark file(s) using compression level \fI#\fR\. See \fIBENCHMARK\fR below for a description of this operation\.
.
.TP
-\fB\-\-train FILEs\fR
-Use FILEs as a training set to create a dictionary\. The training set should contain a lot of small files (> 100)\.
+\fB\-\-train FILES\fR
+Use \fIFILES\fR as a training set to create a dictionary\. The training set should contain a lot of small files (> 100)\. See \fIDICTIONARY BUILDER\fR below for a description of this operation\.
.
.TP
\fB\-l\fR, \fB\-\-list\fR
Display information related to a zstd compressed file, such as size, ratio, and checksum\. Some of these fields may not be available\. This command\'s output can be augmented with the \fB\-v\fR modifier\.
.
-.SS "Operation modifiers"
+.SS "Operation Modifiers"
.
.IP "\(bu" 4
-\fB\-#\fR: \fB#\fR compression level [1\-19] (default: 3)
+\fB\-#\fR: selects \fB#\fR compression level [1\-19] (default: 3)
.
.IP "\(bu" 4
\fB\-\-ultra\fR: unlocks high compression levels 20+ (maximum 22), using a lot more memory\. Note that decompression will also require more memory when using these levels\.
\fB\-T#\fR, \fB\-\-threads=#\fR: Compress using \fB#\fR working threads (default: 1)\. If \fB#\fR is 0, attempt to detect and use the number of physical CPU cores\. In all cases, the nb of threads is capped to \fBZSTDMT_NBWORKERS_MAX\fR, which is either 64 in 32\-bit mode, or 256 for 64\-bit environments\. This modifier does nothing if \fBzstd\fR is compiled without multithread support\.
.
.IP "\(bu" 4
-\fB\-\-single\-thread\fR: Use a single thread for both I/O and compression\. As compression is serialized with I/O, this can be slightly slower\. Single\-thread mode features significantly lower memory usage, which can be useful for systems with limited amount of memory, such as 32\-bit systems\. Note 1 : this mode is the only available one when multithread support is disabled\. Note 2 : this mode is different from \fB\-T1\fR, which spawns 1 compression thread in parallel with I/O\. Final compressed result is also slightly different from \fB\-T1\fR\.
+\fB\-\-single\-thread\fR: Use a single thread for both I/O and compression\. As compression is serialized with I/O, this can be slightly slower\. Single\-thread mode features significantly lower memory usage, which can be useful for systems with limited amount of memory, such as 32\-bit systems\.
+.
+.IP
+Note 1: this mode is the only available one when multithread support is disabled\.
+.
+.IP
+Note 2: this mode is different from \fB\-T1\fR, which spawns 1 compression thread in parallel with I/O\. Final compressed result is also slightly different from \fB\-T1\fR\.
.
.IP "\(bu" 4
\fB\-\-auto\-threads={physical,logical} (default: physical)\fR: When using a default amount of threads via \fB\-T0\fR, choose the default based on the number of detected physical or logical cores\.
.
.IP "\(bu" 4
-\fB\-\-adapt[=min=#,max=#]\fR : \fBzstd\fR will dynamically adapt compression level to perceived I/O conditions\. Compression level adaptation can be observed live by using command \fB\-v\fR\. Adaptation can be constrained between supplied \fBmin\fR and \fBmax\fR levels\. The feature works when combined with multi\-threading and \fB\-\-long\fR mode\. It does not work with \fB\-\-single\-thread\fR\. It sets window size to 8 MB by default (can be changed manually, see \fBwlog\fR)\. Due to the chaotic nature of dynamic adaptation, compressed result is not reproducible\. \fInote\fR : at the time of this writing, \fB\-\-adapt\fR can remain stuck at low speed when combined with multiple worker threads (>=2)\.
+\fB\-\-adapt[=min=#,max=#]\fR: \fBzstd\fR will dynamically adapt compression level to perceived I/O conditions\. Compression level adaptation can be observed live by using command \fB\-v\fR\. Adaptation can be constrained between supplied \fBmin\fR and \fBmax\fR levels\. The feature works when combined with multi\-threading and \fB\-\-long\fR mode\. It does not work with \fB\-\-single\-thread\fR\. It sets window size to 8 MiB by default (can be changed manually, see \fBwlog\fR)\. Due to the chaotic nature of dynamic adaptation, compressed result is not reproducible\.
+.
+.IP
+\fINote\fR: at the time of this writing, \fB\-\-adapt\fR can remain stuck at low speed when combined with multiple worker threads (>=2)\.
.
.IP "\(bu" 4
\fB\-\-long[=#]\fR: enables long distance matching with \fB#\fR \fBwindowLog\fR, if \fB#\fR is not present it defaults to \fB27\fR\. This increases the window size (\fBwindowLog\fR) and memory usage for both the compressor and decompressor\. This setting is designed to improve the compression ratio for files with long matches at a large distance\.
\fB\-D DICT\fR: use \fBDICT\fR as Dictionary to compress or decompress FILE(s)
.
.IP "\(bu" 4
-\fB\-\-patch\-from FILE\fR: Specify the file to be used as a reference point for zstd\'s diff engine\. This is effectively dictionary compression with some convenient parameter selection, namely that windowSize > srcSize\.
+\fB\-\-patch\-from FILE\fR: Specify the file to be used as a reference point for zstd\'s diff engine\. This is effectively dictionary compression with some convenient parameter selection, namely that \fIwindowSize\fR > \fIsrcSize\fR\.
+.
+.IP
+Note: cannot use both this and \fB\-D\fR together\.
.
.IP
-Note: cannot use both this and \-D together Note: \fB\-\-long\fR mode will be automatically activated if chainLog < fileLog (fileLog being the windowLog required to cover the whole file)\. You can also manually force it\. Note: for all levels, you can use \-\-patch\-from in \-\-single\-thread mode to improve compression ratio at the cost of speed Note: for level 19, you can get increased compression ratio at the cost of speed by specifying \fB\-\-zstd=targetLength=\fR to be something large (i\.e\. 4096), and by setting a large \fB\-\-zstd=chainLog=\fR
+Note: \fB\-\-long\fR mode will be automatically activated if \fIchainLog\fR < \fIfileLog\fR (\fIfileLog\fR being the \fIwindowLog\fR required to cover the whole file)\. You can also manually force it\.
+.
+.IP
+Note: for all levels, you can use \fB\-\-patch\-from\fR in \fB\-\-single\-thread\fR mode to improve compression ratio at the cost of speed\.
+.
+.IP
+Note: for level 19, you can get increased compression ratio at the cost of speed by specifying \fB\-\-zstd=targetLength=\fR to be something large (i\.e\. 4096), and by setting a large \fB\-\-zstd=chainLog=\fR\.
.
.IP "\(bu" 4
-\fB\-\-rsyncable\fR : \fBzstd\fR will periodically synchronize the compression state to make the compressed file more rsync\-friendly\. There is a negligible impact to compression ratio, and the faster compression levels will see a small compression speed hit\. This feature does not work with \fB\-\-single\-thread\fR\. You probably don\'t want to use it with long range mode, since it will decrease the effectiveness of the synchronization points, but your mileage may vary\.
+\fB\-\-rsyncable\fR: \fBzstd\fR will periodically synchronize the compression state to make the compressed file more rsync\-friendly\. There is a negligible impact to compression ratio, and the faster compression levels will see a small compression speed hit\. This feature does not work with \fB\-\-single\-thread\fR\. You probably don\'t want to use it with long range mode, since it will decrease the effectiveness of the synchronization points, but your mileage may vary\.
.
.IP "\(bu" 4
\fB\-C\fR, \fB\-\-[no\-]check\fR: add integrity check computed from uncompressed data (default: enabled)
.
.IP "\(bu" 4
-\fB\-\-[no\-]content\-size\fR: enable / disable whether or not the original size of the file is placed in the header of the compressed file\. The default option is \-\-content\-size (meaning that the original size will be placed in the header)\.
+\fB\-\-[no\-]content\-size\fR: enable / disable whether or not the original size of the file is placed in the header of the compressed file\. The default option is \fB\-\-content\-size\fR (meaning that the original size will be placed in the header)\.
.
.IP "\(bu" 4
\fB\-\-no\-dictID\fR: do not store dictionary ID within frame header (dictionary compression)\. The decoder will have to rely on implicit knowledge about which dictionary to use, it won\'t be able to check if it\'s correct\.
.
.IP "\(bu" 4
-\fB\-M#\fR, \fB\-\-memory=#\fR: Set a memory usage limit\. By default, \fBzstd\fR uses 128 MB for decompression as the maximum amount of memory the decompressor is allowed to use, but you can override this manually if need be in either direction (i\.e\. you can increase or decrease it)\.
+\fB\-M#\fR, \fB\-\-memory=#\fR: Set a memory usage limit\. By default, \fBzstd\fR uses 128 MiB for decompression as the maximum amount of memory the decompressor is allowed to use, but you can override this manually if need be in either direction (i\.e\. you can increase or decrease it)\.
.
.IP
-This is also used during compression when using with \-\-patch\-from=\. In this case, this parameter overrides that maximum size allowed for a dictionary\. (128 MB)\.
+This is also used during compression when used with \fB\-\-patch\-from=\fR\. In this case, this parameter overrides the maximum size allowed for a dictionary (128 MiB)\.
.
.IP
-Additionally, this can be used to limit memory for dictionary training\. This parameter overrides the default limit of 2 GB\. zstd will load training samples up to the memory limit and ignore the rest\.
+Additionally, this can be used to limit memory for dictionary training\. This parameter overrides the default limit of 2 GiB\. zstd will load training samples up to the memory limit and ignore the rest\.
.
.IP "\(bu" 4
-\fB\-\-stream\-size=#\fR : Sets the pledged source size of input coming from a stream\. This value must be exact, as it will be included in the produced frame header\. Incorrect stream sizes will cause an error\. This information will be used to better optimize compression parameters, resulting in better and potentially faster compression, especially for smaller source sizes\.
+\fB\-\-stream\-size=#\fR: Sets the pledged source size of input coming from a stream\. This value must be exact, as it will be included in the produced frame header\. Incorrect stream sizes will cause an error\. This information will be used to better optimize compression parameters, resulting in better and potentially faster compression, especially for smaller source sizes\.
.
.IP "\(bu" 4
\fB\-\-size\-hint=#\fR: When handling input from a stream, \fBzstd\fR must guess how large the source size will be when optimizing compression parameters\. If the stream size is relatively small, this guess may be a poor one, resulting in a higher compression ratio than expected\. This feature allows for controlling the guess when needed\. Exact guesses result in better compression ratios\. Overestimates result in slightly degraded compression ratios, while underestimates may result in significant degradation\.
.
.IP "\(bu" 4
-\fB\-o FILE\fR: save result into \fBFILE\fR
+\fB\-o FILE\fR: save result into \fBFILE\fR\.
.
.IP "\(bu" 4
\fB\-f\fR, \fB\-\-force\fR: disable input and output checks\. Allows overwriting existing files, input from console, output to stdout, operating on links, block devices, etc\. During decompression and when the output destination is stdout, pass\-through unrecognized formats as\-is\.
\fB\-\-[no\-]sparse\fR: enable / disable sparse FS support, to make files with many zeroes smaller on disk\. Creating sparse files may save disk space and speed up decompression by reducing the amount of disk I/O\. default: enabled when output is into a file, and disabled when output is stdout\. This setting overrides default and can force sparse mode over stdout\.
.
.IP "\(bu" 4
-\fB\-\-[no\-]pass\-through\fR enable / disable passing through uncompressed files as\-is\. During decompression when pass\-through is enabled, unrecognized formats will be copied as\-is from the input to the output\. By default, pass\-through will occur when the output destination is stdout and the force (\-f) option is set\.
+\fB\-\-[no\-]pass\-through\fR enable / disable passing through uncompressed files as\-is\. During decompression when pass\-through is enabled, unrecognized formats will be copied as\-is from the input to the output\. By default, pass\-through will occur when the output destination is stdout and the force (\fB\-f\fR) option is set\.
.
.IP "\(bu" 4
-\fB\-\-rm\fR: remove source file(s) after successful compression or decompression\. If used in combination with \-o, will trigger a confirmation prompt (which can be silenced with \-f), as this is a destructive operation\.
+\fB\-\-rm\fR: remove source file(s) after successful compression or decompression\. If used in combination with \fB\-o\fR, will trigger a confirmation prompt (which can be silenced with \fB\-f\fR), as this is a destructive operation\.
.
.IP "\(bu" 4
\fB\-k\fR, \fB\-\-keep\fR: keep source file(s) after successful compression or decompression\. This is the default behavior\.
\fB\-h\fR/\fB\-H\fR, \fB\-\-help\fR: display help/long help and exit
.
.IP "\(bu" 4
-\fB\-V\fR, \fB\-\-version\fR: display version number and exit\. Advanced : \fB\-vV\fR also displays supported formats\. \fB\-vvV\fR also displays POSIX support\. \fB\-q\fR will only display the version number, suitable for machine reading\.
+\fB\-V\fR, \fB\-\-version\fR: display version number and exit\. Advanced: \fB\-vV\fR also displays supported formats\. \fB\-vvV\fR also displays POSIX support\. \fB\-q\fR will only display the version number, suitable for machine reading\.
.
.IP "\(bu" 4
\fB\-v\fR, \fB\-\-verbose\fR: verbose mode, display more information
\fB\-\-no\-progress\fR: do not display the progress bar, but keep all other messages\.
.
.IP "\(bu" 4
-\fB\-\-show\-default\-cparams\fR: Shows the default compression parameters that will be used for a particular src file\. If the provided src file is not a regular file (e\.g\. named pipe), the cli will just output the default parameters\. That is, the parameters that are used when the src size is unknown\.
+\fB\-\-show\-default\-cparams\fR: shows the default compression parameters that will be used for a particular input file, based on the provided compression level and the input size\. If the provided file is not a regular file (e\.g\. a pipe), this flag will output the parameters used for inputs of unknown size\.
.
.IP "\(bu" 4
\fB\-\-\fR: All arguments after \fB\-\-\fR are treated as files
.
.IP "" 0
.
-.SS "gzip Operation modifiers"
+.SS "gzip Operation Modifiers"
When invoked via a \fBgzip\fR symlink, \fBzstd\fR will support further options that intend to mimic the \fBgzip\fR behavior:
.
.TP
\fB\-\-best\fR
alias to the option \fB\-9\fR\.
.
-.SS "Interactions with Environment Variables"
+.SS "Environment Variables"
Employing environment variables to set parameters has security implications\. Therefore, this avenue is intentionally limited\. Only \fBZSTD_CLEVEL\fR and \fBZSTD_NBTHREADS\fR are currently supported\. They set the compression level and number of threads to use during compression, respectively\.
.
.P
Use FILEs as a training set to create a dictionary\. The training set should ideally contain a lot of samples (> 100), and weigh typically 100x the target dictionary size (for example, ~10 MB for a 100 KB dictionary)\. \fB\-\-train\fR can be combined with \fB\-r\fR to indicate a directory rather than listing all the files, which can be useful to circumvent shell expansion limits\.
.
.IP
-Since dictionary compression is mostly effective for small files, the expectation is that the training set will only contain small files\. In the case where some samples happen to be large, only the first 128 KB of these samples will be used for training\.
+Since dictionary compression is mostly effective for small files, the expectation is that the training set will only contain small files\. In the case where some samples happen to be large, only the first 128 KiB of these samples will be used for training\.
.
.IP
\fB\-\-train\fR supports multithreading if \fBzstd\fR is compiled with threading support (default)\. Additional advanced parameters can be specified with \fB\-\-train\-fastcover\fR\. The legacy dictionary builder can be accessed with \fB\-\-train\-legacy\fR\. The slower cover dictionary builder can be accessed with \fB\-\-train\-cover\fR\. Default \fB\-\-train\fR is equivalent to \fB\-\-train\-fastcover=d=8,steps=4\fR\.
.
.TP
\fB\-\-dictID=#\fR
-A dictionary ID is a locally unique ID\. The decoder will use this value to verify it is using the right dictionary\. By default, zstd will create a 4\-bytes random number ID\. It\'s possible to provide an explicit number ID instead\. It\'s up to the dictionary manager to not assign twice the same ID to 2 different dictionaries\. Note that short numbers have an advantage : an ID < 256 will only need 1 byte in the compressed frame header, and an ID < 65536 will only need 2 bytes\. This compares favorably to 4 bytes default\.
+A dictionary ID is a locally unique ID\. The decoder will use this value to verify it is using the right dictionary\. By default, zstd will create a 4\-bytes random number ID\. It\'s possible to provide an explicit number ID instead\. It\'s up to the dictionary manager to not assign twice the same ID to 2 different dictionaries\. Note that short numbers have an advantage: an ID < 256 will only need 1 byte in the compressed frame header, and an ID < 65536 will only need 2 bytes\. This compares favorably to 4 bytes default\.
+.
+.IP
+Note that RFC8878 reserves IDs less than 32768 and greater than or equal to 2^31, so they should not be used in public\.
.
.TP
\fB\-\-train\-cover[=k#,d=#,steps=#,split=#,shrink[=#]]\fR
set process priority to real\-time
.
.P
-\fBOutput Format:\fR CompressionLevel#Filename : InputSize \-> OutputSize (CompressionRatio), CompressionSpeed, DecompressionSpeed
+\fBOutput Format:\fR CompressionLevel#Filename: InputSize \-> OutputSize (CompressionRatio), CompressionSpeed, DecompressionSpeed
.
.P
\fBMethodology:\fR For both compression and decompression speed, the entire input is compressed/decompressed in\-memory to measure speed\. A run lasts at least 1 sec, so when files are small, they are compressed/decompressed several times per run, in order to improve measurement accuracy\.
Specify the size of each compression job\. This parameter is only available when multi\-threading is enabled\. Each compression job is run in parallel, so this value indirectly impacts the number of active threads\. Default job size varies depending on compression level (generally \fB4 * windowSize\fR)\. \fB\-B#\fR makes it possible to manually select a custom size\. Note that job size must respect a minimum value which is enforced transparently\. This minimum is either 512 KB, or \fBoverlapSize\fR, whichever is largest\. Different job sizes will lead to non\-identical compressed frames\.
.
.SS "\-\-zstd[=options]:"
-\fBzstd\fR provides 22 predefined compression levels\. The selected or default predefined compression level can be changed with advanced compression options\. The \fIoptions\fR are provided as a comma\-separated list\. You may specify only the options you want to change and the rest will be taken from the selected or default compression level\. The list of available \fIoptions\fR:
+\fBzstd\fR provides 22 predefined regular compression levels plus the fast levels\. This compression level is translated internally into a number of specific parameters that actually control the behavior of the compressor\. (You can see the result of this translation with \fB\-\-show\-default\-cparams\fR\.) These specific parameters can be overridden with advanced compression options\. The \fIoptions\fR are provided as a comma\-separated list\. You may specify only the options you want to change and the rest will be taken from the selected or default compression level\. The list of available \fIoptions\fR:
.
.TP
\fBstrategy\fR=\fIstrat\fR, \fBstrat\fR=\fIstrat\fR
Specify a strategy used by a match finder\.
.
.IP
-There are 9 strategies numbered from 1 to 9, from faster to stronger: 1=ZSTD_fast, 2=ZSTD_dfast, 3=ZSTD_greedy, 4=ZSTD_lazy, 5=ZSTD_lazy2, 6=ZSTD_btlazy2, 7=ZSTD_btopt, 8=ZSTD_btultra, 9=ZSTD_btultra2\.
+There are 9 strategies numbered from 1 to 9, from fastest to strongest: 1=\fBZSTD_fast\fR, 2=\fBZSTD_dfast\fR, 3=\fBZSTD_greedy\fR, 4=\fBZSTD_lazy\fR, 5=\fBZSTD_lazy2\fR, 6=\fBZSTD_btlazy2\fR, 7=\fBZSTD_btopt\fR, 8=\fBZSTD_btultra\fR, 9=\fBZSTD_btultra2\fR\.
.
.TP
\fBwindowLog\fR=\fIwlog\fR, \fBwlog\fR=\fIwlog\fR
Bigger hash tables cause fewer collisions which usually makes compression faster, but requires more memory during compression\.
.
.IP
-The minimum \fIhlog\fR is 6 (64 B) and the maximum is 30 (1 GiB)\.
+The minimum \fIhlog\fR is 6 (64 entries / 256 B) and the maximum is 30 (1B entries / 4 GiB)\.
.
.TP
\fBchainLog\fR=\fIclog\fR, \fBclog\fR=\fIclog\fR
-Specify the maximum number of bits for a hash chain or a binary tree\.
+Specify the maximum number of bits for the secondary search structure, whose form depends on the selected \fBstrategy\fR\.
.
.IP
-Higher numbers of bits increases the chance to find a match which usually improves compression ratio\. It also slows down compression speed and increases memory requirements for compression\. This option is ignored for the ZSTD_fast strategy\.
+Higher numbers of bits increases the chance to find a match which usually improves compression ratio\. It also slows down compression speed and increases memory requirements for compression\. This option is ignored for the \fBZSTD_fast\fR \fBstrategy\fR, which only has the primary hash table\.
.
.IP
-The minimum \fIclog\fR is 6 (64 B) and the maximum is 29 (524 Mib) on 32\-bit platforms and 30 (1 Gib) on 64\-bit platforms\.
+The minimum \fIclog\fR is 6 (64 entries / 256 B) and the maximum is 29 (512M entries / 2 GiB) on 32\-bit platforms and 30 (1B entries / 4 GiB) on 64\-bit platforms\.
.
.TP
\fBsearchLog\fR=\fIslog\fR, \fBslog\fR=\fIslog\fR
The impact of this field varies depending on the selected strategy\.
.
.IP
-For ZSTD_btopt, ZSTD_btultra and ZSTD_btultra2, it specifies the minimum match length that causes match finder to stop searching\. A larger \fBtargetLength\fR usually improves compression ratio but decreases compression speed\. t For ZSTD_fast, it triggers ultra\-fast mode when > 0\. The value represents the amount of data skipped between match sampling\. Impact is reversed : a larger \fBtargetLength\fR increases compression speed but decreases compression ratio\.
+For \fBZSTD_btopt\fR, \fBZSTD_btultra\fR and \fBZSTD_btultra2\fR, it specifies the minimum match length that causes match finder to stop searching\. A larger \fBtargetLength\fR usually improves compression ratio but decreases compression speed\.
+.
+.IP
+For \fBZSTD_fast\fR, it triggers ultra\-fast mode when > 0\. The value represents the amount of data skipped between match sampling\. Impact is reversed: a larger \fBtargetLength\fR increases compression speed but decreases compression ratio\.
.
.IP
For all other strategies, this field has no impact\.
.
.IP
-The minimum \fItlen\fR is 0 and the maximum is 128 Kib\.
+The minimum \fItlen\fR is 0 and the maximum is 128 KiB\.
.
.TP
\fBoverlapLog\fR=\fIovlog\fR, \fBovlog\fR=\fIovlog\fR
Determine \fBoverlapSize\fR, amount of data reloaded from previous job\. This parameter is only available when multithreading is enabled\. Reloading more data improves compression ratio, but decreases speed\.
.
.IP
-The minimum \fIovlog\fR is 0, and the maximum is 9\. 1 means "no overlap", hence completely independent jobs\. 9 means "full overlap", meaning up to \fBwindowSize\fR is reloaded from previous job\. Reducing \fIovlog\fR by 1 reduces the reloaded amount by a factor 2\. For example, 8 means "windowSize/2", and 6 means "windowSize/8"\. Value 0 is special and means "default" : \fIovlog\fR is automatically determined by \fBzstd\fR\. In which case, \fIovlog\fR will range from 6 to 9, depending on selected \fIstrat\fR\.
+The minimum \fIovlog\fR is 0, and the maximum is 9\. 1 means "no overlap", hence completely independent jobs\. 9 means "full overlap", meaning up to \fBwindowSize\fR is reloaded from previous job\. Reducing \fIovlog\fR by 1 reduces the reloaded amount by a factor 2\. For example, 8 means "windowSize/2", and 6 means "windowSize/8"\. Value 0 is special and means "default": \fIovlog\fR is automatically determined by \fBzstd\fR\. In which case, \fIovlog\fR will range from 6 to 9, depending on selected \fIstrat\fR\.
.
.TP
\fBldmHashLog\fR=\fIlhlog\fR, \fBlhlog\fR=\fIlhlog\fR
.P
\fB\-\-zstd\fR=wlog=23,clog=23,hlog=22,slog=6,mml=3,tlen=48,strat=6
.
+.SH "SEE ALSO"
+\fBzstdgrep\fR(1), \fBzstdless\fR(1), \fBgzip\fR(1), \fBxz\fR(1)
+.
+.P
+The \fIzstandard\fR format is specified in Y\. Collet, "Zstandard Compression and the \'application/zstd\' Media Type", https://www\.ietf\.org/rfc/rfc8878\.txt, Internet RFC 8878 (February 2021)\.
+.
.SH "BUGS"
Report bugs at: https://github\.com/facebook/zstd/issues
.
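As a concrete illustration of the `--patch-from` usage documented above, here is a small shell sketch. The file names are hypothetical and the space savings depend entirely on how similar the two inputs are; the flags themselves (`--patch-from`, `-o`, `-d`) are the ones described in the man page.

```
# Create a compressed "patch" that encodes new_release relative to old_release.
zstd --patch-from=old_release new_release -o delta.zst

# Restoring new_release requires the same reference file at decompression time.
zstd -d --patch-from=old_release delta.zst -o new_release.restored
```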
SYNOPSIS
--------
-`zstd` [*OPTIONS*] [-|_INPUT-FILE_] [-o _OUTPUT-FILE_]
+`zstd` [<OPTIONS>] [-|<INPUT-FILE>] [-o <OUTPUT-FILE>]
`zstdmt` is equivalent to `zstd -T0`
DESCRIPTION
-----------
`zstd` is a fast lossless compression algorithm and data compression tool,
-with command line syntax similar to `gzip (1)` and `xz (1)`.
+with command line syntax similar to `gzip`(1) and `xz`(1).
It is based on the **LZ77** family, with further FSE & huff0 entropy stages.
`zstd` offers highly configurable compression speed,
from fast modes at > 200 MB/s per core,
It also features a very fast decoder, with speeds > 500 MB/s per core.
`zstd` command line syntax is generally similar to gzip,
-but features the following differences :
+but features the following differences:
- Source files are preserved by default.
It's possible to remove them automatically by using the `--rm` command.
Use `-q` to turn it off.
- `zstd` does not accept input from console,
though it does accept `stdin` when it's not the console.
+ - `zstd` does not store the input's filename or attributes, only its contents.
`zstd` processes each _file_ according to the selected operation mode.
If no _files_ are given or _file_ is `-`, `zstd` reads from standard input
and writes the processed data to standard output.
`zstd` will refuse to write compressed data to standard output
-if it is a terminal : it will display an error message and skip the _file_.
+if it is a terminal: it will display an error message and skip the file.
Similarly, `zstd` will refuse to read compressed data from standard input
if it is a terminal.
* When decompressing, the `.zst` suffix is removed from the source filename to
get the target filename
-### Concatenation with .zst files
+### Concatenation with .zst Files
It is possible to concatenate multiple `.zst` files. `zstd` will decompress
such agglomerated file as if it was a single `.zst` file.
OPTIONS
-------
-### Integer suffixes and special values
+### Integer Suffixes and Special Values
+
In most places where an integer argument is expected,
an optional suffix is supported to easily indicate large integers.
There must be no space between the integer and the suffix.
Multiply the integer by 1,048,576 (2\^20).
`Mi`, `M`, and `MB` are accepted as synonyms for `MiB`.
-### Operation mode
+### Operation Mode
+
If multiple operation mode options are given,
the last one takes effect.
decompressed data is discarded and checksummed for errors.
No files are created or removed.
* `-b#`:
- Benchmark file(s) using compression level #
-* `--train FILEs`:
- Use FILEs as a training set to create a dictionary.
+ Benchmark file(s) using compression level _#_.
+ See _BENCHMARK_ below for a description of this operation.
+* `--train FILES`:
+ Use _FILES_ as a training set to create a dictionary.
The training set should contain a lot of small files (> 100).
+ See _DICTIONARY BUILDER_ below for a description of this operation.
* `-l`, `--list`:
Display information related to a zstd compressed file, such as size, ratio, and checksum.
Some of these fields may not be available.
This command's output can be augmented with the `-v` modifier.
-### Operation modifiers
+### Operation Modifiers
* `-#`:
- `#` compression level \[1-19] (default: 3)
+ selects `#` compression level \[1-19\] (default: 3)
* `--ultra`:
unlocks high compression levels 20+ (maximum 22), using a lot more memory.
Note that decompression will also require more memory when using these levels.
As compression is serialized with I/O, this can be slightly slower.
Single-thread mode features significantly lower memory usage,
which can be useful for systems with limited amount of memory, such as 32-bit systems.
- Note 1 : this mode is the only available one when multithread support is disabled.
- Note 2 : this mode is different from `-T1`, which spawns 1 compression thread in parallel with I/O.
+
+ Note 1: this mode is the only available one when multithread support is disabled.
+
+ Note 2: this mode is different from `-T1`, which spawns 1 compression thread in parallel with I/O.
Final compressed result is also slightly different from `-T1`.
* `--auto-threads={physical,logical} (default: physical)`:
When using a default amount of threads via `-T0`, choose the default based on the number
of detected physical or logical cores.
-* `--adapt[=min=#,max=#]` :
+* `--adapt[=min=#,max=#]`:
`zstd` will dynamically adapt compression level to perceived I/O conditions.
Compression level adaptation can be observed live by using command `-v`.
Adaptation can be constrained between supplied `min` and `max` levels.
The feature works when combined with multi-threading and `--long` mode.
It does not work with `--single-thread`.
- It sets window size to 8 MB by default (can be changed manually, see `wlog`).
+ It sets window size to 8 MiB by default (can be changed manually, see `wlog`).
Due to the chaotic nature of dynamic adaptation, compressed result is not reproducible.
- _note_ : at the time of this writing, `--adapt` can remain stuck at low speed
+
+ _Note_: at the time of this writing, `--adapt` can remain stuck at low speed
when combined with multiple worker threads (>=2).
* `--long[=#]`:
enables long distance matching with `#` `windowLog`, if `#` is not
* `--patch-from FILE`:
Specify the file to be used as a reference point for zstd's diff engine.
This is effectively dictionary compression with some convenient parameter
- selection, namely that windowSize > srcSize.
+ selection, namely that _windowSize_ > _srcSize_.
+
+ Note: cannot use both this and `-D` together.
- Note: cannot use both this and -D together
- Note: `--long` mode will be automatically activated if chainLog < fileLog
- (fileLog being the windowLog required to cover the whole file). You
+ Note: `--long` mode will be automatically activated if _chainLog_ < _fileLog_
+ (_fileLog_ being the _windowLog_ required to cover the whole file). You
can also manually force it.
- Note: for all levels, you can use --patch-from in --single-thread mode
- to improve compression ratio at the cost of speed
+
+ Note: for all levels, you can use `--patch-from` in `--single-thread` mode
+ to improve compression ratio at the cost of speed.
+
Note: for level 19, you can get increased compression ratio at the cost
of speed by specifying `--zstd=targetLength=` to be something large
- (i.e. 4096), and by setting a large `--zstd=chainLog=`
-* `--rsyncable` :
+ (i.e. 4096), and by setting a large `--zstd=chainLog=`.
+* `--rsyncable`:
`zstd` will periodically synchronize the compression state to make the
compressed file more rsync-friendly. There is a negligible impact to
compression ratio, and the faster compression levels will see a small
* `--[no-]content-size`:
enable / disable whether or not the original size of the file is placed in
the header of the compressed file. The default option is
- --content-size (meaning that the original size will be placed in the header).
+ `--content-size` (meaning that the original size will be placed in the header).
* `--no-dictID`:
do not store dictionary ID within frame header (dictionary compression).
The decoder will have to rely on implicit knowledge about which dictionary to use,
it won't be able to check if it's correct.
* `-M#`, `--memory=#`:
- Set a memory usage limit. By default, `zstd` uses 128 MB for decompression
+ Set a memory usage limit. By default, `zstd` uses 128 MiB for decompression
as the maximum amount of memory the decompressor is allowed to use, but you can
override this manually if need be in either direction (i.e. you can increase or
decrease it).
- This is also used during compression when using with --patch-from=. In this case,
- this parameter overrides that maximum size allowed for a dictionary. (128 MB).
+ This is also used during compression when used with `--patch-from=`. In this case,
+ this parameter overrides the maximum size allowed for a dictionary (128 MiB).
Additionally, this can be used to limit memory for dictionary training. This parameter
- overrides the default limit of 2 GB. zstd will load training samples up to the memory limit
+ overrides the default limit of 2 GiB. zstd will load training samples up to the memory limit
and ignore the rest.
-* `--stream-size=#` :
+* `--stream-size=#`:
Sets the pledged source size of input coming from a stream. This value must be exact, as it
will be included in the produced frame header. Incorrect stream sizes will cause an error.
This information will be used to better optimize compression parameters, resulting in
Exact guesses result in better compression ratios. Overestimates result in slightly
degraded compression ratios, while underestimates may result in significant degradation.
* `-o FILE`:
- save result into `FILE`
+ save result into `FILE`.
* `-f`, `--force`:
disable input and output checks. Allows overwriting existing files, input
from console, output to stdout, operating on links, block devices, etc.
enable / disable passing through uncompressed files as-is. During
decompression when pass-through is enabled, unrecognized formats will be
copied as-is from the input to the output. By default, pass-through will
- occur when the output destination is stdout and the force (-f) option is
+ occur when the output destination is stdout and the force (`-f`) option is
set.
* `--rm`:
remove source file(s) after successful compression or decompression. If used in combination with
- -o, will trigger a confirmation prompt (which can be silenced with -f), as this is a destructive operation.
+ `-o`, will trigger a confirmation prompt (which can be silenced with `-f`), as this is a destructive operation.
* `-k`, `--keep`:
keep source file(s) after successful compression or decompression.
This is the default behavior.
display help/long help and exit
* `-V`, `--version`:
display version number and exit.
- Advanced : `-vV` also displays supported formats.
+ Advanced: `-vV` also displays supported formats.
`-vvV` also displays POSIX support.
`-q` will only display the version number, suitable for machine reading.
* `-v`, `--verbose`:
* `--no-progress`:
do not display the progress bar, but keep all other messages.
* `--show-default-cparams`:
- Shows the default compression parameters that will be used for a
- particular src file. If the provided src file is not a regular file
- (e.g. named pipe), the cli will just output the default parameters.
- That is, the parameters that are used when the src size is unknown.
+ shows the default compression parameters that will be used for a particular input file, based on the provided compression level and the input size.
+ If the provided file is not a regular file (e.g. a pipe), this flag will output the parameters used for inputs of unknown size.
* `--`:
All arguments after `--` are treated as files
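To tie a few of the modifiers above together, here is a sketch of a typical `--long` round trip; the file name and window size are arbitrary, and the last two lines are alternative ways to decompress:

```
# Compress with long-distance matching over a 1 GiB window, using all cores.
zstd -T0 --long=30 -19 backup.tar -o backup.tar.zst

# A window larger than the default 128 MiB limit must be allowed explicitly
# when decompressing, either by repeating --long or by raising --memory.
zstd -d --long=30 backup.tar.zst
zstd -d --memory=1024MiB backup.tar.zst
```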
-### gzip Operation modifiers
+### gzip Operation Modifiers
When invoked via a `gzip` symlink, `zstd` will support further
options that intend to mimic the `gzip` behavior:
alias to the option `-9`.
-### Interactions with Environment Variables
+### Environment Variables
Employing environment variables to set parameters has security implications.
Therefore, this avenue is intentionally limited.
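For example (a minimal sketch, values arbitrary), the two variables this document lists as supported, `ZSTD_CLEVEL` and `ZSTD_NBTHREADS`, can act as session-wide defaults, with explicit command-line flags still taking precedence:

```
# Default to level 19 with 4 worker threads for this shell session.
export ZSTD_CLEVEL=19
export ZSTD_NBTHREADS=4
zstd archive_input.bin          # uses the defaults above
zstd -3 -T0 other_input.bin     # explicit flags override the variables
```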
Since dictionary compression is mostly effective for small files,
the expectation is that the training set will only contain small files.
In the case where some samples happen to be large,
- only the first 128 KB of these samples will be used for training.
+ only the first 128 KiB of these samples will be used for training.
`--train` supports multithreading if `zstd` is compiled with threading support (default).
Additional advanced parameters can be specified with `--train-fastcover`.
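A typical training round might look like the following sketch; the sample paths and size limit are made up, and the options used (`--train`, `-o`, `--maxdict`, `-D`) are the ones described in this document:

```
# Build a dictionary from a corpus of small, similar samples.
zstd --train samples/*.json -o samples.dict --maxdict=112640

# Use it for compression and decompression of files of the same kind.
zstd -D samples.dict new_record.json
zstd -d -D samples.dict new_record.json.zst -o restored.json
```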
It's possible to provide an explicit number ID instead.
It's up to the dictionary manager to not assign twice the same ID to
2 different dictionaries.
- Note that short numbers have an advantage :
+ Note that short numbers have an advantage:
an ID < 256 will only need 1 byte in the compressed frame header,
and an ID < 65536 will only need 2 bytes.
This compares favorably to 4 bytes default.
+ Note that RFC8878 reserves IDs less than 32768 and greater than or equal to 2\^31, so they should not be used in public.
+
* `--train-cover[=k#,d=#,steps=#,split=#,shrink[=#]]`:
Select parameters for the default dictionary builder algorithm named cover.
If _d_ is not specified, then it tries _d_ = 6 and _d_ = 8.
* `--priority=rt`:
set process priority to real-time
-**Output Format:** CompressionLevel#Filename : InputSize -> OutputSize (CompressionRatio), CompressionSpeed, DecompressionSpeed
+**Output Format:** CompressionLevel#Filename: InputSize -> OutputSize (CompressionRatio), CompressionSpeed, DecompressionSpeed
**Methodology:** For both compression and decompression speed, the entire input is compressed/decompressed in-memory to measure speed. A run lasts at least 1 sec, so when files are small, they are compressed/decompressed several times per run, in order to improve measurement accuracy.
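A benchmark run matching the output format above can be requested with the `-b`, `-e` and `-i` options documented in this section; the sample file name is arbitrary:

```
# Benchmark levels 1 through 5, spending at least 3 seconds on each level.
zstd -b1 -e5 -i3 sample.tar
```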
Different job sizes will lead to non-identical compressed frames.
### --zstd[=options]:
-`zstd` provides 22 predefined compression levels.
-The selected or default predefined compression level can be changed with
-advanced compression options.
+`zstd` provides 22 predefined regular compression levels plus the fast levels.
+This compression level is translated internally into a number of specific parameters that actually control the behavior of the compressor.
+(You can see the result of this translation with `--show-default-cparams`.)
+These specific parameters can be overridden with advanced compression options.
The _options_ are provided as a comma-separated list.
You may specify only the options you want to change and the rest will be
taken from the selected or default compression level.
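As a quick sketch of how this is used in practice (file names arbitrary), ahead of the individual options listed below:

```
# Show the specific parameters that level 19 translates to for this input.
zstd -19 --show-default-cparams file.bin

# Override two of those parameters while keeping the rest of level 19.
zstd -19 --zstd=targetLength=4096,chainLog=28 file.bin -o file.tuned.zst
```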
- `strategy`=_strat_, `strat`=_strat_:
Specify a strategy used by a match finder.
- There are 9 strategies numbered from 1 to 9, from faster to stronger:
- 1=ZSTD\_fast, 2=ZSTD\_dfast, 3=ZSTD\_greedy,
- 4=ZSTD\_lazy, 5=ZSTD\_lazy2, 6=ZSTD\_btlazy2,
- 7=ZSTD\_btopt, 8=ZSTD\_btultra, 9=ZSTD\_btultra2.
+ There are 9 strategies numbered from 1 to 9, from fastest to strongest:
+ 1=`ZSTD_fast`, 2=`ZSTD_dfast`, 3=`ZSTD_greedy`,
+ 4=`ZSTD_lazy`, 5=`ZSTD_lazy2`, 6=`ZSTD_btlazy2`,
+ 7=`ZSTD_btopt`, 8=`ZSTD_btultra`, 9=`ZSTD_btultra2`.
- `windowLog`=_wlog_, `wlog`=_wlog_:
Specify the maximum number of bits for a match distance.
Bigger hash tables cause fewer collisions which usually makes compression
faster, but requires more memory during compression.
- The minimum _hlog_ is 6 (64 B) and the maximum is 30 (1 GiB).
+ The minimum _hlog_ is 6 (64 entries / 256 B) and the maximum is 30 (1B entries / 4 GiB).
- `chainLog`=_clog_, `clog`=_clog_:
- Specify the maximum number of bits for a hash chain or a binary tree.
+ Specify the maximum number of bits for the secondary search structure,
+ whose form depends on the selected `strategy`.
Higher numbers of bits increases the chance to find a match which usually
improves compression ratio.
It also slows down compression speed and increases memory requirements for
compression.
- This option is ignored for the ZSTD_fast strategy.
+ This option is ignored for the `ZSTD_fast` `strategy`, which only has the primary hash table.
- The minimum _clog_ is 6 (64 B) and the maximum is 29 (524 Mib) on 32-bit platforms
- and 30 (1 Gib) on 64-bit platforms.
+ The minimum _clog_ is 6 (64 entries / 256 B) and the maximum is 29 (512M entries / 2 GiB) on 32-bit platforms
+ and 30 (1B entries / 4 GiB) on 64-bit platforms.
- `searchLog`=_slog_, `slog`=_slog_:
Specify the maximum number of searches in a hash chain or a binary tree
- `targetLength`=_tlen_, `tlen`=_tlen_:
The impact of this field varies depending on the selected strategy.
- For ZSTD\_btopt, ZSTD\_btultra and ZSTD\_btultra2, it specifies
+ For `ZSTD_btopt`, `ZSTD_btultra` and `ZSTD_btultra2`, it specifies
the minimum match length that causes match finder to stop searching.
A larger `targetLength` usually improves compression ratio
but decreases compression speed.
-t
- For ZSTD\_fast, it triggers ultra-fast mode when > 0.
+
+ For `ZSTD_fast`, it triggers ultra-fast mode when > 0.
The value represents the amount of data skipped between match sampling.
- Impact is reversed : a larger `targetLength` increases compression speed
+ Impact is reversed: a larger `targetLength` increases compression speed
but decreases compression ratio.
For all other strategies, this field has no impact.
- The minimum _tlen_ is 0 and the maximum is 128 Kib.
+ The minimum _tlen_ is 0 and the maximum is 128 KiB.
- `overlapLog`=_ovlog_, `ovlog`=_ovlog_:
Determine `overlapSize`, amount of data reloaded from previous job.
9 means "full overlap", meaning up to `windowSize` is reloaded from previous job.
Reducing _ovlog_ by 1 reduces the reloaded amount by a factor 2.
For example, 8 means "windowSize/2", and 6 means "windowSize/8".
- Value 0 is special and means "default" : _ovlog_ is automatically determined by `zstd`.
+ Value 0 is special and means "default": _ovlog_ is automatically determined by `zstd`.
In which case, _ovlog_ will range from 6 to 9, depending on selected _strat_.
- `ldmHashLog`=_lhlog_, `lhlog`=_lhlog_:
`--zstd`=wlog=23,clog=23,hlog=22,slog=6,mml=3,tlen=48,strat=6
+SEE ALSO
+--------
+`zstdgrep`(1), `zstdless`(1), `gzip`(1), `xz`(1)
+
+The <zstandard> format is specified in Y. Collet, "Zstandard Compression and the 'application/zstd' Media Type", https://www.ietf.org/rfc/rfc8878.txt, Internet RFC 8878 (February 2021).
BUGS
----
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*-************************************
* Constants
**************************************/
-#define COMPRESSOR_NAME "zstd command line interface"
+#define COMPRESSOR_NAME "Zstandard CLI"
#ifndef ZSTD_VERSION
# define ZSTD_VERSION "v" ZSTD_VERSION_STRING
#endif
#define AUTHOR "Yann Collet"
-#define WELCOME_MESSAGE "*** %s %i-bits %s, by %s ***\n", COMPRESSOR_NAME, (int)(sizeof(size_t)*8), ZSTD_VERSION, AUTHOR
+#define WELCOME_MESSAGE "*** %s (%i-bit) %s, by %s ***\n", COMPRESSOR_NAME, (int)(sizeof(size_t)*8), ZSTD_VERSION, AUTHOR
#define ZSTD_ZSTDMT "zstdmt"
#define ZSTD_UNZSTD "unzstd"
*/
static void usage(FILE* f, const char* programName)
{
- DISPLAY_F(f, "Usage: %s [OPTION]... [FILE]... [-o file]\n", programName);
- DISPLAY_F(f, "Compress or uncompress FILEs (with no FILE or when FILE is `-`, read from standard input).\n\n");
- DISPLAY_F(f, " -o file result stored into `file` (only 1 output file)\n");
-#ifndef ZSTD_NOCOMPRESS
- DISPLAY_F(f, " -1 .. -%d compression level (faster .. better; default: %d)\n", ZSTDCLI_CLEVEL_MAX, ZSTDCLI_CLEVEL_DEFAULT);
-#endif
-#ifndef ZSTD_NODECOMPRESS
- DISPLAY_F(f, " -d, --decompress decompression\n");
-#endif
- DISPLAY_F(f, " -f, --force disable input and output checks. Allows overwriting existing files,\n");
- DISPLAY_F(f, " input from console, output to stdout, operating on links,\n");
- DISPLAY_F(f, " block devices, etc. During decompression and when the output\n");
- DISPLAY_F(f, " destination is stdout, pass-through unrecognized formats as-is.\n");
- DISPLAY_F(f, " --rm remove source file(s) after successful de/compression\n");
- DISPLAY_F(f, " -k, --keep preserve source file(s) (default) \n");
+ DISPLAY_F(f, "Compress or decompress the INPUT file(s); reads from STDIN if INPUT is `-` or not provided.\n\n");
+ DISPLAY_F(f, "Usage: %s [OPTIONS...] [INPUT... | -] [-o OUTPUT]\n\n", programName);
+ DISPLAY_F(f, "Options:\n");
+ DISPLAY_F(f, " -o OUTPUT Write output to a single file, OUTPUT.\n");
+ DISPLAY_F(f, " -k, --keep Preserve INPUT file(s). [Default] \n");
+ DISPLAY_F(f, " --rm Remove INPUT file(s) after successful (de)compression.\n");
#ifdef ZSTD_GZCOMPRESS
if (exeNameMatch(programName, ZSTD_GZ)) { /* behave like gzip */
- DISPLAY_F(f, " -n, --no-name do not store original filename when compressing\n");
+ DISPLAY_F(f, " -n, --no-name Do not store original filename when compressing.\n\n");
}
#endif
- DISPLAY_F(f, " -D DICT use DICT as Dictionary for compression or decompression\n");
- DISPLAY_F(f, " -h display usage and exit\n");
- DISPLAY_F(f, " -H,--help display long help and exit\n");
+ DISPLAY_F(f, "\n");
+#ifndef ZSTD_NOCOMPRESS
+ DISPLAY_F(f, " -# Desired compression level, where `#` is a number between 1 and %d;\n", ZSTDCLI_CLEVEL_MAX);
+ DISPLAY_F(f, " lower numbers provide faster compression, higher numbers yield\n");
+ DISPLAY_F(f, " better compression ratios. [Default: %d]\n\n", ZSTDCLI_CLEVEL_DEFAULT);
+#endif
+#ifndef ZSTD_NODECOMPRESS
+ DISPLAY_F(f, " -d, --decompress Perform decompression.\n");
+#endif
+ DISPLAY_F(f, " -D DICT Use DICT as the dictionary for compression or decompression.\n\n");
+ DISPLAY_F(f, " -f, --force Disable input and output checks. Allows overwriting existing files,\n");
+ DISPLAY_F(f, " receiving input from the console, printing ouput to STDOUT, and\n");
+ DISPLAY_F(f, " operating on links, block devices, etc. Unrecognized formats will be\n");
+ DISPLAY_F(f, " passed-through through as-is.\n\n");
+
+ DISPLAY_F(f, " -h Display short usage and exit.\n");
+ DISPLAY_F(f, " -H, --help Display full help and exit.\n");
+ DISPLAY_F(f, " -V, --version Display the program version and exit.\n");
DISPLAY_F(f, "\n");
}
static void usage_advanced(const char* programName)
{
DISPLAYOUT(WELCOME_MESSAGE);
+ DISPLAYOUT("\n");
usage(stdout, programName);
- DISPLAYOUT("Advanced options :\n");
- DISPLAYOUT(" -V, --version display Version number and exit\n");
-
- DISPLAYOUT(" -c, --stdout write to standard output (even if it is the console), keep original file\n");
+ DISPLAYOUT("Advanced options:\n");
+ DISPLAYOUT(" -c, --stdout Write to STDOUT (even if it is a console) and keep the INPUT file(s).\n\n");
- DISPLAYOUT(" -v, --verbose verbose mode; specify multiple times to increase verbosity\n");
- DISPLAYOUT(" -q, --quiet suppress warnings; specify twice to suppress errors too\n");
- DISPLAYOUT(" --[no-]progress forcibly display, or never display the progress counter\n");
- DISPLAYOUT(" note: any (de)compressed output to terminal will mix with progress counter text\n");
+ DISPLAYOUT(" -v, --verbose Enable verbose output; pass multiple times to increase verbosity.\n");
+ DISPLAYOUT(" -q, --quiet Suppress warnings; pass twice to suppress errors.\n");
+#ifndef ZSTD_NOTRACE
+ DISPLAYOUT(" --trace LOG Log tracing information to LOG.\n");
+#endif
+ DISPLAYOUT("\n");
+ DISPLAYOUT(" --[no-]progress Forcibly show/hide the progress counter. NOTE: Any (de)compressed\n");
+ DISPLAYOUT(" output to terminal will mix with progress counter text.\n\n");
#ifdef UTIL_HAS_CREATEFILELIST
- DISPLAYOUT(" -r operate recursively on directories\n");
- DISPLAYOUT(" --filelist FILE read list of files to operate upon from FILE\n");
- DISPLAYOUT(" --output-dir-flat DIR : processed files are stored into DIR\n");
+ DISPLAYOUT(" -r Operate recursively on directories.\n");
+ DISPLAYOUT(" --filelist LIST Read a list of files to operate on from LIST.\n");
+ DISPLAYOUT(" --output-dir-flat DIR Store processed files in DIR.\n");
#endif
#ifdef UTIL_HAS_MIRRORFILELIST
- DISPLAYOUT(" --output-dir-mirror DIR : processed files are stored into DIR respecting original directory structure\n");
+ DISPLAYOUT(" --output-dir-mirror DIR Store processed files in DIR, respecting original directory structure.\n");
#endif
if (AIO_supported())
- DISPLAYOUT(" --[no-]asyncio use asynchronous IO (default: enabled)\n");
+ DISPLAYOUT(" --[no-]asyncio Use asynchronous IO. [Default: Enabled]\n");
+ DISPLAYOUT("\n");
#ifndef ZSTD_NOCOMPRESS
- DISPLAYOUT(" --[no-]check during compression, add XXH64 integrity checksum to frame (default: enabled)\n");
+ DISPLAYOUT(" --[no-]check Add XXH64 integrity checksums during compression. [Default: Add, Validate]\n");
#ifndef ZSTD_NODECOMPRESS
- DISPLAYOUT(" if specified with -d, decompressor will ignore/validate checksums in compressed frame (default: validate)\n");
+ DISPLAYOUT(" If `-d` is present, ignore/validate checksums during decompression.\n");
#endif
#else
#ifdef ZSTD_NOCOMPRESS
- DISPLAYOUT(" --[no-]check during decompression, ignore/validate checksums in compressed frame (default: validate)");
+ DISPLAYOUT(" --[no-]check Ignore/validate checksums during decompression. [Default: Validate]");
#endif
- DISPLAYOUT("\n");
#endif /* ZSTD_NOCOMPRESS */
-#ifndef ZSTD_NOTRACE
- DISPLAYOUT(" --trace FILE log tracing information to FILE\n");
-#endif
- DISPLAYOUT(" -- all arguments after \"--\" are treated as files\n");
+ DISPLAYOUT("\n");
+ DISPLAYOUT(" -- Treat remaining arguments after `--` as files.\n");
#ifndef ZSTD_NOCOMPRESS
DISPLAYOUT("\n");
- DISPLAYOUT("Advanced compression options :\n");
- DISPLAYOUT(" --ultra enable levels beyond %i, up to %i (requires more memory)\n", ZSTDCLI_CLEVEL_MAX, ZSTD_maxCLevel());
- DISPLAYOUT(" --fast[=#] switch to very fast compression levels (default: %u)\n", 1);
+ DISPLAYOUT("Advanced compression options:\n");
+ DISPLAYOUT(" --ultra Enable levels beyond %i, up to %i; requires more memory.\n", ZSTDCLI_CLEVEL_MAX, ZSTD_maxCLevel());
+ DISPLAYOUT(" --fast[=#] Use to very fast compression levels. [Default: %u]\n", 1);
#ifdef ZSTD_GZCOMPRESS
if (exeNameMatch(programName, ZSTD_GZ)) { /* behave like gzip */
- DISPLAYOUT(" --best compatibility alias for -9 \n");
- DISPLAYOUT(" --no-name do not store original filename when compressing\n");
+ DISPLAYOUT(" --best Compatibility alias for `-9`.\n");
}
#endif
- DISPLAYOUT(" --long[=#] enable long distance matching with given window log (default: %u)\n", g_defaultMaxWindowLog);
- DISPLAYOUT(" --patch-from=FILE : specify the file to be used as a reference point for zstd's diff engine. \n");
- DISPLAYOUT(" --adapt dynamically adapt compression level to I/O conditions\n");
+ DISPLAYOUT(" --adapt Dynamically adapt compression level to I/O conditions.\n");
+ DISPLAYOUT(" --long[=#] Enable long distance matching with window log #. [Default: %u]\n", g_defaultMaxWindowLog);
+ DISPLAYOUT(" --patch-from=REF Use REF as the reference point for Zstandard's diff engine. \n\n");
# ifdef ZSTD_MULTITHREAD
- DISPLAYOUT(" -T# spawn # compression threads (default: 1, 0==# cores) \n");
- DISPLAYOUT(" -B# select size of each job (default: 0==automatic) \n");
- DISPLAYOUT(" --single-thread use a single thread for both I/O and compression (result slightly different than -T1) \n");
- DISPLAYOUT(" --auto-threads={physical,logical} : use either physical cores or logical cores as default when specifying -T0 (default: physical)\n");
- DISPLAYOUT(" --rsyncable compress using a rsync-friendly method (-B sets block size) \n");
+ DISPLAYOUT(" -T# Spawn # compression threads. [Default: 1; pass 0 for core count.]\n");
+ DISPLAYOUT(" --single-thread Share a single thread for I/O and compression (slightly different than `-T1`).\n");
+ DISPLAYOUT(" --auto-threads={physical|logical}\n");
+ DISPLAYOUT(" Use physical/logical cores when using `-T0`. [Default: Physical]\n\n");
+ DISPLAYOUT(" -B# Set job size to #. [Default: 0 (automatic)]\n");
+ DISPLAYOUT(" --rsyncable Compress using a rsync-friendly method (`-B` sets block size). \n");
+ DISPLAYOUT("\n");
# endif
- DISPLAYOUT(" --exclude-compressed : only compress files that are not already compressed \n");
- DISPLAYOUT(" --stream-size=# specify size of streaming input from `stdin` \n");
- DISPLAYOUT(" --size-hint=# optimize compression parameters for streaming input of approximately this size \n");
- DISPLAYOUT(" --target-compressed-block-size=# : generate compressed block of approximately targeted size \n");
- DISPLAYOUT(" --no-dictID don't write dictID into header (dictionary compression only)\n");
- DISPLAYOUT(" --[no-]compress-literals : force (un)compressed literals\n");
- DISPLAYOUT(" --[no-]row-match-finder : force enable/disable usage of fast row-based matchfinder for greedy, lazy, and lazy2 strategies\n");
-
- DISPLAYOUT(" --format=zstd compress files to the .zst format (default)\n");
+ DISPLAYOUT(" --exclude-compressed Only compress files that are not already compressed.\n\n");
+
+ DISPLAYOUT(" --stream-size=# Specify size of streaming input from STDIN.\n");
+ DISPLAYOUT(" --size-hint=# Optimize compression parameters for streaming input of approximately size #.\n");
+ DISPLAYOUT(" --target-compressed-block-size=#\n");
+ DISPLAYOUT(" Generate compressed blocks of approximately # size.\n\n");
+ DISPLAYOUT(" --no-dictID Don't write `dictID` into the header (dictionary compression only).\n");
+ DISPLAYOUT(" --[no-]compress-literals Force (un)compressed literals.\n");
+ DISPLAYOUT(" --[no-]row-match-finder Explicitly enable/disable the fast, row-based matchfinder for\n");
+ DISPLAYOUT(" the 'greedy', 'lazy', and 'lazy2' strategies.\n");
+
+ DISPLAYOUT("\n");
+ DISPLAYOUT(" --format=zstd Compress files to the `.zst` format. [Default]\n");
#ifdef ZSTD_GZCOMPRESS
- DISPLAYOUT(" --format=gzip compress files to the .gz format\n");
+ DISPLAYOUT(" --format=gzip Compress files to the `.gz` format.\n");
#endif
#ifdef ZSTD_LZMACOMPRESS
- DISPLAYOUT(" --format=xz compress files to the .xz format\n");
- DISPLAYOUT(" --format=lzma compress files to the .lzma format\n");
+ DISPLAYOUT(" --format=xz Compress files to the `.xz` format.\n");
+ DISPLAYOUT(" --format=lzma Compress files to the `.lzma` format.\n");
#endif
#ifdef ZSTD_LZ4COMPRESS
- DISPLAYOUT( " --format=lz4 compress files to the .lz4 format\n");
+ DISPLAYOUT( " --format=lz4 Compress files to the `.lz4` format.\n");
#endif
#endif /* !ZSTD_NOCOMPRESS */
#ifndef ZSTD_NODECOMPRESS
DISPLAYOUT("\n");
- DISPLAYOUT("Advanced decompression options :\n");
- DISPLAYOUT(" -l print information about zstd compressed files\n");
- DISPLAYOUT(" --test test compressed file integrity\n");
- DISPLAYOUT(" -M# Set a memory usage limit for decompression\n");
+ DISPLAYOUT("Advanced decompression options:\n");
+ DISPLAYOUT(" -l Print information about Zstandard-compressed files.\n");
+ DISPLAYOUT(" --test Test compressed file integrity.\n");
+ DISPLAYOUT(" -M# Set the memory usage limit to # megabytes.\n");
# if ZSTD_SPARSE_DEFAULT
- DISPLAYOUT(" --[no-]sparse sparse mode (default: enabled on file, disabled on stdout)\n");
+ DISPLAYOUT(" --[no-]sparse Enable sparse mode. [Default: Enabled for files, disabled for STDOUT.]\n");
# else
- DISPLAYOUT(" --[no-]sparse sparse mode (default: disabled)\n");
+ DISPLAYOUT(" --[no-]sparse Enable sparse mode. [Default: Disabled]\n");
# endif
{
- char const* passThroughDefault = "disabled";
+ char const* passThroughDefault = "Disabled";
if (exeNameMatch(programName, ZSTD_CAT) ||
exeNameMatch(programName, ZSTD_ZCAT) ||
exeNameMatch(programName, ZSTD_GZCAT)) {
- passThroughDefault = "enabled";
+ passThroughDefault = "Enabled";
}
- DISPLAYOUT(" --[no-]pass-through : passes through uncompressed files as-is (default: %s)\n", passThroughDefault);
+ DISPLAYOUT(" --[no-]pass-through Pass through uncompressed files as-is. [Default: %s]\n", passThroughDefault);
}
#endif /* ZSTD_NODECOMPRESS */
#ifndef ZSTD_NODICT
DISPLAYOUT("\n");
- DISPLAYOUT("Dictionary builder :\n");
- DISPLAYOUT(" --train ## create a dictionary from a training set of files\n");
- DISPLAYOUT(" --train-cover[=k=#,d=#,steps=#,split=#,shrink[=#]] : use the cover algorithm with optional args\n");
- DISPLAYOUT(" --train-fastcover[=k=#,d=#,f=#,steps=#,split=#,accel=#,shrink[=#]] : use the fast cover algorithm with optional args\n");
- DISPLAYOUT(" --train-legacy[=s=#] : use the legacy algorithm with selectivity (default: %u)\n", g_defaultSelectivityLevel);
- DISPLAYOUT(" -o DICT DICT is dictionary name (default: %s)\n", g_defaultDictName);
- DISPLAYOUT(" --maxdict=# limit dictionary to specified size (default: %u)\n", g_defaultMaxDictSize);
- DISPLAYOUT(" --dictID=# force dictionary ID to specified value (default: random)\n");
+ DISPLAYOUT("Dictionary builder:\n");
+ DISPLAYOUT(" --train Create a dictionary from a training set of files.\n\n");
+ DISPLAYOUT(" --train-cover[=k=#,d=#,steps=#,split=#,shrink[=#]]\n");
+ DISPLAYOUT(" Use the cover algorithm (with optional arguments).\n");
+ DISPLAYOUT(" --train-fastcover[=k=#,d=#,f=#,steps=#,split=#,accel=#,shrink[=#]]\n");
+ DISPLAYOUT(" Use the fast cover algorithm (with optional arguments).\n\n");
+ DISPLAYOUT(" --train-legacy[=s=#] Use the legacy algorithm with selectivity #. [Default: %u]\n", g_defaultSelectivityLevel);
+ DISPLAYOUT(" -o NAME Use NAME as dictionary name. [Default: %s]\n", g_defaultDictName);
+ DISPLAYOUT(" --maxdict=# Limit dictionary to specified size #. [Default: %u]\n", g_defaultMaxDictSize);
+ DISPLAYOUT(" --dictID=# Force dictionary ID to #. [Default: Random]\n");
#endif
#ifndef ZSTD_NOBENCH
DISPLAYOUT("\n");
- DISPLAYOUT("Benchmark options : \n");
- DISPLAYOUT(" -b# benchmark file(s), using # compression level (default: %d)\n", ZSTDCLI_CLEVEL_DEFAULT);
- DISPLAYOUT(" -e# test all compression levels successively from -b# to -e# (default: 1)\n");
- DISPLAYOUT(" -i# minimum evaluation time in seconds (default: 3s)\n");
- DISPLAYOUT(" -B# cut file into independent chunks of size # (default: no chunking)\n");
- DISPLAYOUT(" -S output one benchmark result per input file (default: consolidated result)\n");
- DISPLAYOUT(" --priority=rt set process priority to real-time\n");
+ DISPLAYOUT("Benchmark options:\n");
+ DISPLAYOUT(" -b# Perform benchmarking with compression level #. [Default: %d]\n", ZSTDCLI_CLEVEL_DEFAULT);
+ DISPLAYOUT(" -e# Test all compression levels up to #; starting level is `-b#`. [Default: 1]\n");
+ DISPLAYOUT(" -i# Set the minimum evaluation to time # seconds. [Default: 3]\n");
+ DISPLAYOUT(" -B# Cut file into independent chunks of size #. [Default: No chunking]\n");
+ DISPLAYOUT(" -S Output one benchmark result per input file. [Default: Consolidated result]\n");
+ DISPLAYOUT(" --priority=rt Set process priority to real-time.\n");
#endif
}
if (!strcmp(argument, "--fake-stdin-is-console")) { UTIL_fakeStdinIsConsole(); continue; }
if (!strcmp(argument, "--fake-stdout-is-console")) { UTIL_fakeStdoutIsConsole(); continue; }
if (!strcmp(argument, "--fake-stderr-is-console")) { UTIL_fakeStderrIsConsole(); continue; }
+ if (!strcmp(argument, "--trace-file-stat")) { UTIL_traceFileStat(); continue; }
/* long commands with arguments */
#ifndef ZSTD_NODICT
/*
- * Copyright (c) Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
SYNOPSIS
--------
-`zstdgrep` [*grep-flags*] [--] _pattern_ [_files_ ...]
+`zstdgrep` [<grep-flags>] [--] <pattern> [<files> ...]
DESCRIPTION
-----------
-`zstdgrep` runs `grep (1)` on files, or `stdin` if no files argument is given, after decompressing them with `zstdcat (1)`.
+`zstdgrep` runs `grep`(1) on files, or `stdin` if no files argument is given, after decompressing them with `zstdcat`(1).
-The grep-flags and pattern arguments are passed on to `grep (1)`. If an `-e` flag is found in the `grep-flags`, `zstdgrep` will not look for a pattern argument.
+The <grep-flags> and <pattern> arguments are passed on to `grep`(1). If an `-e` flag is found in the <grep-flags>, `zstdgrep` will not look for a <pattern> argument.
-Note that modern `grep` alternatives such as `ripgrep` (`rg`) support `zstd`-compressed files out of the box,
+Note that modern `grep` alternatives such as `ripgrep` (`rg`(1)) support `zstd`-compressed files out of the box,
and can prove better alternatives than `zstdgrep` notably for unsupported complex pattern searches.
Note though that such alternatives may also feature some minor command line differences.
SEE ALSO
--------
-`zstd (1)`
+`zstd`(1)
AUTHORS
-------
SYNOPSIS
--------
-`zstdless` [*flags*] [_file_ ...]
+`zstdless` [<flags>] [<file> ...]
DESCRIPTION
-----------
-`zstdless` runs `less (1)` on files or stdin, if no files argument is given, after decompressing them with `zstdcat (1)`.
+`zstdless` runs `less`(1) on files or stdin, if no <file> argument is given, after decompressing them with `zstdcat`(1).
SEE ALSO
--------
-`zstd (1)`
+`zstd`(1)
# THIS BENCHMARK IS BEING REPLACED BY automated-bencmarking.py
# ################################################################
-# Copyright (c) Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under both the BSD-style license (found in the
# ################################################################
-# Copyright (c) Yann Collet, Facebook, Inc.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under both the BSD-style license (found in the
$(CC) $(CPPFLAGS) $(CFLAGS) $(filter %.c,$^) $(LDFLAGS) -o $@$(EXT)
CLEAN += zstreamtest zstreamtest32
-ZSTREAM_LOCAL_FILES := $(PRGDIR)/datagen.c $(PRGDIR)/util.c $(PRGDIR)/timefn.c seqgen.c zstreamtest.c
+ZSTREAM_LOCAL_FILES := $(PRGDIR)/datagen.c $(PRGDIR)/util.c $(PRGDIR)/timefn.c seqgen.c zstreamtest.c external_matchfinder.c
ZSTREAM_PROPER_FILES := $(ZDICT_FILES) $(ZSTREAM_LOCAL_FILES)
ZSTREAMFILES := $(ZSTD_FILES) $(ZSTREAM_PROPER_FILES)
zstreamtest32 : CFLAGS += -m32
# ################################################################
-# Copyright (c) Facebook, Inc.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
--- /dev/null
+#!/usr/bin/env python3
+# ################################################################
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under both the BSD-style license (found in the
+# LICENSE file in the root directory of this source tree) and the GPLv2 (found
+# in the COPYING file in the root directory of this source tree).
+# You may select, at your option, one of the above-listed licenses.
+# ################################################################
+
+import os
+import subprocess
+import sys
+
+if len(sys.argv) != 3:
+ print(f"Usage: {sys.argv[0]} FILE SIZE_LIMIT")
+ sys.exit(1)
+
+file = sys.argv[1]
+limit = int(sys.argv[2])
+
+if not os.path.exists(file):
+ print(f"{file} does not exist")
+ sys.exit(1)
+
+size = os.path.getsize(file)
+
+if size > limit:
+ print(f"file {file} is {size} bytes, which is greater than the limit of {limit} bytes")
+ sys.exit(1)
./run.py --preserve --verbose basic/help.sh
```
+### Updating exact output
+
+If a test is failing because a `.stderr.exact` or `.stdout.exact` file no longer matches, you can re-run the tests with `--set-exact-output` and the expected output files will be rewritten to match the actual output.
+
+Example:
+```
+./run.py --set-exact-output
+./run.py basic/help.sh --set-exact-output
+```
+
## Writing a test
Test cases are arbitrary executables, and can be written in any language, but are generally shell scripts.
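As a sketch only (the directory and file names here are hypothetical, following the conventions shown above), a new test could look like:
```
#!/bin/sh
# cli-tests/compression/round-trip.sh  (hypothetical location)
set -e

datagen > file            # generate a reproducible input file
zstd -q file -o file.zst  # compress it quietly
zstd -tq file.zst         # check that the frame decompresses cleanly
```
Its expected output would live next to the script (e.g. `round-trip.sh.stdout.exact` or `.stdout.glob`) and can be regenerated with `--set-exact-output` as described above.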
+ zstd -h
-Usage: zstd *OPTION*... *FILE*... *-o file*
-Compress or uncompress FILEs (with no FILE or when FILE is `-`, read from standard input).
+Compress or decompress the INPUT file(s); reads from STDIN if INPUT is `-` or not provided.
- -o file result stored into `file` (only 1 output file)
- -1 .. -19 compression level (faster .. better; default: 3)
- -d, --decompress decompression
- -f, --force disable input and output checks. Allows overwriting existing files,
- input from console, output to stdout, operating on links,
- block devices, etc. During decompression and when the output
- destination is stdout, pass-through unrecognized formats as-is.
- --rm remove source file(s) after successful de/compression
- -k, --keep preserve source file(s) (default)
- -D DICT use DICT as Dictionary for compression or decompression
- -h display usage and exit
- -H,--help display long help and exit
+Usage: zstd *OPTIONS...* *INPUT... | -* *-o OUTPUT*
+
+Options:
+ -o OUTPUT Write output to a single file, OUTPUT.
+ -k, --keep Preserve INPUT file(s). *Default*
+ --rm Remove INPUT file(s) after successful (de)compression.
+
+ -# Desired compression level, where `#` is a number between 1 and 19;
+ lower numbers provide faster compression, higher numbers yield
+ better compression ratios. *Default: 3*
+
+ -d, --decompress Perform decompression.
+ -D DICT Use DICT as the dictionary for compression or decompression.
+
+ -f, --force Disable input and output checks. Allows overwriting existing files,
+ receiving input from the console, printing output to STDOUT, and
+ operating on links, block devices, etc. Unrecognized formats will be
+ passed through as-is.
+
+ -h Display short usage and exit.
+ -H, --help Display full help and exit.
+ -V, --version Display the program version and exit.
+ zstd -H
...
-Advanced options :
+Advanced options:
...
+ zstd --help
...
-Advanced options :
+Advanced options:
...
-*** zstd command line interface *-bits v1.*.*, by Yann Collet ***
-*** zstd command line interface *-bits v1.*.*, by Yann Collet ***
+*** Zstandard CLI (*-bit) v1.*.*, by Yann Collet ***
+*** Zstandard CLI (*-bit) v1.*.*, by Yann Collet ***
zstd --fast=1 file -o file-f1.zst
zstd -1 file -o file-1.zst
zstd -19 file -o file-19.zst
-zstd -22 --ultra file -o file-22.zst
-zstd -t file-f10.zst file-f1.zst file-1.zst file-19.zst file-22.zst
+zstd -t file-f10.zst file-f1.zst file-1.zst file-19.zst
-cmp_size -ne file-19.zst file-22.zst
cmp_size -lt file-19.zst file-1.zst
cmp_size -lt file-1.zst file-f1.zst
cmp_size -lt file-f1.zst file-f10.zst
zstd --fast=1 file -o file-f1.zst
zstd -1 file -o file-1.zst
zstd -19 file -o file-19.zst
-zstd -22 --ultra file -o file-22.zst
-zstd -t file-f10.zst file-f1.zst file-1.zst file-19.zst file-22.zst
+zstd -t file-f10.zst file-f1.zst file-1.zst file-19.zst
-cmp_size -ne file-19.zst file-22.zst
cmp_size -lt file-19.zst file-1.zst
cmp_size -lt file-1.zst file-f1.zst
cmp_size -lt file-f1.zst file-f10.zst
--- /dev/null
+#!/bin/sh
+
+set -e
+
+datagen > file
+
+zstd file -q --trace-file-stat -o file.zst
+zstd -tq file.zst
--- /dev/null
+Trace:FileStat: > UTIL_isLink(file)
+Trace:FileStat: < 0
+Trace:FileStat: > UTIL_isConsole(2)
+Trace:FileStat: < 0
+Trace:FileStat: > UTIL_getFileSize(file)
+Trace:FileStat: > UTIL_stat(file)
+Trace:FileStat: < 1
+Trace:FileStat: < 65537
+Trace:FileStat: > UTIL_isDirectory(file)
+Trace:FileStat: > UTIL_stat(file)
+Trace:FileStat: < 1
+Trace:FileStat: < 0
+Trace:FileStat: > UTIL_stat(file)
+Trace:FileStat: < 1
+Trace:FileStat: > UTIL_stat(file)
+Trace:FileStat: < 1
+Trace:FileStat: > UTIL_isSameFile(file, file.zst)
+Trace:FileStat: > UTIL_stat(file)
+Trace:FileStat: < 1
+Trace:FileStat: > UTIL_stat(file.zst)
+Trace:FileStat: < 0
+Trace:FileStat: < 0
+Trace:FileStat: > UTIL_isRegularFile(file.zst)
+Trace:FileStat: > UTIL_stat(file.zst)
+Trace:FileStat: < 0
+Trace:FileStat: < 0
+Trace:FileStat: > UTIL_isRegularFile(file.zst)
+Trace:FileStat: > UTIL_stat(file.zst)
+Trace:FileStat: < 1
+Trace:FileStat: < 1
+Trace:FileStat: > UTIL_getFileSize(file)
+Trace:FileStat: > UTIL_stat(file)
+Trace:FileStat: < 1
+Trace:FileStat: < 65537
+Trace:FileStat: > UTIL_utime(file.zst)
+Trace:FileStat: < 0
--- /dev/null
+#!/bin/sh
+
+set -e
+
+datagen > file
+
+zstd file -cq --trace-file-stat > file.zst
+zstd -tq file.zst
--- /dev/null
+Trace:FileStat: > UTIL_isLink(file)
+Trace:FileStat: < 0
+Trace:FileStat: > UTIL_isConsole(1)
+Trace:FileStat: < 0
+Trace:FileStat: > UTIL_getFileSize(file)
+Trace:FileStat: > UTIL_stat(file)
+Trace:FileStat: < 1
+Trace:FileStat: < 65537
+Trace:FileStat: > UTIL_isDirectory(file)
+Trace:FileStat: > UTIL_stat(file)
+Trace:FileStat: < 1
+Trace:FileStat: < 0
+Trace:FileStat: > UTIL_stat(file)
+Trace:FileStat: < 1
+Trace:FileStat: > UTIL_isRegularFile(/*stdout*\)
+Trace:FileStat: > UTIL_stat(/*stdout*\)
+Trace:FileStat: < 0
+Trace:FileStat: < 0
+Trace:FileStat: > UTIL_getFileSize(file)
+Trace:FileStat: > UTIL_stat(file)
+Trace:FileStat: < 1
+Trace:FileStat: < 65537
--- /dev/null
+#!/bin/sh
+
+set -e
+
+datagen > file
+
+zstd < file -q --trace-file-stat -o file.zst
+zstd -tq file.zst
--- /dev/null
+Trace:FileStat: > UTIL_isConsole(0)
+Trace:FileStat: < 0
+Trace:FileStat: > UTIL_isConsole(2)
+Trace:FileStat: < 0
+Trace:FileStat: > UTIL_getFileSize(/*stdin*\)
+Trace:FileStat: > UTIL_stat(/*stdin*\)
+Trace:FileStat: < 0
+Trace:FileStat: < -1
+Trace:FileStat: > UTIL_isDirectory(/*stdin*\)
+Trace:FileStat: > UTIL_stat(/*stdin*\)
+Trace:FileStat: < 0
+Trace:FileStat: < 0
+Trace:FileStat: > UTIL_isSameFile(/*stdin*\, file.zst)
+Trace:FileStat: > UTIL_stat(/*stdin*\)
+Trace:FileStat: < 0
+Trace:FileStat: < 0
+Trace:FileStat: > UTIL_isRegularFile(file.zst)
+Trace:FileStat: > UTIL_stat(file.zst)
+Trace:FileStat: < 0
+Trace:FileStat: < 0
+Trace:FileStat: > UTIL_isRegularFile(file.zst)
+Trace:FileStat: > UTIL_stat(file.zst)
+Trace:FileStat: < 1
+Trace:FileStat: < 1
+Trace:FileStat: > UTIL_getFileSize(/*stdin*\)
+Trace:FileStat: > UTIL_stat(/*stdin*\)
+Trace:FileStat: < 0
+Trace:FileStat: < -1
--- /dev/null
+#!/bin/sh
+
+set -e
+
+datagen > file
+
+zstd < file -cq --trace-file-stat > file.zst
+zstd -tq file.zst
--- /dev/null
+Trace:FileStat: > UTIL_isConsole(0)
+Trace:FileStat: < 0
+Trace:FileStat: > UTIL_isConsole(1)
+Trace:FileStat: < 0
+Trace:FileStat: > UTIL_getFileSize(/*stdin*\)
+Trace:FileStat: > UTIL_stat(/*stdin*\)
+Trace:FileStat: < 0
+Trace:FileStat: < -1
+Trace:FileStat: > UTIL_isDirectory(/*stdin*\)
+Trace:FileStat: > UTIL_stat(/*stdin*\)
+Trace:FileStat: < 0
+Trace:FileStat: < 0
+Trace:FileStat: > UTIL_isRegularFile(/*stdout*\)
+Trace:FileStat: > UTIL_stat(/*stdout*\)
+Trace:FileStat: < 0
+Trace:FileStat: < 0
+Trace:FileStat: > UTIL_getFileSize(/*stdin*\)
+Trace:FileStat: > UTIL_stat(/*stdin*\)
+Trace:FileStat: < 0
+Trace:FileStat: < -1
--- /dev/null
+#!/bin/sh
+
+set -e
+
+datagen | zstd -q > file.zst
+
+zstd -dq --trace-file-stat file.zst
--- /dev/null
+Trace:FileStat: > UTIL_isLink(file.zst)
+Trace:FileStat: < 0
+Trace:FileStat: > UTIL_isConsole(1)
+Trace:FileStat: < 0
+Trace:FileStat: > UTIL_isConsole(2)
+Trace:FileStat: < 0
+Trace:FileStat: > UTIL_isDirectory(file.zst)
+Trace:FileStat: > UTIL_stat(file.zst)
+Trace:FileStat: < 1
+Trace:FileStat: < 0
+Trace:FileStat: > UTIL_stat(file.zst)
+Trace:FileStat: < 1
+Trace:FileStat: > UTIL_stat(file.zst)
+Trace:FileStat: < 1
+Trace:FileStat: > UTIL_isSameFile(file.zst, file)
+Trace:FileStat: > UTIL_stat(file.zst)
+Trace:FileStat: < 1
+Trace:FileStat: > UTIL_stat(file)
+Trace:FileStat: < 0
+Trace:FileStat: < 0
+Trace:FileStat: > UTIL_isRegularFile(file)
+Trace:FileStat: > UTIL_stat(file)
+Trace:FileStat: < 0
+Trace:FileStat: < 0
+Trace:FileStat: > UTIL_isRegularFile(file)
+Trace:FileStat: > UTIL_stat(file)
+Trace:FileStat: < 1
+Trace:FileStat: < 1
+Trace:FileStat: > UTIL_utime(file)
+Trace:FileStat: < 0
--- /dev/null
+#!/bin/sh
+
+set -e
+
+datagen | zstd -q > file.zst
+
+zstd -dcq --trace-file-stat file.zst > file
--- /dev/null
+Trace:FileStat: > UTIL_isLink(file.zst)
+Trace:FileStat: < 0
+Trace:FileStat: > UTIL_isConsole(1)
+Trace:FileStat: < 0
+Trace:FileStat: > UTIL_isDirectory(file.zst)
+Trace:FileStat: > UTIL_stat(file.zst)
+Trace:FileStat: < 1
+Trace:FileStat: < 0
+Trace:FileStat: > UTIL_stat(file.zst)
+Trace:FileStat: < 1
+Trace:FileStat: > UTIL_isRegularFile(/*stdout*\)
+Trace:FileStat: > UTIL_stat(/*stdout*\)
+Trace:FileStat: < 0
+Trace:FileStat: < 0
--- /dev/null
+#!/bin/sh
+
+set -e
+
+datagen | zstd -q > file.zst
+
+zstd -dcq --trace-file-stat < file.zst -o file
--- /dev/null
+Trace:FileStat: > UTIL_isConsole(0)
+Trace:FileStat: < 0
+Trace:FileStat: > UTIL_isConsole(2)
+Trace:FileStat: < 0
+Trace:FileStat: > UTIL_isDirectory(/*stdin*\)
+Trace:FileStat: > UTIL_stat(/*stdin*\)
+Trace:FileStat: < 0
+Trace:FileStat: < 0
+Trace:FileStat: > UTIL_isSameFile(/*stdin*\, file)
+Trace:FileStat: > UTIL_stat(/*stdin*\)
+Trace:FileStat: < 0
+Trace:FileStat: < 0
+Trace:FileStat: > UTIL_isRegularFile(file)
+Trace:FileStat: > UTIL_stat(file)
+Trace:FileStat: < 0
+Trace:FileStat: < 0
+Trace:FileStat: > UTIL_isRegularFile(file)
+Trace:FileStat: > UTIL_stat(file)
+Trace:FileStat: < 1
+Trace:FileStat: < 1
--- /dev/null
+#!/bin/sh
+
+set -e
+
+datagen | zstd -q > file.zst
+
+zstd -dcq --trace-file-stat < file.zst > file
--- /dev/null
+Trace:FileStat: > UTIL_isConsole(0)
+Trace:FileStat: < 0
+Trace:FileStat: > UTIL_isConsole(1)
+Trace:FileStat: < 0
+Trace:FileStat: > UTIL_isDirectory(/*stdin*\)
+Trace:FileStat: > UTIL_stat(/*stdin*\)
+Trace:FileStat: < 0
+Trace:FileStat: < 0
+Trace:FileStat: > UTIL_isRegularFile(/*stdout*\)
+Trace:FileStat: > UTIL_stat(/*stdout*\)
+Trace:FileStat: < 0
+Trace:FileStat: < 0
args = --no-progress --fake-stderr-is-console -v
compress file to file
-*zstd*
+*Zstandard CLI*
hello*hello.zst*
compress pipe to pipe
-*zstd*
+*Zstandard CLI*
*stdin*stdout*
compress pipe to file
-*zstd*
+*Zstandard CLI*
*stdin*hello.zst*
compress file to pipe
-*zstd*
+*Zstandard CLI*
*hello*stdout*
compress 2 files
-*zstd*
+*Zstandard CLI*
*hello*hello.zst*
*world*world.zst*
2 files compressed*
decompress file to file
-*zstd*
+*Zstandard CLI*
hello.zst*
decompress pipe to pipe
-*zstd*
+*Zstandard CLI*
*stdin*
decompress pipe to file
-*zstd*
+*Zstandard CLI*
*stdin*
decompress file to pipe
-*zstd*
+*Zstandard CLI*
hello.zst*
decompress 2 files
-*zstd*
+*Zstandard CLI*
hello.zst*
world.zst*
2 files decompressed*
#!/usr/bin/env python3
# ################################################################
-# Copyright (c) Facebook, Inc.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under both the BSD-style license (found in the
preserve: bool,
scratch_dir: str,
test_dir: str,
+ set_exact_output: bool,
) -> None:
self.env = env
self.timeout = timeout
self.preserve = preserve
self.scratch_dir = scratch_dir
self.test_dir = test_dir
+ self.set_exact_output = set_exact_output
class TestCase:
self._test_stdin.close()
self._test_stdin = None
- def _check_output_exact(self, out_name: str, expected: bytes) -> None:
+ def _check_output_exact(self, out_name: str, expected: bytes, exact_name: str) -> None:
"""
Check the output named :out_name: for an exact match against the :expected: content.
Saves the success and message.
self._success[check_name] = False
self._message[check_name] = f"{out_name} does not match!\n> diff expected actual\n{diff(expected, actual)}"
+ if self._opts.set_exact_output:
+ with open(exact_name, "wb") as f:
+ f.write(actual)
+
def _check_output_glob(self, out_name: str, expected: bytes) -> None:
"""
Check the output named :out_name: for a glob match against the :expected: glob.
ignore_name = f"{self._test_file}.{out_name}.ignore"
if os.path.exists(exact_name):
- return self._check_output_exact(out_name, read_file(exact_name))
+ return self._check_output_exact(out_name, read_file(exact_name), exact_name)
elif os.path.exists(glob_name):
return self._check_output_glob(out_name, read_file(glob_name))
elif os.path.exists(ignore_name):
self._success[check_name] = True
self._message[check_name] = f"{out_name} ignored!"
else:
- return self._check_output_exact(out_name, bytes())
+ return self._check_output_exact(out_name, bytes(), exact_name)
def _check_stderr(self) -> None:
"""Checks the stderr output against the expectation."""
"Scratch directory located in TEST_DIR/scratch/."
)
)
+ parser.add_argument(
+ "--set-exact-output",
+ action="store_true",
+ help="Set stderr.exact and stdout.exact for all failing tests, unless .ignore or .glob already exists"
+ )
parser.add_argument(
"tests",
nargs="*",
preserve=args.preserve,
test_dir=args.test_dir,
scratch_dir=scratch_dir,
+ set_exact_output=args.set_exact_output,
)
if len(args.tests) == 0:
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
BYTE step = (BYTE) ((RAND(seed) % 256) | 1); /* force it to be odd so it's relatively prime to 256 */
while (i < DISTSIZE) {
- size_t states = ((size_t)(weight * statesLeft)) + 1;
+ size_t states = ((size_t)(weight * (double)statesLeft)) + 1;
size_t j;
for (j = 0; j < states && i < DISTSIZE; j++, i++) {
dist[i] = symb;
* ensure nice numbers */
U32 matchLen =
MIN_SEQ_LEN +
- ROUND(RAND_exp(seed, excessMatch / (double)(numSequences - i)));
+ ROUND(RAND_exp(seed, (double)excessMatch / (double)(numSequences - i)));
U32 literalLen =
(RAND(seed) & 7)
? ROUND(RAND_exp(seed,
- literalsSize /
+ (double)literalsSize /
(double)(numSequences - i)))
: 0;
/* actual offset, code to send, and point to copy up to when shifting
--- /dev/null
+/*
+ * Copyright (c) Yann Collet, Meta Platforms, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#include "external_matchfinder.h"
+#include <string.h>
+#include "zstd_compress_internal.h"
+
+#define HSIZE 1024
+static U32 const HLOG = 10;
+static U32 const MLS = 4;
+static U32 const BADIDX = 0xffffffff;
+
+static size_t simpleExternalMatchFinder(
+ void* externalMatchState,
+ ZSTD_Sequence* outSeqs, size_t outSeqsCapacity,
+ const void* src, size_t srcSize,
+ const void* dict, size_t dictSize,
+ int compressionLevel,
+ size_t windowSize
+) {
+ const BYTE* const istart = (const BYTE*)src;
+ const BYTE* const iend = istart + srcSize;
+ const BYTE* ip = istart;
+ const BYTE* anchor = istart;
+ size_t seqCount = 0;
+ U32 hashTable[HSIZE];
+
+ (void)externalMatchState;
+ (void)dict;
+ (void)dictSize;
+ (void)outSeqsCapacity;
+ (void)compressionLevel;
+
+ { int i;
+ for (i=0; i < HSIZE; i++) {
+ hashTable[i] = BADIDX;
+ } }
+
+ while (ip + MLS < iend) {
+ size_t const hash = ZSTD_hashPtr(ip, HLOG, MLS);
+ U32 const matchIndex = hashTable[hash];
+ hashTable[hash] = (U32)(ip - istart);
+
+ if (matchIndex != BADIDX) {
+ const BYTE* const match = istart + matchIndex;
+ U32 const matchLen = (U32)ZSTD_count(ip, match, iend);
+ if (matchLen >= ZSTD_MINMATCH_MIN) {
+ U32 const litLen = (U32)(ip - anchor);
+ U32 const offset = (U32)(ip - match);
+ ZSTD_Sequence const seq = {
+ offset, litLen, matchLen, 0
+ };
+
+ /* Note: it's crucial to stay within the window size! */
+ if (offset <= windowSize) {
+ outSeqs[seqCount++] = seq;
+ ip += matchLen;
+ anchor = ip;
+ continue;
+ }
+ }
+ }
+
+ ip++;
+ }
+
+ { ZSTD_Sequence const finalSeq = {
+ 0, (U32)(iend - anchor), 0, 0
+ };
+ outSeqs[seqCount++] = finalSeq;
+ }
+
+ return seqCount;
+}
+
+size_t zstreamExternalMatchFinder(
+ void* externalMatchState,
+ ZSTD_Sequence* outSeqs, size_t outSeqsCapacity,
+ const void* src, size_t srcSize,
+ const void* dict, size_t dictSize,
+ int compressionLevel,
+ size_t windowSize
+) {
+ EMF_testCase const testCase = *((EMF_testCase*)externalMatchState);
+ memset(outSeqs, 0, outSeqsCapacity);
+
+ switch (testCase) {
+ case EMF_ZERO_SEQS:
+ return 0;
+ case EMF_ONE_BIG_SEQ:
+ outSeqs[0].offset = 0;
+ outSeqs[0].matchLength = 0;
+ outSeqs[0].litLength = (U32)(srcSize);
+ return 1;
+ case EMF_LOTS_OF_SEQS:
+ return simpleExternalMatchFinder(
+ externalMatchState,
+ outSeqs, outSeqsCapacity,
+ src, srcSize,
+ dict, dictSize,
+ compressionLevel,
+ windowSize
+ );
+ case EMF_SMALL_ERROR:
+ return outSeqsCapacity + 1;
+ case EMF_BIG_ERROR:
+ default:
+ return ZSTD_EXTERNAL_MATCHFINDER_ERROR;
+ }
+}
--- /dev/null
+/*
+ * Copyright (c) Yann Collet, Meta Platforms, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef EXTERNAL_MATCHFINDER
+#define EXTERNAL_MATCHFINDER
+
+#define ZSTD_STATIC_LINKING_ONLY
+#include "zstd.h"
+
+/* See external_matchfinder.c for details on each test case */
+typedef enum {
+ EMF_ZERO_SEQS = 0,
+ EMF_ONE_BIG_SEQ = 1,
+ EMF_LOTS_OF_SEQS = 2,
+ EMF_BIG_ERROR = 3,
+ EMF_SMALL_ERROR = 4
+} EMF_testCase;
+
+size_t zstreamExternalMatchFinder(
+ void* externalMatchState,
+ ZSTD_Sequence* outSeqs, size_t outSeqsCapacity,
+ const void* src, size_t srcSize,
+ const void* dict, size_t dictSize,
+ int compressionLevel,
+ size_t windowSize
+);
+
+#endif // EXTERNAL_MATCHFINDER
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
# ################################################################
-# Copyright (c) Facebook, Inc.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under both the BSD-style license (found in the
/**
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/**
- * Copyright (c) Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
#!/usr/bin/env python
# ################################################################
-# Copyright (c) Facebook, Inc.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
void FUZ_bug976(void);
void FUZ_bug976(void)
{ /* these constants shall not depend on MIN() macro */
- assert(ZSTD_HASHLOG_MAX < 31);
- assert(ZSTD_CHAINLOG_MAX < 31);
+ DEBUG_STATIC_ASSERT(ZSTD_HASHLOG_MAX < 31);
+ DEBUG_STATIC_ASSERT(ZSTD_CHAINLOG_MAX < 31);
}
/*=============================================
* Test macros
=============================================*/
-#define CHECK_Z(f) { \
- size_t const err = f; \
- if (ZSTD_isError(err)) { \
- DISPLAY("Error => %s : %s ", \
- #f, ZSTD_getErrorName(err)); \
- exit(1); \
+#define CHECK(fn) { if(!(fn)) { DISPLAYLEVEL(1, "Error : test (%s) failed \n", #fn); exit(1); } }
+
+#define CHECK_Z(f) { \
+ size_t const err = f; \
+ if (ZSTD_isError(err)) { \
+ DISPLAY("Error => %s : %s ", \
+ #f, ZSTD_getErrorName(err)); \
+ exit(1); \
} }
-#define CHECK_VAR(var, fn) var = fn; if (ZSTD_isError(var)) { DISPLAYLEVEL(1, "%s : fails : %s \n", #fn, ZSTD_getErrorName(var)); goto _output_error; }
+#define CHECK_VAR(var, fn) var = fn; if (ZSTD_isError(var)) { DISPLAYLEVEL(1, "%s : fails : %s \n", #fn, ZSTD_getErrorName(var)); exit(1); }
#define CHECK_NEWV(var, fn) size_t const CHECK_VAR(var, fn)
-#define CHECK(fn) { CHECK_NEWV(__err, fn); }
#define CHECKPLUS(var, fn, more) { CHECK_NEWV(var, fn); more; }
#define CHECK_OP(op, lhs, rhs) { \
if (!((lhs) op (rhs))) { \
DISPLAY("Error L%u => FAILED %s %s %s ", __LINE__, #lhs, #op, #rhs); \
- goto _output_error; \
+ exit(1); \
} \
}
#define CHECK_EQ(lhs, rhs) CHECK_OP(==, lhs, rhs)
}
#ifdef ZSTD_MULTITHREAD
+
typedef struct {
ZSTD_CCtx* cctx;
ZSTD_threadPool* pool;
ZSTD_pthread_create(&t1, NULL, threadPoolTests_compressionJob, &p1);
ZSTD_pthread_create(&t2, NULL, threadPoolTests_compressionJob, &p2);
- ZSTD_pthread_join(t1, NULL);
- ZSTD_pthread_join(t2, NULL);
+ ZSTD_pthread_join(t1);
+ ZSTD_pthread_join(t2);
assert(!memcmp(decodedBuffer, decodedBuffer2, CNBuffSize));
free(decodedBuffer2);
* Unit tests
=============================================*/
+static void test_compressBound(unsigned tnb)
+{
+ DISPLAYLEVEL(3, "test%3u : compressBound : ", tnb);
+
+ /* check ZSTD_compressBound == ZSTD_COMPRESSBOUND
+ * for a large range of known valid values */
+ DEBUG_STATIC_ASSERT(sizeof(size_t) >= 4);
+ { int s;
+ for (s=0; s<30; s++) {
+ size_t const w = (size_t)1 << s;
+ CHECK_EQ(ZSTD_compressBound(w), ZSTD_COMPRESSBOUND(w));
+ } }
+
+ // Ensure error if srcSize too big
+ { size_t const w = ZSTD_MAX_INPUT_SIZE + 1;
+ CHECK(ZSTD_isError(ZSTD_compressBound(w))); /* must fail */
+ CHECK_EQ(ZSTD_COMPRESSBOUND(w), 0);
+ }
+
+ DISPLAYLEVEL(3, "OK \n");
+}
+
+static void test_decompressBound(unsigned tnb)
+{
+ DISPLAYLEVEL(3, "test%3u : decompressBound : ", tnb);
+
+ // Simple compression, with size : should provide size;
+ { const char example[] = "abcd";
+ char cBuffer[ZSTD_COMPRESSBOUND(sizeof(example))];
+ size_t const cSize = ZSTD_compress(cBuffer, sizeof(cBuffer), example, sizeof(example), 0);
+ CHECK_Z(cSize);
+ CHECK_EQ(ZSTD_decompressBound(cBuffer, cSize), (unsigned long long)sizeof(example));
+ }
+
+ // Simple small compression without size : should provide 1 block size
+ { char cBuffer[ZSTD_COMPRESSBOUND(0)];
+ ZSTD_outBuffer out = { cBuffer, sizeof(cBuffer), 0 };
+ ZSTD_inBuffer in = { NULL, 0, 0 };
+ ZSTD_CCtx* const cctx = ZSTD_createCCtx();
+ assert(cctx);
+ CHECK_Z( ZSTD_initCStream(cctx, 0) );
+ CHECK_Z( ZSTD_compressStream(cctx, &out, &in) );
+ CHECK_EQ( ZSTD_endStream(cctx, &out), 0 );
+ CHECK_EQ( ZSTD_decompressBound(cBuffer, out.pos), ZSTD_BLOCKSIZE_MAX );
+ ZSTD_freeCCtx(cctx);
+ }
+
+ // Attempt to overflow 32-bit intermediate multiplication result
+ // This requires dBound >= 4 GB, aka 2^32.
+ // This requires 2^32 / 2^17 = 2^15 blocks
+ // => create 2^15 blocks (can be empty, or just 1 byte).
+ { const char input[] = "a";
+ size_t const nbBlocks = (1 << 15) + 1;
+ size_t blockNb;
+ size_t const outCapacity = 1 << 18; // large margin
+ char* const outBuffer = malloc (outCapacity);
+ ZSTD_outBuffer out = { outBuffer, outCapacity, 0 };
+ ZSTD_CCtx* const cctx = ZSTD_createCCtx();
+ assert(cctx);
+ assert(outBuffer);
+ CHECK_Z( ZSTD_initCStream(cctx, 0) );
+ for (blockNb=0; blockNb<nbBlocks; blockNb++) {
+ ZSTD_inBuffer in = { input, sizeof(input), 0 };
+ CHECK_Z( ZSTD_compressStream(cctx, &out, &in) );
+ CHECK_EQ( ZSTD_flushStream(cctx, &out), 0 );
+ }
+ CHECK_EQ( ZSTD_endStream(cctx, &out), 0 );
+ CHECK( ZSTD_decompressBound(outBuffer, out.pos) > 0x100000000LLU /* 4 GB */ );
+ ZSTD_freeCCtx(cctx);
+ free(outBuffer);
+ }
+
+ DISPLAYLEVEL(3, "OK \n");
+}
+
+static void test_setCParams(unsigned tnb)
+{
+ ZSTD_CCtx* const cctx = ZSTD_createCCtx();
+ ZSTD_compressionParameters cparams;
+ assert(cctx);
+
+ DISPLAYLEVEL(3, "test%3u : ZSTD_CCtx_setCParams : ", tnb);
+
+ /* valid cparams */
+ cparams = ZSTD_getCParams(1, 0, 0);
+ CHECK_Z(ZSTD_CCtx_setCParams(cctx, cparams));
+
+ /* invalid cparams (must fail) */
+ cparams.windowLog = 99;
+ CHECK(ZSTD_isError(ZSTD_CCtx_setCParams(cctx, cparams)));
+
+ ZSTD_freeCCtx(cctx);
+ DISPLAYLEVEL(3, "OK \n");
+}
+
static int basicUnitTests(U32 const seed, double compressibility)
{
size_t const CNBuffSize = 5 MB;
DISPLAYLEVEL(3, "%u (OK) \n", vn);
}
+ test_compressBound(testNb++);
+
+ test_decompressBound(testNb++);
+
+ test_setCParams(testNb++);
+
DISPLAYLEVEL(3, "test%3u : ZSTD_adjustCParams : ", testNb++);
{
ZSTD_compressionParameters params;
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : init CCtx for small level %u (should work again) : ", testNb++, 1);
- CHECK( ZSTD_compressBegin(staticCCtx, 1) );
+ CHECK_Z( ZSTD_compressBegin(staticCCtx, 1) );
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : use CStream on CCtx-sized static context (should fail) : ", testNb++);
testResult = 1;
goto _end;
}
- CHECK( ZSTD_CCtx_setParameter(mtctx, ZSTD_c_nbWorkers, 2) );
- CHECK( ZSTD_CCtx_setParameter(mtctx, ZSTD_c_compressionLevel, 1) );
+ CHECK_Z( ZSTD_CCtx_setParameter(mtctx, ZSTD_c_nbWorkers, 2) );
+ CHECK_Z( ZSTD_CCtx_setParameter(mtctx, ZSTD_c_compressionLevel, 1) );
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3u : compress %u bytes with 2 threads : ", testNb++, (unsigned)CNBuffSize);
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : compress -T2 with checksum : ", testNb++);
- CHECK( ZSTD_CCtx_setParameter(mtctx, ZSTD_c_checksumFlag, 1) );
- CHECK( ZSTD_CCtx_setParameter(mtctx, ZSTD_c_contentSizeFlag, 1) );
- CHECK( ZSTD_CCtx_setParameter(mtctx, ZSTD_c_overlapLog, 3) );
+ CHECK_Z( ZSTD_CCtx_setParameter(mtctx, ZSTD_c_checksumFlag, 1) );
+ CHECK_Z( ZSTD_CCtx_setParameter(mtctx, ZSTD_c_contentSizeFlag, 1) );
+ CHECK_Z( ZSTD_CCtx_setParameter(mtctx, ZSTD_c_overlapLog, 3) );
CHECK_VAR(cSize, ZSTD_compress2(mtctx,
compressedBuffer, compressedBufferSize,
CNBuffer, CNBuffSize) );
ZSTD_DCtx* const dctx = ZSTD_createDCtx();
char out[32];
if (cctx == NULL || dctx == NULL) goto _output_error;
- CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_contentSizeFlag, 0) );
+ CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_contentSizeFlag, 0) );
CHECK_VAR(cSize, ZSTD_compress2(cctx, out, sizeof(out), NULL, 0) );
DISPLAYLEVEL(3, "OK (%u bytes)\n", (unsigned)cSize);
- CHECK( ZSTD_DCtx_setParameter(dctx, ZSTD_d_windowLogMax, 10) );
+ CHECK_Z( ZSTD_DCtx_setParameter(dctx, ZSTD_d_windowLogMax, 10) );
{ char const* outPtr = out;
ZSTD_inBuffer inBuffer = { outPtr, cSize, 0 };
ZSTD_outBuffer outBuffer = { NULL, 0, 0 };
DISPLAYLEVEL(3, "test%3i : compress with block splitting : ", testNb++)
{ ZSTD_CCtx* cctx = ZSTD_createCCtx();
- CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_useBlockSplitter, ZSTD_ps_enable) );
+ CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_useBlockSplitter, ZSTD_ps_enable) );
cSize = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, CNBuffSize);
- CHECK(cSize);
+ CHECK_Z(cSize);
ZSTD_freeCCtx(cctx);
}
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : compress -T2 with/without literals compression : ", testNb++)
{ ZSTD_CCtx* cctx = ZSTD_createCCtx();
size_t cSize1, cSize2;
- CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 1) );
- CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 2) );
+ CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 1) );
+ CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 2) );
cSize1 = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, CNBuffSize);
- CHECK(cSize1);
- CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_literalCompressionMode, ZSTD_ps_disable) );
+ CHECK_Z(cSize1);
+ CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_literalCompressionMode, ZSTD_ps_disable) );
cSize2 = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, CNBuffSize);
- CHECK(cSize2);
+ CHECK_Z(cSize2);
CHECK_LT(cSize1, cSize2);
ZSTD_freeCCtx(cctx);
}
/* Set rsyncable and don't give the ZSTD_compressBound(CNBuffSize) so
* ZSTDMT is forced to not take the shortcut.
*/
- CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 1) );
- CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 1) );
- CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_rsyncable, 1) );
- CHECK( ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize - 1, CNBuffer, CNBuffSize) );
+ CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 1) );
+ CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 1) );
+ CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_rsyncable, 1) );
+ CHECK_Z( ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize - 1, CNBuffer, CNBuffSize) );
ZSTD_freeCCtx(cctx);
}
DISPLAYLEVEL(3, "OK \n");
int const jobSize = 512 KB;
int value;
/* Check that the overlap log and job size are unset. */
- CHECK( ZSTD_CCtxParams_getParameter(params, ZSTD_c_overlapLog, &value) );
+ CHECK_Z( ZSTD_CCtxParams_getParameter(params, ZSTD_c_overlapLog, &value) );
CHECK_EQ(value, 0);
- CHECK( ZSTD_CCtxParams_getParameter(params, ZSTD_c_jobSize, &value) );
+ CHECK_Z( ZSTD_CCtxParams_getParameter(params, ZSTD_c_jobSize, &value) );
CHECK_EQ(value, 0);
/* Set and check the overlap log and job size. */
- CHECK( ZSTD_CCtxParams_setParameter(params, ZSTD_c_overlapLog, 5) );
- CHECK( ZSTD_CCtxParams_setParameter(params, ZSTD_c_jobSize, jobSize) );
- CHECK( ZSTD_CCtxParams_getParameter(params, ZSTD_c_overlapLog, &value) );
+ CHECK_Z( ZSTD_CCtxParams_setParameter(params, ZSTD_c_overlapLog, 5) );
+ CHECK_Z( ZSTD_CCtxParams_setParameter(params, ZSTD_c_jobSize, jobSize) );
+ CHECK_Z( ZSTD_CCtxParams_getParameter(params, ZSTD_c_overlapLog, &value) );
CHECK_EQ(value, 5);
- CHECK( ZSTD_CCtxParams_getParameter(params, ZSTD_c_jobSize, &value) );
+ CHECK_Z( ZSTD_CCtxParams_getParameter(params, ZSTD_c_jobSize, &value) );
CHECK_EQ(value, jobSize);
/* Set the number of workers and check the overlap log and job size. */
- CHECK( ZSTD_CCtxParams_setParameter(params, ZSTD_c_nbWorkers, 2) );
- CHECK( ZSTD_CCtxParams_getParameter(params, ZSTD_c_overlapLog, &value) );
+ CHECK_Z( ZSTD_CCtxParams_setParameter(params, ZSTD_c_nbWorkers, 2) );
+ CHECK_Z( ZSTD_CCtxParams_getParameter(params, ZSTD_c_overlapLog, &value) );
CHECK_EQ(value, 5);
- CHECK( ZSTD_CCtxParams_getParameter(params, ZSTD_c_jobSize, &value) );
+ CHECK_Z( ZSTD_CCtxParams_getParameter(params, ZSTD_c_jobSize, &value) );
CHECK_EQ(value, jobSize);
ZSTD_freeCCtxParams(params);
}
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : load dictionary into context : ", testNb++);
- CHECK( ZSTD_compressBegin_usingDict(ctxOrig, CNBuffer, dictSize, 2) );
- CHECK( ZSTD_copyCCtx(ctxDuplicated, ctxOrig, 0) ); /* Begin_usingDict implies unknown srcSize, so match that */
+ CHECK_Z( ZSTD_compressBegin_usingDict(ctxOrig, CNBuffer, dictSize, 2) );
+ CHECK_Z( ZSTD_copyCCtx(ctxDuplicated, ctxOrig, 0) ); /* Begin_usingDict implies unknown srcSize, so match that */
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : compress with flat dictionary : ", testNb++);
DISPLAYLEVEL(3, "test%3i : check content size on duplicated context : ", testNb++);
{ size_t const testSize = CNBuffSize / 3;
- CHECK( ZSTD_compressBegin(ctxOrig, ZSTD_defaultCLevel()) );
- CHECK( ZSTD_copyCCtx(ctxDuplicated, ctxOrig, testSize) );
+ CHECK_Z( ZSTD_compressBegin(ctxOrig, ZSTD_defaultCLevel()) );
+ CHECK_Z( ZSTD_copyCCtx(ctxDuplicated, ctxOrig, testSize) );
CHECK_VAR(cSize, ZSTD_compressEnd(ctxDuplicated, compressedBuffer, ZSTD_compressBound(testSize),
(const char*)CNBuffer + dictSize, testSize) );
size_t const wrongSrcSize = (srcSize + 1000);
ZSTD_parameters params = ZSTD_getParams(1, wrongSrcSize, 0);
params.fParams.contentSizeFlag = 1;
- CHECK( ZSTD_compressBegin_advanced(cctx, NULL, 0, params, wrongSrcSize) );
+ CHECK_Z( ZSTD_compressBegin_advanced(cctx, NULL, 0, params, wrongSrcSize) );
{ size_t const result = ZSTD_compressEnd(cctx, decodedBuffer, CNBuffSize, CNBuffer, srcSize);
if (!ZSTD_isError(result)) goto _output_error;
if (ZSTD_getErrorCode(result) != ZSTD_error_srcSize_wrong) goto _output_error;
CNBuffer, srcSize, compressionLevel);
if (ZSTD_isError(cSize_1pass)) goto _output_error;
- CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, compressionLevel) );
+ CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, compressionLevel) );
{ size_t const compressionResult = ZSTD_compress2(cctx,
compressedBuffer, compressedBufferSize,
CNBuffer, srcSize);
{ ZSTD_CCtx* const cctx = ZSTD_createCCtx();
DISPLAYLEVEL(3, "test%3i : parameters in order : ", testNb++);
assert(cctx != NULL);
- CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 2) );
- CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableLongDistanceMatching, ZSTD_ps_enable) );
- CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, 18) );
+ CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 2) );
+ CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableLongDistanceMatching, ZSTD_ps_enable) );
+ CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, 18) );
{ size_t const compressedSize = ZSTD_compress2(cctx,
compressedBuffer, ZSTD_compressBound(inputSize),
CNBuffer, inputSize);
- CHECK(compressedSize);
+ CHECK_Z(compressedSize);
cSize = compressedSize;
xxh64 = XXH64(compressedBuffer, compressedSize, 0);
}
{ ZSTD_CCtx* cctx = ZSTD_createCCtx();
DISPLAYLEVEL(3, "test%3i : parameters disordered : ", testNb++);
- CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, 18) );
- CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableLongDistanceMatching, ZSTD_ps_enable) );
- CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 2) );
+ CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, 18) );
+ CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableLongDistanceMatching, ZSTD_ps_enable) );
+ CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 2) );
{ size_t const result = ZSTD_compress2(cctx,
compressedBuffer, ZSTD_compressBound(inputSize),
CNBuffer, inputSize);
- CHECK(result);
+ CHECK_Z(result);
if (result != cSize) goto _output_error; /* must result in same compressed result, hence same size */
if (XXH64(compressedBuffer, result, 0) != xxh64) goto _output_error; /* must result in exactly same content, hence same hash */
DISPLAYLEVEL(3, "OK (compress : %u -> %u bytes)\n", (unsigned)inputSize, (unsigned)result);
DISPLAYLEVEL(3, "test%3i : get dParameter bounds ", testNb++);
{ ZSTD_bounds const bounds = ZSTD_dParam_getBounds(ZSTD_d_windowLogMax);
- CHECK(bounds.error);
+ CHECK_Z(bounds.error);
}
DISPLAYLEVEL(3, "OK \n");
/* basic block compression */
DISPLAYLEVEL(3, "test%3i : magic-less format test : ", testNb++);
- CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_format, ZSTD_f_zstd1_magicless) );
+ CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_format, ZSTD_f_zstd1_magicless) );
{ ZSTD_inBuffer in = { CNBuffer, inputSize, 0 };
ZSTD_outBuffer out = { compressedBuffer, ZSTD_compressBound(inputSize), 0 };
size_t const result = ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_end);
DISPLAYLEVEL(3, "test%3i : decompress of magic-less frame : ", testNb++);
ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters);
- CHECK( ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, ZSTD_f_zstd1_magicless) );
+ CHECK_Z( ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, ZSTD_f_zstd1_magicless) );
{ ZSTD_frameHeader zfh;
size_t const zfhrt = ZSTD_getFrameHeader_advanced(&zfh, compressedBuffer, cSize, ZSTD_f_zstd1_magicless);
if (zfhrt != 0) goto _output_error;
/* basic block compression */
DISPLAYLEVEL(3, "test%3i : empty magic-less format test : ", testNb++);
- CHECK( ZSTD_CCtx_setParameter(cctx, ZSTD_c_format, ZSTD_f_zstd1_magicless) );
+ CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_format, ZSTD_f_zstd1_magicless) );
{ ZSTD_inBuffer in = { CNBuffer, 0, 0 };
ZSTD_outBuffer out = { compressedBuffer, ZSTD_compressBound(0), 0 };
size_t const result = ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_end);
DISPLAYLEVEL(3, "test%3i : decompress of empty magic-less frame : ", testNb++);
ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters);
- CHECK( ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, ZSTD_f_zstd1_magicless) );
+ CHECK_Z( ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, ZSTD_f_zstd1_magicless) );
/* one shot */
{ size_t const result = ZSTD_decompressDCtx(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize);
if (result != 0) goto _output_error;
int check;
if (ZSTD_isError(bounds.error))
continue;
- CHECK(ZSTD_DCtx_getParameter(dctx, dParam, &value1));
+ CHECK_Z(ZSTD_DCtx_getParameter(dctx, dParam, &value1));
value2 = (value1 != bounds.lowerBound) ? bounds.lowerBound : bounds.upperBound;
- CHECK(ZSTD_DCtx_setParameter(dctx, dParam, value2));
- CHECK(ZSTD_DCtx_getParameter(dctx, dParam, &check));
+ CHECK_Z(ZSTD_DCtx_setParameter(dctx, dParam, value2));
+ CHECK_Z(ZSTD_DCtx_getParameter(dctx, dParam, &check));
if (check != value2) goto _output_error;
- CHECK(ZSTD_DCtx_reset(dctx, ZSTD_reset_parameters));
- CHECK(ZSTD_DCtx_getParameter(dctx, dParam, &check));
+ CHECK_Z(ZSTD_DCtx_reset(dctx, ZSTD_reset_parameters));
+ CHECK_Z(ZSTD_DCtx_getParameter(dctx, dParam, &check));
if (check != value1) goto _output_error;
}
ZSTD_freeDCtx(dctx);
/* basic block compression */
DISPLAYLEVEL(3, "test%3i : Block compression test : ", testNb++);
- CHECK( ZSTD_compressBegin(cctx, 5) );
- CHECK( ZSTD_getBlockSize(cctx) >= blockSize);
+ CHECK_Z( ZSTD_compressBegin(cctx, 5) );
+ CHECK_Z( ZSTD_getBlockSize(cctx) >= blockSize);
CHECK_VAR(cSize, ZSTD_compressBlock(cctx, compressedBuffer, ZSTD_compressBound(blockSize), CNBuffer, blockSize) );
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : Block decompression test : ", testNb++);
- CHECK( ZSTD_decompressBegin(dctx) );
+ CHECK_Z( ZSTD_decompressBegin(dctx) );
{ CHECK_NEWV(r, ZSTD_decompressBlock(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize) );
if (r != blockSize) goto _output_error; }
DISPLAYLEVEL(3, "OK \n");
/* very long stream of block compression */
DISPLAYLEVEL(3, "test%3i : Huge block streaming compression test : ", testNb++);
- CHECK( ZSTD_compressBegin(cctx, -199) ); /* we just want to quickly overflow internal U32 index */
- CHECK( ZSTD_getBlockSize(cctx) >= blockSize);
+ CHECK_Z( ZSTD_compressBegin(cctx, -199) ); /* we just want to quickly overflow internal U32 index */
+ CHECK_Z( ZSTD_getBlockSize(cctx) >= blockSize);
{ U64 const toCompress = 5000000000ULL; /* > 4 GB */
U64 compressed = 0;
while (compressed < toCompress) {
/* dictionary block compression */
DISPLAYLEVEL(3, "test%3i : Dictionary Block compression test : ", testNb++);
- CHECK( ZSTD_compressBegin_usingDict(cctx, CNBuffer, dictSize, 5) );
+ CHECK_Z( ZSTD_compressBegin_usingDict(cctx, CNBuffer, dictSize, 5) );
CHECK_VAR(cSize, ZSTD_compressBlock(cctx, compressedBuffer, ZSTD_compressBound(blockSize), (char*)CNBuffer+dictSize, blockSize));
RDG_genBuffer((char*)CNBuffer+dictSize+blockSize, blockSize, 0.0, 0.0, seed); /* create a non-compressible second block */
{ CHECK_NEWV(r, ZSTD_compressBlock(cctx, (char*)compressedBuffer+cSize, ZSTD_compressBound(blockSize), (char*)CNBuffer+dictSize+blockSize, blockSize) ); /* for cctx history consistency */
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : Dictionary Block decompression test : ", testNb++);
- CHECK( ZSTD_decompressBegin_usingDict(dctx, CNBuffer, dictSize) );
+ CHECK_Z( ZSTD_decompressBegin_usingDict(dctx, CNBuffer, dictSize) );
{ CHECK_NEWV( r, ZSTD_decompressBlock(dctx, decodedBuffer, blockSize, compressedBuffer, cSize) );
if (r != blockSize) {
DISPLAYLEVEL(1, "ZSTD_decompressBlock() with _usingDict() fails : %u, instead of %u expected \n", (unsigned)r, (unsigned)blockSize);
DISPLAYLEVEL(3, "test%3i : Block compression with CDict : ", testNb++);
{ ZSTD_CDict* const cdict = ZSTD_createCDict(CNBuffer, dictSize, 3);
if (cdict==NULL) goto _output_error;
- CHECK( ZSTD_compressBegin_usingCDict(cctx, cdict) );
- CHECK( ZSTD_compressBlock(cctx, compressedBuffer, ZSTD_compressBound(blockSize), (char*)CNBuffer+dictSize, blockSize) );
+ CHECK_Z( ZSTD_compressBegin_usingCDict(cctx, cdict) );
+ CHECK_Z( ZSTD_compressBlock(cctx, compressedBuffer, ZSTD_compressBound(blockSize), (char*)CNBuffer+dictSize, blockSize) );
ZSTD_freeCDict(cdict);
}
DISPLAYLEVEL(3, "OK \n");
size_t const bound = ZSTD_compressBound(_3BYTESTESTLENGTH);
size_t nbSeq = 1;
while (nbSeq <= maxNbSeq) {
- CHECK(ZSTD_compressCCtx(cctx, compressedBuffer, bound, CNBuffer, nbSeq * 3, 19));
+ CHECK_Z(ZSTD_compressCCtx(cctx, compressedBuffer, bound, CNBuffer, nbSeq * 3, 19));
/* Check every sequence for the first 100, then skip more rapidly. */
if (nbSeq < 100) {
++nbSeq;
size_t const bound = ZSTD_compressBound(CNBuffSize);
size_t size = 1;
while (size <= CNBuffSize) {
- CHECK(ZSTD_compressCCtx(cctx, compressedBuffer, bound, CNBuffer, size, 3));
+ CHECK_Z(ZSTD_compressCCtx(cctx, compressedBuffer, bound, CNBuffer, size, 3));
/* Check every size for the first 100, then skip more rapidly. */
if (size < 100) {
++size;
void* const outputBuffer = malloc(outputSize);
ZSTD_CCtx* const cctx = ZSTD_createCCtx();
if (!outputBuffer || !cctx) goto _output_error;
- CHECK(ZSTD_compress_usingDict(cctx, outputBuffer, outputSize, CNBuffer, inputSize, dictBuffer, dictSize, 1));
+ CHECK_Z(ZSTD_compress_usingDict(cctx, outputBuffer, outputSize, CNBuffer, inputSize, dictBuffer, dictSize, 1));
free(outputBuffer);
ZSTD_freeCCtx(cctx);
}
while (approxIndex <= (maxIndex / 4) * 3) {
CHECK_Z(ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_flush));
approxIndex += in.pos;
- CHECK(in.pos == in.size);
+ CHECK_Z(in.pos == in.size);
in.pos = 0;
out.pos = 0;
}
while (approxIndex <= maxIndex) {
CHECK_Z(ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_flush));
approxIndex += in.pos;
- CHECK(in.pos == in.size);
+ CHECK_Z(in.pos == in.size);
in.pos = 0;
out.pos = 0;
}
RDG_genBuffer(dict, dictSize, 0.5, 0.5, seed);
RDG_genBuffer(CNBuffer, CNBuffSize, 0.6, 0.6, seed);
- CHECK(cctx_params != NULL);
+ CHECK_Z(cctx_params != NULL);
for (dictSize = CNBuffSize; dictSize; dictSize = dictSize >> 3) {
DISPLAYLEVEL(3, "\n Testing with dictSize %u ", (U32)dictSize);
free(compressedBuffer);
free(decodedBuffer);
return testResult;
-
-_output_error:
- testResult = 1;
- DISPLAY("Error detected in Unit tests ! \n");
- goto _end;
}
# ################################################################
-# Copyright (c) Facebook, Inc.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
int main(int argc, const char** argv)
{
ZSTD_CStream* ctx;
- ZSTD_parameters params;
- size_t rc;
- unsigned windowLog;
+ unsigned windowLog = 18;
(void)argc;
(void)argv;
/* Create stream */
- ctx = ZSTD_createCStream();
+ ctx = ZSTD_createCCtx();
if (!ctx) { return 1; }
/* Set parameters */
- memset(&params, 0, sizeof(params));
- params.cParams.windowLog = 18;
- params.cParams.chainLog = 13;
- params.cParams.hashLog = 14;
- params.cParams.searchLog = 1;
- params.cParams.minMatch = 7;
- params.cParams.targetLength = 16;
- params.cParams.strategy = ZSTD_fast;
- windowLog = params.cParams.windowLog;
- /* Initialize stream */
- rc = ZSTD_initCStream_advanced(ctx, NULL, 0, params, 0);
- if (ZSTD_isError(rc)) { return 2; }
+ if (ZSTD_isError(ZSTD_CCtx_setParameter(ctx, ZSTD_c_windowLog, windowLog)))
+ return 2;
+ if (ZSTD_isError(ZSTD_CCtx_setParameter(ctx, ZSTD_c_chainLog, 13)))
+ return 2;
+ if (ZSTD_isError(ZSTD_CCtx_setParameter(ctx, ZSTD_c_hashLog, 14)))
+ return 2;
+ if (ZSTD_isError(ZSTD_CCtx_setParameter(ctx, ZSTD_c_searchLog, 1)))
+ return 2;
+ if (ZSTD_isError(ZSTD_CCtx_setParameter(ctx, ZSTD_c_minMatch, 7)))
+ return 2;
+ if (ZSTD_isError(ZSTD_CCtx_setParameter(ctx, ZSTD_c_targetLength, 16)))
+ return 2;
+ if (ZSTD_isError(ZSTD_CCtx_setParameter(ctx, ZSTD_c_strategy, ZSTD_fast)))
+ return 2;
{
U64 compressed = 0;
const U64 toCompress = ((U64)1) << 33;
free(srcBuffer);
free(dstBuffer);
}
+ ZSTD_freeCCtx(ctx);
return 0;
}
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
double cs = 0., ds = 0., rt, cm = 0.;
const double r1 = 1, r2 = 0.1, rtr = 0.5;
double ret;
- if(target.cSpeed) { cs = res.cSpeed / (double)target.cSpeed; }
- if(target.dSpeed) { ds = res.dSpeed / (double)target.dSpeed; }
- if(target.cMem != (U32)-1) { cm = (double)target.cMem / res.cMem; }
- rt = ((double)srcSize / res.cSize);
+ if(target.cSpeed) { cs = (double)res.cSpeed / (double)target.cSpeed; }
+ if(target.dSpeed) { ds = (double)res.dSpeed / (double)target.dSpeed; }
+ if(target.cMem != (U32)-1) { cm = (double)target.cMem / (double)res.cMem; }
+ rt = ((double)srcSize / (double)res.cSize);
ret = (MIN(1, cs) + MIN(1, ds) + MIN(1, cm))*r1 + rt * rtr +
(MAX(0, log(cs))+ MAX(0, log(ds))+ MAX(0, log(cm))) * r2;
static double
resultDistLvl(const BMK_benchResult_t result1, const BMK_benchResult_t lvlRes)
{
- double normalizedCSpeedGain1 = ((double)result1.cSpeed / lvlRes.cSpeed) - 1;
- double normalizedRatioGain1 = ((double)lvlRes.cSize / result1.cSize) - 1;
+ double normalizedCSpeedGain1 = ((double)result1.cSpeed / (double)lvlRes.cSpeed) - 1;
+ double normalizedRatioGain1 = ((double)lvlRes.cSize / (double)result1.cSize) - 1;
if(normalizedRatioGain1 < 0 || normalizedCSpeedGain1 < 0) {
return 0.0;
}
}
{ double const ratio = res.result.cSize ?
- (double)srcSize / res.result.cSize : 0;
+ (double)srcSize / (double)res.result.cSize : 0;
double const cSpeedMBps = (double)res.result.cSpeed / MB_UNIT;
double const dSpeedMBps = (double)res.result.dSpeed / MB_UNIT;
}
fprintf(f, "================================\n");
fprintf(f, "Level Bounds: R: > %.3f AND C: < %.1f MB/s \n\n",
- (double)srcSize / g_lvltarget.cSize, (double)g_lvltarget.cSpeed / MB_UNIT);
+ (double)srcSize / (double)g_lvltarget.cSize, (double)g_lvltarget.cSpeed / MB_UNIT);
fprintf(f, "Overall Winner: \n");
}
/* print comment */
{ double const ratio = result.cSize ?
- (double)srcSize / result.cSize : 0;
+ (double)srcSize / (double)result.cSize : 0;
double const cSpeedMBps = (double)result.cSpeed / MB_UNIT;
double const dSpeedMBps = (double)result.dSpeed / MB_UNIT;
/* calculate uncertainty in compression / decompression runs */
if (benchres.cSpeed) {
- U64 const loopDurationC = (((U64)buf.srcSize * TIMELOOP_NANOSEC) / benchres.cSpeed);
+ double const loopDurationC = (double)(((U64)buf.srcSize * TIMELOOP_NANOSEC) / benchres.cSpeed);
uncertaintyConstantC = ((loopDurationC + (double)(2 * g_clockGranularity))/loopDurationC);
}
if (benchres.dSpeed) {
- U64 const loopDurationD = (((U64)buf.srcSize * TIMELOOP_NANOSEC) / benchres.dSpeed);
+ double const loopDurationD = (double)(((U64)buf.srcSize * TIMELOOP_NANOSEC) / benchres.dSpeed);
uncertaintyConstantD = ((loopDurationD + (double)(2 * g_clockGranularity))/loopDurationD);
}
/* optimistic assumption of benchres */
{ BMK_benchResult_t resultMax = benchres;
- resultMax.cSpeed = (unsigned long long)(resultMax.cSpeed * uncertaintyConstantC * VARIANCE);
- resultMax.dSpeed = (unsigned long long)(resultMax.dSpeed * uncertaintyConstantD * VARIANCE);
+ resultMax.cSpeed = (unsigned long long)((double)resultMax.cSpeed * uncertaintyConstantC * VARIANCE);
+ resultMax.dSpeed = (unsigned long long)((double)resultMax.dSpeed * uncertaintyConstantD * VARIANCE);
/* disregard infeasible results in feas mode */
/* disregard if resultMax < winner in infeas mode */
if ((double)testResult.cSize <= ((double)winners[cLevel].result.cSize * (1. + (0.02 / cLevel))) ) {
/* Validate solution is "good enough" */
- double W_ratio = (double)buf.srcSize / testResult.cSize;
- double O_ratio = (double)buf.srcSize / winners[cLevel].result.cSize;
+ double W_ratio = (double)buf.srcSize / (double)testResult.cSize;
+ double O_ratio = (double)buf.srcSize / (double)winners[cLevel].result.cSize;
double W_ratioNote = log (W_ratio);
double O_ratioNote = log (O_ratio);
size_t W_DMemUsed = (1 << params.vals[wlog_ind]) + (16 KB);
double W_CMemUsed_note = W_ratioNote * ( 50 + 13*cLevel) - log((double)W_CMemUsed);
double O_CMemUsed_note = O_ratioNote * ( 50 + 13*cLevel) - log((double)O_CMemUsed);
- double W_CSpeed_note = W_ratioNote * (double)( 30 + 10*cLevel) + log(testResult.cSpeed);
- double O_CSpeed_note = O_ratioNote * (double)( 30 + 10*cLevel) + log(winners[cLevel].result.cSpeed);
+ double W_CSpeed_note = W_ratioNote * (double)( 30 + 10*cLevel) + log((double)testResult.cSpeed);
+ double O_CSpeed_note = O_ratioNote * (double)( 30 + 10*cLevel) + log((double)winners[cLevel].result.cSpeed);
- double W_DSpeed_note = W_ratioNote * (double)( 20 + 2*cLevel) + log(testResult.dSpeed);
- double O_DSpeed_note = O_ratioNote * (double)( 20 + 2*cLevel) + log(winners[cLevel].result.dSpeed);
+ double W_DSpeed_note = W_ratioNote * (double)( 20 + 2*cLevel) + log((double)testResult.dSpeed);
+ double O_DSpeed_note = O_ratioNote * (double)( 20 + 2*cLevel) + log((double)winners[cLevel].result.dSpeed);
if (W_DMemUsed_note < O_DMemUsed_note) {
/* uses too much Decompression memory for too little benefit */
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
#!/usr/bin/env python3
# ################################################################
-# Copyright (c) Facebook, Inc.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under both the BSD-style license (found in the
# ################################################################
-# Copyright (c) Facebook, Inc.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
silesia.tar, level 7, compress simple, 4576661
silesia.tar, level 9, compress simple, 4552899
silesia.tar, level 13, compress simple, 4502956
-silesia.tar, level 16, compress simple, 4360527
-silesia.tar, level 19, compress simple, 4266970
+silesia.tar, level 16, compress simple, 4360546
+silesia.tar, level 19, compress simple, 4265911
silesia.tar, uncompressed literals, compress simple, 4854086
-silesia.tar, uncompressed literals optimal, compress simple, 4266970
+silesia.tar, uncompressed literals optimal, compress simple, 4265911
silesia.tar, huffman literals, compress simple, 6179047
github.tar, level -5, compress simple, 52115
github.tar, level -3, compress simple, 45678
github.tar, level 7, compress simple, 38110
github.tar, level 9, compress simple, 36760
github.tar, level 13, compress simple, 35501
-github.tar, level 16, compress simple, 40471
-github.tar, level 19, compress simple, 32149
+github.tar, level 16, compress simple, 40466
+github.tar, level 19, compress simple, 32276
github.tar, uncompressed literals, compress simple, 38831
-github.tar, uncompressed literals optimal, compress simple, 32149
+github.tar, uncompressed literals optimal, compress simple, 32276
github.tar, huffman literals, compress simple, 42560
silesia, level -5, compress cctx, 6857372
silesia, level -3, compress cctx, 6503412
silesia, level 7, compress cctx, 4566984
silesia, level 9, compress cctx, 4543018
silesia, level 13, compress cctx, 4493990
-silesia, level 16, compress cctx, 4359864
-silesia, level 19, compress cctx, 4296438
+silesia, level 16, compress cctx, 4360041
+silesia, level 19, compress cctx, 4296055
silesia, long distance mode, compress cctx, 4842075
silesia, multithreaded, compress cctx, 4842075
silesia, multithreaded long distance mode, compress cctx, 4842075
silesia, small chain log, compress cctx, 4912197
silesia, explicit params, compress cctx, 4794052
silesia, uncompressed literals, compress cctx, 4842075
-silesia, uncompressed literals optimal, compress cctx, 4296438
+silesia, uncompressed literals optimal, compress cctx, 4296055
silesia, huffman literals, compress cctx, 6172202
silesia, multithreaded with advanced params, compress cctx, 4842075
github, level -5, compress cctx, 204407
github, level 7 with dict, compress cctx, 38755
github, level 9, compress cctx, 135122
github, level 9 with dict, compress cctx, 39398
-github, level 13, compress cctx, 132729
+github, level 13, compress cctx, 132878
github, level 13 with dict, compress cctx, 39948
-github, level 16, compress cctx, 132729
+github, level 16, compress cctx, 133209
github, level 16 with dict, compress cctx, 37568
-github, level 19, compress cctx, 132729
+github, level 19, compress cctx, 132879
github, level 19 with dict, compress cctx, 37567
github, long distance mode, compress cctx, 141069
github, multithreaded, compress cctx, 141069
github, small chain log, compress cctx, 139242
github, explicit params, compress cctx, 140932
github, uncompressed literals, compress cctx, 136332
-github, uncompressed literals optimal, compress cctx, 132729
+github, uncompressed literals optimal, compress cctx, 132879
github, huffman literals, compress cctx, 175468
github, multithreaded with advanced params, compress cctx, 141069
silesia, level -5, zstdcli, 6857420
silesia, level 7, zstdcli, 4567032
silesia, level 9, zstdcli, 4543066
silesia, level 13, zstdcli, 4494038
-silesia, level 16, zstdcli, 4359912
-silesia, level 19, zstdcli, 4296486
+silesia, level 16, zstdcli, 4360089
+silesia, level 19, zstdcli, 4296103
silesia, long distance mode, zstdcli, 4833785
silesia, multithreaded, zstdcli, 4842123
silesia, multithreaded long distance mode, zstdcli, 4833785
silesia.tar, level 7, zstdcli, 4578719
silesia.tar, level 9, zstdcli, 4552903
silesia.tar, level 13, zstdcli, 4502960
-silesia.tar, level 16, zstdcli, 4360531
-silesia.tar, level 19, zstdcli, 4266974
+silesia.tar, level 16, zstdcli, 4360550
+silesia.tar, level 19, zstdcli, 4265915
silesia.tar, no source size, zstdcli, 4854160
silesia.tar, long distance mode, zstdcli, 4845745
silesia.tar, multithreaded, zstdcli, 4854164
silesia.tar, multithreaded long distance mode, zstdcli, 4845745
silesia.tar, small window log, zstdcli, 7100701
-silesia.tar, small hash log, zstdcli, 6529289
+silesia.tar, small hash log, zstdcli, 6529264
silesia.tar, small chain log, zstdcli, 4917022
silesia.tar, explicit params, zstdcli, 4820713
silesia.tar, uncompressed literals, zstdcli, 5122571
github, level 7 with dict, zstdcli, 40745
github, level 9, zstdcli, 137122
github, level 9 with dict, zstdcli, 41393
-github, level 13, zstdcli, 134729
+github, level 13, zstdcli, 134878
github, level 13 with dict, zstdcli, 41900
-github, level 16, zstdcli, 134729
+github, level 16, zstdcli, 135209
github, level 16 with dict, zstdcli, 39577
-github, level 19, zstdcli, 134729
+github, level 19, zstdcli, 134879
github, level 19 with dict, zstdcli, 39576
github, long distance mode, zstdcli, 138332
github, multithreaded, zstdcli, 138332
github, small chain log, zstdcli, 138341
github, explicit params, zstdcli, 136197
github, uncompressed literals, zstdcli, 167911
-github, uncompressed literals optimal, zstdcli, 159227
+github, uncompressed literals optimal, zstdcli, 154667
github, huffman literals, zstdcli, 144365
github, multithreaded with advanced params, zstdcli, 167911
github.tar, level -5, zstdcli, 52119
github.tar, level 9 with dict, zstdcli, 36632
github.tar, level 13, zstdcli, 35505
github.tar, level 13 with dict, zstdcli, 37134
-github.tar, level 16, zstdcli, 40475
+github.tar, level 16, zstdcli, 40470
github.tar, level 16 with dict, zstdcli, 33378
-github.tar, level 19, zstdcli, 32153
+github.tar, level 19, zstdcli, 32280
github.tar, level 19 with dict, zstdcli, 32716
github.tar, no source size, zstdcli, 38832
github.tar, no source size with dict, zstdcli, 38004
silesia, level 12 row 1, advanced one pass, 4505046
silesia, level 12 row 2, advanced one pass, 4503116
silesia, level 13, advanced one pass, 4493990
-silesia, level 16, advanced one pass, 4359864
-silesia, level 19, advanced one pass, 4296438
+silesia, level 16, advanced one pass, 4360041
+silesia, level 19, advanced one pass, 4296055
silesia, no source size, advanced one pass, 4842075
silesia, long distance mode, advanced one pass, 4833710
silesia, multithreaded, advanced one pass, 4842075
silesia.tar, level 12 row 1, advanced one pass, 4514049
silesia.tar, level 12 row 2, advanced one pass, 4513797
silesia.tar, level 13, advanced one pass, 4502956
-silesia.tar, level 16, advanced one pass, 4360527
-silesia.tar, level 19, advanced one pass, 4266970
+silesia.tar, level 16, advanced one pass, 4360546
+silesia.tar, level 19, advanced one pass, 4265911
silesia.tar, no source size, advanced one pass, 4854086
silesia.tar, long distance mode, advanced one pass, 4840452
silesia.tar, multithreaded, advanced one pass, 4854160
silesia.tar, multithreaded long distance mode, advanced one pass, 4845741
silesia.tar, small window log, advanced one pass, 7100655
-silesia.tar, small hash log, advanced one pass, 6529231
+silesia.tar, small hash log, advanced one pass, 6529206
silesia.tar, small chain log, advanced one pass, 4917041
silesia.tar, explicit params, advanced one pass, 4806855
silesia.tar, uncompressed literals, advanced one pass, 5122473
github, level 9 with dict dds, advanced one pass, 39393
github, level 9 with dict copy, advanced one pass, 39398
github, level 9 with dict load, advanced one pass, 41710
-github, level 11 row 1, advanced one pass, 135119
+github, level 11 row 1, advanced one pass, 135367
github, level 11 row 1 with dict dms, advanced one pass, 39671
github, level 11 row 1 with dict dds, advanced one pass, 39671
github, level 11 row 1 with dict copy, advanced one pass, 39651
github, level 11 row 1 with dict load, advanced one pass, 41360
-github, level 11 row 2, advanced one pass, 135119
+github, level 11 row 2, advanced one pass, 135367
github, level 11 row 2 with dict dms, advanced one pass, 39671
github, level 11 row 2 with dict dds, advanced one pass, 39671
github, level 11 row 2 with dict copy, advanced one pass, 39651
github, level 11 row 2 with dict load, advanced one pass, 41360
-github, level 12 row 1, advanced one pass, 134180
+github, level 12 row 1, advanced one pass, 134402
github, level 12 row 1 with dict dms, advanced one pass, 39677
github, level 12 row 1 with dict dds, advanced one pass, 39677
github, level 12 row 1 with dict copy, advanced one pass, 39677
github, level 12 row 1 with dict load, advanced one pass, 41166
-github, level 12 row 2, advanced one pass, 134180
+github, level 12 row 2, advanced one pass, 134402
github, level 12 row 2 with dict dms, advanced one pass, 39677
github, level 12 row 2 with dict dds, advanced one pass, 39677
github, level 12 row 2 with dict copy, advanced one pass, 39677
github, level 12 row 2 with dict load, advanced one pass, 41166
-github, level 13, advanced one pass, 132729
+github, level 13, advanced one pass, 132878
github, level 13 with dict, advanced one pass, 39900
github, level 13 with dict dms, advanced one pass, 39900
github, level 13 with dict dds, advanced one pass, 39900
github, level 13 with dict copy, advanced one pass, 39948
github, level 13 with dict load, advanced one pass, 42624
-github, level 16, advanced one pass, 132729
+github, level 16, advanced one pass, 133209
github, level 16 with dict, advanced one pass, 37577
github, level 16 with dict dms, advanced one pass, 37577
github, level 16 with dict dds, advanced one pass, 37577
github, level 16 with dict copy, advanced one pass, 37568
github, level 16 with dict load, advanced one pass, 42338
-github, level 19, advanced one pass, 132729
+github, level 19, advanced one pass, 132879
github, level 19 with dict, advanced one pass, 37576
github, level 19 with dict dms, advanced one pass, 37576
github, level 19 with dict dds, advanced one pass, 37576
github, small chain log, advanced one pass, 136341
github, explicit params, advanced one pass, 137727
github, uncompressed literals, advanced one pass, 165911
-github, uncompressed literals optimal, advanced one pass, 157227
+github, uncompressed literals optimal, advanced one pass, 152667
github, huffman literals, advanced one pass, 142365
github, multithreaded with advanced params, advanced one pass, 165911
github.tar, level -5, advanced one pass, 52115
github.tar, level 13 with dict dds, advanced one pass, 37220
github.tar, level 13 with dict copy, advanced one pass, 37130
github.tar, level 13 with dict load, advanced one pass, 36010
-github.tar, level 16, advanced one pass, 40471
+github.tar, level 16, advanced one pass, 40466
github.tar, level 16 with dict, advanced one pass, 33374
github.tar, level 16 with dict dms, advanced one pass, 33206
github.tar, level 16 with dict dds, advanced one pass, 33206
github.tar, level 16 with dict copy, advanced one pass, 33374
github.tar, level 16 with dict load, advanced one pass, 39081
-github.tar, level 19, advanced one pass, 32149
+github.tar, level 19, advanced one pass, 32276
github.tar, level 19 with dict, advanced one pass, 32712
github.tar, level 19 with dict dms, advanced one pass, 32555
github.tar, level 19 with dict dds, advanced one pass, 32555
silesia, level 12 row 1, advanced one pass small out, 4505046
silesia, level 12 row 2, advanced one pass small out, 4503116
silesia, level 13, advanced one pass small out, 4493990
-silesia, level 16, advanced one pass small out, 4359864
-silesia, level 19, advanced one pass small out, 4296438
+silesia, level 16, advanced one pass small out, 4360041
+silesia, level 19, advanced one pass small out, 4296055
silesia, no source size, advanced one pass small out, 4842075
silesia, long distance mode, advanced one pass small out, 4833710
silesia, multithreaded, advanced one pass small out, 4842075
silesia.tar, level 12 row 1, advanced one pass small out, 4514049
silesia.tar, level 12 row 2, advanced one pass small out, 4513797
silesia.tar, level 13, advanced one pass small out, 4502956
-silesia.tar, level 16, advanced one pass small out, 4360527
-silesia.tar, level 19, advanced one pass small out, 4266970
+silesia.tar, level 16, advanced one pass small out, 4360546
+silesia.tar, level 19, advanced one pass small out, 4265911
silesia.tar, no source size, advanced one pass small out, 4854086
silesia.tar, long distance mode, advanced one pass small out, 4840452
silesia.tar, multithreaded, advanced one pass small out, 4854160
silesia.tar, multithreaded long distance mode, advanced one pass small out, 4845741
silesia.tar, small window log, advanced one pass small out, 7100655
-silesia.tar, small hash log, advanced one pass small out, 6529231
+silesia.tar, small hash log, advanced one pass small out, 6529206
silesia.tar, small chain log, advanced one pass small out, 4917041
silesia.tar, explicit params, advanced one pass small out, 4806855
silesia.tar, uncompressed literals, advanced one pass small out, 5122473
github, level 9 with dict dds, advanced one pass small out, 39393
github, level 9 with dict copy, advanced one pass small out, 39398
github, level 9 with dict load, advanced one pass small out, 41710
-github, level 11 row 1, advanced one pass small out, 135119
+github, level 11 row 1, advanced one pass small out, 135367
github, level 11 row 1 with dict dms, advanced one pass small out, 39671
github, level 11 row 1 with dict dds, advanced one pass small out, 39671
github, level 11 row 1 with dict copy, advanced one pass small out, 39651
github, level 11 row 1 with dict load, advanced one pass small out, 41360
-github, level 11 row 2, advanced one pass small out, 135119
+github, level 11 row 2, advanced one pass small out, 135367
github, level 11 row 2 with dict dms, advanced one pass small out, 39671
github, level 11 row 2 with dict dds, advanced one pass small out, 39671
github, level 11 row 2 with dict copy, advanced one pass small out, 39651
github, level 11 row 2 with dict load, advanced one pass small out, 41360
-github, level 12 row 1, advanced one pass small out, 134180
+github, level 12 row 1, advanced one pass small out, 134402
github, level 12 row 1 with dict dms, advanced one pass small out, 39677
github, level 12 row 1 with dict dds, advanced one pass small out, 39677
github, level 12 row 1 with dict copy, advanced one pass small out, 39677
github, level 12 row 1 with dict load, advanced one pass small out, 41166
-github, level 12 row 2, advanced one pass small out, 134180
+github, level 12 row 2, advanced one pass small out, 134402
github, level 12 row 2 with dict dms, advanced one pass small out, 39677
github, level 12 row 2 with dict dds, advanced one pass small out, 39677
github, level 12 row 2 with dict copy, advanced one pass small out, 39677
github, level 12 row 2 with dict load, advanced one pass small out, 41166
-github, level 13, advanced one pass small out, 132729
+github, level 13, advanced one pass small out, 132878
github, level 13 with dict, advanced one pass small out, 39900
github, level 13 with dict dms, advanced one pass small out, 39900
github, level 13 with dict dds, advanced one pass small out, 39900
github, level 13 with dict copy, advanced one pass small out, 39948
github, level 13 with dict load, advanced one pass small out, 42624
-github, level 16, advanced one pass small out, 132729
+github, level 16, advanced one pass small out, 133209
github, level 16 with dict, advanced one pass small out, 37577
github, level 16 with dict dms, advanced one pass small out, 37577
github, level 16 with dict dds, advanced one pass small out, 37577
github, level 16 with dict copy, advanced one pass small out, 37568
github, level 16 with dict load, advanced one pass small out, 42338
-github, level 19, advanced one pass small out, 132729
+github, level 19, advanced one pass small out, 132879
github, level 19 with dict, advanced one pass small out, 37576
github, level 19 with dict dms, advanced one pass small out, 37576
github, level 19 with dict dds, advanced one pass small out, 37576
github, small chain log, advanced one pass small out, 136341
github, explicit params, advanced one pass small out, 137727
github, uncompressed literals, advanced one pass small out, 165911
-github, uncompressed literals optimal, advanced one pass small out, 157227
+github, uncompressed literals optimal, advanced one pass small out, 152667
github, huffman literals, advanced one pass small out, 142365
github, multithreaded with advanced params, advanced one pass small out, 165911
github.tar, level -5, advanced one pass small out, 52115
github.tar, level 13 with dict dds, advanced one pass small out, 37220
github.tar, level 13 with dict copy, advanced one pass small out, 37130
github.tar, level 13 with dict load, advanced one pass small out, 36010
-github.tar, level 16, advanced one pass small out, 40471
+github.tar, level 16, advanced one pass small out, 40466
github.tar, level 16 with dict, advanced one pass small out, 33374
github.tar, level 16 with dict dms, advanced one pass small out, 33206
github.tar, level 16 with dict dds, advanced one pass small out, 33206
github.tar, level 16 with dict copy, advanced one pass small out, 33374
github.tar, level 16 with dict load, advanced one pass small out, 39081
-github.tar, level 19, advanced one pass small out, 32149
+github.tar, level 19, advanced one pass small out, 32276
github.tar, level 19 with dict, advanced one pass small out, 32712
github.tar, level 19 with dict dms, advanced one pass small out, 32555
github.tar, level 19 with dict dds, advanced one pass small out, 32555
silesia, level 12 row 1, advanced streaming, 4505046
silesia, level 12 row 2, advanced streaming, 4503116
silesia, level 13, advanced streaming, 4493990
-silesia, level 16, advanced streaming, 4359864
-silesia, level 19, advanced streaming, 4296438
+silesia, level 16, advanced streaming, 4360041
+silesia, level 19, advanced streaming, 4296055
silesia, no source size, advanced streaming, 4842039
silesia, long distance mode, advanced streaming, 4833710
silesia, multithreaded, advanced streaming, 4842075
silesia.tar, level 12 row 1, advanced streaming, 4514049
silesia.tar, level 12 row 2, advanced streaming, 4513797
silesia.tar, level 13, advanced streaming, 4502956
-silesia.tar, level 16, advanced streaming, 4360527
-silesia.tar, level 19, advanced streaming, 4266970
+silesia.tar, level 16, advanced streaming, 4360546
+silesia.tar, level 19, advanced streaming, 4265911
silesia.tar, no source size, advanced streaming, 4859267
silesia.tar, long distance mode, advanced streaming, 4840452
silesia.tar, multithreaded, advanced streaming, 4854160
silesia.tar, multithreaded long distance mode, advanced streaming, 4845741
silesia.tar, small window log, advanced streaming, 7117559
-silesia.tar, small hash log, advanced streaming, 6529234
+silesia.tar, small hash log, advanced streaming, 6529209
silesia.tar, small chain log, advanced streaming, 4917021
silesia.tar, explicit params, advanced streaming, 4806873
silesia.tar, uncompressed literals, advanced streaming, 5127423
github, level 9 with dict dds, advanced streaming, 39393
github, level 9 with dict copy, advanced streaming, 39398
github, level 9 with dict load, advanced streaming, 41710
-github, level 11 row 1, advanced streaming, 135119
+github, level 11 row 1, advanced streaming, 135367
github, level 11 row 1 with dict dms, advanced streaming, 39671
github, level 11 row 1 with dict dds, advanced streaming, 39671
github, level 11 row 1 with dict copy, advanced streaming, 39651
github, level 11 row 1 with dict load, advanced streaming, 41360
-github, level 11 row 2, advanced streaming, 135119
+github, level 11 row 2, advanced streaming, 135367
github, level 11 row 2 with dict dms, advanced streaming, 39671
github, level 11 row 2 with dict dds, advanced streaming, 39671
github, level 11 row 2 with dict copy, advanced streaming, 39651
github, level 11 row 2 with dict load, advanced streaming, 41360
-github, level 12 row 1, advanced streaming, 134180
+github, level 12 row 1, advanced streaming, 134402
github, level 12 row 1 with dict dms, advanced streaming, 39677
github, level 12 row 1 with dict dds, advanced streaming, 39677
github, level 12 row 1 with dict copy, advanced streaming, 39677
github, level 12 row 1 with dict load, advanced streaming, 41166
-github, level 12 row 2, advanced streaming, 134180
+github, level 12 row 2, advanced streaming, 134402
github, level 12 row 2 with dict dms, advanced streaming, 39677
github, level 12 row 2 with dict dds, advanced streaming, 39677
github, level 12 row 2 with dict copy, advanced streaming, 39677
github, level 12 row 2 with dict load, advanced streaming, 41166
-github, level 13, advanced streaming, 132729
+github, level 13, advanced streaming, 132878
github, level 13 with dict, advanced streaming, 39900
github, level 13 with dict dms, advanced streaming, 39900
github, level 13 with dict dds, advanced streaming, 39900
github, level 13 with dict copy, advanced streaming, 39948
github, level 13 with dict load, advanced streaming, 42624
-github, level 16, advanced streaming, 132729
+github, level 16, advanced streaming, 133209
github, level 16 with dict, advanced streaming, 37577
github, level 16 with dict dms, advanced streaming, 37577
github, level 16 with dict dds, advanced streaming, 37577
github, level 16 with dict copy, advanced streaming, 37568
github, level 16 with dict load, advanced streaming, 42338
-github, level 19, advanced streaming, 132729
+github, level 19, advanced streaming, 132879
github, level 19 with dict, advanced streaming, 37576
github, level 19 with dict dms, advanced streaming, 37576
github, level 19 with dict dds, advanced streaming, 37576
github, small chain log, advanced streaming, 136341
github, explicit params, advanced streaming, 137727
github, uncompressed literals, advanced streaming, 165911
-github, uncompressed literals optimal, advanced streaming, 157227
+github, uncompressed literals optimal, advanced streaming, 152667
github, huffman literals, advanced streaming, 142365
github, multithreaded with advanced params, advanced streaming, 165911
github.tar, level -5, advanced streaming, 52152
github.tar, level 13 with dict dds, advanced streaming, 37220
github.tar, level 13 with dict copy, advanced streaming, 37130
github.tar, level 13 with dict load, advanced streaming, 36010
-github.tar, level 16, advanced streaming, 40471
+github.tar, level 16, advanced streaming, 40466
github.tar, level 16 with dict, advanced streaming, 33374
github.tar, level 16 with dict dms, advanced streaming, 33206
github.tar, level 16 with dict dds, advanced streaming, 33206
github.tar, level 16 with dict copy, advanced streaming, 33374
github.tar, level 16 with dict load, advanced streaming, 39081
-github.tar, level 19, advanced streaming, 32149
+github.tar, level 19, advanced streaming, 32276
github.tar, level 19 with dict, advanced streaming, 32712
github.tar, level 19 with dict dms, advanced streaming, 32555
github.tar, level 19 with dict dds, advanced streaming, 32555
silesia, level 7, old streaming, 4566984
silesia, level 9, old streaming, 4543018
silesia, level 13, old streaming, 4493990
-silesia, level 16, old streaming, 4359864
-silesia, level 19, old streaming, 4296438
+silesia, level 16, old streaming, 4360041
+silesia, level 19, old streaming, 4296055
silesia, no source size, old streaming, 4842039
silesia, uncompressed literals, old streaming, 4842075
-silesia, uncompressed literals optimal, old streaming, 4296438
+silesia, uncompressed literals optimal, old streaming, 4296055
silesia, huffman literals, old streaming, 6172207
silesia.tar, level -5, old streaming, 6856523
silesia.tar, level -3, old streaming, 6505954
silesia.tar, level 7, old streaming, 4576664
silesia.tar, level 9, old streaming, 4552900
silesia.tar, level 13, old streaming, 4502956
-silesia.tar, level 16, old streaming, 4360527
-silesia.tar, level 19, old streaming, 4266970
+silesia.tar, level 16, old streaming, 4360546
+silesia.tar, level 19, old streaming, 4265911
silesia.tar, no source size, old streaming, 4859267
silesia.tar, uncompressed literals, old streaming, 4859271
-silesia.tar, uncompressed literals optimal, old streaming, 4266970
+silesia.tar, uncompressed literals optimal, old streaming, 4265911
silesia.tar, huffman literals, old streaming, 6179056
github, level -5, old streaming, 204407
github, level -5 with dict, old streaming, 46718
github, level 7 with dict, old streaming, 38758
github, level 9, old streaming, 135122
github, level 9 with dict, old streaming, 39437
-github, level 13, old streaming, 132729
+github, level 13, old streaming, 132878
github, level 13 with dict, old streaming, 39900
-github, level 16, old streaming, 132729
+github, level 16, old streaming, 133209
github, level 16 with dict, old streaming, 37577
-github, level 19, old streaming, 132729
+github, level 19, old streaming, 132879
github, level 19 with dict, old streaming, 37576
github, no source size, old streaming, 140599
github, no source size with dict, old streaming, 40654
github, uncompressed literals, old streaming, 136332
-github, uncompressed literals optimal, old streaming, 132729
+github, uncompressed literals optimal, old streaming, 132879
github, huffman literals, old streaming, 175468
github.tar, level -5, old streaming, 52152
github.tar, level -5 with dict, old streaming, 51045
github.tar, level 9 with dict, old streaming, 36484
github.tar, level 13, old streaming, 35501
github.tar, level 13 with dict, old streaming, 37130
-github.tar, level 16, old streaming, 40471
+github.tar, level 16, old streaming, 40466
github.tar, level 16 with dict, old streaming, 33374
-github.tar, level 19, old streaming, 32149
+github.tar, level 19, old streaming, 32276
github.tar, level 19 with dict, old streaming, 32712
github.tar, no source size, old streaming, 38828
github.tar, no source size with dict, old streaming, 38000
github.tar, uncompressed literals, old streaming, 38831
-github.tar, uncompressed literals optimal, old streaming, 32149
+github.tar, uncompressed literals optimal, old streaming, 32276
github.tar, huffman literals, old streaming, 42560
silesia, level -5, old streaming advanced, 6854744
silesia, level -3, old streaming advanced, 6503319
silesia, level 7, old streaming advanced, 4566984
silesia, level 9, old streaming advanced, 4543018
silesia, level 13, old streaming advanced, 4493990
-silesia, level 16, old streaming advanced, 4359864
-silesia, level 19, old streaming advanced, 4296438
+silesia, level 16, old streaming advanced, 4360041
+silesia, level 19, old streaming advanced, 4296055
silesia, no source size, old streaming advanced, 4842039
silesia, long distance mode, old streaming advanced, 4842075
silesia, multithreaded, old streaming advanced, 4842075
silesia, small chain log, old streaming advanced, 4912197
silesia, explicit params, old streaming advanced, 4795452
silesia, uncompressed literals, old streaming advanced, 4842075
-silesia, uncompressed literals optimal, old streaming advanced, 4296438
+silesia, uncompressed literals optimal, old streaming advanced, 4296055
silesia, huffman literals, old streaming advanced, 6172207
silesia, multithreaded with advanced params, old streaming advanced, 4842075
silesia.tar, level -5, old streaming advanced, 6856523
silesia.tar, level 7, old streaming advanced, 4576664
silesia.tar, level 9, old streaming advanced, 4552900
silesia.tar, level 13, old streaming advanced, 4502956
-silesia.tar, level 16, old streaming advanced, 4360527
-silesia.tar, level 19, old streaming advanced, 4266970
+silesia.tar, level 16, old streaming advanced, 4360546
+silesia.tar, level 19, old streaming advanced, 4265911
silesia.tar, no source size, old streaming advanced, 4859267
silesia.tar, long distance mode, old streaming advanced, 4859271
silesia.tar, multithreaded, old streaming advanced, 4859271
silesia.tar, multithreaded long distance mode, old streaming advanced, 4859271
silesia.tar, small window log, old streaming advanced, 7117562
-silesia.tar, small hash log, old streaming advanced, 6529234
+silesia.tar, small hash log, old streaming advanced, 6529209
silesia.tar, small chain log, old streaming advanced, 4917021
silesia.tar, explicit params, old streaming advanced, 4806873
silesia.tar, uncompressed literals, old streaming advanced, 4859271
-silesia.tar, uncompressed literals optimal, old streaming advanced, 4266970
+silesia.tar, uncompressed literals optimal, old streaming advanced, 4265911
silesia.tar, huffman literals, old streaming advanced, 6179056
silesia.tar, multithreaded with advanced params, old streaming advanced, 4859271
github, level -5, old streaming advanced, 213265
github, level 9 with dict, old streaming advanced, 38981
github, level 13, old streaming advanced, 138676
github, level 13 with dict, old streaming advanced, 39725
-github, level 16, old streaming advanced, 138676
+github, level 16, old streaming advanced, 138575
github, level 16 with dict, old streaming advanced, 40789
-github, level 19, old streaming advanced, 132729
+github, level 19, old streaming advanced, 132879
github, level 19 with dict, old streaming advanced, 37576
github, no source size, old streaming advanced, 140599
github, no source size with dict, old streaming advanced, 40608
github, small chain log, old streaming advanced, 139275
github, explicit params, old streaming advanced, 140937
github, uncompressed literals, old streaming advanced, 141104
-github, uncompressed literals optimal, old streaming advanced, 132729
+github, uncompressed literals optimal, old streaming advanced, 132879
github, huffman literals, old streaming advanced, 181107
github, multithreaded with advanced params, old streaming advanced, 141104
github.tar, level -5, old streaming advanced, 52152
github.tar, level 9 with dict, old streaming advanced, 36312
github.tar, level 13, old streaming advanced, 35501
github.tar, level 13 with dict, old streaming advanced, 35807
-github.tar, level 16, old streaming advanced, 40471
+github.tar, level 16, old streaming advanced, 40466
github.tar, level 16 with dict, old streaming advanced, 38578
-github.tar, level 19, old streaming advanced, 32149
+github.tar, level 19, old streaming advanced, 32276
github.tar, level 19 with dict, old streaming advanced, 32704
github.tar, no source size, old streaming advanced, 38828
github.tar, no source size with dict, old streaming advanced, 38015
github.tar, small chain log, old streaming advanced, 41669
github.tar, explicit params, old streaming advanced, 41385
github.tar, uncompressed literals, old streaming advanced, 38831
-github.tar, uncompressed literals optimal, old streaming advanced, 32149
+github.tar, uncompressed literals optimal, old streaming advanced, 32276
github.tar, huffman literals, old streaming advanced, 42560
github.tar, multithreaded with advanced params, old streaming advanced, 38831
github, level -5 with dict, old streaming cdict, 46718
/*
- * Copyright (c) Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
#!/usr/bin/env python3
# ################################################################
-# Copyright (c) Facebook, Inc.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under both the BSD-style license (found in the
continue
if "present" in line:
return (False, f"Copyright line '{line}' contains 'present'!")
- if "Facebook, Inc" not in line:
- return (False, f"Copyright line '{line}' does not contain 'Facebook, Inc'")
+ if "Meta Platforms, Inc" not in line:
+ return (False, f"Copyright line '{line}' does not contain 'Meta Platforms, Inc'")
year = YEAR_REGEX.search(line)
if year is not None:
return (False, f"Copyright line '{line}' contains {year.group(0)}; it should be yearless")
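As a quick illustration of what this check accepts and rejects (example header lines only, not drawn from any particular file):

# Copyright (c) Meta Platforms, Inc. and affiliates.   -> accepted: yearless and names Meta Platforms, Inc
# Copyright (c) 2016-present, Facebook, Inc.           -> rejected at the 'present' check; it would also fail the Meta Platforms and yearless checks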
"""Test zstd interoperability between versions"""
# ################################################################
-# Copyright (c) Yann Collet, Facebook, Inc.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under both the BSD-style license (found in the
repo_url = 'https://github.com/facebook/zstd.git'
tmp_dir_name = 'tests/versionsTest'
make_cmd = 'make'
-make_args = ['-j','CFLAGS=-O1']
+make_args = ['-j','CFLAGS=-O0']
git_cmd = 'git'
test_dat_src = 'README.md'
test_dat = 'test_dat'
if result == 0:
print(dict_name + ' created')
else:
- print('ERROR: creating of ' + dict_name + ' failed')
+ raise RuntimeError('ERROR: creation of ' + dict_name + ' failed')
else:
print(dict_name + ' already exists')
+def zstd(tag, args, input_file, output_file):
+ """
+ Compress input_file to output_file with zstd.
+ This helper is needed because v0.5.0 is broken when stdout is not a TTY.
+ Throws an exception if the command returns non-zero.
+ """
+ with open(input_file, "rb") as i:
+ with open(output_file, "wb") as o:
+ cmd = ['./zstd.' + tag] + args
+ print("Running: '{}', input={}, output={}" .format(
+ ' '.join(cmd), input_file, output_file
+ ))
+ subprocess.check_call(cmd, stdin=i, stdout=o)
+
+
def dict_compress_sample(tag, sample):
dict_name = 'dict.' + tag
- DEVNULL = open(os.devnull, 'wb')
- if subprocess.call(['./zstd.' + tag, '-D', dict_name, '-f', sample], stderr=DEVNULL) == 0:
- os.rename(sample + '.zst', sample + '_01_64_' + tag + '_dictio.zst')
- if subprocess.call(['./zstd.' + tag, '-D', dict_name, '-5f', sample], stderr=DEVNULL) == 0:
- os.rename(sample + '.zst', sample + '_05_64_' + tag + '_dictio.zst')
- if subprocess.call(['./zstd.' + tag, '-D', dict_name, '-9f', sample], stderr=DEVNULL) == 0:
- os.rename(sample + '.zst', sample + '_09_64_' + tag + '_dictio.zst')
- if subprocess.call(['./zstd.' + tag, '-D', dict_name, '-15f', sample], stderr=DEVNULL) == 0:
- os.rename(sample + '.zst', sample + '_15_64_' + tag + '_dictio.zst')
- if subprocess.call(['./zstd.' + tag, '-D', dict_name, '-18f', sample], stderr=DEVNULL) == 0:
- os.rename(sample + '.zst', sample + '_18_64_' + tag + '_dictio.zst')
+ zstd(tag, ['-D', dict_name, '-1'], sample, sample + '_01_64_' + tag + '_dictio.zst')
+ zstd(tag, ['-D', dict_name, '-3'], sample, sample + '_03_64_' + tag + '_dictio.zst')
+ zstd(tag, ['-D', dict_name, '-5'], sample, sample + '_05_64_' + tag + '_dictio.zst')
+ zstd(tag, ['-D', dict_name, '-9'], sample, sample + '_09_64_' + tag + '_dictio.zst')
+ zstd(tag, ['-D', dict_name, '-15'], sample, sample + '_15_64_' + tag + '_dictio.zst')
+ zstd(tag, ['-D', dict_name, '-18'], sample, sample + '_18_64_' + tag + '_dictio.zst')
# zstdFiles = glob.glob("*.zst*")
# print(zstdFiles)
print(tag + " : dict compression completed")
def compress_sample(tag, sample):
- DEVNULL = open(os.devnull, 'wb')
- if subprocess.call(['./zstd.' + tag, '-f', sample], stderr=DEVNULL) == 0:
- os.rename(sample + '.zst', sample + '_01_64_' + tag + '_nodict.zst')
- if subprocess.call(['./zstd.' + tag, '-5f', sample], stderr=DEVNULL) == 0:
- os.rename(sample + '.zst', sample + '_05_64_' + tag + '_nodict.zst')
- if subprocess.call(['./zstd.' + tag, '-9f', sample], stderr=DEVNULL) == 0:
- os.rename(sample + '.zst', sample + '_09_64_' + tag + '_nodict.zst')
- if subprocess.call(['./zstd.' + tag, '-15f', sample], stderr=DEVNULL) == 0:
- os.rename(sample + '.zst', sample + '_15_64_' + tag + '_nodict.zst')
- if subprocess.call(['./zstd.' + tag, '-18f', sample], stderr=DEVNULL) == 0:
- os.rename(sample + '.zst', sample + '_18_64_' + tag + '_nodict.zst')
+ zstd(tag, ['-1'], sample, sample + '_01_64_' + tag + '_nodict.zst')
+ zstd(tag, ['-3'], sample, sample + '_03_64_' + tag + '_nodict.zst')
+ zstd(tag, ['-5'], sample, sample + '_05_64_' + tag + '_nodict.zst')
+ zstd(tag, ['-9'], sample, sample + '_09_64_' + tag + '_nodict.zst')
+ zstd(tag, ['-15'], sample, sample + '_15_64_' + tag + '_nodict.zst')
+ zstd(tag, ['-18'], sample, sample + '_18_64_' + tag + '_nodict.zst')
# zstdFiles = glob.glob("*.zst*")
# print(zstdFiles)
print(tag + " : compression completed")
dec_error = 0
list_zst = sorted(glob.glob('*_nodict.zst'))
for file_zst in list_zst:
- print(file_zst, end=' ')
- print(tag, end=' ')
+ print(file_zst + ' ' + tag)
file_dec = file_zst + '_d64_' + tag + '.dec'
- if tag <= 'v0.5.0':
- params = ['./zstd.' + tag, '-df', file_zst, file_dec]
+ zstd(tag, ['-d'], file_zst, file_dec)
+ if not filecmp.cmp(file_dec, test_dat):
+ raise RuntimeError('Decompression failed: tag={} file={}'.format(tag, file_zst))
else:
- params = ['./zstd.' + tag, '-df', file_zst, '-o', file_dec]
- if execute(params) == 0:
- if not filecmp.cmp(file_dec, test_dat):
- print('ERR !! ')
- dec_error = 1
- else:
- print('OK ')
- else:
- print('command does not work')
- dec_error = 1
- return dec_error
+ print('OK ')
def decompress_dict(tag):
if tag == 'v0.6.0' and dict_tag < 'v0.6.0':
continue
dict_name = 'dict.' + dict_tag
- print(file_zst + ' ' + tag + ' dict=' + dict_tag, end=' ')
+ print(file_zst + ' ' + tag + ' dict=' + dict_tag)
file_dec = file_zst + '_d64_' + tag + '.dec'
- if tag <= 'v0.5.0':
- params = ['./zstd.' + tag, '-D', dict_name, '-df', file_zst, file_dec]
+ zstd(tag, ['-D', dict_name, '-d'], file_zst, file_dec)
+ if not filecmp.cmp(file_dec, test_dat):
+ raise RuntimeError('Decompression failed: tag={} file={}'.format(tag, file_zst))
else:
- params = ['./zstd.' + tag, '-D', dict_name, '-df', file_zst, '-o', file_dec]
- if execute(params) == 0:
- if not filecmp.cmp(file_dec, test_dat):
- print('ERR !! ')
- dec_error = 1
- else:
- print('OK ')
- else:
- print('command does not work')
- dec_error = 1
- return dec_error
+ print('OK ')
if __name__ == '__main__':
print('Compress test.dat by all released zstd')
print('-----------------------------------------------')
- error_code = 0
for tag in tags:
print(tag)
if tag >= 'v0.5.0':
create_dict(tag, dict_source_path)
dict_compress_sample(tag, test_dat)
remove_duplicates()
- error_code += decompress_dict(tag)
+ decompress_dict(tag)
compress_sample(tag, test_dat)
remove_duplicates()
- error_code += decompress_zst(tag)
+ decompress_zst(tag)
print('')
print('Enumerate different compressed files')
zstds = sorted(glob.glob('*.zst'))
for zstd in zstds:
print(zstd + ' : ' + repr(os.path.getsize(zstd)) + ', ' + sha1_of_file(zstd))
-
- if error_code != 0:
- print('====== ERROR !!! =======')
-
- sys.exit(error_code)
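The refactor above funnels every compression and decompression through the zstd() helper, which opens the input and output files itself and pipes them through stdin/stdout, replacing the removed per-version command-line handling (the old code special-cased tags <= 'v0.5.0') and the silent DEVNULL error handling with subprocess.check_call. A minimal sketch of standalone use, with a hypothetical tag and purely illustrative file names (the helper expects a matching './zstd.<tag>' binary in the working directory):

zstd('v1.5.2', ['-3'], 'README.md', 'README.md_03_64_v1.5.2_nodict.zst')   # compress at level 3
zstd('v1.5.2', ['-d'], 'README.md_03_64_v1.5.2_nodict.zst', 'README.md.dec')  # decompress back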
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
#include "seqgen.h"
#include "util.h"
#include "timefn.h" /* UTIL_time_t, UTIL_clockSpanMicro, UTIL_getTime */
-
+#include "external_matchfinder.h" /* zstreamExternalMatchFinder, EMF_testCase */
/*-************************************
* Constants
}
DISPLAYLEVEL(3, "OK \n");
+ DISPLAYLEVEL(3, "test%3i : External matchfinder API: ", testNb++);
+ {
+ size_t const dstBufSize = ZSTD_compressBound(CNBufferSize);
+ BYTE* const dstBuf = (BYTE*)malloc(ZSTD_compressBound(dstBufSize));
+ size_t const checkBufSize = CNBufferSize;
+ BYTE* const checkBuf = (BYTE*)malloc(checkBufSize);
+ int enableFallback;
+ EMF_testCase externalMatchState;
+
+ CHECK(dstBuf == NULL || checkBuf == NULL, "allocation failed");
+
+ ZSTD_CCtx_reset(zc, ZSTD_reset_session_and_parameters);
+
+ /* Reference external matchfinder outside the test loop to
+ * check that the reference is preserved across compressions */
+ ZSTD_registerExternalMatchFinder(
+ zc,
+ &externalMatchState,
+ zstreamExternalMatchFinder
+ );
+
+ for (enableFallback = 0; enableFallback <= 1; enableFallback++) {
+ size_t testCaseId;
+
+ EMF_testCase const EMF_successCases[] = {
+ EMF_ONE_BIG_SEQ,
+ EMF_LOTS_OF_SEQS,
+ };
+ size_t const EMF_numSuccessCases = 2;
+
+ EMF_testCase const EMF_failureCases[] = {
+ EMF_ZERO_SEQS,
+ EMF_BIG_ERROR,
+ EMF_SMALL_ERROR,
+ };
+ size_t const EMF_numFailureCases = 3;
+
+ /* Test external matchfinder success scenarios */
+ for (testCaseId = 0; testCaseId < EMF_numSuccessCases; testCaseId++) {
+ size_t res;
+ externalMatchState = EMF_successCases[testCaseId];
+ ZSTD_CCtx_reset(zc, ZSTD_reset_session_only);
+ CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_enableMatchFinderFallback, enableFallback));
+ res = ZSTD_compress2(zc, dstBuf, dstBufSize, CNBuffer, CNBufferSize);
+ CHECK(ZSTD_isError(res), "EMF: Compression error: %s", ZSTD_getErrorName(res));
+ CHECK_Z(ZSTD_decompress(checkBuf, checkBufSize, dstBuf, res));
+ CHECK(memcmp(CNBuffer, checkBuf, CNBufferSize) != 0, "EMF: Corruption!");
+ }
+
+ /* Test external matchfinder failure scenarios */
+ for (testCaseId = 0; testCaseId < EMF_numFailureCases; testCaseId++) {
+ size_t res;
+ externalMatchState = EMF_failureCases[testCaseId];
+ ZSTD_CCtx_reset(zc, ZSTD_reset_session_only);
+ CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_enableMatchFinderFallback, enableFallback));
+ res = ZSTD_compress2(zc, dstBuf, dstBufSize, CNBuffer, CNBufferSize);
+ if (enableFallback) {
+ CHECK_Z(ZSTD_decompress(checkBuf, checkBufSize, dstBuf, res));
+ CHECK(memcmp(CNBuffer, checkBuf, CNBufferSize) != 0, "EMF: Corruption!");
+ } else {
+ CHECK(!ZSTD_isError(res), "EMF: Should have raised an error!");
+ CHECK(
+ ZSTD_getErrorCode(res) != ZSTD_error_externalMatchFinder_failed,
+ "EMF: Wrong error code: %s", ZSTD_getErrorName(res)
+ );
+ }
+ }
+
+ /* Test compression with external matchfinder + empty src buffer */
+ {
+ size_t res;
+ externalMatchState = EMF_ZERO_SEQS;
+ ZSTD_CCtx_reset(zc, ZSTD_reset_session_only);
+ CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_enableMatchFinderFallback, enableFallback));
+ res = ZSTD_compress2(zc, dstBuf, dstBufSize, CNBuffer, 0);
+ CHECK(ZSTD_isError(res), "EMF: Compression error: %s", ZSTD_getErrorName(res));
+ CHECK(ZSTD_decompress(checkBuf, checkBufSize, dstBuf, res) != 0, "EMF: Empty src round trip failed!");
+ }
+ }
+
+ /* Test that reset clears the external matchfinder */
+ ZSTD_CCtx_reset(zc, ZSTD_reset_session_and_parameters);
+ externalMatchState = EMF_BIG_ERROR; /* ensure zstd will fail if the matchfinder wasn't cleared */
+ CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_enableMatchFinderFallback, 0));
+ CHECK_Z(ZSTD_compress2(zc, dstBuf, dstBufSize, CNBuffer, CNBufferSize));
+
+ free(dstBuf);
+ free(checkBuf);
+ }
+ DISPLAYLEVEL(3, "OK \n");
+
_end:
FUZ_freeDictionary(dictionary);
ZSTD_freeCStream(zc);
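The new block above exercises the external matchfinder registration API end to end. For orientation, here is a minimal self-contained sketch of the call sequence it relies on, reusing the test suite's own helpers from external_matchfinder.h (zstreamExternalMatchFinder and EMF_testCase); it simply mirrors the test above under those assumptions and is not an independent reference for the API:

#define ZSTD_STATIC_LINKING_ONLY  /* matchfinder registration and fallback param are experimental API */
#include "zstd.h"
#include "external_matchfinder.h" /* zstreamExternalMatchFinder, EMF_testCase */

/* Sketch: compress one buffer through an externally registered matchfinder.
 * Returns the compressed size, or an error code (check with ZSTD_isError()). */
static size_t compressWithEMF(void* dst, size_t dstCapacity,
                              const void* src, size_t srcSize)
{
    ZSTD_CCtx* const cctx = ZSTD_createCCtx();
    EMF_testCase state = EMF_LOTS_OF_SEQS;  /* one of the success modes used in the test */
    size_t cSize;
    ZSTD_registerExternalMatchFinder(cctx, &state, zstreamExternalMatchFinder);
    /* let zstd fall back to its internal matchfinder if the external one fails */
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableMatchFinderFallback, 1);
    cSize = ZSTD_compress2(cctx, dst, dstCapacity, src, srcSize);
    ZSTD_freeCCtx(cctx);
    return cSize;
}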
/*
- * Copyright (c) 2016-present, Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
markNb = (markNb+1) % NB_MARKS;
DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.3f),%6.1f MB/s\r",
marks[markNb], displayName, (unsigned)srcSize, (unsigned)cSize, ratio,
- (double)srcSize / fastestC );
+ (double)srcSize / (double)fastestC );
(void)fastestD; (void)crcOrig; /* unused when decompression disabled */
#if 1
markNb = (markNb+1) % NB_MARKS;
DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.3f),%6.1f MB/s ,%6.1f MB/s\r",
marks[markNb], displayName, (unsigned)srcSize, (unsigned)cSize, ratio,
- (double)srcSize / fastestC,
- (double)srcSize / fastestD );
+ (double)srcSize / (double)fastestC,
+ (double)srcSize / (double)fastestD );
/* CRC Checking */
{ U64 const crcCheck = XXH64(resultBuffer, srcSize, 0);
} /* for (testNb = 1; testNb <= (g_nbIterations + !g_nbIterations); testNb++) */
if (g_displayLevel == 1) {
- double cSpeed = (double)srcSize / fastestC;
- double dSpeed = (double)srcSize / fastestD;
+ double cSpeed = (double)srcSize / (double)fastestC;
+ double dSpeed = (double)srcSize / (double)fastestD;
if (g_additionalParam)
DISPLAY("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s %s (param=%d)\n", cLevel, (int)cSize, ratio, cSpeed, dSpeed, displayName, g_additionalParam);
else
/*
- * Copyright (c) 2016-2021, Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) 2016-2021, Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
/*
- * Copyright (c) 2016-2021, Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the