option(ZLIB_DUAL_LINK "Dual link tests against system zlib" OFF)
option(WITH_FUZZERS "Build test/fuzz" OFF)
option(WITH_BENCHMARKS "Build test/benchmarks" OFF)
+option(WITH_BENCHMARK_APPS "Build application benchmarks" OFF)
option(WITH_OPTIM "Build with optimisation" ON)
option(WITH_REDUCED_MEM "Reduced memory usage for special cases (reduces performance)" OFF)
option(WITH_NEW_STRATEGIES "Use new strategies" ON)
add_feature_info(WITH_SANITIZER WITH_SANITIZER "Enable sanitizer support")
add_feature_info(WITH_FUZZERS WITH_FUZZERS "Build test/fuzz")
add_feature_info(WITH_BENCHMARKS WITH_BENCHMARKS "Build test/benchmarks")
+add_feature_info(WITH_BENCHMARK_APPS WITH_BENCHMARK_APPS "Build application benchmarks")
add_feature_info(WITH_OPTIM WITH_OPTIM "Build with optimisation")
add_feature_info(WITH_NEW_STRATEGIES WITH_NEW_STRATEGIES "Use new strategies")
add_feature_info(WITH_NATIVE_INSTRUCTIONS WITH_NATIVE_INSTRUCTIONS
add_test(NAME benchmark_zlib
COMMAND ${CMAKE_CROSSCOMPILING_EMULATOR} $<TARGET_FILE:benchmark_zlib>)
endif()
+
+if(WITH_BENCHMARK_APPS)
+    # Application-level ("macro") benchmarks: PNG encoding/decoding through libpng,
+    # linked against this project's static zlib target.
+    option(BUILD_ALT_BENCH "Link against alternative zlib implementation" OFF)
+
+    # Search for libpng package
+    find_package(PNG QUIET)
+
+    if(NOT PNG_FOUND)
+        # NOTE(review): no GIT_TAG is given, so the fetched revision is not pinned;
+        # consider pinning a release tag for reproducible builds.
+        FetchContent_Declare(PNG
+            GIT_REPOSITORY https://github.com/glennrp/libpng.git)
+        # FetchContent_MakeAvailable() already populates the content. Calling
+        # FetchContent_Populate() again for the same content is an error, so no
+        # follow-up GetProperties/Populate sequence is performed here.
+        FetchContent_MakeAvailable(PNG)
+    endif()
+
+    set(BENCH_APP_SRCS
+        benchmark_png_encode.cc
+        benchmark_png_decode.cc
+        benchmark_main.cc
+    )
+
+    add_executable(benchmark_zlib_apps ${BENCH_APP_SRCS})
+
+    target_include_directories(benchmark_zlib_apps PRIVATE
+        ${CMAKE_SOURCE_DIR}
+        ${CMAKE_BINARY_DIR}
+        ${PNG_INCLUDE_DIR}
+        ${benchmark_SOURCE_DIR}/benchmark/include)
+
+    # We need the static png library if we're statically linking to zlib,
+    # otherwise it will resolve these things in the system provided dynamic
+    # libraries (likely linked to stock zlib)
+    target_link_libraries(benchmark_zlib_apps PRIVATE libpng.a zlibstatic benchmark::benchmark)
+
+    # option() always defines BUILD_ALT_BENCH, so its value has to be tested;
+    # if(DEFINED ...) would build the comparison binary even when it is OFF.
+    if(BUILD_ALT_BENCH)
+        set(ZLIB_ALT_LIB "libz.a" CACHE FILEPATH "Optional alternative zlib implementation (defaults to stock zlib)")
+        add_executable(benchmark_zlib_apps_alt ${BENCH_APP_SRCS})
+        target_link_libraries(benchmark_zlib_apps_alt PRIVATE libpng.a ${ZLIB_ALT_LIB} benchmark::benchmark)
+        # BUILD_ALT compiles out the zlib-ng specific headers and CPU feature setup
+        # in benchmark_main.cc.
+        target_compile_definitions(benchmark_zlib_apps_alt PRIVATE BUILD_ALT=1)
+        target_include_directories(benchmark_zlib_apps_alt PRIVATE
+            ${CMAKE_SOURCE_DIR}
+            ${CMAKE_BINARY_DIR}
+            ${PNG_INCLUDE_DIR}
+            ${benchmark_SOURCE_DIR}/benchmark/include)
+    endif()
+endif()
## Benchmarks
-
These benchmarks are written using [Google Benchmark](https://github.com/google/benchmark).
*Repetitions*
```
--benchmark_filter="adler32*"
```
+
+There are two different benchmarks, micro and macro.
+
+### Benchmark benchmark_zlib
+These are microbenchmarks intended to test lower level subfunctions of the library.
+
+Benchmarks include implementations of:
+ - Adler32
+ - CRC
+ - 256 byte comparisons
+ - SIMD accelerated "slide hash" routine
+
+By default these benchmarks report results on the nanosecond scale and are small enough
+to measure very minute differences.
+
+### Benchmark benchmark_zlib_apps
+These benchmarks measure applications of zlib as a whole. Currently the only examples
+are PNG encoding and decoding. The PNG encode and decode tests use procedurally
+generated and highly compressible image data.
+
+Additionally, a test called `png_decode_realistic` will decode any set of RGB 8 BPP encoded
+PNGs found in a directory named "test_pngs" under the working directory, with files named
+{0..9}.png. If these images do not exist, the test reports an error and the benchmark moves
+on to the next set of benchmarks.
+
+*benchmark_zlib_apps_alt*
+
+The user can compile a comparison benchmark application linking to any zlib-compatible
+implementation of their choosing. Set `ZLIB_ALT_LIB` to the library to link against.
#include <benchmark/benchmark.h>
+#ifndef BUILD_ALT
extern "C" {
# include "zbuild.h"
# include "cpu_features.h"
}
+#endif
int main(int argc, char** argv) {
+#ifndef BUILD_ALT
cpu_check_features();
+#endif
::benchmark::Initialize(&argc, argv);
::benchmark::RunSpecifiedBenchmarks();
--- /dev/null
+#include <stdio.h>
+#include <benchmark/benchmark.h>
+#include "benchmark_png_shared.h"
+#include <assert.h>
+
+/* Fixture that benchmarks PNG decoding of a synthetic, highly compressible image.
+ * SetUp() encodes the image once per compression level (0-9); Bench() then decodes
+ * the PNG selected by the benchmark argument. */
+class png_decode: public benchmark::Fixture {
+protected:
+    /* One encoded PNG per compression level, indexed by level */
+    png_dat inpng[10];
+
+    /* Backing this on the heap is a more realistic benchmark */
+    uint8_t *output_img_buf = NULL;
+
+public:
+    /* Let's make the vanilla version have something extremely compressible */
+    virtual void init_img(png_bytep img_bytes, size_t width, size_t height) {
+        init_compressible(img_bytes, width*height);
+    }
+
+    void SetUp(const ::benchmark::State& state) {
+        (void)state;
+        output_img_buf = (uint8_t*)malloc(IMWIDTH * IMHEIGHT * 3);
+        /* Checked explicitly: an assert() would vanish in NDEBUG (release) builds */
+        if (output_img_buf == NULL)
+            abort();
+        init_img(output_img_buf, IMWIDTH, IMHEIGHT);
+
+        /* First we need to author the png bytes to be decoded */
+        for (int i = 0; i < 10; ++i) {
+            inpng[i] = {NULL, 0, 0};
+            encode_png(output_img_buf, &inpng[i], i, IMWIDTH, IMHEIGHT);
+        }
+    }
+
+    /* State in this circumstance will convey the compression level */
+    void Bench(benchmark::State &state) {
+        /* Loop-invariant, so it is read once outside of the timed region */
+        const int compress_lvl = (int)state.range(0);
+
+        for (auto _ : state) {
+            png_parse_dat in = { inpng[compress_lvl].buf };
+            uint32_t width, height;
+            decode_png(&in, (png_bytepp)&output_img_buf, IMWIDTH * IMHEIGHT * 3, width, height);
+        }
+    }
+
+    void TearDown(const ::benchmark::State &state) {
+        (void)state;
+        free(output_img_buf);
+        for (int i = 0; i < 10; ++i) {
+            free(inpng[i].buf);
+        }
+    }
+};
+
+/* Variant of png_decode that decodes user supplied images instead of synthetic
+ * ones. It expects test_pngs/0.png ... test_pngs/9.png relative to the working
+ * directory; when any of them is missing the benchmark is skipped with an error. */
+class png_decode_realistic: public png_decode {
+private:
+    bool test_files_found = false;
+
+public:
+    void SetUp(const ::benchmark::State &state) {
+        (void)state;
+        test_files_found = false;
+
+        output_img_buf = (uint8_t*)malloc(IMWIDTH * IMHEIGHT * 3);
+        if (output_img_buf == NULL)
+            abort();
+
+        /* Start with every slot empty so that TearDown() can free() all of them
+         * unconditionally, whichever path leaves this function */
+        for (size_t i = 0; i < 10; ++i)
+            inpng[i] = { NULL, 0, 0 };
+
+        /* Let's take all the images at different compression levels and jam their bytes into buffers */
+        char test_fname[32];
+        FILE *files[10] = { NULL };
+
+        for (size_t i = 0; i < 10; ++i) {
+            snprintf(test_fname, sizeof(test_fname), "test_pngs/%u.png", (unsigned)i);
+            /* PNG is binary data, so the file must not be opened in text mode */
+            files[i] = fopen(test_fname, "rb");
+            if (files[i] == NULL) {
+                /* Incomplete image set: release what was opened and leave the fixture empty */
+                for (size_t j = 0; j < i; ++j)
+                    fclose(files[j]);
+                return;
+            }
+        }
+
+        /* Now that we've established we have all the png files, let's read all of their bytes into buffers */
+        for (size_t i = 0; i < 10; ++i) {
+            FILE *in_file = files[i];
+
+            if (fseek(in_file, 0, SEEK_END) != 0)
+                abort();
+            long file_len = ftell(in_file);
+            if (file_len < 0)
+                abort();
+            rewind(in_file);
+
+            size_t num_bytes = (size_t)file_len;
+            uint8_t *raw_file = (uint8_t*)malloc(num_bytes);
+            if (raw_file == NULL)
+                abort();
+
+            inpng[i].buf = raw_file;
+            inpng[i].len = num_bytes;
+            inpng[i].buf_rem = 0;
+
+            size_t bytes_read = fread(raw_file, 1, num_bytes, in_file);
+            if (bytes_read != num_bytes) {
+                fprintf(stderr, "couldn't read all of the bytes for file test_pngs/%u.png", (unsigned)i);
+                abort();
+            }
+
+            fclose(in_file);
+        }
+
+        test_files_found = true;
+    }
+
+    void Bench(benchmark::State &state) {
+        if (!test_files_found) {
+            state.SkipWithError("Test imagery in test_pngs not found");
+            /* Nothing to decode: the image buffers are all NULL */
+            return;
+        }
+
+        png_decode::Bench(state);
+    }
+};
+
+/* Decode the synthetic image; the benchmark argument (0-9) is passed through
+ * to Bench(), which uses it to pick the pre-encoded PNG to decode. */
+BENCHMARK_DEFINE_F(png_decode, png_decode)(benchmark::State &state) {
+ Bench(state);
+}
+BENCHMARK_REGISTER_F(png_decode, png_decode)->DenseRange(0, 9, 1)->Unit(benchmark::kMicrosecond);
+
+/* Same argument range, but run against the png_decode_realistic fixture. */
+BENCHMARK_DEFINE_F(png_decode_realistic, png_decode_realistic)(benchmark::State &state) {
+ Bench(state);
+}
+BENCHMARK_REGISTER_F(png_decode_realistic, png_decode_realistic)->DenseRange(0, 9, 1)->Unit(benchmark::kMicrosecond);
--- /dev/null
+#include <stdio.h>
+#include <assert.h>
+#include <benchmark/benchmark.h>
+#include "benchmark_png_shared.h"
+
+#define IMWIDTH 1024
+#define IMHEIGHT 1024
+
+/* Fixture that benchmarks PNG encoding of a synthetic, highly compressible
+ * image into an in-memory buffer. The benchmark argument is passed to
+ * encode_png() as the compression level. */
+class png_encode: public benchmark::Fixture {
+private:
+    /* Destination of the encoded stream; reused across iterations */
+    png_dat outpng;
+
+    /* Backing this on the heap is a more realistic benchmark */
+    uint8_t *input_img_buf = NULL;
+
+public:
+    /* Let's make the vanilla version have something extremely compressible */
+    virtual void init_img(png_bytep img_bytes, size_t width, size_t height) {
+        init_compressible(img_bytes, width * height);
+    }
+
+    void SetUp(const ::benchmark::State& state) {
+        (void)state;
+        input_img_buf = (uint8_t*)malloc(IMWIDTH * IMHEIGHT * 3);
+        /* Using malloc rather than zng_alloc so that we can call realloc.
+         * IMWIDTH * IMHEIGHT is likely to be more than enough bytes, though,
+         * given that a simple run length encoding already pretty much can
+         * reduce to this */
+        outpng.buf = (uint8_t*)malloc(IMWIDTH * IMHEIGHT * 3);
+
+        /* Checked explicitly: an assert() would vanish in NDEBUG (release) builds */
+        if (input_img_buf == NULL || outpng.buf == NULL)
+            abort();
+
+        outpng.len = 0;
+        outpng.buf_rem = IMWIDTH * IMHEIGHT * 3;
+        init_img(input_img_buf, IMWIDTH, IMHEIGHT);
+    }
+
+    /* State in this circumstance will convey the compression level */
+    void Bench(benchmark::State &state) {
+        for (auto _ : state) {
+            encode_png((png_bytep)input_img_buf, &outpng, state.range(0), IMWIDTH, IMHEIGHT);
+            /* Rewind the output buffer: the bytes just written become free space
+             * again, on top of whatever capacity was still unused */
+            outpng.buf_rem += outpng.len;
+            outpng.len = 0;
+        }
+    }
+
+    void TearDown(const ::benchmark::State &state) {
+        (void)state;
+        free(input_img_buf);
+        free(outpng.buf);
+    }
+};
+
+/* Encode the synthetic image once per iteration; the benchmark argument (0-9)
+ * reaches encode_png() as the compression level via Bench(). */
+BENCHMARK_DEFINE_F(png_encode, encode_compressible)(benchmark::State &state) {
+ Bench(state);
+}
+BENCHMARK_REGISTER_F(png_encode, encode_compressible)->DenseRange(0, 9, 1)->Unit(benchmark::kMicrosecond);
--- /dev/null
+#pragma once
+
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+
+#define IMWIDTH 1024
+#define IMHEIGHT 1024
+
+extern "C" {
+# include <png.h>
+}
+
+/* Growable in-memory buffer that png_write_cb() appends encoded PNG bytes to. */
+typedef struct _png_dat {
+ /* Start of the buffer; png_write_cb() may realloc() it */
+ uint8_t *buf;
+ /* Number of bytes written so far; also the offset of the next write */
+ int64_t len;
+ /* Bytes still available past `len` before the buffer has to grow */
+ size_t buf_rem;
+} png_dat;
+
+/* Read cursor over an in-memory PNG, consumed by read_from_pngdat(). */
+typedef struct _png_parse_dat {
+ /* Next byte to hand to libpng; advanced by every read. No end-of-buffer
+  * marker is stored alongside it. */
+ uint8_t *cur_pos;
+} png_parse_dat;
+
+/* Write a customized write callback so that we write back to an in-memory buffer.
+ * This allows the testing to not involve disk IO */
+/* libpng write callback that appends `len` bytes from `data` to the png_dat
+ * registered as the I/O pointer, growing its buffer when the free space is
+ * too small. Terminates the process if the buffer cannot be grown. */
+static void png_write_cb(png_structp pngp, png_bytep data, png_size_t len) {
+    png_dat *sink = (png_dat*)png_get_io_ptr(pngp);
+
+    if (sink->buf_rem < len) {
+        /* Grow by twice the request so that a run of small writes does not
+         * trigger a realloc each time */
+        const size_t grown_size = sink->len + sink->buf_rem + 2 * len;
+        sink->buf = (uint8_t*)realloc(sink->buf, grown_size);
+
+        if (sink->buf == NULL) {
+            /* Pretty unlikely but we'll put it here just in case */
+            fprintf(stderr, "realloc failed, exiting\n");
+            exit(1);
+        }
+
+        sink->buf_rem += 2 * len;
+    }
+
+    uint8_t *write_pos = sink->buf + sink->len;
+    memcpy(write_pos, data, len);
+
+    /* Account for the bytes that were just appended */
+    sink->buf_rem -= len;
+    sink->len += len;
+}
+
+/* Fill `buf` with `num_pix` RGB pixels (3 bytes each, so `buf` must hold at
+ * least 3 * num_pix bytes): the first third pure red, the second third pure
+ * green and the remainder pure blue. */
+static void init_compressible(png_bytep buf, size_t num_pix) {
+    /* It doesn't actually matter what we make this, but for
+     * the sake of a reasonable test image, let's make this
+     * be a stripe of R, G, & B, with no alpha channel */
+    const size_t red_stop = num_pix / 3;
+    const size_t green_stop = 2 * num_pix / 3;
+    const size_t blue_stop = num_pix;
+
+    /* size_t indices keep the byte offset (3 * pixel) from overflowing a
+     * 32-bit signed integer on large images */
+    size_t pix = 0;
+
+    for (; pix < red_stop; ++pix) {
+        png_bytep px = buf + 3 * pix;
+        px[0] = 255;
+        px[1] = 0;
+        px[2] = 0;
+    }
+
+    for (; pix < green_stop; ++pix) {
+        png_bytep px = buf + 3 * pix;
+        px[0] = 0;
+        px[1] = 255;
+        px[2] = 0;
+    }
+
+    for (; pix < blue_stop; ++pix) {
+        png_bytep px = buf + 3 * pix;
+        px[0] = 0;
+        px[1] = 0;
+        px[2] = 255;
+    }
+}
+
+/* Encode an 8-bit RGB image of `width` x `height` pixels, stored row by row in
+ * `buf` (3 bytes per pixel), as a PNG appended to `outpng` through
+ * png_write_cb(). `comp_level` is handed to png_set_compression_level().
+ * Aborts if the libpng structures cannot be created. */
+static inline void encode_png(png_bytep buf, png_dat *outpng, int32_t comp_level, uint32_t width, uint32_t height) {
+    png_structp png = png_create_write_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL);
+
+    /* Most of this error handling is _likely_ not necessary. Likewise it's likely
+     * a lot of this stuff can be done in the setup function to avoid measuring this
+     * fixed setup time, but for now we'll do it here */
+    if (!png) abort();
+
+    png_infop info = png_create_info_struct(png);
+    if (!info) abort();
+
+    png_set_write_fn(png, outpng, png_write_cb, NULL);
+
+    /* One pointer per image row. The loop is bounded by the caller's height
+     * (not the IMHEIGHT constant) so it can never run past the allocation, and
+     * the offset is computed in size_t to avoid 32-bit wraparound. */
+    const size_t row_stride = (size_t)width * 3;
+    png_bytep *png_row_ptrs = new png_bytep[height];
+    for (uint32_t row = 0; row < height; ++row) {
+        png_row_ptrs[row] = buf + row * row_stride;
+    }
+
+    /* The header has to describe the image that was actually passed in */
+    png_set_IHDR(png, info, width, height, 8, PNG_COLOR_TYPE_RGB,
+                 PNG_INTERLACE_NONE, PNG_COMPRESSION_TYPE_DEFAULT,
+                 PNG_FILTER_TYPE_DEFAULT);
+
+    png_write_info(png, info);
+    png_set_compression_level(png, comp_level);
+    png_set_filter(png, 0, PNG_FILTER_NONE);
+    png_write_image(png, (png_bytepp)png_row_ptrs);
+    png_write_end(png, NULL);
+    png_destroy_write_struct(&png, &info);
+    delete[] png_row_ptrs;
+}
+
+/* libpng read callback: copies the next `bytes_to_read` bytes from the
+ * png_parse_dat registered as the I/O pointer into `out` and moves its cursor
+ * forward by the same amount. No bounds check is performed. */
+static void read_from_pngdat(png_structp png, png_bytep out, png_size_t bytes_to_read) {
+    png_parse_dat *cursor = (png_parse_dat*)png_get_io_ptr(png);
+    const uint8_t *chunk_start = cursor->cur_pos;
+
+    cursor->cur_pos = cursor->cur_pos + bytes_to_read;
+    memcpy(out, chunk_start, bytes_to_read);
+}
+
+/* Decode the in-memory PNG that `dat` points at into `*out_bytes`.
+ *
+ *   dat        read cursor over the encoded PNG, advanced while decoding
+ *   out_bytes  heap buffer receiving the pixels; it is realloc()'d (and
+ *              *out_bytes updated) when the image needs more than in_size bytes
+ *   in_size    current size of *out_bytes in bytes
+ *   width,
+ *   height     set to the dimensions read from the PNG header
+ *
+ * Returns the number of bytes of decoded image data. Aborts if the libpng
+ * structures cannot be created, the image is not RGB, or memory runs out. */
+static inline int decode_png(png_parse_dat *dat, png_bytepp out_bytes, size_t in_size, uint32_t &width, uint32_t &height) {
+    png_structp png = png_create_read_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL);
+    if (!png) abort();
+
+    png_infop info = png_create_info_struct(png);
+    if (!info) abort();
+
+    png_set_read_fn(png, dat, read_from_pngdat);
+    png_read_info(png, info);
+
+    int bit_depth = 0, color_type = -1;
+    png_get_IHDR(png, info, &width, &height, &bit_depth, &color_type, NULL, NULL, NULL);
+
+    if (color_type != PNG_COLOR_TYPE_RGB) {
+        fprintf(stderr, "expected an 8 bpp RGB image\n");
+        abort();
+    }
+
+    /* Ask libpng for the size of one decoded row rather than deriving it from
+     * width and bit depth alone, which would leave out the three channels */
+    const size_t row_bytes = png_get_rowbytes(png, info);
+    const size_t im_size = row_bytes * height;
+
+    if (im_size > in_size) {
+        png_bytep grown = (png_bytep)realloc(*out_bytes, im_size);
+        if (!grown) abort();
+        *out_bytes = grown;
+    }
+
+    /* Rows are laid out back to back, one full row apart, so they never overlap */
+    png_bytep *out_rows = new png_bytep[height];
+    for (size_t i = 0; i < height; ++i)
+        out_rows[i] = *out_bytes + row_bytes * i;
+
+    /* Read exactly as many rows as the image has, not a fixed IMHEIGHT */
+    png_read_rows(png, out_rows, NULL, height);
+    png_destroy_read_struct(&png, &info, NULL);
+    delete[] out_rows;
+
+    return (int)im_size;
+}