From: Nathan Moinvaziri Date: Thu, 26 Mar 2026 19:11:59 +0000 (-0700) Subject: Add PNG decode benchmark for narrow image widths X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=6d13634fc62034e19777d24c3633e17e8ffa899b;p=thirdparty%2Fzlib-ng.git Add PNG decode benchmark for narrow image widths Benchmark libpng row-by-row decoding where avail_out falls below the 260-byte inflate_fast threshold. Uses a synthetic gradient-with-noise pixel generator that produces deflate token distributions representative of real photographs. Also fix encode_png to use the passed width and height instead of the hardcoded IMWIDTH and IMHEIGHT constants. --- diff --git a/test/benchmarks/CMakeLists.txt b/test/benchmarks/CMakeLists.txt index 7d36173dc..e332a5165 100644 --- a/test/benchmarks/CMakeLists.txt +++ b/test/benchmarks/CMakeLists.txt @@ -108,6 +108,7 @@ if(WITH_BENCHMARK_APPS) set(BENCH_APP_SRCS benchmark_png_encode.cc benchmark_png_decode.cc + benchmark_png_decode_small.cc benchmark_main.cc ) diff --git a/test/benchmarks/benchmark_png_decode_small.cc b/test/benchmarks/benchmark_png_decode_small.cc new file mode 100644 index 000000000..6280d1431 --- /dev/null +++ b/test/benchmarks/benchmark_png_decode_small.cc @@ -0,0 +1,59 @@ +#include +#include +#include + +#include "benchmark_png_shared.h" + +/* Decode PNGs through libpng at various image widths. libpng calls inflate() with + * avail_out equal to one row (width * 3 bytes for RGB). Narrow images produce rows + * below the 260-byte inflate_fast threshold, exercising the slow path on every row. 
*/ +class png_decode_small: public benchmark::Fixture { /* Fixture state: one encoded PNG plus a pixel buffer that is the encode source in SetUp and is passed to decode_png in Bench. */ +protected: + png_dat encoded = {NULL, 0, 0}; + uint8_t *output_buf = NULL; + uint32_t img_width = 0; + uint32_t img_height = 0; + +public: + /* Builds the test image for the width in state.range(0) and encodes it once per SetUp call. */ void SetUp(const ::benchmark::State& state) { + img_width = (uint32_t)state.range(0); + img_height = (1024 * 1024) / img_width; /* integer division keeps the total near 1048576 pixels at every width (86 gives 12192 rows). NOTE(review): a width of 0 would divide by zero; the registered widths are all nonzero. */ + + size_t num_pixels = (size_t)img_width * img_height; + + output_buf = (uint8_t *)malloc(num_pixels * 3); /* 3 bytes per pixel */ + assert(output_buf != NULL); /* NOTE(review): presumably the standard assert, which checks nothing when NDEBUG is defined - confirm */ + init_realistic(output_buf, img_width, img_height); + + encoded = {NULL, 0, 0}; + encode_png(output_buf, &encoded, 9, img_width, img_height); /* 9 is the comp_level argument */ + } + + /* Decodes the same encoded PNG once per benchmark iteration. */ void Bench(benchmark::State &state) { + size_t buf_size = (size_t)img_width * img_height * 3; + for (auto _ : state) { + png_parse_dat in = { encoded.buf }; + uint32_t w, h; + decode_png(&in, (png_bytepp)&output_buf, buf_size, w, h); /* NOTE(review): w and h are not read after this call, so the decoded dimensions are not validated here */ + } + } + + /* Releases both buffers with free(). */ void TearDown(const ::benchmark::State &) { + free(output_buf); + free(encoded.buf); + } +}; + +BENCHMARK_DEFINE_F(png_decode_small, png_decode_small)(benchmark::State &state) { + Bench(state); +} +BENCHMARK_REGISTER_F(png_decode_small, png_decode_small) + /* width in pixels: row bytes = width * 3 + * 32 -> 96 bytes/row (well below 260) + * 64 -> 192 bytes/row (below 260) + * 86 -> 258 bytes/row (just below 260) + * 128 -> 384 bytes/row (above 260, but tail in slow path) + * 256 -> 768 bytes/row + * 1024 -> 3072 bytes/row (reference, minimal slow-path impact) */ + ->Arg(32)->Arg(64)->Arg(86)->Arg(128)->Arg(256)->Arg(1024) + ->Unit(benchmark::kMicrosecond); diff --git a/test/benchmarks/benchmark_png_shared.h b/test/benchmarks/benchmark_png_shared.h index bde679e7d..179a5442f 100644 --- a/test/benchmarks/benchmark_png_shared.h +++ b/test/benchmarks/benchmark_png_shared.h @@ -45,10 +45,31 @@ static void png_write_cb(png_structp pngp, png_bytep data, png_size_t len) { dat->buf_rem -= len; } +/* Generate pixel data that resembles a real photograph: smooth gradients with per-pixel + * noise and occasional edges. 
Produces many short deflate matches and scattered literals */ +static void init_realistic(png_bytep buf, uint32_t width, uint32_t height) { /* buf must hold width * height * 3 bytes: three consecutive bytes are written per pixel, rows stored back to back. The seed is a fixed constant, so the output depends only on width and height. */ + uint32_t seed = 0x12345678; + for (uint32_t y = 0; y < height; y++) { + for (uint32_t x = 0; x < width; x++) { + size_t idx = ((size_t)y * width + x) * 3; + /* Diagonal gradient as base color. NOTE(review): the green term can exceed 255 and wrap in the uint8_t cast when width is much larger than height - confirm callers avoid that. */ + uint8_t base_r = (uint8_t)((x + y) * 179 / (width + height)); + uint8_t base_g = (uint8_t)((x * 2 + y) * 131 / (width + height)); + uint8_t base_b = (uint8_t)(y * 241 / height); + /* Simple xorshift noise: seed & 0x1F is 0..31, so noise is in -15..16 */ + seed ^= seed << 13; + seed ^= seed >> 17; + seed ^= seed << 5; + int noise = (int)(seed & 0x1F) - 15; + /* MIN and MAX are presumably the usual min/max macros, giving a clamp to 0..255; the second byte gets noise >> 1 and the third gets the negated noise */ buf[idx] = (uint8_t)MIN(MAX(base_r + noise, 0), 0xFF); + buf[idx + 1] = (uint8_t)MIN(MAX(base_g + (noise >> 1), 0), 0xFF); + buf[idx + 2] = (uint8_t)MIN(MAX(base_b - noise, 0), 0xFF); + } + } +} + +/* Generate a highly compressible RGB test image with solid R, G, and B stripes. */ static void init_compressible(png_bytep buf, size_t num_pix) { - /* It doesn't actually matter what we make this, but for - * the sake of a reasonable test image, let's make this - * be a stripe of R, G, & B, with no alpha channel */ int32_t i = 0; int32_t red_stop = num_pix / 3; int32_t blue_stop = 2 * num_pix / 3; @@ -86,17 +107,17 @@ static inline void encode_png(png_bytep buf, png_dat *outpng, int32_t comp_level png_set_write_fn(png, outpng, png_write_cb, NULL); png_bytep *png_row_ptrs = new png_bytep[height]; - for (int i = 0; i < IMHEIGHT; ++i) { + for (uint32_t i = 0; i < height; ++i) { png_row_ptrs[i] = (png_bytep)&buf[3*i*width]; } - png_set_IHDR(png, info, IMWIDTH, IMHEIGHT, 8, PNG_COLOR_TYPE_RGB, + png_set_IHDR(png, info, width, height, 8, PNG_COLOR_TYPE_RGB, PNG_INTERLACE_NONE, PNG_COMPRESSION_TYPE_DEFAULT, PNG_FILTER_TYPE_DEFAULT); - - png_write_info(png, info); png_set_compression_level(png, comp_level); png_set_filter(png, 0, PNG_FILTER_NONE); + + png_write_info(png, info); png_write_image(png, 
(png_bytepp)png_row_ptrs); png_write_end(png, NULL); png_destroy_write_struct(&png, &info);