Add PNG decode benchmark for narrow image widths

author Nathan Moinvaziri <nathan@nathanm.com>

Thu, 26 Mar 2026 19:11:59 +0000 (12:11 -0700)

committer Hans Kristian Rosbach <hk-github@circlestorm.org>

Fri, 1 May 2026 12:36:28 +0000 (14:36 +0200)
author Nathan Moinvaziri <nathan@nathanm.com>
Thu, 26 Mar 2026 19:11:59 +0000 (12:11 -0700)
committer Hans Kristian Rosbach <hk-github@circlestorm.org>
Fri, 1 May 2026 12:36:28 +0000 (14:36 +0200)
diff --git a/test/benchmarks/CMakeLists.txt b/test/benchmarks/CMakeLists.txt

index 7d36173dccedbc7351f08d6ac9b4f214c270bcac..e332a5165aef749b4fb2ae8a16ae61c130fad7e3 100644 (file)
--- a/test/benchmarks/CMakeLists.txt
+++ b/test/benchmarks/CMakeLists.txt
@@ -108,6 +108,7 @@ if(WITH_BENCHMARK_APPS)
      set(BENCH_APP_SRCS
          benchmark_png_encode.cc
          benchmark_png_decode.cc
+        benchmark_png_decode_small.cc
          benchmark_main.cc
      )
  
diff --git a/test/benchmarks/benchmark_png_decode_small.cc b/test/benchmarks/benchmark_png_decode_small.cc

new file mode 100644 (file)

index 0000000..6280d14
--- /dev/null
+++ b/test/benchmarks/benchmark_png_decode_small.cc
@@ -0,0 +1,59 @@
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <benchmark/benchmark.h>
+
+#include "benchmark_png_shared.h"
+
+/* Decode PNGs through libpng at various image widths. libpng calls inflate() with
+ * avail_out equal to one row (width * 3 bytes for RGB). Narrow images produce rows
+ * below the 260-byte inflate_fast threshold, exercising the slow path on every row.
+ *
+ * The fixture takes the image width from the benchmark argument, derives a height that
+ * keeps the pixel count close to 1024 * 1024, and encodes one synthetic image in SetUp()
+ * so that the timed loop in Bench() only decodes. */
+class png_decode_small: public benchmark::Fixture {
+protected:
+    png_dat encoded = {NULL, 0, 0};  /* PNG stream produced in SetUp() */
+    uint8_t *output_buf = NULL;      /* RGB pixels: encoder input, then decode destination */
+    uint32_t img_width = 0;
+    uint32_t img_height = 0;
+
+    /* Abort the run when a setup precondition does not hold. Unlike assert(), this
+     * check is still present when the benchmark is compiled with NDEBUG defined. */
+    static void require(bool ok, const char *what) {
+        if (!ok) {
+            fprintf(stderr, "png_decode_small: %s\n", what);
+            abort();
+        }
+    }
+
+public:
+    /* Build the test image for the width given by state.range(0) and encode it. */
+    void SetUp(const ::benchmark::State& state) override {
+        /* A non-positive width would divide by zero when deriving the height */
+        require(state.range(0) > 0, "image width must be positive");
+        img_width = (uint32_t)state.range(0);
+        img_height = (1024 * 1024) / img_width;
+        require(img_height > 0, "image width must not exceed 1024 * 1024");
+
+        size_t num_pixels = (size_t)img_width * img_height;
+
+        output_buf = (uint8_t *)malloc(num_pixels * 3);
+        require(output_buf != NULL, "failed to allocate pixel buffer");
+        init_realistic(output_buf, img_width, img_height);
+
+        encoded = {NULL, 0, 0};
+        encode_png(output_buf, &encoded, 9, img_width, img_height);
+        require(encoded.buf != NULL, "PNG encoding produced no data");
+    }
+
+    /* Timed loop: decode the pre-encoded image once per iteration. */
+    void Bench(benchmark::State &state) {
+        size_t buf_size = (size_t)img_width * img_height * 3;
+        for (auto _ : state) {
+            /* Fresh parse state each iteration so decoding starts at the stream head */
+            png_parse_dat in = { encoded.buf };
+            uint32_t w = 0;
+            uint32_t h = 0;
+            decode_png(&in, (png_bytepp)&output_buf, buf_size, w, h);
+        }
+    }
+
+    /* Release both buffers and reset the members so no stale pointer is left behind. */
+    void TearDown(const ::benchmark::State &) override {
+        free(output_buf);
+        output_buf = NULL;
+        free(encoded.buf);
+        encoded = {NULL, 0, 0};
+    }
+};
+
+/* Benchmark body: the timed loop lives in the fixture's Bench(). */
+BENCHMARK_DEFINE_F(png_decode_small, png_decode_small)(benchmark::State &st) {
+    Bench(st);
+}
+
+/* One run per image width in pixels; an RGB row is width * 3 bytes. */
+BENCHMARK_REGISTER_F(png_decode_small, png_decode_small)
+    ->Arg(32)    /*   96 bytes/row (well below 260) */
+    ->Arg(64)    /*  192 bytes/row (below 260) */
+    ->Arg(86)    /*  258 bytes/row (just below 260) */
+    ->Arg(128)   /*  384 bytes/row (above 260, but tail in slow path) */
+    ->Arg(256)   /*  768 bytes/row */
+    ->Arg(1024)  /* 3072 bytes/row (reference, minimal slow-path impact) */
+    ->Unit(benchmark::kMicrosecond);
diff --git a/test/benchmarks/benchmark_png_shared.h b/test/benchmarks/benchmark_png_shared.h

index bde679e7d3e3a0eb30e5ab366c81f12c7c4723a1..179a5442f19c620c98fbf92cb6187ddd6714cc4e 100644 (file)
--- a/test/benchmarks/benchmark_png_shared.h
+++ b/test/benchmarks/benchmark_png_shared.h
@@ -45,10 +45,31 @@ static void png_write_cb(png_structp pngp, png_bytep data, png_size_t len) {
      dat->buf_rem -= len;
  }
  
+/* Generate pixel data that resembles a real photograph: smooth gradients with per-pixel
+ * noise and occasional edges. Produces many short deflate matches and scattered literals.
+ *
+ *   buf    - destination for width * height RGB pixels, 3 bytes each, row-major
+ *   width  - image width in pixels
+ *   height - image height in pixels
+ *
+ * The output is deterministic: the noise generator always starts from the same seed. */
+static void init_realistic(png_bytep buf, uint32_t width, uint32_t height) {
+    uint32_t seed = 0x12345678;
+    /* 64-bit intermediates keep the gradient products from wrapping on large images */
+    const uint64_t diag_span = (uint64_t)width + height;
+    for (uint32_t y = 0; y < height; y++) {
+        for (uint32_t x = 0; x < width; x++) {
+            size_t idx = ((size_t)y * width + x) * 3;
+            /* Diagonal gradient as base color */
+            uint8_t base_r = (uint8_t)(((uint64_t)x + y) * 179 / diag_span);
+            uint8_t base_g = (uint8_t)(((uint64_t)x * 2 + y) * 131 / diag_span);
+            uint8_t base_b = (uint8_t)((uint64_t)y * 241 / height);
+            /* xorshift32 step; the low 5 bits give a noise level in [-15, +16] */
+            seed ^= seed << 13;
+            seed ^= seed >> 17;
+            seed ^= seed << 5;
+            uint32_t level = seed & 0x1F;
+            int noise = (int)level - 15;
+            /* floor(noise / 2), computed from the unsigned level because right-shifting
+             * a negative int is implementation-defined before C++20 */
+            int half_noise = (int)((level + 1) >> 1) - 8;
+            buf[idx]     = (uint8_t)MIN(MAX(base_r + noise, 0), 0xFF);
+            buf[idx + 1] = (uint8_t)MIN(MAX(base_g + half_noise, 0), 0xFF);
+            buf[idx + 2] = (uint8_t)MIN(MAX(base_b - noise, 0), 0xFF);
+        }
+    }
+}
+
+/* Generate a highly compressible RGB test image with solid R, G, and B stripes. */
  static void init_compressible(png_bytep buf, size_t num_pix) {
-    /* It doesn't actually matter what we make this, but for
-     * the sake of a reasonable test image, let's make this
-     * be a stripe of R, G, & B, with no alpha channel */
      int32_t i = 0;
      int32_t red_stop = num_pix / 3;
      int32_t blue_stop = 2 * num_pix / 3;
@@ -86,17 +107,17 @@ static inline void encode_png(png_bytep buf, png_dat *outpng, int32_t comp_level
  
      png_set_write_fn(png, outpng, png_write_cb, NULL);
      png_bytep *png_row_ptrs = new png_bytep[height];
-    for (int i = 0; i < IMHEIGHT; ++i) {
+    for (uint32_t i = 0; i < height; ++i) {
          png_row_ptrs[i] = (png_bytep)&buf[3*i*width];
      }
  
-    png_set_IHDR(png, info, IMWIDTH, IMHEIGHT, 8, PNG_COLOR_TYPE_RGB,
+    png_set_IHDR(png, info, width, height, 8, PNG_COLOR_TYPE_RGB,
                   PNG_INTERLACE_NONE, PNG_COMPRESSION_TYPE_DEFAULT,
                   PNG_FILTER_TYPE_DEFAULT);
-
-    png_write_info(png, info);
      png_set_compression_level(png, comp_level);
      png_set_filter(png, 0, PNG_FILTER_NONE);
+
+    png_write_info(png, info);
      png_write_image(png, (png_bytepp)png_row_ptrs);
      png_write_end(png, NULL);
      png_destroy_write_struct(&png, &info);
author	Nathan Moinvaziri <nathan@nathanm.com>
	Thu, 26 Mar 2026 19:11:59 +0000 (12:11 -0700)
committer	Hans Kristian Rosbach <hk-github@circlestorm.org>
	Fri, 1 May 2026 12:36:28 +0000 (14:36 +0200)
test/benchmarks/CMakeLists.txt		patch \| blob \| blame \| history
test/benchmarks/benchmark_png_decode_small.cc	[new file with mode: 0644]	patch \| blob
test/benchmarks/benchmark_png_shared.h		patch \| blob \| blame \| history