--- /dev/null
+#include <stdio.h>
+#include <assert.h>
+#include <benchmark/benchmark.h>
+
+#include "benchmark_png_shared.h"
+
+/* Benchmark fixture: decode PNGs through libpng across a range of image widths.
+ * libpng calls inflate() with avail_out equal to one row (width * 3 bytes for RGB),
+ * so narrow images keep every row below the 260-byte inflate_fast threshold and
+ * exercise the slow path on each row. */
+class png_decode_small: public benchmark::Fixture {
+protected:
+    /* In-memory PNG produced by SetUp() and consumed by Bench() */
+    png_dat encoded = {NULL, 0, 0};
+    /* Raw RGB pixels: source image for encoding, then reused as the decode target */
+    uint8_t *output_buf = NULL;
+    uint32_t img_width = 0;
+    uint32_t img_height = 0;
+
+public:
+    /* Size the image from the benchmark argument, fill it with test pixels and
+     * compress it at level 9. */
+    void SetUp(const ::benchmark::State& state) {
+        /* Height follows from the width so the pixel count stays near 1024 * 1024 */
+        const uint32_t total_pixels = 1024 * 1024;
+        img_width = (uint32_t)state.range(0);
+        img_height = total_pixels / img_width;
+
+        const size_t rgb_bytes = (size_t)img_width * img_height * 3;
+        output_buf = (uint8_t *)malloc(rgb_bytes);
+        assert(output_buf != NULL);
+        init_realistic(output_buf, img_width, img_height);
+
+        encoded = {NULL, 0, 0};
+        encode_png(output_buf, &encoded, 9, img_width, img_height);
+    }
+
+    /* Timed loop: decode the pre-encoded PNG back into output_buf on every iteration. */
+    void Bench(benchmark::State &state) {
+        const size_t out_capacity = (size_t)img_width * img_height * 3;
+        /* decode_png receives the address of the buffer pointer */
+        png_bytepp out_rows = (png_bytepp)&output_buf;
+        for (auto _ : state) {
+            uint32_t decoded_w, decoded_h;
+            /* Fresh parse cursor each iteration, starting at the head of the PNG */
+            png_parse_dat reader = { encoded.buf };
+            decode_png(&reader, out_rows, out_capacity, decoded_w, decoded_h);
+        }
+    }
+
+    /* Release both buffers allocated during SetUp(). */
+    void TearDown(const ::benchmark::State &) {
+        free(encoded.buf);
+        free(output_buf);
+    }
+};
+
+BENCHMARK_DEFINE_F(png_decode_small, png_decode_small)(benchmark::State &state) {
+ Bench(state);
+}
+BENCHMARK_REGISTER_F(png_decode_small, png_decode_small)
+ /* width in pixels: row bytes = width * 3
+ * The fixture's SetUp derives the height as (1024 * 1024) / width, so each
+ * configuration decodes roughly one megapixel regardless of row length.
+ * 32 -> 96 bytes/row (well below 260)
+ * 64 -> 192 bytes/row (below 260)
+ * 86 -> 258 bytes/row (just below 260)
+ * 128 -> 384 bytes/row (above 260, but tail in slow path)
+ * 256 -> 768 bytes/row
+ * 1024 -> 3072 bytes/row (reference, minimal slow-path impact) */
+ ->Arg(32)->Arg(64)->Arg(86)->Arg(128)->Arg(256)->Arg(1024)
+ ->Unit(benchmark::kMicrosecond);
dat->buf_rem -= len;
}
+/* Generate pixel data that resembles a real photograph: smooth gradients with per-pixel
+ * noise and occasional edges. Produces many short deflate matches and scattered literals.
+ *
+ * buf    - destination, written as width * height packed RGB pixels (3 bytes each,
+ *          row-major); the caller must provide at least width * height * 3 bytes
+ * width  - image width in pixels
+ * height - image height in pixels
+ *
+ * The noise generator starts from a fixed seed, so the output is identical on every
+ * call with the same dimensions. */
+static void init_realistic(png_bytep buf, uint32_t width, uint32_t height) {
+    uint32_t seed = 0x12345678;
+    /* 64-bit so neither the sum nor the scaled products below can wrap */
+    const uint64_t diag_span = (uint64_t)width + height;
+    for (uint32_t y = 0; y < height; y++) {
+        for (uint32_t x = 0; x < width; x++) {
+            size_t idx = ((size_t)y * width + x) * 3;
+            /* Diagonal gradient as base color. Kept as int rather than uint8_t:
+             * the green term can reach 261 when width is much larger than height,
+             * and a narrowing cast would wrap it to a dark value instead of letting
+             * the clamp below saturate it at 255. */
+            int base_r = (int)(((uint64_t)x + y) * 179 / diag_span);
+            int base_g = (int)(((uint64_t)x * 2 + y) * 131 / diag_span);
+            int base_b = (int)((uint64_t)y * 241 / height);
+            /* Simple xorshift noise in the range [-15, 16] */
+            seed ^= seed << 13;
+            seed ^= seed >> 17;
+            seed ^= seed << 5;
+            int noise = (int)(seed & 0x1F) - 15;
+            /* Green gets half the noise amplitude (relies on arithmetic right shift
+             * for negative values); blue gets the inverted noise. */
+            buf[idx] = (uint8_t)MIN(MAX(base_r + noise, 0), 0xFF);
+            buf[idx + 1] = (uint8_t)MIN(MAX(base_g + (noise >> 1), 0), 0xFF);
+            buf[idx + 2] = (uint8_t)MIN(MAX(base_b - noise, 0), 0xFF);
+        }
+    }
+}
+
+/* Generate a highly compressible RGB test image with solid R, G, and B stripes. */
static void init_compressible(png_bytep buf, size_t num_pix) {
- /* It doesn't actually matter what we make this, but for
- * the sake of a reasonable test image, let's make this
- * be a stripe of R, G, & B, with no alpha channel */
int32_t i = 0;
int32_t red_stop = num_pix / 3;
int32_t blue_stop = 2 * num_pix / 3;
png_set_write_fn(png, outpng, png_write_cb, NULL);
png_bytep *png_row_ptrs = new png_bytep[height];
- for (int i = 0; i < IMHEIGHT; ++i) {
+ for (uint32_t i = 0; i < height; ++i) {
png_row_ptrs[i] = (png_bytep)&buf[3*i*width];
}
- png_set_IHDR(png, info, IMWIDTH, IMHEIGHT, 8, PNG_COLOR_TYPE_RGB,
+ png_set_IHDR(png, info, width, height, 8, PNG_COLOR_TYPE_RGB,
PNG_INTERLACE_NONE, PNG_COMPRESSION_TYPE_DEFAULT,
PNG_FILTER_TYPE_DEFAULT);
-
- png_write_info(png, info);
png_set_compression_level(png, comp_level);
png_set_filter(png, 0, PNG_FILTER_NONE);
+
+ png_write_info(png, info);
png_write_image(png, (png_bytepp)png_row_ptrs);
png_write_end(png, NULL);
png_destroy_write_struct(&png, &info);