Inline all uses of update_hash*.
Inline insert_string into deflate_quick, deflate_fast and deflate_medium.
Remove the update_hash, insert_string, quick_insert_string and quick_insert_value callback pointers from deflate_state.
Use a local function pointer for insert_string.
Fix level check to actually check level and not `s->max_chain_length <= 1024`.
inflate.h
inflate_p.h
inftrees.h
+ insert_string_p.h
insert_string_tpl.h
match_tpl.h
trees.h
inflate.c
inftrees.c
insert_string.c
- insert_string_roll.c
trees.c
uncompr.c
zutil.c
inflate.o \
inftrees.o \
insert_string.o \
- insert_string_roll.o \
trees.o \
uncompr.o \
zutil.o \
inflate.lo \
inftrees.lo \
insert_string.lo \
- insert_string_roll.lo \
trees.lo \
uncompr.lo \
zutil.lo \
#include "functable.h"
#include "deflate.h"
#include "deflate_p.h"
+#include "insert_string_p.h"
/* Avoid conflicts with zlib.h macros */
#ifdef ZLIB_COMPAT
/* ========================================================================= */
int32_t Z_EXPORT PREFIX(deflateSetDictionary)(PREFIX3(stream) *strm, const uint8_t *dictionary, uint32_t dictLength) {
deflate_state *s;
+ insert_string_cb insert_string_func;
unsigned int str, n;
int wrap;
uint32_t avail;
if (wrap == 2 || (wrap == 1 && s->status != INIT_STATE) || s->lookahead)
return Z_STREAM_ERROR;
+ if (s->level >= 9)
+ insert_string_func = insert_string_roll;
+ else
+ insert_string_func = insert_string;
+
/* when using zlib wrappers, compute Adler-32 for provided dictionary */
if (wrap == 1)
strm->adler = FUNCTABLE_CALL(adler32)(strm->adler, dictionary, dictLength);
while (s->lookahead >= STD_MIN_MATCH) {
str = s->strstart;
n = s->lookahead - (STD_MIN_MATCH - 1);
- s->insert_string(s, str, n);
+ insert_string_func(s, str, n);
s->strstart = str + n;
s->lookahead = STD_MIN_MATCH - 1;
PREFIX(fill_window)(s);
s->good_match = configuration_table[level].good_length;
s->nice_match = configuration_table[level].nice_length;
s->max_chain_length = configuration_table[level].max_chain;
-
- /* Use rolling hash for deflate_slow algorithm with level 9. It allows us to
- * properly lookup different hash chains to speed up longest_match search. Since hashing
- * method changes depending on the level we cannot put this into functable. */
- if (s->max_chain_length > 1024) {
- s->update_hash = &update_hash_roll;
- s->insert_string = &insert_string_roll;
- s->quick_insert_string = &quick_insert_string_roll;
- s->quick_insert_value = &quick_insert_value_roll;
- } else {
- s->update_hash = update_hash;
- s->insert_string = insert_string;
- s->quick_insert_string = quick_insert_string;
- s->quick_insert_value = quick_insert_value;
- }
-
s->level = level;
}
*/
void Z_INTERNAL PREFIX(fill_window)(deflate_state *s) {
+ insert_string_cb insert_string_func;
unsigned n;
unsigned int more; /* Amount of free space at the end of the window. */
unsigned int wsize = s->w_size;
+ int level = s->level;
Assert(s->lookahead < MIN_LOOKAHEAD, "already enough lookahead");
+ if (level >= 9)
+ insert_string_func = insert_string_roll;
+ else
+ insert_string_func = insert_string;
+
do {
more = s->window_size - s->lookahead - s->strstart;
/* Initialize the hash value now that we have some input: */
if (s->lookahead + s->insert >= STD_MIN_MATCH) {
unsigned int str = s->strstart - s->insert;
- if (UNLIKELY(s->max_chain_length > 1024)) {
- s->ins_h = s->update_hash(s->window[str], s->window[str+1]);
+ if (UNLIKELY(level >= 9)) {
+ s->ins_h = update_hash_roll(s->window[str], s->window[str+1]);
} else if (str >= 1) {
- s->quick_insert_string(s, str + 2 - STD_MIN_MATCH);
+ quick_insert_string(s, str + 2 - STD_MIN_MATCH);
}
unsigned int count = s->insert;
if (UNLIKELY(s->lookahead == 1)) {
count -= 1;
}
if (count > 0) {
- s->insert_string(s, str, count);
+ insert_string_func(s, str, count);
s->insert -= count;
}
}
/* Type definitions for hash callbacks */
typedef struct internal_state deflate_state;
-typedef uint32_t (* update_hash_cb) (uint32_t h, uint32_t val);
typedef void (* insert_string_cb) (deflate_state *const s, uint32_t str, uint32_t count);
-typedef Pos (* quick_insert_string_cb)(deflate_state *const s, uint32_t str);
-typedef Pos (* quick_insert_value_cb) (deflate_state *const s, uint32_t str, uint32_t val);
-
-uint32_t update_hash (uint32_t h, uint32_t val);
void insert_string (deflate_state *const s, uint32_t str, uint32_t count);
-Pos quick_insert_string (deflate_state *const s, uint32_t str);
-Pos quick_insert_value (deflate_state *const s, uint32_t str, uint32_t val);
-
-uint32_t update_hash_roll (uint32_t h, uint32_t val);
void insert_string_roll (deflate_state *const s, uint32_t str, uint32_t count);
-Pos quick_insert_string_roll(deflate_state *const s, uint32_t str);
-Pos quick_insert_value_roll (deflate_state *const s, uint32_t str, uint32_t val);
/* Struct for memory allocation handling */
typedef struct deflate_allocs_s {
* max_insert_length is used only for compression levels <= 6.
*/
- update_hash_cb update_hash;
- insert_string_cb insert_string;
- quick_insert_string_cb quick_insert_string;
- quick_insert_value_cb quick_insert_value;
- /* Hash function callbacks that can be configured depending on the deflate
- * algorithm being used */
-
int level; /* compression level (1..9) */
int strategy; /* favor or force Huffman coding*/
#include "deflate.h"
#include "deflate_p.h"
#include "functable.h"
+#include "insert_string_p.h"
/* ===========================================================================
* Compress as much as possible from the input stream, return the current
match_len--; /* string at strstart already in table */
s->strstart++;
- insert_string(s, s->strstart, match_len);
+ insert_string_static(s, s->strstart, match_len);
s->strstart += match_len;
} else {
s->strstart += match_len;
#include "deflate.h"
#include "deflate_p.h"
#include "functable.h"
+#include "insert_string_p.h"
struct match {
uint16_t match_start;
#include "deflate_p.h"
#include "functable.h"
#include "trees_emit.h"
+#include "insert_string_p.h"
extern const ct_data static_ltree[L_CODES+2];
extern const ct_data static_dtree[D_CODES];
#include "deflate.h"
#include "deflate_p.h"
#include "functable.h"
+#include "insert_string_p.h"
/* ===========================================================================
* Same as deflate_medium, but achieves better compression. We use a lazy
* no better match at the next window position.
*/
Z_INTERNAL block_state deflate_slow(deflate_state *s, int flush) {
- int bflush; /* set if current block must be flushed */
match_func longest_match;
+ insert_string_cb insert_string_func;
+ int bflush; /* set if current block must be flushed */
- if (s->max_chain_length <= 1024)
- longest_match = FUNCTABLE_FPTR(longest_match);
- else
+ if (s->level >= 9) {
longest_match = FUNCTABLE_FPTR(longest_match_slow);
+ insert_string_func = insert_string_roll;
+ } else {
+ longest_match = FUNCTABLE_FPTR(longest_match);
+ insert_string_func = insert_string;
+ }
/* Process the input block. */
for (;;) {
*/
Pos hash_head = 0;
if (LIKELY(s->lookahead >= WANT_MIN_MATCH)) {
- hash_head = s->quick_insert_string(s, s->strstart);
+ if (s->level >= 9)
+ hash_head = quick_insert_string_roll(s, s->strstart);
+ else
+ hash_head = quick_insert_string(s, s->strstart);
}
/* Find the longest match, discarding those <= prev_length.
unsigned int insert_cnt = mov_fwd;
if (UNLIKELY(insert_cnt > max_insert - s->strstart))
insert_cnt = max_insert - s->strstart;
- s->insert_string(s, s->strstart + 1, insert_cnt);
+ insert_string_func(s, s->strstart + 1, insert_cnt);
}
s->prev_length = 0;
s->match_available = 0;
-/* insert_string.c -- insert_string integer hash variant
+/* insert_string.c -- make insert_string functions from static inlined functions
*
* Copyright (C) 1995-2024 Jean-loup Gailly and Mark Adler
* For conditions of distribution and use, see copyright notice in zlib.h
#include "zbuild.h"
#include "deflate.h"
+#include "insert_string_p.h"
-#define HASH_SLIDE 16
+void insert_string(deflate_state *const s, uint32_t str, uint32_t count) {
+ insert_string_static(s, str, count);
+}
-#define HASH_CALC(h, val) h = ((val * 2654435761U) >> HASH_SLIDE);
-#define HASH_CALC_VAR h
-#define HASH_CALC_VAR_INIT uint32_t h
-
-#define UPDATE_HASH update_hash
-#define INSERT_STRING insert_string
-#define QUICK_INSERT_STRING quick_insert_string
-#define QUICK_INSERT_VALUE quick_insert_value
-
-#include "insert_string_tpl.h"
+void insert_string_roll(deflate_state *const s, uint32_t str, uint32_t count) {
+ insert_string_roll_static(s, str, count);
+}
--- /dev/null
+#ifndef INSERT_STRING_P_H_
+#define INSERT_STRING_P_H_
+
+/* insert_string_p.h -- insert_string function generator
+ *
+ * Copyright (C) 1995-2024 Jean-loup Gailly and Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ *
+ */
+
+// Normal insert_string, levels 1-8
+#define HASH_SLIDE 16
+
+#define HASH_CALC(h, val) h = ((val * 2654435761U) >> HASH_SLIDE);
+#define HASH_CALC_MASK HASH_MASK
+#define HASH_CALC_VAR h
+#define HASH_CALC_VAR_INIT uint32_t h
+#define HASH_CALC_OFFSET 0
+
+#define UPDATE_HASH update_hash
+#define INSERT_STRING insert_string_static
+#define QUICK_INSERT_STRING quick_insert_string
+#define QUICK_INSERT_VALUE quick_insert_value
+
+#include "insert_string_tpl.h"
+
+// Cleanup
+#undef HASH_SLIDE
+#undef HASH_CALC
+#undef HASH_CALC_READ
+#undef HASH_CALC_MASK
+#undef HASH_CALC_OFFSET
+#undef HASH_CALC_VAR
+#undef HASH_CALC_VAR_INIT
+#undef UPDATE_HASH
+#undef INSERT_STRING
+#undef QUICK_INSERT_STRING
+#undef QUICK_INSERT_VALUE
+
+// Rolling insert_string, level 9
+#define HASH_SLIDE 5
+
+#define HASH_CALC(h, val) h = ((h << HASH_SLIDE) ^ ((uint8_t)val))
+#define HASH_CALC_VAR s->ins_h
+#define HASH_CALC_VAR_INIT
+#define HASH_CALC_READ val = strstart[0]
+#define HASH_CALC_MASK (32768u - 1u)
+#define HASH_CALC_OFFSET (STD_MIN_MATCH-1)
+
+#define UPDATE_HASH update_hash_roll
+#define INSERT_STRING insert_string_roll_static
+#define QUICK_INSERT_STRING quick_insert_string_roll
+#define QUICK_INSERT_VALUE quick_insert_value_roll
+
+#include "insert_string_tpl.h"
+
+#endif
+++ /dev/null
-/* insert_string_roll.c -- insert_string rolling hash variant
- *
- * Copyright (C) 1995-2024 Jean-loup Gailly and Mark Adler
- * For conditions of distribution and use, see copyright notice in zlib.h
- *
- */
-
-#include "zbuild.h"
-#include "deflate.h"
-
-#define HASH_SLIDE 5
-
-#define HASH_CALC(h, val) h = ((h << HASH_SLIDE) ^ ((uint8_t)val))
-#define HASH_CALC_VAR s->ins_h
-#define HASH_CALC_VAR_INIT
-#define HASH_CALC_READ val = strstart[0]
-#define HASH_CALC_MASK (32768u - 1u)
-#define HASH_CALC_OFFSET (STD_MIN_MATCH-1)
-
-#define UPDATE_HASH update_hash_roll
-#define INSERT_STRING insert_string_roll
-#define QUICK_INSERT_STRING quick_insert_string_roll
-#define QUICK_INSERT_VALUE quick_insert_value_roll
-
-#include "insert_string_tpl.h"
-#ifndef INSERT_STRING_H_
-#define INSERT_STRING_H_
-
/* insert_string_tpl.h -- Private insert_string functions shared with more than
* one insert string implementation
*
*
*/
-#include "zmemory.h"
-
-#ifndef HASH_CALC_OFFSET
-# define HASH_CALC_OFFSET 0
-#endif
-#ifndef HASH_CALC_MASK
-# define HASH_CALC_MASK HASH_MASK
-#endif
#ifndef HASH_CALC_READ
# if BYTE_ORDER == LITTLE_ENDIAN
# define HASH_CALC_READ \
* input characters, so that a running hash key can be computed from the
* previous key instead of complete recalculation each time.
*/
-Z_INTERNAL uint32_t UPDATE_HASH(uint32_t h, uint32_t val) {
+Z_FORCEINLINE static uint32_t UPDATE_HASH(uint32_t h, uint32_t val) {
HASH_CALC(h, val);
return h & HASH_CALC_MASK;
}
* to the previous head of the hash chain (the most recent string with same hash key).
* Return the previous length of the hash chain.
*/
-Z_INTERNAL Pos QUICK_INSERT_VALUE(deflate_state *const s, uint32_t str, uint32_t val) {
+Z_FORCEINLINE static Pos QUICK_INSERT_VALUE(deflate_state *const s, uint32_t str, uint32_t val) {
uint32_t hm;
Pos head;
* of the hash chain (the most recent string with same hash key). Return
* the previous length of the hash chain.
*/
-Z_INTERNAL Pos QUICK_INSERT_STRING(deflate_state *const s, uint32_t str) {
+Z_FORCEINLINE static Pos QUICK_INSERT_STRING(deflate_state *const s, uint32_t str) {
uint8_t *strstart = s->window + str + HASH_CALC_OFFSET;
uint32_t val, hm;
Pos head;
* input characters and the first STD_MIN_MATCH bytes of str are valid
* (except for the last STD_MIN_MATCH-1 bytes of the input file).
*/
-Z_INTERNAL void INSERT_STRING(deflate_state *const s, uint32_t str, uint32_t count) {
+Z_FORCEINLINE static void INSERT_STRING(deflate_state *const s, uint32_t str, uint32_t count) {
uint8_t *strstart = s->window + str + HASH_CALC_OFFSET;
uint8_t *strend = strstart + count;
}
}
}
-#endif
* https://github.com/gildor2/fast_zlib
*/
-#ifndef MATCH_TPL_H
-#define MATCH_TPL_H
+#include "insert_string_p.h"
#define EARLY_EXIT_TRIGGER_LEVEL 5
-#endif
-
/* Set match_start to the longest match starting at the given string and
* return its length. Matches shorter or equal to prev_length are discarded,
* in which case the result is equal to prev_length and match_start is garbage.
* to cur_match). We cannot use s->prev[strstart+1,...] immediately, because
* these strings are not yet inserted into the hash table.
*/
- hash = s->update_hash(0, scan[1]);
- hash = s->update_hash(hash, scan[2]);
+ // use update_hash_roll for deflate_slow
+ hash = update_hash_roll(0, scan[1]);
+ hash = update_hash_roll(hash, scan[2]);
for (i = 3; i <= best_len; i++) {
- hash = s->update_hash(hash, scan[i]);
-
+ // use update_hash_roll for deflate_slow
+ hash = update_hash_roll(hash, scan[i]);
/* If we're starting with best_len >= 3, we can use offset search. */
pos = s->head[hash];
if (pos < cur_match) {
*/
scan_endstr = scan + len - (STD_MIN_MATCH+1);
- hash = s->update_hash(0, scan_endstr[0]);
- hash = s->update_hash(hash, scan_endstr[1]);
- hash = s->update_hash(hash, scan_endstr[2]);
+ // use update_hash_roll for deflate_slow
+ hash = update_hash_roll(0, scan_endstr[0]);
+ hash = update_hash_roll(hash, scan_endstr[1]);
+ hash = update_hash_roll(hash, scan_endstr[2]);
pos = s->head[hash];
if (pos < cur_match) {
# include "deflate.h"
# include "arch_functions.h"
# include "../test_cpu_features.h"
+# include "insert_string_p.h"
}
#define MAX_WSIZE 32768
#define TEST_WINDOW_SIZE (MAX_WSIZE * 2)
+typedef Pos (* quick_insert_string_cb)(deflate_state *const s, uint32_t str);
+
// Base class with common setup/teardown for both insert_string benchmarks
class insert_string_base: public benchmark::Fixture {
protected: