From: Sebastian Pop Date: Thu, 6 Dec 2018 19:23:17 +0000 (-0600) Subject: return an index for hash map collisions in insert_string X-Git-Tag: 1.9.9-b1~587 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=13619fd2b6d0d5e2c2b5d8e8c08bc97097415c11;p=thirdparty%2Fzlib-ng.git return an index for hash map collisions in insert_string The current version of insert_string_c and variations for sse2, arm, and aarch64 in zlib-ng has changed semantics from the original code of INSERT_STRING macro in zlib: #define INSERT_STRING(s, str, match_head) \ (UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]), \ match_head = s->prev[(str) & s->w_mask] = s->head[s->ins_h], \ s->head[s->ins_h] = (Pos)(str)) The code of INSERT_STRING assigns match_head with the content of s->head[s->ins_h]. In zlib-ng, the assignment to match_head happens in the caller of insert_string(). zlib-ng's insert_string_*() functions return 0 instead of str+idx in case of collision, i.e., when if (s->head[s->ins_h] == str+idx). The effect of returning 0 instead of the content of s->head[s->ins_h] is that the search for a longest_match through s->prev[] chains will be cut short when arriving at 0. This leads to a shorter compression time at the expense of a worse compression rate: returning 0 cuts out the search space. With this patch: Performance counter stats for './minigzip -9 llvm.tar': 13422.379017 task-clock (msec) # 1.000 CPUs utilized 20 context-switches # 0.001 K/sec 0 cpu-migrations # 0.000 K/sec 130 page-faults # 0.010 K/sec 58,926,104,511 cycles # 4.390 GHz stalled-cycles-frontend stalled-cycles-backend 77,543,740,646 instructions # 1.32 insns per cycle 17,158,892,214 branches # 1278.379 M/sec 198,433,680 branch-misses # 1.16% of all branches 13.423365095 seconds time elapsed 45408 -rw-rw-r-- 1 spop spop 46493896 Dec 11 11:47 llvm.tar.gz Without this patch the compressed file is larger: Performance counter stats for './minigzip -9 llvm.tar': 13459.342312 task-clock (msec) # 1.000 CPUs utilized 25 context-switches # 0.002 K/sec 0 cpu-migrations # 0.000 K/sec 129 page-faults # 0.010 K/sec 59,088,391,808 cycles # 4.390 GHz stalled-cycles-frontend stalled-cycles-backend 77,600,766,958 instructions # 1.31 insns per cycle 17,486,130,785 branches # 1299.182 M/sec 196,281,761 branch-misses # 1.12% of all branches 13.463512830 seconds time elapsed 45408 -rw-rw-r-- 1 spop spop 46493896 Dec 11 11:48 llvm.tar.gz --- diff --git a/arch/aarch64/insert_string_acle.c b/arch/aarch64/insert_string_acle.c index 563100b71..7f9e02b2f 100644 --- a/arch/aarch64/insert_string_acle.c +++ b/arch/aarch64/insert_string_acle.c @@ -40,12 +40,14 @@ Pos insert_string_acle(deflate_state *const s, const Pos str, unsigned int count h = __crc32w(0, val); hm = h & s->hash_mask; - if (s->head[hm] != p) { - s->prev[p & s->w_mask] = s->head[hm]; + Pos head = s->head[hm]; + if (head != p) { + s->prev[p & s->w_mask] = head; s->head[hm] = p; - if (p == lp) { - ret = s->prev[lp & s->w_mask]; - } + if (p == lp) + ret = head; + } else if (p == lp) { + ret = p; } } return ret; diff --git a/arch/arm/insert_string_acle.c b/arch/arm/insert_string_acle.c index 563100b71..7f9e02b2f 100644 --- a/arch/arm/insert_string_acle.c +++ b/arch/arm/insert_string_acle.c @@ -40,12 +40,14 @@ Pos insert_string_acle(deflate_state *const s, const Pos str, unsigned int count h = __crc32w(0, val); hm = h & s->hash_mask; - if (s->head[hm] != p) { - s->prev[p & s->w_mask] = s->head[hm]; + Pos head = s->head[hm]; + if (head != p) { + s->prev[p & s->w_mask] = head; s->head[hm] = p; - if (p == lp) { - ret = s->prev[lp & s->w_mask]; - } + if (p == lp) + ret = head; + } else if (p == lp) { + ret = p; } } return ret; diff --git a/arch/x86/insert_string_sse.c b/arch/x86/insert_string_sse.c index bf09aabb8..394e50937 100644 --- a/arch/x86/insert_string_sse.c +++ b/arch/x86/insert_string_sse.c @@ -41,13 +41,14 @@ ZLIB_INTERNAL Pos insert_string_sse(deflate_state *const s, const Pos str, unsig : "r" (val) ); #endif - - if (s->head[h & s->hash_mask] != str+idx) { - s->prev[(str+idx) & s->w_mask] = s->head[h & s->hash_mask]; + Pos head = s->head[h & s->hash_mask]; + if (head != str+idx) { + s->prev[(str+idx) & s->w_mask] = head; s->head[h & s->hash_mask] = str+idx; - if (idx == count-1) { - ret = s->prev[(str+idx) & s->w_mask]; - } + if (idx == count-1) + ret = head; + } else if (idx == count - 1) { + ret = str + idx; } } return ret; diff --git a/deflate_p.h b/deflate_p.h index 02fbf66c3..6008a970d 100644 --- a/deflate_p.h +++ b/deflate_p.h @@ -37,12 +37,15 @@ static inline Pos insert_string_c(deflate_state *const s, const Pos str, unsigne for (idx = 0; idx < count; idx++) { UPDATE_HASH(s, s->ins_h, str+idx); - if (s->head[s->ins_h] != str+idx) { - s->prev[(str+idx) & s->w_mask] = s->head[s->ins_h]; - s->head[s->ins_h] = str+idx; - if (idx == count-1) { - ret = s->prev[(str+idx) & s->w_mask]; - } + + Pos head = s->head[s->ins_h]; + if (head != str+idx) { + s->prev[(str+idx) & s->w_mask] = head; + s->head[s->ins_h] = str+idx; + if (idx == count - 1) + ret = head; + } else if (idx == count - 1) { + ret = str + idx; } } return ret;