From ea1b70900f9876a7868f71a28dd0675472ef7e83 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Sun, 9 Apr 2023 10:23:18 +0200 Subject: [PATCH] IMPORT: slz: avoid multiple shifts on 64-bits On 64-bit platforms, disassembling the code shows that send_huff() performs a left shift followed by a right one, which are the result of integer truncation and zero-extension caused solely by using different types at different levels in the call chain. By making enqueue24() take a 64-bit int on input and send_huff() take one optionally, we can remove one shift in the hot path and gain 1% performance without affecting other platforms. This is slz upstream commit fd165b36c4621579c5305cf3bb3a7f5410d3720b. --- src/slz.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/slz.c b/src/slz.c index 1560bac343..52120467ef 100644 --- a/src/slz.c +++ b/src/slz.c @@ -166,9 +166,9 @@ union ref { * 32-bit words into output buffer. X must not contain non-zero bits above * xbits. */ -static inline void enqueue24(struct slz_stream *strm, uint32_t x, uint32_t xbits) +static inline void enqueue24(struct slz_stream *strm, uint64_t x, uint32_t xbits) { - uint64_t queue = strm->queue + ((uint64_t)x << strm->qbits); + uint64_t queue = strm->queue + (x << strm->qbits); uint32_t qbits = strm->qbits + xbits; if (__builtin_expect(qbits >= 32, 1)) { @@ -293,7 +293,8 @@ static inline void copy_32b(struct slz_stream *strm, uint32_t x) strm->outbuf += 4; } -static inline void send_huff(struct slz_stream *strm, uint32_t code) +/* Using long because faster on 64-bit (can save one shift) */ +static inline void send_huff(struct slz_stream *strm, unsigned long code) { uint32_t bits; -- 2.47.2