From: Adam Stylinski Date: Sun, 15 Sep 2024 16:23:50 +0000 (-0400) Subject: Simplify chunking in the copy ladder here X-Git-Tag: 2.2.3~37 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=b80eb4c6ecf09efedc9b3bfa6a9adc03b011015f;p=thirdparty%2Fzlib-ng.git Simplify chunking in the copy ladder here As it turns out, trying to peel off the remainder with so many branches inflated the code size so much that this function wouldn't inline without some fairly aggressive optimization flags. Only catching vector-sized chunks here makes the loop body small enough, and having the byte-by-byte copy idiom at the bottom gives the compiler some flexibility, so it is likely to do something useful there. --- diff --git a/inflate_p.h b/inflate_p.h index c324b048..59ad6d17 100644 --- a/inflate_p.h +++ b/inflate_p.h @@ -174,25 +174,13 @@ static inline uint8_t* chunkcopy_safe(uint8_t *out, uint8_t *from, uint64_t len, * behind or lookahead distance. */ uint64_t non_olap_size = llabs(from - out); // llabs vs labs for compatibility with windows - memcpy(out, from, (size_t)non_olap_size); - out += non_olap_size; - from += non_olap_size; - len -= non_olap_size; - /* So this doesn't give use a worst case scenario of function calls in a loop, * we want to instead break this down into copy blocks of fixed lengths */ while (len) { tocopy = MIN(non_olap_size, len); len -= tocopy; - while (tocopy >= 32) { - memcpy(out, from, 32); - out += 32; - from += 32; - tocopy -= 32; - } - - if (tocopy >= 16) { + while (tocopy >= 16) { memcpy(out, from, 16); out += 16; from += 16; @@ -213,14 +201,7 @@ static inline uint8_t* chunkcopy_safe(uint8_t *out, uint8_t *from, uint64_t len, tocopy -= 4; } - if (tocopy >= 2) { - memcpy(out, from, 2); - out += 2; - from += 2; - tocopy -= 2; - } - - if (tocopy) { + while (tocopy--) { *out++ = *from++; } }