From: Adam Stylinski <kungfujesus06@gmail.com>
Date: Sun, 15 Sep 2024 16:23:50 +0000 (-0400)
Subject: Simplify chunking in the copy ladder here
X-Git-Tag: 2.2.3~37
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=b80eb4c6ecf09efedc9b3bfa6a9adc03b011015f;p=thirdparty%2Fzlib-ng.git

Simplify chunking in the copy ladder here

As it turns out, trying to peel off the remainder with so many branches
inflated the code size so much that this function wouldn't inline
without some fairly aggressive optimization flags. Only catching
vector-sized chunks here makes the loop body small enough, and having
the byte-by-byte copy idiom at the bottom gives the compiler enough
flexibility that it is likely to do something reasonable there.
---

diff --git a/inflate_p.h b/inflate_p.h
index c324b0486..59ad6d17c 100644
--- a/inflate_p.h
+++ b/inflate_p.h
@@ -174,25 +174,13 @@ static inline uint8_t* chunkcopy_safe(uint8_t *out, uint8_t *from, uint64_t len,
      * behind or lookahead distance. */
     uint64_t non_olap_size = llabs(from - out); // llabs vs labs for compatibility with windows
 
-    memcpy(out, from, (size_t)non_olap_size);
-    out += non_olap_size;
-    from += non_olap_size;
-    len -= non_olap_size;
-
     /* So this doesn't give use a worst case scenario of function calls in a loop,
      * we want to instead break this down into copy blocks of fixed lengths */
     while (len) {
         tocopy = MIN(non_olap_size, len);
         len -= tocopy;
 
-        while (tocopy >= 32) {
-            memcpy(out, from, 32);
-            out += 32;
-            from += 32;
-            tocopy -= 32;
-        }
-
-        if (tocopy >= 16) {
+        while (tocopy >= 16) {
             memcpy(out, from, 16);
             out += 16;
             from += 16;
@@ -213,14 +201,7 @@ static inline uint8_t* chunkcopy_safe(uint8_t *out, uint8_t *from, uint64_t len,
             tocopy -= 4;
         }
 
-        if (tocopy >= 2) {
-            memcpy(out, from, 2);
-            out += 2;
-            from += 2;
-            tocopy -= 2;
-        }
-
-        if (tocopy) {
+        while (tocopy--) {
             *out++ = *from++;
         }
     }