Unroll the slide hash loop similar to other ISAs

author Adam Stylinski <kungfujesus06@gmail.com>

Sat, 7 Mar 2026 18:27:27 +0000 (13:27 -0500)

committer Hans Kristian Rosbach <hk-github@circlestorm.org>

Mon, 9 Mar 2026 12:33:29 +0000 (13:33 +0100)
author Adam Stylinski <kungfujesus06@gmail.com>
Sat, 7 Mar 2026 18:27:27 +0000 (13:27 -0500)
committer Hans Kristian Rosbach <hk-github@circlestorm.org>
Mon, 9 Mar 2026 12:33:29 +0000 (13:33 +0100)
diff --git a/arch/power/slide_ppc_tpl.h b/arch/power/slide_ppc_tpl.h

index 680a7f8e2af4076f2b654fb865dca361514a7549..24629b4039318b5aaa10ee0dddc5ee37c189f8fa 100644 (file)
--- a/arch/power/slide_ppc_tpl.h
+++ b/arch/power/slide_ppc_tpl.h
@@ -12,15 +12,27 @@ static inline void slide_hash_chain(Pos *table, uint32_t entries, uint16_t wsize
      Pos *p = table;
  
      do {
-        vector unsigned short value, result;
+        /* Advance the pointer up front so the scalar add can hopefully overlap with the vector unit's work */
+        Pos *q = p;
+        p += 32;
+        vector unsigned short value0, value1, value2, value3;
+        vector unsigned short result0, result1, result2, result3;
  
-        value = vec_ld(0, p);
-        result = vec_subs(value, vmx_wsize);
-        vec_st(result, 0, p);
+        value0 = vec_ld(0, q);
+        value1 = vec_ld(16, q);
+        value2 = vec_ld(32, q);
+        value3 = vec_ld(48, q);
+        result0 = vec_subs(value0, vmx_wsize);
+        result1 = vec_subs(value1, vmx_wsize);
+        result2 = vec_subs(value2, vmx_wsize);
+        result3 = vec_subs(value3, vmx_wsize);
+        vec_st(result0, 0, q);
+        vec_st(result1, 16, q);
+        vec_st(result2, 32, q);
+        vec_st(result3, 48, q);
  
-        p += 8;
-        entries -= 8;
-   } while (entries > 0);
+        entries -= 32;
+   } while (entries);
  }
  
  void Z_INTERNAL SLIDE_PPC(deflate_state *s) {
author	Adam Stylinski <kungfujesus06@gmail.com>
	Sat, 7 Mar 2026 18:27:27 +0000 (13:27 -0500)
committer	Hans Kristian Rosbach <hk-github@circlestorm.org>
	Mon, 9 Mar 2026 12:33:29 +0000 (13:33 +0100)