i386: Simplify powl computation for small integral y [BZ #33586]

author H.J. Lu <hjl.tools@gmail.com>

Sun, 2 Nov 2025 00:47:53 +0000 (08:47 +0800)

committer H.J. Lu <hjl.tools@gmail.com>

Wed, 5 Nov 2025 23:04:17 +0000 (07:04 +0800)
author H.J. Lu <hjl.tools@gmail.com>
Sun, 2 Nov 2025 00:47:53 +0000 (08:47 +0800)
committer H.J. Lu <hjl.tools@gmail.com>
Wed, 5 Nov 2025 23:04:17 +0000 (07:04 +0800)
diff --git a/sysdeps/i386/fpu/e_powl.S b/sysdeps/i386/fpu/e_powl.S

index 9452e352a00d8bbaa613bd8942f0f1356fd23b92..766d64c8b90e5f943c0ac4ca59c2ca8a8e924cad 100644
--- a/sysdeps/i386/fpu/e_powl.S
+++ b/sysdeps/i386/fpu/e_powl.S
@@ -166,29 +166,32 @@ ENTRY(__ieee754_powl)
         adcl    $0, %edx
         negl    %edx
  4:     fldl    MO(one)         // 1 : x
-       fxch
  
-       /* If y is even, take the absolute value of x.  Otherwise,
-          ensure all intermediate values that might overflow have the
-          sign of x.  */
+       /* y range is further reduced to [0, 3].  Simply walk through the
+          options.  First up, 0 and 1.  */
+       test    %eax, %eax
+       jz      6f
+       fxch                    // x : 1
+       subl    $1, %eax
+       jz      6f
+
+       /* Finally, y == 2 and 3.  For y == 3 we do |x| * x * |x|: unlike x * x
+          or |x| * |x|, the intermediate |x| * x keeps the sign of x on overflow.  */
+       fld     %st             // x : x : 1
+       fabs                    // |x| : x : 1
+       fxch                    // x : |x| : 1
+       fld     %st(1)          // |x| : x : |x| : 1
         testb   $1, %al
-       jnz     6f
-       fabs
-
-6:     shrdl   $1, %edx, %eax
-       jnc     5f
-       fxch
-       fabs
-       fmul    %st(1)          // x : ST*x
-       fxch
-5:     fld     %st             // x : x : ST*x
-       fabs                    // |x| : x : ST*x
-       fmulp                   // |x|*x : ST*x
-       shrl    $1, %edx
-       movl    %eax, %ecx
-       orl     %edx, %ecx
-       jnz     6b
-       fstp    %st(0)          // ST*x
+       jz      7f
+       fmulp   %st(2)          // x : |x| * |x| : 1
+       fstp    %st(0)          // |x| * |x| : 1
+       jmp     6f
+7:     fmulp                   // |x| * x : |x| : 1
+       fmulp                   // |x| * x * |x| : 1
+
+       /* We come here with the stack as RES : <something>, so pop off
+          <something>.  */
+6:     fstp    %st(1)
  #ifdef PIC
         LOAD_PIC_REG (cx)
  #endif
author	H.J. Lu <hjl.tools@gmail.com>
	Sun, 2 Nov 2025 00:47:53 +0000 (08:47 +0800)
committer	H.J. Lu <hjl.tools@gmail.com>
	Wed, 5 Nov 2025 23:04:17 +0000 (07:04 +0800)