From 975c8c4e22f73fb60996f6bcc2cf1a6f6af70928 Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Sun, 2 Nov 2025 08:47:53 +0800 Subject: [PATCH] i386: Simplify powl computation for small integral y [BZ #33586] On i386, tests added by commit 1b657c53c21a100082b0855392e4cb40c9c43a87 Author: Siddhesh Poyarekar Date: Fri Oct 10 20:21:13 2025 -0400 Simplify powl computation for small integral y [BZ #33411] exposed the same bug in i386 e_powl.S: FAIL: math/test-float64x-pow original exit status 1 testing _Float64x (without inline functions) Failure: pow (0x1p+8192, 0x1p+0): Exception "Overflow" set Failure: pow_downward (0x1p+8192, 0x1p+0): Exception "Overflow" set Failure: pow_towardzero (0x1p+8192, 0x1p+0): Exception "Overflow" set Failure: pow_upward (0x1p+8192, 0x1p+0): Exception "Overflow" set FAIL: math/test-ldouble-pow original exit status 1 testing long double (without inline functions) Failure: pow (0x1p+8192, 0x1p+0): Exception "Overflow" set Failure: pow_downward (0x1p+8192, 0x1p+0): Exception "Overflow" set Failure: pow_towardzero (0x1p+8192, 0x1p+0): Exception "Overflow" set Failure: pow_upward (0x1p+8192, 0x1p+0): Exception "Overflow" set Port x86-64 e_powl.S fix to i386 e_powl.S. This fixes BZ #33586. Signed-off-by: H.J. Lu Reviewed-by: Sunil K Pandey --- sysdeps/i386/fpu/e_powl.S | 45 +++++++++++++++++++++------------------ 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/sysdeps/i386/fpu/e_powl.S b/sysdeps/i386/fpu/e_powl.S index 9452e352a0..766d64c8b9 100644 --- a/sysdeps/i386/fpu/e_powl.S +++ b/sysdeps/i386/fpu/e_powl.S @@ -166,29 +166,32 @@ ENTRY(__ieee754_powl) adcl $0, %edx negl %edx 4: fldl MO(one) // 1 : x - fxch - /* If y is even, take the absolute value of x. Otherwise, - ensure all intermediate values that might overflow have the - sign of x. */ + /* y range is further reduced to [0, 3]. Simply walk through the + options. First up, 0 and 1. 
*/ + test %eax, %eax + jz 6f + fxch // x : 1 + subl $1, %eax + jz 6f + + /* Finally, y == 2 and 3. For y == 3 we do |x| * x * |x| because x * x + and |x| * |x| decay faster towards infinity compared to x * |x|. */ + fld %st // x : x : 1 + fabs // |x| : x : 1 + fxch // x : |x| : 1 + fld %st(1) // |x| : x : |x| : 1 testb $1, %al - jnz 6f - fabs - -6: shrdl $1, %edx, %eax - jnc 5f - fxch - fabs - fmul %st(1) // x : ST*x - fxch -5: fld %st // x : x : ST*x - fabs // |x| : x : ST*x - fmulp // |x|*x : ST*x - shrl $1, %edx - movl %eax, %ecx - orl %edx, %ecx - jnz 6b - fstp %st(0) // ST*x + jz 7f + fmulp %st(2) // x : |x| * |x| : 1 + fstp %st(0) // |x| * |x| : 1 + jmp 6f +7: fmulp // |x| * x : |x| : 1 + fmulp // |x| * x * |x| : 1 + + /* We come here with the stack as RES : JUNK, so pop off + JUNK. */ +6: fstp %st(1) #ifdef PIC LOAD_PIC_REG (cx) #endif -- 2.47.3