From: Adhemerval Zanella Date: Fri, 23 Jan 2026 13:02:23 +0000 (-0300) Subject: math: Optimize f{max,min}imum_num{f,l,f,f128} X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=c3ced14f88924873762a9d0cb4561050b9839ce4;p=thirdparty%2Fglibc.git math: Optimize f{max,min}imum_num{f,l,f,f128} Add an isunordered check as the fast path, simplify the sign check, and use the fmax/fmin builtins when possible. With gcc-15 on aarch64 for fmaximum_num: * master: 0000000000000000 <__fmaximum_num>: 0: d503245f bti c 4: 1e612000 fcmp d0, d1 8: 5400008d b.le 18 <__fmaximum_num+0x18> c: 1e60401f fmov d31, d0 10: 1e6043e0 fmov d0, d31 14: d65f03c0 ret 18: 54000085 b.pl 28 <__fmaximum_num+0x28> // b.nfrst 1c: 1e60403f fmov d31, d1 20: 1e6043e0 fmov d0, d31 24: d65f03c0 ret 28: 54000161 b.ne 54 <__fmaximum_num+0x54> // b.any 2c: 4f000402 movi v2.4s, #0x0 30: 1e6e101e fmov d30, #1.000000000000000000e+00 34: 6ee0f842 fneg v2.2d, v2.2d 38: 4ea21c5d mov v29.16b, v2.16b 3c: 2e7e1c22 bsl v2.8b, v1.8b, v30.8b 40: 2e7e1c1d bsl v29.8b, v0.8b, v30.8b 44: 1e6223b0 fcmpe d29, d2 48: 1e61ac1f fcsel d31, d0, d1, ge // ge = tcont 4c: 1e6043e0 fmov d0, d31 50: d65f03c0 ret 54: 1e612020 fcmp d1, d1 58: 1e60403f fmov d31, d1 5c: 54ffff87 b.vc 4c <__fmaximum_num+0x4c> 60: 1e602000 fcmp d0, d0 64: 1e60401f fmov d31, d0 68: 54ffff27 b.vc 4c <__fmaximum_num+0x4c> 6c: 1e61281f fadd d31, d0, d1 70: 17fffff7 b 4c <__fmaximum_num+0x4c> * patch: 0000000000000000 <__fmaximum_num>: 0: d503245f bti c 4: 1e612000 fcmp d0, d1 8: 54000086 b.vs 18 <__fmaximum_num+0x18> c: 1e61681f fmaxnm d31, d0, d1 10: 1e6043e0 fmov d0, d31 14: d65f03c0 ret 18: 1e612020 fcmp d1, d1 1c: 1e60403f fmov d31, d1 20: 54ffff87 b.vc 10 <__fmaximum_num+0x10> 24: 1e602000 fcmp d0, d0 28: 1e60401f fmov d31, d0 2c: 54ffff27 b.vc 10 <__fmaximum_num+0x10> 30: 1e61281f fadd d31, d0, d1 34: 17fffff7 b 10 <__fmaximum_num+0x10> And with gcc-15 on x86_64: * master: 0000000000000000 <__fmaximum_num>: 0: 66 0f 2e c1 ucomisd %xmm1,%xmm0 4: 66 0f 
28 d8 movapd %xmm0,%xmm3 8: 77 5e ja 68 <__fmaximum_num+0x68> a: 66 0f 2e c8 ucomisd %xmm0,%xmm1 e: 77 50 ja 60 <__fmaximum_num+0x60> 10: 66 0f 2e c1 ucomisd %xmm1,%xmm0 14: 7a 5a jp 70 <__fmaximum_num+0x70> 16: 75 58 jne 70 <__fmaximum_num+0x70> 18: f3 0f 7e 05 00 00 00 movq 0x0(%rip),%xmm0 # 20 <__fmaximum_num+0x20> 1f: 00 20: f2 0f 10 15 00 00 00 movsd 0x0(%rip),%xmm2 # 28 <__fmaximum_num+0x28> 27: 00 28: 66 0f 28 e0 movapd %xmm0,%xmm4 2c: 66 0f 54 15 00 00 00 andpd 0x0(%rip),%xmm2 # 34 <__fmaximum_num+0x34> 33: 00 34: 66 0f 54 c1 andpd %xmm1,%xmm0 38: 66 0f 54 e3 andpd %xmm3,%xmm4 3c: 66 0f 56 e2 orpd %xmm2,%xmm4 40: 66 0f 56 d0 orpd %xmm0,%xmm2 44: f2 0f c2 d4 02 cmplesd %xmm4,%xmm2 49: 66 0f 54 da andpd %xmm2,%xmm3 4d: 66 0f 55 d1 andnpd %xmm1,%xmm2 51: 66 0f 56 d3 orpd %xmm3,%xmm2 55: 66 0f 28 c2 movapd %xmm2,%xmm0 59: c3 ret 5a: 66 0f 1f 44 00 00 nopw 0x0(%rax,%rax,1) 60: 66 0f 28 c1 movapd %xmm1,%xmm0 64: c3 ret 65: 0f 1f 00 nopl (%rax) 68: c3 ret 69: 0f 1f 80 00 00 00 00 nopl 0x0(%rax) 70: 66 0f 2e c9 ucomisd %xmm1,%xmm1 74: 66 0f 28 c1 movapd %xmm1,%xmm0 78: 7b ea jnp 64 <__fmaximum_num+0x64> 7a: 66 0f 2e db ucomisd %xmm3,%xmm3 7e: 66 0f 28 c3 movapd %xmm3,%xmm0 82: 7b e0 jnp 64 <__fmaximum_num+0x64> 84: f2 0f 58 c1 addsd %xmm1,%xmm0 88: c3 ret * patch: 0000000000000000 <__fmaximum_num>: 0: 66 0f 2e c1 ucomisd %xmm1,%xmm0 4: 7a 2a jp 30 <__fmaximum_num+0x30> 6: 77 18 ja 20 <__fmaximum_num+0x20> 8: 66 0f 2e c8 ucomisd %xmm0,%xmm1 c: 77 08 ja 16 <__fmaximum_num+0x16> e: 66 0f 50 c0 movmskpd %xmm0,%eax 12: a8 01 test $0x1,%al 14: 74 0a je 20 <__fmaximum_num+0x20> 16: 66 0f 28 c1 movapd %xmm1,%xmm0 1a: c3 ret 1b: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1) 20: 66 0f 28 c8 movapd %xmm0,%xmm1 24: 66 0f 28 c1 movapd %xmm1,%xmm0 28: c3 ret 29: 0f 1f 80 00 00 00 00 nopl 0x0(%rax) 30: 66 0f 2e c9 ucomisd %xmm1,%xmm1 34: 7b e0 jnp 16 <__fmaximum_num+0x16> 36: 66 0f 2e c0 ucomisd %xmm0,%xmm0 3a: 7b e4 jnp 20 <__fmaximum_num+0x20> 3c: f2 0f 58 c8 addsd %xmm0,%xmm1 40: eb d4 
jmp 16 <__fmaximum_num+0x16> Checked on x86_64-linux-gnu, aarch64-linux-gnu, i686-linux-gnu, arm-linux-gnueabihf, powerpc64le-linux-gnu, riscv64-linux-gnu-rv64imafdc-lp64d, and loongarch64-linux-gnuf64. Reviewed-by: Wilco Dijkstra --- diff --git a/math/s_fmaximum_num_template.c b/math/s_fmaximum_num_template.c index 9277ad1ad4..1b8db53e81 100644 --- a/math/s_fmaximum_num_template.c +++ b/math/s_fmaximum_num_template.c @@ -21,12 +21,18 @@ FLOAT M_DECL_FUNC (__fmaximum_num) (FLOAT x, FLOAT y) { - if (isgreater (x, y)) - return x; - else if (isless (x, y)) - return y; - else if (x == y) - return (M_COPYSIGN (1, x) >= M_COPYSIGN (1, y) ? x : y); + if (__glibc_likely (!isunordered (x, y))) + { +#if M_USE_BUILTIN (FMAX) + return M_SUF (__builtin_fmax) (x, y); +#else + if (isgreater (x, y)) + return x; + else if (isless (x, y)) + return y; + return signbit (x) ? y : x; +#endif + } else return isnan (y) ? (isnan (x) ? x + y : x) : y; } diff --git a/math/s_fminimum_num_template.c b/math/s_fminimum_num_template.c index cd5cecbc75..5869935aa4 100644 --- a/math/s_fminimum_num_template.c +++ b/math/s_fminimum_num_template.c @@ -21,12 +21,18 @@ FLOAT M_DECL_FUNC (__fminimum_num) (FLOAT x, FLOAT y) { - if (isless (x, y)) - return x; - else if (isgreater (x, y)) - return y; - else if (x == y) - return (M_COPYSIGN (1, x) <= M_COPYSIGN (1, y) ? x : y); + if (__glibc_likely (!isunordered (x, y))) + { +#if M_USE_BUILTIN (FMIN) + return M_SUF (__builtin_fmin) (x, y); +#else + if (isless (x, y)) + return x; + else if (isgreater (x, y)) + return y; + return signbit (x) ? x : y; +#endif + } else return isnan (y) ? (isnan (x) ? x + y : x) : y; }