From: Adhemerval Zanella Date: Fri, 23 Jan 2026 13:02:22 +0000 (-0300) Subject: math: Optimize f{max,min}imum{f,l,f128} X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=a95db490b5acefda5dbb2ffbb1523af6b2d47dfa;p=thirdparty%2Fglibc.git math: Optimize f{max,min}imum{f,l,f128} Add an isunordered check for the fast path, simplify the sign check, and use the fmax/fmin builtins when possible. With gcc-15 on aarch64: * master: 0000000000000000 <__fmaximum>: 0: d503245f bti c 4: 1e612000 fcmp d0, d1 8: 5400008d b.le 18 <__fmaximum+0x18> c: 1e60401f fmov d31, d0 10: 1e6043e0 fmov d0, d31 14: d65f03c0 ret 18: 54000085 b.pl 28 <__fmaximum+0x28> // b.nfrst 1c: 1e60403f fmov d31, d1 20: 1e6043e0 fmov d0, d31 24: d65f03c0 ret 28: 54000161 b.ne 54 <__fmaximum+0x54> // b.any 2c: 4f000402 movi v2.4s, #0x0 30: 1e6e101e fmov d30, #1.000000000000000000e+00 34: 6ee0f842 fneg v2.2d, v2.2d 38: 4ea21c5d mov v29.16b, v2.16b 3c: 2e7e1c22 bsl v2.8b, v1.8b, v30.8b 40: 2e7e1c1d bsl v29.8b, v0.8b, v30.8b 44: 1e6223b0 fcmpe d29, d2 48: 1e61ac1f fcsel d31, d0, d1, ge // ge = tcont 4c: 1e6043e0 fmov d0, d31 50: d65f03c0 ret 54: 1e61281f fadd d31, d0, d1 58: 1e6043e0 fmov d0, d31 5c: d65f03c0 ret * patched: 0000000000000000 <__fmaximum>: 0: d503245f bti c 4: 1e612000 fcmp d0, d1 8: 54000086 b.vs 18 <__fmaximum+0x18> c: 1e61681f fmaxnm d31, d0, d1 10: 1e6043e0 fmov d0, d31 14: d65f03c0 ret 18: 1e61281f fadd d31, d0, d1 1c: 1e6043e0 fmov d0, d31 20: d65f03c0 ret And with gcc-15 on x86_64: * master: 0000000000000000 <__fmaximum>: 0: 66 0f 2e c1 ucomisd %xmm1,%xmm0 4: 77 56 ja 5c <__fmaximum+0x5c> 6: 66 0f 2e c8 ucomisd %xmm0,%xmm1 a: 77 4c ja 58 <__fmaximum+0x58> c: 66 0f 2e c1 ucomisd %xmm1,%xmm0 10: 7a 4e jp 60 <__fmaximum+0x60> 12: 75 4c jne 60 <__fmaximum+0x60> 14: f3 0f 7e 1d 00 00 00 movq 0x0(%rip),%xmm3 # 1c <__fmaximum+0x1c> 1b: 00 1c: f2 0f 10 15 00 00 00 movsd 0x0(%rip),%xmm2 # 24 <__fmaximum+0x24> 23: 00 24: 66 0f 28 e3 movapd %xmm3,%xmm4 28: 66 0f 54 15 00 00 00 andpd 
0x0(%rip),%xmm2 # 30 <__fmaximum+0x30> 2f: 00 30: 66 0f 54 e0 andpd %xmm0,%xmm4 34: 66 0f 54 d9 andpd %xmm1,%xmm3 38: 66 0f 56 e2 orpd %xmm2,%xmm4 3c: 66 0f 56 d3 orpd %xmm3,%xmm2 40: f2 0f c2 d4 02 cmplesd %xmm4,%xmm2 45: 66 0f 54 c2 andpd %xmm2,%xmm0 49: 66 0f 55 d1 andnpd %xmm1,%xmm2 4d: 66 0f 56 c2 orpd %xmm2,%xmm0 51: c3 ret 52: 66 0f 1f 44 00 00 nopw 0x0(%rax,%rax,1) 58: 66 0f 28 c1 movapd %xmm1,%xmm0 5c: c3 ret 5d: 0f 1f 00 nopl (%rax) 60: f2 0f 58 c1 addsd %xmm1,%xmm0 64: c3 ret * patched: 0000000000000000 <__fmaximum>: 0: 66 0f 2e c1 ucomisd %xmm1,%xmm0 4: 7a 2a jp 30 <__fmaximum+0x30> 6: 77 18 ja 20 <__fmaximum+0x20> 8: 66 0f 2e c8 ucomisd %xmm0,%xmm1 c: 77 08 ja 16 <__fmaximum+0x16> e: 66 0f 50 c0 movmskpd %xmm0,%eax 12: a8 01 test $0x1,%al 14: 74 0a je 20 <__fmaximum+0x20> 16: 66 0f 28 c1 movapd %xmm1,%xmm0 1a: c3 ret 1b: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1) 20: 66 0f 28 c8 movapd %xmm0,%xmm1 24: 66 0f 28 c1 movapd %xmm1,%xmm0 28: c3 ret 29: 0f 1f 80 00 00 00 00 nopl 0x0(%rax) 30: f2 0f 58 c8 addsd %xmm0,%xmm1 34: 66 0f 28 c1 movapd %xmm1,%xmm0 38: c3 ret Checked on x86_64-linux-gnu, aarch64-linux-gnu, i686-linux-gnu, arm-linux-gnueabihf, powerpc64le-linux-gnu, riscv64-linux-gnu-rv64imafdc-lp64d, and loongarch64-linux-gnuf64. Reviewed-by: Wilco Dijkstra --- diff --git a/math/s_fmaximum_template.c b/math/s_fmaximum_template.c index ddd7a8443c..d0eb299120 100644 --- a/math/s_fmaximum_template.c +++ b/math/s_fmaximum_template.c @@ -21,12 +21,18 @@ FLOAT M_DECL_FUNC (__fmaximum) (FLOAT x, FLOAT y) { - if (isgreater (x, y)) - return x; - else if (isless (x, y)) - return y; - else if (x == y) - return (M_COPYSIGN (1, x) >= M_COPYSIGN (1, y) ? x : y); + if (__glibc_likely (!isunordered (x, y))) + { +#if M_USE_BUILTIN (FMAX) + return M_SUF (__builtin_fmax) (x, y); +#else + if (isgreater (x, y)) + return x; + else if (isless (x, y)) + return y; + return signbit (x) ? 
y : x; +#endif + } else return x + y; } diff --git a/math/s_fminimum_template.c b/math/s_fminimum_template.c index b987e5447c..32380e6741 100644 --- a/math/s_fminimum_template.c +++ b/math/s_fminimum_template.c @@ -21,12 +21,18 @@ FLOAT M_DECL_FUNC (__fminimum) (FLOAT x, FLOAT y) { - if (isless (x, y)) - return x; - else if (isgreater (x, y)) - return y; - else if (x == y) - return (M_COPYSIGN (1, x) <= M_COPYSIGN (1, y) ? x : y); + if (__glibc_likely (!isunordered (x, y))) + { +#if M_USE_BUILTIN (FMIN) + return M_SUF (__builtin_fmin) (x, y); +#else + if (isless (x, y)) + return x; + else if (isgreater (x, y)) + return y; + return signbit (x) ? x : y; +#endif + } else return x + y; }