On aarch64 with gcc-15 the change slightly optimizes the fast path.  For fmaxmag:
* master:
0000000000000000 <__fmaxmag>:
0:
d503245f bti c
4:
1e60c01e fabs d30, d0
8:
1e60c03f fabs d31, d1
c:
1e7f23c0 fcmp d30, d31
10:
5400004d b.le 18 <__fmaxmag+0x18>
14:
d65f03c0 ret
18:
54000065 b.pl 24 <__fmaxmag+0x24> // b.nfrst
1c:
1e604020 fmov d0, d1
20:
d65f03c0 ret
24:
540001e0 b.eq 60 <__fmaxmag+0x60> // b.none
[...]
60:
1e612010 fcmpe d0, d1
64:
1e61cc00 fcsel d0, d0, d1, gt
68:
d65f03c0 ret
* patch:
0000000000000000 <__fmaxmag>:
0:
d503245f bti c
4:
1e612000 fcmp d0, d1
8:
540000e6 b.vs 24 <__fmaxmag+0x24>
c:
1e60c01f fabs d31, d0
10:
1e60c03e fabs d30, d1
14:
1e7e23e0 fcmp d31, d30
18:
540001c0 b.eq 50 <__fmaxmag+0x50> // b.none
1c:
1e60dc20 fcsel d0, d1, d0, le
20:
d65f03c0 ret
[...]
50:
1e612010 fcmpe d0, d1
54:
1e61cc00 fcsel d0, d0, d1, gt
58:
d65f03c0 ret
[...]
Checked on x86_64-linux-gnu, aarch64-linux-gnu, i686-linux-gnu,
arm-linux-gnueabihf, powerpc64le-linux-gnu,
riscv64-linux-gnu-rv64imafdc-lp64d, and loongarch64-linux-gnuf64.
Reviewed-by: Wilco Dijkstra <Wilco.Dijkstra@arm.com>
FLOAT
M_DECL_FUNC (__fmaxmag) (FLOAT x, FLOAT y)
{
- FLOAT ax = M_FABS (x);
- FLOAT ay = M_FABS (y);
- if (isgreater (ax, ay))
- return x;
- else if (isless (ax, ay))
- return y;
- else if (ax == ay)
- return x > y ? x : y;
+ if (__glibc_likely (!isunordered (x, y)))
+ {
+ FLOAT ax = M_FABS (x);
+ FLOAT ay = M_FABS (y);
+ if (__glibc_unlikely (ax == ay))
+ return x > y ? x : y;
+ return isgreater (ax, ay) ? x : y;
+ }
else if (issignaling (x) || issignaling (y))
return x + y;
else
FLOAT
M_DECL_FUNC (__fminmag) (FLOAT x, FLOAT y)
{
- FLOAT ax = M_FABS (x);
- FLOAT ay = M_FABS (y);
- if (isless (ax, ay))
- return x;
- else if (isgreater (ax, ay))
- return y;
- else if (ax == ay)
- return x < y ? x : y;
+ if (__glibc_likely (!isunordered (x, y)))
+ {
+ FLOAT ax = M_FABS (x);
+ FLOAT ay = M_FABS (y);
+ if (__glibc_unlikely (ax == ay))
+ return x < y ? x : y;
+ return isless (ax, ay) ? x : y;
+ }
else if (issignaling (x) || issignaling (y))
return x + y;
else