Simplify the sign-ordering comparison and reorganize the checks to
reduce branches and allow targets to use conditional select/move
instructions.
With gcc-15 on aarch64 for fmaximum_mag:
* master:
0000000000000000 <__fmaximum_mag>:
0:
d503245f bti c
4:
1e60c004 fabs d4, d0
8:
1e60c023 fabs d3, d1
c:
1e632080 fcmp d4, d3
10:
5400008d b.le 20 <__fmaximum_mag+0x20>
14:
1e60401f fmov d31, d0
18:
1e6043e0 fmov d0, d31
1c:
d65f03c0 ret
20:
54000085 b.pl 30 <__fmaximum_mag+0x30> // b.nfrst
24:
1e60403f fmov d31, d1
28:
1e6043e0 fmov d0, d31
2c:
d65f03c0 ret
30:
54000161 b.ne 5c <__fmaximum_mag+0x5c> // b.any
34:
4f000402 movi v2.4s, #0x0
38:
1e6e101d fmov d29, #1.000000000000000000e+00
3c:
6ee0f842 fneg v2.2d, v2.2d
40:
4ea21c5e mov v30.16b, v2.16b
44:
2e7d1c22 bsl v2.8b, v1.8b, v29.8b
48:
2e7d1c1e bsl v30.8b, v0.8b, v29.8b
4c:
1e6223d0 fcmpe d30, d2
50:
1e61ac1f fcsel d31, d0, d1, ge // ge = tcont
54:
1e6043e0 fmov d0, d31
58:
d65f03c0 ret
5c:
1e61281f fadd d31, d0, d1
60:
1e6043e0 fmov d0, d31
64:
d65f03c0 ret
* patch:
0000000000000000 <__fmaximum_mag>:
0:
d503245f bti c
4:
1e612000 fcmp d0, d1
8:
540000e6 b.vs 24 <__fmaximum_mag+0x24>
c:
1e60c01f fabs d31, d0
10:
1e60c03e fabs d30, d1
14:
1e7e23e0 fcmp d31, d30
18:
540000a0 b.eq 2c <__fmaximum_mag+0x2c> // b.none
1c:
1e60dc20 fcsel d0, d1, d0, le
20:
d65f03c0 ret
24:
1e612800 fadd d0, d0, d1
28:
d65f03c0 ret
2c:
9e660000 fmov x0, d0
30:
f100001f cmp x0, #0x0
34:
1e60bc20 fcsel d0, d1, d0, lt // lt = tstop
38:
d65f03c0 ret
Checked on x86_64-linux-gnu, aarch64-linux-gnu, i686-linux-gnu,
arm-linux-gnueabihf, powerpc64le-linux-gnu,
riscv64-linux-gnu-rv64imafdc-lp64d, and loongarch64-linux-gnuf64.
Reviewed-by: Wilco Dijkstra <Wilco.Dijkstra@arm.com>
FLOAT
M_DECL_FUNC (__fmaximum_mag) (FLOAT x, FLOAT y)
{
- FLOAT ax = M_FABS (x);
- FLOAT ay = M_FABS (y);
- if (isgreater (ax, ay))
- return x;
- else if (isless (ax, ay))
- return y;
- else if (ax == ay)
- return (M_COPYSIGN (1, x) >= M_COPYSIGN (1, y) ? x : y);
+ if (__glibc_likely (!isunordered (x, y))) /* Neither argument is a NaN.  */
+ {
+ FLOAT ax = M_FABS (x);
+ FLOAT ay = M_FABS (y);
+ if (__glibc_unlikely (ax == ay)) /* Tie on magnitude: decide by sign.  */
+ return signbit (x) ? y : x; /* y is x or -x, so take y only if x is negative.  */
+ return isgreater (ax, ay) ? x : y; /* Otherwise the larger magnitude wins.  */
+ }
else
return x + y;
}
FLOAT
M_DECL_FUNC (__fminimum_mag) (FLOAT x, FLOAT y)
{
- FLOAT ax = M_FABS (x);
- FLOAT ay = M_FABS (y);
- if (isless (ax, ay))
- return x;
- else if (isgreater (ax, ay))
- return y;
- else if (ax == ay)
- return (M_COPYSIGN (1, x) <= M_COPYSIGN (1, y) ? x : y);
+ if (__glibc_likely (!isunordered (x, y))) /* Neither argument is a NaN.  */
+ {
+ FLOAT ax = M_FABS (x);
+ FLOAT ay = M_FABS (y);
+ if (__glibc_unlikely (ax == ay)) /* Tie on magnitude: decide by sign.  */
+ return signbit (x) ? x : y; /* y is x or -x, so keep x only if it is negative.  */
+ return isless (ax, ay) ? x : y; /* Otherwise the smaller magnitude wins.  */
+ }
else
return x + y;
}