Simplify the sign comparison used when the magnitudes are equal and
reorganize the checks to reduce branches and allow targets to use
conditional select/move.
With gcc-15 on aarch64 (the master listing below is fmaximum_mag_num; the patch listing shows the symmetric fminimum_mag_num):
* master:
0000000000000000 <__fmaximum_mag_num>:
0:
d503245f bti c
4:
1e60c004 fabs d4, d0
8:
1e60c023 fabs d3, d1
c:
1e632080 fcmp d4, d3
10:
5400008d b.le 20 <__fmaximum_mag_num+0x20>
14:
1e60401f fmov d31, d0
18:
1e6043e0 fmov d0, d31
1c:
d65f03c0 ret
20:
54000085 b.pl 30 <__fmaximum_mag_num+0x30> // b.nfrst
24:
1e60403f fmov d31, d1
28:
1e6043e0 fmov d0, d31
2c:
d65f03c0 ret
30:
54000161 b.ne 5c <__fmaximum_mag_num+0x5c> // b.any
34:
4f000402 movi v2.4s, #0x0
38:
1e6e101d fmov d29, #1.000000000000000000e+00
3c:
6ee0f842 fneg v2.2d, v2.2d
40:
4ea21c5e mov v30.16b, v2.16b
44:
2e7d1c22 bsl v2.8b, v1.8b, v29.8b
48:
2e7d1c1e bsl v30.8b, v0.8b, v29.8b
4c:
1e6223d0 fcmpe d30, d2
50:
1e61ac1f fcsel d31, d0, d1, ge // ge = tcont
54:
1e6043e0 fmov d0, d31
58:
d65f03c0 ret
5c:
1e612020 fcmp d1, d1
60:
1e60403f fmov d31, d1
64:
54ffff87 b.vc 54 <__fmaximum_mag_num+0x54>
68:
1e602000 fcmp d0, d0
6c:
1e60401f fmov d31, d0
70:
54ffff27 b.vc 54 <__fmaximum_mag_num+0x54>
74:
1e61281f fadd d31, d0, d1
78:
17fffff7 b 54 <__fmaximum_mag_num+0x54>
* patch:
0000000000000000 <__fminimum_mag_num>:
0:
d503245f bti c
4:
1e612000 fcmp d0, d1
8:
1e60401f fmov d31, d0
c:
540000e6 b.vs 28 <__fminimum_mag_num+0x28>
10:
1e60c01e fabs d30, d0
14:
1e60c03d fabs d29, d1
18:
1e7d23c0 fcmp d30, d29
1c:
54000160 b.eq 48 <__fminimum_mag_num+0x48> // b.none
20:
1e605c20 fcsel d0, d1, d0, pl // pl = nfrst
24:
d65f03c0 ret
28:
1e612020 fcmp d1, d1
2c:
1e604020 fmov d0, d1
30:
54ffffa7 b.vc 24 <__fminimum_mag_num+0x24>
34:
1e7f23e0 fcmp d31, d31
38:
1e6043e0 fmov d0, d31
3c:
54ffff47 b.vc 24 <__fminimum_mag_num+0x24>
40:
1e612be0 fadd d0, d31, d1
44:
d65f03c0 ret
48:
9e660000 fmov x0, d0
4c:
f100001f cmp x0, #0x0
50:
1e61bc00 fcsel d0, d0, d1, lt // lt = tstop
54:
d65f03c0 ret
Checked on x86_64-linux-gnu, aarch64-linux-gnu, i686-linux-gnu,
arm-linux-gnueabihf, powerpc64le-linux-gnu,
riscv64-linux-gnu-rv64imafdc-lp64d, and loongarch64-linux-gnuf64.
Reviewed-by: Wilco Dijkstra <Wilco.Dijkstra@arm.com>
FLOAT
M_DECL_FUNC (__fmaximum_mag_num) (FLOAT x, FLOAT y)
{
- FLOAT ax = M_FABS (x);
- FLOAT ay = M_FABS (y);
- if (isgreater (ax, ay))
- return x;
- else if (isless (ax, ay))
- return y;
- else if (ax == ay)
- return (M_COPYSIGN (1, x) >= M_COPYSIGN (1, y) ? x : y);
+ if (__glibc_likely (!isunordered (x, y))) /* Expected path: no NaN.  */
+ {
+ FLOAT ax = M_FABS (x);
+ FLOAT ay = M_FABS (y);
+ if (__glibc_unlikely (ax == ay)) /* Same magnitude: decide by sign.  */
+ return signbit (x) ? y : x; /* Negative x: y is +|x| or the same as x.  */
+ return isgreater (ax, ay) ? x : y; /* Magnitudes differ here.  */
+ }
else
return isnan (y) ? (isnan (x) ? x + y : x) : y;
}
FLOAT
M_DECL_FUNC (__fminimum_mag_num) (FLOAT x, FLOAT y)
{
- FLOAT ax = M_FABS (x);
- FLOAT ay = M_FABS (y);
- if (isless (ax, ay))
- return x;
- else if (isgreater (ax, ay))
- return y;
- else if (ax == ay)
- return (M_COPYSIGN (1, x) <= M_COPYSIGN (1, y) ? x : y);
+ if (__glibc_likely (!isunordered (x, y))) /* Expected path: no NaN.  */
+ {
+ FLOAT ax = M_FABS (x);
+ FLOAT ay = M_FABS (y);
+ if (__glibc_unlikely (ax == ay)) /* Same magnitude: decide by sign.  */
+ return signbit (x) ? x : y; /* Positive x: y is -|x| or the same as x.  */
+ return isless (ax, ay) ? x : y; /* Magnitudes differ here.  */
+ }
else
return isnan (y) ? (isnan (x) ? x + y : x) : y;
}