math: Remove UB and optimize float ilogbf
The subnormal exponent calculation invokes UB by left-shifting the
signed exponent to find the leading set bit.
The patch reimplements ilogbf using the math_config.h macros and
uses the new stdbit.h function (stdc_leading_zeros) to simplify the
subnormal handling.
On aarch64 it generates better code:
* master:
0000000000000000 <__ieee754_ilogbf>:
0:
1e260000 fmov w0, s0
4:
12007801 and w1, w0, #0x7fffffff
8:
72091c1f tst w0, #0x7f800000
c:
54000141 b.ne 34 <__ieee754_ilogbf+0x34> // b.any
10:
34000201 cbz w1, 50 <__ieee754_ilogbf+0x50>
14:
53185c21 lsl w1, w1, #8
18:
12800fa0 mov w0, #0xffffff82 // #-126
1c:
d503201f nop
20:
531f7821 lsl w1, w1, #1
24:
51000400 sub w0, w0, #0x1
28:
7100003f cmp w1, #0x0
2c:
54ffffac b.gt 20 <__ieee754_ilogbf+0x20>
30:
d65f03c0 ret
34:
13177c20 asr w0, w1, #23
38:
12b01002 mov w2, #0x7f7fffff // #2139095039
3c:
5101fc00 sub w0, w0, #0x7f
40:
6b02003f cmp w1, w2
44:
12b00001 mov w1, #0x7fffffff // #2147483647
48:
1a819000 csel w0, w0, w1, ls // ls = plast
4c:
d65f03c0 ret
50:
320107e0 mov w0, #0x80000001 // #-2147483647
54:
d65f03c0 ret
* patch:
0000000000000000 <__ieee754_ilogbf>:
0:
1e260001 fmov w1, s0
4:
d3577820 ubfx x0, x1, #23, #8
8:
350000e0 cbnz w0, 24 <__ieee754_ilogbf+0x24>
c:
53175821 lsl w1, w1, #9
10:
34000141 cbz w1, 38 <__ieee754_ilogbf+0x38>
14:
5ac01021 clz w1, w1
18:
12800fc0 mov w0, #0xffffff81 // #-127
1c:
4b010000 sub w0, w0, w1
20:
d65f03c0 ret
24:
7103fc1f cmp w0, #0xff
28:
5101fc00 sub w0, w0, #0x7f
2c:
12b00001 mov w1, #0x7fffffff // #2147483647
30:
1a811000 csel w0, w0, w1, ne // ne = any
34:
d65f03c0 ret
38:
320107e0 mov w0, #0x80000001 // #-2147483647
3c:
d65f03c0 ret
Other architectures with support for stdc_leading_zeros and/or
__builtin_clzll should see similar improvements.
Checked on aarch64-linux-gnu and x86_64-linux-gnu.