git.ipfire.org Git - thirdparty/glibc.git/commit

math: Remove UB and optimize double ilogb

The subnormal exponent calculation invokes UB by left shifting the
signed exponent to find the first leading bit.  The implementation
also uses 32 bits operations, which generates suboptimal code in
64 bits architectures.

The patch reimplements ilogb using the math_config.h macros and
uses the new stdbit function to simplify the subnormal handling.

On aarch64 it generates better code:

* master:

0000000000000000 <__ieee754_ilogb>:
   0:   9e660000        fmov    x0, d0
   4:   d360fc02        lsr     x2, x0, #32
   8:   d360f801        ubfx    x1, x0, #32, #31
   c:   f26c285f        tst     x2, #0x7ff00000
  10:   540001a1        b.ne    44 <__ieee754_ilogb+0x44>  // b.any
  14:   2a000022        orr     w2, w1, w0
  18:   34000322        cbz     w2, 7c <__ieee754_ilogb+0x7c>
  1c:   35000221        cbnz    w1, 60 <__ieee754_ilogb+0x60>
  20:   2a0003e1        mov     w1, w0
  24:   7100001f        cmp     w0, #0x0
  28:   12808240        mov     w0, #0xfffffbed                 // #-1043
  2c:   540000ad        b.le    40 <__ieee754_ilogb+0x40>
  30:   531f7821        lsl     w1, w1, #1
  34:   51000400        sub     w0, w0, #0x1
  38:   7100003f        cmp     w1, #0x0
  3c:   54ffffac        b.gt    30 <__ieee754_ilogb+0x30>
  40:   d65f03c0        ret
  44:   13147c20        asr     w0, w1, #20
  48:   12b00202        mov     w2, #0x7fefffff                 // #2146435071
  4c:   510ffc00        sub     w0, w0, #0x3ff
  50:   6b02003f        cmp     w1, w2
  54:   12b00001        mov     w1, #0x7fffffff                 // #2147483647
  58:   1a819000        csel    w0, w0, w1, ls  // ls = plast
  5c:   d65f03c0        ret
  60:   53155021        lsl     w1, w1, #11
  64:   12807fa0        mov     w0, #0xfffffc02                 // #-1022
  68:   531f7821        lsl     w1, w1, #1
  6c:   51000400        sub     w0, w0, #0x1
  70:   7100003f        cmp     w1, #0x0
  74:   54ffffac        b.gt    68 <__ieee754_ilogb+0x68>
  78:   d65f03c0        ret
  7c:   320107e0        mov     w0, #0x80000001                 // #-2147483647
  80:   d65f03c0        ret

* patch:

0000000000000000 <__ieee754_ilogb>:
   0:   9e660001        fmov    x1, d0
   4:   d374f820        ubfx    x0, x1, #52, #11
   8:   350000e0        cbnz    w0, 24 <__ieee754_ilogb+0x24>
   c:   d374cc21        lsl     x1, x1, #12
  10:   b4000141        cbz     x1, 38 <__ieee754_ilogb+0x38>
  14:   dac01021        clz     x1, x1
  18:   12807fc0        mov     w0, #0xfffffc01                 // #-1023
  1c:   4b010000        sub     w0, w0, w1
  20:   d65f03c0        ret
  24:   711ffc1f        cmp     w0, #0x7ff
  28:   510ffc00        sub     w0, w0, #0x3ff
  2c:   12b00001        mov     w1, #0x7fffffff                 // #2147483647
  30:   1a811000        csel    w0, w0, w1, ne  // ne = any
  34:   d65f03c0        ret
  38:   320107e0        mov     w0, #0x80000001                 // #-2147483647
  3c:   d65f03c0        ret

Other architecture with support for stdc_leading_zeros and/or
__builtin_clzll should have similar improvements.

Checked on aarch64-linux-gnu and x86_64-linux-gnu.

Reviewed-by: Wilco Dijkstra <Wilco.Dijkstra@arm.com>

author	Adhemerval Zanella <adhemerval.zanella@linaro.org>
	Mon, 21 Apr 2025 13:26:27 +0000 (10:26 -0300)
committer	Adhemerval Zanella <adhemerval.zanella@linaro.org>
	Mon, 2 Jun 2025 16:32:19 +0000 (13:32 -0300)
commit	eb1e9194fa3802dea813880fd6765467f8e86a49
tree	d949363bc0aeb53b8d6b19270e0aa813e0a4a38f	tree
parent	591283a68965fe61a7186c9c81f7812e71b282b4	commit \| diff