]> git.ipfire.org Git - thirdparty/glibc.git/commitdiff
Avoid excess range in results from i386 exp, hypot, pow functions (bug 18980).
authorJoseph Myers <joseph@codesourcery.com>
Fri, 18 Sep 2015 21:53:22 +0000 (21:53 +0000)
committerJoseph Myers <joseph@codesourcery.com>
Fri, 18 Sep 2015 21:53:22 +0000 (21:53 +0000)
i386 exp, hypot and pow functions can return overflowing and
underflowing values with excess range and precision; Wilco
Dijkstra's patches to make isfinite etc. expand inline cause this
pre-existing issue to result in test failures.

This patch fixes those functions to avoid excess range and precision
in their return values.  Appropriate macros are added for the repeated
code sequences; in future I'll add more such macros and refactor
existing code forcing underflow (with or without also eliminating
excess range and precision from the return value) to use such macros.

Tested for x86.  If, after this patch, you still see x86 libm test
failures with excess range or precision, please file bugs in Bugzilla.

[BZ #18980]
* sysdeps/i386/fpu/i386-math-asm.h (DEFINE_FLT_MIN): New macro.
(DEFINE_DBL_MIN): Likewise.
(FLT_NARROW_EVAL_UFLOW_NONNEG_NAN): Likewise.
(DBL_NARROW_EVAL_UFLOW_NONNEG_NAN): Likewise.
(FLT_NARROW_EVAL_UFLOW_NONNEG): Likewise.
(DBL_NARROW_EVAL_UFLOW_NONNEG): Likewise.
* sysdeps/i386/fpu/e_exp.S: Include <i386-math-asm.h>.
(dbl_min): Replace with use of DEFINE_DBL_MIN.
(__ieee754_exp): Use DBL_NARROW_EVAL_UFLOW_NONNEG_NAN.
(__exp_finite): Use DBL_NARROW_EVAL_UFLOW_NONNEG.
* sysdeps/i386/fpu/e_exp10.S: Include <i386-math-asm.h>.
(dbl_min): Replace with use of DEFINE_DBL_MIN.
(__ieee754_exp10): Use DBL_NARROW_EVAL_UFLOW_NONNEG_NAN.
* sysdeps/i386/fpu/e_exp10f.S: Include <i386-math-asm.h>.
(flt_min): Replace with use of DEFINE_FLT_MIN.
(__ieee754_exp10f): Use FLT_NARROW_EVAL_UFLOW_NONNEG_NAN.
* sysdeps/i386/fpu/e_exp2.S: Include <i386-math-asm.h>.
(dbl_min): Replace with use of DEFINE_DBL_MIN.
(__ieee754_exp2): Use DBL_NARROW_EVAL_UFLOW_NONNEG_NAN.
* sysdeps/i386/fpu/e_exp2f.S: Include <i386-math-asm.h>.
(flt_min): Replace with use of DEFINE_FLT_MIN.
(__ieee754_exp2f): Use FLT_NARROW_EVAL_UFLOW_NONNEG_NAN.
* sysdeps/i386/fpu/e_expf.S: Include <i386-math-asm.h>.
(flt_min): Replace with use of DEFINE_FLT_MIN.
(__ieee754_expf): Use FLT_NARROW_EVAL_UFLOW_NONNEG_NAN.
(__expf_finite): Use FLT_NARROW_EVAL_UFLOW_NONNEG.
* sysdeps/i386/fpu/e_hypot.S: Include <i386-math-asm.h>.
(__ieee754_hypot): Use DBL_NARROW_EVAL.
* sysdeps/i386/fpu/e_hypotf.S: Include <i386-math-asm.h>.
(__ieee754_hypotf): Use FLT_NARROW_EVAL.
* sysdeps/i386/fpu/e_pow.S: Include <i386-math-asm.h>.
(__ieee754_pow): Use DBL_NARROW_EVAL.
* sysdeps/i386/fpu/e_powf.S: Include <i386-math-asm.h>.
(__ieee754_powf): Use FLT_NARROW_EVAL.
* sysdeps/i386/i686/fpu/multiarch/e_expf-sse2.S
(__ieee754_expf_sse2): Convert double-precision result to single
precision.
* sysdeps/i386/fpu/libm-test-ulps: Update.

15 files changed:
ChangeLog
NEWS
sysdeps/i386/fpu/e_exp.S
sysdeps/i386/fpu/e_exp10.S
sysdeps/i386/fpu/e_exp10f.S
sysdeps/i386/fpu/e_exp2.S
sysdeps/i386/fpu/e_exp2f.S
sysdeps/i386/fpu/e_expf.S
sysdeps/i386/fpu/e_hypot.S
sysdeps/i386/fpu/e_hypotf.S
sysdeps/i386/fpu/e_pow.S
sysdeps/i386/fpu/e_powf.S
sysdeps/i386/fpu/i386-math-asm.h
sysdeps/i386/fpu/libm-test-ulps
sysdeps/i386/i686/fpu/multiarch/e_expf-sse2.S

index 53c6eee1e7a3acb1e46b6e7ded6022273edfa7e3..4be1427a04c30fd1272af80229f6cbe3be638a71 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,45 @@
+2015-09-18  Joseph Myers  <joseph@codesourcery.com>
+
+       [BZ #18980]
+       * sysdeps/i386/fpu/i386-math-asm.h (DEFINE_FLT_MIN): New macro.
+       (DEFINE_DBL_MIN): Likewise.
+       (FLT_NARROW_EVAL_UFLOW_NONNEG_NAN): Likewise.
+       (DBL_NARROW_EVAL_UFLOW_NONNEG_NAN): Likewise.
+       (FLT_NARROW_EVAL_UFLOW_NONNEG): Likewise.
+       (DBL_NARROW_EVAL_UFLOW_NONNEG): Likewise.
+       * sysdeps/i386/fpu/e_exp.S: Include <i386-math-asm.h>.
+       (dbl_min): Replace with use of DEFINE_DBL_MIN.
+       (__ieee754_exp): Use DBL_NARROW_EVAL_UFLOW_NONNEG_NAN.
+       (__exp_finite): Use DBL_NARROW_EVAL_UFLOW_NONNEG.
+       * sysdeps/i386/fpu/e_exp10.S: Include <i386-math-asm.h>.
+       (dbl_min): Replace with use of DEFINE_DBL_MIN.
+       (__ieee754_exp10): Use DBL_NARROW_EVAL_UFLOW_NONNEG_NAN.
+       * sysdeps/i386/fpu/e_exp10f.S: Include <i386-math-asm.h>.
+       (flt_min): Replace with use of DEFINE_FLT_MIN.
+       (__ieee754_exp10f): Use FLT_NARROW_EVAL_UFLOW_NONNEG_NAN.
+       * sysdeps/i386/fpu/e_exp2.S: Include <i386-math-asm.h>.
+       (dbl_min): Replace with use of DEFINE_DBL_MIN.
+       (__ieee754_exp2): Use DBL_NARROW_EVAL_UFLOW_NONNEG_NAN.
+       * sysdeps/i386/fpu/e_exp2f.S: Include <i386-math-asm.h>.
+       (flt_min): Replace with use of DEFINE_FLT_MIN.
+       (__ieee754_exp2f): Use FLT_NARROW_EVAL_UFLOW_NONNEG_NAN.
+       * sysdeps/i386/fpu/e_expf.S: Include <i386-math-asm.h>.
+       (flt_min): Replace with use of DEFINE_FLT_MIN.
+       (__ieee754_expf): Use FLT_NARROW_EVAL_UFLOW_NONNEG_NAN.
+       (__expf_finite): Use FLT_NARROW_EVAL_UFLOW_NONNEG.
+       * sysdeps/i386/fpu/e_hypot.S: Include <i386-math-asm.h>.
+       (__ieee754_hypot): Use DBL_NARROW_EVAL.
+       * sysdeps/i386/fpu/e_hypotf.S: Include <i386-math-asm.h>.
+       (__ieee754_hypotf): Use FLT_NARROW_EVAL.
+       * sysdeps/i386/fpu/e_pow.S: Include <i386-math-asm.h>.
+       (__ieee754_pow): Use DBL_NARROW_EVAL.
+       * sysdeps/i386/fpu/e_powf.S: Include <i386-math-asm.h>.
+       (__ieee754_powf): Use FLT_NARROW_EVAL.
+       * sysdeps/i386/i686/fpu/multiarch/e_expf-sse2.S
+       (__ieee754_expf_sse2): Convert double-precision result to single
+       precision.
+       * sysdeps/i386/fpu/libm-test-ulps: Update.
+
 2015-09-18  Wilco Dijkstra  <wdijkstr@arm.com>
 
        * timezone/Makefile: Ignore unused variable errors due to private.h
diff --git a/NEWS b/NEWS
index 4d65df21b8181675b3ff4963f5cf642bd3619edc..bf16f4f8bae037686be9750c21666ea63d4d5084 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -16,7 +16,7 @@ Version 2.23
   18647, 18661, 18674, 18675, 18681, 18757, 18778, 18781, 18787, 18789,
   18790, 18795, 18796, 18820, 18823, 18824, 18857, 18863, 18870, 18872,
   18873, 18875, 18887, 18921, 18951, 18952, 18961, 18966, 18967, 18970,
-  18977, 18981.
+  18977, 18980, 18981.
 
 * The obsolete header <regexp.h> has been removed.  Programs that require
   this header must be updated to use <regex.h> instead.
index c00beedfe5c4748a6b70245df234107a93172cdb..a7e7f13f6fe99f5ae907fac60575a9fdc505864e 100644 (file)
@@ -4,13 +4,9 @@
  */
 
 #include <machine/asm.h>
+#include <i386-math-asm.h>
 
-       .section .rodata.cst8,"aM",@progbits,8
-
-       .p2align 3
-       .type dbl_min,@object
-dbl_min:       .byte 0, 0, 0, 0, 0, 0, 0x10, 0
-       ASM_SIZE_DIRECTIVE(dbl_min)
+DEFINE_DBL_MIN
 
 #ifdef PIC
 # define MO(op) op##@GOTOFF(%ecx)
@@ -46,22 +42,8 @@ ENTRY(__ieee754_exp)
        faddp                           /* 2^(fract(x * log2(e))) */
        fscale                          /* e^x */
        fstp    %st(1)
-       fldl    MO(dbl_min)
-       fld     %st(1)
-       fucompp
-       fnstsw
-       sahf
-       jnc 3f
-       subl    $8, %esp
-       cfi_adjust_cfa_offset (8)
-       fld     %st(0)
-       fmul    %st(0)
-       fstpl   (%esp)
-       fstpl   (%esp)
-       fldl    (%esp)
-       addl    $8, %esp
-       cfi_adjust_cfa_offset (-8)
-3:     ret
+       DBL_NARROW_EVAL_UFLOW_NONNEG_NAN
+       ret
 
 1:     testl   $0x200, %eax            /* Test sign.  */
        jz      2f                      /* If positive, jump.  */
@@ -86,20 +68,6 @@ ENTRY(__exp_finite)
        faddp                           /* 2^(fract(x * log2(e))) */
        fscale                          /* e^x */
        fstp    %st(1)
-       fldl    MO(dbl_min)
-       fld     %st(1)
-       fucompp
-       fnstsw
-       sahf
-       jnc 4f
-       subl    $8, %esp
-       cfi_adjust_cfa_offset (8)
-       fld     %st(0)
-       fmul    %st(0)
-       fstpl   (%esp)
-       fstpl   (%esp)
-       fldl    (%esp)
-       addl    $8, %esp
-       cfi_adjust_cfa_offset (-8)
-4:     ret
+       DBL_NARROW_EVAL_UFLOW_NONNEG
+       ret
 END(__exp_finite)
index fa54732e2a9d60bcf887831108a8de8d2f41dced..acb5160a3ff9b1544399d17a83e97be1a6c50850 100644 (file)
@@ -3,13 +3,9 @@
  */
 
 #include <machine/asm.h>
+#include <i386-math-asm.h>
 
-       .section .rodata.cst8,"aM",@progbits,8
-
-       .p2align 3
-       .type dbl_min,@object
-dbl_min:       .byte 0, 0, 0, 0, 0, 0, 0x10, 0
-       ASM_SIZE_DIRECTIVE(dbl_min)
+DEFINE_DBL_MIN
 
 #ifdef PIC
 # define MO(op) op##@GOTOFF(%ecx)
@@ -45,22 +41,8 @@ ENTRY(__ieee754_exp10)
        faddp                           /* 2^(fract(x * log2(10))) */
        fscale                          /* e^x */
        fstp    %st(1)
-       fldl    MO(dbl_min)
-       fld     %st(1)
-       fucompp
-       fnstsw
-       sahf
-       jnc 3f
-       subl    $8, %esp
-       cfi_adjust_cfa_offset (8)
-       fld     %st(0)
-       fmul    %st(0)
-       fstpl   (%esp)
-       fstpl   (%esp)
-       fldl    (%esp)
-       addl    $8, %esp
-       cfi_adjust_cfa_offset (-8)
-3:     ret
+       DBL_NARROW_EVAL_UFLOW_NONNEG_NAN
+       ret
 
 1:     testl   $0x200, %eax            /* Test sign.  */
        jz      2f                      /* If positive, jump.  */
index a84b2ae535f243d1aa35551edf2b18c20f89bfbf..1812b34398fa4d48e51ef059c5348f455514407c 100644 (file)
@@ -3,13 +3,9 @@
  */
 
 #include <machine/asm.h>
+#include <i386-math-asm.h>
 
-       .section .rodata.cst4,"aM",@progbits,4
-
-       .p2align 2
-       .type flt_min,@object
-flt_min:       .byte 0, 0, 0x80, 0
-       ASM_SIZE_DIRECTIVE(flt_min)
+DEFINE_FLT_MIN
 
 #ifdef PIC
 # define MO(op) op##@GOTOFF(%ecx)
@@ -45,22 +41,8 @@ ENTRY(__ieee754_exp10f)
        faddp                           /* 2^(fract(x * log2(10))) */
        fscale                          /* e^x */
        fstp    %st(1)
-       flds    MO(flt_min)
-       fld     %st(1)
-       fucompp
-       fnstsw
-       sahf
-       jnc 3f
-       subl    $4, %esp
-       cfi_adjust_cfa_offset (4)
-       fld     %st(0)
-       fmul    %st(0)
-       fstps   (%esp)
-       fstps   (%esp)
-       flds    (%esp)
-       addl    $4, %esp
-       cfi_adjust_cfa_offset (-4)
-3:     ret
+       FLT_NARROW_EVAL_UFLOW_NONNEG_NAN
+       ret
 
 1:     testl   $0x200, %eax            /* Test sign.  */
        jz      2f                      /* If positive, jump.  */
index b75a63a0b3a6ebde3276a90897e839cc59a1acd5..fc16a9605389555c5397de1b2994845b8d0a1263 100644 (file)
@@ -5,13 +5,9 @@
  */
 
 #include <machine/asm.h>
+#include <i386-math-asm.h>
 
-       .section .rodata.cst8,"aM",@progbits,8
-
-       .p2align 3
-       .type dbl_min,@object
-dbl_min:       .byte 0, 0, 0, 0, 0, 0, 0x10, 0
-       ASM_SIZE_DIRECTIVE(dbl_min)
+DEFINE_DBL_MIN
 
 #ifdef PIC
 # define MO(op) op##@GOTOFF(%ecx)
@@ -44,22 +40,8 @@ ENTRY(__ieee754_exp2)
        faddp                           /* 2^(fract(x)) */
        fscale                          /* e^x */
        fstp    %st(1)
-       fldl    MO(dbl_min)
-       fld     %st(1)
-       fucompp
-       fnstsw
-       sahf
-       jnc 3f
-       subl    $8, %esp
-       cfi_adjust_cfa_offset (8)
-       fld     %st(0)
-       fmul    %st(0)
-       fstpl   (%esp)
-       fstpl   (%esp)
-       fldl    (%esp)
-       addl    $8, %esp
-       cfi_adjust_cfa_offset (-8)
-3:     ret
+       DBL_NARROW_EVAL_UFLOW_NONNEG_NAN
+       ret
 
 1:     testl   $0x200, %eax            /* Test sign.  */
        jz      2f                      /* If positive, jump.  */
index 042c83b1efcd316d36f7706b7fae460387a4c6f3..30623cd85017d5a5a70e2a6d77f4d093b05b8f5c 100644 (file)
@@ -5,13 +5,9 @@
  */
 
 #include <machine/asm.h>
+#include <i386-math-asm.h>
 
-       .section .rodata.cst4,"aM",@progbits,4
-
-       .p2align 2
-       .type flt_min,@object
-flt_min:       .byte 0, 0, 0x80, 0
-       ASM_SIZE_DIRECTIVE(flt_min)
+DEFINE_FLT_MIN
 
 #ifdef PIC
 # define MO(op) op##@GOTOFF(%ecx)
@@ -44,22 +40,8 @@ ENTRY(__ieee754_exp2f)
        faddp                           /* 2^(fract(x)) */
        fscale                          /* e^x */
        fstp    %st(1)
-       flds    MO(flt_min)
-       fld     %st(1)
-       fucompp
-       fnstsw
-       sahf
-       jnc 3f
-       subl    $4, %esp
-       cfi_adjust_cfa_offset (4)
-       fld     %st(0)
-       fmul    %st(0)
-       fstps   (%esp)
-       fstps   (%esp)
-       flds    (%esp)
-       addl    $4, %esp
-       cfi_adjust_cfa_offset (-4)
-3:     ret
+       FLT_NARROW_EVAL_UFLOW_NONNEG_NAN
+       ret
 
 1:     testl   $0x200, %eax            /* Test sign.  */
        jz      2f                      /* If positive, jump.  */
index 306afd1122dabba1224f6d50c1e7e0a58a1eae7a..65cb4ec204ddbe87101934ca9104ab3de16f6cf0 100644 (file)
@@ -5,13 +5,9 @@
  */
 
 #include <machine/asm.h>
+#include <i386-math-asm.h>
 
-       .section .rodata.cst4,"aM",@progbits,4
-
-       .p2align 2
-       .type flt_min,@object
-flt_min:       .byte 0, 0, 0x80, 0
-       ASM_SIZE_DIRECTIVE(flt_min)
+DEFINE_FLT_MIN
 
 #ifdef PIC
 # define MO(op) op##@GOTOFF(%ecx)
@@ -47,22 +43,8 @@ ENTRY(__ieee754_expf)
        faddp                           /* 2^(fract(x * log2(e))) */
        fscale                          /* e^x */
        fstp    %st(1)
-       flds    MO(flt_min)
-       fld     %st(1)
-       fucompp
-       fnstsw
-       sahf
-       jnc 3f
-       subl    $4, %esp
-       cfi_adjust_cfa_offset (4)
-       fld     %st(0)
-       fmul    %st(0)
-       fstps   (%esp)
-       fstps   (%esp)
-       flds    (%esp)
-       addl    $4, %esp
-       cfi_adjust_cfa_offset (-4)
-3:     ret
+       FLT_NARROW_EVAL_UFLOW_NONNEG_NAN
+       ret
 
 1:     testl   $0x200, %eax            /* Test sign.  */
        jz      2f                      /* If positive, jump.  */
@@ -87,20 +69,6 @@ ENTRY(__expf_finite)
        faddp                           /* 2^(fract(x * log2(e))) */
        fscale                          /* e^x */
        fstp    %st(1)
-       flds    MO(flt_min)
-       fld     %st(1)
-       fucompp
-       fnstsw
-       sahf
-       jnc 4f
-       subl    $4, %esp
-       cfi_adjust_cfa_offset (4)
-       fld     %st(0)
-       fmul    %st(0)
-       fstps   (%esp)
-       fstps   (%esp)
-       flds    (%esp)
-       addl    $4, %esp
-       cfi_adjust_cfa_offset (-4)
-4:     ret
+       FLT_NARROW_EVAL_UFLOW_NONNEG
+       ret
 END(__expf_finite)
index 63083ad4c9057357520cf6f411cbba82d6b7c580..5323fde01e2dcd78a81d40c94ecb2e302df615a1 100644 (file)
@@ -18,6 +18,7 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <sysdep.h>
+#include <i386-math-asm.h>
 
        .text
 ENTRY(__ieee754_hypot)
@@ -37,6 +38,7 @@ ENTRY(__ieee754_hypot)
        fmul    %st(0)          // x * x : y * y
        faddp                   // x * x + y * y
        fsqrt
+       DBL_NARROW_EVAL
 2:     ret
 
        // We have to test whether any of the parameters is Inf.
index 4e22d33ebefb9994d94bd567350600dc206af97a..fd11ea7105bd547726677bb17f18a2c1ba245fa5 100644 (file)
@@ -18,6 +18,7 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <sysdep.h>
+#include <i386-math-asm.h>
 
        .text
 ENTRY(__ieee754_hypotf)
@@ -37,6 +38,7 @@ ENTRY(__ieee754_hypotf)
        fmul    %st(0)          // x * x : y * y
        faddp                   // x * x + y * y
        fsqrt
+       FLT_NARROW_EVAL
 2:     ret
 
        // We have to test whether any of the parameters is Inf.
index 40f8227fda802b9a2fafb199c6b54ea397c74d68..2903e13e5980e4c56e33f1899a3bc0f3b4758de2 100644 (file)
@@ -18,6 +18,7 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <machine/asm.h>
+#include <i386-math-asm.h>
 
        .section .rodata.cst8,"aM",@progbits,8
 
@@ -165,6 +166,7 @@ ENTRY(__ieee754_pow)
        orl     %edx, %ecx
        jnz     6b
        fstp    %st(0)          // ST*x
+       DBL_NARROW_EVAL
        ret
 
        /* y is ±NAN */
@@ -257,6 +259,7 @@ ENTRY(__ieee754_pow)
        cfi_adjust_cfa_offset (-8)
 292:   fscale                  // +/- 2^fract(y*log2(x))*2^int(y*log2(x)) : int(y*log2(x))
        fstp    %st(1)          // +/- 2^fract(y*log2(x))*2^int(y*log2(x))
+       DBL_NARROW_EVAL
        ret
 
 
index 02338284f62691dd0840f8b23a829b76fc639944..d929bdc9f15358c51822ba7dba8b12d87cf356d7 100644 (file)
@@ -18,6 +18,7 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <machine/asm.h>
+#include <i386-math-asm.h>
 
        .section .rodata.cst8,"aM",@progbits,8
 
@@ -148,6 +149,7 @@ ENTRY(__ieee754_powf)
        testl   %edx, %edx
        jnz     6b
        fstp    %st(0)          // ST*x
+       FLT_NARROW_EVAL
        ret
 
        /* y is ±NAN */
@@ -196,6 +198,7 @@ ENTRY(__ieee754_powf)
 32:    addl    $4, %esp
        cfi_adjust_cfa_offset (-4)
        fstp    %st(1)          // 2^fract(y*log2(x))*2^int(y*log2(x))
+       FLT_NARROW_EVAL
        ret
 
 
index fd4313016cd80fa257fadfc0a3aa1b9607cad373..c15029d2eee72742d46e4cf3a320cbf074eec6b9 100644 (file)
        addl    $8, %esp;                       \
        cfi_adjust_cfa_offset (-8);
 
+/* Define constants for the minimum value of a floating-point
+   type.  */
+#define DEFINE_FLT_MIN                         \
+       .section .rodata.cst4,"aM",@progbits,4; \
+       .p2align 2;                             \
+       .type flt_min,@object;                  \
+flt_min:                                       \
+       .byte 0, 0, 0x80, 0;                    \
+       .size flt_min, .-flt_min;
+#define DEFINE_DBL_MIN                         \
+       .section .rodata.cst8,"aM",@progbits,8; \
+       .p2align 3;                             \
+       .type dbl_min,@object;                  \
+dbl_min:                                       \
+       .byte 0, 0, 0, 0, 0, 0, 0x10, 0;        \
+       .size dbl_min, .-dbl_min;
+
+/* Remove excess range and precision by storing a value on the stack
+   and loading it back.  The value is given to be nonnegative or NaN;
+   if it is subnormal, also force an underflow exception.  The
+   relevant constant for the minimum of the type must have been
+   defined, the MO macro must have been defined for access to memory
+   operands, and, if PIC, the PIC register must have been loaded.  */
+#define FLT_NARROW_EVAL_UFLOW_NONNEG_NAN       \
+       subl    $4, %esp;                       \
+       cfi_adjust_cfa_offset (4);              \
+       flds    MO(flt_min);                    \
+       fld     %st(1);                         \
+       fucompp;                                \
+       fnstsw;                                 \
+       sahf;                                   \
+       jnc 6424f;                              \
+       fld     %st(0);                         \
+       fmul    %st(0);                         \
+       fstps   (%esp);                         \
+6424:  fstps   (%esp);                         \
+       flds    (%esp);                         \
+       addl    $4, %esp;                       \
+       cfi_adjust_cfa_offset (-4);
+#define DBL_NARROW_EVAL_UFLOW_NONNEG_NAN       \
+       subl    $8, %esp;                       \
+       cfi_adjust_cfa_offset (8);              \
+       fldl    MO(dbl_min);                    \
+       fld     %st(1);                         \
+       fucompp;                                \
+       fnstsw;                                 \
+       sahf;                                   \
+       jnc 6453f;                              \
+       fld     %st(0);                         \
+       fmul    %st(0);                         \
+       fstpl   (%esp);                         \
+6453:  fstpl   (%esp);                         \
+       fldl    (%esp);                         \
+       addl    $8, %esp;                       \
+       cfi_adjust_cfa_offset (-8);
+
+/* Likewise, but the argument is not a NaN (so fcom instructions,
+   which support memory operands, can be used).  */
+#define FLT_NARROW_EVAL_UFLOW_NONNEG           \
+       subl    $4, %esp;                       \
+       cfi_adjust_cfa_offset (4);              \
+       fcoms   MO(flt_min);                    \
+       fnstsw;                                 \
+       sahf;                                   \
+       jnc 6424f;                              \
+       fld     %st(0);                         \
+       fmul    %st(0);                         \
+       fstps   (%esp);                         \
+6424:  fstps   (%esp);                         \
+       flds    (%esp);                         \
+       addl    $4, %esp;                       \
+       cfi_adjust_cfa_offset (-4);
+#define DBL_NARROW_EVAL_UFLOW_NONNEG           \
+       subl    $8, %esp;                       \
+       cfi_adjust_cfa_offset (8);              \
+       fcoml   MO(dbl_min);                    \
+       fnstsw;                                 \
+       sahf;                                   \
+       jnc 6453f;                              \
+       fld     %st(0);                         \
+       fmul    %st(0);                         \
+       fstpl   (%esp);                         \
+6453:  fstpl   (%esp);                         \
+       fldl    (%esp);                         \
+       addl    $8, %esp;                       \
+       cfi_adjust_cfa_offset (-8);
+
 #endif /* i386-math-asm.h.  */
index ddaa8fc93b29fa8d791b4b4b6e677293928da4dd..32f24d07a297f34a0830e10eb55b3b77a540e152 100644 (file)
@@ -238,9 +238,9 @@ ildouble: 2
 ldouble: 2
 
 Function: Imaginary part of "cacos_downward":
-double: 3
+double: 4
 float: 3
-idouble: 3
+idouble: 4
 ifloat: 3
 ildouble: 5
 ldouble: 5
@@ -254,9 +254,9 @@ ildouble: 2
 ldouble: 2
 
 Function: Imaginary part of "cacos_towardzero":
-double: 3
+double: 4
 float: 3
-idouble: 3
+idouble: 4
 ifloat: 3
 ildouble: 5
 ldouble: 5
@@ -294,9 +294,9 @@ ildouble: 1
 ldouble: 1
 
 Function: Real part of "cacosh_downward":
-double: 3
+double: 4
 float: 3
-idouble: 3
+idouble: 4
 ifloat: 3
 ildouble: 5
 ldouble: 5
@@ -310,9 +310,9 @@ ildouble: 2
 ldouble: 2
 
 Function: Real part of "cacosh_towardzero":
-double: 3
+double: 4
 float: 3
-idouble: 3
+idouble: 4
 ifloat: 3
 ildouble: 5
 ldouble: 5
@@ -396,9 +396,9 @@ ildouble: 2
 ldouble: 2
 
 Function: Imaginary part of "casin_downward":
-double: 3
+double: 4
 float: 3
-idouble: 3
+idouble: 4
 ifloat: 3
 ildouble: 5
 ldouble: 5
@@ -412,9 +412,9 @@ ildouble: 2
 ldouble: 2
 
 Function: Imaginary part of "casin_towardzero":
-double: 3
+double: 4
 float: 3
-idouble: 3
+idouble: 4
 ifloat: 3
 ildouble: 5
 ldouble: 5
@@ -452,9 +452,9 @@ ildouble: 1
 ldouble: 1
 
 Function: Real part of "casinh_downward":
-double: 3
+double: 4
 float: 3
-idouble: 3
+idouble: 4
 ifloat: 3
 ildouble: 5
 ldouble: 5
@@ -468,9 +468,9 @@ ildouble: 2
 ldouble: 2
 
 Function: Real part of "casinh_towardzero":
-double: 3
+double: 4
 float: 3
-idouble: 3
+idouble: 4
 ifloat: 3
 ildouble: 5
 ldouble: 5
@@ -668,10 +668,10 @@ ildouble: 3
 ldouble: 3
 
 Function: Imaginary part of "ccos_downward":
-double: 2
-float: 2
-idouble: 2
-ifloat: 2
+double: 3
+float: 3
+idouble: 3
+ifloat: 3
 ildouble: 3
 ldouble: 3
 
@@ -684,25 +684,25 @@ ildouble: 3
 ldouble: 3
 
 Function: Imaginary part of "ccos_towardzero":
-double: 2
-float: 2
-idouble: 2
-ifloat: 2
+double: 3
+float: 3
+idouble: 3
+ifloat: 3
 ildouble: 3
 ldouble: 3
 
 Function: Real part of "ccos_upward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
 ildouble: 2
 ldouble: 2
 
 Function: Imaginary part of "ccos_upward":
-double: 1
+double: 2
 float: 2
-idouble: 1
+idouble: 2
 ifloat: 2
 ildouble: 2
 ldouble: 2
@@ -725,77 +725,69 @@ ldouble: 1
 
 Function: Real part of "ccosh_downward":
 double: 1
-float: 1
+float: 2
 idouble: 1
-ifloat: 1
+ifloat: 2
 ildouble: 3
 ldouble: 3
 
 Function: Imaginary part of "ccosh_downward":
-double: 2
-float: 2
-idouble: 2
-ifloat: 2
+double: 3
+float: 3
+idouble: 3
+ifloat: 3
 ildouble: 3
 ldouble: 3
 
 Function: Real part of "ccosh_towardzero":
 double: 1
-float: 1
+float: 2
 idouble: 1
-ifloat: 1
+ifloat: 2
 ildouble: 3
 ldouble: 3
 
 Function: Imaginary part of "ccosh_towardzero":
-double: 2
-float: 2
-idouble: 2
-ifloat: 2
+double: 3
+float: 3
+idouble: 3
+ifloat: 3
 ildouble: 3
 ldouble: 3
 
 Function: Real part of "ccosh_upward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
 ildouble: 2
 ldouble: 2
 
 Function: Imaginary part of "ccosh_upward":
-double: 1
+double: 3
 float: 2
-idouble: 1
+idouble: 3
 ifloat: 2
 ildouble: 2
 ldouble: 2
 
 Function: Real part of "cexp":
-double: 1
+double: 2
 float: 1
-idouble: 1
+idouble: 2
 ifloat: 1
 ildouble: 1
 ldouble: 1
 
 Function: Imaginary part of "cexp":
 double: 1
-float: 1
+float: 2
 idouble: 1
-ifloat: 1
+ifloat: 2
 ildouble: 1
 ldouble: 1
 
 Function: Real part of "cexp_downward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
-ildouble: 3
-ldouble: 3
-
-Function: Imaginary part of "cexp_downward":
 double: 2
 float: 2
 idouble: 2
@@ -803,15 +795,15 @@ ifloat: 2
 ildouble: 3
 ldouble: 3
 
-Function: Real part of "cexp_towardzero":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
+Function: Imaginary part of "cexp_downward":
+double: 3
+float: 3
+idouble: 3
+ifloat: 3
 ildouble: 3
 ldouble: 3
 
-Function: Imaginary part of "cexp_towardzero":
+Function: Real part of "cexp_towardzero":
 double: 2
 float: 2
 idouble: 2
@@ -819,19 +811,27 @@ ifloat: 2
 ildouble: 3
 ldouble: 3
 
+Function: Imaginary part of "cexp_towardzero":
+double: 3
+float: 3
+idouble: 3
+ifloat: 3
+ildouble: 3
+ldouble: 3
+
 Function: Real part of "cexp_upward":
 double: 1
-float: 1
+float: 2
 idouble: 1
-ifloat: 1
+ifloat: 2
 ildouble: 2
 ldouble: 2
 
 Function: Imaginary part of "cexp_upward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
+double: 3
+float: 2
+idouble: 3
+ifloat: 2
 ildouble: 3
 ldouble: 3
 
@@ -983,27 +983,30 @@ ldouble: 2
 
 Function: "cosh":
 double: 1
+float: 1
 idouble: 1
 ildouble: 2
 ldouble: 2
 
 Function: "cosh_downward":
-double: 1
+double: 2
+float: 1
 idouble: 1
 ifloat: 1
 ildouble: 2
 ldouble: 3
 
 Function: "cosh_towardzero":
-double: 1
+double: 2
+float: 1
 idouble: 1
 ifloat: 1
 ildouble: 2
 ldouble: 2
 
 Function: "cosh_upward":
-double: 1
-float: 1
+double: 4
+float: 2
 idouble: 1
 ifloat: 1
 ildouble: 2
@@ -1019,9 +1022,9 @@ ldouble: 3
 
 Function: Imaginary part of "cpow":
 double: 1
-float: 1
+float: 2
 idouble: 1
-ifloat: 1
+ifloat: 2
 ildouble: 4
 ldouble: 4
 
@@ -1034,10 +1037,10 @@ ildouble: 7
 ldouble: 7
 
 Function: Imaginary part of "cpow_downward":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
 ildouble: 2
 ldouble: 2
 
@@ -1050,10 +1053,10 @@ ildouble: 7
 ldouble: 7
 
 Function: Imaginary part of "cpow_towardzero":
-double: 1
-float: 1
-idouble: 1
-ifloat: 1
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
 ildouble: 1
 ldouble: 1
 
@@ -1067,9 +1070,9 @@ ldouble: 2
 
 Function: Imaginary part of "cpow_upward":
 double: 1
-float: 1
+float: 2
 idouble: 1
-ifloat: 1
+ifloat: 2
 ildouble: 2
 ldouble: 2
 
@@ -1088,10 +1091,10 @@ idouble: 1
 ifloat: 1
 
 Function: Real part of "csin_downward":
-double: 2
-float: 2
-idouble: 2
-ifloat: 2
+double: 3
+float: 3
+idouble: 3
+ifloat: 3
 ildouble: 3
 ldouble: 3
 
@@ -1104,10 +1107,10 @@ ildouble: 3
 ldouble: 3
 
 Function: Real part of "csin_towardzero":
-double: 2
-float: 2
-idouble: 2
-ifloat: 2
+double: 3
+float: 3
+idouble: 3
+ifloat: 3
 ildouble: 3
 ldouble: 3
 
@@ -1120,9 +1123,9 @@ ildouble: 3
 ldouble: 3
 
 Function: Real part of "csin_upward":
-double: 1
+double: 3
 float: 2
-idouble: 1
+idouble: 3
 ifloat: 2
 ildouble: 3
 ldouble: 3
@@ -1160,10 +1163,10 @@ ildouble: 3
 ldouble: 3
 
 Function: Imaginary part of "csinh_downward":
-double: 2
-float: 2
-idouble: 2
-ifloat: 2
+double: 3
+float: 3
+idouble: 3
+ifloat: 3
 ildouble: 3
 ldouble: 3
 
@@ -1176,10 +1179,10 @@ ildouble: 3
 ldouble: 3
 
 Function: Imaginary part of "csinh_towardzero":
-double: 2
-float: 2
-idouble: 2
-ifloat: 2
+double: 3
+float: 3
+idouble: 3
+ifloat: 3
 ildouble: 3
 ldouble: 3
 
@@ -1192,9 +1195,9 @@ ildouble: 3
 ldouble: 3
 
 Function: Imaginary part of "csinh_upward":
-double: 1
+double: 3
 float: 2
-idouble: 1
+idouble: 3
 ifloat: 2
 ildouble: 3
 ldouble: 3
@@ -1207,7 +1210,9 @@ ldouble: 2
 
 Function: Imaginary part of "csqrt":
 double: 1
+float: 1
 idouble: 1
+ifloat: 1
 ildouble: 2
 ldouble: 2
 
@@ -1253,9 +1258,9 @@ ldouble: 5
 
 Function: Imaginary part of "csqrt_upward":
 double: 1
-float: 1
+float: 2
 idouble: 1
-ifloat: 1
+ifloat: 2
 ildouble: 4
 ldouble: 4
 
@@ -1429,9 +1434,9 @@ ldouble: 3
 
 Function: "erfc_downward":
 double: 2
-float: 2
+float: 3
 idouble: 2
-ifloat: 2
+ifloat: 3
 ildouble: 4
 ldouble: 4
 
@@ -1445,9 +1450,9 @@ ldouble: 4
 
 Function: "erfc_upward":
 double: 2
-float: 2
+float: 3
 idouble: 2
-ifloat: 2
+ifloat: 3
 ildouble: 5
 ldouble: 5
 
index be3b145e0f2d71b5f50beeac9fc64c577c81db4a..2e2359508b635bfb03411e7f4d977378f6fd6369 100644 (file)
@@ -113,11 +113,12 @@ ENTRY(__ieee754_expf_sse2)
        mulsd   MO2(DP_T,%eax,8), %xmm0 /* DP P(y)*T[j] */
        addsd   MO2(DP_T,%eax,8), %xmm0 /* DP T[j]*(P(y)+1) */
        mulsd   %xmm1, %xmm0            /* DP result=2^n*(T[j]*(P(y)+1)) */
+       cvtsd2ss        %xmm0, %xmm1
 
-       lea     -8(%esp), %esp          /* Borrow 8 bytes of stack frame */
-       movsd   %xmm0, 0(%esp)          /* Move result from sse... */
-       fldl    0(%esp)                 /* ...to FPU. */
-       lea     8(%esp), %esp           /* Return back 8 bytes of stack frame */
+       lea     -4(%esp), %esp          /* Borrow 4 bytes of stack frame */
+       movss   %xmm1, 0(%esp)          /* Move result from sse... */
+       flds    0(%esp)                 /* ...to FPU. */
+       lea     4(%esp), %esp           /* Return back 4 bytes of stack frame */
        ret
 
        .p2align        4