]> git.ipfire.org Git - thirdparty/glibc.git/commitdiff
PowerPC: remove branch prediction from rint implementation
author Adhemerval Zanella <azanella@linux.vnet.ibm.com>
Fri, 29 Mar 2013 23:15:28 +0000 (18:15 -0500)
committer Adhemerval Zanella <azanella@linux.vnet.ibm.com>
Mon, 1 Apr 2013 11:36:51 +0000 (06:36 -0500)
The branch prediction hints actually hurt performance in this case.
The assembly implementation makes two assumptions: 1. 'fabs (x) < 2^52'
is unlikely and 2. 'x > 0.0' is unlikely (if 1. is true). Since it is a
general floating point function, the expected input is not bounded, so
it is better to let the hardware handle the branches.

benchtests/Makefile
benchtests/rint-inputs [new file with mode: 0644]
sysdeps/powerpc/powerpc32/fpu/s_rint.S
sysdeps/powerpc/powerpc32/fpu/s_rintf.S
sysdeps/powerpc/powerpc64/fpu/s_rint.S
sysdeps/powerpc/powerpc64/fpu/s_rintf.S

index 74938b925c7d33bafc412664873bb57027b30778..cc54b81faa1451fa3eee4f93f7944c377dd0f6aa 100644 (file)
@@ -43,7 +43,7 @@
 #   See pow-inputs for an example.
 
 subdir := benchtests
-bench := exp pow
+bench := exp pow rint
 
 exp-ITER = 100000
 exp-ARGLIST = double
@@ -55,5 +55,10 @@ pow-ARGLIST = double:double
 pow-RET = double
 LDFLAGS-bench-pow = -lm
 
+rint-ITER = 250000000
+rint-ARGLIST = double
+rint-RET = double
+LDFLAGS-bench-rint = -lm
+
 include ../Makeconfig
 include ../Rules
diff --git a/benchtests/rint-inputs b/benchtests/rint-inputs
new file mode 100644 (file)
index 0000000..a5f83dc
--- /dev/null
@@ -0,0 +1,4 @@
+78.5
+-78.5
+4503599627370497.0
+-4503599627370497.0
index f3cd0366805af4adc05fa3adc151abeedacce4ca..f04055f4612f56f68959063ea6c6e06c4ece8b7b 100644 (file)
@@ -45,14 +45,14 @@ ENTRY (__rint)
        fsub    fp12,fp13,fp13  /* generate 0.0  */
        fcmpu   cr7,fp0,fp13    /* if (fabs(x) > TWO52)  */
        fcmpu   cr6,fp1,fp12    /* if (x > 0.0)  */
-       bnllr-  cr7
-       bng-    cr6,.L4
+       bnllr   cr7
+       bng     cr6,.L4
        fadd    fp1,fp1,fp13    /* x+= TWO52;  */
        fsub    fp1,fp1,fp13    /* x-= TWO52;  */
        fabs    fp1,fp1         /* if (x == 0.0)  */
        blr                     /* x = 0.0; */
 .L4:
-       bnllr-  cr6             /* if (x < 0.0)  */
+       bnllr   cr6             /* if (x < 0.0)  */
        fsub    fp1,fp1,fp13    /* x-= TWO52;  */
        fadd    fp1,fp1,fp13    /* x+= TWO52;  */
        fnabs   fp1,fp1         /* if (x == 0.0)  */
index 247dd4a14db7c28ef93aece5f6eb794a1968c124..e0301af2e766f3a08a85cceb5712bbc9343da0ca 100644 (file)
@@ -41,14 +41,14 @@ ENTRY (__rintf)
        fsubs   fp12,fp13,fp13  /* generate 0.0  */
        fcmpu   cr7,fp0,fp13    /* if (fabs(x) > TWO23)  */
        fcmpu   cr6,fp1,fp12    /* if (x > 0.0)  */
-       bnllr-  cr7
-       bng-    cr6,.L4
+       bnllr   cr7
+       bng     cr6,.L4
        fadds   fp1,fp1,fp13    /* x+= TWO23;  */
        fsubs   fp1,fp1,fp13    /* x-= TWO23;  */
        fabs    fp1,fp1         /* if (x == 0.0)  */
        blr                     /* x = 0.0; */
 .L4:
-       bnllr-  cr6             /* if (x < 0.0)  */
+       bnllr   cr6             /* if (x < 0.0)  */
        fsubs   fp1,fp1,fp13    /* x-= TWO23;  */
        fadds   fp1,fp1,fp13    /* x+= TWO23;  */
        fnabs   fp1,fp1         /* if (x == 0.0)  */
index f3339727f40704fc0f1b1cccf53f5050fd32ad84..57e3759bf021da12c6bcd2ffaad3d5bd6836691c 100644 (file)
@@ -34,14 +34,14 @@ EALIGN (__rint, 4, 0)
        fsub    fp12,fp13,fp13  /* generate 0.0  */
        fcmpu   cr7,fp0,fp13    /* if (fabs(x) > TWO52)  */
        fcmpu   cr6,fp1,fp12    /* if (x > 0.0)  */
-       bnllr-  cr7
-       bng-    cr6,.L4
+       bnllr   cr7
+       bng     cr6,.L4
        fadd    fp1,fp1,fp13    /* x+= TWO52;  */
        fsub    fp1,fp1,fp13    /* x-= TWO52;  */
        fabs    fp1,fp1         /* if (x == 0.0)  */
        blr                     /* x = 0.0; */
 .L4:
-       bnllr-  cr6             /* if (x < 0.0)  */
+       bnllr   cr6             /* if (x < 0.0)  */
        fsub    fp1,fp1,fp13    /* x-= TWO52;  */
        fadd    fp1,fp1,fp13    /* x+= TWO52;  */
        fnabs   fp1,fp1         /* if (x == 0.0)  */
index 26b08721c79729d769f132a110f0e5a9a6d5d5a8..cb28ec748d4486d47c8b51f2bd7c9084d2b2bf51 100644 (file)
@@ -30,14 +30,14 @@ EALIGN (__rintf, 4, 0)
        fsubs   fp12,fp13,fp13  /* generate 0.0  */
        fcmpu   cr7,fp0,fp13    /* if (fabs(x) > TWO23)  */
        fcmpu   cr6,fp1,fp12    /* if (x > 0.0)  */
-       bnllr-  cr7
-       bng-    cr6,.L4
+       bnllr   cr7
+       bng     cr6,.L4
        fadds   fp1,fp1,fp13    /* x+= TWO23;  */
        fsubs   fp1,fp1,fp13    /* x-= TWO23;  */
        fabs    fp1,fp1         /* if (x == 0.0)  */
        blr                     /* x = 0.0; */
 .L4:
-       bnllr-  cr6             /* if (x < 0.0)  */
+       bnllr   cr6             /* if (x < 0.0)  */
        fsubs   fp1,fp1,fp13    /* x-= TWO23;  */
        fadds   fp1,fp1,fp13    /* x+= TWO23;  */
        fnabs   fp1,fp1         /* if (x == 0.0)  */