[thirdparty/glibc.git] / sysdeps / powerpc / powerpc64 / fpu / s_ceill.S

/* s_ceill.S IBM extended format long double version.
   Copyright (C) 2004-2013 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#include <sysdep.h>
#include <math_ldbl_opt.h>

	.section	".toc","aw"
.LC0:	/* 2**52 */
	.tc FD_43300000_0[TC],0x4330000000000000

	.section	".text"

/* long double [fp1,fp2] ceill (long double x [fp1,fp2])
   IEEE 1003.1 ceil function.

   PowerPC64 long double uses the IBM extended format which is
   represented two 64-floating point double values. The values are
   non-overlapping giving an effective precision of 106 bits. The first
   double contains the high order bits of mantisa and is always ceiled
   to represent a normal ceiling of long double to double. Since the
   long double value is sum of the high and low values, the low double
   normally has the opposite sign to compensate for the this ceiling.

   For long double there are two cases:
   1) |x| < 2**52, all the integer bits are in the high double.
      ceil the high double and set the low double to -0.0.
   2) |x| >= 2**52, ceiling involves both doubles.
      See the comment before lable .L2 for details.
   */

ENTRY (__ceill)
	mffs	fp11		/* Save current FPU rounding mode.  */
	lfd	fp13,.LC0@toc(2)
	fabs	fp0,fp1
	fabs	fp9,fp2
	fsub	fp12,fp13,fp13	/* generate 0.0  */
	fcmpu	cr7,fp0,fp13	/* if (fabs(x) > TWO52)  */
	fcmpu	cr6,fp1,fp12	/* if (x > 0.0)  */
	bnl-	cr7,.L2
	mtfsfi	7,2		/* Set rounding mode toward +inf.  */
	fneg	fp2,fp12
	ble-	cr6,.L1
	fadd	fp1,fp1,fp13	/* x+= TWO52;  */
	fsub	fp1,fp1,fp13	/* x-= TWO52;  */
	fabs	fp1,fp1		/* if (x == 0.0)  */
.L0:
	mtfsf	0x01,fp11	/* restore previous rounding mode.  */
	blr			/* x = 0.0; */
.L1:
	bge-	cr6,.L0		/* if (x < 0.0)  */
	fsub	fp1,fp1,fp13	/* x-= TWO52;  */
	fadd	fp1,fp1,fp13	/* x+= TWO52;  */
	fcmpu	cr5,fp1,fp12	/* if (x > 0.0)  */
	mtfsf	0x01,fp11	/* restore previous rounding mode.  */
	fnabs	fp1,fp1		/* if (x == 0.0)  */
	blr			/* x = -0.0; */
	
/* The high double is > TWO52 so we need to round the low double and
   perhaps the high double.  In this case we have to round the low
   double and handle any adjustment to the high double that may be
   caused by rounding (up).  This is complicated by the fact that the
   high double may already be rounded and the low double may have the
   opposite sign to compensate.This gets a bit tricky so we use the
   following algorithm:

   tau = floor(x_high/TWO52);
   x0 = x_high - tau;
   x1 = x_low + tau;
   r1 = rint(x1);
   y_high = x0 + r1;
   y_low = x0 - y_high + r1;
   return y;  */
.L2:
	fcmpu	cr7,fp9,fp13	/* if (|x_low| > TWO52)  */
	fcmpu	cr0,fp9,fp12	/* || (|x_low| == 0.0)  */
	fcmpu	cr5,fp2,fp12	/* if (x_low > 0.0)  */
	bgelr-	cr7		/*   return x;	*/
	beqlr-  cr0
	mtfsfi	7,2		/* Set rounding mode toward +inf.  */
	fdiv	fp8,fp1,fp13	/* x_high/TWO52  */
	
	bng-	cr6,.L6		/* if (x > 0.0)  */
	fctidz	fp0,fp8
	fcfid	fp8,fp0		/* tau = floor(x_high/TWO52);  */
	bng	cr5,.L4		/* if (x_low > 0.0)  */
	fmr	fp3,fp1
	fmr	fp4,fp2
	b	.L5
.L4:				/* if (x_low < 0.0)  */
	fsub	fp3,fp1,fp8	/* x0 = x_high - tau;  */
	fadd	fp4,fp2,fp8	/* x1 = x_low + tau;  */
.L5:
	fadd	fp5,fp4,fp13	/* r1 = r1 + TWO52;  */
	fsub	fp5,fp5,fp13	/* r1 = r1 - TWO52;  */
	b	.L9
.L6:				/* if (x < 0.0)  */
	fctidz	fp0,fp8
	fcfid	fp8,fp0		/* tau = floor(x_high/TWO52);  */	
	bnl	cr5,.L7		/* if (x_low < 0.0)  */
	fmr	fp3,fp1
	fmr	fp4,fp2
	b	.L8
.L7:				/* if (x_low > 0.0)  */
	fsub	fp3,fp1,fp8	/* x0 = x_high - tau;  */
	fadd	fp4,fp2,fp8	/* x1 = x_low + tau;  */
.L8:
	fsub	fp5,fp4,fp13	/* r1-= TWO52;  */
	fadd	fp5,fp5,fp13	/* r1+= TWO52;  */
.L9:
	mtfsf	0x01,fp11	/* restore previous rounding mode.  */
	fadd	fp1,fp3,fp5	/* y_high = x0 + r1;  */
	fsub	fp2,fp3,fp1	/* y_low = x0 - y_high + r1;  */
	fadd	fp2,fp2,fp5
	blr
END (__ceill)

long_double_symbol (libm, __ceill, ceill)
Commit	Line	Data
f964490f	1	/* s_ceill.S IBM extended format long double version.
568035b7	2	Copyright (C) 2004-2013 Free Software Foundation, Inc.
f964490f RM	3	This file is part of the GNU C Library.
	4
	5	The GNU C Library is free software; you can redistribute it and/or
	6	modify it under the terms of the GNU Lesser General Public
	7	License as published by the Free Software Foundation; either
	8	version 2.1 of the License, or (at your option) any later version.
	9
	10	The GNU C Library is distributed in the hope that it will be useful,
	11	but WITHOUT ANY WARRANTY; without even the implied warranty of
	12	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	13	Lesser General Public License for more details.
	14
	15	You should have received a copy of the GNU Lesser General Public
59ba27a6 PE	16	License along with the GNU C Library; if not, see
59ba27a6 PE	17	<http://www.gnu.org/licenses/>. */
f964490f RM	18
	19	#include <sysdep.h>
	20	#include <math_ldbl_opt.h>
	21
	22	.section ".toc","aw"
	23	.LC0: /* 2*52 /
	24	.tc FD_43300000_0[TC],0x4330000000000000
	25
	26	.section ".text"
	27
	28	/* long double [fp1,fp2] ceill (long double x [fp1,fp2])
	29	IEEE 1003.1 ceil function.
	30
	31	PowerPC64 long double uses the IBM extended format which is
	32	represented two 64-floating point double values. The values are
	33	non-overlapping giving an effective precision of 106 bits. The first
	34	double contains the high order bits of mantisa and is always ceiled
	35	to represent a normal ceiling of long double to double. Since the
	36	long double value is sum of the high and low values, the low double
	37	normally has the opposite sign to compensate for the this ceiling.
	38
	39	For long double there are two cases:
	40	1) \|x\| < 2**52, all the integer bits are in the high double.
	41	ceil the high double and set the low double to -0.0.
	42	2) \|x\| >= 2**52, ceiling involves both doubles.
	43	See the comment before lable .L2 for details.
	44	*/
	45
	46	ENTRY (__ceill)
	47	mffs fp11 /* Save current FPU rounding mode. */
	48	lfd fp13,.LC0@toc(2)
	49	fabs fp0,fp1
	50	fabs fp9,fp2
	51	fsub fp12,fp13,fp13 /* generate 0.0 */
	52	fcmpu cr7,fp0,fp13 /* if (fabs(x) > TWO52) */
1e832e37	53	fcmpu cr6,fp1,fp12 /* if (x > 0.0) */
f964490f RM	54	bnl- cr7,.L2
	55	mtfsfi 7,2 /* Set rounding mode toward +inf. */
	56	fneg fp2,fp12
	57	ble- cr6,.L1
	58	fadd fp1,fp1,fp13 /* x+= TWO52; */
	59	fsub fp1,fp1,fp13 /* x-= TWO52; */
	60	fabs fp1,fp1 /* if (x == 0.0) */
	61	.L0:
	62	mtfsf 0x01,fp11 /* restore previous rounding mode. */
	63	blr /* x = 0.0; */
	64	.L1:
	65	bge- cr6,.L0 /* if (x < 0.0) */
	66	fsub fp1,fp1,fp13 /* x-= TWO52; */
	67	fadd fp1,fp1,fp13 /* x+= TWO52; */
	68	fcmpu cr5,fp1,fp12 /* if (x > 0.0) */
	69	mtfsf 0x01,fp11 /* restore previous rounding mode. */
	70	fnabs fp1,fp1 /* if (x == 0.0) */
	71	blr /* x = -0.0; */
	72
	73	/* The high double is > TWO52 so we need to round the low double and
	74	perhaps the high double. In this case we have to round the low
	75	double and handle any adjustment to the high double that may be
	76	caused by rounding (up). This is complicated by the fact that the
	77	high double may already be rounded and the low double may have the
	78	opposite sign to compensate.This gets a bit tricky so we use the
	79	following algorithm:
	80
	81	tau = floor(x_high/TWO52);
	82	x0 = x_high - tau;
	83	x1 = x_low + tau;
	84	r1 = rint(x1);
	85	y_high = x0 + r1;
	86	y_low = x0 - y_high + r1;
	87	return y; */
	88	.L2:
	89	fcmpu cr7,fp9,fp13 /* if (\|x_low\| > TWO52) */
	90	fcmpu cr0,fp9,fp12 /* \|\| (\|x_low\| == 0.0) */
	91	fcmpu cr5,fp2,fp12 /* if (x_low > 0.0) */
	92	bgelr- cr7 /* return x; */
	93	beqlr- cr0
	94	mtfsfi 7,2 /* Set rounding mode toward +inf. */
	95	fdiv fp8,fp1,fp13 /* x_high/TWO52 */
	96
	97	bng- cr6,.L6 /* if (x > 0.0) */
	98	fctidz fp0,fp8
	99	fcfid fp8,fp0 /* tau = floor(x_high/TWO52); */
	100	bng cr5,.L4 /* if (x_low > 0.0) */
	101	fmr fp3,fp1
	102	fmr fp4,fp2
	103	b .L5
	104	.L4: /* if (x_low < 0.0) */
	105	fsub fp3,fp1,fp8 /* x0 = x_high - tau; */
	106	fadd fp4,fp2,fp8 /* x1 = x_low + tau; */
	107	.L5:
	108	fadd fp5,fp4,fp13 /* r1 = r1 + TWO52; */
	109	fsub fp5,fp5,fp13 /* r1 = r1 - TWO52; */
	110	b .L9
	111	.L6: /* if (x < 0.0) */
	112	fctidz fp0,fp8
	113	fcfid fp8,fp0 /* tau = floor(x_high/TWO52); */
	114	bnl cr5,.L7 /* if (x_low < 0.0) */
	115	fmr fp3,fp1
	116	fmr fp4,fp2
	117	b .L8
118	.L7: /* if (x_low > 0.0) */
119	fsub fp3,fp1,fp8 /* x0 = x_high - tau; */
120	fadd fp4,fp2,fp8 /* x1 = x_low + tau; */
121	.L8:
122	fsub fp5,fp4,fp13 /* r1-= TWO52; */
123	fadd fp5,fp5,fp13 /* r1+= TWO52; */
124	.L9:
125	mtfsf 0x01,fp11 /* restore previous rounding mode. */
126	fadd fp1,fp3,fp5 /* y_high = x0 + r1; */
127	fsub fp2,fp3,fp1 /* y_low = x0 - y_high + r1; */
128	fadd fp2,fp2,fp5
129	blr
130	END (__ceill)
131
132	long_double_symbol (libm, __ceill, ceill)