[thirdparty/glibc.git] / sysdeps / powerpc / powerpc32 / fpu / s_lround.S

/* lround function.  PowerPC32 version.
   Copyright (C) 2004-2016 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#include <sysdep.h>
#include <math_ldbl_opt.h>

	/* Single-precision constants, loaded with lfs by __lround.  The
	   "aM" flags with entry size 4 mark a mergeable constant section.  */
	.section	.rodata.cst4,"aM",@progbits,4
	.align	2
.LC0:	/* 0.5: threshold for the "return 0" case and the rounding bias.  */
	.long 0x3f000000
.LC1:	/* 2^52: added to and then subtracted from |x| to test whether x
	   is already an integer; also the stand-in value converted on
	   the out-of-range path.  */
	.long 0x59800000
	/* Double-precision range limits, loaded with lfd by __lround.
	   Each is written as two 32-bit words, most significant word
	   first.  NOTE(review): this layout assumes big-endian word order
	   -- confirm before reusing on a little-endian configuration.  */
	.section	.rodata.cst8,"aM",@progbits,8
	.align	3
.LC2:	/* 0x7fffffff.8p0 (2^31 - 0.5): x at or above this is out of range.  */
	.long 0x41dfffff
	.long 0xffe00000
.LC3:	/* -0x80000000.8p0 (-(2^31 + 0.5)): x at or below this is out of range.  */
	.long 0xc1e00000
	.long 0x00100000
	.section	".text"

/* long [r3] lround (double x [fp1])
   IEEE 1003.1 lround function.  IEEE specifies "round to the nearest
   integer value, rounding halfway cases away from zero, regardless of
   the current rounding mode."  However PowerPC Architecture defines
   "round to Nearest" as "Choose the best approximation. In case of a
   tie, choose the one that is even (least significant bit 0).".
   So we can't use the PowerPC "round to Nearest" mode.  Instead we
   convert with fctiwz, which rounds toward zero, and round by adding
   0.5 to |x| (re-applying the sign of x) before the conversion.  It
   is necessary to detect when x is (+-)0x1.fffffffffffffp-2 because
   adding +-0.5 in this case will cause an erroneous shift, carry and
   round.  We simply return 0 if 0.5 > x > -0.5.

   Values that are already integers are converted without the bias so
   that no spurious "inexact" is raised.  Values at or beyond
   0x7fffffff.8p0 / -0x80000000.8p0 cannot be represented in a 32-bit
   long after rounding; for those, +-2^52 is converted instead so that
   fctiwz itself deals with the out-of-range case.

   The same code is also exported as lroundf (see the aliases that
   follow END).

   Register use:
     fp1   x (input, not modified)
     fp2   |x|
     fp3   value handed to fctiwz
     fp4   fctiwz result, passed to r3 through 8(r1)
     fp8   -0x80000000.8p0	fp9   0x7fffffff.8p0
     fp10  0.5			fp11  2^52
     fp12  0.0
     cr7   x compared with 0.0 (sign of x), live until the conversion
     r9-r11  address temporaries / saved LR (SHARED only).  */

ENTRY (__lround)
	stwu	r1,-16(r1)	/* 16-byte frame; 8(r1) receives the result.  */
	cfi_adjust_cfa_offset (16)
#ifdef SHARED
	/* PIC: address the constants relative to got_label.  LR is kept
	   in r11 around SETUP_GOT_ACCESS and restored afterwards.  */
	mflr	r11
	cfi_register(lr,r11)
	SETUP_GOT_ACCESS(r9,got_label)
	addis	r10,r9,.LC0-got_label@ha
	lfs	fp10,.LC0-got_label@l(r10)	/* fp10 = 0.5  */
	addis	r10,r9,.LC1-got_label@ha
	lfs	fp11,.LC1-got_label@l(r10)	/* fp11 = 2^52  */
	addis	r10,r9,.LC2-got_label@ha
	lfd	fp9,.LC2-got_label@l(r10)	/* fp9 = 0x7fffffff.8p0  */
	addis	r10,r9,.LC3-got_label@ha
	lfd	fp8,.LC3-got_label@l(r10)	/* fp8 = -0x80000000.8p0  */
	mtlr	r11
	cfi_same_value (lr)
#else
	/* Non-PIC: absolute @ha/@l addressing of the constants.  */
	lis	r9,.LC0@ha
	lfs	fp10,.LC0@l(r9)	/* fp10 = 0.5  */
	lis	r9,.LC1@ha
	lfs	fp11,.LC1@l(r9)	/* fp11 = 2^52  */
	lis	r9,.LC2@ha
	lfd	fp9,.LC2@l(r9)	/* fp9 = 0x7fffffff.8p0  */
	lis	r9,.LC3@ha
	lfd	fp8,.LC3@l(r9)	/* fp8 = -0x80000000.8p0  */
#endif
	fabs	fp2, fp1	/* Get the absolute value of x.  */
	fsub	fp12,fp10,fp10	/* Compute 0.0.  */
	fcmpu	cr6, fp2, fp10	/* if |x| < 0.5  */
	fcmpu	cr5, fp1, fp9	/* if x >= 0x7fffffff.8p0  */
	fcmpu	cr1, fp1, fp8	/* if x <= -0x80000000.8p0  */
	fcmpu	cr7, fp1, fp12	/* x is negative? x < 0.0  */
	blt-	cr6,.Lretzero
	bge-	cr5,.Loflow
	ble-	cr1,.Loflow
	/* Test whether an integer to avoid spurious "inexact".  Adding
	   and subtracting 2^52 leaves |x| unchanged only when it has no
	   fractional part.  */
	fadd	fp3,fp2,fp11
	fsub	fp3,fp3,fp11
	fcmpu	cr5, fp2, fp3
	beq	cr5,.Lnobias
	fadd	fp3,fp2,fp10	/* |x|+=0.5 bias to prepare to round.  */
	bge	cr7,.Lconvert	/* x is positive so don't negate x.  */
	fnabs	fp3,fp3		/* -(|x|+=0.5)  */
.Lconvert:
	fctiwz	fp4,fp3		/* Convert to Integer word, round toward 0.  */
	stfd	fp4,8(r1)
	nop	/* Ensure the following load is in a different dispatch  */
	nop	/* group to avoid pipe stall on POWER4&5.  */
	nop
	lwz	r3,8+LOWORD(r1)	/* Load return as integer.  */
.Lout:
	addi	r1,r1,16	/* Release the frame.  */
	/* The frame is gone for the blr, so the CFA is r1 again.  */
	cfi_adjust_cfa_offset (-16)
	blr
	/* The out-of-line paths below are entered by branches taken while
	   the frame is still allocated, so describe it again.  */
	cfi_adjust_cfa_offset (16)
.Lretzero:			/* when 0.5 > x > -0.5  */
	li	r3,0		/* return 0.  */
	b	.Lout
.Lnobias:			/* x is already an integer: convert it as is.  */
	fmr	fp3,fp1
	b	.Lconvert
.Loflow:			/* x out of range: convert +-2^52 instead.  */
	fmr	fp3,fp11
	bge	cr7,.Lconvert	/* Not (x < 0.0): keep +2^52.  */
	fnabs	fp3,fp3		/* x < 0.0: use -2^52.  */
	b	.Lconvert
	END (__lround)

/* Public name for the double entry point.  NOTE(review): weak_alias,
   strong_alias and compat_symbol are macros defined outside this
   file -- see the included headers for their exact semantics.  */
weak_alias (__lround, lround)

/* The float variants resolve to the same code as __lround.  */
strong_alias (__lround, __lroundf)
weak_alias (__lround, lroundf)

/* NOTE(review): presumably NO_LONG_DOUBLE marks configurations where
   long double has the same format as double, so the long double
   names can share this implementation -- confirm.  */
#ifdef NO_LONG_DOUBLE
weak_alias (__lround, lroundl)
strong_alias (__lround, __lroundl)
#endif
/* Versioned lroundl symbol pointing at __lround, emitted only when
   LONG_DOUBLE_COMPAT (libm, GLIBC_2_1) is true.  NOTE(review):
   presumably kept for binaries linked against the GLIBC_2_1 symbol
   -- confirm against math_ldbl_opt.h.  */
#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1)
compat_symbol (libm, __lround, lroundl, GLIBC_2_1)
#endif
Commit	Line	Data
f9f70e68	1	/* lround function. PowerPC32 version.
f7a9f785	2	Copyright (C) 2004-2016 Free Software Foundation, Inc.
f9f70e68 UD	3	This file is part of the GNU C Library.
	4
	5	The GNU C Library is free software; you can redistribute it and/or
	6	modify it under the terms of the GNU Lesser General Public
	7	License as published by the Free Software Foundation; either
	8	version 2.1 of the License, or (at your option) any later version.
	9
	10	The GNU C Library is distributed in the hope that it will be useful,
	11	but WITHOUT ANY WARRANTY; without even the implied warranty of
	12	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	13	Lesser General Public License for more details.
	14
	15	You should have received a copy of the GNU Lesser General Public
59ba27a6 PE	16	License along with the GNU C Library; if not, see
59ba27a6 PE	17	<http://www.gnu.org/licenses/>. */
f9f70e68 UD	18
f9f70e68 UD	19	#include <sysdep.h>
f964490f	20	#include <math_ldbl_opt.h>
a334319f	21
c1e6b459	22	.section .rodata.cst4,"aM",@progbits,4
b0e196a4	23	.align 2
c1e6b459	24	.LC0: /* 0.5 */
b0e196a4	25	.long 0x3f000000
0c25f5b5 JM	26	.LC1: /* 2^52. */
	27	.long 0x59800000
	28	.section .rodata.cst8,"aM",@progbits,8
	29	.align 3
	30	.LC2: /* 0x7fffffff.8p0. */
	31	.long 0x41dfffff
	32	.long 0xffe00000
	33	.LC3: /* -0x80000000.8p0. */
	34	.long 0xc1e00000
	35	.long 0x00100000
f9f70e68	36	.section ".text"
9c84384c	37
f9f70e68	38	/* long [r3] lround (float x [fp1])
9c84384c	39	IEEE 1003.1 lround function. IEEE specifies "round to the nearest
f9f70e68 UD	40	integer value, rounding halfway cases away from zero, regardless of
f9f70e68 UD	41	the current rounding mode." However PowerPC Architecture defines
9c84384c JM	42	"round to Nearest" as "Choose the best approximation. In case of a
9c84384c JM	43	tie, choose the one that is even (least significant bit o).".
f9f70e68 UD	44	So we can't use the PowerPC "round to Nearest" mode. Instead we set
f9f70e68 UD	45	"round toward Zero" mode and round by adding +-0.5 before rounding
c1e6b459 UD	46	to the integer value. It is necessary to detect when x is
	47	(+-)0x1.fffffffffffffp-2 because adding +-0.5 in this case will
	48	cause an erroneous shift, carry and round. We simply return 0 if
	49	0.5 > x > -0.5. */
f9f70e68 UD	50
f9f70e68 UD	51	ENTRY (__lround)
a7e91561 UD	52	stwu r1,-16(r1)
a7e91561 UD	53	cfi_adjust_cfa_offset (16)
f9f70e68 UD	54	#ifdef SHARED
f9f70e68 UD	55	mflr r11
a7e91561	56	cfi_register(lr,r11)
91d2a845	57	SETUP_GOT_ACCESS(r9,got_label)
0c25f5b5 JM	58	addis r10,r9,.LC0-got_label@ha
	59	lfs fp10,.LC0-got_label@l(r10)
	60	addis r10,r9,.LC1-got_label@ha
	61	lfs fp11,.LC1-got_label@l(r10)
	62	addis r10,r9,.LC2-got_label@ha
	63	lfd fp9,.LC2-got_label@l(r10)
	64	addis r10,r9,.LC3-got_label@ha
	65	lfd fp8,.LC3-got_label@l(r10)
f9f70e68	66	mtlr r11
a7e91561	67	cfi_same_value (lr)
f9f70e68 UD	68	#else
f9f70e68 UD	69	lis r9,.LC0@ha
c1e6b459	70	lfs fp10,.LC0@l(r9)
0c25f5b5 JM	71	lis r9,.LC1@ha
	72	lfs fp11,.LC1@l(r9)
	73	lis r9,.LC2@ha
	74	lfd fp9,.LC2@l(r9)
	75	lis r9,.LC3@ha
	76	lfd fp8,.LC3@l(r9)
f9f70e68	77	#endif
c1e6b459 UD	78	fabs fp2, fp1 /* Get the absolute value of x. */
	79	fsub fp12,fp10,fp10 /* Compute 0.0. */
	80	fcmpu cr6, fp2, fp10 /* if \|x\| < 0.5 */
0c25f5b5 JM	81	fcmpu cr5, fp1, fp9 /* if x >= 0x7fffffff.8p0 */
0c25f5b5 JM	82	fcmpu cr1, fp1, fp8 /* if x <= -0x80000000.8p0 */
9ea8bfec	83	fcmpu cr7, fp1, fp12 /* x is negative? x < 0.0 */
c1e6b459	84	blt- cr6,.Lretzero
0c25f5b5 JM	85	bge- cr5,.Loflow
	86	ble- cr1,.Loflow
	87	/* Test whether an integer to avoid spurious "inexact". */
	88	fadd fp3,fp2,fp11
	89	fsub fp3,fp3,fp11
	90	fcmpu cr5, fp2, fp3
	91	beq cr5,.Lnobias
c1e6b459	92	fadd fp3,fp2,fp10 /* \|x\|+=0.5 bias to prepare to round. */
9ea8bfec	93	bge cr7,.Lconvert /* x is positive so don't negate x. */
9c84384c	94	fnabs fp3,fp3 /* -(\|x\|+=0.5) */
c1e6b459 UD	95	.Lconvert:
	96	fctiwz fp4,fp3 /* Convert to Integer word lround toward 0. */
	97	stfd fp4,8(r1)
f7d78e18 UD	98	nop /* Ensure the following load is in a different dispatch */
f7d78e18 UD	99	nop /* group to avoid pipe stall on POWER4&5. */
f9f70e68	100	nop
7b88401f	101	lwz r3,8+LOWORD(r1) /* Load return as integer. */
c1e6b459	102	.Lout:
a7e91561	103	addi r1,r1,16
f9f70e68	104	blr
c1e6b459 UD	105	.Lretzero: /* when 0.5 > x > -0.5 */
	106	li r3,0 /* return 0. */
	107	b .Lout
0c25f5b5 JM	108	.Lnobias:
	109	fmr fp3,fp1
	110	b .Lconvert
	111	.Loflow:
	112	fmr fp3,fp11
	113	bge cr7,.Lconvert
	114	fnabs fp3,fp3
	115	b .Lconvert
f9f70e68 UD	116	END (__lround)
f9f70e68 UD	117
f9f70e68 UD	118	weak_alias (__lround, lround)
	119
	120	strong_alias (__lround, __lroundf)
	121	weak_alias (__lround, lroundf)
	122
	123	#ifdef NO_LONG_DOUBLE
	124	weak_alias (__lround, lroundl)
	125	strong_alias (__lround, __lroundl)
	126	#endif
f964490f RM	127	#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1)
	128	compat_symbol (libm, __lround, lroundl, GLIBC_2_1)
	129	#endif