[thirdparty/glibc.git] / sysdeps / sparc / sparc32 / sparcv9 / fpu / multiarch / s_floor-vis3.S

/* floor function, sparc32 v9 vis3 version.
   Copyright (C) 2012-2013 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by David S. Miller <davem@davemloft.net>, 2012.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#include <sysdep.h>

	/* Since changing the rounding mode is extremely expensive, we
	   try to round down using a method that is rounding mode
	   agnostic.

	   We add 2**52 to the value and then subtract it back out
	   (or subtract 2**52 and then add it back if the initial
	   value was negative).

	   This will clear out the fractional portion of the value.
	   One of two things will happen for non-whole initial values.
	   Either the rounding mode will round it up, or it will be
	   rounded down.  If the value started out whole, it will be
	   equal after the addition and subtraction.  This means we
	   can accurately detect with one test whether we need to
	   subtract 1.0 to round it down properly.

	   VIS instructions are used to facilitate the formation of
	   easier constants, and the propagation of the sign bit.  */

/* High 32 bits of the IEEE double-precision bit patterns used by the
   routine.  The code loads each one with sethi and shifts it left by
   32 to form the full 64-bit pattern (the low word is zero).  */
#define TWO_FIFTYTWO	0x43300000		/* 2**52 */
#define ONE_DOT_ZERO	0x3ff00000		/* 1.0 */

/* Floating-point registers reserved for fixed values.  SIGN_BIT starts
   out as -0.0 and is then overwritten with just the sign bit of the
   argument.  */
#define ZERO		%f10			/* 0.0 */
#define SIGN_BIT	%f12			/* -0.0 */

/* __floor_vis3: round a double down to an integral value.

   Input:   %o0 = high 32 bits, %o1 = low 32 bits of the argument x.
   Output:  %f0 = result.
   Uses:    %o0-%o3, %f0, %f10-%f20 (even registers), %fcc2, %fcc3.

   The routine is branch-free: every decision is made with a
   conditional floating-point move.  Integer and floating-point
   instructions are interleaved, so the order below is significant.  */
ENTRY (__floor_vis3)
	/* %o2 = high word of 2**52.  */
	sethi	%hi(TWO_FIFTYTWO), %o2
	/* Move the high word of x into bits 63:32 of %o0.  */
	sllx	%o0, 32, %o0
	/* %o3 = high word of 1.0.  */
	sethi	%hi(ONE_DOT_ZERO), %o3
	/* Merge in the low word so %o0 holds the 64-bit pattern of x.
	   NOTE(review): this relies on the upper 32 bits of %o1 being
	   zero on entry -- confirm against the calling convention.  */
	or	%o0, %o1, %o0
	/* %f0 = x, transferred directly from the integer register.  */
	movxtod	%o0, %f0
	/* %o2 = full 64-bit pattern of 2**52.  */
	sllx	%o2, 32, %o2
	/* ZERO = 0.0.  */
	fzero	ZERO
	/* %o3 = full 64-bit pattern of 1.0.  */
	sllx	%o3, 32, %o3

	/* SIGN_BIT = -0.0, i.e. a mask with only the sign bit set.  */
	fnegd	ZERO, SIGN_BIT

	/* %f16 = 2**52.  */
	movxtod	%o2, %f16
	/* %f14 = |x|.  */
	fabsd	%f0, %f14

	/* Compare |x| against 2**52.  */
	fcmpd	%fcc3, %f14, %f16

	/* If |x| >= 2**52, or the compare was unordered (NaN), replace
	   the 2**52 constant with 0.0 so the add/subtract below adds
	   and subtracts zero instead.  */
	fmovduge %fcc3, ZERO, %f16
	/* SIGN_BIT = sign bit of x, all other bits clear.  */
	fand	%f0, SIGN_BIT, SIGN_BIT

	/* Give the constant in %f16 the same sign as x.  */
	for	%f16, SIGN_BIT, %f16
	/* %f18 = (x + c) - c, where c is the signed constant in %f16.
	   This discards the fraction of x; which way it rounds depends
	   on the current rounding mode.  */
	faddd	%f0, %f16, %f18
	fsubd	%f18, %f16, %f18
	/* Compare the rounded value against the original x.  */
	fcmpd	%fcc2, %f18, %f0
	/* %f20 = 1.0, the tentative correction.  */
	movxtod	%o3, %f20

	/* If the rounded value is <= x, or the compare was unordered,
	   no correction is needed, so the correction becomes 0.0.  */
	fmovdule %fcc2, ZERO, %f20
	/* Subtract the correction: 1.0 when the rounding went above x,
	   otherwise 0.0.  */
	fsubd	%f18, %f20, %f0
	/* Clear the sign of the result ...  */
	fabsd	%f0, %f0
	retl
	/* ... and, in the delay slot, give it the sign of x, so that a
	   zero result carries the same sign as the argument.  */
	 for	%f0, SIGN_BIT, %f0
END (__floor_vis3)
Commit	Line	Data
559398ab	1	/* floor function, sparc32 v9 vis3 version.
568035b7	2	Copyright (C) 2012-2013 Free Software Foundation, Inc.
559398ab DM	3	This file is part of the GNU C Library.
	4	Contributed by David S. Miller <davem@davemloft.net>, 2012.
	5
	6	The GNU C Library is free software; you can redistribute it and/or
	7	modify it under the terms of the GNU Lesser General Public
	8	License as published by the Free Software Foundation; either
	9	version 2.1 of the License, or (at your option) any later version.
	10
	11	The GNU C Library is distributed in the hope that it will be useful,
	12	but WITHOUT ANY WARRANTY; without even the implied warranty of
	13	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	14	Lesser General Public License for more details.
	15
	16	You should have received a copy of the GNU Lesser General Public
	17	License along with the GNU C Library; if not, see
	18	<http://www.gnu.org/licenses/>. */
	19
	20	#include <sysdep.h>
	21
	22	/* Since changing the rounding mode is extremely expensive, we
	23	try to round down using a method that is rounding mode
	24	agnostic.
	25
	26	We add 2**52 to the value and then subtract it back out
	27	(or subtract 2**52 and then add it back if the initial
	28	value was negative).
	29
	30	This will clear out the fractional portion of the value.
	31	One of two things will happen for non-whole initial values.
	32	Either the rounding mode will round it up, or it will be
	33	rounded down. If the value started out whole, it will be
	34	equal after the addition and subtraction. This means we
	35	can accurately detect with one test whether we need to
	36	subtract 1.0 to round it down properly.
	37
	38	VIS instructions are used to facilitate the formation of
	39	easier constants, and the propagation of the sign bit. */
	40
	41	#define TWO_FIFTYTWO 0x43300000 /* 2**52 */
	42	#define ONE_DOT_ZERO 0x3ff00000 /* 1.0 */
	43
	44	#define ZERO %f10 /* 0.0 */
	45	#define SIGN_BIT %f12 /* -0.0 */
	46
	47	ENTRY (__floor_vis3)
	48	sethi %hi(TWO_FIFTYTWO), %o2
	49	sllx %o0, 32, %o0
	50	sethi %hi(ONE_DOT_ZERO), %o3
	51	or %o0, %o1, %o0
	52	movxtod %o0, %f0
	53	sllx %o2, 32, %o2
	54	fzero ZERO
	55	sllx %o3, 32, %o3
	56
	57	fnegd ZERO, SIGN_BIT
	58
ee0db190	59	movxtod %o2, %f16
559398ab DM	60	fabsd %f0, %f14
559398ab DM	61
559398ab DM	62	fcmpd %fcc3, %f14, %f16
	63
	64	fmovduge %fcc3, ZERO, %f16
	65	fand %f0, SIGN_BIT, SIGN_BIT
	66
	67	for %f16, SIGN_BIT, %f16
	68	faddd %f0, %f16, %f18
	69	fsubd %f18, %f16, %f18
	70	fcmpd %fcc2, %f18, %f0
	71	movxtod %o3, %f20
	72
	73	fmovdule %fcc2, ZERO, %f20
	74	fsubd %f18, %f20, %f0
	75	fabsd %f0, %f0
	76	retl
	77	for %f0, SIGN_BIT, %f0
	78	END (__floor_vis3)