[thirdparty/glibc.git] / sysdeps / alpha / ldiv.S

/* Copyright (C) 1996-2021 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Richard Henderson <rth@tamu.edu>.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library.  If not, see
   <https://www.gnu.org/licenses/>.  */

#include "div_libc.h"

/* Size of the stack frame that ldiv allocates on entry and releases
   before returning.  No frame is allocated when the compiler defines
   __alpha_fix__; otherwise 16 bytes are reserved.
   NOTE(review): presumably the 16 bytes are scratch slots for the
   _ITOFT2/_FTOIT macros when integer<->fp values have to go through
   memory -- confirm against div_libc.h.  */
#undef FRAME
#ifdef __alpha_fix__
#define FRAME 0
#else
#define FRAME 16
#endif

/* Registers holding the dividend (X) and the divisor (Y) in ldiv.  Any
   earlier definitions of X and Y (e.g. from div_libc.h) are dropped
   first.  */
#undef X
#undef Y
#define X $17
#define Y $18

	/* ldiv: divide the signed 64-bit value X by Y and store the
	   quotient and the remainder through the pointer passed in $16.

	   Register use, as read from the code below:
	     $16       - address of the result pair: the quotient is stored
			 at offset 0 and the remainder at offset 8.  The same
			 address is handed back in $0.
	     X ($17)   - dividend.
	     Y ($18)   - divisor.
	     $f0, $f1  - floating-point copies of X and Y; $f0 then holds
			 the quotient.
	     $f10      - copy of the FPCR taken on entry and written back
			 once the fp work is done.
	     t0-t5, AT - scratch.

	   Strategy: perform the division in double precision.  When X is
	   exactly representable as a double, the truncated fp quotient is
	   used as is.  Otherwise ($x_big) it is only an estimate, which is
	   corrected with integer shift/subtract loops working on the
	   magnitudes of X and Y.  In both cases the remainder is computed
	   as X - Q*Y at $egress.

	   Y == 0 branches to $divbyzero, which raises GEN_INTDIV through
	   PAL_gentrap and stores zero into both result slots.  */

	/* AT is used explicitly as a temporary throughout this routine.  */
	.set noat

	.align 4
	.globl ldiv
	.ent ldiv
ldiv:
	.frame sp, FRAME, ra
#if FRAME > 0
	lda	sp, -FRAME(sp)
#endif
#ifdef PROF
	/* Profiling build: load gp and call _mcount.  Macro expansion is
	   enabled only around these instructions.  */
	.set	macro
	ldgp	gp, 0(pv)
	lda	AT, _mcount
	jsr	AT, (AT), _mcount
	.set	nomacro
	.prologue 1
#else
	.prologue 0
#endif

	beq	Y, $divbyzero
	/* Exception barrier, then keep a copy of the FPCR in $f10 so that
	   it can be written back after the fp division.  */
	excb
	mf_fpcr	$f10

	/* Get X into $f0 and Y into $f1.  The macro comes from div_libc.h;
	   NOTE(review): the literals 0 and 8 look like frame offsets used
	   when the transfer goes through memory -- confirm there.  */
	_ITOFT2	X, $f0, 0, Y, $f1, 8

	.align	4
	/* Convert both operands to double and divide with chopped rounding.
	   The integer check that follows does not depend on the result.  */
	cvtqt	$f0, $f0
	cvtqt	$f1, $f1
	divt/c	$f0, $f1, $f0
	unop

	/* Check to see if X fit in the double as an exact value.  Shifting
	   left and then arithmetically right by 64-53 bits sign-extends the
	   low 53 bits of X; if that reproduces X, no bits were lost.  */
	sll	X, (64-53), AT
	sra	AT, (64-53), AT
	cmpeq	X, AT, AT
	beq	AT, $x_big

	/* If we get here, we're expecting exact results from the division.
	   Do nothing else besides convert and clean up.  */
	cvttq/c	$f0, $f0
	excb
	mt_fpcr	$f10
	_FTOIT	$f0, $0, 0

$egress:
	/* $0 holds the quotient, and X and Y hold the caller's signed
	   operands.  remainder = X - quotient * Y.  */
	mulq	$0, Y, $1
	subq	X, $1, $1

	/* Store { quotient, remainder } and return the result address.  */
	stq	$0, 0($16)
	stq	$1, 8($16)
	mov	$16, $0

#if FRAME > 0
	lda	sp, FRAME(sp)
#endif
	ret

	.align	4
$x_big:
	/* If we get here, X is large enough that we don't expect exact
	   results, and neither X nor Y got mis-translated for the fp
	   division.  Our task is to take the fp result, figure out how
	   far it's off from the correct result and compute a fixup.  */

#define Q	v0		/* quotient */
#define R	t0		/* remainder */
#define SY	t1		/* scaled Y */
#define S	t2		/* scalar */
#define QY	t3		/* Q*Y */

	/* The fixup code below can only handle unsigned values.  T5 stays
	   zero when neither operand is negative, which makes the sign
	   restoration at $q_low_ret a no-op.  */
	or	X, Y, AT
	mov	$31, t5
	blt	AT, $fix_sign_in
$fix_sign_in_ret1:
	cvttq/c	$f0, $f0

	_FTOIT	$f0, Q, 8
$fix_sign_in_ret2:
	/* From here on X, Y and Q are all non-negative magnitudes.  */
	mulq	Q, Y, QY
	excb
	mt_fpcr	$f10

	.align	4
	/* Q too large?  Then Q*Y exceeds X.  */
	subq	QY, X, R
	mov	Y, SY
	mov	1, S
	bgt	R, $q_high

$q_high_ret:
	/* Q too small?  Then X exceeds Q*Y.  */
	subq	X, QY, R
	mov	Y, SY
	mov	1, S
	bgt	R, $q_low

$q_low_ret:
	/* Undo the in-place negations made by $fix_sign_in so that $egress
	   computes the remainder from the caller's signed X and Y.  Without
	   this the remainder would be computed from |X| and |Y| together
	   with a signed quotient, which is wrong whenever an operand was
	   negative.  */
	and	t5, 8, AT
	negq	Y, t4
	cmovne	AT, t4, Y

	and	t5, 4, AT
	negq	X, t4
	cmovne	AT, t4, X

	/* Give the quotient its sign (bit 0 of T5).  */
	negq	Q, t4
	cmovlbs	t5, t4, Q
	br	$egress

	.align	4
	/* The quotient that we computed was too large.  We need to reduce
	   it by S such that Y*S >= R.  Obviously the closer we get to the
	   correct value the better, but overshooting high is ok, as we'll
	   fix that up later.  */
0:
	addq	SY, SY, SY
	addq	S, S, S
$q_high:
	cmpult	SY, R, AT
	bne	AT, 0b

	subq	Q, S, Q
	unop
	subq	QY, SY, QY
	br	$q_high_ret

	.align	4
	/* The quotient that we computed was too small.  Divide the current
	   remainder (R) by Y and add that to the existing quotient (Q).
	   The expectation, of course, is that R is much smaller than X.  */
	/* Begin with a shift-up loop.  Compute S such that Y*S >= R.  We
	   already have a copy of Y in SY and the value 1 in S.  */
0:
	addq	SY, SY, SY
	addq	S, S, S
$q_low:
	cmpult	SY, R, AT
	bne	AT, 0b

	/* Shift-down and subtract loop.  Each iteration compares our scaled
	   Y (SY = Y*S) with the remainder (R); if SY <= R then S more
	   copies of Y fit into R, so add S to the quotient (Q) and take SY
	   off R.  Then halve S and SY; stop once S reaches zero.  */
2:	addq	Q, S, t3
	srl	S, 1, S
	cmpule	SY, R, AT
	subq	R, SY, t4

	cmovne	AT, t3, Q
	cmovne	AT, t4, R
	srl	SY, 1, SY
	bne	S, 2b

	br	$q_low_ret

	.align	4
$fix_sign_in:
	/* If we got here, then X|Y is negative.  Need to adjust everything
	   such that we're doing unsigned division in the fixup loop.  */
	/* T5 records the changes we had to make:
		bit 0:	set if the result should be negative.
		bit 2:	set if X was negated.
		bit 3:	set if Y was negated.
	   $q_low_ret uses these bits to put everything back.  */
	xor	X, Y, AT
	cmplt	AT, 0, t5
	cmplt	X, 0, AT
	negq	X, t0

	s4addq	AT, t5, t5
	cmovne	AT, t0, X
	cmplt	Y, 0, AT
	negq	Y, t0

	s8addq	AT, t5, t5
	cmovne	AT, t0, Y
	blbc	t5, $fix_sign_in_ret1

	/* The signs differ, so the fp quotient is negative: convert it
	   here and negate it to obtain the magnitude.  */
	cvttq/c	$f0, $f0
	_FTOIT	$f0, Q, 8
	.align	3
	negq	Q, Q
	br	$fix_sign_in_ret2

$divbyzero:
	/* Keep the result address in v0 (it is also the return value),
	   since a0 is needed for the trap code.  */
	mov	a0, v0
	lda	a0, GEN_INTDIV
	call_pal PAL_gentrap
	/* If execution continues past the trap, report { 0, 0 }.  */
	stq	zero, 0(v0)
	stq	zero, 8(v0)

#if FRAME > 0
	lda	sp, FRAME(sp)
#endif
	ret

	.end	ldiv

/* Make lldiv and imaxdiv available as weak aliases of ldiv (weak_alias
   is a macro defined outside this file).  */
weak_alias (ldiv, lldiv)
weak_alias (ldiv, imaxdiv)
Commit	Line	Data
2b778ceb	1	/* Copyright (C) 1996-2021 Free Software Foundation, Inc.
60c74cf0	2	This file is part of the GNU C Library.
cb23ff1f	3	Contributed by Richard Henderson <rth@tamu.edu>.
60c74cf0 UD	4
60c74cf0 UD	5	The GNU C Library is free software; you can redistribute it and/or
3214b89b AJ	6	modify it under the terms of the GNU Lesser General Public
	7	License as published by the Free Software Foundation; either
	8	version 2.1 of the License, or (at your option) any later version.
60c74cf0 UD	9
	10	The GNU C Library is distributed in the hope that it will be useful,
	11	but WITHOUT ANY WARRANTY; without even the implied warranty of
	12	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
3214b89b	13	Lesser General Public License for more details.
60c74cf0	14
3214b89b	15	You should have received a copy of the GNU Lesser General Public
ab84e3ff	16	License along with the GNU C Library. If not, see
5a82c748	17	<https://www.gnu.org/licenses/>. */
60c74cf0	18
bd68d850	19	#include "div_libc.h"
60c74cf0	20
bd68d850 RH	21	#undef FRAME
	22	#ifdef __alpha_fix__
	23	#define FRAME 0
60c74cf0	24	#else
bd68d850	25	#define FRAME 16
60c74cf0 UD	26	#endif
60c74cf0 UD	27
bd68d850 RH	28	#undef X
	29	#undef Y
	30	#define X $17
	31	#define Y $18
	32
60c74cf0 UD	33	.set noat
	34
	35	.align 4
	36	.globl ldiv
	37	.ent ldiv
	38	ldiv:
bd68d850 RH	39	.frame sp, FRAME, ra
	40	#if FRAME > 0
	41	lda sp, -FRAME(sp)
	42	#endif
60c74cf0	43	#ifdef PROF
bd68d850	44	.set macro
60c74cf0 UD	45	ldgp gp, 0(pv)
	46	lda AT, _mcount
	47	jsr AT, (AT), _mcount
bd68d850	48	.set nomacro
60c74cf0 UD	49	.prologue 1
	50	#else
	51	.prologue 0
	52	#endif
	53
bd68d850	54	beq Y, $divbyzero
a61c91b0 RH	55	excb
a61c91b0 RH	56	mf_fpcr $f10
bd68d850 RH	57
	58	_ITOFT2 X, $f0, 0, Y, $f1, 8
	59
	60	.align 4
	61	cvtqt $f0, $f0
	62	cvtqt $f1, $f1
	63	divt/c $f0, $f1, $f0
	64	unop
	65
	66	/* Check to see if X fit in the double as an exact value. */
	67	sll X, (64-53), AT
	68	sra AT, (64-53), AT
	69	cmpeq X, AT, AT
	70	beq AT, $x_big
	71
	72	/* If we get here, we're expecting exact results from the division.
	73	Do nothing else besides convert and clean up. */
	74	cvttq/c $f0, $f0
a61c91b0 RH	75	excb
a61c91b0 RH	76	mt_fpcr $f10
bd68d850 RH	77	_FTOIT $f0, $0, 0
	78
	79	$egress:
	80	mulq $0, Y, $1
	81	subq X, $1, $1
	82
	83	stq $0, 0($16)
	84	stq $1, 8($16)
	85	mov $16, $0
	86
	87	#if FRAME > 0
	88	lda sp, FRAME(sp)
	89	#endif
	90	ret
	91
	92	.align 4
	93	$x_big:
	94	/* If we get here, X is large enough that we don't expect exact
	95	results, and neither X nor Y got mis-translated for the fp
	96	division. Our task is to take the fp result, figure out how
	97	far it's off from the correct result and compute a fixup. */
	98
	99	#define Q v0 /* quotient */
	100	#define R t0 /* remainder */
	101	#define SY t1 /* scaled Y */
	102	#define S t2 /* scalar */
	103	#define QY t3 /* Q*Y */
	104
	105	/* The fixup code below can only handle unsigned values. */
	106	or X, Y, AT
	107	mov $31, t5
	108	blt AT, $fix_sign_in
	109	$fix_sign_in_ret1:
	110	cvttq/c $f0, $f0
	111
	112	_FTOIT $f0, Q, 8
bd68d850 RH	113	$fix_sign_in_ret2:
bd68d850 RH	114	mulq Q, Y, QY
a61c91b0 RH	115	excb
a61c91b0 RH	116	mt_fpcr $f10
bd68d850 RH	117
	118	.align 4
	119	subq QY, X, R
	120	mov Y, SY
	121	mov 1, S
	122	bgt R, $q_high
	123
	124	$q_high_ret:
	125	subq X, QY, R
	126	mov Y, SY
	127	mov 1, S
	128	bgt R, $q_low
	129
	130	$q_low_ret:
	131	negq Q, t4
	132	cmovlbs t5, t4, Q
	133	br $egress
	134
	135	.align 4
	136	/* The quotient that we computed was too large. We need to reduce
	137	it by S such that Y*S >= R. Obviously the closer we get to the
	138	correct value the better, but overshooting high is ok, as we'll
	139	fix that up later. */
	140	0:
	141	addq SY, SY, SY
	142	addq S, S, S
	143	$q_high:
	144	cmpult SY, R, AT
	145	bne AT, 0b
	146
	147	subq Q, S, Q
	148	unop
	149	subq QY, SY, QY
	150	br $q_high_ret
	151
	152	.align 4
5556231d	153	/* The quotient that we computed was too small. Divide Y by the
bd68d850 RH	154	current remainder (R) and add that to the existing quotient (Q).
	155	The expectation, of course, is that R is much smaller than X. */
	156	/* Begin with a shift-up loop. Compute S such that Y*S >= R. We
	157	already have a copy of Y in SY and the value 1 in S. */
	158	0:
	159	addq SY, SY, SY
	160	addq S, S, S
	161	$q_low:
	162	cmpult SY, R, AT
	163	bne AT, 0b
	164
	165	/* Shift-down and subtract loop. Each iteration compares our scaled
	166	Y (SY) with the remainder (R); if SY <= R then X is divisible by
	167	Y's scalar (S) so add it to the quotient (Q). */
	168	2: addq Q, S, t3
	169	srl S, 1, S
	170	cmpule SY, R, AT
	171	subq R, SY, t4
	172
	173	cmovne AT, t3, Q
	174	cmovne AT, t4, R
	175	srl SY, 1, SY
	176	bne S, 2b
	177
	178	br $q_low_ret
	179
	180	.align 4
	181	$fix_sign_in:
	182	/* If we got here, then X|Y is negative. Need to adjust everything
	183	such that we're doing unsigned division in the fixup loop. */
	184	/* T5 is true if result should be negative. */
	185	xor X, Y, AT
	186	cmplt AT, 0, t5
	187	cmplt X, 0, AT
	188	negq X, t0
	189
	190	cmovne AT, t0, X
	191	cmplt Y, 0, AT
	192	negq Y, t0
	193
	194	cmovne AT, t0, Y
	195	blbc t5, $fix_sign_in_ret1
	196
	197	cvttq/c $f0, $f0
	198	_FTOIT $f0, Q, 8
	199	.align 3
	200	negq Q, Q
	201	br $fix_sign_in_ret2
60c74cf0 UD	202
	203	$divbyzero:
	204	mov a0, v0
	205	lda a0, GEN_INTDIV
	206	call_pal PAL_gentrap
60c74cf0 UD	207	stq zero, 0(v0)
60c74cf0 UD	208	stq zero, 8(v0)
60c74cf0	209
bd68d850 RH	210	#if FRAME > 0
	211	lda sp, FRAME(sp)
	212	#endif
	213	ret
60c74cf0	214
bd68d850	215	.end ldiv
228f398a RH	216
228f398a RH	217	weak_alias (ldiv, lldiv)
dc0c31fb	218	weak_alias (ldiv, imaxdiv)