[thirdparty/glibc.git] / sysdeps / alpha / ldiv.S

/* Copyright (C) 1996-2024 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library.  If not, see
   <https://www.gnu.org/licenses/>.  */

#include "div_libc.h"

/* Size in bytes of the stack frame that ldiv allocates on entry and
   releases before each return.  No frame is allocated when
   __alpha_fix__ is defined; otherwise 16 bytes are reserved.
   NOTE(review): presumably the 16 bytes are the two 8-byte scratch
   slots (offsets 0 and 8) passed to the _ITOFT2/_FTOIT macros, which
   would be unnecessary when direct integer<->fp register moves are
   available -- confirm against div_libc.h.  */
#undef FRAME
#ifdef __alpha_fix__
#define FRAME 0
#else
#define FRAME 16
#endif

/* Registers holding the dividend (X) and the divisor (Y) in ldiv.  The
   #undefs discard any earlier definitions of these names (presumably
   the ones made by div_libc.h -- confirm).  */
#undef X
#undef Y
#define X $17
#define Y $18

	.set noat

/* ldiv -- signed 64-bit division returning quotient and remainder.

   In:   $16  address of the two-quadword result (quotient at offset 0,
	      remainder at offset 8)
	 X    ($17) dividend
	 Y    ($18) divisor
   Out:  $0   the result address that was passed in $16

   Method: the quotient is first estimated with a double-precision
   floating-point divide using chopped (truncating) rounding.  If X
   survives sign-extension from 53 bits the estimate is used as is.
   Otherwise ($x_big) the operands are made non-negative, the estimate
   is corrected with integer shift/subtract loops, the sign of the
   quotient is re-applied and the caller's signed operands are restored.
   In every case the remainder is then computed at $egress as X - Q*Y.

   A zero divisor raises GEN_INTDIV through PAL_gentrap; if that returns,
   both result fields are stored as zero.

   The FPCR is read into $f10 on entry and written back from $f10 before
   leaving the floating-point part of the routine.

   Integer registers written: $0-$8 (v0, t0-t7), AT, and $16 on the
   divide-by-zero path.  X and Y are modified temporarily on the $x_big
   path but hold their entry values again when $egress is reached.  */

	.align 4
	.globl ldiv
	.ent ldiv
ldiv:
	.frame sp, FRAME, ra
#if FRAME > 0
	lda	sp, -FRAME(sp)
#endif
#ifdef PROF
	/* Profiling build: load gp and call _mcount, with assembler macro
	   expansion enabled just for this sequence.  */
	.set	macro
	ldgp	gp, 0(pv)
	lda	AT, _mcount
	jsr	AT, (AT), _mcount
	.set	nomacro
	.prologue 1
#else
	.prologue 0
#endif

	beq	Y, $divbyzero
	/* Save the FPCR in $f10; it is written back with mt_fpcr before
	   the routine leaves its floating-point section.  */
	excb
	mf_fpcr	$f10

	/* Move X into $f0 and Y into $f1.  The macro comes from div_libc.h;
	   the 0 and 8 arguments are presumably stack scratch offsets used
	   when a direct register move is unavailable -- confirm there.  */
	_ITOFT2	X, $f0, 0, Y, $f1, 8

	.align	4
	/* Convert both operands to double and form the chopped quotient.  */
	cvtqt	$f0, $f0
	cvtqt	$f1, $f1
	divt/c	$f0, $f1, $f0
	unop

	/* Check to see if X fit in the double as an exact value: shifting
	   left then arithmetically right by 11 sign-extends the low 53 bits,
	   so the compare fails exactly when X needs more than 53 bits.  */
	sll	X, (64-53), AT
	sra	AT, (64-53), AT
	cmpeq	X, AT, AT
	beq	AT, $x_big

	/* If we get here, we're expecting exact results from the division.
	   Do nothing else besides convert and clean up.  */
	cvttq/c	$f0, $f0
	excb
	mt_fpcr	$f10
	_FTOIT	$f0, $0, 0

	/* Common exit.  Expects the signed quotient in $0 and the caller's
	   original signed X and Y; derives the remainder as X - Q*Y, stores
	   both fields and returns the result address.  */
$egress:
	mulq	$0, Y, $1
	subq	X, $1, $1

	stq	$0, 0($16)
	stq	$1, 8($16)
	mov	$16, $0

#if FRAME > 0
	lda	sp, FRAME(sp)
#endif
	ret

	.align	4
$x_big:
	/* If we get here, X is large enough that we don't expect exact
	   results, and neither X nor Y got mis-translated for the fp
	   division.  Our task is to take the fp result, figure out how
	   far it's off from the correct result and compute a fixup.  */

#define Q	v0		/* quotient */
#define R	t0		/* remainder */
#define SY	t1		/* scaled Y */
#define S	t2		/* scalar */
#define QY	t3		/* Q*Y */
#define XSAVE	$7		/* caller's signed X */
#define YSAVE	$8		/* caller's signed Y */

	/* Keep the caller's signed operands.  $fix_sign_in replaces X and Y
	   by their absolute values for the unsigned fixup, but $egress needs
	   the signed values so that the remainder X - Q*Y takes the sign of
	   the dividend.  They are restored at $q_low_ret.  */
	mov	X, XSAVE
	mov	Y, YSAVE

	/* The fixup code below can only handle unsigned values.  t5 is
	   cleared here and only set by $fix_sign_in.  */
	or	X, Y, AT
	mov	$31, t5
	blt	AT, $fix_sign_in
$fix_sign_in_ret1:
	cvttq/c	$f0, $f0

	_FTOIT	$f0, Q, 8
$fix_sign_in_ret2:
	/* Here X, Y and the estimate Q are all treated as non-negative.  */
	mulq	Q, Y, QY
	excb
	mt_fpcr	$f10

	.align	4
	/* R = Q*Y - X > 0 means the estimate overshot.  */
	subq	QY, X, R
	mov	Y, SY
	mov	1, S
	bgt	R, $q_high

$q_high_ret:
	/* R = X - Q*Y > 0 means there is remainder left to divide.  */
	subq	X, QY, R
	mov	Y, SY
	mov	1, S
	bgt	R, $q_low

$q_low_ret:
	/* Negate the quotient if the low bit of t5 says the signs of the
	   operands differed.  */
	negq	Q, t4
	cmovlbs	t5, t4, Q
	/* Put back the caller's signed operands so that $egress computes
	   the remainder from them rather than from |X| and |Y|.  */
	mov	XSAVE, X
	mov	YSAVE, Y
	br	$egress

	.align	4
	/* The quotient that we computed was too large.  We need to reduce
	   it by S such that Y*S >= R.  Obviously the closer we get to the
	   correct value the better, but overshooting high is ok, as we'll
	   fix that up later.  */
0:
	addq	SY, SY, SY
	addq	S, S, S
$q_high:
	cmpult	SY, R, AT
	bne	AT, 0b

	subq	Q, S, Q
	unop
	subq	QY, SY, QY
	br	$q_high_ret

	.align	4
	/* The quotient that we computed was too small.  Divide Y by the
	   current remainder (R) and add that to the existing quotient (Q).
	   The expectation, of course, is that R is much smaller than X.  */
	/* Begin with a shift-up loop.  Compute S such that Y*S >= R.  We
	   already have a copy of Y in SY and the value 1 in S.  */
0:
	addq	SY, SY, SY
	addq	S, S, S
$q_low:
	cmpult	SY, R, AT
	bne	AT, 0b

	/* Shift-down and subtract loop.  Each iteration compares our scaled
	   Y (SY) with the remainder (R); if SY <= R then X is divisible by
	   Y's scalar (S) so add it to the quotient (Q).  Note that t3 is
	   reused here as a scratch register; QY is no longer needed.  */
2:	addq	Q, S, t3
	srl	S, 1, S
	cmpule	SY, R, AT
	subq	R, SY, t4

	cmovne	AT, t3, Q
	cmovne	AT, t4, R
	srl	SY, 1, SY
	bne	S, 2b

	br	$q_low_ret

	.align	4
$fix_sign_in:
	/* If we got here, then X|Y is negative.  Need to adjust everything
	   such that we're doing unsigned division in the fixup loop.  */
	/* T5 is true if result should be negative.  */
	xor	X, Y, AT
	cmplt	AT, 0, t5
	cmplt	X, 0, AT
	negq	X, t0

	cmovne	AT, t0, X
	cmplt	Y, 0, AT
	negq	Y, t0

	cmovne	AT, t0, Y
	blbc	t5, $fix_sign_in_ret1

	/* Signs differ, so the fp quotient is negative: convert it here and
	   negate it so the fixup code sees a non-negative estimate.  */
	cvttq/c	$f0, $f0
	_FTOIT	$f0, Q, 8
	.align	3
	negq	Q, Q
	br	$fix_sign_in_ret2

$divbyzero:
	/* Move the result address to v0 before a0 is reused for the trap
	   code, so that it can still be stored through and returned.  */
	mov	a0, v0
	lda	a0, GEN_INTDIV
	call_pal PAL_gentrap
	/* Reached only if the trap returns: report zero for both fields.  */
	stq	zero, 0(v0)
	stq	zero, 8(v0)

#if FRAME > 0
	lda	sp, FRAME(sp)
#endif
	ret

	.end	ldiv

/* lldiv and imaxdiv are provided as weak aliases of ldiv.  */
weak_alias (ldiv, lldiv)
weak_alias (ldiv, imaxdiv)
Commit	Line	Data
dff8da6b	1	/* Copyright (C) 1996-2024 Free Software Foundation, Inc.
60c74cf0 UD	2	This file is part of the GNU C Library.
	3
	4	The GNU C Library is free software; you can redistribute it and/or
3214b89b AJ	5	modify it under the terms of the GNU Lesser General Public
	6	License as published by the Free Software Foundation; either
	7	version 2.1 of the License, or (at your option) any later version.
60c74cf0 UD	8
	9	The GNU C Library is distributed in the hope that it will be useful,
	10	but WITHOUT ANY WARRANTY; without even the implied warranty of
	11	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
3214b89b	12	Lesser General Public License for more details.
60c74cf0	13
3214b89b	14	You should have received a copy of the GNU Lesser General Public
ab84e3ff	15	License along with the GNU C Library. If not, see
5a82c748	16	<https://www.gnu.org/licenses/>. */
60c74cf0	17
bd68d850	18	#include "div_libc.h"
60c74cf0	19
bd68d850 RH	20	#undef FRAME
	21	#ifdef __alpha_fix__
	22	#define FRAME 0
60c74cf0	23	#else
bd68d850	24	#define FRAME 16
60c74cf0 UD	25	#endif
60c74cf0 UD	26
bd68d850 RH	27	#undef X
	28	#undef Y
	29	#define X $17
	30	#define Y $18
	31
60c74cf0 UD	32	.set noat
	33
	34	.align 4
	35	.globl ldiv
	36	.ent ldiv
	37	ldiv:
bd68d850 RH	38	.frame sp, FRAME, ra
	39	#if FRAME > 0
	40	lda sp, -FRAME(sp)
	41	#endif
60c74cf0	42	#ifdef PROF
bd68d850	43	.set macro
60c74cf0 UD	44	ldgp gp, 0(pv)
	45	lda AT, _mcount
	46	jsr AT, (AT), _mcount
bd68d850	47	.set nomacro
60c74cf0 UD	48	.prologue 1
	49	#else
	50	.prologue 0
	51	#endif
	52
bd68d850	53	beq Y, $divbyzero
a61c91b0 RH	54	excb
a61c91b0 RH	55	mf_fpcr $f10
bd68d850 RH	56
	57	_ITOFT2 X, $f0, 0, Y, $f1, 8
	58
	59	.align 4
	60	cvtqt $f0, $f0
	61	cvtqt $f1, $f1
	62	divt/c $f0, $f1, $f0
	63	unop
	64
	65	/* Check to see if X fit in the double as an exact value. */
	66	sll X, (64-53), AT
	67	sra AT, (64-53), AT
	68	cmpeq X, AT, AT
	69	beq AT, $x_big
	70
	71	/* If we get here, we're expecting exact results from the division.
	72	Do nothing else besides convert and clean up. */
	73	cvttq/c $f0, $f0
a61c91b0 RH	74	excb
a61c91b0 RH	75	mt_fpcr $f10
bd68d850 RH	76	_FTOIT $f0, $0, 0
	77
	78	$egress:
	79	mulq $0, Y, $1
	80	subq X, $1, $1
	81
	82	stq $0, 0($16)
	83	stq $1, 8($16)
	84	mov $16, $0
	85
	86	#if FRAME > 0
	87	lda sp, FRAME(sp)
	88	#endif
	89	ret
	90
	91	.align 4
	92	$x_big:
	93	/* If we get here, X is large enough that we don't expect exact
	94	results, and neither X nor Y got mis-translated for the fp
	95	division. Our task is to take the fp result, figure out how
	96	far it's off from the correct result and compute a fixup. */
	97
	98	#define Q v0 /* quotient */
	99	#define R t0 /* remainder */
	100	#define SY t1 /* scaled Y */
	101	#define S t2 /* scalar */
	102	#define QY t3 /* Q*Y */
	103
	104	/* The fixup code below can only handle unsigned values. */
	105	or X, Y, AT
	106	mov $31, t5
	107	blt AT, $fix_sign_in
	108	$fix_sign_in_ret1:
	109	cvttq/c $f0, $f0
	110
	111	_FTOIT $f0, Q, 8
bd68d850 RH	112	$fix_sign_in_ret2:
bd68d850 RH	113	mulq Q, Y, QY
a61c91b0 RH	114	excb
a61c91b0 RH	115	mt_fpcr $f10
bd68d850 RH	116
	117	.align 4
	118	subq QY, X, R
	119	mov Y, SY
	120	mov 1, S
	121	bgt R, $q_high
	122
	123	$q_high_ret:
	124	subq X, QY, R
	125	mov Y, SY
	126	mov 1, S
	127	bgt R, $q_low
	128
	129	$q_low_ret:
	130	negq Q, t4
	131	cmovlbs t5, t4, Q
	132	br $egress
	133
	134	.align 4
	135	/* The quotient that we computed was too large. We need to reduce
	136	it by S such that Y*S >= R. Obviously the closer we get to the
	137	correct value the better, but overshooting high is ok, as we'll
	138	fix that up later. */
	139	0:
	140	addq SY, SY, SY
	141	addq S, S, S
	142	$q_high:
	143	cmpult SY, R, AT
	144	bne AT, 0b
	145
	146	subq Q, S, Q
	147	unop
	148	subq QY, SY, QY
	149	br $q_high_ret
	150
	151	.align 4
5556231d	152	/* The quotient that we computed was too small. Divide Y by the
bd68d850 RH	153	current remainder (R) and add that to the existing quotient (Q).
	154	The expectation, of course, is that R is much smaller than X. */
	155	/* Begin with a shift-up loop. Compute S such that Y*S >= R. We
	156	already have a copy of Y in SY and the value 1 in S. */
	157	0:
	158	addq SY, SY, SY
	159	addq S, S, S
	160	$q_low:
	161	cmpult SY, R, AT
	162	bne AT, 0b
	163
	164	/* Shift-down and subtract loop. Each iteration compares our scaled
	165	Y (SY) with the remainder (R); if SY <= R then X is divisible by
	166	Y's scalar (S) so add it to the quotient (Q). */
	167	2: addq Q, S, t3
	168	srl S, 1, S
	169	cmpule SY, R, AT
	170	subq R, SY, t4
	171
	172	cmovne AT, t3, Q
	173	cmovne AT, t4, R
	174	srl SY, 1, SY
	175	bne S, 2b
	176
	177	br $q_low_ret
	178
	179	.align 4
	180	$fix_sign_in:
	181	/* If we got here, then X|Y is negative. Need to adjust everything
	182	such that we're doing unsigned division in the fixup loop. */
	183	/* T5 is true if result should be negative. */
	184	xor X, Y, AT
	185	cmplt AT, 0, t5
	186	cmplt X, 0, AT
	187	negq X, t0
	188
	189	cmovne AT, t0, X
	190	cmplt Y, 0, AT
	191	negq Y, t0
	192
	193	cmovne AT, t0, Y
	194	blbc t5, $fix_sign_in_ret1
	195
	196	cvttq/c $f0, $f0
	197	_FTOIT $f0, Q, 8
	198	.align 3
	199	negq Q, Q
	200	br $fix_sign_in_ret2
60c74cf0 UD	201
	202	$divbyzero:
	203	mov a0, v0
	204	lda a0, GEN_INTDIV
	205	call_pal PAL_gentrap
60c74cf0 UD	206	stq zero, 0(v0)
60c74cf0 UD	207	stq zero, 8(v0)
60c74cf0	208
bd68d850 RH	209	#if FRAME > 0
	210	lda sp, FRAME(sp)
	211	#endif
	212	ret
60c74cf0	213
bd68d850	214	.end ldiv
228f398a RH	215
228f398a RH	216	weak_alias (ldiv, lldiv)
dc0c31fb	217	weak_alias (ldiv, imaxdiv)