[thirdparty/glibc.git] / sysdeps / sparc / sparc64 / addmul_1.S

/* SPARC v9 __mpn_addmul_1 -- Multiply a limb vector with a single limb and
   add the product to a second limb vector.

   Copyright (C) 1996-2021 Free Software Foundation, Inc.

   This file is part of the GNU MP Library.

   The GNU MP Library is free software; you can redistribute it and/or modify
   it under the terms of the GNU Lesser General Public License as published by
   the Free Software Foundation; either version 2.1 of the License, or (at your
   option) any later version.

   The GNU MP Library is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
   License for more details.

   You should have received a copy of the GNU Lesser General Public License
   along with the GNU MP Library; see the file COPYING.LIB.  If not,
   see <https://www.gnu.org/licenses/>.  */

#include <sysdep.h>


/* __mpn_addmul_1 (res_ptr, s1_ptr, size, s2_limb)

   For each of the SIZE 64-bit limbs, compute
       res_ptr[i] += s1_ptr[i] * s2_limb
   feeding the carry limb of one step into the next.  The carry limb that
   remains after the last limb is handed back in the caller's %o0.

   INPUT PARAMETERS (caller's registers; after the `save' below the same
   values are read in this function as %i0-%i3)
   res_ptr	o0
   s1_ptr	o1
   size		o2
   s2_limb	o3

   The loop tests its counter only after the body has run, so the body
   executes at least once.  SIZE is presumably guaranteed to be >= 1 by
   the callers; nothing here checks it -- confirm against the mpn
   interface.

   mulx delivers only the low 64 bits of a product, so each 64x64-bit
   multiplication is assembled from four 32x32-bit partial products.  */


ENTRY(__mpn_addmul_1)
	save	%sp,-192,%sp		! new register window; args now in %i0-%i3

	! Loop set-up.  %o7 is a negative byte offset that counts up to zero;
	! %o3 and %o4 hold the addresses just past the last limb of each
	! vector, so [%o3+%o7] and [%o4+%o7] walk the limbs first to last.
	sub	%g0,%i2,%o7		! o7 = -size
	mov	0,%o0			! zero cy_limb
	sllx	%o7,3,%o7		! o7 = -size * 8 (byte offset)
	sethi	%hi(0x80000000),%o2	! o2 = 0x80000000, doubled below
	srl	%i3,0,%o1		! extract low 32 bits of s2_limb
	sub	%i1,%o7,%o3		! o3 = s1_ptr + size * 8
	srlx	%i3,32,%i3		! extract high 32 bits of s2_limb
	sub	%i0,%o7,%o4		! o4 = res_ptr + size * 8
	add	%o2,%o2,%o2		! o2 = 0x100000000

	! Position of the four 64-bit partial products inside the 128-bit
	! product (most significant bits on the left):
	!
	!   +------hi------+                   (weight 2^64)
	!           +-----mid-1----+           (weight 2^32)
	!           +-----mid-2----+           (weight 2^32)
	!                   +------lo------+   (weight 2^0)
	!
	! Only addcc changes the condition codes in the loop; the plain
	! mov/add instructions scheduled between an addcc and its movcs
	! leave the carry flag untouched.
1:
	ldx	[%o3+%o7],%g5		! g5 = current s1 limb
	srl	%g5,0,%i0		! zero hi bits
	ldx	[%o4+%o7],%l1		! l1 = current res limb
	srlx	%g5,32,%g5		! g5 = high 32 bits of the s1 limb
	mulx	%o1,%i0,%i4		! lo product
	mulx	%i3,%i0,%i1		! mid-1 product
	mulx	%o1,%g5,%l2		! mid-2 product
	mulx	%i3,%g5,%i5		! hi product
	srlx	%i4,32,%i0		! extract high 32 bits of lo product...
	add	%i1,%i0,%i1		! ...and add it to the mid-1 product
					! (cannot carry: mid-1 <= (2^32-1)^2)
	addcc	%i1,%l2,%i1		! add mid products
	mov	0,%l0			! we need the carry from that add...
	movcs	%xcc,%o2,%l0		! ...compute it and...
	sllx	%i1,32,%i0		!  align low bits of mid product
	add	%i5,%l0,%i5		! ...add to bit 32 of the hi product
	srl	%i4,0,%g5		! zero high 32 bits of lo product
	add	%i0,%g5,%i0		! combine into low 64 bits of result
	srlx	%i1,32,%i1		! extract high bits of mid product...
	addcc	%i0,%o0,%i0		!  add cy_limb to low 64 bits of result
	add	%i5,%i1,%i1		! ...and add them to the high result
	mov	0,%g5			! g5 counts carries out of the low limb
	movcs	%xcc,1,%g5		! g5 = 1 if adding cy_limb carried
	addcc	%l1,%i0,%i0		! add the res limb to the low 64 bits
	stx	%i0,[%o4+%o7]		! store the updated res limb
	add	%g5,1,%l1		! l1 = g5 + 1 (l1 is free again)
	movcs	%xcc,%l1,%g5		! g5 += 1 if adding the res limb carried
	addcc	%o7,8,%o7		! step to the next limb; zero when done
	bne,pt	%xcc,1b			! loop while the offset is non-zero
	 add	%i1,%g5,%o0		! compute new cy_limb
					! (delay slot: runs on every iteration)

	! Return.  The restore in the delay slot reads %o0 in this window
	! and writes the sum %o0 + %g0 to %o0 of the caller's window, which
	! is how the final cy_limb becomes the return value.
	jmpl	%i7+8, %g0
	 restore %o0,%g0,%o0

END(__mpn_addmul_1)
Commit	Line	Data
5ae3e846 UD	1	/* SPARC v9 __mpn_addmul_1 -- Multiply a limb vector with a single limb and
5ae3e846 UD	2	add the product to a second limb vector.
6b628d36	3
2b778ceb	4	Copyright (C) 1996-2021 Free Software Foundation, Inc.
6b628d36	5
5ae3e846	6	This file is part of the GNU MP Library.
6b628d36	7
5ae3e846	8	The GNU MP Library is free software; you can redistribute it and/or modify
6d84f89a AJ	9	it under the terms of the GNU Lesser General Public License as published by
6d84f89a AJ	10	the Free Software Foundation; either version 2.1 of the License, or (at your
5ae3e846	11	option) any later version.
6b628d36	12
5ae3e846 UD	13	The GNU MP Library is distributed in the hope that it will be useful, but
5ae3e846 UD	14	WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
6d84f89a	15	or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
5ae3e846	16	License for more details.
6b628d36	17
6d84f89a	18	You should have received a copy of the GNU Lesser General Public License
59ba27a6	19	along with the GNU MP Library; see the file COPYING.LIB. If not,
5a82c748	20	see <https://www.gnu.org/licenses/>. */
6b628d36	21
5ae3e846	22	#include <sysdep.h>
6b628d36	23
6b628d36	24
5ae3e846 UD	25	/* INPUT PARAMETERS
	26	res_ptr o0
	27	s1_ptr o1
	28	size o2
	29	s2_limb o3 */
	30
	31
	32	ENTRY(__mpn_addmul_1)
9c4c0024	33	save %sp,-192,%sp
5ae3e846	34
6b628d36	35	sub %g0,%i2,%o7
6b628d36	36	mov 0,%o0 ! zero cy_limb
abfbdde1 UD	37	sllx %o7,3,%o7
abfbdde1 UD	38	sethi %hi(0x80000000),%o2
6b628d36	39	srl %i3,0,%o1 ! extract low 32 bits of s2_limb
abfbdde1	40	sub %i1,%o7,%o3
6b628d36	41	srlx %i3,32,%i3 ! extract high 32 bits of s2_limb
abfbdde1 UD	42	sub %i0,%o7,%o4
abfbdde1 UD	43	add %o2,%o2,%o2 ! o2 = 0x100000000
6b628d36 RM	44
	45	! hi !
	46	! mid-1 !
	47	! mid-2 !
	48	! lo !
5ae3e846	49	1:
abfbdde1	50	ldx [%o3+%o7],%g5
6b628d36	51	srl %g5,0,%i0 ! zero hi bits
abfbdde1	52	ldx [%o4+%o7],%l1
6b628d36 RM	53	srlx %g5,32,%g5
	54	mulx %o1,%i0,%i4 ! lo product
	55	mulx %i3,%i0,%i1 ! mid-1 product
	56	mulx %o1,%g5,%l2 ! mid-2 product
	57	mulx %i3,%g5,%i5 ! hi product
	58	srlx %i4,32,%i0 ! extract high 32 bits of lo product...
	59	add %i1,%i0,%i1 ! ...and add it to the mid-1 product
	60	addcc %i1,%l2,%i1 ! add mid products
	61	mov 0,%l0 ! we need the carry from that add...
	62	movcs %xcc,%o2,%l0 ! ...compute it and...
abfbdde1	63	sllx %i1,32,%i0 ! align low bits of mid product
6b628d36	64	add %i5,%l0,%i5 ! ...add to bit 32 of the hi product
6b628d36 RM	65	srl %i4,0,%g5 ! zero high 32 bits of lo product
	66	add %i0,%g5,%i0 ! combine into low 64 bits of result
	67	srlx %i1,32,%i1 ! extract high bits of mid product...
abfbdde1	68	addcc %i0,%o0,%i0 ! add cy_limb to low 64 bits of result
6b628d36	69	add %i5,%i1,%i1 ! ...and add them to the high result
6b628d36 RM	70	mov 0,%g5
6b628d36 RM	71	movcs %xcc,1,%g5
6b628d36	72	addcc %l1,%i0,%i0
abfbdde1 UD	73	stx %i0,[%o4+%o7]
	74	add %g5,1,%l1
	75	movcs %xcc,%l1,%g5
	76	addcc %o7,8,%o7
	77	bne,pt %xcc,1b
5ae3e846	78	add %i1,%g5,%o0 ! compute new cy_limb
6b628d36	79
5ae3e846	80	jmpl %i7+8, %g0
abfbdde1	81	restore %o0,%g0,%o0
5ae3e846 UD	82
5ae3e846 UD	83	END(__mpn_addmul_1)