[thirdparty/gcc.git] / libgcc / config / epiphany / udivsi3-float.S

/* Unsigned 32 bit division optimized for Epiphany.
   Copyright (C) 2009-2022 Free Software Foundation, Inc.
   Contributed by Embecosm on behalf of Adapteva, Inc.

This file is part of GCC.

This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */

#include "epiphany-asm.h"

	/* __udivsi3 -- unsigned 32 bit division.

	   In:      r0 = dividend, r1 = divisor.
	   Out:     r0 = quotient.
	   Scratch: r1 and TMP0..TMP3 are overwritten.  TMP0..TMP3 are not
		    defined in this file; presumably they are register
		    aliases supplied by epiphany-asm.h.

	   Outline of the code below:
	     1. If r0 < r1 (unsigned) the quotient is 0.
	     2. Both operands are converted with the float instruction and
		the raw bit patterns of the results are subtracted to get
		a shift count k, a cheap estimate of how many bit
		positions the dividend lies above the divisor (this reads
		the patterns as sign/exponent/23-bit-fraction words --
		assumed IEEE single precision, TODO confirm).
	     3. The divisor is shifted left by k and subtracted repeatedly
		from the dividend; every successful subtraction adds
		1 << k to the partial quotient in TMP3.
	     4. A computed jump enters a run of 30 unrolled
		shift-and-subtract steps so that exactly k of them execute;
		they leave the low k quotient bits in the low bits of r0.
	     5. Those k bits are masked out and ORed with TMP3.

	   NOTE(review): there is no explicit test for r1 == 0.  With a zero
	   divisor the subtract loop at .Lsub_loop looks like it never
	   sees a borrow and so would not terminate -- confirm that callers
	   never rely on any particular behaviour here.  */
	FSTAB (__udivsi3,T_UINT)
	.global SYM(__udivsi3)
	.balign 4
	HIDDEN_FUNC(__udivsi3)
SYM(__udivsi3):
	; Step 1: dividend below divisor means the quotient is 0.
	sub TMP0,r0,r1
	bltu .Lret0
	; Step 2: estimate the shift count from float bit patterns.
	; The two-space indented instructions build the constant in TMP1 and
	; are interleaved with the conversions (presumably for scheduling).
	float TMP2,r0
	  mov TMP1,%low(0xb0800000) ; ??? this would be faster with small data
	float TMP3,r1
	  movt TMP1,%high(0xb0800000)
	; float treats its source as signed (presumably), so a dividend with
	; bit 31 set needs a corrected pattern.  (r0 asr 8) - 0xb0800000 is
	; 0x4f000000 plus bits 30..8 of r0 in that case, i.e. the pattern of
	; 2**31 with the remaining dividend bits as fraction.
	asr TMP0,r0,8
	sub TMP0,TMP0,TMP1 ; no borrow exactly when r0 has bit 31 set
	; Only the high half of TMP1 is replaced; the low half is still zero
	; from the mov above, so TMP1 becomes 0x00810000.  The ordering relies
	; on movt leaving the flags of the preceding sub intact.
	movt TMP1,%high(0x00810000)
	movgteu TMP2,TMP0 ; take the corrected pattern when bit 31 was set
	; bblt presumably tests the floating point flags left by the second
	; float: a divisor with bit 31 set converts to a negative value, and
	; since r0 >= r1 is already known the quotient is then 1.
	bblt .Lret1
	; TMP2 = pattern(dividend) - 0x00810000 - pattern(divisor).
	; 0x00800000 is one unit of the exponent field; the extra 0x00010000
	; is presumably a margin against rounding in the conversions.
	sub TMP2,TMP2,TMP1
	sub TMP2,TMP2,TMP3
	mov TMP3,0 ; zero for the clamp below, then the quotient accumulator
	movltu TMP2,TMP3 ; clamp to 0 if the last sub borrowed (mov kept flags)
	lsr TMP2,TMP2,23 ; TMP2 = k, the exponent-field difference
	; Step 3: scale the divisor and peel off the high quotient bits.
	lsl r1,r1,TMP2 ; r1 = divisor << k
	mov TMP0,1
	lsl TMP0,TMP0,TMP2 ; TMP0 = 1 << k, quotient weight of one subtraction
	; The first two trial subtractions are written out; each one that
	; borrows leaves to .Ladd_back, which undoes it.
	sub r0,r0,r1
	bltu .Ladd_back
	add TMP3,TMP3,TMP0
	sub r0,r0,r1
	bltu .Ladd_back
.Lsub_loop:; More than two iterations are rare, so it makes sense to leave
           ; this label here to reduce average branch penalties.
	add TMP3,TMP3,TMP0
	sub r0,r0,r1
	bgteu .Lsub_loop
.Ladd_back:
	add r0,r0,r1 ; undo the subtraction that borrowed; r0 = partial remainder
	sub TMP1,r1,1 ; TMP1 = (divisor << k) - 1, subtrahend for the steps below
	; Step 4: jump to .L0step - 8*k so that exactly k of the unrolled
	; steps run.  This assumes every step occupies 8 bytes, which is
	; presumably why the sub carries an explicit .l suffix -- confirm
	; against the instruction encodings before touching the step body.
	mov r1,%low(.L0step)
	movt r1,%high(.L0step)
	lsl TMP2,TMP2,3
	sub r1,r1,TMP2
	jr r1
	; One step: double r0, then if r0 >= TMP1 replace it by r0 - TMP1.
	; Subtracting (divisor << k) - 1 removes the scaled divisor and sets
	; the freshly vacated low bit in one operation, so quotient bits pile
	; up in the low end of r0 while the remainder moves up.
	.rep 30
	lsl r0,r0,1
	sub.l r1,r0,TMP1
	movgteu r0,r1
	.endr
	; Step 5: TMP0 - 1 is a mask of the low k bits.
.L0step:sub r1,TMP0,1 ; mask result bits from steps ...
	and r0,r0,r1
	orr r0,r0,TMP3 ; ... and combine with first bits.
	rts
	; Early exit: dividend < divisor.
.Lret0:	mov r0,0
	rts
	; Early exit taken from the bblt above.
.Lret1:	mov r0,1
	rts
	ENDFUNC(__udivsi3)
Commit	Line	Data
feeeff5c	1	/* Unsigned 32 bit division optimized for Epiphany.
7adcbafe	2	Copyright (C) 2009-2022 Free Software Foundation, Inc.
feeeff5c JR	3	Contributed by Embecosm on behalf of Adapteva, Inc.
	4
	5	This file is part of GCC.
	6
	7	This file is free software; you can redistribute it and/or modify it
	8	under the terms of the GNU General Public License as published by the
	9	Free Software Foundation; either version 3, or (at your option) any
	10	later version.
	11
	12	This file is distributed in the hope that it will be useful, but
	13	WITHOUT ANY WARRANTY; without even the implied warranty of
	14	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	15	General Public License for more details.
	16
	17	Under Section 7 of GPL version 3, you are granted additional
	18	permissions described in the GCC Runtime Library Exception, version
	19	3.1, as published by the Free Software Foundation.
	20
	21	You should have received a copy of the GNU General Public License and
	22	a copy of the GCC Runtime Library Exception along with this program;
	23	see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
	24	<http://www.gnu.org/licenses/>. */
	25
	26	#include "epiphany-asm.h"
	27
	28	FSTAB (__udivsi3,T_UINT)
	29	.global SYM(__udivsi3)
	30	.balign 4
	31	HIDDEN_FUNC(__udivsi3)
	32	SYM(__udivsi3):
	33	sub TMP0,r0,r1
	34	bltu .Lret0
	35	float TMP2,r0
	36	mov TMP1,%low(0xb0800000) ; ??? this would be faster with small data
	37	float TMP3,r1
	38	movt TMP1,%high(0xb0800000)
	39	asr TMP0,r0,8
	40	sub TMP0,TMP0,TMP1
	41	movt TMP1,%high(0x00810000)
	42	movgteu TMP2,TMP0
	43	bblt .Lret1
	44	sub TMP2,TMP2,TMP1
	45	sub TMP2,TMP2,TMP3
	46	mov TMP3,0
	47	movltu TMP2,TMP3
	48	lsr TMP2,TMP2,23
	49	lsl r1,r1,TMP2
	50	mov TMP0,1
	51	lsl TMP0,TMP0,TMP2
	52	sub r0,r0,r1
	53	bltu .Ladd_back
	54	add TMP3,TMP3,TMP0
	55	sub r0,r0,r1
	56	bltu .Ladd_back
	57	.Lsub_loop:; More than two iterations are rare, so it makes sense to leave
	58	; this label here to reduce average branch penalties.
	59	add TMP3,TMP3,TMP0
	60	sub r0,r0,r1
	61	bgteu .Lsub_loop
	62	.Ladd_back:
	63	add r0,r0,r1
	64	sub TMP1,r1,1
	65	mov r1,%low(.L0step)
	66	movt r1,%high(.L0step)
67	lsl TMP2,TMP2,3
68	sub r1,r1,TMP2
69	jr r1
70	.rep 30
71	lsl r0,r0,1
72	sub.l r1,r0,TMP1
73	movgteu r0,r1
74	.endr
75	.L0step:sub r1,TMP0,1 ; mask result bits from steps ...
76	and r0,r0,r1
77	orr r0,r0,TMP3 ; ... and combine with first bits.
78	rts
79	.Lret0: mov r0,0
80	rts
81	.Lret1: mov r0,1
82	rts
83	ENDFUNC(__udivsi3)