]> git.ipfire.org Git - thirdparty/gcc.git/blame - libgcc/config/epiphany/ieee-754/fast_div.S
Update copyright years.
[thirdparty/gcc.git] / libgcc / config / epiphany / ieee-754 / fast_div.S
CommitLineData
83ffe9cd 1/* Copyright (C) 2011-2023 Free Software Foundation, Inc.
feeeff5c
JR
2 Contributed by Embecosm on behalf of Adapteva, Inc.
3
4This file is part of GCC.
5
6GCC is free software; you can redistribute it and/or modify it under
7the terms of the GNU General Public License as published by the Free
8Software Foundation; either version 3, or (at your option) any later
9version.
10
11GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12WARRANTY; without even the implied warranty of MERCHANTABILITY or
13FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14for more details.
15
16Under Section 7 of GPL version 3, you are granted additional
17permissions described in the GCC Runtime Library Exception, version
183.1, as published by the Free Software Foundation.
19
20You should have received a copy of the GNU General Public License and
21a copy of the GCC Runtime Library Exception along with this program;
22see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23<http://www.gnu.org/licenses/>. */
24
25#include "../epiphany-asm.h"
26
27.section _fast_div_text,"a",@progbits;
28 .balign 8;
29_fast_div_table:
30.word 0x007fffff// mantissa mask
31.word 0x40257ebb// hold constant a = 2.58586
32
33.word 0x3f000000// hold constant 126 shifted to bits [30:23]
34.word 0xc0ba2e88// hold constant b = -5.81818
35
36.word 0x4087c1e8// hold constant c = 4.24242
37.word 0x40000000// to hold constant 2 for Newton-Raphson iterations
38
39 .global SYM(__fast_recipsf2)
40 FUNC(__fast_recipsf2)
41SYM(__fast_recipsf2):
42
43//###################
44//# input operands:
45//###################
46// Divisor
47//R0
48// Function address (used with negative offsets to read _fast_div_table)
49//R1
50/* Scratch registers: two single (TMP0/TMP5) and two pairs. */
51#define P0L TMP1
52#define P0H TMP2
53#define P1L TMP3
54#define P1H TMP4
55
56//#########################################
57//# Constants to be used in the algorithm
58//#########################################
59ldrd P0L , [ R1 , -3 ]
60
61ldrd P1L , [ R1 , -2 ]
62
63
64
65//#############################################################################
66//# The Algorithm
67//#
68//# Operation: C=A/B
69//# stage 1 - find the reciprocal 1/B according to the following scheme:
70//# B = (2^E)*m (1<m<2, E=e-127)
71//# 1/B = 1/((2^E)*m) = 1/((2^(E+1))*m1) (0.5<m1<1)
72//# = (2^-(E+1))*(1/m1) = (2^E1)*(1/m1)
73//#
74//# Now we can find the new exponent:
75//# e1 = E1+127 = -E-1+127 = -e+127-1+127 = 253-e **
76//# 1/m1 alreadt has the exponent 127, so we have to add 126-e.
77//# the exponent might underflow, which we can detect as a sign change.
78//# Since the architeture uses flush-to-zero for subnormals, we can
79//# give the result 0. then.
80//#
81//# The 1/m1 term with 0.5<m1<1 is approximated with the Chebyshev polynomial
82//# 1/m1 = 2.58586*(m1^2) - 5.81818*m1 + 4.24242
83//#
84//# Next step is to use two iterations of Newton-Raphson algorithm to complete
85//# the reciprocal calculation.
86//#
87//# Final result is achieved by multiplying A with 1/B
88//#############################################################################
89
90
91
92// R0 exponent and sign "replacement" into TMP0
93AND TMP0,R0,P0L ;
94ORR TMP0,TMP0,P1L
95SUB TMP5,R0,TMP0 // R0 sign/exponent extraction into TMP5
96// Calculate new mantissa
97FMADD P1H,TMP0,P0H ;
98 // Calculate new exponent offset 126 - "old exponent"
99 SUB P1L,P1L,TMP5
100 ldrd P0L , [ R1 , -1 ]
101FMADD P0L,TMP0,P1H ;
102 eor P1H,r0,P1L // check for overflow (N-BIT).
103 blt .Lret_0
104// P0L exponent and sign "replacement"
105sub P0L,P0L,TMP5
106
107// Newton-Raphson iteration #1
108MOV TMP0,P0H ;
109FMSUB P0H,R0,P0L ;
110FMUL P0L,P0H,P0L ;
111// Newton-Raphson iteration #2
112FMSUB TMP0,R0,P0L ;
113FMUL R0,TMP0,P0L ;
114jr lr
115.Lret_0:ldrd P0L , [ R1 , -3 ]
116 lsr TMP0,r0,31 ; extract sign
117 lsl TMP0,TMP0,31
118 add P0L,P0L,r0 ; check for NaN input
119 eor P0L,P0L,r0
120 movgte r0,TMP0
121 jr lr
122// Quotient calculation is expected by the caller: FMUL quotient,divident,R0
123 ;
124 ENDFUNC(__fast_recipsf2)