]>
Commit | Line | Data |
---|---|---|
80920132 ME |
1 | ###################################- |
2 | # | |
99dee823 | 3 | # Copyright (C) 2009-2021 Free Software Foundation, Inc. |
80920132 ME |
4 | # |
5 | # Contributed by Michael Eager <eager@eagercon.com>. | |
6 | # | |
7 | # This file is free software; you can redistribute it and/or modify it | |
8 | # under the terms of the GNU General Public License as published by the | |
9 | # Free Software Foundation; either version 3, or (at your option) any | |
10 | # later version. | |
11 | # | |
12 | # GCC is distributed in the hope that it will be useful, but WITHOUT | |
13 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY | |
14 | # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public | |
15 | # License for more details. | |
16 | # | |
17 | # Under Section 7 of GPL version 3, you are granted additional | |
18 | # permissions described in the GCC Runtime Library Exception, version | |
19 | # 3.1, as published by the Free Software Foundation. | |
20 | # | |
21 | # You should have received a copy of the GNU General Public License and | |
22 | # a copy of the GCC Runtime Library Exception along with this program; | |
23 | # see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
24 | # <http://www.gnu.org/licenses/>. | |
25 | # | |
f9989b51 | 26 | # muldi3_hard.S |
80920132 ME |
27 | # |
28 | # Multiply operation for 64 bit integers, for devices with hard multiply | |
29 | # Input : Operand1[H] in Reg r5 | |
30 | # Operand1[L] in Reg r6 | |
31 | # Operand2[H] in Reg r7 | |
32 | # Operand2[L] in Reg r8 | |
33 | # Output: Result[H] in Reg r3 | |
34 | # Result[L] in Reg r4 | |
35 | # | |
36 | # Explanation: | |
37 | # | |
38 | # Both input numbers are split into 16-bit pieces as follows | |
39 | # op1 = A B C D | |
40 | # op2 = E F G H | |
41 | # result = D * H | |
42 | # + (C * H + D * G) << 16 | |
43 | # + (B * H + C * G + D * F) << 32 | |
44 | # + (A * H + B * G + C * F + D * E) << 48 | |
45 | # | |
46 | # Only 64 bits of the output are considered | |
47 | # | |
48 | ####################################### | |
49 | ||
938b6f1e JM |
50 | /* An executable stack is *not* required for these functions. On Linux targets, say so by emitting an empty .note.GNU-stack section, then switch back to the previous section. */ |
51 | #ifdef __linux__ | |
52 | .section .note.GNU-stack,"",%progbits | |
53 | .previous | |
54 | #endif | |
55 | ||
80920132 ME |
/*
 * muldi3_hardproc -- multiply two 64-bit integers with the 32-bit hardware
 * multiplier and return the low 64 bits of the product.
 *
 * In:   r5:r6 = operand 1 (r5 = high word, r6 = low word)
 *       r7:r8 = operand 2 (r7 = high word, r8 = low word)
 * Out:  r3:r4 = product   (r3 = high word, r4 = low word)
 * Uses: r7-r12 are overwritten as scratch; r20-r27 are saved in the frame
 *       on entry and restored before returning.
 *
 * Method: the operands are viewed as 16-bit pieces, op1 = A B C D and
 * op2 = E F G H, and the result is built one 16-bit column at a time:
 *       D*H
 *     + (C*H + D*G)               << 16
 *     + (B*H + C*G + D*F)         << 32
 *     + (A*H + B*G + C*F + D*E)   << 48
 *
 * Frame layout (40 bytes, offsets from the adjusted r1):
 *    0..28  saved r20-r27
 *   32..39  result under construction, as halfwords Pos0 (offset 32),
 *           Pos1 (34), Pos2 (36) and Pos3 (38)
 *   44..59  the four input words, spilled above this frame (in the
 *           caller's stack area) so A..H can be read with halfword loads
 *
 * NOTE(review): the halfword offsets treat offset +0 of a stored word as
 * its upper 16 bits, i.e. they assume big-endian data layout -- confirm
 * before using this routine on a little-endian configuration.
 */
	.globl	muldi3_hardproc
	.ent	muldi3_hardproc
muldi3_hardproc:
	addi	r1,r1,-40		# Allocate the 40-byte frame

	# Spill the input operands above the frame for halfword access
	swi	r5,r1,44
	swi	r6,r1,48
	swi	r7,r1,52
	swi	r8,r1,56

	# Preserve r20-r27; they hold A..H for the rest of the routine
	swi	r20,r1,0
	swi	r21,r1,4
	swi	r22,r1,8
	swi	r23,r1,12
	swi	r24,r1,16
	swi	r25,r1,20
	swi	r26,r1,24
	swi	r27,r1,28

	# Fetch the eight 16-bit pieces A through H, zero-extended
	lhui	r20,r1,44		# A
	lhui	r21,r1,46		# B
	lhui	r22,r1,48		# C
	lhui	r23,r1,50		# D
	lhui	r24,r1,52		# E
	lhui	r25,r1,54		# F
	lhui	r26,r1,56		# G
	lhui	r27,r1,58		# H

	# Store1 = D * H, written to Pos2:Pos3 (Pos3 is final after this)
	mul	r9,r23,r27
	swi	r9,r1,36

	# Store2 = Hi(Store1) + C * H + D * G
	# A carry out of either 32-bit add is accumulated in r12; it is
	# folded into Pos0 at the end.
	lhui	r11,r1,36		# Hi(Store1)
	mul	r9,r22,r27		# C * H
	mul	r10,r23,r26		# D * G
	add	r9,r9,r10
	addc	r12,r0,r0		# r12 = carry from C*H + D*G
	add	r9,r9,r11
	addc	r12,r12,r0		# r12 += carry from adding Hi(Store1)
	shi	r9,r1,36		# Pos2 = Lo(Store2), final
	swi	r9,r1,32		# Bounce Store2 through memory ...
	lhui	r11,r1,32		# ... to extract Hi(Store2)
	shi	r11,r1,34		# Pos1 = Hi(Store2), overwritten below

	# Store3 = Hi(Store2) + B * H + C * G + D * F, written to Pos0:Pos1
	# Overflow past 32 bits here lies beyond bit 63 and is dropped.
	mul	r9,r21,r27		# B * H
	mul	r10,r22,r26		# C * G
	mul	r7,r23,r25		# D * F
	add	r9,r9,r11
	add	r9,r9,r10
	add	r9,r9,r7
	swi	r9,r1,32		# Pos1 = Lo(Store3), final

	# Pos0 = Hi(Store3) + A * H + B * G + C * F + D * E + carries in r12
	lhui	r11,r1,32		# Hi(Store3)
	mul	r9,r20,r27		# A * H
	mul	r10,r21,r26		# B * G
	mul	r7,r22,r25		# C * F
	mul	r8,r23,r24		# D * E
	add	r9,r9,r11
	add	r9,r9,r10
	add	r9,r9,r7
	add	r9,r9,r8
	add	r9,r9,r12		# Fold in the carries saved from Store2
	shi	r9,r1,32		# Halfword store keeps just the low 16 bits

	# Pick up the assembled 64-bit result
	lwi	r3,r1,32		# Hi part (Pos0:Pos1)
	lwi	r4,r1,36		# Lo part (Pos2:Pos3)

	# Restore the callee-saved registers
	lwi	r20,r1,0
	lwi	r21,r1,4
	lwi	r22,r1,8
	lwi	r23,r1,12
	lwi	r24,r1,16
	lwi	r25,r1,20
	lwi	r26,r1,24
	lwi	r27,r1,28

	# Return; the frame is released by the instruction following rtsd
	rtsd	r15,8
	addi	r1,r1,40

	.end	muldi3_hardproc
149 | ||
150 |