]>
Commit | Line | Data |
---|---|---|
7def3d92 RM |
1 | # Alpha EV5 __mpn_lshift -- Shift a multi-limb number left by cnt bits. |
2 | ||
04277e02 | 3 | # Copyright (C) 1994-2019 Free Software Foundation, Inc. |
7def3d92 RM |
4 | |
5 | # This file is part of the GNU MP Library. | |
6 | ||
7 | # The GNU MP Library is free software; you can redistribute it and/or modify | |
f01ec467 AJ |
8 | # it under the terms of the GNU Lesser General Public License as published by |
9 | # the Free Software Foundation; either version 2.1 of the License, or (at your | |
7def3d92 RM |
10 | # option) any later version. |
11 | ||
12 | # The GNU MP Library is distributed in the hope that it will be useful, but | |
13 | # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY | |
f01ec467 | 14 | # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public |
7def3d92 RM |
15 | # License for more details. |
16 | ||
f01ec467 | 17 | # You should have received a copy of the GNU Lesser General Public License |
ab84e3ff | 18 | # along with the GNU MP Library. If not, see <http://www.gnu.org/licenses/>. |
7def3d92 RM |
19 | |
20 | ||
21 | # INPUT PARAMETERS | |
22 | # res_ptr r16 | |
23 | # s1_ptr r17 | |
24 | # size r18 | |
25 | # cnt r19 | |
26 | ||
3de9f02e | 27 | # This code runs at 3.25 cycles/limb on the EV5. |
7def3d92 RM |
28 | |
# __mpn_lshift
#
# Shifts the size-limb number at s1_ptr left by cnt bits, stores the size
# result limbs at res_ptr, and leaves in $0 the bits shifted out of the top
# of the highest-addressed limb.  Limbs are 8-byte quadwords (ldq/stq, and
# s8addq scales size by 8).  Both arrays are walked from their highest
# address downwards.
#
# Register roles inside the routine:
#   $16      res_ptr, first moved to the end of RES, then decremented
#   $17      s1_ptr, first moved to the end of s1, then decremented
#   $18      limb counter; after the prologue, the number of limbs left for
#            the 4-way unrolled code (a multiple of 4)
#   $19      cnt, the left-shift count
#   $20      -cnt, used as the right-shift count.  Alpha shifts use only the
#            low 6 bits of the count, so this shifts right by 64-cnt.
#   $28      number of limbs handled one at a time in .Loop0
#   $0       function result
#   $1-$4    source limbs loaded from s1
#   $5-$8    right-shifted parts, OR-ed into finished result limbs
#   $21-$24  left-shifted parts carried over to the next lower result limb
#
# NOTE(review): with cnt == 0, $20 is also 0, so every srl would return its
# limb unshifted and be OR-ed into the neighbouring result limb.  Presumably
# callers guarantee 0 < cnt < 64 and size >= 1 -- confirm against the callers.
#
# .set noreorder asks the assembler to keep the instruction order as written;
# .set noat allows $28 (the assembler temporary) to be named explicitly.
29 | .set noreorder | |
30 | .set noat | |
31 | .text | |
32 | .align 3 | |
33 | .globl __mpn_lshift | |
34 | .ent __mpn_lshift | |
35 | __mpn_lshift: | |
# No stack frame is set up (size 0 on $30); the return address stays in $26.
36 | .frame $30,0,$26,0 | |
37 | ||
38 | s8addq $18,$17,$17 # make r17 point at end of s1 | |
39 | ldq $4,-8($17) # load first limb | |
# $20 = 0 - cnt: right-shift count equivalent to 64-cnt (see header).
40 | subq $31,$19,$20 | |
41 | s8addq $18,$16,$16 # make r16 point at end of RES | |
# $18 = size-1: the lowest limb is produced separately from $24 at the end.
42 | subq $18,1,$18 | |
43 | and $18,4-1,$28 # number of limbs in first loop | |
44 | srl $4,$20,$0 # compute function result | |
45 | ||
# If (size-1) is already a multiple of 4, skip the one-limb-at-a-time loop.
3de9f02e | 46 | beq $28,.L0 |
7def3d92 RM |
# Otherwise take those $28 limbs out of the unrolled-loop count.
47 | subq $18,$28,$18 |
48 | ||
# .Loop0: produce one result limb per iteration, $28 times.
# On entry to each iteration $4 holds the limb at -8($17).  The result limb
# is ($4 << cnt) | (next lower limb >> (64-cnt)).
49 | .align 3 | |
3de9f02e | 50 | .Loop0: ldq $3,-16($17) |
7def3d92 RM |
51 | subq $16,8,$16 |
52 | sll $4,$19,$5 | |
53 | subq $17,8,$17 | |
54 | subq $28,1,$28 | |
55 | srl $3,$20,$6 | |
# Register move: the limb just loaded becomes the current limb in $4.
56 | or $3,$3,$4 | |
57 | or $5,$6,$8 | |
# $16 was already decremented above, so offset 0 is the slot for this limb.
58 | stq $8,0($16) | |
3de9f02e | 59 | bne $28,.Loop0 |
7def3d92 | 60 | |
3de9f02e RM |
# .L0: $24 = high part of the next result limb (current limb << cnt).
# If no limbs are left for the unrolled code, $24 alone is the lowest result
# limb and .Lend stores it.
61 | .L0: sll $4,$19,$24 |
62 | beq $18,.Lend | |
7def3d92 RM |
# Load the next four source limbs.  If they are the last four, finish in
# .Lend1 without entering the pipelined loop.
63 | # warm up phase 1 |
64 | ldq $1,-16($17) | |
65 | subq $18,4,$18 | |
66 | ldq $2,-24($17) | |
67 | ldq $3,-32($17) | |
68 | ldq $4,-40($17) | |
3de9f02e | 69 | beq $18,.Lend1 |
7def3d92 RM |
# Shift the four limbs already loaded while loading the following four.
# $7 and $8 become complete result limbs here; $5 and $6 still need $22 and
# $23 OR-ed in, which the main loop or .Lend2 does.
70 | # warm up phase 2 |
71 | srl $1,$20,$7 | |
72 | sll $1,$19,$21 | |
73 | srl $2,$20,$8 | |
74 | ldq $1,-48($17) | |
75 | sll $2,$19,$22 | |
76 | ldq $2,-56($17) | |
77 | srl $3,$20,$5 | |
78 | or $7,$24,$7 | |
79 | sll $3,$19,$23 | |
80 | or $8,$21,$8 | |
81 | srl $4,$20,$6 | |
82 | ldq $3,-64($17) | |
83 | sll $4,$19,$24 | |
84 | ldq $4,-72($17) | |
85 | subq $18,4,$18 | |
3de9f02e | 86 | beq $18,.Lend2 |
7def3d92 RM |
# Each iteration stores four result limbs computed from previously loaded
# limbs, shifts the four limbs currently in $1-$4, and loads four more
# source limbs.  $16 and $17 each move down by 32 bytes and $18 drops by 4.
# The instruction order is hand-scheduled; do not rearrange it.
87 | .align 4 |
88 | # main loop | |
3de9f02e | 89 | .Loop: stq $7,-8($16) |
7def3d92 RM |
90 | or $5,$22,$5 |
91 | stq $8,-16($16) | |
92 | or $6,$23,$6 | |
93 | ||
94 | srl $1,$20,$7 | |
95 | subq $18,4,$18 | |
96 | sll $1,$19,$21 | |
# unop is a no-op filling this slot; the load to $31 in its trailing comment
# is not assembled.
97 | unop # ldq $31,-96($17) | |
98 | ||
99 | srl $2,$20,$8 | |
100 | ldq $1,-80($17) | |
101 | sll $2,$19,$22 | |
102 | ldq $2,-88($17) | |
103 | ||
104 | stq $5,-24($16) | |
105 | or $7,$24,$7 | |
106 | stq $6,-32($16) | |
107 | or $8,$21,$8 | |
108 | ||
109 | srl $3,$20,$5 | |
110 | unop # ldq $31,-96($17) | |
111 | sll $3,$19,$23 | |
112 | subq $16,32,$16 | |
113 | ||
114 | srl $4,$20,$6 | |
3de9f02e | 115 | ldq $3,-96($17) |
7def3d92 RM |
116 | sll $4,$19,$24 |
117 | ldq $4,-104($17) | |
118 | ||
119 | subq $17,32,$17 | |
3de9f02e | 120 | bne $18,.Loop |
# .Lend2: drain the pipeline with no further loads.  Stores the four result
# limbs already in flight, then the four built from the limbs still in
# $1-$4, then the lowest result limb ($24), and returns.
7def3d92 | 121 | # cool down phase 2/1 |
3de9f02e | 122 | .Lend2: stq $7,-8($16) |
7def3d92 RM |
123 | or $5,$22,$5 |
124 | stq $8,-16($16) | |
125 | or $6,$23,$6 | |
126 | srl $1,$20,$7 | |
127 | sll $1,$19,$21 | |
128 | srl $2,$20,$8 | |
129 | sll $2,$19,$22 | |
130 | stq $5,-24($16) | |
131 | or $7,$24,$7 | |
132 | stq $6,-32($16) | |
133 | or $8,$21,$8 | |
134 | srl $3,$20,$5 | |
135 | sll $3,$19,$23 | |
136 | srl $4,$20,$6 | |
137 | sll $4,$19,$24 | |
138 | # cool down phase 2/2 | |
139 | stq $7,-40($16) | |
140 | or $5,$22,$5 | |
141 | stq $8,-48($16) | |
142 | or $6,$23,$6 | |
143 | stq $5,-56($16) | |
144 | stq $6,-64($16) | |
# Lowest result limb: lowest source limb << cnt, with nothing OR-ed in.
145 | # cool down phase 2/3 | |
146 | stq $24,-72($16) | |
147 | ret $31,($26),1 | |
148 | ||
# .Lend1: exactly four limbs were loaded in warm up phase 1 and none remain.
# Shift them, store the four result limbs and the lowest limb ($24), return.
149 | # cool down phase 1/1 | |
3de9f02e | 150 | .Lend1: srl $1,$20,$7 |
7def3d92 RM |
151 | sll $1,$19,$21 |
152 | srl $2,$20,$8 | |
153 | sll $2,$19,$22 | |
154 | srl $3,$20,$5 | |
155 | or $7,$24,$7 | |
156 | sll $3,$19,$23 | |
157 | or $8,$21,$8 | |
158 | srl $4,$20,$6 | |
159 | sll $4,$19,$24 | |
160 | # cool down phase 1/2 | |
161 | stq $7,-8($16) | |
162 | or $5,$22,$5 | |
163 | stq $8,-16($16) | |
164 | or $6,$23,$6 | |
165 | stq $5,-24($16) | |
166 | stq $6,-32($16) | |
167 | stq $24,-40($16) | |
168 | ret $31,($26),1 | |
169 | ||
# .Lend: nothing left for the unrolled code; store the lowest result limb
# computed at .L0 and return.
3de9f02e | 170 | .Lend: stq $24,-8($16) |
7def3d92 RM |
171 | ret $31,($26),1 |
172 | .end __mpn_lshift |