/* Source: glibc, sysdeps/i386/i586/lshift.S  */
/* Pentium optimized __mpn_lshift --
   Copyright (C) 1992, 94, 95, 96, 97, 98, 2000 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or modify
   it under the terms of the GNU Library General Public License as published by
   the Free Software Foundation; either version 2 of the License, or (at your
   option) any later version.

   The GNU C Library is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
   License for more details.

   You should have received a copy of the GNU Library General Public License
   along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
   the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
   MA 02111-1307, USA.  */

#include "sysdep.h"
#include "asm-syntax.h"
#include "bp-asm.h"

/* Stack offsets of the C arguments, as seen after ENTER and the four
   register pushes below (LINKAGE covers the return address).  */
#define PARMS	LINKAGE+16		/* space for 4 saved regs */
#define RES	PARMS			/* mp_ptr res_ptr */
#define S	RES+PTR_SIZE		/* mp_srcptr s_ptr */
#define SIZE	S+PTR_SIZE		/* mp_size_t size */
#define CNT	SIZE+4			/* unsigned int cnt */

/* mp_limb_t __mpn_lshift (mp_ptr res_ptr, mp_srcptr s_ptr,
			   mp_size_t size, unsigned int cnt)

   Shift the SIZE-limb number at S_PTR left by CNT bits and store the
   low SIZE limbs of the result at RES_PTR.  The bits shifted out of
   the most significant limb are returned in %eax.

   Register roles (generic path):
     %edi = res_ptr   %esi = s_ptr   %ebx = loop counter   %ecx = cnt
     %eax/%edx/%ebp   limb pipeline for the shldl double shifts.  */

	.text
ENTRY(__mpn_lshift)
	ENTER

	pushl	%edi
	pushl	%esi
	pushl	%ebp
	pushl	%ebx

	movl	RES(%esp),%edi
	movl	S(%esp),%esi
	movl	SIZE(%esp),%ebx
	movl	CNT(%esp),%ecx

/* We can use faster code for shift-by-1 under certain conditions.  */
	cmp	$1,%ecx
	jne	L(normal)
	leal	4(%esi),%eax
	cmpl	%edi,%eax
	jnc	L(special)		/* jump if s_ptr + 1 >= res_ptr */
	leal	(%esi,%ebx,4),%eax
	cmpl	%eax,%edi
	jnc	L(special)		/* jump if res_ptr >= s_ptr + size */

L(normal):
	/* Point both pointers at the most significant limb; this path
	   walks downwards so it tolerates res_ptr > s_ptr overlap.  */
	leal	-4(%edi,%ebx,4),%edi
	leal	-4(%esi,%ebx,4),%esi

	movl	(%esi),%edx
	subl	$4,%esi
	xorl	%eax,%eax
	shldl	%cl,%edx,%eax		/* compute carry limb */
	pushl	%eax			/* push carry limb onto stack */

	decl	%ebx
	pushl	%ebx			/* save size-1 for the cleanup loop */
	shrl	$3,%ebx			/* number of 8-limb iterations */
	jz	L(end)

	movl	(%edi),%eax		/* fetch destination cache line */

	ALIGN (2)
L(oop):	movl	-28(%edi),%eax		/* fetch destination cache line */
	movl	%edx,%ebp

	/* Each pair below double-shifts two source limbs into place;
	   %edx carries the previous limb into the next iteration.  */
	movl	(%esi),%eax
	movl	-4(%esi),%edx
	shldl	%cl,%eax,%ebp
	shldl	%cl,%edx,%eax
	movl	%ebp,(%edi)
	movl	%eax,-4(%edi)

	movl	-8(%esi),%ebp
	movl	-12(%esi),%eax
	shldl	%cl,%ebp,%edx
	shldl	%cl,%eax,%ebp
	movl	%edx,-8(%edi)
	movl	%ebp,-12(%edi)

	movl	-16(%esi),%edx
	movl	-20(%esi),%ebp
	shldl	%cl,%edx,%eax
	shldl	%cl,%ebp,%edx
	movl	%eax,-16(%edi)
	movl	%edx,-20(%edi)

	movl	-24(%esi),%eax
	movl	-28(%esi),%edx
	shldl	%cl,%eax,%ebp
	shldl	%cl,%edx,%eax
	movl	%ebp,-24(%edi)
	movl	%eax,-28(%edi)

	subl	$32,%esi
	subl	$32,%edi
	decl	%ebx
	jnz	L(oop)

L(end):	popl	%ebx
	andl	$7,%ebx			/* (size-1) mod 8 limbs remain */
	jz	L(end2)
L(oop2):
	movl	(%esi),%eax
	shldl	%cl,%eax,%edx
	movl	%edx,(%edi)
	movl	%eax,%edx
	subl	$4,%esi
	subl	$4,%edi
	decl	%ebx
	jnz	L(oop2)

L(end2):
	shll	%cl,%edx		/* compute least significant limb */
	movl	%edx,(%edi)		/* store it */

	popl	%eax			/* pop carry limb */

	popl	%ebx
	popl	%ebp
	popl	%esi
	popl	%edi

	LEAVE
	ret

/* We loop from least significant end of the arrays, which is only
   permissible if the source and destination don't overlap, since the
   function is documented to work for overlapping source and destination.
*/

L(special):
	movl	(%esi),%edx
	addl	$4,%esi

	decl	%ebx
	pushl	%ebx			/* save size-1 for the cleanup loop */
	shrl	$3,%ebx

	addl	%edx,%edx		/* shift by 1; CF = out-shifted bit */
	incl	%ebx
	decl	%ebx			/* test %ebx for zero; inc/dec leave CF intact */
	jz	L(Lend)

	movl	(%edi),%eax		/* fetch destination cache line */

	ALIGN (2)
L(Loop):
	movl	28(%edi),%eax		/* fetch destination cache line */
	movl	%edx,%ebp

	/* adcl reg,reg doubles the limb and threads the carry bit
	   through the whole loop iteration.  */
	movl	(%esi),%eax
	movl	4(%esi),%edx
	adcl	%eax,%eax
	movl	%ebp,(%edi)
	adcl	%edx,%edx
	movl	%eax,4(%edi)

	movl	8(%esi),%ebp
	movl	12(%esi),%eax
	adcl	%ebp,%ebp
	movl	%edx,8(%edi)
	adcl	%eax,%eax
	movl	%ebp,12(%edi)

	movl	16(%esi),%edx
	movl	20(%esi),%ebp
	adcl	%edx,%edx
	movl	%eax,16(%edi)
	adcl	%ebp,%ebp
	movl	%edx,20(%edi)

	movl	24(%esi),%eax
	movl	28(%esi),%edx
	adcl	%eax,%eax
	movl	%ebp,24(%edi)
	adcl	%edx,%edx
	movl	%eax,28(%edi)

	leal	32(%esi),%esi		/* use leal not to clobber carry */
	leal	32(%edi),%edi
	decl	%ebx
	jnz	L(Loop)

L(Lend):
	popl	%ebx
	sbbl	%eax,%eax		/* save carry in %eax */
	andl	$7,%ebx			/* (size-1) mod 8 limbs remain */
	jz	L(Lend2)
	addl	%eax,%eax		/* restore carry from eax */
L(Loop2):
	movl	%edx,%ebp
	movl	(%esi),%edx
	adcl	%edx,%edx
	movl	%ebp,(%edi)

	leal	4(%esi),%esi		/* use leal not to clobber carry */
	leal	4(%edi),%edi
	decl	%ebx
	jnz	L(Loop2)

	jmp	L(L1)
L(Lend2):
	addl	%eax,%eax		/* restore carry from eax */
L(L1):	movl	%edx,(%edi)		/* store last limb */

	sbbl	%eax,%eax		/* %eax = -CF ... */
	negl	%eax			/* ... so return 0 or 1, the shifted-out bit */

	popl	%ebx
	popl	%ebp
	popl	%esi
	popl	%edi

	LEAVE
	ret
END(__mpn_lshift)