/* Pentium optimized __mpn_rshift --
   Copyright (C) 1992-2016 Free Software Foundation, Inc.
   This file is part of the GNU MP Library.

   The GNU MP Library is free software; you can redistribute it and/or modify
   it under the terms of the GNU Lesser General Public License as published by
   the Free Software Foundation; either version 2.1 of the License, or (at your
   option) any later version.

   The GNU MP Library is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
   License for more details.

   You should have received a copy of the GNU Lesser General Public License
   along with the GNU MP Library; see the file COPYING.LIB.  If not,
   see <http://www.gnu.org/licenses/>.  */

#include "sysdep.h"
#include "asm-syntax.h"

#define PARMS	4+16		/* space for 4 saved regs */
#define RES	PARMS
#define S	RES+4
#define SIZE	S+4
#define CNT	SIZE+4

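/* Incoming arguments, as %esp offsets valid after the four register
   pushes in the prologue: RES is the result pointer, S the source
   pointer, SIZE the operand size in limbs, and CNT the right-shift
   count in bits.  */
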
	.text
ENTRY (__mpn_rshift)

	pushl	%edi
	cfi_adjust_cfa_offset (4)
	pushl	%esi
	cfi_adjust_cfa_offset (4)
	pushl	%ebp
	cfi_adjust_cfa_offset (4)
	cfi_rel_offset (ebp, 0)
	pushl	%ebx
	cfi_adjust_cfa_offset (4)

	movl	RES(%esp),%edi
	cfi_rel_offset (edi, 12)
	movl	S(%esp),%esi
	cfi_rel_offset (esi, 8)
	movl	SIZE(%esp),%ebx
	cfi_rel_offset (ebx, 0)
	movl	CNT(%esp),%ecx

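/* Register roles from here on: %edi = res_ptr, %esi = s_ptr,
   %ebx = size, %ecx = cnt.  */
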
/* We can use faster code for shift-by-1 under certain conditions.  */
	cmp	$1,%ecx
	jne	L(normal)
	leal	4(%edi),%eax
	cmpl	%esi,%eax
	jnc	L(special)		/* jump if res_ptr + 1 >= s_ptr */
	leal	(%edi,%ebx,4),%eax
	cmpl	%eax,%esi
	jnc	L(special)		/* jump if s_ptr >= res_ptr + size */

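/* The L(special) code walks from the most significant end and chains
   the shift through the carry flag, so the checks above only enter it
   when that direction cannot clobber source limbs that are still
   unread: when the destination does not start below the source, or
   when the operands do not overlap at all.  */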
L(normal):
	movl	(%esi),%edx
	addl	$4,%esi
	xorl	%eax,%eax
	shrdl	%cl,%edx,%eax		/* compute carry limb */
	pushl	%eax			/* push carry limb onto stack */
	cfi_adjust_cfa_offset (4)
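/* The carry limb is the CNT low bits of the first source limb,
   left-justified; it is popped back into %eax at the end as the
   return value.  */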

	decl	%ebx
	pushl	%ebx
	cfi_adjust_cfa_offset (4)
	shrl	$3,%ebx
	jz	L(end)
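
/* Main loop: eight limbs per iteration.  The loads of (%edi) and
   28(%edi), whose values are immediately overwritten, only serve to
   bring the destination cache line in before the stores.  */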

	movl	(%edi),%eax		/* fetch destination cache line */

	ALIGN	(2)
L(oop):	movl	28(%edi),%eax		/* fetch destination cache line */
	movl	%edx,%ebp

	movl	(%esi),%eax
	movl	4(%esi),%edx
	shrdl	%cl,%eax,%ebp
	shrdl	%cl,%edx,%eax
	movl	%ebp,(%edi)
	movl	%eax,4(%edi)

	movl	8(%esi),%ebp
	movl	12(%esi),%eax
	shrdl	%cl,%ebp,%edx
	shrdl	%cl,%eax,%ebp
	movl	%edx,8(%edi)
	movl	%ebp,12(%edi)

	movl	16(%esi),%edx
	movl	20(%esi),%ebp
	shrdl	%cl,%edx,%eax
	shrdl	%cl,%ebp,%edx
	movl	%eax,16(%edi)
	movl	%edx,20(%edi)

	movl	24(%esi),%eax
	movl	28(%esi),%edx
	shrdl	%cl,%eax,%ebp
	shrdl	%cl,%edx,%eax
	movl	%ebp,24(%edi)
	movl	%eax,28(%edi)

	addl	$32,%esi
	addl	$32,%edi
	decl	%ebx
	jnz	L(oop)

L(end):	popl	%ebx
	cfi_adjust_cfa_offset (-4)
	andl	$7,%ebx
	jz	L(end2)
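/* Shift the remaining (size-1) % 8 limbs one at a time.  */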
L(oop2):
	movl	(%esi),%eax
	shrdl	%cl,%eax,%edx	/* compute result limb */
	movl	%edx,(%edi)
	movl	%eax,%edx
	addl	$4,%esi
	addl	$4,%edi
	decl	%ebx
	jnz	L(oop2)

L(end2):
	shrl	%cl,%edx		/* compute most significant limb */
	movl	%edx,(%edi)		/* store it */

	popl	%eax			/* pop carry limb */
	cfi_adjust_cfa_offset (-4)
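/* The popped carry limb in %eax is the function's return value.  */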

	popl	%ebx
	cfi_adjust_cfa_offset (-4)
	cfi_restore (ebx)
	popl	%ebp
	cfi_adjust_cfa_offset (-4)
	cfi_restore (ebp)
	popl	%esi
	cfi_adjust_cfa_offset (-4)
	cfi_restore (esi)
	popl	%edi
	cfi_adjust_cfa_offset (-4)
	cfi_restore (edi)

	ret

/* Here we loop from the most significant end of the arrays, which is
   only permissible if the source and destination don't overlap, since
   the function is documented to work for overlapping source and
   destination.
*/

	cfi_adjust_cfa_offset (16)
	cfi_rel_offset (edi, 12)
	cfi_rel_offset (esi, 8)
	cfi_rel_offset (ebp, 4)
	cfi_rel_offset (ebx, 0)
L(special):
	leal	-4(%edi,%ebx,4),%edi
	leal	-4(%esi,%ebx,4),%esi

	movl	(%esi),%edx
	subl	$4,%esi

	decl	%ebx
	pushl	%ebx
	cfi_adjust_cfa_offset (4)
	shrl	$3,%ebx

	shrl	$1,%edx
	incl	%ebx
	decl	%ebx
	jz	L(Lend)
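
/* The shrl above put the bit shifted out of the most significant limb
   into CF.  incl/decl test whether the unrolled-loop count is zero
   without touching CF (INC and DEC leave the carry flag unchanged),
   so the rcrl chain below can consume it.  */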

	movl	(%edi),%eax		/* fetch destination cache line */

	ALIGN	(2)
L(Loop):
	movl	-28(%edi),%eax		/* fetch destination cache line */
	movl	%edx,%ebp

	movl	(%esi),%eax
	movl	-4(%esi),%edx
	rcrl	$1,%eax
	movl	%ebp,(%edi)
	rcrl	$1,%edx
	movl	%eax,-4(%edi)

	movl	-8(%esi),%ebp
	movl	-12(%esi),%eax
	rcrl	$1,%ebp
	movl	%edx,-8(%edi)
	rcrl	$1,%eax
	movl	%ebp,-12(%edi)

	movl	-16(%esi),%edx
	movl	-20(%esi),%ebp
	rcrl	$1,%edx
	movl	%eax,-16(%edi)
	rcrl	$1,%ebp
	movl	%edx,-20(%edi)

	movl	-24(%esi),%eax
	movl	-28(%esi),%edx
	rcrl	$1,%eax
	movl	%ebp,-24(%edi)
	rcrl	$1,%edx
	movl	%eax,-28(%edi)

	leal	-32(%esi),%esi		/* use leal not to clobber carry */
	leal	-32(%edi),%edi
	decl	%ebx
	jnz	L(Loop)

L(Lend):
	popl	%ebx
	cfi_adjust_cfa_offset (-4)
	sbbl	%eax,%eax		/* save carry in %eax */
	andl	$7,%ebx
	jz	L(Lend2)
	addl	%eax,%eax		/* restore carry from eax */
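/* andl clears CF, so the pending rotate carry is parked in %eax
   (0 or -1 via sbbl) and brought back with addl %eax,%eax before
   the single-limb loop below.  */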
L(Loop2):
	movl	%edx,%ebp
	movl	(%esi),%edx
	rcrl	$1,%edx
	movl	%ebp,(%edi)

	leal	-4(%esi),%esi		/* use leal not to clobber carry */
	leal	-4(%edi),%edi
	decl	%ebx
	jnz	L(Loop2)

	jmp	L(L1)
L(Lend2):
	addl	%eax,%eax		/* restore carry from eax */
L(L1):	movl	%edx,(%edi)		/* store last limb */

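/* Materialize the return value: rcrl moves the last shifted-out bit
   into bit 31 of %eax, the same left-justified carry-limb form the
   normal path returns.  */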
	movl	$0,%eax
	rcrl	$1,%eax

	popl	%ebx
	cfi_adjust_cfa_offset (-4)
	cfi_restore (ebx)
	popl	%ebp
	cfi_adjust_cfa_offset (-4)
	cfi_restore (ebp)
	popl	%esi
	cfi_adjust_cfa_offset (-4)
	cfi_restore (esi)
	popl	%edi
	cfi_adjust_cfa_offset (-4)
	cfi_restore (edi)

	ret
END (__mpn_rshift)