]> git.ipfire.org Git - thirdparty/glibc.git/blame - sysdeps/i386/i586/lshift.S
Update copyright dates with scripts/update-copyrights.
[thirdparty/glibc.git] / sysdeps / i386 / i586 / lshift.S
CommitLineData
6d52618b 1/* Pentium optimized __mpn_lshift --
bfff8b1b 2 Copyright (C) 1992-2017 Free Software Foundation, Inc.
6d52618b 3 This file is part of the GNU C Library.
8f5ca04b 4
41bdb6e2
AJ
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
59ba27a6
PE
16 License along with the GNU C Library; if not, see
17 <http://www.gnu.org/licenses/>. */
8f5ca04b 18
8f5ca04b
RM
19#include "sysdep.h"
20#include "asm-syntax.h"
3f02f778 21
/* Byte offsets, relative to %esp, of the stack arguments as they are
   addressed once the four register pushes at function entry are done.
   The 16 covers those four saved registers; the leading 4 is presumably
   the return address -- confirm against the calling convention in use.
   Each following argument is 4 bytes further up the stack.  */
2366713d 22#define PARMS 4+16 /* space for 4 saved regs */
3f02f778 23#define RES PARMS
2366713d
JM
24#define S RES+4
25#define SIZE S+4
3f02f778 26#define CNT SIZE+4
8f5ca04b 27
3f02f778 28 .text
/* __mpn_lshift -- shift a multi-limb number left by a bit count.

   Stack arguments, read through the RES, S, SIZE and CNT offsets:
     RES  -> %edi  res_ptr, where the shifted limbs are stored
     S    -> %esi  s_ptr, where the source limbs are read
     SIZE -> %ebx  size, the number of 4-byte limbs
     CNT  -> %ecx  shift count (only %cl is used by the shifts)

   Returns in %eax the bits shifted out of the most significant limb.
   %edi, %esi, %ebp and %ebx are pushed on entry and popped again before
   each ret.

   NOTE(review): one source limb is always loaded before any count is
   tested, so size is presumably required to be at least 1 -- confirm
   against the callers.  */
2366713d 29ENTRY (__mpn_lshift)
3f02f778 30
/* Save the four registers used below; every push moves the CFA by 4. */
8f5ca04b 31 pushl %edi
1ad9da69 32 cfi_adjust_cfa_offset (4)
8f5ca04b 33 pushl %esi
1ad9da69 34 cfi_adjust_cfa_offset (4)
8f5ca04b 35 pushl %ebp
1ad9da69
UD
36 cfi_adjust_cfa_offset (4)
37 cfi_rel_offset (ebp, 0)
f9e7bbce 38 pushl %ebx
1ad9da69 39 cfi_adjust_cfa_offset (4)
8f5ca04b 40
/* Load the arguments: %edi = res_ptr, %esi = s_ptr, %ebx = size,
   %ecx = cnt.  The cfi_rel_offset lines record where each saved register
   now sits relative to %esp. */
3f02f778 41 movl RES(%esp),%edi
1ad9da69 42 cfi_rel_offset (edi, 12)
3f02f778 43 movl S(%esp),%esi
1ad9da69 44 cfi_rel_offset (esi, 8)
f9e7bbce 45 movl SIZE(%esp),%ebx
1ad9da69 46 cfi_rel_offset (ebx, 0)
3f02f778 47 movl CNT(%esp),%ecx
8f5ca04b 48
ba848785 49/* We can use faster code for shift-by-1 under certain conditions. */
8f5ca04b 50 cmp $1,%ecx
5929563f 51 jne L(normal)
/* cnt == 1: the two pointer tests below decide whether to use L(special),
   which walks the arrays upwards from the least significant limb. */
ba848785
RM
52 leal 4(%esi),%eax
53 cmpl %edi,%eax
5929563f 54 jnc L(special) /* jump if s_ptr + 1 >= res_ptr */
f9e7bbce 55 leal (%esi,%ebx,4),%eax
ba848785 56 cmpl %eax,%edi
5929563f 57 jnc L(special) /* jump if res_ptr >= s_ptr + size */
8f5ca04b 58
/* General path: point both registers at the most significant limb and
   walk downwards. */
5929563f 59L(normal):
f9e7bbce
GM
60 leal -4(%edi,%ebx,4),%edi
61 leal -4(%esi,%ebx,4),%esi
8f5ca04b
RM
62
63 movl (%esi),%edx
64 subl $4,%esi
65 xorl %eax,%eax
66 shldl %cl,%edx,%eax /* compute carry limb */
67 pushl %eax /* push carry limb onto stack */
1ad9da69 68 cfi_adjust_cfa_offset (4)
8f5ca04b 69
/* The most significant limb is already in %edx.  Save size-1 on the
   stack, then %ebx = (size-1)/8 = number of passes through L(oop). */
f9e7bbce
GM
70 decl %ebx
71 pushl %ebx
1ad9da69 72 cfi_adjust_cfa_offset (4)
f9e7bbce 73 shrl $3,%ebx
5929563f 74 jz L(end)
8f5ca04b
RM
75
76 movl (%edi),%eax /* fetch destination cache line */
77
78 ALIGN (2)
/* Unrolled loop: each pass stores 8 result limbs.  Every result limb is
   formed by shldl from a source limb and its less significant neighbour;
   %edx carries the pending limb from one pass to the next. */
5929563f 79L(oop): movl -28(%edi),%eax /* fetch destination cache line */
f9e7bbce 80 movl %edx,%ebp
8f5ca04b
RM
81
82 movl (%esi),%eax
83 movl -4(%esi),%edx
f9e7bbce 84 shldl %cl,%eax,%ebp
8f5ca04b 85 shldl %cl,%edx,%eax
f9e7bbce 86 movl %ebp,(%edi)
8f5ca04b
RM
87 movl %eax,-4(%edi)
88
f9e7bbce 89 movl -8(%esi),%ebp
8f5ca04b 90 movl -12(%esi),%eax
f9e7bbce
GM
91 shldl %cl,%ebp,%edx
92 shldl %cl,%eax,%ebp
8f5ca04b 93 movl %edx,-8(%edi)
f9e7bbce 94 movl %ebp,-12(%edi)
8f5ca04b
RM
95
96 movl -16(%esi),%edx
f9e7bbce 97 movl -20(%esi),%ebp
8f5ca04b 98 shldl %cl,%edx,%eax
f9e7bbce 99 shldl %cl,%ebp,%edx
8f5ca04b
RM
100 movl %eax,-16(%edi)
101 movl %edx,-20(%edi)
102
103 movl -24(%esi),%eax
104 movl -28(%esi),%edx
f9e7bbce 105 shldl %cl,%eax,%ebp
8f5ca04b 106 shldl %cl,%edx,%eax
f9e7bbce 107 movl %ebp,-24(%edi)
8f5ca04b
RM
108 movl %eax,-28(%edi)
109
110 subl $32,%esi
111 subl $32,%edi
f9e7bbce 112 decl %ebx
5929563f 113 jnz L(oop)
8f5ca04b 114
/* Recover size-1 and process the remaining (size-1) mod 8 limbs one at a
   time in L(oop2). */
f9e7bbce 115L(end): popl %ebx
1ad9da69 116 cfi_adjust_cfa_offset (-4)
f9e7bbce 117 andl $7,%ebx
5929563f
UD
118 jz L(end2)
119L(oop2):
120 movl (%esi),%eax
8f5ca04b
RM
121 shldl %cl,%eax,%edx
122 movl %edx,(%edi)
123 movl %eax,%edx
124 subl $4,%esi
125 subl $4,%edi
f9e7bbce 126 decl %ebx
5929563f 127 jnz L(oop2)
8f5ca04b 128
5929563f
UD
129L(end2):
130 shll %cl,%edx /* compute least significant limb */
8f5ca04b
RM
131 movl %edx,(%edi) /* store it */
132
133 popl %eax /* pop carry limb */
1ad9da69 134 cfi_adjust_cfa_offset (-4)
8f5ca04b 135
/* Restore the saved registers in the reverse order of the pushes. */
8f5ca04b 136 popl %ebx
1ad9da69
UD
137 cfi_adjust_cfa_offset (-4)
138 cfi_restore (ebx)
f9e7bbce 139 popl %ebp
1ad9da69
UD
140 cfi_adjust_cfa_offset (-4)
141 cfi_restore (ebp)
8f5ca04b 142 popl %esi
1ad9da69
UD
143 cfi_adjust_cfa_offset (-4)
144 cfi_restore (esi)
8f5ca04b 145 popl %edi
1ad9da69
UD
146 cfi_adjust_cfa_offset (-4)
147 cfi_restore (edi)
3f02f778 148
8f5ca04b
RM
149 ret
150
151/* Special case for shift-by-1: loop from the least significant end of
6d52618b 152 the arrays.  The function is documented to work for overlapping source
8f5ca04b
RM
153 and destination, so this direction is only used when one of the pointer
154 tests before L(normal) has branched here.  */
155
/* L(special) is reached by a jump taken after the four register pushes,
   so the CFI state undone by the epilogue above is set up again here. */
1ad9da69
UD
156 cfi_adjust_cfa_offset (16)
157 cfi_rel_offset (edi, 12)
158 cfi_rel_offset (esi, 8)
159 cfi_rel_offset (ebp, 4)
160 cfi_rel_offset (ebx, 0)
5929563f 161L(special):
8f5ca04b
RM
162 movl (%esi),%edx
163 addl $4,%esi
164
/* Save size-1 on the stack; %ebx = (size-1)/8 = number of passes through
   L(Loop). */
f9e7bbce
GM
165 decl %ebx
166 pushl %ebx
1ad9da69 167 cfi_adjust_cfa_offset (4)
f9e7bbce 168 shrl $3,%ebx
/* Double the first limb; the bit shifted out of it is now in the carry
   flag and has to survive until the next adcl. */
8f5ca04b
RM
169
170 addl %edx,%edx
/* incl followed by decl sets ZF according to %ebx while leaving the carry
   flag produced by the addl untouched. */
f9e7bbce
GM
171 incl %ebx
172 decl %ebx
5929563f 173 jz L(Lend)
8f5ca04b
RM
174
175 movl (%edi),%eax /* fetch destination cache line */
176
177 ALIGN (2)
/* Unrolled loop: 8 limbs per pass.  Each adcl doubles one source limb and
   shifts in the carry out of the previous, less significant limb.  Only
   instructions that leave the carry flag alone sit between the adcl's. */
5929563f
UD
178L(Loop):
179 movl 28(%edi),%eax /* fetch destination cache line */
f9e7bbce 180 movl %edx,%ebp
8f5ca04b
RM
181
182 movl (%esi),%eax
183 movl 4(%esi),%edx
184 adcl %eax,%eax
f9e7bbce 185 movl %ebp,(%edi)
8f5ca04b
RM
186 adcl %edx,%edx
187 movl %eax,4(%edi)
188
f9e7bbce 189 movl 8(%esi),%ebp
8f5ca04b 190 movl 12(%esi),%eax
f9e7bbce 191 adcl %ebp,%ebp
8f5ca04b
RM
192 movl %edx,8(%edi)
193 adcl %eax,%eax
f9e7bbce 194 movl %ebp,12(%edi)
8f5ca04b
RM
195
196 movl 16(%esi),%edx
f9e7bbce 197 movl 20(%esi),%ebp
8f5ca04b
RM
198 adcl %edx,%edx
199 movl %eax,16(%edi)
f9e7bbce 200 adcl %ebp,%ebp
8f5ca04b
RM
201 movl %edx,20(%edi)
202
203 movl 24(%esi),%eax
204 movl 28(%esi),%edx
205 adcl %eax,%eax
f9e7bbce 206 movl %ebp,24(%edi)
8f5ca04b
RM
207 adcl %edx,%edx
208 movl %eax,28(%edi)
209
210 leal 32(%esi),%esi /* use leal not to clobber carry */
211 leal 32(%edi),%edi
f9e7bbce 212 decl %ebx
5929563f 213 jnz L(Loop)
8f5ca04b 214
/* Recover size-1.  The andl below overwrites the carry flag, so the carry
   is parked in %eax first and put back before it is needed again. */
5929563f 215L(Lend):
f9e7bbce 216 popl %ebx
1ad9da69 217 cfi_adjust_cfa_offset (-4)
8f5ca04b 218 sbbl %eax,%eax /* save carry in %eax */
f9e7bbce 219 andl $7,%ebx
5929563f 220 jz L(Lend2)
8f5ca04b 221 addl %eax,%eax /* restore carry from eax */
5929563f 222L(Loop2):
f9e7bbce 223 movl %edx,%ebp
8f5ca04b
RM
224 movl (%esi),%edx
225 adcl %edx,%edx
f9e7bbce 226 movl %ebp,(%edi)
8f5ca04b
RM
227
228 leal 4(%esi),%esi /* use leal not to clobber carry */
229 leal 4(%edi),%edi
f9e7bbce 230 decl %ebx
5929563f 231 jnz L(Loop2)
8f5ca04b 232
5929563f
UD
233 jmp L(L1)
234L(Lend2):
235 addl %eax,%eax /* restore carry from eax */
236L(L1): movl %edx,(%edi) /* store last limb */
/* Return value: turn the carry out of the most significant limb into
   0 or 1 in %eax (sbbl gives 0 or -1, negl flips the sign). */
8f5ca04b
RM
237
238 sbbl %eax,%eax
239 negl %eax
240
8f5ca04b 241 popl %ebx
1ad9da69
UD
242 cfi_adjust_cfa_offset (-4)
243 cfi_restore (ebx)
f9e7bbce 244 popl %ebp
1ad9da69
UD
245 cfi_adjust_cfa_offset (-4)
246 cfi_restore (ebp)
8f5ca04b 247 popl %esi
1ad9da69
UD
248 cfi_adjust_cfa_offset (-4)
249 cfi_restore (esi)
8f5ca04b 250 popl %edi
1ad9da69
UD
251 cfi_adjust_cfa_offset (-4)
252 cfi_restore (edi)
3f02f778 253
8f5ca04b 254 ret
2366713d 255END (__mpn_lshift)