]> git.ipfire.org Git - thirdparty/glibc.git/blob - sysdeps/x86_64/lshift.S
Update x86-64 mpn routines from GMP 5.0.1.
[thirdparty/glibc.git] / sysdeps / x86_64 / lshift.S
1 /* x86-64 __mpn_lshift --
2 Copyright (C) 2007, 2009 Free Software Foundation, Inc.
3 This file is part of the GNU MP Library.
4
5 The GNU MP Library is free software; you can redistribute it and/or modify
6 it under the terms of the GNU Lesser General Public License as published by
7 the Free Software Foundation; either version 2.1 of the License, or (at your
8 option) any later version.
9
10 The GNU MP Library is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
12 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
13 License for more details.
14
15 You should have received a copy of the GNU Lesser General Public License
16 along with the GNU MP Library; see the file COPYING.LIB. If not, write to
17 the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
18 MA 02111-1307, USA. */
19
20 #include "sysdep.h"
21 #include "asm-syntax.h"
22
23 #define rp %rdi /* destination limb pointer: every store below goes through rp */
24 #define up %rsi /* source limb pointer: every limb load below goes through up */
25 #define n %rdx /* limb count; reused below as a biased loop counter */
26 #define cnt %cl /* shift count in bits; the routine below names %cl directly and never references this macro */
27
28 .text
/* __mpn_lshift -- shift an n-limb operand left by %cl bits.

   Inputs (see the register aliases defined above):
     rp  (%rdi)  destination, n 64-bit limbs
     up  (%rsi)  source, n 64-bit limbs
     n   (%rdx)  limb count
     %cl         shift count in bits
   Output:
     %rax        the bits shifted out of the most significant source limb
                 (%rax is zeroed and then filled by a single shld).

   NOTE(review): presumably the C-level prototype is
   mp_limb_t __mpn_lshift (mp_ptr, mp_srcptr, mp_size_t, unsigned int)
   with 1 <= cnt <= 63 -- confirm against the GMP headers.
   NOTE(review): n == 0 is not special-cased; it would take the
   n mod 4 == 0 path and access limbs outside the operands, so callers
   presumably guarantee n >= 1.

   Method: both pointers are first moved to the most significant limb and
   the operand is processed from the most significant limb down to the
   least significant one.  n mod 4 selects one of four entry sequences
   (b00, b01, b10, b11); each preloads up to three limbs, computes the return
   value, biases rp/up/n, and enters the 4-way unrolled loop at the label
   matching its residue (L(00), L(01), L(10), L(top)).  Inside the loop each
   shld merges a limb with the high bits of the next lower limb, and the
   four registers %r11, %r8, %r9, %r10 rotate through the roles of
   "limb being stored" and "limb just loaded".  L(end) stores the last
   three limbs; the least significant one uses a plain shl, so zero bits
   are shifted in at the bottom.  n = 1 and n = 2 are finished by the
   short tails L(le1) and L(le2).
   NOTE(review): the descending traversal presumably allows overlapping
   operands with rp >= up -- confirm against the mpn documentation.

   Registers written: %rax, %rdx, %rsi, %rdi, %r8-%r11 and the flags.
   %rcx is only read.  The stack is not touched.  */
29 ENTRY (__mpn_lshift)
30 lea -8(rp,n,8), rp /* rp -> most significant destination limb */
31 lea -8(up,n,8), up /* up -> most significant source limb */
32
33 mov %edx, %eax
34 and $3, %eax /* eax = n mod 4 (only the low bits of n matter here) */
35 jne L(nb00) /* n mod 4 != 0 */
36 L(b00): /* n = 4, 8, 12, ... */
37 mov (up), %r10 /* most significant source limb */
38 mov -8(up), %r11
39 xor %eax, %eax
40 shld %cl, %r10, %rax /* return value: bits shifted out of the top limb */
41 mov -16(up), %r8
42 lea 24(rp), rp /* bias rp so that -24(rp) at L(00) is the top destination limb */
43 sub $4, n /* bias the counter for the sub/jnc test at the loop bottom */
44 jmp L(00)
45
46 L(nb00):/* n mod 4 is 1, 2 or 3; n = 1, 5, 9, ... falls through to b01 */
47 cmp $2, %eax
48 jae L(nb01) /* n mod 4 is 2 or 3 */
49 L(b01): mov (up), %r9 /* n = 1, 5, 9, ...: most significant source limb */
50 xor %eax, %eax
51 shld %cl, %r9, %rax /* return value: bits shifted out of the top limb */
52 sub $2, n
53 jb L(le1) /* borrow means n was 1: single-limb operand */
54 mov -8(up), %r10
55 mov -16(up), %r11
56 lea -8(up), up /* bias up so that the loads from L(01) on fetch the next unread limbs */
57 lea 16(rp), rp /* bias rp so that -16(rp) at L(01) is the top destination limb */
58 jmp L(01)
59 L(le1): shl %cl, %r9 /* n == 1: the only limb, zero bits shifted in */
60 mov %r9, (rp)
61 ret
62
63 L(nb01):/* n mod 4 is 2 or 3; flags are still those of the cmp $2 above */
64 jne L(b11) /* n mod 4 == 3 */
65 L(b10): mov (up), %r8 /* n = 2, 6, 10, ...: most significant source limb */
66 mov -8(up), %r9
67 xor %eax, %eax
68 shld %cl, %r8, %rax /* return value: bits shifted out of the top limb */
69 sub $3, n
70 jb L(le2) /* borrow means n was 2: two-limb operand */
71 mov -16(up), %r10
72 lea -16(up), up /* bias up so that the loads from L(10) on fetch the next unread limbs */
73 lea 8(rp), rp /* bias rp so that -8(rp) at L(10) is the top destination limb */
74 jmp L(10)
75 L(le2): shld %cl, %r9, %r8 /* n == 2: high limb takes the top bits of the low limb */
76 mov %r8, (rp)
77 shl %cl, %r9 /* low limb, zero bits shifted in */
78 mov %r9, -8(rp)
79 ret
80
81 .p2align 4 /* performance critical! */
82 L(b11): /* n = 3, 7, 11, ... */
83 mov (up), %r11 /* most significant source limb */
84 mov -8(up), %r8
85 xor %eax, %eax
86 shld %cl, %r11, %rax /* return value: bits shifted out of the top limb */
87 mov -16(up), %r9
88 lea -24(up), up /* step past the three limbs already loaded */
89 sub $4, n
90 jb L(end) /* borrow means n was 3: the three loaded limbs are all there is */
91
92 .p2align 4
/* Main loop: four limbs per iteration, stored at (rp), -8(rp), -16(rp)
   and -24(rp) while the next four source limbs are loaded.  L(10), L(01)
   and L(00) are also entry points for the other residues of n mod 4.  */
93 L(top): shld %cl, %r8, %r11
94 mov (up), %r10
95 mov %r11, (rp)
96 L(10): shld %cl, %r9, %r8
97 mov -8(up), %r11
98 mov %r8, -8(rp)
99 L(01): shld %cl, %r10, %r9
100 mov -16(up), %r8
101 mov %r9, -16(rp)
102 L(00): shld %cl, %r11, %r10
103 mov -24(up), %r9
104 mov %r10, -24(rp)
105 add $-32, up /* both pointers move down by four limbs */
106 lea -32(rp), rp
107 sub $4, n /* sets the carry flag tested by the jnc below */
108 jnc L(top) /* repeat until the biased counter borrows */
109
/* Tail: %r11, %r8 and %r9 hold the last three source limbs.  */
110 L(end): shld %cl, %r8, %r11
111 mov %r11, (rp)
112 shld %cl, %r9, %r8
113 mov %r8, -8(rp)
114 shl %cl, %r9 /* least significant limb, zero bits shifted in */
115 mov %r9, -16(rp)
116 ret
117 END (__mpn_lshift)