/* AMD64 __mpn_mul_1 -- Multiply a limb vector with a limb and store
   the result in a second limb vector.
   Copyright (C) 2003-2021 Free Software Foundation, Inc.
   This file is part of the GNU MP Library.

   The GNU MP Library is free software; you can redistribute it and/or modify
   it under the terms of the GNU Lesser General Public License as published by
   the Free Software Foundation; either version 2.1 of the License, or (at your
   option) any later version.

   The GNU MP Library is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
   License for more details.

   You should have received a copy of the GNU Lesser General Public License
   along with the GNU MP Library; see the file COPYING.LIB.  If not,
   see <https://www.gnu.org/licenses/>.  */
19 | |
20 | #include <sysdep.h> | |
21 | #include "asm-syntax.h" | |
22 | ||
0959ffc9 UD |
23 | #define rp %rdi |
24 | #define up %rsi | |
25 | #define n_param %rdx | |
26 | #define vl %rcx | |
27 | ||
28 | #define n %r11 | |
29 | ||
893a5fd4 UD |
30 | .text |
31 | ENTRY (__mpn_mul_1) | |
0959ffc9 UD |
32 | push %rbx |
33 | cfi_adjust_cfa_offset (8) | |
34 | cfi_rel_offset (%rbx, 0) | |
35 | xor %r10, %r10 | |
36 | mov (up), %rax /* read first u limb early */ | |
37 | mov n_param, %rbx /* move away n from rdx, mul uses it */ | |
38 | mul vl | |
39 | mov %rbx, %r11 | |
40 | ||
41 | add %r10, %rax | |
42 | adc $0, %rdx | |
43 | ||
44 | and $3, %ebx | |
45 | jz L(b0) | |
46 | cmp $2, %ebx | |
47 | jz L(b2) | |
48 | jg L(b3) | |
49 | ||
50 | L(b1): dec n | |
51 | jne L(gt1) | |
52 | mov %rax, (rp) | |
53 | jmp L(ret) | |
54 | L(gt1): lea 8(up,n,8), up | |
55 | lea -8(rp,n,8), rp | |
56 | neg n | |
57 | xor %r10, %r10 | |
58 | xor %ebx, %ebx | |
59 | mov %rax, %r9 | |
60 | mov (up,n,8), %rax | |
61 | mov %rdx, %r8 | |
62 | jmp L(L1) | |
63 | ||
64 | L(b0): lea (up,n,8), up | |
65 | lea -16(rp,n,8), rp | |
66 | neg n | |
67 | xor %r10, %r10 | |
68 | mov %rax, %r8 | |
69 | mov %rdx, %rbx | |
70 | jmp L(L0) | |
71 | ||
72 | L(b3): lea -8(up,n,8), up | |
73 | lea -24(rp,n,8), rp | |
74 | neg n | |
75 | mov %rax, %rbx | |
76 | mov %rdx, %r10 | |
77 | jmp L(L3) | |
78 | ||
79 | L(b2): lea -16(up,n,8), up | |
80 | lea -32(rp,n,8), rp | |
81 | neg n | |
82 | xor %r8, %r8 | |
83 | xor %ebx, %ebx | |
84 | mov %rax, %r10 | |
85 | mov 24(up,n,8), %rax | |
86 | mov %rdx, %r9 | |
87 | jmp L(L2) | |
88 | ||
89 | .p2align 4 | |
90 | L(top): mov %r10, (rp,n,8) | |
91 | add %rax, %r9 | |
92 | mov (up,n,8), %rax | |
93 | adc %rdx, %r8 | |
94 | mov $0, %r10d | |
95 | L(L1): mul vl | |
96 | mov %r9, 8(rp,n,8) | |
97 | add %rax, %r8 | |
98 | adc %rdx, %rbx | |
99 | L(L0): mov 8(up,n,8), %rax | |
100 | mul vl | |
101 | mov %r8, 16(rp,n,8) | |
102 | add %rax, %rbx | |
103 | adc %rdx, %r10 | |
104 | L(L3): mov 16(up,n,8), %rax | |
105 | mul vl | |
106 | mov %rbx, 24(rp,n,8) | |
107 | mov $0, %r8d # zero | |
108 | mov %r8, %rbx # zero | |
109 | add %rax, %r10 | |
110 | mov 24(up,n,8), %rax | |
111 | mov %r8, %r9 # zero | |
112 | adc %rdx, %r9 | |
113 | L(L2): mul vl | |
114 | add $4, n | |
115 | js L(top) | |
116 | ||
117 | mov %r10, (rp,n,8) | |
118 | add %rax, %r9 | |
119 | adc %r8, %rdx | |
120 | mov %r9, 8(rp,n,8) | |
121 | add %r8, %rdx | |
122 | L(ret): mov %rdx, %rax | |
123 | ||
124 | pop %rbx | |
125 | cfi_adjust_cfa_offset (-8) | |
126 | cfi_restore (%rbx) | |
893a5fd4 UD |
127 | ret |
128 | END (__mpn_mul_1) |