]>
Commit | Line | Data |
---|---|---|
0959ffc9 UD |
1 | /* x86-64 __mpn_add_n -- Add two limb vectors of the same length > 0 and store |
2 | sum in a third limb vector. | |
b168057a | 3 | Copyright (C) 2006-2015 Free Software Foundation, Inc. |
7fd23f1f UD |
4 | This file is part of the GNU MP Library. |
5 | ||
6 | The GNU MP Library is free software; you can redistribute it and/or modify | |
7 | it under the terms of the GNU Lesser General Public License as published by | |
8 | the Free Software Foundation; either version 2.1 of the License, or (at your | |
9 | option) any later version. | |
10 | ||
11 | The GNU MP Library is distributed in the hope that it will be useful, but | |
12 | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY | |
13 | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public | |
14 | License for more details. | |
15 | ||
16 | You should have received a copy of the GNU Lesser General Public License | |
59ba27a6 PE |
17 | along with the GNU MP Library; see the file COPYING.LIB. If not, |
18 | see <http://www.gnu.org/licenses/>. */ | |
7fd23f1f UD |
19 | |
20 | #include "sysdep.h" | |
21 | #include "asm-syntax.h" | |
22 | ||
0959ffc9 UD |
/* Register aliases used by the routine below.  rp is the destination
   pointer; up and vp are the two source pointers; n is the limb count.  */
23 | #define rp %rdi |
24 | #define up %rsi | |
25 | #define vp %rdx | |
26 | #define n %rcx | |
/* cy is not referenced by name in the code visible here; the routine uses
   %r8 directly as its carry-in scratch register.  */
27 | #define cy %r8 | |
28 | ||
/* Defaults applied only when func is not already defined: the routine is
   named __mpn_add_n and ADCSBB expands to adc.
   NOTE(review): presumably a sibling file pre-defines func and ADCSBB before
   including this one to build another variant -- confirm.  */
29 | #ifndef func | |
30 | # define func __mpn_add_n | |
31 | # define ADCSBB adc | |
32 | #endif | |
33 | ||
7fd23f1f | 34 | .text
0959ffc9 UD |
/* func: combine the n-limb vectors at up and vp limb by limb with ADCSBB
   (adc unless overridden) and store the n result limbs at rp.  Limbs are
   8 bytes wide: every indexed access uses scale 8.

   In:    rp = %rdi, up = %rsi, vp = %rdx, n = %rcx.  n must be > 0, because
          (up) and (vp) are read unconditionally on entry.
   Out:   %eax = carry out of the most significant limb (0 or 1).
   Clobb: %rax, %rcx, %rdx, %rsi, %rdi, %r8-%r11, flags.

   The loop is unrolled four times and n mod 4 selects the entry point.
   CF holds the running carry from one ADCSBB to the next, so only
   instructions that leave CF alone (mov, lea, jmp, jrcxz) appear between
   them.  */
35 | ENTRY (func) |
/* %r8 = 0: this entry point has no carry-in.  */
36 | xor %r8, %r8 | |
/* Preload the least significant limb of each operand.  */
37 | mov (up), %r10 | |
38 | mov (vp), %r11 | |
39 | ||
/* Re-base the pointers at the high end so that a negative index in n can
   count up towards zero: up and vp point at their last limb, rp at its
   second-to-last limb.  */
40 | lea -8(up,n,8), up | |
41 | lea -8(vp,n,8), vp | |
42 | lea -16(rp,n,8), rp | |
/* %eax = n mod 4 (only the low two bits survive the mask below); n = -n.  */
43 | mov %ecx, %eax | |
44 | neg n | |
45 | and $3, %eax | |
46 | je L(b00) | |
/* n = -(n - n mod 4), a non-positive multiple of 4.  */
47 | add %rax, n /* clear low rcx bits for jrcxz */ | |
/* Dispatch on n mod 4: 1 -> b01, 2 -> b10, 3 falls through to b11.  */
48 | cmp $2, %eax | |
49 | jl L(b01) | |
50 | je L(b10) | |
51 | ||
/* n mod 4 == 3: the first limb pair is already in %r10/%r11.  */
52 | L(b11): shr %r8 /* CF = bit 0 of %r8, which is 0 here (%r8 was zeroed at entry) */ | |
53 | jmp L(e11) | |
54 | ||
/* n mod 4 == 0: L(e00) expects the pending pair in %r8/%r9, so move it
   there after %r8 has been shifted into CF.  n still equals -n here, so
   advance it by 4 with lea, which does not disturb CF.  */
55 | L(b00): shr %r8 /* CF = bit 0 of %r8 (0 here) */ | |
56 | mov %r10, %r8 | |
57 | mov %r11, %r9 | |
58 | lea 4(n), n | |
59 | jmp L(e00) | |
60 | ||
/* n mod 4 == 1: the pending pair stays in %r10/%r11; go straight to the
   loop-exit test.  */
61 | L(b01): shr %r8 /* CF = bit 0 of %r8 (0 here) */ | |
62 | jmp L(e01) | |
63 | ||
/* n mod 4 == 2: L(e10) expects the pending pair in %r8/%r9.  */
64 | L(b10): shr %r8 /* CF = bit 0 of %r8 (0 here) */ | |
65 | mov %r10, %r8 | |
66 | mov %r11, %r9 | |
67 | jmp L(e10) | |
68 | ||
/* Final limb: reached through jrcxz with n == 0 and the last limb pair in
   %r10/%r11.  8(rp) is the most significant result limb.  */
69 | L(end): ADCSBB %r11, %r10 | |
70 | mov %r10, 8(rp) | |
71 | mov %ecx, %eax /* clear eax, ecx contains 0 */ | |
/* mov left CF untouched, so this turns the final carry into the return
   value: %eax = 0 + 0 + CF.  */
72 | adc %eax, %eax |
7fd23f1f | 73 | ret |
0959ffc9 UD |
74 | |
75 | .p2align 4 | |
/* Main loop, four limbs per pass.  Each stage loads the next limb pair into
   one register pair, then combines and stores the pair loaded by the
   previous stage, alternating between %r8/%r9 and %r10/%r11.  The e00, e11,
   e10 and e01 labels are the entry points chosen from n mod 4.  */
76 | L(top): | |
77 | mov -24(up,n,8), %r8 | |
78 | mov -24(vp,n,8), %r9 | |
79 | ADCSBB %r11, %r10 | |
80 | mov %r10, -24(rp,n,8) | |
81 | L(e00): | |
82 | mov -16(up,n,8), %r10 | |
83 | mov -16(vp,n,8), %r11 | |
84 | ADCSBB %r9, %r8 | |
85 | mov %r8, -16(rp,n,8) | |
86 | L(e11): | |
87 | mov -8(up,n,8), %r8 | |
88 | mov -8(vp,n,8), %r9 | |
89 | ADCSBB %r11, %r10 | |
90 | mov %r10, -8(rp,n,8) | |
91 | L(e10): | |
92 | mov (up,n,8), %r10 | |
93 | mov (vp,n,8), %r11 | |
94 | ADCSBB %r9, %r8 | |
95 | mov %r8, (rp,n,8) | |
96 | L(e01): | |
/* Leave the loop once the index reaches 0; jrcxz tests %rcx without
   touching flags.  Otherwise step n by 4 with lea so CF is kept.  */
97 | jrcxz L(end) | |
98 | lea 4(n), n | |
99 | jmp L(top) | |
100 | END (func) |