]> git.ipfire.org Git - thirdparty/glibc.git/blob - sysdeps/x86_64/add_n.S
f0b4c3f78c6530fa5a720a40fe5de20313133470
[thirdparty/glibc.git] / sysdeps / x86_64 / add_n.S
1 /* x86-64 __mpn_add_n -- Add two limb vectors of the same length > 0 and store
2 sum in a third limb vector.
3 Copyright (C) 2006, 2007 Free Software Foundation, Inc.
4 This file is part of the GNU MP Library.
5
6 The GNU MP Library is free software; you can redistribute it and/or modify
7 it under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or (at your
9 option) any later version.
10
11 The GNU MP Library is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
14 License for more details.
15
16 You should have received a copy of the GNU Lesser General Public License
17 along with the GNU MP Library; see the file COPYING.LIB. If not, write to
18 the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
19 MA 02111-1307, USA. */
20
21 #include "sysdep.h"
22 #include "asm-syntax.h"
23
/* NOTE(review): the leading decimal on every line below is residue from the
   gitweb listing this file was extracted from, not part of the real source.
   It is left byte-for-byte untouched here.  */

/* Register roles (System V AMD64 argument order).  */
24 #define rp %rdi		/* 1st arg: result limb vector */
25 #define up %rsi		/* 2nd arg: first source limb vector */
26 #define vp %rdx		/* 3rd arg: second source limb vector */
27 #define n %rcx		/* 4th arg: limb count, > 0 */
28 #define cy %r8		/* carry-in slot (GMP `_nc' convention); zeroed at entry here */
29 
/* NOTE(review): presumably a sibling file (sub_n.S) includes this one with
   func = __mpn_sub_n and ADCSBB = sbb predefined -- TODO confirm.  */
30 #ifndef func
31 # define func __mpn_add_n
32 # define ADCSBB adc
33 #endif
34 
/* mp_limb_t __mpn_add_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)

   Add the n-limb vectors {up,n} and {vp,n}, store the n-limb result at rp,
   and return the final carry (0 or 1) in %rax.

   Technique: up/vp/rp are advanced to (near) the ends of the vectors and n
   is negated, so the induction variable counts upward toward zero.  The main
   loop is a 4x-unrolled, software-pipelined ADCSBB chain: two limb pairs are
   in flight at once (r10/r11 and r8/r9), and each pair is stored one unroll
   step after it is loaded -- hence rp's extra -8 bias relative to up/vp.
   The n mod 4 remainder is handled by entering the unrolled loop at one of
   L(e00)/L(e01)/L(e10)/L(e11).

   Invariant: CF stays live across the entire loop -- only mov, lea, jrcxz
   and jmp (none of which write CF) ever separate consecutive ADCSBB
   instructions.  */
35 .text
36 ENTRY (func)
37 xor %r8, %r8		/* incoming carry = 0 (plain, non-_nc entry) */
38 mov (up), %r10		/* preload limb 0 of {up,n} */
39 mov (vp), %r11		/* preload limb 0 of {vp,n} */
40 
	/* Bias the pointers: up/vp -> &src[n-1]; rp is biased 8 bytes
	   further back because stores lag loads by one pipeline step.  */
41 lea -8(up,n,8), up
42 lea -8(vp,n,8), vp
43 lea -16(rp,n,8), rp
44 mov %ecx, %eax		/* copy low bits of n ... */
45 neg n		/* n := -n; it will count up to 0 */
46 and $3, %eax		/* ... eax = (original n) mod 4 */
47 je L(b00)		/* n multiple of 4: no tail fixup needed */
48 add %rax, n /* clear low rcx bits for jrcxz */
49 cmp $2, %eax		/* dispatch on n mod 4 = 1, 2 or 3 */
50 jl L(b01)
51 je L(b10)
52 
	/* Each entry stub below does `shr %r8' to move bit 0 of r8 (the
	   carry-in, always 0 on this entry path) into CF before joining
	   the adc chain.  */
53 L(b11): shr %r8 /* CF := carry-in (0 here) */
54 jmp L(e11)
55 
56 L(b00): shr %r8 /* CF := carry-in (0 here) */
57 mov %r10, %r8		/* hand the preloaded pair to the r8/r9 slot */
58 mov %r11, %r9
59 lea 4(n), n		/* limb 0 already loaded; step past it */
60 jmp L(e00)
61 
62 L(b01): shr %r8 /* CF := carry-in (0 here) */
63 jmp L(e01)
64 
65 L(b10): shr %r8 /* CF := carry-in (0 here) */
66 mov %r10, %r8		/* hand the preloaded pair to the r8/r9 slot */
67 mov %r11, %r9
68 jmp L(e10)
69 
	/* Loop exit: one add of the in-flight r10/r11 pair is still
	   pending; finish it, store the top limb, and return CF.  */
70 L(end): ADCSBB %r11, %r10
71 mov %r10, 8(rp)		/* store the most significant limb */
72 mov %ecx, %eax /* clear eax, ecx contains 0; mov leaves CF intact */
73 adc %eax, %eax		/* rax := final carry (0 or 1) */
74 ret
75 
76 .p2align 4
	/* Main loop: four limbs per iteration, two pairs pipelined.
	   Offsets are relative to the biased pointers and negative n.  */
77 L(top):
78 mov -24(up,n,8), %r8
79 mov -24(vp,n,8), %r9
80 ADCSBB %r11, %r10
81 mov %r10, -24(rp,n,8)
82 L(e00):
83 mov -16(up,n,8), %r10
84 mov -16(vp,n,8), %r11
85 ADCSBB %r9, %r8
86 mov %r8, -16(rp,n,8)
87 L(e11):
88 mov -8(up,n,8), %r8
89 mov -8(vp,n,8), %r9
90 ADCSBB %r11, %r10
91 mov %r10, -8(rp,n,8)
92 L(e10):
93 mov (up,n,8), %r10
94 mov (vp,n,8), %r11
95 ADCSBB %r9, %r8
96 mov %r8, (rp,n,8)
97 L(e01):
98 jrcxz L(end)		/* counter hit 0? (jrcxz does not write CF) */
99 lea 4(n), n		/* advance without disturbing CF */
100 jmp L(top)
101 END (func)