]>
Commit | Line | Data |
---|---|---|
1d2fc9b3 UD |
1 | /* Add two limb vectors of the same length > 0 and store sum in a third |
2 | limb vector. | |
581c785b | 3 | Copyright (C) 1992-2022 Free Software Foundation, Inc. |
1d2fc9b3 UD |
4 | This file is part of the GNU MP Library. |
5 | ||
6 | The GNU MP Library is free software; you can redistribute it and/or modify | |
6d84f89a AJ |
7 | it under the terms of the GNU Lesser General Public License as published by |
8 | the Free Software Foundation; either version 2.1 of the License, or (at your | |
1d2fc9b3 UD |
9 | option) any later version. |
10 | ||
11 | The GNU MP Library is distributed in the hope that it will be useful, but | |
12 | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY | |
6d84f89a | 13 | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public |
1d2fc9b3 UD |
14 | License for more details. |
15 | ||
6d84f89a | 16 | You should have received a copy of the GNU Lesser General Public License |
59ba27a6 | 17 | along with the GNU MP Library; see the file COPYING.LIB. If not, |
5a82c748 | 18 | see <https://www.gnu.org/licenses/>. */ |
1d2fc9b3 | 19 | |
be9ccd27 | 20 | #include <sysdep.h> |
1d2fc9b3 | 21 | #include "asm-syntax.h" |
3f02f778 | 22 | |
/* Byte offsets of the stack arguments from %esp, valid once the two
   register pushes at the start of __mpn_add_n have been done.  PARMS is
   4 + 8: 8 bytes for the two saved registers and, presumably, 4 bytes
   for the return address (confirm against the calling convention).
   Each following argument is one 4-byte slot further up: RES, S1, S2,
   SIZE.  */
2366713d | 23 | #define PARMS 4+8 /* space for 2 saved regs */ |
3f02f778 | 24 | #define RES PARMS |
2366713d JM |
25 | #define S1 RES+4 |
26 | #define S2 S1+4 | |
27 | #define SIZE S2+4 | |
1d2fc9b3 UD |
28 | |
29 | .text | |
30 | #ifdef PIC | |
/* PIC helper: add the word on top of the stack to %eax and return.
   It is reached through "call L(1)" with L(0) placed directly after the
   call, so the word added is the run-time address of L(0); this turns
   the L(0)-relative offset held in %eax into an absolute address.  */
31 | L(1): addl (%esp), %eax | |
32 | ret | |
33 | #endif | |
/* __mpn_add_n: add the limb vectors at S1 and S2 (SIZE limbs each, one
   limb = 4 bytes) and store the sum at RES.  The carry out of the last
   limb is returned in %eax as 0 or 1.
   Stack arguments, in increasing address order: RES, S1, S2, SIZE.
   Register roles: %edi = RES pointer, %esi = S1 pointer, %edx = S2
   pointer, %ecx = loop passes still to run, %eax = scratch and return
   value; %ebx is used only when IBT_ENABLED.
   SIZE must be greater than 0, as stated in the file header.
   NOTE(review): the cfi_* macros are defined outside this file; they
   presumably emit the matching unwind annotations - confirm.  */
2366713d | 34 | ENTRY (__mpn_add_n) |
3f02f778 | 35 | |
/* Preserve %edi and %esi for the caller; both are popped again before
   the final ret.  */
1d2fc9b3 | 36 | pushl %edi |
1ad9da69 | 37 | cfi_adjust_cfa_offset (4) |
1d2fc9b3 | 38 | pushl %esi |
1ad9da69 | 39 | cfi_adjust_cfa_offset (4) |
1d2fc9b3 | 40 | |
/* Load the four arguments.  The RES/S1/S2/SIZE offsets already include
   the space taken by the two pushes above.  */
3f02f778 | 41 | movl RES(%esp),%edi |
1ad9da69 | 42 | cfi_rel_offset (edi, 4) |
3f02f778 | 43 | movl S1(%esp),%esi |
1ad9da69 | 44 | cfi_rel_offset (esi, 0) |
3f02f778 GM |
45 | movl S2(%esp),%edx |
46 | movl SIZE(%esp),%ecx | |
be9ccd27 L |
47 | |
/* With IBT an extra scratch register is needed.  %ebx is pushed only
   after all arguments have been read, so the offsets used above are not
   affected by this third push.  */
48 | #if IBT_ENABLED | |
49 | pushl %ebx | |
50 | cfi_adjust_cfa_offset (4) | |
51 | cfi_rel_offset (ebx, 0) | |
52 | #endif | |
53 | ||
/* Split SIZE for the 8-way unrolled loop: %ecx = SIZE / 8 whole passes,
   %eax = (-SIZE) & 7 = number of leading steps of the unrolled body to
   skip on the first, partial pass.  When %eax is 0, SIZE is a multiple
   of 8 and the loop is entered at its top with no adjustment.
   NOTE(review): the first adcl executed consumes the carry flag as left
   by this setup code (andl on the jz path; shrl, or the later addl in
   the PIC and IBT variants, otherwise).  These are expected to leave it
   clear for in-range values - confirm before reordering anything.  */
1d2fc9b3 UD |
54 | movl %ecx,%eax |
55 | shrl $3,%ecx /* compute count for unrolled loop */ | |
56 | negl %eax | |
57 | andl $7,%eax /* get index where to start loop */ | |
58 | jz L(oop) /* necessary special case for 0 */ | |
/* Partial first pass: count it as one more pass, then move the three
   pointers down by 4 * %eax bytes so that the displacement used by the
   first executed step addresses limb 0 of each vector.  */
59 | incl %ecx /* adjust loop count */ | |
60 | shll $2,%eax /* adjustment for pointers... */ | |
61 | subl %eax,%edi /* ... since they are offset ... */ | |
62 | subl %eax,%esi /* ... by a constant when we ... */ | |
63 | subl %eax,%edx /* ... enter the loop */ | |
64 | shrl $2,%eax /* restore previous value */ | |
/* %ebx = 4 * (%eax - 1): the bytes occupied by the (%eax - 1) _CET_ENDBR
   markers that lie between L(oop) and the marker in front of the target
   step, so that the indirect jump lands on that marker.  */
be9ccd27 L |
65 | #if IBT_ENABLED |
66 | leal -4(,%eax,4),%ebx /* Count for 4-byte endbr32 */ | |
67 | #endif | |
/* Entry address = L(oop) + 9 * %eax - 3.
   NOTE(review): this relies on every step after the first assembling to
   9 bytes and on the first step, which uses no displacement, being 3
   bytes shorter - confirm against the assembled object before changing
   any instruction inside the loop.
   In the PIC variant the offset is formed relative to L(0) and the call
   to L(1) adds the run-time address of L(0) to it.  */
1d2fc9b3 UD |
68 | #ifdef PIC |
69 | /* Calculate start address in loop for PIC. */ | |
70 | leal (L(oop)-L(0)-3)(%eax,%eax,8),%eax | |
71 | call L(1) | |
72 | L(0): | |
73 | #else | |
74 | /* Calculate start address in loop for non-PIC. */ | |
75 | leal (L(oop) - 3)(%eax,%eax,8),%eax | |
be9ccd27 L |
76 | #endif |
77 | #if IBT_ENABLED | |
78 | addl %ebx,%eax /* Adjust for endbr32 */ | |
1d2fc9b3 UD |
79 | #endif |
80 | jmp *%eax /* jump into loop */ | |
/* Unrolled body: eight load / add-with-carry / store steps, one limb
   each, at displacements 0, 4, ..., 28.  The carry flag chains each
   adcl to the next one.  Every step except the first is preceded by
   _CET_ENDBR because it can be the target of the indirect jump above.  */
81 | ALIGN (3) | |
82 | L(oop): movl (%esi),%eax | |
83 | adcl (%edx),%eax | |
84 | movl %eax,(%edi) | |
be9ccd27 | 85 | _CET_ENDBR |
1d2fc9b3 UD |
86 | movl 4(%esi),%eax |
87 | adcl 4(%edx),%eax | |
88 | movl %eax,4(%edi) | |
be9ccd27 | 89 | _CET_ENDBR |
1d2fc9b3 UD |
90 | movl 8(%esi),%eax |
91 | adcl 8(%edx),%eax | |
92 | movl %eax,8(%edi) | |
be9ccd27 | 93 | _CET_ENDBR |
1d2fc9b3 UD |
94 | movl 12(%esi),%eax |
95 | adcl 12(%edx),%eax | |
96 | movl %eax,12(%edi) | |
be9ccd27 | 97 | _CET_ENDBR |
1d2fc9b3 UD |
98 | movl 16(%esi),%eax |
99 | adcl 16(%edx),%eax | |
100 | movl %eax,16(%edi) | |
be9ccd27 | 101 | _CET_ENDBR |
1d2fc9b3 UD |
102 | movl 20(%esi),%eax |
103 | adcl 20(%edx),%eax | |
104 | movl %eax,20(%edi) | |
be9ccd27 | 105 | _CET_ENDBR |
1d2fc9b3 UD |
106 | movl 24(%esi),%eax |
107 | adcl 24(%edx),%eax | |
108 | movl %eax,24(%edi) | |
be9ccd27 | 109 | _CET_ENDBR |
1d2fc9b3 UD |
110 | movl 28(%esi),%eax |
111 | adcl 28(%edx),%eax | |
112 | movl %eax,28(%edi) | |
/* Advance all three pointers by 8 limbs (32 bytes) and count the pass.
   leal does not modify flags and decl leaves the carry flag alone, so
   the carry produced by the last adcl survives into the next pass.  */
113 | leal 32(%edi),%edi | |
114 | leal 32(%esi),%esi | |
115 | leal 32(%edx),%edx | |
116 | decl %ecx | |
117 | jnz L(oop) | |
118 | ||
/* Turn the final carry into the return value: sbbl yields 0 or -1,
   negl makes that 0 or 1.  */
119 | sbbl %eax,%eax | |
120 | negl %eax | |
121 | ||
/* Restore the saved registers in the reverse order of the pushes.  */
be9ccd27 L |
122 | #if IBT_ENABLED |
123 | popl %ebx | |
124 | cfi_adjust_cfa_offset (-4) | |
125 | cfi_restore (ebx) | |
126 | #endif | |
1d2fc9b3 | 127 | popl %esi |
1ad9da69 UD |
128 | cfi_adjust_cfa_offset (-4) |
129 | cfi_restore (esi) | |
1d2fc9b3 | 130 | popl %edi |
1ad9da69 UD |
131 | cfi_adjust_cfa_offset (-4) |
132 | cfi_restore (edi) | |
3f02f778 | 133 | |
1d2fc9b3 | 134 | ret |
2366713d | 135 | END (__mpn_add_n) |