]>
Commit | Line | Data |
---|---|---|
7def3d92 RM |
1 | # Alpha __mpn_add_n -- Add two limb vectors of the same length > 0 and |
2 | # store sum in a third limb vector. | |
3 | ||
04277e02 | 4 | # Copyright (C) 1995-2019 Free Software Foundation, Inc. |
7def3d92 RM |
5 | |
6 | # This file is part of the GNU MP Library. | |
7 | ||
8 | # The GNU MP Library is free software; you can redistribute it and/or modify | |
f01ec467 AJ |
9 | # it under the terms of the GNU Lesser General Public License as published by |
10 | # the Free Software Foundation; either version 2.1 of the License, or (at your | |
7def3d92 RM |
11 | # option) any later version. |
12 | ||
13 | # The GNU MP Library is distributed in the hope that it will be useful, but | |
14 | # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY | |
f01ec467 | 15 | # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public |
7def3d92 RM |
16 | # License for more details. |
17 | ||
f01ec467 | 18 | # You should have received a copy of the GNU Lesser General Public License |
5a82c748 | 19 | # along with the GNU MP Library. If not, see <https://www.gnu.org/licenses/>. |
7def3d92 RM |
20 | |
21 | ||
22 | # INPUT PARAMETERS | |
23 | # res_ptr $16 (limb vector that receives the sums) | |
24 | # s1_ptr $17 (first source limb vector) | |
25 | # s2_ptr $18 (second source limb vector) | |
26 | # size $19 (number of 8-byte limbs to add) | |
#
# RETURN VALUE
# $0 carry out of the last (highest-addressed) limb, 0 or 1
#
# REGISTER USE
# $0-$3 limbs loaded through s2_ptr; $4-$7 limbs loaded through s1_ptr
# $20-$23 finished sum limbs waiting to be stored through res_ptr
# $25 running carry (0 or 1)
# $28 "cy add" result: an s2 limb plus the carry-in
# $8 carry out of the "cy add"
#
# Each limb after the first is added in two steps: s2 limb + carry-in (the
# "cy add"), then + s1 limb (the "main add").  A cmpult after each step
# detects wrap-around.  The two carries are OR-ed together; they can never
# both be set, because the cy add only wraps when its result is 0, and
# adding the s1 limb to 0 cannot wrap.
#
# NOTE(review): .set noat is presumably needed because $28 is used
# explicitly as a scratch register below -- confirm against the GNU as
# Alpha documentation.
27 | ||
28 | .set noreorder | |
29 | .set noat | |
30 | .text | |
31 | .align 3 | |
32 | .globl __mpn_add_n | |
33 | .ent __mpn_add_n | |
34 | __mpn_add_n: | |
35 | .frame $30,0,$26,0 | |
36 | ||
3de9f02e RM |
37 | or $31,$31,$25 # clear cy: $25 = 0, no carry into the first limb |
38 | subq $19,4,$19 # decr loop cnt: $19 = size - 4 | |
39 | blt $19,.Lend2 # if less than 4 limbs, goto 2nd loop | |
40 | # Start software pipeline for 1st loop: load the first 4 limbs of each source and begin adds 1 and 2 | |
41 | ldq $0,0($18) | |
42 | ldq $1,8($18) | |
43 | ldq $4,0($17) | |
7def3d92 | 44 | ldq $5,8($17) |
3de9f02e RM |
45 | addq $17,32,$17 # update s1_ptr (later s1 loads use negative offsets) |
46 | ldq $2,16($18) | |
47 | addq $0,$4,$20 # 1st main add (no cy add needed: carry-in is 0) | |
48 | ldq $3,24($18) | |
49 | subq $19,4,$19 # decr loop cnt | |
50 | ldq $6,-16($17) | |
51 | cmpult $20,$0,$25 # compute cy from last add: 1 if the sum wrapped below its addend | |
52 | ldq $7,-8($17) | |
53 | addq $1,$25,$28 # cy add: s2 limb + carry-in | |
54 | addq $18,32,$18 # update s2_ptr | |
55 | addq $5,$28,$21 # 2nd main add | |
56 | cmpult $28,$25,$8 # compute cy from last add: $8 = carry out of the cy add | |
57 | blt $19,.Lend1 # if less than 4 limbs remain, jump | |
58 | # 1st loop handles groups of 4 limbs in a software pipeline | |
# State on entry to .Loop and to .Lend1:
# $20 and $21 hold sum limbs 1 and 2 of the current group, not yet stored;
# $8 and $28 are what is still needed to finish the carry out of limb 2;
# $2,$3 and $6,$7 hold the source limbs for adds 3 and 4.
# Each pass finishes adds 3 and 4, stores all four sums, and starts adds 1
# and 2 of the next group while loading that group's limbs.
7def3d92 | 59 | .align 4 |
3de9f02e RM |
60 | .Loop: cmpult $21,$28,$25 # compute cy from last add: carry out of the 2nd main add |
61 | ldq $0,0($18) | |
62 | or $8,$25,$25 # combine cy from the two adds | |
63 | ldq $1,8($18) | |
64 | addq $2,$25,$28 # cy add | |
65 | ldq $4,0($17) | |
66 | addq $28,$6,$22 # 3rd main add | |
7def3d92 | 67 | ldq $5,8($17) |
3de9f02e RM |
68 | cmpult $28,$25,$8 # compute cy from last add |
69 | cmpult $22,$28,$25 # compute cy from last add | |
7def3d92 | 70 | stq $20,0($16) |
3de9f02e | 71 | or $8,$25,$25 # combine cy from the two adds |
7def3d92 | 72 | stq $21,8($16) |
3de9f02e RM |
73 | addq $3,$25,$28 # cy add |
74 | addq $28,$7,$23 # 4th main add | |
75 | cmpult $28,$25,$8 # compute cy from last add | |
76 | cmpult $23,$28,$25 # compute cy from last add | |
77 | addq $17,32,$17 # update s1_ptr | |
78 | or $8,$25,$25 # combine cy from the two adds | |
79 | addq $16,32,$16 # update res_ptr (sums 3 and 4 are stored at negative offsets) | |
80 | addq $0,$25,$28 # cy add | |
81 | ldq $2,16($18) | |
82 | addq $4,$28,$20 # 1st main add of the next group | |
83 | ldq $3,24($18) | |
84 | cmpult $28,$25,$8 # compute cy from last add | |
85 | ldq $6,-16($17) | |
86 | cmpult $20,$28,$25 # compute cy from last add | |
87 | ldq $7,-8($17) | |
88 | or $8,$25,$25 # combine cy from the two adds | |
89 | subq $19,4,$19 # decr loop cnt | |
90 | stq $22,-16($16) | |
91 | addq $1,$25,$28 # cy add | |
92 | stq $23,-8($16) | |
93 | addq $5,$28,$21 # 2nd main add of the next group | |
94 | addq $18,32,$18 # update s2_ptr | |
95 | cmpult $28,$25,$8 # compute cy from last add | |
96 | bge $19,.Loop | |
97 | # Finish software pipeline for 1st loop: complete adds 3 and 4 and store all four sums, with no further loads | |
98 | .Lend1: cmpult $21,$28,$25 # compute cy from last add | |
99 | or $8,$25,$25 # combine cy from the two adds | |
100 | addq $2,$25,$28 # cy add | |
101 | addq $28,$6,$22 # 3rd main add | |
102 | cmpult $28,$25,$8 # compute cy from last add | |
103 | cmpult $22,$28,$25 # compute cy from last add | |
104 | stq $20,0($16) | |
105 | or $8,$25,$25 # combine cy from the two adds | |
106 | stq $21,8($16) | |
107 | addq $3,$25,$28 # cy add | |
108 | addq $28,$7,$23 # 4th main add | |
109 | cmpult $28,$25,$8 # compute cy from last add | |
110 | cmpult $23,$28,$25 # compute cy from last add | |
111 | or $8,$25,$25 # combine cy from the two adds | |
112 | addq $16,32,$16 # update res_ptr | |
113 | stq $22,-16($16) | |
114 | stq $23,-8($16) | |
115 | .Lend2: addq $19,4,$19 # restore loop cnt: $19 = limbs still to add (0-3), $25 = carry so far | |
116 | beq $19,.Lret | |
117 | # Start software pipeline for 2nd loop: preload the next limb of each source | |
118 | ldq $0,0($18) | |
119 | ldq $4,0($17) | |
120 | subq $19,1,$19 | |
121 | beq $19,.Lend0 | |
122 | # 2nd loop handles remaining 1-3 limbs, one per pass; the last one is added at .Lend0 | |
123 | .align 4 | |
124 | .Loop0: addq $0,$25,$28 # cy add | |
125 | ldq $0,8($18) | |
126 | addq $4,$28,$20 # main add | |
127 | ldq $4,8($17) | |
128 | addq $18,8,$18 | |
129 | cmpult $28,$25,$8 # compute cy from last add | |
130 | addq $17,8,$17 | |
131 | stq $20,0($16) | |
132 | cmpult $20,$28,$25 # compute cy from last add | |
133 | subq $19,1,$19 # decr loop cnt | |
134 | or $8,$25,$25 # combine cy from the two adds | |
135 | addq $16,8,$16 | |
136 | bne $19,.Loop0 | |
137 | .Lend0: addq $0,$25,$28 # cy add for the final limb (already loaded, no further loads) | |
138 | addq $4,$28,$20 # main add | |
139 | cmpult $28,$25,$8 # compute cy from last add | |
140 | cmpult $20,$28,$25 # compute cy from last add | |
141 | stq $20,0($16) | |
142 | or $8,$25,$25 # combine cy from the two adds | |
7def3d92 | 143 | |
3de9f02e | 144 | .Lret: or $25,$31,$0 # return cy: copy the final carry into $0 |
7def3d92 | 145 | ret $31,($26),1 |
7def3d92 | 146 | .end __mpn_add_n |