]>
Commit | Line | Data |
---|---|---|
a945c346 | 1 | ;; Copyright (C) 2016-2024 Free Software Foundation, Inc. |
3d6275e3 AS |
2 | |
3 | ;; This file is free software; you can redistribute it and/or modify it under | |
4 | ;; the terms of the GNU General Public License as published by the Free | |
5 | ;; Software Foundation; either version 3 of the License, or (at your option) | |
6 | ;; any later version. | |
7 | ||
8 | ;; This file is distributed in the hope that it will be useful, but WITHOUT | |
9 | ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
10 | ;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
11 | ;; for more details. | |
12 | ||
13 | ;; You should have received a copy of the GNU General Public License | |
14 | ;; along with GCC; see the file COPYING3. If not see | |
15 | ;; <http://www.gnu.org/licenses/>. | |
16 | ||
17 | ;; {{{ Vector iterators | |
cfdc45f7 | 18 | ; SV iterators include both scalar and vector modes. |
3d6275e3 | 19 | |
1165109b | 20 | ; Vector modes for specific types |
; One iterator per element type.  Each lists the 2-, 4-, 8-, 16-, 32- and
; 64-lane vector mode of that single element type (QI, HI, HF, SI, SF, DI, DF).
1165109b | 21 | (define_mode_iterator V_QI |
45381d6f | 22 | [V2QI V4QI V8QI V16QI V32QI V64QI]) |
1165109b | 23 | (define_mode_iterator V_HI |
45381d6f | 24 | [V2HI V4HI V8HI V16HI V32HI V64HI]) |
1165109b | 25 | (define_mode_iterator V_HF |
45381d6f | 26 | [V2HF V4HF V8HF V16HF V32HF V64HF]) |
1165109b | 27 | (define_mode_iterator V_SI |
45381d6f | 28 | [V2SI V4SI V8SI V16SI V32SI V64SI]) |
1165109b | 29 | (define_mode_iterator V_SF |
45381d6f | 30 | [V2SF V4SF V8SF V16SF V32SF V64SF]) |
1165109b | 31 | (define_mode_iterator V_DI |
45381d6f | 32 | [V2DI V4DI V8DI V16DI V32DI V64DI]) |
1165109b | 33 | (define_mode_iterator V_DF |
45381d6f AS |
34 | [V2DF V4DF V8DF V16DF V32DF V64DF]) |
35 | ||
dc941ea9 | 36 | ; Vector modes for sub-dword modes |
; V_QIHI: the QI-element and HI-element vector modes at each of the six
; lane counts (2 to 64).
03876953 | 37 | (define_mode_iterator V_QIHI |
45381d6f AS |
38 | [V2QI V2HI |
39 | V4QI V4HI | |
40 | V8QI V8HI | |
41 | V16QI V16HI | |
42 | V32QI V32HI | |
43 | V64QI V64HI]) | |
dc941ea9 | 44 | |
3d6275e3 | 45 | ; Vector modes for one vector register |
; V_1REG: vector modes whose elements are QI, HI, SI, HF or SF, at each of
; the six lane counts.
03876953 | 46 | (define_mode_iterator V_1REG |
45381d6f AS |
47 | [V2QI V2HI V2SI V2HF V2SF |
48 | V4QI V4HI V4SI V4HF V4SF | |
49 | V8QI V8HI V8SI V8HF V8SF | |
50 | V16QI V16HI V16SI V16HF V16SF | |
51 | V32QI V32HI V32SI V32HF V32SF | |
52 | V64QI V64HI V64SI V64HF V64SF]) | |
; V_1REG_ALT: exactly the same mode list as V_1REG under a second name.
; NOTE(review): presumably this lets one pattern iterate over two
; one-register modes independently - confirm against its users.
db80ccd3 AS |
53 | (define_mode_iterator V_1REG_ALT |
54 | [V2QI V2HI V2SI V2HF V2SF | |
55 | V4QI V4HI V4SI V4HF V4SF | |
56 | V8QI V8HI V8SI V8HF V8SF | |
57 | V16QI V16HI V16SI V16HF V16SF | |
58 | V32QI V32HI V32SI V32HF V32SF | |
59 | V64QI V64HI V64SI V64HF V64SF]) | |
3d6275e3 | 60 | |
; V_INT_1REG: the integer subset of the one-register modes (QI, HI and SI
; elements).  V_INT_1REG_ALT repeats the same list under a second name.
03876953 | 61 | (define_mode_iterator V_INT_1REG |
45381d6f AS |
62 | [V2QI V2HI V2SI |
63 | V4QI V4HI V4SI | |
64 | V8QI V8HI V8SI | |
65 | V16QI V16HI V16SI | |
66 | V32QI V32HI V32SI | |
67 | V64QI V64HI V64SI]) | |
03876953 | 68 | (define_mode_iterator V_INT_1REG_ALT |
45381d6f AS |
69 | [V2QI V2HI V2SI |
70 | V4QI V4HI V4SI | |
71 | V8QI V8HI V8SI | |
72 | V16QI V16HI V16SI | |
73 | V32QI V32HI V32SI | |
74 | V64QI V64HI V64SI]) | |
; V_FP_1REG: the floating-point subset of the one-register modes (HF and SF
; elements).
03876953 | 75 | (define_mode_iterator V_FP_1REG |
45381d6f AS |
76 | [V2HF V2SF |
77 | V4HF V4SF | |
78 | V8HF V8SF | |
79 | V16HF V16SF | |
80 | V32HF V32SF | |
81 | V64HF V64SF]) | |
82 | ||
3d6275e3 | 83 | ; Vector modes for two vector registers |
; V_2REG: vector modes with DI or DF elements at each of the six lane counts.
; V_2REG_ALT repeats the same list under a second name.
03876953 | 84 | (define_mode_iterator V_2REG |
45381d6f AS |
85 | [V2DI V2DF |
86 | V4DI V4DF | |
87 | V8DI V8DF | |
88 | V16DI V16DF | |
89 | V32DI V32DF | |
90 | V64DI V64DF]) | |
db80ccd3 AS |
91 | (define_mode_iterator V_2REG_ALT |
92 | [V2DI V2DF | |
93 | V4DI V4DF | |
94 | V8DI V8DF | |
95 | V16DI V16DF | |
96 | V32DI V32DF | |
97 | V64DI V64DF]) | |
45381d6f | 98 | |
8aeabd9f AS |
99 | ; Vector modes for four vector registers |
; V_4REG and V_4REG_ALT both list the TI-element vector modes at each of the
; six lane counts.
100 | (define_mode_iterator V_4REG [V2TI V4TI V8TI V16TI V32TI V64TI]) | |
101 | (define_mode_iterator V_4REG_ALT [V2TI V4TI V8TI V16TI V32TI V64TI]) | |
102 | ||
03876953 AS |
103 | ; Vector modes with native support |
; V_noQI: HI, HF, SI, SF, DI and DF element vectors (no QI, no TI).
; V_noHI: the same list with the HI element vectors removed as well.
104 | (define_mode_iterator V_noQI | |
45381d6f AS |
105 | [V2HI V2HF V2SI V2SF V2DI V2DF |
106 | V4HI V4HF V4SI V4SF V4DI V4DF | |
107 | V8HI V8HF V8SI V8SF V8DI V8DF | |
108 | V16HI V16HF V16SI V16SF V16DI V16DF | |
109 | V32HI V32HF V32SI V32SF V32DI V32DF | |
110 | V64HI V64HF V64SI V64SF V64DI V64DF]) | |
03876953 | 111 | (define_mode_iterator V_noHI |
45381d6f AS |
112 | [V2HF V2SI V2SF V2DI V2DF |
113 | V4HF V4SI V4SF V4DI V4DF | |
114 | V8HF V8SI V8SF V8DI V8DF | |
115 | V16HF V16SI V16SF V16DI V16DF | |
116 | V32HF V32SI V32SF V32DI V32DF | |
117 | V64HF V64SI V64SF V64DI V64DF]) | |
03876953 AS |
118 | |
; V_INT_noQI: integer element vectors without QI (HI, SI and DI elements).
; V_INT_noHI: integer element vectors without QI or HI (SI and DI elements).
119 | (define_mode_iterator V_INT_noQI | |
45381d6f AS |
120 | [V2HI V2SI V2DI |
121 | V4HI V4SI V4DI | |
122 | V8HI V8SI V8DI | |
123 | V16HI V16SI V16DI | |
124 | V32HI V32SI V32DI | |
125 | V64HI V64SI V64DI]) | |
6e0ca3fe | 126 | (define_mode_iterator V_INT_noHI |
45381d6f AS |
127 | [V2SI V2DI |
128 | V4SI V4DI | |
129 | V8SI V8DI | |
130 | V16SI V16DI | |
131 | V32SI V32DI | |
132 | V64SI V64DI]) | |
03876953 | 133 | |
cfdc45f7 AS |
; SV_SFDF: the scalar modes SF and DF followed by the SF and DF element
; vectors at each of the six lane counts.
134 | (define_mode_iterator SV_SFDF | |
135 | [SF DF | |
136 | V2SF V2DF | |
137 | V4SF V4DF | |
138 | V8SF V8DF | |
139 | V16SF V16DF | |
140 | V32SF V32DF | |
141 | V64SF V64DF]) | |
142 | ||
8aeabd9f | 143 | ; All modes in which we want to do more than just moves. |
; V_ALL: QI, HI, HF, SI, SF, DI and DF element vectors at each of the six
; lane counts.  The TI element vectors are not included.
; V_ALL_ALT repeats the same list under a second name.
03876953 | 144 | (define_mode_iterator V_ALL |
45381d6f AS |
145 | [V2QI V2HI V2HF V2SI V2SF V2DI V2DF |
146 | V4QI V4HI V4HF V4SI V4SF V4DI V4DF | |
147 | V8QI V8HI V8HF V8SI V8SF V8DI V8DF | |
148 | V16QI V16HI V16HF V16SI V16SF V16DI V16DF | |
149 | V32QI V32HI V32HF V32SI V32SF V32DI V32DF | |
150 | V64QI V64HI V64HF V64SI V64SF V64DI V64DF]) | |
03876953 | 151 | (define_mode_iterator V_ALL_ALT |
45381d6f AS |
152 | [V2QI V2HI V2HF V2SI V2SF V2DI V2DF |
153 | V4QI V4HI V4HF V4SI V4SF V4DI V4DF | |
154 | V8QI V8HI V8HF V8SI V8SF V8DI V8DF | |
155 | V16QI V16HI V16HF V16SI V16SF V16DI V16DF | |
156 | V32QI V32HI V32HF V32SI V32SF V32DI V32DF | |
157 | V64QI V64HI V64HF V64SI V64SF V64DI V64DF]) | |
03876953 AS |
158 | |
; V_INT: integer element vectors (QI, HI, SI, DI).
; V_FP: floating-point element vectors (HF, SF, DF).
; SV_FP: the V_FP list preceded by the scalar modes HF, SF and DF.
159 | (define_mode_iterator V_INT | |
45381d6f AS |
160 | [V2QI V2HI V2SI V2DI |
161 | V4QI V4HI V4SI V4DI | |
162 | V8QI V8HI V8SI V8DI | |
163 | V16QI V16HI V16SI V16DI | |
164 | V32QI V32HI V32SI V32DI | |
165 | V64QI V64HI V64SI V64DI]) | |
03876953 | 166 | (define_mode_iterator V_FP |
45381d6f AS |
167 | [V2HF V2SF V2DF |
168 | V4HF V4SF V4DF | |
169 | V8HF V8SF V8DF | |
170 | V16HF V16SF V16DF | |
171 | V32HF V32SF V32DF | |
172 | V64HF V64SF V64DF]) | |
cfdc45f7 AS |
173 | (define_mode_iterator SV_FP |
174 | [HF SF DF | |
175 | V2HF V2SF V2DF | |
176 | V4HF V4SF V4DF | |
177 | V8HF V8SF V8DF | |
178 | V16HF V16SF V16DF | |
179 | V32HF V32SF V32DF | |
180 | V64HF V64SF V64DF]) | |
45381d6f | 181 | |
8aeabd9f AS |
182 | ; All modes that need moves, including those without many insns. |
; V_MOV: QI, HI, HF, SI, SF, DI, DF and TI element vectors at each of the six
; lane counts.  V_MOV_ALT repeats the same list under a second name.
183 | (define_mode_iterator V_MOV | |
184 | [V2QI V2HI V2HF V2SI V2SF V2DI V2DF V2TI | |
185 | V4QI V4HI V4HF V4SI V4SF V4DI V4DF V4TI | |
186 | V8QI V8HI V8HF V8SI V8SF V8DI V8DF V8TI | |
187 | V16QI V16HI V16HF V16SI V16SF V16DI V16DF V16TI | |
188 | V32QI V32HI V32HF V32SI V32SF V32DI V32DF V32TI | |
189 | V64QI V64HI V64HF V64SI V64SF V64DI V64DF V64TI]) | |
190 | (define_mode_iterator V_MOV_ALT | |
191 | [V2QI V2HI V2HF V2SI V2SF V2DI V2DF V2TI | |
192 | V4QI V4HI V4HF V4SI V4SF V4DI V4DF V4TI | |
193 | V8QI V8HI V8HF V8SI V8SF V8DI V8DF V8TI | |
194 | V16QI V16HI V16HF V16SI V16SF V16DI V16DF V16TI | |
195 | V32QI V32HI V32HF V32SI V32SF V32DI V32DF V32TI | |
196 | V64QI V64HI V64HF V64SI V64SF V64DI V64DF V64TI]) | |
197 | ||
; scalar_mode: maps each scalar mode to its own lower-case name and each
; vector mode to the lower-case name of its element mode.
3d6275e3 | 198 | (define_mode_attr scalar_mode |
8aeabd9f | 199 | [(QI "qi") (HI "hi") (SI "si") (TI "ti") |
cfdc45f7 | 200 | (HF "hf") (SF "sf") (DI "di") (DF "df") |
8aeabd9f | 201 | (V2QI "qi") (V2HI "hi") (V2SI "si") (V2TI "ti") |
45381d6f | 202 | (V2HF "hf") (V2SF "sf") (V2DI "di") (V2DF "df") |
8aeabd9f | 203 | (V4QI "qi") (V4HI "hi") (V4SI "si") (V4TI "ti") |
45381d6f | 204 | (V4HF "hf") (V4SF "sf") (V4DI "di") (V4DF "df") |
8aeabd9f | 205 | (V8QI "qi") (V8HI "hi") (V8SI "si") (V8TI "ti") |
45381d6f | 206 | (V8HF "hf") (V8SF "sf") (V8DI "di") (V8DF "df") |
8aeabd9f | 207 | (V16QI "qi") (V16HI "hi") (V16SI "si") (V16TI "ti") |
45381d6f | 208 | (V16HF "hf") (V16SF "sf") (V16DI "di") (V16DF "df") |
8aeabd9f | 209 | (V32QI "qi") (V32HI "hi") (V32SI "si") (V32TI "ti") |
45381d6f | 210 | (V32HF "hf") (V32SF "sf") (V32DI "di") (V32DF "df") |
8aeabd9f | 211 | (V64QI "qi") (V64HI "hi") (V64SI "si") (V64TI "ti") |
3d6275e3 AS |
212 | (V64HF "hf") (V64SF "sf") (V64DI "di") (V64DF "df")]) |
213 | ||
; SCALAR_MODE: the upper-case counterpart, mapping each scalar or vector mode
; to the upper-case name of its element mode.
214 | (define_mode_attr SCALAR_MODE | |
8aeabd9f | 215 | [(QI "QI") (HI "HI") (SI "SI") (TI "TI") |
cfdc45f7 | 216 | (HF "HF") (SF "SF") (DI "DI") (DF "DF") |
8aeabd9f | 217 | (V2QI "QI") (V2HI "HI") (V2SI "SI") (V2TI "TI") |
45381d6f | 218 | (V2HF "HF") (V2SF "SF") (V2DI "DI") (V2DF "DF") |
8aeabd9f | 219 | (V4QI "QI") (V4HI "HI") (V4SI "SI") (V4TI "TI") |
45381d6f | 220 | (V4HF "HF") (V4SF "SF") (V4DI "DI") (V4DF "DF") |
8aeabd9f | 221 | (V8QI "QI") (V8HI "HI") (V8SI "SI") (V8TI "TI") |
45381d6f | 222 | (V8HF "HF") (V8SF "SF") (V8DI "DI") (V8DF "DF") |
8aeabd9f | 223 | (V16QI "QI") (V16HI "HI") (V16SI "SI") (V16TI "TI") |
45381d6f | 224 | (V16HF "HF") (V16SF "SF") (V16DI "DI") (V16DF "DF") |
8aeabd9f | 225 | (V32QI "QI") (V32HI "HI") (V32SI "SI") (V32TI "TI") |
45381d6f | 226 | (V32HF "HF") (V32SF "SF") (V32DI "DI") (V32DF "DF") |
8aeabd9f | 227 | (V64QI "QI") (V64HI "HI") (V64SI "SI") (V64TI "TI") |
3d6275e3 AS |
228 | (V64HF "HF") (V64SF "SF") (V64DI "DI") (V64DF "DF")]) |
229 | ||
; vnsi: maps every scalar mode to si, and every vector mode to the lower-case
; name of the SI-element vector mode with the same lane count.
1165109b | 230 | (define_mode_attr vnsi |
8aeabd9f | 231 | [(QI "si") (HI "si") (SI "si") (TI "si") |
0be4fbea AS |
232 | (HF "si") (SF "si") (DI "si") (DF "si") |
233 | (V2QI "v2si") (V2HI "v2si") (V2HF "v2si") (V2SI "v2si") | |
8aeabd9f | 234 | (V2SF "v2si") (V2DI "v2si") (V2DF "v2si") (V2TI "v2si") |
45381d6f | 235 | (V4QI "v4si") (V4HI "v4si") (V4HF "v4si") (V4SI "v4si") |
8aeabd9f | 236 | (V4SF "v4si") (V4DI "v4si") (V4DF "v4si") (V4TI "v4si") |
45381d6f | 237 | (V8QI "v8si") (V8HI "v8si") (V8HF "v8si") (V8SI "v8si") |
8aeabd9f | 238 | (V8SF "v8si") (V8DI "v8si") (V8DF "v8si") (V8TI "v8si") |
45381d6f | 239 | (V16QI "v16si") (V16HI "v16si") (V16HF "v16si") (V16SI "v16si") |
8aeabd9f | 240 | (V16SF "v16si") (V16DI "v16si") (V16DF "v16si") (V16TI "v16si") |
45381d6f | 241 | (V32QI "v32si") (V32HI "v32si") (V32HF "v32si") (V32SI "v32si") |
8aeabd9f | 242 | (V32SF "v32si") (V32DI "v32si") (V32DF "v32si") (V32TI "v32si") |
45381d6f | 243 | (V64QI "v64si") (V64HI "v64si") (V64HF "v64si") (V64SI "v64si") |
8aeabd9f | 244 | (V64SF "v64si") (V64DI "v64si") (V64DF "v64si") (V64TI "v64si")]) |
; VnSI: the upper-case counterpart of vnsi (scalar modes map to SI, vector
; modes to the SI-element vector mode with the same lane count).
1165109b AS |
245 | |
246 | (define_mode_attr VnSI | |
8aeabd9f | 247 | [(QI "SI") (HI "SI") (SI "SI") (TI "SI") |
0be4fbea AS |
248 | (HF "SI") (SF "SI") (DI "SI") (DF "SI") |
249 | (V2QI "V2SI") (V2HI "V2SI") (V2HF "V2SI") (V2SI "V2SI") | |
8aeabd9f | 250 | (V2SF "V2SI") (V2DI "V2SI") (V2DF "V2SI") (V2TI "V2SI") |
45381d6f | 251 | (V4QI "V4SI") (V4HI "V4SI") (V4HF "V4SI") (V4SI "V4SI") |
8aeabd9f | 252 | (V4SF "V4SI") (V4DI "V4SI") (V4DF "V4SI") (V4TI "V4SI") |
45381d6f | 253 | (V8QI "V8SI") (V8HI "V8SI") (V8HF "V8SI") (V8SI "V8SI") |
8aeabd9f | 254 | (V8SF "V8SI") (V8DI "V8SI") (V8DF "V8SI") (V8TI "V8SI") |
45381d6f | 255 | (V16QI "V16SI") (V16HI "V16SI") (V16HF "V16SI") (V16SI "V16SI") |
8aeabd9f | 256 | (V16SF "V16SI") (V16DI "V16SI") (V16DF "V16SI") (V16TI "V16SI") |
45381d6f | 257 | (V32QI "V32SI") (V32HI "V32SI") (V32HF "V32SI") (V32SI "V32SI") |
8aeabd9f | 258 | (V32SF "V32SI") (V32DI "V32SI") (V32DF "V32SI") (V32TI "V32SI") |
45381d6f | 259 | (V64QI "V64SI") (V64HI "V64SI") (V64HF "V64SI") (V64SI "V64SI") |
8aeabd9f | 260 | (V64SF "V64SI") (V64DI "V64SI") (V64DF "V64SI") (V64TI "V64SI")]) |
1165109b AS |
261 | |
; vndi: maps every vector mode to the lower-case name of the DI-element vector
; mode with the same lane count.  Unlike vnsi there are no scalar entries.
262 | (define_mode_attr vndi | |
45381d6f | 263 | [(V2QI "v2di") (V2HI "v2di") (V2HF "v2di") (V2SI "v2di") |
8aeabd9f | 264 | (V2SF "v2di") (V2DI "v2di") (V2DF "v2di") (V2TI "v2di") |
45381d6f | 265 | (V4QI "v4di") (V4HI "v4di") (V4HF "v4di") (V4SI "v4di") |
8aeabd9f | 266 | (V4SF "v4di") (V4DI "v4di") (V4DF "v4di") (V4TI "v4di") |
45381d6f | 267 | (V8QI "v8di") (V8HI "v8di") (V8HF "v8di") (V8SI "v8di") |
8aeabd9f | 268 | (V8SF "v8di") (V8DI "v8di") (V8DF "v8di") (V8TI "v8di") |
45381d6f | 269 | (V16QI "v16di") (V16HI "v16di") (V16HF "v16di") (V16SI "v16di") |
8aeabd9f | 270 | (V16SF "v16di") (V16DI "v16di") (V16DF "v16di") (V16TI "v16di") |
45381d6f | 271 | (V32QI "v32di") (V32HI "v32di") (V32HF "v32di") (V32SI "v32di") |
8aeabd9f | 272 | (V32SF "v32di") (V32DI "v32di") (V32DF "v32di") (V32TI "v32di") |
45381d6f | 273 | (V64QI "v64di") (V64HI "v64di") (V64HF "v64di") (V64SI "v64di") |
8aeabd9f | 274 | (V64SF "v64di") (V64DI "v64di") (V64DF "v64di") (V64TI "v64di")]) |
; VnDI: the upper-case counterpart of vndi.
1165109b AS |
275 | |
276 | (define_mode_attr VnDI | |
45381d6f | 277 | [(V2QI "V2DI") (V2HI "V2DI") (V2HF "V2DI") (V2SI "V2DI") |
8aeabd9f | 278 | (V2SF "V2DI") (V2DI "V2DI") (V2DF "V2DI") (V2TI "V2DI") |
45381d6f | 279 | (V4QI "V4DI") (V4HI "V4DI") (V4HF "V4DI") (V4SI "V4DI") |
8aeabd9f | 280 | (V4SF "V4DI") (V4DI "V4DI") (V4DF "V4DI") (V4TI "V4DI") |
45381d6f | 281 | (V8QI "V8DI") (V8HI "V8DI") (V8HF "V8DI") (V8SI "V8DI") |
8aeabd9f | 282 | (V8SF "V8DI") (V8DI "V8DI") (V8DF "V8DI") (V8TI "V8DI") |
45381d6f | 283 | (V16QI "V16DI") (V16HI "V16DI") (V16HF "V16DI") (V16SI "V16DI") |
8aeabd9f | 284 | (V16SF "V16DI") (V16DI "V16DI") (V16DF "V16DI") (V16TI "V16DI") |
45381d6f | 285 | (V32QI "V32DI") (V32HI "V32DI") (V32HF "V32DI") (V32SI "V32DI") |
8aeabd9f | 286 | (V32SF "V32DI") (V32DI "V32DI") (V32DF "V32DI") (V32TI "V32DI") |
45381d6f | 287 | (V64QI "V64DI") (V64HI "V64DI") (V64HF "V64DI") (V64SI "V64DI") |
8aeabd9f | 288 | (V64SF "V64DI") (V64DI "V64DI") (V64DF "V64DI") (V64TI "V64DI")]) |
1165109b | 289 | |
45381d6f AS |
; sdwa: maps QI element vectors to BYTE_0, HI element vectors to WORD_0 and
; SI element vectors to DWORD.  No other modes have an entry.
; NOTE(review): these look like SDWA operand-select names - confirm against
; the patterns that use <sdwa>.
290 | (define_mode_attr sdwa |
291 | [(V2QI "BYTE_0") (V2HI "WORD_0") (V2SI "DWORD") | |
292 | (V4QI "BYTE_0") (V4HI "WORD_0") (V4SI "DWORD") | |
293 | (V8QI "BYTE_0") (V8HI "WORD_0") (V8SI "DWORD") | |
294 | (V16QI "BYTE_0") (V16HI "WORD_0") (V16SI "DWORD") | |
295 | (V32QI "BYTE_0") (V32HI "WORD_0") (V32SI "DWORD") | |
296 | (V64QI "BYTE_0") (V64HI "WORD_0") (V64SI "DWORD")]) | |
3d66c777 | 297 | |
3d6275e3 AS |
298 | ;; }}} |
299 | ;; {{{ Substitutions | |
300 | ||
; Name-suffix attributes, one per define_subst in this section.  Each expands
; to the empty string in the original pattern and to _exec in the copy that
; the named substitution generates.
301 | (define_subst_attr "exec" "vec_merge" | |
302 | "" "_exec") | |
303 | (define_subst_attr "exec_clobber" "vec_merge_with_clobber" | |
304 | "" "_exec") | |
305 | (define_subst_attr "exec_vcc" "vec_merge_with_vcc" | |
306 | "" "_exec") | |
307 | (define_subst_attr "exec_scatter" "scatter_store" | |
308 | "" "_exec") | |
309 | ||
; vec_merge: matches a single V_MOV set of operand 0 from operand 1 and
; rewrites the source as a vec_merge of operand 1 with a new V_MOV operand 3
; (predicate gcn_register_or_unspec_operand, constraint U0), selected by a new
; DI operand 4 (predicate gcn_exec_reg_operand, constraint e).
310 | (define_subst "vec_merge" | |
8aeabd9f AS |
311 | [(set (match_operand:V_MOV 0) |
312 | (match_operand:V_MOV 1))] | |
3d6275e3 AS |
313 | "" |
314 | [(set (match_dup 0) | |
8aeabd9f | 315 | (vec_merge:V_MOV |
3d6275e3 | 316 | (match_dup 1) |
8aeabd9f | 317 | (match_operand:V_MOV 3 "gcn_register_or_unspec_operand" "U0") |
3d6275e3 AS |
318 | (match_operand:DI 4 "gcn_exec_reg_operand" "e")))]) |
319 | ||
; vec_merge_with_clobber: same rewrite as a plain V_MOV set into a vec_merge
; with new operands 3 and 4, for patterns that also carry a clobber of
; operand 2.  The clobber is reproduced unchanged in the output.
320 | (define_subst "vec_merge_with_clobber" | |
8aeabd9f AS |
321 | [(set (match_operand:V_MOV 0) |
322 | (match_operand:V_MOV 1)) | |
3d6275e3 AS |
323 | (clobber (match_operand 2))] |
324 | "" | |
325 | [(set (match_dup 0) | |
8aeabd9f | 326 | (vec_merge:V_MOV |
3d6275e3 | 327 | (match_dup 1) |
8aeabd9f | 328 | (match_operand:V_MOV 3 "gcn_register_or_unspec_operand" "U0") |
3d6275e3 AS |
329 | (match_operand:DI 4 "gcn_exec_reg_operand" "e"))) |
330 | (clobber (match_dup 2))]) | |
331 | ||
; vec_merge_with_vcc: matches a V_MOV set of operand 0 from operand 1 paired
; with a DI set of operand 2 from operand 3.  The output is a parallel in
; which the vector source becomes a vec_merge of operand 1 with a new V_MOV
; operand 4 under a new DI operand 5, and the DI source becomes operand 3
; ANDed with (reg:DI EXEC_REG).
332 | (define_subst "vec_merge_with_vcc" | |
8aeabd9f AS |
333 | [(set (match_operand:V_MOV 0) |
334 | (match_operand:V_MOV 1)) | |
3d6275e3 AS |
335 | (set (match_operand:DI 2) |
336 | (match_operand:DI 3))] | |
337 | "" | |
338 | [(parallel | |
339 | [(set (match_dup 0) | |
8aeabd9f | 340 | (vec_merge:V_MOV |
3d6275e3 | 341 | (match_dup 1) |
8aeabd9f | 342 | (match_operand:V_MOV 4 "gcn_register_or_unspec_operand" "U0") |
3d6275e3 AS |
343 | (match_operand:DI 5 "gcn_exec_reg_operand" "e"))) |
344 | (set (match_dup 2) | |
345 | (and:DI (match_dup 3) | |
346 | (reg:DI EXEC_REG)))])]) | |
347 | ||
; scatter_store: matches a BLK store to (mem:BLK (scratch)) whose source is a
; four-operand UNSPEC_SCATTER, and emits the same store with a fifth unspec
; element appended: a new DI operand 4 (predicate gcn_exec_reg_operand,
; constraint e).
348 | (define_subst "scatter_store" | |
349 | [(set (mem:BLK (scratch)) | |
350 | (unspec:BLK | |
351 | [(match_operand 0) | |
352 | (match_operand 1) | |
353 | (match_operand 2) | |
354 | (match_operand 3)] | |
355 | UNSPEC_SCATTER))] | |
356 | "" | |
357 | [(set (mem:BLK (scratch)) | |
358 | (unspec:BLK | |
359 | [(match_dup 0) | |
360 | (match_dup 1) | |
361 | (match_dup 2) | |
362 | (match_dup 3) | |
363 | (match_operand:DI 4 "gcn_exec_reg_operand" "e")] | |
364 | UNSPEC_SCATTER))]) | |
365 | ||
366 | ;; }}} | |
367 | ;; {{{ Vector moves | |
368 | ||
369 | ; This is the entry point for all vector register moves. Memory accesses can | |
370 | ; come this way also, but will more usually use the reload_in/out, | |
371 | ; gather/scatter, maskload/store, etc. | |
372 | ||
; Expansion steps, in the order the code below tests them:
;  1. A MEM destination with a same-size SUBREG source, or a same-size SUBREG
;     destination with a MEM source, is re-emitted through emit_move_insn in
;     the mode of the SUBREG_REG, using a copy of the MEM whose mode has been
;     replaced with PUT_MODE_RAW.
;  2. Any remaining SUBREG of a MEM, on either operand, trips the assertion.
;  3. While neither lra_in_progress nor reload_completed is set, a MEM
;     destination becomes a scatter (the source is first forced into a
;     register) and a MEM source becomes a gather.  The address expression
;     comes from gcn_expand_scalar_to_vector_address with a SCRATCH of mode
;     <VnDI>, and the address space and volatile flag of the MEM are passed
;     as CONST_INTs.
;  4. Otherwise a move with a MEM on either side is emitted as
;     mov<mode>_sgprbase with a new <VnDI> pseudo, after asserting that
;     reload has not completed.
;  5. Moves with no MEM operand reach the end without DONE, so the set
;     pattern given in the expander itself is emitted.
373 | (define_expand "mov<mode>" | |
8aeabd9f AS |
374 | [(set (match_operand:V_MOV 0 "nonimmediate_operand") |
375 | (match_operand:V_MOV 1 "general_operand"))] | |
3d6275e3 AS |
376 | "" |
377 | { | |
45381d6f AS |
378 | /* Bitwise reinterpret casts via SUBREG don't work with GCN vector |
379 | registers, but we can convert the MEM to a mode that does work. */ | |
380 | if (MEM_P (operands[0]) && !SUBREG_P (operands[0]) | |
381 | && SUBREG_P (operands[1]) | |
382 | && GET_MODE_SIZE (GET_MODE (operands[1])) | |
383 | == GET_MODE_SIZE (GET_MODE (SUBREG_REG (operands[1])))) | |
384 | { | |
385 | rtx src = SUBREG_REG (operands[1]); | |
386 | rtx mem = copy_rtx (operands[0]); | |
387 | PUT_MODE_RAW (mem, GET_MODE (src)); | |
388 | emit_move_insn (mem, src); | |
389 | DONE; | |
390 | } | |
391 | if (MEM_P (operands[1]) && !SUBREG_P (operands[1]) | |
392 | && SUBREG_P (operands[0]) | |
393 | && GET_MODE_SIZE (GET_MODE (operands[0])) | |
394 | == GET_MODE_SIZE (GET_MODE (SUBREG_REG (operands[0])))) | |
395 | { | |
396 | rtx dest = SUBREG_REG (operands[0]); | |
397 | rtx mem = copy_rtx (operands[1]); | |
398 | PUT_MODE_RAW (mem, GET_MODE (dest)); | |
399 | emit_move_insn (dest, mem); | |
400 | DONE; | |
401 | } | |
402 | ||
403 | /* SUBREG of MEM is not supported. */ | |
404 | gcc_assert ((!SUBREG_P (operands[0]) | |
405 | || !MEM_P (SUBREG_REG (operands[0]))) | |
406 | && (!SUBREG_P (operands[1]) | |
407 | || !MEM_P (SUBREG_REG (operands[1])))); | |
408 | ||
3d6275e3 AS |
409 | if (MEM_P (operands[0]) && !lra_in_progress && !reload_completed) |
410 | { | |
411 | operands[1] = force_reg (<MODE>mode, operands[1]); | |
1165109b | 412 | rtx scratch = gen_rtx_SCRATCH (<VnDI>mode); |
3d6275e3 AS |
413 | rtx a = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0])); |
414 | rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0])); | |
415 | rtx expr = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL, | |
416 | operands[0], | |
417 | scratch); | |
418 | emit_insn (gen_scatter<mode>_expr (expr, operands[1], a, v)); | |
419 | DONE; | |
420 | } | |
421 | else if (MEM_P (operands[1]) && !lra_in_progress && !reload_completed) | |
422 | { | |
1165109b | 423 | rtx scratch = gen_rtx_SCRATCH (<VnDI>mode); |
3d6275e3 AS |
424 | rtx a = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1])); |
425 | rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1])); | |
426 | rtx expr = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL, | |
427 | operands[1], | |
428 | scratch); | |
429 | emit_insn (gen_gather<mode>_expr (operands[0], expr, a, v)); | |
430 | DONE; | |
431 | } | |
432 | else if ((MEM_P (operands[0]) || MEM_P (operands[1]))) | |
433 | { | |
434 | gcc_assert (!reload_completed); | |
1165109b | 435 | rtx scratch = gen_reg_rtx (<VnDI>mode); |
3d6275e3 AS |
436 | emit_insn (gen_mov<mode>_sgprbase (operands[0], operands[1], scratch)); |
437 | DONE; | |
438 | } | |
439 | }) | |
440 | ||
441 | ; A pseudo instruction that helps LRA use the "U0" constraint. | |
442 | ||
; The output template is the empty string and the length attribute is 0, so
; this insn contributes no assembly text.  The source must satisfy
; gcn_unspec_operand.
443 | (define_insn "mov<mode>_unspec" | |
8aeabd9f AS |
444 | [(set (match_operand:V_MOV 0 "nonimmediate_operand" "=v") |
445 | (match_operand:V_MOV 1 "gcn_unspec_operand" " U"))] | |
3d6275e3 AS |
446 | "" |
447 | "" | |
448 | [(set_attr "type" "unknown") | |
449 | (set_attr "length" "0")]) | |
450 | ||
; Unmasked move for the one-register vector modes (V_1REG).  The alternatives
; choose between v_mov_b32 and the v_accvgpr_read_b32, v_accvgpr_write_b32
; and v_accvgpr_mov_b32 forms.  Only the last alternative sets the cdna
; attribute (to cdna2).  Declared lengths are 4 or 8 per alternative.
451 | (define_insn "*mov<mode>" | |
ae0d2c24 AS |
452 | [(set (match_operand:V_1REG 0 "nonimmediate_operand") |
453 | (match_operand:V_1REG 1 "general_operand"))] | |
3d6275e3 | 454 | "" |
b9bf0c3f | 455 | {@ [cons: =0, 1; attrs: type, length, cdna] |
ae0d2c24 AS |
456 | [v ,vA;vop1 ,4,* ] v_mov_b32\t%0, %1 |
457 | [v ,B ;vop1 ,8,* ] ^ | |
458 | [v ,a ;vop3p_mai,8,* ] v_accvgpr_read_b32\t%0, %1 | |
459 | [$a ,v ;vop3p_mai,8,* ] v_accvgpr_write_b32\t%0, %1 | |
460 | [a ,a ;vop1 ,4,cdna2] v_accvgpr_mov_b32\t%0, %1 | |
461 | }) | |
3d6275e3 AS |
462 | |
; Masked move for the one-register vector modes: operand 1 is merged with
; operand 2 under the DI operand 3, with a <VnDI> scratch as operand 4.
; The insn condition accepts a MEM destination only when the source is a REG.
; The first two alternatives emit v_mov_b32 and the next two v_cndmask_b32.
; The last two involve memory (constraint m), output # so they must be split,
; and are the only ones whose scratch is a real register (&v instead of X).
463 | (define_insn "mov<mode>_exec" | |
ddfa4393 | 464 | [(set (match_operand:V_1REG 0 "nonimmediate_operand") |
03876953 | 465 | (vec_merge:V_1REG |
ddfa4393 AS |
466 | (match_operand:V_1REG 1 "general_operand") |
467 | (match_operand:V_1REG 2 "gcn_alu_or_unspec_operand") | |
468 | (match_operand:DI 3 "register_operand"))) | |
469 | (clobber (match_scratch:<VnDI> 4))] | |
3d6275e3 | 470 | "!MEM_P (operands[0]) || REG_P (operands[1])" |
ddfa4393 AS |
471 | {@ [cons: =0, 1, 2, 3, =4; attrs: type, length] |
472 | [v,vA,U0,e ,X ;vop1 ,4 ] v_mov_b32\t%0, %1 | |
473 | [v,B ,U0,e ,X ;vop1 ,8 ] v_mov_b32\t%0, %1 | |
474 | [v,v ,vA,cV,X ;vop2 ,4 ] v_cndmask_b32\t%0, %2, %1, vcc | |
475 | [v,vA,vA,Sv,X ;vop3a,8 ] v_cndmask_b32\t%0, %2, %1, %3 | |
476 | [v,m ,U0,e ,&v;* ,16] # | |
477 | [m,v ,U0,e ,&v;* ,16] # | |
478 | }) | |
3d6275e3 AS |
479 | |
480 | ; This variant does not accept an unspec, but does permit MEM | |
481 | ; read/modify/write which is necessary for maskstore. | |
482 | ||
483 | ;(define_insn "*mov<mode>_exec_match" | |
03876953 AS |
484 | ; [(set (match_operand:V_1REG 0 "nonimmediate_operand" "=v,v, v, m") |
485 | ; (vec_merge:V_1REG | |
486 | ; (match_operand:V_1REG 1 "general_operand" "vA,B, m, v") | |
3d6275e3 | 487 | ; (match_dup 0) |
03876953 | 488 | ; (match_operand:DI 2 "gcn_exec_reg_operand" " e,e, e, e"))) |
1165109b | 489 | ; (clobber (match_scratch:<VnDI> 3 "=X,X,&v,&v"))] |
3d6275e3 AS |
490 | ; "!MEM_P (operands[0]) || REG_P (operands[1])" |
491 | ; "@ | |
492 | ; v_mov_b32\t%0, %1 | |
493 | ; v_mov_b32\t%0, %1 | |
494 | ; # | |
495 | ; #" | |
496 | ; [(set_attr "type" "vop1,vop1,*,*") | |
497 | ; (set_attr "length" "4,8,16,16")]) | |
498 | ||
; Unmasked move for the two-register vector modes (V_2REG).  Every alternative
; emits two 32-bit instructions, one for the %L part and one for the %H part.
; The %L part goes first when the destination register number is not above
; the source register number (or, in the first alternative only, when the
; source is not a REG).  Otherwise the %H part goes first.
; NOTE(review): presumably this ordering keeps an overlapping source part from
; being overwritten before it is read - confirm.
499 | (define_insn "*mov<mode>" | |
ae0d2c24 AS |
500 | [(set (match_operand:V_2REG 0 "nonimmediate_operand" "=v, v,$a,a") |
501 | (match_operand:V_2REG 1 "general_operand" "vDB,a, v,a"))] | |
3d6275e3 | 502 | "" |
ae0d2c24 AS |
503 | "@ |
504 | * if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1])) \ | |
505 | return \"v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\"; \ | |
506 | else \ | |
507 | return \"v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1\"; | |
508 | * if (REGNO (operands[0]) <= REGNO (operands[1])) \ | |
509 | return \"v_accvgpr_read_b32\t%L0, %L1\;v_accvgpr_read_b32\t%H0, %H1\"; \ | |
510 | else \ | |
511 | return \"v_accvgpr_read_b32\t%H0, %H1\;v_accvgpr_read_b32\t%L0, %L1\"; | |
512 | * if (REGNO (operands[0]) <= REGNO (operands[1])) \ | |
513 | return \"v_accvgpr_write_b32\t%L0, %L1\;v_accvgpr_write_b32\t%H0, %H1\"; \ | |
514 | else \ | |
515 | return \"v_accvgpr_write_b32\t%H0, %H1\;v_accvgpr_write_b32\t%L0, %L1\"; | |
516 | * if (REGNO (operands[0]) <= REGNO (operands[1])) \ | |
517 | return \"v_accvgpr_mov_b32\t%L0, %L1\;v_accvgpr_mov_b32\t%H0, %H1\"; \ | |
518 | else \ | |
519 | return \"v_accvgpr_mov_b32\t%H0, %H1\;v_accvgpr_mov_b32\t%L0, %L1\";" | |
520 | [(set_attr "type" "vmult,vmult,vmult,vmult") | |
521 | (set_attr "length" "16,16,16,8") | |
b9bf0c3f | 522 | (set_attr "cdna" "*,*,*,cdna2")]) |
3d6275e3 AS |
523 | |
; Masked move for the two-register vector modes.  Alternatives 0 to 2 emit a
; pair of v_mov_b32 or v_cndmask_b32 instructions.  The %L part goes first
; when operand 1 is not a REG or the destination register number is not above
; the source register number, and the %H part goes first otherwise.
; Alternatives 3 and 4 (the memory forms) match no switch case and return #,
; so they must be split.
524 | (define_insn "mov<mode>_exec" | |
03876953 AS |
525 | [(set (match_operand:V_2REG 0 "nonimmediate_operand" "= v, v, v, v, m") |
526 | (vec_merge:V_2REG | |
527 | (match_operand:V_2REG 1 "general_operand" "vDB, v0, v0, m, v") | |
b7886845 | 528 | (match_operand:V_2REG 2 "gcn_alu_or_unspec_operand" |
3d6275e3 | 529 | " U0,vDA0,vDA0,U0,U0") |
b7886845 | 530 | (match_operand:DI 3 "register_operand" " e, cV, Sv, e, e"))) |
1165109b | 531 | (clobber (match_scratch:<VnDI> 4 "= X, X, X,&v,&v"))] |
3d6275e3 AS |
532 | "!MEM_P (operands[0]) || REG_P (operands[1])" |
533 | { | |
534 | if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1])) | |
535 | switch (which_alternative) | |
536 | { | |
537 | case 0: | |
538 | return "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1"; | |
539 | case 1: | |
b7886845 AS |
540 | return "v_cndmask_b32\t%L0, %L2, %L1, vcc\;" |
541 | "v_cndmask_b32\t%H0, %H2, %H1, vcc"; | |
3d6275e3 | 542 | case 2: |
b7886845 AS |
543 | return "v_cndmask_b32\t%L0, %L2, %L1, %3\;" |
544 | "v_cndmask_b32\t%H0, %H2, %H1, %3"; | |
3d6275e3 AS |
545 | } |
546 | else | |
547 | switch (which_alternative) | |
548 | { | |
549 | case 0: | |
550 | return "v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1"; | |
551 | case 1: | |
b7886845 AS |
552 | return "v_cndmask_b32\t%H0, %H2, %H1, vcc\;" |
553 | "v_cndmask_b32\t%L0, %L2, %L1, vcc"; | |
3d6275e3 | 554 | case 2: |
b7886845 AS |
555 | return "v_cndmask_b32\t%H0, %H2, %H1, %3\;" |
556 | "v_cndmask_b32\t%L0, %L2, %L1, %3"; | |
3d6275e3 AS |
557 | } |
558 | ||
559 | return "#"; | |
560 | } | |
561 | [(set_attr "type" "vmult,vmult,vmult,*,*") | |
562 | (set_attr "length" "16,16,16,16,16")]) | |
563 | ||
; Unmasked move for the four-register (TI element) vector modes.  Every
; alternative emits four 32-bit instructions in the fixed order %L, %H, %J,
; %K.
; NOTE(review): unlike the two-register *mov<mode>, the part order here does
; not depend on the register numbers - confirm that overlapping source and
; destination ranges cannot reach this pattern.
; NOTE(review): the first alternative declares length 16 for four v_mov_b32,
; while the masked V_4REG move declares 32 - confirm which is intended.
8aeabd9f | 564 | (define_insn "*mov<mode>_4reg" |
ae0d2c24 AS |
565 | [(set (match_operand:V_4REG 0 "nonimmediate_operand") |
566 | (match_operand:V_4REG 1 "general_operand"))] | |
8aeabd9f | 567 | "" |
b9bf0c3f | 568 | {@ [cons: =0, 1; attrs: type, length, cdna] |
ecb22ddb AS |
569 | [v ,vDB;vmult,16,* ] v_mov_b32\t%L0, %L1\; v_mov_b32\t%H0, %H1\; v_mov_b32\t%J0, %J1\; v_mov_b32\t%K0, %K1 |
570 | [v ,a ;vmult,32,* ] v_accvgpr_read_b32\t%L0, %L1\; v_accvgpr_read_b32\t%H0, %H1\; v_accvgpr_read_b32\t%J0, %J1\; v_accvgpr_read_b32\t%K0, %K1 | |
571 | [$a,v ;vmult,32,* ] v_accvgpr_write_b32\t%L0, %L1\;v_accvgpr_write_b32\t%H0, %H1\;v_accvgpr_write_b32\t%J0, %J1\;v_accvgpr_write_b32\t%K0, %K1 | |
572 | [a ,a ;vmult,32,cdna2] v_accvgpr_mov_b32\t%L0, %L1\; v_accvgpr_mov_b32\t%H0, %H1\; v_accvgpr_mov_b32\t%J0, %J1\; v_accvgpr_mov_b32\t%K0, %K1 | |
ae0d2c24 | 573 | }) |
8aeabd9f AS |
574 | |
; Masked move for the four-register (TI element) vector modes.  Alternatives
; 0 to 2 emit four v_mov_b32 or v_cndmask_b32 instructions, one per 32-bit
; part.  Alternatives 3 and 4 (the memory forms) match no switch case and
; return #, so they must be split.
;
; Part order: when operand 1 is not a REG, or the destination register number
; is not above the source register number, the parts are written in ascending
; order (%L, %H, %J, %K).  Otherwise they are written in descending order
; (%K, %J, %H, %L), so that with overlapping register ranges every source part
; is read before the destination write that lands on it.  The descending
; branch previously used %H, %L, %J, %K, which overwrote the source %J part
; before reading it.
; This assumes %L, %H, %J and %K name ascending register parts, as the
; ascending sequence orders them - TODO confirm against print_operand.
575 | (define_insn "mov<mode>_exec" | |
576 | [(set (match_operand:V_4REG 0 "nonimmediate_operand" "= v, v, v, v, m") | |
577 | (vec_merge:V_4REG | |
578 | (match_operand:V_4REG 1 "general_operand" "vDB, v0, v0, m, v") | |
579 | (match_operand:V_4REG 2 "gcn_alu_or_unspec_operand" | |
580 | " U0,vDA0,vDA0,U0,U0") | |
581 | (match_operand:DI 3 "register_operand" " e, cV, Sv, e, e"))) | |
582 | (clobber (match_scratch:<VnDI> 4 "= X, X, X,&v,&v"))] | |
583 | "!MEM_P (operands[0]) || REG_P (operands[1])" | |
584 | { | |
585 | if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1])) | |
586 | switch (which_alternative) | |
587 | { | |
588 | case 0: | |
589 | return "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\;" | |
590 | "v_mov_b32\t%J0, %J1\;v_mov_b32\t%K0, %K1"; | |
591 | case 1: | |
592 | return "v_cndmask_b32\t%L0, %L2, %L1, vcc\;" | |
593 | "v_cndmask_b32\t%H0, %H2, %H1, vcc\;" | |
594 | "v_cndmask_b32\t%J0, %J2, %J1, vcc\;" | |
595 | "v_cndmask_b32\t%K0, %K2, %K1, vcc"; | |
596 | case 2: | |
597 | return "v_cndmask_b32\t%L0, %L2, %L1, %3\;" | |
598 | "v_cndmask_b32\t%H0, %H2, %H1, %3\;" | |
599 | "v_cndmask_b32\t%J0, %J2, %J1, %3\;" | |
600 | "v_cndmask_b32\t%K0, %K2, %K1, %3"; | |
601 | } | |
602 | else | |
603 | switch (which_alternative) | |
604 | { | |
605 | case 0: | |
606 | return "v_mov_b32\t%K0, %K1\;v_mov_b32\t%J0, %J1\;" | |
607 | "v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1"; | |
608 | case 1: | |
609 | return "v_cndmask_b32\t%K0, %K2, %K1, vcc\;" | |
610 | "v_cndmask_b32\t%J0, %J2, %J1, vcc\;" | |
611 | "v_cndmask_b32\t%H0, %H2, %H1, vcc\;" | |
612 | "v_cndmask_b32\t%L0, %L2, %L1, vcc"; | |
613 | case 2: | |
614 | return "v_cndmask_b32\t%K0, %K2, %K1, %3\;" | |
615 | "v_cndmask_b32\t%J0, %J2, %J1, %3\;" | |
616 | "v_cndmask_b32\t%H0, %H2, %H1, %3\;" | |
617 | "v_cndmask_b32\t%L0, %L2, %L1, %3"; | |
618 | } | |
619 | ||
620 | return "#"; | |
621 | } | |
622 | [(set_attr "type" "vmult,vmult,vmult,*,*") | |
623 | (set_attr "length" "32")]) | |
624 | ||
3d6275e3 AS |
625 | ; This variant does not accept an unspec, but does permit MEM |
626 | ; read/modify/write which is necessary for maskstore. | |
627 | ||
628 | ;(define_insn "*mov<mode>_exec_match" | |
03876953 AS |
629 | ; [(set (match_operand:V_2REG 0 "nonimmediate_operand" "=v, v, m") |
630 | ; (vec_merge:V_2REG | |
631 | ; (match_operand:V_2REG 1 "general_operand" "vDB, m, v") | |
3d6275e3 | 632 | ; (match_dup 0) |
03876953 | 633 | ; (match_operand:DI 2 "gcn_exec_reg_operand" " e, e, e"))) |
1165109b | 634 | ; (clobber (match_scratch:<VnDI> 3 "=X,&v,&v"))] |
3d6275e3 AS |
635 | ; "!MEM_P (operands[0]) || REG_P (operands[1])" |
636 | ; "@ | |
637 | ; * if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1])) \ | |
638 | ; return \"v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\"; \ | |
639 | ; else \ | |
640 | ; return \"v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1\"; | |
641 | ; # | |
642 | ; #" | |
643 | ; [(set_attr "type" "vmult,*,*") | |
644 | ; (set_attr "length" "16,16,16")]) | |
645 | ||
646 | ; A SGPR-base load looks like: | |
647 | ; <load> v, Sv | |
648 | ; | |
649 | ; There's no hardware instruction that corresponds to this, but vector base | |
650 | ; addresses are placed in an SGPR because it is easier to add to a vector. | |
651 | ; We also have a temporary vT, and the vector v1 holding numbered lanes. | |
652 | ; | |
653 | ; Rewrite as: | |
654 | ; vT = v1 << log2(element-size) | |
655 | ; vT += Sv | |
656 | ; flat_load v, vT | |
657 | ||
; SGPR-base move for the one-register vector modes.  The pattern is enabled
; only while lra_in_progress or reload_completed is set.  Operand 2 is a
; <VnDI> register that the insn clobbers (early-clobber v in every
; alternative).  The first two alternatives emit v_mov_b32.  The four memory
; alternatives output # and must be split, and the two that use constraint a
; set the cdna attribute to cdna2.
a0e6306b | 658 | (define_insn "@mov<mode>_sgprbase" |
ddfa4393 | 659 | [(set (match_operand:V_1REG 0 "nonimmediate_operand") |
03876953 | 660 | (unspec:V_1REG |
ddfa4393 | 661 | [(match_operand:V_1REG 1 "general_operand")] |
3d6275e3 | 662 | UNSPEC_SGPRBASE)) |
ddfa4393 | 663 | (clobber (match_operand:<VnDI> 2 "register_operand"))] |
3d6275e3 | 664 | "lra_in_progress || reload_completed" |
b9bf0c3f | 665 | {@ [cons: =0, 1, =2; attrs: type, length, cdna] |
ae0d2c24 AS |
666 | [v,vA,&v;vop1,4 ,* ] v_mov_b32\t%0, %1 |
667 | [v,vB,&v;vop1,8 ,* ] ^ | |
668 | [v,m ,&v;* ,12,* ] # | |
669 | [m,v ,&v;* ,12,* ] # | |
670 | [a,m ,&v;* ,12,cdna2] # | |
671 | [m,a ,&v;* ,12,cdna2] # | |
ddfa4393 | 672 | }) |
3d6275e3 | 673 | |
; SGPR-base move for the two-register vector modes, enabled only while
; lra_in_progress or reload_completed is set.  The first alternative emits two
; v_mov_b32, with the %L part first when operand 1 is not a REG or the
; destination register number is not above the source register number, and
; the %H part first otherwise.  The four memory alternatives output # and
; must be split.
; NOTE(review): the first alternative declares length 8 for two v_mov_b32,
; while the unmasked V_2REG *mov<mode> declares 16 for the same sequence -
; confirm which is intended.
a0e6306b | 674 | (define_insn "@mov<mode>_sgprbase" |
ae0d2c24 | 675 | [(set (match_operand:V_2REG 0 "nonimmediate_operand" "= v, v, m, a, m") |
03876953 | 676 | (unspec:V_2REG |
ae0d2c24 | 677 | [(match_operand:V_2REG 1 "general_operand" "vDB, m, v, m, a")] |
3d6275e3 | 678 | UNSPEC_SGPRBASE)) |
ae0d2c24 | 679 | (clobber (match_operand:<VnDI> 2 "register_operand" "=&v,&v,&v,&v,&v"))] |
3d6275e3 AS |
680 | "lra_in_progress || reload_completed" |
681 | "@ | |
682 | * if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1])) \ | |
683 | return \"v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\"; \ | |
684 | else \ | |
685 | return \"v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1\"; | |
686 | # | |
ae0d2c24 AS |
687 | # |
688 | # | |
3d6275e3 | 689 | #" |
ae0d2c24 AS |
690 | [(set_attr "type" "vmult,*,*,*,*") |
691 | (set_attr "length" "8,12,12,12,12") | |
b9bf0c3f | 692 | (set_attr "cdna" "*,*,*,cdna2,cdna2")]) |
3d6275e3 | 693 | |
; SGPR-base move for the four-register (TI element) vector modes, enabled only
; while lra_in_progress or reload_completed is set.  The first alternative
; emits four v_mov_b32 in the fixed order %L, %H, %J, %K.  The two memory
; alternatives output # and must be split.
; NOTE(review): the first alternative declares length 8 for four v_mov_b32 -
; confirm the intended value.
; NOTE(review): the part order does not depend on the register numbers -
; confirm that overlapping source and destination ranges cannot occur here.
a0e6306b | 694 | (define_insn "@mov<mode>_sgprbase" |
ddfa4393 | 695 | [(set (match_operand:V_4REG 0 "nonimmediate_operand") |
8aeabd9f | 696 | (unspec:V_4REG |
ddfa4393 | 697 | [(match_operand:V_4REG 1 "general_operand")] |
8aeabd9f | 698 | UNSPEC_SGPRBASE)) |
ddfa4393 | 699 | (clobber (match_operand:<VnDI> 2 "register_operand"))] |
8aeabd9f | 700 | "lra_in_progress || reload_completed" |
ddfa4393 AS |
701 | {@ [cons: =0, 1, =2; attrs: type, length] |
702 | [v,vDB,&v;vmult,8 ] v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\;v_mov_b32\t%J0, %J1\;v_mov_b32\t%K0, %K1 | |
703 | [v,m ,&v;* ,12] # | |
704 | [m,v ,&v;* ,12] # | |
705 | }) | |
8aeabd9f | 706 | |
3d6275e3 AS |
707 | ; Expand scalar addresses into gather/scatter patterns |
708 | ||
; Split an UNSPEC_SGPRBASE store to memory into an UNSPEC_SCATTER.  Operand 5
; is the address returned by gcn_expand_scalar_to_vector_address, called with
; a NULL second argument, the MEM (operand 0) and the <VnDI> scratch
; (operand 2).  Operands 6 and 7 are CONST_INTs holding the MEM's address
; space and volatile flag.
709 | (define_split |
8aeabd9f AS |
710 | [(set (match_operand:V_MOV 0 "memory_operand")
711 | (unspec:V_MOV |
712 | [(match_operand:V_MOV 1 "general_operand")] |
3d6275e3 | 713 | UNSPEC_SGPRBASE)) |
1165109b | 714 | (clobber (match_scratch:<VnDI> 2))] |
3d6275e3 AS |
715 | ""
716 | [(set (mem:BLK (scratch)) |
717 | (unspec:BLK [(match_dup 5) (match_dup 1) (match_dup 6) (match_dup 7)] |
718 | UNSPEC_SCATTER))] |
719 | { |
720 | operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL, |
721 | operands[0], |
722 | operands[2]); |
723 | operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0])); |
724 | operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0])); |
725 | }) |
726 | ||
; Split a vec_merge (masked) store to memory into an UNSPEC_SCATTER.  The
; DImode mask (operand 3) is passed to gcn_expand_scalar_to_vector_address and
; is also appended as the last element of the scatter unspec.  Operands 6 and
; 7 are CONST_INTs holding the MEM's address space and volatile flag.
727 | (define_split |
8aeabd9f AS |
728 | [(set (match_operand:V_MOV 0 "memory_operand")
729 | (vec_merge:V_MOV |
730 | (match_operand:V_MOV 1 "general_operand") |
731 | (match_operand:V_MOV 2 "") |
3d6275e3 | 732 | (match_operand:DI 3 "gcn_exec_reg_operand"))) |
1165109b | 733 | (clobber (match_scratch:<VnDI> 4))] |
3d6275e3 AS |
734 | ""
735 | [(set (mem:BLK (scratch)) |
736 | (unspec:BLK [(match_dup 5) (match_dup 1) |
737 | (match_dup 6) (match_dup 7) (match_dup 3)] |
738 | UNSPEC_SCATTER))] |
739 | { |
740 | operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode, |
741 | operands[3], |
742 | operands[0], |
743 | operands[4]); |
744 | operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0])); |
745 | operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0])); |
746 | }) |
747 | ||
; Split an UNSPEC_SGPRBASE load from memory into an UNSPEC_GATHER.  Operand 5
; is the address returned by gcn_expand_scalar_to_vector_address, called with
; a NULL second argument, the MEM (operand 1) and the <VnDI> scratch
; (operand 2).  Operands 6 and 7 are CONST_INTs holding the MEM's address
; space and volatile flag.
748 | (define_split |
8aeabd9f AS |
749 | [(set (match_operand:V_MOV 0 "nonimmediate_operand")
750 | (unspec:V_MOV |
751 | [(match_operand:V_MOV 1 "memory_operand")] |
3d6275e3 | 752 | UNSPEC_SGPRBASE)) |
1165109b | 753 | (clobber (match_scratch:<VnDI> 2))] |
3d6275e3 AS |
754 | ""
755 | [(set (match_dup 0) |
8aeabd9f | 756 | (unspec:V_MOV [(match_dup 5) (match_dup 6) (match_dup 7) |
03876953 AS |
757 | (mem:BLK (scratch))]
758 | UNSPEC_GATHER))] |
3d6275e3 AS |
759 | {
760 | operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL, |
761 | operands[1], |
762 | operands[2]); |
763 | operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1])); |
764 | operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1])); |
765 | }) |
766 | ||
; Split a vec_merge (masked) load from memory into a vec_merge around an
; UNSPEC_GATHER, keeping the original else-value (operand 2) and mask
; (operand 3).  The mask is also passed to
; gcn_expand_scalar_to_vector_address when building the address (operand 5).
767 | (define_split |
8aeabd9f AS |
768 | [(set (match_operand:V_MOV 0 "nonimmediate_operand")
769 | (vec_merge:V_MOV |
770 | (match_operand:V_MOV 1 "memory_operand") |
771 | (match_operand:V_MOV 2 "") |
3d6275e3 | 772 | (match_operand:DI 3 "gcn_exec_reg_operand"))) |
1165109b | 773 | (clobber (match_scratch:<VnDI> 4))] |
3d6275e3 AS |
774 | ""
775 | [(set (match_dup 0) |
8aeabd9f AS |
776 | (vec_merge:V_MOV
777 | (unspec:V_MOV [(match_dup 5) (match_dup 6) (match_dup 7) |
03876953 AS |
778 | (mem:BLK (scratch))]
779 | UNSPEC_GATHER) |
3d6275e3 AS |
780 | (match_dup 2)
781 | (match_dup 3)))] |
782 | { |
783 | operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode, |
784 | operands[3], |
785 | operands[1], |
786 | operands[4]); |
787 | operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1])); |
788 | operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1])); |
789 | }) |
790 | ||
791 | ; TODO: Add zero/sign extending variants. | |
792 | ||
793 | ;; }}} | |
794 | ;; {{{ Lane moves | |
795 | ||
796 | ; v_writelane and v_readlane work regardless of exec flags. | |
797 | ; We allow source to be scratch. | |
798 | ; | |
799 | ; FIXME these should take A immediates | |
800 | ||
; Insert scalar operand 1 into one lane of a single-register vector.  The
; merge mask is written as (1 << operand 2); the other lanes come from
; operand 3 (constraint "U0").  Emits one v_writelane_b32 and sets the "exec"
; attribute to "none".
800 | (define_insn "*vec_set<mode>" |
03876953 AS |
801 | [(set (match_operand:V_1REG 0 "register_operand" "= v")
802 | (vec_merge:V_1REG |
803 | (vec_duplicate:V_1REG |
804 | (match_operand:<SCALAR_MODE> 1 "register_operand" " Sv")) |
805 | (match_operand:V_1REG 3 "gcn_register_or_unspec_operand" " U0") |
3d6275e3 | 806 | (ashift (const_int 1) |
03876953 | 807 | (match_operand:SI 2 "gcn_alu_operand" "SvB"))))] |
3d6275e3 AS |
808 | ""
809 | "v_writelane_b32 %0, %1, %2" |
810 | [(set_attr "type" "vop3a") |
811 | (set_attr "length" "8") |
812 | (set_attr "exec" "none") |
813 | (set_attr "laneselect" "yes")]) |
815 | ||
816 | ; FIXME: 64bit operations really should be splitters, but I am not sure how | |
817 | ; to represent vertical subregs. | |
; Two-register (V_2REG) lane insert: the same RTL shape as the V_1REG
; pattern, emitted as two v_writelane_b32 instructions, one for the low (%L)
; and one for the high (%H) part, both using lane operand 2.
818 | (define_insn "*vec_set<mode>" |
03876953 AS |
819 | [(set (match_operand:V_2REG 0 "register_operand" "= v")
820 | (vec_merge:V_2REG |
821 | (vec_duplicate:V_2REG |
822 | (match_operand:<SCALAR_MODE> 1 "register_operand" " Sv")) |
823 | (match_operand:V_2REG 3 "gcn_register_or_unspec_operand" " U0") |
3d6275e3 | 824 | (ashift (const_int 1) |
03876953 | 825 | (match_operand:SI 2 "gcn_alu_operand" "SvB"))))] |
3d6275e3 AS |
826 | ""
827 | "v_writelane_b32 %L0, %L1, %2\;v_writelane_b32 %H0, %H1, %2" |
828 | [(set_attr "type" "vmult") |
829 | (set_attr "length" "16") |
830 | (set_attr "exec" "none") |
831 | (set_attr "laneselect" "yes")]) |
832 | ||
; Named vec_set expander for all V_MOV modes.  It generates the vec_merge
; form directly, using the destination itself (match_dup 0) as the source of
; the lanes that are not written and (1 << operand 2) as the mask.
832 | (define_expand "vec_set<mode>" |
8aeabd9f AS |
833 | [(set (match_operand:V_MOV 0 "register_operand")
834 | (vec_merge:V_MOV |
835 | (vec_duplicate:V_MOV |
3d6275e3 AS |
836 | (match_operand:<SCALAR_MODE> 1 "register_operand"))
837 | (match_dup 0) |
838 | (ashift (const_int 1) (match_operand:SI 2 "gcn_alu_operand"))))] |
839 | "") |
841 | ||
; Lane insert where the merge mask (operand 2) is a constant integer rather
; than a shift expression.  The condition accepts the mask only when its
; exact_log2, compared as unsigned, is below the number of units of the
; mode.  The output code replaces operand 2 with that log2 before printing
; v_writelane_b32.
841 | (define_insn "*vec_set<mode>_1" |
03876953 AS |
842 | [(set (match_operand:V_1REG 0 "register_operand" "=v")
843 | (vec_merge:V_1REG |
844 | (vec_duplicate:V_1REG |
845 | (match_operand:<SCALAR_MODE> 1 "register_operand" "Sv")) |
846 | (match_operand:V_1REG 3 "gcn_register_or_unspec_operand" "U0") |
847 | (match_operand:SI 2 "const_int_operand" " i")))] |
1165109b | 848 | "((unsigned) exact_log2 (INTVAL (operands[2])) < GET_MODE_NUNITS (<MODE>mode))" |
3d6275e3 AS |
849 | {
850 | operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2]))); |
851 | return "v_writelane_b32 %0, %1, %2"; |
852 | } |
853 | [(set_attr "type" "vop3a") |
854 | (set_attr "length" "8") |
855 | (set_attr "exec" "none") |
856 | (set_attr "laneselect" "yes")]) |
858 | ||
; Two-register (V_2REG) lane insert with a constant-integer merge mask.  The
; same condition and log2 rewrite of operand 2 as the V_1REG pattern, emitted
; as two v_writelane_b32 instructions for the low and high parts.
858 | (define_insn "*vec_set<mode>_1" |
03876953 AS |
859 | [(set (match_operand:V_2REG 0 "register_operand" "=v")
860 | (vec_merge:V_2REG |
861 | (vec_duplicate:V_2REG |
862 | (match_operand:<SCALAR_MODE> 1 "register_operand" "Sv")) |
863 | (match_operand:V_2REG 3 "gcn_register_or_unspec_operand" "U0") |
864 | (match_operand:SI 2 "const_int_operand" " i")))] |
1165109b | 865 | "((unsigned) exact_log2 (INTVAL (operands[2])) < GET_MODE_NUNITS (<MODE>mode))" |
3d6275e3 AS |
866 | {
867 | operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2]))); |
868 | return "v_writelane_b32 %L0, %L1, %2\;v_writelane_b32 %H0, %H1, %2"; |
869 | } |
870 | [(set_attr "type" "vmult") |
871 | (set_attr "length" "16") |
872 | (set_attr "exec" "none") |
873 | (set_attr "laneselect" "yes")]) |
875 | ||
; Broadcast scalar operand 1 to a single-register vector with one v_mov_b32.
875 | (define_insn "vec_duplicate<mode><exec>" |
03876953 AS |
876 | [(set (match_operand:V_1REG 0 "register_operand" "=v")
877 | (vec_duplicate:V_1REG |
878 | (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand" "SvB")))] |
3d6275e3 AS |
879 | ""
880 | "v_mov_b32\t%0, %1" |
881 | [(set_attr "type" "vop3a") |
882 | (set_attr "length" "8")]) |
884 | ||
; Broadcast scalar operand 1 to a two-register vector: one v_mov_b32 for the
; low part (%L) and one for the high part (%H).
884 | (define_insn "vec_duplicate<mode><exec>" |
03876953 AS |
885 | [(set (match_operand:V_2REG 0 "register_operand" "= v")
886 | (vec_duplicate:V_2REG |
3d6275e3 AS |
887 | (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand" "SvDB")))]
888 | "" |
889 | "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1" |
890 | [(set_attr "type" "vop3a") |
891 | (set_attr "length" "16")]) |
893 | ||
; Broadcast scalar operand 1 to a four-register vector: one v_mov_b32 per
; part (%L, %H, %J, %K).
; NOTE(review): the "type" attribute here is "mult", whereas the other
; multi-instruction vector patterns in this section use "vmult" -- confirm
; that this is intentional.
8aeabd9f AS |
894 | (define_insn "vec_duplicate<mode><exec>"
895 | [(set (match_operand:V_4REG 0 "register_operand" "= v") |
896 | (vec_duplicate:V_4REG |
897 | (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand" "SvDB")))] |
898 | "" |
899 | "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\;v_mov_b32\t%J0, %J1\;v_mov_b32\t%K0, %K1" |
900 | [(set_attr "type" "mult") |
901 | (set_attr "length" "32")]) |
902 | ||
; Extract the lane selected by operand 2 from a single-register vector into
; scalar operand 0 (constraint "Sg") with one v_readlane_b32.  The "exec"
; attribute is "none".
3d6275e3 | 903 | (define_insn "vec_extract<mode><scalar_mode>" |
03876953 | 904 | [(set (match_operand:<SCALAR_MODE> 0 "register_operand" "=Sg") |
3d6275e3 | 905 | (vec_select:<SCALAR_MODE> |
03876953 AS |
906 | (match_operand:V_1REG 1 "register_operand" " v")
907 | (parallel [(match_operand:SI 2 "gcn_alu_operand" "SvB")])))] |
3d6275e3 AS |
908 | ""
909 | "v_readlane_b32 %0, %1, %2" |
910 | [(set_attr "type" "vop3a") |
911 | (set_attr "length" "8") |
912 | (set_attr "exec" "none") |
913 | (set_attr "laneselect" "yes")]) |
914 | ||
; Two-register (V_2REG) lane extract: two v_readlane_b32 instructions, one
; per part.  The destination is earlyclobber ("=&Sg") because the first
; instruction writes it while lane operand 2 is still needed by the second.
915 | (define_insn "vec_extract<mode><scalar_mode>" |
03876953 | 916 | [(set (match_operand:<SCALAR_MODE> 0 "register_operand" "=&Sg") |
3d6275e3 | 917 | (vec_select:<SCALAR_MODE> |
03876953 AS |
918 | (match_operand:V_2REG 1 "register_operand" " v")
919 | (parallel [(match_operand:SI 2 "gcn_alu_operand" " SvB")])))] |
3d6275e3 AS |
920 | ""
921 | "v_readlane_b32 %L0, %L1, %2\;v_readlane_b32 %H0, %H1, %2" |
922 | [(set_attr "type" "vmult") |
923 | (set_attr "length" "16") |
924 | (set_attr "exec" "none") |
925 | (set_attr "laneselect" "yes")]) |
926 | ||
; Four-register (V_4REG) lane extract: four v_readlane_b32 instructions, one
; per part (%L, %H, %J, %K), into an earlyclobber scalar destination.
8aeabd9f AS |
927 | (define_insn "vec_extract<mode><scalar_mode>"
928 | [(set (match_operand:<SCALAR_MODE> 0 "register_operand" "=&Sg") |
929 | (vec_select:<SCALAR_MODE> |
930 | (match_operand:V_4REG 1 "register_operand" " v") |
931 | (parallel [(match_operand:SI 2 "gcn_alu_operand" " SvB")])))] |
932 | "" |
933 | "v_readlane_b32 %L0, %L1, %2\;v_readlane_b32 %H0, %H1, %2\;v_readlane_b32 %J0, %J1, %2\;v_readlane_b32 %K0, %K1, %2" |
934 | [(set_attr "type" "vmult") |
935 | (set_attr "length" "32") |
936 | (set_attr "exec" "none") |
937 | (set_attr "laneselect" "yes")]) |
938 | ||
; Sub-vector extract that needs no lane movement.  The result mode has a
; smaller MODE_VF than the source and the same scalar mode, and operand 2
; must satisfy ascending_zero_int_parallel.  When the output is tied to the
; input (alternative 0) only an assembler comment is emitted (length 0);
; otherwise a single v_mov_b32 copies the register.
db80ccd3 AS |
939 | (define_insn "vec_extract<V_1REG:mode><V_1REG_ALT:mode>_nop"
940 | [(set (match_operand:V_1REG_ALT 0 "register_operand" "=v,v") |
941 | (vec_select:V_1REG_ALT |
942 | (match_operand:V_1REG 1 "register_operand" " 0,v") |
943 | (match_operand 2 "ascending_zero_int_parallel" "")))] |
944 | "MODE_VF (<V_1REG_ALT:MODE>mode) < MODE_VF (<V_1REG:MODE>mode) |
9ae1fbdd AS |
945 | && <V_1REG_ALT:SCALAR_MODE>mode == <V_1REG:SCALAR_MODE>mode
946 | /* This comment silences a warning for operands[2]. */" |
db80ccd3 AS |
947 | "@
948 | ; in-place extract %0 |
949 | v_mov_b32\t%L0, %L1" |
950 | [(set_attr "type" "vmult") |
951 | (set_attr "length" "0,8")]) |
952 | ||
; Two-register form of the no-movement sub-vector extract.  When the output
; is tied to the input only an assembler comment is emitted (length 0);
; otherwise two v_mov_b32 copy the low and high parts.
953 | (define_insn "vec_extract<V_2REG:mode><V_2REG_ALT:mode>_nop" |
954 | [(set (match_operand:V_2REG_ALT 0 "register_operand" "=v,v") |
955 | (vec_select:V_2REG_ALT |
956 | (match_operand:V_2REG 1 "register_operand" " 0,v") |
957 | (match_operand 2 "ascending_zero_int_parallel" "")))] |
958 | "MODE_VF (<V_2REG_ALT:MODE>mode) < MODE_VF (<V_2REG:MODE>mode) |
9ae1fbdd AS |
959 | && <V_2REG_ALT:SCALAR_MODE>mode == <V_2REG:SCALAR_MODE>mode
960 | /* This comment silences a warning for operands[2]. */" |
db80ccd3 AS |
961 | "@
962 | ; in-place extract %0 |
963 | v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1" |
964 | [(set_attr "type" "vmult") |
965 | (set_attr "length" "0,8")]) |
966 | ||
; Four-register form of the no-movement sub-vector extract.  When the output
; is tied to the input only an assembler comment is emitted (length 0);
; otherwise four v_mov_b32 copy parts %L, %H, %J and %K (length 16).
8aeabd9f AS |
967 | (define_insn "vec_extract<V_4REG:mode><V_4REG_ALT:mode>_nop"
968 | [(set (match_operand:V_4REG_ALT 0 "register_operand" "=v,v") |
969 | (vec_select:V_4REG_ALT |
970 | (match_operand:V_4REG 1 "register_operand" " 0,v") |
971 | (match_operand 2 "ascending_zero_int_parallel" "")))] |
972 | "MODE_VF (<V_4REG_ALT:MODE>mode) < MODE_VF (<V_4REG:MODE>mode) |
973 | && <V_4REG_ALT:SCALAR_MODE>mode == <V_4REG:SCALAR_MODE>mode" |
974 | "@ |
975 | ; in-place extract %0 |
976 | v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\;v_mov_b32\t%J0, %J1\;v_mov_b32\t%K0, %K1" |
977 | [(set_attr "type" "vmult") |
978 | (set_attr "length" "0,16")]) |
979 | ||
; Extract a sub-vector (V_MOV_ALT) from a wider vector (V_MOV) with the same
; scalar mode.  The first lane is operand 2 times the lane count of the
; result mode.
;  - First lane 0: build a PARALLEL of 0 .. numlanes-1 and emit the matching
;    vec_extract..._nop pattern.
;  - Otherwise: build a permutation vector with vec_series (start
;    firstlane*4, step 4), run ds_bpermute on the source into a temporary,
;    and move the offset-0 SUBREG of that temporary to the destination.
; With TARGET_WAVE64_COMPAT the source MODE_VF is limited to at most 32.
980 | (define_expand "vec_extract<V_MOV:mode><V_MOV_ALT:mode>" |
981 | [(match_operand:V_MOV_ALT 0 "register_operand") |
982 | (match_operand:V_MOV 1 "register_operand") |
db80ccd3 | 983 | (match_operand 2 "immediate_operand")] |
8aeabd9f | 984 | "MODE_VF (<V_MOV_ALT:MODE>mode) < MODE_VF (<V_MOV:MODE>mode) |
84da9bca | 985 | && <V_MOV_ALT:SCALAR_MODE>mode == <V_MOV:SCALAR_MODE>mode |
68e03492 | 986 | && (!TARGET_WAVE64_COMPAT || MODE_VF (<V_MOV:MODE>mode) <= 32)" |
5cfe0855 | 987 | { |
8aeabd9f | 988 | int numlanes = GET_MODE_NUNITS (<V_MOV_ALT:MODE>mode); |
5cfe0855 AS |
989 | int firstlane = INTVAL (operands[2]) * numlanes;
990 | rtx tmp; |
991 | ||
992 | if (firstlane == 0) |
993 | { |
8aeabd9f | 994 | rtx parallel = gen_rtx_PARALLEL (<V_MOV:MODE>mode, |
db80ccd3 AS |
995 | rtvec_alloc (numlanes));
996 | for (int i = 0; i < numlanes; i++) |
997 | XVECEXP (parallel, 0, i) = GEN_INT (i); |
8aeabd9f | 998 | emit_insn (gen_vec_extract<V_MOV:mode><V_MOV_ALT:mode>_nop |
db80ccd3 | 999 | (operands[0], operands[1], parallel)); |
5cfe0855 AS |
1000 | } else {
1001 | /* FIXME: optimize this by using DPP where available. */ |
1002 | ||
8aeabd9f AS |
1003 | rtx permutation = gen_reg_rtx (<V_MOV:VnSI>mode);
1004 | emit_insn (gen_vec_series<V_MOV:vnsi> (permutation, |
5cfe0855 AS |
1005 | GEN_INT (firstlane*4),
1006 | GEN_INT (4))); |
1007 | ||
8aeabd9f AS |
1008 | tmp = gen_reg_rtx (<V_MOV:MODE>mode);
1009 | emit_insn (gen_ds_bpermute<V_MOV:mode> (tmp, permutation, operands[1], |
1010 | get_exec (<V_MOV:MODE>mode))); |
5cfe0855 | 1011 | 
db80ccd3 | 1012 | emit_move_insn (operands[0], |
8aeabd9f | 1013 | gen_rtx_SUBREG (<V_MOV_ALT:MODE>mode, tmp, 0)); |
db80ccd3 | 1014 | } |
5cfe0855 AS |
1015 | DONE;
1016 | }) |
1017 | ||
; Extract the vector element selected by the highest set bit of the DImode
; mask (operand 1).  The lane index is computed as 63 - clz (mask) in a new
; SImode pseudo, then passed to vec_extract<mode><scalar_mode>.  This needs
; new pseudos, hence the can_create_pseudo_p condition.
b92d1124 AS |
1018 | (define_expand "extract_last_<mode>"
1019 | [(match_operand:<SCALAR_MODE> 0 "register_operand") |
1020 | (match_operand:DI 1 "gcn_alu_operand") |
8aeabd9f | 1021 | (match_operand:V_MOV 2 "register_operand")] |
b92d1124 AS |
1022 | "can_create_pseudo_p ()"
1023 | { |
1024 | rtx dst = operands[0]; |
1025 | rtx mask = operands[1]; |
1026 | rtx vect = operands[2]; |
1027 | rtx tmpreg = gen_reg_rtx (SImode); |
1028 | ||
1029 | emit_insn (gen_clzdi2 (tmpreg, mask)); |
1030 | emit_insn (gen_subsi3 (tmpreg, GEN_INT (63), tmpreg)); |
1031 | emit_insn (gen_vec_extract<mode><scalar_mode> (dst, vect, tmpreg)); |
1032 | DONE; |
1033 | }) |
1034 | ||
; Like extract_last_<mode>, but with a default.  If the mask (operand 2) is
; zero, control branches to else_label and the default value (operand 1) is
; moved to the destination; otherwise extract_last_<mode> is emitted and
; control jumps past the default move to end_label.
1035 | (define_expand "fold_extract_last_<mode>" |
1036 | [(match_operand:<SCALAR_MODE> 0 "register_operand") |
1037 | (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand") |
1038 | (match_operand:DI 2 "gcn_alu_operand") |
8aeabd9f | 1039 | (match_operand:V_MOV 3 "register_operand")] |
b92d1124 AS |
1040 | "can_create_pseudo_p ()"
1041 | { |
1042 | rtx dst = operands[0]; |
1043 | rtx default_value = operands[1]; |
1044 | rtx mask = operands[2]; |
1045 | rtx vect = operands[3]; |
1046 | rtx else_label = gen_label_rtx (); |
1047 | rtx end_label = gen_label_rtx (); |
1048 | ||
1049 | rtx cond = gen_rtx_EQ (VOIDmode, mask, const0_rtx); |
1050 | emit_jump_insn (gen_cbranchdi4 (cond, mask, const0_rtx, else_label)); |
1051 | emit_insn (gen_extract_last_<mode> (dst, mask, vect)); |
1052 | emit_jump_insn (gen_jump (end_label)); |
1053 | emit_barrier (); |
1054 | emit_label (else_label); |
1055 | emit_move_insn (dst, default_value); |
1056 | emit_label (end_label); |
1057 | DONE; |
1058 | }) |
1059 | ||
; Vector initialisation from operand 1; all the work is delegated to
; gcn_expand_vector_init.
3d6275e3 | 1060 | (define_expand "vec_init<mode><scalar_mode>" |
8aeabd9f | 1061 | [(match_operand:V_MOV 0 "register_operand") |
3d6275e3 AS |
1062 | (match_operand 1)]
1063 | "" |
1064 | { |
1065 | gcn_expand_vector_init (operands[0], operands[1]); |
1066 | DONE; |
1067 | }) |
1068 | ||
; Vector initialisation where operand 1 has a narrower vector mode
; (V_MOV_ALT) with the same scalar mode as the destination.  Also delegated
; to gcn_expand_vector_init.
8aeabd9f AS |
1069 | (define_expand "vec_init<V_MOV:mode><V_MOV_ALT:mode>"
1070 | [(match_operand:V_MOV 0 "register_operand") |
1071 | (match_operand:V_MOV_ALT 1)] |
1072 | "<V_MOV:SCALAR_MODE>mode == <V_MOV_ALT:SCALAR_MODE>mode |
1073 | && MODE_VF (<V_MOV_ALT:MODE>mode) < MODE_VF (<V_MOV:MODE>mode)" |
769a10d0 AS |
1074 | {
1075 | gcn_expand_vector_init (operands[0], operands[1]); |
1076 | DONE; |
1077 | }) |
1078 | ||
3d6275e3 AS |
1079 | ;; }}} |
1080 | ;; {{{ Scatter / Gather | |
1081 | ||
1082 | ;; GCN does not have an instruction for loading a vector from contiguous | |
1083 | ;; memory so *all* loads and stores are eventually converted to scatter | |
1084 | ;; or gather. | |
1085 | ;; | |
1086 | ;; GCC does not permit MEM to hold vectors of addresses, so we must use an | |
1087 | ;; unspec. The unspec formats are as follows: | |
1088 | ;; | |
1165109b | 1089 | ;; (unspec:V?? |
3d6275e3 AS |
1090 | ;; [(<address expression>) |
1091 | ;; (<addr_space_t>) | |
1092 | ;; (<use_glc>) | |
1093 | ;; (mem:BLK (scratch))] | |
1094 | ;; UNSPEC_GATHER) | |
1095 | ;; | |
1096 | ;; (unspec:BLK | |
1097 | ;; [(<address expression>) | |
1098 | ;; (<source register>) | |
1099 | ;; (<addr_space_t>) | |
1100 | ;; (<use_glc>) | |
1101 | ;; (<exec>)] | |
1102 | ;; UNSPEC_SCATTER) | |
1103 | ;; | |
1104 | ;; - Loads are expected to be wrapped in a vec_merge, so do not need <exec>. | |
1105 | ;; - The mem:BLK does not contain any real information, but indicates that an | |
1106 | ;; unknown memory read is taking place. Stores are expected to use a similar | |
1107 | ;; mem:BLK outside the unspec. | |
1108 | ;; - The address space and glc (volatile) fields are there to replace the | |
1109 | ;; fields normally found in a MEM. | |
1110 | ;; - Multiple forms of address expression are supported, below. | |
aad32a00 AS |
1111 | ;; |
1112 | ;; TODO: implement combined gather and zero_extend, but only for -msram-ecc=on | |
3d6275e3 | 1113 | |
; gather_load expander.  gcn_expand_scaled_offsets is called with
; DEFAULT_ADDR_SPACE, the DImode base (operand 1), the <VnSI> offsets
; (operand 2), operand 4 and INTVAL (operand 3).  If the result has mode
; <VnDI> it is used as a complete address with gather<mode>_insn_1offset;
; otherwise it is combined with the scalar base via
; gather<mode>_insn_2offsets.  The constant offset, address-space and glc
; operands are all passed as const0_rtx.
1165109b | 1114 | (define_expand "gather_load<mode><vnsi>" |
8aeabd9f | 1115 | [(match_operand:V_MOV 0 "register_operand") |
3d6275e3 | 1116 | (match_operand:DI 1 "register_operand") |
1165109b | 1117 | (match_operand:<VnSI> 2 "register_operand") |
3d6275e3 AS |
1118 | (match_operand 3 "immediate_operand")
1119 | (match_operand:SI 4 "gcn_alu_operand")] |
1120 | "" |
1121 | { |
1122 | rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[1], |
1123 | operands[2], operands[4], |
1124 | INTVAL (operands[3]), NULL); |
1125 | ||
1165109b | 1126 | if (GET_MODE (addr) == <VnDI>mode) |
3d6275e3 AS |
1127 | emit_insn (gen_gather<mode>_insn_1offset (operands[0], addr, const0_rtx,
1128 | const0_rtx, const0_rtx)); |
1129 | else |
1130 | emit_insn (gen_gather<mode>_insn_2offsets (operands[0], operands[1], |
1131 | addr, const0_rtx, const0_rtx, |
1132 | const0_rtx)); |
1133 | DONE; |
1134 | }) |
1135 | ||
3d6275e3 AS |
1136 | ; Allow any address expression |
; Generic gather constructor.  Operand 1 has no predicate, so any address
; expression is accepted; operands 2 and 3 are the immediate address-space
; and glc fields of the UNSPEC_GATHER.  The preparation statement is empty.
1137 | (define_expand "gather<mode>_expr<exec>" |
8aeabd9f AS |
1138 | [(set (match_operand:V_MOV 0 "register_operand")
1139 | (unspec:V_MOV |
3d6275e3 AS |
1140 | [(match_operand 1 "")
1141 | (match_operand 2 "immediate_operand") |
1142 | (match_operand 3 "immediate_operand") |
1143 | (mem:BLK (scratch))] |
1144 | UNSPEC_GATHER))] |
1145 | "" |
1146 | {}) |
1147 | ||
; Gather from a <VnDI> vector of addresses (operand 1) plus a constant
; offset (operand 2).  Accepted for the flat address space with an offset in
; [0, 0x1000) and for the global address space with an offset in
; [-0x1000, 0x1000).  Emits flat_load or global_load followed by s_waitcnt,
; appending " glc" when operand 4 is non-zero.  The assembler text is built
; in a function-local static buffer.  Alternatives 2 and 3 make the
; destination earlyclobber and carry "xnack" = "on".
1148 | (define_insn "gather<mode>_insn_1offset<exec>" |
392f70cc | 1149 | [(set (match_operand:V_MOV 0 "register_operand" "=v,a,&v,&a") |
8aeabd9f | 1150 | (unspec:V_MOV |
392f70cc | 1151 | [(plus:<VnDI> (match_operand:<VnDI> 1 "register_operand" " v,v, v, v") |
1165109b | 1152 | (vec_duplicate:<VnDI> |
392f70cc AS |
1153 | (match_operand 2 "immediate_operand" " n,n, n, n")))
1154 | (match_operand 3 "immediate_operand" " n,n, n, n") |
1155 | (match_operand 4 "immediate_operand" " n,n, n, n") |
3d6275e3 AS |
1156 | (mem:BLK (scratch))]
1157 | UNSPEC_GATHER))] |
1158 | "(AS_FLAT_P (INTVAL (operands[3])) |
023641d9 AS |
1159 | && ((unsigned HOST_WIDE_INT)INTVAL(operands[2]) < 0x1000))
1160 | || (AS_GLOBAL_P (INTVAL (operands[3])) |
1161 | && (((unsigned HOST_WIDE_INT)INTVAL(operands[2]) + 0x1000) < 0x2000))" |
3d6275e3 AS |
1162 | {
1163 | addr_space_t as = INTVAL (operands[3]); |
1164 | const char *glc = INTVAL (operands[4]) ? " glc" : ""; |
1165 | ||
1166 | static char buf[200]; |
1167 | if (AS_FLAT_P (as)) |
b9bf0c3f | 1168 | sprintf (buf, "flat_load%%o0\t%%0, %%1 offset:%%2%s\;s_waitcnt\t0", glc); |
3d6275e3 | 1169 | else if (AS_GLOBAL_P (as)) |
28dd61b7 | 1170 | sprintf (buf, "global_load%%o0\t%%0, %%1, off offset:%%2%s\;" |
3d6275e3 AS |
1171 | "s_waitcnt\tvmcnt(0)", glc);
1172 | else |
1173 | gcc_unreachable (); |
1174 | ||
1175 | return buf; |
1176 | } |
1177 | [(set_attr "type" "flat") |
ae0d2c24 | 1178 | (set_attr "length" "12") |
b9bf0c3f | 1179 | (set_attr "cdna" "*,cdna2,*,cdna2") |
392f70cc | 1180 | (set_attr "xnack" "off,off,on,on")]) |
3d6275e3 AS |
1181 | |
; Gather through the DS address spaces (AS_ANY_DS_P).  Addresses are a
; <VnSI> vector (operand 1) plus a constant offset (operand 2) in
; [0, 0x10000).  Emits ds_read followed by s_waitcnt lgkmcnt(0), with " gds"
; appended when AS_GDS_P holds for the address space.  Operand 4 is not used
; by the output code.
1182 | (define_insn "gather<mode>_insn_1offset_ds<exec>" |
ae0d2c24 | 1183 | [(set (match_operand:V_MOV 0 "register_operand" "=v,a") |
8aeabd9f | 1184 | (unspec:V_MOV |
ae0d2c24 | 1185 | [(plus:<VnSI> (match_operand:<VnSI> 1 "register_operand" " v,v") |
1165109b | 1186 | (vec_duplicate:<VnSI> |
ae0d2c24 AS |
1187 | (match_operand 2 "immediate_operand" " n,n")))
1188 | (match_operand 3 "immediate_operand" " n,n") |
1189 | (match_operand 4 "immediate_operand" " n,n") |
3d6275e3 AS |
1190 | (mem:BLK (scratch))]
1191 | UNSPEC_GATHER))] |
1192 | "(AS_ANY_DS_P (INTVAL (operands[3])) |
1193 | && ((unsigned HOST_WIDE_INT)INTVAL(operands[2]) < 0x10000))" |
1194 | { |
1195 | addr_space_t as = INTVAL (operands[3]); |
1196 | static char buf[200]; |
1197 | sprintf (buf, "ds_read%%b0\t%%0, %%1 offset:%%2%s\;s_waitcnt\tlgkmcnt(0)", |
1198 | (AS_GDS_P (as) ? " gds" : "")); |
1199 | return buf; |
1200 | } |
1201 | [(set_attr "type" "ds") |
ae0d2c24 | 1202 | (set_attr "length" "12") |
b9bf0c3f | 1203 | (set_attr "cdna" "*,cdna2")]) |
3d6275e3 AS |
1204 | |
; Gather from a scalar DImode base (operand 1) plus sign-extended <VnSI>
; per-lane offsets (operand 2) plus a constant offset (operand 3).  Global
; address space only, with the constant in [-0x1000, 0x1000).  Emits
; global_load with the vector offset first and the scalar base second,
; followed by s_waitcnt vmcnt(0); " glc" is appended when operand 5 is
; non-zero.
1205 | (define_insn "gather<mode>_insn_2offsets<exec>" |
392f70cc | 1206 | [(set (match_operand:V_MOV 0 "register_operand" "=v,a,&v,&a") |
8aeabd9f | 1207 | (unspec:V_MOV |
1165109b AS |
1208 | [(plus:<VnDI>
1209 | (plus:<VnDI> |
1210 | (vec_duplicate:<VnDI> |
392f70cc | 1211 | (match_operand:DI 1 "register_operand" "Sv,Sv,Sv,Sv")) |
1165109b | 1212 | (sign_extend:<VnDI> |
392f70cc | 1213 | (match_operand:<VnSI> 2 "register_operand" " v, v, v, v"))) |
ae0d2c24 | 1214 | (vec_duplicate:<VnDI> (match_operand 3 "immediate_operand" |
392f70cc AS |
1215 | " n, n, n, n")))
1216 | (match_operand 4 "immediate_operand" " n, n, n, n") |
1217 | (match_operand 5 "immediate_operand" " n, n, n, n") |
3d6275e3 AS |
1218 | (mem:BLK (scratch))]
1219 | UNSPEC_GATHER))] |
1220 | "(AS_GLOBAL_P (INTVAL (operands[4])) |
1221 | && (((unsigned HOST_WIDE_INT)INTVAL(operands[3]) + 0x1000) < 0x2000))" |
1222 | { |
1223 | addr_space_t as = INTVAL (operands[4]); |
1224 | const char *glc = INTVAL (operands[5]) ? " glc" : ""; |
1225 | ||
1226 | static char buf[200]; |
1227 | if (AS_GLOBAL_P (as)) |
8086230e AS |
1228 | sprintf (buf, "global_load%%o0\t%%0, %%2, %%1 offset:%%3%s\;"
1229 | "s_waitcnt\tvmcnt(0)", glc); |
3d6275e3 AS |
1230 | else
1231 | gcc_unreachable (); |
1232 | ||
1233 | return buf; |
1234 | } |
1235 | [(set_attr "type" "flat") |
ae0d2c24 | 1236 | (set_attr "length" "12") |
b9bf0c3f | 1237 | (set_attr "cdna" "*,cdna2,*,cdna2") |
392f70cc | 1238 | (set_attr "xnack" "off,off,on,on")]) |
3d6275e3 | 1239 | |
; scatter_store expander, the store counterpart of gather_load.
; gcn_expand_scaled_offsets is called with DEFAULT_ADDR_SPACE, the DImode
; base (operand 0), the <VnSI> offsets (operand 1), operand 3 and
; INTVAL (operand 2).  A <VnDI> result is used directly with
; scatter<mode>_insn_1offset; otherwise scatter<mode>_insn_2offsets is used
; with the scalar base.  Operand 4 is the vector being stored.
1165109b | 1240 | (define_expand "scatter_store<mode><vnsi>" |
3d6275e3 | 1241 | [(match_operand:DI 0 "register_operand") |
1165109b | 1242 | (match_operand:<VnSI> 1 "register_operand") |
3d6275e3 AS |
1243 | (match_operand 2 "immediate_operand")
1244 | (match_operand:SI 3 "gcn_alu_operand") |
8aeabd9f | 1245 | (match_operand:V_MOV 4 "register_operand")] |
3d6275e3 AS |
1246 | ""
1247 | { |
1248 | rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[0], |
1249 | operands[1], operands[3], |
1250 | INTVAL (operands[2]), NULL); |
1251 | ||
1165109b | 1252 | if (GET_MODE (addr) == <VnDI>mode) |
3d6275e3 AS |
1253 | emit_insn (gen_scatter<mode>_insn_1offset (addr, const0_rtx, operands[4],
1254 | const0_rtx, const0_rtx)); |
1255 | else |
1256 | emit_insn (gen_scatter<mode>_insn_2offsets (operands[0], addr, |
1257 | const0_rtx, operands[4], |
1258 | const0_rtx, const0_rtx)); |
1259 | DONE; |
1260 | }) |
1261 | ||
3d6275e3 AS |
1262 | ; Allow any address expression |
; Generic scatter constructor.  Operand 0 is a <VnDI> address expression
; with no predicate, operand 1 the vector register to store, and operands 2
; and 3 the immediate address-space and glc fields.  The preparation
; statement is empty.
1263 | (define_expand "scatter<mode>_expr<exec_scatter>" |
1264 | [(set (mem:BLK (scratch)) |
1265 | (unspec:BLK |
1165109b | 1266 | [(match_operand:<VnDI> 0 "") |
8aeabd9f | 1267 | (match_operand:V_MOV 1 "register_operand") |
3d6275e3 AS |
1268 | (match_operand 2 "immediate_operand")
1269 | (match_operand 3 "immediate_operand")] |
1270 | UNSPEC_SCATTER))] |
1271 | "" |
1272 | {}) |
1273 | ||
; Scatter to a <VnDI> vector of addresses (operand 0) plus a constant offset
; (operand 1).  Accepted for the flat address space with an offset in
; [0, 0x1000) and for the global address space with an offset in
; [-0x1000, 0x1000).  Emits flat_store or global_store, with " glc" appended
; when operand 4 is non-zero; no s_waitcnt follows the store.
; NOTE(review): the explicit "== 0" test in the flat condition is already
; covered by the "< 0x1000" test next to it.
1274 | (define_insn "scatter<mode>_insn_1offset<exec_scatter>" |
1275 | [(set (mem:BLK (scratch)) |
1276 | (unspec:BLK |
ae0d2c24 | 1277 | [(plus:<VnDI> (match_operand:<VnDI> 0 "register_operand" "v,v") |
1165109b | 1278 | (vec_duplicate:<VnDI> |
ae0d2c24 AS |
1279 | (match_operand 1 "immediate_operand" "n,n")))
1280 | (match_operand:V_MOV 2 "register_operand" "v,a") |
1281 | (match_operand 3 "immediate_operand" "n,n") |
1282 | (match_operand 4 "immediate_operand" "n,n")] |
3d6275e3 AS |
1283 | UNSPEC_SCATTER))]
1284 | "(AS_FLAT_P (INTVAL (operands[3])) |
1285 | && (INTVAL(operands[1]) == 0 |
b9bf0c3f | 1286 | || ((unsigned HOST_WIDE_INT)INTVAL(operands[1]) < 0x1000))) |
3d6275e3 AS |
1287 | || (AS_GLOBAL_P (INTVAL (operands[3]))
1288 | && (((unsigned HOST_WIDE_INT)INTVAL(operands[1]) + 0x1000) < 0x2000))" |
1289 | { |
1290 | addr_space_t as = INTVAL (operands[3]); |
1291 | const char *glc = INTVAL (operands[4]) ? " glc" : ""; |
1292 | ||
1293 | static char buf[200]; |
1294 | if (AS_FLAT_P (as)) |
930c5599 | 1295 | sprintf (buf, "flat_store%%s2\t%%0, %%2 offset:%%1%s", glc); |
3d6275e3 | 1296 | else if (AS_GLOBAL_P (as)) |
930c5599 | 1297 | sprintf (buf, "global_store%%s2\t%%0, %%2, off offset:%%1%s", glc); |
3d6275e3 AS |
1298 | else
1299 | gcc_unreachable (); |
1300 | ||
1301 | return buf; |
1302 | } |
1303 | [(set_attr "type" "flat") |
ae0d2c24 | 1304 | (set_attr "length" "12") |
b9bf0c3f | 1305 | (set_attr "cdna" "*,cdna2")]) |
3d6275e3 AS |
1306 | |
; Scatter through the DS address spaces (AS_ANY_DS_P).  Addresses are a
; <VnSI> vector (operand 0) plus a constant offset (operand 1) in
; [0, 0x10000).  Emits ds_write followed by s_waitcnt lgkmcnt(0), with " gds"
; appended when AS_GDS_P holds for the address space.  Operand 4 is not used
; by the output code.
1307 | (define_insn "scatter<mode>_insn_1offset_ds<exec_scatter>" |
1308 | [(set (mem:BLK (scratch)) |
1309 | (unspec:BLK |
ae0d2c24 | 1310 | [(plus:<VnSI> (match_operand:<VnSI> 0 "register_operand" "v,v") |
1165109b | 1311 | (vec_duplicate:<VnSI> |
ae0d2c24 AS |
1312 | (match_operand 1 "immediate_operand" "n,n")))
1313 | (match_operand:V_MOV 2 "register_operand" "v,a") |
1314 | (match_operand 3 "immediate_operand" "n,n") |
1315 | (match_operand 4 "immediate_operand" "n,n")] |
3d6275e3 AS |
1316 | UNSPEC_SCATTER))]
1317 | "(AS_ANY_DS_P (INTVAL (operands[3])) |
1318 | && ((unsigned HOST_WIDE_INT)INTVAL(operands[1]) < 0x10000))" |
1319 | { |
1320 | addr_space_t as = INTVAL (operands[3]); |
1321 | static char buf[200]; |
e929d65b | 1322 | sprintf (buf, "ds_write%%b2\t%%0, %%2 offset:%%1%s\;s_waitcnt\tlgkmcnt(0)", |
3d6275e3 AS |
1323 | (AS_GDS_P (as) ? " gds" : ""));
1324 | return buf; |
1325 | } |
1326 | [(set_attr "type" "ds") |
ae0d2c24 | 1327 | (set_attr "length" "12") |
b9bf0c3f | 1328 | (set_attr "cdna" "*,cdna2")]) |
3d6275e3 AS |
1329 | |
; Scatter to a scalar DImode base (operand 0) plus sign-extended <VnSI>
; per-lane offsets (operand 1) plus a constant offset (operand 2).  Global
; address space only, with the constant in [-0x1000, 0x1000).  Emits
; global_store with the vector offset, then the data (operand 3), then the
; scalar base; " glc" is appended when operand 5 is non-zero.
1330 | (define_insn "scatter<mode>_insn_2offsets<exec_scatter>" |
1331 | [(set (mem:BLK (scratch)) |
1332 | (unspec:BLK |
1165109b AS |
1333 | [(plus:<VnDI>
1334 | (plus:<VnDI> |
1335 | (vec_duplicate:<VnDI> |
ae0d2c24 | 1336 | (match_operand:DI 0 "register_operand" "Sv,Sv")) |
1165109b | 1337 | (sign_extend:<VnDI> |
ae0d2c24 AS |
1338 | (match_operand:<VnSI> 1 "register_operand" "v,v")))
1339 | (vec_duplicate:<VnDI> (match_operand 2 "immediate_operand" "n,n"))) |
1340 | (match_operand:V_MOV 3 "register_operand" "v,a") |
1341 | (match_operand 4 "immediate_operand" "n,n") |
1342 | (match_operand 5 "immediate_operand" "n,n")] |
3d6275e3 AS |
1343 | UNSPEC_SCATTER))]
1344 | "(AS_GLOBAL_P (INTVAL (operands[4])) |
1345 | && (((unsigned HOST_WIDE_INT)INTVAL(operands[2]) + 0x1000) < 0x2000))" |
1346 | { |
1347 | addr_space_t as = INTVAL (operands[4]); |
1348 | const char *glc = INTVAL (operands[5]) ? " glc" : ""; |
1349 | ||
1350 | static char buf[200]; |
1351 | if (AS_GLOBAL_P (as)) |
8086230e | 1352 | sprintf (buf, "global_store%%s3\t%%1, %%3, %%0 offset:%%2%s", glc); |
3d6275e3 AS |
1353 | else
1354 | gcc_unreachable (); |
1355 | ||
1356 | return buf; |
1357 | } |
1358 | [(set_attr "type" "flat") |
ae0d2c24 | 1359 | (set_attr "length" "12") |
b9bf0c3f | 1360 | (set_attr "cdna" "*,cdna2")]) |
3d6275e3 AS |
1361 | |
1362 | ;; }}} | |
1363 | ;; {{{ Permutations | |
1364 | ||
; Single-register permutation via ds_bpermute_b32 followed by
; s_waitcnt lgkmcnt(0).  Note the operand numbering: operand 1 is the <VnSI>
; vector and operand 2 is the data vector, although operand 2 appears first
; in the unspec.  Operand 3 is the DImode exec register.
1365 | (define_insn "ds_bpermute<mode>" |
03876953 AS |
1366 | [(set (match_operand:V_1REG 0 "register_operand" "=v")
1367 | (unspec:V_1REG |
1368 | [(match_operand:V_1REG 2 "register_operand" " v") |
1165109b | 1369 | (match_operand:<VnSI> 1 "register_operand" " v") |
03876953 | 1370 | (match_operand:DI 3 "gcn_exec_reg_operand" " e")] |
3d6275e3 AS |
1371 | UNSPEC_BPERMUTE))]
1372 | "" |
1373 | "ds_bpermute_b32\t%0, %1, %2\;s_waitcnt\tlgkmcnt(0)" |
1374 | [(set_attr "type" "vop2") |
1375 | (set_attr "length" "12")]) |
1376 | ||
; Two-register permutation.  Emitted as "#" and, once reload has completed,
; split into two <VnSI> UNSPEC_BPERMUTE sets.  Operands 4/5 are parts 0 and 1
; of the destination and operands 6/7 are parts 0 and 1 of the data vector,
; all obtained with gcn_operand_part.  Both halves use the same <VnSI>
; vector (operand 1) and exec (operand 3).
1377 | (define_insn_and_split "ds_bpermute<mode>" |
03876953 AS |
1378 | [(set (match_operand:V_2REG 0 "register_operand" "=&v")
1379 | (unspec:V_2REG |
1380 | [(match_operand:V_2REG 2 "register_operand" " v0") |
1165109b | 1381 | (match_operand:<VnSI> 1 "register_operand" " v") |
03876953 | 1382 | (match_operand:DI 3 "gcn_exec_reg_operand" " e")] |
3d6275e3 AS |
1383 | UNSPEC_BPERMUTE))]
1384 | "" |
1385 | "#" |
1386 | "reload_completed" |
1165109b AS |
1387 | [(set (match_dup 4) (unspec:<VnSI>
1388 | [(match_dup 6) (match_dup 1) (match_dup 3)] |
1389 | UNSPEC_BPERMUTE)) |
1390 | (set (match_dup 5) (unspec:<VnSI> |
1391 | [(match_dup 7) (match_dup 1) (match_dup 3)] |
1392 | UNSPEC_BPERMUTE))] |
3d6275e3 AS |
1393 | {
1394 | operands[4] = gcn_operand_part (<MODE>mode, operands[0], 0); |
1395 | operands[5] = gcn_operand_part (<MODE>mode, operands[0], 1); |
1396 | operands[6] = gcn_operand_part (<MODE>mode, operands[2], 0); |
1397 | operands[7] = gcn_operand_part (<MODE>mode, operands[2], 1); |
1398 | } |
1399 | [(set_attr "type" "vmult") |
1400 | (set_attr "length" "24")]) |
1401 | ||
; DPP move (UNSPEC_MOV_DPP_SHR) with a constant-integer operand 2.  Only
; available when TARGET_DPP_FULL.  The assembler text comes from
; gcn_expand_dpp_shr_insn, called with "v_mov_b32" and INTVAL (operand 2).
a5879399 | 1402 | (define_insn "@dpp_move<mode>" |
03876953 AS |
1403 | [(set (match_operand:V_noHI 0 "register_operand" "=v")
1404 | (unspec:V_noHI |
1405 | [(match_operand:V_noHI 1 "register_operand" " v") |
1406 | (match_operand:SI 2 "const_int_operand" " n")] |
a5879399 | 1407 | UNSPEC_MOV_DPP_SHR))] |
68e03492 | 1408 | "TARGET_DPP_FULL" |
a5879399 AS |
1409 | {
1410 | return gcn_expand_dpp_shr_insn (<MODE>mode, "v_mov_b32", |
1411 | UNSPEC_MOV_DPP_SHR, INTVAL (operands[2])); |
1412 | } |
1413 | [(set_attr "type" "vop_dpp") |
1414 | (set_attr "length" "16")]) |
1415 | ||
; DPP move identified by UNSPEC_MOV_DPP_SWAP_PAIRS.  The assembler text comes
; from gcn_expand_dpp_swap_pairs_insn, called with "v_mov_b32".  The insn
; condition is empty.
1bde3ace AJ |
1416 | (define_insn "@dpp_swap_pairs<mode>"
1417 | [(set (match_operand:V_noHI 0 "register_operand" "=v") |
1418 | (unspec:V_noHI |
1419 | [(match_operand:V_noHI 1 "register_operand" " v")] |
1420 | UNSPEC_MOV_DPP_SWAP_PAIRS))] |
1421 | "" |
1422 | { |
1423 | return gcn_expand_dpp_swap_pairs_insn (<MODE>mode, "v_mov_b32", |
1424 | UNSPEC_MOV_DPP_SWAP_PAIRS); |
1425 | } |
1426 | [(set_attr "type" "vop_dpp") |
1427 | (set_attr "length" "16")]) |
1428 | ||
;; DPP move identified by UNSPEC_MOV_DPP_DISTRIBUTE_EVEN.  The insn condition
;; is empty, so the pattern is always enabled.  The assembly template is
;; returned by gcn_expand_dpp_distribute_even_insn, called with "v_mov_b32".
1429 | (define_insn "@dpp_distribute_even<mode>" | |
1430 | [(set (match_operand:V_noHI 0 "register_operand" "=v") | |
1431 | (unspec:V_noHI | |
1432 | [(match_operand:V_noHI 1 "register_operand" " v")] | |
1433 | UNSPEC_MOV_DPP_DISTRIBUTE_EVEN))] | |
1434 | "" | |
1435 | { | |
1436 | return gcn_expand_dpp_distribute_even_insn (<MODE>mode, "v_mov_b32", | |
1437 | UNSPEC_MOV_DPP_DISTRIBUTE_EVEN); | |
1438 | } | |
1439 | [(set_attr "type" "vop_dpp") | |
1440 | (set_attr "length" "16")]) | |
1441 | ||
;; DPP move identified by UNSPEC_MOV_DPP_DISTRIBUTE_ODD.  The insn condition
;; is empty, so the pattern is always enabled.  The assembly template is
;; returned by gcn_expand_dpp_distribute_odd_insn, called with "v_mov_b32".
;; The unspec in the RTL template must be the _ODD one: with the _EVEN
;; unspec this template is indistinguishable from @dpp_distribute_even<mode>,
;; so the RTL generated for this insn would be recognized as the "even" insn.
1442 | (define_insn "@dpp_distribute_odd<mode>" | |
1443 | [(set (match_operand:V_noHI 0 "register_operand" "=v") | |
1444 | (unspec:V_noHI | |
1445 | [(match_operand:V_noHI 1 "register_operand" " v")] | |
1446 | UNSPEC_MOV_DPP_DISTRIBUTE_ODD))] | |
1447 | "" | |
1448 | { | |
1449 | return gcn_expand_dpp_distribute_odd_insn (<MODE>mode, "v_mov_b32", | |
1450 | UNSPEC_MOV_DPP_DISTRIBUTE_ODD); | |
1451 | } | |
1452 | [(set_attr "type" "vop_dpp") | |
1453 | (set_attr "length" "16")]) | |
1454 | ||
3d6275e3 AS |
1455 | ;; }}} |
1456 | ;; {{{ ALU special case: add/sub | |
1457 | ||
77f7566e | 1458 | (define_insn "add<mode>3<exec_clobber>" |
e24b0fed | 1459 | [(set (match_operand:V_INT_1REG 0 "register_operand") |
03876953 | 1460 | (plus:V_INT_1REG |
e24b0fed AS |
1461 | (match_operand:V_INT_1REG 1 "register_operand") |
1462 | (match_operand:V_INT_1REG 2 "gcn_alu_operand"))) | |
3d6275e3 AS |
1463 | (clobber (reg:DI VCC_REG))] |
1464 | "" | |
e24b0fed | 1465 | {@ [cons: =0, %1, 2; attrs: type, length] |
b9bf0c3f | 1466 | [v,v,vSvA;vop2,4] v_add_co_u32\t%0, vcc, %2, %1 |
e24b0fed AS |
1467 | [v,v,vSvB;vop2,8] ^ |
1468 | }) | |
3d6275e3 | 1469 | |
77f7566e | 1470 | (define_insn "add<mode>3_dup<exec_clobber>" |
e24b0fed | 1471 | [(set (match_operand:V_INT_1REG 0 "register_operand") |
03876953 AS |
1472 | (plus:V_INT_1REG |
1473 | (vec_duplicate:V_INT_1REG | |
e24b0fed AS |
1474 | (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand")) |
1475 | (match_operand:V_INT_1REG 1 "register_operand"))) | |
3d6275e3 AS |
1476 | (clobber (reg:DI VCC_REG))] |
1477 | "" | |
e24b0fed | 1478 | {@ [cons: =0, 1, 2; attrs: type, length] |
b9bf0c3f | 1479 | [v,v,SvA;vop2,4] v_add_co_u32\t%0, vcc, %2, %1 |
e24b0fed AS |
1480 | [v,v,SvB;vop2,8] ^ |
1481 | }) | |
3d6275e3 | 1482 | |
1165109b | 1483 | (define_insn "add<mode>3_vcc<exec_vcc>" |
e24b0fed | 1484 | [(set (match_operand:V_SI 0 "register_operand") |
1165109b | 1485 | (plus:V_SI |
e24b0fed AS |
1486 | (match_operand:V_SI 1 "register_operand") |
1487 | (match_operand:V_SI 2 "gcn_alu_operand"))) | |
1488 | (set (match_operand:DI 3 "register_operand") | |
1165109b | 1489 | (ltu:DI (plus:V_SI (match_dup 1) (match_dup 2)) |
3d6275e3 AS |
1490 | (match_dup 1)))] |
1491 | "" | |
e24b0fed | 1492 | {@ [cons: =0, %1, 2, =3; attrs: type, length] |
b9bf0c3f | 1493 | [v,v,vSvA,cV;vop2 ,4] v_add_co_u32\t%0, %3, %2, %1 |
e24b0fed AS |
1494 | [v,v,vSvB,cV;vop2 ,8] ^ |
1495 | [v,v,vSvA,Sg;vop3b,8] ^ | |
1496 | }) | |
3d6275e3 AS |
1497 | |
1498 | ; This pattern only changes the VCC bits when the corresponding lane is | |
1499 | ; enabled, so the set must be described as an ior. | |
1500 | ||
1165109b | 1501 | (define_insn "add<mode>3_vcc_dup<exec_vcc>" |
e24b0fed | 1502 | [(set (match_operand:V_SI 0 "register_operand") |
1165109b AS |
1503 | (plus:V_SI |
1504 | (vec_duplicate:V_SI | |
e24b0fed AS |
1505 | (match_operand:SI 1 "gcn_alu_operand")) |
1506 | (match_operand:V_SI 2 "register_operand"))) | |
1507 | (set (match_operand:DI 3 "register_operand") | |
1165109b AS |
1508 | (ltu:DI (plus:V_SI (vec_duplicate:V_SI (match_dup 2)) |
1509 | (match_dup 1)) | |
1510 | (vec_duplicate:V_SI (match_dup 2))))] | |
3d6275e3 | 1511 | "" |
e24b0fed | 1512 | {@ [cons: =0, 1, 2, =3; attrs: type, length] |
b9bf0c3f | 1513 | [v,SvA,v,cV;vop2 ,4] v_add_co_u32\t%0, %3, %1, %2 |
e24b0fed AS |
1514 | [v,SvB,v,cV;vop2 ,8] ^ |
1515 | [v,SvA,v,Sg;vop3b,8] ^ | |
1516 | }) | |
3d6275e3 | 1517 | |
66b01cc3 AS |
1518 | ; v_addc does not accept an SGPR because the VCC read already counts as an |
1519 | ; SGPR use and the number of SGPR operands is limited to 1. It does not | |
1520 | ; accept "B" immediate constants due to a related bus conflict. | |
3d6275e3 | 1521 | |
1165109b AS |
1522 | (define_insn "addc<mode>3<exec_vcc>" |
1523 | [(set (match_operand:V_SI 0 "register_operand" "=v, v") | |
1524 | (plus:V_SI | |
1525 | (plus:V_SI | |
1526 | (vec_merge:V_SI | |
1527 | (vec_duplicate:V_SI (const_int 1)) | |
1528 | (vec_duplicate:V_SI (const_int 0)) | |
66b01cc3 | 1529 | (match_operand:DI 3 "register_operand" " cV,cVSv")) |
1165109b AS |
1530 | (match_operand:V_SI 1 "gcn_alu_operand" "% v, vA")) |
1531 | (match_operand:V_SI 2 "gcn_alu_operand" " vA, vA"))) | |
66b01cc3 | 1532 | (set (match_operand:DI 4 "register_operand" "=cV,cVSg") |
1165109b AS |
1533 | (ior:DI (ltu:DI (plus:V_SI |
1534 | (plus:V_SI | |
1535 | (vec_merge:V_SI | |
1536 | (vec_duplicate:V_SI (const_int 1)) | |
1537 | (vec_duplicate:V_SI (const_int 0)) | |
3d6275e3 AS |
1538 | (match_dup 3)) |
1539 | (match_dup 1)) | |
1540 | (match_dup 2)) | |
1541 | (match_dup 2)) | |
1165109b AS |
1542 | (ltu:DI (plus:V_SI |
1543 | (vec_merge:V_SI | |
1544 | (vec_duplicate:V_SI (const_int 1)) | |
1545 | (vec_duplicate:V_SI (const_int 0)) | |
3d6275e3 AS |
1546 | (match_dup 3)) |
1547 | (match_dup 1)) | |
1548 | (match_dup 1))))] | |
1549 | "" | |
b9bf0c3f | 1550 | "{v_addc_co_u32|v_add_co_ci_u32}\t%0, %4, %2, %1, %3" |
3d6275e3 AS |
1551 | [(set_attr "type" "vop2,vop3b") |
1552 | (set_attr "length" "4,8")]) | |
1553 | ||
77f7566e | 1554 | (define_insn "sub<mode>3<exec_clobber>" |
03876953 AS |
1555 | [(set (match_operand:V_INT_1REG 0 "register_operand" "= v, v") |
1556 | (minus:V_INT_1REG | |
1557 | (match_operand:V_INT_1REG 1 "gcn_alu_operand" "vSvB, v") | |
1558 | (match_operand:V_INT_1REG 2 "gcn_alu_operand" " v,vSvB"))) | |
3d6275e3 AS |
1559 | (clobber (reg:DI VCC_REG))] |
1560 | "" | |
1561 | "@ | |
b9bf0c3f AS |
1562 | v_sub_co_u32\t%0, vcc, %1, %2 |
1563 | v_subrev_co_u32\t%0, vcc, %2, %1" | |
3d6275e3 AS |
1564 | [(set_attr "type" "vop2") |
1565 | (set_attr "length" "8,8")]) | |
1566 | ||
1165109b AS |
1567 | (define_insn "sub<mode>3_vcc<exec_vcc>" |
1568 | [(set (match_operand:V_SI 0 "register_operand" "= v, v, v, v") | |
1569 | (minus:V_SI | |
1570 | (match_operand:V_SI 1 "gcn_alu_operand" "vSvB,vSvB, v, v") | |
1571 | (match_operand:V_SI 2 "gcn_alu_operand" " v, v,vSvB,vSvB"))) | |
1572 | (set (match_operand:DI 3 "register_operand" "= cV, Sg, cV, Sg") | |
1573 | (gtu:DI (minus:V_SI (match_dup 1) (match_dup 2)) | |
3d6275e3 AS |
1574 | (match_dup 1)))] |
1575 | "" | |
1576 | "@ | |
b9bf0c3f AS |
1577 | v_sub_co_u32\t%0, %3, %1, %2 |
1578 | v_sub_co_u32\t%0, %3, %1, %2 | |
1579 | v_subrev_co_u32\t%0, %3, %2, %1 | |
1580 | v_subrev_co_u32\t%0, %3, %2, %1" | |
3d6275e3 AS |
1581 | [(set_attr "type" "vop2,vop3b,vop2,vop3b") |
1582 | (set_attr "length" "8")]) | |
1583 | ||
66b01cc3 AS |
1584 | ; v_subb does not accept an SGPR because the VCC read already counts as an |
1585 | ; SGPR use and the number of SGPR operands is limited to 1. It does not | |
1586 | ; accept "B" immediate constants due to a related bus conflict. | |
3d6275e3 | 1587 | |
1165109b AS |
1588 | (define_insn "subc<mode>3<exec_vcc>" |
1589 | [(set (match_operand:V_SI 0 "register_operand" "= v, v, v, v") | |
1590 | (minus:V_SI | |
1591 | (minus:V_SI | |
1592 | (vec_merge:V_SI | |
1593 | (vec_duplicate:V_SI (const_int 1)) | |
1594 | (vec_duplicate:V_SI (const_int 0)) | |
1595 | (match_operand:DI 3 "gcn_alu_operand" " cV,cVSv,cV,cVSv")) | |
1596 | (match_operand:V_SI 1 "gcn_alu_operand" " vA, vA, v, vA")) | |
1597 | (match_operand:V_SI 2 "gcn_alu_operand" " v, vA,vA, vA"))) | |
1598 | (set (match_operand:DI 4 "register_operand" "=cV,cVSg,cV,cVSg") | |
1599 | (ior:DI (gtu:DI (minus:V_SI (minus:V_SI | |
1600 | (vec_merge:V_SI | |
1601 | (vec_duplicate:V_SI (const_int 1)) | |
1602 | (vec_duplicate:V_SI (const_int 0)) | |
1603 | (match_dup 3)) | |
3d6275e3 AS |
1604 | (match_dup 1)) |
1605 | (match_dup 2)) | |
1606 | (match_dup 2)) | |
1165109b AS |
1607 | (ltu:DI (minus:V_SI (vec_merge:V_SI |
1608 | (vec_duplicate:V_SI (const_int 1)) | |
1609 | (vec_duplicate:V_SI (const_int 0)) | |
1610 | (match_dup 3)) | |
1611 | (match_dup 1)) | |
3d6275e3 AS |
1612 | (match_dup 1))))] |
1613 | "" | |
1614 | "@ | |
b9bf0c3f AS |
1615 | {v_subb_co_u32|v_sub_co_ci_u32}\t%0, %4, %1, %2, %3 |
1616 | {v_subb_co_u32|v_sub_co_ci_u32}\t%0, %4, %1, %2, %3 | |
1617 | {v_subbrev_co_u32|v_subrev_co_ci_u32}\t%0, %4, %2, %1, %3 | |
1618 | {v_subbrev_co_u32|v_subrev_co_ci_u32}\t%0, %4, %2, %1, %3" | |
3d6275e3 | 1619 | [(set_attr "type" "vop2,vop3b,vop2,vop3b") |
66b01cc3 | 1620 | (set_attr "length" "4,8,4,8")]) |
3d6275e3 | 1621 | |
1165109b AS |
;; V_DI addition that clobbers VCC.  Emitted as "#" and split, once
;; gcn_can_split_p accepts operands 0, 1 and 2, into two <vnsi> insns:
;; add<vnsi>3_vcc adds part 0 of the operands and writes its carry output
;; to VCC, then addc<vnsi>3 adds part 1 of the operands using VCC as both
;; the carry input and the carry output.
1622 | (define_insn_and_split "add<mode>3" |
1623 | [(set (match_operand:V_DI 0 "register_operand" "= v") | |
1624 | (plus:V_DI | |
1625 | (match_operand:V_DI 1 "register_operand" "%vDb") | |
1626 | (match_operand:V_DI 2 "gcn_alu_operand" " vDb"))) | |
3d6275e3 AS |
1627 | (clobber (reg:DI VCC_REG))] |
1628 | "" | |
1629 | "#" | |
1165109b AS |
1630 | "gcn_can_split_p (<MODE>mode, operands[0]) |
1631 | && gcn_can_split_p (<MODE>mode, operands[1]) | |
1632 | && gcn_can_split_p (<MODE>mode, operands[2])" | |
3d6275e3 AS |
1633 | [(const_int 0)] |
1634 | { | |
1635 | rtx vcc = gen_rtx_REG (DImode, VCC_REG); | |
1165109b AS |
1636 | emit_insn (gen_add<vnsi>3_vcc |
1637 | (gcn_operand_part (<MODE>mode, operands[0], 0), | |
1638 | gcn_operand_part (<MODE>mode, operands[1], 0), | |
1639 | gcn_operand_part (<MODE>mode, operands[2], 0), | |
3d6275e3 | 1640 | vcc)); |
1165109b AS |
1641 | emit_insn (gen_addc<vnsi>3 |
1642 | (gcn_operand_part (<MODE>mode, operands[0], 1), | |
1643 | gcn_operand_part (<MODE>mode, operands[1], 1), | |
1644 | gcn_operand_part (<MODE>mode, operands[2], 1), | |
3d6275e3 AS |
1645 | vcc, vcc)); |
1646 | DONE; | |
1647 | } | |
1648 | [(set_attr "type" "vmult") | |
1649 | (set_attr "length" "8")]) | |
1650 | ||
1165109b AS |
;; V_DI addition merged under an exec mask: lanes selected by operand 4 get
;; operand 1 + operand 2, the others take operand 3.  Clobbers VCC.
;; Emitted as "#" and split into add<vnsi>3_vcc_exec on part 0 followed by
;; addc<vnsi>3_exec on part 1, with VCC carrying between the two.
;; The split condition covers every operand that is taken apart with
;; gcn_operand_part, i.e. operands 0, 1, 2 and the merge source operand 3;
;; operand 4 is the DI exec mask, which is passed through whole.
1651 | (define_insn_and_split "add<mode>3_exec" |
1652 | [(set (match_operand:V_DI 0 "register_operand" "= v") | |
1653 | (vec_merge:V_DI | |
1654 | (plus:V_DI | |
1655 | (match_operand:V_DI 1 "register_operand" "%vDb") | |
1656 | (match_operand:V_DI 2 "gcn_alu_operand" " vDb")) | |
1657 | (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0") | |
1658 | (match_operand:DI 4 "gcn_exec_reg_operand" " e"))) | |
3d6275e3 AS |
1659 | (clobber (reg:DI VCC_REG))] |
1660 | "" | |
1661 | "#" | |
1165109b AS |
1662 | "gcn_can_split_p (<MODE>mode, operands[0]) |
1663 | && gcn_can_split_p (<MODE>mode, operands[1]) | |
1664 | && gcn_can_split_p (<MODE>mode, operands[2]) | |
1665 | && gcn_can_split_p (<MODE>mode, operands[3])" | |
3d6275e3 AS |
1666 | [(const_int 0)] |
1667 | { | |
1668 | rtx vcc = gen_rtx_REG (DImode, VCC_REG); | |
1165109b AS |
1669 | emit_insn (gen_add<vnsi>3_vcc_exec |
1670 | (gcn_operand_part (<MODE>mode, operands[0], 0), | |
1671 | gcn_operand_part (<MODE>mode, operands[1], 0), | |
1672 | gcn_operand_part (<MODE>mode, operands[2], 0), | |
3d6275e3 | 1673 | vcc, |
1165109b | 1674 | gcn_operand_part (<MODE>mode, operands[3], 0), |
3d6275e3 | 1675 | operands[4])); |
1165109b AS |
1676 | emit_insn (gen_addc<vnsi>3_exec |
1677 | (gcn_operand_part (<MODE>mode, operands[0], 1), | |
1678 | gcn_operand_part (<MODE>mode, operands[1], 1), | |
1679 | gcn_operand_part (<MODE>mode, operands[2], 1), | |
3d6275e3 | 1680 | vcc, vcc, |
1165109b | 1681 | gcn_operand_part (<MODE>mode, operands[3], 1), |
3d6275e3 AS |
1682 | operands[4])); |
1683 | DONE; | |
1684 | } | |
1685 | [(set_attr "type" "vmult") | |
1686 | (set_attr "length" "8")]) | |
1687 | ||
1165109b AS |
;; V_DI subtraction that clobbers VCC.  The two alternatives allow either
;; operand 1 or operand 2 to be the non-register ("vDb") input.  Emitted as
;; "#" and split, once gcn_can_split_p accepts operands 0, 1 and 2, into
;; sub<vnsi>3_vcc on part 0 (borrow written to VCC) followed by subc<vnsi>3
;; on part 1, which uses VCC as both its borrow input and borrow output.
1688 | (define_insn_and_split "sub<mode>3" |
1689 | [(set (match_operand:V_DI 0 "register_operand" "= v, v") | |
1690 | (minus:V_DI | |
1691 | (match_operand:V_DI 1 "gcn_alu_operand" "vDb, v") | |
1692 | (match_operand:V_DI 2 "gcn_alu_operand" " v,vDb"))) | |
3d6275e3 AS |
1693 | (clobber (reg:DI VCC_REG))] |
1694 | "" | |
1695 | "#" | |
1165109b AS |
1696 | "gcn_can_split_p (<MODE>mode, operands[0]) |
1697 | && gcn_can_split_p (<MODE>mode, operands[1]) | |
1698 | && gcn_can_split_p (<MODE>mode, operands[2])" | |
3d6275e3 AS |
1699 | [(const_int 0)] |
1700 | { | |
1701 | rtx vcc = gen_rtx_REG (DImode, VCC_REG); | |
1165109b AS |
1702 | emit_insn (gen_sub<vnsi>3_vcc |
1703 | (gcn_operand_part (<MODE>mode, operands[0], 0), | |
1704 | gcn_operand_part (<MODE>mode, operands[1], 0), | |
1705 | gcn_operand_part (<MODE>mode, operands[2], 0), | |
3d6275e3 | 1706 | vcc)); |
1165109b AS |
1707 | emit_insn (gen_subc<vnsi>3 |
1708 | (gcn_operand_part (<MODE>mode, operands[0], 1), | |
1709 | gcn_operand_part (<MODE>mode, operands[1], 1), | |
1710 | gcn_operand_part (<MODE>mode, operands[2], 1), | |
3d6275e3 AS |
1711 | vcc, vcc)); |
1712 | DONE; | |
1713 | } | |
1714 | [(set_attr "type" "vmult") | |
d54fc770 | 1715 | (set_attr "length" "8")]) |
3d6275e3 | 1716 | |
1165109b AS |
;; V_DI subtraction merged under an exec mask: lanes selected by operand 4
;; get operand 1 - operand 2, the others take operand 3.  Clobbers VCC.
;; The insn condition requires at least one of operands 1 and 2 to satisfy
;; register_operand.  Emitted as "#" and split, once gcn_can_split_p accepts
;; operands 0 to 3, into sub<vnsi>3_vcc_exec on part 0 followed by
;; subc<vnsi>3_exec on part 1; both receive the matching part of operand 3
;; and the whole of operand 4.
;; NOTE(review): the non-register alternatives here use "vSvB" whereas the
;; unmasked sub<mode>3 pattern uses "vDb" -- confirm this is intentional.
1717 | (define_insn_and_split "sub<mode>3_exec" |
1718 | [(set (match_operand:V_DI 0 "register_operand" "= v, v") | |
1719 | (vec_merge:V_DI | |
1720 | (minus:V_DI | |
1721 | (match_operand:V_DI 1 "gcn_alu_operand" "vSvB, v") | |
1722 | (match_operand:V_DI 2 "gcn_alu_operand" " v,vSvB")) | |
1723 | (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0, U0") | |
3abfd4f3 | 1724 | (match_operand:DI 4 "gcn_exec_reg_operand" " e, e"))) |
3d6275e3 AS |
1725 | (clobber (reg:DI VCC_REG))] |
1726 | "register_operand (operands[1], VOIDmode) | |
1727 | || register_operand (operands[2], VOIDmode)" | |
1728 | "#" | |
1165109b AS |
1729 | "gcn_can_split_p (<MODE>mode, operands[0]) |
1730 | && gcn_can_split_p (<MODE>mode, operands[1]) | |
1731 | && gcn_can_split_p (<MODE>mode, operands[2]) | |
1732 | && gcn_can_split_p (<MODE>mode, operands[3])" | |
3d6275e3 AS |
1733 | [(const_int 0)] |
1734 | { | |
1735 | rtx vcc = gen_rtx_REG (DImode, VCC_REG); | |
1165109b AS |
1736 | emit_insn (gen_sub<vnsi>3_vcc_exec |
1737 | (gcn_operand_part (<MODE>mode, operands[0], 0), | |
1738 | gcn_operand_part (<MODE>mode, operands[1], 0), | |
1739 | gcn_operand_part (<MODE>mode, operands[2], 0), | |
3d6275e3 | 1740 | vcc, |
1165109b | 1741 | gcn_operand_part (<MODE>mode, operands[3], 0), |
3d6275e3 | 1742 | operands[4])); |
1165109b AS |
1743 | emit_insn (gen_subc<vnsi>3_exec |
1744 | (gcn_operand_part (<MODE>mode, operands[0], 1), | |
1745 | gcn_operand_part (<MODE>mode, operands[1], 1), | |
1746 | gcn_operand_part (<MODE>mode, operands[2], 1), | |
3d6275e3 | 1747 | vcc, vcc, |
1165109b | 1748 | gcn_operand_part (<MODE>mode, operands[3], 1), |
3d6275e3 AS |
1749 | operands[4])); |
1750 | DONE; | |
1751 | } | |
1752 | [(set_attr "type" "vmult") | |
d54fc770 | 1753 | (set_attr "length" "8")]) |
3d6275e3 | 1754 | |
1165109b AS |
1755 | (define_insn_and_split "add<mode>3_zext" |
1756 | [(set (match_operand:V_DI 0 "register_operand" "= v, v") | |
1757 | (plus:V_DI | |
1758 | (zero_extend:V_DI | |
1759 | (match_operand:<VnSI> 1 "gcn_alu_operand" " vA, vB")) | |
1760 | (match_operand:V_DI 2 "gcn_alu_operand" "vDb,vDA"))) | |
3d6275e3 AS |
1761 | (clobber (reg:DI VCC_REG))] |
1762 | "" | |
1763 | "#" | |
1165109b AS |
1764 | "gcn_can_split_p (<MODE>mode, operands[0]) |
1765 | && gcn_can_split_p (<MODE>mode, operands[2])" | |
3d6275e3 AS |
1766 | [(const_int 0)] |
1767 | { | |
1768 | rtx vcc = gen_rtx_REG (DImode, VCC_REG); | |
1165109b AS |
1769 | emit_insn (gen_add<vnsi>3_vcc |
1770 | (gcn_operand_part (<MODE>mode, operands[0], 0), | |
3d6275e3 | 1771 | operands[1], |
1165109b | 1772 | gcn_operand_part (<MODE>mode, operands[2], 0), |
3d6275e3 | 1773 | vcc)); |
1165109b AS |
1774 | emit_insn (gen_addc<vnsi>3 |
1775 | (gcn_operand_part (<MODE>mode, operands[0], 1), | |
1776 | gcn_operand_part (<MODE>mode, operands[2], 1), | |
3d6275e3 AS |
1777 | const0_rtx, vcc, vcc)); |
1778 | DONE; | |
1779 | } | |
1780 | [(set_attr "type" "vmult") | |
66b01cc3 | 1781 | (set_attr "length" "8")]) |
3d6275e3 | 1782 | |
1165109b AS |
1783 | (define_insn_and_split "add<mode>3_zext_exec" |
1784 | [(set (match_operand:V_DI 0 "register_operand" "= v, v") | |
1785 | (vec_merge:V_DI | |
1786 | (plus:V_DI | |
1787 | (zero_extend:V_DI | |
1788 | (match_operand:<VnSI> 1 "gcn_alu_operand" " vA, vB")) | |
1789 | (match_operand:V_DI 2 "gcn_alu_operand" "vDb,vDA")) | |
1790 | (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0, U0") | |
1791 | (match_operand:DI 4 "gcn_exec_reg_operand" " e, e"))) | |
3d6275e3 AS |
1792 | (clobber (reg:DI VCC_REG))] |
1793 | "" | |
1794 | "#" | |
1165109b AS |
1795 | "gcn_can_split_p (<MODE>mode, operands[0]) |
1796 | && gcn_can_split_p (<MODE>mode, operands[2]) | |
1797 | && gcn_can_split_p (<MODE>mode, operands[3])" | |
3d6275e3 AS |
1798 | [(const_int 0)] |
1799 | { | |
1800 | rtx vcc = gen_rtx_REG (DImode, VCC_REG); | |
1165109b AS |
1801 | emit_insn (gen_add<vnsi>3_vcc_exec |
1802 | (gcn_operand_part (<MODE>mode, operands[0], 0), | |
3d6275e3 | 1803 | operands[1], |
1165109b | 1804 | gcn_operand_part (<MODE>mode, operands[2], 0), |
3d6275e3 | 1805 | vcc, |
1165109b | 1806 | gcn_operand_part (<MODE>mode, operands[3], 0), |
3d6275e3 | 1807 | operands[4])); |
1165109b AS |
1808 | emit_insn (gen_addc<vnsi>3_exec |
1809 | (gcn_operand_part (<MODE>mode, operands[0], 1), | |
1810 | gcn_operand_part (<MODE>mode, operands[2], 1), | |
3d6275e3 | 1811 | const0_rtx, vcc, vcc, |
1165109b | 1812 | gcn_operand_part (<MODE>mode, operands[3], 1), |
3d6275e3 AS |
1813 | operands[4])); |
1814 | DONE; | |
1815 | } | |
1816 | [(set_attr "type" "vmult") | |
66b01cc3 | 1817 | (set_attr "length" "8")]) |
3d6275e3 | 1818 | |
75d0b3d7 | 1819 | (define_insn_and_split "add<mode>3_vcc_zext_dup" |
e24b0fed | 1820 | [(set (match_operand:V_DI 0 "register_operand") |
1165109b AS |
1821 | (plus:V_DI |
1822 | (zero_extend:V_DI | |
1823 | (vec_duplicate:<VnSI> | |
e24b0fed AS |
1824 | (match_operand:SI 1 "gcn_alu_operand"))) |
1825 | (match_operand:V_DI 2 "gcn_alu_operand"))) | |
1826 | (set (match_operand:DI 3 "register_operand") | |
75d0b3d7 AS |
1827 | (ltu:DI (plus:V_DI |
1828 | (zero_extend:V_DI (vec_duplicate:<VnSI> (match_dup 1))) | |
1829 | (match_dup 2)) | |
1830 | (match_dup 1)))] | |
3d6275e3 | 1831 | "" |
e24b0fed AS |
1832 | {@ [cons: =0, 1, 2, =3] |
1833 | [v,ASv,v,&Sg] # | |
1834 | [v,BSv,v,&cV] ^ | |
1835 | } | |
1165109b AS |
1836 | "gcn_can_split_p (<MODE>mode, operands[0]) |
1837 | && gcn_can_split_p (<MODE>mode, operands[2])" | |
3d6275e3 AS |
1838 | [(const_int 0)] |
1839 | { | |
1165109b AS |
1840 | emit_insn (gen_add<vnsi>3_vcc_dup |
1841 | (gcn_operand_part (<MODE>mode, operands[0], 0), | |
3d6275e3 | 1842 | gcn_operand_part (DImode, operands[1], 0), |
1165109b | 1843 | gcn_operand_part (<MODE>mode, operands[2], 0), |
75d0b3d7 | 1844 | operands[3])); |
1165109b AS |
1845 | emit_insn (gen_addc<vnsi>3 |
1846 | (gcn_operand_part (<MODE>mode, operands[0], 1), | |
1847 | gcn_operand_part (<MODE>mode, operands[2], 1), | |
75d0b3d7 | 1848 | const0_rtx, operands[3], operands[3])); |
3d6275e3 AS |
1849 | DONE; |
1850 | } | |
1851 | [(set_attr "type" "vmult") | |
1852 | (set_attr "length" "8")]) | |
1853 | ||
75d0b3d7 AS |
;; Three-operand wrapper around add<mode>3_vcc_zext_dup: forwards operands
;; 0, 1 and 2 unchanged and supplies the hard VCC register (DImode) as that
;; pattern's operand 3.
1854 | (define_expand "add<mode>3_zext_dup" |
1855 | [(match_operand:V_DI 0 "register_operand") | |
1856 | (match_operand:SI 1 "gcn_alu_operand") | |
1857 | (match_operand:V_DI 2 "gcn_alu_operand")] | |
1858 | "" | |
1859 | { | |
1860 | rtx vcc = gen_rtx_REG (DImode, VCC_REG); | |
1861 | emit_insn (gen_add<mode>3_vcc_zext_dup (operands[0], operands[1], | |
1862 | operands[2], vcc)); | |
1863 | DONE; | |
1864 | }) | |
1865 | ||
1866 | (define_insn_and_split "add<mode>3_vcc_zext_dup_exec" | |
e24b0fed | 1867 | [(set (match_operand:V_DI 0 "register_operand") |
1165109b AS |
1868 | (vec_merge:V_DI |
1869 | (plus:V_DI | |
1870 | (zero_extend:V_DI | |
1871 | (vec_duplicate:<VnSI> | |
e24b0fed AS |
1872 | (match_operand:SI 1 "gcn_alu_operand"))) |
1873 | (match_operand:V_DI 2 "gcn_alu_operand")) | |
1874 | (match_operand:V_DI 4 "gcn_register_or_unspec_operand") | |
1875 | (match_operand:DI 5 "gcn_exec_reg_operand"))) | |
1876 | (set (match_operand:DI 3 "register_operand") | |
75d0b3d7 AS |
1877 | (and:DI |
1878 | (ltu:DI (plus:V_DI | |
1879 | (zero_extend:V_DI (vec_duplicate:<VnSI> (match_dup 1))) | |
1880 | (match_dup 2)) | |
1881 | (match_dup 1)) | |
1882 | (match_dup 5)))] | |
3d6275e3 | 1883 | "" |
e24b0fed AS |
1884 | {@ [cons: =0, 1, 2, =3, 4, 5] |
1885 | [v,ASv,v,&Sg,U0,e] # | |
1886 | [v,BSv,v,&cV,U0,e] ^ | |
1887 | } | |
1165109b AS |
1888 | "gcn_can_split_p (<MODE>mode, operands[0]) |
1889 | && gcn_can_split_p (<MODE>mode, operands[2]) | |
75d0b3d7 | 1890 | && gcn_can_split_p (<MODE>mode, operands[4])" |
3d6275e3 AS |
1891 | [(const_int 0)] |
1892 | { | |
1165109b AS |
1893 | emit_insn (gen_add<vnsi>3_vcc_dup_exec |
1894 | (gcn_operand_part (<MODE>mode, operands[0], 0), | |
3d6275e3 | 1895 | gcn_operand_part (DImode, operands[1], 0), |
1165109b | 1896 | gcn_operand_part (<MODE>mode, operands[2], 0), |
75d0b3d7 AS |
1897 | operands[3], |
1898 | gcn_operand_part (<MODE>mode, operands[4], 0), | |
1899 | operands[5])); | |
1165109b AS |
1900 | emit_insn (gen_addc<vnsi>3_exec |
1901 | (gcn_operand_part (<MODE>mode, operands[0], 1), | |
1902 | gcn_operand_part (<MODE>mode, operands[2], 1), | |
75d0b3d7 AS |
1903 | const0_rtx, operands[3], operands[3], |
1904 | gcn_operand_part (<MODE>mode, operands[4], 1), | |
1905 | operands[5])); | |
3d6275e3 AS |
1906 | DONE; |
1907 | } | |
1908 | [(set_attr "type" "vmult") | |
1909 | (set_attr "length" "8")]) | |
1910 | ||
75d0b3d7 AS |
;; Wrapper around add<mode>3_vcc_zext_dup_exec.  Note the renumbering: the
;; hard VCC register (DImode) is inserted as the callee's operand 3, so this
;; expander's operand 3 and operand 4 are passed as the callee's operands 4
;; and 5.
1911 | (define_expand "add<mode>3_zext_dup_exec" |
1912 | [(match_operand:V_DI 0 "register_operand") | |
1913 | (match_operand:SI 1 "gcn_alu_operand") | |
1914 | (match_operand:V_DI 2 "gcn_alu_operand") | |
1915 | (match_operand:V_DI 3 "gcn_register_or_unspec_operand") | |
1916 | (match_operand:DI 4 "gcn_exec_reg_operand")] | |
1917 | "" | |
1918 | { | |
1919 | rtx vcc = gen_rtx_REG (DImode, VCC_REG); | |
1920 | emit_insn (gen_add<mode>3_vcc_zext_dup_exec (operands[0], operands[1], | |
1921 | operands[2], vcc, operands[3], | |
1922 | operands[4])); | |
1923 | DONE; | |
1924 | }) | |
1925 | ||
1926 | (define_insn_and_split "add<mode>3_vcc_zext_dup2" | |
e24b0fed | 1927 | [(set (match_operand:V_DI 0 "register_operand") |
1165109b | 1928 | (plus:V_DI |
e24b0fed AS |
1929 | (zero_extend:V_DI (match_operand:<VnSI> 1 "gcn_alu_operand")) |
1930 | (vec_duplicate:V_DI (match_operand:DI 2 "gcn_alu_operand")))) | |
1931 | (set (match_operand:DI 3 "register_operand") | |
75d0b3d7 AS |
1932 | (ltu:DI (plus:V_DI |
1933 | (zero_extend:V_DI (match_dup 1)) | |
1934 | (vec_duplicate:V_DI (match_dup 2))) | |
1935 | (match_dup 1)))] | |
3d6275e3 | 1936 | "" |
e24b0fed AS |
1937 | {@ [cons: =0, 1, 2, =3] |
1938 | [v,v,DbSv,&cV] # | |
1939 | [v,v,DASv,&Sg] ^ | |
1940 | } | |
1165109b | 1941 | "gcn_can_split_p (<MODE>mode, operands[0])" |
3d6275e3 AS |
1942 | [(const_int 0)] |
1943 | { | |
1165109b AS |
1944 | emit_insn (gen_add<vnsi>3_vcc_dup |
1945 | (gcn_operand_part (<MODE>mode, operands[0], 0), | |
3d6275e3 AS |
1946 | gcn_operand_part (DImode, operands[2], 0), |
1947 | operands[1], | |
75d0b3d7 | 1948 | operands[3])); |
1165109b AS |
1949 | rtx dsthi = gcn_operand_part (<MODE>mode, operands[0], 1); |
1950 | emit_insn (gen_vec_duplicate<vnsi> | |
3d6275e3 | 1951 | (dsthi, gcn_operand_part (DImode, operands[2], 1))); |
75d0b3d7 AS |
1952 | emit_insn (gen_addc<vnsi>3 (dsthi, dsthi, const0_rtx, operands[3], |
1953 | operands[3])); | |
3d6275e3 AS |
1954 | DONE; |
1955 | } | |
1956 | [(set_attr "type" "vmult") | |
1957 | (set_attr "length" "8")]) | |
1958 | ||
75d0b3d7 AS |
;; Three-operand wrapper around add<mode>3_vcc_zext_dup2: forwards operands
;; 0, 1 and 2 unchanged and supplies the hard VCC register (DImode) as that
;; pattern's operand 3.
1959 | (define_expand "add<mode>3_zext_dup2" |
1960 | [(match_operand:V_DI 0 "register_operand") | |
1961 | (match_operand:<VnSI> 1 "gcn_alu_operand") | |
1962 | (match_operand:DI 2 "gcn_alu_operand")] | |
1963 | "" | |
1964 | { | |
1965 | rtx vcc = gen_rtx_REG (DImode, VCC_REG); | |
1966 | emit_insn (gen_add<mode>3_vcc_zext_dup2 (operands[0], operands[1], | |
1967 | operands[2], vcc)); | |
1968 | DONE; | |
1969 | }) | |
1970 | ||
1971 | (define_insn_and_split "add<mode>3_vcc_zext_dup2_exec" | |
e24b0fed | 1972 | [(set (match_operand:V_DI 0 "register_operand") |
1165109b AS |
1973 | (vec_merge:V_DI |
1974 | (plus:V_DI | |
e24b0fed AS |
1975 | (zero_extend:V_DI (match_operand:<VnSI> 1 "gcn_alu_operand")) |
1976 | (vec_duplicate:V_DI (match_operand:DI 2 "gcn_alu_operand"))) | |
1977 | (match_operand:V_DI 4 "gcn_register_or_unspec_operand") | |
1978 | (match_operand:DI 5 "gcn_exec_reg_operand"))) | |
1979 | (set (match_operand:DI 3 "register_operand") | |
75d0b3d7 AS |
1980 | (and:DI |
1981 | (ltu:DI (plus:V_DI | |
1982 | (zero_extend:V_DI (match_dup 1)) | |
1983 | (vec_duplicate:V_DI (match_dup 2))) | |
1984 | (match_dup 1)) | |
1985 | (match_dup 5)))] | |
3d6275e3 | 1986 | "" |
e24b0fed AS |
1987 | {@ [cons: =0, 1, 2, =3, 4, 5] |
1988 | [v,v,ASv,&Sg,U0,e] # | |
1989 | [v,v,BSv,&cV,U0,e] ^ | |
1990 | } | |
1165109b | 1991 | "gcn_can_split_p (<MODE>mode, operands[0]) |
75d0b3d7 | 1992 | && gcn_can_split_p (<MODE>mode, operands[4])" |
3d6275e3 AS |
1993 | [(const_int 0)] |
1994 | { | |
1165109b AS |
1995 | emit_insn (gen_add<vnsi>3_vcc_dup_exec |
1996 | (gcn_operand_part (<MODE>mode, operands[0], 0), | |
3d6275e3 AS |
1997 | gcn_operand_part (DImode, operands[2], 0), |
1998 | operands[1], | |
75d0b3d7 AS |
1999 | operands[3], |
2000 | gcn_operand_part (<MODE>mode, operands[4], 0), | |
2001 | operands[5])); | |
1165109b AS |
2002 | rtx dsthi = gcn_operand_part (<MODE>mode, operands[0], 1); |
2003 | emit_insn (gen_vec_duplicate<vnsi>_exec | |
3d6275e3 | 2004 | (dsthi, gcn_operand_part (DImode, operands[2], 1), |
75d0b3d7 AS |
2005 | gcn_operand_part (<MODE>mode, operands[4], 1), |
2006 | operands[5])); | |
1165109b | 2007 | emit_insn (gen_addc<vnsi>3_exec |
75d0b3d7 AS |
2008 | (dsthi, dsthi, const0_rtx, operands[3], operands[3], |
2009 | gcn_operand_part (<MODE>mode, operands[4], 1), | |
2010 | operands[5])); | |
3d6275e3 AS |
2011 | DONE; |
2012 | } | |
2013 | [(set_attr "type" "vmult") | |
2014 | (set_attr "length" "8")]) | |
2015 | ||
75d0b3d7 AS |
;; Wrapper around add<mode>3_vcc_zext_dup2_exec.  Note the renumbering: the
;; hard VCC register (DImode) is inserted as the callee's operand 3, so this
;; expander's operand 3 and operand 4 are passed as the callee's operands 4
;; and 5.
2016 | (define_expand "add<mode>3_zext_dup2_exec" |
2017 | [(match_operand:V_DI 0 "register_operand") | |
2018 | (match_operand:<VnSI> 1 "gcn_alu_operand") | |
2019 | (match_operand:DI 2 "gcn_alu_operand") | |
2020 | (match_operand:V_DI 3 "gcn_register_or_unspec_operand") | |
2021 | (match_operand:DI 4 "gcn_exec_reg_operand")] | |
2022 | "" | |
2023 | { | |
2024 | rtx vcc = gen_rtx_REG (DImode, VCC_REG); | |
2025 | emit_insn (gen_add<mode>3_vcc_zext_dup2_exec (operands[0], operands[1], | |
2026 | operands[2], vcc, | |
2027 | operands[3], operands[4])); | |
2028 | DONE; | |
2029 | }) | |
2030 | ||
1165109b AS |
;; V_DI = sign_extend (<VnSI> operand 1) + vec_duplicate (DI operand 2).
;; Clobbers VCC and an earlyclobber <VnSI> scratch (operand 3).  Emitted as
;; "#" and split, once gcn_can_split_p accepts operand 0, into four steps:
;;   1. scratch = operand 1 arithmetically shifted right by 31;
;;   2. part 0 of the result = part 0 of operand 2 (duplicated) + operand 1,
;;      with the carry written to VCC (add<vnsi>3_vcc_dup);
;;   3. part 1 of the result = part 1 of operand 2, duplicated;
;;   4. part 1 of the result += scratch, with VCC as carry in and carry out
;;      (addc<vnsi>3).
2031 | (define_insn_and_split "add<mode>3_sext_dup2" |
2032 | [(set (match_operand:V_DI 0 "register_operand" "= v") | |
2033 | (plus:V_DI | |
2034 | (sign_extend:V_DI (match_operand:<VnSI> 1 "gcn_alu_operand" " vA")) | |
2035 | (vec_duplicate:V_DI (match_operand:DI 2 "gcn_alu_operand" "BSv")))) | |
2036 | (clobber (match_scratch:<VnSI> 3 "=&v")) | |
3d6275e3 AS |
2037 | (clobber (reg:DI VCC_REG))] |
2038 | "" | |
2039 | "#" | |
1165109b | 2040 | "gcn_can_split_p (<MODE>mode, operands[0])" |
3d6275e3 AS |
2041 | [(const_int 0)] |
2042 | { | |
2043 | rtx vcc = gen_rtx_REG (DImode, VCC_REG); | |
1165109b AS |
2044 | emit_insn (gen_ashr<vnsi>3 (operands[3], operands[1], GEN_INT (31))); |
2045 | emit_insn (gen_add<vnsi>3_vcc_dup | |
2046 | (gcn_operand_part (<MODE>mode, operands[0], 0), | |
3d6275e3 AS |
2047 | gcn_operand_part (DImode, operands[2], 0), |
2048 | operands[1], | |
2049 | vcc)); | |
1165109b AS |
2050 | rtx dsthi = gcn_operand_part (<MODE>mode, operands[0], 1); |
2051 | emit_insn (gen_vec_duplicate<vnsi> | |
3d6275e3 | 2052 | (dsthi, gcn_operand_part (DImode, operands[2], 1))); |
1165109b | 2053 | emit_insn (gen_addc<vnsi>3 (dsthi, dsthi, operands[3], vcc, vcc)); |
3d6275e3 AS |
2054 | DONE; |
2055 | } | |
2056 | [(set_attr "type" "vmult") | |
2057 | (set_attr "length" "8")]) | |
2058 | ||
1165109b AS |
2059 | (define_insn_and_split "add<mode>3_sext_dup2_exec" |
2060 | [(set (match_operand:V_DI 0 "register_operand" "= v") | |
2061 | (vec_merge:V_DI | |
2062 | (plus:V_DI | |
2063 | (sign_extend:V_DI (match_operand:<VnSI> 1 "gcn_alu_operand" "vA")) | |
2064 | (vec_duplicate:V_DI (match_operand:DI 2 "gcn_alu_operand" "BSv"))) | |
2065 | (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0") | |
3d6275e3 | 2066 | (match_operand:DI 4 "gcn_exec_reg_operand" " e"))) |
1165109b | 2067 | (clobber (match_scratch:<VnSI> 5 "=&v")) |
3d6275e3 AS |
2068 | (clobber (reg:DI VCC_REG))] |
2069 | "" | |
2070 | "#" | |
1165109b AS |
2071 | "gcn_can_split_p (<MODE>mode, operands[0]) |
2072 | && gcn_can_split_p (<MODE>mode, operands[3])" | |
3d6275e3 AS |
2073 | [(const_int 0)] |
2074 | { | |
2075 | rtx vcc = gen_rtx_REG (DImode, VCC_REG); | |
1165109b AS |
2076 | emit_insn (gen_ashr<vnsi>3_exec (operands[5], operands[1], GEN_INT (31), |
2077 | gcn_gen_undef (<VnSI>mode), operands[4])); | |
2078 | emit_insn (gen_add<vnsi>3_vcc_dup_exec | |
2079 | (gcn_operand_part (<MODE>mode, operands[0], 0), | |
3d6275e3 AS |
2080 | gcn_operand_part (DImode, operands[2], 0), |
2081 | operands[1], | |
2082 | vcc, | |
1165109b | 2083 | gcn_operand_part (<MODE>mode, operands[3], 0), |
3d6275e3 | 2084 | operands[4])); |
1165109b AS |
2085 | rtx dsthi = gcn_operand_part (<MODE>mode, operands[0], 1); |
2086 | emit_insn (gen_vec_duplicate<vnsi>_exec | |
3d6275e3 | 2087 | (dsthi, gcn_operand_part (DImode, operands[2], 1), |
28b733ea AS |
2088 | gcn_operand_part (<MODE>mode, operands[3], 1), |
2089 | operands[4])); | |
1165109b | 2090 | emit_insn (gen_addc<vnsi>3_exec |
3d6275e3 | 2091 | (dsthi, dsthi, operands[5], vcc, vcc, |
1165109b | 2092 | gcn_operand_part (<MODE>mode, operands[3], 1), |
3d6275e3 AS |
2093 | operands[4])); |
2094 | DONE; | |
2095 | } | |
2096 | [(set_attr "type" "vmult") | |
2097 | (set_attr "length" "8")]) | |
2098 | ||
2099 | ;; }}} | |
2100 | ;; {{{ DS memory ALU: add/sub | |
2101 | ||
2102 | (define_mode_iterator DS_ARITH_MODE [V64SI V64SF V64DI]) | |
2103 | (define_mode_iterator DS_ARITH_SCALAR_MODE [SI SF DI]) | |
2104 | ||
2105 | ;; FIXME: the vector patterns probably need RD expanded to a vector of | |
2106 | ;; addresses. For now, the only way a vector can get into LDS is | |
2107 | ;; if the user puts it there manually. | |
2108 | ;; | |
2109 | ;; FIXME: the scalar patterns are probably fine in themselves, but need to be | |
2110 | ;; checked to see if anything can ever use them. | |
2111 | ||
2112 | (define_insn "add<mode>3_ds<exec>" | |
2113 | [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand" "=RD") | |
2114 | (plus:DS_ARITH_MODE | |
2115 | (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" "%RD") | |
2116 | (match_operand:DS_ARITH_MODE 2 "register_operand" " v")))] | |
2117 | "rtx_equal_p (operands[0], operands[1])" | |
2118 | "ds_add%u0\t%A0, %2%O0" | |
2119 | [(set_attr "type" "ds") | |
2120 | (set_attr "length" "8")]) | |
2121 | ||
2122 | (define_insn "add<mode>3_ds_scalar" | |
2123 | [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD") | |
2124 | (plus:DS_ARITH_SCALAR_MODE | |
2125 | (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand" | |
2126 | "%RD") | |
2127 | (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand" " v")))] | |
2128 | "rtx_equal_p (operands[0], operands[1])" | |
2129 | "ds_add%u0\t%A0, %2%O0" | |
2130 | [(set_attr "type" "ds") | |
2131 | (set_attr "length" "8")]) | |
2132 | ||
;; Vector subtract with a DS memory destination: memory operand 1 minus
;; register operand 2.  Operand 0 must be the same rtx as operand 1; the
;; output uses the ds_sub mnemonic.
2133 | (define_insn "sub<mode>3_ds<exec>" | |
2134 | [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand" "=RD") | |
2135 | (minus:DS_ARITH_MODE | |
2136 | (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" " RD") | |
2137 | (match_operand:DS_ARITH_MODE 2 "register_operand" " v")))] | |
2138 | "rtx_equal_p (operands[0], operands[1])" | |
2139 | "ds_sub%u0\t%A0, %2%O0" | |
2140 | [(set_attr "type" "ds") | |
2141 | (set_attr "length" "8")]) | |
2142 | ||
;; Scalar-mode counterpart of the DS subtract above (memory minus register),
;; with the same operand-equality condition and ds_sub template.
2143 | (define_insn "sub<mode>3_ds_scalar" | |
2144 | [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD") | |
2145 | (minus:DS_ARITH_SCALAR_MODE | |
2146 | (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand" | |
2147 | " RD") | |
2148 | (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand" " v")))] | |
2149 | "rtx_equal_p (operands[0], operands[1])" | |
2150 | "ds_sub%u0\t%A0, %2%O0" | |
2151 | [(set_attr "type" "ds") | |
2152 | (set_attr "length" "8")]) | |
2153 | ||
;; Reversed vector subtract with a DS memory destination: register operand 2
;; minus memory operand 1 (note the operand order inside the minus).
;; Operand 0 must be the same rtx as operand 1; the output uses ds_rsub.
2154 | (define_insn "subr<mode>3_ds<exec>" | |
2155 | [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand" "=RD") | |
2156 | (minus:DS_ARITH_MODE | |
2157 | (match_operand:DS_ARITH_MODE 2 "register_operand" " v") | |
2158 | (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" " RD")))] | |
2159 | "rtx_equal_p (operands[0], operands[1])" | |
2160 | "ds_rsub%u0\t%A0, %2%O0" | |
2161 | [(set_attr "type" "ds") | |
2162 | (set_attr "length" "8")]) | |
2163 | ||
;; Scalar-mode counterpart of the reversed DS subtract above (register minus
;; memory), with the same operand-equality condition and ds_rsub template.
2164 | (define_insn "subr<mode>3_ds_scalar" | |
2165 | [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD") | |
2166 | (minus:DS_ARITH_SCALAR_MODE | |
2167 | (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand" " v") | |
2168 | (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand" | |
2169 | " RD")))] | |
2170 | "rtx_equal_p (operands[0], operands[1])" | |
2171 | "ds_rsub%u0\t%A0, %2%O0" | |
2172 | [(set_attr "type" "ds") | |
2173 | (set_attr "length" "8")]) | |
2174 | ||
2175 | ;; }}} | |
2176 | ;; {{{ ALU special case: mult | |
2177 | ||
;; High part of a widening 32-bit vector multiply: both V_SI inputs are
;; extended to <VnDI> (sign or zero, per the any_extend iterator), multiplied,
;; shifted right by 32 and truncated back to V_SI.  Emitted as v_mul_hi with
;; the signedness suffix supplied by <sgnsuffix>.
1165109b AS |
2178 | (define_insn "<su>mul<mode>3_highpart<exec>" |
2179 | [(set (match_operand:V_SI 0 "register_operand" "= v") |
2180 | (truncate:V_SI |
2181 | (lshiftrt:<VnDI> |
2182 | (mult:<VnDI> |
2183 | (any_extend:<VnDI> |
2184 | (match_operand:V_SI 1 "gcn_alu_operand" " %v")) |
2185 | (any_extend:<VnDI> |
2186 | (match_operand:V_SI 2 "gcn_alu_operand" "vSvA"))) |
3d6275e3 AS |
2187 | (const_int 32))))] |
2188 | "" | |
2189 | "v_mul_hi<sgnsuffix>0\t%0, %2, %1" | |
2190 | [(set_attr "type" "vop3a") | |
2191 | (set_attr "length" "8")]) | |
2192 | ||
;; Multiply for the V_INT_1REG vector modes; always emitted as v_mul_lo_u32.
7b945b19 | 2193 | (define_insn "mul<mode>3<exec>" |
03876953 AS |
2194 | [(set (match_operand:V_INT_1REG 0 "register_operand" "= v") |
2195 | (mult:V_INT_1REG |
2196 | (match_operand:V_INT_1REG 1 "gcn_alu_operand" "%vSvA") |
2197 | (match_operand:V_INT_1REG 2 "gcn_alu_operand" " vSvA")))] |
3d6275e3 AS |
2198 | "" |
2199 | "v_mul_lo_u32\t%0, %1, %2" | |
2200 | [(set_attr "type" "vop3a") | |
2201 | (set_attr "length" "8")]) | |
2202 | ||
;; As the V_INT_1REG multiply, but operand 2 is a <SCALAR_MODE> value that is
;; vec_duplicated across the vector.  Same v_mul_lo_u32 output template.
7b945b19 | 2203 | (define_insn "mul<mode>3_dup<exec>" |
03876953 AS |
2204 | [(set (match_operand:V_INT_1REG 0 "register_operand" "= v") |
2205 | (mult:V_INT_1REG |
2206 | (match_operand:V_INT_1REG 1 "gcn_alu_operand" "%vSvA") |
2207 | (vec_duplicate:V_INT_1REG |
2208 | (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand" " SvA"))))] |
3d6275e3 AS |
2209 | "" |
2210 | "v_mul_lo_u32\t%0, %1, %2" | |
2211 | [(set_attr "type" "vop3a") | |
2212 | (set_attr "length" "8")]) | |
2213 | ||
;; 64-bit vector multiply (V_DI modes).  Matched as a single insn and split
;; after reload into 32-bit vector operations on the low/high halves of each
;; operand, using the <VnSI> scratch (operand 3) as a temporary.
;;
;; With L = l_hi*2^32 + l_lo and R = r_hi*2^32 + r_lo, the low 64 bits are
;;   out_lo = lo32 (l_lo * r_lo)
;;   out_hi = hi32 (l_lo * r_lo) + lo32 (l_hi * r_lo) + lo32 (l_lo * r_hi)
;; The l_hi * r_hi term has weight 2^64 and therefore must not be added.
1165109b AS |
2214 | (define_insn_and_split "mul<mode>3" |
2215 | [(set (match_operand:V_DI 0 "register_operand" "=&v") |
2216 | (mult:V_DI |
2217 | (match_operand:V_DI 1 "gcn_alu_operand" "% v") |
2218 | (match_operand:V_DI 2 "gcn_alu_operand" "vDA"))) |
2219 | (clobber (match_scratch:<VnSI> 3 "=&v"))] |
3d6275e3 AS |
2220 | "" |
2221 | "#" | |
2222 | "reload_completed" | |
2223 | [(const_int 0)] | |
2224 | { | |
1165109b AS |
2225 | rtx out_lo = gcn_operand_part (<MODE>mode, operands[0], 0); |
2226 | rtx out_hi = gcn_operand_part (<MODE>mode, operands[0], 1); | |
2227 | rtx left_lo = gcn_operand_part (<MODE>mode, operands[1], 0); | |
2228 | rtx left_hi = gcn_operand_part (<MODE>mode, operands[1], 1); | |
2229 | rtx right_lo = gcn_operand_part (<MODE>mode, operands[2], 0); | |
2230 | rtx right_hi = gcn_operand_part (<MODE>mode, operands[2], 1); | |
3d6275e3 AS |
2231 | rtx tmp = operands[3]; |
2232 | | |
1165109b AS |
2233 | emit_insn (gen_mul<vnsi>3 (out_lo, left_lo, right_lo)); |
2234 | emit_insn (gen_umul<vnsi>3_highpart (out_hi, left_lo, right_lo)); | |
2235 | emit_insn (gen_mul<vnsi>3 (tmp, left_hi, right_lo)); | |
2236 | emit_insn (gen_add<vnsi>3 (out_hi, out_hi, tmp)); | |
2237 | emit_insn (gen_mul<vnsi>3 (tmp, left_lo, right_hi)); | |
2238 | emit_insn (gen_add<vnsi>3 (out_hi, out_hi, tmp)); | |
    /* No left_hi * right_hi term: it only affects bits 64 and above.  */
3d6275e3 AS |
2241 | DONE; |
2242 | }) | |
2243 | ||
;; EXEC-masked form of the 64-bit vector multiply: the product is vec_merged
;; with operand 3 under the mask in operand 4.  Split after reload into
;; 32-bit masked operations, using the <VnSI> scratch (operand 5) as a
;; temporary.  When operand 3 is an UNSPEC, undefined values are used as the
;; "old" halves; otherwise its low/high parts are used.
;;
;; With L = l_hi*2^32 + l_lo and R = r_hi*2^32 + r_lo, the low 64 bits are
;;   out_lo = lo32 (l_lo * r_lo)
;;   out_hi = hi32 (l_lo * r_lo) + lo32 (l_hi * r_lo) + lo32 (l_lo * r_hi)
;; The l_hi * r_hi term has weight 2^64 and therefore must not be added.
1165109b AS |
2244 | (define_insn_and_split "mul<mode>3_exec" |
2245 | [(set (match_operand:V_DI 0 "register_operand" "=&v") |
2246 | (vec_merge:V_DI |
2247 | (mult:V_DI |
2248 | (match_operand:V_DI 1 "gcn_alu_operand" "% v") |
2249 | (match_operand:V_DI 2 "gcn_alu_operand" "vDA")) |
2250 | (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0") |
2251 | (match_operand:DI 4 "gcn_exec_reg_operand" " e"))) |
2252 | (clobber (match_scratch:<VnSI> 5 "=&v"))] |
3d6275e3 AS |
2253 | "" |
2254 | "#" | |
2255 | "reload_completed" | |
2256 | [(const_int 0)] | |
2257 | { | |
1165109b AS |
2258 | rtx out_lo = gcn_operand_part (<MODE>mode, operands[0], 0); |
2259 | rtx out_hi = gcn_operand_part (<MODE>mode, operands[0], 1); | |
2260 | rtx left_lo = gcn_operand_part (<MODE>mode, operands[1], 0); | |
2261 | rtx left_hi = gcn_operand_part (<MODE>mode, operands[1], 1); | |
2262 | rtx right_lo = gcn_operand_part (<MODE>mode, operands[2], 0); | |
2263 | rtx right_hi = gcn_operand_part (<MODE>mode, operands[2], 1); | |
3d6275e3 AS |
2264 | rtx exec = operands[4]; |
2265 | rtx tmp = operands[5]; | |
2266 | | |
2267 | rtx old_lo, old_hi; | |
2268 | if (GET_CODE (operands[3]) == UNSPEC) | |
2269 | { | |
1165109b | 2270 | old_lo = old_hi = gcn_gen_undef (<VnSI>mode); |
3d6275e3 AS |
2271 | } |
2272 | else | |
2273 | { | |
1165109b AS |
2274 | old_lo = gcn_operand_part (<MODE>mode, operands[3], 0); |
2275 | old_hi = gcn_operand_part (<MODE>mode, operands[3], 1); | |
3d6275e3 AS |
2276 | } |
2277 | | |
1165109b AS |
2278 | rtx undef = gcn_gen_undef (<VnSI>mode); |
2279 | | |
2280 | emit_insn (gen_mul<vnsi>3_exec (out_lo, left_lo, right_lo, old_lo, exec)); | |
2281 | emit_insn (gen_umul<vnsi>3_highpart_exec (out_hi, left_lo, right_lo, | |
2282 | old_hi, exec)); | |
2283 | emit_insn (gen_mul<vnsi>3_exec (tmp, left_hi, right_lo, undef, exec)); | |
2284 | emit_insn (gen_add<vnsi>3_exec (out_hi, out_hi, tmp, out_hi, exec)); | |
2285 | emit_insn (gen_mul<vnsi>3_exec (tmp, left_lo, right_hi, undef, exec)); | |
2286 | emit_insn (gen_add<vnsi>3_exec (out_hi, out_hi, tmp, out_hi, exec)); | |
    /* No left_hi * right_hi term: it only affects bits 64 and above.  */
3d6275e3 AS |
2289 | DONE; |
2290 | }) | |
2291 | ||
;; 64-bit vector multiply where operand 1 is a <VnSI> vector zero-extended to
;; V_DI.  Split after reload: the left operand has no high half, so the
;; result is built from left*right_lo (low and high parts) plus the low part
;; of left*right_hi added into the high word.  Operand 3 is a <VnSI> scratch.
1165109b AS |
2292 | (define_insn_and_split "mul<mode>3_zext" |
2293 | [(set (match_operand:V_DI 0 "register_operand" "=&v") |
2294 | (mult:V_DI |
2295 | (zero_extend:V_DI |
2296 | (match_operand:<VnSI> 1 "gcn_alu_operand" " v")) |
2297 | (match_operand:V_DI 2 "gcn_alu_operand" "vDA"))) |
2298 | (clobber (match_scratch:<VnSI> 3 "=&v"))] |
3d6275e3 AS |
2299 | "" |
2300 | "#" | |
2301 | "reload_completed" | |
2302 | [(const_int 0)] | |
2303 | { | |
1165109b AS |
2304 | rtx out_lo = gcn_operand_part (<MODE>mode, operands[0], 0); |
2305 | rtx out_hi = gcn_operand_part (<MODE>mode, operands[0], 1); | |
3d6275e3 | 2306 | rtx left = operands[1]; |
1165109b AS |
2307 | rtx right_lo = gcn_operand_part (<MODE>mode, operands[2], 0); |
2308 | rtx right_hi = gcn_operand_part (<MODE>mode, operands[2], 1); | |
3d6275e3 AS |
2309 | rtx tmp = operands[3]; |
2310 | | |
1165109b AS |
2311 | emit_insn (gen_mul<vnsi>3 (out_lo, left, right_lo)); |
2312 | emit_insn (gen_umul<vnsi>3_highpart (out_hi, left, right_lo)); | |
2313 | emit_insn (gen_mul<vnsi>3 (tmp, left, right_hi)); | |
2314 | emit_insn (gen_add<vnsi>3 (out_hi, out_hi, tmp)); | |
3d6275e3 AS |
2315 | DONE; |
2316 | }) | |
2317 | ||
;; EXEC-masked form of the zero-extended 64-bit multiply: the product is
;; vec_merged with operand 3 under the mask in operand 4.  Split after
;; reload into masked 32-bit operations.  When operand 3 is an UNSPEC,
;; undefined values are used as the "old" halves; the intermediate product
;; written to the scratch (operand 5) always merges with an undefined value.
1165109b AS |
2318 | (define_insn_and_split "mul<mode>3_zext_exec" |
2319 | [(set (match_operand:V_DI 0 "register_operand" "=&v") |
2320 | (vec_merge:V_DI |
2321 | (mult:V_DI |
2322 | (zero_extend:V_DI |
2323 | (match_operand:<VnSI> 1 "gcn_alu_operand" " v")) |
2324 | (match_operand:V_DI 2 "gcn_alu_operand" "vDA")) |
2325 | (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0") |
2326 | (match_operand:DI 4 "gcn_exec_reg_operand" " e"))) |
2327 | (clobber (match_scratch:<VnSI> 5 "=&v"))] |
3d6275e3 AS |
2328 | "" |
2329 | "#" | |
2330 | "reload_completed" | |
2331 | [(const_int 0)] | |
2332 | { | |
1165109b AS |
2333 | rtx out_lo = gcn_operand_part (<MODE>mode, operands[0], 0); |
2334 | rtx out_hi = gcn_operand_part (<MODE>mode, operands[0], 1); | |
3d6275e3 | 2335 | rtx left = operands[1]; |
1165109b AS |
2336 | rtx right_lo = gcn_operand_part (<MODE>mode, operands[2], 0); |
2337 | rtx right_hi = gcn_operand_part (<MODE>mode, operands[2], 1); | |
3d6275e3 AS |
2338 | rtx exec = operands[4]; |
2339 | rtx tmp = operands[5]; | |
2340 | | |
2341 | rtx old_lo, old_hi; | |
2342 | if (GET_CODE (operands[3]) == UNSPEC) | |
2343 | { | |
1165109b | 2344 | old_lo = old_hi = gcn_gen_undef (<VnSI>mode); |
3d6275e3 AS |
2345 | } |
2346 | else | |
2347 | { | |
1165109b AS |
2348 | old_lo = gcn_operand_part (<MODE>mode, operands[3], 0); |
2349 | old_hi = gcn_operand_part (<MODE>mode, operands[3], 1); | |
3d6275e3 AS |
2350 | } |
2351 | | |
1165109b | 2352 | rtx undef = gcn_gen_undef (<VnSI>mode); |
3d6275e3 | 2353 | |
1165109b AS |
2354 | emit_insn (gen_mul<vnsi>3_exec (out_lo, left, right_lo, old_lo, exec)); |
2355 | emit_insn (gen_umul<vnsi>3_highpart_exec (out_hi, left, right_lo, | |
2356 | old_hi, exec)); | |
2357 | emit_insn (gen_mul<vnsi>3_exec (tmp, left, right_hi, undef, exec)); | |
2358 | emit_insn (gen_add<vnsi>3_exec (out_hi, out_hi, tmp, out_hi, exec)); | |
3d6275e3 AS |
2359 | DONE; |
2360 | }) | |
2361 | ||
;; 64-bit vector multiply of a zero-extended <VnSI> vector (operand 1) by a
;; DI scalar (operand 2) that is vec_duplicated across the vector.  Split
;; after reload using the same three-multiply sequence as the zero-extended
;; vector form; operand 3 is a <VnSI> scratch.
1165109b AS |
2362 | (define_insn_and_split "mul<mode>3_zext_dup2" |
2363 | [(set (match_operand:V_DI 0 "register_operand" "= &v") |
2364 | (mult:V_DI |
2365 | (zero_extend:V_DI |
2366 | (match_operand:<VnSI> 1 "gcn_alu_operand" " v")) |
2367 | (vec_duplicate:V_DI |
2368 | (match_operand:DI 2 "gcn_alu_operand" "SvDA")))) |
2369 | (clobber (match_scratch:<VnSI> 3 "= &v"))] |
3d6275e3 AS |
2370 | "" |
2371 | "#" | |
2372 | "reload_completed" | |
2373 | [(const_int 0)] | |
2374 | { | |
1165109b AS |
2375 | rtx out_lo = gcn_operand_part (<MODE>mode, operands[0], 0); |
2376 | rtx out_hi = gcn_operand_part (<MODE>mode, operands[0], 1); | |
3d6275e3 | 2377 | rtx left = operands[1]; |
1165109b AS |
2378 | rtx right_lo = gcn_operand_part (<MODE>mode, operands[2], 0); |
2379 | rtx right_hi = gcn_operand_part (<MODE>mode, operands[2], 1); | |
3d6275e3 AS |
2380 | rtx tmp = operands[3]; |
2381 | | |
1165109b AS |
2382 | emit_insn (gen_mul<vnsi>3 (out_lo, left, right_lo)); |
2383 | emit_insn (gen_umul<vnsi>3_highpart (out_hi, left, right_lo)); | |
2384 | emit_insn (gen_mul<vnsi>3 (tmp, left, right_hi)); | |
2385 | emit_insn (gen_add<vnsi>3 (out_hi, out_hi, tmp)); | |
3d6275e3 AS |
2386 | DONE; |
2387 | }) | |
2388 | ||
;; EXEC-masked form of the zero-extended-by-duplicated-scalar multiply: the
;; product is vec_merged with operand 3 under the mask in operand 4.  Split
;; after reload into masked 32-bit operations.  When operand 3 is an UNSPEC,
;; undefined values are used as the "old" halves.
1165109b AS |
2389 | (define_insn_and_split "mul<mode>3_zext_dup2_exec" |
2390 | [(set (match_operand:V_DI 0 "register_operand" "= &v") |
2391 | (vec_merge:V_DI |
2392 | (mult:V_DI |
2393 | (zero_extend:V_DI |
2394 | (match_operand:<VnSI> 1 "gcn_alu_operand" " v")) |
2395 | (vec_duplicate:V_DI |
2396 | (match_operand:DI 2 "gcn_alu_operand" "SvDA"))) |
2397 | (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0") |
2398 | (match_operand:DI 4 "gcn_exec_reg_operand" " e"))) |
2399 | (clobber (match_scratch:<VnSI> 5 "= &v"))] |
3d6275e3 AS |
2400 | "" |
2401 | "#" | |
2402 | "reload_completed" | |
2403 | [(const_int 0)] | |
2404 | { | |
1165109b AS |
2405 | rtx out_lo = gcn_operand_part (<MODE>mode, operands[0], 0); |
2406 | rtx out_hi = gcn_operand_part (<MODE>mode, operands[0], 1); | |
3d6275e3 | 2407 | rtx left = operands[1]; |
1165109b AS |
2408 | rtx right_lo = gcn_operand_part (<MODE>mode, operands[2], 0); |
2409 | rtx right_hi = gcn_operand_part (<MODE>mode, operands[2], 1); | |
3d6275e3 AS |
2410 | rtx exec = operands[4]; |
2411 | rtx tmp = operands[5]; | |
2412 | | |
2413 | rtx old_lo, old_hi; | |
2414 | if (GET_CODE (operands[3]) == UNSPEC) | |
2415 | { | |
1165109b | 2416 | old_lo = old_hi = gcn_gen_undef (<VnSI>mode); |
3d6275e3 AS |
2417 | } |
2418 | else | |
2419 | { | |
1165109b AS |
2420 | old_lo = gcn_operand_part (<MODE>mode, operands[3], 0); |
2421 | old_hi = gcn_operand_part (<MODE>mode, operands[3], 1); | |
3d6275e3 AS |
2422 | } |
2423 | | |
1165109b | 2424 | rtx undef = gcn_gen_undef (<VnSI>mode); |
3d6275e3 | 2425 | |
1165109b AS |
2426 | emit_insn (gen_mul<vnsi>3_exec (out_lo, left, right_lo, old_lo, exec)); |
2427 | emit_insn (gen_umul<vnsi>3_highpart_exec (out_hi, left, right_lo, | |
2428 | old_hi, exec)); | |
2429 | emit_insn (gen_mul<vnsi>3_exec (tmp, left, right_hi, undef, exec)); | |
2430 | emit_insn (gen_add<vnsi>3_exec (out_hi, out_hi, tmp, out_hi, exec)); | |
3d6275e3 AS |
2431 | DONE; |
2432 | }) | |
2433 | ||
;; Iterator over the two complex-multiply unspecs, plus the attributes the
;; cmul expander uses: conj_op gives the name suffix ("" or "_conj"), and
;; cmul_subadd / cmul_addsub select "sub"/"add" for the plain form and the
;; opposite pairing for the conjugate form.
1bde3ace AJ |
2434 | (define_int_iterator UNSPEC_CMUL_OP [UNSPEC_CMUL UNSPEC_CMUL_CONJ]) |
2435 | (define_int_attr conj_op [(UNSPEC_CMUL "") (UNSPEC_CMUL_CONJ "_conj")]) | |
2436 | (define_int_attr cmul_subadd [(UNSPEC_CMUL "sub") (UNSPEC_CMUL_CONJ "add")]) | |
2437 | (define_int_attr cmul_addsub [(UNSPEC_CMUL "add") (UNSPEC_CMUL_CONJ "sub")]) | |
2438 | ||
;; Expander for complex multiply (and its conjugate variant) on V_noHI
;; vectors.  It forms two element-wise products (operand 1 times operand 2,
;; and operand 1 times a pair-swapped copy of operand 2), pair-swaps each
;; product, then combines them in two masked steps: one with EXEC set to
;; 0x5555555555555555 and one with EXEC set to 0xaaaaaaaaaaaaaaaa.  Which step
;; subtracts and which adds is chosen by <cmul_subadd> / <cmul_addsub>.
;; The trailing lane comments show the per-pair contents for the plain
;; (non-conjugate) case.
2439 | (define_expand "cmul<conj_op><mode>3" | |
2440 | [(set (match_operand:V_noHI 0 "register_operand" "=&v") | |
2441 | (unspec:V_noHI | |
2442 | [(match_operand:V_noHI 1 "register_operand" "v") | |
2443 | (match_operand:V_noHI 2 "register_operand" "v")] | |
2444 | UNSPEC_CMUL_OP))] | |
2445 | "" | |
2446 | { | |
2447 | // operands[1] a b | |
2448 | // operands[2] c d | |
2449 | rtx t1 = gen_reg_rtx (<MODE>mode); | |
2450 | emit_insn (gen_mul<mode>3 (t1, operands[1], operands[2])); // a*c b*d | |
2451 | | |
2452 | rtx s2_perm = gen_reg_rtx (<MODE>mode); | |
2453 | emit_insn (gen_dpp_swap_pairs<mode> (s2_perm, operands[2])); // d c | |
2454 | | |
2455 | rtx t2 = gen_reg_rtx (<MODE>mode); | |
2456 | emit_insn (gen_mul<mode>3 (t2, operands[1], s2_perm)); // a*d b*c | |
2457 | | |
2458 | rtx t1_perm = gen_reg_rtx (<MODE>mode); | |
2459 | emit_insn (gen_dpp_swap_pairs<mode> (t1_perm, t1)); // b*d a*c | |
2460 | | |
2461 | rtx even = gen_rtx_REG (DImode, EXEC_REG); | |
2462 | emit_move_insn (even, get_exec (0x5555555555555555UL)); | |
2463 | rtx dest = operands[0]; | |
b17c57b0 AS |
2464 | emit_insn (gen_<cmul_subadd><mode>3_exec (dest, t1, t1_perm, |
2465 | gcn_gen_undef (<MODE>mode), | |
2466 | even)); // a*c-b*d 0 | |
1bde3ace AJ |
2467 | |
2468 | rtx t2_perm = gen_reg_rtx (<MODE>mode); | |
2469 | emit_insn (gen_dpp_swap_pairs<mode> (t2_perm, t2)); // b*c a*d | |
2470 | | |
2471 | rtx odd = gen_rtx_REG (DImode, EXEC_REG); | |
2472 | emit_move_insn (odd, get_exec (0xaaaaaaaaaaaaaaaaUL)); | |
2473 | emit_insn (gen_<cmul_addsub><mode>3_exec (dest, t2, t2_perm, dest, odd)); | |
2474 | // 0 a*d+b*c | |
2475 | DONE; | |
2476 | }) | |
2477 | ||
;; Code iterator over plus/minus, with the letter ("a" or "s") used to build
;; the cml<a|s> and fm<a|s> pattern names in the expander that follows.
2478 | (define_code_iterator addsub [plus minus]) | |
2479 | (define_code_attr addsub_as [(plus "a") (minus "s")]) | |
2480 | ||
;; Expander combining a complex multiply of operands 1 and 2 with operand 3
;; (added or subtracted, per the addsub iterator) on V_FP vectors.
;; It broadcasts the even and odd elements of operand 1 within each pair,
;; uses a fused multiply-add/sub for the first partial product together with
;; operand 3, a plain multiply plus pair swap for the second, and then
;; combines the two with a masked subtract (EXEC = 0x5555555555555555)
;; followed by a masked add (EXEC = 0xaaaaaaaaaaaaaaaa).
2481 | (define_expand "cml<addsub_as><mode>4" | |
2482 | [(set (match_operand:V_FP 0 "register_operand" "=&v") | |
2483 | (addsub:V_FP | |
2484 | (unspec:V_FP | |
2485 | [(match_operand:V_FP 1 "register_operand" "v") | |
2486 | (match_operand:V_FP 2 "register_operand" "v")] | |
2487 | UNSPEC_CMUL) | |
2488 | (match_operand:V_FP 3 "register_operand" "v")))] | |
2489 | "" | |
2490 | { | |
2491 | rtx a = gen_reg_rtx (<MODE>mode); | |
2492 | emit_insn (gen_dpp_distribute_even<mode> (a, operands[1])); // a a | |
2493 | | |
2494 | rtx t1 = gen_reg_rtx (<MODE>mode); | |
2495 | emit_insn (gen_fm<addsub_as><mode>4 (t1, a, operands[2], operands[3])); | |
2496 | // a*c a*d | |
2497 | | |
2498 | rtx b = gen_reg_rtx (<MODE>mode); | |
2499 | emit_insn (gen_dpp_distribute_odd<mode> (b, operands[1])); // b b | |
2500 | | |
2501 | rtx t2 = gen_reg_rtx (<MODE>mode); | |
2502 | emit_insn (gen_mul<mode>3 (t2, b, operands[2])); // b*c b*d | |
2503 | | |
2504 | rtx t2_perm = gen_reg_rtx (<MODE>mode); | |
2505 | emit_insn (gen_dpp_swap_pairs<mode> (t2_perm, t2)); // b*d b*c | |
2506 | | |
2507 | rtx even = gen_rtx_REG (DImode, EXEC_REG); | |
2508 | emit_move_insn (even, get_exec (0x5555555555555555UL)); | |
2509 | rtx dest = operands[0]; | |
b17c57b0 AS |
2510 | emit_insn (gen_sub<mode>3_exec (dest, t1, t2_perm, |
2511 | gcn_gen_undef (<MODE>mode), even)); | |
1bde3ace AJ |
2512 | |
2513 | rtx odd = gen_rtx_REG (DImode, EXEC_REG); | |
2514 | emit_move_insn (odd, get_exec (0xaaaaaaaaaaaaaaaaUL)); | |
2515 | emit_insn (gen_add<mode>3_exec (dest, t1, t2_perm, dest, odd)); | |
2516 | | |
2517 | DONE; | |
2518 | }) | |
2519 | ||
;; Expander for an alternating subtract/add of operands 1 and 2.  The RTL
;; template merges (minus op1 op2) and (plus op1 op2) under the constant mask
;; 6148914691236517205 (0x5555555555555555).  The expansion performs a masked
;; subtract with EXEC = 0x5555555555555555 and an undefined merge source,
;; then a masked add with EXEC = 0xaaaaaaaaaaaaaaaa that merges into the
;; result of the first step.
2520 | (define_expand "vec_addsub<mode>3" | |
2521 | [(set (match_operand:V_noHI 0 "register_operand" "=&v") | |
2522 | (vec_merge:V_noHI | |
2523 | (minus:V_noHI | |
2524 | (match_operand:V_noHI 1 "register_operand" "v") | |
2525 | (match_operand:V_noHI 2 "register_operand" "v")) | |
2526 | (plus:V_noHI (match_dup 1) (match_dup 2)) | |
2527 | (const_int 6148914691236517205)))] | |
2528 | "" | |
2529 | { | |
2530 | rtx even = gen_rtx_REG (DImode, EXEC_REG); | |
2531 | emit_move_insn (even, get_exec (0x5555555555555555UL)); | |
2532 | rtx dest = operands[0]; | |
2533 | rtx x = operands[1]; | |
2534 | rtx y = operands[2]; | |
b17c57b0 AS |
2535 | emit_insn (gen_sub<mode>3_exec (dest, x, y, gcn_gen_undef (<MODE>mode), |
2536 | even)); | |
1bde3ace AJ |
2537 | rtx odd = gen_rtx_REG (DImode, EXEC_REG); |
2538 | emit_move_insn (odd, get_exec (0xaaaaaaaaaaaaaaaaUL)); | |
2539 | emit_insn (gen_add<mode>3_exec (dest, x, y, dest, odd)); | |
2540 | | |
2541 | DONE; | |
2542 | }) | |
2543 | ||
;; Iterator over the two complex-add-with-rotation unspecs, plus attributes:
;; rot gives the name suffix ("90" or "270"), and cadd_subadd / cadd_addsub
;; select "sub"/"add" for the 90 form and the opposite pairing for 270.
2544 | (define_int_iterator CADD [UNSPEC_CADD90 UNSPEC_CADD270]) | |
2545 | (define_int_attr rot [(UNSPEC_CADD90 "90") (UNSPEC_CADD270 "270")]) | |
2546 | (define_int_attr cadd_subadd [(UNSPEC_CADD90 "sub") (UNSPEC_CADD270 "add")]) | |
2547 | (define_int_attr cadd_addsub [(UNSPEC_CADD90 "add") (UNSPEC_CADD270 "sub")]) | |
2548 | ||
;; Expander for complex add with rotation (90 or 270, per the CADD iterator).
;; Operand 2 is pair-swapped first; the result is then built from a masked
;; <cadd_subadd> with EXEC = 0x5555555555555555 (undefined merge source) and a
;; masked <cadd_addsub> with EXEC = 0xaaaaaaaaaaaaaaaa merging into the result
;; of the first step.
2549 | (define_expand "cadd<rot><mode>3" | |
2550 | [(set (match_operand:V_noHI 0 "register_operand" "=&v") | |
2551 | (unspec:V_noHI [(match_operand:V_noHI 1 "register_operand" "v") | |
2552 | (match_operand:V_noHI 2 "register_operand" "v")] | |
2553 | CADD))] | |
2554 | "" | |
2555 | { | |
2556 | rtx dest = operands[0]; | |
2557 | rtx x = operands[1]; | |
2558 | rtx y = gen_reg_rtx (<MODE>mode); | |
2559 | emit_insn (gen_dpp_swap_pairs<mode> (y, operands[2])); | |
2560 | | |
2561 | rtx even = gen_rtx_REG (DImode, EXEC_REG); | |
2562 | emit_move_insn (even, get_exec (0x5555555555555555UL)); | |
b17c57b0 AS |
2563 | emit_insn (gen_<cadd_subadd><mode>3_exec (dest, x, y, |
2564 | gcn_gen_undef (<MODE>mode), | |
2565 | even)); | |
1bde3ace AJ |
2566 | rtx odd = gen_rtx_REG (DImode, EXEC_REG); |
2567 | emit_move_insn (odd, get_exec (0xaaaaaaaaaaaaaaaaUL)); | |
2568 | emit_insn (gen_<cadd_addsub><mode>3_exec (dest, x, y, dest, odd)); | |
2569 | | |
2570 | DONE; | |
2571 | }) | |
2572 | ||
;; Expander computing operand 1 times operand 2, then alternately subtracting
;; and adding operand 3: a masked subtract with EXEC = 0x5555555555555555
;; (undefined merge source), followed by a masked add with
;; EXEC = 0xaaaaaaaaaaaaaaaa that merges into the result of the first step.
;; The product is formed with a separate multiply, not a fused operation.
2573 | (define_expand "vec_fmaddsub<mode>4" | |
2574 | [(match_operand:V_noHI 0 "register_operand" "=&v") | |
2575 | (match_operand:V_noHI 1 "register_operand" "v") | |
2576 | (match_operand:V_noHI 2 "register_operand" "v") | |
2577 | (match_operand:V_noHI 3 "register_operand" "v")] | |
2578 | "" | |
2579 | { | |
2580 | rtx t1 = gen_reg_rtx (<MODE>mode); | |
2581 | emit_insn (gen_mul<mode>3 (t1, operands[1], operands[2])); | |
2582 | rtx even = gen_rtx_REG (DImode, EXEC_REG); | |
2583 | emit_move_insn (even, get_exec (0x5555555555555555UL)); | |
2584 | rtx dest = operands[0]; | |
b17c57b0 AS |
2585 | emit_insn (gen_sub<mode>3_exec (dest, t1, operands[3], |
2586 | gcn_gen_undef (<MODE>mode), even)); | |
1bde3ace AJ |
2587 | rtx odd = gen_rtx_REG (DImode, EXEC_REG); |
2588 | emit_move_insn (odd, get_exec (0xaaaaaaaaaaaaaaaaUL)); | |
2589 | emit_insn (gen_add<mode>3_exec (dest, t1, operands[3], dest, odd)); | |
2590 | | |
2591 | DONE; | |
2592 | }) | |
2593 | ||
;; Mirror image of vec_fmaddsub: operand 1 times operand 2, then a masked add
;; of operand 3 with EXEC = 0x5555555555555555 (undefined merge source),
;; followed by a masked subtract with EXEC = 0xaaaaaaaaaaaaaaaa that merges
;; into the result of the first step.
2594 | (define_expand "vec_fmsubadd<mode>4" | |
2595 | [(match_operand:V_noHI 0 "register_operand" "=&v") | |
2596 | (match_operand:V_noHI 1 "register_operand" "v") | |
2597 | (match_operand:V_noHI 2 "register_operand" "v") | |
2598 | (match_operand:V_noHI 3 "register_operand" "v")] | |
2599 | "" | |
2600 | { | |
2601 | rtx t1 = gen_reg_rtx (<MODE>mode); | |
2602 | emit_insn (gen_mul<mode>3 (t1, operands[1], operands[2])); | |
2603 | rtx even = gen_rtx_REG (DImode, EXEC_REG); | |
2604 | emit_move_insn (even, get_exec (0x5555555555555555UL)); | |
2605 | rtx dest = operands[0]; | |
b17c57b0 AS |
2606 | emit_insn (gen_add<mode>3_exec (dest, t1, operands[3], |
2607 | gcn_gen_undef (<MODE>mode), even)); | |
1bde3ace AJ |
2608 | rtx odd = gen_rtx_REG (DImode, EXEC_REG); |
2609 | emit_move_insn (odd, get_exec (0xaaaaaaaaaaaaaaaaUL)); | |
b17c57b0 | 2610 | emit_insn (gen_sub<mode>3_exec (dest, t1, operands[3], dest, odd)); |
1bde3ace AJ |
2611 | |
2612 | DONE; | |
2613 | }) | |
2614 | ||
3d6275e3 AS |
2615 | ;; }}} |
2616 | ;; {{{ ALU generic case | |
2617 | ||
;; Code iterators for the generic ALU patterns below: bitwise binary
;; operations, shifts, and signed/unsigned min/max.
3d6275e3 AS |
2618 | (define_code_iterator bitop [and ior xor]) |
2619 | (define_code_iterator shiftop [ashift lshiftrt ashiftrt]) | |
2620 | (define_code_iterator minmaxop [smin smax umin umax]) | |
2621 | ||
;; Unary operations from the bitunop iterator on V_INT_1REG vectors, emitted
;; as a single v_<mnemonic> instruction.
2622 | (define_insn "<expander><mode>2<exec>" | |
03876953 AS |
2623 | [(set (match_operand:V_INT_1REG 0 "gcn_valu_dst_operand" "= v") |
2624 | (bitunop:V_INT_1REG |
2625 | (match_operand:V_INT_1REG 1 "gcn_valu_src0_operand" "vSvB")))] |
3d6275e3 AS |
2626 | "" |
2627 | "v_<mnemonic>0\t%0, %1" | |
2628 | [(set_attr "type" "vop1") | |
2629 | (set_attr "length" "8")]) | |
2630 | ||
;; Binary bitwise operations (and/ior/xor) on V_INT_1REG vectors.  Two
;; alternatives: a register destination emitted as v_<mnemonic>, and a DS
;; memory destination (operand 1 tied to operand 0) emitted as ds_<mnemonic>.
2631 | (define_insn "<expander><mode>3<exec>" | |
03876953 AS |
2632 | [(set (match_operand:V_INT_1REG 0 "gcn_valu_dst_operand" "= v,RD") |
2633 | (bitop:V_INT_1REG |
2634 | (match_operand:V_INT_1REG 1 "gcn_valu_src0_operand" "% v, 0") |
2635 | (match_operand:V_INT_1REG 2 "gcn_valu_src1com_operand" "vSvB, v")))] |
3d6275e3 AS |
2636 | "" |
2637 | "@ | |
2638 | v_<mnemonic>0\t%0, %2, %1 | |
2639 | ds_<mnemonic>0\t%A0, %2%O0" | |
2640 | [(set_attr "type" "vop2,ds") | |
2641 | (set_attr "length" "8,8")]) | |
2642 | ||
;; Binary bitwise operations on 64-bit vectors (V_DI).  The register
;; alternative is split after reload into two <VnSI> operations, one on the
;; low halves and one on the high halves of the operands.  The DS memory
;; alternative is emitted directly as ds_<mnemonic> and is excluded from the
;; split by the gcn_ds_memory_operand test in the split condition.
1165109b AS |
2643 | (define_insn_and_split "<expander><mode>3" |
2644 | [(set (match_operand:V_DI 0 "gcn_valu_dst_operand" "= v,RD") |
2645 | (bitop:V_DI |
2646 | (match_operand:V_DI 1 "gcn_valu_src0_operand" "% v,RD") |
2647 | (match_operand:V_DI 2 "gcn_valu_src1com_operand" "vSvB, v")))] |
3d6275e3 AS |
2648 | "" |
2649 | "@ | |
2650 | # | |
2651 | ds_<mnemonic>0\t%A0, %2%O0" | |
1165109b | 2652 | "(reload_completed && !gcn_ds_memory_operand (operands[0], <MODE>mode))" |
3d6275e3 | 2653 | [(set (match_dup 3) |
1165109b | 2654 | (bitop:<VnSI> (match_dup 5) (match_dup 7))) |
3d6275e3 | 2655 | (set (match_dup 4) |
1165109b AS |
2656 | (bitop:<VnSI> (match_dup 6) (match_dup 8)))] |
2657 | { | |
2658 | operands[3] = gcn_operand_part (<MODE>mode, operands[0], 0); | |
2659 | operands[4] = gcn_operand_part (<MODE>mode, operands[0], 1); | |
2660 | operands[5] = gcn_operand_part (<MODE>mode, operands[1], 0); | |
2661 | operands[6] = gcn_operand_part (<MODE>mode, operands[1], 1); | |
2662 | operands[7] = gcn_operand_part (<MODE>mode, operands[2], 0); | |
2663 | operands[8] = gcn_operand_part (<MODE>mode, operands[2], 1); | |
3d6275e3 AS |
2664 | } |
2665 | [(set_attr "type" "vmult,ds") | |
2666 | (set_attr "length" "16,8")]) | |
2667 | ||
;; EXEC-masked binary bitwise operations on 64-bit vectors (V_DI).  The insn
;; condition accepts a non-memory destination, or a destination equal to
;; operand 1 with a register operand 2.  The register alternative is split
;; after reload into two masked <VnSI> operations (low and high halves), each
;; merging with the matching half of operand 3 under the mask in operand 4;
;; the DS memory alternative is emitted directly as ds_<mnemonic>.
1165109b AS |
2668 | (define_insn_and_split "<expander><mode>3_exec" |
2669 | [(set (match_operand:V_DI 0 "gcn_valu_dst_operand" "= v,RD") |
2670 | (vec_merge:V_DI |
2671 | (bitop:V_DI |
2672 | (match_operand:V_DI 1 "gcn_valu_src0_operand" "% v,RD") |
2673 | (match_operand:V_DI 2 "gcn_valu_src1com_operand" "vSvB, v")) |
2674 | (match_operand:V_DI 3 "gcn_register_ds_or_unspec_operand" "U0,U0") |
3d6275e3 AS |
2675 | (match_operand:DI 4 "gcn_exec_reg_operand" " e, e")))] |
2676 | "!memory_operand (operands[0], VOIDmode) | |
2677 | || (rtx_equal_p (operands[0], operands[1]) | |
2678 | && register_operand (operands[2], VOIDmode))" | |
2679 | "@ | |
2680 | # | |
2681 | ds_<mnemonic>0\t%A0, %2%O0" | |
1165109b | 2682 | "(reload_completed && !gcn_ds_memory_operand (operands[0], <MODE>mode))" |
3d6275e3 | 2683 | [(set (match_dup 5) |
1165109b AS |
2684 | (vec_merge:<VnSI> |
2685 | (bitop:<VnSI> (match_dup 7) (match_dup 9)) |
3d6275e3 AS |
2686 | (match_dup 11) |
2687 | (match_dup 4))) | |
2688 | (set (match_dup 6) | |
1165109b AS |
2689 | (vec_merge:<VnSI> |
2690 | (bitop:<VnSI> (match_dup 8) (match_dup 10)) |
3d6275e3 AS |
2691 | (match_dup 12) |
2692 | (match_dup 4)))] | |
2693 | { | |
1165109b AS |
2694 | operands[5] = gcn_operand_part (<MODE>mode, operands[0], 0); |
2695 | operands[6] = gcn_operand_part (<MODE>mode, operands[0], 1); | |
2696 | operands[7] = gcn_operand_part (<MODE>mode, operands[1], 0); | |
2697 | operands[8] = gcn_operand_part (<MODE>mode, operands[1], 1); | |
2698 | operands[9] = gcn_operand_part (<MODE>mode, operands[2], 0); | |
2699 | operands[10] = gcn_operand_part (<MODE>mode, operands[2], 1); | |
2700 | operands[11] = gcn_operand_part (<MODE>mode, operands[3], 0); | |
2701 | operands[12] = gcn_operand_part (<MODE>mode, operands[3], 1); | |
3d6275e3 AS |
2702 | } |
2703 | [(set_attr "type" "vmult,ds") | |
2704 | (set_attr "length" "16,8")]) | |
2705 | ||
;; Shift of a QI/HI element vector by a scalar SI amount.  The expander
;; widens operand 1 to <VnSI>, performs the shift there, and converts the
;; result back.  The conversions are unsigned only for lshiftrt.  The local
;; enum gives the iterator's code names C values so that the substituted
;; <code> token can be compared with ==.
dc941ea9 | 2706 | (define_expand "<expander><mode>3" |
03876953 AS |
2707 | [(set (match_operand:V_QIHI 0 "register_operand" "= v") |
2708 | (shiftop:V_QIHI |
2709 | (match_operand:V_QIHI 1 "gcn_alu_operand" " v") |
2710 | (vec_duplicate:V_QIHI |
2711 | (match_operand:SI 2 "gcn_alu_operand" "SvB"))))] |
dc941ea9 AS |
2712 | "" |
2713 | { | |
2714 | enum {ashift, lshiftrt, ashiftrt}; | |
2715 | bool unsignedp = (<code> == lshiftrt); | |
1165109b | 2716 | rtx insi1 = gen_reg_rtx (<VnSI>mode); |
dc941ea9 | 2717 | rtx insi2 = gen_reg_rtx (SImode); |
1165109b | 2718 | rtx outsi = gen_reg_rtx (<VnSI>mode); |
dc941ea9 AS |
2719 | |
2720 | convert_move (insi1, operands[1], unsignedp); | |
2721 | convert_move (insi2, operands[2], unsignedp); | |
1165109b | 2722 | emit_insn (gen_<expander><vnsi>3 (outsi, insi1, insi2)); |
dc941ea9 AS |
2723 | convert_move (operands[0], outsi, unsignedp); |
2724 | DONE; | |
2725 | }) | |
2726 | ||
;; Shift of a V_INT_noHI vector by a scalar SI amount (vec_duplicated to
;; <VnSI>).  The output template uses <revmnemonic> and prints the shift
;; amount (%2) before the shifted value (%1).
1165109b | 2727 | (define_insn "<expander><mode>3<exec>" |
6e0ca3fe AS |
2728 | [(set (match_operand:V_INT_noHI 0 "register_operand" "= v") |
2729 | (shiftop:V_INT_noHI |
2730 | (match_operand:V_INT_noHI 1 "gcn_alu_operand" " v") |
2731 | (vec_duplicate:<VnSI> |
3d6275e3 AS |
2732 | (match_operand:SI 2 "gcn_alu_operand" "SvB"))))] |
2733 | "" | |
2734 | "v_<revmnemonic>0\t%0, %2, %1" | |
2735 | [(set_attr "type" "vop2") | |
2736 | (set_attr "length" "8")]) | |
2737 | ||
;; Shift of a QI/HI element vector by a per-element vector amount.  Both
;; operands are widened to <VnSI>, the shift is done there, and the result is
;; converted back.  The conversions are unsigned only for lshiftrt.  The local
;; enum gives the iterator's code names C values so that the substituted
;; <code> token can be compared with ==.
dc941ea9 | 2738 | (define_expand "v<expander><mode>3" |
03876953 AS |
2739 | [(set (match_operand:V_QIHI 0 "register_operand" "=v") |
2740 | (shiftop:V_QIHI |
2741 | (match_operand:V_QIHI 1 "gcn_alu_operand" " v") |
2742 | (match_operand:V_QIHI 2 "gcn_alu_operand" "vB")))] |
dc941ea9 AS |
2743 | "" |
2744 | { | |
2745 | enum {ashift, lshiftrt, ashiftrt}; | |
b8db70e1 | 2746 | bool unsignedp = (<code> == lshiftrt); |
1165109b AS |
2747 | rtx insi1 = gen_reg_rtx (<VnSI>mode); |
2748 | rtx insi2 = gen_reg_rtx (<VnSI>mode); | |
2749 | rtx outsi = gen_reg_rtx (<VnSI>mode); | |
dc941ea9 AS |
2750 | |
2751 | convert_move (insi1, operands[1], unsignedp); | |
2752 | convert_move (insi2, operands[2], unsignedp); | |
1165109b | 2753 | emit_insn (gen_v<expander><vnsi>3 (outsi, insi1, insi2)); |
dc941ea9 AS |
2754 | convert_move (operands[0], outsi, unsignedp); |
2755 | DONE; | |
2756 | }) | |
2757 | ||
;; Shift of a V_INT_noHI vector by a per-element <VnSI> vector amount.  The
;; output template uses <revmnemonic> and prints the shift amount (%2) before
;; the shifted value (%1).
1165109b | 2758 | (define_insn "v<expander><mode>3<exec>" |
6e0ca3fe AS |
2759 | [(set (match_operand:V_INT_noHI 0 "register_operand" "=v") |
2760 | (shiftop:V_INT_noHI |
2761 | (match_operand:V_INT_noHI 1 "gcn_alu_operand" " v") |
2762 | (match_operand:<VnSI> 2 "gcn_alu_operand" "vB")))] |
3d6275e3 AS |
2763 | "" |
2764 | "v_<revmnemonic>0\t%0, %2, %1" | |
2765 | [(set_attr "type" "vop2") | |
2766 | (set_attr "length" "8")]) | |
2767 | ||
;; Min/max on QI/HI element vectors.  Both inputs are widened to <VnSI>
;; (unsigned conversion for umin/umax, signed otherwise), the <VnSI> min/max
;; pattern is used, and the result is converted back.  The local enum gives
;; the iterator's code names C values so that the substituted <code> token
;; can be compared with ==.
dc941ea9 | 2768 | (define_expand "<expander><mode>3" |
03876953 AS |
2769 | [(set (match_operand:V_QIHI 0 "gcn_valu_dst_operand") |
2770 | (minmaxop:V_QIHI |
2771 | (match_operand:V_QIHI 1 "gcn_valu_src0_operand") |
2772 | (match_operand:V_QIHI 2 "gcn_valu_src1com_operand")))] |
dc941ea9 AS |
2773 | "" |
2774 | { | |
2775 | enum {smin, umin, smax, umax}; | |
2776 | bool unsignedp = (<code> == umax || <code> == umin); | |
1165109b AS |
2777 | rtx insi1 = gen_reg_rtx (<VnSI>mode); |
2778 | rtx insi2 = gen_reg_rtx (<VnSI>mode); | |
2779 | rtx outsi = gen_reg_rtx (<VnSI>mode); | |
dc941ea9 AS |
2780 | |
2781 | convert_move (insi1, operands[1], unsignedp); | |
2782 | convert_move (insi2, operands[2], unsignedp); | |
1165109b | 2783 | emit_insn (gen_<code><vnsi>3 (outsi, insi1, insi2)); |
dc941ea9 AS |
2784 | convert_move (operands[0], outsi, unsignedp); |
2785 | DONE; | |
2786 | }) | |
2787 | ||
;; EXEC-masked min/max on QI/HI element vectors.  The inputs are widened to
;; <VnSI>, the masked <VnSI> min/max is computed with an undefined merge
;; source, the result is narrowed into a fresh temporary, and a final masked
;; move merges that temporary with operand 3 into the destination under the
;; mask in operand 4.
553ff252 PAA |
2788 | (define_expand "<expander><mode>3_exec" |
2789 | [(set (match_operand:V_QIHI 0 "gcn_valu_dst_operand") | |
2790 | (vec_merge:V_QIHI | |
2791 | (minmaxop:V_QIHI | |
2792 | (match_operand:V_QIHI 1 "gcn_valu_src0_operand") | |
2793 | (match_operand:V_QIHI 2 "gcn_valu_src1com_operand")) | |
2794 | (match_operand:V_QIHI 3 "gcn_register_or_unspec_operand" "U0") | |
2795 | (match_operand:DI 4 "gcn_exec_reg_operand" "e")))] | |
2796 | "" | |
2797 | { | |
2798 | enum {smin, umin, smax, umax}; | |
2799 | bool unsignedp = (<code> == umax || <code> == umin); | |
2800 | rtx insi1 = gen_reg_rtx (<VnSI>mode); | |
2801 | rtx insi2 = gen_reg_rtx (<VnSI>mode); | |
2802 | rtx outsi = gen_reg_rtx (<VnSI>mode); | |
2803 | rtx out = operands[0]; | |
2804 | rtx exec = operands[4]; | |
2805 | rtx tmp = gen_reg_rtx (<MODE>mode); | |
2806 | | |
2807 | convert_move (insi1, operands[1], unsignedp); | |
2808 | convert_move (insi2, operands[2], unsignedp); | |
2809 | emit_insn (gen_<code><vnsi>3_exec (outsi, insi1, insi2, | |
2810 | gcn_gen_undef(<VnSI>mode), exec)); | |
2811 | convert_move (tmp, outsi, unsignedp); | |
2812 | emit_insn (gen_mov<mode>_exec (out, tmp, operands[3], exec)); | |
2813 | DONE; | |
2814 | }) | |
2815 | ||
;; Min/max on 32-bit element vectors (V_SI).  Two alternatives: a register
;; destination emitted as v_<mnemonic>, and a DS memory destination (operand 1
;; tied to operand 0) emitted as ds_<mnemonic>.
1165109b AS |
2816 | (define_insn "<expander><vnsi>3<exec>" |
2817 | [(set (match_operand:V_SI 0 "gcn_valu_dst_operand" "= v,RD") |
2818 | (minmaxop:V_SI |
2819 | (match_operand:V_SI 1 "gcn_valu_src0_operand" "% v, 0") |
2820 | (match_operand:V_SI 2 "gcn_valu_src1com_operand" "vSvB, v")))] |
3d6275e3 AS |
2821 | "" |
2822 | "@ | |
2823 | v_<mnemonic>0\t%0, %2, %1 | |
2824 | ds_<mnemonic>0\t%A0, %2%O0" | |
2825 | [(set_attr "type" "vop2,ds") | |
2826 | (set_attr "length" "8,8")]) | |
2827 | ||
;; Min/max on 64-bit element vectors (V_DI).  Split after reload into a
;; vector compare that writes VCC (LT/GT for the signed codes, LTU/GTU for
;; the unsigned ones; "less than" for min, "greater than" for max) followed
;; by a vcond_mask that selects between operands 1 and 2 using VCC.  VCC is
;; clobbered.  The local enum gives the iterator's code names C values so
;; that the substituted <code> token can be compared with ==.
553ff252 PAA |
2828 | (define_insn_and_split "<expander><mode>3" |
2829 | [(set (match_operand:V_DI 0 "register_operand" "=v") | |
2830 | (minmaxop:V_DI | |
2831 | (match_operand:V_DI 1 "gcn_alu_operand" " v") | |
2832 | (match_operand:V_DI 2 "gcn_alu_operand" " v"))) | |
2833 | (clobber (reg:DI VCC_REG))] | |
2834 | "" | |
2835 | "#" | |
2836 | "reload_completed" | |
2837 | [(const_int 0)] | |
2838 | { | |
2839 | rtx out = operands[0]; | |
2840 | rtx vcc = gen_rtx_REG (DImode, VCC_REG); | |
2841 | | |
2842 | enum {smin, smax, umin, umax}; | |
2843 | bool minp = (<code> == smin || <code> == umin); | |
2844 | if (<code> == smin || <code> == smax) | |
2845 | emit_insn (gen_vec_cmp<mode>di (vcc, minp ? gen_rtx_LT (VOIDmode, 0, 0) : | |
2846 | gen_rtx_GT (VOIDmode, 0, 0), operands[1], | |
2847 | operands[2])); | |
2848 | else | |
2849 | emit_insn (gen_vec_cmp<mode>di (vcc, minp ? gen_rtx_LTU (VOIDmode, 0, 0) : | |
2850 | gen_rtx_GTU (VOIDmode, 0, 0), operands[1], | |
2851 | operands[2])); | |
2852 | emit_insn (gen_vcond_mask_<mode>di (out, operands[1], operands[2], vcc)); | |
    /* The whole replacement sequence has been emitted by hand; finish here so
       that the (const_int 0) placeholder pattern is not emitted as well.  */
    DONE;
2853 | } | |
2854 | [(set_attr "type" "mult")]) | |
2855 | ||
;; EXEC-masked form of the DImode-element vector min/max.  Operand 3 is the
;; merge value (or an unspec), operand 4 is the EXEC mask and operand 5 is a
;; scratch vector.  After reload it is split into: a masked vector compare
;; into VCC, an unmasked vcond_mask into the scratch, and a masked move of
;; the scratch into the destination.
(define_insn_and_split "<expander><mode>3_exec"
  [(set (match_operand:V_DI 0 "register_operand" "= v")
	(vec_merge:V_DI
	  (minmaxop:V_DI
	    (match_operand:V_DI 1 "gcn_alu_operand" " v")
	    (match_operand:V_DI 2 "gcn_alu_operand" " v"))
	  (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0")
	  (match_operand:DI 4 "gcn_exec_reg_operand" "+e")))
   (clobber (match_scratch:<VnDI> 5 "= &v"))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    rtx out = operands[0];
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    rtx exec = operands[4];
    rtx tmp = operands[5];

    /* <code> is replaced by the name of the RTL code being instantiated;
       this local enum turns those names into comparable C constants.  */
    enum {smin, smax, umin, umax};
    bool minp = (<code> == smin || <code> == umin);
    /* Signed codes use LT/GT, unsigned codes use LTU/GTU.  */
    if (<code> == smin || <code> == smax)
      emit_insn (gen_vec_cmp<mode>di_exec (vcc,
					   minp ? gen_rtx_LT (VOIDmode, 0, 0) :
					   gen_rtx_GT (VOIDmode, 0, 0),
					   operands[1], operands[2], exec));
    else
      emit_insn (gen_vec_cmp<mode>di_exec (vcc,
					   minp ? gen_rtx_LTU (VOIDmode, 0, 0) :
					   gen_rtx_GTU (VOIDmode, 0, 0),
					   operands[1], operands[2], exec));
    emit_insn (gen_vcond_mask_<mode>di (tmp, operands[1], operands[2], vcc));
    emit_insn (gen_mov<mode>_exec (out, tmp, operands[3], exec));

    /* The whole replacement has been emitted by hand.  Without DONE the
       placeholder template (const_int 0) would be emitted after it.  */
    DONE;
  }
  [(set_attr "type" "mult")])
2892 | ||
bf6b5c74 AS |
2893 | ;; }}} |
2894 | ;; {{{ Int unops | |
2895 | ||
;; Integer vector negation.  Expanded as sub<mode>3 with the constant built
;; by gcn_vec_constant (<MODE>mode, 0) as the minuend and operand 1 as the
;; subtrahend, i.e. 0 - x.
2896 | (define_expand "neg<mode>2" | |
2897 | [(match_operand:V_INT 0 "register_operand") | |
2898 | (match_operand:V_INT 1 "register_operand")] | |
2899 | "" | |
2900 | { | |
2901 | emit_insn (gen_sub<mode>3 (operands[0], gcn_vec_constant (<MODE>mode, 0), | |
2902 | operands[1])); | |
2903 | DONE; | |
2904 | }) | |
2905 | ||
34574064 AS |
;; Bitwise NOT of a vector of DImode elements.  Always output as "#" and
;; split after reload into two <VnSI> NOT operations, one on part 0 and one
;; on part 1 of the destination/source as returned by gcn_operand_part.
2906 | (define_insn_and_split "one_cmpl<mode>2<exec>" |
2907 | [(set (match_operand:V_DI 0 "register_operand" "= v") | |
2908 | (not:V_DI | |
2909 | (match_operand:V_DI 1 "gcn_alu_operand" "vSvDB")))] | |
2910 | "" | |
2911 | "#" | |
2912 | "reload_completed" | |
2913 | [(set (match_dup 3) (not:<VnSI> (match_dup 5))) | |
2914 | (set (match_dup 4) (not:<VnSI> (match_dup 6)))] | |
2915 | { | |
2916 | operands[3] = gcn_operand_part (<VnDI>mode, operands[0], 0); | |
2917 | operands[4] = gcn_operand_part (<VnDI>mode, operands[0], 1); | |
2918 | operands[5] = gcn_operand_part (<VnDI>mode, operands[1], 0); | |
2919 | operands[6] = gcn_operand_part (<VnDI>mode, operands[1], 1); | |
2920 | } | |
2921 | [(set_attr "type" "mult")]) | |
2922 | ||
3d6275e3 AS |
2923 | ;; }}} |
2924 | ;; {{{ FP binops - special cases | |
2925 | ||
2926 | ; GCN does not directly provide a DFmode subtract instruction, so we do it by | |
2927 | ; adding the negated second operand to the first. | |
2928 | ||
1165109b AS |
;; Vector DFmode subtraction, emitted as v_add_f64 with operand 2 negated in
;; the assembler template.  The two alternatives differ in which source gets
;; the wider "vSvB" constraint and in the order the sources are printed.
2929 | (define_insn "sub<mode>3<exec>" |
2930 | [(set (match_operand:V_DF 0 "register_operand" "= v, v") | |
2931 | (minus:V_DF | |
2932 | (match_operand:V_DF 1 "gcn_alu_operand" "vSvB, v") | |
2933 | (match_operand:V_DF 2 "gcn_alu_operand" " v,vSvB")))] | |
3d6275e3 AS |
2934 | "" |
2935 | "@ | |
2936 | v_add_f64\t%0, %1, -%2 | |
2937 | v_add_f64\t%0, -%2, %1" | |
2938 | [(set_attr "type" "vop3a") | |
2939 | (set_attr "length" "8,8")]) | |
2940 | ||
;; Scalar DFmode subtraction; same scheme as the vector form: v_add_f64 with
;; operand 2 negated, two alternatives that differ in which source gets the
;; "vSvB" constraint.
abb3993e | 2941 | (define_insn "subdf3" |
3d6275e3 AS |
2942 | [(set (match_operand:DF 0 "register_operand" "= v, v") |
2943 | (minus:DF | |
2944 | (match_operand:DF 1 "gcn_alu_operand" "vSvB, v") | |
2945 | (match_operand:DF 2 "gcn_alu_operand" " v,vSvB")))] | |
2946 | "" | |
2947 | "@ | |
2948 | v_add_f64\t%0, %1, -%2 | |
2949 | v_add_f64\t%0, -%2, %1" | |
2950 | [(set_attr "type" "vop3a") | |
2951 | (set_attr "length" "8,8")]) | |
2952 | ||
2953 | ;; }}} | |
2954 | ;; {{{ FP binops - generic | |
2955 | ||
3d6275e3 AS |
;; Code iterators for the generic FP binary patterns: the commutative codes
;; (comm_fp), the non-commutative one (nocomm_fp), and the union (all_fp).
2956 | (define_code_iterator comm_fp [plus mult smin smax]) |
2957 | (define_code_iterator nocomm_fp [minus]) | |
2958 | (define_code_iterator all_fp [plus mult minus smin smax]) | |
2959 | ||
;; Commutative FP binary operation (comm_fp) on V_FP vectors as a single
;; instruction of type "vop2".  Operand 1 is marked commutative ("%") and the
;; template prints operand 2 before operand 1.
2960 | (define_insn "<expander><mode>3<exec>" |
03876953 AS |
2961 | [(set (match_operand:V_FP 0 "register_operand" "= v") |
2962 | (comm_fp:V_FP | |
2963 | (match_operand:V_FP 1 "gcn_alu_operand" "% v") | |
2964 | (match_operand:V_FP 2 "gcn_alu_operand" "vSvB")))] | |
3d6275e3 AS |
2965 | "" |
2966 | "v_<mnemonic>0\t%0, %2, %1" | |
2967 | [(set_attr "type" "vop2") | |
2968 | (set_attr "length" "8")]) | |
2969 | ||
;; Commutative FP binary operation (comm_fp) on scalar FP modes.
;; Alternative 0 is the register form (type "vop2").
;; NOTE(review): alternative 1 pairs an "RL" destination and type "ds" with a
;; "v_" mnemonic and a two-operand template; confirm against the operand
;; predicates whether this alternative can ever be selected.
2970 | (define_insn "<expander><mode>3" |
03876953 AS |
2971 | [(set (match_operand:FP 0 "gcn_valu_dst_operand" "= v, RL") |
2972 | (comm_fp:FP | |
2973 | (match_operand:FP 1 "gcn_valu_src0_operand" "% v, 0") | |
2974 | (match_operand:FP 2 "gcn_valu_src1_operand" "vSvB,vSvB")))] | |
3d6275e3 AS |
2975 | "" |
2976 | "@ | |
2977 | v_<mnemonic>0\t%0, %2, %1 | |
2978 | v_<mnemonic>0\t%0, %1%O0" | |
2979 | [(set_attr "type" "vop2,ds") | |
2980 | (set_attr "length" "8")]) | |
2981 | ||
;; Non-commutative FP binary operation (nocomm_fp, i.e. minus) on
;; V_FP_1REG vectors.  Alternative 0 prints the operands in order with
;; <mnemonic>; alternative 1 prints them swapped with <revmnemonic>
;; (presumably the reversed-operand opcode -- confirm where the attribute is
;; defined).
2982 | (define_insn "<expander><mode>3<exec>" |
03876953 AS |
2983 | [(set (match_operand:V_FP_1REG 0 "register_operand" "= v, v") |
2984 | (nocomm_fp:V_FP_1REG | |
2985 | (match_operand:V_FP_1REG 1 "gcn_alu_operand" "vSvB, v") | |
2986 | (match_operand:V_FP_1REG 2 "gcn_alu_operand" " v,vSvB")))] | |
3d6275e3 AS |
2987 | "" |
2988 | "@ | |
2989 | v_<mnemonic>0\t%0, %1, %2 | |
2990 | v_<revmnemonic>0\t%0, %2, %1" | |
2991 | [(set_attr "type" "vop2") | |
2992 | (set_attr "length" "8,8")]) | |
2993 | ||
;; Non-commutative FP binary operation (nocomm_fp, i.e. minus) on scalar
;; FP_1REG modes.  Same two-alternative scheme as the vector pattern:
;; <mnemonic> with operands in order, or <revmnemonic> with them swapped.
2994 | (define_insn "<expander><mode>3" |
03876953 AS |
2995 | [(set (match_operand:FP_1REG 0 "register_operand" "= v, v") |
2996 | (nocomm_fp:FP_1REG | |
2997 | (match_operand:FP_1REG 1 "gcn_alu_operand" "vSvB, v") | |
2998 | (match_operand:FP_1REG 2 "gcn_alu_operand" " v,vSvB")))] | |
3d6275e3 AS |
2999 | "" |
3000 | "@ | |
3001 | v_<mnemonic>0\t%0, %1, %2 | |
3002 | v_<revmnemonic>0\t%0, %2, %1" | |
3003 | [(set_attr "type" "vop2") | |
3004 | (set_attr "length" "8,8")]) | |
3005 | ||
10aa0356 AS |
;; fminmaxop iterates over the FP smin/smax codes.  The scalar expander
;; below has an empty preparation block, so it simply emits the RTL template
;; as written under the <fexpander> name.
3006 | (define_code_iterator fminmaxop [smin smax]) |
3007 | (define_expand "<fexpander><mode>3" | |
3008 | [(set (match_operand:FP 0 "gcn_valu_dst_operand") | |
3009 | (fminmaxop:FP | |
3010 | (match_operand:FP 1 "gcn_valu_src0_operand") | |
3011 | (match_operand:FP 2 "gcn_valu_src1_operand")))] | |
3012 | "" | |
3013 | {}) | |
3014 | ||
;; Vector (V_FP) counterpart of the <fexpander> expander for smin/smax.  The
;; preparation block is empty, so the RTL template is emitted as written.
3015 | (define_expand "<fexpander><mode>3<exec>" | |
3016 | [(set (match_operand:V_FP 0 "gcn_valu_dst_operand") | |
3017 | (fminmaxop:V_FP | |
3018 | (match_operand:V_FP 1 "gcn_valu_src0_operand") | |
3019 | (match_operand:V_FP 2 "gcn_valu_src1_operand")))] | |
3020 | "" | |
3021 | {}) | |
3022 | ||
3d6275e3 AS |
3023 | ;; }}} |
3024 | ;; {{{ FP unops | |
3025 | ||
;; Scalar FP absolute value, emitted as an add of 0 and operand 1 wrapped in
;; the |...| operand modifier in the assembler template.
3026 | (define_insn "abs<mode>2" |
03876953 AS |
3027 | [(set (match_operand:FP 0 "register_operand" "=v") |
3028 | (abs:FP (match_operand:FP 1 "register_operand" " v")))] | |
3d6275e3 AS |
3029 | "" |
3030 | "v_add%i0\t%0, 0, |%1|" | |
3031 | [(set_attr "type" "vop3a") | |
3032 | (set_attr "length" "8")]) | |
3033 | ||
;; Vector FP absolute value; same template as the scalar pattern: an add of
;; 0 and |operand 1|.
3034 | (define_insn "abs<mode>2<exec>" |
03876953 AS |
3035 | [(set (match_operand:V_FP 0 "register_operand" "=v") |
3036 | (abs:V_FP | |
3037 | (match_operand:V_FP 1 "register_operand" " v")))] | |
3d6275e3 AS |
3038 | "" |
3039 | "v_add%i0\t%0, 0, |%1|" | |
3040 | [(set_attr "type" "vop3a") | |
3041 | (set_attr "length" "8")]) | |
3042 | ||
;; Vector FP negation, emitted as an add of 0 and operand 1 with a leading
;; "-" in the assembler template.
3043 | (define_insn "neg<mode>2<exec>" |
03876953 AS |
3044 | [(set (match_operand:V_FP 0 "register_operand" "=v") |
3045 | (neg:V_FP | |
3046 | (match_operand:V_FP 1 "register_operand" " v")))] | |
3d6275e3 AS |
3047 | "" |
3048 | "v_add%i0\t%0, 0, -%1" | |
3049 | [(set_attr "type" "vop3a") | |
3050 | (set_attr "length" "8")]) | |
3051 | ||
;; Vector FP square root via v_sqrt.  Only enabled when
;; flag_unsafe_math_optimizations is set.
3052 | (define_insn "sqrt<mode>2<exec>" |
03876953 AS |
3053 | [(set (match_operand:V_FP 0 "register_operand" "= v") |
3054 | (sqrt:V_FP | |
3055 | (match_operand:V_FP 1 "gcn_alu_operand" "vSvB")))] | |
3d6275e3 AS |
3056 | "flag_unsafe_math_optimizations" |
3057 | "v_sqrt%i0\t%0, %1" | |
3058 | [(set_attr "type" "vop1") | |
3059 | (set_attr "length" "8")]) | |
3060 | ||
;; Scalar FP square root via v_sqrt.  Only enabled when
;; flag_unsafe_math_optimizations is set.
3061 | (define_insn "sqrt<mode>2" |
03876953 AS |
3062 | [(set (match_operand:FP 0 "register_operand" "= v") |
3063 | (sqrt:FP | |
3064 | (match_operand:FP 1 "gcn_alu_operand" "vSvB")))] | |
3d6275e3 AS |
3065 | "flag_unsafe_math_optimizations" |
3066 | "v_sqrt%i0\t%0, %1" | |
3067 | [(set_attr "type" "vop1") | |
3068 | (set_attr "length" "8")]) | |
eff73c10 KCY |
3069 | |
3070 | ; These FP unops have f64, f32 and f16 versions. | |
3071 | (define_int_iterator MATH_UNOP_1OR2REG | |
3072 | [UNSPEC_FLOOR UNSPEC_CEIL]) | |
3073 | |
3074 | ; These FP unops only have f16/f32 versions. | |
3075 | (define_int_iterator MATH_UNOP_1REG | |
3076 | [UNSPEC_EXP2 UNSPEC_LOG2]) | |
3077 | |
; Trigonometric unops.  The patterns that use this iterator are defined for
; the FP_1REG and V_FP_1REG modes only.
3078 | (define_int_iterator MATH_UNOP_TRIG | |
3079 | [UNSPEC_SIN UNSPEC_COS]) | |
3080 | |
; Name fragment used to build the pattern names (e.g. "exp2", "log2").
3081 | (define_int_attr math_unop | |
3082 | [(UNSPEC_FLOOR "floor") | |
3083 | (UNSPEC_CEIL "ceil") | |
3084 | (UNSPEC_EXP2 "exp2") | |
3085 | (UNSPEC_LOG2 "log2") | |
3086 | (UNSPEC_SIN "sin") | |
3087 | (UNSPEC_COS "cos")]) | |
3088 | |
db6a9fe3 KCY |
; Name fragment used in the assembler mnemonic.  It differs from math_unop
; only for EXP2 ("exp") and LOG2 ("log").
3089 | (define_int_attr math_unop_insn |
3090 | [(UNSPEC_FLOOR "floor") | |
3091 | (UNSPEC_CEIL "ceil") | |
3092 | (UNSPEC_EXP2 "exp") | |
3093 | (UNSPEC_LOG2 "log") | |
3094 | (UNSPEC_SIN "sin") | |
3095 | (UNSPEC_COS "cos")]) | |
3096 | ||
eff73c10 KCY |
;; Scalar floor/ceil (MATH_UNOP_1OR2REG) over all FP modes, one "vop1"
;; instruction.  No enabling condition.
3097 | (define_insn "<math_unop><mode>2" |
3098 | [(set (match_operand:FP 0 "register_operand" "= v") | |
3099 | (unspec:FP | |
3100 | [(match_operand:FP 1 "gcn_alu_operand" "vSvB")] | |
3101 | MATH_UNOP_1OR2REG))] | |
3102 | "" | |
db6a9fe3 | 3103 | "v_<math_unop_insn>%i0\t%0, %1" |
eff73c10 KCY |
3104 | [(set_attr "type" "vop1") |
3105 | (set_attr "length" "8")]) | |
3106 | ||
;; Vector floor/ceil (MATH_UNOP_1OR2REG) over all V_FP modes, one "vop1"
;; instruction.  No enabling condition.
3107 | (define_insn "<math_unop><mode>2<exec>" | |
3108 | [(set (match_operand:V_FP 0 "register_operand" "= v") | |
3109 | (unspec:V_FP | |
3110 | [(match_operand:V_FP 1 "gcn_alu_operand" "vSvB")] | |
3111 | MATH_UNOP_1OR2REG))] | |
3112 | "" | |
db6a9fe3 | 3113 | "v_<math_unop_insn>%i0\t%0, %1" |
eff73c10 KCY |
3114 | [(set_attr "type" "vop1") |
3115 | (set_attr "length" "8")]) | |
3116 | ||
;; Scalar exp2/log2 (MATH_UNOP_1REG) on FP_1REG modes.  Only enabled when
;; flag_unsafe_math_optimizations is set.
3117 | (define_insn "<math_unop><mode>2" | |
3118 | [(set (match_operand:FP_1REG 0 "register_operand" "= v") | |
3119 | (unspec:FP_1REG | |
3120 | [(match_operand:FP_1REG 1 "gcn_alu_operand" "vSvB")] | |
3121 | MATH_UNOP_1REG))] | |
3122 | "flag_unsafe_math_optimizations" | |
db6a9fe3 | 3123 | "v_<math_unop_insn>%i0\t%0, %1" |
eff73c10 KCY |
3124 | [(set_attr "type" "vop1") |
3125 | (set_attr "length" "8")]) | |
3126 | ||
;; Vector exp2/log2 (MATH_UNOP_1REG) on V_FP_1REG modes.  Only enabled when
;; flag_unsafe_math_optimizations is set.
3127 | (define_insn "<math_unop><mode>2<exec>" | |
3128 | [(set (match_operand:V_FP_1REG 0 "register_operand" "= v") | |
3129 | (unspec:V_FP_1REG | |
3130 | [(match_operand:V_FP_1REG 1 "gcn_alu_operand" "vSvB")] | |
3131 | MATH_UNOP_1REG))] | |
3132 | "flag_unsafe_math_optimizations" | |
db6a9fe3 | 3133 | "v_<math_unop_insn>%i0\t%0, %1" |
eff73c10 KCY |
3134 | [(set_attr "type" "vop1") |
3135 | (set_attr "length" "8")]) | |
3136 | ||
;; Scalar sin/cos instruction (MATH_UNOP_TRIG) on FP_1REG modes.  The name
;; starts with "*", so no gen_ function is created for it; it matches the
;; unspec produced by the trig define_expand.  Only enabled when
;; flag_unsafe_math_optimizations is set.
3137 | (define_insn "*<math_unop><mode>2_insn" | |
3138 | [(set (match_operand:FP_1REG 0 "register_operand" "= v") | |
3139 | (unspec:FP_1REG | |
3140 | [(match_operand:FP_1REG 1 "gcn_alu_operand" "vSvB")] | |
3141 | MATH_UNOP_TRIG))] | |
3142 | "flag_unsafe_math_optimizations" | |
db6a9fe3 | 3143 | "v_<math_unop_insn>%i0\t%0, %1" |
eff73c10 KCY |
3144 | [(set_attr "type" "vop1") |
3145 | (set_attr "length" "8")]) | |
3146 | ||
;; Vector sin/cos instruction (MATH_UNOP_TRIG) on V_FP_1REG modes.  The name
;; starts with "*", so no gen_ function is created for it; it matches the
;; unspec produced by the trig define_expand.  Only enabled when
;; flag_unsafe_math_optimizations is set.
3147 | (define_insn "*<math_unop><mode>2<exec>_insn" | |
3148 | [(set (match_operand:V_FP_1REG 0 "register_operand" "= v") | |
3149 | (unspec:V_FP_1REG | |
3150 | [(match_operand:V_FP_1REG 1 "gcn_alu_operand" "vSvB")] | |
3151 | MATH_UNOP_TRIG))] | |
3152 | "flag_unsafe_math_optimizations" | |
db6a9fe3 | 3153 | "v_<math_unop_insn>%i0\t%0, %1" |
eff73c10 KCY |
3154 | [(set_attr "type" "vop1") |
3155 | (set_attr "length" "8")]) | |
3156 | ||
3157 | ; Trigonometric functions need their input scaled by 1/(2*PI) first. | |
3158 | ||
;; Scalar sin/cos expander.  It emits two sets: a fresh pseudo (operand 2)
;; receives operand 1 multiplied by the constant from gcn_dconst1over2pi ()
;; (operand 3), and the result is the MATH_UNOP_TRIG unspec of that pseudo.
;; Only enabled when flag_unsafe_math_optimizations is set.
3159 | (define_expand "<math_unop><mode>2" | |
3160 | [(set (match_dup 2) | |
3161 | (mult:FP_1REG | |
3162 | (match_dup 3) | |
3163 | (match_operand:FP_1REG 1 "gcn_alu_operand"))) | |
3164 | (set (match_operand:FP_1REG 0 "register_operand") | |
3165 | (unspec:FP_1REG | |
3166 | [(match_dup 2)] | |
3167 | MATH_UNOP_TRIG))] | |
3168 | "flag_unsafe_math_optimizations" | |
3169 | { | |
3170 | operands[2] = gen_reg_rtx (<MODE>mode); | |
3171 | operands[3] = const_double_from_real_value (gcn_dconst1over2pi (), | |
3172 | <MODE>mode); | |
3173 | }) | |
3174 | ||
;; Vector sin/cos expander.  Same two-step shape as the scalar expander; the
;; scale factor (operand 3) is built by gcn_vec_constant from the
;; <SCALAR_MODE> constant returned by gcn_dconst1over2pi ().
;; Only enabled when flag_unsafe_math_optimizations is set.
3175 | (define_expand "<math_unop><mode>2<exec>" | |
3176 | [(set (match_dup 2) | |
3177 | (mult:V_FP_1REG | |
3178 | (match_dup 3) | |
3179 | (match_operand:V_FP_1REG 1 "gcn_alu_operand"))) | |
3180 | (set (match_operand:V_FP_1REG 0 "register_operand") | |
3181 | (unspec:V_FP_1REG | |
3182 | [(match_dup 2)] | |
3183 | MATH_UNOP_TRIG))] | |
3184 | "flag_unsafe_math_optimizations" | |
3185 | { | |
3186 | operands[2] = gen_reg_rtx (<MODE>mode); | |
3187 | operands[3] = | |
3188 | gcn_vec_constant (<MODE>mode, | |
3189 | const_double_from_real_value (gcn_dconst1over2pi (), | |
3190 | <SCALAR_MODE>mode)); | |
3191 | }) | |
3192 | ||
3193 | ; Implement ldexp pattern | |
3194 | ||
;; ldexp for scalar and vector FP modes (SV_FP), as one v_ldexp instruction.
;; Operand 1 is the FP value; operand 2 has mode <VnSI> (the integer
;; exponent operand of UNSPEC_LDEXP).
eff73c10 | 3195 | (define_insn "ldexp<mode>3<exec>" |
0be4fbea AS |
3196 | [(set (match_operand:SV_FP 0 "register_operand" "= v") |
3197 | (unspec:SV_FP | |
3198 | [(match_operand:SV_FP 1 "gcn_alu_operand" " vA") | |
45381d6f | 3199 | (match_operand:<VnSI> 2 "gcn_alu_operand" "vSvA")] |
eff73c10 KCY |
3200 | UNSPEC_LDEXP))] |
3201 | "" | |
3202 | "v_ldexp%i0\t%0, %1, %2" | |
3203 | [(set_attr "type" "vop3a") | |
3204 | (set_attr "length" "8")]) | |
3205 | ||
3206 | ; Implement frexp patterns | |
3207 | ||
;; Scalar frexp, exponent part: produces an SImode result from an FP input
;; via v_frexp_exp_i32.  The mnemonic suffix comes from operand 1 (%i1).
3208 | (define_insn "frexp<mode>_exp2" | |
3209 | [(set (match_operand:SI 0 "register_operand" "=v") | |
3210 | (unspec:SI | |
3211 | [(match_operand:FP 1 "gcn_alu_operand" "vB")] | |
3212 | UNSPEC_FREXP_EXP))] | |
3213 | "" | |
3214 | "v_frexp_exp_i32%i1\t%0, %1" | |
3215 | [(set_attr "type" "vop1") | |
3216 | (set_attr "length" "8")]) | |
3217 | ||
;; Scalar frexp, mantissa part: FP input to FP result of the same mode via
;; v_frexp_mant.
3218 | (define_insn "frexp<mode>_mant2" | |
3219 | [(set (match_operand:FP 0 "register_operand" "=v") | |
3220 | (unspec:FP | |
3221 | [(match_operand:FP 1 "gcn_alu_operand" "vB")] | |
3222 | UNSPEC_FREXP_MANT))] | |
3223 | "" | |
3224 | "v_frexp_mant%i1\t%0, %1" | |
3225 | [(set_attr "type" "vop1") | |
3226 | (set_attr "length" "8")]) | |
3227 | ||
;; Vector frexp, exponent part: V_FP input, <VnSI> result, via
;; v_frexp_exp_i32.  The mnemonic suffix comes from operand 1 (%i1).
3228 | (define_insn "frexp<mode>_exp2<exec>" |
45381d6f AS |
3229 | [(set (match_operand:<VnSI> 0 "register_operand" "=v") |
3230 | (unspec:<VnSI> |
eff73c10 KCY |
3231 | [(match_operand:V_FP 1 "gcn_alu_operand" "vB")] |
3232 | UNSPEC_FREXP_EXP))] | |
3233 | "" | |
3234 | "v_frexp_exp_i32%i1\t%0, %1" | |
3235 | [(set_attr "type" "vop1") | |
3236 | (set_attr "length" "8")]) | |
3237 | ||
;; Vector frexp, mantissa part: V_FP input to V_FP result via v_frexp_mant.
3238 | (define_insn "frexp<mode>_mant2<exec>" | |
3239 | [(set (match_operand:V_FP 0 "register_operand" "=v") | |
3240 | (unspec:V_FP | |
3241 | [(match_operand:V_FP 1 "gcn_alu_operand" "vB")] | |
3242 | UNSPEC_FREXP_MANT))] | |
3243 | "" | |
3244 | "v_frexp_mant%i1\t%0, %1" | |
3245 | [(set_attr "type" "vop1") | |
3246 | (set_attr "length" "8")]) | |
3d6275e3 AS |
3247 | |
3248 | ;; }}} | |
3249 | ;; {{{ FP fused multiply and add | |
3250 | ||
;; Vector fused multiply-add: op0 = op1 * op2 + op3, one v_fma instruction.
;; Operand 1 is commutative with operand 2 ("%").  In each alternative only
;; one operand carries the "vSvA" constraint; the others are "vA".
3251 | (define_insn "fma<mode>4<exec>" |
03876953 AS |
3252 | [(set (match_operand:V_FP 0 "register_operand" "= v, v") |
3253 | (fma:V_FP | |
3254 | (match_operand:V_FP 1 "gcn_alu_operand" "% vA, vA") | |
3255 | (match_operand:V_FP 2 "gcn_alu_operand" " vA,vSvA") | |
3256 | (match_operand:V_FP 3 "gcn_alu_operand" "vSvA, vA")))] | |
3d6275e3 AS |
3257 | "" |
3258 | "v_fma%i0\t%0, %1, %2, %3" | |
3259 | [(set_attr "type" "vop3a") | |
3260 | (set_attr "length" "8")]) | |
3261 | ||
;; Vector fused multiply-add with operand 2 negated:
;; op0 = op1 * (-op2) + op3.  There is no "%" here, so three alternatives
;; spell out which single operand may take the "vSvA" constraint.
3262 | (define_insn "fma<mode>4_negop2<exec>" |
03876953 AS |
3263 | [(set (match_operand:V_FP 0 "register_operand" "= v, v, v") |
3264 | (fma:V_FP | |
3265 | (match_operand:V_FP 1 "gcn_alu_operand" " vA, vA,vSvA") | |
3266 | (neg:V_FP | |
3267 | (match_operand:V_FP 2 "gcn_alu_operand" " vA,vSvA, vA")) | |
3268 | (match_operand:V_FP 3 "gcn_alu_operand" "vSvA, vA, vA")))] | |
3d6275e3 AS |
3269 | "" |
3270 | "v_fma%i0\t%0, %1, -%2, %3" | |
3271 | [(set_attr "type" "vop3a") | |
3272 | (set_attr "length" "8")]) | |
3273 | ||
;; Scalar fused multiply-add: op0 = op1 * op2 + op3, one v_fma instruction.
;; Same constraint layout as the vector pattern.
3274 | (define_insn "fma<mode>4" |
03876953 AS |
3275 | [(set (match_operand:FP 0 "register_operand" "= v, v") |
3276 | (fma:FP | |
3277 | (match_operand:FP 1 "gcn_alu_operand" "% vA, vA") | |
3278 | (match_operand:FP 2 "gcn_alu_operand" " vA,vSvA") | |
3279 | (match_operand:FP 3 "gcn_alu_operand" "vSvA, vA")))] | |
3d6275e3 AS |
3280 | "" |
3281 | "v_fma%i0\t%0, %1, %2, %3" | |
3282 | [(set_attr "type" "vop3a") | |
3283 | (set_attr "length" "8")]) | |
3284 | ||
;; Scalar fused multiply-add with operand 2 negated:
;; op0 = op1 * (-op2) + op3.  Same three-alternative layout as the vector
;; pattern.
3285 | (define_insn "fma<mode>4_negop2" |
03876953 AS |
3286 | [(set (match_operand:FP 0 "register_operand" "= v, v, v") |
3287 | (fma:FP | |
3288 | (match_operand:FP 1 "gcn_alu_operand" " vA, vA,vSvA") | |
3289 | (neg:FP | |
3290 | (match_operand:FP 2 "gcn_alu_operand" " vA,vSvA, vA")) | |
3291 | (match_operand:FP 3 "gcn_alu_operand" "vSvA, vA, vA")))] | |
3d6275e3 AS |
3292 | "" |
3293 | "v_fma%i0\t%0, %1, -%2, %3" | |
3294 | [(set_attr "type" "vop3a") | |
3295 | (set_attr "length" "8")]) | |
3296 | ||
1bde3ace AJ |
;; Vector fused multiply-subtract: op0 = op1 * op2 - op3, emitted as v_fma
;; with operand 3 negated in the assembler template.
3297 | (define_insn "fms<mode>4<exec>" |
3298 | [(set (match_operand:V_FP 0 "register_operand" "= v, v") | |
3299 | (fma:V_FP | |
3300 | (match_operand:V_FP 1 "gcn_alu_operand" "% vA, vA") | |
3301 | (match_operand:V_FP 2 "gcn_alu_operand" " vA,vSvA") | |
3302 | (neg:V_FP | |
3303 | (match_operand:V_FP 3 "gcn_alu_operand" "vSvA, vA"))))] | |
3304 | "" | |
3305 | "v_fma%i0\t%0, %1, %2, -%3" | |
3306 | [(set_attr "type" "vop3a") | |
3307 | (set_attr "length" "8")]) | |
3308 | ||
;; Vector fused multiply-subtract with operand 2 negated:
;; op0 = op1 * (-op2) - op3, emitted as v_fma with both operand 2 and
;; operand 3 negated in the assembler template.
3309 | (define_insn "fms<mode>4_negop2<exec>" | |
3310 | [(set (match_operand:V_FP 0 "register_operand" "= v, v, v") | |
3311 | (fma:V_FP | |
3312 | (match_operand:V_FP 1 "gcn_alu_operand" " vA, vA,vSvA") | |
3313 | (neg:V_FP | |
3314 | (match_operand:V_FP 2 "gcn_alu_operand" " vA,vSvA, vA")) | |
3315 | (neg:V_FP | |
3316 | (match_operand:V_FP 3 "gcn_alu_operand" "vSvA, vA, vA"))))] | |
3317 | "" | |
3318 | "v_fma%i0\t%0, %1, -%2, -%3" | |
3319 | [(set_attr "type" "vop3a") | |
3320 | (set_attr "length" "8")]) | |
3321 | ||
;; Scalar fused multiply-subtract: op0 = op1 * op2 - op3, emitted as v_fma
;; with operand 3 negated in the assembler template.
3322 | (define_insn "fms<mode>4" | |
3323 | [(set (match_operand:FP 0 "register_operand" "= v, v") | |
3324 | (fma:FP | |
3325 | (match_operand:FP 1 "gcn_alu_operand" "% vA, vA") | |
3326 | (match_operand:FP 2 "gcn_alu_operand" " vA,vSvA") | |
3327 | (neg:FP | |
3328 | (match_operand:FP 3 "gcn_alu_operand" "vSvA, vA"))))] | |
3329 | "" | |
3330 | "v_fma%i0\t%0, %1, %2, -%3" | |
3331 | [(set_attr "type" "vop3a") | |
3332 | (set_attr "length" "8")]) | |
3333 | ||
;; Scalar fused multiply-subtract with operand 2 negated:
;; op0 = op1 * (-op2) - op3, emitted as v_fma with both operand 2 and
;; operand 3 negated in the assembler template.
3334 | (define_insn "fms<mode>4_negop2" | |
3335 | [(set (match_operand:FP 0 "register_operand" "= v, v, v") | |
3336 | (fma:FP | |
3337 | (match_operand:FP 1 "gcn_alu_operand" " vA, vA,vSvA") | |
3338 | (neg:FP | |
3339 | (match_operand:FP 2 "gcn_alu_operand" " vA,vSvA, vA")) | |
3340 | (neg:FP | |
3341 | (match_operand:FP 3 "gcn_alu_operand" "vSvA, vA, vA"))))] | |
3342 | "" | |
3343 | "v_fma%i0\t%0, %1, -%2, -%3" | |
3344 | [(set_attr "type" "vop3a") | |
3345 | (set_attr "length" "8")]) | |
3346 | ||
3d6275e3 AS |
3347 | ;; }}} |
3348 | ;; {{{ FP division | |
3349 | ||
;; Reciprocal (UNSPEC_RCP) for scalar and vector FP modes via v_rcp.  The
;; div<mode>3 expander uses it as the initial estimate and then refines it.
3350 | (define_insn "recip<mode>2<exec>" |
cfdc45f7 AS |
3351 | [(set (match_operand:SV_FP 0 "register_operand" "= v") |
3352 | (unspec:SV_FP | |
3353 | [(match_operand:SV_FP 1 "gcn_alu_operand" "vSvB")] | |
c8812bac | 3354 | UNSPEC_RCP))] |
3d6275e3 AS |
3355 | "" |
3356 | "v_rcp%i0\t%0, %1" | |
3357 | [(set_attr "type" "vop1") | |
3358 | (set_attr "length" "8")]) | |
3359 | ||
cfdc45f7 AS |
3360 | ;; v_div_scale takes a numerator (op2) and denominator (op1) and returns the |
3361 | ;; one that matches op3 adjusted for best results in reciprocal division. | |
3362 | ;; It also emits a VCC mask that is intended for input to v_div_fmas. | |
3363 | ;; The caller is expected to call this twice, once for each input. The output | |
3364 | ;; VCC is the same in both cases, so the caller may discard one. | |
;; Two outputs: the scaled value (operand 0) and a DImode mask (operand 4).
;; Note the assembler template prints the sources in the order %3, %1, %2.
3365 | (define_insn "div_scale<mode><exec_vcc>" | |
3366 | [(set (match_operand:SV_SFDF 0 "register_operand" "=v") | |
3367 | (unspec:SV_SFDF | |
3368 | [(match_operand:SV_SFDF 1 "gcn_alu_operand" "v") | |
3369 | (match_operand:SV_SFDF 2 "gcn_alu_operand" "v") | |
3370 | (match_operand:SV_SFDF 3 "gcn_alu_operand" "v")] | |
3371 | UNSPEC_DIV_SCALE)) | |
3372 | (set (match_operand:DI 4 "register_operand" "=SvcV") | |
3373 | (unspec:DI | |
3374 | [(match_dup 1) (match_dup 2) (match_dup 3)] | |
3375 | UNSPEC_DIV_SCALE))] | |
3376 | "" | |
3377 | "v_div_scale%i0\t%0, %4, %3, %1, %2" | |
3378 | [(set_attr "type" "vop3b") |
3d6275e3 AS |
3379 | (set_attr "length" "8")]) | |
3380 | ||
cfdc45f7 AS |
3381 | ;; v_div_fmas is "FMA and Scale" that uses the VCC output from v_div_scale |
3382 | ;; to conditionally scale the output of the whole division operation. | |
3383 | ;; This is necessary to counter the adjustments made by v_div_scale and | |
3384 | ;; replaces the last FMA instruction of the Newton Raphson algorithm. | |
;; Operand 4 is the mask input, constrained to "cV".  It appears in the
;; template only after the ";", i.e. as an assembler comment rather than an
;; explicit instruction operand.  The "vccwait" attribute is set to 5.
3385 | (define_insn "div_fmas<mode><exec>" | |
3386 | [(set (match_operand:SV_SFDF 0 "register_operand" "=v") | |
3387 | (unspec:SV_SFDF | |
3388 | [(plus:SV_SFDF | |
3389 | (mult:SV_SFDF | |
3390 | (match_operand:SV_SFDF 1 "gcn_alu_operand" "v") | |
3391 | (match_operand:SV_SFDF 2 "gcn_alu_operand" "v")) | |
3392 | (match_operand:SV_SFDF 3 "gcn_alu_operand" "v")) | |
3393 | (match_operand:DI 4 "register_operand" "cV")] | |
3394 | UNSPEC_DIV_FMAS))] | |
3395 | "" | |
3396 | "v_div_fmas%i0\t%0, %1, %2, %3; %4" | |
3397 | [(set_attr "type" "vop3a") | |
3398 | (set_attr "length" "8") | |
3399 | (set_attr "vccwait" "5")]) | |
3400 | ||
3401 | ;; v_div_fixup takes the inputs and outputs of a division operation already | |
3402 | ;; completed and cleans up the floating-point sign bit, infinity, underflow, | |
3403 | ;; overflow, and NaN status. It will also emit any FP exceptions. | |
3404 | ;; op1: quotient, op2: denominator, op3: numerator | |
;; Defined for all SV_FP modes; all three inputs use the "v" constraint.
3405 | (define_insn "div_fixup<mode><exec>" | |
3406 | [(set (match_operand:SV_FP 0 "register_operand" "=v") | |
3407 | (unspec:SV_FP | |
3408 | [(match_operand:SV_FP 1 "register_operand" "v") | |
3409 | (match_operand:SV_FP 2 "gcn_alu_operand" "v") | |
3410 | (match_operand:SV_FP 3 "gcn_alu_operand" "v")] | |
3411 | UNSPEC_DIV_FIXUP))] | |
3412 | "" | |
3413 | "v_div_fixup%i0\t%0, %1, %2, %3" | |
3414 | [(set_attr "type" "vop3a") | |
3415 | (set_attr "length" "8")]) | |
3d6275e3 AS |
3416 | |
;; FP division expander for the SV_SFDF modes: operand 0 = operand 1 /
;; operand 2.  The sequence emitted is:
;;   1. div_scale twice, giving the scaled denominator and scaled numerator
;;      (the VCC from the first call is discarded);
;;   2. recip of the scaled denominator, refined by two FMA steps;
;;   3. multiply by the scaled numerator, then three further FMA steps;
;;   4. div_fmas using the saved VCC;
;;   5. div_fixup with the original denominator and numerator.
3417 | (define_expand "div<mode>3" |
cfdc45f7 AS |
3418 | [(match_operand:SV_SFDF 0 "register_operand") |
3419 | (match_operand:SV_SFDF 1 "gcn_alu_operand") | |
3420 | (match_operand:SV_SFDF 2 "gcn_alu_operand")] | |
3421 | "" | |
3422 | { | |
3423 | rtx numerator = operands[1]; | |
3424 | rtx denominator = operands[2]; | |
3425 | |
3426 | /* Scale the inputs if they are close to the FP limits. | |
3427 | This will be reversed later. */ | |
3428 | rtx vcc = gen_reg_rtx (DImode); | |
3429 | rtx discardedvcc = gen_reg_rtx (DImode); | |
3430 | rtx scaled_numerator = gen_reg_rtx (<MODE>mode); | |
3431 | rtx scaled_denominator = gen_reg_rtx (<MODE>mode); | |
3432 | emit_insn (gen_div_scale<mode> (scaled_denominator, | |
3433 | denominator, numerator, | |
3434 | denominator, discardedvcc)); | |
3435 | emit_insn (gen_div_scale<mode> (scaled_numerator, | |
3436 | denominator, numerator, | |
3437 | numerator, vcc)); | |
3438 | |
3439 | /* Find the reciprocal of the denominator, and use Newton-Raphson to | |
3440 | improve the accuracy over the basic hardware instruction. */ | |
c8812bac JB |
3441 | rtx one = gcn_vec_constant (<MODE>mode, |
3442 | const_double_from_real_value (dconst1, <SCALAR_MODE>mode)); | |
3d6275e3 | 3443 | rtx initrcp = gen_reg_rtx (<MODE>mode); |
cfdc45f7 AS |
3444 | rtx fma1 = gen_reg_rtx (<MODE>mode); |
3445 | rtx rcp = gen_reg_rtx (<MODE>mode); | |
3446 | emit_insn (gen_recip<mode>2 (initrcp, scaled_denominator)); | |
3447 | emit_insn (gen_fma<mode>4_negop2 (fma1, initrcp, scaled_denominator, one)); | |
3448 | emit_insn (gen_fma<mode>4 (rcp, fma1, initrcp, initrcp)); | |
3449 | |
3450 | /* Do the division "a/b" via "a*1/b" and use Newton-Raphson to improve | |
3451 | the accuracy. The "div_fmas" instruction reverses any scaling | |
3452 | performed by "div_scale", above. */ | |
3453 | rtx div_est = gen_reg_rtx (<MODE>mode); | |
3454 | rtx fma2 = gen_reg_rtx (<MODE>mode); | |
3455 | rtx fma3 = gen_reg_rtx (<MODE>mode); | |
3456 | rtx fma4 = gen_reg_rtx (<MODE>mode); | |
3457 | rtx fmas = gen_reg_rtx (<MODE>mode); | |
3458 | emit_insn (gen_mul<mode>3 (div_est, scaled_numerator, rcp)); | |
3459 | emit_insn (gen_fma<mode>4_negop2 (fma2, div_est, scaled_denominator, | |
3460 | scaled_numerator)); | |
3461 | emit_insn (gen_fma<mode>4 (fma3, fma2, rcp, div_est)); | |
3462 | emit_insn (gen_fma<mode>4_negop2 (fma4, fma3, scaled_denominator, | |
3463 | scaled_numerator)); | |
3464 | emit_insn (gen_div_fmas<mode> (fmas, fma4, rcp, fma3, vcc)); | |
3465 | |
3466 | /* Finally, use "div_fixup" to get the details right and find errors. */ | |
3467 | emit_insn (gen_div_fixup<mode> (operands[0], fmas, denominator, | |
3468 | numerator)); | |
3d6275e3 AS |
3469 | DONE; |
3470 | }) | |
3471 | ||
3472 | ;; }}} | |
3473 | ;; {{{ Int/FP conversions | |
3474 | ||
;; Mode iterators for the int/FP conversion patterns.  CVT_FROM_MODE and
;; CVT_TO_MODE list the same scalar modes so a pattern can iterate over
;; every source/destination pair.  VCVT_MODE lists the vector integer and FP
;; modes together; VCVT_FMODE and VCVT_IMODE are its FP-only and
;; integer-only subsets.
3475 | (define_mode_iterator CVT_FROM_MODE [HI SI HF SF DF]) | |
3476 | (define_mode_iterator CVT_TO_MODE [HI SI HF SF DF]) | |
3477 | |
45381d6f AS |
3478 | (define_mode_iterator VCVT_MODE |
3479 | [V2HI V2SI V2HF V2SF V2DF | |
3480 | V4HI V4SI V4HF V4SF V4DF | |
3481 | V8HI V8SI V8HF V8SF V8DF | |
3482 | V16HI V16SI V16HF V16SF V16DF | |
3483 | V32HI V32SI V32HF V32SF V32DF | |
3484 | V64HI V64SI V64HF V64SF V64DF]) | |
3485 | (define_mode_iterator VCVT_FMODE | |
3486 | [V2HF V2SF V2DF | |
3487 | V4HF V4SF V4DF | |
3488 | V8HF V8SF V8DF | |
3489 | V16HF V16SF V16DF | |
3490 | V32HF V32SF V32DF | |
3491 | V64HF V64SF V64DF]) | |
3492 | (define_mode_iterator VCVT_IMODE | |
3493 | [V2HI V2SI | |
3494 | V4HI V4SI | |
3495 | V8HI V8SI | |
3496 | V16HI V16SI | |
3497 | V32HI V32SI | |
3498 | V64HI V64SI]) | |
3d6275e3 AS |
3499 | |
;; cvt_op iterates over the conversion RTL codes.  cvt_name maps each code to
;; the name fragment used in the pattern name.  cvt_operands maps each code
;; to the pair of mnemonic-suffix escapes for the destination (operand 0)
;; and source (operand 1); the "%u" forms are used for the unsigned side.
3500 | (define_code_iterator cvt_op [fix unsigned_fix | |
3501 | float unsigned_float | |
3502 | float_extend float_truncate]) | |
3503 | (define_code_attr cvt_name [(fix "fix_trunc") (unsigned_fix "fixuns_trunc") | |
3504 | (float "float") (unsigned_float "floatuns") | |
3505 | (float_extend "extend") (float_truncate "trunc")]) | |
3506 | (define_code_attr cvt_operands [(fix "%i0%i1") (unsigned_fix "%u0%i1") | |
3507 | (float "%i0%i1") (unsigned_float "%i0%u1") | |
3508 | (float_extend "%i0%i1") | |
3509 | (float_truncate "%i0%i1")]) | |
3510 | ||
;; Scalar conversions: one v_cvt instruction for every combination of
;; cvt_op, CVT_FROM_MODE and CVT_TO_MODE that gcn_valid_cvt_p accepts.
3511 | (define_insn "<cvt_name><CVT_FROM_MODE:mode><CVT_TO_MODE:mode>2" | |
3512 | [(set (match_operand:CVT_TO_MODE 0 "register_operand" "= v") | |
3513 | (cvt_op:CVT_TO_MODE | |
3514 | (match_operand:CVT_FROM_MODE 1 "gcn_alu_operand" "vSvB")))] | |
3515 | "gcn_valid_cvt_p (<CVT_FROM_MODE:MODE>mode, <CVT_TO_MODE:MODE>mode, | |
3516 | <cvt_name>_cvt)" | |
3517 | "v_cvt<cvt_operands>\t%0, %1" | |
3518 | [(set_attr "type" "vop1") | |
3519 | (set_attr "length" "8")]) | |
3520 | ||
3d66c777 AS |
;; Vector conversions whose destination is an FP vector mode.  Enabled only
;; when MODE_VF is equal for source and destination and gcn_valid_cvt_p
;; accepts the mode pair for this conversion.
3521 | (define_insn "<cvt_name><VCVT_MODE:mode><VCVT_FMODE:mode>2<exec>" |
3522 | [(set (match_operand:VCVT_FMODE 0 "register_operand" "= v") | |
3523 | (cvt_op:VCVT_FMODE | |
3524 | (match_operand:VCVT_MODE 1 "gcn_alu_operand" "vSvB")))] | |
0d8753cf AS |
3525 | "MODE_VF (<VCVT_MODE:MODE>mode) == MODE_VF (<VCVT_FMODE:MODE>mode) |
3526 | && gcn_valid_cvt_p (<VCVT_MODE:MODE>mode, <VCVT_FMODE:MODE>mode, | |
3527 | <cvt_name>_cvt)" | |
3d66c777 AS |
3528 | "v_cvt<cvt_operands>\t%0, %1" |
3529 | [(set_attr "type" "vop1") | |
3530 | (set_attr "length" "8")]) | |
3531 | ||
;; Vector conversions from an FP vector mode to an integer vector mode.
;; Enabled only when MODE_VF is equal for source and destination and
;; gcn_valid_cvt_p accepts the mode pair for this conversion.
3532 | (define_insn "<cvt_name><VCVT_FMODE:mode><VCVT_IMODE:mode>2<exec>" | |
3533 | [(set (match_operand:VCVT_IMODE 0 "register_operand" "= v") | |
3534 | (cvt_op:VCVT_IMODE | |
3535 | (match_operand:VCVT_FMODE 1 "gcn_alu_operand" "vSvB")))] | |
0d8753cf AS |
3536 | "MODE_VF (<VCVT_IMODE:MODE>mode) == MODE_VF (<VCVT_FMODE:MODE>mode) |
3537 | && gcn_valid_cvt_p (<VCVT_FMODE:MODE>mode, <VCVT_IMODE:MODE>mode, | |
3538 | <cvt_name>_cvt)" | |
3d6275e3 AS |
3539 | "v_cvt<cvt_operands>\t%0, %1" |
3540 | [(set_attr "type" "vop1") | |
3541 | (set_attr "length" "8")]) | |
3542 | ||
3543 | ;; }}} | |
3544 | ;; {{{ Int/int conversions | |
3545 | ||
;; Code iterators for integer/integer conversions.  all_convert covers all
;; three codes; zero_convert leaves out sign_extend.  convop gives the name
;; fragment for each code.
99890e15 | 3546 | (define_code_iterator all_convert [truncate zero_extend sign_extend]) |
3d66c777 AS |
3547 | (define_code_iterator zero_convert [truncate zero_extend]) |
3548 | (define_code_attr convop [ | |
3549 | (sign_extend "extend") | |
3550 | (zero_extend "zero_extend") | |
3551 | (truncate "trunc")]) | |
3552 | ||
99890e15 AS |
;; Named expander for truncate/zero_extend/sign_extend between single-register
;; integer vector modes.  It has no preparation code; the emitted RTL is
;; matched by one of the "_sdwa" or "_shift" insns that follow.
3553 | (define_expand "<convop><V_INT_1REG_ALT:mode><V_INT_1REG:mode>2<exec>" |
3554 | [(set (match_operand:V_INT_1REG 0 "register_operand" "=v") | |
3555 | (all_convert:V_INT_1REG | |
3556 | (match_operand:V_INT_1REG_ALT 1 "gcn_alu_operand" " v")))] | |
3557 | "") | |
3558 | ||
;; Truncate / zero-extend using a single v_mov_b32_sdwa, with the
;; destination and source selectors taken from the modes' <sdwa> attribute.
;; Only available when TARGET_SDWA.
3559 | (define_insn "*<convop><V_INT_1REG_ALT:mode><V_INT_1REG:mode>_sdwa<exec>" |
03876953 AS |
3560 | [(set (match_operand:V_INT_1REG 0 "register_operand" "=v") |
3561 | (zero_convert:V_INT_1REG | |
3562 | (match_operand:V_INT_1REG_ALT 1 "gcn_alu_operand" " v")))] | |
68e03492 | 3563 | "TARGET_SDWA" |
03876953 | 3564 | "v_mov_b32_sdwa\t%0, %1 dst_sel:<V_INT_1REG:sdwa> dst_unused:UNUSED_PAD src0_sel:<V_INT_1REG_ALT:sdwa>" |
3d66c777 AS |
3565 | [(set_attr "type" "vop_sdwa") |
3566 | (set_attr "length" "8")]) | |
3567 | ||
;; Sign-extend using a single v_mov_b32_sdwa with the sext() source modifier
;; and the source selector taken from the source mode's <sdwa> attribute.
;; Only available when TARGET_SDWA.
99890e15 | 3568 | (define_insn "extend<V_INT_1REG_ALT:mode><V_INT_1REG:mode>_sdwa<exec>" |
03876953 AS |
3569 | [(set (match_operand:V_INT_1REG 0 "register_operand" "=v") |
3570 | (sign_extend:V_INT_1REG | |
3571 | (match_operand:V_INT_1REG_ALT 1 "gcn_alu_operand" " v")))] | |
68e03492 | 3572 | "TARGET_SDWA" |
03876953 | 3573 | "v_mov_b32_sdwa\t%0, sext(%1) src0_sel:<V_INT_1REG_ALT:sdwa>" |
3d66c777 AS |
3574 | [(set_attr "type" "vop_sdwa") |
3575 | (set_attr "length" "8")]) | |
3576 | ||
99890e15 AS |
;; Fallback for all three integer conversions when !TARGET_SDWA.  The value
;; is shifted left and then right by 24 bits if either scalar mode is QImode,
;; or by 16 bits if either is HImode; otherwise a plain v_mov_b32 is emitted.
;; The right shift is arithmetic for sign-extend and truncate and logical
;; for zero-extend.  The shift count is stored in operands[2] for the
;; template.
3577 | (define_insn "*<convop><V_INT_1REG_ALT:mode><V_INT_1REG:mode>_shift<exec>" |
3578 | [(set (match_operand:V_INT_1REG 0 "register_operand" "=v") | |
3579 | (all_convert:V_INT_1REG | |
3580 | (match_operand:V_INT_1REG_ALT 1 "gcn_alu_operand" " v")))] | |
68e03492 | 3581 | "!TARGET_SDWA" |
99890e15 AS |
3582 | { |
3583 | enum {extend, zero_extend, trunc}; | |
3584 | rtx shiftwidth = (<V_INT_1REG_ALT:SCALAR_MODE>mode == QImode | |
3585 | || <V_INT_1REG:SCALAR_MODE>mode == QImode | |
3586 | ? GEN_INT (24) | |
3587 | : <V_INT_1REG_ALT:SCALAR_MODE>mode == HImode | |
3588 | || <V_INT_1REG:SCALAR_MODE>mode == HImode | |
3589 | ? GEN_INT (16) | |
3590 | : NULL); | |
3591 | operands[2] = shiftwidth; | |
3592 | |
3593 | if (!shiftwidth) | |
3594 | return "v_mov_b32 %0, %1"; | |
3595 | else if (<convop> == extend || <convop> == trunc) | |
3596 | return "v_lshlrev_b32\t%0, %2, %1\;v_ashrrev_i32\t%0, %2, %0"; | |
3597 | else | |
3598 | return "v_lshlrev_b32\t%0, %2, %1\;v_lshrrev_b32\t%0, %2, %0"; | |
3599 | } | |
3600 | [(set_attr "type" "mult") | |
3601 | (set_attr "length" "8")]) | |
3602 | ||
3d6275e3 AS |
3603 | ;; GCC can already do these for scalar types, but not for vector types. |
3604 | ;; Unfortunately you can't just do SUBREG on a vector to select the low part, | |
3605 | ;; so there must be a few tricks here. | |
3606 | ||
1165109b | 3607 | (define_insn_and_split "trunc<vndi><mode>2" |
03876953 AS |
3608 | [(set (match_operand:V_INT_1REG 0 "register_operand" "=v") |
3609 | (truncate:V_INT_1REG | |
1165109b | 3610 | (match_operand:<VnDI> 1 "gcn_alu_operand" " v")))] |
3d6275e3 AS |
3611 | "" |
3612 | "#" | |
3613 | "reload_completed" | |
3d66c777 | 3614 | [(const_int 0)] |
3d6275e3 | 3615 | { |
1165109b | 3616 | rtx inlo = gcn_operand_part (<VnDI>mode, operands[1], 0); |
3d66c777 AS |
3617 | rtx out = operands[0]; |
3618 | ||
1165109b AS |
3619 | if (<MODE>mode != <VnSI>mode) |
3620 | emit_insn (gen_trunc<vnsi><mode>2 (out, inlo)); | |
3d66c777 AS |
3621 | else |
3622 | emit_move_insn (out, inlo); | |
3d6275e3 AS |
3623 | } |
3624 | [(set_attr "type" "vop2") | |
3d66c777 AS |
3625 | (set_attr "length" "4")]) |
3626 | ||
1165109b | 3627 | (define_insn_and_split "trunc<vndi><mode>2_exec" |
03876953 AS |
3628 | [(set (match_operand:V_INT_1REG 0 "register_operand" "=v") |
3629 | (vec_merge:V_INT_1REG | |
3630 | (truncate:V_INT_1REG | |
1165109b | 3631 | (match_operand:<VnDI> 1 "gcn_alu_operand" " v")) |
03876953 AS |
3632 | (match_operand:V_INT_1REG 2 "gcn_alu_or_unspec_operand" "U0") |
3633 | (match_operand:DI 3 "gcn_exec_operand" " e")))] | |
3d6275e3 AS |
3634 | "" |
3635 | "#" | |
3636 | "reload_completed" | |
3d66c777 | 3637 | [(const_int 0)] |
3d6275e3 | 3638 | { |
3d66c777 | 3639 | rtx out = operands[0]; |
1165109b | 3640 | rtx inlo = gcn_operand_part (<VnDI>mode, operands[1], 0); |
3d66c777 AS |
3641 | rtx merge = operands[2]; |
3642 | rtx exec = operands[3]; | |
3643 | ||
1165109b AS |
3644 | if (<MODE>mode != <VnSI>mode) |
3645 | emit_insn (gen_trunc<vnsi><mode>2_exec (out, inlo, merge, exec)); | |
3d66c777 | 3646 | else |
b7886845 | 3647 | emit_insn (gen_mov<mode>_exec (out, inlo, merge, exec)); |
3d6275e3 AS |
3648 | } |
3649 | [(set_attr "type" "vop2") | |
3d66c777 AS |
3650 | (set_attr "length" "4")]) |
3651 | ||
1165109b AS |
3652 | (define_insn_and_split "<convop><mode><vndi>2" |
3653 | [(set (match_operand:<VnDI> 0 "register_operand" "=v") | |
3654 | (any_extend:<VnDI> | |
03876953 | 3655 | (match_operand:V_INT_1REG 1 "gcn_alu_operand" " v")))] |
3d66c777 AS |
3656 | "" |
3657 | "#" | |
3658 | "reload_completed" | |
3659 | [(const_int 0)] | |
3660 | { /* Split the extension to <VnDI> into separate writes of destination parts 0 and 1. */ | |
1165109b AS |
3661 | rtx outlo = gcn_operand_part (<VnDI>mode, operands[0], 0); /* part 0 of the destination */ |
3662 | rtx outhi = gcn_operand_part (<VnDI>mode, operands[0], 1); /* part 1 of the destination */ | |
3d66c777 AS |
3663 | rtx in = operands[1]; |
3664 | ||
1165109b AS |
3665 | if (<MODE>mode != <VnSI>mode) /* source is not already <VnSI>: extend it into part 0 first */ |
3666 | emit_insn (gen_<convop><mode><vnsi>2 (outlo, in)); | |
3d66c777 AS |
3667 | else /* source is already <VnSI>: a plain move fills part 0 */ |
3668 | emit_move_insn (outlo, in); | |
3669 | if ('<su>' == 's') /* signed case: part 1 = part 0 shifted right by 31 (ashr) */ | |
1165109b | 3670 | emit_insn (gen_ashr<vnsi>3 (outhi, outlo, GEN_INT (31))); |
3d66c777 | 3671 | else /* otherwise: part 1 is a vector of zeros */ |
1165109b | 3672 | emit_insn (gen_vec_duplicate<vnsi> (outhi, const0_rtx)); |
3d66c777 AS |
3673 | } |
3674 | [(set_attr "type" "mult") | |
3675 | (set_attr "length" "12")]) | |
3676 | ||
1165109b AS |
3677 | (define_insn_and_split "<convop><mode><vndi>2_exec" |
3678 | [(set (match_operand:<VnDI> 0 "register_operand" "=v") | |
3679 | (vec_merge:<VnDI> | |
3680 | (any_extend:<VnDI> | |
03876953 | 3681 | (match_operand:V_INT_1REG 1 "gcn_alu_operand" " v")) |
1165109b | 3682 | (match_operand:<VnDI> 2 "gcn_alu_or_unspec_operand" "U0") |
03876953 | 3683 | (match_operand:DI 3 "gcn_exec_operand" " e")))] |
3d66c777 AS |
3684 | "" |
3685 | "#" | |
3686 | "reload_completed" | |
3687 | [(const_int 0)] | |
3688 | { | |
1165109b AS |
3689 | rtx outlo = gcn_operand_part (<VnDI>mode, operands[0], 0); |
3690 | rtx outhi = gcn_operand_part (<VnDI>mode, operands[0], 1); | |
3d66c777 | 3691 | rtx in = operands[1]; |
1165109b AS |
3692 | rtx mergelo = gcn_operand_part (<VnDI>mode, operands[2], 0); |
3693 | rtx mergehi = gcn_operand_part (<VnDI>mode, operands[2], 1); | |
3d66c777 AS |
3694 | rtx exec = operands[3]; |
3695 | ||
1165109b AS |
3696 | if (<MODE>mode != <VnSI>mode) |
3697 | emit_insn (gen_<convop><mode><vnsi>2_exec (outlo, in, mergelo, exec)); | |
3d66c777 | 3698 | else |
b7886845 | 3699 | emit_insn (gen_mov<mode>_exec (outlo, in, mergelo, exec)); |
3d66c777 | 3700 | if ('<su>' == 's') |
1165109b AS |
3701 | emit_insn (gen_ashr<vnsi>3_exec (outhi, outlo, GEN_INT (31), mergehi, |
3702 | exec)); | |
3d66c777 | 3703 | else |
1165109b AS |
3704 | emit_insn (gen_vec_duplicate<vnsi>_exec (outhi, const0_rtx, mergehi, |
3705 | exec)); | |
3d66c777 AS |
3706 | } |
3707 | [(set_attr "type" "mult") | |
3708 | (set_attr "length" "12")]) | |
3d6275e3 AS |
3709 | |
3710 | ;; }}} | |
3711 | ;; {{{ Vector comparison/merge | |
3712 | ||
3713 | (define_insn "vec_cmp<mode>di" | |
c7ec7bd1 | 3714 | [(set (match_operand:DI 0 "register_operand" "=cV,cV, e, e,Sg,Sg, e, e") |
dbde9e2d | 3715 | (match_operator:DI 1 "gcn_fp_compare_operator" |
c7ec7bd1 AS |
3716 | [(match_operand:V_noQI 2 "gcn_alu_operand" "vSv, B,vSv, B, v,vA,vSv, B") |
3717 | (match_operand:V_noQI 3 "gcn_vop3_operand" " v, v, v, v,vA, v, v, v")])) | |
3718 | (clobber (match_scratch:DI 4 "= X, X, cV,cV, X, X, X, X"))] | |
3d6275e3 AS |
3719 | "" |
3720 | "@ | |
3721 | v_cmp%E1\tvcc, %2, %3 | |
3722 | v_cmp%E1\tvcc, %2, %3 | |
3723 | v_cmpx%E1\tvcc, %2, %3 | |
3724 | v_cmpx%E1\tvcc, %2, %3 | |
3725 | v_cmp%E1\t%0, %2, %3 | |
c7ec7bd1 AS |
3726 | v_cmp%E1\t%0, %2, %3 |
3727 | v_cmpx%E1\t%2, %3 | |
3728 | v_cmpx%E1\t%2, %3" | |
3729 | [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vop3a,vopc,vopc") | |
3730 | (set_attr "length" "4,8,4,8,8,8,4,8") | |
3731 | (set_attr "rdna" "*,*,no,no,*,*,yes,yes")]) | |
3d6275e3 AS |
3732 | |
3733 | (define_expand "vec_cmpu<mode>di" | |
3734 | [(match_operand:DI 0 "register_operand") | |
f4d4a406 | 3735 | (match_operator 1 "gcn_compare_operator" |
03876953 AS |
3736 | [(match_operand:V_INT_noQI 2 "gcn_alu_operand") |
3737 | (match_operand:V_INT_noQI 3 "gcn_vop3_operand")])] | |
3d6275e3 AS |
3738 | "" |
3739 | { | |
3740 | /* Unsigned comparisons use the same patterns as signed comparisons, | |
3741 | except that they use unsigned operators (e.g. LTU vs LT). | |
3742 | The '%E1' directive then does the Right Thing. */ | |
3743 | emit_insn (gen_vec_cmp<mode>di (operands[0], operands[1], operands[2], | |
3744 | operands[3])); | |
3745 | DONE; | |
3746 | }) | |
3747 | ||
0e159efc | 3748 | ; There's no instruction for 8-bit vector comparison, so we need to extend. |
1165109b | 3749 | (define_expand "vec_cmp<u><mode>di" |
0e159efc | 3750 | [(match_operand:DI 0 "register_operand") |
f4d4a406 | 3751 | (match_operator 1 "gcn_compare_operator" |
1165109b AS |
3752 | [(any_extend:<VnSI> (match_operand:V_QI 2 "gcn_alu_operand")) |
3753 | (any_extend:<VnSI> (match_operand:V_QI 3 "gcn_vop3_operand"))])] | |
0e159efc AS |
3754 | "can_create_pseudo_p ()" |
3755 | { | |
1165109b AS |
3756 | rtx sitmp1 = gen_reg_rtx (<VnSI>mode); |
3757 | rtx sitmp2 = gen_reg_rtx (<VnSI>mode); | |
0e159efc | 3758 | |
1165109b AS |
3759 | emit_insn (gen_<expander><mode><vnsi>2 (sitmp1, operands[2])); |
3760 | emit_insn (gen_<expander><mode><vnsi>2 (sitmp2, operands[3])); | |
3761 | emit_insn (gen_vec_cmp<vnsi>di (operands[0], operands[1], sitmp1, sitmp2)); | |
0e159efc AS |
3762 | DONE; |
3763 | }) | |
3764 | ||
3d6275e3 | 3765 | (define_insn "vec_cmp<mode>di_exec" |
c7ec7bd1 | 3766 | [(set (match_operand:DI 0 "register_operand" "=cV,cV, e, e,Sg,Sg, e, e") |
3d6275e3 | 3767 | (and:DI |
f4d4a406 | 3768 | (match_operator 1 "gcn_fp_compare_operator" |
c7ec7bd1 AS |
3769 | [(match_operand:V_noQI 2 "gcn_alu_operand" "vSv, B,vSv, B, v,vA,vSv, B") |
3770 | (match_operand:V_noQI 3 "gcn_vop3_operand" " v, v, v, v,vA, v, v, v")]) | |
3771 | (match_operand:DI 4 "gcn_exec_reg_operand" " e, e, e, e, e, e, e, e"))) | |
3772 | (clobber (match_scratch:DI 5 "= X, X, cV,cV, X, X, X, X"))] | |
3d6275e3 AS |
3773 | "" |
3774 | "@ | |
3775 | v_cmp%E1\tvcc, %2, %3 | |
3776 | v_cmp%E1\tvcc, %2, %3 | |
3777 | v_cmpx%E1\tvcc, %2, %3 | |
3778 | v_cmpx%E1\tvcc, %2, %3 | |
3779 | v_cmp%E1\t%0, %2, %3 | |
c7ec7bd1 AS |
3780 | v_cmp%E1\t%0, %2, %3 |
3781 | v_cmpx%E1\t%2, %3 | |
3782 | v_cmpx%E1\t%2, %3" | |
3783 | [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vop3a,vopc,vopc") | |
3784 | (set_attr "length" "4,8,4,8,8,8,4,8") | |
3785 | (set_attr "rdna" "*,*,no,no,*,*,yes,yes")]) | |
3d6275e3 | 3786 | |
0e159efc AS |
3787 | (define_expand "vec_cmpu<mode>di_exec" |
3788 | [(match_operand:DI 0 "register_operand") | |
f4d4a406 | 3789 | (match_operator 1 "gcn_compare_operator" |
03876953 AS |
3790 | [(match_operand:V_INT_noQI 2 "gcn_alu_operand") |
3791 | (match_operand:V_INT_noQI 3 "gcn_vop3_operand")]) | |
0e159efc AS |
3792 | (match_operand:DI 4 "gcn_exec_reg_operand")] |
3793 | "" | |
3794 | { | |
3795 | /* Unsigned comparisons use the same patterns as signed comparisons, | |
3796 | except that they use unsigned operators (e.g. LTU vs LT), so forward to | |
3797 | the signed _exec insn (not back to this expander); '%E1' does the Right Thing. */ | |
3798 | emit_insn (gen_vec_cmp<mode>di_exec (operands[0], operands[1], | |
3799 | operands[2], operands[3], | |
3800 | operands[4])); | |
3801 | DONE; | |
3802 | }) | |
3803 | ||
1165109b | 3804 | (define_expand "vec_cmp<u><mode>di_exec" |
0e159efc | 3805 | [(match_operand:DI 0 "register_operand") |
f4d4a406 | 3806 | (match_operator 1 "gcn_compare_operator" |
1165109b AS |
3807 | [(any_extend:<VnSI> (match_operand:V_QI 2 "gcn_alu_operand")) |
3808 | (any_extend:<VnSI> (match_operand:V_QI 3 "gcn_vop3_operand"))]) | |
0e159efc AS |
3809 | (match_operand:DI 4 "gcn_exec_reg_operand")] |
3810 | "can_create_pseudo_p ()" | |
3811 | { | |
1165109b AS |
3812 | rtx sitmp1 = gen_reg_rtx (<VnSI>mode); |
3813 | rtx sitmp2 = gen_reg_rtx (<VnSI>mode); | |
0e159efc | 3814 | |
1165109b AS |
3815 | emit_insn (gen_<expander><mode><vnsi>2_exec (sitmp1, operands[2], |
3816 | operands[2], operands[4])); | |
3817 | emit_insn (gen_<expander><mode><vnsi>2_exec (sitmp2, operands[3], | |
3818 | operands[3], operands[4])); | |
3819 | emit_insn (gen_vec_cmp<vnsi>di_exec (operands[0], operands[1], sitmp1, | |
3820 | sitmp2, operands[4])); | |
0e159efc AS |
3821 | DONE; |
3822 | }) | |
3823 | ||
3d6275e3 | 3824 | (define_insn "vec_cmp<mode>di_dup" |
c7ec7bd1 | 3825 | [(set (match_operand:DI 0 "register_operand" "=cV,cV, e,e,Sg, e,e") |
dbde9e2d | 3826 | (match_operator:DI 1 "gcn_fp_compare_operator" |
03876953 | 3827 | [(vec_duplicate:V_noQI |
3d6275e3 | 3828 | (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand" |
c7ec7bd1 AS |
3829 | " Sv, B,Sv,B, A,Sv,B")) |
3830 | (match_operand:V_noQI 3 "gcn_vop3_operand" " v, v, v,v, v, v,v")])) | |
3831 | (clobber (match_scratch:DI 4 "= X,X,cV,cV, X, X,X"))] | |
3d6275e3 AS |
3832 | "" |
3833 | "@ | |
3834 | v_cmp%E1\tvcc, %2, %3 | |
3835 | v_cmp%E1\tvcc, %2, %3 | |
3836 | v_cmpx%E1\tvcc, %2, %3 | |
3837 | v_cmpx%E1\tvcc, %2, %3 | |
c7ec7bd1 AS |
3838 | v_cmp%E1\t%0, %2, %3 |
3839 | v_cmpx%E1\t%2, %3 | |
3840 | v_cmpx%E1\t%2, %3" | |
3841 | [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vopc,vopc") | |
3842 | (set_attr "length" "4,8,4,8,8,4,8") | |
3843 | (set_attr "rdna" "*,*,no,no,*,yes,yes")]) | |
3d6275e3 AS |
3844 | |
3845 | (define_insn "vec_cmp<mode>di_dup_exec" | |
c7ec7bd1 | 3846 | [(set (match_operand:DI 0 "register_operand" "=cV,cV, e,e,Sg, e,e") |
3d6275e3 | 3847 | (and:DI |
f4d4a406 | 3848 | (match_operator 1 "gcn_fp_compare_operator" |
03876953 | 3849 | [(vec_duplicate:V_noQI |
3d6275e3 | 3850 | (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand" |
c7ec7bd1 AS |
3851 | " Sv, B,Sv,B, A,Sv,B")) |
3852 | (match_operand:V_noQI 3 "gcn_vop3_operand" " v, v, v,v, v, v,v")]) | |
3853 | (match_operand:DI 4 "gcn_exec_reg_operand" " e, e, e,e, e, e,e"))) | |
3854 | (clobber (match_scratch:DI 5 "= X,X,cV,cV, X, X,X"))] | |
3d6275e3 AS |
3855 | "" |
3856 | "@ | |
3857 | v_cmp%E1\tvcc, %2, %3 | |
3858 | v_cmp%E1\tvcc, %2, %3 | |
3859 | v_cmpx%E1\tvcc, %2, %3 | |
3860 | v_cmpx%E1\tvcc, %2, %3 | |
c7ec7bd1 AS |
3861 | v_cmp%E1\t%0, %2, %3 |
3862 | v_cmpx%E1\t%2, %3 | |
3863 | v_cmpx%E1\t%2, %3" | |
3864 | [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vopc,vopc") | |
3865 | (set_attr "length" "4,8,4,8,8,4,8") | |
3866 | (set_attr "rdna" "*,*,no,no,*,yes,yes")]) | |
3d6275e3 AS |
3867 | |
3868 | (define_expand "vcond_mask_<mode>di" | |
3869 | [(parallel | |
03876953 AS |
3870 | [(set (match_operand:V_ALL 0 "register_operand" "") |
3871 | (vec_merge:V_ALL | |
3872 | (match_operand:V_ALL 1 "gcn_vop3_operand" "") | |
3873 | (match_operand:V_ALL 2 "gcn_alu_operand" "") | |
2b99bed8 | 3874 | (match_operand:DI 3 "register_operand" ""))) |
1165109b | 3875 | (clobber (scratch:<VnDI>))])] |
3d6275e3 AS |
3876 | "" |
3877 | "") | |
3878 | ||
03876953 AS |
3879 | (define_expand "vcond<V_ALL:mode><V_ALL_ALT:mode>" |
3880 | [(match_operand:V_ALL 0 "register_operand") | |
3881 | (match_operand:V_ALL 1 "gcn_vop3_operand") | |
3882 | (match_operand:V_ALL 2 "gcn_alu_operand") | |
f4d4a406 | 3883 | (match_operator 3 "gcn_fp_compare_operator" |
03876953 AS |
3884 | [(match_operand:V_ALL_ALT 4 "gcn_alu_operand") |
3885 | (match_operand:V_ALL_ALT 5 "gcn_vop3_operand")])] | |
3d6275e3 AS |
3886 | "" |
3887 | { | |
3888 | rtx tmp = gen_reg_rtx (DImode); | |
03876953 | 3889 | emit_insn (gen_vec_cmp<V_ALL_ALT:mode>di |
96eb1765 | 3890 | (tmp, operands[3], operands[4], operands[5])); |
03876953 | 3891 | emit_insn (gen_vcond_mask_<V_ALL:mode>di |
96eb1765 | 3892 | (operands[0], operands[1], operands[2], tmp)); |
3d6275e3 AS |
3893 | DONE; |
3894 | }) | |
3895 | ||
03876953 AS |
3896 | (define_expand "vcond<V_ALL:mode><V_ALL_ALT:mode>_exec" |
3897 | [(match_operand:V_ALL 0 "register_operand") | |
3898 | (match_operand:V_ALL 1 "gcn_vop3_operand") | |
3899 | (match_operand:V_ALL 2 "gcn_alu_operand") | |
f4d4a406 | 3900 | (match_operator 3 "gcn_fp_compare_operator" |
03876953 AS |
3901 | [(match_operand:V_ALL_ALT 4 "gcn_alu_operand") |
3902 | (match_operand:V_ALL_ALT 5 "gcn_vop3_operand")]) | |
3d6275e3 AS |
3903 | (match_operand:DI 6 "gcn_exec_reg_operand" "e")] |
3904 | "" | |
3905 | { | |
3906 | rtx tmp = gen_reg_rtx (DImode); | |
03876953 | 3907 | emit_insn (gen_vec_cmp<V_ALL_ALT:mode>di_exec |
96eb1765 | 3908 | (tmp, operands[3], operands[4], operands[5], operands[6])); |
03876953 | 3909 | emit_insn (gen_vcond_mask_<V_ALL:mode>di |
96eb1765 | 3910 | (operands[0], operands[1], operands[2], tmp)); |
3d6275e3 AS |
3911 | DONE; |
3912 | }) | |
3913 | ||
03876953 AS |
3914 | (define_expand "vcondu<V_ALL:mode><V_INT:mode>" |
3915 | [(match_operand:V_ALL 0 "register_operand") | |
3916 | (match_operand:V_ALL 1 "gcn_vop3_operand") | |
3917 | (match_operand:V_ALL 2 "gcn_alu_operand") | |
f4d4a406 | 3918 | (match_operator 3 "gcn_fp_compare_operator" |
03876953 AS |
3919 | [(match_operand:V_INT 4 "gcn_alu_operand") |
3920 | (match_operand:V_INT 5 "gcn_vop3_operand")])] | |
3d6275e3 AS |
3921 | "" |
3922 | { | |
3923 | rtx tmp = gen_reg_rtx (DImode); | |
03876953 | 3924 | emit_insn (gen_vec_cmpu<V_INT:mode>di |
96eb1765 | 3925 | (tmp, operands[3], operands[4], operands[5])); |
03876953 | 3926 | emit_insn (gen_vcond_mask_<V_ALL:mode>di |
96eb1765 | 3927 | (operands[0], operands[1], operands[2], tmp)); |
3d6275e3 AS |
3928 | DONE; |
3929 | }) | |
3930 | ||
03876953 AS |
3931 | (define_expand "vcondu<V_ALL:mode><V_INT:mode>_exec" |
3932 | [(match_operand:V_ALL 0 "register_operand") | |
3933 | (match_operand:V_ALL 1 "gcn_vop3_operand") | |
3934 | (match_operand:V_ALL 2 "gcn_alu_operand") | |
f4d4a406 | 3935 | (match_operator 3 "gcn_fp_compare_operator" |
03876953 AS |
3936 | [(match_operand:V_INT 4 "gcn_alu_operand") |
3937 | (match_operand:V_INT 5 "gcn_vop3_operand")]) | |
3d6275e3 AS |
3938 | (match_operand:DI 6 "gcn_exec_reg_operand" "e")] |
3939 | "" | |
3940 | { | |
3941 | rtx tmp = gen_reg_rtx (DImode); | |
03876953 | 3942 | emit_insn (gen_vec_cmpu<V_INT:mode>di_exec |
96eb1765 | 3943 | (tmp, operands[3], operands[4], operands[5], operands[6])); |
03876953 | 3944 | emit_insn (gen_vcond_mask_<V_ALL:mode>di |
96eb1765 | 3945 | (operands[0], operands[1], operands[2], tmp)); |
3d6275e3 AS |
3946 | DONE; |
3947 | }) | |
3948 | ||
3949 | ;; }}} | |
3950 | ;; {{{ Fully masked loop support | |
3951 | ||
3952 | (define_expand "while_ultsidi" | |
3953 | [(match_operand:DI 0 "register_operand") | |
3954 | (match_operand:SI 1 "") | |
48960b68 AS |
3955 | (match_operand:SI 2 "") |
3956 | (match_operand:SI 3 "")] | |
3d6275e3 AS |
3957 | "" |
3958 | { /* Sets operands[0] to a DImode bit mask derived from operands[1], [2] and [3]. */ | |
3959 | if (GET_CODE (operands[1]) != CONST_INT | |
3960 | || GET_CODE (operands[2]) != CONST_INT) | |
3961 | { /* At least one of operands[1]/[2] is not a constant: emit a vector compare. */ | |
3962 | rtx _0_1_2_3 = gen_rtx_REG (V64SImode, VGPR_REGNO (1)); /* presumably holds lane numbers 0,1,2,... -- TODO confirm */ | |
3963 | rtx tmp = _0_1_2_3; | |
3964 | if (GET_CODE (operands[1]) != CONST_INT | |
3965 | || INTVAL (operands[1]) != 0) | |
3966 | { /* operands[1] is not known to be zero: combine it with the vector via addv64si3_dup. */ | |
3967 | tmp = gen_reg_rtx (V64SImode); | |
3968 | emit_insn (gen_addv64si3_dup (tmp, _0_1_2_3, operands[1])); | |
3969 | } | |
3970 | emit_insn (gen_vec_cmpv64sidi_dup (operands[0], /* duplicated operands[2] GT tmp */ | |
3971 | gen_rtx_GT (VOIDmode, 0, 0), | |
3972 | operands[2], tmp)); | |
3973 | } | |
3974 | else | |
3975 | { /* Both operands[1] and operands[2] are constants: compute the mask directly. */ | |
3976 | HOST_WIDE_INT diff = INTVAL (operands[2]) - INTVAL (operands[1]); | |
3977 | HOST_WIDE_INT mask = (diff >= 64 ? -1 | |
3978 | : ~((unsigned HOST_WIDE_INT)-1 << diff)); /* low 'diff' bits set; NOTE(review): assumes diff >= 0 -- confirm */ | |
3979 | emit_move_insn (operands[0], gen_rtx_CONST_INT (VOIDmode, mask)); | |
3980 | } | |
48960b68 AS |
3981 | if (INTVAL (operands[3]) < 64) /* keep only the low operands[3] bits; NOTE(review): assumes operands[3] is a CONST_INT -- confirm */ |
3982 | emit_insn (gen_anddi3 (operands[0], operands[0], | |
3983 | gen_rtx_CONST_INT (VOIDmode, | |
3984 | ~((unsigned HOST_WIDE_INT)-1 | |
3985 | << INTVAL (operands[3]))))); | |
3d6275e3 AS |
3986 | DONE; |
3987 | }) | |
3988 | ||
3989 | (define_expand "maskload<mode>di" | |
8aeabd9f AS |
3990 | [(match_operand:V_MOV 0 "register_operand") |
3991 | (match_operand:V_MOV 1 "memory_operand") | |
3d6275e3 AS |
3992 | (match_operand 2 "")] |
3993 | "" | |
3994 | { | |
3995 | rtx exec = force_reg (DImode, operands[2]); | |
3996 | rtx addr = gcn_expand_scalar_to_vector_address | |
1165109b | 3997 | (<MODE>mode, exec, operands[1], gen_rtx_SCRATCH (<VnDI>mode)); |
3d6275e3 AS |
3998 | rtx as = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1])); |
3999 | rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1])); | |
95607c12 AS |
4000 | |
4001 | /* Masked lanes are required to hold zero. */ | |
4002 | emit_move_insn (operands[0], gcn_vec_constant (<MODE>mode, 0)); | |
4003 | ||
4004 | emit_insn (gen_gather<mode>_expr_exec (operands[0], addr, as, v, | |
4005 | operands[0], exec)); | |
3d6275e3 AS |
4006 | DONE; |
4007 | }) | |
4008 | ||
4009 | (define_expand "maskstore<mode>di" | |
8aeabd9f AS |
4010 | [(match_operand:V_MOV 0 "memory_operand") |
4011 | (match_operand:V_MOV 1 "register_operand") | |
3d6275e3 AS |
4012 | (match_operand 2 "")] |
4013 | "" | |
4014 | { | |
4015 | rtx exec = force_reg (DImode, operands[2]); | |
4016 | rtx addr = gcn_expand_scalar_to_vector_address | |
1165109b | 4017 | (<MODE>mode, exec, operands[0], gen_rtx_SCRATCH (<VnDI>mode)); |
3d6275e3 AS |
4018 | rtx as = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0])); |
4019 | rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0])); | |
4020 | emit_insn (gen_scatter<mode>_expr_exec (addr, operands[1], as, v, exec)); | |
4021 | DONE; | |
4022 | }) | |
4023 | ||
1165109b | 4024 | (define_expand "mask_gather_load<mode><vnsi>" |
8aeabd9f | 4025 | [(match_operand:V_MOV 0 "register_operand") |
3d6275e3 | 4026 | (match_operand:DI 1 "register_operand") |
1165109b | 4027 | (match_operand:<VnSI> 2 "register_operand") |
3d6275e3 AS |
4028 | (match_operand 3 "immediate_operand") |
4029 | (match_operand:SI 4 "gcn_alu_operand") | |
4030 | (match_operand:DI 5 "")] | |
4031 | "" | |
4032 | { | |
4033 | rtx exec = force_reg (DImode, operands[5]); | |
4034 | ||
95607c12 AS |
4035 | rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[1], |
4036 | operands[2], operands[4], | |
4037 | INTVAL (operands[3]), exec); | |
4038 | ||
4039 | /* Masked lanes are required to hold zero. */ | |
4040 | emit_move_insn (operands[0], gcn_vec_constant (<MODE>mode, 0)); | |
4041 | ||
1165109b | 4042 | if (GET_MODE (addr) == <VnDI>mode) |
95607c12 AS |
4043 | emit_insn (gen_gather<mode>_insn_1offset_exec (operands[0], addr, |
4044 | const0_rtx, const0_rtx, | |
4045 | const0_rtx, operands[0], | |
4046 | exec)); | |
4047 | else | |
4048 | emit_insn (gen_gather<mode>_insn_2offsets_exec (operands[0], operands[1], | |
4049 | addr, const0_rtx, | |
4050 | const0_rtx, const0_rtx, | |
4051 | operands[0], exec)); | |
3d6275e3 AS |
4052 | DONE; |
4053 | }) | |
4054 | ||
1165109b | 4055 | (define_expand "mask_scatter_store<mode><vnsi>" |
3d6275e3 | 4056 | [(match_operand:DI 0 "register_operand") |
1165109b | 4057 | (match_operand:<VnSI> 1 "register_operand") |
3d6275e3 AS |
4058 | (match_operand 2 "immediate_operand") |
4059 | (match_operand:SI 3 "gcn_alu_operand") | |
8aeabd9f | 4060 | (match_operand:V_MOV 4 "register_operand") |
3d6275e3 AS |
4061 | (match_operand:DI 5 "")] |
4062 | "" | |
4063 | { | |
4064 | rtx exec = force_reg (DImode, operands[5]); | |
4065 | ||
b5fb73b6 AS |
4066 | rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[0], |
4067 | operands[1], operands[3], | |
4068 | INTVAL (operands[2]), exec); | |
3d6275e3 | 4069 | |
1165109b | 4070 | if (GET_MODE (addr) == <VnDI>mode) |
b5fb73b6 AS |
4071 | emit_insn (gen_scatter<mode>_insn_1offset_exec (addr, const0_rtx, |
4072 | operands[4], const0_rtx, | |
4073 | const0_rtx, | |
4074 | exec)); | |
4075 | else | |
4076 | emit_insn (gen_scatter<mode>_insn_2offsets_exec (operands[0], addr, | |
4077 | const0_rtx, operands[4], | |
4078 | const0_rtx, const0_rtx, | |
4079 | exec)); | |
3d6275e3 AS |
4080 | DONE; |
4081 | }) | |
4082 | ||
5a80a6c3 | 4083 | (define_code_iterator cond_op [plus minus mult]) |
3d6275e3 AS |
4084 | |
4085 | (define_expand "cond_<expander><mode>" | |
03876953 | 4086 | [(match_operand:V_ALL 0 "register_operand") |
3d6275e3 | 4087 | (match_operand:DI 1 "register_operand") |
03876953 AS |
4088 | (cond_op:V_ALL |
4089 | (match_operand:V_ALL 2 "gcn_alu_operand") | |
4090 | (match_operand:V_ALL 3 "gcn_alu_operand")) | |
4091 | (match_operand:V_ALL 4 "register_operand")] | |
3d6275e3 AS |
4092 | "" |
4093 | { | |
4094 | operands[1] = force_reg (DImode, operands[1]); | |
4095 | operands[2] = force_reg (<MODE>mode, operands[2]); | |
4096 | ||
4097 | emit_insn (gen_<expander><mode>3_exec (operands[0], operands[2], | |
4098 | operands[3], operands[4], | |
4099 | operands[1])); | |
4100 | DONE; | |
4101 | }) | |
4102 | ||
553ff252 PAA |
4103 | (define_code_iterator cond_fminmaxop [smin smax]) |
4104 | ||
4105 | (define_expand "cond_<fexpander><mode>" | |
4106 | [(match_operand:V_FP 0 "register_operand") | |
4107 | (match_operand:DI 1 "register_operand") | |
4108 | (cond_fminmaxop:V_FP | |
4109 | (match_operand:V_FP 2 "gcn_alu_operand") | |
4110 | (match_operand:V_FP 3 "gcn_alu_operand")) | |
4111 | (match_operand:V_FP 4 "register_operand")] | |
4112 | "" | |
4113 | { | |
4114 | operands[1] = force_reg (DImode, operands[1]); | |
4115 | operands[2] = force_reg (<MODE>mode, operands[2]); | |
4116 | ||
4117 | emit_insn (gen_<fexpander><mode>3_exec (operands[0], operands[2], | |
4118 | operands[3], operands[4], | |
4119 | operands[1])); | |
4120 | DONE; | |
4121 | }) | |
4122 | ||
4123 | (define_code_iterator cond_minmaxop [smin smax umin umax]) | |
4124 | ||
4125 | (define_expand "cond_<expander><mode>" | |
4126 | [(match_operand:V_INT 0 "register_operand") | |
4127 | (match_operand:DI 1 "register_operand") | |
4128 | (cond_minmaxop:V_INT | |
4129 | (match_operand:V_INT 2 "gcn_alu_operand") | |
4130 | (match_operand:V_INT 3 "gcn_alu_operand")) | |
4131 | (match_operand:V_INT 4 "register_operand")] | |
4132 | "" | |
4133 | { | |
4134 | operands[1] = force_reg (DImode, operands[1]); | |
4135 | operands[2] = force_reg (<MODE>mode, operands[2]); | |
4136 | rtx tmp = gen_reg_rtx (<MODE>mode); | |
4137 | ||
4138 | emit_insn (gen_<expander><mode>3_exec (tmp, operands[2], operands[3], | |
4139 | gcn_gen_undef(<MODE>mode), | |
4140 | operands[1])); | |
4141 | emit_insn (gen_vcond_mask_<mode>di (operands[0], tmp, operands[4], | |
4142 | operands[1])); | |
4143 | DONE; | |
4144 | }) | |
4145 | ||
3d6275e3 AS |
4146 | (define_code_iterator cond_bitop [and ior xor]) |
4147 | ||
4148 | (define_expand "cond_<expander><mode>" | |
03876953 | 4149 | [(match_operand:V_INT 0 "register_operand") |
3d6275e3 | 4150 | (match_operand:DI 1 "register_operand") |
03876953 AS |
4151 | (cond_bitop:V_INT |
4152 | (match_operand:V_INT 2 "gcn_alu_operand") | |
4153 | (match_operand:V_INT 3 "gcn_alu_operand")) | |
4154 | (match_operand:V_INT 4 "register_operand")] | |
3d6275e3 AS |
4155 | "" |
4156 | { | |
4157 | operands[1] = force_reg (DImode, operands[1]); | |
4158 | operands[2] = force_reg (<MODE>mode, operands[2]); | |
4159 | ||
4160 | emit_insn (gen_<expander><mode>3_exec (operands[0], operands[2], | |
4161 | operands[3], operands[4], | |
4162 | operands[1])); | |
4163 | DONE; | |
4164 | }) | |
4165 | ||
9c7e898b PAA |
4166 | (define_code_iterator cond_shiftop [ashift lshiftrt ashiftrt]) |
4167 | ||
4168 | (define_expand "cond_<expander><mode>" | |
4169 | [(match_operand:V_INT_noHI 0 "register_operand") | |
4170 | (match_operand:DI 1 "register_operand") | |
4171 | (cond_shiftop:V_INT_noHI | |
4172 | (match_operand:V_INT_noHI 2 "gcn_alu_operand") | |
4173 | (match_operand:V_INT_noHI 3 "gcn_alu_operand")) | |
4174 | (match_operand:V_INT_noHI 4 "register_operand")] | |
4175 | "" | |
4176 | { | |
4177 | operands[1] = force_reg (DImode, operands[1]); | |
4178 | operands[2] = force_reg (<MODE>mode, operands[2]); | |
4179 | ||
4180 | rtx shiftby = gen_reg_rtx (<VnSI>mode); | |
4181 | convert_move (shiftby, operands[3], 0); | |
4182 | ||
4183 | emit_insn (gen_v<expander><mode>3_exec (operands[0], operands[2], | |
4184 | shiftby, operands[4], | |
4185 | operands[1])); | |
4186 | DONE; | |
4187 | }) | |
4188 | ||
3d6275e3 AS |
4189 | ;; }}} |
4190 | ;; {{{ Vector reductions | |
4191 | ||
4192 | (define_int_iterator REDUC_UNSPEC [UNSPEC_SMIN_DPP_SHR UNSPEC_SMAX_DPP_SHR | |
4193 | UNSPEC_UMIN_DPP_SHR UNSPEC_UMAX_DPP_SHR | |
4194 | UNSPEC_PLUS_DPP_SHR | |
4195 | UNSPEC_AND_DPP_SHR | |
4196 | UNSPEC_IOR_DPP_SHR UNSPEC_XOR_DPP_SHR]) | |
4197 | ||
4198 | (define_int_iterator REDUC_2REG_UNSPEC [UNSPEC_PLUS_DPP_SHR | |
4199 | UNSPEC_AND_DPP_SHR | |
4200 | UNSPEC_IOR_DPP_SHR UNSPEC_XOR_DPP_SHR]) | |
4201 | ||
4202 | ; FIXME: Isn't there a better way of doing this? | |
4203 | (define_int_attr reduc_unspec [(UNSPEC_SMIN_DPP_SHR "UNSPEC_SMIN_DPP_SHR") | |
4204 | (UNSPEC_SMAX_DPP_SHR "UNSPEC_SMAX_DPP_SHR") | |
4205 | (UNSPEC_UMIN_DPP_SHR "UNSPEC_UMIN_DPP_SHR") | |
4206 | (UNSPEC_UMAX_DPP_SHR "UNSPEC_UMAX_DPP_SHR") | |
4207 | (UNSPEC_PLUS_DPP_SHR "UNSPEC_PLUS_DPP_SHR") | |
4208 | (UNSPEC_AND_DPP_SHR "UNSPEC_AND_DPP_SHR") | |
4209 | (UNSPEC_IOR_DPP_SHR "UNSPEC_IOR_DPP_SHR") | |
4210 | (UNSPEC_XOR_DPP_SHR "UNSPEC_XOR_DPP_SHR")]) | |
4211 | ||
4212 | (define_int_attr reduc_op [(UNSPEC_SMIN_DPP_SHR "smin") | |
4213 | (UNSPEC_SMAX_DPP_SHR "smax") | |
4214 | (UNSPEC_UMIN_DPP_SHR "umin") | |
4215 | (UNSPEC_UMAX_DPP_SHR "umax") | |
4216 | (UNSPEC_PLUS_DPP_SHR "plus") | |
4217 | (UNSPEC_AND_DPP_SHR "and") | |
4218 | (UNSPEC_IOR_DPP_SHR "ior") | |
4219 | (UNSPEC_XOR_DPP_SHR "xor")]) | |
4220 | ||
4221 | (define_int_attr reduc_insn [(UNSPEC_SMIN_DPP_SHR "v_min%i0") | |
4222 | (UNSPEC_SMAX_DPP_SHR "v_max%i0") | |
4223 | (UNSPEC_UMIN_DPP_SHR "v_min%u0") | |
4224 | (UNSPEC_UMAX_DPP_SHR "v_max%u0") | |
a5879399 AS |
4225 | (UNSPEC_PLUS_DPP_SHR "v_add%U0") |
4226 | (UNSPEC_AND_DPP_SHR "v_and%B0") | |
4227 | (UNSPEC_IOR_DPP_SHR "v_or%B0") | |
4228 | (UNSPEC_XOR_DPP_SHR "v_xor%B0")]) | |
3d6275e3 AS |
4229 | |
4230 | (define_expand "reduc_<reduc_op>_scal_<mode>" | |
4231 | [(set (match_operand:<SCALAR_MODE> 0 "register_operand") | |
4232 | (unspec:<SCALAR_MODE> | |
f539029c | 4233 | [(match_operand:V_ALL 1 "register_operand")] |
3d6275e3 | 4234 | REDUC_UNSPEC))] |
68e03492 | 4235 | "!TARGET_WAVE64_COMPAT" |
3d6275e3 AS |
4236 | { |
4237 | rtx tmp = gcn_expand_reduc_scalar (<MODE>mode, operands[1], | |
4238 | <reduc_unspec>); | |
4239 | ||
f539029c AS |
4240 | rtx last_lane = GEN_INT (GET_MODE_NUNITS (<MODE>mode) - 1); |
4241 | emit_insn (gen_vec_extract<mode><scalar_mode> (operands[0], tmp, | |
4242 | last_lane)); | |
3d6275e3 AS |
4243 | |
4244 | DONE; | |
4245 | }) | |
4246 | ||
10aa0356 AS |
4247 | (define_expand "reduc_<fexpander>_scal_<mode>" |
4248 | [(match_operand:<SCALAR_MODE> 0 "register_operand") | |
4249 | (fminmaxop:V_FP | |
4250 | (match_operand:V_FP 1 "register_operand"))] | |
68e03492 | 4251 | "!TARGET_WAVE64_COMPAT" |
10aa0356 AS |
4252 | { |
4253 | /* fmin/fmax are identical to smin/smax. */ | |
4254 | emit_insn (gen_reduc_<expander>_scal_<mode> (operands[0], operands[1])); | |
4255 | DONE; | |
4256 | }) | |
4257 | ||
bf628a97 AS |
4258 | ;; Warning: This "-ffast-math" implementation converts in-order reductions |
4259 | ;; into associative reductions. It's also used where OpenMP or | |
4260 | ;; OpenACC parallelization has already broken the in-order semantics. | |
4261 | (define_expand "fold_left_plus_<mode>" | |
4262 | [(match_operand:<SCALAR_MODE> 0 "register_operand") | |
4263 | (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand") | |
f539029c | 4264 | (match_operand:V_FP 2 "gcn_alu_operand")] |
68e03492 | 4265 | "!TARGET_WAVE64_COMPAT |
7cc2262e | 4266 | && can_create_pseudo_p () |
bf628a97 AS |
4267 | && (flag_openacc || flag_openmp |
4268 | || flag_associative_math)" | |
4269 | { | |
4270 | rtx dest = operands[0]; | |
4271 | rtx scalar = operands[1]; | |
4272 | rtx vector = operands[2]; | |
4273 | rtx tmp = gen_reg_rtx (<SCALAR_MODE>mode); | |
4274 | ||
4275 | emit_insn (gen_reduc_plus_scal_<mode> (tmp, vector)); | |
4276 | emit_insn (gen_add<scalar_mode>3 (dest, scalar, tmp)); | |
4277 | DONE; | |
4278 | }) | |
3d6275e3 AS |
4279 | |
4280 | (define_insn "*<reduc_op>_dpp_shr_<mode>" | |
f539029c AS |
4281 | [(set (match_operand:V_1REG 0 "register_operand" "=v") |
4282 | (unspec:V_1REG | |
4283 | [(match_operand:V_1REG 1 "register_operand" "v") | |
4284 | (match_operand:V_1REG 2 "register_operand" "v") | |
4285 | (match_operand:SI 3 "const_int_operand" "n")] | |
3d6275e3 | 4286 | REDUC_UNSPEC))] |
023641d9 | 4287 | "TARGET_DPP_FULL" |
3d6275e3 AS |
4288 | { |
4289 | return gcn_expand_dpp_shr_insn (<MODE>mode, "<reduc_insn>", | |
4290 | <reduc_unspec>, INTVAL (operands[3])); | |
4291 | } | |
4292 | [(set_attr "type" "vop_dpp") | |
4293 | (set_attr "length" "8")]) | |
4294 | ||
; V_DI counterpart of the reduction step above, for unspecs drawn from
; REDUC_2REG_UNSPEC.  The insn condition is empty and the output template is
; "#", so the insn is never printed directly: once reload_completed is true it
; is split into two <VnSI> unspecs of the same kind.  The first works on part 0
; of the destination and of both vector inputs, the second on part 1 (parts as
; returned by gcn_operand_part); both reuse the constant operand 3 unchanged.
; NOTE(review): parts 0 and 1 are presumably the low and high 32-bit halves of
; each 64-bit lane -- confirm against gcn_operand_part.
1165109b | 4295 | (define_insn_and_split "*<reduc_op>_dpp_shr_<mode>" |
f539029c AS |
4296 | [(set (match_operand:V_DI 0 "register_operand" "=v") |
4297 | (unspec:V_DI | |
4298 | [(match_operand:V_DI 1 "register_operand" "v") | |
4299 | (match_operand:V_DI 2 "register_operand" "v") | |
4300 | (match_operand:SI 3 "const_int_operand" "n")] | |
3d6275e3 AS |
4301 | REDUC_2REG_UNSPEC))] |
4302 | "" | |
4303 | "#" | |
4304 | "reload_completed" | |
4305 | [(set (match_dup 4) | |
1165109b | 4306 | (unspec:<VnSI> |
3d6275e3 AS |
4307 | [(match_dup 6) (match_dup 8) (match_dup 3)] REDUC_2REG_UNSPEC)) |
4308 | (set (match_dup 5) | |
1165109b | 4309 | (unspec:<VnSI> |
3d6275e3 AS |
4310 | [(match_dup 7) (match_dup 9) (match_dup 3)] REDUC_2REG_UNSPEC))] |
4311 | { | |
1165109b AS |
4312 | operands[4] = gcn_operand_part (<MODE>mode, operands[0], 0); |
4313 | operands[5] = gcn_operand_part (<MODE>mode, operands[0], 1); | |
4314 | operands[6] = gcn_operand_part (<MODE>mode, operands[1], 0); | |
4315 | operands[7] = gcn_operand_part (<MODE>mode, operands[1], 1); | |
4316 | operands[8] = gcn_operand_part (<MODE>mode, operands[2], 0); | |
4317 | operands[9] = gcn_operand_part (<MODE>mode, operands[2], 1); | |
3d6275e3 AS |
4318 | } |
4319 | [(set_attr "type" "vmult") | |
4320 | (set_attr "length" "16")]) | |
4321 | ||
4322 | ; Special cases for addition. | |
4323 | ||
; Addition step for V_INT_1REG modes, matched via UNSPEC_PLUS_CARRY_DPP_SHR.
; Takes two vector registers and a constant operand 3, and declares a clobber
; of VCC_REG (DImode).  Only available when TARGET_DPP_FULL holds.
; The assembly is built by gcn_expand_dpp_shr_insn with the "v_add_co_u32"
; mnemonic; note that <VnSI>mode is passed rather than <MODE>mode, whatever
; mode the iterator selected.
; NOTE(review): VCC presumably receives the carry-out of the add -- the RTL
; here only records it as clobbered.
a5879399 | 4324 | (define_insn "*plus_carry_dpp_shr_<mode>" |
f539029c AS |
4325 | [(set (match_operand:V_INT_1REG 0 "register_operand" "=v") |
4326 | (unspec:V_INT_1REG | |
4327 | [(match_operand:V_INT_1REG 1 "register_operand" "v") | |
4328 | (match_operand:V_INT_1REG 2 "register_operand" "v") | |
03876953 | 4329 | (match_operand:SI 3 "const_int_operand" "n")] |
3d6275e3 AS |
4330 | UNSPEC_PLUS_CARRY_DPP_SHR)) |
4331 | (clobber (reg:DI VCC_REG))] | |
68e03492 | 4332 | "TARGET_DPP_FULL" |
3d6275e3 | 4333 | { |
b9bf0c3f | 4334 | return gcn_expand_dpp_shr_insn (<VnSI>mode, "v_add_co_u32", |
3d6275e3 AS |
4335 | UNSPEC_PLUS_CARRY_DPP_SHR, |
4336 | INTVAL (operands[3])); | |
4337 | } | |
4338 | [(set_attr "type" "vop_dpp") | |
4339 | (set_attr "length" "8")]) | |
4340 | ||
; Addition step with an extra DImode register input (operand 4, constraint
; "cV") for V_SI modes, matched via UNSPEC_PLUS_CARRY_IN_DPP_SHR.  Declares a
; clobber of VCC_REG and is only available when TARGET_DPP_FULL holds.
; The assembly is built by gcn_expand_dpp_shr_insn with the "v_addc_co_u32"
; mnemonic, the unspec code and the value of the constant operand 3.
; NOTE(review): operand 4 is presumably the incoming carry mask -- confirm
; how gcn_expand_dpp_shr_insn refers to it for this unspec.
1165109b | 4341 | (define_insn "*plus_carry_in_dpp_shr_<mode>" |
f539029c AS |
4342 | [(set (match_operand:V_SI 0 "register_operand" "=v") |
4343 | (unspec:V_SI | |
4344 | [(match_operand:V_SI 1 "register_operand" "v") | |
4345 | (match_operand:V_SI 2 "register_operand" "v") | |
4346 | (match_operand:SI 3 "const_int_operand" "n") | |
4347 | (match_operand:DI 4 "register_operand" "cV")] | |
3d6275e3 AS |
4348 | UNSPEC_PLUS_CARRY_IN_DPP_SHR)) |
4349 | (clobber (reg:DI VCC_REG))] | |
68e03492 | 4350 | "TARGET_DPP_FULL" |
3d6275e3 | 4351 | { |
b9bf0c3f | 4352 | return gcn_expand_dpp_shr_insn (<MODE>mode, "v_addc_co_u32", |
3d6275e3 AS |
4353 | UNSPEC_PLUS_CARRY_IN_DPP_SHR, |
4354 | INTVAL (operands[3])); | |
4355 | } | |
4356 | [(set_attr "type" "vop_dpp") | |
4357 | (set_attr "length" "8")]) | |
4358 | ||
; V_DI form of the UNSPEC_PLUS_CARRY_DPP_SHR addition step.  The insn
; condition is empty and the output template is "#", so it is always split
; once reload_completed is true.  The split emits two <VnSI> operations, in
; this order:
;   1. UNSPEC_PLUS_CARRY_DPP_SHR on part 0 of the destination and inputs;
;   2. UNSPEC_PLUS_CARRY_IN_DPP_SHR on part 1, with (reg:DI VCC_REG) as its
;      fourth input.
; Both halves declare a clobber of VCC_REG and reuse the constant operand 3.
; NOTE(review): the second half presumably consumes the carry left in VCC by
; the first, so the two must stay adjacent and in this order -- the RTL only
; expresses that dependency through the VCC clobber/use.
1165109b | 4359 | (define_insn_and_split "*plus_carry_dpp_shr_<mode>" |
f539029c AS |
4360 | [(set (match_operand:V_DI 0 "register_operand" "=v") |
4361 | (unspec:V_DI | |
4362 | [(match_operand:V_DI 1 "register_operand" "v") | |
4363 | (match_operand:V_DI 2 "register_operand" "v") | |
4364 | (match_operand:SI 3 "const_int_operand" "n")] | |
3d6275e3 AS |
4365 | UNSPEC_PLUS_CARRY_DPP_SHR)) |
4366 | (clobber (reg:DI VCC_REG))] | |
4367 | "" | |
4368 | "#" | |
4369 | "reload_completed" | |
4370 | [(parallel [(set (match_dup 4) | |
1165109b | 4371 | (unspec:<VnSI> |
3d6275e3 AS |
4372 | [(match_dup 6) (match_dup 8) (match_dup 3)] |
4373 | UNSPEC_PLUS_CARRY_DPP_SHR)) | |
4374 | (clobber (reg:DI VCC_REG))]) | |
4375 | (parallel [(set (match_dup 5) | |
1165109b | 4376 | (unspec:<VnSI> |
3d6275e3 AS |
4377 | [(match_dup 7) (match_dup 9) (match_dup 3) (reg:DI VCC_REG)] |
4378 | UNSPEC_PLUS_CARRY_IN_DPP_SHR)) | |
4379 | (clobber (reg:DI VCC_REG))])] | |
4380 | { | |
1165109b AS |
4381 | operands[4] = gcn_operand_part (<MODE>mode, operands[0], 0); |
4382 | operands[5] = gcn_operand_part (<MODE>mode, operands[0], 1); | |
4383 | operands[6] = gcn_operand_part (<MODE>mode, operands[1], 0); | |
4384 | operands[7] = gcn_operand_part (<MODE>mode, operands[1], 1); | |
4385 | operands[8] = gcn_operand_part (<MODE>mode, operands[2], 0); | |
4386 | operands[9] = gcn_operand_part (<MODE>mode, operands[2], 1); | |
3d6275e3 AS |
4387 | } |
4388 | [(set_attr "type" "vmult") | |
4389 | (set_attr "length" "16")]) | |
4390 | ||
3d6275e3 AS |
4391 | ;; }}} |
4392 | ;; {{{ Miscellaneous | |
4393 | ||
1165109b AS |
; Expander for the vec_series pattern on V_SI modes.  Operand 0 is the vector
; destination; operands 1 and 2 are SImode values.  It emits two insns:
; tmp = (hard register VGPR 1, viewed in the vector mode) * operand 2 via the
; mul..._dup pattern, then operand 0 = tmp + operand 1 via the add..._dup
; pattern.
; NOTE(review): this relies on VGPR 1 holding the per-lane index at this
; point -- presumably set up by the ABI/prologue; confirm.
4394 | (define_expand "vec_series<mode>" |
4395 | [(match_operand:V_SI 0 "register_operand") | |
3d6275e3 AS |
4396 | (match_operand:SI 1 "gcn_alu_operand") |
4397 | (match_operand:SI 2 "gcn_alu_operand")] | |
4398 | "" | |
4399 | { | |
1165109b AS |
4400 | rtx tmp = gen_reg_rtx (<MODE>mode); |
4401 | rtx v1 = gen_rtx_REG (<MODE>mode, VGPR_REGNO (1)); | |
3d6275e3 | 4402 | |
1165109b AS |
4403 | emit_insn (gen_mul<mode>3_dup (tmp, v1, operands[2])); |
4404 | emit_insn (gen_add<mode>3_dup (operands[0], tmp, operands[1])); | |
3d6275e3 AS |
4405 | DONE; |
4406 | }) | |
4407 | ||
1165109b AS |
; Expander for the vec_series pattern on V_DI modes.  Operand 0 is the vector
; destination; operands 1 and 2 are DImode values.  It emits three insns:
; tmp = (hard register VGPR 1, viewed in <VnSI>mode) * operand 2 via the
; mul..._zext_dup2 pattern, op1vec = operand 1 duplicated into a vector, and
; finally operand 0 = tmp + op1vec.
; Unlike the V_SI expander, VGPR 1 is read in the SImode-element vector mode
; and the base value goes through vec_duplicate before a plain vector add.
; NOTE(review): this relies on VGPR 1 holding the per-lane index at this
; point -- presumably set up by the ABI/prologue; confirm.
4408 | (define_expand "vec_series<mode>" |
4409 | [(match_operand:V_DI 0 "register_operand") | |
3d6275e3 AS |
4410 | (match_operand:DI 1 "gcn_alu_operand") |
4411 | (match_operand:DI 2 "gcn_alu_operand")] | |
4412 | "" | |
4413 | { | |
1165109b AS |
4414 | rtx tmp = gen_reg_rtx (<MODE>mode); |
4415 | rtx v1 = gen_rtx_REG (<VnSI>mode, VGPR_REGNO (1)); | |
4416 | rtx op1vec = gen_reg_rtx (<MODE>mode); | |
3d6275e3 | 4417 | |
1165109b AS |
4418 | emit_insn (gen_mul<mode>3_zext_dup2 (tmp, v1, operands[2])); |
4419 | emit_insn (gen_vec_duplicate<mode> (op1vec, operands[1])); | |
4420 | emit_insn (gen_add<mode>3 (operands[0], tmp, op1vec)); | |
3d6275e3 AS |
4421 | DONE; |
4422 | }) | |
4423 | ||
4424 | ;; }}} |