]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/config/gcn/gcn-valu.md
hosthooks.h: Fix GCC_HOST_HOOKS_H typo
[thirdparty/gcc.git] / gcc / config / gcn / gcn-valu.md
CommitLineData
a945c346 1;; Copyright (C) 2016-2024 Free Software Foundation, Inc.
3d6275e3
AS
2
3;; This file is free software; you can redistribute it and/or modify it under
4;; the terms of the GNU General Public License as published by the Free
5;; Software Foundation; either version 3 of the License, or (at your option)
6;; any later version.
7
8;; This file is distributed in the hope that it will be useful, but WITHOUT
9;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11;; for more details.
12
13;; You should have received a copy of the GNU General Public License
14;; along with GCC; see the file COPYING3. If not see
15;; <http://www.gnu.org/licenses/>.
16
17;; {{{ Vector iterators
cfdc45f7 18; SV iterators include both scalar and vector modes.
3d6275e3 19
; One iterator per element mode, each covering the 2-, 4-, 8-, 16-, 32- and
; 64-element vector modes of that element.
(define_mode_iterator V_QI [V2QI V4QI V8QI V16QI V32QI V64QI])
(define_mode_iterator V_HI [V2HI V4HI V8HI V16HI V32HI V64HI])
(define_mode_iterator V_HF [V2HF V4HF V8HF V16HF V32HF V64HF])
(define_mode_iterator V_SI [V2SI V4SI V8SI V16SI V32SI V64SI])
(define_mode_iterator V_SF [V2SF V4SF V8SF V16SF V32SF V64SF])
(define_mode_iterator V_DI [V2DI V4DI V8DI V16DI V32DI V64DI])
(define_mode_iterator V_DF [V2DF V4DF V8DF V16DF V32DF V64DF])
35
; Vector modes whose elements are smaller than a dword (QI and HI elements).
(define_mode_iterator V_QIHI
  [V2QI  V2HI    V4QI  V4HI    V8QI  V8HI
   V16QI V16HI   V32QI V32HI   V64QI V64HI])
dc941ea9 44
; Vector modes for one vector register (QI, HI, SI, HF and SF elements).
(define_mode_iterator V_1REG
  [V2QI  V2HI  V2SI  V2HF  V2SF
   V4QI  V4HI  V4SI  V4HF  V4SF
   V8QI  V8HI  V8SI  V8HF  V8SF
   V16QI V16HI V16SI V16HF V16SF
   V32QI V32HI V32SI V32HF V32SF
   V64QI V64HI V64SI V64HF V64SF])
; The same list of modes as V_1REG under a second iterator name.
(define_mode_iterator V_1REG_ALT
  [V2QI  V2HI  V2SI  V2HF  V2SF
   V4QI  V4HI  V4SI  V4HF  V4SF
   V8QI  V8HI  V8SI  V8HF  V8SF
   V16QI V16HI V16SI V16HF V16SF
   V32QI V32HI V32SI V32HF V32SF
   V64QI V64HI V64SI V64HF V64SF])
3d6275e3 60
; Integer-only subset of the one-register vector modes.
(define_mode_iterator V_INT_1REG
  [V2QI  V2HI  V2SI
   V4QI  V4HI  V4SI
   V8QI  V8HI  V8SI
   V16QI V16HI V16SI
   V32QI V32HI V32SI
   V64QI V64HI V64SI])
; The same list of modes as V_INT_1REG under a second iterator name.
(define_mode_iterator V_INT_1REG_ALT
  [V2QI  V2HI  V2SI
   V4QI  V4HI  V4SI
   V8QI  V8HI  V8SI
   V16QI V16HI V16SI
   V32QI V32HI V32SI
   V64QI V64HI V64SI])
; Floating-point-only subset of the one-register vector modes.
(define_mode_iterator V_FP_1REG
  [V2HF  V2SF
   V4HF  V4SF
   V8HF  V8SF
   V16HF V16SF
   V32HF V32SF
   V64HF V64SF])
82
; Vector modes for two vector registers (DI and DF elements).
(define_mode_iterator V_2REG
  [V2DI  V2DF
   V4DI  V4DF
   V8DI  V8DF
   V16DI V16DF
   V32DI V32DF
   V64DI V64DF])
; The same list of modes as V_2REG under a second iterator name.
(define_mode_iterator V_2REG_ALT
  [V2DI  V2DF
   V4DI  V4DF
   V8DI  V8DF
   V16DI V16DF
   V32DI V32DF
   V64DI V64DF])
45381d6f 98
8aeabd9f
AS
; Vector modes for four vector registers (TI elements).
(define_mode_iterator V_4REG
  [V2TI V4TI V8TI V16TI V32TI V64TI])
; The same list of modes as V_4REG under a second iterator name.
(define_mode_iterator V_4REG_ALT
  [V2TI V4TI V8TI V16TI V32TI V64TI])
102
03876953
AS
; Vector modes with native support

; Every element type except QI.
(define_mode_iterator V_noQI
  [V2HI  V2HF  V2SI  V2SF  V2DI  V2DF
   V4HI  V4HF  V4SI  V4SF  V4DI  V4DF
   V8HI  V8HF  V8SI  V8SF  V8DI  V8DF
   V16HI V16HF V16SI V16SF V16DI V16DF
   V32HI V32HF V32SI V32SF V32DI V32DF
   V64HI V64HF V64SI V64SF V64DI V64DF])
; Every element type except QI and HI.
(define_mode_iterator V_noHI
  [V2HF  V2SI  V2SF  V2DI  V2DF
   V4HF  V4SI  V4SF  V4DI  V4DF
   V8HF  V8SI  V8SF  V8DI  V8DF
   V16HF V16SI V16SF V16DI V16DF
   V32HF V32SI V32SF V32DI V32DF
   V64HF V64SI V64SF V64DI V64DF])

; Integer element types except QI.
(define_mode_iterator V_INT_noQI
  [V2HI  V2SI  V2DI
   V4HI  V4SI  V4DI
   V8HI  V8SI  V8DI
   V16HI V16SI V16DI
   V32HI V32SI V32DI
   V64HI V64SI V64DI])
; Integer element types except QI and HI.
(define_mode_iterator V_INT_noHI
  [V2SI  V2DI
   V4SI  V4DI
   V8SI  V8DI
   V16SI V16DI
   V32SI V32DI
   V64SI V64DI])
03876953 133
cfdc45f7
AS
; Scalar SF and DF together with every vector mode of those elements.
(define_mode_iterator SV_SFDF
  [SF    DF
   V2SF  V2DF
   V4SF  V4DF
   V8SF  V8DF
   V16SF V16DF
   V32SF V32DF
   V64SF V64DF])
142
; All modes in which we want to do more than just moves
; (everything in V_MOV except the TI-element modes).
(define_mode_iterator V_ALL
  [V2QI  V2HI  V2HF  V2SI  V2SF  V2DI  V2DF
   V4QI  V4HI  V4HF  V4SI  V4SF  V4DI  V4DF
   V8QI  V8HI  V8HF  V8SI  V8SF  V8DI  V8DF
   V16QI V16HI V16HF V16SI V16SF V16DI V16DF
   V32QI V32HI V32HF V32SI V32SF V32DI V32DF
   V64QI V64HI V64HF V64SI V64SF V64DI V64DF])
; The same list of modes as V_ALL under a second iterator name.
(define_mode_iterator V_ALL_ALT
  [V2QI  V2HI  V2HF  V2SI  V2SF  V2DI  V2DF
   V4QI  V4HI  V4HF  V4SI  V4SF  V4DI  V4DF
   V8QI  V8HI  V8HF  V8SI  V8SF  V8DI  V8DF
   V16QI V16HI V16HF V16SI V16SF V16DI V16DF
   V32QI V32HI V32HF V32SI V32SF V32DI V32DF
   V64QI V64HI V64HF V64SI V64SF V64DI V64DF])
03876953
AS
158
; All integer-element vector modes (QI, HI, SI, DI).
(define_mode_iterator V_INT
  [V2QI  V2HI  V2SI  V2DI
   V4QI  V4HI  V4SI  V4DI
   V8QI  V8HI  V8SI  V8DI
   V16QI V16HI V16SI V16DI
   V32QI V32HI V32SI V32DI
   V64QI V64HI V64SI V64DI])
; All floating-point-element vector modes (HF, SF, DF).
(define_mode_iterator V_FP
  [V2HF  V2SF  V2DF
   V4HF  V4SF  V4DF
   V8HF  V8SF  V8DF
   V16HF V16SF V16DF
   V32HF V32SF V32DF
   V64HF V64SF V64DF])
; Scalar HF, SF and DF together with every mode in V_FP.
(define_mode_iterator SV_FP
  [HF    SF    DF
   V2HF  V2SF  V2DF
   V4HF  V4SF  V4DF
   V8HF  V8SF  V8DF
   V16HF V16SF V16DF
   V32HF V32SF V32DF
   V64HF V64SF V64DF])
45381d6f 181
8aeabd9f
AS
; All modes that need moves, including those without many insns
; (the V_ALL modes plus the TI-element modes).
(define_mode_iterator V_MOV
  [V2QI  V2HI  V2HF  V2SI  V2SF  V2DI  V2DF  V2TI
   V4QI  V4HI  V4HF  V4SI  V4SF  V4DI  V4DF  V4TI
   V8QI  V8HI  V8HF  V8SI  V8SF  V8DI  V8DF  V8TI
   V16QI V16HI V16HF V16SI V16SF V16DI V16DF V16TI
   V32QI V32HI V32HF V32SI V32SF V32DI V32DF V32TI
   V64QI V64HI V64HF V64SI V64SF V64DI V64DF V64TI])
; The same list of modes as V_MOV under a second iterator name.
(define_mode_iterator V_MOV_ALT
  [V2QI  V2HI  V2HF  V2SI  V2SF  V2DI  V2DF  V2TI
   V4QI  V4HI  V4HF  V4SI  V4SF  V4DI  V4DF  V4TI
   V8QI  V8HI  V8HF  V8SI  V8SF  V8DI  V8DF  V8TI
   V16QI V16HI V16HF V16SI V16SF V16DI V16DF V16TI
   V32QI V32HI V32HF V32SI V32SF V32DI V32DF V32TI
   V64QI V64HI V64HF V64SI V64SF V64DI V64DF V64TI])
197
; Lower-case name of the element mode; a scalar mode maps to itself.
(define_mode_attr scalar_mode
  [(QI "qi")    (HI "hi")    (HF "hf")    (SI "si")
   (SF "sf")    (DI "di")    (DF "df")    (TI "ti")
   (V2QI "qi")  (V2HI "hi")  (V2HF "hf")  (V2SI "si")
   (V2SF "sf")  (V2DI "di")  (V2DF "df")  (V2TI "ti")
   (V4QI "qi")  (V4HI "hi")  (V4HF "hf")  (V4SI "si")
   (V4SF "sf")  (V4DI "di")  (V4DF "df")  (V4TI "ti")
   (V8QI "qi")  (V8HI "hi")  (V8HF "hf")  (V8SI "si")
   (V8SF "sf")  (V8DI "di")  (V8DF "df")  (V8TI "ti")
   (V16QI "qi") (V16HI "hi") (V16HF "hf") (V16SI "si")
   (V16SF "sf") (V16DI "di") (V16DF "df") (V16TI "ti")
   (V32QI "qi") (V32HI "hi") (V32HF "hf") (V32SI "si")
   (V32SF "sf") (V32DI "di") (V32DF "df") (V32TI "ti")
   (V64QI "qi") (V64HI "hi") (V64HF "hf") (V64SI "si")
   (V64SF "sf") (V64DI "di") (V64DF "df") (V64TI "ti")])
213
; Upper-case name of the element mode; a scalar mode maps to itself.
(define_mode_attr SCALAR_MODE
  [(QI "QI")    (HI "HI")    (HF "HF")    (SI "SI")
   (SF "SF")    (DI "DI")    (DF "DF")    (TI "TI")
   (V2QI "QI")  (V2HI "HI")  (V2HF "HF")  (V2SI "SI")
   (V2SF "SF")  (V2DI "DI")  (V2DF "DF")  (V2TI "TI")
   (V4QI "QI")  (V4HI "HI")  (V4HF "HF")  (V4SI "SI")
   (V4SF "SF")  (V4DI "DI")  (V4DF "DF")  (V4TI "TI")
   (V8QI "QI")  (V8HI "HI")  (V8HF "HF")  (V8SI "SI")
   (V8SF "SF")  (V8DI "DI")  (V8DF "DF")  (V8TI "TI")
   (V16QI "QI") (V16HI "HI") (V16HF "HF") (V16SI "SI")
   (V16SF "SF") (V16DI "DI") (V16DF "DF") (V16TI "TI")
   (V32QI "QI") (V32HI "HI") (V32HF "HF") (V32SI "SI")
   (V32SF "SF") (V32DI "DI") (V32DF "DF") (V32TI "TI")
   (V64QI "QI") (V64HI "HI") (V64HF "HF") (V64SI "SI")
   (V64SF "SF") (V64DI "DI") (V64DF "DF") (V64TI "TI")])
229
; Lower-case name of the SI-element mode with the same number of elements
; (plain "si" for scalar modes).
(define_mode_attr vnsi
  [(QI "si")       (HI "si")       (HF "si")       (SI "si")
   (SF "si")       (DI "si")       (DF "si")       (TI "si")
   (V2QI "v2si")   (V2HI "v2si")   (V2HF "v2si")   (V2SI "v2si")
   (V2SF "v2si")   (V2DI "v2si")   (V2DF "v2si")   (V2TI "v2si")
   (V4QI "v4si")   (V4HI "v4si")   (V4HF "v4si")   (V4SI "v4si")
   (V4SF "v4si")   (V4DI "v4si")   (V4DF "v4si")   (V4TI "v4si")
   (V8QI "v8si")   (V8HI "v8si")   (V8HF "v8si")   (V8SI "v8si")
   (V8SF "v8si")   (V8DI "v8si")   (V8DF "v8si")   (V8TI "v8si")
   (V16QI "v16si") (V16HI "v16si") (V16HF "v16si") (V16SI "v16si")
   (V16SF "v16si") (V16DI "v16si") (V16DF "v16si") (V16TI "v16si")
   (V32QI "v32si") (V32HI "v32si") (V32HF "v32si") (V32SI "v32si")
   (V32SF "v32si") (V32DI "v32si") (V32DF "v32si") (V32TI "v32si")
   (V64QI "v64si") (V64HI "v64si") (V64HF "v64si") (V64SI "v64si")
   (V64SF "v64si") (V64DI "v64si") (V64DF "v64si") (V64TI "v64si")])
1165109b
AS
245
; Upper-case name of the SI-element mode with the same number of elements
; (plain "SI" for scalar modes).
(define_mode_attr VnSI
  [(QI "SI")       (HI "SI")       (HF "SI")       (SI "SI")
   (SF "SI")       (DI "SI")       (DF "SI")       (TI "SI")
   (V2QI "V2SI")   (V2HI "V2SI")   (V2HF "V2SI")   (V2SI "V2SI")
   (V2SF "V2SI")   (V2DI "V2SI")   (V2DF "V2SI")   (V2TI "V2SI")
   (V4QI "V4SI")   (V4HI "V4SI")   (V4HF "V4SI")   (V4SI "V4SI")
   (V4SF "V4SI")   (V4DI "V4SI")   (V4DF "V4SI")   (V4TI "V4SI")
   (V8QI "V8SI")   (V8HI "V8SI")   (V8HF "V8SI")   (V8SI "V8SI")
   (V8SF "V8SI")   (V8DI "V8SI")   (V8DF "V8SI")   (V8TI "V8SI")
   (V16QI "V16SI") (V16HI "V16SI") (V16HF "V16SI") (V16SI "V16SI")
   (V16SF "V16SI") (V16DI "V16SI") (V16DF "V16SI") (V16TI "V16SI")
   (V32QI "V32SI") (V32HI "V32SI") (V32HF "V32SI") (V32SI "V32SI")
   (V32SF "V32SI") (V32DI "V32SI") (V32DF "V32SI") (V32TI "V32SI")
   (V64QI "V64SI") (V64HI "V64SI") (V64HF "V64SI") (V64SI "V64SI")
   (V64SF "V64SI") (V64DI "V64SI") (V64DF "V64SI") (V64TI "V64SI")])
1165109b
AS
261
; Lower-case name of the DI-element mode with the same number of elements.
; Only vector modes have an entry.
(define_mode_attr vndi
  [(V2QI "v2di")   (V2HI "v2di")   (V2HF "v2di")   (V2SI "v2di")
   (V2SF "v2di")   (V2DI "v2di")   (V2DF "v2di")   (V2TI "v2di")
   (V4QI "v4di")   (V4HI "v4di")   (V4HF "v4di")   (V4SI "v4di")
   (V4SF "v4di")   (V4DI "v4di")   (V4DF "v4di")   (V4TI "v4di")
   (V8QI "v8di")   (V8HI "v8di")   (V8HF "v8di")   (V8SI "v8di")
   (V8SF "v8di")   (V8DI "v8di")   (V8DF "v8di")   (V8TI "v8di")
   (V16QI "v16di") (V16HI "v16di") (V16HF "v16di") (V16SI "v16di")
   (V16SF "v16di") (V16DI "v16di") (V16DF "v16di") (V16TI "v16di")
   (V32QI "v32di") (V32HI "v32di") (V32HF "v32di") (V32SI "v32di")
   (V32SF "v32di") (V32DI "v32di") (V32DF "v32di") (V32TI "v32di")
   (V64QI "v64di") (V64HI "v64di") (V64HF "v64di") (V64SI "v64di")
   (V64SF "v64di") (V64DI "v64di") (V64DF "v64di") (V64TI "v64di")])
1165109b
AS
275
; Upper-case name of the DI-element mode with the same number of elements.
; Only vector modes have an entry.
(define_mode_attr VnDI
  [(V2QI "V2DI")   (V2HI "V2DI")   (V2HF "V2DI")   (V2SI "V2DI")
   (V2SF "V2DI")   (V2DI "V2DI")   (V2DF "V2DI")   (V2TI "V2DI")
   (V4QI "V4DI")   (V4HI "V4DI")   (V4HF "V4DI")   (V4SI "V4DI")
   (V4SF "V4DI")   (V4DI "V4DI")   (V4DF "V4DI")   (V4TI "V4DI")
   (V8QI "V8DI")   (V8HI "V8DI")   (V8HF "V8DI")   (V8SI "V8DI")
   (V8SF "V8DI")   (V8DI "V8DI")   (V8DF "V8DI")   (V8TI "V8DI")
   (V16QI "V16DI") (V16HI "V16DI") (V16HF "V16DI") (V16SI "V16DI")
   (V16SF "V16DI") (V16DI "V16DI") (V16DF "V16DI") (V16TI "V16DI")
   (V32QI "V32DI") (V32HI "V32DI") (V32HF "V32DI") (V32SI "V32DI")
   (V32SF "V32DI") (V32DI "V32DI") (V32DF "V32DI") (V32TI "V32DI")
   (V64QI "V64DI") (V64HI "V64DI") (V64HF "V64DI") (V64SI "V64DI")
   (V64SF "V64DI") (V64DI "V64DI") (V64DF "V64DI") (V64TI "V64DI")])
1165109b 289
45381d6f
AS
; Selector string chosen by element width: BYTE_0 for QI elements, WORD_0
; for HI elements and DWORD for SI elements.
(define_mode_attr sdwa
  [(V2QI "BYTE_0")  (V2HI "WORD_0")  (V2SI "DWORD")
   (V4QI "BYTE_0")  (V4HI "WORD_0")  (V4SI "DWORD")
   (V8QI "BYTE_0")  (V8HI "WORD_0")  (V8SI "DWORD")
   (V16QI "BYTE_0") (V16HI "WORD_0") (V16SI "DWORD")
   (V32QI "BYTE_0") (V32HI "WORD_0") (V32SI "DWORD")
   (V64QI "BYTE_0") (V64HI "WORD_0") (V64SI "DWORD")])
3d66c777 297
3d6275e3
AS
298;; }}}
299;; {{{ Substitutions
300
; Name fragments for patterns run through the substitutions below: each
; attribute is empty in the original pattern and "_exec" in the copy
; produced by the named define_subst.
(define_subst_attr "exec"         "vec_merge"              "" "_exec")
(define_subst_attr "exec_clobber" "vec_merge_with_clobber" "" "_exec")
(define_subst_attr "exec_vcc"     "vec_merge_with_vcc"     "" "_exec")
(define_subst_attr "exec_scatter" "scatter_store"          "" "_exec")
309
; Turn a plain vector set into a masked one: the new value (operand 1) is
; merged with operand 3 under the DI mask in operand 4.
(define_subst "vec_merge"
  [(set (match_operand:V_MOV 0)
        (match_operand:V_MOV 1))]
  ""
  [(set (match_dup 0)
        (vec_merge:V_MOV
          (match_dup 1)
          (match_operand:V_MOV 3 "gcn_register_or_unspec_operand" "U0")
          (match_operand:DI 4 "gcn_exec_reg_operand" "e")))])
319
; As "vec_merge", for patterns that also carry a clobber (operand 2); the
; clobber is kept unchanged in the substituted pattern.
(define_subst "vec_merge_with_clobber"
  [(set (match_operand:V_MOV 0)
        (match_operand:V_MOV 1))
   (clobber (match_operand 2))]
  ""
  [(set (match_dup 0)
        (vec_merge:V_MOV
          (match_dup 1)
          (match_operand:V_MOV 3 "gcn_register_or_unspec_operand" "U0")
          (match_operand:DI 4 "gcn_exec_reg_operand" "e")))
   (clobber (match_dup 2))])
331
; As "vec_merge", for patterns that also set a DI result (operand 2).  The
; vector result is merged with operand 4 under the mask in operand 5, and
; the DI result is ANDed with EXEC_REG.
(define_subst "vec_merge_with_vcc"
  [(set (match_operand:V_MOV 0)
        (match_operand:V_MOV 1))
   (set (match_operand:DI 2)
        (match_operand:DI 3))]
  ""
  [(parallel
     [(set (match_dup 0)
           (vec_merge:V_MOV
             (match_dup 1)
             (match_operand:V_MOV 4 "gcn_register_or_unspec_operand" "U0")
             (match_operand:DI 5 "gcn_exec_reg_operand" "e")))
      (set (match_dup 2)
           (and:DI (match_dup 3)
                   (reg:DI EXEC_REG)))])])
347
; Add a mask to a scatter store: the four original UNSPEC_SCATTER operands
; are kept and a DI mask (operand 4) is appended as a fifth.
(define_subst "scatter_store"
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(match_operand 0)
           (match_operand 1)
           (match_operand 2)
           (match_operand 3)]
          UNSPEC_SCATTER))]
  ""
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(match_dup 0)
           (match_dup 1)
           (match_dup 2)
           (match_dup 3)
           (match_operand:DI 4 "gcn_exec_reg_operand" "e")]
          UNSPEC_SCATTER))])
365
366;; }}}
367;; {{{ Vector moves
368
; Entry point for all vector register moves.  Memory accesses may arrive
; here too, although they more usually go through reload_in/out,
; gather/scatter, maskload/store, etc.
;
; Moves with no MEM operand fall through to the move insns below.  Before
; register allocation a MEM operand is expanded into a scatter (store) or
; gather (load); while LRA is running the sgprbase form is emitted instead.

(define_expand "mov<mode>"
  [(set (match_operand:V_MOV 0 "nonimmediate_operand")
        (match_operand:V_MOV 1 "general_operand"))]
  ""
  {
    /* Bitwise reinterpret casts via SUBREG don't work with GCN vector
       registers.  For a same-size SUBREG stored to memory, give a copy of
       the MEM the inner mode and store the inner register directly.  */
    if (MEM_P (operands[0]) && !SUBREG_P (operands[0])
        && SUBREG_P (operands[1]))
      {
        rtx inner = SUBREG_REG (operands[1]);
        if (GET_MODE_SIZE (GET_MODE (operands[1]))
            == GET_MODE_SIZE (GET_MODE (inner)))
          {
            rtx retyped = copy_rtx (operands[0]);
            PUT_MODE_RAW (retyped, GET_MODE (inner));
            emit_move_insn (retyped, inner);
            DONE;
          }
      }

    /* Likewise for a load from memory into a same-size SUBREG.  */
    if (MEM_P (operands[1]) && !SUBREG_P (operands[1])
        && SUBREG_P (operands[0]))
      {
        rtx inner = SUBREG_REG (operands[0]);
        if (GET_MODE_SIZE (GET_MODE (operands[0]))
            == GET_MODE_SIZE (GET_MODE (inner)))
          {
            rtx retyped = copy_rtx (operands[1]);
            PUT_MODE_RAW (retyped, GET_MODE (inner));
            emit_move_insn (inner, retyped);
            DONE;
          }
      }

    /* SUBREG of MEM is not supported.  */
    gcc_assert (!(SUBREG_P (operands[0]) && MEM_P (SUBREG_REG (operands[0])))
                && !(SUBREG_P (operands[1])
                     && MEM_P (SUBREG_REG (operands[1]))));

    bool store_p = MEM_P (operands[0]);
    bool load_p = MEM_P (operands[1]);

    if (!store_p && !load_p)
      ; /* Register move: use the pattern above unchanged.  */
    else if (lra_in_progress || reload_completed)
      {
        /* Only valid while LRA is running, never after reload.  */
        gcc_assert (!reload_completed);
        rtx tmp = gen_reg_rtx (<VnDI>mode);
        emit_insn (gen_mov<mode>_sgprbase (operands[0], operands[1], tmp));
        DONE;
      }
    else if (store_p)
      {
        /* Store before register allocation: emit a scatter.  */
        operands[1] = force_reg (<MODE>mode, operands[1]);
        rtx tmp = gen_rtx_SCRATCH (<VnDI>mode);
        rtx as = GEN_INT (MEM_ADDR_SPACE (operands[0]));
        rtx vol = GEN_INT (MEM_VOLATILE_P (operands[0]));
        rtx addr = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
                                                        operands[0], tmp);
        emit_insn (gen_scatter<mode>_expr (addr, operands[1], as, vol));
        DONE;
      }
    else
      {
        /* Load before register allocation: emit a gather.  */
        rtx tmp = gen_rtx_SCRATCH (<VnDI>mode);
        rtx as = GEN_INT (MEM_ADDR_SPACE (operands[1]));
        rtx vol = GEN_INT (MEM_VOLATILE_P (operands[1]));
        rtx addr = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
                                                        operands[1], tmp);
        emit_insn (gen_gather<mode>_expr (operands[0], addr, as, vol));
        DONE;
      }
  })
440
; A pseudo instruction that helps LRA use the "U0" constraint.  It has an
; empty output template and zero length, so it emits no assembly.

(define_insn "mov<mode>_unspec"
  [(set (match_operand:V_MOV 0 "nonimmediate_operand" "=v")
        (match_operand:V_MOV 1 "gcn_unspec_operand"   " U"))]
  ""
  ""
  [(set_attr "type"   "unknown")
   (set_attr "length" "0")])
450
; Unmasked move of a one-register vector.  Alternatives, in order:
;   v <- v/A and v <- B    v_mov_b32 (lengths 4 and 8)
;   v <- a                 v_accvgpr_read_b32
;   a <- v                 v_accvgpr_write_b32
;   a <- a                 v_accvgpr_mov_b32 (cdna attribute "cdna2")
(define_insn "*mov<mode>"
  [(set (match_operand:V_1REG 0 "nonimmediate_operand" "=v,v,v,$a,a")
        (match_operand:V_1REG 1 "general_operand"      "vA,B,a, v,a"))]
  ""
  "@
   v_mov_b32\t%0, %1
   v_mov_b32\t%0, %1
   v_accvgpr_read_b32\t%0, %1
   v_accvgpr_write_b32\t%0, %1
   v_accvgpr_mov_b32\t%0, %1"
  [(set_attr "type"   "vop1,vop1,vop3p_mai,vop3p_mai,vop1")
   (set_attr "length" "4,8,8,8,4")
   (set_attr "cdna"   "*,*,*,*,cdna2")])
3d6275e3
AS
462
; Masked move of a one-register vector: operand 1 is merged with operand 2
; under the DI mask in operand 3.  The first two alternatives emit
; v_mov_b32, the next two v_cndmask_b32 (mask in vcc, or named as %3), and
; the last two (memory operands, with a vector scratch in operand 4) emit
; "#" so that they are split later.
(define_insn "mov<mode>_exec"
  [(set (match_operand:V_1REG 0 "nonimmediate_operand" "= v, v, v, v, v, m")
        (vec_merge:V_1REG
          (match_operand:V_1REG 1 "general_operand"    " vA, B, v,vA, m, v")
          (match_operand:V_1REG 2 "gcn_alu_or_unspec_operand"
                                                       " U0,U0,vA,vA,U0,U0")
          (match_operand:DI 3 "register_operand"       "  e, e,cV,Sv, e, e")))
   (clobber (match_scratch:<VnDI> 4                    "= X, X, X, X,&v,&v"))]
  "!MEM_P (operands[0]) || REG_P (operands[1])"
  "@
   v_mov_b32\t%0, %1
   v_mov_b32\t%0, %1
   v_cndmask_b32\t%0, %2, %1, vcc
   v_cndmask_b32\t%0, %2, %1, %3
   #
   #"
  [(set_attr "type"   "vop1,vop1,vop2,vop3a,*,*")
   (set_attr "length" "4,8,4,8,16,16")])
3d6275e3
AS
479
480; This variant does not accept an unspec, but does permit MEM
481; read/modify/write which is necessary for maskstore.
482
483;(define_insn "*mov<mode>_exec_match"
03876953
AS
484; [(set (match_operand:V_1REG 0 "nonimmediate_operand" "=v,v, v, m")
485; (vec_merge:V_1REG
486; (match_operand:V_1REG 1 "general_operand" "vA,B, m, v")
3d6275e3 487; (match_dup 0)
03876953 488; (match_operand:DI 2 "gcn_exec_reg_operand" " e,e, e, e")))
1165109b 489; (clobber (match_scratch:<VnDI> 3 "=X,X,&v,&v"))]
3d6275e3
AS
490; "!MEM_P (operands[0]) || REG_P (operands[1])"
491; "@
492; v_mov_b32\t%0, %1
493; v_mov_b32\t%0, %1
494; #
495; #"
496; [(set_attr "type" "vop1,vop1,*,*")
497; (set_attr "length" "4,8,16,16")])
498
; Unmasked move of a two-register vector, emitted as two 32-bit moves.
(define_insn "*mov<mode>"
  [(set (match_operand:V_2REG 0 "nonimmediate_operand" "=v, v,$a,a")
        (match_operand:V_2REG 1 "general_operand"      "vDB,a, v,a"))]
  ""
  {
    /* Move the low half first unless the source is a register numbered
       below the destination; in that case move the high half first so an
       overlapping source is read before it is overwritten.  Only the first
       alternative can have a non-register source.  */
    bool low_first = (!REG_P (operands[1])
                      || REGNO (operands[0]) <= REGNO (operands[1]));

    switch (which_alternative)
      {
      case 0:
        return (low_first
                ? "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1"
                : "v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1");
      case 1:
        return (low_first
                ? "v_accvgpr_read_b32\t%L0, %L1\;v_accvgpr_read_b32\t%H0, %H1"
                : "v_accvgpr_read_b32\t%H0, %H1\;v_accvgpr_read_b32\t%L0, %L1");
      case 2:
        return (low_first
                ? "v_accvgpr_write_b32\t%L0, %L1\;v_accvgpr_write_b32\t%H0, %H1"
                : "v_accvgpr_write_b32\t%H0, %H1\;v_accvgpr_write_b32\t%L0, %L1");
      case 3:
        return (low_first
                ? "v_accvgpr_mov_b32\t%L0, %L1\;v_accvgpr_mov_b32\t%H0, %H1"
                : "v_accvgpr_mov_b32\t%H0, %H1\;v_accvgpr_mov_b32\t%L0, %L1");
      default:
        gcc_unreachable ();
      }
  }
  [(set_attr "type"   "vmult,vmult,vmult,vmult")
   (set_attr "length" "16,16,16,8")
   (set_attr "cdna"   "*,*,*,cdna2")])
3d6275e3
AS
523
; Masked move of a two-register vector.  Alternative 0 emits two v_mov_b32,
; alternatives 1 and 2 two v_cndmask_b32 (mask in vcc, or named as %3), and
; the memory alternatives 3 and 4 emit "#" so that they are split later.
(define_insn "mov<mode>_exec"
  [(set (match_operand:V_2REG 0 "nonimmediate_operand" "=  v,   v,   v, v, m")
        (vec_merge:V_2REG
          (match_operand:V_2REG 1 "general_operand"    "vDB,  v0,  v0, m, v")
          (match_operand:V_2REG 2 "gcn_alu_or_unspec_operand"
                                                       " U0,vDA0,vDA0,U0,U0")
          (match_operand:DI 3 "register_operand"       "  e,  cV,  Sv, e, e")))
   (clobber (match_scratch:<VnDI> 4                    "= X,   X,   X,&v,&v"))]
  "!MEM_P (operands[0]) || REG_P (operands[1])"
  {
    /* Low half first, unless operand 1 is a register numbered below the
       destination, in which case the high half goes first.  */
    bool low_first = (!REG_P (operands[1])
                      || REGNO (operands[0]) <= REGNO (operands[1]));

    switch (which_alternative)
      {
      case 0:
        return (low_first
                ? "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1"
                : "v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1");
      case 1:
        return (low_first
                ? "v_cndmask_b32\t%L0, %L2, %L1, vcc\;"
                  "v_cndmask_b32\t%H0, %H2, %H1, vcc"
                : "v_cndmask_b32\t%H0, %H2, %H1, vcc\;"
                  "v_cndmask_b32\t%L0, %L2, %L1, vcc");
      case 2:
        return (low_first
                ? "v_cndmask_b32\t%L0, %L2, %L1, %3\;"
                  "v_cndmask_b32\t%H0, %H2, %H1, %3"
                : "v_cndmask_b32\t%H0, %H2, %H1, %3\;"
                  "v_cndmask_b32\t%L0, %L2, %L1, %3");
      default:
        /* Memory alternatives are split into gather/scatter.  */
        return "#";
      }
  }
  [(set_attr "type"   "vmult,vmult,vmult,*,*")
   (set_attr "length" "16,16,16,16,16")])
563
; Unmasked move of a four-register vector, emitted as four 32-bit moves
; (parts %L, %H, %J, %K).
;
; The copy direction depends on the register numbers, as in the V_2REG move
; pattern: a fixed ascending copy corrupts the value when the source and
; destination registers overlap and the destination is numbered higher,
; because low destination parts overwrite source parts not yet read.
;
; All lengths are 32: the one-register move pattern gives v_mov_b32 a
; length of up to 8 bytes, so 16 under-estimated four of them.
(define_insn "*mov<mode>_4reg"
  [(set (match_operand:V_4REG 0 "nonimmediate_operand" "=v, v,$a,a")
        (match_operand:V_4REG 1 "general_operand"      "vDB,a, v,a"))]
  ""
  {
    /* Ascending (L,H,J,K) is safe for a non-register source and whenever
       the destination is not numbered above the source; otherwise copy
       from the top part downwards (K,J,H,L).  */
    bool ascending = (!REG_P (operands[1])
                      || REGNO (operands[0]) <= REGNO (operands[1]));

    switch (which_alternative)
      {
      case 0:
        return (ascending
                ? "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\;"
                  "v_mov_b32\t%J0, %J1\;v_mov_b32\t%K0, %K1"
                : "v_mov_b32\t%K0, %K1\;v_mov_b32\t%J0, %J1\;"
                  "v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1");
      case 1:
        return (ascending
                ? "v_accvgpr_read_b32\t%L0, %L1\;"
                  "v_accvgpr_read_b32\t%H0, %H1\;"
                  "v_accvgpr_read_b32\t%J0, %J1\;"
                  "v_accvgpr_read_b32\t%K0, %K1"
                : "v_accvgpr_read_b32\t%K0, %K1\;"
                  "v_accvgpr_read_b32\t%J0, %J1\;"
                  "v_accvgpr_read_b32\t%H0, %H1\;"
                  "v_accvgpr_read_b32\t%L0, %L1");
      case 2:
        return (ascending
                ? "v_accvgpr_write_b32\t%L0, %L1\;"
                  "v_accvgpr_write_b32\t%H0, %H1\;"
                  "v_accvgpr_write_b32\t%J0, %J1\;"
                  "v_accvgpr_write_b32\t%K0, %K1"
                : "v_accvgpr_write_b32\t%K0, %K1\;"
                  "v_accvgpr_write_b32\t%J0, %J1\;"
                  "v_accvgpr_write_b32\t%H0, %H1\;"
                  "v_accvgpr_write_b32\t%L0, %L1");
      case 3:
        return (ascending
                ? "v_accvgpr_mov_b32\t%L0, %L1\;"
                  "v_accvgpr_mov_b32\t%H0, %H1\;"
                  "v_accvgpr_mov_b32\t%J0, %J1\;"
                  "v_accvgpr_mov_b32\t%K0, %K1"
                : "v_accvgpr_mov_b32\t%K0, %K1\;"
                  "v_accvgpr_mov_b32\t%J0, %J1\;"
                  "v_accvgpr_mov_b32\t%H0, %H1\;"
                  "v_accvgpr_mov_b32\t%L0, %L1");
      default:
        gcc_unreachable ();
      }
  }
  [(set_attr "type"   "vmult,vmult,vmult,vmult")
   (set_attr "length" "32,32,32,32")
   (set_attr "cdna"   "*,*,*,cdna2")])
8aeabd9f
AS
574
; Masked move of a four-register vector (parts %L, %H, %J, %K).
; Alternative 0 emits four v_mov_b32, alternatives 1 and 2 four
; v_cndmask_b32 (mask in vcc, or named as %3), and the memory alternatives
; 3 and 4 emit "#" so that they are split later.
;
; When operand 1 is a register numbered below the destination, the parts
; are written from the top down (K,J,H,L).  The previous order for that
; case (H,L,J,K) was the two-register order with J and K appended; with an
; overlapping source it overwrote the source's J part (by writing %H0)
; before reading it.
(define_insn "mov<mode>_exec"
  [(set (match_operand:V_4REG 0 "nonimmediate_operand" "=  v,   v,   v, v, m")
        (vec_merge:V_4REG
          (match_operand:V_4REG 1 "general_operand"    "vDB,  v0,  v0, m, v")
          (match_operand:V_4REG 2 "gcn_alu_or_unspec_operand"
                                                       " U0,vDA0,vDA0,U0,U0")
          (match_operand:DI 3 "register_operand"       "  e,  cV,  Sv, e, e")))
   (clobber (match_scratch:<VnDI> 4                    "= X,   X,   X,&v,&v"))]
  "!MEM_P (operands[0]) || REG_P (operands[1])"
  {
    if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1]))
      /* Ascending order is safe: no destination part written here is a
         source part that is still to be read.  */
      switch (which_alternative)
        {
        case 0:
          return "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\;"
                 "v_mov_b32\t%J0, %J1\;v_mov_b32\t%K0, %K1";
        case 1:
          return "v_cndmask_b32\t%L0, %L2, %L1, vcc\;"
                 "v_cndmask_b32\t%H0, %H2, %H1, vcc\;"
                 "v_cndmask_b32\t%J0, %J2, %J1, vcc\;"
                 "v_cndmask_b32\t%K0, %K2, %K1, vcc";
        case 2:
          return "v_cndmask_b32\t%L0, %L2, %L1, %3\;"
                 "v_cndmask_b32\t%H0, %H2, %H1, %3\;"
                 "v_cndmask_b32\t%J0, %J2, %J1, %3\;"
                 "v_cndmask_b32\t%K0, %K2, %K1, %3";
        }
    else
      /* Destination numbered above the source: go strictly top-down so
         each source part is read before the write that may overlap it.  */
      switch (which_alternative)
        {
        case 0:
          return "v_mov_b32\t%K0, %K1\;v_mov_b32\t%J0, %J1\;"
                 "v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1";
        case 1:
          return "v_cndmask_b32\t%K0, %K2, %K1, vcc\;"
                 "v_cndmask_b32\t%J0, %J2, %J1, vcc\;"
                 "v_cndmask_b32\t%H0, %H2, %H1, vcc\;"
                 "v_cndmask_b32\t%L0, %L2, %L1, vcc";
        case 2:
          return "v_cndmask_b32\t%K0, %K2, %K1, %3\;"
                 "v_cndmask_b32\t%J0, %J2, %J1, %3\;"
                 "v_cndmask_b32\t%H0, %H2, %H1, %3\;"
                 "v_cndmask_b32\t%L0, %L2, %L1, %3";
        }

    /* Memory alternatives are split into gather/scatter.  */
    return "#";
  }
  [(set_attr "type" "vmult,vmult,vmult,*,*")
   (set_attr "length" "32")])
624
3d6275e3
AS
625; This variant does not accept an unspec, but does permit MEM
626; read/modify/write which is necessary for maskstore.
627
628;(define_insn "*mov<mode>_exec_match"
03876953
AS
629; [(set (match_operand:V_2REG 0 "nonimmediate_operand" "=v, v, m")
630; (vec_merge:V_2REG
631; (match_operand:V_2REG 1 "general_operand" "vDB, m, v")
3d6275e3 632; (match_dup 0)
03876953 633; (match_operand:DI 2 "gcn_exec_reg_operand" " e, e, e")))
1165109b 634; (clobber (match_scratch:<VnDI> 3 "=X,&v,&v"))]
3d6275e3
AS
635; "!MEM_P (operands[0]) || REG_P (operands[1])"
636; "@
637; * if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1])) \
638; return \"v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\"; \
639; else \
640; return \"v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1\";
641; #
642; #"
643; [(set_attr "type" "vmult,*,*")
644; (set_attr "length" "16,16,16")])
645
; A SGPR-base load looks like:
;   <load> v, Sv
;
; No hardware instruction corresponds to this, but vector base addresses
; are kept in an SGPR because that is easier to add to a vector.  There is
; also a temporary vT, and the vector v1 holding the lane numbers.
;
; It is rewritten as:
;   vT = v1 << log2(element-size)
;   vT += Sv
;   flat_load v, vT

; One-register form.  The register alternatives are plain v_mov_b32; the
; memory alternatives emit "#" and are split later.
(define_insn "@mov<mode>_sgprbase"
  [(set (match_operand:V_1REG 0 "nonimmediate_operand" "= v, v, v, m, a, m")
        (unspec:V_1REG
          [(match_operand:V_1REG 1 "general_operand"   " vA,vB, m, v, m, a")]
          UNSPEC_SGPRBASE))
   (clobber (match_operand:<VnDI> 2 "register_operand" "=&v,&v,&v,&v,&v,&v"))]
  "lra_in_progress || reload_completed"
  "@
   v_mov_b32\t%0, %1
   v_mov_b32\t%0, %1
   #
   #
   #
   #"
  [(set_attr "type"   "vop1,vop1,*,*,*,*")
   (set_attr "length" "4,8,12,12,12,12")
   (set_attr "cdna"   "*,*,*,*,cdna2,cdna2")])
3d6275e3 673
; Two-register form of the sgprbase move.  Only the first alternative emits
; code directly; the memory alternatives emit "#" and are split later.
(define_insn "@mov<mode>_sgprbase"
  [(set (match_operand:V_2REG 0 "nonimmediate_operand" "= v, v, m, a, m")
        (unspec:V_2REG
          [(match_operand:V_2REG 1 "general_operand"   "vDB, m, v, m, a")]
          UNSPEC_SGPRBASE))
   (clobber (match_operand:<VnDI> 2 "register_operand" "=&v,&v,&v,&v,&v"))]
  "lra_in_progress || reload_completed"
  {
    if (which_alternative != 0)
      return "#";

    /* Low half first, unless the source is a register numbered below the
       destination, in which case the high half goes first.  */
    if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1]))
      return "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1";
    return "v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1";
  }
  [(set_attr "type"   "vmult,*,*,*,*")
   (set_attr "length" "8,12,12,12,12")
   (set_attr "cdna"   "*,*,*,cdna2,cdna2")])
3d6275e3 693
; Four-register form of the sgprbase move.  Only the first alternative
; emits code directly; the memory alternatives emit "#" and are split
; later.
;
; Two changes from the earlier form of this pattern:
;  - The register copy chooses its direction from the register numbers, as
;    the V_2REG sgprbase pattern does, so an overlapping source is not
;    overwritten before it has been read.
;  - The first alternative's length is 32, not 8: it emits four v_mov_b32,
;    and the one-register pattern gives each a length of up to 8 bytes.
(define_insn "@mov<mode>_sgprbase"
  [(set (match_operand:V_4REG 0 "nonimmediate_operand" "= v, v, m")
        (unspec:V_4REG
          [(match_operand:V_4REG 1 "general_operand"   "vDB, m, v")]
          UNSPEC_SGPRBASE))
   (clobber (match_operand:<VnDI> 2 "register_operand" "=&v,&v,&v"))]
  "lra_in_progress || reload_completed"
  {
    if (which_alternative != 0)
      return "#";

    /* Ascending (L,H,J,K) unless the source is a register numbered below
       the destination; then copy from the top part downwards.  */
    if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1]))
      return "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\;"
             "v_mov_b32\t%J0, %J1\;v_mov_b32\t%K0, %K1";
    return "v_mov_b32\t%K0, %K1\;v_mov_b32\t%J0, %J1\;"
           "v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1";
  }
  [(set_attr "type"   "vmult,*,*")
   (set_attr "length" "32,12,12")])
8aeabd9f 706
3d6275e3
AS
; Expand scalar addresses into gather/scatter patterns

; Unmasked sgprbase store -> scatter.  Operand 5 is the address expression
; built from the MEM and the scratch register; operands 6 and 7 are the
; MEM's address space and volatile flag as constants.
(define_split
  [(set (match_operand:V_MOV 0 "memory_operand")
        (unspec:V_MOV
          [(match_operand:V_MOV 1 "general_operand")]
          UNSPEC_SGPRBASE))
   (clobber (match_scratch:<VnDI> 2))]
  ""
  [(set (mem:BLK (scratch))
        (unspec:BLK [(match_dup 5) (match_dup 1) (match_dup 6) (match_dup 7)]
                    UNSPEC_SCATTER))]
  {
    rtx mem = operands[0];
    operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL, mem,
                                                       operands[2]);
    operands[6] = GEN_INT (MEM_ADDR_SPACE (mem));
    operands[7] = GEN_INT (MEM_VOLATILE_P (mem));
  })
726
; Masked store -> masked scatter.  The mask (operand 3) is passed to the
; address expansion and appended as the scatter's fifth operand; operand 2
; of the vec_merge does not appear in the replacement.
(define_split
  [(set (match_operand:V_MOV 0 "memory_operand")
        (vec_merge:V_MOV
          (match_operand:V_MOV 1 "general_operand")
          (match_operand:V_MOV 2 "")
          (match_operand:DI 3 "gcn_exec_reg_operand")))
   (clobber (match_scratch:<VnDI> 4))]
  ""
  [(set (mem:BLK (scratch))
        (unspec:BLK [(match_dup 5) (match_dup 1)
                     (match_dup 6) (match_dup 7) (match_dup 3)]
                    UNSPEC_SCATTER))]
  {
    rtx mem = operands[0];
    operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode,
                                                       operands[3], mem,
                                                       operands[4]);
    operands[6] = GEN_INT (MEM_ADDR_SPACE (mem));
    operands[7] = GEN_INT (MEM_VOLATILE_P (mem));
  })
747
; Unmasked sgprbase load -> gather.  Operand 5 is the address expression
; built from the MEM and the scratch register; operands 6 and 7 are the
; MEM's address space and volatile flag as constants.
(define_split
  [(set (match_operand:V_MOV 0 "nonimmediate_operand")
        (unspec:V_MOV
          [(match_operand:V_MOV 1 "memory_operand")]
          UNSPEC_SGPRBASE))
   (clobber (match_scratch:<VnDI> 2))]
  ""
  [(set (match_dup 0)
        (unspec:V_MOV [(match_dup 5) (match_dup 6) (match_dup 7)
                       (mem:BLK (scratch))]
                      UNSPEC_GATHER))]
  {
    rtx mem = operands[1];
    operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL, mem,
                                                       operands[2]);
    operands[6] = GEN_INT (MEM_ADDR_SPACE (mem));
    operands[7] = GEN_INT (MEM_VOLATILE_P (mem));
  })
766
; Masked load -> masked gather.  The gather replaces operand 1 inside the
; vec_merge; the merge source (operand 2) and mask (operand 3) are kept,
; and the mask is also passed to the address expansion.
(define_split
  [(set (match_operand:V_MOV 0 "nonimmediate_operand")
        (vec_merge:V_MOV
          (match_operand:V_MOV 1 "memory_operand")
          (match_operand:V_MOV 2 "")
          (match_operand:DI 3 "gcn_exec_reg_operand")))
   (clobber (match_scratch:<VnDI> 4))]
  ""
  [(set (match_dup 0)
        (vec_merge:V_MOV
          (unspec:V_MOV [(match_dup 5) (match_dup 6) (match_dup 7)
                         (mem:BLK (scratch))]
                        UNSPEC_GATHER)
          (match_dup 2)
          (match_dup 3)))]
  {
    rtx mem = operands[1];
    operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode,
                                                       operands[3], mem,
                                                       operands[4]);
    operands[6] = GEN_INT (MEM_ADDR_SPACE (mem));
    operands[7] = GEN_INT (MEM_VOLATILE_P (mem));
  })
790
791; TODO: Add zero/sign extending variants.
792
793;; }}}
794;; {{{ Lane moves
795
; v_writelane and v_readlane work regardless of exec flags.
; We allow source to be scratch.
;
; FIXME these should take A immediates

; Write scalar operand 1 into the lane selected by operand 2; the other
; lanes come from operand 3.  The lane mask is written as (1 << operand 2).
(define_insn "*vec_set<mode>"
  [(set (match_operand:V_1REG 0 "register_operand"                "= v")
        (vec_merge:V_1REG
          (vec_duplicate:V_1REG
            (match_operand:<SCALAR_MODE> 1 "register_operand"     " Sv"))
          (match_operand:V_1REG 3 "gcn_register_or_unspec_operand" " U0")
          (ashift (const_int 1)
                  (match_operand:SI 2 "gcn_alu_operand"           "SvB"))))]
  ""
  "v_writelane_b32 %0, %1, %2"
  [(set_attr "type"       "vop3a")
   (set_attr "length"     "8")
   (set_attr "exec"       "none")
   (set_attr "laneselect" "yes")])
815
816; FIXME: 64bit operations really should be splitters, but I am not sure how
817; to represent vertical subregs.
; Two-register variant of the lane write: the same RTL shape as the V_1REG
; pattern, but the template emits two v_writelane_b32 instructions, one for
; the %L parts and one for the %H parts of operands 0 and 1, both using the
; lane index in operand 2.  Length is 16 (two 8-byte instructions).
818(define_insn "*vec_set<mode>"
03876953
AS
819 [(set (match_operand:V_2REG 0 "register_operand" "= v")
820 (vec_merge:V_2REG
821 (vec_duplicate:V_2REG
822 (match_operand:<SCALAR_MODE> 1 "register_operand" " Sv"))
823 (match_operand:V_2REG 3 "gcn_register_or_unspec_operand" " U0")
3d6275e3 824 (ashift (const_int 1)
03876953 825 (match_operand:SI 2 "gcn_alu_operand" "SvB"))))]
3d6275e3
AS
826 ""
827 "v_writelane_b32 %L0, %L1, %2\;v_writelane_b32 %H0, %H1, %2"
828 [(set_attr "type" "vmult")
829 (set_attr "length" "16")
830 (set_attr "exec" "none")
831 (set_attr "laneselect" "yes")])
832
; Named expander for inserting scalar operand 1 at lane operand 2 of vector
; operand 0.  It has no preparation code: the template is emitted as-is,
; with operand 0 itself (match_dup 0) supplying the lanes that are not
; overwritten and (1 << operand 2) as the merge mask.
833(define_expand "vec_set<mode>"
8aeabd9f
AS
834 [(set (match_operand:V_MOV 0 "register_operand")
835 (vec_merge:V_MOV
836 (vec_duplicate:V_MOV
3d6275e3
AS
837 (match_operand:<SCALAR_MODE> 1 "register_operand"))
838 (match_dup 0)
839 (ashift (const_int 1) (match_operand:SI 2 "gcn_alu_operand"))))]
840 "")
841
; Lane write where the merge mask (operand 2) is already a constant.  The
; insn condition accepts the mask only when exact_log2 of it, taken as
; unsigned, is below the number of vector units; this also rejects masks for
; which exact_log2 returns a negative value.  The output code replaces
; operand 2 by that log2 (the lane index) before printing v_writelane_b32.
842(define_insn "*vec_set<mode>_1"
03876953
AS
843 [(set (match_operand:V_1REG 0 "register_operand" "=v")
844 (vec_merge:V_1REG
845 (vec_duplicate:V_1REG
846 (match_operand:<SCALAR_MODE> 1 "register_operand" "Sv"))
847 (match_operand:V_1REG 3 "gcn_register_or_unspec_operand" "U0")
848 (match_operand:SI 2 "const_int_operand" " i")))]
1165109b 849 "((unsigned) exact_log2 (INTVAL (operands[2])) < GET_MODE_NUNITS (<MODE>mode))"
3d6275e3
AS
850 {
851 operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));
852 return "v_writelane_b32 %0, %1, %2";
853 }
854 [(set_attr "type" "vop3a")
855 (set_attr "length" "8")
856 (set_attr "exec" "none")
857 (set_attr "laneselect" "yes")])
858
; Two-register variant of the constant-mask lane write.  The condition and
; the mask-to-lane-index conversion (exact_log2 of operand 2) are the same
; as in the V_1REG pattern; the template writes the %L and %H parts with two
; v_writelane_b32 instructions.
859(define_insn "*vec_set<mode>_1"
03876953
AS
860 [(set (match_operand:V_2REG 0 "register_operand" "=v")
861 (vec_merge:V_2REG
862 (vec_duplicate:V_2REG
863 (match_operand:<SCALAR_MODE> 1 "register_operand" "Sv"))
864 (match_operand:V_2REG 3 "gcn_register_or_unspec_operand" "U0")
865 (match_operand:SI 2 "const_int_operand" " i")))]
1165109b 866 "((unsigned) exact_log2 (INTVAL (operands[2])) < GET_MODE_NUNITS (<MODE>mode))"
3d6275e3
AS
867 {
868 operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));
869 return "v_writelane_b32 %L0, %L1, %2\;v_writelane_b32 %H0, %H1, %2";
870 }
871 [(set_attr "type" "vmult")
872 (set_attr "length" "16")
873 (set_attr "exec" "none")
874 (set_attr "laneselect" "yes")])
875
; Broadcast scalar operand 1 into every lane of a single-register vector
; using one v_mov_b32.
876(define_insn "vec_duplicate<mode><exec>"
03876953
AS
877 [(set (match_operand:V_1REG 0 "register_operand" "=v")
878 (vec_duplicate:V_1REG
879 (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand" "SvB")))]
3d6275e3
AS
880 ""
881 "v_mov_b32\t%0, %1"
882 [(set_attr "type" "vop3a")
883 (set_attr "length" "8")])
884
; Broadcast a two-register scalar into every lane of a two-register vector:
; one v_mov_b32 for the %L parts and one for the %H parts (length 16).
; NOTE(review): "type" is "vop3a" although two instructions are emitted; the
; V_4REG sibling uses "mult" and other multi-instruction patterns here use
; "vmult" -- confirm which is intended.
885(define_insn "vec_duplicate<mode><exec>"
03876953
AS
886 [(set (match_operand:V_2REG 0 "register_operand" "= v")
887 (vec_duplicate:V_2REG
3d6275e3
AS
888 (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand" "SvDB")))]
889 ""
890 "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1"
891 [(set_attr "type" "vop3a")
892 (set_attr "length" "16")])
893
8aeabd9f
AS
; Broadcast a four-register scalar into every lane of a four-register
; vector: four v_mov_b32 instructions, one per part (%L, %H, %J, %K), for a
; total length of 32.
894(define_insn "vec_duplicate<mode><exec>"
895 [(set (match_operand:V_4REG 0 "register_operand" "= v")
896 (vec_duplicate:V_4REG
897 (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand" "SvDB")))]
898 ""
899 "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\;v_mov_b32\t%J0, %J1\;v_mov_b32\t%K0, %K1"
900 [(set_attr "type" "mult")
901 (set_attr "length" "32")])
902
; Read one lane (index operand 2) of single-register vector operand 1 into
; scalar operand 0 with v_readlane_b32.  Marked exec "none".
3d6275e3 903(define_insn "vec_extract<mode><scalar_mode>"
03876953 904 [(set (match_operand:<SCALAR_MODE> 0 "register_operand" "=Sg")
3d6275e3 905 (vec_select:<SCALAR_MODE>
03876953
AS
906 (match_operand:V_1REG 1 "register_operand" " v")
907 (parallel [(match_operand:SI 2 "gcn_alu_operand" "SvB")])))]
3d6275e3
AS
908 ""
909 "v_readlane_b32 %0, %1, %2"
910 [(set_attr "type" "vop3a")
911 (set_attr "length" "8")
912 (set_attr "exec" "none")
913 (set_attr "laneselect" "yes")])
914
; Two-register lane read: two v_readlane_b32 instructions fetch the %L and
; %H parts of lane operand 2.  The output is earlyclobber ("&"), presumably
; so that the first write cannot overwrite the lane index before the second
; read uses it -- TODO confirm.
915(define_insn "vec_extract<mode><scalar_mode>"
03876953 916 [(set (match_operand:<SCALAR_MODE> 0 "register_operand" "=&Sg")
3d6275e3 917 (vec_select:<SCALAR_MODE>
03876953
AS
918 (match_operand:V_2REG 1 "register_operand" " v")
919 (parallel [(match_operand:SI 2 "gcn_alu_operand" " SvB")])))]
3d6275e3
AS
920 ""
921 "v_readlane_b32 %L0, %L1, %2\;v_readlane_b32 %H0, %H1, %2"
922 [(set_attr "type" "vmult")
923 (set_attr "length" "16")
924 (set_attr "exec" "none")
925 (set_attr "laneselect" "yes")])
926
8aeabd9f
AS
; Four-register lane read: four v_readlane_b32 instructions, one per part
; (%L, %H, %J, %K), all using the lane index in operand 2.  The output is
; earlyclobber, as in the V_2REG variant.
927(define_insn "vec_extract<mode><scalar_mode>"
928 [(set (match_operand:<SCALAR_MODE> 0 "register_operand" "=&Sg")
929 (vec_select:<SCALAR_MODE>
930 (match_operand:V_4REG 1 "register_operand" " v")
931 (parallel [(match_operand:SI 2 "gcn_alu_operand" " SvB")])))]
932 ""
933 "v_readlane_b32 %L0, %L1, %2\;v_readlane_b32 %H0, %H1, %2\;v_readlane_b32 %J0, %J1, %2\;v_readlane_b32 %K0, %K1, %2"
934 [(set_attr "type" "vmult")
935 (set_attr "length" "32")
936 (set_attr "exec" "none")
937 (set_attr "laneselect" "yes")])
938
db80ccd3
AS
; Extract a narrower vector from the front of a wider one with the same
; scalar mode (operand 2 must satisfy ascending_zero_int_parallel).  When
; the output is tied to the input (first alternative) only an assembler
; comment is emitted (length 0); otherwise the register is copied with
; v_mov_b32.
939(define_insn "vec_extract<V_1REG:mode><V_1REG_ALT:mode>_nop"
940 [(set (match_operand:V_1REG_ALT 0 "register_operand" "=v,v")
941 (vec_select:V_1REG_ALT
942 (match_operand:V_1REG 1 "register_operand" " 0,v")
943 (match_operand 2 "ascending_zero_int_parallel" "")))]
944 "MODE_VF (<V_1REG_ALT:MODE>mode) < MODE_VF (<V_1REG:MODE>mode)
9ae1fbdd
AS
945 && <V_1REG_ALT:SCALAR_MODE>mode == <V_1REG:SCALAR_MODE>mode
946 /* This comment silences a warning for operands[2]. */"
db80ccd3
AS
947 "@
948 ; in-place extract %0
949 v_mov_b32\t%L0, %L1"
950 [(set_attr "type" "vmult")
951 (set_attr "length" "0,8")])
952
; Two-register version of the leading-lanes extract: nothing but an
; assembler comment when output and input are tied, otherwise two v_mov_b32
; instructions copying the %L and %H parts.
953(define_insn "vec_extract<V_2REG:mode><V_2REG_ALT:mode>_nop"
954 [(set (match_operand:V_2REG_ALT 0 "register_operand" "=v,v")
955 (vec_select:V_2REG_ALT
956 (match_operand:V_2REG 1 "register_operand" " 0,v")
957 (match_operand 2 "ascending_zero_int_parallel" "")))]
958 "MODE_VF (<V_2REG_ALT:MODE>mode) < MODE_VF (<V_2REG:MODE>mode)
9ae1fbdd
AS
959 && <V_2REG_ALT:SCALAR_MODE>mode == <V_2REG:SCALAR_MODE>mode
960 /* This comment silences a warning for operands[2]. */"
db80ccd3
AS
961 "@
962 ; in-place extract %0
963 v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1"
964 [(set_attr "type" "vmult")
965 (set_attr "length" "0,8")])
966
8aeabd9f
AS
; Four-register version of the leading-lanes extract: an assembler comment
; when output and input are tied, otherwise four v_mov_b32 instructions
; copying the %L, %H, %J and %K parts.
967(define_insn "vec_extract<V_4REG:mode><V_4REG_ALT:mode>_nop"
968 [(set (match_operand:V_4REG_ALT 0 "register_operand" "=v,v")
969 (vec_select:V_4REG_ALT
970 (match_operand:V_4REG 1 "register_operand" " 0,v")
971 (match_operand 2 "ascending_zero_int_parallel" "")))]
972 "MODE_VF (<V_4REG_ALT:MODE>mode) < MODE_VF (<V_4REG:MODE>mode)
973 && <V_4REG_ALT:SCALAR_MODE>mode == <V_4REG:SCALAR_MODE>mode"
974 "@
975 ; in-place extract %0
976 v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\;v_mov_b32\t%J0, %J1\;v_mov_b32\t%K0, %K1"
977 [(set_attr "type" "vmult")
978 (set_attr "length" "0,16")])
979
; Extract sub-vector number operand 2 of the wide vector operand 1 into the
; narrower vector operand 0 (same scalar mode, fewer lanes).  Sub-vector 0
; is handled by the "_nop" extract pattern with an ascending lane list
; 0..N-1.  Any other sub-vector is moved to the front with ds_bpermute,
; using a vec_series of offsets that starts at firstlane*4 and steps by 4,
; and is then taken as a subreg at offset 0 of the permuted register.
(define_expand "vec_extract<V_MOV:mode><V_MOV_ALT:mode>"
  [(match_operand:V_MOV_ALT 0 "register_operand")
   (match_operand:V_MOV 1 "register_operand")
   (match_operand 2 "immediate_operand")]
  "MODE_VF (<V_MOV_ALT:MODE>mode) < MODE_VF (<V_MOV:MODE>mode)
   && <V_MOV_ALT:SCALAR_MODE>mode == <V_MOV:SCALAR_MODE>mode
   && (!TARGET_WAVE64_COMPAT || MODE_VF (<V_MOV:MODE>mode) <= 32)"
  {
    const int nlanes = GET_MODE_NUNITS (<V_MOV_ALT:MODE>mode);
    const int start = INTVAL (operands[2]) * nlanes;

    if (start != 0)
      {
        /* FIXME: optimize this by using DPP where available.  */

        /* Per-lane offsets: start*4, start*4 + 4, ...  */
        rtx offsets = gen_reg_rtx (<V_MOV:VnSI>mode);
        emit_insn (gen_vec_series<V_MOV:vnsi> (offsets,
                                               GEN_INT (start * 4),
                                               GEN_INT (4)));

        /* Bring the requested lanes to the front of a fresh register.  */
        rtx shuffled = gen_reg_rtx (<V_MOV:MODE>mode);
        emit_insn (gen_ds_bpermute<V_MOV:mode> (shuffled, offsets,
                                                operands[1],
                                                get_exec (<V_MOV:MODE>mode)));

        emit_move_insn (operands[0],
                        gen_rtx_SUBREG (<V_MOV_ALT:MODE>mode, shuffled, 0));
        DONE;
      }

    /* First sub-vector: select lanes 0 .. nlanes-1 in place.  */
    rtx lanes = gen_rtx_PARALLEL (<V_MOV:MODE>mode, rtvec_alloc (nlanes));
    int lane = 0;
    while (lane < nlanes)
      {
        XVECEXP (lanes, 0, lane) = GEN_INT (lane);
        lane++;
      }
    emit_insn (gen_vec_extract<V_MOV:mode><V_MOV_ALT:mode>_nop
                (operands[0], operands[1], lanes));
    DONE;
  })
1017
b92d1124
AS
; Extract the element of vector operand 2 selected by the highest set bit
; of the DImode mask operand 1, and store it in scalar operand 0.  The lane
; index is computed in a fresh SImode pseudo as 63 - clz (mask), which is
; why the expander requires can_create_pseudo_p ().
(define_expand "extract_last_<mode>"
  [(match_operand:<SCALAR_MODE> 0 "register_operand")
   (match_operand:DI 1 "gcn_alu_operand")
   (match_operand:V_MOV 2 "register_operand")]
  "can_create_pseudo_p ()"
  {
    rtx lane = gen_reg_rtx (SImode);

    /* lane = clz (mask)  */
    emit_insn (gen_clzdi2 (lane, operands[1]));
    /* lane = 63 - lane  */
    emit_insn (gen_subsi3 (lane, GEN_INT (63), lane));
    /* dst = vect[lane]  */
    emit_insn (gen_vec_extract<mode><scalar_mode> (operands[0], operands[2],
                                                   lane));
    DONE;
  })
1034
; Like extract_last_<mode>, but with a fallback: when the DImode mask
; operand 2 equals zero, operand 0 receives the default value operand 1;
; otherwise it receives the element of vector operand 3 picked by
; extract_last_<mode>.  Implemented as a compare-and-branch around the two
; alternatives.
(define_expand "fold_extract_last_<mode>"
  [(match_operand:<SCALAR_MODE> 0 "register_operand")
   (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand")
   (match_operand:DI 2 "gcn_alu_operand")
   (match_operand:V_MOV 3 "register_operand")]
  "can_create_pseudo_p ()"
  {
    rtx mask = operands[2];
    rtx default_label = gen_label_rtx ();
    rtx join_label = gen_label_rtx ();

    /* if (mask == 0) goto default_label;  */
    emit_jump_insn (gen_cbranchdi4 (gen_rtx_EQ (VOIDmode, mask, const0_rtx),
                                    mask, const0_rtx, default_label));

    /* Non-empty mask: take the last active element, then skip the default.  */
    emit_insn (gen_extract_last_<mode> (operands[0], mask, operands[3]));
    emit_jump_insn (gen_jump (join_label));
    emit_barrier ();

    /* Empty mask: use the caller-supplied default.  */
    emit_label (default_label);
    emit_move_insn (operands[0], operands[1]);

    emit_label (join_label);
    DONE;
  })
1059
; Initialise vector operand 0 from operand 1.  All of the work is delegated
; to gcn_expand_vector_init; the expander emits nothing else.
3d6275e3 1060(define_expand "vec_init<mode><scalar_mode>"
8aeabd9f 1061 [(match_operand:V_MOV 0 "register_operand")
3d6275e3
AS
1062 (match_operand 1)]
1063 ""
1064 {
1065 gcn_expand_vector_init (operands[0], operands[1]);
1066 DONE;
1067 })
1068
8aeabd9f
AS
; Variant of vec_init whose operand 1 has a V_MOV_ALT mode.  It is enabled
; only when both modes share the same scalar mode and the ALT mode has
; fewer lanes.  Also delegates to gcn_expand_vector_init.
1069(define_expand "vec_init<V_MOV:mode><V_MOV_ALT:mode>"
1070 [(match_operand:V_MOV 0 "register_operand")
1071 (match_operand:V_MOV_ALT 1)]
1072 "<V_MOV:SCALAR_MODE>mode == <V_MOV_ALT:SCALAR_MODE>mode
1073 && MODE_VF (<V_MOV_ALT:MODE>mode) < MODE_VF (<V_MOV:MODE>mode)"
769a10d0
AS
1074 {
1075 gcn_expand_vector_init (operands[0], operands[1]);
1076 DONE;
1077 })
1078
3d6275e3
AS
1079;; }}}
1080;; {{{ Scatter / Gather
1081
1082;; GCN does not have an instruction for loading a vector from contiguous
1083;; memory so *all* loads and stores are eventually converted to scatter
1084;; or gather.
1085;;
1086;; GCC does not permit MEM to hold vectors of addresses, so we must use an
1087;; unspec. The unspec formats are as follows:
1088;;
1165109b 1089;; (unspec:V??
3d6275e3
AS
1090;; [(<address expression>)
1091;; (<addr_space_t>)
1092;; (<use_glc>)
1093;; (mem:BLK (scratch))]
1094;; UNSPEC_GATHER)
1095;;
1096;; (unspec:BLK
1097;; [(<address expression>)
1098;; (<source register>)
1099;; (<addr_space_t>)
1100;; (<use_glc>)
1101;; (<exec>)]
1102;; UNSPEC_SCATTER)
1103;;
1104;; - Loads are expected to be wrapped in a vec_merge, so do not need <exec>.
1105;; - The mem:BLK does not contain any real information, but indicates that an
1106;; unknown memory read is taking place. Stores are expected to use a similar
1107;; mem:BLK outside the unspec.
1108;; - The address space and glc (volatile) fields are there to replace the
1109;; fields normally found in a MEM.
1110;; - Multiple forms of address expression are supported, below.
aad32a00
AS
1111;;
1112;; TODO: implement combined gather and zero_extend, but only for -msram-ecc=on
3d6275e3 1113
; Gather-load expander.  Operands: 0 = destination vector, 1 = DImode base,
; 2 = <VnSI> offset vector, 3 and 4 = immediate/SImode values forwarded to
; gcn_expand_scaled_offsets (operand 4, then INTVAL of operand 3).
; The helper's result decides which insn is used: a <VnDI> result is a full
; address vector for the "1offset" form; anything else is passed as the
; offset vector of the "2offsets" form together with the scalar base.  The
; remaining immediate operands are all passed as zero.
(define_expand "gather_load<mode><vnsi>"
  [(match_operand:V_MOV 0 "register_operand")
   (match_operand:DI 1 "register_operand")
   (match_operand:<VnSI> 2 "register_operand")
   (match_operand 3 "immediate_operand")
   (match_operand:SI 4 "gcn_alu_operand")]
  ""
  {
    rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[1],
                                          operands[2], operands[4],
                                          INTVAL (operands[3]), NULL);

    if (GET_MODE (addr) != <VnDI>mode)
      /* Scalar base plus vector of offsets.  */
      emit_insn (gen_gather<mode>_insn_2offsets (operands[0], operands[1],
                                                 addr, const0_rtx,
                                                 const0_rtx, const0_rtx));
    else
      /* Complete 64-bit address in every lane.  */
      emit_insn (gen_gather<mode>_insn_1offset (operands[0], addr,
                                                const0_rtx, const0_rtx,
                                                const0_rtx));
    DONE;
  })
1135
3d6275e3
AS
1136; Allow any address expression
; Build an UNSPEC_GATHER load around an arbitrary address expression
; (operand 1 has no predicate).  Operands 2 and 3 are immediates; the
; preparation code is empty, so the template is emitted unchanged.
1137(define_expand "gather<mode>_expr<exec>"
8aeabd9f
AS
1138 [(set (match_operand:V_MOV 0 "register_operand")
1139 (unspec:V_MOV
3d6275e3
AS
1140 [(match_operand 1 "")
1141 (match_operand 2 "immediate_operand")
1142 (match_operand 3 "immediate_operand")
1143 (mem:BLK (scratch))]
1144 UNSPEC_GATHER))]
1145 ""
1146 {})
1147
; Gather from a vector of 64-bit addresses (operand 1) plus a constant
; offset (operand 2).  Operand 3 is the address space and operand 4 the glc
; flag.  The condition accepts flat addresses with an offset in [0, 0x1000)
; and global addresses with an offset in [-0x1000, 0x1000).  The template is
; built at output time: flat_load or global_load with the optional " glc"
; suffix, followed by an s_waitcnt.
(define_insn "gather<mode>_insn_1offset<exec>"
  [(set (match_operand:V_MOV 0 "register_operand" "=v,a,&v,&a")
        (unspec:V_MOV
          [(plus:<VnDI> (match_operand:<VnDI> 1 "register_operand" " v,v, v, v")
                        (vec_duplicate:<VnDI>
                          (match_operand 2 "immediate_operand" " n,n, n, n")))
           (match_operand 3 "immediate_operand" " n,n, n, n")
           (match_operand 4 "immediate_operand" " n,n, n, n")
           (mem:BLK (scratch))]
          UNSPEC_GATHER))]
  "(AS_FLAT_P (INTVAL (operands[3]))
    && ((unsigned HOST_WIDE_INT)INTVAL(operands[2]) < 0x1000))
   || (AS_GLOBAL_P (INTVAL (operands[3]))
       && (((unsigned HOST_WIDE_INT)INTVAL(operands[2]) + 0x1000) < 0x2000))"
  {
    /* The returned template lives in static storage.  */
    static char templ[200];
    const addr_space_t space = INTVAL (operands[3]);
    const char *glc_suffix = "";

    if (INTVAL (operands[4]))
      glc_suffix = " glc";

    if (AS_FLAT_P (space))
      sprintf (templ, "flat_load%%o0\t%%0, %%1 offset:%%2%s\;s_waitcnt\t0",
               glc_suffix);
    else if (AS_GLOBAL_P (space))
      sprintf (templ, "global_load%%o0\t%%0, %%1, off offset:%%2%s\;"
               "s_waitcnt\tvmcnt(0)", glc_suffix);
    else
      gcc_unreachable ();

    return templ;
  }
  [(set_attr "type" "flat")
   (set_attr "length" "12")
   (set_attr "cdna" "*,cdna2,*,cdna2")
   (set_attr "xnack" "off,off,on,on")])
3d6275e3
AS
1181
; Gather from a DS address space: a vector of 32-bit addresses (operand 1)
; plus a constant offset (operand 2) that must lie in [0, 0x10000).
; Operand 3 is the address space; when AS_GDS_P holds for it, " gds" is
; appended to the ds_read instruction.  The load is followed by
; s_waitcnt lgkmcnt(0).
(define_insn "gather<mode>_insn_1offset_ds<exec>"
  [(set (match_operand:V_MOV 0 "register_operand" "=v,a")
        (unspec:V_MOV
          [(plus:<VnSI> (match_operand:<VnSI> 1 "register_operand" " v,v")
                        (vec_duplicate:<VnSI>
                          (match_operand 2 "immediate_operand" " n,n")))
           (match_operand 3 "immediate_operand" " n,n")
           (match_operand 4 "immediate_operand" " n,n")
           (mem:BLK (scratch))]
          UNSPEC_GATHER))]
  "(AS_ANY_DS_P (INTVAL (operands[3]))
    && ((unsigned HOST_WIDE_INT)INTVAL(operands[2]) < 0x10000))"
  {
    /* The returned template lives in static storage.  */
    static char templ[200];
    const addr_space_t space = INTVAL (operands[3]);
    const char *gds_suffix = "";

    if (AS_GDS_P (space))
      gds_suffix = " gds";

    sprintf (templ, "ds_read%%b0\t%%0, %%1 offset:%%2%s\;s_waitcnt\tlgkmcnt(0)",
             gds_suffix);
    return templ;
  }
  [(set_attr "type" "ds")
   (set_attr "length" "12")
   (set_attr "cdna" "*,cdna2")])
3d6275e3
AS
1204
; Gather from global memory using a scalar 64-bit base (operand 1), a
; vector of sign-extended 32-bit offsets (operand 2) and a constant offset
; (operand 3) in [-0x1000, 0x1000).  Operand 4 is the address space, which
; must satisfy AS_GLOBAL_P, and operand 5 the glc flag.  Emits global_load
; followed by s_waitcnt vmcnt(0).
(define_insn "gather<mode>_insn_2offsets<exec>"
  [(set (match_operand:V_MOV 0 "register_operand" "=v,a,&v,&a")
        (unspec:V_MOV
          [(plus:<VnDI>
             (plus:<VnDI>
               (vec_duplicate:<VnDI>
                 (match_operand:DI 1 "register_operand" "Sv,Sv,Sv,Sv"))
               (sign_extend:<VnDI>
                 (match_operand:<VnSI> 2 "register_operand" " v, v, v, v")))
             (vec_duplicate:<VnDI> (match_operand 3 "immediate_operand"
                                                    " n, n, n, n")))
           (match_operand 4 "immediate_operand" " n, n, n, n")
           (match_operand 5 "immediate_operand" " n, n, n, n")
           (mem:BLK (scratch))]
          UNSPEC_GATHER))]
  "(AS_GLOBAL_P (INTVAL (operands[4]))
    && (((unsigned HOST_WIDE_INT)INTVAL(operands[3]) + 0x1000) < 0x2000))"
  {
    /* The returned template lives in static storage.  */
    static char templ[200];
    const addr_space_t space = INTVAL (operands[4]);
    const char *glc_suffix = "";

    if (INTVAL (operands[5]))
      glc_suffix = " glc";

    /* The insn condition only admits the global address space.  */
    if (!AS_GLOBAL_P (space))
      gcc_unreachable ();

    sprintf (templ, "global_load%%o0\t%%0, %%2, %%1 offset:%%3%s\;"
             "s_waitcnt\tvmcnt(0)", glc_suffix);
    return templ;
  }
  [(set_attr "type" "flat")
   (set_attr "length" "12")
   (set_attr "cdna" "*,cdna2,*,cdna2")
   (set_attr "xnack" "off,off,on,on")])
3d6275e3 1239
; Scatter-store expander.  Operands: 0 = DImode base, 1 = <VnSI> offset
; vector, 2 and 3 = immediate/SImode values forwarded to
; gcn_expand_scaled_offsets (operand 3, then INTVAL of operand 2),
; 4 = vector of values to store.
; A <VnDI> result from the helper is a full address vector for the
; "1offset" form; anything else is used as the offset vector of the
; "2offsets" form together with the scalar base.  The remaining immediate
; operands are all passed as zero.
(define_expand "scatter_store<mode><vnsi>"
  [(match_operand:DI 0 "register_operand")
   (match_operand:<VnSI> 1 "register_operand")
   (match_operand 2 "immediate_operand")
   (match_operand:SI 3 "gcn_alu_operand")
   (match_operand:V_MOV 4 "register_operand")]
  ""
  {
    rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[0],
                                          operands[1], operands[3],
                                          INTVAL (operands[2]), NULL);

    if (GET_MODE (addr) != <VnDI>mode)
      /* Scalar base plus vector of offsets.  */
      emit_insn (gen_scatter<mode>_insn_2offsets (operands[0], addr,
                                                  const0_rtx, operands[4],
                                                  const0_rtx, const0_rtx));
    else
      /* Complete 64-bit address in every lane.  */
      emit_insn (gen_scatter<mode>_insn_1offset (addr, const0_rtx,
                                                 operands[4], const0_rtx,
                                                 const0_rtx));
    DONE;
  })
1261
3d6275e3
AS
1262; Allow any address expression
; Build an UNSPEC_SCATTER store around an arbitrary <VnDI> address
; expression (operand 0 has an empty predicate).  Operand 1 is the vector
; to store; operands 2 and 3 are immediates.  The preparation code is
; empty, so the template is emitted unchanged.
1263(define_expand "scatter<mode>_expr<exec_scatter>"
1264 [(set (mem:BLK (scratch))
1265 (unspec:BLK
1165109b 1266 [(match_operand:<VnDI> 0 "")
8aeabd9f 1267 (match_operand:V_MOV 1 "register_operand")
3d6275e3
AS
1268 (match_operand 2 "immediate_operand")
1269 (match_operand 3 "immediate_operand")]
1270 UNSPEC_SCATTER))]
1271 ""
1272 {})
1273
; Scatter to a vector of 64-bit addresses (operand 0) plus a constant
; offset (operand 1).  Operand 2 is the data, operand 3 the address space
; and operand 4 the glc flag.  The condition accepts flat addresses with an
; offset in [0, 0x1000) and global addresses with an offset in
; [-0x1000, 0x1000).  Emits flat_store or global_store with the optional
; " glc" suffix.
(define_insn "scatter<mode>_insn_1offset<exec_scatter>"
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(plus:<VnDI> (match_operand:<VnDI> 0 "register_operand" "v,v")
                        (vec_duplicate:<VnDI>
                          (match_operand 1 "immediate_operand" "n,n")))
           (match_operand:V_MOV 2 "register_operand" "v,a")
           (match_operand 3 "immediate_operand" "n,n")
           (match_operand 4 "immediate_operand" "n,n")]
          UNSPEC_SCATTER))]
  "(AS_FLAT_P (INTVAL (operands[3]))
    /* A zero offset is covered by the unsigned range test.  */
    && ((unsigned HOST_WIDE_INT)INTVAL(operands[1]) < 0x1000))
   || (AS_GLOBAL_P (INTVAL (operands[3]))
       && (((unsigned HOST_WIDE_INT)INTVAL(operands[1]) + 0x1000) < 0x2000))"
  {
    /* The returned template lives in static storage.  */
    static char templ[200];
    const addr_space_t space = INTVAL (operands[3]);
    const char *glc_suffix = "";

    if (INTVAL (operands[4]))
      glc_suffix = " glc";

    if (AS_FLAT_P (space))
      sprintf (templ, "flat_store%%s2\t%%0, %%2 offset:%%1%s", glc_suffix);
    else if (AS_GLOBAL_P (space))
      sprintf (templ, "global_store%%s2\t%%0, %%2, off offset:%%1%s",
               glc_suffix);
    else
      gcc_unreachable ();

    return templ;
  }
  [(set_attr "type" "flat")
   (set_attr "length" "12")
   (set_attr "cdna" "*,cdna2")])
3d6275e3
AS
1306
; Scatter to a DS address space: a vector of 32-bit addresses (operand 0)
; plus a constant offset (operand 1) that must lie in [0, 0x10000).
; Operand 2 is the data and operand 3 the address space; when AS_GDS_P
; holds for it, " gds" is appended to the ds_write instruction.  The store
; is followed by s_waitcnt lgkmcnt(0).
(define_insn "scatter<mode>_insn_1offset_ds<exec_scatter>"
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(plus:<VnSI> (match_operand:<VnSI> 0 "register_operand" "v,v")
                        (vec_duplicate:<VnSI>
                          (match_operand 1 "immediate_operand" "n,n")))
           (match_operand:V_MOV 2 "register_operand" "v,a")
           (match_operand 3 "immediate_operand" "n,n")
           (match_operand 4 "immediate_operand" "n,n")]
          UNSPEC_SCATTER))]
  "(AS_ANY_DS_P (INTVAL (operands[3]))
    && ((unsigned HOST_WIDE_INT)INTVAL(operands[1]) < 0x10000))"
  {
    /* The returned template lives in static storage.  */
    static char templ[200];
    const addr_space_t space = INTVAL (operands[3]);
    const char *gds_suffix = "";

    if (AS_GDS_P (space))
      gds_suffix = " gds";

    sprintf (templ, "ds_write%%b2\t%%0, %%2 offset:%%1%s\;s_waitcnt\tlgkmcnt(0)",
             gds_suffix);
    return templ;
  }
  [(set_attr "type" "ds")
   (set_attr "length" "12")
   (set_attr "cdna" "*,cdna2")])
3d6275e3
AS
1329
; Scatter to global memory using a scalar 64-bit base (operand 0), a vector
; of sign-extended 32-bit offsets (operand 1) and a constant offset
; (operand 2) in [-0x1000, 0x1000).  Operand 3 is the data, operand 4 the
; address space, which must satisfy AS_GLOBAL_P, and operand 5 the glc
; flag.  Emits a single global_store.
(define_insn "scatter<mode>_insn_2offsets<exec_scatter>"
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(plus:<VnDI>
             (plus:<VnDI>
               (vec_duplicate:<VnDI>
                 (match_operand:DI 0 "register_operand" "Sv,Sv"))
               (sign_extend:<VnDI>
                 (match_operand:<VnSI> 1 "register_operand" "v,v")))
             (vec_duplicate:<VnDI> (match_operand 2 "immediate_operand" "n,n")))
           (match_operand:V_MOV 3 "register_operand" "v,a")
           (match_operand 4 "immediate_operand" "n,n")
           (match_operand 5 "immediate_operand" "n,n")]
          UNSPEC_SCATTER))]
  "(AS_GLOBAL_P (INTVAL (operands[4]))
    && (((unsigned HOST_WIDE_INT)INTVAL(operands[2]) + 0x1000) < 0x2000))"
  {
    /* The returned template lives in static storage.  */
    static char templ[200];
    const addr_space_t space = INTVAL (operands[4]);
    const char *glc_suffix = "";

    if (INTVAL (operands[5]))
      glc_suffix = " glc";

    /* The insn condition only admits the global address space.  */
    if (!AS_GLOBAL_P (space))
      gcc_unreachable ();

    sprintf (templ, "global_store%%s3\t%%1, %%3, %%0 offset:%%2%s",
             glc_suffix);
    return templ;
  }
  [(set_attr "type" "flat")
   (set_attr "length" "12")
   (set_attr "cdna" "*,cdna2")])
3d6275e3
AS
1361
1362;; }}}
1363;; {{{ Permutations
1364
; Lane permutation of single-register vector operand 2, steered by the
; <VnSI> vector operand 1, under the exec mask operand 3.  Emits
; ds_bpermute_b32 followed by s_waitcnt lgkmcnt(0).  Note that the template
; prints operand 1 before operand 2.
1365(define_insn "ds_bpermute<mode>"
03876953
AS
1366 [(set (match_operand:V_1REG 0 "register_operand" "=v")
1367 (unspec:V_1REG
1368 [(match_operand:V_1REG 2 "register_operand" " v")
1165109b 1369 (match_operand:<VnSI> 1 "register_operand" " v")
03876953 1370 (match_operand:DI 3 "gcn_exec_reg_operand" " e")]
3d6275e3
AS
1371 UNSPEC_BPERMUTE))]
1372 ""
1373 "ds_bpermute_b32\t%0, %1, %2\;s_waitcnt\tlgkmcnt(0)"
1374 [(set_attr "type" "vop2")
1375 (set_attr "length" "12")])
1376
; Two-register lane permutation.  It is never output directly (template
; "#"): once reload has completed it is split into two <VnSI>
; UNSPEC_BPERMUTE sets, one per register part.  gcn_operand_part supplies
; parts 0 and 1 of the destination (operands 4, 5) and of the source
; (operands 6, 7); both halves share the index vector operand 1 and the
; exec operand 3.
1377(define_insn_and_split "ds_bpermute<mode>"
03876953
AS
1378 [(set (match_operand:V_2REG 0 "register_operand" "=&v")
1379 (unspec:V_2REG
1380 [(match_operand:V_2REG 2 "register_operand" " v0")
1165109b 1381 (match_operand:<VnSI> 1 "register_operand" " v")
03876953 1382 (match_operand:DI 3 "gcn_exec_reg_operand" " e")]
3d6275e3
AS
1383 UNSPEC_BPERMUTE))]
1384 ""
1385 "#"
1386 "reload_completed"
1165109b
AS
1387 [(set (match_dup 4) (unspec:<VnSI>
1388 [(match_dup 6) (match_dup 1) (match_dup 3)]
1389 UNSPEC_BPERMUTE))
1390 (set (match_dup 5) (unspec:<VnSI>
1391 [(match_dup 7) (match_dup 1) (match_dup 3)]
1392 UNSPEC_BPERMUTE))]
3d6275e3
AS
1393 {
1394 operands[4] = gcn_operand_part (<MODE>mode, operands[0], 0);
1395 operands[5] = gcn_operand_part (<MODE>mode, operands[0], 1);
1396 operands[6] = gcn_operand_part (<MODE>mode, operands[2], 0);
1397 operands[7] = gcn_operand_part (<MODE>mode, operands[2], 1);
1398 }
1399 [(set_attr "type" "vmult")
1400 (set_attr "length" "24")])
1401
; DPP move tagged UNSPEC_MOV_DPP_SHR, available only when TARGET_DPP_FULL
; holds.  The assembler text is produced by gcn_expand_dpp_shr_insn from
; the mode, the "v_mov_b32" mnemonic, the unspec code and the constant in
; operand 2.
a5879399 1402(define_insn "@dpp_move<mode>"
03876953
AS
1403 [(set (match_operand:V_noHI 0 "register_operand" "=v")
1404 (unspec:V_noHI
1405 [(match_operand:V_noHI 1 "register_operand" " v")
1406 (match_operand:SI 2 "const_int_operand" " n")]
a5879399 1407 UNSPEC_MOV_DPP_SHR))]
68e03492 1408 "TARGET_DPP_FULL"
a5879399
AS
1409 {
1410 return gcn_expand_dpp_shr_insn (<MODE>mode, "v_mov_b32",
1411 UNSPEC_MOV_DPP_SHR, INTVAL (operands[2]));
1412 }
1413 [(set_attr "type" "vop_dpp")
1414 (set_attr "length" "16")])
1415
1bde3ace
AJ
; DPP move tagged UNSPEC_MOV_DPP_SWAP_PAIRS.  The assembler text comes from
; gcn_expand_dpp_swap_pairs_insn, called with the mode, the "v_mov_b32"
; mnemonic and the unspec code.
1416(define_insn "@dpp_swap_pairs<mode>"
1417 [(set (match_operand:V_noHI 0 "register_operand" "=v")
1418 (unspec:V_noHI
1419 [(match_operand:V_noHI 1 "register_operand" " v")]
1420 UNSPEC_MOV_DPP_SWAP_PAIRS))]
1421 ""
1422 {
1423 return gcn_expand_dpp_swap_pairs_insn (<MODE>mode, "v_mov_b32",
1424 UNSPEC_MOV_DPP_SWAP_PAIRS);
1425 }
1426 [(set_attr "type" "vop_dpp")
1427 (set_attr "length" "16")])
1428
; DPP move tagged UNSPEC_MOV_DPP_DISTRIBUTE_EVEN.  The assembler text comes
; from gcn_expand_dpp_distribute_even_insn, called with the mode, the
; "v_mov_b32" mnemonic and the unspec code.
1429(define_insn "@dpp_distribute_even<mode>"
1430 [(set (match_operand:V_noHI 0 "register_operand" "=v")
1431 (unspec:V_noHI
1432 [(match_operand:V_noHI 1 "register_operand" " v")]
1433 UNSPEC_MOV_DPP_DISTRIBUTE_EVEN))]
1434 ""
1435 {
1436 return gcn_expand_dpp_distribute_even_insn (<MODE>mode, "v_mov_b32",
1437 UNSPEC_MOV_DPP_DISTRIBUTE_EVEN);
1438 }
1439 [(set_attr "type" "vop_dpp")
1440 (set_attr "length" "16")])
1441
; DPP move tagged UNSPEC_MOV_DPP_DISTRIBUTE_ODD.  The assembler text comes
; from gcn_expand_dpp_distribute_odd_insn, called with the mode, the
; "v_mov_b32" mnemonic and the unspec code.
;
; The unspec code in the RTL template must be the ODD one.  With the EVEN
; code the template is token-for-token identical to dpp_distribute_even, so
; RTL generated through this pattern would be recognised as the "even" insn
; (the first matching pattern wins) and the wrong lanes would be selected.
(define_insn "@dpp_distribute_odd<mode>"
  [(set (match_operand:V_noHI 0 "register_operand" "=v")
        (unspec:V_noHI
          [(match_operand:V_noHI 1 "register_operand" " v")]
          UNSPEC_MOV_DPP_DISTRIBUTE_ODD))]
  ""
  {
    return gcn_expand_dpp_distribute_odd_insn (<MODE>mode, "v_mov_b32",
                                               UNSPEC_MOV_DPP_DISTRIBUTE_ODD);
  }
  [(set_attr "type" "vop_dpp")
   (set_attr "length" "16")])
1454
3d6275e3
AS
1455;; }}}
1456;; {{{ ALU special case: add/sub
1457
; Integer vector add for single-register modes.  Emits v_add_co_u32 with
; vcc named as the carry-out destination, which is why the pattern clobbers
; VCC_REG.  Operand 1 is marked commutative ("%1" in the cons list).  The
; two alternatives differ only in operand 2's constraint (vSvA with length
; 4, vSvB with length 8); the "^" in the second row presumably reuses the
; template of the row above -- confirm against the compact-syntax docs.
77f7566e 1458(define_insn "add<mode>3<exec_clobber>"
e24b0fed 1459 [(set (match_operand:V_INT_1REG 0 "register_operand")
03876953 1460 (plus:V_INT_1REG
e24b0fed
AS
1461 (match_operand:V_INT_1REG 1 "register_operand")
1462 (match_operand:V_INT_1REG 2 "gcn_alu_operand")))
3d6275e3
AS
1463 (clobber (reg:DI VCC_REG))]
1464 ""
e24b0fed 1465 {@ [cons: =0, %1, 2; attrs: type, length]
b9bf0c3f 1466 [v,v,vSvA;vop2,4] v_add_co_u32\t%0, vcc, %2, %1
e24b0fed
AS
1467 [v,v,vSvB;vop2,8] ^
1468 })
3d6275e3 1469
; Integer vector add where one addend is a broadcast scalar (operand 2
; inside vec_duplicate) and the other a vector register (operand 1).  Emits
; v_add_co_u32 with vcc as carry-out, hence the VCC_REG clobber.  The two
; alternatives differ only in operand 2's constraint (SvA with length 4,
; SvB with length 8).
77f7566e 1470(define_insn "add<mode>3_dup<exec_clobber>"
e24b0fed 1471 [(set (match_operand:V_INT_1REG 0 "register_operand")
03876953
AS
1472 (plus:V_INT_1REG
1473 (vec_duplicate:V_INT_1REG
e24b0fed
AS
1474 (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand"))
1475 (match_operand:V_INT_1REG 1 "register_operand")))
3d6275e3
AS
1476 (clobber (reg:DI VCC_REG))]
1477 ""
e24b0fed 1478 {@ [cons: =0, 1, 2; attrs: type, length]
b9bf0c3f 1479 [v,v,SvA;vop2,4] v_add_co_u32\t%0, vcc, %2, %1
e24b0fed
AS
1480 [v,v,SvB;vop2,8] ^
1481 })
3d6275e3 1482
; V_SI add that also produces an explicit carry-out in DImode operand 3,
; described as (operand 1 + operand 2) <u operand 1.  The carry destination
; may use the cV constraint (type vop2) or Sg (type vop3b, length 8).  All
; alternatives print v_add_co_u32 with %3 as the carry operand.
1165109b 1483(define_insn "add<mode>3_vcc<exec_vcc>"
e24b0fed 1484 [(set (match_operand:V_SI 0 "register_operand")
1165109b 1485 (plus:V_SI
e24b0fed
AS
1486 (match_operand:V_SI 1 "register_operand")
1487 (match_operand:V_SI 2 "gcn_alu_operand")))
1488 (set (match_operand:DI 3 "register_operand")
1165109b 1489 (ltu:DI (plus:V_SI (match_dup 1) (match_dup 2))
3d6275e3
AS
1490 (match_dup 1)))]
1491 ""
e24b0fed 1492 {@ [cons: =0, %1, 2, =3; attrs: type, length]
b9bf0c3f 1493 [v,v,vSvA,cV;vop2 ,4] v_add_co_u32\t%0, %3, %2, %1
e24b0fed
AS
1494 [v,v,vSvB,cV;vop2 ,8] ^
1495 [v,v,vSvA,Sg;vop3b,8] ^
1496 })
3d6275e3
AS
1497
1498; This pattern only changes the VCC bits when the corresponding lane is
1499; enabled, so the set must be described as an ior.
1500
; V_SI add of a broadcast SImode scalar (operand 1) and a vector (operand
; 2), with an explicit carry-out in DImode operand 3.  Prints v_add_co_u32
; with %3 as the carry operand.
; NOTE(review): in the carry-out expression the match_dup numbers look
; swapped -- vec_duplicate is applied to operand 2 (already a V_SI vector)
; and operand 1 (an SI scalar) is used bare.  The insn still matches RTL
; generated from this same template, so this is left as-is; confirm before
; changing, since other code may build this exact RTL shape.
1165109b 1501(define_insn "add<mode>3_vcc_dup<exec_vcc>"
e24b0fed 1502 [(set (match_operand:V_SI 0 "register_operand")
1165109b
AS
1503 (plus:V_SI
1504 (vec_duplicate:V_SI
e24b0fed
AS
1505 (match_operand:SI 1 "gcn_alu_operand"))
1506 (match_operand:V_SI 2 "register_operand")))
1507 (set (match_operand:DI 3 "register_operand")
1165109b
AS
1508 (ltu:DI (plus:V_SI (vec_duplicate:V_SI (match_dup 2))
1509 (match_dup 1))
1510 (vec_duplicate:V_SI (match_dup 2))))]
3d6275e3 1511 ""
e24b0fed 1512 {@ [cons: =0, 1, 2, =3; attrs: type, length]
b9bf0c3f 1513 [v,SvA,v,cV;vop2 ,4] v_add_co_u32\t%0, %3, %1, %2
e24b0fed
AS
1514 [v,SvB,v,cV;vop2 ,8] ^
1515 [v,SvA,v,Sg;vop3b,8] ^
1516 })
3d6275e3 1517
66b01cc3
AS
1518; v_addc does not accept an SGPR because the VCC read already counts as an
1519; SGPR use and the number of SGPR operands is limited to 1. It does not
1520; accept "B" immediate constants due to a related bus conflict.
3d6275e3 1521
1165109b
AS
; V_SI add with carry-in and carry-out.  The carry-in is DImode operand 3,
; modelled as a vec_merge that yields 1 in lanes where the mask bit is set
; and 0 elsewhere; it is added to operands 1 and 2.  The carry-out, DImode
; operand 4, is described as the ior of two unsigned-overflow tests.  The
; template offers two spellings of the mnemonic ({v_addc_co_u32|
; v_add_co_ci_u32}), chosen by assembler dialect.  The first alternative is
; type vop2 (length 4), the second vop3b (length 8).
1522(define_insn "addc<mode>3<exec_vcc>"
1523 [(set (match_operand:V_SI 0 "register_operand" "=v, v")
1524 (plus:V_SI
1525 (plus:V_SI
1526 (vec_merge:V_SI
1527 (vec_duplicate:V_SI (const_int 1))
1528 (vec_duplicate:V_SI (const_int 0))
66b01cc3 1529 (match_operand:DI 3 "register_operand" " cV,cVSv"))
1165109b
AS
1530 (match_operand:V_SI 1 "gcn_alu_operand" "% v, vA"))
1531 (match_operand:V_SI 2 "gcn_alu_operand" " vA, vA")))
66b01cc3 1532 (set (match_operand:DI 4 "register_operand" "=cV,cVSg")
1165109b
AS
1533 (ior:DI (ltu:DI (plus:V_SI
1534 (plus:V_SI
1535 (vec_merge:V_SI
1536 (vec_duplicate:V_SI (const_int 1))
1537 (vec_duplicate:V_SI (const_int 0))
3d6275e3
AS
1538 (match_dup 3))
1539 (match_dup 1))
1540 (match_dup 2))
1541 (match_dup 2))
1165109b
AS
1542 (ltu:DI (plus:V_SI
1543 (vec_merge:V_SI
1544 (vec_duplicate:V_SI (const_int 1))
1545 (vec_duplicate:V_SI (const_int 0))
3d6275e3
AS
1546 (match_dup 3))
1547 (match_dup 1))
1548 (match_dup 1))))]
1549 ""
b9bf0c3f 1550 "{v_addc_co_u32|v_add_co_ci_u32}\t%0, %4, %2, %1, %3"
3d6275e3
AS
1551 [(set_attr "type" "vop2,vop3b")
1552 (set_attr "length" "4,8")])
1553
; Integer vector subtract (operand 1 - operand 2) for single-register
; modes, with vcc as the borrow destination (hence the VCC_REG clobber).
; Alternative 1 requires operand 2 in a VGPR and prints v_sub_co_u32;
; alternative 2 requires operand 1 in a VGPR and prints v_subrev_co_u32
; with the two source operands exchanged.
77f7566e 1554(define_insn "sub<mode>3<exec_clobber>"
03876953
AS
1555 [(set (match_operand:V_INT_1REG 0 "register_operand" "= v, v")
1556 (minus:V_INT_1REG
1557 (match_operand:V_INT_1REG 1 "gcn_alu_operand" "vSvB, v")
1558 (match_operand:V_INT_1REG 2 "gcn_alu_operand" " v,vSvB")))
3d6275e3
AS
1559 (clobber (reg:DI VCC_REG))]
1560 ""
1561 "@
b9bf0c3f
AS
1562 v_sub_co_u32\t%0, vcc, %1, %2
1563 v_subrev_co_u32\t%0, vcc, %2, %1"
3d6275e3
AS
1564 [(set_attr "type" "vop2")
1565 (set_attr "length" "8,8")])
1566
1165109b
AS
; V_SI subtract with an explicit borrow-out in DImode operand 3, described
; as (operand 1 - operand 2) >u operand 1.  The four alternatives combine
; {v_sub_co_u32, v_subrev_co_u32} (depending on which source must be a
; VGPR) with the borrow destination constraint {cV, Sg}; the Sg forms are
; type vop3b.
1567(define_insn "sub<mode>3_vcc<exec_vcc>"
1568 [(set (match_operand:V_SI 0 "register_operand" "= v, v, v, v")
1569 (minus:V_SI
1570 (match_operand:V_SI 1 "gcn_alu_operand" "vSvB,vSvB, v, v")
1571 (match_operand:V_SI 2 "gcn_alu_operand" " v, v,vSvB,vSvB")))
1572 (set (match_operand:DI 3 "register_operand" "= cV, Sg, cV, Sg")
1573 (gtu:DI (minus:V_SI (match_dup 1) (match_dup 2))
3d6275e3
AS
1574 (match_dup 1)))]
1575 ""
1576 "@
b9bf0c3f
AS
1577 v_sub_co_u32\t%0, %3, %1, %2
1578 v_sub_co_u32\t%0, %3, %1, %2
1579 v_subrev_co_u32\t%0, %3, %2, %1
1580 v_subrev_co_u32\t%0, %3, %2, %1"
3d6275e3
AS
1581 [(set_attr "type" "vop2,vop3b,vop2,vop3b")
1582 (set_attr "length" "8")])
1583
66b01cc3
AS
1584; v_subb does not accept an SGPR because the VCC read already counts as an
1585; SGPR use and the number of SGPR operands is limited to 1. It does not
1586; accept "B" immediate constants due to a related bus conflict.
3d6275e3 1587
1165109b
AS
; V_SI subtract with borrow-in and borrow-out.  The borrow-in is DImode
; operand 3, modelled as a vec_merge yielding 1 in lanes where the mask bit
; is set and 0 elsewhere; the borrow-out is DImode operand 4, described as
; the ior of two unsigned comparisons.  Alternatives 1-2 print the "subb"
; form with operands in order %1, %2; alternatives 3-4 print the "subbrev"
; form with them exchanged.  Each template offers two mnemonic spellings,
; chosen by assembler dialect.
1588(define_insn "subc<mode>3<exec_vcc>"
1589 [(set (match_operand:V_SI 0 "register_operand" "= v, v, v, v")
1590 (minus:V_SI
1591 (minus:V_SI
1592 (vec_merge:V_SI
1593 (vec_duplicate:V_SI (const_int 1))
1594 (vec_duplicate:V_SI (const_int 0))
1595 (match_operand:DI 3 "gcn_alu_operand" " cV,cVSv,cV,cVSv"))
1596 (match_operand:V_SI 1 "gcn_alu_operand" " vA, vA, v, vA"))
1597 (match_operand:V_SI 2 "gcn_alu_operand" " v, vA,vA, vA")))
1598 (set (match_operand:DI 4 "register_operand" "=cV,cVSg,cV,cVSg")
1599 (ior:DI (gtu:DI (minus:V_SI (minus:V_SI
1600 (vec_merge:V_SI
1601 (vec_duplicate:V_SI (const_int 1))
1602 (vec_duplicate:V_SI (const_int 0))
1603 (match_dup 3))
3d6275e3
AS
1604 (match_dup 1))
1605 (match_dup 2))
1606 (match_dup 2))
1165109b
AS
1607 (ltu:DI (minus:V_SI (vec_merge:V_SI
1608 (vec_duplicate:V_SI (const_int 1))
1609 (vec_duplicate:V_SI (const_int 0))
1610 (match_dup 3))
1611 (match_dup 1))
3d6275e3
AS
1612 (match_dup 1))))]
1613 ""
1614 "@
b9bf0c3f
AS
1615 {v_subb_co_u32|v_sub_co_ci_u32}\t%0, %4, %1, %2, %3
1616 {v_subb_co_u32|v_sub_co_ci_u32}\t%0, %4, %1, %2, %3
1617 {v_subbrev_co_u32|v_subrev_co_ci_u32}\t%0, %4, %2, %1, %3
1618 {v_subbrev_co_u32|v_subrev_co_ci_u32}\t%0, %4, %2, %1, %3"
3d6275e3 1619 [(set_attr "type" "vop2,vop3b,vop2,vop3b")
66b01cc3 1620 (set_attr "length" "4,8,4,8")])
3d6275e3 1621
1165109b
AS
; 64-bit-per-lane vector add.  It is never output directly (template "#"):
; once gcn_can_split_p accepts operands 0-2 it is split into a 32-bit add
; of the low parts that writes its carry to VCC, followed by an
; add-with-carry of the high parts that both consumes and produces VCC.
(define_insn_and_split "add<mode>3"
  [(set (match_operand:V_DI 0 "register_operand" "= v")
        (plus:V_DI
          (match_operand:V_DI 1 "register_operand" "%vDb")
          (match_operand:V_DI 2 "gcn_alu_operand" " vDb")))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p (<MODE>mode, operands[0])
   && gcn_can_split_p (<MODE>mode, operands[1])
   && gcn_can_split_p (<MODE>mode, operands[2])"
  [(const_int 0)]
  {
    rtx carry = gen_rtx_REG (DImode, VCC_REG);

    /* Low halves: dst.lo = a.lo + b.lo, carry-out in VCC.  */
    rtx dst_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
    rtx a_lo = gcn_operand_part (<MODE>mode, operands[1], 0);
    rtx b_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
    emit_insn (gen_add<vnsi>3_vcc (dst_lo, a_lo, b_lo, carry));

    /* High halves: dst.hi = a.hi + b.hi + carry-in from VCC.  */
    rtx dst_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
    rtx a_hi = gcn_operand_part (<MODE>mode, operands[1], 1);
    rtx b_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
    emit_insn (gen_addc<vnsi>3 (dst_hi, a_hi, b_hi, carry, carry));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])
1650
1165109b
AS
;; Masked (EXEC-controlled) vector add for 64-bit element modes.
;; Operand 3 is the merge source for inactive lanes (or an unspec), operand 4
;; is the EXEC mask.  Split into a masked low-word add producing a carry in
;; VCC and a masked high-word add-with-carry.
;;
;; The split condition checks operand 3, the V_DI merge source, because the
;; split body takes it apart with gcn_operand_part; operand 4 is the scalar
;; DImode EXEC register and is passed through whole.  This matches the
;; condition used by sub<mode>3_exec.

(define_insn_and_split "add<mode>3_exec"
  [(set (match_operand:V_DI 0 "register_operand" "= v")
        (vec_merge:V_DI
          (plus:V_DI
            (match_operand:V_DI 1 "register_operand" "%vDb")
            (match_operand:V_DI 2 "gcn_alu_operand" " vDb"))
          (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0")
          (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p (<MODE>mode, operands[0])
   && gcn_can_split_p (<MODE>mode, operands[1])
   && gcn_can_split_p (<MODE>mode, operands[2])
   && gcn_can_split_p (<MODE>mode, operands[3])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);

    /* Low words: masked add, carry-out into VCC.  */
    emit_insn (gen_add<vnsi>3_vcc_exec
                (gcn_operand_part (<MODE>mode, operands[0], 0),
                 gcn_operand_part (<MODE>mode, operands[1], 0),
                 gcn_operand_part (<MODE>mode, operands[2], 0),
                 vcc,
                 gcn_operand_part (<MODE>mode, operands[3], 0),
                 operands[4]));

    /* High words: masked add with carry-in from VCC.  */
    emit_insn (gen_addc<vnsi>3_exec
                (gcn_operand_part (<MODE>mode, operands[0], 1),
                 gcn_operand_part (<MODE>mode, operands[1], 1),
                 gcn_operand_part (<MODE>mode, operands[2], 1),
                 vcc, vcc,
                 gcn_operand_part (<MODE>mode, operands[3], 1),
                 operands[4]));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])
1687
1165109b
AS
;; Vector subtract for 64-bit element modes.  Printed as "#" and split into
;; a low-word subtract that writes its borrow to VCC and a high-word
;; subtract-with-borrow that reads it; hence the VCC clobber.

(define_insn_and_split "sub<mode>3"
  [(set (match_operand:V_DI 0 "register_operand" "= v, v")
        (minus:V_DI
          (match_operand:V_DI 1 "gcn_alu_operand" "vDb, v")
          (match_operand:V_DI 2 "gcn_alu_operand" " v,vDb")))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p (<MODE>mode, operands[0])
   && gcn_can_split_p (<MODE>mode, operands[1])
   && gcn_can_split_p (<MODE>mode, operands[2])"
  [(const_int 0)]
  {
    rtx borrow = gen_rtx_REG (DImode, VCC_REG);

    /* Low words first; the borrow is left in VCC.  */
    rtx dst_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
    rtx lhs_lo = gcn_operand_part (<MODE>mode, operands[1], 0);
    rtx rhs_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
    emit_insn (gen_sub<vnsi>3_vcc (dst_lo, lhs_lo, rhs_lo, borrow));

    /* High words consume (and overwrite) the borrow in VCC.  */
    rtx dst_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
    rtx lhs_hi = gcn_operand_part (<MODE>mode, operands[1], 1);
    rtx rhs_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
    emit_insn (gen_subc<vnsi>3 (dst_hi, lhs_hi, rhs_hi, borrow, borrow));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])
3d6275e3 1716
1165109b
AS
;; Masked (EXEC-controlled) vector subtract for 64-bit element modes.
;; Operand 3 supplies the values for inactive lanes (or is an unspec) and
;; operand 4 is the EXEC mask.  The insn condition requires at least one of
;; the two sources to be a register.

(define_insn_and_split "sub<mode>3_exec"
  [(set (match_operand:V_DI 0 "register_operand" "= v, v")
        (vec_merge:V_DI
          (minus:V_DI
            (match_operand:V_DI 1 "gcn_alu_operand" "vSvB, v")
            (match_operand:V_DI 2 "gcn_alu_operand" " v,vSvB"))
          (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0, U0")
          (match_operand:DI 4 "gcn_exec_reg_operand" " e, e")))
   (clobber (reg:DI VCC_REG))]
  "register_operand (operands[1], VOIDmode)
   || register_operand (operands[2], VOIDmode)"
  "#"
  "gcn_can_split_p (<MODE>mode, operands[0])
   && gcn_can_split_p (<MODE>mode, operands[1])
   && gcn_can_split_p (<MODE>mode, operands[2])
   && gcn_can_split_p (<MODE>mode, operands[3])"
  [(const_int 0)]
  {
    rtx borrow = gen_rtx_REG (DImode, VCC_REG);
    rtx exec = operands[4];

    /* Low words: masked subtract, borrow-out into VCC.  */
    rtx dst_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
    rtx lhs_lo = gcn_operand_part (<MODE>mode, operands[1], 0);
    rtx rhs_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
    rtx old_lo = gcn_operand_part (<MODE>mode, operands[3], 0);
    emit_insn (gen_sub<vnsi>3_vcc_exec (dst_lo, lhs_lo, rhs_lo, borrow,
                                        old_lo, exec));

    /* High words: masked subtract with borrow-in from VCC.  */
    rtx dst_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
    rtx lhs_hi = gcn_operand_part (<MODE>mode, operands[1], 1);
    rtx rhs_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
    rtx old_hi = gcn_operand_part (<MODE>mode, operands[3], 1);
    emit_insn (gen_subc<vnsi>3_exec (dst_hi, lhs_hi, rhs_hi, borrow, borrow,
                                     old_hi, exec));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])
3d6275e3 1754
1165109b
AS
;; 64-bit vector add where operand 1 is a 32-bit vector that is
;; zero-extended.  The low word adds operand 1 directly; the high word adds
;; constant zero plus the carry left in VCC.

(define_insn_and_split "add<mode>3_zext"
  [(set (match_operand:V_DI 0 "register_operand" "= v, v")
        (plus:V_DI
          (zero_extend:V_DI
            (match_operand:<VnSI> 1 "gcn_alu_operand" " vA, vB"))
          (match_operand:V_DI 2 "gcn_alu_operand" "vDb,vDA")))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p (<MODE>mode, operands[0])
   && gcn_can_split_p (<MODE>mode, operands[2])"
  [(const_int 0)]
  {
    rtx carry = gen_rtx_REG (DImode, VCC_REG);

    /* Low word: narrow operand + low half of the wide operand.  */
    rtx dst_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
    rtx wide_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
    emit_insn (gen_add<vnsi>3_vcc (dst_lo, operands[1], wide_lo, carry));

    /* High word: high half of the wide operand + 0 + carry.  */
    rtx dst_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
    rtx wide_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
    emit_insn (gen_addc<vnsi>3 (dst_hi, wide_hi, const0_rtx, carry, carry));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])
3d6275e3 1782
1165109b
AS
;; Masked variant of the zero-extending 64-bit vector add: operand 3 is the
;; merge source for inactive lanes (or an unspec), operand 4 the EXEC mask.

(define_insn_and_split "add<mode>3_zext_exec"
  [(set (match_operand:V_DI 0 "register_operand" "= v, v")
        (vec_merge:V_DI
          (plus:V_DI
            (zero_extend:V_DI
              (match_operand:<VnSI> 1 "gcn_alu_operand" " vA, vB"))
            (match_operand:V_DI 2 "gcn_alu_operand" "vDb,vDA"))
          (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0, U0")
          (match_operand:DI 4 "gcn_exec_reg_operand" " e, e")))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p (<MODE>mode, operands[0])
   && gcn_can_split_p (<MODE>mode, operands[2])
   && gcn_can_split_p (<MODE>mode, operands[3])"
  [(const_int 0)]
  {
    rtx carry = gen_rtx_REG (DImode, VCC_REG);
    rtx exec = operands[4];

    /* Low word: narrow operand + low half of the wide operand.  */
    rtx dst_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
    rtx wide_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
    rtx old_lo = gcn_operand_part (<MODE>mode, operands[3], 0);
    emit_insn (gen_add<vnsi>3_vcc_exec (dst_lo, operands[1], wide_lo, carry,
                                        old_lo, exec));

    /* High word: high half of the wide operand + 0 + carry.  */
    rtx dst_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
    rtx wide_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
    rtx old_hi = gcn_operand_part (<MODE>mode, operands[3], 1);
    emit_insn (gen_addc<vnsi>3_exec (dst_hi, wide_hi, const0_rtx, carry,
                                     carry, old_hi, exec));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])
3d6275e3 1818
;; 64-bit vector add of a broadcast, zero-extended SImode scalar (operand 1)
;; and a V_DI vector (operand 2).  The carry register is an explicit output
;; (operand 3) rather than a fixed VCC clobber.

(define_insn_and_split "add<mode>3_vcc_zext_dup"
  [(set (match_operand:V_DI 0 "register_operand")
        (plus:V_DI
          (zero_extend:V_DI
            (vec_duplicate:<VnSI>
              (match_operand:SI 1 "gcn_alu_operand")))
          (match_operand:V_DI 2 "gcn_alu_operand")))
   (set (match_operand:DI 3 "register_operand")
        (ltu:DI (plus:V_DI
                  (zero_extend:V_DI (vec_duplicate:<VnSI> (match_dup 1)))
                  (match_dup 2))
                (match_dup 1)))]
  ""
  {@ [cons: =0, 1, 2, =3]
  [v,ASv,v,&Sg] #
  [v,BSv,v,&cV] ^
  }
  "gcn_can_split_p (<MODE>mode, operands[0])
   && gcn_can_split_p (<MODE>mode, operands[2])"
  [(const_int 0)]
  {
    rtx carry = operands[3];

    /* Low word: broadcast scalar + low half of the vector, carry-out.  */
    rtx dst_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
    rtx scalar = gcn_operand_part (DImode, operands[1], 0);
    rtx vec_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
    emit_insn (gen_add<vnsi>3_vcc_dup (dst_lo, scalar, vec_lo, carry));

    /* High word: high half of the vector + 0 + carry.  */
    rtx dst_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
    rtx vec_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
    emit_insn (gen_addc<vnsi>3 (dst_hi, vec_hi, const0_rtx, carry, carry));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])
1853
75d0b3d7
AS
;; Convenience expander: same operation as add<mode>3_vcc_zext_dup, with the
;; carry output fixed to the VCC register.

(define_expand "add<mode>3_zext_dup"
  [(match_operand:V_DI 0 "register_operand")
   (match_operand:SI 1 "gcn_alu_operand")
   (match_operand:V_DI 2 "gcn_alu_operand")]
  ""
  {
    emit_insn (gen_add<mode>3_vcc_zext_dup (operands[0], operands[1],
                                            operands[2],
                                            gen_rtx_REG (DImode, VCC_REG)));
    DONE;
  })
1865
;; Masked variant of add<mode>3_vcc_zext_dup.  Operand 4 is the merge source
;; for inactive lanes (or an unspec), operand 5 the EXEC mask; the carry
;; output (operand 3) is and-ed with that mask in the RTL.

(define_insn_and_split "add<mode>3_vcc_zext_dup_exec"
  [(set (match_operand:V_DI 0 "register_operand")
        (vec_merge:V_DI
          (plus:V_DI
            (zero_extend:V_DI
              (vec_duplicate:<VnSI>
                (match_operand:SI 1 "gcn_alu_operand")))
            (match_operand:V_DI 2 "gcn_alu_operand"))
          (match_operand:V_DI 4 "gcn_register_or_unspec_operand")
          (match_operand:DI 5 "gcn_exec_reg_operand")))
   (set (match_operand:DI 3 "register_operand")
        (and:DI
          (ltu:DI (plus:V_DI
                    (zero_extend:V_DI (vec_duplicate:<VnSI> (match_dup 1)))
                    (match_dup 2))
                  (match_dup 1))
          (match_dup 5)))]
  ""
  {@ [cons: =0, 1, 2, =3, 4, 5]
  [v,ASv,v,&Sg,U0,e] #
  [v,BSv,v,&cV,U0,e] ^
  }
  "gcn_can_split_p (<MODE>mode, operands[0])
   && gcn_can_split_p (<MODE>mode, operands[2])
   && gcn_can_split_p (<MODE>mode, operands[4])"
  [(const_int 0)]
  {
    rtx carry = operands[3];
    rtx exec = operands[5];

    /* Low word: broadcast scalar + low half of the vector, carry-out.  */
    rtx dst_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
    rtx scalar = gcn_operand_part (DImode, operands[1], 0);
    rtx vec_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
    rtx old_lo = gcn_operand_part (<MODE>mode, operands[4], 0);
    emit_insn (gen_add<vnsi>3_vcc_dup_exec (dst_lo, scalar, vec_lo, carry,
                                            old_lo, exec));

    /* High word: high half of the vector + 0 + carry.  */
    rtx dst_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
    rtx vec_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
    rtx old_hi = gcn_operand_part (<MODE>mode, operands[4], 1);
    emit_insn (gen_addc<vnsi>3_exec (dst_hi, vec_hi, const0_rtx, carry,
                                     carry, old_hi, exec));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])
1910
75d0b3d7
AS
;; Convenience expander: same operation as add<mode>3_vcc_zext_dup_exec,
;; with the carry output fixed to the VCC register.  Note the operand
;; renumbering: this expander's operands 3 and 4 (merge source, EXEC) are
;; the insn's operands 4 and 5.

(define_expand "add<mode>3_zext_dup_exec"
  [(match_operand:V_DI 0 "register_operand")
   (match_operand:SI 1 "gcn_alu_operand")
   (match_operand:V_DI 2 "gcn_alu_operand")
   (match_operand:V_DI 3 "gcn_register_or_unspec_operand")
   (match_operand:DI 4 "gcn_exec_reg_operand")]
  ""
  {
    emit_insn (gen_add<mode>3_vcc_zext_dup_exec
                (operands[0], operands[1], operands[2],
                 gen_rtx_REG (DImode, VCC_REG),
                 operands[3], operands[4]));
    DONE;
  })
1925
;; 64-bit vector add of a zero-extended 32-bit vector (operand 1) and a
;; broadcast DImode scalar (operand 2).  The carry register is an explicit
;; output (operand 3).

(define_insn_and_split "add<mode>3_vcc_zext_dup2"
  [(set (match_operand:V_DI 0 "register_operand")
        (plus:V_DI
          (zero_extend:V_DI (match_operand:<VnSI> 1 "gcn_alu_operand"))
          (vec_duplicate:V_DI (match_operand:DI 2 "gcn_alu_operand"))))
   (set (match_operand:DI 3 "register_operand")
        (ltu:DI (plus:V_DI
                  (zero_extend:V_DI (match_dup 1))
                  (vec_duplicate:V_DI (match_dup 2)))
                (match_dup 1)))]
  ""
  {@ [cons: =0, 1, 2, =3]
  [v,v,DbSv,&cV] #
  [v,v,DASv,&Sg] ^
  }
  "gcn_can_split_p (<MODE>mode, operands[0])"
  [(const_int 0)]
  {
    rtx carry = operands[3];

    /* Low word: low half of the scalar (broadcast) + narrow vector.  */
    rtx dst_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
    rtx scalar_lo = gcn_operand_part (DImode, operands[2], 0);
    emit_insn (gen_add<vnsi>3_vcc_dup (dst_lo, scalar_lo, operands[1],
                                       carry));

    /* High word: broadcast the scalar's high half into the destination,
       then add 0 + carry in place.  */
    rtx dst_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
    rtx scalar_hi = gcn_operand_part (DImode, operands[2], 1);
    emit_insn (gen_vec_duplicate<vnsi> (dst_hi, scalar_hi));
    emit_insn (gen_addc<vnsi>3 (dst_hi, dst_hi, const0_rtx, carry, carry));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])
1958
75d0b3d7
AS
;; Convenience expander: same operation as add<mode>3_vcc_zext_dup2, with
;; the carry output fixed to the VCC register.

(define_expand "add<mode>3_zext_dup2"
  [(match_operand:V_DI 0 "register_operand")
   (match_operand:<VnSI> 1 "gcn_alu_operand")
   (match_operand:DI 2 "gcn_alu_operand")]
  ""
  {
    emit_insn (gen_add<mode>3_vcc_zext_dup2 (operands[0], operands[1],
                                             operands[2],
                                             gen_rtx_REG (DImode, VCC_REG)));
    DONE;
  })
1970
;; Masked variant of add<mode>3_vcc_zext_dup2.  Operand 4 is the merge
;; source for inactive lanes (or an unspec), operand 5 the EXEC mask; the
;; carry output (operand 3) is and-ed with that mask in the RTL.

(define_insn_and_split "add<mode>3_vcc_zext_dup2_exec"
  [(set (match_operand:V_DI 0 "register_operand")
        (vec_merge:V_DI
          (plus:V_DI
            (zero_extend:V_DI (match_operand:<VnSI> 1 "gcn_alu_operand"))
            (vec_duplicate:V_DI (match_operand:DI 2 "gcn_alu_operand")))
          (match_operand:V_DI 4 "gcn_register_or_unspec_operand")
          (match_operand:DI 5 "gcn_exec_reg_operand")))
   (set (match_operand:DI 3 "register_operand")
        (and:DI
          (ltu:DI (plus:V_DI
                    (zero_extend:V_DI (match_dup 1))
                    (vec_duplicate:V_DI (match_dup 2)))
                  (match_dup 1))
          (match_dup 5)))]
  ""
  {@ [cons: =0, 1, 2, =3, 4, 5]
  [v,v,ASv,&Sg,U0,e] #
  [v,v,BSv,&cV,U0,e] ^
  }
  "gcn_can_split_p (<MODE>mode, operands[0])
   && gcn_can_split_p (<MODE>mode, operands[4])"
  [(const_int 0)]
  {
    rtx carry = operands[3];
    rtx exec = operands[5];

    /* Low word: low half of the scalar (broadcast) + narrow vector.  */
    rtx dst_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
    rtx scalar_lo = gcn_operand_part (DImode, operands[2], 0);
    rtx old_lo = gcn_operand_part (<MODE>mode, operands[4], 0);
    emit_insn (gen_add<vnsi>3_vcc_dup_exec (dst_lo, scalar_lo, operands[1],
                                            carry, old_lo, exec));

    /* High word: masked broadcast of the scalar's high half into the
       destination, then masked add of 0 + carry in place.  */
    rtx dst_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
    emit_insn (gen_vec_duplicate<vnsi>_exec
                (dst_hi,
                 gcn_operand_part (DImode, operands[2], 1),
                 gcn_operand_part (<MODE>mode, operands[4], 1),
                 exec));
    emit_insn (gen_addc<vnsi>3_exec
                (dst_hi, dst_hi, const0_rtx, carry, carry,
                 gcn_operand_part (<MODE>mode, operands[4], 1),
                 exec));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])
2015
75d0b3d7
AS
;; Convenience expander: same operation as add<mode>3_vcc_zext_dup2_exec,
;; with the carry output fixed to the VCC register.  This expander's
;; operands 3 and 4 (merge source, EXEC) are the insn's operands 4 and 5.

(define_expand "add<mode>3_zext_dup2_exec"
  [(match_operand:V_DI 0 "register_operand")
   (match_operand:<VnSI> 1 "gcn_alu_operand")
   (match_operand:DI 2 "gcn_alu_operand")
   (match_operand:V_DI 3 "gcn_register_or_unspec_operand")
   (match_operand:DI 4 "gcn_exec_reg_operand")]
  ""
  {
    emit_insn (gen_add<mode>3_vcc_zext_dup2_exec
                (operands[0], operands[1], operands[2],
                 gen_rtx_REG (DImode, VCC_REG),
                 operands[3], operands[4]));
    DONE;
  })
2030
1165109b
AS
;; 64-bit vector add of a sign-extended 32-bit vector (operand 1) and a
;; broadcast DImode scalar (operand 2).  Operand 3 is a 32-bit vector
;; scratch that holds operand 1 arithmetically shifted right by 31, which
;; serves as the high word of the sign-extended value.

(define_insn_and_split "add<mode>3_sext_dup2"
  [(set (match_operand:V_DI 0 "register_operand" "= v")
        (plus:V_DI
          (sign_extend:V_DI (match_operand:<VnSI> 1 "gcn_alu_operand" " vA"))
          (vec_duplicate:V_DI (match_operand:DI 2 "gcn_alu_operand" "BSv"))))
   (clobber (match_scratch:<VnSI> 3 "=&v"))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p (<MODE>mode, operands[0])"
  [(const_int 0)]
  {
    rtx carry = gen_rtx_REG (DImode, VCC_REG);
    rtx sign_word = operands[3];

    /* sign_word = operand 1 >> 31 (arithmetic).  */
    emit_insn (gen_ashr<vnsi>3 (sign_word, operands[1], GEN_INT (31)));

    /* Low word: low half of the scalar (broadcast) + narrow vector.  */
    rtx dst_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
    rtx scalar_lo = gcn_operand_part (DImode, operands[2], 0);
    emit_insn (gen_add<vnsi>3_vcc_dup (dst_lo, scalar_lo, operands[1],
                                       carry));

    /* High word: broadcast the scalar's high half, then add the sign word
       plus the carry in place.  */
    rtx dst_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
    rtx scalar_hi = gcn_operand_part (DImode, operands[2], 1);
    emit_insn (gen_vec_duplicate<vnsi> (dst_hi, scalar_hi));
    emit_insn (gen_addc<vnsi>3 (dst_hi, dst_hi, sign_word, carry, carry));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])
2058
1165109b
AS
;; Masked variant of add<mode>3_sext_dup2.  Operand 3 is the merge source
;; for inactive lanes (or an unspec), operand 4 the EXEC mask, and operand 5
;; the 32-bit vector scratch that receives operand 1 >> 31.

(define_insn_and_split "add<mode>3_sext_dup2_exec"
  [(set (match_operand:V_DI 0 "register_operand" "= v")
        (vec_merge:V_DI
          (plus:V_DI
            (sign_extend:V_DI (match_operand:<VnSI> 1 "gcn_alu_operand" "vA"))
            (vec_duplicate:V_DI (match_operand:DI 2 "gcn_alu_operand" "BSv")))
          (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0")
          (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
   (clobber (match_scratch:<VnSI> 5 "=&v"))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p (<MODE>mode, operands[0])
   && gcn_can_split_p (<MODE>mode, operands[3])"
  [(const_int 0)]
  {
    rtx carry = gen_rtx_REG (DImode, VCC_REG);
    rtx exec = operands[4];
    rtx sign_word = operands[5];

    /* sign_word = operand 1 >> 31 (arithmetic); inactive lanes of the
       scratch are left undefined.  */
    emit_insn (gen_ashr<vnsi>3_exec (sign_word, operands[1], GEN_INT (31),
                                     gcn_gen_undef (<VnSI>mode), exec));

    /* Low word: low half of the scalar (broadcast) + narrow vector.  */
    emit_insn (gen_add<vnsi>3_vcc_dup_exec
                (gcn_operand_part (<MODE>mode, operands[0], 0),
                 gcn_operand_part (DImode, operands[2], 0),
                 operands[1],
                 carry,
                 gcn_operand_part (<MODE>mode, operands[3], 0),
                 exec));

    /* High word: masked broadcast of the scalar's high half, then masked
       add of the sign word plus the carry in place.  */
    rtx dst_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
    emit_insn (gen_vec_duplicate<vnsi>_exec
                (dst_hi,
                 gcn_operand_part (DImode, operands[2], 1),
                 gcn_operand_part (<MODE>mode, operands[3], 1),
                 exec));
    emit_insn (gen_addc<vnsi>3_exec
                (dst_hi, dst_hi, sign_word, carry, carry,
                 gcn_operand_part (<MODE>mode, operands[3], 1),
                 exec));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])
2098
2099;; }}}
2100;; {{{ DS memory ALU: add/sub
2101
; Modes accepted by the DS add/sub/subr patterns: the vector forms iterate
; over DS_ARITH_MODE, the "_scalar" forms over DS_ARITH_SCALAR_MODE.
(define_mode_iterator DS_ARITH_MODE
  [V64SI V64SF V64DI])
(define_mode_iterator DS_ARITH_SCALAR_MODE
  [SI SF DI])
2104
2105;; FIXME: the vector patterns probably need RD expanded to a vector of
2106;; addresses. For now, the only way a vector can get into LDS is
2107;; if the user puts it there manually.
2108;;
2109;; FIXME: the scalar patterns are probably fine in themselves, but need to be
2110;; checked to see if anything can ever use them.
2111
;; In-place vector add on a DS memory operand: memory = memory + register.
;; The insn condition requires operands 0 and 1 to be the same rtx, so this
;; is a read-modify-write of a single location.

(define_insn "add<mode>3_ds<exec>"
  [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand" "=RD")
        (plus:DS_ARITH_MODE
          (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" "%RD")
          (match_operand:DS_ARITH_MODE 2 "register_operand" " v")))]
  "rtx_equal_p (operands[0], operands[1])"
  "ds_add%u0\t%A0, %2%O0"
  [(set_attr "type" "ds")
   (set_attr "length" "8")])
2121
;; In-place scalar add on a DS memory operand: memory = memory + register.
;; The insn condition requires operands 0 and 1 to be the same rtx.

(define_insn "add<mode>3_ds_scalar"
  [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD")
        (plus:DS_ARITH_SCALAR_MODE
          (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand"
                                                "%RD")
          (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand" " v")))]
  "rtx_equal_p (operands[0], operands[1])"
  "ds_add%u0\t%A0, %2%O0"
  [(set_attr "type" "ds")
   (set_attr "length" "8")])
2132
;; In-place vector subtract on a DS memory operand:
;; memory = memory - register.  The insn condition requires operands 0 and
;; 1 to be the same rtx.

(define_insn "sub<mode>3_ds<exec>"
  [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand" "=RD")
        (minus:DS_ARITH_MODE
          (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" " RD")
          (match_operand:DS_ARITH_MODE 2 "register_operand" " v")))]
  "rtx_equal_p (operands[0], operands[1])"
  "ds_sub%u0\t%A0, %2%O0"
  [(set_attr "type" "ds")
   (set_attr "length" "8")])
2142
;; In-place scalar subtract on a DS memory operand:
;; memory = memory - register.  The insn condition requires operands 0 and
;; 1 to be the same rtx.

(define_insn "sub<mode>3_ds_scalar"
  [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD")
        (minus:DS_ARITH_SCALAR_MODE
          (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand"
                                                " RD")
          (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand" " v")))]
  "rtx_equal_p (operands[0], operands[1])"
  "ds_sub%u0\t%A0, %2%O0"
  [(set_attr "type" "ds")
   (set_attr "length" "8")])
2153
;; In-place reversed vector subtract on a DS memory operand:
;; memory = register - memory.  Note the operand order inside the minus:
;; operand 2 (register) comes first, operand 1 (memory) second.  The insn
;; condition requires operands 0 and 1 to be the same rtx.

(define_insn "subr<mode>3_ds<exec>"
  [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand" "=RD")
        (minus:DS_ARITH_MODE
          (match_operand:DS_ARITH_MODE 2 "register_operand" " v")
          (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" " RD")))]
  "rtx_equal_p (operands[0], operands[1])"
  "ds_rsub%u0\t%A0, %2%O0"
  [(set_attr "type" "ds")
   (set_attr "length" "8")])
2163
;; In-place reversed scalar subtract on a DS memory operand:
;; memory = register - memory.  Operand 2 (register) is the minuend and
;; operand 1 (memory) the subtrahend.  The insn condition requires operands
;; 0 and 1 to be the same rtx.

(define_insn "subr<mode>3_ds_scalar"
  [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD")
        (minus:DS_ARITH_SCALAR_MODE
          (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand" " v")
          (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand"
                                                " RD")))]
  "rtx_equal_p (operands[0], operands[1])"
  "ds_rsub%u0\t%A0, %2%O0"
  [(set_attr "type" "ds")
   (set_attr "length" "8")])
2174
2175;; }}}
2176;; {{{ ALU special case: mult
2177
1165109b
AS
;; High 32 bits of the widening 32x32 multiply, signed or unsigned according
;; to the any_extend iterator: both inputs are extended to the 64-bit vector
;; mode, multiplied, shifted right by 32 and truncated.  The template prints
;; the sources as %2, %1.

(define_insn "<su>mul<mode>3_highpart<exec>"
  [(set (match_operand:V_SI 0 "register_operand" "= v")
        (truncate:V_SI
          (lshiftrt:<VnDI>
            (mult:<VnDI>
              (any_extend:<VnDI>
                (match_operand:V_SI 1 "gcn_alu_operand" " %v"))
              (any_extend:<VnDI>
                (match_operand:V_SI 2 "gcn_alu_operand" "vSvA")))
            (const_int 32))))]
  ""
  "v_mul_hi<sgnsuffix>0\t%0, %2, %1"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])
2192
;; Element-wise multiply for the single-register integer vector modes,
;; emitted as v_mul_lo_u32.

(define_insn "mul<mode>3<exec>"
  [(set (match_operand:V_INT_1REG 0 "register_operand" "= v")
        (mult:V_INT_1REG
          (match_operand:V_INT_1REG 1 "gcn_alu_operand" "%vSvA")
          (match_operand:V_INT_1REG 2 "gcn_alu_operand" " vSvA")))]
  ""
  "v_mul_lo_u32\t%0, %1, %2"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])
2202
;; Element-wise multiply of a single-register integer vector (operand 1) by
;; a broadcast scalar (operand 2), emitted as v_mul_lo_u32.

(define_insn "mul<mode>3_dup<exec>"
  [(set (match_operand:V_INT_1REG 0 "register_operand" "= v")
        (mult:V_INT_1REG
          (match_operand:V_INT_1REG 1 "gcn_alu_operand" "%vSvA")
          (vec_duplicate:V_INT_1REG
            (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand" " SvA"))))]
  ""
  "v_mul_lo_u32\t%0, %1, %2"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])
2213
1165109b
AS
;; Vector multiply for 64-bit element modes, built after reload from 32-bit
;; multiplies.  Operand 3 is a 32-bit vector scratch for the cross products.
;;
;; With a = a_hi * 2^32 + a_lo and b = b_hi * 2^32 + b_lo:
;;   a * b mod 2^64 = a_lo * b_lo + ((a_hi * b_lo + a_lo * b_hi) << 32)
;; The a_hi * b_hi term is scaled by 2^64, so it contributes nothing to the
;; 64-bit result and must not be added to the high word.

(define_insn_and_split "mul<mode>3"
  [(set (match_operand:V_DI 0 "register_operand" "=&v")
        (mult:V_DI
          (match_operand:V_DI 1 "gcn_alu_operand" "% v")
          (match_operand:V_DI 2 "gcn_alu_operand" "vDA")))
   (clobber (match_scratch:<VnSI> 3 "=&v"))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    rtx out_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
    rtx out_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
    rtx left_lo = gcn_operand_part (<MODE>mode, operands[1], 0);
    rtx left_hi = gcn_operand_part (<MODE>mode, operands[1], 1);
    rtx right_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
    rtx right_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
    rtx tmp = operands[3];

    /* out_lo = lo32 (a_lo * b_lo); out_hi = hi32 (a_lo * b_lo).  */
    emit_insn (gen_mul<vnsi>3 (out_lo, left_lo, right_lo));
    emit_insn (gen_umul<vnsi>3_highpart (out_hi, left_lo, right_lo));

    /* out_hi += lo32 (a_hi * b_lo).  */
    emit_insn (gen_mul<vnsi>3 (tmp, left_hi, right_lo));
    emit_insn (gen_add<vnsi>3 (out_hi, out_hi, tmp));

    /* out_hi += lo32 (a_lo * b_hi).  */
    emit_insn (gen_mul<vnsi>3 (tmp, left_lo, right_hi));
    emit_insn (gen_add<vnsi>3 (out_hi, out_hi, tmp));
    DONE;
  })
2243
1165109b
AS
;; Masked (EXEC-controlled) vector multiply for 64-bit element modes, built
;; after reload from masked 32-bit multiplies.  Operand 3 is the merge
;; source for inactive lanes (or an unspec), operand 4 the EXEC mask and
;; operand 5 a 32-bit vector scratch for the cross products.
;;
;; With a = a_hi * 2^32 + a_lo and b = b_hi * 2^32 + b_lo:
;;   a * b mod 2^64 = a_lo * b_lo + ((a_hi * b_lo + a_lo * b_hi) << 32)
;; The a_hi * b_hi term is scaled by 2^64, so it contributes nothing to the
;; 64-bit result and must not be added to the high word.

(define_insn_and_split "mul<mode>3_exec"
  [(set (match_operand:V_DI 0 "register_operand" "=&v")
        (vec_merge:V_DI
          (mult:V_DI
            (match_operand:V_DI 1 "gcn_alu_operand" "% v")
            (match_operand:V_DI 2 "gcn_alu_operand" "vDA"))
          (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0")
          (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
   (clobber (match_scratch:<VnSI> 5 "=&v"))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    rtx out_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
    rtx out_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
    rtx left_lo = gcn_operand_part (<MODE>mode, operands[1], 0);
    rtx left_hi = gcn_operand_part (<MODE>mode, operands[1], 1);
    rtx right_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
    rtx right_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
    rtx exec = operands[4];
    rtx tmp = operands[5];

    /* Previous destination contents for the inactive lanes; undefined when
       the merge source is an unspec.  */
    rtx old_lo, old_hi;
    if (GET_CODE (operands[3]) == UNSPEC)
      old_lo = old_hi = gcn_gen_undef (<VnSI>mode);
    else
      {
        old_lo = gcn_operand_part (<MODE>mode, operands[3], 0);
        old_hi = gcn_operand_part (<MODE>mode, operands[3], 1);
      }

    /* The scratch's inactive lanes never reach the result.  */
    rtx undef = gcn_gen_undef (<VnSI>mode);

    /* out_lo = lo32 (a_lo * b_lo); out_hi = hi32 (a_lo * b_lo).  */
    emit_insn (gen_mul<vnsi>3_exec (out_lo, left_lo, right_lo, old_lo, exec));
    emit_insn (gen_umul<vnsi>3_highpart_exec (out_hi, left_lo, right_lo,
                                              old_hi, exec));

    /* out_hi += lo32 (a_hi * b_lo).  */
    emit_insn (gen_mul<vnsi>3_exec (tmp, left_hi, right_lo, undef, exec));
    emit_insn (gen_add<vnsi>3_exec (out_hi, out_hi, tmp, out_hi, exec));

    /* out_hi += lo32 (a_lo * b_hi).  */
    emit_insn (gen_mul<vnsi>3_exec (tmp, left_lo, right_hi, undef, exec));
    emit_insn (gen_add<vnsi>3_exec (out_hi, out_hi, tmp, out_hi, exec));
    DONE;
  })
2291
1165109b
AS
;; 64-bit vector multiply where operand 1 is a zero-extended 32-bit vector.
;; Split after reload into: low product, unsigned high product, and one
;; cross product (operand 1 * high word of operand 2) accumulated through
;; the scratch (operand 3).

(define_insn_and_split "mul<mode>3_zext"
  [(set (match_operand:V_DI 0 "register_operand" "=&v")
        (mult:V_DI
          (zero_extend:V_DI
            (match_operand:<VnSI> 1 "gcn_alu_operand" " v"))
          (match_operand:V_DI 2 "gcn_alu_operand" "vDA")))
   (clobber (match_scratch:<VnSI> 3 "=&v"))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    rtx dst_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
    rtx dst_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
    rtx narrow = operands[1];
    rtx wide_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
    rtx wide_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
    rtx cross = operands[3];

    /* dst = narrow * wide_lo (full 64-bit product).  */
    emit_insn (gen_mul<vnsi>3 (dst_lo, narrow, wide_lo));
    emit_insn (gen_umul<vnsi>3_highpart (dst_hi, narrow, wide_lo));

    /* dst_hi += lo32 (narrow * wide_hi).  */
    emit_insn (gen_mul<vnsi>3 (cross, narrow, wide_hi));
    emit_insn (gen_add<vnsi>3 (dst_hi, dst_hi, cross));
    DONE;
  })
2317
1165109b
AS
;; Masked variant of mul<mode>3_zext.  Operand 3 is the merge source for
;; inactive lanes (or an unspec), operand 4 the EXEC mask and operand 5 the
;; 32-bit vector scratch.

(define_insn_and_split "mul<mode>3_zext_exec"
  [(set (match_operand:V_DI 0 "register_operand" "=&v")
        (vec_merge:V_DI
          (mult:V_DI
            (zero_extend:V_DI
              (match_operand:<VnSI> 1 "gcn_alu_operand" " v"))
            (match_operand:V_DI 2 "gcn_alu_operand" "vDA"))
          (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0")
          (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
   (clobber (match_scratch:<VnSI> 5 "=&v"))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    rtx dst_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
    rtx dst_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
    rtx narrow = operands[1];
    rtx wide_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
    rtx wide_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
    rtx mask = operands[4];
    rtx cross = operands[5];

    /* Values for the inactive lanes of each destination word.  */
    rtx prev_lo, prev_hi;
    if (GET_CODE (operands[3]) != UNSPEC)
      {
        prev_lo = gcn_operand_part (<MODE>mode, operands[3], 0);
        prev_hi = gcn_operand_part (<MODE>mode, operands[3], 1);
      }
    else
      prev_lo = prev_hi = gcn_gen_undef (<VnSI>mode);

    rtx dont_care = gcn_gen_undef (<VnSI>mode);

    /* dst = narrow * wide_lo (full 64-bit product).  */
    emit_insn (gen_mul<vnsi>3_exec (dst_lo, narrow, wide_lo, prev_lo, mask));
    emit_insn (gen_umul<vnsi>3_highpart_exec (dst_hi, narrow, wide_lo,
                                              prev_hi, mask));

    /* dst_hi += lo32 (narrow * wide_hi).  */
    emit_insn (gen_mul<vnsi>3_exec (cross, narrow, wide_hi, dont_care, mask));
    emit_insn (gen_add<vnsi>3_exec (dst_hi, dst_hi, cross, dst_hi, mask));
    DONE;
  })
2361
1165109b
AS
;; 64-bit vector multiply of a zero-extended 32-bit vector (operand 1) by a
;; broadcast DImode scalar (operand 2).  Split after reload into: low
;; product, unsigned high product, and one cross product accumulated through
;; the scratch (operand 3).

(define_insn_and_split "mul<mode>3_zext_dup2"
  [(set (match_operand:V_DI 0 "register_operand" "= &v")
        (mult:V_DI
          (zero_extend:V_DI
            (match_operand:<VnSI> 1 "gcn_alu_operand" " v"))
          (vec_duplicate:V_DI
            (match_operand:DI 2 "gcn_alu_operand" "SvDA"))))
   (clobber (match_scratch:<VnSI> 3 "= &v"))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    rtx dst_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
    rtx dst_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
    rtx narrow = operands[1];
    rtx scalar_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
    rtx scalar_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
    rtx cross = operands[3];

    /* dst = narrow * scalar_lo (full 64-bit product).  */
    emit_insn (gen_mul<vnsi>3 (dst_lo, narrow, scalar_lo));
    emit_insn (gen_umul<vnsi>3_highpart (dst_hi, narrow, scalar_lo));

    /* dst_hi += lo32 (narrow * scalar_hi).  */
    emit_insn (gen_mul<vnsi>3 (cross, narrow, scalar_hi));
    emit_insn (gen_add<vnsi>3 (dst_hi, dst_hi, cross));
    DONE;
  })
2388
1165109b
AS
;; Masked variant of mul<mode>3_zext_dup2.  Operand 3 is the merge source
;; for inactive lanes (or an unspec), operand 4 the EXEC mask and operand 5
;; the 32-bit vector scratch.

(define_insn_and_split "mul<mode>3_zext_dup2_exec"
  [(set (match_operand:V_DI 0 "register_operand" "= &v")
        (vec_merge:V_DI
          (mult:V_DI
            (zero_extend:V_DI
              (match_operand:<VnSI> 1 "gcn_alu_operand" " v"))
            (vec_duplicate:V_DI
              (match_operand:DI 2 "gcn_alu_operand" "SvDA")))
          (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0")
          (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
   (clobber (match_scratch:<VnSI> 5 "= &v"))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    rtx dst_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
    rtx dst_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
    rtx narrow = operands[1];
    rtx scalar_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
    rtx scalar_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
    rtx mask = operands[4];
    rtx cross = operands[5];

    /* Values for the inactive lanes of each destination word.  */
    rtx prev_lo, prev_hi;
    if (GET_CODE (operands[3]) != UNSPEC)
      {
        prev_lo = gcn_operand_part (<MODE>mode, operands[3], 0);
        prev_hi = gcn_operand_part (<MODE>mode, operands[3], 1);
      }
    else
      prev_lo = prev_hi = gcn_gen_undef (<VnSI>mode);

    rtx dont_care = gcn_gen_undef (<VnSI>mode);

    /* dst = narrow * scalar_lo (full 64-bit product).  */
    emit_insn (gen_mul<vnsi>3_exec (dst_lo, narrow, scalar_lo, prev_lo,
                                    mask));
    emit_insn (gen_umul<vnsi>3_highpart_exec (dst_hi, narrow, scalar_lo,
                                              prev_hi, mask));

    /* dst_hi += lo32 (narrow * scalar_hi).  */
    emit_insn (gen_mul<vnsi>3_exec (cross, narrow, scalar_hi, dont_care,
                                    mask));
    emit_insn (gen_add<vnsi>3_exec (dst_hi, dst_hi, cross, dst_hi, mask));
    DONE;
  })
2433
1bde3ace
AJ
; Complex multiply comes in a plain and a conjugate flavour.  conj_op is the
; pattern-name suffix; cmul_subadd / cmul_addsub select which of "add" and
; "sub" is used for the even-lane and odd-lane combine steps respectively.
(define_int_iterator UNSPEC_CMUL_OP
  [UNSPEC_CMUL UNSPEC_CMUL_CONJ])
(define_int_attr conj_op
  [(UNSPEC_CMUL "") (UNSPEC_CMUL_CONJ "_conj")])
(define_int_attr cmul_subadd
  [(UNSPEC_CMUL "sub") (UNSPEC_CMUL_CONJ "add")])
(define_int_attr cmul_addsub
  [(UNSPEC_CMUL "add") (UNSPEC_CMUL_CONJ "sub")])
2438
;; Complex multiply (plain or conjugate) on vectors whose adjacent lane
;; pairs hold one complex number each.  In the lane annotations below,
;; operand 1 holds the pair (a, b) and operand 2 the pair (c, d).  The
;; result is assembled in two masked steps: even lanes first, then odd
;; lanes, each with EXEC loaded with the corresponding alternating mask.

(define_expand "cmul<conj_op><mode>3"
  [(set (match_operand:V_noHI 0 "register_operand" "=&v")
        (unspec:V_noHI
          [(match_operand:V_noHI 1 "register_operand" "v")
           (match_operand:V_noHI 2 "register_operand" "v")]
          UNSPEC_CMUL_OP))]
  ""
  {
    /* Lane-wise product of the inputs as given.  */
    rtx direct = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_mul<mode>3 (direct, operands[1], operands[2])); // a*c b*d

    /* Operand 2 with each lane pair swapped.  */
    rtx rhs_swapped = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_dpp_swap_pairs<mode> (rhs_swapped, operands[2])); // d c

    /* Lane-wise product against the swapped operand.  */
    rtx crossed = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_mul<mode>3 (crossed, operands[1], rhs_swapped)); // a*d b*c

    rtx direct_swapped = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_dpp_swap_pairs<mode> (direct_swapped, direct)); // b*d a*c

    /* Even lanes: combine the direct products; other lanes undefined.  */
    rtx exec_even = gen_rtx_REG (DImode, EXEC_REG);
    emit_move_insn (exec_even, get_exec (0x5555555555555555UL));
    rtx result = operands[0];
    emit_insn (gen_<cmul_subadd><mode>3_exec (result, direct, direct_swapped,
                                              gcn_gen_undef (<MODE>mode),
                                              exec_even)); // a*c-b*d 0

    rtx crossed_swapped = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_dpp_swap_pairs<mode> (crossed_swapped, crossed)); // b*c a*d

    /* Odd lanes: combine the cross products, keeping the even lanes
       already written to the result.  */
    rtx exec_odd = gen_rtx_REG (DImode, EXEC_REG);
    emit_move_insn (exec_odd, get_exec (0xaaaaaaaaaaaaaaaaUL));
    emit_insn (gen_<cmul_addsub><mode>3_exec (result, crossed,
                                              crossed_swapped, result,
                                              exec_odd));
                                                        // 0 a*d+b*c
    DONE;
  })
2477
; plus/minus pair; addsub_as supplies the one-letter name suffix
; ("a" for plus, "s" for minus).
(define_code_iterator addsub
  [plus
   minus])

(define_code_attr addsub_as
  [(plus "a")
   (minus "s")])
2480
; Complex multiply combined with add/subtract of operand 3 (named cmla /
; cmls via addsub_as).  In the lane notes, each pair of operand 1 is (a, b)
; and each pair of operand 2 is (c, d).
(define_expand "cml<addsub_as><mode>4"
  [(set (match_operand:V_FP 0 "register_operand" "=&v")
        (addsub:V_FP
          (unspec:V_FP
            [(match_operand:V_FP 1 "register_operand" "v")
             (match_operand:V_FP 2 "register_operand" "v")]
            UNSPEC_CMUL)
          (match_operand:V_FP 3 "register_operand" "v")))]
  ""
  {
    rtx result = operands[0];

    rtx even_bcast = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_dpp_distribute_even<mode> (even_bcast, operands[1]));
                                                        /* (a, a) */

    /* fma or fms, chosen by addsub_as: (a*c, a*d) combined with op 3.  */
    rtx fused = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_fm<addsub_as><mode>4 (fused, even_bcast, operands[2],
                                         operands[3]));

    rtx odd_bcast = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_dpp_distribute_odd<mode> (odd_bcast, operands[1]));
                                                        /* (b, b) */

    rtx prod = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_mul<mode>3 (prod, odd_bcast, operands[2]));
                                                        /* (b*c, b*d) */

    rtx prod_swapped = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_dpp_swap_pairs<mode> (prod_swapped, prod));
                                                        /* (b*d, b*c) */

    /* Subtract under the 0x5555... mask, leaving other lanes undefined.  */
    rtx exec_even = gen_rtx_REG (DImode, EXEC_REG);
    emit_move_insn (exec_even, get_exec (0x5555555555555555UL));
    rtx undef = gcn_gen_undef (<MODE>mode);
    emit_insn (gen_sub<mode>3_exec (result, fused, prod_swapped, undef,
                                    exec_even));

    /* Add under the 0xaaaa... mask, preserving the lanes just written.  */
    rtx exec_odd = gen_rtx_REG (DImode, EXEC_REG);
    emit_move_insn (exec_odd, get_exec (0xaaaaaaaaaaaaaaaaUL));
    emit_insn (gen_add<mode>3_exec (result, fused, prod_swapped, result,
                                    exec_odd));

    DONE;
  })
2519
; Alternating subtract/add: op1 - op2 under the 0x5555... lane mask and
; op1 + op2 under the 0xaaaa... lane mask.  The vec_merge selector in the
; template, 6148914691236517205, is 0x5555555555555555 in decimal.
(define_expand "vec_addsub<mode>3"
  [(set (match_operand:V_noHI 0 "register_operand" "=&v")
        (vec_merge:V_noHI
          (minus:V_noHI
            (match_operand:V_noHI 1 "register_operand" "v")
            (match_operand:V_noHI 2 "register_operand" "v"))
          (plus:V_noHI (match_dup 1) (match_dup 2))
          (const_int 6148914691236517205)))]
  ""
  {
    rtx result = operands[0];
    rtx lhs = operands[1];
    rtx rhs = operands[2];

    /* Subtract pass; lanes outside the mask stay undefined.  */
    rtx exec_even = gen_rtx_REG (DImode, EXEC_REG);
    emit_move_insn (exec_even, get_exec (0x5555555555555555UL));
    rtx undef = gcn_gen_undef (<MODE>mode);
    emit_insn (gen_sub<mode>3_exec (result, lhs, rhs, undef, exec_even));

    /* Add pass; merges with the lanes written by the subtract pass.  */
    rtx exec_odd = gen_rtx_REG (DImode, EXEC_REG);
    emit_move_insn (exec_odd, get_exec (0xaaaaaaaaaaaaaaaaUL));
    emit_insn (gen_add<mode>3_exec (result, lhs, rhs, result, exec_odd));

    DONE;
  })
2543
; Complex add with rotation.  "rot" gives the name suffix; cadd_subadd and
; cadd_addsub pick the operation used in the first and second masked step
; of the expansion respectively.
(define_int_iterator CADD
  [UNSPEC_CADD90
   UNSPEC_CADD270])

(define_int_attr rot
  [(UNSPEC_CADD90 "90")
   (UNSPEC_CADD270 "270")])

(define_int_attr cadd_subadd
  [(UNSPEC_CADD90 "sub")
   (UNSPEC_CADD270 "add")])

(define_int_attr cadd_addsub
  [(UNSPEC_CADD90 "add")
   (UNSPEC_CADD270 "sub")])
2548
; Complex add with rotation: operand 2 has its lane pairs swapped, then it
; is combined with operand 1 using one operation under the 0x5555... mask
; and the opposite operation under the 0xaaaa... mask.
(define_expand "cadd<rot><mode>3"
  [(set (match_operand:V_noHI 0 "register_operand" "=&v")
        (unspec:V_noHI
          [(match_operand:V_noHI 1 "register_operand" "v")
           (match_operand:V_noHI 2 "register_operand" "v")]
          CADD))]
  ""
  {
    rtx result = operands[0];
    rtx lhs = operands[1];

    rtx rhs_swapped = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_dpp_swap_pairs<mode> (rhs_swapped, operands[2]));

    /* First masked step; remaining lanes undefined.  */
    rtx exec_even = gen_rtx_REG (DImode, EXEC_REG);
    emit_move_insn (exec_even, get_exec (0x5555555555555555UL));
    rtx undef = gcn_gen_undef (<MODE>mode);
    emit_insn (gen_<cadd_subadd><mode>3_exec (result, lhs, rhs_swapped,
                                              undef, exec_even));

    /* Second masked step; keeps lanes from the first.  */
    rtx exec_odd = gen_rtx_REG (DImode, EXEC_REG);
    emit_move_insn (exec_odd, get_exec (0xaaaaaaaaaaaaaaaaUL));
    emit_insn (gen_<cadd_addsub><mode>3_exec (result, lhs, rhs_swapped,
                                              result, exec_odd));

    DONE;
  })
2572
; op1 * op2, then operand 3 is subtracted under the 0x5555... lane mask and
; added under the 0xaaaa... lane mask.  The multiply is a separate insn.
(define_expand "vec_fmaddsub<mode>4"
  [(match_operand:V_noHI 0 "register_operand" "=&v")
   (match_operand:V_noHI 1 "register_operand" "v")
   (match_operand:V_noHI 2 "register_operand" "v")
   (match_operand:V_noHI 3 "register_operand" "v")]
  ""
  {
    rtx result = operands[0];
    rtx addend = operands[3];

    rtx product = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_mul<mode>3 (product, operands[1], operands[2]));

    /* Subtract pass; other lanes undefined.  */
    rtx exec_even = gen_rtx_REG (DImode, EXEC_REG);
    emit_move_insn (exec_even, get_exec (0x5555555555555555UL));
    rtx undef = gcn_gen_undef (<MODE>mode);
    emit_insn (gen_sub<mode>3_exec (result, product, addend, undef,
                                    exec_even));

    /* Add pass; merges with the subtract pass.  */
    rtx exec_odd = gen_rtx_REG (DImode, EXEC_REG);
    emit_move_insn (exec_odd, get_exec (0xaaaaaaaaaaaaaaaaUL));
    emit_insn (gen_add<mode>3_exec (result, product, addend, result,
                                    exec_odd));

    DONE;
  })
2593
; Mirror image of vec_fmaddsub: op1 * op2, then operand 3 is added under
; the 0x5555... lane mask and subtracted under the 0xaaaa... lane mask.
(define_expand "vec_fmsubadd<mode>4"
  [(match_operand:V_noHI 0 "register_operand" "=&v")
   (match_operand:V_noHI 1 "register_operand" "v")
   (match_operand:V_noHI 2 "register_operand" "v")
   (match_operand:V_noHI 3 "register_operand" "v")]
  ""
  {
    rtx result = operands[0];
    rtx addend = operands[3];

    rtx product = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_mul<mode>3 (product, operands[1], operands[2]));

    /* Add pass; other lanes undefined.  */
    rtx exec_even = gen_rtx_REG (DImode, EXEC_REG);
    emit_move_insn (exec_even, get_exec (0x5555555555555555UL));
    rtx undef = gcn_gen_undef (<MODE>mode);
    emit_insn (gen_add<mode>3_exec (result, product, addend, undef,
                                    exec_even));

    /* Subtract pass; merges with the add pass.  */
    rtx exec_odd = gen_rtx_REG (DImode, EXEC_REG);
    emit_move_insn (exec_odd, get_exec (0xaaaaaaaaaaaaaaaaUL));
    emit_insn (gen_sub<mode>3_exec (result, product, addend, result,
                                    exec_odd));

    DONE;
  })
2614
3d6275e3
AS
2615;; }}}
2616;; {{{ ALU generic case
2617
3d6275e3
AS
; RTL code groups shared by the generic ALU patterns in this section.
(define_code_iterator bitop
  [and ior xor])

(define_code_iterator shiftop
  [ashift lshiftrt ashiftrt])

(define_code_iterator minmaxop
  [smin smax umin umax])
2621
; Unary bit operations (codes of the bitunop iterator) on V_INT_1REG modes,
; emitted as a single VOP1 instruction.
(define_insn "<expander><mode>2<exec>"
  [(set
     (match_operand:V_INT_1REG 0 "gcn_valu_dst_operand" "= v")
     (bitunop:V_INT_1REG
       (match_operand:V_INT_1REG 1 "gcn_valu_src0_operand" "vSvB")))]
  ""
  "v_<mnemonic>0\t%0, %1"
  [(set_attr "type" "vop1")
   (set_attr "length" "8")])
2630
; and/ior/xor on V_INT_1REG modes.  Alternative 0 is the VOP2 register
; form; alternative 1 prints a ds_ instruction whose destination (RD
; constraint) is also the first source.
(define_insn "<expander><mode>3<exec>"
  [(set
     (match_operand:V_INT_1REG 0 "gcn_valu_dst_operand" "= v,RD")
     (bitop:V_INT_1REG
       (match_operand:V_INT_1REG 1 "gcn_valu_src0_operand" "% v, 0")
       (match_operand:V_INT_1REG 2 "gcn_valu_src1com_operand" "vSvB, v")))]
  ""
  "@
   v_<mnemonic>0\t%0, %2, %1
   ds_<mnemonic>0\t%A0, %2%O0"
  [(set_attr "type" "vop2,ds")
   (set_attr "length" "8,8")])
2642
1165109b
AS
; and/ior/xor on V_DI modes.  The register alternative is split after
; reload into two <VnSI> operations on the low and high parts; the ds_
; alternative (destination accepted by gcn_ds_memory_operand) is not split.
(define_insn_and_split "<expander><mode>3"
  [(set
     (match_operand:V_DI 0 "gcn_valu_dst_operand" "= v,RD")
     (bitop:V_DI
       (match_operand:V_DI 1 "gcn_valu_src0_operand" "% v,RD")
       (match_operand:V_DI 2 "gcn_valu_src1com_operand" "vSvB, v")))]
  ""
  "@
   #
   ds_<mnemonic>0\t%A0, %2%O0"
  "(reload_completed && !gcn_ds_memory_operand (operands[0], <MODE>mode))"
  [(set (match_dup 3)
        (bitop:<VnSI> (match_dup 5) (match_dup 7)))
   (set (match_dup 4)
        (bitop:<VnSI> (match_dup 6) (match_dup 8)))]
  {
    /* operands[3..8] receive the {part 0, part 1} halves of operands[0],
       operands[1] and operands[2], in that order.  */
    for (int src = 0; src < 3; src++)
      for (int part = 0; part < 2; part++)
        operands[3 + 2 * src + part]
          = gcn_operand_part (<MODE>mode, operands[src], part);
  }
  [(set_attr "type" "vmult,ds")
   (set_attr "length" "16,8")])
2667
1165109b
AS
; Masked (vec_merge under EXEC) and/ior/xor on V_DI modes.  As with the
; unmasked pattern, the register alternative is split after reload into two
; masked <VnSI> operations; operand 3 supplies the values for the lanes that
; are not selected and is split into halves as well.
(define_insn_and_split "<expander><mode>3_exec"
  [(set
     (match_operand:V_DI 0 "gcn_valu_dst_operand" "= v,RD")
     (vec_merge:V_DI
       (bitop:V_DI
         (match_operand:V_DI 1 "gcn_valu_src0_operand" "% v,RD")
         (match_operand:V_DI 2 "gcn_valu_src1com_operand" "vSvB, v"))
       (match_operand:V_DI 3 "gcn_register_ds_or_unspec_operand" "U0,U0")
       (match_operand:DI 4 "gcn_exec_reg_operand" " e, e")))]
  "!memory_operand (operands[0], VOIDmode)
   || (rtx_equal_p (operands[0], operands[1])
       && register_operand (operands[2], VOIDmode))"
  "@
   #
   ds_<mnemonic>0\t%A0, %2%O0"
  "(reload_completed && !gcn_ds_memory_operand (operands[0], <MODE>mode))"
  [(set (match_dup 5)
        (vec_merge:<VnSI>
          (bitop:<VnSI> (match_dup 7) (match_dup 9))
          (match_dup 11)
          (match_dup 4)))
   (set (match_dup 6)
        (vec_merge:<VnSI>
          (bitop:<VnSI> (match_dup 8) (match_dup 10))
          (match_dup 12)
          (match_dup 4)))]
  {
    /* operands[5..12] receive the {part 0, part 1} halves of operands[0]
       through operands[3], in that order.  */
    for (int src = 0; src < 4; src++)
      for (int part = 0; part < 2; part++)
        operands[5 + 2 * src + part]
          = gcn_operand_part (<MODE>mode, operands[src], part);
  }
  [(set_attr "type" "vmult,ds")
   (set_attr "length" "16,8")])
2705
; Shifts of V_QIHI vectors by a scalar count: widen the input to <VnSI>,
; shift there, and convert the result back.
(define_expand "<expander><mode>3"
  [(set
     (match_operand:V_QIHI 0 "register_operand" "= v")
     (shiftop:V_QIHI
       (match_operand:V_QIHI 1 "gcn_alu_operand" " v")
       (vec_duplicate:V_QIHI
         (match_operand:SI 2 "gcn_alu_operand" "SvB"))))]
  ""
  {
    /* Local tags so that <code> can be compared by name.  */
    enum {ashift, lshiftrt, ashiftrt};
    /* Only the logical right shift uses unsigned conversions.  */
    const bool zero_extend_p = (<code> == lshiftrt);

    rtx wide_src = gen_reg_rtx (<VnSI>mode);
    rtx count = gen_reg_rtx (SImode);
    rtx wide_dst = gen_reg_rtx (<VnSI>mode);

    convert_move (wide_src, operands[1], zero_extend_p);
    convert_move (count, operands[2], zero_extend_p);
    emit_insn (gen_<expander><vnsi>3 (wide_dst, wide_src, count));
    convert_move (operands[0], wide_dst, zero_extend_p);
    DONE;
  })
2726
; Shift of a V_INT_noHI vector by a scalar SImode count (expressed as a
; vec_duplicate).  The instruction is printed with the count first, using
; the <revmnemonic> form.
(define_insn "<expander><mode>3<exec>"
  [(set
     (match_operand:V_INT_noHI 0 "register_operand" "= v")
     (shiftop:V_INT_noHI
       (match_operand:V_INT_noHI 1 "gcn_alu_operand" " v")
       (vec_duplicate:<VnSI>
         (match_operand:SI 2 "gcn_alu_operand" "SvB"))))]
  ""
  "v_<revmnemonic>0\t%0, %2, %1"
  [(set_attr "type" "vop2")
   (set_attr "length" "8")])
2737
; Shifts of V_QIHI vectors by a per-lane count vector: widen both inputs to
; <VnSI>, shift there, and convert the result back.
(define_expand "v<expander><mode>3"
  [(set
     (match_operand:V_QIHI 0 "register_operand" "=v")
     (shiftop:V_QIHI
       (match_operand:V_QIHI 1 "gcn_alu_operand" " v")
       (match_operand:V_QIHI 2 "gcn_alu_operand" "vB")))]
  ""
  {
    /* Local tags so that <code> can be compared by name.  */
    enum {ashift, lshiftrt, ashiftrt};
    /* Only the logical right shift uses unsigned conversions.  */
    const bool zero_extend_p = (<code> == lshiftrt);

    rtx wide_src = gen_reg_rtx (<VnSI>mode);
    rtx wide_count = gen_reg_rtx (<VnSI>mode);
    rtx wide_dst = gen_reg_rtx (<VnSI>mode);

    convert_move (wide_src, operands[1], zero_extend_p);
    convert_move (wide_count, operands[2], zero_extend_p);
    emit_insn (gen_v<expander><vnsi>3 (wide_dst, wide_src, wide_count));
    convert_move (operands[0], wide_dst, zero_extend_p);
    DONE;
  })
2757
; Shift of a V_INT_noHI vector by a per-lane <VnSI> count vector.  The
; count is printed first, using the <revmnemonic> form.
(define_insn "v<expander><mode>3<exec>"
  [(set
     (match_operand:V_INT_noHI 0 "register_operand" "=v")
     (shiftop:V_INT_noHI
       (match_operand:V_INT_noHI 1 "gcn_alu_operand" " v")
       (match_operand:<VnSI> 2 "gcn_alu_operand" "vB")))]
  ""
  "v_<revmnemonic>0\t%0, %2, %1"
  [(set_attr "type" "vop2")
   (set_attr "length" "8")])
2767
; Integer min/max on V_QIHI vectors: widen both inputs to <VnSI> (zero
; extension for umin/umax, sign extension otherwise), do the operation
; there, and convert the result back.
(define_expand "<expander><mode>3"
  [(set
     (match_operand:V_QIHI 0 "gcn_valu_dst_operand")
     (minmaxop:V_QIHI
       (match_operand:V_QIHI 1 "gcn_valu_src0_operand")
       (match_operand:V_QIHI 2 "gcn_valu_src1com_operand")))]
  ""
  {
    /* Local tags so that <code> can be compared by name.  */
    enum {smin, umin, smax, umax};
    const bool zero_extend_p = (<code> == umax || <code> == umin);

    rtx wide_a = gen_reg_rtx (<VnSI>mode);
    rtx wide_b = gen_reg_rtx (<VnSI>mode);
    rtx wide_res = gen_reg_rtx (<VnSI>mode);

    convert_move (wide_a, operands[1], zero_extend_p);
    convert_move (wide_b, operands[2], zero_extend_p);
    emit_insn (gen_<code><vnsi>3 (wide_res, wide_a, wide_b));
    convert_move (operands[0], wide_res, zero_extend_p);
    DONE;
  })
2787
553ff252
PAA
; Masked integer min/max on V_QIHI vectors.  The operation is done in
; <VnSI> under EXEC with undefined inactive lanes, narrowed into a
; temporary, and finally merged with operand 3 by a masked move.
(define_expand "<expander><mode>3_exec"
  [(set
     (match_operand:V_QIHI 0 "gcn_valu_dst_operand")
     (vec_merge:V_QIHI
       (minmaxop:V_QIHI
         (match_operand:V_QIHI 1 "gcn_valu_src0_operand")
         (match_operand:V_QIHI 2 "gcn_valu_src1com_operand"))
       (match_operand:V_QIHI 3 "gcn_register_or_unspec_operand" "U0")
       (match_operand:DI 4 "gcn_exec_reg_operand" "e")))]
  ""
  {
    /* Local tags so that <code> can be compared by name.  */
    enum {smin, umin, smax, umax};
    const bool zero_extend_p = (<code> == umax || <code> == umin);

    rtx wide_a = gen_reg_rtx (<VnSI>mode);
    rtx wide_b = gen_reg_rtx (<VnSI>mode);
    rtx wide_res = gen_reg_rtx (<VnSI>mode);
    rtx mask = operands[4];
    rtx narrow_res = gen_reg_rtx (<MODE>mode);

    convert_move (wide_a, operands[1], zero_extend_p);
    convert_move (wide_b, operands[2], zero_extend_p);
    rtx undef = gcn_gen_undef (<VnSI>mode);
    emit_insn (gen_<code><vnsi>3_exec (wide_res, wide_a, wide_b, undef,
                                       mask));
    convert_move (narrow_res, wide_res, zero_extend_p);
    /* Merge with the previous/else value only at the final width.  */
    emit_insn (gen_mov<mode>_exec (operands[0], narrow_res, operands[3],
                                   mask));
    DONE;
  })
2815
1165109b
AS
; Integer min/max on V_SI modes.  Alternative 0 is the VOP2 register form;
; alternative 1 prints a ds_ instruction whose destination (RD constraint)
; is also the first source.
(define_insn "<expander><vnsi>3<exec>"
  [(set
     (match_operand:V_SI 0 "gcn_valu_dst_operand" "= v,RD")
     (minmaxop:V_SI
       (match_operand:V_SI 1 "gcn_valu_src0_operand" "% v, 0")
       (match_operand:V_SI 2 "gcn_valu_src1com_operand" "vSvB, v")))]
  ""
  "@
   v_<mnemonic>0\t%0, %2, %1
   ds_<mnemonic>0\t%A0, %2%O0"
  [(set_attr "type" "vop2,ds")
   (set_attr "length" "8,8")])
2827
553ff252
PAA
; Integer min/max on V_DI modes.  There is no single instruction for this,
; so after reload the pattern is split into a vector compare that writes
; VCC followed by a VCC-controlled select between the two inputs.
;
; The preparation statements emit the complete replacement sequence
; themselves, so they must finish with DONE; otherwise the (const_int 0)
; placeholder template would additionally be emitted as an insn.
(define_insn_and_split "<expander><mode>3"
  [(set (match_operand:V_DI 0 "register_operand" "=v")
        (minmaxop:V_DI
          (match_operand:V_DI 1 "gcn_alu_operand" " v")
          (match_operand:V_DI 2 "gcn_alu_operand" " v")))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    rtx out = operands[0];
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);

    /* Local tags so that <code> can be compared by name.  */
    enum {smin, smax, umin, umax};
    bool minp = (<code> == smin || <code> == umin);

    /* min selects operand 1 where op1 < op2, max where op1 > op2; the
       signed codes use LT/GT and the unsigned codes LTU/GTU.  */
    if (<code> == smin || <code> == smax)
      emit_insn (gen_vec_cmp<mode>di (vcc, minp ? gen_rtx_LT (VOIDmode, 0, 0) :
                                      gen_rtx_GT (VOIDmode, 0, 0), operands[1],
                                      operands[2]));
    else
      emit_insn (gen_vec_cmp<mode>di (vcc, minp ? gen_rtx_LTU (VOIDmode, 0, 0) :
                                      gen_rtx_GTU (VOIDmode, 0, 0), operands[1],
                                      operands[2]));
    emit_insn (gen_vcond_mask_<mode>di (out, operands[1], operands[2], vcc));
    DONE;
  }
  [(set_attr "type" "mult")])
2855
; Masked integer min/max on V_DI modes.  After reload this is split into a
; masked vector compare that writes VCC, a VCC-controlled select into the
; scratch register (operand 5), and a masked move that merges the scratch
; with operand 3 under EXEC (operand 4).
;
; The preparation statements emit the complete replacement sequence
; themselves, so they must finish with DONE; otherwise the (const_int 0)
; placeholder template would additionally be emitted as an insn.
(define_insn_and_split "<expander><mode>3_exec"
  [(set (match_operand:V_DI 0 "register_operand" "= v")
        (vec_merge:V_DI
          (minmaxop:V_DI
            (match_operand:V_DI 1 "gcn_alu_operand" " v")
            (match_operand:V_DI 2 "gcn_alu_operand" " v"))
          (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0")
          (match_operand:DI 4 "gcn_exec_reg_operand" "+e")))
   (clobber (match_scratch:<VnDI> 5 "= &v"))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    rtx out = operands[0];
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    rtx exec = operands[4];
    rtx tmp = operands[5];

    /* Local tags so that <code> can be compared by name.  */
    enum {smin, smax, umin, umax};
    bool minp = (<code> == smin || <code> == umin);

    /* min selects operand 1 where op1 < op2, max where op1 > op2; the
       signed codes use LT/GT and the unsigned codes LTU/GTU.  */
    if (<code> == smin || <code> == smax)
      emit_insn (gen_vec_cmp<mode>di_exec (vcc,
                                           minp ? gen_rtx_LT (VOIDmode, 0, 0) :
                                           gen_rtx_GT (VOIDmode, 0, 0),
                                           operands[1], operands[2], exec));
    else
      emit_insn (gen_vec_cmp<mode>di_exec (vcc,
                                           minp ? gen_rtx_LTU (VOIDmode, 0, 0) :
                                           gen_rtx_GTU (VOIDmode, 0, 0),
                                           operands[1], operands[2], exec));
    emit_insn (gen_vcond_mask_<mode>di (tmp, operands[1], operands[2], vcc));
    emit_insn (gen_mov<mode>_exec (out, tmp, operands[3], exec));
    DONE;
  }
  [(set_attr "type" "mult")])
2892
bf6b5c74
AS
2893;; }}}
2894;; {{{ Int unops
2895
; Integer vector negation, expanded as (0 - operand 1).
(define_expand "neg<mode>2"
  [(match_operand:V_INT 0 "register_operand")
   (match_operand:V_INT 1 "register_operand")]
  ""
  {
    rtx zero = gcn_vec_constant (<MODE>mode, 0);
    emit_insn (gen_sub<mode>3 (operands[0], zero, operands[1]));
    DONE;
  })
2905
34574064
AS
; Bitwise NOT on V_DI modes, split after reload into two <VnSI> NOTs on the
; low and high parts.
(define_insn_and_split "one_cmpl<mode>2<exec>"
  [(set
     (match_operand:V_DI 0 "register_operand" "= v")
     (not:V_DI
       (match_operand:V_DI 1 "gcn_alu_operand" "vSvDB")))]
  ""
  "#"
  "reload_completed"
  [(set (match_dup 3) (not:<VnSI> (match_dup 5)))
   (set (match_dup 4) (not:<VnSI> (match_dup 6)))]
  {
    /* operands[3..6] receive the {part 0, part 1} halves of operands[0]
       and operands[1], in that order.  */
    for (int src = 0; src < 2; src++)
      for (int part = 0; part < 2; part++)
        operands[3 + 2 * src + part]
          = gcn_operand_part (<VnDI>mode, operands[src], part);
  }
  [(set_attr "type" "mult")])
2922
3d6275e3
AS
2923;; }}}
2924;; {{{ FP binops - special cases
2925
2926; GCN does not directly provide a DFmode subtract instruction, so we do it by
2927; adding the negated second operand to the first.
2928
1165109b
AS
; V_DF subtraction, printed as v_add_f64 with operand 2 negated.  The two
; alternatives differ only in which source position may hold the
; non-register ("vSvB") operand.
(define_insn "sub<mode>3<exec>"
  [(set
     (match_operand:V_DF 0 "register_operand" "= v, v")
     (minus:V_DF
       (match_operand:V_DF 1 "gcn_alu_operand" "vSvB, v")
       (match_operand:V_DF 2 "gcn_alu_operand" " v,vSvB")))]
  ""
  "@
   v_add_f64\t%0, %1, -%2
   v_add_f64\t%0, -%2, %1"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8,8")])
2940
; Scalar DFmode subtraction, printed as v_add_f64 with operand 2 negated.
; The two alternatives differ only in which source position may hold the
; non-register ("vSvB") operand.
(define_insn "subdf3"
  [(set
     (match_operand:DF 0 "register_operand" "= v, v")
     (minus:DF
       (match_operand:DF 1 "gcn_alu_operand" "vSvB, v")
       (match_operand:DF 2 "gcn_alu_operand" " v,vSvB")))]
  ""
  "@
   v_add_f64\t%0, %1, -%2
   v_add_f64\t%0, -%2, %1"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8,8")])
2952
2953;; }}}
2954;; {{{ FP binops - generic
2955
3d6275e3
AS
; FP binary codes, grouped by whether the operands commute.
(define_code_iterator comm_fp
  [plus mult smin smax])

(define_code_iterator nocomm_fp
  [minus])

(define_code_iterator all_fp
  [plus mult minus smin smax])
2959
; Commutative FP binary operations (plus, mult, smin, smax) on V_FP modes.
; Operand 2 is printed first.
(define_insn "<expander><mode>3<exec>"
  [(set
     (match_operand:V_FP 0 "register_operand" "= v")
     (comm_fp:V_FP
       (match_operand:V_FP 1 "gcn_alu_operand" "% v")
       (match_operand:V_FP 2 "gcn_alu_operand" "vSvB")))]
  ""
  "v_<mnemonic>0\t%0, %2, %1"
  [(set_attr "type" "vop2")
   (set_attr "length" "8")])
2969
; Commutative FP binary operations (plus, mult, smin, smax) on scalar FP
; modes.  Alternative 0 is the VOP2 register form; alternative 1 has an RL
; destination that is also the first source.
; NOTE(review): alternative 1 is typed "ds" but prints a v_ mnemonic with
; the %O0 modifier -- confirm this is intended.
(define_insn "<expander><mode>3"
  [(set
     (match_operand:FP 0 "gcn_valu_dst_operand" "= v, RL")
     (comm_fp:FP
       (match_operand:FP 1 "gcn_valu_src0_operand" "% v, 0")
       (match_operand:FP 2 "gcn_valu_src1_operand" "vSvB,vSvB")))]
  ""
  "@
   v_<mnemonic>0\t%0, %2, %1
   v_<mnemonic>0\t%0, %1%O0"
  [(set_attr "type" "vop2,ds")
   (set_attr "length" "8")])
2981
; Non-commutative FP binary operations (minus) on V_FP_1REG modes.  When
; the non-register operand is in the second position, the <revmnemonic>
; form is printed with the operands swapped.
(define_insn "<expander><mode>3<exec>"
  [(set
     (match_operand:V_FP_1REG 0 "register_operand" "= v, v")
     (nocomm_fp:V_FP_1REG
       (match_operand:V_FP_1REG 1 "gcn_alu_operand" "vSvB, v")
       (match_operand:V_FP_1REG 2 "gcn_alu_operand" " v,vSvB")))]
  ""
  "@
   v_<mnemonic>0\t%0, %1, %2
   v_<revmnemonic>0\t%0, %2, %1"
  [(set_attr "type" "vop2")
   (set_attr "length" "8,8")])
2993
; Non-commutative FP binary operations (minus) on scalar FP_1REG modes.
; When the non-register operand is in the second position, the
; <revmnemonic> form is printed with the operands swapped.
(define_insn "<expander><mode>3"
  [(set
     (match_operand:FP_1REG 0 "register_operand" "= v, v")
     (nocomm_fp:FP_1REG
       (match_operand:FP_1REG 1 "gcn_alu_operand" "vSvB, v")
       (match_operand:FP_1REG 2 "gcn_alu_operand" " v,vSvB")))]
  ""
  "@
   v_<mnemonic>0\t%0, %1, %2
   v_<revmnemonic>0\t%0, %2, %1"
  [(set_attr "type" "vop2")
   (set_attr "length" "8,8")])
3005
10aa0356
AS
; FP min/max codes, used by the <fexpander> named expanders.
(define_code_iterator fminmaxop
  [smin smax])
; Scalar FP min/max under the <fexpander> name.  The preparation block is
; empty, so the smin/smax template below is emitted unchanged.
(define_expand "<fexpander><mode>3"
  [(set
     (match_operand:FP 0 "gcn_valu_dst_operand")
     (fminmaxop:FP
       (match_operand:FP 1 "gcn_valu_src0_operand")
       (match_operand:FP 2 "gcn_valu_src1_operand")))]
  ""
  {})
3014
; Vector FP min/max under the <fexpander> name.  The preparation block is
; empty, so the smin/smax template below is emitted unchanged.
(define_expand "<fexpander><mode>3<exec>"
  [(set
     (match_operand:V_FP 0 "gcn_valu_dst_operand")
     (fminmaxop:V_FP
       (match_operand:V_FP 1 "gcn_valu_src0_operand")
       (match_operand:V_FP 2 "gcn_valu_src1_operand")))]
  ""
  {})
3022
3d6275e3
AS
3023;; }}}
3024;; {{{ FP unops
3025
; Scalar FP absolute value, printed as an add of 0 and |operand 1|.
(define_insn "abs<mode>2"
  [(set
     (match_operand:FP 0 "register_operand" "=v")
     (abs:FP
       (match_operand:FP 1 "register_operand" " v")))]
  ""
  "v_add%i0\t%0, 0, |%1|"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])
3033
; Vector FP absolute value, printed as an add of 0 and |operand 1|.
(define_insn "abs<mode>2<exec>"
  [(set
     (match_operand:V_FP 0 "register_operand" "=v")
     (abs:V_FP
       (match_operand:V_FP 1 "register_operand" " v")))]
  ""
  "v_add%i0\t%0, 0, |%1|"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])
3042
; Vector FP negation, printed as an add of 0 and -operand 1.
(define_insn "neg<mode>2<exec>"
  [(set
     (match_operand:V_FP 0 "register_operand" "=v")
     (neg:V_FP
       (match_operand:V_FP 1 "register_operand" " v")))]
  ""
  "v_add%i0\t%0, 0, -%1"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])
3051
; Vector FP square root; only enabled with flag_unsafe_math_optimizations.
(define_insn "sqrt<mode>2<exec>"
  [(set
     (match_operand:V_FP 0 "register_operand" "= v")
     (sqrt:V_FP
       (match_operand:V_FP 1 "gcn_alu_operand" "vSvB")))]
  "flag_unsafe_math_optimizations"
  "v_sqrt%i0\t%0, %1"
  [(set_attr "type" "vop1")
   (set_attr "length" "8")])
3060
; Scalar FP square root; only enabled with flag_unsafe_math_optimizations.
(define_insn "sqrt<mode>2"
  [(set
     (match_operand:FP 0 "register_operand" "= v")
     (sqrt:FP
       (match_operand:FP 1 "gcn_alu_operand" "vSvB")))]
  "flag_unsafe_math_optimizations"
  "v_sqrt%i0\t%0, %1"
  [(set_attr "type" "vop1")
   (set_attr "length" "8")])
eff73c10
KCY
3069
3070; These FP unops have f64, f32 and f16 versions.
(define_int_iterator MATH_UNOP_1OR2REG
  [UNSPEC_FLOOR
   UNSPEC_CEIL])

; These FP unops only have f16/f32 versions.
(define_int_iterator MATH_UNOP_1REG
  [UNSPEC_EXP2
   UNSPEC_LOG2])

; Trigonometric unops; handled separately because their expanders pre-scale
; the input.
(define_int_iterator MATH_UNOP_TRIG
  [UNSPEC_SIN
   UNSPEC_COS])

; Name used in the pattern name for each unop.
(define_int_attr math_unop
  [(UNSPEC_FLOOR "floor")
   (UNSPEC_CEIL "ceil")
   (UNSPEC_EXP2 "exp2")
   (UNSPEC_LOG2 "log2")
   (UNSPEC_SIN "sin")
   (UNSPEC_COS "cos")])

; Name used in the printed instruction mnemonic; differs from math_unop for
; exp2/log2, which are printed as exp/log.
(define_int_attr math_unop_insn
  [(UNSPEC_FLOOR "floor")
   (UNSPEC_CEIL "ceil")
   (UNSPEC_EXP2 "exp")
   (UNSPEC_LOG2 "log")
   (UNSPEC_SIN "sin")
   (UNSPEC_COS "cos")])
3096
eff73c10
KCY
; floor/ceil on scalar FP modes.
(define_insn "<math_unop><mode>2"
  [(set
     (match_operand:FP 0 "register_operand" "= v")
     (unspec:FP
       [(match_operand:FP 1 "gcn_alu_operand" "vSvB")]
       MATH_UNOP_1OR2REG))]
  ""
  "v_<math_unop_insn>%i0\t%0, %1"
  [(set_attr "type" "vop1")
   (set_attr "length" "8")])
3106
; floor/ceil on vector FP modes.
(define_insn "<math_unop><mode>2<exec>"
  [(set
     (match_operand:V_FP 0 "register_operand" "= v")
     (unspec:V_FP
       [(match_operand:V_FP 1 "gcn_alu_operand" "vSvB")]
       MATH_UNOP_1OR2REG))]
  ""
  "v_<math_unop_insn>%i0\t%0, %1"
  [(set_attr "type" "vop1")
   (set_attr "length" "8")])
3116
; exp2/log2 on scalar FP_1REG modes; only enabled with
; flag_unsafe_math_optimizations.
(define_insn "<math_unop><mode>2"
  [(set
     (match_operand:FP_1REG 0 "register_operand" "= v")
     (unspec:FP_1REG
       [(match_operand:FP_1REG 1 "gcn_alu_operand" "vSvB")]
       MATH_UNOP_1REG))]
  "flag_unsafe_math_optimizations"
  "v_<math_unop_insn>%i0\t%0, %1"
  [(set_attr "type" "vop1")
   (set_attr "length" "8")])
3126
; exp2/log2 on vector V_FP_1REG modes; only enabled with
; flag_unsafe_math_optimizations.
(define_insn "<math_unop><mode>2<exec>"
  [(set
     (match_operand:V_FP_1REG 0 "register_operand" "= v")
     (unspec:V_FP_1REG
       [(match_operand:V_FP_1REG 1 "gcn_alu_operand" "vSvB")]
       MATH_UNOP_1REG))]
  "flag_unsafe_math_optimizations"
  "v_<math_unop_insn>%i0\t%0, %1"
  [(set_attr "type" "vop1")
   (set_attr "length" "8")])
3136
; Raw sin/cos instruction on scalar FP_1REG modes.  This is the anonymous
; ("*") insn; it takes its operand as given, with no scaling applied here.
(define_insn "*<math_unop><mode>2_insn"
  [(set
     (match_operand:FP_1REG 0 "register_operand" "= v")
     (unspec:FP_1REG
       [(match_operand:FP_1REG 1 "gcn_alu_operand" "vSvB")]
       MATH_UNOP_TRIG))]
  "flag_unsafe_math_optimizations"
  "v_<math_unop_insn>%i0\t%0, %1"
  [(set_attr "type" "vop1")
   (set_attr "length" "8")])
3146
; Raw sin/cos instruction on vector V_FP_1REG modes.  This is the anonymous
; ("*") insn; it takes its operand as given, with no scaling applied here.
(define_insn "*<math_unop><mode>2<exec>_insn"
  [(set
     (match_operand:V_FP_1REG 0 "register_operand" "= v")
     (unspec:V_FP_1REG
       [(match_operand:V_FP_1REG 1 "gcn_alu_operand" "vSvB")]
       MATH_UNOP_TRIG))]
  "flag_unsafe_math_optimizations"
  "v_<math_unop_insn>%i0\t%0, %1"
  [(set_attr "type" "vop1")
   (set_attr "length" "8")])
3156
3157; Trigonometric functions need their input scaled by 1/(2*PI) first.
3158
; Scalar sin/cos expander: multiply the input by the constant returned by
; gcn_dconst1over2pi into a fresh register (operand 2), then apply the
; trig unspec to that register.
(define_expand "<math_unop><mode>2"
  [(set
     (match_dup 2)
     (mult:FP_1REG
       (match_dup 3)
       (match_operand:FP_1REG 1 "gcn_alu_operand")))
   (set
     (match_operand:FP_1REG 0 "register_operand")
     (unspec:FP_1REG
       [(match_dup 2)]
       MATH_UNOP_TRIG))]
  "flag_unsafe_math_optimizations"
  {
    /* operands[2]: scaled input; operands[3]: the scale factor.  */
    operands[2] = gen_reg_rtx (<MODE>mode);
    operands[3]
      = const_double_from_real_value (gcn_dconst1over2pi (), <MODE>mode);
  })
3174
; Vector sin/cos expander: multiply the input by a vector constant built
; from gcn_dconst1over2pi into a fresh register (operand 2), then apply the
; trig unspec to that register.
(define_expand "<math_unop><mode>2<exec>"
  [(set
     (match_dup 2)
     (mult:V_FP_1REG
       (match_dup 3)
       (match_operand:V_FP_1REG 1 "gcn_alu_operand")))
   (set
     (match_operand:V_FP_1REG 0 "register_operand")
     (unspec:V_FP_1REG
       [(match_dup 2)]
       MATH_UNOP_TRIG))]
  "flag_unsafe_math_optimizations"
  {
    /* operands[2]: scaled input; operands[3]: the scale factor, built in
       the element mode and wrapped into a vector constant.  */
    operands[2] = gen_reg_rtx (<MODE>mode);
    rtx scale = const_double_from_real_value (gcn_dconst1over2pi (),
                                              <SCALAR_MODE>mode);
    operands[3] = gcn_vec_constant (<MODE>mode, scale);
  })
3192
3193; Implement ldexp pattern
3194
; ldexp for scalar and vector FP modes (SV_FP); operand 2 is the <VnSI>
; exponent argument.
(define_insn "ldexp<mode>3<exec>"
  [(set
     (match_operand:SV_FP 0 "register_operand" "= v")
     (unspec:SV_FP
       [(match_operand:SV_FP 1 "gcn_alu_operand" " vA")
        (match_operand:<VnSI> 2 "gcn_alu_operand" "vSvA")]
       UNSPEC_LDEXP))]
  ""
  "v_ldexp%i0\t%0, %1, %2"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])
3205
3206; Implement frexp patterns
3207
; Exponent half of frexp for scalar FP modes; the result is SImode.
(define_insn "frexp<mode>_exp2"
  [(set
     (match_operand:SI 0 "register_operand" "=v")
     (unspec:SI
       [(match_operand:FP 1 "gcn_alu_operand" "vB")]
       UNSPEC_FREXP_EXP))]
  ""
  "v_frexp_exp_i32%i1\t%0, %1"
  [(set_attr "type" "vop1")
   (set_attr "length" "8")])
3217
; Mantissa half of frexp for scalar FP modes.
(define_insn "frexp<mode>_mant2"
  [(set
     (match_operand:FP 0 "register_operand" "=v")
     (unspec:FP
       [(match_operand:FP 1 "gcn_alu_operand" "vB")]
       UNSPEC_FREXP_MANT))]
  ""
  "v_frexp_mant%i1\t%0, %1"
  [(set_attr "type" "vop1")
   (set_attr "length" "8")])
3227
; Exponent half of frexp for vector FP modes; the result is a <VnSI>
; vector.
(define_insn "frexp<mode>_exp2<exec>"
  [(set
     (match_operand:<VnSI> 0 "register_operand" "=v")
     (unspec:<VnSI>
       [(match_operand:V_FP 1 "gcn_alu_operand" "vB")]
       UNSPEC_FREXP_EXP))]
  ""
  "v_frexp_exp_i32%i1\t%0, %1"
  [(set_attr "type" "vop1")
   (set_attr "length" "8")])
3237
; Mantissa half of frexp for vector FP modes.
(define_insn "frexp<mode>_mant2<exec>"
  [(set
     (match_operand:V_FP 0 "register_operand" "=v")
     (unspec:V_FP
       [(match_operand:V_FP 1 "gcn_alu_operand" "vB")]
       UNSPEC_FREXP_MANT))]
  ""
  "v_frexp_mant%i1\t%0, %1"
  [(set_attr "type" "vop1")
   (set_attr "length" "8")])
3d6275e3
AS
3247
3248;; }}}
3249;; {{{ FP fused multiply and add
3250
; Vector fused multiply-add: op1 * op2 + op3.  The alternatives differ in
; which operand position accepts the widest ("vSvA") constraint.
(define_insn "fma<mode>4<exec>"
  [(set
     (match_operand:V_FP 0 "register_operand" "= v, v")
     (fma:V_FP
       (match_operand:V_FP 1 "gcn_alu_operand" "% vA, vA")
       (match_operand:V_FP 2 "gcn_alu_operand" " vA,vSvA")
       (match_operand:V_FP 3 "gcn_alu_operand" "vSvA, vA")))]
  ""
  "v_fma%i0\t%0, %1, %2, %3"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])
3261
; Vector fused multiply-add with operand 2 negated: op1 * (-op2) + op3.
(define_insn "fma<mode>4_negop2<exec>"
  [(set
     (match_operand:V_FP 0 "register_operand" "= v, v, v")
     (fma:V_FP
       (match_operand:V_FP 1 "gcn_alu_operand" " vA, vA,vSvA")
       (neg:V_FP
         (match_operand:V_FP 2 "gcn_alu_operand" " vA,vSvA, vA"))
       (match_operand:V_FP 3 "gcn_alu_operand" "vSvA, vA, vA")))]
  ""
  "v_fma%i0\t%0, %1, -%2, %3"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])
3273
; Scalar fused multiply-add: op1 * op2 + op3.  The alternatives differ in
; which operand position accepts the widest ("vSvA") constraint.
(define_insn "fma<mode>4"
  [(set
     (match_operand:FP 0 "register_operand" "= v, v")
     (fma:FP
       (match_operand:FP 1 "gcn_alu_operand" "% vA, vA")
       (match_operand:FP 2 "gcn_alu_operand" " vA,vSvA")
       (match_operand:FP 3 "gcn_alu_operand" "vSvA, vA")))]
  ""
  "v_fma%i0\t%0, %1, %2, %3"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])
3284
; Scalar fused multiply-add with operand 2 negated: op1 * (-op2) + op3.
(define_insn "fma<mode>4_negop2"
  [(set
     (match_operand:FP 0 "register_operand" "= v, v, v")
     (fma:FP
       (match_operand:FP 1 "gcn_alu_operand" " vA, vA,vSvA")
       (neg:FP
         (match_operand:FP 2 "gcn_alu_operand" " vA,vSvA, vA"))
       (match_operand:FP 3 "gcn_alu_operand" "vSvA, vA, vA")))]
  ""
  "v_fma%i0\t%0, %1, -%2, %3"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])
3296
1bde3ace
AJ
; Vector fused multiply-subtract: op1 * op2 - op3, printed as v_fma with
; operand 3 negated.
(define_insn "fms<mode>4<exec>"
  [(set
     (match_operand:V_FP 0 "register_operand" "= v, v")
     (fma:V_FP
       (match_operand:V_FP 1 "gcn_alu_operand" "% vA, vA")
       (match_operand:V_FP 2 "gcn_alu_operand" " vA,vSvA")
       (neg:V_FP
         (match_operand:V_FP 3 "gcn_alu_operand" "vSvA, vA"))))]
  ""
  "v_fma%i0\t%0, %1, %2, -%3"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])
3308
; Vector fused multiply-subtract with operand 2 negated:
; op1 * (-op2) - op3.
(define_insn "fms<mode>4_negop2<exec>"
  [(set
     (match_operand:V_FP 0 "register_operand" "= v, v, v")
     (fma:V_FP
       (match_operand:V_FP 1 "gcn_alu_operand" " vA, vA,vSvA")
       (neg:V_FP
         (match_operand:V_FP 2 "gcn_alu_operand" " vA,vSvA, vA"))
       (neg:V_FP
         (match_operand:V_FP 3 "gcn_alu_operand" "vSvA, vA, vA"))))]
  ""
  "v_fma%i0\t%0, %1, -%2, -%3"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])
3321
; Scalar fused multiply-subtract: op1 * op2 - op3, printed as v_fma with
; operand 3 negated.
(define_insn "fms<mode>4"
  [(set
     (match_operand:FP 0 "register_operand" "= v, v")
     (fma:FP
       (match_operand:FP 1 "gcn_alu_operand" "% vA, vA")
       (match_operand:FP 2 "gcn_alu_operand" " vA,vSvA")
       (neg:FP
         (match_operand:FP 3 "gcn_alu_operand" "vSvA, vA"))))]
  ""
  "v_fma%i0\t%0, %1, %2, -%3"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])
3333
; Scalar fused multiply-subtract with operand 2 negated:
; op1 * (-op2) - op3.
(define_insn "fms<mode>4_negop2"
  [(set
     (match_operand:FP 0 "register_operand" "= v, v, v")
     (fma:FP
       (match_operand:FP 1 "gcn_alu_operand" " vA, vA,vSvA")
       (neg:FP
         (match_operand:FP 2 "gcn_alu_operand" " vA,vSvA, vA"))
       (neg:FP
         (match_operand:FP 3 "gcn_alu_operand" "vSvA, vA, vA"))))]
  ""
  "v_fma%i0\t%0, %1, -%2, -%3"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])
3346
3d6275e3
AS
3347;; }}}
3348;; {{{ FP division
3349
;; Reciprocal instruction for the SV_FP modes.  The operation is modelled
;; as UNSPEC_RCP of operand 1 rather than as a division, and is emitted as
;; v_rcp.  The div<mode>3 expander uses it as the initial estimate that it
;; then refines.
3350(define_insn "recip<mode>2<exec>"
cfdc45f7
AS
3351 [(set (match_operand:SV_FP 0 "register_operand" "= v")
3352 (unspec:SV_FP
3353 [(match_operand:SV_FP 1 "gcn_alu_operand" "vSvB")]
c8812bac 3354 UNSPEC_RCP))]
3d6275e3
AS
3355 ""
3356 "v_rcp%i0\t%0, %1"
3357 [(set_attr "type" "vop1")
3358 (set_attr "length" "8")])
3359
cfdc45f7
AS
3360;; v_div_scale takes a numerator (op2) and denominator (op1) and returns the
3361;; one that matches op3 adjusted for best results in reciprocal division.
3362;; It also emits a VCC mask that is intended for input to v_div_fmas.
3363;; The caller is expected to call this twice, once for each input. The output
3364;; VCC is the same in both cases, so the caller may discard one.
;; Two parallel sets: operand 0 receives the scaled value and operand 4
;; (DImode, constraint "SvcV") receives the mask; both are UNSPEC_DIV_SCALE
;; of the same three inputs.  Note the assembly operand order differs from
;; the RTL order: the template prints %0, %4, %3, %1, %2.
3365(define_insn "div_scale<mode><exec_vcc>"
3366 [(set (match_operand:SV_SFDF 0 "register_operand" "=v")
3367 (unspec:SV_SFDF
3368 [(match_operand:SV_SFDF 1 "gcn_alu_operand" "v")
3369 (match_operand:SV_SFDF 2 "gcn_alu_operand" "v")
3370 (match_operand:SV_SFDF 3 "gcn_alu_operand" "v")]
3371 UNSPEC_DIV_SCALE))
3372 (set (match_operand:DI 4 "register_operand" "=SvcV")
3373 (unspec:DI
3374 [(match_dup 1) (match_dup 2) (match_dup 3)]
3375 UNSPEC_DIV_SCALE))]
3376 ""
3377 "v_div_scale%i0\t%0, %4, %3, %1, %2"
3378 [(set_attr "type" "vop3b")
3d6275e3
AS
3379 (set_attr "length" "8")])
3380
cfdc45f7
AS
3381;; v_div_fmas is "FMA and Scale" that uses the VCC output from v_div_scale
3382;; to conditionally scale the output of the whole division operation.
3383;; This is necessary to counter the adjustments made by v_div_scale and
3384;; replaces the last FMA instruction of the Newton Raphson algorithm.
;; RTL shape: UNSPEC_DIV_FMAS of (op1 * op2 + op3) together with the DImode
;; mask in operand 4 (constraint "cV").  Operand 4 is not an explicit
;; assembly operand: the template prints it only after a ";" following the
;; three sources.  The insn also sets the "vccwait" attribute to 5.
3385(define_insn "div_fmas<mode><exec>"
3386 [(set (match_operand:SV_SFDF 0 "register_operand" "=v")
3387 (unspec:SV_SFDF
3388 [(plus:SV_SFDF
3389 (mult:SV_SFDF
3390 (match_operand:SV_SFDF 1 "gcn_alu_operand" "v")
3391 (match_operand:SV_SFDF 2 "gcn_alu_operand" "v"))
3392 (match_operand:SV_SFDF 3 "gcn_alu_operand" "v"))
3393 (match_operand:DI 4 "register_operand" "cV")]
3394 UNSPEC_DIV_FMAS))]
3395 ""
3396 "v_div_fmas%i0\t%0, %1, %2, %3; %4"
3397 [(set_attr "type" "vop3a")
3398 (set_attr "length" "8")
3399 (set_attr "vccwait" "5")])
3400
3401;; v_div_fixup takes the inputs and outputs of a division operation already
3402;; completed and cleans up the floating-point sign bit, infinity, underflow,
3403;; overflow, and NaN status. It will also emit any FP exceptions.
3404;; op1: quotient, op2: denominator, op3: numerator
;; Modelled as UNSPEC_DIV_FIXUP of three inputs, all constrained to "v".
;; Operand 1 uses the stricter "register_operand" predicate; operands 2 and
;; 3 use "gcn_alu_operand".  Emitted as v_div_fixup %0, %1, %2, %3.
3405(define_insn "div_fixup<mode><exec>"
3406 [(set (match_operand:SV_FP 0 "register_operand" "=v")
3407 (unspec:SV_FP
3408 [(match_operand:SV_FP 1 "register_operand" "v")
3409 (match_operand:SV_FP 2 "gcn_alu_operand" "v")
3410 (match_operand:SV_FP 3 "gcn_alu_operand" "v")]
3411 UNSPEC_DIV_FIXUP))]
3412 ""
3413 "v_div_fixup%i0\t%0, %1, %2, %3"
3414 [(set_attr "type" "vop3a")
3415 (set_attr "length" "8")])
3d6275e3
AS
3416
;; Expand floating-point division (operand 0 = operand 1 / operand 2) for
;; the SV_SFDF modes into an explicit instruction sequence:
;;   1. div_scale is emitted twice, once selecting the denominator and once
;;      the numerator; only the mask from the second call is kept.
;;   2. recip of the scaled denominator, refined with fma_negop2 + fma
;;      against the constant 1.0.
;;   3. A quotient estimate (mul) refined with further fma_negop2/fma steps.
;;   4. div_fmas consumes the kept mask, and div_fixup produces operand 0
;;      from the result plus the original (unscaled) inputs.
;; All intermediates are fresh pseudo registers.
3417(define_expand "div<mode>3"
cfdc45f7
AS
3418 [(match_operand:SV_SFDF 0 "register_operand")
3419 (match_operand:SV_SFDF 1 "gcn_alu_operand")
3420 (match_operand:SV_SFDF 2 "gcn_alu_operand")]
3421 ""
3422 {
3423 rtx numerator = operands[1];
3424 rtx denominator = operands[2];
3425
3426 /* Scale the inputs if they are close to the FP limits.
3427 This will be reversed later. */
3428 rtx vcc = gen_reg_rtx (DImode);
3429 rtx discardedvcc = gen_reg_rtx (DImode);
3430 rtx scaled_numerator = gen_reg_rtx (<MODE>mode);
3431 rtx scaled_denominator = gen_reg_rtx (<MODE>mode);
3432 emit_insn (gen_div_scale<mode> (scaled_denominator,
3433 denominator, numerator,
3434 denominator, discardedvcc));
3435 emit_insn (gen_div_scale<mode> (scaled_numerator,
3436 denominator, numerator,
3437 numerator, vcc));
3438
3439 /* Find the reciprocal of the denominator, and use Newton-Raphson to
3440 improve the accuracy over the basic hardware instruction. */
c8812bac
JB
3441 rtx one = gcn_vec_constant (<MODE>mode,
3442 const_double_from_real_value (dconst1, <SCALAR_MODE>mode));
3d6275e3 3443 rtx initrcp = gen_reg_rtx (<MODE>mode);
cfdc45f7
AS
3444 rtx fma1 = gen_reg_rtx (<MODE>mode);
3445 rtx rcp = gen_reg_rtx (<MODE>mode);
3446 emit_insn (gen_recip<mode>2 (initrcp, scaled_denominator));
3447 emit_insn (gen_fma<mode>4_negop2 (fma1, initrcp, scaled_denominator, one));
3448 emit_insn (gen_fma<mode>4 (rcp, fma1, initrcp, initrcp));
3449
3450 /* Do the division "a/b" via "a*1/b" and use Newton-Raphson to improve
3451 the accuracy. The "div_fmas" instruction reverses any scaling
3452 performed by "div_scale", above. */
3453 rtx div_est = gen_reg_rtx (<MODE>mode);
3454 rtx fma2 = gen_reg_rtx (<MODE>mode);
3455 rtx fma3 = gen_reg_rtx (<MODE>mode);
3456 rtx fma4 = gen_reg_rtx (<MODE>mode);
3457 rtx fmas = gen_reg_rtx (<MODE>mode);
3458 emit_insn (gen_mul<mode>3 (div_est, scaled_numerator, rcp));
3459 emit_insn (gen_fma<mode>4_negop2 (fma2, div_est, scaled_denominator,
3460 scaled_numerator));
3461 emit_insn (gen_fma<mode>4 (fma3, fma2, rcp, div_est));
3462 emit_insn (gen_fma<mode>4_negop2 (fma4, fma3, scaled_denominator,
3463 scaled_numerator));
3464 emit_insn (gen_div_fmas<mode> (fmas, fma4, rcp, fma3, vcc));
3465
3466 /* Finally, use "div_fixup" to get the details right and find errors. */
3467 emit_insn (gen_div_fixup<mode> (operands[0], fmas, denominator,
3468 numerator));
3d6275e3
AS
3469 DONE;
3470 })
3471
3472;; }}}
3473;; {{{ Int/FP conversions
3474
;; Source and destination mode sets for the scalar conversion insn.  The
;; two iterators list the same modes; the insn is instantiated for every
;; pair and filtered by gcn_valid_cvt_p in its condition.
3475(define_mode_iterator CVT_FROM_MODE [HI SI HF SF DF])
3476(define_mode_iterator CVT_TO_MODE [HI SI HF SF DF])
3477
45381d6f
AS
;; Mode sets for the vector conversion insns, covering 2 to 64 lanes:
;;   VCVT_MODE  - HI, SI, HF, SF and DF element vectors (any source);
;;   VCVT_FMODE - the floating-point subset (HF, SF, DF);
;;   VCVT_IMODE - the integer subset (HI, SI).
3478(define_mode_iterator VCVT_MODE
3479 [V2HI V2SI V2HF V2SF V2DF
3480 V4HI V4SI V4HF V4SF V4DF
3481 V8HI V8SI V8HF V8SF V8DF
3482 V16HI V16SI V16HF V16SF V16DF
3483 V32HI V32SI V32HF V32SF V32DF
3484 V64HI V64SI V64HF V64SF V64DF])
3485(define_mode_iterator VCVT_FMODE
3486 [V2HF V2SF V2DF
3487 V4HF V4SF V4DF
3488 V8HF V8SF V8DF
3489 V16HF V16SF V16DF
3490 V32HF V32SF V32DF
3491 V64HF V64SF V64DF])
3492(define_mode_iterator VCVT_IMODE
3493 [V2HI V2SI
3494 V4HI V4SI
3495 V8HI V8SI
3496 V16HI V16SI
3497 V32HI V32SI
3498 V64HI V64SI])
3d6275e3
AS
3499
;; cvt_op iterates over the six conversion RTL codes.
;; cvt_name maps each code to the pattern-name prefix used in the insn
;; names (e.g. fix -> "fix_trunc").
;; cvt_operands maps each code to the pair of operand-type suffix
;; directives appended to "v_cvt"; the unsigned codes use %u in place of
;; %i for the integer side (destination for unsigned_fix, source for
;; unsigned_float).
3500(define_code_iterator cvt_op [fix unsigned_fix
3501 float unsigned_float
3502 float_extend float_truncate])
3503(define_code_attr cvt_name [(fix "fix_trunc") (unsigned_fix "fixuns_trunc")
3504 (float "float") (unsigned_float "floatuns")
3505 (float_extend "extend") (float_truncate "trunc")])
3506(define_code_attr cvt_operands [(fix "%i0%i1") (unsigned_fix "%u0%i1")
3507 (float "%i0%i1") (unsigned_float "%i0%u1")
3508 (float_extend "%i0%i1")
3509 (float_truncate "%i0%i1")])
3510
;; Scalar-mode conversion insn, instantiated for every combination of
;; cvt_op, CVT_FROM_MODE and CVT_TO_MODE.  Combinations are enabled only
;; when gcn_valid_cvt_p accepts the (from, to, <cvt_name>_cvt) triple.
;; Emitted as a single v_cvt with the suffixes chosen by <cvt_operands>.
3511(define_insn "<cvt_name><CVT_FROM_MODE:mode><CVT_TO_MODE:mode>2"
3512 [(set (match_operand:CVT_TO_MODE 0 "register_operand" "= v")
3513 (cvt_op:CVT_TO_MODE
3514 (match_operand:CVT_FROM_MODE 1 "gcn_alu_operand" "vSvB")))]
3515 "gcn_valid_cvt_p (<CVT_FROM_MODE:MODE>mode, <CVT_TO_MODE:MODE>mode,
3516 <cvt_name>_cvt)"
3517 "v_cvt<cvt_operands>\t%0, %1"
3518 [(set_attr "type" "vop1")
3519 (set_attr "length" "8")])
3520
3d66c777
AS
;; Vector conversion with a floating-point destination (VCVT_FMODE) from
;; any VCVT_MODE source.  Enabled only when MODE_VF gives the same value
;; for both modes and gcn_valid_cvt_p accepts the conversion.
3521(define_insn "<cvt_name><VCVT_MODE:mode><VCVT_FMODE:mode>2<exec>"
3522 [(set (match_operand:VCVT_FMODE 0 "register_operand" "= v")
3523 (cvt_op:VCVT_FMODE
3524 (match_operand:VCVT_MODE 1 "gcn_alu_operand" "vSvB")))]
0d8753cf
AS
3525 "MODE_VF (<VCVT_MODE:MODE>mode) == MODE_VF (<VCVT_FMODE:MODE>mode)
3526 && gcn_valid_cvt_p (<VCVT_MODE:MODE>mode, <VCVT_FMODE:MODE>mode,
3527 <cvt_name>_cvt)"
3d66c777
AS
3528 "v_cvt<cvt_operands>\t%0, %1"
3529 [(set_attr "type" "vop1")
3530 (set_attr "length" "8")])
3531
;; Vector conversion from a floating-point source (VCVT_FMODE) to an
;; integer destination (VCVT_IMODE).  Enabled only when MODE_VF gives the
;; same value for both modes and gcn_valid_cvt_p accepts the conversion.
3532(define_insn "<cvt_name><VCVT_FMODE:mode><VCVT_IMODE:mode>2<exec>"
3533 [(set (match_operand:VCVT_IMODE 0 "register_operand" "= v")
3534 (cvt_op:VCVT_IMODE
3535 (match_operand:VCVT_FMODE 1 "gcn_alu_operand" "vSvB")))]
0d8753cf
AS
3536 "MODE_VF (<VCVT_IMODE:MODE>mode) == MODE_VF (<VCVT_FMODE:MODE>mode)
3537 && gcn_valid_cvt_p (<VCVT_FMODE:MODE>mode, <VCVT_IMODE:MODE>mode,
3538 <cvt_name>_cvt)"
3d6275e3
AS
3539 "v_cvt<cvt_operands>\t%0, %1"
3540 [(set_attr "type" "vop1")
3541 (set_attr "length" "8")])
3542
3543;; }}}
3544;; {{{ Int/int conversions
3545
;; Code iterators for integer width conversions:
;;   all_convert  - truncate, zero_extend and sign_extend;
;;   zero_convert - only the two codes that need no sign handling.
;; convop gives the pattern-name fragment for each code.
99890e15 3546(define_code_iterator all_convert [truncate zero_extend sign_extend])
3d66c777
AS
3547(define_code_iterator zero_convert [truncate zero_extend])
3548(define_code_attr convop [
3549 (sign_extend "extend")
3550 (zero_extend "zero_extend")
3551 (truncate "trunc")])
3552
99890e15
AS
;; Named expander for conversions between two single-register integer
;; vector modes.  It has no preparation code, so it simply emits the RTL
;; shown; the SDWA or shift-based insns match it depending on TARGET_SDWA.
3553(define_expand "<convop><V_INT_1REG_ALT:mode><V_INT_1REG:mode>2<exec>"
3554 [(set (match_operand:V_INT_1REG 0 "register_operand" "=v")
3555 (all_convert:V_INT_1REG
3556 (match_operand:V_INT_1REG_ALT 1 "gcn_alu_operand" " v")))]
3557 "")
3558
;; Truncate or zero-extend (zero_convert) between single-register integer
;; vector modes when TARGET_SDWA is set.  Emitted as one v_mov_b32_sdwa
;; whose dst_sel/src0_sel fields come from the "sdwa" attribute of the
;; destination and source modes, with dst_unused:UNUSED_PAD.
3559(define_insn "*<convop><V_INT_1REG_ALT:mode><V_INT_1REG:mode>_sdwa<exec>"
03876953
AS
3560 [(set (match_operand:V_INT_1REG 0 "register_operand" "=v")
3561 (zero_convert:V_INT_1REG
3562 (match_operand:V_INT_1REG_ALT 1 "gcn_alu_operand" " v")))]
68e03492 3563 "TARGET_SDWA"
03876953 3564 "v_mov_b32_sdwa\t%0, %1 dst_sel:<V_INT_1REG:sdwa> dst_unused:UNUSED_PAD src0_sel:<V_INT_1REG_ALT:sdwa>"
3d66c777
AS
3565 [(set_attr "type" "vop_sdwa")
3566 (set_attr "length" "8")])
3567
;; Sign-extend between single-register integer vector modes when
;; TARGET_SDWA is set.  Emitted as v_mov_b32_sdwa with the source wrapped
;; in sext() and src0_sel taken from the source mode's "sdwa" attribute;
;; no dst_sel is given.
99890e15 3568(define_insn "extend<V_INT_1REG_ALT:mode><V_INT_1REG:mode>_sdwa<exec>"
03876953
AS
3569 [(set (match_operand:V_INT_1REG 0 "register_operand" "=v")
3570 (sign_extend:V_INT_1REG
3571 (match_operand:V_INT_1REG_ALT 1 "gcn_alu_operand" " v")))]
68e03492 3572 "TARGET_SDWA"
03876953 3573 "v_mov_b32_sdwa\t%0, sext(%1) src0_sel:<V_INT_1REG_ALT:sdwa>"
3d66c777
AS
3574 [(set_attr "type" "vop_sdwa")
3575 (set_attr "length" "8")])
3576
99890e15
AS
;; Fallback for all three integer conversions when TARGET_SDWA is not set.
;; The output code picks a shift width from the narrower element size
;; (24 if either scalar mode is QImode, else 16 if either is HImode) and
;; stores it in operands[2].  With no width it emits a plain v_mov_b32;
;; otherwise it shifts left then right by that width, using an arithmetic
;; right shift for extend and trunc and a logical one for zero_extend.
;; The local enum exists so that the substituted <convop> text (extend,
;; zero_extend, trunc) can be compared as C identifiers.
3577(define_insn "*<convop><V_INT_1REG_ALT:mode><V_INT_1REG:mode>_shift<exec>"
3578 [(set (match_operand:V_INT_1REG 0 "register_operand" "=v")
3579 (all_convert:V_INT_1REG
3580 (match_operand:V_INT_1REG_ALT 1 "gcn_alu_operand" " v")))]
68e03492 3581 "!TARGET_SDWA"
99890e15
AS
3582 {
3583 enum {extend, zero_extend, trunc};
3584 rtx shiftwidth = (<V_INT_1REG_ALT:SCALAR_MODE>mode == QImode
3585 || <V_INT_1REG:SCALAR_MODE>mode == QImode
3586 ? GEN_INT (24)
3587 : <V_INT_1REG_ALT:SCALAR_MODE>mode == HImode
3588 || <V_INT_1REG:SCALAR_MODE>mode == HImode
3589 ? GEN_INT (16)
3590 : NULL);
3591 operands[2] = shiftwidth;
3592
3593 if (!shiftwidth)
3594 return "v_mov_b32 %0, %1";
3595 else if (<convop> == extend || <convop> == trunc)
3596 return "v_lshlrev_b32\t%0, %2, %1\;v_ashrrev_i32\t%0, %2, %0";
3597 else
3598 return "v_lshlrev_b32\t%0, %2, %1\;v_lshrrev_b32\t%0, %2, %0";
3599 }
3600 [(set_attr "type" "mult")
3601 (set_attr "length" "8")])
3602
3d6275e3
AS
3603;; GCC can already do these for scalar types, but not for vector types.
3604;; Unfortunately you can't just do SUBREG on a vector to select the low part,
3605;; so there must be a few tricks here.
3606
;; Truncate a <VnDI> vector to a single-register integer vector mode.
;; Always output as "#" and split once reload_completed: part 0 of the
;; input (fetched with gcn_operand_part) is either truncated again via
;; gen_trunc<vnsi><mode>2 when the destination is not <VnSI>, or simply
;; moved into the output when it is.
1165109b 3607(define_insn_and_split "trunc<vndi><mode>2"
03876953
AS
3608 [(set (match_operand:V_INT_1REG 0 "register_operand" "=v")
3609 (truncate:V_INT_1REG
1165109b 3610 (match_operand:<VnDI> 1 "gcn_alu_operand" " v")))]
3d6275e3
AS
3611 ""
3612 "#"
3613 "reload_completed"
3d66c777 3614 [(const_int 0)]
3d6275e3 3615 {
1165109b 3616 rtx inlo = gcn_operand_part (<VnDI>mode, operands[1], 0);
3d66c777
AS
3617 rtx out = operands[0];
3618
1165109b
AS
3619 if (<MODE>mode != <VnSI>mode)
3620 emit_insn (gen_trunc<vnsi><mode>2 (out, inlo));
3d66c777
AS
3621 else
3622 emit_move_insn (out, inlo);
3d6275e3
AS
3623 }
3624 [(set_attr "type" "vop2")
3d66c777
AS
3625 (set_attr "length" "4")])
3626
;; Masked form of the <VnDI> truncation: the truncated value is merged
;; with operand 2 (constraint "U0") under the mask in operand 3.
;; Split once reload_completed into either gen_trunc<vnsi><mode>2_exec or,
;; when the destination is already <VnSI>, gen_mov<mode>_exec, in both
;; cases operating on part 0 of the input and passing the merge value and
;; mask through.
1165109b 3627(define_insn_and_split "trunc<vndi><mode>2_exec"
03876953
AS
3628 [(set (match_operand:V_INT_1REG 0 "register_operand" "=v")
3629 (vec_merge:V_INT_1REG
3630 (truncate:V_INT_1REG
1165109b 3631 (match_operand:<VnDI> 1 "gcn_alu_operand" " v"))
03876953
AS
3632 (match_operand:V_INT_1REG 2 "gcn_alu_or_unspec_operand" "U0")
3633 (match_operand:DI 3 "gcn_exec_operand" " e")))]
3d6275e3
AS
3634 ""
3635 "#"
3636 "reload_completed"
3d66c777 3637 [(const_int 0)]
3d6275e3 3638 {
3d66c777 3639 rtx out = operands[0];
1165109b 3640 rtx inlo = gcn_operand_part (<VnDI>mode, operands[1], 0);
3d66c777
AS
3641 rtx merge = operands[2];
3642 rtx exec = operands[3];
3643
1165109b
AS
3644 if (<MODE>mode != <VnSI>mode)
3645 emit_insn (gen_trunc<vnsi><mode>2_exec (out, inlo, merge, exec));
3d66c777 3646 else
b7886845 3647 emit_insn (gen_mov<mode>_exec (out, inlo, merge, exec));
3d6275e3
AS
3648 }
3649 [(set_attr "type" "vop2")
3d66c777
AS
3650 (set_attr "length" "4")])
3651
1165109b
AS
;; Sign- or zero-extend a single-register integer vector to <VnDI>.
;; Split once reload_completed into operations on the two parts of the
;; output: part 0 receives the input (extended to <VnSI> first if the
;; source is narrower), and part 1 is filled with either an arithmetic
;; right shift of part 0 by 31 (when '<su>' is 's') or a vector of zeros.
3652(define_insn_and_split "<convop><mode><vndi>2"
3653 [(set (match_operand:<VnDI> 0 "register_operand" "=v")
3654 (any_extend:<VnDI>
03876953 3655 (match_operand:V_INT_1REG 1 "gcn_alu_operand" " v")))]
3d66c777
AS
3656 ""
3657 "#"
3658 "reload_completed"
3659 [(const_int 0)]
3660 {
1165109b
AS
3661 rtx outlo = gcn_operand_part (<VnDI>mode, operands[0], 0);
3662 rtx outhi = gcn_operand_part (<VnDI>mode, operands[0], 1);
3d66c777
AS
3663 rtx in = operands[1];
3664
1165109b
AS
3665 if (<MODE>mode != <VnSI>mode)
3666 emit_insn (gen_<convop><mode><vnsi>2 (outlo, in));
3d66c777
AS
3667 else
3668 emit_move_insn (outlo, in);
3669 if ('<su>' == 's')
1165109b 3670 emit_insn (gen_ashr<vnsi>3 (outhi, outlo, GEN_INT (31)));
3d66c777 3671 else
1165109b 3672 emit_insn (gen_vec_duplicate<vnsi> (outhi, const0_rtx));
3d66c777
AS
3673 }
3674 [(set_attr "type" "mult")
3675 (set_attr "length" "12")])
3676
1165109b
AS
;; Masked form of the extension to <VnDI>: the extended value is merged
;; with operand 2 under the mask in operand 3.  Split once
;; reload_completed; the merge value is split into two parts with
;; gcn_operand_part, and each half of the output is produced by the _exec
;; variant of the corresponding instruction (extend or move for part 0;
;; shift-by-31 or zero duplicate for part 1) with its matching merge half.
3677(define_insn_and_split "<convop><mode><vndi>2_exec"
3678 [(set (match_operand:<VnDI> 0 "register_operand" "=v")
3679 (vec_merge:<VnDI>
3680 (any_extend:<VnDI>
03876953 3681 (match_operand:V_INT_1REG 1 "gcn_alu_operand" " v"))
1165109b 3682 (match_operand:<VnDI> 2 "gcn_alu_or_unspec_operand" "U0")
03876953 3683 (match_operand:DI 3 "gcn_exec_operand" " e")))]
3d66c777
AS
3684 ""
3685 "#"
3686 "reload_completed"
3687 [(const_int 0)]
3688 {
1165109b
AS
3689 rtx outlo = gcn_operand_part (<VnDI>mode, operands[0], 0);
3690 rtx outhi = gcn_operand_part (<VnDI>mode, operands[0], 1);
3d66c777 3691 rtx in = operands[1];
1165109b
AS
3692 rtx mergelo = gcn_operand_part (<VnDI>mode, operands[2], 0);
3693 rtx mergehi = gcn_operand_part (<VnDI>mode, operands[2], 1);
3d66c777
AS
3694 rtx exec = operands[3];
3695
1165109b
AS
3696 if (<MODE>mode != <VnSI>mode)
3697 emit_insn (gen_<convop><mode><vnsi>2_exec (outlo, in, mergelo, exec));
3d66c777 3698 else
b7886845 3699 emit_insn (gen_mov<mode>_exec (outlo, in, mergelo, exec));
3d66c777 3700 if ('<su>' == 's')
1165109b
AS
3701 emit_insn (gen_ashr<vnsi>3_exec (outhi, outlo, GEN_INT (31), mergehi,
3702 exec));
3d66c777 3703 else
1165109b
AS
3704 emit_insn (gen_vec_duplicate<vnsi>_exec (outhi, const0_rtx, mergehi,
3705 exec));
3d66c777
AS
3706 }
3707 [(set_attr "type" "mult")
3708 (set_attr "length" "12")])
3d6275e3
AS
3709
3710;; }}}
3711;; {{{ Vector comparison/merge
3712
;; Vector comparison producing a DImode mask, for the V_noQI modes.
;; The eight alternatives differ in where the mask goes:
;;   1-2: destination "cV", printed as "vcc";
;;   3-4: destination "e" via v_cmpx with an explicit "vcc" operand; these
;;        are the only alternatives whose scratch (operand 4) is "cV"
;;        rather than "X", and they carry rdna "no";
;;   5-6: destination "Sg", printed as %0 (type vop3a);
;;   7-8: destination "e" via v_cmpx with no mask operand printed; these
;;        carry rdna "yes".
;; The comparison suffix is produced from operator 1 by the %E1 directive.
3713(define_insn "vec_cmp<mode>di"
c7ec7bd1 3714 [(set (match_operand:DI 0 "register_operand" "=cV,cV, e, e,Sg,Sg, e, e")
dbde9e2d 3715 (match_operator:DI 1 "gcn_fp_compare_operator"
c7ec7bd1
AS
3716 [(match_operand:V_noQI 2 "gcn_alu_operand" "vSv, B,vSv, B, v,vA,vSv, B")
3717 (match_operand:V_noQI 3 "gcn_vop3_operand" " v, v, v, v,vA, v, v, v")]))
3718 (clobber (match_scratch:DI 4 "= X, X, cV,cV, X, X, X, X"))]
3d6275e3
AS
3719 ""
3720 "@
3721 v_cmp%E1\tvcc, %2, %3
3722 v_cmp%E1\tvcc, %2, %3
3723 v_cmpx%E1\tvcc, %2, %3
3724 v_cmpx%E1\tvcc, %2, %3
3725 v_cmp%E1\t%0, %2, %3
c7ec7bd1
AS
3726 v_cmp%E1\t%0, %2, %3
3727 v_cmpx%E1\t%2, %3
3728 v_cmpx%E1\t%2, %3"
3729 [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vop3a,vopc,vopc")
3730 (set_attr "length" "4,8,4,8,8,8,4,8")
3731 (set_attr "rdna" "*,*,no,no,*,*,yes,yes")])
3d6275e3
AS
3732
;; Unsigned integer vector comparison for the V_INT_noQI modes.  This is a
;; thin wrapper: it forwards all four operands unchanged to the
;; vec_cmp<mode>di insn, relying on the operator code in operand 1 to carry
;; the signedness.
3733(define_expand "vec_cmpu<mode>di"
3734 [(match_operand:DI 0 "register_operand")
f4d4a406 3735 (match_operator 1 "gcn_compare_operator"
03876953
AS
3736 [(match_operand:V_INT_noQI 2 "gcn_alu_operand")
3737 (match_operand:V_INT_noQI 3 "gcn_vop3_operand")])]
3d6275e3
AS
3738 ""
3739 {
3740 /* Unsigned comparisons use the same patterns as signed comparisons,
3741 except that they use unsigned operators (e.g. LTU vs LT).
3742 The '%E1' directive then does the Right Thing. */
3743 emit_insn (gen_vec_cmp<mode>di (operands[0], operands[1], operands[2],
3744 operands[3]));
3745 DONE;
3746 })
3747
0e159efc 3748; There's no instruction for 8-bit vector comparison, so we need to extend.
;; Comparison of V_QI vectors, signed or unsigned (any_extend).  Both
;; inputs are widened into fresh <VnSI> pseudos with the matching extend
;; expander and then compared with vec_cmp<vnsi>di.  Because it allocates
;; pseudos, the expander is only available while can_create_pseudo_p ().
1165109b 3749(define_expand "vec_cmp<u><mode>di"
0e159efc 3750 [(match_operand:DI 0 "register_operand")
f4d4a406 3751 (match_operator 1 "gcn_compare_operator"
1165109b
AS
3752 [(any_extend:<VnSI> (match_operand:V_QI 2 "gcn_alu_operand"))
3753 (any_extend:<VnSI> (match_operand:V_QI 3 "gcn_vop3_operand"))])]
0e159efc
AS
3754 "can_create_pseudo_p ()"
3755 {
1165109b
AS
3756 rtx sitmp1 = gen_reg_rtx (<VnSI>mode);
3757 rtx sitmp2 = gen_reg_rtx (<VnSI>mode);
0e159efc 3758
1165109b
AS
3759 emit_insn (gen_<expander><mode><vnsi>2 (sitmp1, operands[2]));
3760 emit_insn (gen_<expander><mode><vnsi>2 (sitmp2, operands[3]));
3761 emit_insn (gen_vec_cmp<vnsi>di (operands[0], operands[1], sitmp1, sitmp2));
0e159efc
AS
3762 DONE;
3763 })
3764
;; Masked vector comparison: the RTL result is the comparison ANDed with
;; the DImode mask in operand 4 (constraint "e" in every alternative).
;; The alternatives, templates and attributes mirror vec_cmp<mode>di, with
;; the scratch moved to operand 5.
3d6275e3 3765(define_insn "vec_cmp<mode>di_exec"
c7ec7bd1 3766 [(set (match_operand:DI 0 "register_operand" "=cV,cV, e, e,Sg,Sg, e, e")
3d6275e3 3767 (and:DI
f4d4a406 3768 (match_operator 1 "gcn_fp_compare_operator"
c7ec7bd1
AS
3769 [(match_operand:V_noQI 2 "gcn_alu_operand" "vSv, B,vSv, B, v,vA,vSv, B")
3770 (match_operand:V_noQI 3 "gcn_vop3_operand" " v, v, v, v,vA, v, v, v")])
3771 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e, e, e, e, e, e, e")))
3772 (clobber (match_scratch:DI 5 "= X, X, cV,cV, X, X, X, X"))]
3d6275e3
AS
3773 ""
3774 "@
3775 v_cmp%E1\tvcc, %2, %3
3776 v_cmp%E1\tvcc, %2, %3
3777 v_cmpx%E1\tvcc, %2, %3
3778 v_cmpx%E1\tvcc, %2, %3
3779 v_cmp%E1\t%0, %2, %3
c7ec7bd1
AS
3780 v_cmp%E1\t%0, %2, %3
3781 v_cmpx%E1\t%2, %3
3782 v_cmpx%E1\t%2, %3"
3783 [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vop3a,vopc,vopc")
3784 (set_attr "length" "4,8,4,8,8,8,4,8")
3785 (set_attr "rdna" "*,*,no,no,*,*,yes,yes")])
3d6275e3 3786
0e159efc
AS
;; Masked unsigned integer vector comparison for the V_INT_noQI modes.
;; Like the unmasked vec_cmpu<mode>di expander, this is a thin wrapper
;; that forwards its operands (result, operator, two inputs, mask) to the
;; signed/FP comparison insn, here vec_cmp<mode>di_exec.
;; It must NOT call gen_vec_cmpu<mode>di_exec: that is this expander's own
;; generator and would recurse without end at expand time.
3787(define_expand "vec_cmpu<mode>di_exec"
3788 [(match_operand:DI 0 "register_operand")
f4d4a406 3789 (match_operator 1 "gcn_compare_operator"
03876953
AS
3790 [(match_operand:V_INT_noQI 2 "gcn_alu_operand")
3791 (match_operand:V_INT_noQI 3 "gcn_vop3_operand")])
0e159efc
AS
3792 (match_operand:DI 4 "gcn_exec_reg_operand")]
3793 ""
3794 {
3795 /* Unsigned comparisons use the same patterns as signed comparisons,
3796 except that they use unsigned operators (e.g. LTU vs LT).
3797 The '%E1' directive then does the Right Thing. */
3798 emit_insn (gen_vec_cmp<mode>di_exec (operands[0], operands[1],
3799 operands[2], operands[3],
3800 operands[4]));
3801 DONE;
3802 })
3803
;; Masked comparison of V_QI vectors, signed or unsigned (any_extend).
;; Both inputs are widened into fresh <VnSI> pseudos under the mask in
;; operand 4 and then compared with vec_cmp<vnsi>di_exec.
;; The merge value given to each masked extend must be in the <VnSI>
;; result mode, so an undefined value (gcn_gen_undef) is passed; the V_QI
;; inputs themselves cannot be used for this.  The comparison is applied
;; under the same mask.
;; Only available while can_create_pseudo_p ().
1165109b 3804(define_expand "vec_cmp<u><mode>di_exec"
0e159efc 3805 [(match_operand:DI 0 "register_operand")
f4d4a406 3806 (match_operator 1 "gcn_compare_operator"
1165109b
AS
3807 [(any_extend:<VnSI> (match_operand:V_QI 2 "gcn_alu_operand"))
3808 (any_extend:<VnSI> (match_operand:V_QI 3 "gcn_vop3_operand"))])
0e159efc
AS
3809 (match_operand:DI 4 "gcn_exec_reg_operand")]
3810 "can_create_pseudo_p ()"
3811 {
1165109b
AS
3812 rtx sitmp1 = gen_reg_rtx (<VnSI>mode);
3813 rtx sitmp2 = gen_reg_rtx (<VnSI>mode);
0e159efc 3814
1165109b
AS
3815 emit_insn (gen_<expander><mode><vnsi>2_exec (sitmp1, operands[2],
3816 gcn_gen_undef (<VnSI>mode), operands[4]));
3817 emit_insn (gen_<expander><mode><vnsi>2_exec (sitmp2, operands[3],
3818 gcn_gen_undef (<VnSI>mode), operands[4]));
3819 emit_insn (gen_vec_cmp<vnsi>di_exec (operands[0], operands[1], sitmp1,
3820 sitmp2, operands[4]));
0e159efc
AS
3821 DONE;
3822 })
3823
;; Vector comparison whose first input is a scalar (<SCALAR_MODE>)
;; wrapped in vec_duplicate, compared against a V_noQI vector.
;; Seven alternatives: two writing "cV" (printed "vcc"), two writing "e"
;; via v_cmpx with "vcc" and a "cV" scratch (rdna "no"), one writing "Sg"
;; (vop3a), and two writing "e" via v_cmpx with no mask operand printed
;; (rdna "yes").
3d6275e3 3824(define_insn "vec_cmp<mode>di_dup"
c7ec7bd1 3825 [(set (match_operand:DI 0 "register_operand" "=cV,cV, e,e,Sg, e,e")
dbde9e2d 3826 (match_operator:DI 1 "gcn_fp_compare_operator"
03876953 3827 [(vec_duplicate:V_noQI
3d6275e3 3828 (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand"
c7ec7bd1
AS
3829 " Sv, B,Sv,B, A,Sv,B"))
3830 (match_operand:V_noQI 3 "gcn_vop3_operand" " v, v, v,v, v, v,v")]))
3831 (clobber (match_scratch:DI 4 "= X,X,cV,cV, X, X,X"))]
3d6275e3
AS
3832 ""
3833 "@
3834 v_cmp%E1\tvcc, %2, %3
3835 v_cmp%E1\tvcc, %2, %3
3836 v_cmpx%E1\tvcc, %2, %3
3837 v_cmpx%E1\tvcc, %2, %3
c7ec7bd1
AS
3838 v_cmp%E1\t%0, %2, %3
3839 v_cmpx%E1\t%2, %3
3840 v_cmpx%E1\t%2, %3"
3841 [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vopc,vopc")
3842 (set_attr "length" "4,8,4,8,8,4,8")
3843 (set_attr "rdna" "*,*,no,no,*,yes,yes")])
3d6275e3
AS
3844
;; Masked form of the scalar-versus-vector comparison: the comparison of
;; a vec_duplicate'd scalar with a V_noQI vector is ANDed with the DImode
;; mask in operand 4.  Alternatives, templates and attributes mirror
;; vec_cmp<mode>di_dup, with the scratch moved to operand 5.
3845(define_insn "vec_cmp<mode>di_dup_exec"
c7ec7bd1 3846 [(set (match_operand:DI 0 "register_operand" "=cV,cV, e,e,Sg, e,e")
3d6275e3 3847 (and:DI
f4d4a406 3848 (match_operator 1 "gcn_fp_compare_operator"
03876953 3849 [(vec_duplicate:V_noQI
3d6275e3 3850 (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand"
c7ec7bd1
AS
3851 " Sv, B,Sv,B, A,Sv,B"))
3852 (match_operand:V_noQI 3 "gcn_vop3_operand" " v, v, v,v, v, v,v")])
3853 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e, e,e, e, e,e")))
3854 (clobber (match_scratch:DI 5 "= X,X,cV,cV, X, X,X"))]
3d6275e3
AS
3855 ""
3856 "@
3857 v_cmp%E1\tvcc, %2, %3
3858 v_cmp%E1\tvcc, %2, %3
3859 v_cmpx%E1\tvcc, %2, %3
3860 v_cmpx%E1\tvcc, %2, %3
c7ec7bd1
AS
3861 v_cmp%E1\t%0, %2, %3
3862 v_cmpx%E1\t%2, %3
3863 v_cmpx%E1\t%2, %3"
3864 [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vopc,vopc")
3865 (set_attr "length" "4,8,4,8,8,4,8")
3866 (set_attr "rdna" "*,*,no,no,*,yes,yes")])
3d6275e3
AS
3867
;; Select between two V_ALL vectors under a DImode mask (operand 3),
;; expressed as a vec_merge of operands 1 and 2.  The expander has no
;; preparation code; it emits the vec_merge in parallel with a clobber of
;; a <VnDI> scratch.
3868(define_expand "vcond_mask_<mode>di"
3869 [(parallel
03876953
AS
3870 [(set (match_operand:V_ALL 0 "register_operand" "")
3871 (vec_merge:V_ALL
3872 (match_operand:V_ALL 1 "gcn_vop3_operand" "")
3873 (match_operand:V_ALL 2 "gcn_alu_operand" "")
2b99bed8 3874 (match_operand:DI 3 "register_operand" "")))
1165109b 3875 (clobber (scratch:<VnDI>))])]
3d6275e3
AS
3876 ""
3877 "")
3878
03876953
AS
;; Vector conditional select: compare operands 4 and 5 (mode V_ALL_ALT)
;; with operator 3, then pick between operands 1 and 2 (mode V_ALL).
;; Implemented in two steps through a fresh DImode pseudo: vec_cmp
;; produces the mask, and vcond_mask performs the selection.
3879(define_expand "vcond<V_ALL:mode><V_ALL_ALT:mode>"
3880 [(match_operand:V_ALL 0 "register_operand")
3881 (match_operand:V_ALL 1 "gcn_vop3_operand")
3882 (match_operand:V_ALL 2 "gcn_alu_operand")
f4d4a406 3883 (match_operator 3 "gcn_fp_compare_operator"
03876953
AS
3884 [(match_operand:V_ALL_ALT 4 "gcn_alu_operand")
3885 (match_operand:V_ALL_ALT 5 "gcn_vop3_operand")])]
3d6275e3
AS
3886 ""
3887 {
3888 rtx tmp = gen_reg_rtx (DImode);
03876953 3889 emit_insn (gen_vec_cmp<V_ALL_ALT:mode>di
96eb1765 3890 (tmp, operands[3], operands[4], operands[5]));
03876953 3891 emit_insn (gen_vcond_mask_<V_ALL:mode>di
96eb1765 3892 (operands[0], operands[1], operands[2], tmp));
3d6275e3
AS
3893 DONE;
3894 })
3895
03876953
AS
;; Masked vector conditional select.  Same two-step scheme as the unmasked
;; vcond expander, except that the comparison uses the _exec variant with
;; the mask in operand 6.  The selection itself uses the plain
;; vcond_mask pattern with the computed mask.
3896(define_expand "vcond<V_ALL:mode><V_ALL_ALT:mode>_exec"
3897 [(match_operand:V_ALL 0 "register_operand")
3898 (match_operand:V_ALL 1 "gcn_vop3_operand")
3899 (match_operand:V_ALL 2 "gcn_alu_operand")
f4d4a406 3900 (match_operator 3 "gcn_fp_compare_operator"
03876953
AS
3901 [(match_operand:V_ALL_ALT 4 "gcn_alu_operand")
3902 (match_operand:V_ALL_ALT 5 "gcn_vop3_operand")])
3d6275e3
AS
3903 (match_operand:DI 6 "gcn_exec_reg_operand" "e")]
3904 ""
3905 {
3906 rtx tmp = gen_reg_rtx (DImode);
03876953 3907 emit_insn (gen_vec_cmp<V_ALL_ALT:mode>di_exec
96eb1765 3908 (tmp, operands[3], operands[4], operands[5], operands[6]));
03876953 3909 emit_insn (gen_vcond_mask_<V_ALL:mode>di
96eb1765 3910 (operands[0], operands[1], operands[2], tmp));
3d6275e3
AS
3911 DONE;
3912 })
3913
03876953
AS
;; Vector conditional select with an unsigned integer comparison: compare
;; operands 4 and 5 (mode V_INT) via the vec_cmpu expander into a fresh
;; DImode mask, then select between operands 1 and 2 (mode V_ALL) with
;; vcond_mask.
3914(define_expand "vcondu<V_ALL:mode><V_INT:mode>"
3915 [(match_operand:V_ALL 0 "register_operand")
3916 (match_operand:V_ALL 1 "gcn_vop3_operand")
3917 (match_operand:V_ALL 2 "gcn_alu_operand")
f4d4a406 3918 (match_operator 3 "gcn_fp_compare_operator"
03876953
AS
3919 [(match_operand:V_INT 4 "gcn_alu_operand")
3920 (match_operand:V_INT 5 "gcn_vop3_operand")])]
3d6275e3
AS
3921 ""
3922 {
3923 rtx tmp = gen_reg_rtx (DImode);
03876953 3924 emit_insn (gen_vec_cmpu<V_INT:mode>di
96eb1765 3925 (tmp, operands[3], operands[4], operands[5]));
03876953 3926 emit_insn (gen_vcond_mask_<V_ALL:mode>di
96eb1765 3927 (operands[0], operands[1], operands[2], tmp));
3d6275e3
AS
3928 DONE;
3929 })
3930
03876953
AS
;; Masked vector conditional select with an unsigned integer comparison.
;; The mask in operand 6 is passed to the vec_cmpu ..._exec expander; the
;; resulting DImode mask then drives the plain vcond_mask selection
;; between operands 1 and 2.
3931(define_expand "vcondu<V_ALL:mode><V_INT:mode>_exec"
3932 [(match_operand:V_ALL 0 "register_operand")
3933 (match_operand:V_ALL 1 "gcn_vop3_operand")
3934 (match_operand:V_ALL 2 "gcn_alu_operand")
f4d4a406 3935 (match_operator 3 "gcn_fp_compare_operator"
03876953
AS
3936 [(match_operand:V_INT 4 "gcn_alu_operand")
3937 (match_operand:V_INT 5 "gcn_vop3_operand")])
3d6275e3
AS
3938 (match_operand:DI 6 "gcn_exec_reg_operand" "e")]
3939 ""
3940 {
3941 rtx tmp = gen_reg_rtx (DImode);
03876953 3942 emit_insn (gen_vec_cmpu<V_INT:mode>di_exec
96eb1765 3943 (tmp, operands[3], operands[4], operands[5], operands[6]));
03876953 3944 emit_insn (gen_vcond_mask_<V_ALL:mode>di
96eb1765 3945 (operands[0], operands[1], operands[2], tmp));
3d6275e3
AS
3946 DONE;
3947 })
3948
3949;; }}}
3950;; {{{ Fully masked loop support
3951
;; Build a DImode lane mask for a fully-masked loop from SImode operands
;; 1 and 2, further limited by operand 3.
;;  - If either of operands 1 and 2 is not a CONST_INT, the mask is
;;    computed at run time: the V64SI value in VGPR 1 (named _0_1_2_3 in
;;    the code) has operand 1 added to it unless operand 1 is the constant
;;    zero, and the result is compared against operand 2 with a GT
;;    vec_cmp..._dup (operand 2 being the duplicated scalar).
;;  - If both are constants, the mask is computed at compile time from
;;    their difference: all ones when the difference is 64 or more,
;;    otherwise the low "diff" bits set.
;; Finally, when operand 3 is below 64 the mask is ANDed with a constant
;; having only the low operand-3 bits set.  INTVAL is applied to operand 3
;; unconditionally, so it is assumed to always be a CONST_INT.
;; NOTE(review): the constant path shifts by "diff" without checking that
;; it is non-negative -- confirm callers never pass operand 2 < operand 1.
3952(define_expand "while_ultsidi"
3953 [(match_operand:DI 0 "register_operand")
3954 (match_operand:SI 1 "")
48960b68
AS
3955 (match_operand:SI 2 "")
3956 (match_operand:SI 3 "")]
3d6275e3
AS
3957 ""
3958 {
3959 if (GET_CODE (operands[1]) != CONST_INT
3960 || GET_CODE (operands[2]) != CONST_INT)
3961 {
3962 rtx _0_1_2_3 = gen_rtx_REG (V64SImode, VGPR_REGNO (1));
3963 rtx tmp = _0_1_2_3;
3964 if (GET_CODE (operands[1]) != CONST_INT
3965 || INTVAL (operands[1]) != 0)
3966 {
3967 tmp = gen_reg_rtx (V64SImode);
3968 emit_insn (gen_addv64si3_dup (tmp, _0_1_2_3, operands[1]));
3969 }
3970 emit_insn (gen_vec_cmpv64sidi_dup (operands[0],
3971 gen_rtx_GT (VOIDmode, 0, 0),
3972 operands[2], tmp));
3973 }
3974 else
3975 {
3976 HOST_WIDE_INT diff = INTVAL (operands[2]) - INTVAL (operands[1]);
3977 HOST_WIDE_INT mask = (diff >= 64 ? -1
3978 : ~((unsigned HOST_WIDE_INT)-1 << diff));
3979 emit_move_insn (operands[0], gen_rtx_CONST_INT (VOIDmode, mask));
3980 }
48960b68
AS
3981 if (INTVAL (operands[3]) < 64)
3982 emit_insn (gen_anddi3 (operands[0], operands[0],
3983 gen_rtx_CONST_INT (VOIDmode,
3984 ~((unsigned HOST_WIDE_INT)-1
3985 << INTVAL (operands[3])))));
3d6275e3
AS
3986 DONE;
3987 })
3988
;; Masked vector load from the memory in operand 1 into operand 0 under
;; the mask in operand 2.  The mask is forced into a DImode register, the
;; memory reference is turned into a per-lane vector address with
;; gcn_expand_scalar_to_vector_address, and the load is done as a masked
;; gather.  The destination is zeroed first and also passed as the merge
;; value, so lanes that are masked off read as zero.  The address space
;; and volatility of the original MEM are passed along as constants.
3989(define_expand "maskload<mode>di"
8aeabd9f
AS
3990 [(match_operand:V_MOV 0 "register_operand")
3991 (match_operand:V_MOV 1 "memory_operand")
3d6275e3
AS
3992 (match_operand 2 "")]
3993 ""
3994 {
3995 rtx exec = force_reg (DImode, operands[2]);
3996 rtx addr = gcn_expand_scalar_to_vector_address
1165109b 3997 (<MODE>mode, exec, operands[1], gen_rtx_SCRATCH (<VnDI>mode));
3d6275e3
AS
3998 rtx as = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
3999 rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
95607c12
AS
4000
4001 /* Masked lanes are required to hold zero. */
4002 emit_move_insn (operands[0], gcn_vec_constant (<MODE>mode, 0));
4003
4004 emit_insn (gen_gather<mode>_expr_exec (operands[0], addr, as, v,
4005 operands[0], exec));
3d6275e3
AS
4006 DONE;
4007 })
4008
;; Masked vector store of operand 1 to the memory in operand 0 under the
;; mask in operand 2.  The mask is forced into a DImode register, the
;; memory reference is turned into a per-lane vector address with
;; gcn_expand_scalar_to_vector_address, and the store is done as a masked
;; scatter.  The address space and volatility of the original MEM are
;; passed along as constants.
4009(define_expand "maskstore<mode>di"
8aeabd9f
AS
4010 [(match_operand:V_MOV 0 "memory_operand")
4011 (match_operand:V_MOV 1 "register_operand")
3d6275e3
AS
4012 (match_operand 2 "")]
4013 ""
4014 {
4015 rtx exec = force_reg (DImode, operands[2]);
4016 rtx addr = gcn_expand_scalar_to_vector_address
1165109b 4017 (<MODE>mode, exec, operands[0], gen_rtx_SCRATCH (<VnDI>mode));
3d6275e3
AS
4018 rtx as = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
4019 rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
4020 emit_insn (gen_scatter<mode>_expr_exec (addr, operands[1], as, v, exec));
4021 DONE;
4022 })
4023
;; Masked gather load.  Operands: 0 destination vector, 1 DImode base,
;; 2 <VnSI> offsets, 3 immediate and 4 SImode value (both handed to
;; gcn_expand_scaled_offsets), 5 mask.
;; The mask is forced into a register and the address is computed by
;; gcn_expand_scaled_offsets.  The destination is zeroed and passed as the
;; merge value so masked-off lanes read as zero.  If the helper returned a
;; <VnDI> address, the single-offset gather is used; otherwise the
;; returned value is used together with the base in operand 1 in the
;; two-offset gather.
1165109b 4024(define_expand "mask_gather_load<mode><vnsi>"
8aeabd9f 4025 [(match_operand:V_MOV 0 "register_operand")
3d6275e3 4026 (match_operand:DI 1 "register_operand")
1165109b 4027 (match_operand:<VnSI> 2 "register_operand")
3d6275e3
AS
4028 (match_operand 3 "immediate_operand")
4029 (match_operand:SI 4 "gcn_alu_operand")
4030 (match_operand:DI 5 "")]
4031 ""
4032 {
4033 rtx exec = force_reg (DImode, operands[5]);
4034
95607c12
AS
4035 rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[1],
4036 operands[2], operands[4],
4037 INTVAL (operands[3]), exec);
4038
4039 /* Masked lanes are required to hold zero. */
4040 emit_move_insn (operands[0], gcn_vec_constant (<MODE>mode, 0));
4041
1165109b 4042 if (GET_MODE (addr) == <VnDI>mode)
95607c12
AS
4043 emit_insn (gen_gather<mode>_insn_1offset_exec (operands[0], addr,
4044 const0_rtx, const0_rtx,
4045 const0_rtx, operands[0],
4046 exec));
4047 else
4048 emit_insn (gen_gather<mode>_insn_2offsets_exec (operands[0], operands[1],
4049 addr, const0_rtx,
4050 const0_rtx, const0_rtx,
4051 operands[0], exec));
3d6275e3
AS
4052 DONE;
4053 })
4054
;; Masked scatter store.  Operands: 0 DImode base, 1 <VnSI> offsets,
;; 2 immediate and 3 SImode value (both handed to
;; gcn_expand_scaled_offsets), 4 vector to store, 5 mask.
;; The mask is forced into a register and the address is computed by
;; gcn_expand_scaled_offsets.  If the helper returned a <VnDI> address,
;; the single-offset scatter is used; otherwise the returned value is
;; used together with the base in operand 0 in the two-offset scatter.
1165109b 4055(define_expand "mask_scatter_store<mode><vnsi>"
3d6275e3 4056 [(match_operand:DI 0 "register_operand")
1165109b 4057 (match_operand:<VnSI> 1 "register_operand")
3d6275e3
AS
4058 (match_operand 2 "immediate_operand")
4059 (match_operand:SI 3 "gcn_alu_operand")
8aeabd9f 4060 (match_operand:V_MOV 4 "register_operand")
3d6275e3
AS
4061 (match_operand:DI 5 "")]
4062 ""
4063 {
4064 rtx exec = force_reg (DImode, operands[5]);
4065
b5fb73b6
AS
4066 rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[0],
4067 operands[1], operands[3],
4068 INTVAL (operands[2]), exec);
3d6275e3 4069
1165109b 4070 if (GET_MODE (addr) == <VnDI>mode)
b5fb73b6
AS
4071 emit_insn (gen_scatter<mode>_insn_1offset_exec (addr, const0_rtx,
4072 operands[4], const0_rtx,
4073 const0_rtx,
4074 exec));
4075 else
4076 emit_insn (gen_scatter<mode>_insn_2offsets_exec (operands[0], addr,
4077 const0_rtx, operands[4],
4078 const0_rtx, const0_rtx,
4079 exec));
3d6275e3
AS
4080 DONE;
4081 })
4082
;; Conditional plus/minus/mult on V_ALL vectors.  Operand 1 is the DImode
;; mask, operands 2 and 3 are the inputs and operand 4 is the merge value.
;; The mask and first input are forced into registers, and the work is
;; done by the _exec variant of the ordinary three-operand pattern, which
;; takes the merge value and then the mask as its trailing operands.
5a80a6c3 4083(define_code_iterator cond_op [plus minus mult])
3d6275e3
AS
4084
4085(define_expand "cond_<expander><mode>"
03876953 4086 [(match_operand:V_ALL 0 "register_operand")
3d6275e3 4087 (match_operand:DI 1 "register_operand")
03876953
AS
4088 (cond_op:V_ALL
4089 (match_operand:V_ALL 2 "gcn_alu_operand")
4090 (match_operand:V_ALL 3 "gcn_alu_operand"))
4091 (match_operand:V_ALL 4 "register_operand")]
3d6275e3
AS
4092 ""
4093 {
4094 operands[1] = force_reg (DImode, operands[1]);
4095 operands[2] = force_reg (<MODE>mode, operands[2]);
4096
4097 emit_insn (gen_<expander><mode>3_exec (operands[0], operands[2],
4098 operands[3], operands[4],
4099 operands[1]));
4100 DONE;
4101 })
4102
553ff252
PAA
;; Conditional smin/smax on V_FP vectors, named through <fexpander>.
;; Operand 1 is the DImode mask and operand 4 the merge value.  The mask
;; and first input are forced into registers and the _exec variant of the
;; corresponding three-operand pattern is emitted with the merge value and
;; mask as trailing operands.
4103(define_code_iterator cond_fminmaxop [smin smax])
4104
4105(define_expand "cond_<fexpander><mode>"
4106 [(match_operand:V_FP 0 "register_operand")
4107 (match_operand:DI 1 "register_operand")
4108 (cond_fminmaxop:V_FP
4109 (match_operand:V_FP 2 "gcn_alu_operand")
4110 (match_operand:V_FP 3 "gcn_alu_operand"))
4111 (match_operand:V_FP 4 "register_operand")]
4112 ""
4113 {
4114 operands[1] = force_reg (DImode, operands[1]);
4115 operands[2] = force_reg (<MODE>mode, operands[2]);
4116
4117 emit_insn (gen_<fexpander><mode>3_exec (operands[0], operands[2],
4118 operands[3], operands[4],
4119 operands[1]));
4120 DONE;
4121 })
4122
;; Conditional smin/smax/umin/umax on V_INT vectors.  Unlike the other
;; cond_ expanders here, the merge value (operand 4) is not handed to the
;; _exec pattern: the operation is computed into a temporary with an
;; undefined merge value (gcn_gen_undef), and a separate vcond_mask then
;; selects between that temporary and operand 4 under the mask.
4123(define_code_iterator cond_minmaxop [smin smax umin umax])
4124
4125(define_expand "cond_<expander><mode>"
4126 [(match_operand:V_INT 0 "register_operand")
4127 (match_operand:DI 1 "register_operand")
4128 (cond_minmaxop:V_INT
4129 (match_operand:V_INT 2 "gcn_alu_operand")
4130 (match_operand:V_INT 3 "gcn_alu_operand"))
4131 (match_operand:V_INT 4 "register_operand")]
4132 ""
4133 {
4134 operands[1] = force_reg (DImode, operands[1]);
4135 operands[2] = force_reg (<MODE>mode, operands[2]);
4136 rtx tmp = gen_reg_rtx (<MODE>mode);
4137
4138 emit_insn (gen_<expander><mode>3_exec (tmp, operands[2], operands[3],
4139 gcn_gen_undef(<MODE>mode),
4140 operands[1]));
4141 emit_insn (gen_vcond_mask_<mode>di (operands[0], tmp, operands[4],
4142 operands[1]));
4143 DONE;
4144 })
4145
3d6275e3
AS
;; Conditional and/ior/xor on V_INT vectors.  Operand 1 is the DImode
;; mask and operand 4 the merge value.  The mask and first input are
;; forced into registers and the _exec variant of the three-operand
;; pattern is emitted with the merge value and mask as trailing operands.
4146(define_code_iterator cond_bitop [and ior xor])
4147
4148(define_expand "cond_<expander><mode>"
03876953 4149 [(match_operand:V_INT 0 "register_operand")
3d6275e3 4150 (match_operand:DI 1 "register_operand")
03876953
AS
4151 (cond_bitop:V_INT
4152 (match_operand:V_INT 2 "gcn_alu_operand")
4153 (match_operand:V_INT 3 "gcn_alu_operand"))
4154 (match_operand:V_INT 4 "register_operand")]
3d6275e3
AS
4155 ""
4156 {
4157 operands[1] = force_reg (DImode, operands[1]);
4158 operands[2] = force_reg (<MODE>mode, operands[2]);
4159
4160 emit_insn (gen_<expander><mode>3_exec (operands[0], operands[2],
4161 operands[3], operands[4],
4162 operands[1]));
4163 DONE;
4164 })
4165
9c7e898b
PAA
;; Codes for which the conditional shift expander below is instantiated.
4166(define_code_iterator cond_shiftop [ashift lshiftrt ashiftrt])
4167
;; Expander "cond_<expander><mode>" for the V_INT_noHI vector modes, one
;; instance per code in cond_shiftop.  The shift amount (operand 3) arrives
;; in the same mode as the data but is converted into a fresh <VnSI>mode
;; register before use.  The emitted instruction is v<expander><mode>3_exec
;; with the arguments (op0, op2, shiftby, op4, op1).
;; NOTE(review): the final argument 0 to convert_move is its "unsignedp"
;; flag, i.e. presumably a signed conversion -- confirm this is intended for
;; shift counts.
4168(define_expand "cond_<expander><mode>"
4169 [(match_operand:V_INT_noHI 0 "register_operand")
4170 (match_operand:DI 1 "register_operand")
4171 (cond_shiftop:V_INT_noHI
4172 (match_operand:V_INT_noHI 2 "gcn_alu_operand")
4173 (match_operand:V_INT_noHI 3 "gcn_alu_operand"))
4174 (match_operand:V_INT_noHI 4 "register_operand")]
4175 ""
4176 {
4177 operands[1] = force_reg (DImode, operands[1]);
4178 operands[2] = force_reg (<MODE>mode, operands[2]);
4179
4180 rtx shiftby = gen_reg_rtx (<VnSI>mode);
4181 convert_move (shiftby, operands[3], 0);
4182
4183 emit_insn (gen_v<expander><mode>3_exec (operands[0], operands[2],
4184 shiftby, operands[4],
4185 operands[1]));
4186 DONE;
4187 })
4188
3d6275e3
AS
4189;; }}}
4190;; {{{ Vector reductions
4191
;; All unspecs handled by the reduction patterns in this section.
4192(define_int_iterator REDUC_UNSPEC [UNSPEC_SMIN_DPP_SHR UNSPEC_SMAX_DPP_SHR
4193 UNSPEC_UMIN_DPP_SHR UNSPEC_UMAX_DPP_SHR
4194 UNSPEC_PLUS_DPP_SHR
4195 UNSPEC_AND_DPP_SHR
4196 UNSPEC_IOR_DPP_SHR UNSPEC_XOR_DPP_SHR])
4197
;; Subset of REDUC_UNSPEC (plus/and/ior/xor, i.e. without the min/max
;; entries) used by the V_DI insn_and_split pattern in this section.
4198(define_int_iterator REDUC_2REG_UNSPEC [UNSPEC_PLUS_DPP_SHR
4199 UNSPEC_AND_DPP_SHR
4200 UNSPEC_IOR_DPP_SHR UNSPEC_XOR_DPP_SHR])
4201
;; reduc_unspec maps each unspec to its own name as text, so that the C code
;; in the patterns can pass the constant as <reduc_unspec>.
4202; FIXME: Isn't there a better way of doing this?
4203(define_int_attr reduc_unspec [(UNSPEC_SMIN_DPP_SHR "UNSPEC_SMIN_DPP_SHR")
4204 (UNSPEC_SMAX_DPP_SHR "UNSPEC_SMAX_DPP_SHR")
4205 (UNSPEC_UMIN_DPP_SHR "UNSPEC_UMIN_DPP_SHR")
4206 (UNSPEC_UMAX_DPP_SHR "UNSPEC_UMAX_DPP_SHR")
4207 (UNSPEC_PLUS_DPP_SHR "UNSPEC_PLUS_DPP_SHR")
4208 (UNSPEC_AND_DPP_SHR "UNSPEC_AND_DPP_SHR")
4209 (UNSPEC_IOR_DPP_SHR "UNSPEC_IOR_DPP_SHR")
4210 (UNSPEC_XOR_DPP_SHR "UNSPEC_XOR_DPP_SHR")])
4211
;; reduc_op maps each unspec to the operation name that is substituted into
;; pattern names such as "reduc_<reduc_op>_scal_<mode>".
4212(define_int_attr reduc_op [(UNSPEC_SMIN_DPP_SHR "smin")
4213 (UNSPEC_SMAX_DPP_SHR "smax")
4214 (UNSPEC_UMIN_DPP_SHR "umin")
4215 (UNSPEC_UMAX_DPP_SHR "umax")
4216 (UNSPEC_PLUS_DPP_SHR "plus")
4217 (UNSPEC_AND_DPP_SHR "and")
4218 (UNSPEC_IOR_DPP_SHR "ior")
4219 (UNSPEC_XOR_DPP_SHR "xor")])
4220
;; reduc_insn maps each unspec to the instruction template string that the
;; *<reduc_op>_dpp_shr_<mode> insn hands to gcn_expand_dpp_shr_insn.
4221(define_int_attr reduc_insn [(UNSPEC_SMIN_DPP_SHR "v_min%i0")
4222 (UNSPEC_SMAX_DPP_SHR "v_max%i0")
4223 (UNSPEC_UMIN_DPP_SHR "v_min%u0")
4224 (UNSPEC_UMAX_DPP_SHR "v_max%u0")
a5879399
AS
4225 (UNSPEC_PLUS_DPP_SHR "v_add%U0")
4226 (UNSPEC_AND_DPP_SHR "v_and%B0")
4227 (UNSPEC_IOR_DPP_SHR "v_or%B0")
4228 (UNSPEC_XOR_DPP_SHR "v_xor%B0")])
3d6275e3
AS
4229
;; Expander "reduc_<reduc_op>_scal_<mode>" for every V_ALL mode and every
;; unspec in REDUC_UNSPEC; only enabled when !TARGET_WAVE64_COMPAT.
;; Operand 0 is the scalar result, operand 1 the vector to reduce.
;; gcn_expand_reduc_scalar emits the reduction and returns a vector rtx;
;; the value in the last lane (index GET_MODE_NUNITS - 1) is then extracted
;; into operand 0.
4230(define_expand "reduc_<reduc_op>_scal_<mode>"
4231 [(set (match_operand:<SCALAR_MODE> 0 "register_operand")
4232 (unspec:<SCALAR_MODE>
f539029c 4233 [(match_operand:V_ALL 1 "register_operand")]
3d6275e3 4234 REDUC_UNSPEC))]
68e03492 4235 "!TARGET_WAVE64_COMPAT"
3d6275e3
AS
4236 {
4237 rtx tmp = gcn_expand_reduc_scalar (<MODE>mode, operands[1],
4238 <reduc_unspec>);
4239
f539029c
AS
4240 rtx last_lane = GEN_INT (GET_MODE_NUNITS (<MODE>mode) - 1);
4241 emit_insn (gen_vec_extract<mode><scalar_mode> (operands[0], tmp,
4242 last_lane));
3d6275e3
AS
4243
4244 DONE;
4245 })
4246
10aa0356
AS
;; Expander "reduc_<fexpander>_scal_<mode>" for the V_FP modes, one instance
;; per code in fminmaxop; only enabled when !TARGET_WAVE64_COMPAT.
;; It is a thin forwarder: the body emits the corresponding
;; reduc_<expander>_scal_<mode> expansion with the same two operands.
4247(define_expand "reduc_<fexpander>_scal_<mode>"
4248 [(match_operand:<SCALAR_MODE> 0 "register_operand")
4249 (fminmaxop:V_FP
4250 (match_operand:V_FP 1 "register_operand"))]
68e03492 4251 "!TARGET_WAVE64_COMPAT"
10aa0356
AS
4252 {
4253 /* fmin/fmax are identical to smin/smax. */
4254 emit_insn (gen_reduc_<expander>_scal_<mode> (operands[0], operands[1]));
4255 DONE;
4256 })
4257
bf628a97
AS
4258;; Warning: This "-ffast-math" implementation converts in-order reductions
4259;; into associative reductions. It's also used where OpenMP or
4260;; OpenACC parallelization has already broken the in-order semantics.
;; Expander "fold_left_plus_<mode>" for the V_FP modes.
;;   operand 0: scalar destination
;;   operand 1: scalar value added to the reduction result
;;   operand 2: vector to be summed
;; Enabled only when !TARGET_WAVE64_COMPAT, pseudos can still be created, and
;; one of flag_openacc, flag_openmp or flag_associative_math is set.
;; The vector is summed with reduc_plus_scal_<mode> into a fresh scalar
;; pseudo, which is then added to operand 1 with add<scalar_mode>3.
4261(define_expand "fold_left_plus_<mode>"
4262 [(match_operand:<SCALAR_MODE> 0 "register_operand")
4263 (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand")
f539029c 4264 (match_operand:V_FP 2 "gcn_alu_operand")]
68e03492 4265 "!TARGET_WAVE64_COMPAT
7cc2262e 4266 && can_create_pseudo_p ()
bf628a97
AS
4267 && (flag_openacc || flag_openmp
4268 || flag_associative_math)"
4269 {
4270 rtx dest = operands[0];
4271 rtx scalar = operands[1];
4272 rtx vector = operands[2];
4273 rtx tmp = gen_reg_rtx (<SCALAR_MODE>mode);
4274
4275 emit_insn (gen_reduc_plus_scal_<mode> (tmp, vector));
4276 emit_insn (gen_add<scalar_mode>3 (dest, scalar, tmp));
4277 DONE;
4278 })
3d6275e3
AS
4279
;; Insn "*<reduc_op>_dpp_shr_<mode>" for the single-register vector modes
;; (V_1REG) and every unspec in REDUC_UNSPEC; requires TARGET_DPP_FULL.
;; Operands 1 and 2 are vector registers and operand 3 is a constant
;; integer.  The assembler text is produced by gcn_expand_dpp_shr_insn from
;; the mode, the <reduc_insn> template, the unspec number and the value of
;; operand 3.
;; NOTE(review): operand 3 is presumably the DPP row/wavefront shift amount
;; -- confirm in gcn_expand_dpp_shr_insn.
4280(define_insn "*<reduc_op>_dpp_shr_<mode>"
f539029c
AS
4281 [(set (match_operand:V_1REG 0 "register_operand" "=v")
4282 (unspec:V_1REG
4283 [(match_operand:V_1REG 1 "register_operand" "v")
4284 (match_operand:V_1REG 2 "register_operand" "v")
4285 (match_operand:SI 3 "const_int_operand" "n")]
3d6275e3 4286 REDUC_UNSPEC))]
023641d9 4287 "TARGET_DPP_FULL"
3d6275e3
AS
4288 {
4289 return gcn_expand_dpp_shr_insn (<MODE>mode, "<reduc_insn>",
4290 <reduc_unspec>, INTVAL (operands[3]));
4291 }
4292 [(set_attr "type" "vop_dpp")
4293 (set_attr "length" "8")])
4294
;; Insn-and-split "*<reduc_op>_dpp_shr_<mode>" for the V_DI modes, limited to
;; the unspecs in REDUC_2REG_UNSPEC.  The output template is "#", so the
;; insn is never printed directly; once reload_completed holds it is split
;; into two <VnSI> operations of the same unspec, one on part 0 and one on
;; part 1 of each vector operand (as returned by gcn_operand_part), both
;; sharing the constant operand 3.
;; NOTE(review): parts 0 and 1 are presumably the low and high 32-bit halves
;; of each lane -- confirm in gcn_operand_part.
1165109b 4295(define_insn_and_split "*<reduc_op>_dpp_shr_<mode>"
f539029c
AS
4296 [(set (match_operand:V_DI 0 "register_operand" "=v")
4297 (unspec:V_DI
4298 [(match_operand:V_DI 1 "register_operand" "v")
4299 (match_operand:V_DI 2 "register_operand" "v")
4300 (match_operand:SI 3 "const_int_operand" "n")]
3d6275e3
AS
4301 REDUC_2REG_UNSPEC))]
4302 ""
4303 "#"
4304 "reload_completed"
4305 [(set (match_dup 4)
1165109b 4306 (unspec:<VnSI>
3d6275e3
AS
4307 [(match_dup 6) (match_dup 8) (match_dup 3)] REDUC_2REG_UNSPEC))
4308 (set (match_dup 5)
1165109b 4309 (unspec:<VnSI>
3d6275e3
AS
4310 [(match_dup 7) (match_dup 9) (match_dup 3)] REDUC_2REG_UNSPEC))]
4311 {
1165109b
AS
4312 operands[4] = gcn_operand_part (<MODE>mode, operands[0], 0);
4313 operands[5] = gcn_operand_part (<MODE>mode, operands[0], 1);
4314 operands[6] = gcn_operand_part (<MODE>mode, operands[1], 0);
4315 operands[7] = gcn_operand_part (<MODE>mode, operands[1], 1);
4316 operands[8] = gcn_operand_part (<MODE>mode, operands[2], 0);
4317 operands[9] = gcn_operand_part (<MODE>mode, operands[2], 1);
3d6275e3
AS
4318 }
4319 [(set_attr "type" "vmult")
4320 (set_attr "length" "16")])
4321
4322; Special cases for addition.
4323
;; Insn "*plus_carry_dpp_shr_<mode>" for the single-register integer vector
;; modes (V_INT_1REG); requires TARGET_DPP_FULL.  It is the
;; UNSPEC_PLUS_CARRY_DPP_SHR form of the DPP addition and is emitted as
;; "v_add_co_u32"; the pattern declares VCC_REG as clobbered.
;; Note that <VnSI>mode, not <MODE>mode, is passed to
;; gcn_expand_dpp_shr_insn here.
a5879399 4324(define_insn "*plus_carry_dpp_shr_<mode>"
f539029c
AS
4325 [(set (match_operand:V_INT_1REG 0 "register_operand" "=v")
4326 (unspec:V_INT_1REG
4327 [(match_operand:V_INT_1REG 1 "register_operand" "v")
4328 (match_operand:V_INT_1REG 2 "register_operand" "v")
03876953 4329 (match_operand:SI 3 "const_int_operand" "n")]
3d6275e3
AS
4330 UNSPEC_PLUS_CARRY_DPP_SHR))
4331 (clobber (reg:DI VCC_REG))]
68e03492 4332 "TARGET_DPP_FULL"
3d6275e3 4333 {
b9bf0c3f 4334 return gcn_expand_dpp_shr_insn (<VnSI>mode, "v_add_co_u32",
3d6275e3
AS
4335 UNSPEC_PLUS_CARRY_DPP_SHR,
4336 INTVAL (operands[3]));
4337 }
4338 [(set_attr "type" "vop_dpp")
4339 (set_attr "length" "8")])
4340
;; Insn "*plus_carry_in_dpp_shr_<mode>" for the V_SI modes; requires
;; TARGET_DPP_FULL.  Same shape as the carry-out pattern, plus an extra
;; DImode input (operand 4, constraint "cV"); emitted as "v_addc_co_u32".
;; The pattern declares VCC_REG as clobbered.
;; NOTE(review): operand 4 is presumably the incoming carry mask, normally
;; VCC -- confirm against the "cV" constraint definition.
1165109b 4341(define_insn "*plus_carry_in_dpp_shr_<mode>"
f539029c
AS
4342 [(set (match_operand:V_SI 0 "register_operand" "=v")
4343 (unspec:V_SI
4344 [(match_operand:V_SI 1 "register_operand" "v")
4345 (match_operand:V_SI 2 "register_operand" "v")
4346 (match_operand:SI 3 "const_int_operand" "n")
4347 (match_operand:DI 4 "register_operand" "cV")]
3d6275e3
AS
4348 UNSPEC_PLUS_CARRY_IN_DPP_SHR))
4349 (clobber (reg:DI VCC_REG))]
68e03492 4350 "TARGET_DPP_FULL"
3d6275e3 4351 {
b9bf0c3f 4352 return gcn_expand_dpp_shr_insn (<MODE>mode, "v_addc_co_u32",
3d6275e3
AS
4353 UNSPEC_PLUS_CARRY_IN_DPP_SHR,
4354 INTVAL (operands[3]));
4355 }
4356 [(set_attr "type" "vop_dpp")
4357 (set_attr "length" "8")])
4358
;; Insn-and-split "*plus_carry_dpp_shr_<mode>" for the V_DI modes.  The
;; output template is "#"; once reload_completed holds the insn is split
;; into two <VnSI> operations, each declaring VCC_REG as clobbered:
;;   1. UNSPEC_PLUS_CARRY_DPP_SHR on part 0 of operands 0, 1 and 2;
;;   2. UNSPEC_PLUS_CARRY_IN_DPP_SHR on part 1 of the same operands, which
;;      additionally takes (reg:DI VCC_REG) as an input.
;; The order of the two insns therefore matters: the second reads VCC.
;; NOTE(review): parts 0 and 1 are presumably the low and high 32-bit halves
;; of each lane -- confirm in gcn_operand_part.
1165109b 4359(define_insn_and_split "*plus_carry_dpp_shr_<mode>"
f539029c
AS
4360 [(set (match_operand:V_DI 0 "register_operand" "=v")
4361 (unspec:V_DI
4362 [(match_operand:V_DI 1 "register_operand" "v")
4363 (match_operand:V_DI 2 "register_operand" "v")
4364 (match_operand:SI 3 "const_int_operand" "n")]
3d6275e3
AS
4365 UNSPEC_PLUS_CARRY_DPP_SHR))
4366 (clobber (reg:DI VCC_REG))]
4367 ""
4368 "#"
4369 "reload_completed"
4370 [(parallel [(set (match_dup 4)
1165109b 4371 (unspec:<VnSI>
3d6275e3
AS
4372 [(match_dup 6) (match_dup 8) (match_dup 3)]
4373 UNSPEC_PLUS_CARRY_DPP_SHR))
4374 (clobber (reg:DI VCC_REG))])
4375 (parallel [(set (match_dup 5)
1165109b 4376 (unspec:<VnSI>
3d6275e3
AS
4377 [(match_dup 7) (match_dup 9) (match_dup 3) (reg:DI VCC_REG)]
4378 UNSPEC_PLUS_CARRY_IN_DPP_SHR))
4379 (clobber (reg:DI VCC_REG))])]
4380 {
1165109b
AS
4381 operands[4] = gcn_operand_part (<MODE>mode, operands[0], 0);
4382 operands[5] = gcn_operand_part (<MODE>mode, operands[0], 1);
4383 operands[6] = gcn_operand_part (<MODE>mode, operands[1], 0);
4384 operands[7] = gcn_operand_part (<MODE>mode, operands[1], 1);
4385 operands[8] = gcn_operand_part (<MODE>mode, operands[2], 0);
4386 operands[9] = gcn_operand_part (<MODE>mode, operands[2], 1);
3d6275e3
AS
4387 }
4388 [(set_attr "type" "vmult")
4389 (set_attr "length" "16")])
4390
3d6275e3
AS
4391;; }}}
4392;; {{{ Miscellaneous
4393
1165109b
AS
;; Expander "vec_series<mode>" for the V_SI modes.
;;   operand 0: vector destination
;;   operand 1: SImode value added to every lane
;;   operand 2: SImode value multiplied by hard register VGPR 1
;; The result is computed as op0 = (v1 * op2) + op1, using the *_dup forms
;; of mul and add so that the scalar operands are passed directly.
;; NOTE(review): VGPR 1 presumably holds the per-lane index (0, 1, 2, ...)
;; -- confirm where that register is initialised.
4394(define_expand "vec_series<mode>"
4395 [(match_operand:V_SI 0 "register_operand")
3d6275e3
AS
4396 (match_operand:SI 1 "gcn_alu_operand")
4397 (match_operand:SI 2 "gcn_alu_operand")]
4398 ""
4399 {
1165109b
AS
4400 rtx tmp = gen_reg_rtx (<MODE>mode);
4401 rtx v1 = gen_rtx_REG (<MODE>mode, VGPR_REGNO (1));
3d6275e3 4402
1165109b
AS
4403 emit_insn (gen_mul<mode>3_dup (tmp, v1, operands[2]));
4404 emit_insn (gen_add<mode>3_dup (operands[0], tmp, operands[1]));
3d6275e3
AS
4405 DONE;
4406 })
4407
1165109b
AS
;; Expander "vec_series<mode>" for the V_DI modes.
;;   operand 0: vector destination
;;   operand 1: DImode value added to every lane
;;   operand 2: DImode value multiplied by hard register VGPR 1
;; VGPR 1 is referenced in <VnSI>mode and multiplied by operand 2 with
;; mul<mode>3_zext_dup2.  Operand 1 is first broadcast into a vector with
;; vec_duplicate<mode> and then added with the plain add<mode>3.
;; NOTE(review): VGPR 1 presumably holds the per-lane index (0, 1, 2, ...)
;; -- confirm where that register is initialised.
4408(define_expand "vec_series<mode>"
4409 [(match_operand:V_DI 0 "register_operand")
3d6275e3
AS
4410 (match_operand:DI 1 "gcn_alu_operand")
4411 (match_operand:DI 2 "gcn_alu_operand")]
4412 ""
4413 {
1165109b
AS
4414 rtx tmp = gen_reg_rtx (<MODE>mode);
4415 rtx v1 = gen_rtx_REG (<VnSI>mode, VGPR_REGNO (1));
4416 rtx op1vec = gen_reg_rtx (<MODE>mode);
3d6275e3 4417
1165109b
AS
4418 emit_insn (gen_mul<mode>3_zext_dup2 (tmp, v1, operands[2]));
4419 emit_insn (gen_vec_duplicate<mode> (op1vec, operands[1]));
4420 emit_insn (gen_add<mode>3 (operands[0], tmp, op1vec));
3d6275e3
AS
4421 DONE;
4422 })
4423
4424;; }}}