]>
Commit | Line | Data |
---|---|---|
29e6733c | 1 | ;; VSX patterns. |
83ffe9cd | 2 | ;; Copyright (C) 2009-2023 Free Software Foundation, Inc. |
29e6733c MM |
3 | ;; Contributed by Michael Meissner <meissner@linux.vnet.ibm.com> |
4 | ||
5 | ;; This file is part of GCC. | |
6 | ||
7 | ;; GCC is free software; you can redistribute it and/or modify it | |
8 | ;; under the terms of the GNU General Public License as published | |
9 | ;; by the Free Software Foundation; either version 3, or (at your | |
10 | ;; option) any later version. | |
11 | ||
12 | ;; GCC is distributed in the hope that it will be useful, but WITHOUT | |
13 | ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY | |
14 | ;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public | |
15 | ;; License for more details. | |
16 | ||
17 | ;; You should have received a copy of the GNU General Public License | |
18 | ;; along with GCC; see the file COPYING3. If not see | |
19 | ;; <http://www.gnu.org/licenses/>. | |
20 | ||
e9e6d4f6 KN |
21 | ;; Iterator for comparison types |
22 | (define_code_iterator CMP_TEST [eq lt gt unordered]) | |
23 | ||
394a527f CL |
24 | ;; Mode attribute for vector floate and floato conversions |
25 | (define_mode_attr VF_sxddp [(V2DI "sxd") (V2DF "dp")]) | |
26 | ||
29e6733c MM |
27 | ;; Iterator for both scalar and vector floating point types supported by VSX |
28 | (define_mode_iterator VSX_B [DF V4SF V2DF]) | |
29 | ||
30 | ;; Iterator for the 2 64-bit vector types | |
31 | (define_mode_iterator VSX_D [V2DF V2DI]) | |
32 | ||
c477a667 MM |
33 | ;; Mode iterator to handle swapping words on little endian for the 128-bit |
34 | ;; types that goes in a single vector register. | |
35 | (define_mode_iterator VSX_LE_128 [(KF "FLOAT128_VECTOR_P (KFmode)") | |
9393bc31 | 36 | (TF "FLOAT128_VECTOR_P (TFmode)") |
4a89b7e7 | 37 | TI |
6579b156 | 38 | V1TI]) |
c477a667 | 39 | |
29e6733c MM |
40 | ;; Iterator for the 2 32-bit vector types |
41 | (define_mode_iterator VSX_W [V4SF V4SI]) | |
42 | ||
688e4919 MM |
43 | ;; Iterator for the DF types |
44 | (define_mode_iterator VSX_DF [V2DF DF]) | |
45 | ||
29e6733c MM |
46 | ;; Iterator for vector floating point types supported by VSX |
47 | (define_mode_iterator VSX_F [V4SF V2DF]) | |
48 | ||
49 | ;; Iterator for logical types supported by VSX | |
c477a667 MM |
50 | (define_mode_iterator VSX_L [V16QI |
51 | V8HI | |
52 | V4SI | |
53 | V2DI | |
54 | V4SF | |
55 | V2DF | |
56 | V1TI | |
57 | TI | |
58 | (KF "FLOAT128_VECTOR_P (KFmode)") | |
711c065c | 59 | (TF "FLOAT128_VECTOR_P (TFmode)")]) |
29e6733c | 60 | |
50c78b9a | 61 | ;; Iterator for memory moves. |
c477a667 MM |
62 | (define_mode_iterator VSX_M [V16QI |
63 | V8HI | |
64 | V4SI | |
65 | V2DI | |
66 | V4SF | |
67 | V2DF | |
68 | V1TI | |
69 | (KF "FLOAT128_VECTOR_P (KFmode)") | |
50c78b9a | 70 | (TF "FLOAT128_VECTOR_P (TFmode)") |
4a89b7e7 | 71 | TI]) |
d86e633a | 72 | |
fc504349 CL |
73 | (define_mode_attr VSX_XXBR [(V8HI "h") |
74 | (V4SI "w") | |
75 | (V4SF "w") | |
76 | (V2DF "d") | |
77 | (V2DI "d") | |
78 | (V1TI "q")]) | |
79 | ||
29e6733c MM |
80 | ;; Map into the appropriate load/store name based on the type |
81 | (define_mode_attr VSm [(V16QI "vw4") | |
82 | (V8HI "vw4") | |
83 | (V4SI "vw4") | |
84 | (V4SF "vw4") | |
85 | (V2DF "vd2") | |
86 | (V2DI "vd2") | |
87 | (DF "d") | |
c477a667 MM |
88 | (TF "vd2") |
89 | (KF "vd2") | |
a16a872d | 90 | (V1TI "vd2") |
c6d5ff83 | 91 | (TI "vd2")]) |
29e6733c | 92 | |
29e6733c MM |
93 | ;; Map the register class used |
94 | (define_mode_attr VSr [(V16QI "v") | |
95 | (V8HI "v") | |
96 | (V4SI "v") | |
8d3620ba | 97 | (V4SF "wa") |
85949949 SB |
98 | (V2DI "wa") |
99 | (V2DF "wa") | |
e670418f | 100 | (DI "wa") |
cc998fd5 | 101 | (DF "wa") |
72e3386e | 102 | (SF "wa") |
cb152d12 SB |
103 | (TF "wa") |
104 | (KF "wa") | |
a16a872d | 105 | (V1TI "v") |
e670418f | 106 | (TI "wa")]) |
29e6733c | 107 | |
cb152d12 SB |
108 | ;; What value we need in the "isa" field, to make the IEEE QP float work. |
109 | (define_mode_attr VSisa [(V16QI "*") | |
110 | (V8HI "*") | |
111 | (V4SI "*") | |
112 | (V4SF "*") | |
113 | (V2DI "*") | |
114 | (V2DF "*") | |
115 | (DI "*") | |
116 | (DF "*") | |
117 | (SF "*") | |
118 | (V1TI "*") | |
119 | (TI "*") | |
120 | (TF "p9tf") | |
121 | (KF "p9kf")]) | |
59f5868d | 122 | |
c3217088 PB |
123 | ;; A mode attribute to disparage use of GPR registers, except for scalar |
124 | ;; integer modes. | |
125 | (define_mode_attr ??r [(V16QI "??r") | |
126 | (V8HI "??r") | |
127 | (V4SI "??r") | |
128 | (V4SF "??r") | |
129 | (V2DI "??r") | |
130 | (V2DF "??r") | |
131 | (V1TI "??r") | |
132 | (KF "??r") | |
133 | (TF "??r") | |
134 | (TI "r")]) | |
135 | ||
00fd0628 PB |
136 | ;; A mode attribute used for 128-bit constant values. |
137 | (define_mode_attr nW [(V16QI "W") | |
138 | (V8HI "W") | |
139 | (V4SI "W") | |
140 | (V4SF "W") | |
141 | (V2DI "W") | |
142 | (V2DF "W") | |
143 | (V1TI "W") | |
144 | (KF "W") | |
145 | (TF "W") | |
146 | (TI "n")]) | |
147 | ||
29e6733c MM |
148 | ;; Same size integer type for floating point data |
149 | (define_mode_attr VSi [(V4SF "v4si") | |
150 | (V2DF "v2di") | |
151 | (DF "di")]) | |
152 | ||
153 | (define_mode_attr VSI [(V4SF "V4SI") | |
154 | (V2DF "V2DI") | |
155 | (DF "DI")]) | |
156 | ||
157 | ;; Word size for same size conversion | |
158 | (define_mode_attr VSc [(V4SF "w") | |
159 | (V2DF "d") | |
160 | (DF "d")]) | |
161 | ||
29e6733c MM |
162 | ;; Map into either s or v, depending on whether this is a scalar or vector |
163 | ;; operation | |
164 | (define_mode_attr VSv [(V16QI "v") | |
165 | (V8HI "v") | |
166 | (V4SI "v") | |
167 | (V4SF "v") | |
168 | (V2DI "v") | |
169 | (V2DF "v") | |
a16a872d | 170 | (V1TI "v") |
c477a667 MM |
171 | (DF "s") |
172 | (KF "v")]) | |
29e6733c MM |
173 | |
174 | ;; Appropriate type for add ops (and other simple FP ops) | |
4356b75d | 175 | (define_mode_attr VStype_simple [(V2DF "vecdouble") |
29e6733c MM |
176 | (V4SF "vecfloat") |
177 | (DF "fp")]) | |
178 | ||
29e6733c | 179 | ;; Appropriate type for multiply ops |
4356b75d | 180 | (define_mode_attr VStype_mul [(V2DF "vecdouble") |
29e6733c MM |
181 | (V4SF "vecfloat") |
182 | (DF "dmul")]) | |
183 | ||
4356b75d PH |
184 | ;; Appropriate type for divide ops. |
185 | (define_mode_attr VStype_div [(V2DF "vecdiv") | |
186 | (V4SF "vecfdiv") | |
29e6733c MM |
187 | (DF "ddiv")]) |
188 | ||
5aebfdad RH |
189 | ;; Map to a double-sized vector mode |
190 | (define_mode_attr VS_double [(V4SI "V8SI") | |
191 | (V4SF "V8SF") | |
192 | (V2DI "V4DI") | |
a16a872d MM |
193 | (V2DF "V4DF") |
194 | (V1TI "V2TI")]) | |
5aebfdad | 195 | |
50c78b9a | 196 | ;; Iterators for loading constants with xxspltib |
787c7a65 | 197 | (define_mode_iterator VSINT_84 [V4SI V2DI DI SI]) |
50c78b9a MM |
198 | (define_mode_iterator VSINT_842 [V8HI V4SI V2DI]) |
199 | ||
fc504349 CL |
200 | ;; Vector reverse byte modes |
201 | (define_mode_iterator VEC_REVB [V8HI V4SI V2DI V4SF V2DF V1TI]) | |
202 | ||
787c7a65 MM |
203 | ;; Iterator for ISA 3.0 vector extract/insert of small integer vectors. |
204 | ;; VSX_EXTRACT_I2 doesn't include V4SImode because SI extracts can be | |
205 | ;; done on ISA 2.07 and not just ISA 3.0. | |
206 | (define_mode_iterator VSX_EXTRACT_I [V16QI V8HI V4SI]) | |
207 | (define_mode_iterator VSX_EXTRACT_I2 [V16QI V8HI]) | |
b8eaa754 | 208 | (define_mode_iterator VSX_EXTRACT_I4 [V16QI V8HI V4SI V2DI]) |
c5e74d9d | 209 | |
902cb7b1 KN |
210 | (define_mode_attr VSX_EXTRACT_WIDTH [(V16QI "b") |
211 | (V8HI "h") | |
212 | (V4SI "w")]) | |
213 | ||
c5e74d9d MM |
214 | ;; Mode attribute to give the correct predicate for ISA 3.0 vector extract and |
215 | ;; insert to validate the operand number. | |
216 | (define_mode_attr VSX_EXTRACT_PREDICATE [(V16QI "const_0_to_15_operand") | |
217 | (V8HI "const_0_to_7_operand") | |
218 | (V4SI "const_0_to_3_operand")]) | |
219 | ||
220 | ;; Mode attribute to give the constraint for vector extract and insert | |
221 | ;; operations. | |
222 | (define_mode_attr VSX_EX [(V16QI "v") | |
223 | (V8HI "v") | |
224 | (V4SI "wa")]) | |
225 | ||
156b5cca MM |
226 | ;; Mode iterator for binary floating types other than double to |
227 | ;; optimize convert to that floating point type from an extract | |
228 | ;; of an integer type | |
229 | (define_mode_iterator VSX_EXTRACT_FL [SF | |
230 | (IF "FLOAT128_2REG_P (IFmode)") | |
231 | (KF "TARGET_FLOAT128_HW") | |
232 | (TF "FLOAT128_2REG_P (TFmode) | |
233 | || (FLOAT128_IEEE_P (TFmode) | |
234 | && TARGET_FLOAT128_HW)")]) | |
235 | ||
16370e79 MM |
236 | ;; Mode iterator for binary floating types that have a direct conversion |
237 | ;; from 64-bit integer to floating point | |
238 | (define_mode_iterator FL_CONV [SF | |
239 | DF | |
240 | (KF "TARGET_FLOAT128_HW") | |
241 | (TF "TARGET_FLOAT128_HW | |
242 | && FLOAT128_IEEE_P (TFmode)")]) | |
243 | ||
6019c0fc MM |
244 | ;; Iterator for the 2 short vector types to do a splat from an integer |
245 | (define_mode_iterator VSX_SPLAT_I [V16QI V8HI]) | |
246 | ||
247 | ;; Mode attribute to give the count for the splat instruction to splat | |
248 | ;; the value in the 64-bit integer slot | |
249 | (define_mode_attr VSX_SPLAT_COUNT [(V16QI "7") (V8HI "3")]) | |
250 | ||
251 | ;; Mode attribute to give the suffix for the splat instruction | |
252 | (define_mode_attr VSX_SPLAT_SUFFIX [(V16QI "b") (V8HI "h")]) | |
253 | ||
02ef74ba CL |
254 | ;; Iterator for the move to mask instructions |
255 | (define_mode_iterator VSX_MM [V16QI V8HI V4SI V2DI V1TI]) | |
256 | (define_mode_iterator VSX_MM4 [V16QI V8HI V4SI V2DI]) | |
257 | ||
f1ad419e CL |
258 | ;; Longer vec int modes for rotate/mask ops |
259 | ;; and Vector Integer Multiply/Divide/Modulo Instructions | |
260 | (define_mode_iterator VIlong [V2DI V4SI]) | |
261 | ||
29e6733c | 262 | ;; Constants for creating unspecs |
f3c33d9d MM |
263 | (define_c_enum "unspec" |
264 | [UNSPEC_VSX_CONCAT | |
265 | UNSPEC_VSX_CVDPSXWS | |
266 | UNSPEC_VSX_CVDPUXWS | |
267 | UNSPEC_VSX_CVSPDP | |
26bca0ed | 268 | UNSPEC_VSX_CVHPSP |
0bd62dca MM |
269 | UNSPEC_VSX_CVSPDPN |
270 | UNSPEC_VSX_CVDPSPN | |
f3c33d9d MM |
271 | UNSPEC_VSX_CVSXWDP |
272 | UNSPEC_VSX_CVUXWDP | |
273 | UNSPEC_VSX_CVSXDSP | |
274 | UNSPEC_VSX_CVUXDSP | |
be1418c7 CL |
275 | UNSPEC_VSX_FLOAT2 |
276 | UNSPEC_VSX_UNS_FLOAT2 | |
277 | UNSPEC_VSX_FLOATE | |
278 | UNSPEC_VSX_UNS_FLOATE | |
279 | UNSPEC_VSX_FLOATO | |
280 | UNSPEC_VSX_UNS_FLOATO | |
f3c33d9d MM |
281 | UNSPEC_VSX_TDIV |
282 | UNSPEC_VSX_TSQRT | |
f3c33d9d MM |
283 | UNSPEC_VSX_SET |
284 | UNSPEC_VSX_ROUND_I | |
285 | UNSPEC_VSX_ROUND_IC | |
286 | UNSPEC_VSX_SLDWI | |
26bca0ed CL |
287 | UNSPEC_VSX_XXPERM |
288 | ||
bf53d4b8 | 289 | UNSPEC_VSX_XXSPLTW |
2ccdda19 BS |
290 | UNSPEC_VSX_XXSPLTD |
291 | UNSPEC_VSX_DIVSD | |
292 | UNSPEC_VSX_DIVUD | |
f03122f2 CL |
293 | UNSPEC_VSX_DIVSQ |
294 | UNSPEC_VSX_DIVUQ | |
295 | UNSPEC_VSX_DIVESQ | |
296 | UNSPEC_VSX_DIVEUQ | |
297 | UNSPEC_VSX_MODSQ | |
298 | UNSPEC_VSX_MODUQ | |
2ccdda19 | 299 | UNSPEC_VSX_MULSD |
50c78b9a | 300 | UNSPEC_VSX_SIGN_EXTEND |
94bedeaf | 301 | UNSPEC_VSX_XVCVBF16SPN |
8ee2640b | 302 | UNSPEC_VSX_XVCVSPBF16 |
e5898daf | 303 | UNSPEC_VSX_XVCVSPSXDS |
58b475a2 | 304 | UNSPEC_VSX_XVCVSPHP |
e0d32185 MM |
305 | UNSPEC_VSX_VSLO |
306 | UNSPEC_VSX_EXTRACT | |
e9e6d4f6 | 307 | UNSPEC_VSX_SXEXPDP |
b70bb05b | 308 | UNSPEC_VSX_SXSIG |
e9e6d4f6 | 309 | UNSPEC_VSX_SIEXPDP |
b70bb05b | 310 | UNSPEC_VSX_SIEXPQP |
e9e6d4f6 | 311 | UNSPEC_VSX_SCMPEXPDP |
fc756f9f | 312 | UNSPEC_VSX_SCMPEXPQP |
e9e6d4f6 | 313 | UNSPEC_VSX_STSTDC |
26bca0ed CL |
314 | UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTH |
315 | UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTL | |
e9e6d4f6 KN |
316 | UNSPEC_VSX_VXEXP |
317 | UNSPEC_VSX_VXSIG | |
318 | UNSPEC_VSX_VIEXP | |
319 | UNSPEC_VSX_VTSTDC | |
e5898daf | 320 | UNSPEC_VSX_VSIGNED2 |
1262c6cf | 321 | |
902cb7b1 | 322 | UNSPEC_LXVL |
1262c6cf CL |
323 | UNSPEC_LXVLL |
324 | UNSPEC_LVSL_REG | |
325 | UNSPEC_LVSR_REG | |
902cb7b1 | 326 | UNSPEC_STXVL |
1262c6cf CL |
327 | UNSPEC_STXVLL |
328 | UNSPEC_XL_LEN_R | |
329 | UNSPEC_XST_LEN_R | |
330 | ||
902cb7b1 KN |
331 | UNSPEC_VCLZLSBB |
332 | UNSPEC_VCTZLSBB | |
333 | UNSPEC_VEXTUBLX | |
334 | UNSPEC_VEXTUHLX | |
335 | UNSPEC_VEXTUWLX | |
336 | UNSPEC_VEXTUBRX | |
337 | UNSPEC_VEXTUHRX | |
338 | UNSPEC_VEXTUWRX | |
339 | UNSPEC_VCMPNEB | |
340 | UNSPEC_VCMPNEZB | |
341 | UNSPEC_VCMPNEH | |
342 | UNSPEC_VCMPNEZH | |
343 | UNSPEC_VCMPNEW | |
344 | UNSPEC_VCMPNEZW | |
16370e79 MM |
345 | UNSPEC_XXEXTRACTUW |
346 | UNSPEC_XXINSERTW | |
4d85d480 CL |
347 | UNSPEC_VSX_FIRST_MATCH_INDEX |
348 | UNSPEC_VSX_FIRST_MATCH_EOS_INDEX | |
349 | UNSPEC_VSX_FIRST_MISMATCH_INDEX | |
350 | UNSPEC_VSX_FIRST_MISMATCH_EOS_INDEX | |
b8eaa754 | 351 | UNSPEC_XXGENPCV |
02ef74ba | 352 | UNSPEC_MTVSBM |
87325119 | 353 | UNSPEC_EXTENDDITI2 |
02ef74ba CL |
354 | UNSPEC_VCNTMB |
355 | UNSPEC_VEXPAND | |
356 | UNSPEC_VEXTRACT | |
30d02149 CL |
357 | UNSPEC_EXTRACTL |
358 | UNSPEC_EXTRACTR | |
530e9095 CL |
359 | UNSPEC_INSERTL |
360 | UNSPEC_INSERTR | |
3f029aea CL |
361 | UNSPEC_REPLACE_ELT |
362 | UNSPEC_REPLACE_UN | |
f1ad419e CL |
363 | UNSPEC_VDIVES |
364 | UNSPEC_VDIVEU | |
943d631a | 365 | UNSPEC_VMSUMCUD |
d2883be3 MM |
366 | UNSPEC_XXEVAL |
367 | UNSPEC_XXSPLTIW | |
bb24717e | 368 | UNSPEC_XXSPLTIDP |
d2883be3 MM |
369 | UNSPEC_XXSPLTI32DX |
370 | UNSPEC_XXBLEND | |
371 | UNSPEC_XXPERMX | |
f3c33d9d | 372 | ]) |
29e6733c | 373 | |
8ee2640b | 374 | (define_int_iterator XVCVBF16 [UNSPEC_VSX_XVCVSPBF16 |
94bedeaf | 375 | UNSPEC_VSX_XVCVBF16SPN]) |
8ee2640b PB |
376 | |
377 | (define_int_attr xvcvbf16 [(UNSPEC_VSX_XVCVSPBF16 "xvcvspbf16") | |
94bedeaf | 378 | (UNSPEC_VSX_XVCVBF16SPN "xvcvbf16spn")]) |
8ee2640b | 379 | |
30d02149 CL |
380 | ;; Like VI, defined in vector.md, but add ISA 2.07 integer vector ops |
381 | (define_mode_iterator VI2 [V4SI V8HI V16QI V2DI]) | |
382 | ||
3f029aea CL |
383 | ;; Vector extract_elt iterator/attr for 32-bit and 64-bit elements |
384 | (define_mode_iterator REPLACE_ELT [V4SI V4SF V2DI V2DF]) | |
385 | (define_mode_attr REPLACE_ELT_char [(V4SI "w") (V4SF "w") | |
386 | (V2DI "d") (V2DF "d")]) | |
387 | (define_mode_attr REPLACE_ELT_sh [(V4SI "2") (V4SF "2") | |
388 | (V2DI "3") (V2DF "3")]) | |
389 | (define_mode_attr REPLACE_ELT_max [(V4SI "12") (V4SF "12") | |
390 | (V2DI "8") (V2DF "8")]) | |
391 | ||
d2883be3 MM |
392 | ;; Like VM2 in altivec.md, just do char, short, int, long, float and double |
393 | (define_mode_iterator VM3 [V4SI | |
394 | V8HI | |
395 | V16QI | |
396 | V4SF | |
397 | V2DF | |
398 | V2DI]) | |
399 | ||
400 | (define_mode_attr VM3_char [(V2DI "d") | |
401 | (V4SI "w") | |
402 | (V8HI "h") | |
403 | (V16QI "b") | |
404 | (V2DF "d") | |
405 | (V4SF "w")]) | |
406 | ||
407 | ||
29e6733c | 408 | ;; VSX moves |
0cf68694 BS |
409 | |
410 | ;; The patterns for LE permuted loads and stores come before the general | |
411 | ;; VSX moves so they match first. | |
6e8b7d9c | 412 | (define_insn_and_split "*vsx_le_perm_load_<mode>" |
012f609e | 413 | [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa") |
2025a48d | 414 | (match_operand:VSX_D 1 "indexed_or_indirect_operand" "Z"))] |
5d57fdc1 | 415 | "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR" |
0cf68694 | 416 | "#" |
a3a821c9 | 417 | "&& 1" |
0cf68694 | 418 | [(set (match_dup 2) |
6e8b7d9c | 419 | (vec_select:<MODE> |
0cf68694 BS |
420 | (match_dup 1) |
421 | (parallel [(const_int 1) (const_int 0)]))) | |
422 | (set (match_dup 0) | |
6e8b7d9c | 423 | (vec_select:<MODE> |
0cf68694 BS |
424 | (match_dup 2) |
425 | (parallel [(const_int 1) (const_int 0)])))] | |
0cf68694 | 426 | { |
a3a821c9 KN |
427 | rtx mem = operands[1]; |
428 | ||
429 | /* Don't apply the swap optimization if we've already performed register | |
430 | allocation and the hard register destination is not in the altivec | |
431 | range. */ | |
432 | if ((MEM_ALIGN (mem) >= 128) | |
2e42a52f | 433 | && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[0])) |
a3a821c9 KN |
434 | || ALTIVEC_REGNO_P (reg_or_subregno (operands[0])))) |
435 | { | |
436 | rtx mem_address = XEXP (mem, 0); | |
437 | enum machine_mode mode = GET_MODE (mem); | |
438 | ||
439 | if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address)) | |
440 | { | |
441 | /* Replace the source memory address with masked address. */ | |
442 | rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem); | |
443 | emit_insn (lvx_set_expr); | |
444 | DONE; | |
445 | } | |
446 | else if (rs6000_quadword_masked_address_p (mem_address)) | |
447 | { | |
448 | /* This rtl is already in the form that matches lvx | |
449 | instruction, so leave it alone. */ | |
450 | DONE; | |
451 | } | |
452 | /* Otherwise, fall through to transform into a swapping load. */ | |
453 | } | |
0cf68694 BS |
454 | operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0]) |
455 | : operands[0]; | |
456 | } | |
0cf68694 BS |
457 | [(set_attr "type" "vecload") |
458 | (set_attr "length" "8")]) | |
459 | ||
6e8b7d9c | 460 | (define_insn_and_split "*vsx_le_perm_load_<mode>" |
7858932e | 461 | [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa") |
2025a48d | 462 | (match_operand:VSX_W 1 "indexed_or_indirect_operand" "Z"))] |
5d57fdc1 | 463 | "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR" |
0cf68694 | 464 | "#" |
a3a821c9 | 465 | "&& 1" |
0cf68694 | 466 | [(set (match_dup 2) |
6e8b7d9c | 467 | (vec_select:<MODE> |
0cf68694 BS |
468 | (match_dup 1) |
469 | (parallel [(const_int 2) (const_int 3) | |
470 | (const_int 0) (const_int 1)]))) | |
471 | (set (match_dup 0) | |
6e8b7d9c | 472 | (vec_select:<MODE> |
0cf68694 BS |
473 | (match_dup 2) |
474 | (parallel [(const_int 2) (const_int 3) | |
475 | (const_int 0) (const_int 1)])))] | |
0cf68694 | 476 | { |
a3a821c9 KN |
477 | rtx mem = operands[1]; |
478 | ||
479 | /* Don't apply the swap optimization if we've already performed register | |
480 | allocation and the hard register destination is not in the altivec | |
481 | range. */ | |
482 | if ((MEM_ALIGN (mem) >= 128) | |
2e42a52f | 483 | && (!HARD_REGISTER_P (operands[0]) |
a3a821c9 KN |
484 | || ALTIVEC_REGNO_P (REGNO(operands[0])))) |
485 | { | |
486 | rtx mem_address = XEXP (mem, 0); | |
487 | enum machine_mode mode = GET_MODE (mem); | |
488 | ||
489 | if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address)) | |
490 | { | |
491 | /* Replace the source memory address with masked address. */ | |
492 | rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem); | |
493 | emit_insn (lvx_set_expr); | |
494 | DONE; | |
495 | } | |
496 | else if (rs6000_quadword_masked_address_p (mem_address)) | |
497 | { | |
498 | /* This rtl is already in the form that matches lvx | |
499 | instruction, so leave it alone. */ | |
500 | DONE; | |
501 | } | |
502 | /* Otherwise, fall through to transform into a swapping load. */ | |
503 | } | |
0cf68694 BS |
504 | operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0]) |
505 | : operands[0]; | |
506 | } | |
0cf68694 BS |
507 | [(set_attr "type" "vecload") |
508 | (set_attr "length" "8")]) | |
509 | ||
510 | (define_insn_and_split "*vsx_le_perm_load_v8hi" | |
511 | [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa") | |
2025a48d | 512 | (match_operand:V8HI 1 "indexed_or_indirect_operand" "Z"))] |
5d57fdc1 | 513 | "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR" |
0cf68694 | 514 | "#" |
a3a821c9 | 515 | "&& 1" |
0cf68694 BS |
516 | [(set (match_dup 2) |
517 | (vec_select:V8HI | |
518 | (match_dup 1) | |
519 | (parallel [(const_int 4) (const_int 5) | |
520 | (const_int 6) (const_int 7) | |
521 | (const_int 0) (const_int 1) | |
522 | (const_int 2) (const_int 3)]))) | |
523 | (set (match_dup 0) | |
524 | (vec_select:V8HI | |
525 | (match_dup 2) | |
526 | (parallel [(const_int 4) (const_int 5) | |
527 | (const_int 6) (const_int 7) | |
528 | (const_int 0) (const_int 1) | |
529 | (const_int 2) (const_int 3)])))] | |
0cf68694 | 530 | { |
a3a821c9 KN |
531 | rtx mem = operands[1]; |
532 | ||
533 | /* Don't apply the swap optimization if we've already performed register | |
534 | allocation and the hard register destination is not in the altivec | |
535 | range. */ | |
536 | if ((MEM_ALIGN (mem) >= 128) | |
2e42a52f | 537 | && (!HARD_REGISTER_P (operands[0]) |
a3a821c9 KN |
538 | || ALTIVEC_REGNO_P (REGNO(operands[0])))) |
539 | { | |
540 | rtx mem_address = XEXP (mem, 0); | |
541 | enum machine_mode mode = GET_MODE (mem); | |
542 | ||
543 | if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address)) | |
544 | { | |
545 | /* Replace the source memory address with masked address. */ | |
546 | rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem); | |
547 | emit_insn (lvx_set_expr); | |
548 | DONE; | |
549 | } | |
550 | else if (rs6000_quadword_masked_address_p (mem_address)) | |
551 | { | |
552 | /* This rtl is already in the form that matches lvx | |
553 | instruction, so leave it alone. */ | |
554 | DONE; | |
555 | } | |
556 | /* Otherwise, fall through to transform into a swapping load. */ | |
557 | } | |
0cf68694 BS |
558 | operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0]) |
559 | : operands[0]; | |
560 | } | |
0cf68694 BS |
561 | [(set_attr "type" "vecload") |
562 | (set_attr "length" "8")]) | |
563 | ||
564 | (define_insn_and_split "*vsx_le_perm_load_v16qi" | |
565 | [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa") | |
2025a48d | 566 | (match_operand:V16QI 1 "indexed_or_indirect_operand" "Z"))] |
5d57fdc1 | 567 | "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR" |
0cf68694 | 568 | "#" |
a3a821c9 | 569 | "&& 1" |
0cf68694 BS |
570 | [(set (match_dup 2) |
571 | (vec_select:V16QI | |
572 | (match_dup 1) | |
573 | (parallel [(const_int 8) (const_int 9) | |
574 | (const_int 10) (const_int 11) | |
575 | (const_int 12) (const_int 13) | |
576 | (const_int 14) (const_int 15) | |
577 | (const_int 0) (const_int 1) | |
578 | (const_int 2) (const_int 3) | |
579 | (const_int 4) (const_int 5) | |
580 | (const_int 6) (const_int 7)]))) | |
581 | (set (match_dup 0) | |
582 | (vec_select:V16QI | |
583 | (match_dup 2) | |
584 | (parallel [(const_int 8) (const_int 9) | |
585 | (const_int 10) (const_int 11) | |
586 | (const_int 12) (const_int 13) | |
587 | (const_int 14) (const_int 15) | |
588 | (const_int 0) (const_int 1) | |
589 | (const_int 2) (const_int 3) | |
590 | (const_int 4) (const_int 5) | |
591 | (const_int 6) (const_int 7)])))] | |
0cf68694 | 592 | { |
a3a821c9 KN |
593 | rtx mem = operands[1]; |
594 | ||
595 | /* Don't apply the swap optimization if we've already performed register | |
596 | allocation and the hard register destination is not in the altivec | |
597 | range. */ | |
598 | if ((MEM_ALIGN (mem) >= 128) | |
2e42a52f | 599 | && (!HARD_REGISTER_P (operands[0]) |
a3a821c9 KN |
600 | || ALTIVEC_REGNO_P (REGNO(operands[0])))) |
601 | { | |
602 | rtx mem_address = XEXP (mem, 0); | |
603 | enum machine_mode mode = GET_MODE (mem); | |
604 | ||
605 | if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address)) | |
606 | { | |
607 | /* Replace the source memory address with masked address. */ | |
608 | rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem); | |
609 | emit_insn (lvx_set_expr); | |
610 | DONE; | |
611 | } | |
612 | else if (rs6000_quadword_masked_address_p (mem_address)) | |
613 | { | |
614 | /* This rtl is already in the form that matches lvx | |
615 | instruction, so leave it alone. */ | |
616 | DONE; | |
617 | } | |
618 | /* Otherwise, fall through to transform into a swapping load. */ | |
619 | } | |
0cf68694 BS |
620 | operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0]) |
621 | : operands[0]; | |
622 | } | |
0cf68694 BS |
623 | [(set_attr "type" "vecload") |
624 | (set_attr "length" "8")]) | |
625 | ||
411f1755 | 626 | (define_insn "*vsx_le_perm_store_<mode>" |
2025a48d | 627 | [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand" "=Z") |
012f609e | 628 | (match_operand:VSX_D 1 "vsx_register_operand" "+wa"))] |
5d57fdc1 | 629 | "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR" |
0cf68694 | 630 | "#" |
411f1755 BS |
631 | [(set_attr "type" "vecstore") |
632 | (set_attr "length" "12")]) | |
633 | ||
634 | (define_split | |
ad18eed2 SB |
635 | [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand") |
636 | (match_operand:VSX_D 1 "vsx_register_operand"))] | |
5d57fdc1 | 637 | "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed" |
0cf68694 | 638 | [(set (match_dup 2) |
6e8b7d9c | 639 | (vec_select:<MODE> |
0cf68694 BS |
640 | (match_dup 1) |
641 | (parallel [(const_int 1) (const_int 0)]))) | |
642 | (set (match_dup 0) | |
6e8b7d9c | 643 | (vec_select:<MODE> |
0cf68694 BS |
644 | (match_dup 2) |
645 | (parallel [(const_int 1) (const_int 0)])))] | |
0cf68694 | 646 | { |
a3a821c9 KN |
647 | rtx mem = operands[0]; |
648 | ||
649 | /* Don't apply the swap optimization if we've already performed register | |
650 | allocation and the hard register source is not in the altivec range. */ | |
651 | if ((MEM_ALIGN (mem) >= 128) | |
2e42a52f PB |
652 | && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1])) |
653 | || ALTIVEC_REGNO_P (reg_or_subregno (operands[1])))) | |
a3a821c9 KN |
654 | { |
655 | rtx mem_address = XEXP (mem, 0); | |
656 | enum machine_mode mode = GET_MODE (mem); | |
657 | if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address)) | |
658 | { | |
659 | rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]); | |
660 | emit_insn (stvx_set_expr); | |
661 | DONE; | |
662 | } | |
663 | else if (rs6000_quadword_masked_address_p (mem_address)) | |
664 | { | |
665 | /* This rtl is already in the form that matches stvx instruction, | |
666 | so leave it alone. */ | |
667 | DONE; | |
668 | } | |
669 | /* Otherwise, fall through to transform into a swapping store. */ | |
670 | } | |
671 | ||
0cf68694 BS |
672 | operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1]) |
673 | : operands[1]; | |
411f1755 BS |
674 | }) |
675 | ||
676 | ;; The post-reload split requires that we re-permute the source | |
677 | ;; register in case it is still live. | |
678 | (define_split | |
ad18eed2 SB |
679 | [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand") |
680 | (match_operand:VSX_D 1 "vsx_register_operand"))] | |
5d57fdc1 | 681 | "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed" |
411f1755 BS |
682 | [(set (match_dup 1) |
683 | (vec_select:<MODE> | |
684 | (match_dup 1) | |
685 | (parallel [(const_int 1) (const_int 0)]))) | |
686 | (set (match_dup 0) | |
687 | (vec_select:<MODE> | |
688 | (match_dup 1) | |
689 | (parallel [(const_int 1) (const_int 0)]))) | |
690 | (set (match_dup 1) | |
691 | (vec_select:<MODE> | |
692 | (match_dup 1) | |
693 | (parallel [(const_int 1) (const_int 0)])))] | |
694 | "") | |
0cf68694 | 695 | |
411f1755 | 696 | (define_insn "*vsx_le_perm_store_<mode>" |
2025a48d | 697 | [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand" "=Z") |
7858932e | 698 | (match_operand:VSX_W 1 "vsx_register_operand" "+wa"))] |
5d57fdc1 | 699 | "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR" |
0cf68694 | 700 | "#" |
411f1755 BS |
701 | [(set_attr "type" "vecstore") |
702 | (set_attr "length" "12")]) | |
703 | ||
704 | (define_split | |
ad18eed2 SB |
705 | [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand") |
706 | (match_operand:VSX_W 1 "vsx_register_operand"))] | |
5d57fdc1 | 707 | "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed" |
0cf68694 | 708 | [(set (match_dup 2) |
6e8b7d9c | 709 | (vec_select:<MODE> |
0cf68694 BS |
710 | (match_dup 1) |
711 | (parallel [(const_int 2) (const_int 3) | |
712 | (const_int 0) (const_int 1)]))) | |
713 | (set (match_dup 0) | |
6e8b7d9c | 714 | (vec_select:<MODE> |
0cf68694 BS |
715 | (match_dup 2) |
716 | (parallel [(const_int 2) (const_int 3) | |
717 | (const_int 0) (const_int 1)])))] | |
0cf68694 | 718 | { |
a3a821c9 KN |
719 | rtx mem = operands[0]; |
720 | ||
721 | /* Don't apply the swap optimization if we've already performed register | |
722 | allocation and the hard register source is not in the altivec range. */ | |
723 | if ((MEM_ALIGN (mem) >= 128) | |
2e42a52f PB |
724 | && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1])) |
725 | || ALTIVEC_REGNO_P (reg_or_subregno (operands[1])))) | |
a3a821c9 KN |
726 | { |
727 | rtx mem_address = XEXP (mem, 0); | |
728 | enum machine_mode mode = GET_MODE (mem); | |
729 | if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address)) | |
730 | { | |
731 | rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]); | |
732 | emit_insn (stvx_set_expr); | |
733 | DONE; | |
734 | } | |
735 | else if (rs6000_quadword_masked_address_p (mem_address)) | |
736 | { | |
737 | /* This rtl is already in the form that matches stvx instruction, | |
738 | so leave it alone. */ | |
739 | DONE; | |
740 | } | |
741 | /* Otherwise, fall through to transform into a swapping store. */ | |
742 | } | |
743 | ||
0cf68694 BS |
744 | operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1]) |
745 | : operands[1]; | |
411f1755 BS |
746 | }) |
747 | ||
748 | ;; The post-reload split requires that we re-permute the source | |
749 | ;; register in case it is still live. | |
750 | (define_split | |
ad18eed2 SB |
751 | [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand") |
752 | (match_operand:VSX_W 1 "vsx_register_operand"))] | |
5d57fdc1 | 753 | "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed" |
411f1755 BS |
754 | [(set (match_dup 1) |
755 | (vec_select:<MODE> | |
756 | (match_dup 1) | |
757 | (parallel [(const_int 2) (const_int 3) | |
758 | (const_int 0) (const_int 1)]))) | |
759 | (set (match_dup 0) | |
760 | (vec_select:<MODE> | |
761 | (match_dup 1) | |
762 | (parallel [(const_int 2) (const_int 3) | |
763 | (const_int 0) (const_int 1)]))) | |
764 | (set (match_dup 1) | |
765 | (vec_select:<MODE> | |
766 | (match_dup 1) | |
767 | (parallel [(const_int 2) (const_int 3) | |
768 | (const_int 0) (const_int 1)])))] | |
769 | "") | |
0cf68694 | 770 | |
411f1755 | 771 | (define_insn "*vsx_le_perm_store_v8hi" |
2025a48d | 772 | [(set (match_operand:V8HI 0 "indexed_or_indirect_operand" "=Z") |
0cf68694 | 773 | (match_operand:V8HI 1 "vsx_register_operand" "+wa"))] |
5d57fdc1 | 774 | "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR" |
0cf68694 | 775 | "#" |
411f1755 BS |
776 | [(set_attr "type" "vecstore") |
777 | (set_attr "length" "12")]) | |
778 | ||
779 | (define_split | |
ad18eed2 SB |
780 | [(set (match_operand:V8HI 0 "indexed_or_indirect_operand") |
781 | (match_operand:V8HI 1 "vsx_register_operand"))] | |
5d57fdc1 | 782 | "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed" |
0cf68694 BS |
783 | [(set (match_dup 2) |
784 | (vec_select:V8HI | |
785 | (match_dup 1) | |
786 | (parallel [(const_int 4) (const_int 5) | |
787 | (const_int 6) (const_int 7) | |
788 | (const_int 0) (const_int 1) | |
789 | (const_int 2) (const_int 3)]))) | |
790 | (set (match_dup 0) | |
791 | (vec_select:V8HI | |
792 | (match_dup 2) | |
793 | (parallel [(const_int 4) (const_int 5) | |
794 | (const_int 6) (const_int 7) | |
795 | (const_int 0) (const_int 1) | |
796 | (const_int 2) (const_int 3)])))] | |
0cf68694 | 797 | { |
a3a821c9 KN |
798 | rtx mem = operands[0]; |
799 | ||
800 | /* Don't apply the swap optimization if we've already performed register | |
801 | allocation and the hard register source is not in the altivec range. */ | |
802 | if ((MEM_ALIGN (mem) >= 128) | |
2e42a52f PB |
803 | && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1])) |
804 | || ALTIVEC_REGNO_P (reg_or_subregno (operands[1])))) | |
a3a821c9 KN |
805 | { |
806 | rtx mem_address = XEXP (mem, 0); | |
807 | enum machine_mode mode = GET_MODE (mem); | |
808 | if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address)) | |
809 | { | |
810 | rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]); | |
811 | emit_insn (stvx_set_expr); | |
812 | DONE; | |
813 | } | |
814 | else if (rs6000_quadword_masked_address_p (mem_address)) | |
815 | { | |
816 | /* This rtl is already in the form that matches stvx instruction, | |
817 | so leave it alone. */ | |
818 | DONE; | |
819 | } | |
820 | /* Otherwise, fall through to transform into a swapping store. */ | |
821 | } | |
822 | ||
0cf68694 BS |
823 | operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1]) |
824 | : operands[1]; | |
411f1755 BS |
825 | }) |
826 | ||
827 | ;; The post-reload split requires that we re-permute the source | |
828 | ;; register in case it is still live. | |
829 | (define_split | |
ad18eed2 SB |
830 | [(set (match_operand:V8HI 0 "indexed_or_indirect_operand") |
831 | (match_operand:V8HI 1 "vsx_register_operand"))] | |
5d57fdc1 | 832 | "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed" |
411f1755 BS |
833 | [(set (match_dup 1) |
834 | (vec_select:V8HI | |
835 | (match_dup 1) | |
836 | (parallel [(const_int 4) (const_int 5) | |
837 | (const_int 6) (const_int 7) | |
838 | (const_int 0) (const_int 1) | |
839 | (const_int 2) (const_int 3)]))) | |
840 | (set (match_dup 0) | |
841 | (vec_select:V8HI | |
842 | (match_dup 1) | |
843 | (parallel [(const_int 4) (const_int 5) | |
844 | (const_int 6) (const_int 7) | |
845 | (const_int 0) (const_int 1) | |
846 | (const_int 2) (const_int 3)]))) | |
847 | (set (match_dup 1) | |
848 | (vec_select:V8HI | |
849 | (match_dup 1) | |
850 | (parallel [(const_int 4) (const_int 5) | |
851 | (const_int 6) (const_int 7) | |
852 | (const_int 0) (const_int 1) | |
853 | (const_int 2) (const_int 3)])))] | |
854 | "") | |
0cf68694 | 855 | |
411f1755 | 856 | (define_insn "*vsx_le_perm_store_v16qi" |
2025a48d | 857 | [(set (match_operand:V16QI 0 "indexed_or_indirect_operand" "=Z") |
0cf68694 | 858 | (match_operand:V16QI 1 "vsx_register_operand" "+wa"))] |
5d57fdc1 | 859 | "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR" |
0cf68694 | 860 | "#" |
411f1755 BS |
861 | [(set_attr "type" "vecstore") |
862 | (set_attr "length" "12")]) | |
863 | ||
864 | (define_split | |
ad18eed2 SB |
865 | [(set (match_operand:V16QI 0 "indexed_or_indirect_operand") |
866 | (match_operand:V16QI 1 "vsx_register_operand"))] | |
5d57fdc1 | 867 | "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed" |
0cf68694 BS |
868 | [(set (match_dup 2) |
869 | (vec_select:V16QI | |
870 | (match_dup 1) | |
871 | (parallel [(const_int 8) (const_int 9) | |
872 | (const_int 10) (const_int 11) | |
873 | (const_int 12) (const_int 13) | |
874 | (const_int 14) (const_int 15) | |
875 | (const_int 0) (const_int 1) | |
876 | (const_int 2) (const_int 3) | |
877 | (const_int 4) (const_int 5) | |
878 | (const_int 6) (const_int 7)]))) | |
879 | (set (match_dup 0) | |
880 | (vec_select:V16QI | |
881 | (match_dup 2) | |
882 | (parallel [(const_int 8) (const_int 9) | |
883 | (const_int 10) (const_int 11) | |
884 | (const_int 12) (const_int 13) | |
885 | (const_int 14) (const_int 15) | |
886 | (const_int 0) (const_int 1) | |
887 | (const_int 2) (const_int 3) | |
888 | (const_int 4) (const_int 5) | |
889 | (const_int 6) (const_int 7)])))] | |
0cf68694 | 890 | { |
a3a821c9 KN |
891 | rtx mem = operands[0]; |
892 | ||
893 | /* Don't apply the swap optimization if we've already performed register | |
894 | allocation and the hard register source is not in the altivec range. */ | |
895 | if ((MEM_ALIGN (mem) >= 128) | |
2e42a52f PB |
896 | && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1])) |
897 | || ALTIVEC_REGNO_P (reg_or_subregno (operands[1])))) | |
a3a821c9 KN |
898 | { |
899 | rtx mem_address = XEXP (mem, 0); | |
900 | enum machine_mode mode = GET_MODE (mem); | |
901 | if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address)) | |
902 | { | |
903 | rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]); | |
904 | emit_insn (stvx_set_expr); | |
905 | DONE; | |
906 | } | |
907 | else if (rs6000_quadword_masked_address_p (mem_address)) | |
908 | { | |
909 | /* This rtl is already in the form that matches stvx instruction, | |
910 | so leave it alone. */ | |
911 | DONE; | |
912 | } | |
913 | /* Otherwise, fall through to transform into a swapping store. */ | |
914 | } | |
915 | ||
0cf68694 BS |
916 | operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1]) |
917 | : operands[1]; | |
411f1755 BS |
918 | }) |
919 | ||
920 | ;; The post-reload split requires that we re-permute the source | |
921 | ;; register in case it is still live. | |
922 | (define_split | |
ad18eed2 SB |
923 | [(set (match_operand:V16QI 0 "indexed_or_indirect_operand") |
924 | (match_operand:V16QI 1 "vsx_register_operand"))] | |
5d57fdc1 | 925 | "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed" |
411f1755 BS |
926 | [(set (match_dup 1) |
927 | (vec_select:V16QI | |
928 | (match_dup 1) | |
929 | (parallel [(const_int 8) (const_int 9) | |
930 | (const_int 10) (const_int 11) | |
931 | (const_int 12) (const_int 13) | |
932 | (const_int 14) (const_int 15) | |
933 | (const_int 0) (const_int 1) | |
934 | (const_int 2) (const_int 3) | |
935 | (const_int 4) (const_int 5) | |
936 | (const_int 6) (const_int 7)]))) | |
937 | (set (match_dup 0) | |
938 | (vec_select:V16QI | |
939 | (match_dup 1) | |
940 | (parallel [(const_int 8) (const_int 9) | |
941 | (const_int 10) (const_int 11) | |
942 | (const_int 12) (const_int 13) | |
943 | (const_int 14) (const_int 15) | |
944 | (const_int 0) (const_int 1) | |
945 | (const_int 2) (const_int 3) | |
946 | (const_int 4) (const_int 5) | |
947 | (const_int 6) (const_int 7)]))) | |
948 | (set (match_dup 1) | |
949 | (vec_select:V16QI | |
950 | (match_dup 1) | |
951 | (parallel [(const_int 8) (const_int 9) | |
952 | (const_int 10) (const_int 11) | |
953 | (const_int 12) (const_int 13) | |
954 | (const_int 14) (const_int 15) | |
955 | (const_int 0) (const_int 1) | |
956 | (const_int 2) (const_int 3) | |
957 | (const_int 4) (const_int 5) | |
958 | (const_int 6) (const_int 7)])))] | |
959 | "") | |
0cf68694 | 960 | |
c477a667 MM |
961 | ;; Little endian word swapping for 128-bit types that are either scalars or the |
962 | ;; special V1TI container class, which it is not appropriate to use vec_select | |
963 | ;; for the type. | |
964 | (define_insn "*vsx_le_permute_<mode>" | |
f1701864 CL |
965 | [(set (match_operand:VEC_TI 0 "nonimmediate_operand" "=wa,wa,Z,&r,&r,Q") |
966 | (rotate:VEC_TI | |
967 | (match_operand:VEC_TI 1 "input_operand" "wa,Z,wa,r,Q,r") | |
c477a667 | 968 | (const_int 64)))] |
32928931 | 969 | "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR" |
c477a667 MM |
970 | "@ |
971 | xxpermdi %x0,%x1,%x1,2 | |
972 | lxvd2x %x0,%y1 | |
d00fdf85 PB |
973 | stxvd2x %x1,%y0 |
974 | mr %0,%L1\;mr %L0,%1 | |
975 | ld%U1%X1 %0,%L1\;ld%U1%X1 %L0,%1 | |
976 | std%U0%X0 %L1,%0\;std%U0%X0 %1,%L0" | |
911c8df0 | 977 | [(set_attr "length" "*,*,*,8,8,8") |
d00fdf85 | 978 | (set_attr "type" "vecperm,vecload,vecstore,*,load,store")]) |
c477a667 MM |
979 | |
980 | (define_insn_and_split "*vsx_le_undo_permute_<mode>" | |
f1701864 CL |
981 | [(set (match_operand:VEC_TI 0 "vsx_register_operand" "=wa,wa") |
982 | (rotate:VEC_TI | |
983 | (rotate:VEC_TI | |
984 | (match_operand:VEC_TI 1 "vsx_register_operand" "0,wa") | |
c477a667 MM |
985 | (const_int 64)) |
986 | (const_int 64)))] | |
987 | "!BYTES_BIG_ENDIAN && TARGET_VSX" | |
988 | "@ | |
989 | # | |
990 | xxlor %x0,%x1" | |
0ec7641e | 991 | "&& 1" |
c477a667 MM |
992 | [(set (match_dup 0) (match_dup 1))] |
993 | { | |
994 | if (reload_completed && REGNO (operands[0]) == REGNO (operands[1])) | |
995 | { | |
996 | emit_note (NOTE_INSN_DELETED); | |
997 | DONE; | |
998 | } | |
999 | } | |
1000 | [(set_attr "length" "0,4") | |
7c788ce2 | 1001 | (set_attr "type" "veclogical")]) |
c477a667 MM |
1002 | |
1003 | (define_insn_and_split "*vsx_le_perm_load_<mode>" | |
cb152d12 | 1004 | [(set (match_operand:VSX_LE_128 0 "vsx_register_operand" "=wa,r") |
d00fdf85 | 1005 | (match_operand:VSX_LE_128 1 "memory_operand" "Z,Q"))] |
cb25dea3 PB |
1006 | "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR |
1007 | && !altivec_indexed_or_indirect_operand (operands[1], <MODE>mode)" | |
d00fdf85 PB |
1008 | "@ |
1009 | # | |
1010 | #" | |
cb25dea3 PB |
1011 | "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR |
1012 | && !altivec_indexed_or_indirect_operand (operands[1], <MODE>mode)" | |
02d3ba0e | 1013 | [(const_int 0)] |
c477a667 | 1014 | { |
02d3ba0e RS |
1015 | rtx tmp = (can_create_pseudo_p () |
1016 | ? gen_reg_rtx_and_attrs (operands[0]) | |
1017 | : operands[0]); | |
1018 | rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode); | |
1019 | rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode); | |
1020 | DONE; | |
c477a667 | 1021 | } |
d00fdf85 | 1022 | [(set_attr "type" "vecload,load") |
cb152d12 SB |
1023 | (set_attr "length" "8,8") |
1024 | (set_attr "isa" "<VSisa>,*")]) | |
c477a667 MM |
1025 | |
1026 | (define_insn "*vsx_le_perm_store_<mode>" | |
d00fdf85 | 1027 | [(set (match_operand:VSX_LE_128 0 "memory_operand" "=Z,Q") |
cb152d12 | 1028 | (match_operand:VSX_LE_128 1 "vsx_register_operand" "+wa,r"))] |
cb25dea3 | 1029 | "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR |
3a7794b4 | 1030 | && !altivec_indexed_or_indirect_operand (operands[0], <MODE>mode)" |
d00fdf85 PB |
1031 | "@ |
1032 | # | |
1033 | #" | |
1034 | [(set_attr "type" "vecstore,store") | |
cb152d12 SB |
1035 | (set_attr "length" "12,8") |
1036 | (set_attr "isa" "<VSisa>,*")]) | |
c477a667 MM |
1037 | |
1038 | (define_split | |
ad18eed2 SB |
1039 | [(set (match_operand:VSX_LE_128 0 "memory_operand") |
1040 | (match_operand:VSX_LE_128 1 "vsx_register_operand"))] | |
cb25dea3 PB |
1041 | "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed && !TARGET_P9_VECTOR |
1042 | && !altivec_indexed_or_indirect_operand (operands[0], <MODE>mode)" | |
02d3ba0e | 1043 | [(const_int 0)] |
c477a667 | 1044 | { |
02d3ba0e RS |
1045 | rtx tmp = (can_create_pseudo_p () |
1046 | ? gen_reg_rtx_and_attrs (operands[0]) | |
1047 | : operands[0]); | |
1048 | rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode); | |
1049 | rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode); | |
1050 | DONE; | |
c477a667 MM |
1051 | }) |
1052 | ||
d00fdf85 PB |
1053 | ;; Peepholes to catch loads and stores for TImode if TImode landed in |
1054 | ;; GPR registers on a little endian system. | |
1055 | (define_peephole2 | |
f1701864 CL |
1056 | [(set (match_operand:VEC_TI 0 "int_reg_operand") |
1057 | (rotate:VEC_TI (match_operand:VEC_TI 1 "memory_operand") | |
d00fdf85 | 1058 | (const_int 64))) |
f1701864 CL |
1059 | (set (match_operand:VEC_TI 2 "int_reg_operand") |
1060 | (rotate:VEC_TI (match_dup 0) | |
d00fdf85 | 1061 | (const_int 64)))] |
4a89b7e7 | 1062 | "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR |
d00fdf85 PB |
1063 | && (rtx_equal_p (operands[0], operands[2]) |
1064 | || peep2_reg_dead_p (2, operands[0]))" | |
1065 | [(set (match_dup 2) (match_dup 1))]) | |
1066 | ||
1067 | (define_peephole2 | |
f1701864 CL |
1068 | [(set (match_operand:VEC_TI 0 "int_reg_operand") |
1069 | (rotate:VEC_TI (match_operand:VEC_TI 1 "int_reg_operand") | |
d00fdf85 | 1070 | (const_int 64))) |
f1701864 CL |
1071 | (set (match_operand:VEC_TI 2 "memory_operand") |
1072 | (rotate:VEC_TI (match_dup 0) | |
d00fdf85 | 1073 | (const_int 64)))] |
4a89b7e7 | 1074 | "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR |
d00fdf85 PB |
1075 | && peep2_reg_dead_p (2, operands[0])" |
1076 | [(set (match_dup 2) (match_dup 1))]) | |
1077 | ||
9393bc31 MM |
1078 | ;; Peephole to catch memory to memory transfers for TImode if TImode landed in |
1079 | ;; VSX registers on a little endian system. The vector types and IEEE 128-bit | |
1080 | ;; floating point are handled by the more generic swap elimination pass. | |
1081 | (define_peephole2 | |
ad18eed2 SB |
1082 | [(set (match_operand:TI 0 "vsx_register_operand") |
1083 | (rotate:TI (match_operand:TI 1 "vsx_register_operand") | |
9393bc31 | 1084 | (const_int 64))) |
ad18eed2 | 1085 | (set (match_operand:TI 2 "vsx_register_operand") |
9393bc31 MM |
1086 | (rotate:TI (match_dup 0) |
1087 | (const_int 64)))] | |
4a89b7e7 | 1088 | "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR |
9393bc31 MM |
1089 | && (rtx_equal_p (operands[0], operands[2]) |
1090 | || peep2_reg_dead_p (2, operands[0]))" | |
1091 | [(set (match_dup 2) (match_dup 1))]) | |
1092 | ||
c477a667 MM |
1093 | ;; The post-reload split requires that we re-permute the source |
1094 | ;; register in case it is still live. | |
1095 | (define_split | |
ad18eed2 SB |
1096 | [(set (match_operand:VSX_LE_128 0 "memory_operand") |
1097 | (match_operand:VSX_LE_128 1 "vsx_register_operand"))] | |
cb25dea3 PB |
1098 | "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed && !TARGET_P9_VECTOR |
1099 | && !altivec_indexed_or_indirect_operand (operands[0], <MODE>mode)" | |
02d3ba0e RS |
1100 | [(const_int 0)] |
1101 | { | |
1102 | rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode); | |
1103 | rs6000_emit_le_vsx_permute (operands[0], operands[1], <MODE>mode); | |
1104 | rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode); | |
1105 | DONE; | |
1106 | }) | |
0cf68694 | 1107 | |
50c78b9a MM |
1108 | ;; Vector constants that can be generated with XXSPLTIB that was added in ISA |
1109 | ;; 3.0. Both (const_vector [..]) and (vec_duplicate ...) forms are recognized. | |
1110 | (define_insn "xxspltib_v16qi" | |
1111 | [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa") | |
1112 | (vec_duplicate:V16QI (match_operand:SI 1 "s8bit_cint_operand" "n")))] | |
1113 | "TARGET_P9_VECTOR" | |
29e6733c | 1114 | { |
50c78b9a MM |
1115 | operands[2] = GEN_INT (INTVAL (operands[1]) & 0xff); |
1116 | return "xxspltib %x0,%2"; | |
29e6733c | 1117 | } |
50c78b9a MM |
1118 | [(set_attr "type" "vecperm")]) |
1119 | ||
1120 | (define_insn "xxspltib_<mode>_nosplit" | |
58f2fb5c MM |
1121 | [(set (match_operand:VSINT_842 0 "vsx_register_operand" "=wa,wa") |
1122 | (match_operand:VSINT_842 1 "xxspltib_constant_nosplit" "jwM,wE"))] | |
50c78b9a | 1123 | "TARGET_P9_VECTOR" |
29e6733c | 1124 | { |
50c78b9a MM |
1125 | rtx op1 = operands[1]; |
1126 | int value = 256; | |
1127 | int num_insns = -1; | |
1128 | ||
1129 | if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value) | |
1130 | || num_insns != 1) | |
1131 | gcc_unreachable (); | |
1132 | ||
1133 | operands[2] = GEN_INT (value & 0xff); | |
1134 | return "xxspltib %x0,%2"; | |
c6d5ff83 | 1135 | } |
50c78b9a MM |
1136 | [(set_attr "type" "vecperm")]) |
1137 | ||
1138 | (define_insn_and_split "*xxspltib_<mode>_split" | |
1139 | [(set (match_operand:VSINT_842 0 "altivec_register_operand" "=v") | |
1140 | (match_operand:VSINT_842 1 "xxspltib_constant_split" "wS"))] | |
1141 | "TARGET_P9_VECTOR" | |
1142 | "#" | |
1143 | "&& 1" | |
1144 | [(const_int 0)] | |
c6d5ff83 | 1145 | { |
50c78b9a MM |
1146 | int value = 256; |
1147 | int num_insns = -1; | |
1148 | rtx op0 = operands[0]; | |
1149 | rtx op1 = operands[1]; | |
1150 | rtx tmp = ((can_create_pseudo_p ()) | |
1151 | ? gen_reg_rtx (V16QImode) | |
1152 | : gen_lowpart (V16QImode, op0)); | |
c6d5ff83 | 1153 | |
50c78b9a MM |
1154 | if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value) |
1155 | || num_insns != 2) | |
1156 | gcc_unreachable (); | |
c6d5ff83 | 1157 | |
50c78b9a | 1158 | emit_insn (gen_xxspltib_v16qi (tmp, GEN_INT (value))); |
c6d5ff83 | 1159 | |
50c78b9a MM |
1160 | if (<MODE>mode == V2DImode) |
1161 | emit_insn (gen_vsx_sign_extend_qi_v2di (op0, tmp)); | |
c6d5ff83 | 1162 | |
50c78b9a MM |
1163 | else if (<MODE>mode == V4SImode) |
1164 | emit_insn (gen_vsx_sign_extend_qi_v4si (op0, tmp)); | |
1165 | ||
1166 | else if (<MODE>mode == V8HImode) | |
1167 | emit_insn (gen_altivec_vupkhsb (op0, tmp)); | |
1168 | ||
1169 | else | |
1170 | gcc_unreachable (); | |
29e6733c | 1171 | |
50c78b9a MM |
1172 | DONE; |
1173 | } | |
1174 | [(set_attr "type" "vecperm") | |
1175 | (set_attr "length" "8")]) | |
29e6733c | 1176 | |
29e6733c | 1177 | |
50c78b9a MM |
1178 | ;; Prefer using vector registers over GPRs. Prefer using ISA 3.0's XXSPLTISB |
1179 | ;; or Altivec VSPLITW 0/-1 over XXLXOR/XXLORC to set a register to all 0's or | |
1180 | ;; all 1's, since the machine does not have to wait for the previous | |
1181 | ;; instruction using the register being set (such as a store waiting on a slow | |
1182 | ;; instruction). But generate XXLXOR/XXLORC if it will avoid a register move. | |
c6d5ff83 | 1183 | |
50c78b9a MM |
1184 | ;; VSX store VSX load VSX move VSX->GPR GPR->VSX LQ (GPR) |
1185 | ;; STQ (GPR) GPR load GPR store GPR move XXSPLTIB VSPLTISW | |
d730aa8a | 1186 | ;; LXVKQ XXSPLTI* |
00fd0628 | 1187 | ;; VSX 0/-1 VMX const GPR const LVX (VMX) STVX (VMX) |
f7e94dfb | 1188 | (define_insn "vsx_mov<mode>_64bit" |
50c78b9a | 1189 | [(set (match_operand:VSX_M 0 "nonimmediate_operand" |
cb152d12 | 1190 | "=ZwO, wa, wa, r, we, ?wQ, |
afc69d4e | 1191 | ?&r, ??r, ??Y, <??r>, wa, v, |
d730aa8a | 1192 | wa, wa, |
cb152d12 | 1193 | ?wa, v, <??r>, wZ, v") |
c6d5ff83 | 1194 | |
50c78b9a | 1195 | (match_operand:VSX_M 1 "input_operand" |
cb152d12 | 1196 | "wa, ZwO, wa, we, r, r, |
50c78b9a | 1197 | wQ, Y, r, r, wE, jwM, |
d730aa8a | 1198 | eQ, eP, |
00fd0628 | 1199 | ?jwM, W, <nW>, v, wZ"))] |
c6d5ff83 | 1200 | |
50c78b9a MM |
1201 | "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode) |
1202 | && (register_operand (operands[0], <MODE>mode) | |
1203 | || register_operand (operands[1], <MODE>mode))" | |
1204 | { | |
1205 | return rs6000_output_move_128bit (operands); | |
1206 | } | |
1207 | [(set_attr "type" | |
863e8d53 | 1208 | "vecstore, vecload, vecsimple, mtvsr, mfvsr, load, |
50c78b9a | 1209 | store, load, store, *, vecsimple, vecsimple, |
d730aa8a | 1210 | vecperm, vecperm, |
00fd0628 | 1211 | vecsimple, *, *, vecstore, vecload") |
ca06b86c MM |
1212 | (set_attr "num_insns" |
1213 | "*, *, *, 2, *, 2, | |
1214 | 2, 2, 2, 2, *, *, | |
d730aa8a | 1215 | *, *, |
ca06b86c MM |
1216 | *, 5, 2, *, *") |
1217 | (set_attr "max_prefixed_insns" | |
1218 | "*, *, *, *, *, 2, | |
1219 | 2, 2, 2, 2, *, *, | |
d730aa8a | 1220 | *, *, |
ca06b86c | 1221 | *, *, *, *, *") |
50c78b9a | 1222 | (set_attr "length" |
911c8df0 MM |
1223 | "*, *, *, 8, *, 8, |
1224 | 8, 8, 8, 8, *, *, | |
d730aa8a | 1225 | *, *, |
911c8df0 | 1226 | *, 20, 8, *, *") |
afc69d4e | 1227 | (set_attr "isa" |
cb152d12 | 1228 | "<VSisa>, <VSisa>, <VSisa>, *, *, *, |
afc69d4e | 1229 | *, *, *, *, p9v, *, |
d730aa8a | 1230 | p10, p10, |
f9063d12 MM |
1231 | <VSisa>, *, *, *, *") |
1232 | (set_attr "prefixed" | |
1233 | "*, *, *, *, *, *, | |
1234 | *, *, *, *, *, *, | |
1235 | *, yes, | |
1236 | *, *, *, *, *")]) | |
50c78b9a MM |
1237 | |
1238 | ;; VSX store VSX load VSX move GPR load GPR store GPR move | |
d730aa8a | 1239 | ;; LXVKQ XXSPLTI* |
00fd0628 | 1240 | ;; XXSPLTIB VSPLTISW VSX 0/-1 VMX const GPR const |
50c78b9a MM |
1241 | ;; LVX (VMX) STVX (VMX) |
1242 | (define_insn "*vsx_mov<mode>_32bit" | |
1243 | [(set (match_operand:VSX_M 0 "nonimmediate_operand" | |
cb152d12 | 1244 | "=ZwO, wa, wa, ??r, ??Y, <??r>, |
d730aa8a | 1245 | wa, wa, |
cb152d12 | 1246 | wa, v, ?wa, v, <??r>, |
50c78b9a MM |
1247 | wZ, v") |
1248 | ||
1249 | (match_operand:VSX_M 1 "input_operand" | |
cb152d12 | 1250 | "wa, ZwO, wa, Y, r, r, |
d730aa8a | 1251 | eQ, eP, |
00fd0628 | 1252 | wE, jwM, ?jwM, W, <nW>, |
50c78b9a MM |
1253 | v, wZ"))] |
1254 | ||
1255 | "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode) | |
1256 | && (register_operand (operands[0], <MODE>mode) | |
1257 | || register_operand (operands[1], <MODE>mode))" | |
1258 | { | |
1259 | return rs6000_output_move_128bit (operands); | |
29e6733c | 1260 | } |
50c78b9a MM |
1261 | [(set_attr "type" |
1262 | "vecstore, vecload, vecsimple, load, store, *, | |
d730aa8a | 1263 | vecperm, vecperm, |
00fd0628 | 1264 | vecsimple, vecsimple, vecsimple, *, *, |
50c78b9a | 1265 | vecstore, vecload") |
50c78b9a | 1266 | (set_attr "length" |
911c8df0 | 1267 | "*, *, *, 16, 16, 16, |
d730aa8a | 1268 | *, *, |
911c8df0 MM |
1269 | *, *, *, 20, 16, |
1270 | *, *") | |
afc69d4e | 1271 | (set_attr "isa" |
cb152d12 | 1272 | "<VSisa>, <VSisa>, <VSisa>, *, *, *, |
d730aa8a | 1273 | p10, p10, |
cb152d12 | 1274 | p9v, *, <VSisa>, *, *, |
f9063d12 MM |
1275 | *, *") |
1276 | (set_attr "prefixed" | |
1277 | "*, *, *, *, *, *, | |
1278 | *, yes, | |
1279 | *, *, *, *, *, | |
afc69d4e | 1280 | *, *")]) |
29e6733c | 1281 | |
c9485473 MM |
1282 | ;; Explicit load/store expanders for the builtin functions |
1283 | (define_expand "vsx_load_<mode>" | |
ad18eed2 SB |
1284 | [(set (match_operand:VSX_M 0 "vsx_register_operand") |
1285 | (match_operand:VSX_M 1 "memory_operand"))] | |
c9485473 | 1286 | "VECTOR_MEM_VSX_P (<MODE>mode)" |
06f9caed BS |
1287 | { |
1288 | /* Expand to swaps if needed, prior to swap optimization. */ | |
cb25dea3 PB |
1289 | if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR |
1290 | && !altivec_indexed_or_indirect_operand(operands[1], <MODE>mode)) | |
06f9caed BS |
1291 | { |
1292 | rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode); | |
1293 | DONE; | |
1294 | } | |
1295 | }) | |
c9485473 MM |
1296 | |
1297 | (define_expand "vsx_store_<mode>" | |
ad18eed2 SB |
1298 | [(set (match_operand:VSX_M 0 "memory_operand") |
1299 | (match_operand:VSX_M 1 "vsx_register_operand"))] | |
c9485473 | 1300 | "VECTOR_MEM_VSX_P (<MODE>mode)" |
06f9caed BS |
1301 | { |
1302 | /* Expand to swaps if needed, prior to swap optimization. */ | |
cb25dea3 PB |
1303 | if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR |
1304 | && !altivec_indexed_or_indirect_operand(operands[0], <MODE>mode)) | |
06f9caed BS |
1305 | { |
1306 | rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode); | |
1307 | DONE; | |
1308 | } | |
1309 | }) | |
c9485473 | 1310 | |
b69c0061 WS |
1311 | ;; Load rightmost element from load_data |
1312 | ;; using lxvrbx, lxvrhx, lxvrwx, lxvrdx. | |
1313 | (define_insn "vsx_lxvr<wd>x" | |
1314 | [(set (match_operand:TI 0 "vsx_register_operand" "=wa") | |
1315 | (zero_extend:TI (match_operand:INT_ISA3 1 "memory_operand" "Z")))] | |
1316 | "TARGET_POWER10" | |
1317 | "lxvr<wd>x %x0,%y1" | |
1318 | [(set_attr "type" "vecload")]) | |
1319 | ||
1320 | ;; Store rightmost element into store_data | |
1321 | ;; using stxvrbx, stxvrhx, strvxwx, strvxdx. | |
1322 | (define_insn "vsx_stxvr<wd>x" | |
1323 | [(set (match_operand:INT_ISA3 0 "memory_operand" "=Z") | |
1324 | (truncate:INT_ISA3 (match_operand:TI 1 "vsx_register_operand" "wa")))] | |
1325 | "TARGET_POWER10" | |
1326 | "stxvr<wd>x %x1,%y0" | |
1327 | [(set_attr "type" "vecstore")]) | |
1328 | ||
8fa97501 BS |
1329 | ;; Explicit load/store expanders for the builtin functions for lxvd2x, etc., |
1330 | ;; when you really want their element-reversing behavior. | |
1331 | (define_insn "vsx_ld_elemrev_v2di" | |
1332 | [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa") | |
1333 | (vec_select:V2DI | |
1334 | (match_operand:V2DI 1 "memory_operand" "Z") | |
1335 | (parallel [(const_int 1) (const_int 0)])))] | |
1336 | "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN" | |
1337 | "lxvd2x %x0,%y1" | |
1338 | [(set_attr "type" "vecload")]) | |
1339 | ||
d10cff95 CL |
1340 | (define_insn "vsx_ld_elemrev_v1ti" |
1341 | [(set (match_operand:V1TI 0 "vsx_register_operand" "=wa") | |
1342 | (vec_select:V1TI | |
1343 | (match_operand:V1TI 1 "memory_operand" "Z") | |
1344 | (parallel [(const_int 0)])))] | |
1345 | "VECTOR_MEM_VSX_P (V1TImode) && !BYTES_BIG_ENDIAN" | |
1346 | { | |
1347 | return "lxvd2x %x0,%y1\;xxpermdi %x0,%x0,%x0,2"; | |
1348 | } | |
1349 | [(set_attr "type" "vecload")]) | |
1350 | ||
8fa97501 BS |
1351 | (define_insn "vsx_ld_elemrev_v2df" |
1352 | [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa") | |
1353 | (vec_select:V2DF | |
1354 | (match_operand:V2DF 1 "memory_operand" "Z") | |
1355 | (parallel [(const_int 1) (const_int 0)])))] | |
1356 | "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN" | |
1357 | "lxvd2x %x0,%y1" | |
1358 | [(set_attr "type" "vecload")]) | |
1359 | ||
1360 | (define_insn "vsx_ld_elemrev_v4si" | |
1361 | [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa") | |
1362 | (vec_select:V4SI | |
1363 | (match_operand:V4SI 1 "memory_operand" "Z") | |
1364 | (parallel [(const_int 3) (const_int 2) | |
1365 | (const_int 1) (const_int 0)])))] | |
1366 | "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN" | |
1367 | "lxvw4x %x0,%y1" | |
1368 | [(set_attr "type" "vecload")]) | |
1369 | ||
1370 | (define_insn "vsx_ld_elemrev_v4sf" | |
1371 | [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa") | |
1372 | (vec_select:V4SF | |
1373 | (match_operand:V4SF 1 "memory_operand" "Z") | |
1374 | (parallel [(const_int 3) (const_int 2) | |
1375 | (const_int 1) (const_int 0)])))] | |
1376 | "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN" | |
1377 | "lxvw4x %x0,%y1" | |
1378 | [(set_attr "type" "vecload")]) | |
1379 | ||
3ef9e1ec | 1380 | (define_expand "vsx_ld_elemrev_v8hi" |
8fa97501 BS |
1381 | [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa") |
1382 | (vec_select:V8HI | |
1383 | (match_operand:V8HI 1 "memory_operand" "Z") | |
1384 | (parallel [(const_int 7) (const_int 6) | |
1385 | (const_int 5) (const_int 4) | |
1386 | (const_int 3) (const_int 2) | |
1387 | (const_int 1) (const_int 0)])))] | |
3ef9e1ec BS |
1388 | "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN" |
1389 | { | |
1390 | if (!TARGET_P9_VECTOR) | |
1391 | { | |
1392 | rtx tmp = gen_reg_rtx (V4SImode); | |
1393 | rtx subreg, subreg2, perm[16], pcv; | |
1394 | /* 2 is leftmost element in register */ | |
1395 | unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2}; | |
1396 | int i; | |
1397 | ||
1398 | subreg = simplify_gen_subreg (V4SImode, operands[1], V8HImode, 0); | |
1399 | emit_insn (gen_vsx_ld_elemrev_v4si (tmp, subreg)); | |
1400 | subreg2 = simplify_gen_subreg (V8HImode, tmp, V4SImode, 0); | |
1401 | ||
1402 | for (i = 0; i < 16; ++i) | |
1403 | perm[i] = GEN_INT (reorder[i]); | |
1404 | ||
1405 | pcv = force_reg (V16QImode, | |
1406 | gen_rtx_CONST_VECTOR (V16QImode, | |
1407 | gen_rtvec_v (16, perm))); | |
1408 | emit_insn (gen_altivec_vperm_v8hi_direct (operands[0], subreg2, | |
1409 | subreg2, pcv)); | |
1410 | DONE; | |
1411 | } | |
1412 | }) | |
1413 | ||
1414 | (define_insn "*vsx_ld_elemrev_v8hi_internal" | |
1415 | [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa") | |
1416 | (vec_select:V8HI | |
1417 | (match_operand:V8HI 1 "memory_operand" "Z") | |
1418 | (parallel [(const_int 7) (const_int 6) | |
1419 | (const_int 5) (const_int 4) | |
1420 | (const_int 3) (const_int 2) | |
1421 | (const_int 1) (const_int 0)])))] | |
8fa97501 BS |
1422 | "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR" |
1423 | "lxvh8x %x0,%y1" | |
1424 | [(set_attr "type" "vecload")]) | |
1425 | ||
3ef9e1ec | 1426 | (define_expand "vsx_ld_elemrev_v16qi" |
8fa97501 BS |
1427 | [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa") |
1428 | (vec_select:V16QI | |
3ef9e1ec BS |
1429 | (match_operand:V16QI 1 "memory_operand" "Z") |
1430 | (parallel [(const_int 15) (const_int 14) | |
1431 | (const_int 13) (const_int 12) | |
1432 | (const_int 11) (const_int 10) | |
1433 | (const_int 9) (const_int 8) | |
1434 | (const_int 7) (const_int 6) | |
1435 | (const_int 5) (const_int 4) | |
1436 | (const_int 3) (const_int 2) | |
1437 | (const_int 1) (const_int 0)])))] | |
1438 | "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN" | |
1439 | { | |
1440 | if (!TARGET_P9_VECTOR) | |
1441 | { | |
1442 | rtx tmp = gen_reg_rtx (V4SImode); | |
1443 | rtx subreg, subreg2, perm[16], pcv; | |
1444 | /* 3 is leftmost element in register */ | |
1445 | unsigned int reorder[16] = {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3}; | |
1446 | int i; | |
1447 | ||
1448 | subreg = simplify_gen_subreg (V4SImode, operands[1], V16QImode, 0); | |
1449 | emit_insn (gen_vsx_ld_elemrev_v4si (tmp, subreg)); | |
1450 | subreg2 = simplify_gen_subreg (V16QImode, tmp, V4SImode, 0); | |
1451 | ||
1452 | for (i = 0; i < 16; ++i) | |
1453 | perm[i] = GEN_INT (reorder[i]); | |
1454 | ||
1455 | pcv = force_reg (V16QImode, | |
1456 | gen_rtx_CONST_VECTOR (V16QImode, | |
1457 | gen_rtvec_v (16, perm))); | |
1458 | emit_insn (gen_altivec_vperm_v16qi_direct (operands[0], subreg2, | |
1459 | subreg2, pcv)); | |
1460 | DONE; | |
1461 | } | |
1462 | }) | |
1463 | ||
9d36bd3b | 1464 | (define_insn "vsx_ld_elemrev_v16qi_internal" |
3ef9e1ec BS |
1465 | [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa") |
1466 | (vec_select:V16QI | |
1467 | (match_operand:V16QI 1 "memory_operand" "Z") | |
1468 | (parallel [(const_int 15) (const_int 14) | |
1469 | (const_int 13) (const_int 12) | |
1470 | (const_int 11) (const_int 10) | |
1471 | (const_int 9) (const_int 8) | |
1472 | (const_int 7) (const_int 6) | |
1473 | (const_int 5) (const_int 4) | |
1474 | (const_int 3) (const_int 2) | |
1475 | (const_int 1) (const_int 0)])))] | |
8fa97501 BS |
1476 | "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR" |
1477 | "lxvb16x %x0,%y1" | |
1478 | [(set_attr "type" "vecload")]) | |
1479 | ||
d10cff95 CL |
1480 | (define_insn "vsx_st_elemrev_v1ti" |
1481 | [(set (match_operand:V1TI 0 "memory_operand" "=Z") | |
1482 | (vec_select:V1TI | |
1483 | (match_operand:V1TI 1 "vsx_register_operand" "+wa") | |
1484 | (parallel [(const_int 0)]))) | |
1485 | (clobber (match_dup 1))] | |
1486 | "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN" | |
1487 | { | |
1488 | return "xxpermdi %x1,%x1,%x1,2\;stxvd2x %x1,%y0"; | |
1489 | } | |
1490 | [(set_attr "type" "vecstore")]) | |
1491 | ||
8fa97501 BS |
1492 | (define_insn "vsx_st_elemrev_v2df" |
1493 | [(set (match_operand:V2DF 0 "memory_operand" "=Z") | |
1494 | (vec_select:V2DF | |
3ef9e1ec BS |
1495 | (match_operand:V2DF 1 "vsx_register_operand" "wa") |
1496 | (parallel [(const_int 1) (const_int 0)])))] | |
8fa97501 BS |
1497 | "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN" |
1498 | "stxvd2x %x1,%y0" | |
1499 | [(set_attr "type" "vecstore")]) | |
1500 | ||
1501 | (define_insn "vsx_st_elemrev_v2di" | |
1502 | [(set (match_operand:V2DI 0 "memory_operand" "=Z") | |
1503 | (vec_select:V2DI | |
3ef9e1ec BS |
1504 | (match_operand:V2DI 1 "vsx_register_operand" "wa") |
1505 | (parallel [(const_int 1) (const_int 0)])))] | |
8fa97501 BS |
1506 | "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN" |
1507 | "stxvd2x %x1,%y0" | |
1508 | [(set_attr "type" "vecstore")]) | |
1509 | ||
1510 | (define_insn "vsx_st_elemrev_v4sf" | |
1511 | [(set (match_operand:V4SF 0 "memory_operand" "=Z") | |
1512 | (vec_select:V4SF | |
3ef9e1ec BS |
1513 | (match_operand:V4SF 1 "vsx_register_operand" "wa") |
1514 | (parallel [(const_int 3) (const_int 2) | |
1515 | (const_int 1) (const_int 0)])))] | |
8fa97501 BS |
1516 | "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN" |
1517 | "stxvw4x %x1,%y0" | |
1518 | [(set_attr "type" "vecstore")]) | |
1519 | ||
1520 | (define_insn "vsx_st_elemrev_v4si" | |
1521 | [(set (match_operand:V4SI 0 "memory_operand" "=Z") | |
1522 | (vec_select:V4SI | |
1523 | (match_operand:V4SI 1 "vsx_register_operand" "wa") | |
1524 | (parallel [(const_int 3) (const_int 2) | |
1525 | (const_int 1) (const_int 0)])))] | |
1526 | "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN" | |
1527 | "stxvw4x %x1,%y0" | |
1528 | [(set_attr "type" "vecstore")]) | |
1529 | ||
3ef9e1ec | 1530 | (define_expand "vsx_st_elemrev_v8hi" |
8fa97501 BS |
1531 | [(set (match_operand:V8HI 0 "memory_operand" "=Z") |
1532 | (vec_select:V8HI | |
3ef9e1ec BS |
1533 | (match_operand:V8HI 1 "vsx_register_operand" "wa") |
1534 | (parallel [(const_int 7) (const_int 6) | |
1535 | (const_int 5) (const_int 4) | |
1536 | (const_int 3) (const_int 2) | |
1537 | (const_int 1) (const_int 0)])))] | |
1538 | "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN" | |
1539 | { | |
1540 | if (!TARGET_P9_VECTOR) | |
1541 | { | |
d10cff95 | 1542 | rtx mem_subreg, subreg, perm[16], pcv; |
3ef9e1ec BS |
1543 | rtx tmp = gen_reg_rtx (V8HImode); |
1544 | /* 2 is leftmost element in register */ | |
1545 | unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2}; | |
1546 | int i; | |
1547 | ||
1548 | for (i = 0; i < 16; ++i) | |
1549 | perm[i] = GEN_INT (reorder[i]); | |
1550 | ||
1551 | pcv = force_reg (V16QImode, | |
1552 | gen_rtx_CONST_VECTOR (V16QImode, | |
1553 | gen_rtvec_v (16, perm))); | |
1554 | emit_insn (gen_altivec_vperm_v8hi_direct (tmp, operands[1], | |
1555 | operands[1], pcv)); | |
1556 | subreg = simplify_gen_subreg (V4SImode, tmp, V8HImode, 0); | |
d10cff95 CL |
1557 | mem_subreg = simplify_gen_subreg (V4SImode, operands[0], V8HImode, 0); |
1558 | emit_insn (gen_vsx_st_elemrev_v4si (mem_subreg, subreg)); | |
3ef9e1ec BS |
1559 | DONE; |
1560 | } | |
1561 | }) | |
1562 | ||
d10cff95 CL |
1563 | (define_insn "*vsx_st_elemrev_v2di_internal" |
1564 | [(set (match_operand:V2DI 0 "memory_operand" "=Z") | |
1565 | (vec_select:V2DI | |
1566 | (match_operand:V2DI 1 "vsx_register_operand" "wa") | |
1567 | (parallel [(const_int 1) (const_int 0)])))] | |
1568 | "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR" | |
1569 | "stxvd2x %x1,%y0" | |
1570 | [(set_attr "type" "vecstore")]) | |
1571 | ||
3ef9e1ec BS |
1572 | (define_insn "*vsx_st_elemrev_v8hi_internal" |
1573 | [(set (match_operand:V8HI 0 "memory_operand" "=Z") | |
1574 | (vec_select:V8HI | |
1575 | (match_operand:V8HI 1 "vsx_register_operand" "wa") | |
1576 | (parallel [(const_int 7) (const_int 6) | |
1577 | (const_int 5) (const_int 4) | |
1578 | (const_int 3) (const_int 2) | |
1579 | (const_int 1) (const_int 0)])))] | |
8fa97501 BS |
1580 | "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR" |
1581 | "stxvh8x %x1,%y0" | |
1582 | [(set_attr "type" "vecstore")]) | |
1583 | ||
3ef9e1ec BS |
1584 | (define_expand "vsx_st_elemrev_v16qi" |
1585 | [(set (match_operand:V16QI 0 "memory_operand" "=Z") | |
1586 | (vec_select:V16QI | |
1587 | (match_operand:V16QI 1 "vsx_register_operand" "wa") | |
1588 | (parallel [(const_int 15) (const_int 14) | |
1589 | (const_int 13) (const_int 12) | |
1590 | (const_int 11) (const_int 10) | |
1591 | (const_int 9) (const_int 8) | |
1592 | (const_int 7) (const_int 6) | |
1593 | (const_int 5) (const_int 4) | |
1594 | (const_int 3) (const_int 2) | |
1595 | (const_int 1) (const_int 0)])))] | |
1596 | "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN" | |
1597 | { | |
1598 | if (!TARGET_P9_VECTOR) | |
1599 | { | |
d10cff95 | 1600 | rtx mem_subreg, subreg, perm[16], pcv; |
3ef9e1ec BS |
1601 | rtx tmp = gen_reg_rtx (V16QImode); |
1602 | /* 3 is leftmost element in register */ | |
1603 | unsigned int reorder[16] = {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3}; | |
1604 | int i; | |
1605 | ||
1606 | for (i = 0; i < 16; ++i) | |
1607 | perm[i] = GEN_INT (reorder[i]); | |
1608 | ||
1609 | pcv = force_reg (V16QImode, | |
1610 | gen_rtx_CONST_VECTOR (V16QImode, | |
1611 | gen_rtvec_v (16, perm))); | |
1612 | emit_insn (gen_altivec_vperm_v16qi_direct (tmp, operands[1], | |
1613 | operands[1], pcv)); | |
1614 | subreg = simplify_gen_subreg (V4SImode, tmp, V16QImode, 0); | |
d10cff95 CL |
1615 | mem_subreg = simplify_gen_subreg (V4SImode, operands[0], V16QImode, 0); |
1616 | emit_insn (gen_vsx_st_elemrev_v4si (mem_subreg, subreg)); | |
3ef9e1ec BS |
1617 | DONE; |
1618 | } | |
1619 | }) | |
1620 | ||
1621 | (define_insn "*vsx_st_elemrev_v16qi_internal" | |
8fa97501 BS |
1622 | [(set (match_operand:V16QI 0 "memory_operand" "=Z") |
1623 | (vec_select:V16QI | |
3ef9e1ec BS |
1624 | (match_operand:V16QI 1 "vsx_register_operand" "wa") |
1625 | (parallel [(const_int 15) (const_int 14) | |
1626 | (const_int 13) (const_int 12) | |
1627 | (const_int 11) (const_int 10) | |
1628 | (const_int 9) (const_int 8) | |
1629 | (const_int 7) (const_int 6) | |
1630 | (const_int 5) (const_int 4) | |
1631 | (const_int 3) (const_int 2) | |
1632 | (const_int 1) (const_int 0)])))] | |
8fa97501 BS |
1633 | "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR" |
1634 | "stxvb16x %x1,%y0" | |
1635 | [(set_attr "type" "vecstore")]) | |
1636 | ||
29e6733c | 1637 | \f |
0609bdf2 MM |
1638 | ;; VSX vector floating point arithmetic instructions. The VSX scalar |
1639 | ;; instructions are now combined with the insn for the traditional floating | |
1640 | ;; point unit. | |
29e6733c | 1641 | (define_insn "*vsx_add<mode>3" |
012f609e SB |
1642 | [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa") |
1643 | (plus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa") | |
1644 | (match_operand:VSX_F 2 "vsx_register_operand" "wa")))] | |
29e6733c | 1645 | "VECTOR_UNIT_VSX_P (<MODE>mode)" |
6cc8f683 | 1646 | "xvadd<sd>p %x0,%x1,%x2" |
2c2aa74d | 1647 | [(set_attr "type" "<VStype_simple>")]) |
29e6733c MM |
1648 | |
1649 | (define_insn "*vsx_sub<mode>3" | |
012f609e SB |
1650 | [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa>") |
1651 | (minus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa") | |
1652 | (match_operand:VSX_F 2 "vsx_register_operand" "wa")))] | |
29e6733c | 1653 | "VECTOR_UNIT_VSX_P (<MODE>mode)" |
6cc8f683 | 1654 | "xvsub<sd>p %x0,%x1,%x2" |
2c2aa74d | 1655 | [(set_attr "type" "<VStype_simple>")]) |
29e6733c MM |
1656 | |
1657 | (define_insn "*vsx_mul<mode>3" | |
012f609e SB |
1658 | [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa") |
1659 | (mult:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa") | |
1660 | (match_operand:VSX_F 2 "vsx_register_operand" "wa")))] | |
29e6733c | 1661 | "VECTOR_UNIT_VSX_P (<MODE>mode)" |
6cc8f683 | 1662 | "xvmul<sd>p %x0,%x1,%x2" |
2c2aa74d | 1663 | [(set_attr "type" "<VStype_simple>")]) |
29e6733c | 1664 | |
2ccdda19 BS |
1665 | ; Emulate vector with scalar for vec_mul in V2DImode |
1666 | (define_insn_and_split "vsx_mul_v2di" | |
1667 | [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa") | |
1668 | (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa") | |
1669 | (match_operand:V2DI 2 "vsx_register_operand" "wa")] | |
1670 | UNSPEC_VSX_MULSD))] | |
1671 | "VECTOR_MEM_VSX_P (V2DImode)" | |
1672 | "#" | |
3cb8ee5c | 1673 | "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed" |
2ccdda19 | 1674 | [(const_int 0)] |
2ccdda19 BS |
1675 | { |
1676 | rtx op0 = operands[0]; | |
1677 | rtx op1 = operands[1]; | |
1678 | rtx op2 = operands[2]; | |
f1ad419e CL |
1679 | |
1680 | if (TARGET_POWER10) | |
1681 | emit_insn (gen_mulv2di3 (op0, op1, op2) ); | |
1682 | ||
da86c81e PB |
1683 | else |
1684 | { | |
f1ad419e CL |
1685 | rtx op3 = gen_reg_rtx (DImode); |
1686 | rtx op4 = gen_reg_rtx (DImode); | |
1687 | rtx op5 = gen_reg_rtx (DImode); | |
1688 | emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0))); | |
1689 | emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0))); | |
1690 | if (TARGET_POWERPC64) | |
1691 | emit_insn (gen_muldi3 (op5, op3, op4)); | |
1692 | else | |
1693 | { | |
1694 | rtx ret = expand_mult (DImode, op3, op4, NULL, 0, false); | |
1695 | emit_move_insn (op5, ret); | |
1696 | } | |
1697 | emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1))); | |
1698 | emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1))); | |
1699 | if (TARGET_POWERPC64) | |
1700 | emit_insn (gen_muldi3 (op3, op3, op4)); | |
1701 | else | |
1702 | { | |
1703 | rtx ret = expand_mult (DImode, op3, op4, NULL, 0, false); | |
1704 | emit_move_insn (op3, ret); | |
1705 | } | |
1706 | emit_insn (gen_vsx_concat_v2di (op0, op5, op3)); | |
da86c81e | 1707 | } |
d5e6e133 | 1708 | DONE; |
6c332313 | 1709 | } |
2ccdda19 BS |
1710 | [(set_attr "type" "mul")]) |
1711 | ||
29e6733c | 1712 | (define_insn "*vsx_div<mode>3" |
012f609e SB |
1713 | [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa") |
1714 | (div:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa") | |
1715 | (match_operand:VSX_F 2 "vsx_register_operand" "wa")))] | |
29e6733c | 1716 | "VECTOR_UNIT_VSX_P (<MODE>mode)" |
6cc8f683 | 1717 | "xvdiv<sd>p %x0,%x1,%x2" |
2c2aa74d | 1718 | [(set_attr "type" "<VStype_div>")]) |
29e6733c | 1719 | |
2ccdda19 BS |
1720 | ; Emulate vector with scalar for vec_div in V2DImode |
1721 | (define_insn_and_split "vsx_div_v2di" | |
1722 | [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa") | |
1723 | (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa") | |
1724 | (match_operand:V2DI 2 "vsx_register_operand" "wa")] | |
1725 | UNSPEC_VSX_DIVSD))] | |
1726 | "VECTOR_MEM_VSX_P (V2DImode)" | |
1727 | "#" | |
3cb8ee5c | 1728 | "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed" |
2ccdda19 | 1729 | [(const_int 0)] |
2ccdda19 BS |
1730 | { |
1731 | rtx op0 = operands[0]; | |
1732 | rtx op1 = operands[1]; | |
1733 | rtx op2 = operands[2]; | |
1734 | rtx op3 = gen_reg_rtx (DImode); | |
1735 | rtx op4 = gen_reg_rtx (DImode); | |
1736 | rtx op5 = gen_reg_rtx (DImode); | |
1737 | emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0))); | |
1738 | emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0))); | |
da86c81e PB |
1739 | if (TARGET_POWERPC64) |
1740 | emit_insn (gen_divdi3 (op5, op3, op4)); | |
1741 | else | |
1742 | { | |
1743 | rtx libfunc = optab_libfunc (sdiv_optab, DImode); | |
1744 | rtx target = emit_library_call_value (libfunc, | |
1745 | op5, LCT_NORMAL, DImode, | |
1746 | op3, DImode, | |
1747 | op4, DImode); | |
1748 | emit_move_insn (op5, target); | |
1749 | } | |
2ccdda19 BS |
1750 | emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1))); |
1751 | emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1))); | |
da86c81e PB |
1752 | if (TARGET_POWERPC64) |
1753 | emit_insn (gen_divdi3 (op3, op3, op4)); | |
1754 | else | |
1755 | { | |
1756 | rtx libfunc = optab_libfunc (sdiv_optab, DImode); | |
1757 | rtx target = emit_library_call_value (libfunc, | |
1758 | op3, LCT_NORMAL, DImode, | |
1759 | op3, DImode, | |
1760 | op4, DImode); | |
1761 | emit_move_insn (op3, target); | |
1762 | } | |
2ccdda19 | 1763 | emit_insn (gen_vsx_concat_v2di (op0, op5, op3)); |
d5e6e133 | 1764 | DONE; |
6c332313 | 1765 | } |
2ccdda19 BS |
1766 | [(set_attr "type" "div")]) |
1767 | ||
1768 | (define_insn_and_split "vsx_udiv_v2di" | |
1769 | [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa") | |
1770 | (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa") | |
1771 | (match_operand:V2DI 2 "vsx_register_operand" "wa")] | |
1772 | UNSPEC_VSX_DIVUD))] | |
1773 | "VECTOR_MEM_VSX_P (V2DImode)" | |
1774 | "#" | |
3cb8ee5c | 1775 | "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed" |
2ccdda19 | 1776 | [(const_int 0)] |
2ccdda19 BS |
1777 | { |
1778 | rtx op0 = operands[0]; | |
1779 | rtx op1 = operands[1]; | |
1780 | rtx op2 = operands[2]; | |
f1ad419e CL |
1781 | |
1782 | if (TARGET_POWER10) | |
1783 | emit_insn (gen_udivv2di3 (op0, op1, op2) ); | |
1784 | else | |
1785 | { | |
1786 | rtx op3 = gen_reg_rtx (DImode); | |
1787 | rtx op4 = gen_reg_rtx (DImode); | |
1788 | rtx op5 = gen_reg_rtx (DImode); | |
1789 | ||
1790 | emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0))); | |
1791 | emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0))); | |
1792 | ||
1793 | if (TARGET_POWERPC64) | |
1794 | emit_insn (gen_udivdi3 (op5, op3, op4)); | |
1795 | else | |
1796 | { | |
1797 | rtx libfunc = optab_libfunc (udiv_optab, DImode); | |
1798 | rtx target = emit_library_call_value (libfunc, | |
1799 | op5, LCT_NORMAL, DImode, | |
1800 | op3, DImode, | |
1801 | op4, DImode); | |
1802 | emit_move_insn (op5, target); | |
1803 | } | |
1804 | emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1))); | |
1805 | emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1))); | |
1806 | ||
1807 | if (TARGET_POWERPC64) | |
1808 | emit_insn (gen_udivdi3 (op3, op3, op4)); | |
1809 | else | |
1810 | { | |
1811 | rtx libfunc = optab_libfunc (udiv_optab, DImode); | |
1812 | rtx target = emit_library_call_value (libfunc, | |
1813 | op3, LCT_NORMAL, DImode, | |
1814 | op3, DImode, | |
1815 | op4, DImode); | |
1816 | emit_move_insn (op3, target); | |
1817 | } | |
1818 | emit_insn (gen_vsx_concat_v2di (op0, op5, op3)); | |
1819 | } | |
1820 | DONE; | |
6c332313 | 1821 | } |
2ccdda19 BS |
1822 | [(set_attr "type" "div")]) |
1823 | ||
f03122f2 CL |
1824 | ;; Vector integer signed/unsigned divide |
1825 | (define_insn "vsx_div_v1ti" | |
1826 | [(set (match_operand:V1TI 0 "vsx_register_operand" "=v") | |
1827 | (unspec:V1TI [(match_operand:V1TI 1 "vsx_register_operand" "v") | |
1828 | (match_operand:V1TI 2 "vsx_register_operand" "v")] | |
1829 | UNSPEC_VSX_DIVSQ))] | |
1830 | "TARGET_POWER10" | |
1831 | "vdivsq %0,%1,%2" | |
1832 | [(set_attr "type" "div")]) | |
1833 | ||
1834 | (define_insn "vsx_udiv_v1ti" | |
1835 | [(set (match_operand:V1TI 0 "vsx_register_operand" "=v") | |
1836 | (unspec:V1TI [(match_operand:V1TI 1 "vsx_register_operand" "v") | |
1837 | (match_operand:V1TI 2 "vsx_register_operand" "v")] | |
1838 | UNSPEC_VSX_DIVUQ))] | |
1839 | "TARGET_POWER10" | |
1840 | "vdivuq %0,%1,%2" | |
1841 | [(set_attr "type" "div")]) | |
1842 | ||
1843 | (define_insn "vsx_dives_v1ti" | |
1844 | [(set (match_operand:V1TI 0 "vsx_register_operand" "=v") | |
1845 | (unspec:V1TI [(match_operand:V1TI 1 "vsx_register_operand" "v") | |
1846 | (match_operand:V1TI 2 "vsx_register_operand" "v")] | |
1847 | UNSPEC_VSX_DIVESQ))] | |
1848 | "TARGET_POWER10" | |
1849 | "vdivesq %0,%1,%2" | |
1850 | [(set_attr "type" "div")]) | |
1851 | ||
1852 | (define_insn "vsx_diveu_v1ti" | |
1853 | [(set (match_operand:V1TI 0 "vsx_register_operand" "=v") | |
1854 | (unspec:V1TI [(match_operand:V1TI 1 "vsx_register_operand" "v") | |
1855 | (match_operand:V1TI 2 "vsx_register_operand" "v")] | |
1856 | UNSPEC_VSX_DIVEUQ))] | |
1857 | "TARGET_POWER10" | |
1858 | "vdiveuq %0,%1,%2" | |
1859 | [(set_attr "type" "div")]) | |
1860 | ||
1861 | (define_insn "vsx_mods_v1ti" | |
1862 | [(set (match_operand:V1TI 0 "vsx_register_operand" "=v") | |
1863 | (unspec:V1TI [(match_operand:V1TI 1 "vsx_register_operand" "v") | |
1864 | (match_operand:V1TI 2 "vsx_register_operand" "v")] | |
1865 | UNSPEC_VSX_MODSQ))] | |
1866 | "TARGET_POWER10" | |
1867 | "vmodsq %0,%1,%2" | |
1868 | [(set_attr "type" "div")]) | |
1869 | ||
1870 | (define_insn "vsx_modu_v1ti" | |
1871 | [(set (match_operand:V1TI 0 "vsx_register_operand" "=v") | |
1872 | (unspec:V1TI [(match_operand:V1TI 1 "vsx_register_operand" "v") | |
1873 | (match_operand:V1TI 2 "vsx_register_operand" "v")] | |
1874 | UNSPEC_VSX_MODUQ))] | |
1875 | "TARGET_POWER10" | |
1876 | "vmoduq %0,%1,%2" | |
1877 | [(set_attr "type" "div")]) | |
1878 | ||
29e6733c MM |
1879 | ;; *tdiv* instruction returning the FG flag |
1880 | (define_expand "vsx_tdiv<mode>3_fg" | |
1881 | [(set (match_dup 3) | |
ad18eed2 SB |
1882 | (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand") |
1883 | (match_operand:VSX_B 2 "vsx_register_operand")] | |
29e6733c | 1884 | UNSPEC_VSX_TDIV)) |
ad18eed2 | 1885 | (set (match_operand:SI 0 "gpc_reg_operand") |
29e6733c MM |
1886 | (gt:SI (match_dup 3) |
1887 | (const_int 0)))] | |
1888 | "VECTOR_UNIT_VSX_P (<MODE>mode)" | |
1889 | { | |
1890 | operands[3] = gen_reg_rtx (CCFPmode); | |
1891 | }) | |
1892 | ||
1893 | ;; *tdiv* instruction returning the FE flag | |
1894 | (define_expand "vsx_tdiv<mode>3_fe" | |
1895 | [(set (match_dup 3) | |
ad18eed2 SB |
1896 | (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand") |
1897 | (match_operand:VSX_B 2 "vsx_register_operand")] | |
29e6733c | 1898 | UNSPEC_VSX_TDIV)) |
ad18eed2 | 1899 | (set (match_operand:SI 0 "gpc_reg_operand") |
29e6733c MM |
1900 | (eq:SI (match_dup 3) |
1901 | (const_int 0)))] | |
1902 | "VECTOR_UNIT_VSX_P (<MODE>mode)" | |
1903 | { | |
1904 | operands[3] = gen_reg_rtx (CCFPmode); | |
1905 | }) | |
1906 | ||
1907 | (define_insn "*vsx_tdiv<mode>3_internal" | |
012f609e SB |
1908 | [(set (match_operand:CCFP 0 "cc_reg_operand" "=x") |
1909 | (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "wa") | |
1910 | (match_operand:VSX_B 2 "vsx_register_operand" "wa")] | |
29e6733c MM |
1911 | UNSPEC_VSX_TDIV))] |
1912 | "VECTOR_UNIT_VSX_P (<MODE>mode)" | |
6cc8f683 | 1913 | "x<VSv>tdiv<sd>p %0,%x1,%x2" |
2c2aa74d | 1914 | [(set_attr "type" "<VStype_simple>")]) |
29e6733c MM |
1915 | |
1916 | (define_insn "vsx_fre<mode>2" | |
012f609e SB |
1917 | [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa") |
1918 | (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "wa")] | |
29e6733c MM |
1919 | UNSPEC_FRES))] |
1920 | "VECTOR_UNIT_VSX_P (<MODE>mode)" | |
6cc8f683 | 1921 | "xvre<sd>p %x0,%x1" |
2c2aa74d | 1922 | [(set_attr "type" "<VStype_simple>")]) |
29e6733c MM |
1923 | |
1924 | (define_insn "*vsx_neg<mode>2" | |
012f609e SB |
1925 | [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa") |
1926 | (neg:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")))] | |
29e6733c | 1927 | "VECTOR_UNIT_VSX_P (<MODE>mode)" |
6cc8f683 | 1928 | "xvneg<sd>p %x0,%x1" |
2c2aa74d | 1929 | [(set_attr "type" "<VStype_simple>")]) |
29e6733c MM |
1930 | |
1931 | (define_insn "*vsx_abs<mode>2" | |
012f609e SB |
1932 | [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa") |
1933 | (abs:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")))] | |
29e6733c | 1934 | "VECTOR_UNIT_VSX_P (<MODE>mode)" |
6cc8f683 | 1935 | "xvabs<sd>p %x0,%x1" |
2c2aa74d | 1936 | [(set_attr "type" "<VStype_simple>")]) |
29e6733c MM |
1937 | |
1938 | (define_insn "vsx_nabs<mode>2" | |
012f609e | 1939 | [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa") |
0609bdf2 MM |
1940 | (neg:VSX_F |
1941 | (abs:VSX_F | |
012f609e | 1942 | (match_operand:VSX_F 1 "vsx_register_operand" "wa"))))] |
29e6733c | 1943 | "VECTOR_UNIT_VSX_P (<MODE>mode)" |
6cc8f683 | 1944 | "xvnabs<sd>p %x0,%x1" |
2c2aa74d | 1945 | [(set_attr "type" "<VStype_simple>")]) |
29e6733c MM |
1946 | |
1947 | (define_insn "vsx_smax<mode>3" | |
012f609e SB |
1948 | [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa") |
1949 | (smax:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa") | |
1950 | (match_operand:VSX_F 2 "vsx_register_operand" "wa")))] | |
29e6733c | 1951 | "VECTOR_UNIT_VSX_P (<MODE>mode)" |
6cc8f683 | 1952 | "xvmax<sd>p %x0,%x1,%x2" |
2c2aa74d | 1953 | [(set_attr "type" "<VStype_simple>")]) |
29e6733c MM |
1954 | |
1955 | (define_insn "*vsx_smin<mode>3" | |
012f609e SB |
1956 | [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa") |
1957 | (smin:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa") | |
1958 | (match_operand:VSX_F 2 "vsx_register_operand" "wa")))] | |
29e6733c | 1959 | "VECTOR_UNIT_VSX_P (<MODE>mode)" |
6cc8f683 | 1960 | "xvmin<sd>p %x0,%x1,%x2" |
2c2aa74d | 1961 | [(set_attr "type" "<VStype_simple>")]) |
29e6733c MM |
1962 | |
1963 | (define_insn "*vsx_sqrt<mode>2" | |
012f609e SB |
1964 | [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa") |
1965 | (sqrt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")))] | |
29e6733c | 1966 | "VECTOR_UNIT_VSX_P (<MODE>mode)" |
6cc8f683 | 1967 | "xvsqrt<sd>p %x0,%x1" |
1f5aa628 | 1968 | [(set_attr "type" "<sd>sqrt")]) |
29e6733c | 1969 | |
92902797 | 1970 | (define_insn "*vsx_rsqrte<mode>2" |
012f609e SB |
1971 | [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa") |
1972 | (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "wa")] | |
92902797 | 1973 | UNSPEC_RSQRT))] |
29e6733c | 1974 | "VECTOR_UNIT_VSX_P (<MODE>mode)" |
6cc8f683 | 1975 | "xvrsqrte<sd>p %x0,%x1" |
2c2aa74d | 1976 | [(set_attr "type" "<VStype_simple>")]) |
29e6733c MM |
1977 | |
1978 | ;; *tsqrt* returning the fg flag | |
1979 | (define_expand "vsx_tsqrt<mode>2_fg" | |
d36a53d6 | 1980 | [(set (match_dup 2) |
ad18eed2 | 1981 | (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")] |
29e6733c | 1982 | UNSPEC_VSX_TSQRT)) |
ad18eed2 | 1983 | (set (match_operand:SI 0 "gpc_reg_operand") |
d36a53d6 | 1984 | (gt:SI (match_dup 2) |
29e6733c MM |
1985 | (const_int 0)))] |
1986 | "VECTOR_UNIT_VSX_P (<MODE>mode)" | |
1987 | { | |
d36a53d6 | 1988 | operands[2] = gen_reg_rtx (CCFPmode); |
29e6733c MM |
1989 | }) |
1990 | ||
1991 | ;; *tsqrt* returning the fe flag | |
1992 | (define_expand "vsx_tsqrt<mode>2_fe" | |
d36a53d6 | 1993 | [(set (match_dup 2) |
ad18eed2 | 1994 | (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")] |
29e6733c | 1995 | UNSPEC_VSX_TSQRT)) |
ad18eed2 | 1996 | (set (match_operand:SI 0 "gpc_reg_operand") |
d36a53d6 | 1997 | (eq:SI (match_dup 2) |
29e6733c MM |
1998 | (const_int 0)))] |
1999 | "VECTOR_UNIT_VSX_P (<MODE>mode)" | |
2000 | { | |
d36a53d6 | 2001 | operands[2] = gen_reg_rtx (CCFPmode); |
29e6733c MM |
2002 | }) |
2003 | ||
2004 | (define_insn "*vsx_tsqrt<mode>2_internal" | |
012f609e SB |
2005 | [(set (match_operand:CCFP 0 "cc_reg_operand" "=x") |
2006 | (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "wa")] | |
29e6733c MM |
2007 | UNSPEC_VSX_TSQRT))] |
2008 | "VECTOR_UNIT_VSX_P (<MODE>mode)" | |
6cc8f683 | 2009 | "x<VSv>tsqrt<sd>p %0,%x1" |
2c2aa74d | 2010 | [(set_attr "type" "<VStype_simple>")]) |
29e6733c | 2011 | |
0609bdf2 MM |
2012 | ;; Fused vector multiply/add instructions. Support the classical Altivec |
2013 | ;; versions of fma, which allows the target to be a separate register from the | |
2014 | ;; 3 inputs. Under VSX, the target must be either the addend or the first | |
2015 | ;; multiply. | |
c36193c6 MM |
2016 | |
2017 | (define_insn "*vsx_fmav4sf4" | |
8d3620ba | 2018 | [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa,v") |
c36193c6 | 2019 | (fma:V4SF |
8d3620ba SB |
2020 | (match_operand:V4SF 1 "vsx_register_operand" "%wa,wa,v") |
2021 | (match_operand:V4SF 2 "vsx_register_operand" "wa,0,v") | |
2022 | (match_operand:V4SF 3 "vsx_register_operand" "0,wa,v")))] | |
c36193c6 MM |
2023 | "VECTOR_UNIT_VSX_P (V4SFmode)" |
2024 | "@ | |
c36193c6 MM |
2025 | xvmaddasp %x0,%x1,%x2 |
2026 | xvmaddmsp %x0,%x1,%x3 | |
2027 | vmaddfp %0,%1,%2,%3" | |
2028 | [(set_attr "type" "vecfloat")]) | |
2029 | ||
2030 | (define_insn "*vsx_fmav2df4" | |
85949949 | 2031 | [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa,wa") |
c36193c6 | 2032 | (fma:V2DF |
85949949 SB |
2033 | (match_operand:V2DF 1 "vsx_register_operand" "%wa,wa") |
2034 | (match_operand:V2DF 2 "vsx_register_operand" "wa,0") | |
2035 | (match_operand:V2DF 3 "vsx_register_operand" "0,wa")))] | |
c36193c6 MM |
2036 | "VECTOR_UNIT_VSX_P (V2DFmode)" |
2037 | "@ | |
c36193c6 MM |
2038 | xvmaddadp %x0,%x1,%x2 |
2039 | xvmaddmdp %x0,%x1,%x3" | |
4356b75d | 2040 | [(set_attr "type" "vecdouble")]) |
c36193c6 | 2041 | |
d6613781 | 2042 | (define_insn "*vsx_fms<mode>4" |
012f609e | 2043 | [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa,wa") |
c36193c6 | 2044 | (fma:VSX_F |
012f609e SB |
2045 | (match_operand:VSX_F 1 "vsx_register_operand" "%wa,wa") |
2046 | (match_operand:VSX_F 2 "vsx_register_operand" "wa,0") | |
c36193c6 | 2047 | (neg:VSX_F |
012f609e | 2048 | (match_operand:VSX_F 3 "vsx_register_operand" "0,wa"))))] |
29e6733c MM |
2049 | "VECTOR_UNIT_VSX_P (<MODE>mode)" |
2050 | "@ | |
6cc8f683 SB |
2051 | xvmsuba<sd>p %x0,%x1,%x2 |
2052 | xvmsubm<sd>p %x0,%x1,%x3" | |
4356b75d | 2053 | [(set_attr "type" "<VStype_mul>")]) |
c36193c6 | 2054 | |
d6613781 | 2055 | (define_insn "*vsx_nfma<mode>4" |
012f609e | 2056 | [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa,wa") |
c36193c6 MM |
2057 | (neg:VSX_F |
2058 | (fma:VSX_F | |
012f609e SB |
2059 | (match_operand:VSX_F 1 "vsx_register_operand" "wa,wa") |
2060 | (match_operand:VSX_F 2 "vsx_register_operand" "wa,0") | |
2061 | (match_operand:VSX_F 3 "vsx_register_operand" "0,wa"))))] | |
29e6733c | 2062 | "VECTOR_UNIT_VSX_P (<MODE>mode)" |
1b1562a5 | 2063 | "@ |
6cc8f683 SB |
2064 | xvnmadda<sd>p %x0,%x1,%x2 |
2065 | xvnmaddm<sd>p %x0,%x1,%x3" | |
2c2aa74d | 2066 | [(set_attr "type" "<VStype_mul>")]) |
29e6733c | 2067 | |
c36193c6 | 2068 | (define_insn "*vsx_nfmsv4sf4" |
8d3620ba | 2069 | [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa,v") |
c36193c6 MM |
2070 | (neg:V4SF |
2071 | (fma:V4SF | |
8d3620ba SB |
2072 | (match_operand:V4SF 1 "vsx_register_operand" "%wa,wa,v") |
2073 | (match_operand:V4SF 2 "vsx_register_operand" "wa,0,v") | |
c36193c6 | 2074 | (neg:V4SF |
8d3620ba | 2075 | (match_operand:V4SF 3 "vsx_register_operand" "0,wa,v")))))] |
c36193c6 MM |
2076 | "VECTOR_UNIT_VSX_P (V4SFmode)" |
2077 | "@ | |
c36193c6 MM |
2078 | xvnmsubasp %x0,%x1,%x2 |
2079 | xvnmsubmsp %x0,%x1,%x3 | |
2080 | vnmsubfp %0,%1,%2,%3" | |
2081 | [(set_attr "type" "vecfloat")]) | |
2082 | ||
2083 | (define_insn "*vsx_nfmsv2df4" | |
85949949 | 2084 | [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa,wa") |
c36193c6 MM |
2085 | (neg:V2DF |
2086 | (fma:V2DF | |
85949949 SB |
2087 | (match_operand:V2DF 1 "vsx_register_operand" "%wa,wa") |
2088 | (match_operand:V2DF 2 "vsx_register_operand" "wa,0") | |
c36193c6 | 2089 | (neg:V2DF |
85949949 | 2090 | (match_operand:V2DF 3 "vsx_register_operand" "0,wa")))))] |
c36193c6 MM |
2091 | "VECTOR_UNIT_VSX_P (V2DFmode)" |
2092 | "@ | |
c36193c6 MM |
2093 | xvnmsubadp %x0,%x1,%x2 |
2094 | xvnmsubmdp %x0,%x1,%x3" | |
4356b75d | 2095 | [(set_attr "type" "vecdouble")]) |
29e6733c | 2096 | |
29e6733c MM |
2097 | ;; Vector conditional expressions (no scalar version for these instructions) |
2098 | (define_insn "vsx_eq<mode>" | |
012f609e SB |
2099 | [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa") |
2100 | (eq:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa") | |
2101 | (match_operand:VSX_F 2 "vsx_register_operand" "wa")))] | |
29e6733c | 2102 | "VECTOR_UNIT_VSX_P (<MODE>mode)" |
6cc8f683 | 2103 | "xvcmpeq<sd>p %x0,%x1,%x2" |
2c2aa74d | 2104 | [(set_attr "type" "<VStype_simple>")]) |
29e6733c MM |
2105 | |
2106 | (define_insn "vsx_gt<mode>" | |
012f609e SB |
2107 | [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa") |
2108 | (gt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa") | |
2109 | (match_operand:VSX_F 2 "vsx_register_operand" "wa")))] | |
29e6733c | 2110 | "VECTOR_UNIT_VSX_P (<MODE>mode)" |
6cc8f683 | 2111 | "xvcmpgt<sd>p %x0,%x1,%x2" |
2c2aa74d | 2112 | [(set_attr "type" "<VStype_simple>")]) |
29e6733c MM |
2113 | |
2114 | (define_insn "*vsx_ge<mode>" | |
012f609e SB |
2115 | [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa") |
2116 | (ge:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa") | |
2117 | (match_operand:VSX_F 2 "vsx_register_operand" "wa")))] | |
29e6733c | 2118 | "VECTOR_UNIT_VSX_P (<MODE>mode)" |
6cc8f683 | 2119 | "xvcmpge<sd>p %x0,%x1,%x2" |
2c2aa74d | 2120 | [(set_attr "type" "<VStype_simple>")]) |
29e6733c | 2121 | |
29e6733c MM |
2122 | ;; Compare vectors producing a vector result and a predicate, setting CR6 to |
2123 | ;; indicate a combined status | |
2124 | (define_insn "*vsx_eq_<mode>_p" | |
b65261f6 | 2125 | [(set (reg:CC CR6_REGNO) |
29e6733c | 2126 | (unspec:CC |
012f609e SB |
2127 | [(eq:CC (match_operand:VSX_F 1 "vsx_register_operand" "wa") |
2128 | (match_operand:VSX_F 2 "vsx_register_operand" "wa"))] | |
29e6733c | 2129 | UNSPEC_PREDICATE)) |
012f609e | 2130 | (set (match_operand:VSX_F 0 "vsx_register_operand" "=wa") |
29e6733c MM |
2131 | (eq:VSX_F (match_dup 1) |
2132 | (match_dup 2)))] | |
2133 | "VECTOR_UNIT_VSX_P (<MODE>mode)" | |
6cc8f683 | 2134 | "xvcmpeq<sd>p. %x0,%x1,%x2" |
4356b75d | 2135 | [(set_attr "type" "<VStype_simple>")]) |
29e6733c MM |
2136 | |
2137 | (define_insn "*vsx_gt_<mode>_p" | |
b65261f6 | 2138 | [(set (reg:CC CR6_REGNO) |
29e6733c | 2139 | (unspec:CC |
012f609e SB |
2140 | [(gt:CC (match_operand:VSX_F 1 "vsx_register_operand" "wa") |
2141 | (match_operand:VSX_F 2 "vsx_register_operand" "wa"))] | |
29e6733c | 2142 | UNSPEC_PREDICATE)) |
012f609e | 2143 | (set (match_operand:VSX_F 0 "vsx_register_operand" "=wa") |
29e6733c MM |
2144 | (gt:VSX_F (match_dup 1) |
2145 | (match_dup 2)))] | |
2146 | "VECTOR_UNIT_VSX_P (<MODE>mode)" | |
6cc8f683 | 2147 | "xvcmpgt<sd>p. %x0,%x1,%x2" |
4356b75d | 2148 | [(set_attr "type" "<VStype_simple>")]) |
29e6733c | 2149 | |
cf5d0fc2 WS |
2150 | ;; xvtlsbb BF,XB |
2151 | ;; Set the CR field BF to indicate if the lowest bit (bit 7) of every byte | |
2152 | ;; element in VSR[XB] is equal to 1 (ALL_TRUE) or equal to 0 (ALL_FALSE). | |
2153 | (define_insn "*xvtlsbb_internal" | |
2154 | [(set (match_operand:CC 0 "cc_reg_operand" "=y") | |
2155 | (unspec:CC [(match_operand:V16QI 1 "vsx_register_operand" "wa")] | |
2156 | UNSPEC_XVTLSBB))] | |
2157 | "TARGET_POWER10" | |
2158 | "xvtlsbb %0,%x1" | |
2159 | [(set_attr "type" "logical")]) | |
2160 | ||
2161 | ;; Vector Test Least Significant Bit by Byte | |
2162 | ;; for the implementation of the builtin | |
2163 | ;; __builtin_vec_test_lsbb_all_ones | |
2164 | ;; int vec_test_lsbb_all_ones (vector unsigned char); | |
2165 | ;; and | |
2166 | ;; __builtin_vec_test_lsbb_all_zeros | |
2167 | ;; int vec_test_lsbb_all_zeros (vector unsigned char); | |
2168 | (define_expand "xvtlsbbo" | |
2169 | [(set (match_dup 2) | |
2170 | (unspec:CC [(match_operand:V16QI 1 "vsx_register_operand" "v")] | |
2171 | UNSPEC_XVTLSBB)) | |
2172 | (set (match_operand:SI 0 "gpc_reg_operand" "=r") | |
2173 | (lt:SI (match_dup 2) (const_int 0)))] | |
2174 | "TARGET_POWER10" | |
2175 | { | |
2176 | operands[2] = gen_reg_rtx (CCmode); | |
2177 | }) | |
2178 | (define_expand "xvtlsbbz" | |
2179 | [(set (match_dup 2) | |
2180 | (unspec:CC [(match_operand:V16QI 1 "vsx_register_operand" "v")] | |
2181 | UNSPEC_XVTLSBB)) | |
2182 | (set (match_operand:SI 0 "gpc_reg_operand" "=r") | |
2183 | (eq:SI (match_dup 2) (const_int 0)))] | |
2184 | "TARGET_POWER10" | |
2185 | { | |
2186 | operands[2] = gen_reg_rtx (CCmode); | |
2187 | }) | |
2188 | ||
29e6733c | 2189 | (define_insn "*vsx_ge_<mode>_p" |
b65261f6 | 2190 | [(set (reg:CC CR6_REGNO) |
29e6733c | 2191 | (unspec:CC |
012f609e SB |
2192 | [(ge:CC (match_operand:VSX_F 1 "vsx_register_operand" "wa") |
2193 | (match_operand:VSX_F 2 "vsx_register_operand" "wa"))] | |
29e6733c | 2194 | UNSPEC_PREDICATE)) |
012f609e | 2195 | (set (match_operand:VSX_F 0 "vsx_register_operand" "=wa") |
29e6733c MM |
2196 | (ge:VSX_F (match_dup 1) |
2197 | (match_dup 2)))] | |
2198 | "VECTOR_UNIT_VSX_P (<MODE>mode)" | |
6cc8f683 | 2199 | "xvcmpge<sd>p. %x0,%x1,%x2" |
4356b75d | 2200 | [(set_attr "type" "<VStype_simple>")]) |
29e6733c | 2201 | |
29e6733c MM |
2202 | ;; Copy sign |
2203 | (define_insn "vsx_copysign<mode>3" | |
012f609e | 2204 | [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa") |
0609bdf2 | 2205 | (unspec:VSX_F |
012f609e SB |
2206 | [(match_operand:VSX_F 1 "vsx_register_operand" "wa") |
2207 | (match_operand:VSX_F 2 "vsx_register_operand" "wa")] | |
8119a6a6 | 2208 | UNSPEC_COPYSIGN))] |
29e6733c | 2209 | "VECTOR_UNIT_VSX_P (<MODE>mode)" |
6cc8f683 | 2210 | "xvcpsgn<sd>p %x0,%x2,%x1" |
2c2aa74d | 2211 | [(set_attr "type" "<VStype_simple>")]) |
29e6733c MM |
2212 | |
2213 | ;; For the conversions, limit the register class for the integer value to be | |
2214 | ;; the fprs because we don't want to add the altivec registers to movdi/movsi. | |
2215 | ;; For the unsigned tests, there isn't a generic double -> unsigned conversion | |
2216 | ;; in rs6000.md so don't test VECTOR_UNIT_VSX_P, just test against VSX. | |
8a480dc3 AM |
2217 | ;; Don't use vsx_register_operand here, use gpc_reg_operand to match rs6000.md |
2218 | ;; in allowing virtual registers. | |
29e6733c | 2219 | (define_insn "vsx_float<VSi><mode>2" |
012f609e SB |
2220 | [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=wa") |
2221 | (float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "wa")))] | |
29e6733c | 2222 | "VECTOR_UNIT_VSX_P (<MODE>mode)" |
6cc8f683 | 2223 | "xvcvsx<VSc><sd>p %x0,%x1" |
2c2aa74d | 2224 | [(set_attr "type" "<VStype_simple>")]) |
29e6733c MM |
2225 | |
2226 | (define_insn "vsx_floatuns<VSi><mode>2" | |
012f609e SB |
2227 | [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=wa") |
2228 | (unsigned_float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "wa")))] | |
29e6733c | 2229 | "VECTOR_UNIT_VSX_P (<MODE>mode)" |
6cc8f683 | 2230 | "xvcvux<VSc><sd>p %x0,%x1" |
2c2aa74d | 2231 | [(set_attr "type" "<VStype_simple>")]) |
29e6733c MM |
2232 | |
2233 | (define_insn "vsx_fix_trunc<mode><VSi>2" | |
012f609e SB |
2234 | [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=wa") |
2235 | (fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "wa")))] | |
29e6733c | 2236 | "VECTOR_UNIT_VSX_P (<MODE>mode)" |
6cc8f683 | 2237 | "x<VSv>cv<sd>psx<VSc>s %x0,%x1" |
2c2aa74d | 2238 | [(set_attr "type" "<VStype_simple>")]) |
29e6733c MM |
2239 | |
2240 | (define_insn "vsx_fixuns_trunc<mode><VSi>2" | |
012f609e SB |
2241 | [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=wa") |
2242 | (unsigned_fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "wa")))] | |
29e6733c | 2243 | "VECTOR_UNIT_VSX_P (<MODE>mode)" |
6cc8f683 | 2244 | "x<VSv>cv<sd>pux<VSc>s %x0,%x1" |
2c2aa74d | 2245 | [(set_attr "type" "<VStype_simple>")]) |
29e6733c MM |
2246 | |
2247 | ;; Math rounding functions | |
6cc8f683 | 2248 | (define_insn "vsx_x<VSv>r<sd>pi" |
012f609e SB |
2249 | [(set (match_operand:VSX_B 0 "vsx_register_operand" "=wa") |
2250 | (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "wa")] | |
29e6733c MM |
2251 | UNSPEC_VSX_ROUND_I))] |
2252 | "VECTOR_UNIT_VSX_P (<MODE>mode)" | |
6cc8f683 | 2253 | "x<VSv>r<sd>pi %x0,%x1" |
2c2aa74d | 2254 | [(set_attr "type" "<VStype_simple>")]) |
29e6733c | 2255 | |
6cc8f683 | 2256 | (define_insn "vsx_x<VSv>r<sd>pic" |
012f609e SB |
2257 | [(set (match_operand:VSX_B 0 "vsx_register_operand" "=wa") |
2258 | (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "wa")] | |
29e6733c MM |
2259 | UNSPEC_VSX_ROUND_IC))] |
2260 | "VECTOR_UNIT_VSX_P (<MODE>mode)" | |
6cc8f683 | 2261 | "x<VSv>r<sd>pic %x0,%x1" |
2c2aa74d | 2262 | [(set_attr "type" "<VStype_simple>")]) |
29e6733c MM |
2263 | |
2264 | (define_insn "vsx_btrunc<mode>2" | |
012f609e SB |
2265 | [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa") |
2266 | (fix:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")))] | |
29e6733c | 2267 | "VECTOR_UNIT_VSX_P (<MODE>mode)" |
6cc8f683 | 2268 | "xvr<sd>piz %x0,%x1" |
2c2aa74d | 2269 | [(set_attr "type" "<VStype_simple>")]) |
29e6733c MM |
2270 | |
2271 | (define_insn "*vsx_b2trunc<mode>2" | |
012f609e SB |
2272 | [(set (match_operand:VSX_B 0 "vsx_register_operand" "=wa") |
2273 | (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "wa")] | |
29e6733c MM |
2274 | UNSPEC_FRIZ))] |
2275 | "VECTOR_UNIT_VSX_P (<MODE>mode)" | |
6cc8f683 | 2276 | "x<VSv>r<sd>piz %x0,%x1" |
2c2aa74d | 2277 | [(set_attr "type" "<VStype_simple>")]) |
29e6733c MM |
2278 | |
2279 | (define_insn "vsx_floor<mode>2" | |
012f609e SB |
2280 | [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa") |
2281 | (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "wa")] | |
29e6733c MM |
2282 | UNSPEC_FRIM))] |
2283 | "VECTOR_UNIT_VSX_P (<MODE>mode)" | |
6cc8f683 | 2284 | "xvr<sd>pim %x0,%x1" |
2c2aa74d | 2285 | [(set_attr "type" "<VStype_simple>")]) |
29e6733c MM |
2286 | |
2287 | (define_insn "vsx_ceil<mode>2" | |
012f609e SB |
2288 | [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa") |
2289 | (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "wa")] | |
29e6733c MM |
2290 | UNSPEC_FRIP))] |
2291 | "VECTOR_UNIT_VSX_P (<MODE>mode)" | |
6cc8f683 | 2292 | "xvr<sd>pip %x0,%x1" |
2c2aa74d | 2293 | [(set_attr "type" "<VStype_simple>")]) |
29e6733c MM |
2294 | |
2295 | \f | |
2296 | ;; VSX convert to/from double vector | |
2297 | ||
2298 | ;; Convert between single and double precision | |
2299 | ;; Don't use xscvspdp and xscvdpsp for scalar conversions, since the normal | |
2300 | ;; scalar single precision instructions internally use the double format. | |
2301 | ;; Prefer the altivec registers, since we likely will need to do a vperm | |
1f5aa628 SB |
2302 | (define_insn "vsx_xscvdpsp" |
2303 | [(set (match_operand:V4SF 0 "vsx_register_operand" "=f,?wa") | |
2304 | (unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "f,wa")] | |
29e6733c | 2305 | UNSPEC_VSX_CVSPDP))] |
1f5aa628 SB |
2306 | "VECTOR_UNIT_VSX_P (DFmode)" |
2307 | "xscvdpsp %x0,%x1" | |
2308 | [(set_attr "type" "fp")]) | |
2309 | ||
6485d5d6 | 2310 | (define_insn "vsx_xvcvspdp_be" |
1f5aa628 | 2311 | [(set (match_operand:V2DF 0 "vsx_register_operand" "=v,?wa") |
6485d5d6 KL |
2312 | (float_extend:V2DF |
2313 | (vec_select:V2SF (match_operand:V4SF 1 "vsx_register_operand" "wa,wa") | |
2314 | (parallel [(const_int 0) (const_int 2)]))))] | |
2315 | "VECTOR_UNIT_VSX_P (V4SFmode) && BYTES_BIG_ENDIAN" | |
2316 | "xvcvspdp %x0,%x1" | |
2317 | [(set_attr "type" "vecdouble")]) | |
2318 | ||
2319 | (define_insn "vsx_xvcvspdp_le" | |
2320 | [(set (match_operand:V2DF 0 "vsx_register_operand" "=v,?wa") | |
2321 | (float_extend:V2DF | |
2322 | (vec_select:V2SF (match_operand:V4SF 1 "vsx_register_operand" "wa,wa") | |
2323 | (parallel [(const_int 1) (const_int 3)]))))] | |
2324 | "VECTOR_UNIT_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN" | |
1f5aa628 SB |
2325 | "xvcvspdp %x0,%x1" |
2326 | [(set_attr "type" "vecdouble")]) | |
2327 | ||
6485d5d6 KL |
2328 | (define_expand "vsx_xvcvspdp" |
2329 | [(match_operand:V2DF 0 "vsx_register_operand") | |
2330 | (match_operand:V4SF 1 "vsx_register_operand")] | |
2331 | "VECTOR_UNIT_VSX_P (V4SFmode)" | |
2332 | { | |
2333 | if (BYTES_BIG_ENDIAN) | |
2334 | emit_insn (gen_vsx_xvcvspdp_be (operands[0], operands[1])); | |
2335 | else | |
2336 | emit_insn (gen_vsx_xvcvspdp_le (operands[0], operands[1])); | |
2337 | DONE; | |
2338 | }) | |
2339 | ||
1f5aa628 SB |
2340 | (define_insn "vsx_xvcvdpsp" |
2341 | [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,?wa") | |
2342 | (unspec:V4SF [(match_operand:V2DF 1 "vsx_register_operand" "v,wa")] | |
2343 | UNSPEC_VSX_CVSPDP))] | |
2344 | "VECTOR_UNIT_VSX_P (V2DFmode)" | |
2345 | "xvcvdpsp %x0,%x1" | |
2346 | [(set_attr "type" "vecdouble")]) | |
29e6733c MM |
2347 | |
2348 | ;; xscvspdp, represent the scalar SF type as V4SF | |
2349 | (define_insn "vsx_xscvspdp" | |
cc998fd5 | 2350 | [(set (match_operand:DF 0 "vsx_register_operand" "=wa") |
59f5868d | 2351 | (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")] |
29e6733c | 2352 | UNSPEC_VSX_CVSPDP))] |
df5a9a7c | 2353 | "VECTOR_UNIT_VSX_P (V4SFmode)" |
29e6733c MM |
2354 | "xscvspdp %x0,%x1" |
2355 | [(set_attr "type" "fp")]) | |
2356 | ||
2f448503 MM |
2357 | ;; Same as vsx_xscvspdp, but use SF as the type |
2358 | (define_insn "vsx_xscvspdp_scalar2" | |
72e3386e | 2359 | [(set (match_operand:SF 0 "vsx_register_operand" "=wa") |
2f448503 MM |
2360 | (unspec:SF [(match_operand:V4SF 1 "vsx_register_operand" "wa")] |
2361 | UNSPEC_VSX_CVSPDP))] | |
2362 | "VECTOR_UNIT_VSX_P (V4SFmode)" | |
2363 | "xscvspdp %x0,%x1" | |
2364 | [(set_attr "type" "fp")]) | |
2365 | ||
26bca0ed CL |
2366 | ;; Generate xvcvhpsp instruction |
2367 | (define_insn "vsx_xvcvhpsp" | |
2368 | [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa") | |
2369 | (unspec:V4SF [(match_operand: V16QI 1 "vsx_register_operand" "wa")] | |
2370 | UNSPEC_VSX_CVHPSP))] | |
2371 | "TARGET_P9_VECTOR" | |
2372 | "xvcvhpsp %x0,%x1" | |
2373 | [(set_attr "type" "vecfloat")]) | |
2374 | ||
58b475a2 WS |
2375 | ;; Generate xvcvsphp |
2376 | (define_insn "vsx_xvcvsphp" | |
2377 | [(set (match_operand:V4SI 0 "register_operand" "=wa") | |
2378 | (unspec:V4SI [(match_operand:V4SF 1 "vsx_register_operand" "wa")] | |
2379 | UNSPEC_VSX_XVCVSPHP))] | |
2380 | "TARGET_P9_VECTOR" | |
2381 | "xvcvsphp %x0,%x1" | |
2382 | [(set_attr "type" "vecfloat")]) | |
2383 | ||
29e6733c MM |
2384 | ;; xscvdpsp used for splat'ing a scalar to V4SF, knowing that the internal SF |
2385 | ;; format of scalars is actually DF. | |
2386 | (define_insn "vsx_xscvdpsp_scalar" | |
2387 | [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa") | |
72e3386e | 2388 | (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "wa")] |
29e6733c | 2389 | UNSPEC_VSX_CVSPDP))] |
df5a9a7c | 2390 | "VECTOR_UNIT_VSX_P (V4SFmode)" |
29e6733c MM |
2391 | "xscvdpsp %x0,%x1" |
2392 | [(set_attr "type" "fp")]) | |
2393 | ||
0bd62dca MM |
2394 | ;; ISA 2.07 xscvdpspn/xscvspdpn that does not raise an error on signalling NaNs |
2395 | (define_insn "vsx_xscvdpspn" | |
72e3386e | 2396 | [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa") |
cc998fd5 | 2397 | (unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "wa")] |
0bd62dca MM |
2398 | UNSPEC_VSX_CVDPSPN))] |
2399 | "TARGET_XSCVDPSPN" | |
2400 | "xscvdpspn %x0,%x1" | |
2401 | [(set_attr "type" "fp")]) | |
2402 | ||
2403 | (define_insn "vsx_xscvspdpn" | |
cc998fd5 | 2404 | [(set (match_operand:DF 0 "vsx_register_operand" "=wa") |
57e6b981 | 2405 | (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")] |
0bd62dca MM |
2406 | UNSPEC_VSX_CVSPDPN))] |
2407 | "TARGET_XSCVSPDPN" | |
2408 | "xscvspdpn %x0,%x1" | |
2409 | [(set_attr "type" "fp")]) | |
2410 | ||
2411 | (define_insn "vsx_xscvdpspn_scalar" | |
57e6b981 | 2412 | [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa") |
72e3386e | 2413 | (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "wa")] |
0bd62dca MM |
2414 | UNSPEC_VSX_CVDPSPN))] |
2415 | "TARGET_XSCVDPSPN" | |
2416 | "xscvdpspn %x0,%x1" | |
2417 | [(set_attr "type" "fp")]) | |
2418 | ||
2419 | ;; Used by direct move to move a SFmode value from GPR to VSX register | |
2420 | (define_insn "vsx_xscvspdpn_directmove" | |
2421 | [(set (match_operand:SF 0 "vsx_register_operand" "=wa") | |
b306ab3a | 2422 | (unspec:SF [(match_operand:SF 1 "vsx_register_operand" "wa")] |
0bd62dca MM |
2423 | UNSPEC_VSX_CVSPDPN))] |
2424 | "TARGET_XSCVSPDPN" | |
2425 | "xscvspdpn %x0,%x1" | |
2426 | [(set_attr "type" "fp")]) | |
2427 | ||
70f0f8b2 BS |
2428 | ;; Convert and scale (used by vec_ctf, vec_cts, vec_ctu for double/long long) |
2429 | ||
4d8cfe0e KL |
2430 | (define_insn "vsx_xvcv<su>xwsp" |
2431 | [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa") | |
2432 | (any_float:V4SF (match_operand:V4SI 1 "vsx_register_operand" "wa")))] | |
2433 | "VECTOR_UNIT_VSX_P (V4SFmode)" | |
2434 | "xvcv<su>xwsp %x0,%x1" | |
2435 | [(set_attr "type" "vecfloat")]) | |
2436 | ||
2437 | (define_insn "vsx_xvcv<su>xddp" | |
2438 | [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa") | |
2439 | (any_float:V2DF (match_operand:V2DI 1 "vsx_register_operand" "wa")))] | |
2440 | "VECTOR_UNIT_VSX_P (V2DFmode)" | |
2441 | "xvcv<su>xddp %x0,%x1" | |
2442 | [(set_attr "type" "vecdouble")]) | |
2443 | ||
2444 | (define_insn "vsx_xvcvsp<su>xws" | |
2445 | [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa") | |
2446 | (any_fix:V4SI (match_operand:V4SF 1 "vsx_register_operand" "wa")))] | |
2447 | "VECTOR_UNIT_VSX_P (V4SFmode)" | |
2448 | "xvcvsp<su>xws %x0,%x1" | |
2449 | [(set_attr "type" "vecfloat")]) | |
2450 | ||
2451 | (define_insn "vsx_xvcvdp<su>xds" | |
2452 | [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa") | |
2453 | (any_fix:V2DI (match_operand:V2DF 1 "vsx_register_operand" "wa")))] | |
2454 | "VECTOR_UNIT_VSX_P (V2DFmode)" | |
2455 | "xvcvdp<su>xds %x0,%x1" | |
2456 | [(set_attr "type" "vecdouble")]) | |
2457 | ||
70f0f8b2 | 2458 | (define_expand "vsx_xvcvsxddp_scale" |
ad18eed2 SB |
2459 | [(match_operand:V2DF 0 "vsx_register_operand") |
2460 | (match_operand:V2DI 1 "vsx_register_operand") | |
2461 | (match_operand:QI 2 "immediate_operand")] | |
70f0f8b2 BS |
2462 | "VECTOR_UNIT_VSX_P (V2DFmode)" |
2463 | { | |
2464 | rtx op0 = operands[0]; | |
2465 | rtx op1 = operands[1]; | |
2466 | int scale = INTVAL(operands[2]); | |
2467 | emit_insn (gen_vsx_xvcvsxddp (op0, op1)); | |
2468 | if (scale != 0) | |
2469 | rs6000_scale_v2df (op0, op0, -scale); | |
2470 | DONE; | |
2471 | }) | |
2472 | ||
70f0f8b2 | 2473 | (define_expand "vsx_xvcvuxddp_scale" |
ad18eed2 SB |
2474 | [(match_operand:V2DF 0 "vsx_register_operand") |
2475 | (match_operand:V2DI 1 "vsx_register_operand") | |
2476 | (match_operand:QI 2 "immediate_operand")] | |
70f0f8b2 BS |
2477 | "VECTOR_UNIT_VSX_P (V2DFmode)" |
2478 | { | |
2479 | rtx op0 = operands[0]; | |
2480 | rtx op1 = operands[1]; | |
2481 | int scale = INTVAL(operands[2]); | |
2482 | emit_insn (gen_vsx_xvcvuxddp (op0, op1)); | |
2483 | if (scale != 0) | |
2484 | rs6000_scale_v2df (op0, op0, -scale); | |
2485 | DONE; | |
2486 | }) | |
2487 | ||
70f0f8b2 | 2488 | (define_expand "vsx_xvcvdpsxds_scale" |
ad18eed2 SB |
2489 | [(match_operand:V2DI 0 "vsx_register_operand") |
2490 | (match_operand:V2DF 1 "vsx_register_operand") | |
2491 | (match_operand:QI 2 "immediate_operand")] | |
70f0f8b2 BS |
2492 | "VECTOR_UNIT_VSX_P (V2DFmode)" |
2493 | { | |
2494 | rtx op0 = operands[0]; | |
2495 | rtx op1 = operands[1]; | |
9b5ee426 BS |
2496 | rtx tmp; |
2497 | int scale = INTVAL (operands[2]); | |
2498 | if (scale == 0) | |
2499 | tmp = op1; | |
2500 | else | |
2501 | { | |
2502 | tmp = gen_reg_rtx (V2DFmode); | |
2503 | rs6000_scale_v2df (tmp, op1, scale); | |
2504 | } | |
70f0f8b2 BS |
2505 | emit_insn (gen_vsx_xvcvdpsxds (op0, tmp)); |
2506 | DONE; | |
2507 | }) | |
2508 | ||
e5898daf CL |
2509 | ;; convert vector of 64-bit floating point numbers to vector of |
2510 | ;; 64-bit unsigned integer | |
70f0f8b2 | 2511 | (define_expand "vsx_xvcvdpuxds_scale" |
ad18eed2 SB |
2512 | [(match_operand:V2DI 0 "vsx_register_operand") |
2513 | (match_operand:V2DF 1 "vsx_register_operand") | |
2514 | (match_operand:QI 2 "immediate_operand")] | |
70f0f8b2 BS |
2515 | "VECTOR_UNIT_VSX_P (V2DFmode)" |
2516 | { | |
2517 | rtx op0 = operands[0]; | |
2518 | rtx op1 = operands[1]; | |
9b5ee426 BS |
2519 | rtx tmp; |
2520 | int scale = INTVAL (operands[2]); | |
2521 | if (scale == 0) | |
2522 | tmp = op1; | |
2523 | else | |
2524 | { | |
2525 | tmp = gen_reg_rtx (V2DFmode); | |
2526 | rs6000_scale_v2df (tmp, op1, scale); | |
2527 | } | |
70f0f8b2 BS |
2528 | emit_insn (gen_vsx_xvcvdpuxds (op0, tmp)); |
2529 | DONE; | |
2530 | }) | |
2531 | ||
29e6733c MM |
2532 | ;; Convert from 64-bit to 32-bit types |
2533 | ;; Note, favor the Altivec registers since the usual use of these instructions | |
2534 | ;; is in vector converts and we need to use the Altivec vperm instruction. | |
2535 | ||
2536 | (define_insn "vsx_xvcvdpsxws" | |
2537 | [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa") | |
85949949 | 2538 | (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wa,wa")] |
29e6733c MM |
2539 | UNSPEC_VSX_CVDPSXWS))] |
2540 | "VECTOR_UNIT_VSX_P (V2DFmode)" | |
2541 | "xvcvdpsxws %x0,%x1" | |
4356b75d | 2542 | [(set_attr "type" "vecdouble")]) |
29e6733c MM |
2543 | |
2544 | (define_insn "vsx_xvcvdpuxws" | |
2545 | [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa") | |
85949949 | 2546 | (unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wa,wa")] |
29e6733c MM |
2547 | UNSPEC_VSX_CVDPUXWS))] |
2548 | "VECTOR_UNIT_VSX_P (V2DFmode)" | |
2549 | "xvcvdpuxws %x0,%x1" | |
4356b75d | 2550 | [(set_attr "type" "vecdouble")]) |
29e6733c MM |
2551 | |
2552 | (define_insn "vsx_xvcvsxdsp" | |
8d3620ba SB |
2553 | [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa") |
2554 | (unspec:V4SF [(match_operand:V2DI 1 "vsx_register_operand" "wa")] | |
29e6733c MM |
2555 | UNSPEC_VSX_CVSXDSP))] |
2556 | "VECTOR_UNIT_VSX_P (V2DFmode)" | |
2557 | "xvcvsxdsp %x0,%x1" | |
2558 | [(set_attr "type" "vecfloat")]) | |
2559 | ||
2560 | (define_insn "vsx_xvcvuxdsp" | |
8d3620ba SB |
2561 | [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa") |
2562 | (unspec:V4SF [(match_operand:V2DI 1 "vsx_register_operand" "wa")] | |
29e6733c MM |
2563 | UNSPEC_VSX_CVUXDSP))] |
2564 | "VECTOR_UNIT_VSX_P (V2DFmode)" | |
8722316b | 2565 | "xvcvuxdsp %x0,%x1" |
4356b75d | 2566 | [(set_attr "type" "vecdouble")]) |
29e6733c | 2567 | |
6485d5d6 KL |
2568 | ;; Convert vector of 32-bit signed/unsigned integers to vector of |
2569 | ;; 64-bit floating point numbers. | |
2570 | (define_insn "vsx_xvcv<su>xwdp_be" | |
8d3620ba | 2571 | [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa") |
6485d5d6 KL |
2572 | (any_float:V2DF |
2573 | (vec_select:V2SI (match_operand:V4SI 1 "vsx_register_operand" "wa") | |
2574 | (parallel [(const_int 0) (const_int 2)]))))] | |
2575 | "VECTOR_UNIT_VSX_P (V2DFmode) && BYTES_BIG_ENDIAN" | |
2576 | "xvcv<su>xwdp %x0,%x1" | |
4356b75d | 2577 | [(set_attr "type" "vecdouble")]) |
29e6733c | 2578 | |
6485d5d6 KL |
2579 | (define_insn "vsx_xvcv<su>xwdp_le" |
2580 | [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa") | |
2581 | (any_float:V2DF | |
2582 | (vec_select:V2SI (match_operand:V4SI 1 "vsx_register_operand" "wa") | |
2583 | (parallel [(const_int 1) (const_int 3)]))))] | |
2584 | "VECTOR_UNIT_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN" | |
2585 | "xvcv<su>xwdp %x0,%x1" | |
2586 | [(set_attr "type" "vecdouble")]) | |
2587 | ||
2588 | (define_expand "vsx_xvcv<su>xwdp" | |
2589 | [(match_operand:V2DF 0 "vsx_register_operand") | |
2590 | (match_operand:V4SI 1 "vsx_register_operand") | |
2591 | (any_float (pc))] | |
2592 | "VECTOR_UNIT_VSX_P (V2DFmode)" | |
2593 | { | |
2594 | if (BYTES_BIG_ENDIAN) | |
2595 | emit_insn (gen_vsx_xvcv<su>xwdp_be (operands[0], operands[1])); | |
2596 | else | |
2597 | emit_insn (gen_vsx_xvcv<su>xwdp_le (operands[0], operands[1])); | |
2598 | DONE; | |
2599 | }) | |
2600 | ||
156b5cca | 2601 | (define_insn "vsx_xvcvsxwdp_df" |
cc998fd5 | 2602 | [(set (match_operand:DF 0 "vsx_register_operand" "=wa") |
156b5cca MM |
2603 | (unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")] |
2604 | UNSPEC_VSX_CVSXWDP))] | |
2605 | "TARGET_VSX" | |
2606 | "xvcvsxwdp %x0,%x1" | |
2607 | [(set_attr "type" "vecdouble")]) | |
2608 | ||
156b5cca | 2609 | (define_insn "vsx_xvcvuxwdp_df" |
cc998fd5 | 2610 | [(set (match_operand:DF 0 "vsx_register_operand" "=wa") |
156b5cca MM |
2611 | (unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")] |
2612 | UNSPEC_VSX_CVUXWDP))] | |
2613 | "TARGET_VSX" | |
2614 | "xvcvuxwdp %x0,%x1" | |
2615 | [(set_attr "type" "vecdouble")]) | |
2616 | ||
6485d5d6 KL |
2617 | ;; Convert vector of 32-bit floating point numbers to vector of |
2618 | ;; 64-bit signed/unsigned integers. | |
2619 | (define_insn "vsx_xvcvsp<su>xds_be" | |
29e6733c | 2620 | [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa") |
6485d5d6 KL |
2621 | (any_fix:V2DI |
2622 | (vec_select:V2SF (match_operand:V4SF 1 "vsx_register_operand" "wa,wa") | |
2623 | (parallel [(const_int 0) (const_int 2)]))))] | |
2624 | "VECTOR_UNIT_VSX_P (V2DFmode) && BYTES_BIG_ENDIAN" | |
2625 | "xvcvsp<su>xds %x0,%x1" | |
4356b75d | 2626 | [(set_attr "type" "vecdouble")]) |
29e6733c | 2627 | |
6485d5d6 | 2628 | (define_insn "vsx_xvcvsp<su>xds_le" |
29e6733c | 2629 | [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa") |
6485d5d6 KL |
2630 | (any_fix:V2DI |
2631 | (vec_select:V2SF (match_operand:V4SF 1 "vsx_register_operand" "wa,wa") | |
2632 | (parallel [(const_int 1) (const_int 3)]))))] | |
2633 | "VECTOR_UNIT_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN" | |
2634 | "xvcvsp<su>xds %x0,%x1" | |
4356b75d | 2635 | [(set_attr "type" "vecdouble")]) |
688e4919 | 2636 | |
6485d5d6 KL |
2637 | (define_expand "vsx_xvcvsp<su>xds" |
2638 | [(match_operand:V2DI 0 "vsx_register_operand") | |
2639 | (match_operand:V4SF 1 "vsx_register_operand") | |
2640 | (any_fix (pc))] | |
2641 | "VECTOR_UNIT_VSX_P (V2DFmode)" | |
2642 | { | |
2643 | if (BYTES_BIG_ENDIAN) | |
2644 | emit_insn (gen_vsx_xvcvsp<su>xds_be (operands[0], operands[1])); | |
2645 | else | |
2646 | emit_insn (gen_vsx_xvcvsp<su>xds_le (operands[0], operands[1])); | |
2647 | DONE; | |
2648 | }) | |
2649 | ||
19d22f7c CL |
2650 | ;; Generate float2 double |
2651 | ;; convert two double to float | |
2652 | (define_expand "float2_v2df" | |
2653 | [(use (match_operand:V4SF 0 "register_operand" "=wa")) | |
2654 | (use (match_operand:V2DF 1 "register_operand" "wa")) | |
2655 | (use (match_operand:V2DF 2 "register_operand" "wa"))] | |
2656 | "VECTOR_UNIT_VSX_P (V4SFmode)" | |
2657 | { | |
2658 | rtx rtx_src1, rtx_src2, rtx_dst; | |
2659 | ||
2660 | rtx_dst = operands[0]; | |
2661 | rtx_src1 = operands[1]; | |
2662 | rtx_src2 = operands[2]; | |
2663 | ||
2664 | rs6000_generate_float2_double_code (rtx_dst, rtx_src1, rtx_src2); | |
2665 | DONE; | |
2666 | }) | |
2667 | ||
be1418c7 CL |
2668 | ;; Generate float2 |
2669 | ;; convert two long long signed ints to float | |
2670 | (define_expand "float2_v2di" | |
2671 | [(use (match_operand:V4SF 0 "register_operand" "=wa")) | |
2672 | (use (match_operand:V2DI 1 "register_operand" "wa")) | |
2673 | (use (match_operand:V2DI 2 "register_operand" "wa"))] | |
2674 | "VECTOR_UNIT_VSX_P (V4SFmode)" | |
2675 | { | |
2676 | rtx rtx_src1, rtx_src2, rtx_dst; | |
2677 | ||
2678 | rtx_dst = operands[0]; | |
2679 | rtx_src1 = operands[1]; | |
2680 | rtx_src2 = operands[2]; | |
2681 | ||
2682 | rs6000_generate_float2_code (true, rtx_dst, rtx_src1, rtx_src2); | |
2683 | DONE; | |
2684 | }) | |
2685 | ||
2686 | ;; Generate uns_float2 | |
2687 | ;; convert two long long unsigned ints to float | |
2688 | (define_expand "uns_float2_v2di" | |
2689 | [(use (match_operand:V4SF 0 "register_operand" "=wa")) | |
2690 | (use (match_operand:V2DI 1 "register_operand" "wa")) | |
2691 | (use (match_operand:V2DI 2 "register_operand" "wa"))] | |
2692 | "VECTOR_UNIT_VSX_P (V4SFmode)" | |
2693 | { | |
2694 | rtx rtx_src1, rtx_src2, rtx_dst; | |
2695 | ||
2696 | rtx_dst = operands[0]; | |
2697 | rtx_src1 = operands[1]; | |
2698 | rtx_src2 = operands[2]; | |
2699 | ||
2700 | rs6000_generate_float2_code (true, rtx_dst, rtx_src1, rtx_src2); | |
2701 | DONE; | |
2702 | }) | |
2703 | ||
2704 | ;; Generate floate | |
2705 | ;; convert double or long long signed to float | |
2706 | ;; (Only even words are valid, BE numbering) | |
2707 | (define_expand "floate<mode>" | |
2708 | [(use (match_operand:V4SF 0 "register_operand" "=wa")) | |
2709 | (use (match_operand:VSX_D 1 "register_operand" "wa"))] | |
2710 | "VECTOR_UNIT_VSX_P (V4SFmode)" | |
2711 | { | |
427a7384 | 2712 | if (BYTES_BIG_ENDIAN) |
be1418c7 CL |
2713 | { |
2714 | /* Shift left one word to put even word correct location */ | |
2715 | rtx rtx_tmp; | |
2716 | rtx rtx_val = GEN_INT (4); | |
2717 | ||
2718 | rtx_tmp = gen_reg_rtx (V4SFmode); | |
2719 | emit_insn (gen_vsx_xvcv<VF_sxddp>sp (rtx_tmp, operands[1])); | |
2720 | emit_insn (gen_altivec_vsldoi_v4sf (operands[0], | |
2721 | rtx_tmp, rtx_tmp, rtx_val)); | |
2722 | } | |
2723 | else | |
394a527f | 2724 | emit_insn (gen_vsx_xvcv<VF_sxddp>sp (operands[0], operands[1])); |
be1418c7 CL |
2725 | |
2726 | DONE; | |
2727 | }) | |
2728 | ||
2729 | ;; Generate uns_floate | |
2730 | ;; convert long long unsigned to float | |
2731 | ;; (Only even words are valid, BE numbering) | |
2732 | (define_expand "unsfloatev2di" | |
2733 | [(use (match_operand:V4SF 0 "register_operand" "=wa")) | |
2734 | (use (match_operand:V2DI 1 "register_operand" "wa"))] | |
2735 | "VECTOR_UNIT_VSX_P (V4SFmode)" | |
2736 | { | |
427a7384 | 2737 | if (BYTES_BIG_ENDIAN) |
be1418c7 CL |
2738 | { |
2739 | /* Shift left one word to put even word correct location */ | |
2740 | rtx rtx_tmp; | |
2741 | rtx rtx_val = GEN_INT (4); | |
2742 | ||
2743 | rtx_tmp = gen_reg_rtx (V4SFmode); | |
2744 | emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp, operands[1])); | |
2745 | emit_insn (gen_altivec_vsldoi_v4sf (operands[0], | |
2746 | rtx_tmp, rtx_tmp, rtx_val)); | |
2747 | } | |
2748 | else | |
2749 | emit_insn (gen_vsx_xvcvuxdsp (operands[0], operands[1])); | |
2750 | ||
2751 | DONE; | |
2752 | }) | |
2753 | ||
2754 | ;; Generate floato | |
2755 | ;; convert double or long long signed to float | |
2756 | ;; Only odd words are valid, BE numbering) | |
2757 | (define_expand "floato<mode>" | |
2758 | [(use (match_operand:V4SF 0 "register_operand" "=wa")) | |
2759 | (use (match_operand:VSX_D 1 "register_operand" "wa"))] | |
2760 | "VECTOR_UNIT_VSX_P (V4SFmode)" | |
2761 | { | |
427a7384 | 2762 | if (BYTES_BIG_ENDIAN) |
394a527f | 2763 | emit_insn (gen_vsx_xvcv<VF_sxddp>sp (operands[0], operands[1])); |
be1418c7 CL |
2764 | else |
2765 | { | |
2766 | /* Shift left one word to put odd word correct location */ | |
2767 | rtx rtx_tmp; | |
2768 | rtx rtx_val = GEN_INT (4); | |
2769 | ||
2770 | rtx_tmp = gen_reg_rtx (V4SFmode); | |
394a527f | 2771 | emit_insn (gen_vsx_xvcv<VF_sxddp>sp (rtx_tmp, operands[1])); |
be1418c7 CL |
2772 | emit_insn (gen_altivec_vsldoi_v4sf (operands[0], |
2773 | rtx_tmp, rtx_tmp, rtx_val)); | |
2774 | } | |
2775 | DONE; | |
2776 | }) | |
2777 | ||
2778 | ;; Generate uns_floato | |
2779 | ;; convert long long unsigned to float | |
2780 | ;; (Only odd words are valid, BE numbering) | |
2781 | (define_expand "unsfloatov2di" | |
2782 | [(use (match_operand:V4SF 0 "register_operand" "=wa")) | |
2783 | (use (match_operand:V2DI 1 "register_operand" "wa"))] | |
2784 | "VECTOR_UNIT_VSX_P (V4SFmode)" | |
2785 | { | |
427a7384 | 2786 | if (BYTES_BIG_ENDIAN) |
be1418c7 CL |
2787 | emit_insn (gen_vsx_xvcvuxdsp (operands[0], operands[1])); |
2788 | else | |
2789 | { | |
2790 | /* Shift left one word to put odd word correct location */ | |
2791 | rtx rtx_tmp; | |
2792 | rtx rtx_val = GEN_INT (4); | |
2793 | ||
2794 | rtx_tmp = gen_reg_rtx (V4SFmode); | |
2795 | emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp, operands[1])); | |
2796 | emit_insn (gen_altivec_vsldoi_v4sf (operands[0], | |
2797 | rtx_tmp, rtx_tmp, rtx_val)); | |
2798 | } | |
2799 | DONE; | |
2800 | }) | |
2801 | ||
e5898daf CL |
2802 | ;; Generate vsigned2 |
2803 | ;; convert two double float vectors to a vector of single precision ints | |
2804 | (define_expand "vsigned2_v2df" | |
2805 | [(match_operand:V4SI 0 "register_operand" "=wa") | |
2806 | (unspec:V4SI [(match_operand:V2DF 1 "register_operand" "wa") | |
2807 | (match_operand:V2DF 2 "register_operand" "wa")] | |
2808 | UNSPEC_VSX_VSIGNED2)] | |
2809 | "TARGET_VSX" | |
2810 | { | |
2811 | rtx rtx_src1, rtx_src2, rtx_dst; | |
2812 | bool signed_convert=true; | |
2813 | ||
2814 | rtx_dst = operands[0]; | |
2815 | rtx_src1 = operands[1]; | |
2816 | rtx_src2 = operands[2]; | |
2817 | ||
2818 | rs6000_generate_vsigned2_code (signed_convert, rtx_dst, rtx_src1, rtx_src2); | |
2819 | DONE; | |
2820 | }) | |
2821 | ||
2822 | ;; Generate vsignedo_v2df | |
2823 | ;; signed double float to int convert odd word | |
2824 | (define_expand "vsignedo_v2df" | |
2825 | [(set (match_operand:V4SI 0 "register_operand" "=wa") | |
2826 | (match_operand:V2DF 1 "register_operand" "wa"))] | |
2827 | "TARGET_VSX" | |
2828 | { | |
427a7384 | 2829 | if (BYTES_BIG_ENDIAN) |
e5898daf CL |
2830 | { |
2831 | rtx rtx_tmp; | |
2832 | rtx rtx_val = GEN_INT (12); | |
2833 | rtx_tmp = gen_reg_rtx (V4SImode); | |
2834 | ||
2835 | emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp, operands[1])); | |
2836 | ||
2837 | /* Big endian word numbering for words in operand is 0 1 2 3. | |
2838 | take (operand[1] operand[1]) and shift left one word | |
2839 | 0 1 2 3 0 1 2 3 => 1 2 3 0 | |
2840 | Words 1 and 3 are now are now where they need to be for result. */ | |
2841 | ||
2842 | emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp, | |
2843 | rtx_tmp, rtx_val)); | |
2844 | } | |
2845 | else | |
2846 | /* Little endian word numbering for operand is 3 2 1 0. | |
2847 | Result words 3 and 1 are where they need to be. */ | |
2848 | emit_insn (gen_vsx_xvcvdpsxws (operands[0], operands[1])); | |
2849 | ||
2850 | DONE; | |
2851 | } | |
2852 | [(set_attr "type" "veccomplex")]) | |
2853 | ||
2854 | ;; Generate vsignede_v2df | |
2855 | ;; signed double float to int even word | |
2856 | (define_expand "vsignede_v2df" | |
2857 | [(set (match_operand:V4SI 0 "register_operand" "=v") | |
2858 | (match_operand:V2DF 1 "register_operand" "v"))] | |
2859 | "TARGET_VSX" | |
2860 | { | |
427a7384 | 2861 | if (BYTES_BIG_ENDIAN) |
e5898daf CL |
2862 | /* Big endian word numbering for words in operand is 0 1 |
2863 | Result words 0 is where they need to be. */ | |
2864 | emit_insn (gen_vsx_xvcvdpsxws (operands[0], operands[1])); | |
2865 | ||
2866 | else | |
2867 | { | |
2868 | rtx rtx_tmp; | |
2869 | rtx rtx_val = GEN_INT (12); | |
2870 | rtx_tmp = gen_reg_rtx (V4SImode); | |
2871 | ||
2872 | emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp, operands[1])); | |
2873 | ||
2874 | /* Little endian word numbering for operand is 3 2 1 0. | |
2875 | take (operand[1] operand[1]) and shift left three words | |
2876 | 0 1 2 3 0 1 2 3 => 3 0 1 2 | |
2877 | Words 0 and 2 are now where they need to be for the result. */ | |
2878 | emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp, | |
2879 | rtx_tmp, rtx_val)); | |
2880 | } | |
2881 | DONE; | |
2882 | } | |
2883 | [(set_attr "type" "veccomplex")]) | |
2884 | ||
2885 | ;; Generate unsigned2 | |
2886 | ;; convert two double float vectors to a vector of single precision | |
2887 | ;; unsigned ints | |
2888 | (define_expand "vunsigned2_v2df" | |
2889 | [(match_operand:V4SI 0 "register_operand" "=v") | |
2890 | (unspec:V4SI [(match_operand:V2DF 1 "register_operand" "v") | |
2891 | (match_operand:V2DF 2 "register_operand" "v")] | |
2892 | UNSPEC_VSX_VSIGNED2)] | |
2893 | "TARGET_VSX" | |
2894 | { | |
2895 | rtx rtx_src1, rtx_src2, rtx_dst; | |
2896 | bool signed_convert=false; | |
2897 | ||
2898 | rtx_dst = operands[0]; | |
2899 | rtx_src1 = operands[1]; | |
2900 | rtx_src2 = operands[2]; | |
2901 | ||
2902 | rs6000_generate_vsigned2_code (signed_convert, rtx_dst, rtx_src1, rtx_src2); | |
2903 | DONE; | |
2904 | }) | |
2905 | ||
2906 | ;; Generate vunsignedo_v2df | |
2907 | ;; unsigned double float to int convert odd word | |
2908 | (define_expand "vunsignedo_v2df" | |
2909 | [(set (match_operand:V4SI 0 "register_operand" "=v") | |
2910 | (match_operand:V2DF 1 "register_operand" "v"))] | |
2911 | "TARGET_VSX" | |
2912 | { | |
427a7384 | 2913 | if (BYTES_BIG_ENDIAN) |
e5898daf CL |
2914 | { |
2915 | rtx rtx_tmp; | |
2916 | rtx rtx_val = GEN_INT (12); | |
2917 | rtx_tmp = gen_reg_rtx (V4SImode); | |
2918 | ||
2919 | emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp, operands[1])); | |
2920 | ||
2921 | /* Big endian word numbering for words in operand is 0 1 2 3. | |
2922 | take (operand[1] operand[1]) and shift left one word | |
2923 | 0 1 2 3 0 1 2 3 => 1 2 3 0 | |
2924 | Words 1 and 3 are now are now where they need to be for result. */ | |
2925 | ||
2926 | emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp, | |
2927 | rtx_tmp, rtx_val)); | |
2928 | } | |
2929 | else | |
2930 | /* Little endian word numbering for operand is 3 2 1 0. | |
2931 | Result words 3 and 1 are where they need to be. */ | |
2932 | emit_insn (gen_vsx_xvcvdpuxws (operands[0], operands[1])); | |
2933 | ||
2934 | DONE; | |
2935 | } | |
2936 | [(set_attr "type" "veccomplex")]) | |
2937 | ||
2938 | ;; Generate vunsignede_v2df | |
2939 | ;; unsigned double float to int even word | |
2940 | (define_expand "vunsignede_v2df" | |
2941 | [(set (match_operand:V4SI 0 "register_operand" "=v") | |
2942 | (match_operand:V2DF 1 "register_operand" "v"))] | |
2943 | "TARGET_VSX" | |
2944 | { | |
427a7384 | 2945 | if (BYTES_BIG_ENDIAN) |
e5898daf CL |
2946 | /* Big endian word numbering for words in operand is 0 1 |
2947 | Result words 0 is where they need to be. */ | |
2948 | emit_insn (gen_vsx_xvcvdpuxws (operands[0], operands[1])); | |
2949 | ||
2950 | else | |
2951 | { | |
2952 | rtx rtx_tmp; | |
2953 | rtx rtx_val = GEN_INT (12); | |
2954 | rtx_tmp = gen_reg_rtx (V4SImode); | |
2955 | ||
2956 | emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp, operands[1])); | |
2957 | ||
2958 | /* Little endian word numbering for operand is 3 2 1 0. | |
2959 | take (operand[1] operand[1]) and shift left three words | |
2960 | 0 1 2 3 0 1 2 3 => 3 0 1 2 | |
2961 | Words 0 and 2 are now where they need to be for the result. */ | |
2962 | emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp, | |
2963 | rtx_tmp, rtx_val)); | |
2964 | } | |
2965 | DONE; | |
2966 | } | |
2967 | [(set_attr "type" "veccomplex")]) | |
2968 | ||
688e4919 | 2969 | ;; Only optimize (float (fix x)) -> frz if we are in fast-math mode, since |
0c307d8f | 2970 | ;; since the xvrdpiz instruction does not truncate the value if the floating |
688e4919 | 2971 | ;; point value is < LONG_MIN or > LONG_MAX. |
0c307d8f | 2972 | (define_insn "*vsx_float_fix_v2df2" |
85949949 | 2973 | [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa,?wa") |
0c307d8f MM |
2974 | (float:V2DF |
2975 | (fix:V2DI | |
85949949 | 2976 | (match_operand:V2DF 1 "vsx_register_operand" "wa,?wa"))))] |
2c2aa74d | 2977 | "TARGET_HARD_FLOAT |
0c307d8f | 2978 | && VECTOR_UNIT_VSX_P (V2DFmode) && flag_unsafe_math_optimizations |
688e4919 | 2979 | && !flag_trapping_math && TARGET_FRIZ" |
0c307d8f | 2980 | "xvrdpiz %x0,%x1" |
2c2aa74d | 2981 | [(set_attr "type" "vecdouble")]) |
688e4919 | 2982 | |
29e6733c MM |
2983 | \f |
2984 | ;; Permute operations | |
2985 | ||
2986 | ;; Build a V2DF/V2DI vector from two scalars | |
2987 | (define_insn "vsx_concat_<mode>" | |
08c4c51e | 2988 | [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa,we") |
c6d5ff83 | 2989 | (vec_concat:VSX_D |
e0e3ce63 SB |
2990 | (match_operand:<VEC_base> 1 "gpc_reg_operand" "wa,b") |
2991 | (match_operand:<VEC_base> 2 "gpc_reg_operand" "wa,b")))] | |
29e6733c | 2992 | "VECTOR_MEM_VSX_P (<MODE>mode)" |
de75c876 | 2993 | { |
e86aefb8 MM |
2994 | if (which_alternative == 0) |
2995 | return (BYTES_BIG_ENDIAN | |
2996 | ? "xxpermdi %x0,%x1,%x2,0" | |
2997 | : "xxpermdi %x0,%x2,%x1,0"); | |
2998 | ||
2999 | else if (which_alternative == 1) | |
3000 | return (BYTES_BIG_ENDIAN | |
3001 | ? "mtvsrdd %x0,%1,%2" | |
3002 | : "mtvsrdd %x0,%2,%1"); | |
3003 | ||
de75c876 | 3004 | else |
e86aefb8 | 3005 | gcc_unreachable (); |
de75c876 | 3006 | } |
b0894ae0 | 3007 | [(set_attr "type" "vecperm,vecmove")]) |
29e6733c | 3008 | |
08c4c51e MM |
3009 | ;; Combiner patterns to allow creating XXPERMDI's to access either double |
3010 | ;; word element in a vector register. | |
3011 | (define_insn "*vsx_concat_<mode>_1" | |
3012 | [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa") | |
3013 | (vec_concat:VSX_D | |
e0e3ce63 | 3014 | (vec_select:<VEC_base> |
08c4c51e MM |
3015 | (match_operand:VSX_D 1 "gpc_reg_operand" "wa") |
3016 | (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")])) | |
e0e3ce63 | 3017 | (match_operand:<VEC_base> 3 "gpc_reg_operand" "wa")))] |
08c4c51e MM |
3018 | "VECTOR_MEM_VSX_P (<MODE>mode)" |
3019 | { | |
3020 | HOST_WIDE_INT dword = INTVAL (operands[2]); | |
3021 | if (BYTES_BIG_ENDIAN) | |
3022 | { | |
3023 | operands[4] = GEN_INT (2*dword); | |
3024 | return "xxpermdi %x0,%x1,%x3,%4"; | |
3025 | } | |
3026 | else | |
3027 | { | |
3028 | operands[4] = GEN_INT (!dword); | |
3029 | return "xxpermdi %x0,%x3,%x1,%4"; | |
3030 | } | |
3031 | } | |
3032 | [(set_attr "type" "vecperm")]) | |
3033 | ||
3034 | (define_insn "*vsx_concat_<mode>_2" | |
3035 | [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa") | |
3036 | (vec_concat:VSX_D | |
e0e3ce63 SB |
3037 | (match_operand:<VEC_base> 1 "gpc_reg_operand" "wa") |
3038 | (vec_select:<VEC_base> | |
08c4c51e MM |
3039 | (match_operand:VSX_D 2 "gpc_reg_operand" "wa") |
3040 | (parallel [(match_operand:QI 3 "const_0_to_1_operand" "n")]))))] | |
3041 | "VECTOR_MEM_VSX_P (<MODE>mode)" | |
3042 | { | |
3043 | HOST_WIDE_INT dword = INTVAL (operands[3]); | |
3044 | if (BYTES_BIG_ENDIAN) | |
3045 | { | |
3046 | operands[4] = GEN_INT (dword); | |
3047 | return "xxpermdi %x0,%x1,%x2,%4"; | |
3048 | } | |
3049 | else | |
3050 | { | |
3051 | operands[4] = GEN_INT (2 * !dword); | |
3052 | return "xxpermdi %x0,%x2,%x1,%4"; | |
3053 | } | |
3054 | } | |
3055 | [(set_attr "type" "vecperm")]) | |
3056 | ||
3057 | (define_insn "*vsx_concat_<mode>_3" | |
3058 | [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa") | |
3059 | (vec_concat:VSX_D | |
e0e3ce63 | 3060 | (vec_select:<VEC_base> |
08c4c51e MM |
3061 | (match_operand:VSX_D 1 "gpc_reg_operand" "wa") |
3062 | (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")])) | |
e0e3ce63 | 3063 | (vec_select:<VEC_base> |
08c4c51e MM |
3064 | (match_operand:VSX_D 3 "gpc_reg_operand" "wa") |
3065 | (parallel [(match_operand:QI 4 "const_0_to_1_operand" "n")]))))] | |
3066 | "VECTOR_MEM_VSX_P (<MODE>mode)" | |
3067 | { | |
3068 | HOST_WIDE_INT dword1 = INTVAL (operands[2]); | |
3069 | HOST_WIDE_INT dword2 = INTVAL (operands[4]); | |
3070 | if (BYTES_BIG_ENDIAN) | |
3071 | { | |
3072 | operands[5] = GEN_INT ((2 * dword1) + dword2); | |
3073 | return "xxpermdi %x0,%x1,%x3,%5"; | |
3074 | } | |
3075 | else | |
3076 | { | |
3077 | operands[5] = GEN_INT ((2 * !dword2) + !dword1); | |
3078 | return "xxpermdi %x0,%x3,%x1,%5"; | |
3079 | } | |
3080 | } | |
3081 | [(set_attr "type" "vecperm")]) | |
3082 | ||
29e6733c MM |
3083 | ;; Special purpose concat using xxpermdi to glue two single precision values |
3084 | ;; together, relying on the fact that internally scalar floats are represented | |
3085 | ;; as doubles. This is used to initialize a V4SF vector with 4 floats | |
3086 | (define_insn "vsx_concat_v2sf" | |
6019c0fc | 3087 | [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa") |
29e6733c | 3088 | (unspec:V2DF |
72e3386e SB |
3089 | [(match_operand:SF 1 "vsx_register_operand" "wa") |
3090 | (match_operand:SF 2 "vsx_register_operand" "wa")] | |
29e6733c MM |
3091 | UNSPEC_VSX_CONCAT))] |
3092 | "VECTOR_MEM_VSX_P (V2DFmode)" | |
de75c876 BS |
3093 | { |
3094 | if (BYTES_BIG_ENDIAN) | |
3095 | return "xxpermdi %x0,%x1,%x2,0"; | |
3096 | else | |
3097 | return "xxpermdi %x0,%x2,%x1,0"; | |
3098 | } | |
29e6733c MM |
3099 | [(set_attr "type" "vecperm")]) |
3100 | ||
9fede15c SB |
3101 | ;; Concatenate 4 SImode elements into a V4SImode reg. |
3102 | (define_expand "vsx_init_v4si" | |
3103 | [(use (match_operand:V4SI 0 "gpc_reg_operand")) | |
3104 | (use (match_operand:SI 1 "gpc_reg_operand")) | |
3105 | (use (match_operand:SI 2 "gpc_reg_operand")) | |
3106 | (use (match_operand:SI 3 "gpc_reg_operand")) | |
3107 | (use (match_operand:SI 4 "gpc_reg_operand"))] | |
6019c0fc | 3108 | "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT" |
6019c0fc | 3109 | { |
f4a3cea3 KL |
3110 | rtx a = gen_lowpart_SUBREG (DImode, operands[1]); |
3111 | rtx b = gen_lowpart_SUBREG (DImode, operands[2]); | |
3112 | rtx c = gen_lowpart_SUBREG (DImode, operands[3]); | |
3113 | rtx d = gen_lowpart_SUBREG (DImode, operands[4]); | |
9fede15c SB |
3114 | if (!BYTES_BIG_ENDIAN) |
3115 | { | |
3116 | std::swap (a, b); | |
3117 | std::swap (c, d); | |
3118 | } | |
3119 | ||
9fede15c | 3120 | rtx ab = gen_reg_rtx (DImode); |
9fede15c | 3121 | rtx cd = gen_reg_rtx (DImode); |
f4a3cea3 KL |
3122 | emit_insn (gen_rotldi3_insert_3 (ab, a, GEN_INT (32), b, |
3123 | GEN_INT (0xffffffff))); | |
3124 | emit_insn (gen_rotldi3_insert_3 (cd, c, GEN_INT (32), d, | |
3125 | GEN_INT (0xffffffff))); | |
9fede15c SB |
3126 | |
3127 | rtx abcd = gen_reg_rtx (V2DImode); | |
3128 | emit_insn (gen_vsx_concat_v2di (abcd, ab, cd)); | |
3129 | emit_move_insn (operands[0], gen_lowpart (V4SImode, abcd)); | |
6019c0fc MM |
3130 | DONE; |
3131 | }) | |
3132 | ||
0cf68694 BS |
3133 | ;; xxpermdi for little endian loads and stores. We need several of |
3134 | ;; these since the form of the PARALLEL differs by mode. | |
3135 | (define_insn "*vsx_xxpermdi2_le_<mode>" | |
012f609e | 3136 | [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa") |
6579b156 | 3137 | (vec_select:VSX_D |
012f609e | 3138 | (match_operand:VSX_D 1 "vsx_register_operand" "wa") |
0cf68694 BS |
3139 | (parallel [(const_int 1) (const_int 0)])))] |
3140 | "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)" | |
3141 | "xxpermdi %x0,%x1,%x1,2" | |
3142 | [(set_attr "type" "vecperm")]) | |
3143 | ||
a8cea25c CL |
3144 | (define_insn "xxswapd_v16qi" |
3145 | [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa") | |
3146 | (vec_select:V16QI | |
3147 | (match_operand:V16QI 1 "vsx_register_operand" "wa") | |
3148 | (parallel [(const_int 8) (const_int 9) | |
3149 | (const_int 10) (const_int 11) | |
3150 | (const_int 12) (const_int 13) | |
3151 | (const_int 14) (const_int 15) | |
3152 | (const_int 0) (const_int 1) | |
3153 | (const_int 2) (const_int 3) | |
3154 | (const_int 4) (const_int 5) | |
3155 | (const_int 6) (const_int 7)])))] | |
3156 | "TARGET_VSX" | |
3157 | ;; AIX does not support the extended mnemonic xxswapd. Use the basic | |
3158 | ;; mnemonic xxpermdi instead. | |
0cf68694 BS |
3159 | "xxpermdi %x0,%x1,%x1,2" |
3160 | [(set_attr "type" "vecperm")]) | |
3161 | ||
a8cea25c | 3162 | (define_insn "xxswapd_v8hi" |
0cf68694 | 3163 | [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa") |
a8cea25c CL |
3164 | (vec_select:V8HI |
3165 | (match_operand:V8HI 1 "vsx_register_operand" "wa") | |
3166 | (parallel [(const_int 4) (const_int 5) | |
3167 | (const_int 6) (const_int 7) | |
3168 | (const_int 0) (const_int 1) | |
3169 | (const_int 2) (const_int 3)])))] | |
3170 | "TARGET_VSX" | |
3171 | ;; AIX does not support the extended mnemonic xxswapd. Use the basic | |
3172 | ;; mnemonic xxpermdi instead. | |
0cf68694 BS |
3173 | "xxpermdi %x0,%x1,%x1,2" |
3174 | [(set_attr "type" "vecperm")]) | |
3175 | ||
a8cea25c CL |
3176 | (define_insn "xxswapd_<mode>" |
3177 | [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa") | |
3178 | (vec_select:VSX_W | |
3179 | (match_operand:VSX_W 1 "vsx_register_operand" "wa") | |
3180 | (parallel [(const_int 2) (const_int 3) | |
3181 | (const_int 0) (const_int 1)])))] | |
3182 | "TARGET_VSX" | |
3183 | ;; AIX does not support extended mnemonic xxswapd. Use the basic | |
3184 | ;; mnemonic xxpermdi instead. | |
0cf68694 BS |
3185 | "xxpermdi %x0,%x1,%x1,2" |
3186 | [(set_attr "type" "vecperm")]) | |
3187 | ||
7b88f66d KN |
3188 | (define_insn "xxswapd_<mode>" |
3189 | [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa") | |
3190 | (vec_select:VSX_D | |
3191 | (match_operand:VSX_D 1 "vsx_register_operand" "wa") | |
3192 | (parallel [(const_int 1) (const_int 0)])))] | |
3193 | "TARGET_VSX" | |
3194 | ;; AIX does not support extended mnemonic xxswapd. Use the basic | |
3195 | ;; mnemonic xxpermdi instead. | |
3196 | "xxpermdi %x0,%x1,%x1,2" | |
3197 | [(set_attr "type" "vecperm")]) | |
3198 | ||
f03122f2 CL |
3199 | ;; Swap upper/lower 64-bit values in a 128-bit vector |
3200 | (define_insn "xxswapd_v1ti" | |
3201 | [(set (match_operand:V1TI 0 "vsx_register_operand" "=v") | |
3202 | (subreg:V1TI | |
3203 | (vec_select:V2DI | |
3204 | (subreg:V2DI | |
3205 | (match_operand:V1TI 1 "vsx_register_operand" "v") 0 ) | |
3206 | (parallel [(const_int 1)(const_int 0)])) | |
3207 | 0))] | |
3208 | "TARGET_POWER10" | |
3209 | ;; AIX does not support extended mnemonic xxswapd. Use the basic | |
3210 | ;; mnemonic xxpermdi instead. | |
3211 | "xxpermdi %x0,%x1,%x1,2" | |
3212 | [(set_attr "type" "vecperm")]) | |
3213 | ||
b8eaa754 CL |
3214 | (define_insn "xxgenpcvm_<mode>_internal" |
3215 | [(set (match_operand:VSX_EXTRACT_I4 0 "altivec_register_operand" "=wa") | |
3216 | (unspec:VSX_EXTRACT_I4 | |
3217 | [(match_operand:VSX_EXTRACT_I4 1 "altivec_register_operand" "v") | |
3218 | (match_operand:QI 2 "const_0_to_3_operand" "n")] | |
3219 | UNSPEC_XXGENPCV))] | |
a3c13696 | 3220 | "TARGET_POWER10" |
b8eaa754 CL |
3221 | "xxgenpcv<wd>m %x0,%1,%2" |
3222 | [(set_attr "type" "vecsimple")]) | |
3223 | ||
3224 | (define_expand "xxgenpcvm_<mode>" | |
3225 | [(use (match_operand:VSX_EXTRACT_I4 0 "register_operand")) | |
3226 | (use (match_operand:VSX_EXTRACT_I4 1 "register_operand")) | |
3227 | (use (match_operand:QI 2 "immediate_operand"))] | |
5d9d0c94 | 3228 | "TARGET_POWER10" |
b8eaa754 CL |
3229 | { |
3230 | if (!BYTES_BIG_ENDIAN) | |
3231 | { | |
3232 | /* gen_xxgenpcvm assumes Big Endian order. If LE, | |
3233 | change swap upper and lower double words. */ | |
3234 | rtx tmp = gen_reg_rtx (<MODE>mode); | |
3235 | ||
3236 | emit_insn (gen_xxswapd_<mode> (tmp, operands[1])); | |
3237 | operands[1] = tmp; | |
3238 | } | |
3239 | emit_insn (gen_xxgenpcvm_<mode>_internal (operands[0], operands[1], | |
3240 | operands[2])); | |
3241 | DONE; | |
3242 | }) | |
3243 | ||
0cf68694 BS |
3244 | ;; lxvd2x for little endian loads. We need several of |
3245 | ;; these since the form of the PARALLEL differs by mode. | |
3246 | (define_insn "*vsx_lxvd2x2_le_<mode>" | |
012f609e | 3247 | [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa") |
6579b156 BS |
3248 | (vec_select:VSX_D |
3249 | (match_operand:VSX_D 1 "memory_operand" "Z") | |
0cf68694 | 3250 | (parallel [(const_int 1) (const_int 0)])))] |
5d57fdc1 | 3251 | "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR" |
0cf68694 BS |
3252 | "lxvd2x %x0,%y1" |
3253 | [(set_attr "type" "vecload")]) | |
3254 | ||
3255 | (define_insn "*vsx_lxvd2x4_le_<mode>" | |
7858932e | 3256 | [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa") |
0cf68694 BS |
3257 | (vec_select:VSX_W |
3258 | (match_operand:VSX_W 1 "memory_operand" "Z") | |
3259 | (parallel [(const_int 2) (const_int 3) | |
3260 | (const_int 0) (const_int 1)])))] | |
5d57fdc1 | 3261 | "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR" |
0cf68694 BS |
3262 | "lxvd2x %x0,%y1" |
3263 | [(set_attr "type" "vecload")]) | |
3264 | ||
3265 | (define_insn "*vsx_lxvd2x8_le_V8HI" | |
3266 | [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa") | |
3267 | (vec_select:V8HI | |
3268 | (match_operand:V8HI 1 "memory_operand" "Z") | |
3269 | (parallel [(const_int 4) (const_int 5) | |
3270 | (const_int 6) (const_int 7) | |
3271 | (const_int 0) (const_int 1) | |
3272 | (const_int 2) (const_int 3)])))] | |
5d57fdc1 | 3273 | "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR" |
0cf68694 BS |
3274 | "lxvd2x %x0,%y1" |
3275 | [(set_attr "type" "vecload")]) | |
3276 | ||
3277 | (define_insn "*vsx_lxvd2x16_le_V16QI" | |
3278 | [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa") | |
3279 | (vec_select:V16QI | |
3280 | (match_operand:V16QI 1 "memory_operand" "Z") | |
3281 | (parallel [(const_int 8) (const_int 9) | |
3282 | (const_int 10) (const_int 11) | |
3283 | (const_int 12) (const_int 13) | |
3284 | (const_int 14) (const_int 15) | |
3285 | (const_int 0) (const_int 1) | |
3286 | (const_int 2) (const_int 3) | |
3287 | (const_int 4) (const_int 5) | |
3288 | (const_int 6) (const_int 7)])))] | |
5d57fdc1 | 3289 | "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR" |
0cf68694 BS |
3290 | "lxvd2x %x0,%y1" |
3291 | [(set_attr "type" "vecload")]) | |
3292 | ||
3293 | ;; stxvd2x for little endian stores. We need several of | |
3294 | ;; these since the form of the PARALLEL differs by mode. | |
3295 | (define_insn "*vsx_stxvd2x2_le_<mode>" | |
6579b156 BS |
3296 | [(set (match_operand:VSX_D 0 "memory_operand" "=Z") |
3297 | (vec_select:VSX_D | |
012f609e | 3298 | (match_operand:VSX_D 1 "vsx_register_operand" "wa") |
0cf68694 | 3299 | (parallel [(const_int 1) (const_int 0)])))] |
5d57fdc1 | 3300 | "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR" |
0cf68694 BS |
3301 | "stxvd2x %x1,%y0" |
3302 | [(set_attr "type" "vecstore")]) | |
3303 | ||
3304 | (define_insn "*vsx_stxvd2x4_le_<mode>" | |
3305 | [(set (match_operand:VSX_W 0 "memory_operand" "=Z") | |
3306 | (vec_select:VSX_W | |
7858932e | 3307 | (match_operand:VSX_W 1 "vsx_register_operand" "wa") |
0cf68694 BS |
3308 | (parallel [(const_int 2) (const_int 3) |
3309 | (const_int 0) (const_int 1)])))] | |
5d57fdc1 | 3310 | "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR" |
0cf68694 BS |
3311 | "stxvd2x %x1,%y0" |
3312 | [(set_attr "type" "vecstore")]) | |
3313 | ||
3314 | (define_insn "*vsx_stxvd2x8_le_V8HI" | |
3315 | [(set (match_operand:V8HI 0 "memory_operand" "=Z") | |
3316 | (vec_select:V8HI | |
3317 | (match_operand:V8HI 1 "vsx_register_operand" "wa") | |
3318 | (parallel [(const_int 4) (const_int 5) | |
3319 | (const_int 6) (const_int 7) | |
3320 | (const_int 0) (const_int 1) | |
3321 | (const_int 2) (const_int 3)])))] | |
5d57fdc1 | 3322 | "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR" |
0cf68694 BS |
3323 | "stxvd2x %x1,%y0" |
3324 | [(set_attr "type" "vecstore")]) | |
3325 | ||
3326 | (define_insn "*vsx_stxvd2x16_le_V16QI" | |
3327 | [(set (match_operand:V16QI 0 "memory_operand" "=Z") | |
3328 | (vec_select:V16QI | |
3329 | (match_operand:V16QI 1 "vsx_register_operand" "wa") | |
3330 | (parallel [(const_int 8) (const_int 9) | |
3331 | (const_int 10) (const_int 11) | |
3332 | (const_int 12) (const_int 13) | |
3333 | (const_int 14) (const_int 15) | |
3334 | (const_int 0) (const_int 1) | |
3335 | (const_int 2) (const_int 3) | |
3336 | (const_int 4) (const_int 5) | |
3337 | (const_int 6) (const_int 7)])))] | |
5d57fdc1 | 3338 | "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR" |
0cf68694 BS |
3339 | "stxvd2x %x1,%y0" |
3340 | [(set_attr "type" "vecstore")]) | |
3341 | ||
a16a872d MM |
3342 | ;; Convert a TImode value into V1TImode |
3343 | (define_expand "vsx_set_v1ti" | |
ad18eed2 SB |
3344 | [(match_operand:V1TI 0 "nonimmediate_operand") |
3345 | (match_operand:V1TI 1 "nonimmediate_operand") | |
3346 | (match_operand:TI 2 "input_operand") | |
3347 | (match_operand:QI 3 "u5bit_cint_operand")] | |
a16a872d MM |
3348 | "VECTOR_MEM_VSX_P (V1TImode)" |
3349 | { | |
3350 | if (operands[3] != const0_rtx) | |
3351 | gcc_unreachable (); | |
3352 | ||
3353 | emit_move_insn (operands[0], gen_lowpart (V1TImode, operands[1])); | |
3354 | DONE; | |
3355 | }) | |
3356 | ||
08c4c51e MM |
3357 | ;; Rewrite V2DF/V2DI set in terms of VEC_CONCAT |
3358 | (define_expand "vsx_set_<mode>" | |
3359 | [(use (match_operand:VSX_D 0 "vsx_register_operand")) | |
3360 | (use (match_operand:VSX_D 1 "vsx_register_operand")) | |
e0e3ce63 | 3361 | (use (match_operand:<VEC_base> 2 "gpc_reg_operand")) |
08c4c51e | 3362 | (use (match_operand:QI 3 "const_0_to_1_operand"))] |
29e6733c MM |
3363 | "VECTOR_MEM_VSX_P (<MODE>mode)" |
3364 | { | |
08c4c51e MM |
3365 | rtx dest = operands[0]; |
3366 | rtx vec_reg = operands[1]; | |
3367 | rtx value = operands[2]; | |
3368 | rtx ele = operands[3]; | |
e0e3ce63 | 3369 | rtx tmp = gen_reg_rtx (<VEC_base>mode); |
08c4c51e MM |
3370 | |
3371 | if (ele == const0_rtx) | |
3372 | { | |
3373 | emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const1_rtx)); | |
3374 | emit_insn (gen_vsx_concat_<mode> (dest, value, tmp)); | |
3375 | DONE; | |
3376 | } | |
3377 | else if (ele == const1_rtx) | |
3378 | { | |
3379 | emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const0_rtx)); | |
3380 | emit_insn (gen_vsx_concat_<mode> (dest, tmp, value)); | |
3381 | DONE; | |
3382 | } | |
29e6733c MM |
3383 | else |
3384 | gcc_unreachable (); | |
08c4c51e | 3385 | }) |
29e6733c MM |
3386 | |
3387 | ;; Extract a DF/DI element from V2DF/V2DI | |
117f16fb MM |
3388 | ;; Optimize cases were we can do a simple or direct move. |
3389 | ;; Or see if we can avoid doing the move at all | |
1a3c3ee9 | 3390 | |
ba3e5a38 SB |
3391 | (define_expand "vsx_extract_<mode>" |
3392 | [(set (match_operand:<VEC_base> 0 "gpc_reg_operand") | |
e0e3ce63 | 3393 | (vec_select:<VEC_base> |
ba3e5a38 | 3394 | (match_operand:VSX_D 1 "gpc_reg_operand") |
117f16fb | 3395 | (parallel |
ba3e5a38 | 3396 | [(match_operand:QI 2 "const_0_to_1_operand")])))] |
1a3c3ee9 | 3397 | "VECTOR_MEM_VSX_P (<MODE>mode)" |
ba3e5a38 | 3398 | "") |
117f16fb | 3399 | |
ba3e5a38 SB |
3400 | (define_insn "*vsx_extract_<mode>_0" |
3401 | [(set (match_operand:<VEC_base> 0 "gpc_reg_operand" "=wa,wa,wr") | |
3402 | (vec_select:<VEC_base> | |
3403 | (match_operand:VSX_D 1 "gpc_reg_operand" "0,wa,wa") | |
3404 | (parallel | |
3405 | [(match_operand:QI 2 "const_0_to_1_operand" "n,n,n")])))] | |
3406 | "VECTOR_MEM_VSX_P (<MODE>mode) | |
3407 | && INTVAL (operands[2]) == (BYTES_BIG_ENDIAN ? 0 : 1)" | |
3408 | { | |
3409 | if (which_alternative == 0) | |
3410 | return ASM_COMMENT_START " vec_extract to same register"; | |
117f16fb | 3411 | |
ba3e5a38 SB |
3412 | if (which_alternative == 2) |
3413 | return "mfvsrd %0,%x1"; | |
117f16fb | 3414 | |
ba3e5a38 SB |
3415 | return "xxlor %x0,%x1,%x1"; |
3416 | } | |
3417 | [(set_attr "type" "*,veclogical,mfvsr") | |
3418 | (set_attr "isa" "*,*,p8v") | |
3419 | (set_attr "length" "0,*,*")]) | |
117f16fb | 3420 | |
ba3e5a38 SB |
3421 | (define_insn "*vsx_extract_<mode>_1" |
3422 | [(set (match_operand:<VEC_base> 0 "gpc_reg_operand" "=wa,wr") | |
3423 | (vec_select:<VEC_base> | |
3424 | (match_operand:VSX_D 1 "gpc_reg_operand" "wa,wa") | |
3425 | (parallel | |
3426 | [(match_operand:QI 2 "const_0_to_1_operand" "n,n")])))] | |
3427 | "VECTOR_MEM_VSX_P (<MODE>mode) | |
3428 | && INTVAL (operands[2]) == (BYTES_BIG_ENDIAN ? 1 : 0)" | |
3429 | { | |
3430 | if (which_alternative == 1) | |
98060bbe | 3431 | return "mfvsrld %0,%x1"; |
117f16fb | 3432 | |
ba3e5a38 SB |
3433 | operands[3] = GEN_INT (BYTES_BIG_ENDIAN ? 2 : 3); |
3434 | return "xxpermdi %x0,%x1,%x1,%3"; | |
29e6733c | 3435 | } |
ba3e5a38 SB |
3436 | [(set_attr "type" "mfvsr,vecperm") |
3437 | (set_attr "isa" "*,p9v")]) | |
29e6733c | 3438 | |
d0047a25 MM |
3439 | ;; Optimize extracting a single scalar element from memory. |
3440 | (define_insn_and_split "*vsx_extract_<P:mode>_<VSX_D:mode>_load" | |
e0e3ce63 SB |
3441 | [(set (match_operand:<VEC_base> 0 "register_operand" "=wa,wr") |
3442 | (vec_select:<VSX_D:VEC_base> | |
d0047a25 MM |
3443 | (match_operand:VSX_D 1 "memory_operand" "m,m") |
3444 | (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n,n")]))) | |
3445 | (clobber (match_scratch:P 3 "=&b,&b"))] | |
dc355223 | 3446 | "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<VSX_D:MODE>mode)" |
d0047a25 MM |
3447 | "#" |
3448 | "&& reload_completed" | |
3449 | [(set (match_dup 0) (match_dup 4))] | |
3450 | { | |
3451 | operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2], | |
e0e3ce63 | 3452 | operands[3], <VSX_D:VEC_base>mode); |
d0047a25 MM |
3453 | } |
3454 | [(set_attr "type" "fpload,load") | |
3455 | (set_attr "length" "8")]) | |
117f16fb MM |
3456 | |
3457 | ;; Optimize storing a single scalar element that is the right location to | |
3458 | ;; memory | |
3459 | (define_insn "*vsx_extract_<mode>_store" | |
e0e3ce63 SB |
3460 | [(set (match_operand:<VEC_base> 0 "memory_operand" "=m,Z,wY") |
3461 | (vec_select:<VEC_base> | |
208a0405 | 3462 | (match_operand:VSX_D 1 "register_operand" "d,v,v") |
c3d13153 SB |
3463 | (parallel [(match_operand:QI 2 "vsx_scalar_64bit" "n,n,n")])))] |
3464 | "VECTOR_MEM_VSX_P (<MODE>mode) | |
3465 | && INTVAL (operands[2]) == (BYTES_BIG_ENDIAN ? 0 : 1)" | |
117f16fb MM |
3466 | "@ |
3467 | stfd%U0%X0 %1,%0 | |
158985b1 | 3468 | stxsdx %x1,%y0 |
d0047a25 | 3469 | stxsd %1,%0" |
d17fbef8 | 3470 | [(set_attr "type" "fpstore") |
208a0405 | 3471 | (set_attr "isa" "*,p7v,p9v")]) |
27b097f8 | 3472 | |
e0d32185 MM |
3473 | ;; Variable V2DI/V2DF extract shift |
3474 | (define_insn "vsx_vslo_<mode>" | |
e0e3ce63 SB |
3475 | [(set (match_operand:<VEC_base> 0 "gpc_reg_operand" "=v") |
3476 | (unspec:<VEC_base> [(match_operand:VSX_D 1 "gpc_reg_operand" "v") | |
e0d32185 MM |
3477 | (match_operand:V2DI 2 "gpc_reg_operand" "v")] |
3478 | UNSPEC_VSX_VSLO))] | |
3479 | "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT" | |
3480 | "vslo %0,%1,%2" | |
3481 | [(set_attr "type" "vecperm")]) | |
3482 | ||
75c299ac | 3483 | ;; Variable V2DI/V2DF extract from a register |
e0d32185 | 3484 | (define_insn_and_split "vsx_extract_<mode>_var" |
e0e3ce63 SB |
3485 | [(set (match_operand:<VEC_base> 0 "gpc_reg_operand" "=v") |
3486 | (unspec:<VEC_base> [(match_operand:VSX_D 1 "gpc_reg_operand" "v") | |
75c299ac | 3487 | (match_operand:DI 2 "gpc_reg_operand" "r")] |
e0d32185 | 3488 | UNSPEC_VSX_EXTRACT)) |
75c299ac MM |
3489 | (clobber (match_scratch:DI 3 "=r")) |
3490 | (clobber (match_scratch:V2DI 4 "=&v"))] | |
e0d32185 MM |
3491 | "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT" |
3492 | "#" | |
3493 | "&& reload_completed" | |
3494 | [(const_int 0)] | |
3495 | { | |
3496 | rs6000_split_vec_extract_var (operands[0], operands[1], operands[2], | |
3497 | operands[3], operands[4]); | |
3498 | DONE; | |
3499 | }) | |
3500 | ||
75c299ac MM |
3501 | ;; Variable V2DI/V2DF extract from memory |
3502 | (define_insn_and_split "*vsx_extract_<mode>_var_load" | |
e0e3ce63 SB |
3503 | [(set (match_operand:<VEC_base> 0 "gpc_reg_operand" "=wa,r") |
3504 | (unspec:<VEC_base> [(match_operand:VSX_D 1 "memory_operand" "Q,Q") | |
75c299ac MM |
3505 | (match_operand:DI 2 "gpc_reg_operand" "r,r")] |
3506 | UNSPEC_VSX_EXTRACT)) | |
3507 | (clobber (match_scratch:DI 3 "=&b,&b"))] | |
3508 | "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT" | |
3509 | "#" | |
3510 | "&& reload_completed" | |
3511 | [(set (match_dup 0) (match_dup 4))] | |
3512 | { | |
3513 | operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2], | |
e0e3ce63 | 3514 | operands[3], <VEC_base>mode); |
75c299ac MM |
3515 | } |
3516 | [(set_attr "type" "fpload,load")]) | |
3517 | ||
df10b6d4 MM |
3518 | ;; Extract a SF element from V4SF |
3519 | (define_insn_and_split "vsx_extract_v4sf" | |
72e3386e | 3520 | [(set (match_operand:SF 0 "vsx_register_operand" "=wa") |
df10b6d4 | 3521 | (vec_select:SF |
e2a99194 MM |
3522 | (match_operand:V4SF 1 "vsx_register_operand" "wa") |
3523 | (parallel [(match_operand:QI 2 "u5bit_cint_operand" "n")]))) | |
3524 | (clobber (match_scratch:V4SF 3 "=0"))] | |
df10b6d4 | 3525 | "VECTOR_UNIT_VSX_P (V4SFmode)" |
e2a99194 MM |
3526 | "#" |
3527 | "&& 1" | |
df10b6d4 | 3528 | [(const_int 0)] |
df10b6d4 MM |
3529 | { |
3530 | rtx op0 = operands[0]; | |
3531 | rtx op1 = operands[1]; | |
3532 | rtx op2 = operands[2]; | |
3533 | rtx op3 = operands[3]; | |
3534 | rtx tmp; | |
27b097f8 | 3535 | HOST_WIDE_INT ele = BYTES_BIG_ENDIAN ? INTVAL (op2) : 3 - INTVAL (op2); |
df10b6d4 MM |
3536 | |
3537 | if (ele == 0) | |
3538 | tmp = op1; | |
3539 | else | |
3540 | { | |
3541 | if (GET_CODE (op3) == SCRATCH) | |
3542 | op3 = gen_reg_rtx (V4SFmode); | |
6a742466 | 3543 | emit_insn (gen_vsx_xxsldwi_v4sf (op3, op1, op1, GEN_INT (ele))); |
df10b6d4 MM |
3544 | tmp = op3; |
3545 | } | |
3546 | emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp)); | |
3547 | DONE; | |
e2a99194 MM |
3548 | } |
3549 | [(set_attr "length" "8") | |
df10b6d4 MM |
3550 | (set_attr "type" "fp")]) |
3551 | ||
e2a99194 | 3552 | (define_insn_and_split "*vsx_extract_v4sf_<mode>_load" |
208a0405 | 3553 | [(set (match_operand:SF 0 "register_operand" "=f,v,v,?r") |
e2a99194 MM |
3554 | (vec_select:SF |
3555 | (match_operand:V4SF 1 "memory_operand" "m,Z,m,m") | |
3556 | (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n,n")]))) | |
3557 | (clobber (match_scratch:P 3 "=&b,&b,&b,&b"))] | |
3558 | "VECTOR_MEM_VSX_P (V4SFmode)" | |
3559 | "#" | |
3560 | "&& reload_completed" | |
3561 | [(set (match_dup 0) (match_dup 4))] | |
3562 | { | |
3563 | operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2], | |
3564 | operands[3], SFmode); | |
3565 | } | |
3566 | [(set_attr "type" "fpload,fpload,fpload,load") | |
d17fbef8 | 3567 | (set_attr "length" "8") |
208a0405 | 3568 | (set_attr "isa" "*,p7v,p9v,*")]) |
e2a99194 | 3569 | |
75c299ac | 3570 | ;; Variable V4SF extract from a register |
e2a99194 | 3571 | (define_insn_and_split "vsx_extract_v4sf_var" |
75c299ac MM |
3572 | [(set (match_operand:SF 0 "gpc_reg_operand" "=wa") |
3573 | (unspec:SF [(match_operand:V4SF 1 "gpc_reg_operand" "v") | |
3574 | (match_operand:DI 2 "gpc_reg_operand" "r")] | |
e2a99194 | 3575 | UNSPEC_VSX_EXTRACT)) |
75c299ac MM |
3576 | (clobber (match_scratch:DI 3 "=r")) |
3577 | (clobber (match_scratch:V2DI 4 "=&v"))] | |
19970253 | 3578 | "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_DIRECT_MOVE_64BIT" |
e2a99194 MM |
3579 | "#" |
3580 | "&& reload_completed" | |
3581 | [(const_int 0)] | |
3582 | { | |
3583 | rs6000_split_vec_extract_var (operands[0], operands[1], operands[2], | |
3584 | operands[3], operands[4]); | |
3585 | DONE; | |
3586 | }) | |
3587 | ||
75c299ac MM |
3588 | ;; Variable V4SF extract from memory |
3589 | (define_insn_and_split "*vsx_extract_v4sf_var_load" | |
3590 | [(set (match_operand:SF 0 "gpc_reg_operand" "=wa,?r") | |
3591 | (unspec:SF [(match_operand:V4SF 1 "memory_operand" "Q,Q") | |
3592 | (match_operand:DI 2 "gpc_reg_operand" "r,r")] | |
3593 | UNSPEC_VSX_EXTRACT)) | |
3594 | (clobber (match_scratch:DI 3 "=&b,&b"))] | |
3595 | "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_DIRECT_MOVE_64BIT" | |
3596 | "#" | |
3597 | "&& reload_completed" | |
3598 | [(set (match_dup 0) (match_dup 4))] | |
3599 | { | |
3600 | operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2], | |
3601 | operands[3], SFmode); | |
3602 | } | |
3603 | [(set_attr "type" "fpload,load")]) | |
3604 | ||
5aebfdad RH |
3605 | ;; Expand the builtin form of xxpermdi to canonical rtl. |
3606 | (define_expand "vsx_xxpermdi_<mode>" | |
a530e181 BS |
3607 | [(match_operand:VSX_L 0 "vsx_register_operand") |
3608 | (match_operand:VSX_L 1 "vsx_register_operand") | |
3609 | (match_operand:VSX_L 2 "vsx_register_operand") | |
3610 | (match_operand:QI 3 "u5bit_cint_operand")] | |
3611 | "VECTOR_MEM_VSX_P (<MODE>mode)" | |
3612 | { | |
3613 | rtx target = operands[0]; | |
3614 | rtx op0 = operands[1]; | |
3615 | rtx op1 = operands[2]; | |
3616 | int mask = INTVAL (operands[3]); | |
3617 | rtx perm0 = GEN_INT ((mask >> 1) & 1); | |
3618 | rtx perm1 = GEN_INT ((mask & 1) + 2); | |
3619 | rtx (*gen) (rtx, rtx, rtx, rtx, rtx); | |
3620 | ||
3621 | if (<MODE>mode == V2DFmode) | |
3622 | gen = gen_vsx_xxpermdi2_v2df_1; | |
3623 | else | |
3624 | { | |
3625 | gen = gen_vsx_xxpermdi2_v2di_1; | |
3626 | if (<MODE>mode != V2DImode) | |
3627 | { | |
3628 | target = gen_lowpart (V2DImode, target); | |
3629 | op0 = gen_lowpart (V2DImode, op0); | |
3630 | op1 = gen_lowpart (V2DImode, op1); | |
3631 | } | |
3632 | } | |
3633 | emit_insn (gen (target, op0, op1, perm0, perm1)); | |
3634 | DONE; | |
3635 | }) | |
3636 | ||
3637 | ;; Special version of xxpermdi that retains big-endian semantics. | |
3638 | (define_expand "vsx_xxpermdi_<mode>_be" | |
3639 | [(match_operand:VSX_L 0 "vsx_register_operand") | |
3640 | (match_operand:VSX_L 1 "vsx_register_operand") | |
3641 | (match_operand:VSX_L 2 "vsx_register_operand") | |
3642 | (match_operand:QI 3 "u5bit_cint_operand")] | |
29e6733c | 3643 | "VECTOR_MEM_VSX_P (<MODE>mode)" |
5aebfdad RH |
3644 | { |
3645 | rtx target = operands[0]; | |
3646 | rtx op0 = operands[1]; | |
3647 | rtx op1 = operands[2]; | |
3648 | int mask = INTVAL (operands[3]); | |
3649 | rtx perm0 = GEN_INT ((mask >> 1) & 1); | |
3650 | rtx perm1 = GEN_INT ((mask & 1) + 2); | |
3651 | rtx (*gen) (rtx, rtx, rtx, rtx, rtx); | |
3652 | ||
3653 | if (<MODE>mode == V2DFmode) | |
3654 | gen = gen_vsx_xxpermdi2_v2df_1; | |
3655 | else | |
3656 | { | |
3657 | gen = gen_vsx_xxpermdi2_v2di_1; | |
3658 | if (<MODE>mode != V2DImode) | |
3659 | { | |
3660 | target = gen_lowpart (V2DImode, target); | |
c6d5ff83 MM |
3661 | op0 = gen_lowpart (V2DImode, op0); |
3662 | op1 = gen_lowpart (V2DImode, op1); | |
5aebfdad RH |
3663 | } |
3664 | } | |
54c4bfd7 BS |
3665 | /* In little endian mode, vsx_xxpermdi2_<mode>_1 will perform a |
3666 | transformation we don't want; it is necessary for | |
3667 | rs6000_expand_vec_perm_const_1 but not for this use. So we | |
3668 | prepare for that by reversing the transformation here. */ | |
3669 | if (BYTES_BIG_ENDIAN) | |
3670 | emit_insn (gen (target, op0, op1, perm0, perm1)); | |
3671 | else | |
3672 | { | |
3673 | rtx p0 = GEN_INT (3 - INTVAL (perm1)); | |
3674 | rtx p1 = GEN_INT (3 - INTVAL (perm0)); | |
3675 | emit_insn (gen (target, op1, op0, p0, p1)); | |
3676 | } | |
5aebfdad RH |
3677 | DONE; |
3678 | }) | |
29e6733c | 3679 | |
5aebfdad | 3680 | (define_insn "vsx_xxpermdi2_<mode>_1" |
85949949 | 3681 | [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa") |
5aebfdad RH |
3682 | (vec_select:VSX_D |
3683 | (vec_concat:<VS_double> | |
85949949 SB |
3684 | (match_operand:VSX_D 1 "vsx_register_operand" "wa") |
3685 | (match_operand:VSX_D 2 "vsx_register_operand" "wa")) | |
5aebfdad RH |
3686 | (parallel [(match_operand 3 "const_0_to_1_operand" "") |
3687 | (match_operand 4 "const_2_to_3_operand" "")])))] | |
29e6733c MM |
3688 | "VECTOR_MEM_VSX_P (<MODE>mode)" |
3689 | { | |
8adcc78b BS |
3690 | int op3, op4, mask; |
3691 | ||
3692 | /* For little endian, swap operands and invert/swap selectors | |
3693 | to get the correct xxpermdi. The operand swap sets up the | |
3694 | inputs as a little endian array. The selectors are swapped | |
3695 | because they are defined to use big endian ordering. The | |
3696 | selectors are inverted to get the correct doublewords for | |
3697 | little endian ordering. */ | |
3698 | if (BYTES_BIG_ENDIAN) | |
3699 | { | |
3700 | op3 = INTVAL (operands[3]); | |
3701 | op4 = INTVAL (operands[4]); | |
3702 | } | |
3703 | else | |
3704 | { | |
3705 | op3 = 3 - INTVAL (operands[4]); | |
3706 | op4 = 3 - INTVAL (operands[3]); | |
3707 | } | |
3708 | ||
3709 | mask = (op3 << 1) | (op4 - 2); | |
5aebfdad | 3710 | operands[3] = GEN_INT (mask); |
8adcc78b BS |
3711 | |
3712 | if (BYTES_BIG_ENDIAN) | |
3713 | return "xxpermdi %x0,%x1,%x2,%3"; | |
3714 | else | |
3715 | return "xxpermdi %x0,%x2,%x1,%3"; | |
29e6733c MM |
3716 | } |
3717 | [(set_attr "type" "vecperm")]) | |
3718 | ||
bcb9a772 MM |
3719 | ;; Extraction of a single element in a small integer vector. Until ISA 3.0, |
3720 | ;; none of the small types were allowed in a vector register, so we had to | |
3721 | ;; extract to a DImode and either do a direct move or store. | |
e2a99194 | 3722 | (define_expand "vsx_extract_<mode>" |
e0e3ce63 SB |
3723 | [(parallel [(set (match_operand:<VEC_base> 0 "gpc_reg_operand") |
3724 | (vec_select:<VEC_base> | |
e2a99194 MM |
3725 | (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand") |
3726 | (parallel [(match_operand:QI 2 "const_int_operand")]))) | |
456f0dfa | 3727 | (clobber (match_scratch:VSX_EXTRACT_I 3))])] |
e2a99194 MM |
3728 | "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT" |
3729 | { | |
456f0dfa | 3730 | /* If we have ISA 3.0, we can do a xxextractuw/vextractu{b,h}. */ |
9bfda664 | 3731 | if (TARGET_P9_VECTOR) |
c5e74d9d | 3732 | { |
456f0dfa MM |
3733 | emit_insn (gen_vsx_extract_<mode>_p9 (operands[0], operands[1], |
3734 | operands[2])); | |
3735 | DONE; | |
c5e74d9d | 3736 | } |
456f0dfa | 3737 | }) |
c5e74d9d | 3738 | |
456f0dfa | 3739 | (define_insn "vsx_extract_<mode>_p9" |
e0e3ce63 SB |
3740 | [(set (match_operand:<VEC_base> 0 "gpc_reg_operand" "=r,<VSX_EX>") |
3741 | (vec_select:<VEC_base> | |
791e7779 | 3742 | (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v,<VSX_EX>") |
16370e79 MM |
3743 | (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n,n")]))) |
3744 | (clobber (match_scratch:SI 3 "=r,X"))] | |
9bfda664 | 3745 | "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB" |
c5e74d9d | 3746 | { |
16370e79 MM |
3747 | if (which_alternative == 0) |
3748 | return "#"; | |
c5e74d9d | 3749 | |
c5e74d9d | 3750 | else |
16370e79 MM |
3751 | { |
3752 | HOST_WIDE_INT elt = INTVAL (operands[2]); | |
427a7384 | 3753 | HOST_WIDE_INT elt_adj = (!BYTES_BIG_ENDIAN |
16370e79 MM |
3754 | ? GET_MODE_NUNITS (<MODE>mode) - 1 - elt |
3755 | : elt); | |
3756 | ||
3757 | HOST_WIDE_INT unit_size = GET_MODE_UNIT_SIZE (<MODE>mode); | |
3758 | HOST_WIDE_INT offset = unit_size * elt_adj; | |
3759 | ||
3760 | operands[2] = GEN_INT (offset); | |
3761 | if (unit_size == 4) | |
3762 | return "xxextractuw %x0,%x1,%2"; | |
3763 | else | |
3764 | return "vextractu<wd> %0,%1,%2"; | |
3765 | } | |
c5e74d9d | 3766 | } |
0c8ac746 SB |
3767 | [(set_attr "type" "vecsimple") |
3768 | (set_attr "isa" "p9v,*")]) | |
c5e74d9d | 3769 | |
16370e79 | 3770 | (define_split |
e0e3ce63 SB |
3771 | [(set (match_operand:<VEC_base> 0 "int_reg_operand") |
3772 | (vec_select:<VEC_base> | |
16370e79 MM |
3773 | (match_operand:VSX_EXTRACT_I 1 "altivec_register_operand") |
3774 | (parallel [(match_operand:QI 2 "const_int_operand")]))) | |
3775 | (clobber (match_operand:SI 3 "int_reg_operand"))] | |
9bfda664 | 3776 | "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB && reload_completed" |
16370e79 MM |
3777 | [(const_int 0)] |
3778 | { | |
3779 | rtx op0_si = gen_rtx_REG (SImode, REGNO (operands[0])); | |
3780 | rtx op1 = operands[1]; | |
3781 | rtx op2 = operands[2]; | |
3782 | rtx op3 = operands[3]; | |
3783 | HOST_WIDE_INT offset = INTVAL (op2) * GET_MODE_UNIT_SIZE (<MODE>mode); | |
3784 | ||
3785 | emit_move_insn (op3, GEN_INT (offset)); | |
427a7384 | 3786 | if (BYTES_BIG_ENDIAN) |
16370e79 MM |
3787 | emit_insn (gen_vextu<wd>lx (op0_si, op3, op1)); |
3788 | else | |
3789 | emit_insn (gen_vextu<wd>rx (op0_si, op3, op1)); | |
3790 | DONE; | |
3791 | }) | |
3792 | ||
456f0dfa MM |
3793 | ;; Optimize zero extracts to eliminate the AND after the extract. |
3794 | (define_insn_and_split "*vsx_extract_<mode>_di_p9" | |
16370e79 | 3795 | [(set (match_operand:DI 0 "gpc_reg_operand" "=r,<VSX_EX>") |
456f0dfa | 3796 | (zero_extend:DI |
e0e3ce63 | 3797 | (vec_select:<VEC_base> |
791e7779 | 3798 | (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v,<VSX_EX>") |
16370e79 MM |
3799 | (parallel [(match_operand:QI 2 "const_int_operand" "n,n")])))) |
3800 | (clobber (match_scratch:SI 3 "=r,X"))] | |
9bfda664 | 3801 | "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB" |
456f0dfa MM |
3802 | "#" |
3803 | "&& reload_completed" | |
16370e79 | 3804 | [(parallel [(set (match_dup 4) |
e0e3ce63 | 3805 | (vec_select:<VEC_base> |
16370e79 MM |
3806 | (match_dup 1) |
3807 | (parallel [(match_dup 2)]))) | |
3808 | (clobber (match_dup 3))])] | |
456f0dfa | 3809 | { |
e0e3ce63 | 3810 | operands[4] = gen_rtx_REG (<VEC_base>mode, REGNO (operands[0])); |
0c8ac746 SB |
3811 | } |
3812 | [(set_attr "isa" "p9v,*")]) | |
456f0dfa MM |
3813 | |
3814 | ;; Optimize stores to use the ISA 3.0 scalar store instructions | |
3815 | (define_insn_and_split "*vsx_extract_<mode>_store_p9" | |
e0e3ce63 SB |
3816 | [(set (match_operand:<VEC_base> 0 "memory_operand" "=Z,m") |
3817 | (vec_select:<VEC_base> | |
60fb638f | 3818 | (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "<VSX_EX>,v") |
16370e79 | 3819 | (parallel [(match_operand:QI 2 "const_int_operand" "n,n")]))) |
e0e3ce63 | 3820 | (clobber (match_scratch:<VEC_base> 3 "=<VSX_EX>,&*r")) |
16370e79 | 3821 | (clobber (match_scratch:SI 4 "=X,&r"))] |
9bfda664 | 3822 | "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB" |
456f0dfa MM |
3823 | "#" |
3824 | "&& reload_completed" | |
16370e79 | 3825 | [(parallel [(set (match_dup 3) |
e0e3ce63 | 3826 | (vec_select:<VEC_base> |
16370e79 MM |
3827 | (match_dup 1) |
3828 | (parallel [(match_dup 2)]))) | |
3829 | (clobber (match_dup 4))]) | |
456f0dfa MM |
3830 | (set (match_dup 0) |
3831 | (match_dup 3))]) | |
3832 | ||
787c7a65 | 3833 | (define_insn_and_split "*vsx_extract_si" |
791e7779 | 3834 | [(set (match_operand:SI 0 "nonimmediate_operand" "=r,wa,Z") |
787c7a65 | 3835 | (vec_select:SI |
0c8ac746 | 3836 | (match_operand:V4SI 1 "gpc_reg_operand" "v,v,v") |
156b5cca | 3837 | (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n")]))) |
0c8ac746 | 3838 | (clobber (match_scratch:V4SI 3 "=v,v,v"))] |
9bfda664 | 3839 | "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT && !TARGET_P9_VECTOR" |
787c7a65 MM |
3840 | "#" |
3841 | "&& reload_completed" | |
3842 | [(const_int 0)] | |
3843 | { | |
3844 | rtx dest = operands[0]; | |
3845 | rtx src = operands[1]; | |
3846 | rtx element = operands[2]; | |
3847 | rtx vec_tmp = operands[3]; | |
3848 | int value; | |
3849 | ||
b3e77224 KL |
3850 | /* Adjust index for LE element ordering, the below minuend 3 is computed by |
3851 | GET_MODE_NUNITS (V4SImode) - 1. */ | |
427a7384 | 3852 | if (!BYTES_BIG_ENDIAN) |
b3e77224 | 3853 | element = GEN_INT (3 - INTVAL (element)); |
787c7a65 MM |
3854 | |
3855 | /* If the value is in the correct position, we can avoid doing the VSPLT<x> | |
3856 | instruction. */ | |
3857 | value = INTVAL (element); | |
3858 | if (value != 1) | |
9bfda664 | 3859 | emit_insn (gen_altivec_vspltw_direct (vec_tmp, src, element)); |
787c7a65 MM |
3860 | else |
3861 | vec_tmp = src; | |
3862 | ||
3863 | if (MEM_P (operands[0])) | |
3864 | { | |
3865 | if (can_create_pseudo_p ()) | |
3af0c6bc | 3866 | dest = rs6000_force_indexed_or_indirect_mem (dest); |
787c7a65 | 3867 | |
9bfda664 | 3868 | if (TARGET_P8_VECTOR) |
787c7a65 MM |
3869 | emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp))); |
3870 | else | |
3871 | emit_insn (gen_stfiwx (dest, gen_rtx_REG (DImode, REGNO (vec_tmp)))); | |
3872 | } | |
3873 | ||
9bfda664 | 3874 | else if (TARGET_P8_VECTOR) |
787c7a65 MM |
3875 | emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp))); |
3876 | else | |
3877 | emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)), | |
3878 | gen_rtx_REG (DImode, REGNO (vec_tmp))); | |
3879 | ||
3880 | DONE; | |
3881 | } | |
863e8d53 | 3882 | [(set_attr "type" "mfvsr,vecperm,fpstore") |
791e7779 SB |
3883 | (set_attr "length" "8") |
3884 | (set_attr "isa" "*,p8v,*")]) | |
787c7a65 | 3885 | |
e2a99194 | 3886 | (define_insn_and_split "*vsx_extract_<mode>_p8" |
e0e3ce63 SB |
3887 | [(set (match_operand:<VEC_base> 0 "nonimmediate_operand" "=r") |
3888 | (vec_select:<VEC_base> | |
787c7a65 | 3889 | (match_operand:VSX_EXTRACT_I2 1 "gpc_reg_operand" "v") |
e2a99194 | 3890 | (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")]))) |
787c7a65 | 3891 | (clobber (match_scratch:VSX_EXTRACT_I2 3 "=v"))] |
456f0dfa | 3892 | "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT |
9bfda664 | 3893 | && !TARGET_P9_VECTOR" |
e2a99194 MM |
3894 | "#" |
3895 | "&& reload_completed" | |
3896 | [(const_int 0)] | |
3897 | { | |
3898 | rtx dest = operands[0]; | |
3899 | rtx src = operands[1]; | |
3900 | rtx element = operands[2]; | |
3901 | rtx vec_tmp = operands[3]; | |
3902 | int value; | |
3903 | ||
427a7384 | 3904 | if (!BYTES_BIG_ENDIAN) |
e2a99194 MM |
3905 | element = GEN_INT (GET_MODE_NUNITS (<MODE>mode) - 1 - INTVAL (element)); |
3906 | ||
3907 | /* If the value is in the correct position, we can avoid doing the VSPLT<x> | |
3908 | instruction. */ | |
3909 | value = INTVAL (element); | |
3910 | if (<MODE>mode == V16QImode) | |
3911 | { | |
3912 | if (value != 7) | |
3913 | emit_insn (gen_altivec_vspltb_direct (vec_tmp, src, element)); | |
3914 | else | |
3915 | vec_tmp = src; | |
3916 | } | |
3917 | else if (<MODE>mode == V8HImode) | |
3918 | { | |
3919 | if (value != 3) | |
3920 | emit_insn (gen_altivec_vsplth_direct (vec_tmp, src, element)); | |
3921 | else | |
3922 | vec_tmp = src; | |
3923 | } | |
e2a99194 MM |
3924 | else |
3925 | gcc_unreachable (); | |
3926 | ||
3927 | emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)), | |
3928 | gen_rtx_REG (DImode, REGNO (vec_tmp))); | |
3929 | DONE; | |
3930 | } | |
863e8d53 | 3931 | [(set_attr "type" "mfvsr")]) |
e2a99194 MM |
3932 | |
3933 | ;; Optimize extracting a single scalar element from memory. | |
3934 | (define_insn_and_split "*vsx_extract_<mode>_load" | |
e0e3ce63 SB |
3935 | [(set (match_operand:<VEC_base> 0 "register_operand" "=r") |
3936 | (vec_select:<VEC_base> | |
e2a99194 MM |
3937 | (match_operand:VSX_EXTRACT_I 1 "memory_operand" "m") |
3938 | (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")]))) | |
3939 | (clobber (match_scratch:DI 3 "=&b"))] | |
3940 | "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT" | |
3941 | "#" | |
3942 | "&& reload_completed" | |
3943 | [(set (match_dup 0) (match_dup 4))] | |
3944 | { | |
3945 | operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2], | |
e0e3ce63 | 3946 | operands[3], <VEC_base>mode); |
e2a99194 MM |
3947 | } |
3948 | [(set_attr "type" "load") | |
3949 | (set_attr "length" "8")]) | |
3950 | ||
75c299ac | 3951 | ;; Variable V16QI/V8HI/V4SI extract from a register |
e2a99194 | 3952 | (define_insn_and_split "vsx_extract_<mode>_var" |
e0e3ce63 SB |
3953 | [(set (match_operand:<VEC_base> 0 "gpc_reg_operand" "=r,r") |
3954 | (unspec:<VEC_base> | |
75c299ac MM |
3955 | [(match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v,v") |
3956 | (match_operand:DI 2 "gpc_reg_operand" "r,r")] | |
e2a99194 | 3957 | UNSPEC_VSX_EXTRACT)) |
75c299ac MM |
3958 | (clobber (match_scratch:DI 3 "=r,r")) |
3959 | (clobber (match_scratch:V2DI 4 "=X,&v"))] | |
e2a99194 MM |
3960 | "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT" |
3961 | "#" | |
3962 | "&& reload_completed" | |
3963 | [(const_int 0)] | |
3964 | { | |
3965 | rs6000_split_vec_extract_var (operands[0], operands[1], operands[2], | |
3966 | operands[3], operands[4]); | |
3967 | DONE; | |
0c8ac746 | 3968 | } |
75c299ac MM |
3969 | [(set_attr "isa" "p9v,*")]) |
3970 | ||
3971 | ;; Variable V16QI/V8HI/V4SI extract from memory | |
3972 | (define_insn_and_split "*vsx_extract_<mode>_var_load" | |
e0e3ce63 SB |
3973 | [(set (match_operand:<VEC_base> 0 "gpc_reg_operand" "=r") |
3974 | (unspec:<VEC_base> | |
75c299ac MM |
3975 | [(match_operand:VSX_EXTRACT_I 1 "memory_operand" "Q") |
3976 | (match_operand:DI 2 "gpc_reg_operand" "r")] | |
3977 | UNSPEC_VSX_EXTRACT)) | |
3978 | (clobber (match_scratch:DI 3 "=&b"))] | |
3979 | "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT" | |
3980 | "#" | |
3981 | "&& reload_completed" | |
3982 | [(set (match_dup 0) (match_dup 4))] | |
3983 | { | |
3984 | operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2], | |
e0e3ce63 | 3985 | operands[3], <VEC_base>mode); |
75c299ac MM |
3986 | } |
3987 | [(set_attr "type" "load")]) | |
c5e74d9d | 3988 | |
30d02149 CL |
3989 | ;; ISA 3.1 extract |
3990 | (define_expand "vextractl<mode>" | |
3991 | [(set (match_operand:V2DI 0 "altivec_register_operand") | |
3992 | (unspec:V2DI [(match_operand:VI2 1 "altivec_register_operand") | |
3993 | (match_operand:VI2 2 "altivec_register_operand") | |
3994 | (match_operand:SI 3 "register_operand")] | |
3995 | UNSPEC_EXTRACTL))] | |
3996 | "TARGET_POWER10" | |
3997 | { | |
3998 | if (BYTES_BIG_ENDIAN) | |
3999 | { | |
4000 | emit_insn (gen_vextractl<mode>_internal (operands[0], operands[1], | |
4001 | operands[2], operands[3])); | |
4002 | emit_insn (gen_xxswapd_v2di (operands[0], operands[0])); | |
4003 | } | |
4004 | else | |
4005 | emit_insn (gen_vextractr<mode>_internal (operands[0], operands[2], | |
4006 | operands[1], operands[3])); | |
4007 | DONE; | |
4008 | }) | |
4009 | ||
4010 | (define_insn "vextractl<mode>_internal" | |
4011 | [(set (match_operand:V2DI 0 "altivec_register_operand" "=v") | |
4012 | (unspec:V2DI [(match_operand:VEC_I 1 "altivec_register_operand" "v") | |
4013 | (match_operand:VEC_I 2 "altivec_register_operand" "v") | |
4014 | (match_operand:SI 3 "register_operand" "r")] | |
4015 | UNSPEC_EXTRACTL))] | |
4016 | "TARGET_POWER10" | |
4017 | "vext<du_or_d><wd>vlx %0,%1,%2,%3" | |
4018 | [(set_attr "type" "vecsimple")]) | |
4019 | ||
4020 | (define_expand "vextractr<mode>" | |
4021 | [(set (match_operand:V2DI 0 "altivec_register_operand") | |
4022 | (unspec:V2DI [(match_operand:VI2 1 "altivec_register_operand") | |
4023 | (match_operand:VI2 2 "altivec_register_operand") | |
4024 | (match_operand:SI 3 "register_operand")] | |
4025 | UNSPEC_EXTRACTR))] | |
4026 | "TARGET_POWER10" | |
4027 | { | |
4028 | if (BYTES_BIG_ENDIAN) | |
4029 | { | |
4030 | emit_insn (gen_vextractr<mode>_internal (operands[0], operands[1], | |
4031 | operands[2], operands[3])); | |
4032 | emit_insn (gen_xxswapd_v2di (operands[0], operands[0])); | |
4033 | } | |
4034 | else | |
4035 | emit_insn (gen_vextractl<mode>_internal (operands[0], operands[2], | |
4036 | operands[1], operands[3])); | |
4037 | DONE; | |
4038 | }) | |
4039 | ||
4040 | (define_insn "vextractr<mode>_internal" | |
4041 | [(set (match_operand:V2DI 0 "altivec_register_operand" "=v") | |
4042 | (unspec:V2DI [(match_operand:VEC_I 1 "altivec_register_operand" "v") | |
4043 | (match_operand:VEC_I 2 "altivec_register_operand" "v") | |
4044 | (match_operand:SI 3 "register_operand" "r")] | |
4045 | UNSPEC_EXTRACTR))] | |
4046 | "TARGET_POWER10" | |
4047 | "vext<du_or_d><wd>vrx %0,%1,%2,%3" | |
4048 | [(set_attr "type" "vecsimple")]) | |
4049 | ||
530e9095 CL |
4050 | (define_expand "vinsertvl_<mode>" |
4051 | [(set (match_operand:VI2 0 "altivec_register_operand") | |
4052 | (unspec:VI2 [(match_operand:VI2 1 "altivec_register_operand") | |
4053 | (match_operand:VI2 2 "altivec_register_operand") | |
4054 | (match_operand:SI 3 "register_operand" "r")] | |
4055 | UNSPEC_INSERTL))] | |
4056 | "TARGET_POWER10" | |
4057 | { | |
4058 | if (BYTES_BIG_ENDIAN) | |
4059 | emit_insn (gen_vinsertvl_internal_<mode> (operands[0], operands[3], | |
4060 | operands[1], operands[2])); | |
4061 | else | |
4062 | emit_insn (gen_vinsertvr_internal_<mode> (operands[0], operands[3], | |
4063 | operands[1], operands[2])); | |
4064 | DONE; | |
4065 | }) | |
4066 | ||
4067 | (define_insn "vinsertvl_internal_<mode>" | |
4068 | [(set (match_operand:VEC_I 0 "altivec_register_operand" "=v") | |
4069 | (unspec:VEC_I [(match_operand:SI 1 "register_operand" "r") | |
4070 | (match_operand:VEC_I 2 "altivec_register_operand" "v") | |
4071 | (match_operand:VEC_I 3 "altivec_register_operand" "0")] | |
4072 | UNSPEC_INSERTL))] | |
4073 | "TARGET_POWER10" | |
4074 | "vins<wd>vlx %0,%1,%2" | |
4075 | [(set_attr "type" "vecsimple")]) | |
4076 | ||
4077 | (define_expand "vinsertvr_<mode>" | |
4078 | [(set (match_operand:VI2 0 "altivec_register_operand") | |
4079 | (unspec:VI2 [(match_operand:VI2 1 "altivec_register_operand") | |
4080 | (match_operand:VI2 2 "altivec_register_operand") | |
4081 | (match_operand:SI 3 "register_operand" "r")] | |
4082 | UNSPEC_INSERTR))] | |
4083 | "TARGET_POWER10" | |
4084 | { | |
4085 | if (BYTES_BIG_ENDIAN) | |
4086 | emit_insn (gen_vinsertvr_internal_<mode> (operands[0], operands[3], | |
4087 | operands[1], operands[2])); | |
4088 | else | |
4089 | emit_insn (gen_vinsertvl_internal_<mode> (operands[0], operands[3], | |
4090 | operands[1], operands[2])); | |
4091 | DONE; | |
4092 | }) | |
4093 | ||
4094 | (define_insn "vinsertvr_internal_<mode>" | |
4095 | [(set (match_operand:VEC_I 0 "altivec_register_operand" "=v") | |
4096 | (unspec:VEC_I [(match_operand:SI 1 "register_operand" "r") | |
4097 | (match_operand:VEC_I 2 "altivec_register_operand" "v") | |
4098 | (match_operand:VEC_I 3 "altivec_register_operand" "0")] | |
4099 | UNSPEC_INSERTR))] | |
4100 | "TARGET_POWER10" | |
4101 | "vins<wd>vrx %0,%1,%2" | |
4102 | [(set_attr "type" "vecsimple")]) | |
4103 | ||
4104 | (define_expand "vinsertgl_<mode>" | |
4105 | [(set (match_operand:VI2 0 "altivec_register_operand") | |
4106 | (unspec:VI2 [(match_operand:SI 1 "register_operand") | |
4107 | (match_operand:VI2 2 "altivec_register_operand") | |
4108 | (match_operand:SI 3 "register_operand")] | |
4109 | UNSPEC_INSERTL))] | |
4110 | "TARGET_POWER10" | |
4111 | { | |
4112 | if (BYTES_BIG_ENDIAN) | |
4113 | emit_insn (gen_vinsertgl_internal_<mode> (operands[0], operands[3], | |
4114 | operands[1], operands[2])); | |
4115 | else | |
4116 | emit_insn (gen_vinsertgr_internal_<mode> (operands[0], operands[3], | |
4117 | operands[1], operands[2])); | |
4118 | DONE; | |
4119 | }) | |
4120 | ||
4121 | (define_insn "vinsertgl_internal_<mode>" | |
4122 | [(set (match_operand:VEC_I 0 "altivec_register_operand" "=v") | |
4123 | (unspec:VEC_I [(match_operand:SI 1 "register_operand" "r") | |
4124 | (match_operand:SI 2 "register_operand" "r") | |
4125 | (match_operand:VEC_I 3 "altivec_register_operand" "0")] | |
4126 | UNSPEC_INSERTL))] | |
4127 | "TARGET_POWER10" | |
4128 | "vins<wd>lx %0,%1,%2" | |
4129 | [(set_attr "type" "vecsimple")]) | |
4130 | ||
4131 | (define_expand "vinsertgr_<mode>" | |
4132 | [(set (match_operand:VI2 0 "altivec_register_operand") | |
4133 | (unspec:VI2 [(match_operand:SI 1 "register_operand") | |
4134 | (match_operand:VI2 2 "altivec_register_operand") | |
4135 | (match_operand:SI 3 "register_operand")] | |
4136 | UNSPEC_INSERTR))] | |
4137 | "TARGET_POWER10" | |
4138 | { | |
4139 | if (BYTES_BIG_ENDIAN) | |
4140 | emit_insn (gen_vinsertgr_internal_<mode> (operands[0], operands[3], | |
4141 | operands[1], operands[2])); | |
4142 | else | |
4143 | emit_insn (gen_vinsertgl_internal_<mode> (operands[0], operands[3], | |
4144 | operands[1], operands[2])); | |
4145 | DONE; | |
4146 | }) | |
4147 | ||
4148 | (define_insn "vinsertgr_internal_<mode>" | |
4149 | [(set (match_operand:VEC_I 0 "altivec_register_operand" "=v") | |
4150 | (unspec:VEC_I [(match_operand:SI 1 "register_operand" "r") | |
4151 | (match_operand:SI 2 "register_operand" "r") | |
4152 | (match_operand:VEC_I 3 "altivec_register_operand" "0")] | |
4153 | UNSPEC_INSERTR))] | |
4154 | "TARGET_POWER10" | |
4155 | "vins<wd>rx %0,%1,%2" | |
4156 | [(set_attr "type" "vecsimple")]) | |
4157 | ||
3f029aea CL |
4158 | (define_expand "vreplace_elt_<mode>" |
4159 | [(set (match_operand:REPLACE_ELT 0 "register_operand") | |
4160 | (unspec:REPLACE_ELT [(match_operand:REPLACE_ELT 1 "register_operand") | |
e0e3ce63 | 4161 | (match_operand:<VEC_base> 2 "register_operand") |
3f029aea CL |
4162 | (match_operand:QI 3 "const_0_to_3_operand")] |
4163 | UNSPEC_REPLACE_ELT))] | |
4164 | "TARGET_POWER10" | |
4165 | { | |
4166 | int index; | |
4167 | /* Immediate value is the word index, convert to byte index and adjust for | |
4168 | Endianness if needed. */ | |
4169 | if (BYTES_BIG_ENDIAN) | |
4170 | index = INTVAL (operands[3]) << <REPLACE_ELT_sh>; | |
4171 | ||
4172 | else | |
4173 | index = <REPLACE_ELT_max> - (INTVAL (operands[3]) << <REPLACE_ELT_sh>); | |
4174 | ||
4175 | emit_insn (gen_vreplace_elt_<mode>_inst (operands[0], operands[1], | |
4176 | operands[2], | |
4177 | GEN_INT (index))); | |
4178 | DONE; | |
4179 | } | |
4180 | [(set_attr "type" "vecsimple")]) | |
4181 | ||
3f029aea CL |
4182 | (define_insn "vreplace_elt_<mode>_inst" |
4183 | [(set (match_operand:REPLACE_ELT 0 "register_operand" "=v") | |
4184 | (unspec:REPLACE_ELT [(match_operand:REPLACE_ELT 1 "register_operand" "0") | |
e0e3ce63 | 4185 | (match_operand:<VEC_base> 2 "register_operand" "r") |
3f029aea CL |
4186 | (match_operand:QI 3 "const_0_to_12_operand" "n")] |
4187 | UNSPEC_REPLACE_ELT))] | |
4188 | "TARGET_POWER10" | |
4189 | "vins<REPLACE_ELT_char> %0,%2,%3" | |
4190 | [(set_attr "type" "vecsimple")]) | |
4191 | ||
ed3fea09 BS |
4192 | (define_insn "vreplace_un_<mode>" |
4193 | [(set (match_operand:V16QI 0 "register_operand" "=v") | |
4194 | (unspec:V16QI [(match_operand:REPLACE_ELT 1 "register_operand" "0") | |
e0e3ce63 | 4195 | (match_operand:<VEC_base> 2 "register_operand" "r") |
ed3fea09 BS |
4196 | (match_operand:QI 3 "const_0_to_12_operand" "n")] |
4197 | UNSPEC_REPLACE_UN))] | |
4198 | "TARGET_POWER10" | |
4199 | "vins<REPLACE_ELT_char> %0,%2,%3" | |
4200 | [(set_attr "type" "vecsimple")]) | |
4201 | ||
156b5cca MM |
4202 | ;; VSX_EXTRACT optimizations |
4203 | ;; Optimize double d = (double) vec_extract (vi, <n>) | |
4204 | ;; Get the element into the top position and use XVCVSWDP/XVCVUWDP | |
4205 | (define_insn_and_split "*vsx_extract_si_<uns>float_df" | |
cc998fd5 | 4206 | [(set (match_operand:DF 0 "gpc_reg_operand" "=wa") |
156b5cca MM |
4207 | (any_float:DF |
4208 | (vec_select:SI | |
4209 | (match_operand:V4SI 1 "gpc_reg_operand" "v") | |
4210 | (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")])))) | |
4211 | (clobber (match_scratch:V4SI 3 "=v"))] | |
4212 | "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT" | |
4213 | "#" | |
4214 | "&& 1" | |
4215 | [(const_int 0)] | |
4216 | { | |
4217 | rtx dest = operands[0]; | |
4218 | rtx src = operands[1]; | |
4219 | rtx element = operands[2]; | |
4220 | rtx v4si_tmp = operands[3]; | |
4221 | int value; | |
4222 | ||
b3e77224 KL |
4223 | /* Adjust index for LE element ordering, the below minuend 3 is computed by |
4224 | GET_MODE_NUNITS (V4SImode) - 1. */ | |
427a7384 | 4225 | if (!BYTES_BIG_ENDIAN) |
b3e77224 | 4226 | element = GEN_INT (3 - INTVAL (element)); |
156b5cca MM |
4227 | |
4228 | /* If the value is in the correct position, we can avoid doing the VSPLT<x> | |
4229 | instruction. */ | |
4230 | value = INTVAL (element); | |
4231 | if (value != 0) | |
4232 | { | |
4233 | if (GET_CODE (v4si_tmp) == SCRATCH) | |
4234 | v4si_tmp = gen_reg_rtx (V4SImode); | |
4235 | emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element)); | |
4236 | } | |
4237 | else | |
4238 | v4si_tmp = src; | |
4239 | ||
4240 | emit_insn (gen_vsx_xvcv<su>xwdp_df (dest, v4si_tmp)); | |
4241 | DONE; | |
4242 | }) | |
4243 | ||
4244 | ;; Optimize <type> f = (<type>) vec_extract (vi, <n>) | |
4245 | ;; where <type> is a floating point type that supported by the hardware that is | |
4246 | ;; not double. First convert the value to double, and then to the desired | |
4247 | ;; type. | |
4248 | (define_insn_and_split "*vsx_extract_si_<uns>float_<mode>" | |
72e3386e | 4249 | [(set (match_operand:VSX_EXTRACT_FL 0 "gpc_reg_operand" "=wa") |
156b5cca MM |
4250 | (any_float:VSX_EXTRACT_FL |
4251 | (vec_select:SI | |
4252 | (match_operand:V4SI 1 "gpc_reg_operand" "v") | |
4253 | (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")])))) | |
4254 | (clobber (match_scratch:V4SI 3 "=v")) | |
cc998fd5 | 4255 | (clobber (match_scratch:DF 4 "=wa"))] |
156b5cca MM |
4256 | "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT" |
4257 | "#" | |
4258 | "&& 1" | |
4259 | [(const_int 0)] | |
4260 | { | |
4261 | rtx dest = operands[0]; | |
4262 | rtx src = operands[1]; | |
4263 | rtx element = operands[2]; | |
4264 | rtx v4si_tmp = operands[3]; | |
4265 | rtx df_tmp = operands[4]; | |
4266 | int value; | |
4267 | ||
b3e77224 KL |
4268 | /* Adjust index for LE element ordering, the below minuend 3 is computed by |
4269 | GET_MODE_NUNITS (V4SImode) - 1. */ | |
427a7384 | 4270 | if (!BYTES_BIG_ENDIAN) |
b3e77224 | 4271 | element = GEN_INT (3 - INTVAL (element)); |
156b5cca MM |
4272 | |
4273 | /* If the value is in the correct position, we can avoid doing the VSPLT<x> | |
4274 | instruction. */ | |
4275 | value = INTVAL (element); | |
4276 | if (value != 0) | |
4277 | { | |
4278 | if (GET_CODE (v4si_tmp) == SCRATCH) | |
4279 | v4si_tmp = gen_reg_rtx (V4SImode); | |
4280 | emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element)); | |
4281 | } | |
4282 | else | |
4283 | v4si_tmp = src; | |
4284 | ||
4285 | if (GET_CODE (df_tmp) == SCRATCH) | |
4286 | df_tmp = gen_reg_rtx (DFmode); | |
4287 | ||
4288 | emit_insn (gen_vsx_xvcv<su>xwdp_df (df_tmp, v4si_tmp)); | |
4289 | ||
4290 | if (<MODE>mode == SFmode) | |
4291 | emit_insn (gen_truncdfsf2 (dest, df_tmp)); | |
4292 | else if (<MODE>mode == TFmode && FLOAT128_IBM_P (TFmode)) | |
4293 | emit_insn (gen_extenddftf2_vsx (dest, df_tmp)); | |
4294 | else if (<MODE>mode == TFmode && FLOAT128_IEEE_P (TFmode) | |
4295 | && TARGET_FLOAT128_HW) | |
4296 | emit_insn (gen_extenddftf2_hw (dest, df_tmp)); | |
4297 | else if (<MODE>mode == IFmode && FLOAT128_IBM_P (IFmode)) | |
4298 | emit_insn (gen_extenddfif2 (dest, df_tmp)); | |
4299 | else if (<MODE>mode == KFmode && TARGET_FLOAT128_HW) | |
4300 | emit_insn (gen_extenddfkf2_hw (dest, df_tmp)); | |
4301 | else | |
4302 | gcc_unreachable (); | |
4303 | ||
4304 | DONE; | |
4305 | }) | |
4306 | ||
16370e79 MM |
4307 | ;; Optimize <type> f = (<ftype>) vec_extract (<vtype>, <n>) |
4308 | ;; Where <ftype> is SFmode, DFmode (and KFmode/TFmode if those types are IEEE | |
4309 | ;; 128-bit hardware types) and <vtype> is vector char, vector unsigned char, | |
4310 | ;; vector short or vector unsigned short. | |
e0e3ce63 | 4311 | (define_insn_and_split "*vsx_ext_<VSX_EXTRACT_I:VEC_base>_fl_<FL_CONV:mode>" |
cb152d12 | 4312 | [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=wa") |
16370e79 | 4313 | (float:FL_CONV |
e0e3ce63 | 4314 | (vec_select:<VSX_EXTRACT_I:VEC_base> |
16370e79 MM |
4315 | (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v") |
4316 | (parallel [(match_operand:QI 2 "const_int_operand" "n")])))) | |
e0e3ce63 | 4317 | (clobber (match_scratch:<VSX_EXTRACT_I:VEC_base> 3 "=v"))] |
16370e79 | 4318 | "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT |
9bfda664 | 4319 | && TARGET_P9_VECTOR" |
16370e79 MM |
4320 | "#" |
4321 | "&& reload_completed" | |
4322 | [(parallel [(set (match_dup 3) | |
e0e3ce63 | 4323 | (vec_select:<VSX_EXTRACT_I:VEC_base> |
16370e79 MM |
4324 | (match_dup 1) |
4325 | (parallel [(match_dup 2)]))) | |
4326 | (clobber (scratch:SI))]) | |
4327 | (set (match_dup 4) | |
4328 | (sign_extend:DI (match_dup 3))) | |
4329 | (set (match_dup 0) | |
4330 | (float:<FL_CONV:MODE> (match_dup 4)))] | |
4331 | { | |
4332 | operands[4] = gen_rtx_REG (DImode, REGNO (operands[3])); | |
cb152d12 | 4333 | } |
f6e5e4b8 | 4334 | [(set_attr "isa" "<FL_CONV:VSisa>")]) |
16370e79 | 4335 | |
e0e3ce63 | 4336 | (define_insn_and_split "*vsx_ext_<VSX_EXTRACT_I:VEC_base>_ufl_<FL_CONV:mode>" |
cb152d12 | 4337 | [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=wa") |
16370e79 | 4338 | (unsigned_float:FL_CONV |
e0e3ce63 | 4339 | (vec_select:<VSX_EXTRACT_I:VEC_base> |
16370e79 MM |
4340 | (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v") |
4341 | (parallel [(match_operand:QI 2 "const_int_operand" "n")])))) | |
e0e3ce63 | 4342 | (clobber (match_scratch:<VSX_EXTRACT_I:VEC_base> 3 "=v"))] |
16370e79 | 4343 | "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT |
9bfda664 | 4344 | && TARGET_P9_VECTOR" |
16370e79 MM |
4345 | "#" |
4346 | "&& reload_completed" | |
4347 | [(parallel [(set (match_dup 3) | |
e0e3ce63 | 4348 | (vec_select:<VSX_EXTRACT_I:VEC_base> |
16370e79 MM |
4349 | (match_dup 1) |
4350 | (parallel [(match_dup 2)]))) | |
4351 | (clobber (scratch:SI))]) | |
4352 | (set (match_dup 0) | |
4353 | (float:<FL_CONV:MODE> (match_dup 4)))] | |
4354 | { | |
4355 | operands[4] = gen_rtx_REG (DImode, REGNO (operands[3])); | |
cb152d12 | 4356 | } |
f6e5e4b8 | 4357 | [(set_attr "isa" "<FL_CONV:VSisa>")]) |
16370e79 | 4358 | |
bcb9a772 MM |
4359 | ;; V4SI/V8HI/V16QI set operation on ISA 3.0 |
4360 | (define_insn "vsx_set_<mode>_p9" | |
4361 | [(set (match_operand:VSX_EXTRACT_I 0 "gpc_reg_operand" "=<VSX_EX>") | |
4362 | (unspec:VSX_EXTRACT_I | |
4363 | [(match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "0") | |
e0e3ce63 | 4364 | (match_operand:<VEC_base> 2 "gpc_reg_operand" "<VSX_EX>") |
bcb9a772 MM |
4365 | (match_operand:QI 3 "<VSX_EXTRACT_PREDICATE>" "n")] |
4366 | UNSPEC_VSX_SET))] | |
9bfda664 | 4367 | "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_P9_VECTOR && TARGET_POWERPC64" |
bcb9a772 MM |
4368 | { |
4369 | int ele = INTVAL (operands[3]); | |
4370 | int nunits = GET_MODE_NUNITS (<MODE>mode); | |
4371 | ||
427a7384 | 4372 | if (!BYTES_BIG_ENDIAN) |
bcb9a772 MM |
4373 | ele = nunits - 1 - ele; |
4374 | ||
e0e3ce63 | 4375 | operands[3] = GEN_INT (GET_MODE_SIZE (<VEC_base>mode) * ele); |
bcb9a772 MM |
4376 | if (<MODE>mode == V4SImode) |
4377 | return "xxinsertw %x0,%x2,%3"; | |
4378 | else | |
4379 | return "vinsert<wd> %0,%2,%3"; | |
4380 | } | |
4381 | [(set_attr "type" "vecperm")]) | |
4382 | ||
16122c22 MM |
4383 | (define_insn_and_split "vsx_set_v4sf_p9" |
4384 | [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa") | |
4385 | (unspec:V4SF | |
4386 | [(match_operand:V4SF 1 "gpc_reg_operand" "0") | |
72e3386e | 4387 | (match_operand:SF 2 "gpc_reg_operand" "wa") |
16122c22 MM |
4388 | (match_operand:QI 3 "const_0_to_3_operand" "n")] |
4389 | UNSPEC_VSX_SET)) | |
791e7779 | 4390 | (clobber (match_scratch:SI 4 "=&wa"))] |
9bfda664 | 4391 | "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64" |
16122c22 MM |
4392 | "#" |
4393 | "&& reload_completed" | |
4394 | [(set (match_dup 5) | |
4395 | (unspec:V4SF [(match_dup 2)] | |
4396 | UNSPEC_VSX_CVDPSPN)) | |
4397 | (parallel [(set (match_dup 4) | |
4398 | (vec_select:SI (match_dup 6) | |
4399 | (parallel [(match_dup 7)]))) | |
4400 | (clobber (scratch:SI))]) | |
4401 | (set (match_dup 8) | |
4402 | (unspec:V4SI [(match_dup 8) | |
4403 | (match_dup 4) | |
4404 | (match_dup 3)] | |
4405 | UNSPEC_VSX_SET))] | |
4406 | { | |
4407 | unsigned int tmp_regno = reg_or_subregno (operands[4]); | |
4408 | ||
4409 | operands[5] = gen_rtx_REG (V4SFmode, tmp_regno); | |
4410 | operands[6] = gen_rtx_REG (V4SImode, tmp_regno); | |
f74fc01d | 4411 | operands[7] = GEN_INT (BYTES_BIG_ENDIAN ? 0 : 3); |
16122c22 MM |
4412 | operands[8] = gen_rtx_REG (V4SImode, reg_or_subregno (operands[0])); |
4413 | } | |
4414 | [(set_attr "type" "vecperm") | |
0c8ac746 SB |
4415 | (set_attr "length" "12") |
4416 | (set_attr "isa" "p9v")]) | |
16122c22 MM |
4417 | |
4418 | ;; Special case setting 0.0f to a V4SF element | |
4419 | (define_insn_and_split "*vsx_set_v4sf_p9_zero" | |
4420 | [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa") | |
4421 | (unspec:V4SF | |
4422 | [(match_operand:V4SF 1 "gpc_reg_operand" "0") | |
4423 | (match_operand:SF 2 "zero_fp_constant" "j") | |
4424 | (match_operand:QI 3 "const_0_to_3_operand" "n")] | |
4425 | UNSPEC_VSX_SET)) | |
791e7779 | 4426 | (clobber (match_scratch:SI 4 "=&wa"))] |
9bfda664 | 4427 | "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64" |
16122c22 MM |
4428 | "#" |
4429 | "&& reload_completed" | |
4430 | [(set (match_dup 4) | |
4431 | (const_int 0)) | |
4432 | (set (match_dup 5) | |
4433 | (unspec:V4SI [(match_dup 5) | |
4434 | (match_dup 4) | |
4435 | (match_dup 3)] | |
4436 | UNSPEC_VSX_SET))] | |
4437 | { | |
4438 | operands[5] = gen_rtx_REG (V4SImode, reg_or_subregno (operands[0])); | |
4439 | } | |
4440 | [(set_attr "type" "vecperm") | |
0c8ac746 SB |
4441 | (set_attr "length" "8") |
4442 | (set_attr "isa" "p9v")]) | |
16122c22 MM |
4443 | |
4444 | ;; Optimize x = vec_insert (vec_extract (v2, n), v1, m) if n is the element | |
4445 | ;; that is in the default scalar position (1 for big endian, 2 for little | |
4446 | ;; endian). We just need to do an xxinsertw since the element is in the | |
4447 | ;; correct location. | |
4448 | ||
4449 | (define_insn "*vsx_insert_extract_v4sf_p9" | |
4450 | [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa") | |
4451 | (unspec:V4SF | |
4452 | [(match_operand:V4SF 1 "gpc_reg_operand" "0") | |
4453 | (vec_select:SF (match_operand:V4SF 2 "gpc_reg_operand" "wa") | |
4454 | (parallel | |
4455 | [(match_operand:QI 3 "const_0_to_3_operand" "n")])) | |
4456 | (match_operand:QI 4 "const_0_to_3_operand" "n")] | |
4457 | UNSPEC_VSX_SET))] | |
9bfda664 | 4458 | "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64 |
427a7384 | 4459 | && (INTVAL (operands[3]) == (BYTES_BIG_ENDIAN ? 1 : 2))" |
16122c22 MM |
4460 | { |
4461 | int ele = INTVAL (operands[4]); | |
4462 | ||
b3e77224 KL |
4463 | /* Adjust index for LE element ordering, the below minuend 3 is computed by |
4464 | GET_MODE_NUNITS (V4SFmode) - 1. */ | |
427a7384 | 4465 | if (!BYTES_BIG_ENDIAN) |
b3e77224 | 4466 | ele = 3 - ele; |
16122c22 MM |
4467 | |
4468 | operands[4] = GEN_INT (GET_MODE_SIZE (SFmode) * ele); | |
4469 | return "xxinsertw %x0,%x2,%4"; | |
4470 | } | |
4471 | [(set_attr "type" "vecperm")]) | |
4472 | ||
4473 | ;; Optimize x = vec_insert (vec_extract (v2, n), v1, m) if n is not the element | |
4474 | ;; that is in the default scalar position (1 for big endian, 2 for little | |
4475 | ;; endian). Convert the insert/extract to int and avoid doing the conversion. | |
4476 | ||
4477 | (define_insn_and_split "*vsx_insert_extract_v4sf_p9_2" | |
4478 | [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa") | |
4479 | (unspec:V4SF | |
4480 | [(match_operand:V4SF 1 "gpc_reg_operand" "0") | |
4481 | (vec_select:SF (match_operand:V4SF 2 "gpc_reg_operand" "wa") | |
4482 | (parallel | |
4483 | [(match_operand:QI 3 "const_0_to_3_operand" "n")])) | |
4484 | (match_operand:QI 4 "const_0_to_3_operand" "n")] | |
4485 | UNSPEC_VSX_SET)) | |
791e7779 | 4486 | (clobber (match_scratch:SI 5 "=&wa"))] |
16122c22 | 4487 | "VECTOR_MEM_VSX_P (V4SFmode) && VECTOR_MEM_VSX_P (V4SImode) |
9bfda664 | 4488 | && TARGET_P9_VECTOR && TARGET_POWERPC64 |
427a7384 | 4489 | && (INTVAL (operands[3]) != (BYTES_BIG_ENDIAN ? 1 : 2))" |
16122c22 MM |
4490 | "#" |
4491 | "&& 1" | |
4492 | [(parallel [(set (match_dup 5) | |
4493 | (vec_select:SI (match_dup 6) | |
4494 | (parallel [(match_dup 3)]))) | |
4495 | (clobber (scratch:SI))]) | |
4496 | (set (match_dup 7) | |
4497 | (unspec:V4SI [(match_dup 8) | |
4498 | (match_dup 5) | |
4499 | (match_dup 4)] | |
4500 | UNSPEC_VSX_SET))] | |
4501 | { | |
4502 | if (GET_CODE (operands[5]) == SCRATCH) | |
4503 | operands[5] = gen_reg_rtx (SImode); | |
4504 | ||
4505 | operands[6] = gen_lowpart (V4SImode, operands[2]); | |
4506 | operands[7] = gen_lowpart (V4SImode, operands[0]); | |
4507 | operands[8] = gen_lowpart (V4SImode, operands[1]); | |
4508 | } | |
0c8ac746 SB |
4509 | [(set_attr "type" "vecperm") |
4510 | (set_attr "isa" "p9v")]) | |
16122c22 | 4511 | |
5aebfdad RH |
4512 | ;; Expanders for builtins |
4513 | (define_expand "vsx_mergel_<mode>" | |
ad18eed2 SB |
4514 | [(use (match_operand:VSX_D 0 "vsx_register_operand")) |
4515 | (use (match_operand:VSX_D 1 "vsx_register_operand")) | |
4516 | (use (match_operand:VSX_D 2 "vsx_register_operand"))] | |
5aebfdad | 4517 | "VECTOR_MEM_VSX_P (<MODE>mode)" |
68d3bacf | 4518 | { |
427a7384 SB |
4519 | rtvec v = gen_rtvec (2, GEN_INT (1), GEN_INT (3)); |
4520 | rtx x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]); | |
68d3bacf | 4521 | x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v)); |
f7df4a84 | 4522 | emit_insn (gen_rtx_SET (operands[0], x)); |
c3e96073 | 4523 | DONE; |
68d3bacf | 4524 | }) |
5aebfdad RH |
4525 | |
4526 | (define_expand "vsx_mergeh_<mode>" | |
ad18eed2 SB |
4527 | [(use (match_operand:VSX_D 0 "vsx_register_operand")) |
4528 | (use (match_operand:VSX_D 1 "vsx_register_operand")) | |
4529 | (use (match_operand:VSX_D 2 "vsx_register_operand"))] | |
5aebfdad | 4530 | "VECTOR_MEM_VSX_P (<MODE>mode)" |
68d3bacf | 4531 | { |
427a7384 SB |
4532 | rtvec v = gen_rtvec (2, GEN_INT (0), GEN_INT (2)); |
4533 | rtx x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]); | |
68d3bacf | 4534 | x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v)); |
f7df4a84 | 4535 | emit_insn (gen_rtx_SET (operands[0], x)); |
c3e96073 | 4536 | DONE; |
68d3bacf | 4537 | }) |
5aebfdad | 4538 | |
29e6733c | 4539 | ;; V2DF/V2DI splat |
d1f6caae MM |
4540 | ;; We separate the register splat insn from the memory splat insn to force the |
4541 | ;; register allocator to generate the indexed form of the SPLAT when it is | |
4542 | ;; given an offsettable memory reference. Otherwise, if the register and | |
4543 | ;; memory insns were combined into a single insn, the register allocator will | |
4544 | ;; load the value into a register, and then do a double word permute. | |
4545 | (define_expand "vsx_splat_<mode>" | |
4546 | [(set (match_operand:VSX_D 0 "vsx_register_operand") | |
29e6733c | 4547 | (vec_duplicate:VSX_D |
e0e3ce63 | 4548 | (match_operand:<VEC_base> 1 "input_operand")))] |
d1f6caae MM |
4549 | "VECTOR_MEM_VSX_P (<MODE>mode)" |
4550 | { | |
4551 | rtx op1 = operands[1]; | |
4552 | if (MEM_P (op1)) | |
3af0c6bc | 4553 | operands[1] = rs6000_force_indexed_or_indirect_mem (op1); |
d1f6caae | 4554 | else if (!REG_P (op1)) |
e0e3ce63 | 4555 | op1 = force_reg (<VSX_D:VEC_base>mode, op1); |
d1f6caae MM |
4556 | }) |
4557 | ||
4558 | (define_insn "vsx_splat_<mode>_reg" | |
012f609e | 4559 | [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa,we") |
d1f6caae | 4560 | (vec_duplicate:VSX_D |
e0e3ce63 | 4561 | (match_operand:<VEC_base> 1 "gpc_reg_operand" "wa,b")))] |
29e6733c MM |
4562 | "VECTOR_MEM_VSX_P (<MODE>mode)" |
4563 | "@ | |
29e6733c | 4564 | xxpermdi %x0,%x1,%x1,0 |
d1f6caae | 4565 | mtvsrdd %x0,%1,%1" |
b0894ae0 | 4566 | [(set_attr "type" "vecperm,vecmove")]) |
d1f6caae | 4567 | |
012f609e SB |
4568 | (define_insn "vsx_splat_<mode>_mem" |
4569 | [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa") | |
d1f6caae | 4570 | (vec_duplicate:VSX_D |
e0e3ce63 | 4571 | (match_operand:<VSX_D:VEC_base> 1 "memory_operand" "Z")))] |
d1f6caae MM |
4572 | "VECTOR_MEM_VSX_P (<MODE>mode)" |
4573 | "lxvdsx %x0,%y1" | |
4574 | [(set_attr "type" "vecload")]) | |
29e6733c | 4575 | |
6019c0fc MM |
4576 | ;; V4SI splat support |
4577 | (define_insn "vsx_splat_v4si" | |
6b8152b9 | 4578 | [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa,wa") |
50c78b9a | 4579 | (vec_duplicate:V4SI |
58f2fb5c | 4580 | (match_operand:SI 1 "splat_input_operand" "r,Z")))] |
50c78b9a MM |
4581 | "TARGET_P9_VECTOR" |
4582 | "@ | |
4583 | mtvsrws %x0,%1 | |
4584 | lxvwsx %x0,%y1" | |
6019c0fc MM |
4585 | [(set_attr "type" "vecperm,vecload")]) |
4586 | ||
4587 | ;; SImode is not currently allowed in vector registers. This pattern | |
4588 | ;; allows us to use direct move to get the value in a vector register | |
4589 | ;; so that we can use XXSPLTW | |
4590 | (define_insn "vsx_splat_v4si_di" | |
a7dce762 | 4591 | [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa,we") |
6019c0fc MM |
4592 | (vec_duplicate:V4SI |
4593 | (truncate:SI | |
e670418f | 4594 | (match_operand:DI 1 "gpc_reg_operand" "wa,r"))))] |
6019c0fc MM |
4595 | "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT" |
4596 | "@ | |
4597 | xxspltw %x0,%x1,1 | |
4598 | mtvsrws %x0,%1" | |
818502a3 SB |
4599 | [(set_attr "type" "vecperm") |
4600 | (set_attr "isa" "p8v,*")]) | |
50c78b9a MM |
4601 | |
4602 | ;; V4SF splat (ISA 3.0) | |
6019c0fc | 4603 | (define_insn_and_split "vsx_splat_v4sf" |
50c78b9a MM |
4604 | [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa,wa") |
4605 | (vec_duplicate:V4SF | |
8509e170 | 4606 | (match_operand:SF 1 "splat_input_operand" "Z,wa,r")))] |
50c78b9a MM |
4607 | "TARGET_P9_VECTOR" |
4608 | "@ | |
4609 | lxvwsx %x0,%y1 | |
4610 | # | |
4611 | mtvsrws %x0,%1" | |
4612 | "&& reload_completed && vsx_register_operand (operands[1], SFmode)" | |
4613 | [(set (match_dup 0) | |
4614 | (unspec:V4SF [(match_dup 1)] UNSPEC_VSX_CVDPSPN)) | |
4615 | (set (match_dup 0) | |
70c11966 MM |
4616 | (unspec:V4SF [(match_dup 0) |
4617 | (const_int 0)] UNSPEC_VSX_XXSPLTW))] | |
50c78b9a | 4618 | "" |
b0894ae0 | 4619 | [(set_attr "type" "vecload,vecperm,vecperm") |
911c8df0 | 4620 | (set_attr "length" "*,8,*") |
8509e170 | 4621 | (set_attr "isa" "*,p8v,*")]) |
50c78b9a MM |
4622 | |
4623 | ;; V4SF/V4SI splat from a vector element | |
29e6733c | 4624 | (define_insn "vsx_xxspltw_<mode>" |
7858932e | 4625 | [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa") |
29e6733c | 4626 | (vec_duplicate:VSX_W |
e0e3ce63 | 4627 | (vec_select:<VEC_base> |
7858932e | 4628 | (match_operand:VSX_W 1 "vsx_register_operand" "wa") |
29e6733c | 4629 | (parallel |
6019c0fc | 4630 | [(match_operand:QI 2 "u5bit_cint_operand" "n")]))))] |
29e6733c | 4631 | "VECTOR_MEM_VSX_P (<MODE>mode)" |
bf53d4b8 BS |
4632 | { |
4633 | if (!BYTES_BIG_ENDIAN) | |
4634 | operands[2] = GEN_INT (3 - INTVAL (operands[2])); | |
4635 | ||
4636 | return "xxspltw %x0,%x1,%2"; | |
4637 | } | |
4638 | [(set_attr "type" "vecperm")]) | |
4639 | ||
4640 | (define_insn "vsx_xxspltw_<mode>_direct" | |
7858932e SB |
4641 | [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa") |
4642 | (unspec:VSX_W [(match_operand:VSX_W 1 "vsx_register_operand" "wa") | |
6019c0fc | 4643 | (match_operand:QI 2 "u5bit_cint_operand" "i")] |
bf53d4b8 BS |
4644 | UNSPEC_VSX_XXSPLTW))] |
4645 | "VECTOR_MEM_VSX_P (<MODE>mode)" | |
29e6733c MM |
4646 | "xxspltw %x0,%x1,%2" |
4647 | [(set_attr "type" "vecperm")]) | |
4648 | ||
6019c0fc MM |
4649 | ;; V16QI/V8HI splat support on ISA 2.07 |
4650 | (define_insn "vsx_vsplt<VSX_SPLAT_SUFFIX>_di" | |
4651 | [(set (match_operand:VSX_SPLAT_I 0 "altivec_register_operand" "=v") | |
4652 | (vec_duplicate:VSX_SPLAT_I | |
e0e3ce63 | 4653 | (truncate:<VEC_base> |
6019c0fc MM |
4654 | (match_operand:DI 1 "altivec_register_operand" "v"))))] |
4655 | "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT" | |
4656 | "vsplt<VSX_SPLAT_SUFFIX> %0,%1,<VSX_SPLAT_COUNT>" | |
4657 | [(set_attr "type" "vecperm")]) | |
4658 | ||
2ccdda19 BS |
4659 | ;; V2DF/V2DI splat for use by vec_splat builtin |
4660 | (define_insn "vsx_xxspltd_<mode>" | |
4661 | [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa") | |
4662 | (unspec:VSX_D [(match_operand:VSX_D 1 "vsx_register_operand" "wa") | |
4663 | (match_operand:QI 2 "u5bit_cint_operand" "i")] | |
4664 | UNSPEC_VSX_XXSPLTD))] | |
4665 | "VECTOR_MEM_VSX_P (<MODE>mode)" | |
4666 | { | |
427a7384 SB |
4667 | if ((BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 0) |
4668 | || (!BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 1)) | |
2ccdda19 BS |
4669 | return "xxpermdi %x0,%x1,%x1,0"; |
4670 | else | |
4671 | return "xxpermdi %x0,%x1,%x1,3"; | |
4672 | } | |
4673 | [(set_attr "type" "vecperm")]) | |
4674 | ||
29e6733c | 4675 | ;; V4SF/V4SI interleave |
0910c516 | 4676 | (define_expand "vsx_xxmrghw_<mode>" |
7858932e | 4677 | [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa") |
5aebfdad RH |
4678 | (vec_select:VSX_W |
4679 | (vec_concat:<VS_double> | |
7858932e SB |
4680 | (match_operand:VSX_W 1 "vsx_register_operand" "wa") |
4681 | (match_operand:VSX_W 2 "vsx_register_operand" "wa")) | |
5aebfdad RH |
4682 | (parallel [(const_int 0) (const_int 4) |
4683 | (const_int 1) (const_int 5)])))] | |
29e6733c | 4684 | "VECTOR_MEM_VSX_P (<MODE>mode)" |
ed79f4d0 | 4685 | { |
0910c516 XL |
4686 | rtx (*fun) (rtx, rtx, rtx); |
4687 | fun = BYTES_BIG_ENDIAN ? gen_altivec_vmrghw_direct_<mode> | |
4688 | : gen_altivec_vmrglw_direct_<mode>; | |
4689 | if (!BYTES_BIG_ENDIAN) | |
4690 | std::swap (operands[1], operands[2]); | |
4691 | emit_insn (fun (operands[0], operands[1], operands[2])); | |
4692 | DONE; | |
ed79f4d0 | 4693 | } |
29e6733c MM |
4694 | [(set_attr "type" "vecperm")]) |
4695 | ||
0910c516 | 4696 | (define_expand "vsx_xxmrglw_<mode>" |
7858932e | 4697 | [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa") |
5aebfdad RH |
4698 | (vec_select:VSX_W |
4699 | (vec_concat:<VS_double> | |
7858932e SB |
4700 | (match_operand:VSX_W 1 "vsx_register_operand" "wa") |
4701 | (match_operand:VSX_W 2 "vsx_register_operand" "wa")) | |
5aebfdad RH |
4702 | (parallel [(const_int 2) (const_int 6) |
4703 | (const_int 3) (const_int 7)])))] | |
29e6733c | 4704 | "VECTOR_MEM_VSX_P (<MODE>mode)" |
ed79f4d0 | 4705 | { |
0910c516 XL |
4706 | rtx (*fun) (rtx, rtx, rtx); |
4707 | fun = BYTES_BIG_ENDIAN ? gen_altivec_vmrglw_direct_<mode> | |
4708 | : gen_altivec_vmrghw_direct_<mode>; | |
4709 | if (!BYTES_BIG_ENDIAN) | |
4710 | std::swap (operands[1], operands[2]); | |
4711 | emit_insn (fun (operands[0], operands[1], operands[2])); | |
4712 | DONE; | |
ed79f4d0 | 4713 | } |
29e6733c MM |
4714 | [(set_attr "type" "vecperm")]) |
4715 | ||
4716 | ;; Shift left double by word immediate | |
4717 | (define_insn "vsx_xxsldwi_<mode>" | |
cb152d12 SB |
4718 | [(set (match_operand:VSX_L 0 "vsx_register_operand" "=wa") |
4719 | (unspec:VSX_L [(match_operand:VSX_L 1 "vsx_register_operand" "wa") | |
4720 | (match_operand:VSX_L 2 "vsx_register_operand" "wa") | |
29e6733c MM |
4721 | (match_operand:QI 3 "u5bit_cint_operand" "i")] |
4722 | UNSPEC_VSX_SLDWI))] | |
4723 | "VECTOR_MEM_VSX_P (<MODE>mode)" | |
4724 | "xxsldwi %x0,%x1,%x2,%3" | |
cb152d12 SB |
4725 | [(set_attr "type" "vecperm") |
4726 | (set_attr "isa" "<VSisa>")]) | |
df10b6d4 MM |
4727 | |
4728 | \f | |
4729 | ;; Vector reduction insns and splitters | |
4730 | ||
5e8edf67 | 4731 | (define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v2df" |
85949949 | 4732 | [(set (match_operand:V2DF 0 "vfloat_operand" "=&wa,wa") |
df10b6d4 MM |
4733 | (VEC_reduc:V2DF |
4734 | (vec_concat:V2DF | |
4735 | (vec_select:DF | |
85949949 | 4736 | (match_operand:V2DF 1 "vfloat_operand" "wa,wa") |
df10b6d4 MM |
4737 | (parallel [(const_int 1)])) |
4738 | (vec_select:DF | |
4739 | (match_dup 1) | |
4740 | (parallel [(const_int 0)]))) | |
4741 | (match_dup 1))) | |
85949949 | 4742 | (clobber (match_scratch:V2DF 2 "=0,&wa"))] |
df10b6d4 MM |
4743 | "VECTOR_UNIT_VSX_P (V2DFmode)" |
4744 | "#" | |
0ec7641e | 4745 | "&& 1" |
df10b6d4 | 4746 | [(const_int 0)] |
df10b6d4 MM |
4747 | { |
4748 | rtx tmp = (GET_CODE (operands[2]) == SCRATCH) | |
4749 | ? gen_reg_rtx (V2DFmode) | |
4750 | : operands[2]; | |
4751 | emit_insn (gen_vsx_xxsldwi_v2df (tmp, operands[1], operands[1], const2_rtx)); | |
4752 | emit_insn (gen_<VEC_reduc_rtx>v2df3 (operands[0], tmp, operands[1])); | |
4753 | DONE; | |
6c332313 | 4754 | } |
df10b6d4 MM |
4755 | [(set_attr "length" "8") |
4756 | (set_attr "type" "veccomplex")]) | |
4757 | ||
5e8edf67 | 4758 | (define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v4sf" |
8d3620ba | 4759 | [(set (match_operand:V4SF 0 "vfloat_operand" "=wa") |
df10b6d4 MM |
4760 | (VEC_reduc:V4SF |
4761 | (unspec:V4SF [(const_int 0)] UNSPEC_REDUC) | |
8d3620ba SB |
4762 | (match_operand:V4SF 1 "vfloat_operand" "wa"))) |
4763 | (clobber (match_scratch:V4SF 2 "=&wa")) | |
4764 | (clobber (match_scratch:V4SF 3 "=&wa"))] | |
df10b6d4 MM |
4765 | "VECTOR_UNIT_VSX_P (V4SFmode)" |
4766 | "#" | |
0ec7641e | 4767 | "&& 1" |
df10b6d4 | 4768 | [(const_int 0)] |
df10b6d4 MM |
4769 | { |
4770 | rtx op0 = operands[0]; | |
4771 | rtx op1 = operands[1]; | |
4772 | rtx tmp2, tmp3, tmp4; | |
4773 | ||
4774 | if (can_create_pseudo_p ()) | |
4775 | { | |
4776 | tmp2 = gen_reg_rtx (V4SFmode); | |
4777 | tmp3 = gen_reg_rtx (V4SFmode); | |
4778 | tmp4 = gen_reg_rtx (V4SFmode); | |
4779 | } | |
4780 | else | |
4781 | { | |
4782 | tmp2 = operands[2]; | |
4783 | tmp3 = operands[3]; | |
4784 | tmp4 = tmp2; | |
4785 | } | |
4786 | ||
4787 | emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx)); | |
4788 | emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1)); | |
4789 | emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3))); | |
4790 | emit_insn (gen_<VEC_reduc_rtx>v4sf3 (op0, tmp4, tmp3)); | |
4791 | DONE; | |
6c332313 | 4792 | } |
df10b6d4 MM |
4793 | [(set_attr "length" "16") |
4794 | (set_attr "type" "veccomplex")]) | |
4795 | ||
4796 | ;; Combiner patterns with the vector reduction patterns that knows we can get | |
4797 | ;; to the top element of the V2DF array without doing an extract. | |
4798 | ||
4799 | (define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v2df_scalar" | |
85949949 | 4800 | [(set (match_operand:DF 0 "vfloat_operand" "=&wa,wa") |
df10b6d4 MM |
4801 | (vec_select:DF |
4802 | (VEC_reduc:V2DF | |
4803 | (vec_concat:V2DF | |
4804 | (vec_select:DF | |
85949949 | 4805 | (match_operand:V2DF 1 "vfloat_operand" "wa,wa") |
df10b6d4 MM |
4806 | (parallel [(const_int 1)])) |
4807 | (vec_select:DF | |
4808 | (match_dup 1) | |
4809 | (parallel [(const_int 0)]))) | |
4810 | (match_dup 1)) | |
4811 | (parallel [(const_int 1)]))) | |
85949949 | 4812 | (clobber (match_scratch:DF 2 "=0,&wa"))] |
fbf3df55 | 4813 | "BYTES_BIG_ENDIAN && VECTOR_UNIT_VSX_P (V2DFmode)" |
df10b6d4 | 4814 | "#" |
0ec7641e | 4815 | "&& 1" |
df10b6d4 | 4816 | [(const_int 0)] |
df10b6d4 MM |
4817 | { |
4818 | rtx hi = gen_highpart (DFmode, operands[1]); | |
4819 | rtx lo = (GET_CODE (operands[2]) == SCRATCH) | |
4820 | ? gen_reg_rtx (DFmode) | |
4821 | : operands[2]; | |
4822 | ||
4823 | emit_insn (gen_vsx_extract_v2df (lo, operands[1], const1_rtx)); | |
4824 | emit_insn (gen_<VEC_reduc_rtx>df3 (operands[0], hi, lo)); | |
4825 | DONE; | |
6c332313 | 4826 | } |
df10b6d4 MM |
4827 | [(set_attr "length" "8") |
4828 | (set_attr "type" "veccomplex")]) | |
4829 | ||
4830 | (define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v4sf_scalar" | |
8d3620ba | 4831 | [(set (match_operand:SF 0 "vfloat_operand" "=f") |
df10b6d4 MM |
4832 | (vec_select:SF |
4833 | (VEC_reduc:V4SF | |
4834 | (unspec:V4SF [(const_int 0)] UNSPEC_REDUC) | |
8d3620ba | 4835 | (match_operand:V4SF 1 "vfloat_operand" "wa")) |
df10b6d4 | 4836 | (parallel [(const_int 3)]))) |
8d3620ba SB |
4837 | (clobber (match_scratch:V4SF 2 "=&wa")) |
4838 | (clobber (match_scratch:V4SF 3 "=&wa")) | |
4839 | (clobber (match_scratch:V4SF 4 "=0"))] | |
03bb10aa | 4840 | "BYTES_BIG_ENDIAN && VECTOR_UNIT_VSX_P (V4SFmode)" |
df10b6d4 | 4841 | "#" |
0ec7641e | 4842 | "&& 1" |
df10b6d4 | 4843 | [(const_int 0)] |
df10b6d4 MM |
4844 | { |
4845 | rtx op0 = operands[0]; | |
4846 | rtx op1 = operands[1]; | |
4847 | rtx tmp2, tmp3, tmp4, tmp5; | |
4848 | ||
4849 | if (can_create_pseudo_p ()) | |
4850 | { | |
4851 | tmp2 = gen_reg_rtx (V4SFmode); | |
4852 | tmp3 = gen_reg_rtx (V4SFmode); | |
4853 | tmp4 = gen_reg_rtx (V4SFmode); | |
4854 | tmp5 = gen_reg_rtx (V4SFmode); | |
4855 | } | |
4856 | else | |
4857 | { | |
4858 | tmp2 = operands[2]; | |
4859 | tmp3 = operands[3]; | |
4860 | tmp4 = tmp2; | |
4861 | tmp5 = operands[4]; | |
4862 | } | |
4863 | ||
4864 | emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx)); | |
4865 | emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1)); | |
4866 | emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3))); | |
4867 | emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp5, tmp4, tmp3)); | |
4868 | emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp5)); | |
4869 | DONE; | |
6c332313 | 4870 | } |
df10b6d4 MM |
4871 | [(set_attr "length" "20") |
4872 | (set_attr "type" "veccomplex")]) | |
d86e633a MM |
4873 | |
4874 | \f | |
4875 | ;; Power8 Vector fusion. The fused ops must be physically adjacent. | |
4876 | (define_peephole | |
ad18eed2 SB |
4877 | [(set (match_operand:P 0 "base_reg_operand") |
4878 | (match_operand:P 1 "short_cint_operand")) | |
4879 | (set (match_operand:VSX_M 2 "vsx_register_operand") | |
50c78b9a | 4880 | (mem:VSX_M (plus:P (match_dup 0) |
ad18eed2 | 4881 | (match_operand:P 3 "int_reg_operand"))))] |
5d57fdc1 | 4882 | "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR" |
0bcd172e | 4883 | "li %0,%1\;lx<VSX_M:VSm>x %x2,%0,%3\t\t\t# vector load fusion" |
d86e633a MM |
4884 | [(set_attr "length" "8") |
4885 | (set_attr "type" "vecload")]) | |
4886 | ||
4887 | (define_peephole | |
ad18eed2 SB |
4888 | [(set (match_operand:P 0 "base_reg_operand") |
4889 | (match_operand:P 1 "short_cint_operand")) | |
4890 | (set (match_operand:VSX_M 2 "vsx_register_operand") | |
4891 | (mem:VSX_M (plus:P (match_operand:P 3 "int_reg_operand") | |
50c78b9a | 4892 | (match_dup 0))))] |
5d57fdc1 | 4893 | "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR" |
0bcd172e | 4894 | "li %0,%1\;lx<VSX_M:VSm>x %x2,%0,%3\t\t\t# vector load fusion" |
d86e633a MM |
4895 | [(set_attr "length" "8") |
4896 | (set_attr "type" "vecload")]) | |
50c78b9a MM |
4897 | |
4898 | \f | |
db042e16 CL |
4899 | ;; ISA 3.1 vector extend sign support |
4900 | (define_insn "vsx_sign_extend_v2di_v1ti" | |
4901 | [(set (match_operand:V1TI 0 "vsx_register_operand" "=v") | |
4902 | (unspec:V1TI [(match_operand:V2DI 1 "vsx_register_operand" "v")] | |
4903 | UNSPEC_VSX_SIGN_EXTEND))] | |
4904 | "TARGET_POWER10" | |
4905 | "vextsd2q %0,%1" | |
4906 | [(set_attr "type" "vecexts")]) | |
4907 | ||
4908 | (define_expand "vsignextend_v2di_v1ti" | |
4909 | [(set (match_operand:V1TI 0 "vsx_register_operand" "=v") | |
4910 | (unspec:V1TI [(match_operand:V2DI 1 "vsx_register_operand" "v")] | |
4911 | UNSPEC_VSX_SIGN_EXTEND))] | |
4912 | "TARGET_POWER10" | |
4913 | { | |
4914 | if (BYTES_BIG_ENDIAN) | |
4915 | { | |
4916 | rtx tmp = gen_reg_rtx (V2DImode); | |
4917 | ||
4918 | emit_insn (gen_altivec_vrevev2di2(tmp, operands[1])); | |
4919 | emit_insn (gen_vsx_sign_extend_v2di_v1ti(operands[0], tmp)); | |
4920 | DONE; | |
4921 | } | |
4922 | ||
4923 | emit_insn (gen_vsx_sign_extend_v2di_v1ti(operands[0], operands[1])); | |
4924 | }) | |
4925 | ||
50c78b9a MM |
4926 | ;; ISA 3.0 vector extend sign support |
4927 | ||
4928 | (define_insn "vsx_sign_extend_qi_<mode>" | |
4929 | [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v") | |
4930 | (unspec:VSINT_84 | |
4931 | [(match_operand:V16QI 1 "vsx_register_operand" "v")] | |
4932 | UNSPEC_VSX_SIGN_EXTEND))] | |
4933 | "TARGET_P9_VECTOR" | |
4934 | "vextsb2<wd> %0,%1" | |
7c788ce2 | 4935 | [(set_attr "type" "vecexts")]) |
50c78b9a | 4936 | |
db042e16 CL |
4937 | (define_expand "vsignextend_qi_<mode>" |
4938 | [(set (match_operand:VIlong 0 "vsx_register_operand" "=v") | |
4939 | (unspec:VIlong | |
4940 | [(match_operand:V16QI 1 "vsx_register_operand" "v")] | |
4941 | UNSPEC_VSX_SIGN_EXTEND))] | |
4942 | "TARGET_P9_VECTOR" | |
4943 | { | |
4944 | if (BYTES_BIG_ENDIAN) | |
4945 | { | |
4946 | rtx tmp = gen_reg_rtx (V16QImode); | |
4947 | emit_insn (gen_altivec_vrevev16qi2(tmp, operands[1])); | |
4948 | emit_insn (gen_vsx_sign_extend_qi_<mode>(operands[0], tmp)); | |
4949 | } | |
4950 | else | |
4951 | emit_insn (gen_vsx_sign_extend_qi_<mode>(operands[0], operands[1])); | |
4952 | DONE; | |
4953 | }) | |
4954 | ||
ac11b8c0 | 4955 | (define_insn "vsx_sign_extend_hi_<mode>" |
50c78b9a MM |
4956 | [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v") |
4957 | (unspec:VSINT_84 | |
4958 | [(match_operand:V8HI 1 "vsx_register_operand" "v")] | |
4959 | UNSPEC_VSX_SIGN_EXTEND))] | |
4960 | "TARGET_P9_VECTOR" | |
4961 | "vextsh2<wd> %0,%1" | |
7c788ce2 | 4962 | [(set_attr "type" "vecexts")]) |
50c78b9a | 4963 | |
db042e16 CL |
4964 | (define_expand "vsignextend_hi_<mode>" |
4965 | [(set (match_operand:VIlong 0 "vsx_register_operand" "=v") | |
4966 | (unspec:VIlong | |
4967 | [(match_operand:V8HI 1 "vsx_register_operand" "v")] | |
4968 | UNSPEC_VSX_SIGN_EXTEND))] | |
4969 | "TARGET_P9_VECTOR" | |
4970 | { | |
4971 | if (BYTES_BIG_ENDIAN) | |
4972 | { | |
4973 | rtx tmp = gen_reg_rtx (V8HImode); | |
4974 | emit_insn (gen_altivec_vrevev8hi2(tmp, operands[1])); | |
4975 | emit_insn (gen_vsx_sign_extend_hi_<mode>(operands[0], tmp)); | |
4976 | } | |
4977 | else | |
4978 | emit_insn (gen_vsx_sign_extend_hi_<mode>(operands[0], operands[1])); | |
4979 | DONE; | |
4980 | }) | |
4981 | ||
4982 | (define_insn "vsx_sign_extend_si_v2di" | |
50c78b9a MM |
4983 | [(set (match_operand:V2DI 0 "vsx_register_operand" "=v") |
4984 | (unspec:V2DI [(match_operand:V4SI 1 "vsx_register_operand" "v")] | |
4985 | UNSPEC_VSX_SIGN_EXTEND))] | |
4986 | "TARGET_P9_VECTOR" | |
4987 | "vextsw2d %0,%1" | |
7c788ce2 | 4988 | [(set_attr "type" "vecexts")]) |
ac11b8c0 | 4989 | |
db042e16 CL |
4990 | (define_expand "vsignextend_si_v2di" |
4991 | [(set (match_operand:V2DI 0 "vsx_register_operand" "=v") | |
4992 | (unspec:V2DI [(match_operand:V4SI 1 "vsx_register_operand" "v")] | |
4993 | UNSPEC_VSX_SIGN_EXTEND))] | |
4994 | "TARGET_P9_VECTOR" | |
4995 | { | |
4996 | if (BYTES_BIG_ENDIAN) | |
4997 | { | |
4998 | rtx tmp = gen_reg_rtx (V4SImode); | |
4999 | ||
5000 | emit_insn (gen_altivec_vrevev4si2(tmp, operands[1])); | |
5001 | emit_insn (gen_vsx_sign_extend_si_v2di(operands[0], tmp)); | |
5002 | } | |
5003 | else | |
5004 | emit_insn (gen_vsx_sign_extend_si_v2di(operands[0], operands[1])); | |
5005 | DONE; | |
5006 | }) | |
5007 | ||
1301d7f6 MM |
5008 | ;; Sign extend DI to TI. We provide both GPR targets and Altivec targets on |
5009 | ;; power10. On earlier systems, the machine independent code will generate a | |
5010 | ;; shift left to sign extend the 64-bit value to 128-bit. | |
5011 | ;; | |
5012 | ;; If the register allocator prefers to use GPR registers, we will use a shift | |
5013 | ;; left instruction to sign extend the 64-bit value to 128-bit. | |
5014 | ;; | |
5015 | ;; If the register allocator prefers to use Altivec registers on power10, | |
5016 | ;; generate the vextsd2q instruction. | |
5017 | (define_insn_and_split "extendditi2" | |
5018 | [(set (match_operand:TI 0 "register_operand" "=r,r,v,v,v") | |
3cb27b85 | 5019 | (sign_extend:TI (match_operand:DI 1 "input_operand" "r,m,b,wa,Z"))) |
1301d7f6 MM |
5020 | (clobber (reg:DI CA_REGNO))] |
5021 | "TARGET_POWERPC64 && TARGET_POWER10" | |
5022 | "#" | |
5023 | "&& reload_completed" | |
5024 | [(pc)] | |
5025 | { | |
5026 | rtx dest = operands[0]; | |
5027 | rtx src = operands[1]; | |
5028 | int dest_regno = reg_or_subregno (dest); | |
5029 | ||
5030 | /* Handle conversion to GPR registers. Load up the low part and then do | |
5031 | a sign extension to the upper part. */ | |
5032 | if (INT_REGNO_P (dest_regno)) | |
5033 | { | |
5034 | rtx dest_hi = gen_highpart (DImode, dest); | |
5035 | rtx dest_lo = gen_lowpart (DImode, dest); | |
5036 | ||
5037 | emit_move_insn (dest_lo, src); | |
5038 | /* In case src is a MEM, we have to use the destination, which is a | |
5039 | register, instead of re-using the source. */ | |
5040 | rtx src2 = (REG_P (src) || SUBREG_P (src)) ? src : dest_lo; | |
5041 | emit_insn (gen_ashrdi3 (dest_hi, src2, GEN_INT (63))); | |
5042 | DONE; | |
5043 | } | |
5044 | ||
5045 | /* For conversion to an Altivec register, generate either a splat operation | |
5046 | or a load rightmost double word instruction. Both instructions gets the | |
5047 | DImode value into the lower 64 bits, and then do the vextsd2q | |
5048 | instruction. */ | |
5049 | ||
5050 | else if (ALTIVEC_REGNO_P (dest_regno)) | |
5051 | { | |
5052 | if (MEM_P (src)) | |
5053 | emit_insn (gen_vsx_lxvrdx (dest, src)); | |
5054 | else | |
5055 | { | |
5056 | rtx dest_v2di = gen_rtx_REG (V2DImode, dest_regno); | |
5057 | emit_insn (gen_vsx_splat_v2di (dest_v2di, src)); | |
5058 | } | |
5059 | ||
5060 | emit_insn (gen_extendditi2_vector (dest, dest)); | |
5061 | DONE; | |
5062 | } | |
5063 | ||
5064 | else | |
5065 | gcc_unreachable (); | |
5066 | } | |
5067 | [(set_attr "length" "8") | |
5068 | (set_attr "type" "shift,load,vecmove,vecperm,load")]) | |
87325119 WS |
5069 | |
5070 | ;; Sign extend 64-bit value in TI reg, word 1, to 128-bit value in TI reg | |
5071 | (define_insn "extendditi2_vector" | |
5072 | [(set (match_operand:TI 0 "gpc_reg_operand" "=v") | |
5073 | (unspec:TI [(match_operand:TI 1 "gpc_reg_operand" "v")] | |
5074 | UNSPEC_EXTENDDITI2))] | |
5075 | "TARGET_POWER10" | |
5076 | "vextsd2q %0,%1" | |
5077 | [(set_attr "type" "vecexts")]) | |
5078 | ||
ac11b8c0 | 5079 | \f |
e9e6d4f6 KN |
5080 | ;; ISA 3.0 Binary Floating-Point Support |
5081 | ||
b70bb05b | 5082 | ;; VSX Scalar Extract Exponent Quad-Precision |
cdb4b7aa | 5083 | (define_insn "xsxexpqp_<mode>" |
b70bb05b | 5084 | [(set (match_operand:DI 0 "altivec_register_operand" "=v") |
cdb4b7aa | 5085 | (unspec:DI [(match_operand:IEEE128 1 "altivec_register_operand" "v")] |
b70bb05b KN |
5086 | UNSPEC_VSX_SXEXPDP))] |
5087 | "TARGET_P9_VECTOR" | |
5088 | "xsxexpqp %0,%1" | |
5089 | [(set_attr "type" "vecmove")]) | |
5090 | ||
e9e6d4f6 KN |
5091 | ;; VSX Scalar Extract Exponent Double-Precision |
5092 | (define_insn "xsxexpdp" | |
5093 | [(set (match_operand:DI 0 "register_operand" "=r") | |
5094 | (unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")] | |
5095 | UNSPEC_VSX_SXEXPDP))] | |
5096 | "TARGET_P9_VECTOR && TARGET_64BIT" | |
5097 | "xsxexpdp %0,%x1" | |
5098 | [(set_attr "type" "integer")]) | |
5099 | ||
b70bb05b | 5100 | ;; VSX Scalar Extract Significand Quad-Precision |
cdb4b7aa | 5101 | (define_insn "xsxsigqp_<mode>" |
b70bb05b | 5102 | [(set (match_operand:TI 0 "altivec_register_operand" "=v") |
cdb4b7aa | 5103 | (unspec:TI [(match_operand:IEEE128 1 "altivec_register_operand" "v")] |
b70bb05b KN |
5104 | UNSPEC_VSX_SXSIG))] |
5105 | "TARGET_P9_VECTOR" | |
5106 | "xsxsigqp %0,%1" | |
5107 | [(set_attr "type" "vecmove")]) | |
5108 | ||
e9e6d4f6 KN |
5109 | ;; VSX Scalar Extract Significand Double-Precision |
5110 | (define_insn "xsxsigdp" | |
5111 | [(set (match_operand:DI 0 "register_operand" "=r") | |
5112 | (unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")] | |
b70bb05b | 5113 | UNSPEC_VSX_SXSIG))] |
e9e6d4f6 KN |
5114 | "TARGET_P9_VECTOR && TARGET_64BIT" |
5115 | "xsxsigdp %0,%x1" | |
5116 | [(set_attr "type" "integer")]) | |
5117 | ||
b70bb05b | 5118 | ;; VSX Scalar Insert Exponent Quad-Precision Floating Point Argument |
cdb4b7aa MM |
5119 | (define_insn "xsiexpqpf_<mode>" |
5120 | [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v") | |
5121 | (unspec:IEEE128 | |
5122 | [(match_operand:IEEE128 1 "altivec_register_operand" "v") | |
5123 | (match_operand:DI 2 "altivec_register_operand" "v")] | |
b70bb05b KN |
5124 | UNSPEC_VSX_SIEXPQP))] |
5125 | "TARGET_P9_VECTOR" | |
5126 | "xsiexpqp %0,%1,%2" | |
5127 | [(set_attr "type" "vecmove")]) | |
5128 | ||
5129 | ;; VSX Scalar Insert Exponent Quad-Precision | |
cdb4b7aa MM |
5130 | (define_insn "xsiexpqp_<mode>" |
5131 | [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v") | |
5132 | (unspec:IEEE128 [(match_operand:TI 1 "altivec_register_operand" "v") | |
5133 | (match_operand:DI 2 "altivec_register_operand" "v")] | |
b70bb05b KN |
5134 | UNSPEC_VSX_SIEXPQP))] |
5135 | "TARGET_P9_VECTOR" | |
5136 | "xsiexpqp %0,%1,%2" | |
5137 | [(set_attr "type" "vecmove")]) | |
5138 | ||
e9e6d4f6 KN |
5139 | ;; VSX Scalar Insert Exponent Double-Precision |
5140 | (define_insn "xsiexpdp" | |
5141 | [(set (match_operand:DF 0 "vsx_register_operand" "=wa") | |
5142 | (unspec:DF [(match_operand:DI 1 "register_operand" "r") | |
5143 | (match_operand:DI 2 "register_operand" "r")] | |
5144 | UNSPEC_VSX_SIEXPDP))] | |
5145 | "TARGET_P9_VECTOR && TARGET_64BIT" | |
5146 | "xsiexpdp %x0,%1,%2" | |
5147 | [(set_attr "type" "fpsimple")]) | |
5148 | ||
28826a66 KN |
5149 | ;; VSX Scalar Insert Exponent Double-Precision Floating Point Argument |
5150 | (define_insn "xsiexpdpf" | |
5151 | [(set (match_operand:DF 0 "vsx_register_operand" "=wa") | |
5152 | (unspec:DF [(match_operand:DF 1 "register_operand" "r") | |
5153 | (match_operand:DI 2 "register_operand" "r")] | |
5154 | UNSPEC_VSX_SIEXPDP))] | |
5155 | "TARGET_P9_VECTOR && TARGET_64BIT" | |
5156 | "xsiexpdp %x0,%1,%2" | |
5157 | [(set_attr "type" "fpsimple")]) | |
5158 | ||
e9e6d4f6 KN |
5159 | ;; VSX Scalar Compare Exponents Double-Precision |
5160 | (define_expand "xscmpexpdp_<code>" | |
5161 | [(set (match_dup 3) | |
5162 | (compare:CCFP | |
5163 | (unspec:DF | |
5164 | [(match_operand:DF 1 "vsx_register_operand" "wa") | |
5165 | (match_operand:DF 2 "vsx_register_operand" "wa")] | |
5166 | UNSPEC_VSX_SCMPEXPDP) | |
5167 | (const_int 0))) | |
5168 | (set (match_operand:SI 0 "register_operand" "=r") | |
5169 | (CMP_TEST:SI (match_dup 3) | |
5170 | (const_int 0)))] | |
5171 | "TARGET_P9_VECTOR" | |
5172 | { | |
5fba7efc SB |
5173 | if (<CODE> == UNORDERED && !HONOR_NANS (DFmode)) |
5174 | { | |
5175 | emit_move_insn (operands[0], const0_rtx); | |
5176 | DONE; | |
5177 | } | |
5178 | ||
e9e6d4f6 KN |
5179 | operands[3] = gen_reg_rtx (CCFPmode); |
5180 | }) | |
5181 | ||
5182 | (define_insn "*xscmpexpdp" | |
5183 | [(set (match_operand:CCFP 0 "cc_reg_operand" "=y") | |
5184 | (compare:CCFP | |
5185 | (unspec:DF [(match_operand:DF 1 "vsx_register_operand" "wa") | |
5186 | (match_operand:DF 2 "vsx_register_operand" "wa")] | |
5187 | UNSPEC_VSX_SCMPEXPDP) | |
5188 | (match_operand:SI 3 "zero_constant" "j")))] | |
5189 | "TARGET_P9_VECTOR" | |
5190 | "xscmpexpdp %0,%x1,%x2" | |
5191 | [(set_attr "type" "fpcompare")]) | |
5192 | ||
fc756f9f CL |
5193 | ;; VSX Scalar Compare Exponents Quad-Precision |
5194 | (define_expand "xscmpexpqp_<code>_<mode>" | |
5195 | [(set (match_dup 3) | |
5196 | (compare:CCFP | |
5197 | (unspec:IEEE128 | |
5198 | [(match_operand:IEEE128 1 "vsx_register_operand" "v") | |
5199 | (match_operand:IEEE128 2 "vsx_register_operand" "v")] | |
5200 | UNSPEC_VSX_SCMPEXPQP) | |
5201 | (const_int 0))) | |
5202 | (set (match_operand:SI 0 "register_operand" "=r") | |
5203 | (CMP_TEST:SI (match_dup 3) | |
5204 | (const_int 0)))] | |
5205 | "TARGET_P9_VECTOR" | |
5206 | { | |
5fba7efc SB |
5207 | if (<CODE> == UNORDERED && !HONOR_NANS (<MODE>mode)) |
5208 | { | |
5209 | emit_move_insn (operands[0], const0_rtx); | |
5210 | DONE; | |
5211 | } | |
5212 | ||
fc756f9f CL |
5213 | operands[3] = gen_reg_rtx (CCFPmode); |
5214 | }) | |
5215 | ||
5216 | (define_insn "*xscmpexpqp" | |
5217 | [(set (match_operand:CCFP 0 "cc_reg_operand" "=y") | |
5218 | (compare:CCFP | |
5219 | (unspec:IEEE128 [(match_operand:IEEE128 1 "altivec_register_operand" "v") | |
5220 | (match_operand:IEEE128 2 "altivec_register_operand" "v")] | |
5221 | UNSPEC_VSX_SCMPEXPQP) | |
5222 | (match_operand:SI 3 "zero_constant" "j")))] | |
5223 | "TARGET_P9_VECTOR" | |
5224 | "xscmpexpqp %0,%1,%2" | |
5225 | [(set_attr "type" "fpcompare")]) | |
5226 | ||
b70bb05b KN |
5227 | ;; VSX Scalar Test Data Class Quad-Precision |
5228 | ;; (Expansion for scalar_test_data_class (__ieee128, int)) | |
5229 | ;; (Has side effect of setting the lt bit if operand 1 is negative, | |
5230 | ;; setting the eq bit if any of the conditions tested by operand 2 | |
5231 | ;; are satisfied, and clearing the gt and undordered bits to zero.) | |
cdb4b7aa | 5232 | (define_expand "xststdcqp_<mode>" |
b70bb05b KN |
5233 | [(set (match_dup 3) |
5234 | (compare:CCFP | |
cdb4b7aa MM |
5235 | (unspec:IEEE128 |
5236 | [(match_operand:IEEE128 1 "altivec_register_operand" "v") | |
b70bb05b KN |
5237 | (match_operand:SI 2 "u7bit_cint_operand" "n")] |
5238 | UNSPEC_VSX_STSTDC) | |
5239 | (const_int 0))) | |
5240 | (set (match_operand:SI 0 "register_operand" "=r") | |
5241 | (eq:SI (match_dup 3) | |
5242 | (const_int 0)))] | |
5243 | "TARGET_P9_VECTOR" | |
5244 | { | |
5245 | operands[3] = gen_reg_rtx (CCFPmode); | |
5246 | }) | |
5247 | ||
e9e6d4f6 KN |
5248 | ;; VSX Scalar Test Data Class Double- and Single-Precision |
5249 | ;; (The lt bit is set if operand 1 is negative. The eq bit is set | |
5250 | ;; if any of the conditions tested by operand 2 are satisfied. | |
5251 | ;; The gt and unordered bits are cleared to zero.) | |
4c5d4de7 | 5252 | (define_expand "xststdc<sd>p" |
e9e6d4f6 KN |
5253 | [(set (match_dup 3) |
5254 | (compare:CCFP | |
5255 | (unspec:SFDF | |
5256 | [(match_operand:SFDF 1 "vsx_register_operand" "wa") | |
5257 | (match_operand:SI 2 "u7bit_cint_operand" "n")] | |
5258 | UNSPEC_VSX_STSTDC) | |
5259 | (match_dup 4))) | |
5260 | (set (match_operand:SI 0 "register_operand" "=r") | |
5261 | (eq:SI (match_dup 3) | |
5262 | (const_int 0)))] | |
5263 | "TARGET_P9_VECTOR" | |
5264 | { | |
5265 | operands[3] = gen_reg_rtx (CCFPmode); | |
5266 | operands[4] = CONST0_RTX (SImode); | |
5267 | }) | |
5268 | ||
b70bb05b | 5269 | ;; The VSX Scalar Test Negative Quad-Precision |
cdb4b7aa | 5270 | (define_expand "xststdcnegqp_<mode>" |
b70bb05b KN |
5271 | [(set (match_dup 2) |
5272 | (compare:CCFP | |
cdb4b7aa MM |
5273 | (unspec:IEEE128 |
5274 | [(match_operand:IEEE128 1 "altivec_register_operand" "v") | |
b70bb05b KN |
5275 | (const_int 0)] |
5276 | UNSPEC_VSX_STSTDC) | |
5277 | (const_int 0))) | |
5278 | (set (match_operand:SI 0 "register_operand" "=r") | |
5279 | (lt:SI (match_dup 2) | |
5280 | (const_int 0)))] | |
5281 | "TARGET_P9_VECTOR" | |
5282 | { | |
5283 | operands[2] = gen_reg_rtx (CCFPmode); | |
5284 | }) | |
5285 | ||
5286 | ;; The VSX Scalar Test Negative Double- and Single-Precision | |
4c5d4de7 | 5287 | (define_expand "xststdcneg<sd>p" |
e9e6d4f6 KN |
5288 | [(set (match_dup 2) |
5289 | (compare:CCFP | |
5290 | (unspec:SFDF | |
5291 | [(match_operand:SFDF 1 "vsx_register_operand" "wa") | |
5292 | (const_int 0)] | |
5293 | UNSPEC_VSX_STSTDC) | |
5294 | (match_dup 3))) | |
5295 | (set (match_operand:SI 0 "register_operand" "=r") | |
5296 | (lt:SI (match_dup 2) | |
5297 | (const_int 0)))] | |
5298 | "TARGET_P9_VECTOR" | |
5299 | { | |
5300 | operands[2] = gen_reg_rtx (CCFPmode); | |
5301 | operands[3] = CONST0_RTX (SImode); | |
5302 | }) | |
5303 | ||
cdb4b7aa | 5304 | (define_insn "*xststdcqp_<mode>" |
b70bb05b KN |
5305 | [(set (match_operand:CCFP 0 "" "=y") |
5306 | (compare:CCFP | |
cdb4b7aa MM |
5307 | (unspec:IEEE128 |
5308 | [(match_operand:IEEE128 1 "altivec_register_operand" "v") | |
5309 | (match_operand:SI 2 "u7bit_cint_operand" "n")] | |
b70bb05b KN |
5310 | UNSPEC_VSX_STSTDC) |
5311 | (const_int 0)))] | |
5312 | "TARGET_P9_VECTOR" | |
5313 | "xststdcqp %0,%1,%2" | |
5314 | [(set_attr "type" "fpcompare")]) | |
5315 | ||
4c5d4de7 | 5316 | (define_insn "*xststdc<sd>p" |
e9e6d4f6 KN |
5317 | [(set (match_operand:CCFP 0 "" "=y") |
5318 | (compare:CCFP | |
5319 | (unspec:SFDF [(match_operand:SFDF 1 "vsx_register_operand" "wa") | |
5320 | (match_operand:SI 2 "u7bit_cint_operand" "n")] | |
5321 | UNSPEC_VSX_STSTDC) | |
5322 | (match_operand:SI 3 "zero_constant" "j")))] | |
5323 | "TARGET_P9_VECTOR" | |
4c5d4de7 | 5324 | "xststdc<sd>p %0,%x1,%2" |
e9e6d4f6 KN |
5325 | [(set_attr "type" "fpcompare")]) |
5326 | ||
5327 | ;; VSX Vector Extract Exponent Double and Single Precision | |
6cc8f683 | 5328 | (define_insn "xvxexp<sd>p" |
e9e6d4f6 KN |
5329 | [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa") |
5330 | (unspec:VSX_F | |
5331 | [(match_operand:VSX_F 1 "vsx_register_operand" "wa")] | |
5332 | UNSPEC_VSX_VXEXP))] | |
5333 | "TARGET_P9_VECTOR" | |
6cc8f683 | 5334 | "xvxexp<sd>p %x0,%x1" |
e9e6d4f6 KN |
5335 | [(set_attr "type" "vecsimple")]) |
5336 | ||
5337 | ;; VSX Vector Extract Significand Double and Single Precision | |
6cc8f683 | 5338 | (define_insn "xvxsig<sd>p" |
e9e6d4f6 KN |
5339 | [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa") |
5340 | (unspec:VSX_F | |
5341 | [(match_operand:VSX_F 1 "vsx_register_operand" "wa")] | |
5342 | UNSPEC_VSX_VXSIG))] | |
5343 | "TARGET_P9_VECTOR" | |
6cc8f683 | 5344 | "xvxsig<sd>p %x0,%x1" |
e9e6d4f6 KN |
5345 | [(set_attr "type" "vecsimple")]) |
5346 | ||
5347 | ;; VSX Vector Insert Exponent Double and Single Precision | |
6cc8f683 | 5348 | (define_insn "xviexp<sd>p" |
e9e6d4f6 KN |
5349 | [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa") |
5350 | (unspec:VSX_F | |
5351 | [(match_operand:VSX_F 1 "vsx_register_operand" "wa") | |
5352 | (match_operand:VSX_F 2 "vsx_register_operand" "wa")] | |
5353 | UNSPEC_VSX_VIEXP))] | |
5354 | "TARGET_P9_VECTOR" | |
6cc8f683 | 5355 | "xviexp<sd>p %x0,%x1,%x2" |
e9e6d4f6 KN |
5356 | [(set_attr "type" "vecsimple")]) |
5357 | ||
5358 | ;; VSX Vector Test Data Class Double and Single Precision | |
5359 | ;; The corresponding elements of the result vector are all ones | |
5360 | ;; if any of the conditions tested by operand 3 are satisfied. | |
6cc8f683 | 5361 | (define_insn "xvtstdc<sd>p" |
e9e6d4f6 KN |
5362 | [(set (match_operand:<VSI> 0 "vsx_register_operand" "=wa") |
5363 | (unspec:<VSI> | |
5364 | [(match_operand:VSX_F 1 "vsx_register_operand" "wa") | |
5365 | (match_operand:SI 2 "u7bit_cint_operand" "n")] | |
5366 | UNSPEC_VSX_VTSTDC))] | |
5367 | "TARGET_P9_VECTOR" | |
6cc8f683 | 5368 | "xvtstdc<sd>p %x0,%x1,%2" |
e9e6d4f6 | 5369 | [(set_attr "type" "vecsimple")]) |
902cb7b1 KN |
5370 | |
5371 | ;; ISA 3.0 String Operations Support | |
5372 | ||
5373 | ;; Compare vectors producing a vector result and a predicate, setting CR6 | |
5374 | ;; to indicate a combined status. This pattern matches v16qi, v8hi, and | |
5375 | ;; v4si modes. It does not match v2df, v4sf, or v2di modes. There's no | |
50181506 KN |
5376 | ;; need to match v4sf, v2df, or v2di modes because those are expanded |
5377 | ;; to use Power8 instructions. | |
902cb7b1 KN |
5378 | (define_insn "*vsx_ne_<mode>_p" |
5379 | [(set (reg:CC CR6_REGNO) | |
5380 | (unspec:CC | |
5381 | [(ne:CC (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v") | |
5382 | (match_operand:VSX_EXTRACT_I 2 "gpc_reg_operand" "v"))] | |
5383 | UNSPEC_PREDICATE)) | |
5384 | (set (match_operand:VSX_EXTRACT_I 0 "gpc_reg_operand" "=v") | |
5385 | (ne:VSX_EXTRACT_I (match_dup 1) | |
5386 | (match_dup 2)))] | |
5387 | "TARGET_P9_VECTOR" | |
50181506 | 5388 | "vcmpne<VSX_EXTRACT_WIDTH>. %0,%1,%2" |
902cb7b1 KN |
5389 | [(set_attr "type" "vecsimple")]) |
5390 | ||
5391 | (define_insn "*vector_nez_<mode>_p" | |
5392 | [(set (reg:CC CR6_REGNO) | |
5393 | (unspec:CC [(unspec:VI | |
5394 | [(match_operand:VI 1 "gpc_reg_operand" "v") | |
5395 | (match_operand:VI 2 "gpc_reg_operand" "v")] | |
5396 | UNSPEC_NEZ_P)] | |
5397 | UNSPEC_PREDICATE)) | |
5398 | (set (match_operand:VI 0 "gpc_reg_operand" "=v") | |
5399 | (unspec:VI [(match_dup 1) | |
5400 | (match_dup 2)] | |
5401 | UNSPEC_NEZ_P))] | |
5402 | "TARGET_P9_VECTOR" | |
5403 | "vcmpnez<VSX_EXTRACT_WIDTH>. %0,%1,%2" | |
5404 | [(set_attr "type" "vecsimple")]) | |
5405 | ||
029435a3 CL |
5406 | ;; Return first position of match between vectors using natural order |
5407 | ;; for both LE and BE execution modes. | |
4d85d480 CL |
5408 | (define_expand "first_match_index_<mode>" |
5409 | [(match_operand:SI 0 "register_operand") | |
5410 | (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand") | |
5411 | (match_operand:VSX_EXTRACT_I 2 "register_operand")] | |
5412 | UNSPEC_VSX_FIRST_MATCH_INDEX)] | |
5413 | "TARGET_P9_VECTOR" | |
5414 | { | |
5415 | int sh; | |
5416 | ||
5417 | rtx cmp_result = gen_reg_rtx (<MODE>mode); | |
5418 | rtx not_result = gen_reg_rtx (<MODE>mode); | |
5419 | ||
24f68831 CL |
5420 | emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmp_result, operands[1], |
5421 | operands[2])); | |
4d85d480 CL |
5422 | emit_insn (gen_one_cmpl<mode>2 (not_result, cmp_result)); |
5423 | ||
5424 | sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2; | |
5425 | ||
5426 | if (<MODE>mode == V16QImode) | |
029435a3 CL |
5427 | { |
5428 | if (!BYTES_BIG_ENDIAN) | |
5429 | emit_insn (gen_vctzlsbb_<mode> (operands[0], not_result)); | |
5430 | else | |
5431 | emit_insn (gen_vclzlsbb_<mode> (operands[0], not_result)); | |
5432 | } | |
4d85d480 CL |
5433 | else |
5434 | { | |
5435 | rtx tmp = gen_reg_rtx (SImode); | |
029435a3 CL |
5436 | if (!BYTES_BIG_ENDIAN) |
5437 | emit_insn (gen_vctzlsbb_<mode> (tmp, not_result)); | |
5438 | else | |
5439 | emit_insn (gen_vclzlsbb_<mode> (tmp, not_result)); | |
5440 | emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh))); | |
4d85d480 CL |
5441 | } |
5442 | DONE; | |
5443 | }) | |
5444 | ||
029435a3 CL |
5445 | ;; Return first position of match between vectors or end of string (EOS) using |
5446 | ;; natural element order for both LE and BE execution modes. | |
4d85d480 CL |
5447 | (define_expand "first_match_or_eos_index_<mode>" |
5448 | [(match_operand:SI 0 "register_operand") | |
5449 | (unspec: SI [(match_operand:VSX_EXTRACT_I 1 "register_operand") | |
5450 | (match_operand:VSX_EXTRACT_I 2 "register_operand")] | |
5451 | UNSPEC_VSX_FIRST_MATCH_EOS_INDEX)] | |
5452 | "TARGET_P9_VECTOR" | |
5453 | { | |
5454 | int sh; | |
5455 | rtx cmpz1_result = gen_reg_rtx (<MODE>mode); | |
5456 | rtx cmpz2_result = gen_reg_rtx (<MODE>mode); | |
5457 | rtx cmpz_result = gen_reg_rtx (<MODE>mode); | |
5458 | rtx and_result = gen_reg_rtx (<MODE>mode); | |
5459 | rtx result = gen_reg_rtx (<MODE>mode); | |
5460 | rtx vzero = gen_reg_rtx (<MODE>mode); | |
5461 | ||
5462 | /* Vector with zeros in elements that correspond to zeros in operands. */ | |
5463 | emit_move_insn (vzero, CONST0_RTX (<MODE>mode)); | |
5464 | emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz1_result, operands[1], vzero)); | |
5465 | emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz2_result, operands[2], vzero)); | |
5466 | emit_insn (gen_and<mode>3 (and_result, cmpz1_result, cmpz2_result)); | |
5467 | ||
5468 | /* Vector with ones in elments that do not match. */ | |
5469 | emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmpz_result, operands[1], | |
5470 | operands[2])); | |
5471 | ||
5472 | /* Create vector with ones in elements where there was a zero in one of | |
5473 | the source elements or the elements that match. */ | |
5474 | emit_insn (gen_nand<mode>3 (result, and_result, cmpz_result)); | |
5475 | sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2; | |
5476 | ||
5477 | if (<MODE>mode == V16QImode) | |
029435a3 CL |
5478 | { |
5479 | if (!BYTES_BIG_ENDIAN) | |
5480 | emit_insn (gen_vctzlsbb_<mode> (operands[0], result)); | |
5481 | else | |
5482 | emit_insn (gen_vclzlsbb_<mode> (operands[0], result)); | |
5483 | } | |
4d85d480 CL |
5484 | else |
5485 | { | |
5486 | rtx tmp = gen_reg_rtx (SImode); | |
029435a3 CL |
5487 | if (!BYTES_BIG_ENDIAN) |
5488 | emit_insn (gen_vctzlsbb_<mode> (tmp, result)); | |
5489 | else | |
5490 | emit_insn (gen_vclzlsbb_<mode> (tmp, result)); | |
5491 | emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh))); | |
4d85d480 CL |
5492 | } |
5493 | DONE; | |
5494 | }) | |
5495 | ||
029435a3 CL |
5496 | ;; Return first position of mismatch between vectors using natural |
5497 | ;; element order for both LE and BE execution modes. | |
4d85d480 CL |
5498 | (define_expand "first_mismatch_index_<mode>" |
5499 | [(match_operand:SI 0 "register_operand") | |
5500 | (unspec: SI [(match_operand:VSX_EXTRACT_I 1 "register_operand") | |
5501 | (match_operand:VSX_EXTRACT_I 2 "register_operand")] | |
5502 | UNSPEC_VSX_FIRST_MISMATCH_INDEX)] | |
5503 | "TARGET_P9_VECTOR" | |
5504 | { | |
5505 | int sh; | |
5506 | rtx cmp_result = gen_reg_rtx (<MODE>mode); | |
5507 | ||
5508 | emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmp_result, operands[1], | |
5509 | operands[2])); | |
5510 | sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2; | |
5511 | ||
5512 | if (<MODE>mode == V16QImode) | |
029435a3 CL |
5513 | { |
5514 | if (!BYTES_BIG_ENDIAN) | |
5515 | emit_insn (gen_vctzlsbb_<mode> (operands[0], cmp_result)); | |
5516 | else | |
5517 | emit_insn (gen_vclzlsbb_<mode> (operands[0], cmp_result)); | |
5518 | } | |
4d85d480 CL |
5519 | else |
5520 | { | |
5521 | rtx tmp = gen_reg_rtx (SImode); | |
029435a3 CL |
5522 | if (!BYTES_BIG_ENDIAN) |
5523 | emit_insn (gen_vctzlsbb_<mode> (tmp, cmp_result)); | |
5524 | else | |
5525 | emit_insn (gen_vclzlsbb_<mode> (tmp, cmp_result)); | |
5526 | emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh))); | |
4d85d480 CL |
5527 | } |
5528 | DONE; | |
5529 | }) | |
5530 | ||
5531 | ;; Return first position of mismatch between vectors or end of string (EOS) | |
029435a3 | 5532 | ;; using natural element order for both LE and BE execution modes. |
4d85d480 CL |
5533 | (define_expand "first_mismatch_or_eos_index_<mode>" |
5534 | [(match_operand:SI 0 "register_operand") | |
5535 | (unspec: SI [(match_operand:VSX_EXTRACT_I 1 "register_operand") | |
5536 | (match_operand:VSX_EXTRACT_I 2 "register_operand")] | |
5537 | UNSPEC_VSX_FIRST_MISMATCH_EOS_INDEX)] | |
5538 | "TARGET_P9_VECTOR" | |
5539 | { | |
5540 | int sh; | |
5541 | rtx cmpz1_result = gen_reg_rtx (<MODE>mode); | |
5542 | rtx cmpz2_result = gen_reg_rtx (<MODE>mode); | |
5543 | rtx cmpz_result = gen_reg_rtx (<MODE>mode); | |
5544 | rtx not_cmpz_result = gen_reg_rtx (<MODE>mode); | |
5545 | rtx and_result = gen_reg_rtx (<MODE>mode); | |
5546 | rtx result = gen_reg_rtx (<MODE>mode); | |
5547 | rtx vzero = gen_reg_rtx (<MODE>mode); | |
5548 | ||
5549 | /* Vector with zeros in elements that correspond to zeros in operands. */ | |
5550 | emit_move_insn (vzero, CONST0_RTX (<MODE>mode)); | |
5551 | ||
5552 | emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz1_result, operands[1], vzero)); | |
5553 | emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz2_result, operands[2], vzero)); | |
5554 | emit_insn (gen_and<mode>3 (and_result, cmpz1_result, cmpz2_result)); | |
5555 | ||
5556 | /* Vector with ones in elments that match. */ | |
5557 | emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmpz_result, operands[1], | |
5558 | operands[2])); | |
5559 | emit_insn (gen_one_cmpl<mode>2 (not_cmpz_result, cmpz_result)); | |
5560 | ||
5561 | /* Create vector with ones in elements where there was a zero in one of | |
5562 | the source elements or the elements did not match. */ | |
5563 | emit_insn (gen_nand<mode>3 (result, and_result, not_cmpz_result)); | |
5564 | sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2; | |
5565 | ||
5566 | if (<MODE>mode == V16QImode) | |
029435a3 CL |
5567 | { |
5568 | if (!BYTES_BIG_ENDIAN) | |
5569 | emit_insn (gen_vctzlsbb_<mode> (operands[0], result)); | |
5570 | else | |
5571 | emit_insn (gen_vclzlsbb_<mode> (operands[0], result)); | |
5572 | } | |
4d85d480 CL |
5573 | else |
5574 | { | |
5575 | rtx tmp = gen_reg_rtx (SImode); | |
029435a3 CL |
5576 | if (!BYTES_BIG_ENDIAN) |
5577 | emit_insn (gen_vctzlsbb_<mode> (tmp, result)); | |
5578 | else | |
5579 | emit_insn (gen_vclzlsbb_<mode> (tmp, result)); | |
5580 | emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh))); | |
4d85d480 CL |
5581 | } |
5582 | DONE; | |
5583 | }) | |
5584 | ||
902cb7b1 KN |
5585 | ;; Load VSX Vector with Length |
5586 | (define_expand "lxvl" | |
5587 | [(set (match_dup 3) | |
84e77783 CL |
5588 | (ashift:DI (match_operand:DI 2 "register_operand") |
5589 | (const_int 56))) | |
902cb7b1 KN |
5590 | (set (match_operand:V16QI 0 "vsx_register_operand") |
5591 | (unspec:V16QI | |
5592 | [(match_operand:DI 1 "gpc_reg_operand") | |
84e77783 | 5593 | (mem:V16QI (match_dup 1)) |
902cb7b1 KN |
5594 | (match_dup 3)] |
5595 | UNSPEC_LXVL))] | |
5596 | "TARGET_P9_VECTOR && TARGET_64BIT" | |
5597 | { | |
5598 | operands[3] = gen_reg_rtx (DImode); | |
5599 | }) | |
5600 | ||
5601 | (define_insn "*lxvl" | |
5602 | [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa") | |
5603 | (unspec:V16QI | |
5604 | [(match_operand:DI 1 "gpc_reg_operand" "b") | |
84e77783 CL |
5605 | (mem:V16QI (match_dup 1)) |
5606 | (match_operand:DI 2 "register_operand" "r")] | |
902cb7b1 KN |
5607 | UNSPEC_LXVL))] |
5608 | "TARGET_P9_VECTOR && TARGET_64BIT" | |
84e77783 CL |
5609 | "lxvl %x0,%1,%2" |
5610 | [(set_attr "type" "vecload")]) | |
902cb7b1 | 5611 | |
1262c6cf CL |
5612 | (define_insn "lxvll" |
5613 | [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa") | |
5614 | (unspec:V16QI [(match_operand:DI 1 "gpc_reg_operand" "b") | |
84e77783 | 5615 | (mem:V16QI (match_dup 1)) |
1262c6cf CL |
5616 | (match_operand:DI 2 "register_operand" "r")] |
5617 | UNSPEC_LXVLL))] | |
5618 | "TARGET_P9_VECTOR" | |
5619 | "lxvll %x0,%1,%2" | |
5620 | [(set_attr "type" "vecload")]) | |
5621 | ||
5622 | ;; Expand for builtin xl_len_r | |
5623 | (define_expand "xl_len_r" | |
5624 | [(match_operand:V16QI 0 "vsx_register_operand") | |
5625 | (match_operand:DI 1 "register_operand") | |
5626 | (match_operand:DI 2 "register_operand")] | |
5627 | "" | |
5628 | { | |
5629 | rtx shift_mask = gen_reg_rtx (V16QImode); | |
5630 | rtx rtx_vtmp = gen_reg_rtx (V16QImode); | |
5631 | rtx tmp = gen_reg_rtx (DImode); | |
5632 | ||
f64b9156 | 5633 | emit_insn (gen_altivec_lvsl_reg_di (shift_mask, operands[2])); |
1262c6cf CL |
5634 | emit_insn (gen_ashldi3 (tmp, operands[2], GEN_INT (56))); |
5635 | emit_insn (gen_lxvll (rtx_vtmp, operands[1], tmp)); | |
5636 | emit_insn (gen_altivec_vperm_v8hiv16qi (operands[0], rtx_vtmp, rtx_vtmp, | |
5637 | shift_mask)); | |
5638 | DONE; | |
5639 | }) | |
5640 | ||
5641 | (define_insn "stxvll" | |
5642 | [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b")) | |
5643 | (unspec:V16QI [(match_operand:V16QI 0 "vsx_register_operand" "wa") | |
84e77783 | 5644 | (mem:V16QI (match_dup 1)) |
1262c6cf CL |
5645 | (match_operand:DI 2 "register_operand" "r")] |
5646 | UNSPEC_STXVLL))] | |
5647 | "TARGET_P9_VECTOR" | |
5648 | "stxvll %x0,%1,%2" | |
5649 | [(set_attr "type" "vecstore")]) | |
5650 | ||
902cb7b1 KN |
5651 | ;; Store VSX Vector with Length |
5652 | (define_expand "stxvl" | |
5653 | [(set (match_dup 3) | |
84e77783 CL |
5654 | (ashift:DI (match_operand:DI 2 "register_operand") |
5655 | (const_int 56))) | |
902cb7b1 KN |
5656 | (set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand")) |
5657 | (unspec:V16QI | |
5658 | [(match_operand:V16QI 0 "vsx_register_operand") | |
84e77783 | 5659 | (mem:V16QI (match_dup 1)) |
902cb7b1 KN |
5660 | (match_dup 3)] |
5661 | UNSPEC_STXVL))] | |
5662 | "TARGET_P9_VECTOR && TARGET_64BIT" | |
5663 | { | |
5664 | operands[3] = gen_reg_rtx (DImode); | |
5665 | }) | |
5666 | ||
4070208f KL |
5667 | ;; Define optab for vector access with length vectorization exploitation. |
5668 | (define_expand "len_load_v16qi" | |
5669 | [(match_operand:V16QI 0 "vlogical_operand") | |
5670 | (match_operand:V16QI 1 "memory_operand") | |
b0e51639 RD |
5671 | (match_operand:QI 2 "gpc_reg_operand") |
5672 | (match_operand:QI 3 "zero_constant")] | |
4070208f KL |
5673 | "TARGET_P9_VECTOR && TARGET_64BIT" |
5674 | { | |
5675 | rtx mem = XEXP (operands[1], 0); | |
5676 | mem = force_reg (DImode, mem); | |
5677 | rtx len = gen_lowpart (DImode, operands[2]); | |
5678 | emit_insn (gen_lxvl (operands[0], mem, len)); | |
5679 | DONE; | |
5680 | }) | |
5681 | ||
5682 | (define_expand "len_store_v16qi" | |
5683 | [(match_operand:V16QI 0 "memory_operand") | |
5684 | (match_operand:V16QI 1 "vlogical_operand") | |
5685 | (match_operand:QI 2 "gpc_reg_operand") | |
b0e51639 | 5686 | (match_operand:QI 3 "zero_constant") |
4070208f KL |
5687 | ] |
5688 | "TARGET_P9_VECTOR && TARGET_64BIT" | |
5689 | { | |
5690 | rtx mem = XEXP (operands[0], 0); | |
5691 | mem = force_reg (DImode, mem); | |
5692 | rtx len = gen_lowpart (DImode, operands[2]); | |
5693 | emit_insn (gen_stxvl (operands[1], mem, len)); | |
5694 | DONE; | |
5695 | }) | |
5696 | ||
902cb7b1 KN |
5697 | (define_insn "*stxvl" |
5698 | [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b")) | |
5699 | (unspec:V16QI | |
5700 | [(match_operand:V16QI 0 "vsx_register_operand" "wa") | |
84e77783 CL |
5701 | (mem:V16QI (match_dup 1)) |
5702 | (match_operand:DI 2 "register_operand" "r")] | |
902cb7b1 KN |
5703 | UNSPEC_STXVL))] |
5704 | "TARGET_P9_VECTOR && TARGET_64BIT" | |
84e77783 CL |
5705 | "stxvl %x0,%1,%2" |
5706 | [(set_attr "type" "vecstore")]) | |
902cb7b1 | 5707 | |
1262c6cf CL |
5708 | ;; Expand for builtin xst_len_r |
5709 | (define_expand "xst_len_r" | |
5710 | [(match_operand:V16QI 0 "vsx_register_operand" "=wa") | |
5711 | (match_operand:DI 1 "register_operand" "b") | |
5712 | (match_operand:DI 2 "register_operand" "r")] | |
5713 | "UNSPEC_XST_LEN_R" | |
5714 | { | |
5715 | rtx shift_mask = gen_reg_rtx (V16QImode); | |
5716 | rtx rtx_vtmp = gen_reg_rtx (V16QImode); | |
5717 | rtx tmp = gen_reg_rtx (DImode); | |
5718 | ||
f64b9156 | 5719 | emit_insn (gen_altivec_lvsr_reg_di (shift_mask, operands[2])); |
1262c6cf CL |
5720 | emit_insn (gen_altivec_vperm_v8hiv16qi (rtx_vtmp, operands[0], operands[0], |
5721 | shift_mask)); | |
5722 | emit_insn (gen_ashldi3 (tmp, operands[2], GEN_INT (56))); | |
5723 | emit_insn (gen_stxvll (rtx_vtmp, operands[1], tmp)); | |
5724 | DONE; | |
5725 | }) | |
5726 | ||
41e18197 | 5727 | ;; Vector Compare Not Equal Byte (specified/not+eq:) |
902cb7b1 KN |
5728 | (define_insn "vcmpneb" |
5729 | [(set (match_operand:V16QI 0 "altivec_register_operand" "=v") | |
41e18197 WS |
5730 | (not:V16QI |
5731 | (eq:V16QI (match_operand:V16QI 1 "altivec_register_operand" "v") | |
5732 | (match_operand:V16QI 2 "altivec_register_operand" "v"))))] | |
902cb7b1 KN |
5733 | "TARGET_P9_VECTOR" |
5734 | "vcmpneb %0,%1,%2" | |
5735 | [(set_attr "type" "vecsimple")]) | |
5736 | ||
f03122f2 CL |
5737 | ;; Vector Compare Not Equal v1ti (specified/not+eq:) |
5738 | (define_expand "vcmpnet" | |
5739 | [(set (match_operand:V1TI 0 "altivec_register_operand") | |
5740 | (not:V1TI | |
5741 | (eq:V1TI (match_operand:V1TI 1 "altivec_register_operand") | |
5742 | (match_operand:V1TI 2 "altivec_register_operand"))))] | |
5743 | "TARGET_POWER10" | |
5744 | { | |
5745 | emit_insn (gen_eqvv1ti3 (operands[0], operands[1], operands[2])); | |
5746 | emit_insn (gen_one_cmplv1ti2 (operands[0], operands[0])); | |
5747 | DONE; | |
5748 | }) | |
5749 | ||
902cb7b1 KN |
5750 | ;; Vector Compare Not Equal or Zero Byte |
5751 | (define_insn "vcmpnezb" | |
5752 | [(set (match_operand:V16QI 0 "altivec_register_operand" "=v") | |
5753 | (unspec:V16QI | |
5754 | [(match_operand:V16QI 1 "altivec_register_operand" "v") | |
5755 | (match_operand:V16QI 2 "altivec_register_operand" "v")] | |
5756 | UNSPEC_VCMPNEZB))] | |
5757 | "TARGET_P9_VECTOR" | |
5758 | "vcmpnezb %0,%1,%2" | |
5759 | [(set_attr "type" "vecsimple")]) | |
5760 | ||
9d36bd3b AS |
5761 | ;; Vector Compare Not Equal or Zero Byte predicate or record-form |
5762 | (define_insn "vcmpnezb_p" | |
5763 | [(set (reg:CC CR6_REGNO) | |
5764 | (unspec:CC | |
5765 | [(match_operand:V16QI 1 "altivec_register_operand" "v") | |
5766 | (match_operand:V16QI 2 "altivec_register_operand" "v")] | |
5767 | UNSPEC_VCMPNEZB)) | |
5768 | (set (match_operand:V16QI 0 "altivec_register_operand" "=v") | |
5769 | (unspec:V16QI | |
5770 | [(match_dup 1) | |
5771 | (match_dup 2)] | |
5772 | UNSPEC_VCMPNEZB))] | |
5773 | "TARGET_P9_VECTOR" | |
5774 | "vcmpnezb. %0,%1,%2" | |
5775 | [(set_attr "type" "vecsimple")]) | |
5776 | ||
41e18197 | 5777 | ;; Vector Compare Not Equal Half Word (specified/not+eq:) |
902cb7b1 KN |
5778 | (define_insn "vcmpneh" |
5779 | [(set (match_operand:V8HI 0 "altivec_register_operand" "=v") | |
41e18197 WS |
5780 | (not:V8HI |
5781 | (eq:V8HI (match_operand:V8HI 1 "altivec_register_operand" "v") | |
5782 | (match_operand:V8HI 2 "altivec_register_operand" "v"))))] | |
902cb7b1 KN |
5783 | "TARGET_P9_VECTOR" |
5784 | "vcmpneh %0,%1,%2" | |
5785 | [(set_attr "type" "vecsimple")]) | |
5786 | ||
5787 | ;; Vector Compare Not Equal or Zero Half Word | |
5788 | (define_insn "vcmpnezh" | |
5789 | [(set (match_operand:V8HI 0 "altivec_register_operand" "=v") | |
5790 | (unspec:V8HI [(match_operand:V8HI 1 "altivec_register_operand" "v") | |
5791 | (match_operand:V8HI 2 "altivec_register_operand" "v")] | |
5792 | UNSPEC_VCMPNEZH))] | |
5793 | "TARGET_P9_VECTOR" | |
5794 | "vcmpnezh %0,%1,%2" | |
5795 | [(set_attr "type" "vecsimple")]) | |
5796 | ||
41e18197 | 5797 | ;; Vector Compare Not Equal Word (specified/not+eq:) |
902cb7b1 KN |
5798 | (define_insn "vcmpnew" |
5799 | [(set (match_operand:V4SI 0 "altivec_register_operand" "=v") | |
41e18197 WS |
5800 | (not:V4SI |
5801 | (eq:V4SI (match_operand:V4SI 1 "altivec_register_operand" "v") | |
5802 | (match_operand:V4SI 2 "altivec_register_operand" "v"))))] | |
902cb7b1 KN |
5803 | "TARGET_P9_VECTOR" |
5804 | "vcmpnew %0,%1,%2" | |
5805 | [(set_attr "type" "vecsimple")]) | |
5806 | ||
902cb7b1 KN |
5807 | ;; Vector Compare Not Equal or Zero Word |
5808 | (define_insn "vcmpnezw" | |
5809 | [(set (match_operand:V4SI 0 "altivec_register_operand" "=v") | |
5810 | (unspec:V4SI [(match_operand:V4SI 1 "altivec_register_operand" "v") | |
5811 | (match_operand:V4SI 2 "altivec_register_operand" "v")] | |
5812 | UNSPEC_VCMPNEZW))] | |
5813 | "TARGET_P9_VECTOR" | |
5814 | "vcmpnezw %0,%1,%2" | |
5815 | [(set_attr "type" "vecsimple")]) | |
5816 | ||
5817 | ;; Vector Count Leading Zero Least-Significant Bits Byte | |
029435a3 | 5818 | (define_insn "vclzlsbb_<mode>" |
902cb7b1 KN |
5819 | [(set (match_operand:SI 0 "register_operand" "=r") |
5820 | (unspec:SI | |
029435a3 | 5821 | [(match_operand:VSX_EXTRACT_I 1 "altivec_register_operand" "v")] |
902cb7b1 KN |
5822 | UNSPEC_VCLZLSBB))] |
5823 | "TARGET_P9_VECTOR" | |
5824 | "vclzlsbb %0,%1" | |
5825 | [(set_attr "type" "vecsimple")]) | |
5826 | ||
5827 | ;; Vector Count Trailing Zero Least-Significant Bits Byte | |
4d85d480 | 5828 | (define_insn "vctzlsbb_<mode>" |
902cb7b1 KN |
5829 | [(set (match_operand:SI 0 "register_operand" "=r") |
5830 | (unspec:SI | |
4d85d480 | 5831 | [(match_operand:VSX_EXTRACT_I 1 "altivec_register_operand" "v")] |
902cb7b1 KN |
5832 | UNSPEC_VCTZLSBB))] |
5833 | "TARGET_P9_VECTOR" | |
5834 | "vctzlsbb %0,%1" | |
5835 | [(set_attr "type" "vecsimple")]) | |
5836 | ||
5837 | ;; Vector Extract Unsigned Byte Left-Indexed | |
5838 | (define_insn "vextublx" | |
5839 | [(set (match_operand:SI 0 "register_operand" "=r") | |
5840 | (unspec:SI | |
5841 | [(match_operand:SI 1 "register_operand" "r") | |
5842 | (match_operand:V16QI 2 "altivec_register_operand" "v")] | |
5843 | UNSPEC_VEXTUBLX))] | |
5844 | "TARGET_P9_VECTOR" | |
5845 | "vextublx %0,%1,%2" | |
5846 | [(set_attr "type" "vecsimple")]) | |
5847 | ||
5848 | ;; Vector Extract Unsigned Byte Right-Indexed | |
5849 | (define_insn "vextubrx" | |
5850 | [(set (match_operand:SI 0 "register_operand" "=r") | |
5851 | (unspec:SI | |
5852 | [(match_operand:SI 1 "register_operand" "r") | |
5853 | (match_operand:V16QI 2 "altivec_register_operand" "v")] | |
5854 | UNSPEC_VEXTUBRX))] | |
5855 | "TARGET_P9_VECTOR" | |
5856 | "vextubrx %0,%1,%2" | |
5857 | [(set_attr "type" "vecsimple")]) | |
5858 | ||
5859 | ;; Vector Extract Unsigned Half Word Left-Indexed | |
5860 | (define_insn "vextuhlx" | |
5861 | [(set (match_operand:SI 0 "register_operand" "=r") | |
5862 | (unspec:SI | |
5863 | [(match_operand:SI 1 "register_operand" "r") | |
f13d510e | 5864 | (match_operand:V8HI 2 "altivec_register_operand" "v")] |
902cb7b1 KN |
5865 | UNSPEC_VEXTUHLX))] |
5866 | "TARGET_P9_VECTOR" | |
5867 | "vextuhlx %0,%1,%2" | |
5868 | [(set_attr "type" "vecsimple")]) | |
5869 | ||
5870 | ;; Vector Extract Unsigned Half Word Right-Indexed | |
5871 | (define_insn "vextuhrx" | |
5872 | [(set (match_operand:SI 0 "register_operand" "=r") | |
5873 | (unspec:SI | |
5874 | [(match_operand:SI 1 "register_operand" "r") | |
f13d510e | 5875 | (match_operand:V8HI 2 "altivec_register_operand" "v")] |
902cb7b1 KN |
5876 | UNSPEC_VEXTUHRX))] |
5877 | "TARGET_P9_VECTOR" | |
5878 | "vextuhrx %0,%1,%2" | |
5879 | [(set_attr "type" "vecsimple")]) | |
5880 | ||
5881 | ;; Vector Extract Unsigned Word Left-Indexed | |
5882 | (define_insn "vextuwlx" | |
5883 | [(set (match_operand:SI 0 "register_operand" "=r") | |
5884 | (unspec:SI | |
5885 | [(match_operand:SI 1 "register_operand" "r") | |
f13d510e | 5886 | (match_operand:V4SI 2 "altivec_register_operand" "v")] |
902cb7b1 KN |
5887 | UNSPEC_VEXTUWLX))] |
5888 | "TARGET_P9_VECTOR" | |
5889 | "vextuwlx %0,%1,%2" | |
5890 | [(set_attr "type" "vecsimple")]) | |
5891 | ||
5892 | ;; Vector Extract Unsigned Word Right-Indexed | |
5893 | (define_insn "vextuwrx" | |
5894 | [(set (match_operand:SI 0 "register_operand" "=r") | |
5895 | (unspec:SI | |
5896 | [(match_operand:SI 1 "register_operand" "r") | |
f13d510e | 5897 | (match_operand:V4SI 2 "altivec_register_operand" "v")] |
902cb7b1 KN |
5898 | UNSPEC_VEXTUWRX))] |
5899 | "TARGET_P9_VECTOR" | |
5900 | "vextuwrx %0,%1,%2" | |
5901 | [(set_attr "type" "vecsimple")]) | |
16370e79 MM |
5902 | |
5903 | ;; Vector insert/extract word at arbitrary byte values. Note, the little | |
5904 | ;; endian version needs to adjust the byte number, and the V4SI element in | |
5905 | ;; vinsert4b. | |
b8bf5603 CL |
5906 | (define_insn "extract4b" |
5907 | [(set (match_operand:V2DI 0 "vsx_register_operand") | |
5908 | (unspec:V2DI [(match_operand:V16QI 1 "vsx_register_operand" "wa") | |
5909 | (match_operand:QI 2 "const_0_to_12_operand" "n")] | |
5910 | UNSPEC_XXEXTRACTUW))] | |
5911 | "TARGET_P9_VECTOR" | |
5912 | { | |
427a7384 | 5913 | if (!BYTES_BIG_ENDIAN) |
b8bf5603 CL |
5914 | operands[2] = GEN_INT (12 - INTVAL (operands[2])); |
5915 | ||
5916 | return "xxextractuw %x0,%x1,%2"; | |
5917 | }) | |
5918 | ||
5919 | (define_expand "insert4b" | |
5920 | [(set (match_operand:V16QI 0 "vsx_register_operand") | |
5921 | (unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand") | |
5922 | (match_operand:V16QI 2 "vsx_register_operand") | |
5923 | (match_operand:QI 3 "const_0_to_12_operand")] | |
5924 | UNSPEC_XXINSERTW))] | |
5925 | "TARGET_P9_VECTOR" | |
5926 | { | |
427a7384 | 5927 | if (!BYTES_BIG_ENDIAN) |
b8bf5603 CL |
5928 | { |
5929 | rtx op1 = operands[1]; | |
5930 | rtx v4si_tmp = gen_reg_rtx (V4SImode); | |
5931 | emit_insn (gen_vsx_xxpermdi_v4si_be (v4si_tmp, op1, op1, const1_rtx)); | |
5932 | operands[1] = v4si_tmp; | |
5933 | operands[3] = GEN_INT (12 - INTVAL (operands[3])); | |
5934 | } | |
5935 | }) | |
5936 | ||
5937 | (define_insn "*insert4b_internal" | |
5938 | [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa") | |
5939 | (unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand" "wa") | |
5940 | (match_operand:V16QI 2 "vsx_register_operand" "0") | |
5941 | (match_operand:QI 3 "const_0_to_12_operand" "n")] | |
5942 | UNSPEC_XXINSERTW))] | |
5943 | "TARGET_P9_VECTOR" | |
5944 | "xxinsertw %x0,%x1,%3" | |
5945 | [(set_attr "type" "vecperm")]) | |
5946 | ||
fba4b861 | 5947 | |
26bca0ed CL |
5948 | ;; Generate vector extract four float 32 values from left four elements |
5949 | ;; of eight element vector of float 16 values. | |
5950 | (define_expand "vextract_fp_from_shorth" | |
5951 | [(set (match_operand:V4SF 0 "register_operand" "=wa") | |
5952 | (unspec:V4SF [(match_operand:V8HI 1 "register_operand" "wa")] | |
5953 | UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTH))] | |
5954 | "TARGET_P9_VECTOR" | |
5955 | { | |
26bca0ed | 5956 | int i; |
6ad1bf18 | 5957 | int vals_le[16] = {15, 14, 0, 0, 13, 12, 0, 0, 11, 10, 0, 0, 9, 8, 0, 0}; |
8d31eb8f | 5958 | int vals_be[16] = {0, 0, 0, 1, 0, 0, 2, 3, 0, 0, 4, 5, 0, 0, 6, 7}; |
26bca0ed CL |
5959 | |
5960 | rtx rvals[16]; | |
5961 | rtx mask = gen_reg_rtx (V16QImode); | |
5962 | rtx tmp = gen_reg_rtx (V16QImode); | |
5963 | rtvec v; | |
5964 | ||
5965 | for (i = 0; i < 16; i++) | |
6ad1bf18 CL |
5966 | if (!BYTES_BIG_ENDIAN) |
5967 | rvals[i] = GEN_INT (vals_le[i]); | |
5968 | else | |
5969 | rvals[i] = GEN_INT (vals_be[i]); | |
26bca0ed CL |
5970 | |
5971 | /* xvcvhpsp - vector convert F16 to vector F32 requires the four F16 | |
5972 | inputs in half words 1,3,5,7 (IBM numbering). Use xxperm to move | |
6ad1bf18 CL |
5973 | src half words 0,1,2,3 (LE), src half words 4,5,6,7 (BE) for the |
5974 | conversion instruction. */ | |
26bca0ed | 5975 | v = gen_rtvec_v (16, rvals); |
8e1863ec | 5976 | emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v))); |
26bca0ed CL |
5977 | emit_insn (gen_altivec_vperm_v8hiv16qi (tmp, operands[1], |
5978 | operands[1], mask)); | |
5979 | emit_insn (gen_vsx_xvcvhpsp (operands[0], tmp)); | |
5980 | DONE; | |
5981 | }) | |
5982 | ||
5983 | ;; Generate vector extract four float 32 values from right four elements | |
5984 | ;; of eight element vector of float 16 values. | |
5985 | (define_expand "vextract_fp_from_shortl" | |
5986 | [(set (match_operand:V4SF 0 "register_operand" "=wa") | |
5987 | (unspec:V4SF [(match_operand:V8HI 1 "register_operand" "wa")] | |
5988 | UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTL))] | |
5989 | "TARGET_P9_VECTOR" | |
5990 | { | |
6ad1bf18 | 5991 | int vals_le[16] = {7, 6, 0, 0, 5, 4, 0, 0, 3, 2, 0, 0, 1, 0, 0, 0}; |
8d31eb8f | 5992 | int vals_be[16] = {0, 0, 8, 9, 0, 0, 10, 11, 0, 0, 12, 13, 0, 0, 14, 15}; |
6ad1bf18 | 5993 | |
26bca0ed CL |
5994 | int i; |
5995 | rtx rvals[16]; | |
5996 | rtx mask = gen_reg_rtx (V16QImode); | |
5997 | rtx tmp = gen_reg_rtx (V16QImode); | |
5998 | rtvec v; | |
5999 | ||
6000 | for (i = 0; i < 16; i++) | |
6ad1bf18 CL |
6001 | if (!BYTES_BIG_ENDIAN) |
6002 | rvals[i] = GEN_INT (vals_le[i]); | |
6003 | else | |
6004 | rvals[i] = GEN_INT (vals_be[i]); | |
26bca0ed CL |
6005 | |
6006 | /* xvcvhpsp - vector convert F16 to vector F32 requires the four F16 | |
6007 | inputs in half words 1,3,5,7 (IBM numbering). Use xxperm to move | |
6ad1bf18 CL |
6008 | src half words 4,5,6,7 (LE), src half words 0,1,2,3 (BE) for the |
6009 | conversion instruction. */ | |
26bca0ed | 6010 | v = gen_rtvec_v (16, rvals); |
8e1863ec | 6011 | emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v))); |
26bca0ed CL |
6012 | emit_insn (gen_altivec_vperm_v8hiv16qi (tmp, operands[1], |
6013 | operands[1], mask)); | |
6014 | emit_insn (gen_vsx_xvcvhpsp (operands[0], tmp)); | |
6015 | DONE; | |
6016 | }) | |
6017 | ||
b7d3a6a6 MM |
6018 | ;; Support for ISA 3.0 vector byte reverse |
6019 | ||
6020 | ;; Swap all bytes within a vector | |
6021 | (define_insn "p9_xxbrq_v1ti" | |
6022 | [(set (match_operand:V1TI 0 "vsx_register_operand" "=wa") | |
6023 | (bswap:V1TI (match_operand:V1TI 1 "vsx_register_operand" "wa")))] | |
6024 | "TARGET_P9_VECTOR" | |
6025 | "xxbrq %x0,%x1" | |
6026 | [(set_attr "type" "vecperm")]) | |
6027 | ||
6028 | (define_expand "p9_xxbrq_v16qi" | |
6029 | [(use (match_operand:V16QI 0 "vsx_register_operand" "=wa")) | |
d6126f8b | 6030 | (use (match_operand:V16QI 1 "vsx_register_operand" "wa"))] |
b7d3a6a6 MM |
6031 | "TARGET_P9_VECTOR" |
6032 | { | |
d6126f8b | 6033 | rtx op0 = gen_reg_rtx (V1TImode); |
b7d3a6a6 MM |
6034 | rtx op1 = gen_lowpart (V1TImode, operands[1]); |
6035 | emit_insn (gen_p9_xxbrq_v1ti (op0, op1)); | |
d6126f8b | 6036 | emit_move_insn (operands[0], gen_lowpart (V16QImode, op0)); |
b7d3a6a6 MM |
6037 | DONE; |
6038 | }) | |
6039 | ||
6040 | ;; Swap all bytes in each 64-bit element | |
d6126f8b JJ |
6041 | (define_insn "p9_xxbrd_v2di" |
6042 | [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa") | |
6043 | (bswap:V2DI (match_operand:V2DI 1 "vsx_register_operand" "wa")))] | |
b7d3a6a6 MM |
6044 | "TARGET_P9_VECTOR" |
6045 | "xxbrd %x0,%x1" | |
6046 | [(set_attr "type" "vecperm")]) | |
6047 | ||
d6126f8b JJ |
6048 | (define_expand "p9_xxbrd_v2df" |
6049 | [(use (match_operand:V2DF 0 "vsx_register_operand" "=wa")) | |
6050 | (use (match_operand:V2DF 1 "vsx_register_operand" "wa"))] | |
6051 | "TARGET_P9_VECTOR" | |
6052 | { | |
6053 | rtx op0 = gen_reg_rtx (V2DImode); | |
6054 | rtx op1 = gen_lowpart (V2DImode, operands[1]); | |
6055 | emit_insn (gen_p9_xxbrd_v2di (op0, op1)); | |
6056 | emit_move_insn (operands[0], gen_lowpart (V2DFmode, op0)); | |
6057 | DONE; | |
6058 | }) | |
6059 | ||
b7d3a6a6 | 6060 | ;; Swap all bytes in each 32-bit element |
d6126f8b JJ |
6061 | (define_insn "p9_xxbrw_v4si" |
6062 | [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa") | |
6063 | (bswap:V4SI (match_operand:V4SI 1 "vsx_register_operand" "wa")))] | |
b7d3a6a6 MM |
6064 | "TARGET_P9_VECTOR" |
6065 | "xxbrw %x0,%x1" | |
6066 | [(set_attr "type" "vecperm")]) | |
6067 | ||
d6126f8b JJ |
6068 | (define_expand "p9_xxbrw_v4sf" |
6069 | [(use (match_operand:V4SF 0 "vsx_register_operand" "=wa")) | |
6070 | (use (match_operand:V4SF 1 "vsx_register_operand" "wa"))] | |
6071 | "TARGET_P9_VECTOR" | |
6072 | { | |
6073 | rtx op0 = gen_reg_rtx (V4SImode); | |
6074 | rtx op1 = gen_lowpart (V4SImode, operands[1]); | |
6075 | emit_insn (gen_p9_xxbrw_v4si (op0, op1)); | |
6076 | emit_move_insn (operands[0], gen_lowpart (V4SFmode, op0)); | |
6077 | DONE; | |
6078 | }) | |
6079 | ||
fc504349 CL |
6080 | ;; Swap all bytes in each element of vector |
6081 | (define_expand "revb_<mode>" | |
d6126f8b JJ |
6082 | [(use (match_operand:VEC_REVB 0 "vsx_register_operand")) |
6083 | (use (match_operand:VEC_REVB 1 "vsx_register_operand"))] | |
fc504349 CL |
6084 | "" |
6085 | { | |
6086 | if (TARGET_P9_VECTOR) | |
6087 | emit_insn (gen_p9_xxbr<VSX_XXBR>_<mode> (operands[0], operands[1])); | |
6088 | else | |
6089 | { | |
eaba55ff XL |
6090 | if (<MODE>mode == V8HImode) |
6091 | { | |
6092 | rtx splt = gen_reg_rtx (V8HImode); | |
6093 | emit_insn (gen_altivec_vspltish (splt, GEN_INT (8))); | |
6094 | emit_insn (gen_altivec_vrlh (operands[0], operands[1], splt)); | |
6095 | } | |
6096 | else | |
6097 | { | |
6098 | /* Want to have the elements in reverse order relative | |
6099 | to the endian mode in use, i.e. in LE mode, put elements | |
6100 | in BE order. */ | |
6101 | rtx sel = swap_endian_selector_for_mode (<MODE>mode); | |
9d68cba5 HG |
6102 | emit_insn (gen_altivec_vperm_<mode>_direct (operands[0], operands[1], |
6103 | operands[1], sel)); | |
eaba55ff | 6104 | } |
fc504349 CL |
6105 | } |
6106 | ||
6107 | DONE; | |
6108 | }) | |
6109 | ||
;; Byte-reversing a V16QI vector is a no-op on the element level (each
;; element is a single byte), so expand it to a plain register move.
(define_expand "revb_v16qi"
  [(set (match_operand:V16QI 0 "vsx_register_operand")
	(bswap:V16QI (match_operand:V16QI 1 "vsx_register_operand")))]
  ""
{
  emit_move_insn (operands[0], operands[1]);
  DONE;
})
6119 | ||
b7d3a6a6 MM |
6120 | ;; Swap all bytes in each 16-bit element |
6121 | (define_insn "p9_xxbrh_v8hi" | |
6122 | [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa") | |
6123 | (bswap:V8HI (match_operand:V8HI 1 "vsx_register_operand" "wa")))] | |
6124 | "TARGET_P9_VECTOR" | |
6125 | "xxbrh %x0,%x1" | |
6126 | [(set_attr "type" "vecperm")]) | |
6127 | \f | |
fba4b861 MM |
6128 | |
6129 | ;; Operand numbers for the following peephole2 | |
6130 | (define_constants | |
6131 | [(SFBOOL_TMP_GPR 0) ;; GPR temporary | |
6132 | (SFBOOL_TMP_VSX 1) ;; vector temporary | |
6133 | (SFBOOL_MFVSR_D 2) ;; move to gpr dest | |
6134 | (SFBOOL_MFVSR_A 3) ;; move to gpr src | |
6135 | (SFBOOL_BOOL_D 4) ;; and/ior/xor dest | |
6136 | (SFBOOL_BOOL_A1 5) ;; and/ior/xor arg1 | |
6137 | (SFBOOL_BOOL_A2 6) ;; and/ior/xor arg2 | |
6138 | (SFBOOL_SHL_D 7) ;; shift left dest | |
6139 | (SFBOOL_SHL_A 8) ;; shift left arg | |
6140 | (SFBOOL_MTVSR_D 9) ;; move to vector dest | |
7a6ed74d MM |
6141 | (SFBOOL_MFVSR_A_V4SF 10) ;; SFBOOL_MFVSR_A as V4SFmode |
6142 | (SFBOOL_BOOL_A_DI 11) ;; SFBOOL_BOOL_A1/A2 as DImode | |
6143 | (SFBOOL_TMP_VSX_DI 12) ;; SFBOOL_TMP_VSX as DImode | |
6144 | (SFBOOL_MTVSR_D_V4SF 13)]) ;; SFBOOL_MTVSR_D as V4SFmode | |
fba4b861 MM |
6145 | |
6146 | ;; Attempt to optimize some common GLIBC operations using logical operations to | |
6147 | ;; pick apart SFmode operations. For example, there is code from e_powf.c | |
6148 | ;; after macro expansion that looks like: | |
6149 | ;; | |
6150 | ;; typedef union { | |
6151 | ;; float value; | |
6152 | ;; uint32_t word; | |
6153 | ;; } ieee_float_shape_type; | |
6154 | ;; | |
6155 | ;; float t1; | |
6156 | ;; int32_t is; | |
6157 | ;; | |
6158 | ;; do { | |
6159 | ;; ieee_float_shape_type gf_u; | |
6160 | ;; gf_u.value = (t1); | |
6161 | ;; (is) = gf_u.word; | |
6162 | ;; } while (0); | |
6163 | ;; | |
6164 | ;; do { | |
6165 | ;; ieee_float_shape_type sf_u; | |
6166 | ;; sf_u.word = (is & 0xfffff000); | |
6167 | ;; (t1) = sf_u.value; | |
6168 | ;; } while (0); | |
6169 | ;; | |
6170 | ;; | |
6171 | ;; This would result in two direct move operations (convert to memory format, | |
6172 | ;; direct move to GPR, do the AND operation, direct move to VSX, convert to | |
6173 | ;; scalar format). With this peephole, we eliminate the direct move to the | |
6174 | ;; GPR, and instead move the integer mask value to the vector register after a | |
6175 | ;; shift and do the VSX logical operation. | |
6176 | ||
6177 | ;; The insns for dealing with SFmode in GPR registers looks like: | |
6178 | ;; (set (reg:V4SF reg2) (unspec:V4SF [(reg:SF reg1)] UNSPEC_VSX_CVDPSPN)) | |
6179 | ;; | |
6180 | ;; (set (reg:DI reg3) (unspec:DI [(reg:V4SF reg2)] UNSPEC_P8V_RELOAD_FROM_VSX)) | |
6181 | ;; | |
7a6ed74d | 6182 | ;; (set (reg:DI reg4) (and:DI (reg:DI reg3) (reg:DI reg3))) |
fba4b861 | 6183 | ;; |
7a6ed74d | 6184 | ;; (set (reg:DI reg5) (ashift:DI (reg:DI reg4) (const_int 32))) |
fba4b861 | 6185 | ;; |
7a6ed74d | 6186 | ;; (set (reg:SF reg6) (unspec:SF [(reg:DI reg5)] UNSPEC_P8V_MTVSRD)) |
fba4b861 | 6187 | ;; |
7a6ed74d | 6188 | ;; (set (reg:SF reg6) (unspec:SF [(reg:SF reg6)] UNSPEC_VSX_CVSPDPN)) |
fba4b861 MM |
6189 | |
6190 | (define_peephole2 | |
6191 | [(match_scratch:DI SFBOOL_TMP_GPR "r") | |
6192 | (match_scratch:V4SF SFBOOL_TMP_VSX "wa") | |
6193 | ||
7a6ed74d | 6194 | ;; MFVSRWZ (aka zero_extend) |
fba4b861 | 6195 | (set (match_operand:DI SFBOOL_MFVSR_D "int_reg_operand") |
7a6ed74d MM |
6196 | (zero_extend:DI |
6197 | (match_operand:SI SFBOOL_MFVSR_A "vsx_register_operand"))) | |
fba4b861 MM |
6198 | |
6199 | ;; AND/IOR/XOR operation on int | |
6200 | (set (match_operand:SI SFBOOL_BOOL_D "int_reg_operand") | |
6201 | (and_ior_xor:SI (match_operand:SI SFBOOL_BOOL_A1 "int_reg_operand") | |
6202 | (match_operand:SI SFBOOL_BOOL_A2 "reg_or_cint_operand"))) | |
6203 | ||
6204 | ;; SLDI | |
6205 | (set (match_operand:DI SFBOOL_SHL_D "int_reg_operand") | |
6206 | (ashift:DI (match_operand:DI SFBOOL_SHL_A "int_reg_operand") | |
6207 | (const_int 32))) | |
6208 | ||
6209 | ;; MTVSRD | |
6210 | (set (match_operand:SF SFBOOL_MTVSR_D "vsx_register_operand") | |
6211 | (unspec:SF [(match_dup SFBOOL_SHL_D)] UNSPEC_P8V_MTVSRD))] | |
6212 | ||
6213 | "TARGET_POWERPC64 && TARGET_DIRECT_MOVE | |
6214 | /* The REG_P (xxx) tests prevents SUBREG's, which allows us to use REGNO | |
6215 | to compare registers, when the mode is different. */ | |
6216 | && REG_P (operands[SFBOOL_MFVSR_D]) && REG_P (operands[SFBOOL_BOOL_D]) | |
6217 | && REG_P (operands[SFBOOL_BOOL_A1]) && REG_P (operands[SFBOOL_SHL_D]) | |
6218 | && REG_P (operands[SFBOOL_SHL_A]) && REG_P (operands[SFBOOL_MTVSR_D]) | |
6219 | && (REG_P (operands[SFBOOL_BOOL_A2]) | |
6220 | || CONST_INT_P (operands[SFBOOL_BOOL_A2])) | |
6221 | && (REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_MFVSR_D]) | |
7a6ed74d | 6222 | || peep2_reg_dead_p (2, operands[SFBOOL_MFVSR_D])) |
fba4b861 MM |
6223 | && (REGNO (operands[SFBOOL_MFVSR_D]) == REGNO (operands[SFBOOL_BOOL_A1]) |
6224 | || (REG_P (operands[SFBOOL_BOOL_A2]) | |
6225 | && REGNO (operands[SFBOOL_MFVSR_D]) | |
6226 | == REGNO (operands[SFBOOL_BOOL_A2]))) | |
6227 | && REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_SHL_A]) | |
6228 | && (REGNO (operands[SFBOOL_SHL_D]) == REGNO (operands[SFBOOL_BOOL_D]) | |
7a6ed74d MM |
6229 | || peep2_reg_dead_p (3, operands[SFBOOL_BOOL_D])) |
6230 | && peep2_reg_dead_p (4, operands[SFBOOL_SHL_D])" | |
fba4b861 MM |
6231 | [(set (match_dup SFBOOL_TMP_GPR) |
6232 | (ashift:DI (match_dup SFBOOL_BOOL_A_DI) | |
6233 | (const_int 32))) | |
6234 | ||
6235 | (set (match_dup SFBOOL_TMP_VSX_DI) | |
6236 | (match_dup SFBOOL_TMP_GPR)) | |
6237 | ||
6238 | (set (match_dup SFBOOL_MTVSR_D_V4SF) | |
7a6ed74d | 6239 | (and_ior_xor:V4SF (match_dup SFBOOL_MFVSR_A_V4SF) |
fba4b861 MM |
6240 | (match_dup SFBOOL_TMP_VSX)))] |
6241 | { | |
6242 | rtx bool_a1 = operands[SFBOOL_BOOL_A1]; | |
6243 | rtx bool_a2 = operands[SFBOOL_BOOL_A2]; | |
6244 | int regno_mfvsr_d = REGNO (operands[SFBOOL_MFVSR_D]); | |
7a6ed74d | 6245 | int regno_mfvsr_a = REGNO (operands[SFBOOL_MFVSR_A]); |
fba4b861 MM |
6246 | int regno_tmp_vsx = REGNO (operands[SFBOOL_TMP_VSX]); |
6247 | int regno_mtvsr_d = REGNO (operands[SFBOOL_MTVSR_D]); | |
6248 | ||
6249 | if (CONST_INT_P (bool_a2)) | |
6250 | { | |
6251 | rtx tmp_gpr = operands[SFBOOL_TMP_GPR]; | |
6252 | emit_move_insn (tmp_gpr, bool_a2); | |
6253 | operands[SFBOOL_BOOL_A_DI] = tmp_gpr; | |
6254 | } | |
6255 | else | |
6256 | { | |
6257 | int regno_bool_a1 = REGNO (bool_a1); | |
6258 | int regno_bool_a2 = REGNO (bool_a2); | |
6259 | int regno_bool_a = (regno_mfvsr_d == regno_bool_a1 | |
6260 | ? regno_bool_a2 : regno_bool_a1); | |
6261 | operands[SFBOOL_BOOL_A_DI] = gen_rtx_REG (DImode, regno_bool_a); | |
6262 | } | |
6263 | ||
7a6ed74d | 6264 | operands[SFBOOL_MFVSR_A_V4SF] = gen_rtx_REG (V4SFmode, regno_mfvsr_a); |
fba4b861 MM |
6265 | operands[SFBOOL_TMP_VSX_DI] = gen_rtx_REG (DImode, regno_tmp_vsx); |
6266 | operands[SFBOOL_MTVSR_D_V4SF] = gen_rtx_REG (V4SFmode, regno_mtvsr_d); | |
6267 | }) | |
51df4136 KL |
6268 | |
6269 | ;; Support signed/unsigned long long to float conversion vectorization. | |
6270 | ;; Note that any_float (pc) here is just for code attribute <su>. | |
6271 | (define_expand "vec_pack<su>_float_v2di" | |
6272 | [(match_operand:V4SF 0 "vfloat_operand") | |
6273 | (match_operand:V2DI 1 "vint_operand") | |
6274 | (match_operand:V2DI 2 "vint_operand") | |
6275 | (any_float (pc))] | |
6276 | "TARGET_VSX" | |
6277 | { | |
6278 | rtx r1 = gen_reg_rtx (V4SFmode); | |
6279 | rtx r2 = gen_reg_rtx (V4SFmode); | |
6280 | emit_insn (gen_vsx_xvcv<su>xdsp (r1, operands[1])); | |
6281 | emit_insn (gen_vsx_xvcv<su>xdsp (r2, operands[2])); | |
6282 | rs6000_expand_extract_even (operands[0], r1, r2); | |
6283 | DONE; | |
6284 | }) | |
6285 | ||
6286 | ;; Support float to signed/unsigned long long conversion vectorization. | |
6287 | ;; Note that any_fix (pc) here is just for code attribute <su>. | |
6288 | (define_expand "vec_unpack_<su>fix_trunc_hi_v4sf" | |
6289 | [(match_operand:V2DI 0 "vint_operand") | |
6290 | (match_operand:V4SF 1 "vfloat_operand") | |
6291 | (any_fix (pc))] | |
6292 | "TARGET_VSX" | |
6293 | { | |
6294 | rtx reg = gen_reg_rtx (V4SFmode); | |
6295 | rs6000_expand_interleave (reg, operands[1], operands[1], BYTES_BIG_ENDIAN); | |
6296 | emit_insn (gen_vsx_xvcvsp<su>xds (operands[0], reg)); | |
6297 | DONE; | |
6298 | }) | |
6299 | ||
6300 | ;; Note that any_fix (pc) here is just for code attribute <su>. | |
6301 | (define_expand "vec_unpack_<su>fix_trunc_lo_v4sf" | |
6302 | [(match_operand:V2DI 0 "vint_operand") | |
6303 | (match_operand:V4SF 1 "vfloat_operand") | |
6304 | (any_fix (pc))] | |
6305 | "TARGET_VSX" | |
6306 | { | |
6307 | rtx reg = gen_reg_rtx (V4SFmode); | |
6308 | rs6000_expand_interleave (reg, operands[1], operands[1], !BYTES_BIG_ENDIAN); | |
6309 | emit_insn (gen_vsx_xvcvsp<su>xds (operands[0], reg)); | |
6310 | DONE; | |
6311 | }) | |
6312 | ||
8ee2640b PB |
6313 | (define_insn "vsx_<xvcvbf16>" |
6314 | [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa") | |
6315 | (unspec:V16QI [(match_operand:V16QI 1 "vsx_register_operand" "wa")] | |
6316 | XVCVBF16))] | |
5d9d0c94 | 6317 | "TARGET_POWER10" |
8ee2640b PB |
6318 | "<xvcvbf16> %x0,%x1" |
6319 | [(set_attr "type" "vecfloat")]) | |
02ef74ba CL |
6320 | |
6321 | (define_insn "vec_mtvsrbmi" | |
6322 | [(set (match_operand:V16QI 0 "altivec_register_operand" "=v") | |
6323 | (unspec:V16QI [(match_operand:QI 1 "u6bit_cint_operand" "n")] | |
6324 | UNSPEC_MTVSBM))] | |
6325 | "TARGET_POWER10" | |
6326 | "mtvsrbmi %0,%1" | |
6327 | ) | |
6328 | ||
;; Move-To-Vector-Status-Register-Mask (ISA 3.1 mtvsr[hwdq]m): move the
;; bit mask in GPR operand 1 into vector mask register operand 0, one
;; mask bit per <wd>-sized element.  Fix: drop the stray ";" that
;; followed the output template string.
(define_insn "vec_mtvsr_<mode>"
  [(set (match_operand:VSX_MM 0 "altivec_register_operand" "=v")
	(unspec:VSX_MM [(match_operand:DI 1 "gpc_reg_operand" "r")]
		       UNSPEC_MTVSBM))]
  "TARGET_POWER10"
  "mtvsr<wd>m %0,%1"
  [(set_attr "type" "vecsimple")])
6336 | ||
6337 | (define_insn "vec_cntmb_<mode>" | |
6338 | [(set (match_operand:DI 0 "gpc_reg_operand" "=r") | |
6339 | (unspec:DI [(match_operand:VSX_MM4 1 "altivec_register_operand" "v") | |
6340 | (match_operand:QI 2 "const_0_to_1_operand" "n")] | |
6341 | UNSPEC_VCNTMB))] | |
6342 | "TARGET_POWER10" | |
03e93e1e | 6343 | "vcntmb<wd> %0,%1,%2" |
02ef74ba CL |
6344 | [(set_attr "type" "vecsimple")]) |
6345 | ||
6346 | (define_insn "vec_extract_<mode>" | |
6347 | [(set (match_operand:SI 0 "register_operand" "=r") | |
6348 | (unspec:SI [(match_operand:VSX_MM 1 "altivec_register_operand" "v")] | |
6349 | UNSPEC_VEXTRACT))] | |
6350 | "TARGET_POWER10" | |
03e93e1e | 6351 | "vextract<wd>m %0,%1" |
02ef74ba CL |
6352 | [(set_attr "type" "vecsimple")]) |
6353 | ||
6354 | (define_insn "vec_expand_<mode>" | |
6355 | [(set (match_operand:VSX_MM 0 "vsx_register_operand" "=v") | |
6356 | (unspec:VSX_MM [(match_operand:VSX_MM 1 "vsx_register_operand" "v")] | |
6357 | UNSPEC_VEXPAND))] | |
6358 | "TARGET_POWER10" | |
03e93e1e | 6359 | "vexpand<wd>m %0,%1" |
02ef74ba | 6360 | [(set_attr "type" "vecsimple")]) |
f1ad419e CL |
6361 | |
6362 | (define_insn "dives_<mode>" | |
6363 | [(set (match_operand:VIlong 0 "vsx_register_operand" "=v") | |
6364 | (unspec:VIlong [(match_operand:VIlong 1 "vsx_register_operand" "v") | |
6365 | (match_operand:VIlong 2 "vsx_register_operand" "v")] | |
6366 | UNSPEC_VDIVES))] | |
6367 | "TARGET_POWER10" | |
6368 | "vdives<wd> %0,%1,%2" | |
6369 | [(set_attr "type" "vecdiv") | |
6370 | (set_attr "size" "<bits>")]) | |
6371 | ||
6372 | (define_insn "diveu_<mode>" | |
6373 | [(set (match_operand:VIlong 0 "vsx_register_operand" "=v") | |
6374 | (unspec:VIlong [(match_operand:VIlong 1 "vsx_register_operand" "v") | |
6375 | (match_operand:VIlong 2 "vsx_register_operand" "v")] | |
6376 | UNSPEC_VDIVEU))] | |
6377 | "TARGET_POWER10" | |
6378 | "vdiveu<wd> %0,%1,%2" | |
6379 | [(set_attr "type" "vecdiv") | |
6380 | (set_attr "size" "<bits>")]) | |
6381 | ||
6382 | (define_insn "div<mode>3" | |
6383 | [(set (match_operand:VIlong 0 "vsx_register_operand" "=v") | |
6384 | (div:VIlong (match_operand:VIlong 1 "vsx_register_operand" "v") | |
6385 | (match_operand:VIlong 2 "vsx_register_operand" "v")))] | |
6386 | "TARGET_POWER10" | |
6387 | "vdivs<wd> %0,%1,%2" | |
6388 | [(set_attr "type" "vecdiv") | |
6389 | (set_attr "size" "<bits>")]) | |
6390 | ||
6391 | (define_insn "udiv<mode>3" | |
6392 | [(set (match_operand:VIlong 0 "vsx_register_operand" "=v") | |
6393 | (udiv:VIlong (match_operand:VIlong 1 "vsx_register_operand" "v") | |
6394 | (match_operand:VIlong 2 "vsx_register_operand" "v")))] | |
6395 | "TARGET_POWER10" | |
6396 | "vdivu<wd> %0,%1,%2" | |
6397 | [(set_attr "type" "vecdiv") | |
6398 | (set_attr "size" "<bits>")]) | |
6399 | ||
062c762e | 6400 | (define_insn "mod<mode>3" |
f1ad419e CL |
6401 | [(set (match_operand:VIlong 0 "vsx_register_operand" "=v") |
6402 | (mod:VIlong (match_operand:VIlong 1 "vsx_register_operand" "v") | |
6403 | (match_operand:VIlong 2 "vsx_register_operand" "v")))] | |
6404 | "TARGET_POWER10" | |
6405 | "vmods<wd> %0,%1,%2" | |
6406 | [(set_attr "type" "vecdiv") | |
6407 | (set_attr "size" "<bits>")]) | |
6408 | ||
062c762e | 6409 | (define_insn "umod<mode>3" |
f1ad419e CL |
6410 | [(set (match_operand:VIlong 0 "vsx_register_operand" "=v") |
6411 | (umod:VIlong (match_operand:VIlong 1 "vsx_register_operand" "v") | |
6412 | (match_operand:VIlong 2 "vsx_register_operand" "v")))] | |
6413 | "TARGET_POWER10" | |
6414 | "vmodu<wd> %0,%1,%2" | |
6415 | [(set_attr "type" "vecdiv") | |
6416 | (set_attr "size" "<bits>")]) | |
6417 | ||
1c0d49b9 | 6418 | (define_insn "smul<mode>3_highpart" |
f1ad419e CL |
6419 | [(set (match_operand:VIlong 0 "vsx_register_operand" "=v") |
6420 | (mult:VIlong (ashiftrt | |
6421 | (match_operand:VIlong 1 "vsx_register_operand" "v") | |
6422 | (const_int 32)) | |
6423 | (ashiftrt | |
6424 | (match_operand:VIlong 2 "vsx_register_operand" "v") | |
6425 | (const_int 32))))] | |
6426 | "TARGET_POWER10" | |
6427 | "vmulhs<wd> %0,%1,%2" | |
6428 | [(set_attr "type" "veccomplex")]) | |
6429 | ||
1c0d49b9 | 6430 | (define_insn "umul<mode>3_highpart" |
f1ad419e CL |
6431 | [(set (match_operand:VIlong 0 "vsx_register_operand" "=v") |
6432 | (us_mult:VIlong (ashiftrt | |
6433 | (match_operand:VIlong 1 "vsx_register_operand" "v") | |
6434 | (const_int 32)) | |
6435 | (ashiftrt | |
6436 | (match_operand:VIlong 2 "vsx_register_operand" "v") | |
6437 | (const_int 32))))] | |
6438 | "TARGET_POWER10" | |
6439 | "vmulhu<wd> %0,%1,%2" | |
6440 | [(set_attr "type" "veccomplex")]) | |
6441 | ||
;; Vector multiply low doubleword (ISA 3.1 vmulld): element-wise 64-bit
;; multiply, keeping the low 64 bits of each 128-bit product.
(define_insn "mulv2di3"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=v")
	(mult:V2DI (match_operand:V2DI 1 "vsx_register_operand" "v")
		   (match_operand:V2DI 2 "vsx_register_operand" "v")))]
  "TARGET_POWER10"
  "vmulld %0,%1,%2"
  [(set_attr "type" "veccomplex")])
d2883be3 MM |
6450 | |
6451 | \f | |
6452 | ;; XXSPLTIW built-in function support | |
6453 | (define_insn "xxspltiw_v4si" | |
6454 | [(set (match_operand:V4SI 0 "register_operand" "=wa") | |
6455 | (unspec:V4SI [(match_operand:SI 1 "s32bit_cint_operand" "n")] | |
6456 | UNSPEC_XXSPLTIW))] | |
6457 | "TARGET_POWER10" | |
6458 | "xxspltiw %x0,%1" | |
bb24717e | 6459 | [(set_attr "type" "vecperm") |
d2883be3 MM |
6460 | (set_attr "prefixed" "yes")]) |
6461 | ||
6462 | (define_expand "xxspltiw_v4sf" | |
6463 | [(set (match_operand:V4SF 0 "register_operand" "=wa") | |
6464 | (unspec:V4SF [(match_operand:SF 1 "const_double_operand" "n")] | |
6465 | UNSPEC_XXSPLTIW))] | |
6466 | "TARGET_POWER10" | |
6467 | { | |
6468 | long value = rs6000_const_f32_to_i32 (operands[1]); | |
6469 | emit_insn (gen_xxspltiw_v4sf_inst (operands[0], GEN_INT (value))); | |
6470 | DONE; | |
6471 | }) | |
6472 | ||
6473 | (define_insn "xxspltiw_v4sf_inst" | |
6474 | [(set (match_operand:V4SF 0 "register_operand" "=wa") | |
6475 | (unspec:V4SF [(match_operand:SI 1 "c32bit_cint_operand" "n")] | |
6476 | UNSPEC_XXSPLTIW))] | |
6477 | "TARGET_POWER10" | |
6478 | "xxspltiw %x0,%1" | |
bb24717e | 6479 | [(set_attr "type" "vecperm") |
d2883be3 MM |
6480 | (set_attr "prefixed" "yes")]) |
6481 | ||
6482 | ;; XXSPLTIDP built-in function support | |
6483 | (define_expand "xxspltidp_v2df" | |
6484 | [(set (match_operand:V2DF 0 "register_operand" ) | |
6485 | (unspec:V2DF [(match_operand:SF 1 "const_double_operand")] | |
bb24717e | 6486 | UNSPEC_XXSPLTIDP))] |
d2883be3 MM |
6487 | "TARGET_POWER10" |
6488 | { | |
6489 | long value = rs6000_const_f32_to_i32 (operands[1]); | |
6490 | rs6000_emit_xxspltidp_v2df (operands[0], value); | |
6491 | DONE; | |
6492 | }) | |
6493 | ||
6494 | (define_insn "xxspltidp_v2df_inst" | |
6495 | [(set (match_operand:V2DF 0 "register_operand" "=wa") | |
6496 | (unspec:V2DF [(match_operand:SI 1 "c32bit_cint_operand" "n")] | |
bb24717e | 6497 | UNSPEC_XXSPLTIDP))] |
d2883be3 MM |
6498 | "TARGET_POWER10" |
6499 | "xxspltidp %x0,%1" | |
bb24717e | 6500 | [(set_attr "type" "vecperm") |
d2883be3 MM |
6501 | (set_attr "prefixed" "yes")]) |
6502 | ||
6503 | ;; XXSPLTI32DX built-in function support | |
6504 | (define_expand "xxsplti32dx_v4si" | |
6505 | [(set (match_operand:V4SI 0 "register_operand" "=wa") | |
6506 | (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0") | |
6507 | (match_operand:QI 2 "u1bit_cint_operand" "n") | |
6508 | (match_operand:SI 3 "s32bit_cint_operand" "n")] | |
6509 | UNSPEC_XXSPLTI32DX))] | |
6510 | "TARGET_POWER10" | |
6511 | { | |
6512 | int index = INTVAL (operands[2]); | |
6513 | ||
6514 | if (!BYTES_BIG_ENDIAN) | |
6515 | index = 1 - index; | |
6516 | ||
6517 | emit_insn (gen_xxsplti32dx_v4si_inst (operands[0], operands[1], | |
6518 | GEN_INT (index), operands[3])); | |
6519 | DONE; | |
6520 | } | |
bb24717e | 6521 | [(set_attr "type" "vecperm")]) |
d2883be3 MM |
6522 | |
6523 | (define_insn "xxsplti32dx_v4si_inst" | |
6524 | [(set (match_operand:V4SI 0 "register_operand" "=wa") | |
6525 | (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0") | |
6526 | (match_operand:QI 2 "u1bit_cint_operand" "n") | |
6527 | (match_operand:SI 3 "s32bit_cint_operand" "n")] | |
6528 | UNSPEC_XXSPLTI32DX))] | |
6529 | "TARGET_POWER10" | |
6530 | "xxsplti32dx %x0,%2,%3" | |
bb24717e | 6531 | [(set_attr "type" "vecperm") |
d2883be3 MM |
6532 | (set_attr "prefixed" "yes")]) |
6533 | ||
6534 | (define_expand "xxsplti32dx_v4sf" | |
6535 | [(set (match_operand:V4SF 0 "register_operand" "=wa") | |
6536 | (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0") | |
6537 | (match_operand:QI 2 "u1bit_cint_operand" "n") | |
6538 | (match_operand:SF 3 "const_double_operand" "n")] | |
6539 | UNSPEC_XXSPLTI32DX))] | |
6540 | "TARGET_POWER10" | |
6541 | { | |
6542 | int index = INTVAL (operands[2]); | |
6543 | long value = rs6000_const_f32_to_i32 (operands[3]); | |
6544 | if (!BYTES_BIG_ENDIAN) | |
6545 | index = 1 - index; | |
6546 | ||
6547 | emit_insn (gen_xxsplti32dx_v4sf_inst (operands[0], operands[1], | |
6548 | GEN_INT (index), GEN_INT (value))); | |
6549 | DONE; | |
6550 | }) | |
6551 | ||
6552 | (define_insn "xxsplti32dx_v4sf_inst" | |
6553 | [(set (match_operand:V4SF 0 "register_operand" "=wa") | |
6554 | (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0") | |
6555 | (match_operand:QI 2 "u1bit_cint_operand" "n") | |
6556 | (match_operand:SI 3 "s32bit_cint_operand" "n")] | |
6557 | UNSPEC_XXSPLTI32DX))] | |
6558 | "TARGET_POWER10" | |
6559 | "xxsplti32dx %x0,%2,%3" | |
bb24717e | 6560 | [(set_attr "type" "vecperm") |
d2883be3 MM |
6561 | (set_attr "prefixed" "yes")]) |
6562 | ||
6563 | ;; XXBLEND built-in function support | |
6564 | (define_insn "xxblend_<mode>" | |
6565 | [(set (match_operand:VM3 0 "register_operand" "=wa") | |
6566 | (unspec:VM3 [(match_operand:VM3 1 "register_operand" "wa") | |
6567 | (match_operand:VM3 2 "register_operand" "wa") | |
6568 | (match_operand:VM3 3 "register_operand" "wa")] | |
6569 | UNSPEC_XXBLEND))] | |
6570 | "TARGET_POWER10" | |
6571 | "xxblendv<VM3_char> %x0,%x1,%x2,%x3" | |
bb24717e | 6572 | [(set_attr "type" "vecperm") |
d2883be3 MM |
6573 | (set_attr "prefixed" "yes")]) |
6574 | ||
6575 | ;; XXPERMX built-in function support | |
6576 | (define_expand "xxpermx" | |
6577 | [(set (match_operand:V2DI 0 "register_operand" "+wa") | |
6578 | (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "wa") | |
6579 | (match_operand:V2DI 2 "register_operand" "wa") | |
6580 | (match_operand:V16QI 3 "register_operand" "wa") | |
6581 | (match_operand:QI 4 "u8bit_cint_operand" "n")] | |
6582 | UNSPEC_XXPERMX))] | |
6583 | "TARGET_POWER10" | |
6584 | { | |
6585 | if (BYTES_BIG_ENDIAN) | |
6586 | emit_insn (gen_xxpermx_inst (operands[0], operands[1], | |
6587 | operands[2], operands[3], | |
6588 | operands[4])); | |
6589 | else | |
6590 | { | |
6591 | /* Reverse value of byte element indexes by XORing with 0xFF. | |
6592 | Reverse the 32-byte section identifier match by subtracting bits [0:2] | |
6593 | of element from 7. */ | |
6594 | int value = INTVAL (operands[4]); | |
6595 | rtx vreg = gen_reg_rtx (V16QImode); | |
6596 | ||
6597 | emit_insn (gen_xxspltib_v16qi (vreg, GEN_INT (-1))); | |
6598 | emit_insn (gen_xorv16qi3 (operands[3], operands[3], vreg)); | |
6599 | value = 7 - value; | |
6600 | emit_insn (gen_xxpermx_inst (operands[0], operands[2], | |
6601 | operands[1], operands[3], | |
6602 | GEN_INT (value))); | |
6603 | } | |
6604 | ||
6605 | DONE; | |
6606 | } | |
bb24717e | 6607 | [(set_attr "type" "vecperm")]) |
d2883be3 MM |
6608 | |
;; The actual xxpermx instruction.  Operand 4 is the 3-bit UIM immediate
;; (u3bit_cint_operand), unlike the expander above which accepts the wider
;; built-in immediate; the expander reduces it before reaching here.
;; "v" constraints restrict operands to Altivec registers (VSR 32-63);
;; %x prints the corresponding 6-bit VSX register number.
(define_insn "xxpermx_inst"
  [(set (match_operand:V2DI 0 "register_operand" "+v")
	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "v")
		      (match_operand:V2DI 2 "register_operand" "v")
		      (match_operand:V16QI 3 "register_operand" "v")
		      (match_operand:QI 4 "u3bit_cint_operand" "n")]
		     UNSPEC_XXPERMX))]
  "TARGET_POWER10"
  "xxpermx %x0,%x1,%x2,%x3,%4"
  [(set_attr "type" "vecperm")
   (set_attr "prefixed" "yes")])	;; 8-byte prefixed (8RR) instruction
6620 | ||
;; XXEVAL built-in function support.
;; xxeval computes an arbitrary 3-input bitwise logical function of
;; operands 1-3, selected by the 8-bit truth-table immediate in operand 4
;; (ISA 3.1).
;;
;; Fix: the output template previously used plain %0,%1,%2,%3.  The "wa"
;; constraint allows any of the 64 VSX registers, and for the Altivec half
;; (VSR 32-63) plain %N prints the Altivec register name/number instead of
;; the 6-bit VSX register number the XX3-form encoding needs.  Use the %x
;; modifier, as the other VSX patterns in this file (xxblendv, xxpermx,
;; xxsplti32dx) already do.
(define_insn "xxeval"
  [(set (match_operand:V2DI 0 "register_operand" "=wa")
	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "wa")
		      (match_operand:V2DI 2 "register_operand" "wa")
		      (match_operand:V2DI 3 "register_operand" "wa")
		      (match_operand:QI 4 "u8bit_cint_operand" "n")]
		     UNSPEC_XXEVAL))]
  "TARGET_POWER10"
  "xxeval %x0,%x1,%x2,%x3,%4"
  [(set_attr "type" "vecperm")
   (set_attr "prefixed" "yes")])	;; 8-byte prefixed (8RR) instruction
6633 | ||
240dd6c0 HG |
;; Construct V1TI by vsx_concat_v2di.
;; Split a (subreg:V1TI (reg:TI)) move from a GPR pair into a VSX register
;; by concatenating the two 64-bit halves with vsx_concat_v2di (which can
;; use mtvsrdd on Power9, hence TARGET_P9_VECTOR), avoiding a trip through
;; memory.  Runs only before reload, since it creates a new pseudo.
(define_split
  [(set (match_operand:V1TI 0 "vsx_register_operand")
	(subreg:V1TI
	 (match_operand:TI 1 "int_reg_operand") 0 ))]
  "TARGET_P9_VECTOR && !reload_completed"
  [(const_int 0)]
{
  /* The two DImode halves of the TImode source (offsets 0 and 8).  */
  rtx tmp1 = simplify_gen_subreg (DImode, operands[1], TImode, 0);
  rtx tmp2 = simplify_gen_subreg (DImode, operands[1], TImode, 8);
  rtx tmp3 = gen_reg_rtx (V2DImode);
  /* Glue the halves together into a V2DI vector register...  */
  emit_insn (gen_vsx_concat_v2di (tmp3, tmp1, tmp2));
  /* ...then view that register as V1TI for the final move.  */
  rtx tmp4 = simplify_gen_subreg (V1TImode, tmp3, V2DImode, 0);
  emit_move_insn (operands[0], tmp4);
  DONE;
})
943d631a BS |
6650 | |
;; vmsumcud
;; Vector Multiply-Sum & write Carry-out Unsigned Doubleword (ISA 3.1):
;; produces the carry-out of the 128-bit multiply-sum of the unsigned
;; doubleword elements of operands 1 and 2 with operand 3.  VR-form
;; instruction, hence the "v" (Altivec register) constraints and no
;; "prefixed" attribute.
(define_insn "vmsumcud"
  [(set (match_operand:V1TI 0 "register_operand" "+v")
	(unspec:V1TI [(match_operand:V2DI 1 "register_operand" "v")
		      (match_operand:V2DI 2 "register_operand" "v")
		      (match_operand:V1TI 3 "register_operand" "v")]
		     UNSPEC_VMSUMCUD))]
  "TARGET_POWER10"
  "vmsumcud %0,%1,%2,%3"
  [(set_attr "type" "veccomplex")]
)