(match_operand:V_MOV 1 "register_operand")
(match_operand 2 "immediate_operand")]
"MODE_VF (<V_MOV_ALT:MODE>mode) < MODE_VF (<V_MOV:MODE>mode)
- && <V_MOV_ALT:SCALAR_MODE>mode == <V_MOV:SCALAR_MODE>mode"
+ && <V_MOV_ALT:SCALAR_MODE>mode == <V_MOV:SCALAR_MODE>mode
+ && (!TARGET_RDNA2_PLUS || MODE_VF (<V_MOV:MODE>mode) <= 32)"
{
int numlanes = GET_MODE_NUNITS (<V_MOV_ALT:MODE>mode);
int firstlane = INTVAL (operands[2]) * numlanes;
gcc_assert (nelt <= 64);
gcc_assert (sel.length () == nelt);
- if (!dst)
- {
- /* All vector permutations are possible on this architecture,
- with varying degrees of efficiency depending on the permutation. */
- return true;
- }
-
unsigned int perm[64];
for (unsigned int i = 0; i < nelt; ++i)
perm[i] = sel[i] & (2 * nelt - 1);
for (unsigned int i = nelt; i < 64; ++i)
perm[i] = 0;
+ /* RDNA devices can only do permutations within each group of 32 lanes.
+ Reject permutations that cross the boundary. */
+ if (TARGET_RDNA2_PLUS)
+ for (unsigned int i = 0; i < nelt; i++)
+ if (i < 31 ? perm[i] > 31 : perm[i] < 32)
+ return false;
+
+ /* All vector permutations are possible on other architectures,
+ with varying degrees of efficiency depending on the permutation. */
+ if (!dst)
+ return true;
+
src0 = force_reg (vmode, src0);
src1 = force_reg (vmode, src1);