generic permutation to merge the data back into the right place. This
permutation results in VPERMQ, which is slow, so better fall back to
ix86_expand_vecop_qihi. */
- if (!TARGET_AVX512BW)
- return false;
-
- if ((qimode == V16QImode && !TARGET_AVX2)
- || (qimode == V32QImode && (!TARGET_AVX512BW || !TARGET_EVEX512))
+ if (!TARGET_AVX512BW
+ || (qimode == V16QImode && !TARGET_AVX512VL)
+ || (qimode == V32QImode && !TARGET_EVEX512)
/* There are no V64HImode instructions. */
|| qimode == V64QImode)
return false;
{
case E_V16QImode:
himode = V16HImode;
- if (TARGET_AVX512VL && TARGET_AVX512BW)
- gen_truncate = gen_truncv16hiv16qi2;
+ gen_truncate = gen_truncv16hiv16qi2;
break;
case E_V32QImode:
himode = V32HImode;
hdest = expand_simple_binop (himode, code, hop1, hop2,
NULL_RTX, 1, OPTAB_DIRECT);
- if (gen_truncate)
- emit_insn (gen_truncate (dest, hdest));
- else
- {
- struct expand_vec_perm_d d;
- rtx wqdest = gen_reg_rtx (wqimode);
- rtx wqres = gen_lowpart (wqimode, hdest);
- bool ok;
- int i;
-
- /* Merge the data back into the right place. */
- d.target = wqdest;
- d.op0 = d.op1 = wqres;
- d.vmode = wqimode;
- d.nelt = GET_MODE_NUNITS (wqimode);
- d.one_operand_p = false;
- d.testing_p = false;
-
- for (i = 0; i < d.nelt; ++i)
- d.perm[i] = i * 2;
-
- ok = ix86_expand_vec_perm_const_1 (&d);
- gcc_assert (ok);
-
- emit_move_insn (dest, gen_lowpart (qimode, wqdest));
- }
-
+ emit_insn (gen_truncate (dest, hdest));
return true;
}