From: Uros Bizjak Date: Wed, 23 Jun 2021 14:14:31 +0000 (+0200) Subject: i386: Add PPERM two-operand 64bit vector permutation [PR89021] X-Git-Tag: basepoints/gcc-13~6567 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=37e93925366676201b526624e9f8dc32d82b4ff2;p=thirdparty%2Fgcc.git i386: Add PPERM two-operand 64bit vector permutation [PR89021] Add emulation of V8QI PPERM permutations for TARGET_XOP targets. Similar to PSHUFB, the permutation is performed with the V16QI PPERM instruction, where the selector is defined in V16QI mode with inactive elements set to 0x80. Specific to two-operand permutations is the remapping of elements from the second operand (e.g. e[8] -> e[16]), as we have to account for the inactive elements from the first operand. 2021-06-23 Uroš Bizjak gcc/ PR target/89021 * config/i386/i386-expand.c (expand_vec_perm_pshufb): Handle 64bit modes for TARGET_XOP. Use indirect gen_* functions. * config/i386/mmx.md (mmx_ppermv64): New insn pattern. * config/i386/i386.md (unspec): Move UNSPEC_XOP_PERMUTE from ... * config/i386/sse.md (unspec): ... here. 
--- diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c index 2986b49065c6..9c922bf1bf17 100644 --- a/gcc/config/i386/i386-expand.c +++ b/gcc/config/i386/i386-expand.c @@ -17467,10 +17467,23 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d) if (!d->one_operand_p) { - if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16) + if (GET_MODE_SIZE (d->vmode) == 8) + { + if (!TARGET_XOP) + return false; + vmode = V8QImode; + } + else if (GET_MODE_SIZE (d->vmode) == 16) + { + if (!TARGET_XOP) + return false; + } + else if (GET_MODE_SIZE (d->vmode) == 32) { - if (TARGET_AVX2 - && valid_perm_using_mode_p (V2TImode, d)) + if (!TARGET_AVX2) + return false; + + if (valid_perm_using_mode_p (V2TImode, d)) { if (d->testing_p) return true; @@ -17492,6 +17505,8 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d) } return false; } + else + return false; } else { @@ -17651,8 +17666,22 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d) { rtx m128 = GEN_INT (-128); + /* Remap elements from the second operand, as we have to + account for inactive top 8 elements from the first operand. */ + if (!d->one_operand_p) + for (i = 0; i < nelt; ++i) + { + int ival = INTVAL (rperm[i]); + if (ival >= 8) + ival += 8; + rperm[i] = GEN_INT (ival); + } + + /* V8QI is emulated with V16QI instruction, fill inactive + elements in the top 8 positions with zeros. 
*/ for (i = nelt; i < 16; ++i) rperm[i] = m128; + vpmode = V16QImode; } @@ -17660,36 +17689,54 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d) gen_rtvec_v (GET_MODE_NUNITS (vpmode), rperm)); vperm = force_reg (vpmode, vperm); - target = d->target; - if (d->vmode != vmode) + if (vmode == d->vmode) + target = d->target; + else target = gen_reg_rtx (vmode); + op0 = gen_lowpart (vmode, d->op0); + if (d->one_operand_p) { + rtx (*gen) (rtx, rtx, rtx); + if (vmode == V8QImode) - emit_insn (gen_mmx_pshufbv8qi3 (target, op0, vperm)); + gen = gen_mmx_pshufbv8qi3; else if (vmode == V16QImode) - emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm)); + gen = gen_ssse3_pshufbv16qi3; else if (vmode == V32QImode) - emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm)); + gen = gen_avx2_pshufbv32qi3; else if (vmode == V64QImode) - emit_insn (gen_avx512bw_pshufbv64qi3 (target, op0, vperm)); + gen = gen_avx512bw_pshufbv64qi3; else if (vmode == V8SFmode) - emit_insn (gen_avx2_permvarv8sf (target, op0, vperm)); + gen = gen_avx2_permvarv8sf; else if (vmode == V8SImode) - emit_insn (gen_avx2_permvarv8si (target, op0, vperm)); + gen = gen_avx2_permvarv8si; else if (vmode == V16SFmode) - emit_insn (gen_avx512f_permvarv16sf (target, op0, vperm)); + gen = gen_avx512f_permvarv16sf; else if (vmode == V16SImode) - emit_insn (gen_avx512f_permvarv16si (target, op0, vperm)); + gen = gen_avx512f_permvarv16si; else gcc_unreachable (); + + emit_insn (gen (target, op0, vperm)); } else { + rtx (*gen) (rtx, rtx, rtx, rtx); + op1 = gen_lowpart (vmode, d->op1); - emit_insn (gen_xop_pperm (target, op0, op1, vperm)); + + if (vmode == V8QImode) + gen = gen_mmx_ppermv64; + else if (vmode == V16QImode) + gen = gen_xop_pperm; + else + gcc_unreachable (); + + emit_insn (gen (target, op0, op1, vperm)); } + if (target != d->target) emit_move_insn (d->target, gen_lowpart (d->vmode, target)); diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 4e242105719a..9043be3105db 100644 --- 
a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -120,6 +120,7 @@ UNSPEC_MOVMSK UNSPEC_BLENDV UNSPEC_PSHUFB + UNSPEC_XOP_PERMUTE UNSPEC_RCP UNSPEC_RSQRT UNSPEC_PSADBW diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index a107ac5ccb47..7a827dceb015 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -2331,6 +2331,19 @@ "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "sse4arg")]) +;; XOP permute instructions +(define_insn "mmx_ppermv64" + [(set (match_operand:V8QI 0 "register_operand" "=x") + (unspec:V8QI + [(match_operand:V8QI 1 "register_operand" "x") + (match_operand:V8QI 2 "register_operand" "x") + (match_operand:V16QI 3 "nonimmediate_operand" "xm")] + UNSPEC_XOP_PERMUTE))] + "TARGET_XOP && TARGET_MMX_WITH_SSE" + "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "sse4arg") + (set_attr "mode" "TI")]) + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; Parallel integral logical operations diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index f5f9403db441..c5f739c73b52 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -53,7 +53,6 @@ UNSPEC_FMADDSUB UNSPEC_XOP_UNSIGNED_CMP UNSPEC_XOP_TRUEFALSE - UNSPEC_XOP_PERMUTE UNSPEC_FRCZ ;; For AES support