git.ipfire.org Git - thirdparty/gcc.git/commitdiff
AVX512BF16: Do not allow permutation with vcvtne2ps2bf16 [PR115889]
author Hongyu Wang <hongyu.wang@intel.com>
Sat, 13 Jul 2024 03:45:31 +0000 (11:45 +0800)
committer Hongyu Wang <hongyu.wang@intel.com>
Mon, 15 Jul 2024 02:34:07 +0000 (10:34 +0800)
According to the instruction spec of AVX512BF16, the conversion from float
to BF16 is not a simple truncation. It has special handling for
denormals/NaNs, and even for a normal float it adds an extra bias according
to the least significant bit of the bf16 number. This means we cannot use
vcvtne2ps2bf16 for any bf16 vector shuffle.
The optimization introduced in r15-1368 added a specific split that converts
an HImode permutation into this instruction, so remove it and treat
BFmode permutations the same as HFmode ones.

gcc/ChangeLog:

PR target/115889
* config/i386/predicates.md (vcvtne2ps2bf_parallel): Remove.
* config/i386/sse.md (hi_cvt_bf): Remove.
(HI_CVT_BF): Likewise.
(vpermt2_sepcial_bf16_shuffle_<mode>): Likewise.

gcc/testsuite/ChangeLog:

PR target/115889
* gcc.target/i386/vpermt2-special-bf16-shufflue.c: Adjust output
scan.

gcc/config/i386/predicates.md
gcc/config/i386/sse.md
gcc/testsuite/gcc.target/i386/vpermt2-special-bf16-shufflue.c

index a894847adaf7cb5414c66c78b8a94405ece9975b..5d0bb1e0f54adecd05a9f26cfa491645feaed68a 100644 (file)
 
   return true;
 })
-
-;; Check that each element is odd and incrementally increasing from 1
-(define_predicate "vcvtne2ps2bf_parallel"
-  (and (match_code "const_vector")
-       (match_code "const_int" "a"))
-{
-  for (int i = 0; i < XVECLEN (op, 0); ++i)
-    if (INTVAL (XVECEXP (op, 0, i)) != (2 * i + 1))
-      return false;
-  return true;
-})
index b3b4697924b55f082349ee865bcfabd7649ecf6c..c134494cd200931284b3cc62188e527840e6721f 100644 (file)
   "TARGET_AVXVNNIINT16"
   "vpdp<vpdpwprodtype>\t{%3, %2, %0|%0, %2, %3}"
    [(set_attr "prefix" "vex")])
-
-(define_mode_attr hi_cvt_bf
-  [(V8HI "v8bf") (V16HI "v16bf") (V32HI "v32bf")])
-
-(define_mode_attr HI_CVT_BF
-  [(V8HI "V8BF") (V16HI "V16BF") (V32HI "V32BF")])
-
-(define_insn_and_split "vpermt2_sepcial_bf16_shuffle_<mode>"
-  [(set (match_operand:VI2_AVX512F 0 "register_operand")
-       (unspec:VI2_AVX512F
-         [(match_operand:VI2_AVX512F 1 "vcvtne2ps2bf_parallel")
-          (match_operand:VI2_AVX512F 2 "register_operand")
-          (match_operand:VI2_AVX512F 3 "nonimmediate_operand")]
-          UNSPEC_VPERMT2))]
-  "TARGET_AVX512VL && TARGET_AVX512BF16 && ix86_pre_reload_split ()"
-  "#"
-  "&& 1"
-  [(const_int 0)]
-{
-  rtx op0 = gen_reg_rtx (<HI_CVT_BF>mode);
-  operands[2] = lowpart_subreg (<ssePSmode>mode,
-                               force_reg (<MODE>mode, operands[2]),
-                               <MODE>mode);
-  operands[3] = lowpart_subreg (<ssePSmode>mode,
-                               force_reg (<MODE>mode, operands[3]),
-                               <MODE>mode);
-
-  emit_insn (gen_avx512f_cvtne2ps2bf16_<hi_cvt_bf>(op0,
-                                                  operands[3],
-                                                  operands[2]));
-  emit_move_insn (operands[0], lowpart_subreg (<MODE>mode, op0,
-                                              <HI_CVT_BF>mode));
-  DONE;
-}
-[(set_attr "mode" "<sseinsnmode>")])
index 5c65f2a988470beee5ca3788ebe3ba683897a98e..e504f3f4cd702fbfb726b1910144c03687582838 100755 (executable)
@@ -1,7 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-O2 -mavx512bf16 -mavx512vl" } */
-/* { dg-final { scan-assembler-not "vpermi2b" } } */
-/* { dg-final { scan-assembler-times "vcvtne2ps2bf16" 3 } } */
+/* { dg-final { scan-assembler-times "vpermi2w" 3 } } */
 
 typedef __bf16 v8bf __attribute__((vector_size(16)));
 typedef __bf16 v16bf __attribute__((vector_size(32)));