]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
i386: Support partial vectorized V2BF/V4BF smaxmin
authorLevy Hsu <admin@levyhsu.com>
Tue, 27 Aug 2024 04:52:20 +0000 (14:22 +0930)
committerLevy Hsu <admin@levyhsu.com>
Tue, 3 Sep 2024 02:54:51 +0000 (02:54 +0000)
This patch adds smax/smin support for partially vectorized V2BF/V4BF.

gcc/ChangeLog:

* config/i386/mmx.md (<code><mode>3): New define_expand for V2BF/V4BF smaxmin.

gcc/testsuite/ChangeLog:

* gcc.target/i386/avx10_2-partial-bf-vector-smaxmin-1.c: New test.

gcc/config/i386/mmx.md
gcc/testsuite/gcc.target/i386/avx10_2-partial-bf-vector-smaxmin-1.c [new file with mode: 0644]

index 076ea2e2fb24f0cc1574c1a25e938941677b50fe..fac90cfd4d4cc34ca8d8e4c9bc0bf55aedfc1be9 100644 (file)
   DONE;
 })
 
+;; Expand smax/smin (the smaxmin code iterator) on the partial BF16 vector
+;; modes of VBF_32_64 by doing the operation in V8BFmode and keeping only
+;; the low part of the wide result.
+;; NOTE(review): the inputs are widened with lowpart_subreg, so this
+;; expander does not set the upper V8BF elements itself; presumably that
+;; is acceptable because only the low part of the result is read back --
+;; confirm.
+(define_expand "<code><mode>3"
+  [(set (match_operand:VBF_32_64 0 "register_operand")
+    (smaxmin:VBF_32_64
+      (match_operand:VBF_32_64 1 "nonimmediate_operand")
+      (match_operand:VBF_32_64 2 "nonimmediate_operand")))]
+  "TARGET_AVX10_2_256"
+{
+  /* Fresh V8BF pseudo that receives the wide result.  */
+  rtx op0 = gen_reg_rtx (V8BFmode);
+  /* The predicates allow memory operands, so force each input into a
+     register before viewing it as the low part of a V8BF value.  */
+  rtx op1 = lowpart_subreg (V8BFmode,
+                           force_reg (<MODE>mode, operands[1]), <MODE>mode);
+  rtx op2 = lowpart_subreg (V8BFmode,
+                           force_reg (<MODE>mode, operands[2]), <MODE>mode);
+
+  /* Emit the V8BF variant of the same max/min operation.  */
+  emit_insn (gen_<code>v8bf3 (op0, op1, op2));
+
+  /* Copy the low part of the wide result into the real destination.  */
+  emit_move_insn (operands[0], lowpart_subreg (<MODE>mode, op0, V8BFmode));
+  DONE;
+})
+
 (define_expand "sqrt<mode>2"
   [(set (match_operand:VHF_32_64 0 "register_operand")
        (sqrt:VHF_32_64
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-partial-bf-vector-smaxmin-1.c b/gcc/testsuite/gcc.target/i386/avx10_2-partial-bf-vector-smaxmin-1.c
new file mode 100644 (file)
index 0000000..0a7cc58
--- /dev/null
@@ -0,0 +1,36 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-mavx10.2 -Ofast" } */
+/* { dg-final { scan-assembler-times "vmaxpbf16" 2 } } */
+/* { dg-final { scan-assembler-times "vminpbf16" 2 } } */
+
+/* Store the element-wise maximum of four __bf16 values from SRC1 and SRC2
+   into DEST (SRC2's element is chosen when SRC1's is not greater).
+   The compare/select form is deliberate: the dg-final scans in this file
+   count the max instructions the compiler emits for it.  */
+void
+maxpbf16_64 (__bf16* restrict dest, __bf16* restrict src1, __bf16* restrict src2)
+{
+  int i;
+  for (i = 0; i < 4; i++)
+    dest[i] = src1[i] > src2[i] ? src1[i] : src2[i];
+}
+
+/* Store the element-wise maximum of two __bf16 values from SRC1 and SRC2
+   into DEST (SRC2's element is chosen when SRC1's is not greater).
+   Two-element counterpart of the four-element max test; the source form
+   is what the dg-final instruction counts in this file rely on.  */
+void
+maxpbf16_32 (__bf16* restrict dest, __bf16* restrict src1, __bf16* restrict src2)
+{
+  int i;
+  for (i = 0; i < 2; i++)
+    dest[i] = src1[i] > src2[i] ? src1[i] : src2[i];
+}
+
+/* Store the element-wise minimum of four __bf16 values from SRC1 and SRC2
+   into DEST (SRC2's element is chosen when SRC1's is not smaller).
+   The compare/select form is deliberate: the dg-final scans in this file
+   count the min instructions the compiler emits for it.  */
+void
+minpbf16_64 (__bf16* restrict dest, __bf16* restrict src1, __bf16* restrict src2)
+{
+  int i;
+  for (i = 0; i < 4; i++)
+    dest[i] = src1[i] < src2[i] ? src1[i] : src2[i];
+}
+
+/* Store the element-wise minimum of two __bf16 values from SRC1 and SRC2
+   into DEST (SRC2's element is chosen when SRC1's is not smaller).
+   Two-element counterpart of the four-element min test; the source form
+   is what the dg-final instruction counts in this file rely on.  */
+void
+minpbf16_32 (__bf16* restrict dest, __bf16* restrict src1, __bf16* restrict src2)
+{
+  int i;
+  for (i = 0; i < 2; i++)
+    dest[i] = src1[i] < src2[i] ? src1[i] : src2[i];
+}