git.ipfire.org Git - thirdparty/gcc.git/commitdiff
Optimize vpermpd to vbroadcastf128 for specific permutations.
author liuhongt <hongtao.liu@intel.com>
Thu, 11 Sep 2025 06:03:43 +0000 (23:03 -0700)
committer liuhongt <hongtao.liu@intel.com>
Tue, 16 Sep 2025 01:45:37 +0000 (18:45 -0700)
gcc/ChangeLog:

* config/i386/predicates.md (avx_vbroadcast128_operand): New
predicate.
* config/i386/sse.md (*avx_vbroadcastf128_<mode>_perm): New
pre_reload splitter.

gcc/testsuite/ChangeLog:

* gcc.target/i386/avx_vbroadcastf128.c: New test.

gcc/config/i386/predicates.md
gcc/config/i386/sse.md
gcc/testsuite/gcc.target/i386/avx_vbroadcastf128.c [new file with mode: 0644]

index 5dbe444847fd6218459b20d3050c5803fa35e584..57950d3187822df55465c75b7867bfcc6c43c762 100644 (file)
   return true;
 })
 
+;; Return true if OP is a parallel for a vbroadcastf128 permute, i.e. a
+;; selector of the form { 0, ..., N/2-1, 0, ..., N/2-1 } that repeats the
+;; low half of the source in both halves of the result.
+(define_predicate "avx_vbroadcast128_operand"
+  (and (match_code "parallel")
+       (match_code "const_int" "a"))
+{
+  const int nunits = XVECLEN (op, 0);
+  const int lane = nunits / 2;
+
+  for (int pos = 0; pos < nunits; pos++)
+    {
+      /* Position POS must select element POS of the low half; the
+         expected index wraps back to zero where the upper half of
+         the selector begins.  */
+      int expected = pos < lane ? pos : pos - lane;
+      int selected = INTVAL (XVECEXP (op, 0, pos));
+
+      if (selected != expected)
+        return false;
+    }
+
+  return true;
+})
+
 ;; Return true if OP is a parallel for a palignr permute.
 (define_predicate "palignr_operand"
   (and (match_code "parallel")
index e87c26fcc072a13723fb7e9f34089eaca946f9ca..8b28c8edb19be8989ac58f6365aa8e87b7be085a 100644 (file)
    (set_attr "prefix" "vex,vex,vex,evex,evex,evex,evex")
    (set_attr "mode" "<sseinsnmode>")])
 
+
+;; Pre-reload splitter: a vec_select over (vec_concat MEM X) whose
+;; selector is accepted by avx_vbroadcast128_operand only ever reads
+;; the low half, i.e. operand 1, once for each half of the result.
+;; Operand 2 is therefore unused and the expression is rewritten as
+;; (vec_concat MEM MEM), the form a 128-bit broadcast load is expected
+;; to match.
+(define_insn_and_split "*avx_vbroadcastf128_<mode>_perm"
+  [(set (match_operand:V_256 0 "register_operand")
+       (vec_select:V_256
+         (vec_concat:V_256
+           (match_operand:<ssehalfvecmode> 1 "memory_operand")
+           (match_operand:<ssehalfvecmode> 2 "general_operand"))
+         (match_parallel 3 "avx_vbroadcast128_operand"
+           [(match_operand 4 "const_int_operand")])))]
+  "TARGET_AVX && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+       (vec_concat:V_256 (match_dup 1) (match_dup 1)))])
+
 ;; For broadcast[i|f]32x2.  Yes there is no v4sf version, only v4si.
 (define_mode_iterator VI4F_BRCST32x2
   [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
diff --git a/gcc/testsuite/gcc.target/i386/avx_vbroadcastf128.c b/gcc/testsuite/gcc.target/i386/avx_vbroadcastf128.c
new file mode 100644 (file)
index 0000000..e0bda7d
--- /dev/null
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-march=x86-64-v3 -O3" } */
+/* { dg-final { scan-assembler-not "vpermpd"} } */
+/* { dg-final { scan-assembler {(?n)vbroadcastf(?:128|64x2)} } } */
+
+void
+foo (double* __restrict a, double*  b, double* c, int n)
+{
+  for (int i = 0; i != n; i+=4)  /* One group of four doubles per trip.  */
+    {
+      a[i] += b[i] * c[i];
+      a[i+1] += b[i+1] * c[i+1];
+      a[i+2] += b[i] * c[i+2];  /* Reuses b[i], not b[i+2].  */
+      a[i+3] += b[i+1] * c[i+3];  /* Reuses b[i+1], not b[i+3].  */
+    }
+
+}