]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
x86: make better use of VBROADCASTSS / VPBROADCASTD
authorJan Beulich <jbeulich@suse.com>
Wed, 12 Jul 2023 10:14:08 +0000 (12:14 +0200)
committerJan Beulich <jbeulich@suse.com>
Wed, 12 Jul 2023 10:14:08 +0000 (12:14 +0200)
... in vec_dupv4sf / *vec_dupv4si. The respective broadcast insns are
never longer (yet sometimes shorter) than the corresponding VSHUFPS /
VPSHUFD, due to the immediate operand of the shuffle insns balancing the
(uniform) need for VEX3 in the broadcast ones. When EVEX encoding is
respective the broadcast insns are always shorter.

Add new alternatives to cover the AVX2 and AVX512 cases as appropriate.

While touching this anyway, switch to consistently using "sseshuf1" in
the "type" attributes for all shuffle forms.

gcc/

* config/i386/sse.md (vec_dupv4sf): Make first alternative use
vbroadcastss for AVX2. New AVX512F alternative.
(*vec_dupv4si): New AVX2 and AVX512F alternatives using
vpbroadcastd. Replace sselog1 by sseshuf1 in "type" attribute.

gcc/testsuite/

* gcc.target/i386/avx2-dupv4sf.c: New test.
* gcc.target/i386/avx2-dupv4si.c: Likewise.
* gcc.target/i386/avx512f-dupv4sf.c: Likewise.
* gcc.target/i386/avx512f-dupv4si.c: Likewise.

gcc/config/i386/sse.md
gcc/testsuite/gcc.target/i386/avx2-dupv4sf.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx2-dupv4si.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512f-dupv4sf.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx512f-dupv4si.c [new file with mode: 0644]

index 24359cd189c8fc68ba6ebacb1464dad62a0fc813..6bf9c99a2c12eac87d0646ea6c5410c9c39f867e 100644 (file)
        (const_int 1)))])
 
 (define_insn "vec_dupv4sf"
-  [(set (match_operand:V4SF 0 "register_operand" "=v,v,x")
+  [(set (match_operand:V4SF 0 "register_operand" "=v,v,v,x")
        (vec_duplicate:V4SF
-         (match_operand:SF 1 "nonimmediate_operand" "Yv,m,0")))]
+         (match_operand:SF 1 "nonimmediate_operand" "Yv,v,m,0")))]
   "TARGET_SSE"
   "@
-   vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
+   * return TARGET_AVX2 ? \"vbroadcastss\t{%1, %0|%0, %1}\" : \"vshufps\t{$0, %d1, %0|%0, %d1, 0}\";
+   vbroadcastss\t{%1, %g0|%g0, %1}
    vbroadcastss\t{%1, %0|%0, %1}
    shufps\t{$0, %0, %0|%0, %0, 0}"
-  [(set_attr "isa" "avx,avx,noavx")
-   (set_attr "type" "sseshuf1,ssemov,sseshuf1")
-   (set_attr "length_immediate" "1,0,1")
-   (set_attr "prefix_extra" "0,1,*")
-   (set_attr "prefix" "maybe_evex,maybe_evex,orig")
-   (set_attr "mode" "V4SF")])
+  [(set_attr "isa" "avx,*,avx,noavx")
+   (set (attr "type")
+       (cond [(and (eq_attr "alternative" "0")
+                   (match_test "!TARGET_AVX2"))
+                (const_string "sseshuf1")
+              (eq_attr "alternative" "3")
+                (const_string "sseshuf1")
+             ]
+             (const_string "ssemov")))
+   (set (attr "length_immediate")
+       (if_then_else (eq_attr "type" "sseshuf1")
+                     (const_string "1")
+                     (const_string "0")))
+   (set_attr "prefix_extra" "0,1,1,*")
+   (set_attr "prefix" "maybe_evex,evex,maybe_evex,orig")
+   (set_attr "mode" "V4SF,V16SF,V4SF,V4SF")
+   (set (attr "enabled")
+       (if_then_else (eq_attr "alternative" "1")
+                     (symbol_ref "TARGET_AVX512F && !TARGET_AVX512VL
+                                  && !TARGET_PREFER_AVX256")
+                     (const_string "*")))])
 
 (define_insn "*vec_dupv4si"
-  [(set (match_operand:V4SI 0 "register_operand"     "=v,v,x")
+  [(set (match_operand:V4SI 0 "register_operand"     "=v,v,v,v,x")
        (vec_duplicate:V4SI
-         (match_operand:SI 1 "nonimmediate_operand" "Yv,m,0")))]
+         (match_operand:SI 1 "nonimmediate_operand" "Yvm,v,Yv,m,0")))]
   "TARGET_SSE"
   "@
+   vpbroadcastd\t{%1, %0|%0, %1}
+   vpbroadcastd\t{%1, %g0|%g0, %1}
    %vpshufd\t{$0, %1, %0|%0, %1, 0}
    vbroadcastss\t{%1, %0|%0, %1}
    shufps\t{$0, %0, %0|%0, %0, 0}"
-  [(set_attr "isa" "sse2,avx,noavx")
-   (set_attr "type" "sselog1,ssemov,sselog1")
-   (set_attr "length_immediate" "1,0,1")
-   (set_attr "prefix_extra" "0,1,*")
-   (set_attr "prefix" "maybe_vex,maybe_evex,orig")
-   (set_attr "mode" "TI,V4SF,V4SF")
+  [(set_attr "isa" "avx2,*,sse2,avx,noavx")
+   (set_attr "type" "ssemov,ssemov,sseshuf1,ssemov,sseshuf1")
+   (set_attr "length_immediate" "0,0,1,0,1")
+   (set_attr "prefix_extra" "1,1,0,1,*")
+   (set_attr "prefix" "maybe_evex,evex,maybe_vex,maybe_evex,orig")
+   (set_attr "mode" "TI,XI,TI,V4SF,V4SF")
    (set (attr "preferred_for_speed")
-     (cond [(eq_attr "alternative" "1")
+     (cond [(eq_attr "alternative" "3")
              (symbol_ref "!TARGET_INTER_UNIT_MOVES_TO_VEC")
           ]
-          (symbol_ref "true")))])
+          (symbol_ref "true")))
+   (set (attr "enabled")
+       (if_then_else (eq_attr "alternative" "1")
+                     (symbol_ref "TARGET_AVX512F && !TARGET_AVX512VL
+                                  && !TARGET_PREFER_AVX256")
+                     (const_string "*")))])
 
 (define_insn "*vec_dupv2di"
   [(set (match_operand:V2DI 0 "register_operand"     "=x,v,v,v,x")
diff --git a/gcc/testsuite/gcc.target/i386/avx2-dupv4sf.c b/gcc/testsuite/gcc.target/i386/avx2-dupv4sf.c
new file mode 100644 (file)
index 0000000..9b45ee5
--- /dev/null
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx2 -O2" } */
+/* { dg-final { scan-assembler-times "vbroadcastss" 2 } } */
+
+typedef float __attribute__ ((vector_size (16))) v4sf;
+
+v4sf bcst_reg (float f)
+{
+  register float x asm ("xmm7") = f;
+
+  asm ("" : "+v" (x));
+  return (v4sf) {x, x, x, x};
+}
+
+v4sf bcst_mem (const float *f)
+{
+  return (v4sf) {*f, *f, *f, *f};
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx2-dupv4si.c b/gcc/testsuite/gcc.target/i386/avx2-dupv4si.c
new file mode 100644 (file)
index 0000000..ffe6318
--- /dev/null
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx2 -O2" } */
+/* { dg-final { scan-assembler-times "vpbroadcastd" 2 } } */
+
+typedef int __attribute__ ((vector_size (16))) v4si;
+
+v4si bcst_reg (int i)
+{
+  register int x asm ("xmm7") = i;
+
+  asm ("" : "+v" (x));
+  return (v4si) {x, x, x, x};
+}
+
+v4si bcst_mem (const int *i)
+{
+  return (v4si) {*i, *i, *i, *i};
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-dupv4sf.c b/gcc/testsuite/gcc.target/i386/avx512f-dupv4sf.c
new file mode 100644 (file)
index 0000000..2682fc6
--- /dev/null
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-mavx512f -mno-avx512vl -mprefer-vector-width=512 -O2" } */
+/* { dg-final { scan-assembler "vbroadcastss\[^\n\]*%xmm17, *%zmm" } } */
+
+typedef float __attribute__ ((vector_size (16))) v4sf;
+
+v4sf bcst (float f)
+{
+  register float x asm ("xmm17") = f;
+
+  asm ("" : "+v" (x));
+  return (v4sf) {x, x, x, x};
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-dupv4si.c b/gcc/testsuite/gcc.target/i386/avx512f-dupv4si.c
new file mode 100644 (file)
index 0000000..dbf00d7
--- /dev/null
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-mavx512f -mno-avx512vl -mprefer-vector-width=512 -O2" } */
+/* { dg-final { scan-assembler "vpbroadcastd\[^\n\]*%xmm17, *%zmm" } } */
+
+typedef int __attribute__ ((vector_size (16))) v4si;
+
+v4si bcst (int i)
+{
+  register int x asm ("xmm17") = i;
+
+  asm ("" : "+v" (x));
+  return (v4si) {x, x, x, x};
+}