git.ipfire.org Git - thirdparty/gcc.git/commitdiff
i386: Add V8QI and V4QImode partial vector shift operations
author: Uros Bizjak <ubizjak@gmail.com>
Tue, 23 May 2023 15:54:39 +0000 (17:54 +0200)
committer: Uros Bizjak <ubizjak@gmail.com>
Tue, 23 May 2023 15:56:27 +0000 (17:56 +0200)
Add V8QImode and V4QImode vector shift patterns that call into
ix86_expand_vecop_qihi_partial.  Generate special sequences
for constant count operands.

gcc/ChangeLog:

* config/i386/i386-expand.cc (ix86_expand_vecop_qihi_partial):
Call ix86_expand_vec_shift_qihi_constant for shifts
with constant count operand.
* config/i386/i386.cc (ix86_shift_rotate_cost):
Handle V4QImode and V8QImode.
* config/i386/mmx.md (<insn>v8qi3): New expander.
(<insn>v4qi3): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/i386/vect-shiftv4qi.c: New test.
* gcc.target/i386/vect-shiftv8qi.c: New test.

gcc/config/i386/i386-expand.cc
gcc/config/i386/i386.cc
gcc/config/i386/mmx.md
gcc/testsuite/gcc.target/i386/vect-shiftv4qi.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/vect-shiftv8qi.c [new file with mode: 0644]

index 50d9d34ebcb68168dcf81d66e94069abaf637282..ff3d382f1b40d0b23bc07a9f4f218374b6b20f23 100644 (file)
@@ -23294,6 +23294,16 @@ ix86_expand_vecop_qihi_partial (enum rtx_code code, rtx dest, rtx op1, rtx op2)
   else
     qop2 = op2;
 
+  qdest = gen_reg_rtx (V16QImode);
+
+  if (CONST_INT_P (op2)
+      && (code == ASHIFT || code == LSHIFTRT || code == ASHIFTRT)
+      && ix86_expand_vec_shift_qihi_constant (code, qdest, qop1, qop2))
+    {
+      emit_move_insn (dest, gen_lowpart (qimode, qdest));
+      return;
+    }
+
   switch (code)
     {
     case MULT:
@@ -23358,8 +23368,6 @@ ix86_expand_vecop_qihi_partial (enum rtx_code code, rtx dest, rtx op1, rtx op2)
       bool ok;
       int i;
 
-      qdest = gen_reg_rtx (V16QImode);
-
       /* Merge the data back into the right place.  */
       d.target = qdest;
       d.op0 = qres;
index 38125ce284a92a782912db1cde83eec47d09806d..2710c6dfc5682f874a1b751d0ca81b1667ade9d1 100644 (file)
@@ -20580,6 +20580,37 @@ ix86_shift_rotate_cost (const struct processor_costs *cost,
 
       switch (mode)
        {
+       case V4QImode:
+       case V8QImode:
+         if (TARGET_AVX2)
+           /* Use vpbroadcast.  */
+           extra = cost->sse_op;
+         else
+           extra = cost->sse_load[2];
+
+         if (constant_op1)
+           {
+             if (code == ASHIFTRT)
+               {
+                 count = 4;
+                 extra *= 2;
+               }
+             else
+               count = 2;
+           }
+         else if (TARGET_AVX512BW && TARGET_AVX512VL)
+           {
+             count = 3;
+             return ix86_vec_cost (mode, cost->sse_op * count);
+           }
+         else if (TARGET_SSE4_1)
+           count = 4;
+         else if (code == ASHIFTRT)
+           count = 5;
+         else
+           count = 4;
+         return ix86_vec_cost (mode, cost->sse_op * count) + extra;
+
        case V16QImode:
          if (TARGET_XOP)
            {
@@ -20600,7 +20631,12 @@ ix86_shift_rotate_cost (const struct processor_costs *cost,
            }
          /* FALLTHRU */
        case V32QImode:
-         extra = (mode == V16QImode) ? cost->sse_load[2] : cost->sse_load[3];
+         if (TARGET_AVX2)
+           /* Use vpbroadcast.  */
+           extra = cost->sse_op;
+         else
+           extra = (mode == V16QImode) ? cost->sse_load[2] : cost->sse_load[3];
+
          if (constant_op1)
            {
              if (code == ASHIFTRT)
index 45773673049be6ca4d8412f17b82de2b4b21621e..a37bbbb811f1cd7db747d57fb664cc5ff4420091 100644 (file)
        (const_string "0")))
    (set_attr "mode" "TI")])
 
+(define_expand "<insn>v8qi3"
+  [(set (match_operand:V8QI 0 "register_operand")
+       (any_shift:V8QI (match_operand:V8QI 1 "register_operand")
+                       (match_operand:DI 2 "nonmemory_operand")))]
+  "TARGET_MMX_WITH_SSE"
+{
+  ix86_expand_vecop_qihi_partial (<CODE>, operands[0],
+                                 operands[1], operands[2]);
+  DONE;
+})
+
+(define_expand "<insn>v4qi3"
+  [(set (match_operand:V4QI 0 "register_operand")
+       (any_shift:V4QI (match_operand:V4QI 1 "register_operand")
+                       (match_operand:DI 2 "nonmemory_operand")))]
+  "TARGET_SSE2"
+{
+  ix86_expand_vecop_qihi_partial (<CODE>, operands[0],
+                                 operands[1], operands[2]);
+  DONE;
+})
+
 (define_insn_and_split "<insn>v2qi3"
   [(set (match_operand:V2QI 0 "register_operand" "=Q")
         (any_shift:V2QI
diff --git a/gcc/testsuite/gcc.target/i386/vect-shiftv4qi.c b/gcc/testsuite/gcc.target/i386/vect-shiftv4qi.c
new file mode 100644 (file)
index 0000000..c06dfb8
--- /dev/null
@@ -0,0 +1,43 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -msse2" } */
+
+#define N 4
+
+typedef unsigned char __vu __attribute__ ((__vector_size__ (N)));
+typedef signed char __vi __attribute__ ((__vector_size__ (N)));
+
+__vu sll (__vu a, int n)
+{
+  return a << n;
+}
+
+__vu sll_c (__vu a)
+{
+  return a << 5;
+}
+
+/* { dg-final { scan-assembler-times "psllw" 2 } } */
+
+__vu srl (__vu a, int n)
+{
+  return a >> n;
+}
+
+__vu srl_c (__vu a)
+{
+  return a >> 5;
+}
+
+/* { dg-final { scan-assembler-times "psrlw" 2 } } */
+
+__vi sra (__vi a, int n)
+{
+  return a >> n;
+}
+
+__vi sra_c (__vi a)
+{
+  return a >> 5;
+}
+
+/* { dg-final { scan-assembler-times "psraw" 2 } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-shiftv8qi.c b/gcc/testsuite/gcc.target/i386/vect-shiftv8qi.c
new file mode 100644 (file)
index 0000000..f5e8925
--- /dev/null
@@ -0,0 +1,43 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -ftree-vectorize -msse2" } */
+
+#define N 8
+
+typedef unsigned char __vu __attribute__ ((__vector_size__ (N)));
+typedef signed char __vi __attribute__ ((__vector_size__ (N)));
+
+__vu sll (__vu a, int n)
+{
+  return a << n;
+}
+
+__vu sll_c (__vu a)
+{
+  return a << 5;
+}
+
+/* { dg-final { scan-assembler-times "psllw" 2 } } */
+
+__vu srl (__vu a, int n)
+{
+  return a >> n;
+}
+
+__vu srl_c (__vu a)
+{
+  return a >> 5;
+}
+
+/* { dg-final { scan-assembler-times "psrlw" 2 } } */
+
+__vi sra (__vi a, int n)
+{
+  return a >> n;
+}
+
+__vi sra_c (__vi a)
+{
+  return a >> 5;
+}
+
+/* { dg-final { scan-assembler-times "psraw" 2 } } */