From c53f51005de1248ef42b08d32875abf73eb42c3c Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Tue, 23 May 2023 17:54:39 +0200 Subject: [PATCH] i386: Add V8QI and V4QImode partial vector shift operations Add V8QImode and V4QImode vector shift patterns that call into ix86_expand_vecop_qihi_partial. Generate special sequences for constant count operands. gcc/ChangeLog: * config/i386/i386-expand.cc (ix86_expand_vecop_qihi_partial): Call ix86_expand_vec_shift_qihi_constant for shifts with constant count operand. * config/i386/i386.cc (ix86_shift_rotate_cost): Handle V4QImode and V8QImode. * config/i386/mmx.md (<insn>v8qi3): New insn pattern. (<insn>v4qi3): Ditto. gcc/testsuite/ChangeLog: * gcc.target/i386/vect-shiftv4qi.c: New test. * gcc.target/i386/vect-shiftv8qi.c: New test. --- gcc/config/i386/i386-expand.cc | 12 +++++- gcc/config/i386/i386.cc | 38 +++++++++++++++- gcc/config/i386/mmx.md | 22 ++++++++++ .../gcc.target/i386/vect-shiftv4qi.c | 43 +++++++++++++++++++ .../gcc.target/i386/vect-shiftv8qi.c | 43 +++++++++++++++++++ 5 files changed, 155 insertions(+), 3 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/vect-shiftv4qi.c create mode 100644 gcc/testsuite/gcc.target/i386/vect-shiftv8qi.c diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc index 50d9d34ebcb6..ff3d382f1b40 100644 --- a/gcc/config/i386/i386-expand.cc +++ b/gcc/config/i386/i386-expand.cc @@ -23294,6 +23294,16 @@ ix86_expand_vecop_qihi_partial (enum rtx_code code, rtx dest, rtx op1, rtx op2) else qop2 = op2; + qdest = gen_reg_rtx (V16QImode); + + if (CONST_INT_P (op2) + && (code == ASHIFT || code == LSHIFTRT || code == ASHIFTRT) + && ix86_expand_vec_shift_qihi_constant (code, qdest, qop1, qop2)) + { + emit_move_insn (dest, gen_lowpart (qimode, qdest)); + return; + } + switch (code) { case MULT: @@ -23358,8 +23368,6 @@ ix86_expand_vecop_qihi_partial (enum rtx_code code, rtx dest, rtx op1, rtx op2) bool ok; int i; - qdest = gen_reg_rtx (V16QImode); - /* Merge the data back 
into the right place. */ d.target = qdest; d.op0 = qres; diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index 38125ce284a9..2710c6dfc568 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -20580,6 +20580,37 @@ ix86_shift_rotate_cost (const struct processor_costs *cost, switch (mode) { + case V4QImode: + case V8QImode: + if (TARGET_AVX2) + /* Use vpbroadcast. */ + extra = cost->sse_op; + else + extra = cost->sse_load[2]; + + if (constant_op1) + { + if (code == ASHIFTRT) + { + count = 4; + extra *= 2; + } + else + count = 2; + } + else if (TARGET_AVX512BW && TARGET_AVX512VL) + { + count = 3; + return ix86_vec_cost (mode, cost->sse_op * count); + } + else if (TARGET_SSE4_1) + count = 4; + else if (code == ASHIFTRT) + count = 5; + else + count = 4; + return ix86_vec_cost (mode, cost->sse_op * count) + extra; + case V16QImode: if (TARGET_XOP) { @@ -20600,7 +20631,12 @@ ix86_shift_rotate_cost (const struct processor_costs *cost, } /* FALLTHRU */ case V32QImode: - extra = (mode == V16QImode) ? cost->sse_load[2] : cost->sse_load[3]; + if (TARGET_AVX2) + /* Use vpbroadcast. */ + extra = cost->sse_op; + else + extra = (mode == V16QImode) ? 
cost->sse_load[2] : cost->sse_load[3]; + if (constant_op1) { if (code == ASHIFTRT) diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index 45773673049b..a37bbbb811f1 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -2680,6 +2680,28 @@ (const_string "0"))) (set_attr "mode" "TI")]) +(define_expand "<insn>v8qi3" + [(set (match_operand:V8QI 0 "register_operand") + (any_shift:V8QI (match_operand:V8QI 1 "register_operand") + (match_operand:DI 2 "nonmemory_operand")))] + "TARGET_MMX_WITH_SSE" +{ + ix86_expand_vecop_qihi_partial (<CODE>, operands[0], + operands[1], operands[2]); + DONE; +}) + +(define_expand "<insn>v4qi3" + [(set (match_operand:V4QI 0 "register_operand") + (any_shift:V4QI (match_operand:V4QI 1 "register_operand") + (match_operand:DI 2 "nonmemory_operand")))] + "TARGET_SSE2" +{ + ix86_expand_vecop_qihi_partial (<CODE>, operands[0], + operands[1], operands[2]); + DONE; +}) + (define_insn_and_split "<insn>v2qi3" [(set (match_operand:V2QI 0 "register_operand" "=Q") (any_shift:V2QI diff --git a/gcc/testsuite/gcc.target/i386/vect-shiftv4qi.c b/gcc/testsuite/gcc.target/i386/vect-shiftv4qi.c new file mode 100644 index 000000000000..c06dfb87bd1d --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/vect-shiftv4qi.c @@ -0,0 +1,43 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -msse2" } */ + +#define N 4 + +typedef unsigned char __vu __attribute__ ((__vector_size__ (N))); +typedef signed char __vi __attribute__ ((__vector_size__ (N))); + +__vu sll (__vu a, int n) +{ + return a << n; +} + +__vu sll_c (__vu a) +{ + return a << 5; +} + +/* { dg-final { scan-assembler-times "psllw" 2 } } */ + +__vu srl (__vu a, int n) +{ + return a >> n; +} + +__vu srl_c (__vu a) +{ + return a >> 5; +} + +/* { dg-final { scan-assembler-times "psrlw" 2 } } */ + +__vi sra (__vi a, int n) +{ + return a >> n; +} + +__vi sra_c (__vi a) +{ + return a >> 5; +} + +/* { dg-final { scan-assembler-times "psraw" 2 } } */ diff --git a/gcc/testsuite/gcc.target/i386/vect-shiftv8qi.c 
b/gcc/testsuite/gcc.target/i386/vect-shiftv8qi.c new file mode 100644 index 000000000000..f5e8925aa254 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/vect-shiftv8qi.c @@ -0,0 +1,43 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -ftree-vectorize -msse2" } */ + +#define N 8 + +typedef unsigned char __vu __attribute__ ((__vector_size__ (N))); +typedef signed char __vi __attribute__ ((__vector_size__ (N))); + +__vu sll (__vu a, int n) +{ + return a << n; +} + +__vu sll_c (__vu a) +{ + return a << 5; +} + +/* { dg-final { scan-assembler-times "psllw" 2 } } */ + +__vu srl (__vu a, int n) +{ + return a >> n; +} + +__vu srl_c (__vu a) +{ + return a >> 5; +} + +/* { dg-final { scan-assembler-times "psrlw" 2 } } */ + +__vi sra (__vi a, int n) +{ + return a >> n; +} + +__vi sra_c (__vi a) +{ + return a >> 5; +} + +/* { dg-final { scan-assembler-times "psraw" 2 } } */ -- 2.39.2