From: Robin Dapp Date: Mon, 31 Jul 2023 15:54:35 +0000 (+0200) Subject: RISC-V: Implement vector "average" autovec pattern. X-Git-Tag: basepoints/gcc-15~6894 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=694242930906d9f7ad15977cac6dcbeae1f3d3f2;p=thirdparty%2Fgcc.git RISC-V: Implement vector "average" autovec pattern. This patch adds vector average patterns op[0] = (narrow) ((wide) op[1] + (wide) op[2]) >> 1; op[0] = (narrow) ((wide) op[1] + (wide) op[2] + 1) >> 1; If there is no direct support, the vectorizer can synthesize the pattern but, presumably, due to lack of narrowing operation support, won't try a narrowing shift. Therefore, this patch implements the expanders instead. gcc/ChangeLog: * config/riscv/autovec.md (<u>avg<v_double_trunc>3_floor): Implement expander. (<u>avg<v_double_trunc>3_ceil): Ditto. * config/riscv/vector-iterators.md (ext_to_rshift): New code attribute. (EXT_TO_RSHIFT): Ditto. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/widen/vec-avg-run.c: New test. * gcc.target/riscv/rvv/autovec/widen/vec-avg-rv32gcv.c: New test. * gcc.target/riscv/rvv/autovec/widen/vec-avg-rv64gcv.c: New test. * gcc.target/riscv/rvv/autovec/widen/vec-avg-template.h: New test. --- diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md index 21cf2ffaec59..acca4c22b907 100644 --- a/gcc/config/riscv/autovec.md +++ b/gcc/config/riscv/autovec.md @@ -2044,3 +2044,69 @@ riscv_vector::reduction_type::MASK_LEN_FOLD_LEFT); DONE; }) + +;; ------------------------------------------------------------------------- +;; ---- [INT] Average. 
+;; ------------------------------------------------------------------------- +;; Implements the following "average" patterns: +;; floor: +;; op[0] = (narrow) ((wide) op[1] + (wide) op[2]) >> 1; +;; ceil: +;; op[0] = (narrow) ((wide) op[1] + (wide) op[2] + 1) >> 1; +;; ------------------------------------------------------------------------- + +(define_expand "<u>avg<v_double_trunc>3_floor" + [(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand") + (truncate:<V_DOUBLE_TRUNC> + (<ext_to_rshift>:VWEXTI + (plus:VWEXTI + (any_extend:VWEXTI + (match_operand:<V_DOUBLE_TRUNC> 1 "register_operand")) + (any_extend:VWEXTI + (match_operand:<V_DOUBLE_TRUNC> 2 "register_operand"))))))] + "TARGET_VECTOR" +{ + /* First emit a widening addition. */ + rtx tmp1 = gen_reg_rtx (<MODE>mode); + rtx ops1[] = {tmp1, operands[1], operands[2]}; + insn_code icode = code_for_pred_dual_widen (PLUS, <CODE>, <MODE>mode); + riscv_vector::emit_vlmax_insn (icode, riscv_vector::RVV_BINOP, ops1); + + /* Then a narrowing shift. */ + rtx ops2[] = {operands[0], tmp1, const1_rtx}; + icode = code_for_pred_narrow_scalar (<EXT_TO_RSHIFT>, <MODE>mode); + riscv_vector::emit_vlmax_insn (icode, riscv_vector::RVV_BINOP, ops2); + DONE; +}) + +(define_expand "<u>avg<v_double_trunc>3_ceil" + [(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand") + (truncate:<V_DOUBLE_TRUNC> + (<ext_to_rshift>:VWEXTI + (plus:VWEXTI + (plus:VWEXTI + (any_extend:VWEXTI + (match_operand:<V_DOUBLE_TRUNC> 1 "register_operand")) + (any_extend:VWEXTI + (match_operand:<V_DOUBLE_TRUNC> 2 "register_operand"))) + (const_int 1)))))] + "TARGET_VECTOR" +{ + /* First emit a widening addition. */ + rtx tmp1 = gen_reg_rtx (<MODE>mode); + rtx ops1[] = {tmp1, operands[1], operands[2]}; + insn_code icode = code_for_pred_dual_widen (PLUS, <CODE>, <MODE>mode); + riscv_vector::emit_vlmax_insn (icode, riscv_vector::RVV_BINOP, ops1); + + /* Then add 1. */ + rtx tmp2 = gen_reg_rtx (<MODE>mode); + rtx ops2[] = {tmp2, tmp1, const1_rtx}; + icode = code_for_pred_scalar (PLUS, <MODE>mode); + riscv_vector::emit_vlmax_insn (icode, riscv_vector::RVV_BINOP, ops2); + + /* Finally, a narrowing shift. 
*/ + rtx ops3[] = {operands[0], tmp2, const1_rtx}; + icode = code_for_pred_narrow_scalar (<EXT_TO_RSHIFT>, <MODE>mode); + riscv_vector::emit_vlmax_insn (icode, riscv_vector::RVV_BINOP, ops3); + DONE; +}) diff --git a/gcc/config/riscv/vector-iterators.md b/gcc/config/riscv/vector-iterators.md index fc237ac330d7..4023a038fe97 100644 --- a/gcc/config/riscv/vector-iterators.md +++ b/gcc/config/riscv/vector-iterators.md @@ -2040,6 +2040,11 @@ (define_code_attr nmsub_nmadd [(plus "nmsub") (minus "nmadd")]) (define_code_attr nmsac_nmacc [(plus "nmsac") (minus "nmacc")]) +(define_code_attr ext_to_rshift [(sign_extend "ashiftrt") + (zero_extend "lshiftrt")]) +(define_code_attr EXT_TO_RSHIFT [(sign_extend "ASHIFTRT") + (zero_extend "LSHIFTRT")]) + (define_code_iterator and_ior [and ior]) (define_code_iterator any_float_binop [plus mult minus div]) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/vec-avg-run.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/vec-avg-run.c new file mode 100644 index 000000000000..7ca193ec2f2d --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/vec-avg-run.c @@ -0,0 +1,85 @@ +/* { dg-do run { target { riscv_vector } } } */ +/* { dg-additional-options "-std=c99 -fno-vect-cost-model --param=riscv-autovec-preference=scalable -lm" } */ + +#include +#include +#include + +#include "vec-avg-template.h" + +#define SZ 256 + +#define RUNS1(TYPE, SCALE) \ + TYPE a##TYPE[SZ + 1]; \ + TYPE b##TYPE[SZ + 1]; \ + TYPE dst##TYPE[SZ + 1]; \ + for (int cnt = 0, i = -(SZ * SCALE) / 2; i < (SZ * SCALE) / 2; i += SCALE) \ + { \ + a##TYPE[cnt] = i; \ + b##TYPE[cnt] = i + 1; \ + dst##TYPE[cnt++] = 0; \ + } \ + vavg_##TYPE (dst##TYPE, a##TYPE, b##TYPE, SZ); \ + for (int i = 0; i < SZ; i += SCALE) \ + assert (dst##TYPE[i] == floor ((a##TYPE[i] + b##TYPE[i]) / 2.0)); + +#define RUNU1(TYPE, SCALE) \ + TYPE a##TYPE[SZ + 1]; \ + TYPE b##TYPE[SZ + 1]; \ + TYPE dst##TYPE[SZ + 1]; \ + for (int cnt = 0, i = 0; i < (SZ * SCALE); i += SCALE) \ + { \ + a##TYPE[cnt] = i; 
\ + b##TYPE[cnt] = i + 1; \ + dst##TYPE[cnt++] = 0; \ + } \ + vavg_##TYPE (dst##TYPE, a##TYPE, b##TYPE, SZ); \ + for (int i = 0; i < SZ; i += SCALE) \ + assert (dst##TYPE[i] == floor ((a##TYPE[i] + b##TYPE[i]) / 2.0)); + +#define RUNS2(TYPE, SCALE) \ + TYPE a2##TYPE[SZ + 1]; \ + TYPE b2##TYPE[SZ + 1]; \ + TYPE dst2##TYPE[SZ + 1]; \ + for (int cnt = 0, i = -(SZ * SCALE) / 2; i < (SZ * SCALE) / 2; i += SCALE) \ + { \ + a2##TYPE[cnt] = i; \ + b2##TYPE[cnt] = i + 1; \ + dst2##TYPE[cnt++] = 0; \ + } \ + vavg2_##TYPE (dst2##TYPE, a2##TYPE, b2##TYPE, SZ); \ + for (int i = 0; i < SZ; i += SCALE) \ + assert (dst2##TYPE[i] == ceil ((a2##TYPE[i] + b2##TYPE[i]) / 2.0)); + +#define RUNU2(TYPE, SCALE) \ + TYPE a2##TYPE[SZ + 1]; \ + TYPE b2##TYPE[SZ + 1]; \ + TYPE dst2##TYPE[SZ + 1]; \ + for (int cnt = 0, i = 0; i < (SZ * SCALE); i += SCALE) \ + { \ + a2##TYPE[cnt] = i; \ + b2##TYPE[cnt] = i + 1; \ + dst2##TYPE[cnt++] = 0; \ + } \ + vavg2_##TYPE (dst2##TYPE, a2##TYPE, b2##TYPE, SZ); \ + for (int i = 0; i < SZ; i += SCALE) \ + assert (dst2##TYPE[i] == ceil ((a2##TYPE[i] + b2##TYPE[i]) / 2.0)); + +#define RUN_ALL() \ + RUNS1 (int8_t, 1) \ + RUNS1 (int16_t, 256) \ + RUNS1 (int32_t, 65536) \ + RUNU1 (uint8_t, 1) \ + RUNU1 (uint16_t, 256) \ + RUNU1 (uint32_t, 65536) \ + RUNS2 (int8_t, 1) \ + RUNS2 (int16_t, 256) \ + RUNS2 (int32_t, 65536) \ + RUNU2 (uint8_t, 1) \ + RUNU2 (uint16_t, 256) \ + RUNU2 (uint32_t, 65536)\ + +int main () +{ + RUN_ALL () +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/vec-avg-rv32gcv.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/vec-avg-rv32gcv.c new file mode 100644 index 000000000000..e2754339d942 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/vec-avg-rv32gcv.c @@ -0,0 +1,10 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-std=c99 -fno-vect-cost-model -march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable" } */ + +#include "vec-avg-template.h" + +/* { dg-final { scan-assembler-times 
{\tvwadd\.vv} 6 } } */ +/* { dg-final { scan-assembler-times {\tvwaddu\.vv} 6 } } */ +/* { dg-final { scan-assembler-times {\tvadd\.vi} 6 } } */ +/* { dg-final { scan-assembler-times {\tvnsrl.wi} 6 } } */ +/* { dg-final { scan-assembler-times {\tvnsra.wi} 6 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/vec-avg-rv64gcv.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/vec-avg-rv64gcv.c new file mode 100644 index 000000000000..1f0ef29566dd --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/vec-avg-rv64gcv.c @@ -0,0 +1,10 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-std=c99 -fno-vect-cost-model -march=rv64gcv -mabi=lp64d --param=riscv-autovec-preference=scalable" } */ + +#include "vec-avg-template.h" + +/* { dg-final { scan-assembler-times {\tvwadd\.vv} 6 } } */ +/* { dg-final { scan-assembler-times {\tvwaddu\.vv} 6 } } */ +/* { dg-final { scan-assembler-times {\tvadd\.vi} 6 } } */ +/* { dg-final { scan-assembler-times {\tvnsrl\.wi} 6 } } */ +/* { dg-final { scan-assembler-times {\tvnsra\.wi} 6 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/vec-avg-template.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/vec-avg-template.h new file mode 100644 index 000000000000..9c2a6f1b9cb8 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/vec-avg-template.h @@ -0,0 +1,33 @@ +#include + +#define TEST_TYPE(TYPE, TYPE2) \ + __attribute__ ((noipa)) void vavg_##TYPE (TYPE *dst, TYPE *a, TYPE *b, \ + int n) \ + { \ + for (int i = 0; i < n; i++) \ + dst[i] = ((TYPE2) a[i] + b[i]) >> 1; \ + } + +#define TEST_TYPE2(TYPE, TYPE2) \ + __attribute__ ((noipa)) void vavg2_##TYPE (TYPE *dst, TYPE *a, TYPE *b, \ + int n) \ + { \ + for (int i = 0; i < n; i++) \ + dst[i] = ((TYPE2) a[i] + b[i] + 1) >> 1; \ + } + +#define TEST_ALL() \ + TEST_TYPE (int8_t, int16_t) \ + TEST_TYPE (uint8_t, uint16_t) \ + TEST_TYPE (int16_t, int32_t) \ + TEST_TYPE (uint16_t, uint32_t) \ + TEST_TYPE (int32_t, int64_t) \ + 
TEST_TYPE (uint32_t, uint64_t) \ + TEST_TYPE2 (int8_t, int16_t) \ + TEST_TYPE2 (uint8_t, uint16_t) \ + TEST_TYPE2 (int16_t, int32_t) \ + TEST_TYPE2 (uint16_t, uint32_t) \ + TEST_TYPE2 (int32_t, int64_t) \ + TEST_TYPE2 (uint32_t, uint64_t) + +TEST_ALL()