From: Feng Wang <wangfeng@eswincomputing.cn>
Date: Sat, 17 Aug 2024 14:40:42 +0000 (-0600)
Subject: RISC-V: Add auto-vect pattern for vector rotate shift
X-Git-Tag: basepoints/gcc-16~6585
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=54b228d80c54d32ab49cee6148cfd1364b2bc817;p=thirdparty%2Fgcc.git

RISC-V: Add auto-vect pattern for vector rotate shift

This patch adds the vector rotate shift pattern for auto-vect.
With this patch, the scalar rotate shift can be automatically
vectorized into vector rotate shift.

gcc/ChangeLog:

	* config/riscv/autovec.md (v<bitmanip_optab><mode>3): Add new
	define_expand pattern for vector rotate shift.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/rvv/autovec/binop/vrolr-1.c: New test.
	* gcc.target/riscv/rvv/autovec/binop/vrolr-run.c: New test.
	* gcc.target/riscv/rvv/autovec/binop/vrolr-template.h: New test.
---

diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index 0423d7bee13..decfe2bf8cc 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -2764,3 +2764,19 @@
     operands[2] = const0_rtx;
   }
 )
+
+;; -------------------------------------------------------------------------
+;; - vrol.vv vror.vv
+;; -------------------------------------------------------------------------
+(define_expand "v<bitmanip_optab><mode>3"
+  [(set (match_operand:VI 0 "register_operand")
+        (bitmanip_rotate:VI
+          (match_operand:VI 1 "register_operand")
+          (match_operand:VI 2 "register_operand")))]
+  "TARGET_ZVBB || TARGET_ZVKB"
+  {
+    riscv_vector::emit_vlmax_insn (code_for_pred_v (<CODE>, <MODE>mode),
+                                   riscv_vector::BINARY_OP, operands);
+    DONE;
+  }
+)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vrolr-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vrolr-1.c
new file mode 100644
index 00000000000..55dac27697c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vrolr-1.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-add-options "riscv_v" } */
+/* { dg-add-options "riscv_zvbb" } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model"
} */
+
+#include "vrolr-template.h"
+
+/* { dg-final { scan-assembler-times {\tvrol\.vv} 4 } } */
+/* { dg-final { scan-assembler-times {\tvror\.vv} 4 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vrolr-run.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vrolr-run.c
new file mode 100644
index 00000000000..b659a0804f5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vrolr-run.c
@@ -0,0 +1,88 @@
+/* { dg-do run } */
+/* { dg-require-effective-target "riscv_zvbb_ok" } */
+/* { dg-add-options "riscv_v" } */
+/* { dg-add-options "riscv_zvbb" } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model" } */
+
+#include <stdint.h>
+#include <assert.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define ARRAY_SIZE 512
+
+#define CIRCULAR_LEFT_SHIFT_ARRAY(arr, shifts, bit_size, size) \
+  for (int i = 0; i < size; i++) { \
+    (arr)[i] = (((arr)[i] << (shifts)[i]) | ((arr)[i] >> (bit_size - (shifts)[i]))); \
+  }
+
+#define CIRCULAR_RIGHT_SHIFT_ARRAY(arr, shifts, bit_size, size) \
+  for (int i = 0; i < size; i++) { \
+    (arr)[i] = (((arr)[i] >> (shifts)[i]) | ((arr)[i] << (bit_size - (shifts)[i]))); \
+  }
+
+void __attribute__((optimize("no-tree-vectorize"))) compare_results8(
+    uint8_t *result_left, uint8_t *result_right,
+    int bit_size, uint8_t *shift_values)
+{
+  for (int i = 0; i < ARRAY_SIZE; i++) {
+    assert(result_left[i] == (i << shift_values[i]) | (i >> (bit_size - shift_values[i])));
+    assert(result_right[i] == (i >> shift_values[i]) | (i << (bit_size - shift_values[i])));
+  }
+}
+
+void __attribute__((optimize("no-tree-vectorize"))) compare_results16(
+    uint16_t *result_left, uint16_t *result_right,
+    int bit_size, uint16_t *shift_values)
+{
+  for (int i = 0; i < ARRAY_SIZE; i++) {
+    assert(result_left[i] == (i << shift_values[i]) | (i >> (bit_size - shift_values[i])));
+    assert(result_right[i] == (i >> shift_values[i]) | (i << (bit_size - shift_values[i])));
+  }
+}
+
+void __attribute__((optimize("no-tree-vectorize"))) compare_results32(
uint32_t *result_left, uint32_t *result_right,
+    int bit_size, uint32_t *shift_values)
+{
+  for (int i = 0; i < ARRAY_SIZE; i++) {
+    assert(result_left[i] == (i << shift_values[i]) | (i >> (bit_size - shift_values[i])));
+    assert(result_right[i] == (i >> shift_values[i]) | (i << (bit_size - shift_values[i])));
+  }
+}
+
+void __attribute__((optimize("no-tree-vectorize"))) compare_results64(
+    uint64_t *result_left, uint64_t *result_right,
+    int bit_size, uint64_t *shift_values)
+{
+  for (int i = 0; i < ARRAY_SIZE; i++) {
+    assert(result_left[i] == ((uint64_t)i << shift_values[i]) | ((uint64_t)i >> (bit_size - shift_values[i])));
+    assert(result_right[i] == ((uint64_t)i >> shift_values[i]) | ((uint64_t)i << (bit_size - shift_values[i])));
+  }
+}
+
+#define TEST_SHIFT_OPERATIONS(TYPE, bit_size) \
+  TYPE shift_val##bit_size[ARRAY_SIZE];\
+  TYPE result_left##bit_size[ARRAY_SIZE];\
+  TYPE result_right##bit_size[ARRAY_SIZE];\
+  do { \
+    for (int i = 0; i < ARRAY_SIZE; i++) { \
+      result_left##bit_size[i] = i;\
+      result_right##bit_size[i] = i;\
+      shift_val##bit_size[i] = i % bit_size; \
+    } \
+    CIRCULAR_LEFT_SHIFT_ARRAY(result_left##bit_size, shift_val##bit_size, bit_size, ARRAY_SIZE)\
+    CIRCULAR_RIGHT_SHIFT_ARRAY(result_right##bit_size, shift_val##bit_size, bit_size, ARRAY_SIZE)\
+    compare_results##bit_size(result_left##bit_size, result_right##bit_size, bit_size, shift_val##bit_size); \
+  } while(0)
+
+
+int main() {
+  TEST_SHIFT_OPERATIONS(uint8_t, 8);
+  TEST_SHIFT_OPERATIONS(uint16_t, 16);
+  TEST_SHIFT_OPERATIONS(uint32_t, 32);
+  TEST_SHIFT_OPERATIONS(uint64_t, 64);
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vrolr-template.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vrolr-template.h
new file mode 100644
index 00000000000..3db0d8643a8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vrolr-template.h
@@ -0,0 +1,29 @@
+#include <stdint.h>
+
+#define VROL_VV(SEW, S, T) \
+__attribute__ ((noipa))\
+void
autovect_vrol_vv_##S##SEW (T *out, T *op1, T *op2, int n){\
+  for(int i=0; i<n; i++){\
+    out[i] = (op1[i] << op2[i]) | (op1[i] >> (SEW - op2[i]));\
+  }\
+}
+
+#define VROR_VV(SEW, S, T) \
+__attribute__ ((noipa))\
+void autovect_vror_vv_##S##SEW (T *out, T *op1, T *op2, int n){\
+  for(int i=0; i<n; i++){\
+    out[i] = (op1[i] >> op2[i]) | (op1[i] << (SEW - op2[i]));\
+  }\
+}
+
+VROL_VV(8, u, uint8_t)
+VROL_VV(16, u, uint16_t)
+VROL_VV(32, u, uint32_t)
+VROL_VV(64, u, uint64_t)
+
+VROR_VV(8, u, uint8_t)
+VROR_VV(16, u, uint16_t)
+VROR_VV(32, u, uint32_t)
+VROR_VV(64, u, uint64_t)