]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
RISC-V: Disable BSWAP optimization for NUNITS < 4
authorJuzhe-Zhong <juzhe.zhong@rivai.ai>
Fri, 24 Nov 2023 05:04:18 +0000 (13:04 +0800)
committerPan Li <pan2.li@intel.com>
Fri, 24 Nov 2023 05:08:21 +0000 (13:08 +0800)
While fixing bugs, I noticed a piece of odd code that looks incorrect
and probably makes codegen worse.

#include <stdint.h>

typedef int8_t vnx2qi __attribute__ ((vector_size (2)));

#define MASK_2(X, Y) (Y) - 1 - (X), (Y) - 2 - (X)

#define PERMUTE(TYPE, NUNITS)                                                  \
  __attribute__ ((noipa)) void permute_##TYPE (TYPE values1, TYPE values2,     \
       TYPE *out)                      \
  {                                                                            \
    TYPE v                                                                     \
      = __builtin_shufflevector (values1, values2, MASK_##NUNITS (0, NUNITS)); \
    *(TYPE *) out = v;                                                         \
  }

#define TEST_ALL(T)                                                            \
  T (vnx2qi, 2)

TEST_ALL (PERMUTE)

Before this patch:

        vsetivli        zero,2,e8,mf8,ta,ma
        vle8.v  v1,0(a0)
        vsetivli        zero,1,e16,mf4,ta,ma
        vsrl.vi v2,v1,8
        vsll.vi v1,v1,8
        vor.vv  v1,v2,v1
        vsetivli        zero,2,e8,mf8,ta,ma
        vse8.v  v1,0(a2)
        ret

After this patch:

        vsetivli        zero,2,e8,mf8,ta,ma
        vle8.v  v3,0(a0)
        vid.v   v1
        vrsub.vi        v1,v1,1
        vrgather.vv     v2,v3,v1
        vse8.v  v2,0(a2)
        ret

Committed as it is very obvious during code review.

gcc/ChangeLog:

* config/riscv/riscv-v.cc (shuffle_bswap_pattern): Disable for NUNITS < 4.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/vls-vlmax/perm-4.c: Adapt test.
* gcc.target/riscv/rvv/autovec/vls/perm-4.c: Ditto.

gcc/config/riscv/riscv-v.cc
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/perm-4.c
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/perm-4.c

index 72b96d8339d624610d18e003ea14d5b49fdde46f..acf409733227a355e66e27c8bbd9a16b32a04aec 100644 (file)
@@ -3201,6 +3201,11 @@ shuffle_bswap_pattern (struct expand_vec_perm_d *d)
     if (!d->perm.series_p (i, step, diff - i, step))
       return false;
 
+  /* Disable when nunits < 4 since the later generic approach
+     is more profitable on BSWAP.  */
+  if (!known_gt (GET_MODE_NUNITS (d->vmode), 2))
+    return false;
+
   if (d->testing_p)
     return true;
 
index b235ec727b19224667c6bac3f60d818e1ee71565..7ab310435476582e3f125d341a658d0d0ed3720d 100644 (file)
@@ -55,7 +55,7 @@
 
 TEST_ALL (PERMUTE)
 
-/* { dg-final { scan-assembler-times {vrgather\.vv\tv[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 18 } } */
+/* { dg-final { scan-assembler-times {vrgather\.vv\tv[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 19 } } */
 /* { dg-final { scan-assembler-times {vrgatherei16\.vv\tv[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 12 } } */
-/* { dg-final { scan-assembler-times {vrsub\.vi} 23 } } */
+/* { dg-final { scan-assembler-times {vrsub\.vi} 24 } } */
 /* { dg-final { scan-assembler-times {vrsub\.vx} 7 } } */
index d2d49388a39bee5fd1a82214e5604aac2ad0e213..4d6862cf1c04ab8a8e76acc234400b3a4c50b31e 100644 (file)
@@ -3,7 +3,7 @@
 
 #include "../vls-vlmax/perm-4.c"
 
-/* { dg-final { scan-assembler-times {vrgather\.vv\tv[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 18 } } */
+/* { dg-final { scan-assembler-times {vrgather\.vv\tv[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 19 } } */
 /* { dg-final { scan-assembler-times {vrgatherei16\.vv\tv[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 12 } } */
-/* { dg-final { scan-assembler-times {vrsub\.vi} 23 } } */
+/* { dg-final { scan-assembler-times {vrsub\.vi} 24 } } */
 /* { dg-final { scan-assembler-times {vrsub\.vx} 7 } } */