git.ipfire.org Git - thirdparty/gcc.git/commitdiff
RISC-V: Load VLS perm indices directly from memory.
author Robin Dapp <rdapp@ventanamicro.com>
Thu, 26 Sep 2024 09:56:08 +0000 (11:56 +0200)
committer Robin Dapp <rdapp@ventanamicro.com>
Tue, 19 Nov 2024 11:23:12 +0000 (12:23 +0100)
Instead of loading the permutation indices and using vmslt to determine
which elements belong to which source vector, we can compute the proper
mask at compile time when the vector length is constant (VLS).  That way
we can emit vlm instead of vle + vmslt.

gcc/ChangeLog:

* config/riscv/riscv-v.cc (shuffle_merge_patterns): Load VLS
indices directly.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/vls/merge-1.c: Check for vlm and
no vmsleu etc.
* gcc.target/riscv/rvv/autovec/vls/merge-2.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls/merge-3.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls/merge-4.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls/merge-5.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls/merge-6.c: Ditto.

gcc/config/riscv/riscv-v.cc
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-1.c
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-2.c
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-3.c
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-4.c
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-5.c
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/merge-6.c

index a0e22b6454b74562e78e1a21c16c251e3646f70d..ee7a0128c0ed3a1edbf13201855bdc0daf4aa5f6 100644 (file)
@@ -3101,9 +3101,27 @@ shuffle_merge_patterns (struct expand_vec_perm_d *d)
   machine_mode mask_mode = get_mask_mode (vmode);
   rtx mask = gen_reg_rtx (mask_mode);
 
-  if (indices_fit_selector_p)
+  if (indices_fit_selector_p && vec_len.is_constant ())
     {
-      /* MASK = SELECTOR < NUNITS ? 1 : 0.  */
+      /* For a constant vector length we can generate the needed mask at
+        compile time and load it as mask at runtime.
+        This saves a compare at runtime.  */
+      rtx_vector_builder sel (mask_mode, d->perm.encoding ().npatterns (),
+                             d->perm.encoding ().nelts_per_pattern ());
+      unsigned int encoded_nelts = sel.encoded_nelts ();
+      for (unsigned int i = 0; i < encoded_nelts; i++)
+       sel.quick_push (gen_int_mode (d->perm[i].to_constant ()
+                                     < vec_len.to_constant (),
+                                     GET_MODE_INNER (mask_mode)));
+      mask = sel.build ();
+    }
+  else if (indices_fit_selector_p)
+    {
+      /* For a dynamic vector length < 256 we keep the permutation
+        indices in the literal pool, load it at runtime and create the
+        mask by selecting either OP0 or OP1 by
+
+           INDICES < NUNITS ? 1 : 0.  */
       rtx sel = vec_perm_indices_to_rtx (sel_mode, d->perm);
       rtx x = gen_int_mode (vec_len, GET_MODE_INNER (sel_mode));
       insn_code icode = code_for_pred_cmp_scalar (sel_mode);
index cd24922d0ad4c1f196f1b3e1af9e5542c4e1db24..c34734cff6d28070b3f64bc7b4423e4d08a695eb 100644 (file)
@@ -4,3 +4,5 @@
 #include "../vls-vlmax/merge-1.c"
 
 /* { dg-final { scan-assembler-times {\tvmerge.vvm} 11 } } */
+/* { dg-final { scan-assembler-not {\tvms} } } */
+/* { dg-final { scan-assembler-times {\tvlm.v} 11 } } */
index 52d91244f51b4c138dc66e6c5b7337325a9209d0..68f7b62e62ff7f644b0e3bf6c58406e4c3134fc9 100644 (file)
@@ -4,3 +4,5 @@
 #include "../vls-vlmax/merge-2.c"
 
 /* { dg-final { scan-assembler-times {\tvmerge.vvm} 11 } } */
+/* { dg-final { scan-assembler-not {\tvms} } } */
+/* { dg-final { scan-assembler-times {\tvlm.v} 11 } } */
index 4931d2a36047fb9ab9663ec30f43abb14705d1f4..1250dca65d110d4dc57f4bbcc19bf015723fc7de 100644 (file)
@@ -4,3 +4,5 @@
 #include "../vls-vlmax/merge-3.c"
 
 /* { dg-final { scan-assembler-times {\tvmerge.vvm} 11 } } */
+/* { dg-final { scan-assembler-not {\tvms} } } */
+/* { dg-final { scan-assembler-times {\tvlm.v} 11 } } */
index f22a18f8ef3b269fcb14fdc9272024e60deecd3a..1dfd8287b7f2fe8709041fd6a146494e8b016689 100644 (file)
@@ -4,3 +4,5 @@
 #include "../vls-vlmax/merge-4.c"
 
 /* dg-final scan-assembler-times {\tvmerge.vvm} 11 */
+/* { dg-final { scan-assembler-not {\tvms} } } */
+/* { dg-final { scan-assembler-times {\tvlm.v} 11 } } */
index cf8d04c4bce8e006ff49d2438fbae62bacd671c9..af84a6552c0a95e4b6405c9be81d683e45005ac5 100644 (file)
@@ -4,3 +4,5 @@
 #include "../vls-vlmax/merge-5.c"
 
 /* { dg-final { scan-assembler-times {\tvmerge.vvm} 8 } } */
+/* { dg-final { scan-assembler-not {\tvms} } } */
+/* { dg-final { scan-assembler-times {\tvlm.v} 8 } } */
index 3b6f9774d515b66216603035e79b19915162ebb6..45e999823ce0497b6ebbe3dc955f746a602ef265 100644 (file)
@@ -4,3 +4,5 @@
 #include "../vls-vlmax/merge-6.c"
 
 /* { dg-final { scan-assembler-times {\tvmerge.vvm} 5 } } */
+/* { dg-final { scan-assembler-not {\tvms} } } */
+/* { dg-final { scan-assembler-times {\tvlm.v} 5 } } */