Given below example for VLS mode
void
test (vl_t *u)
{
vl_t t;
long long *p = (long long *)&t;
p[0] = p[1] = 2;
*u = t;
}
The vec_set will simplify the insn to vmv.s.x when index is 0, without
merged operand. That will result in some problems in DCE, aka:
1: 137[DI] = a0
2: 138[V2DI] = 134[V2DI] // deleted by DCE
3: 139[DI] = #2 // deleted by DCE
4: 140[DI] = #2 // deleted by DCE
5: 141[V2DI] = vec_dup:V2DI (139[DI]) // deleted by DCE
6: 138[V2DI] = vslideup_imm (138[V2DI], 141[V2DI], 1) // deleted by DCE
7: 135[V2DI] = 138[V2DI] // deleted by DCE
8: 142[V2DI] = 135[V2DI] // deleted by DCE
9: 143[DI] = #2
10: 142[V2DI] = vec_dup:V2DI (143[DI])
11: (137[DI]) = 142[V2DI]
The higher 64 bits of 142[V2DI] is unknown here and it generated incorrect
code when store back to memory. This patch would like to fix this issue
by adding a new SCALAR_MOVE_MERGED_OP for vec_set.
Please note this patch doesn't enable VLS for vec_set, the underlying
patches will support this soon.
gcc/ChangeLog:
* config/riscv/autovec.md: Bugfix.
* config/riscv/riscv-protos.h (SCALAR_MOVE_MERGED_OP): New enum.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/base/scalar-move-merged-run-1.c: New test.
Signed-off-by: Pan Li <pan2.li@intel.com>
/* If we set the first element, emit an v(f)mv.s.[xf]. */
if (operands[2] == const0_rtx)
{
- rtx ops[] = {operands[0], operands[1]};
+ rtx ops[] = {operands[0], operands[0], operands[1]};
riscv_vector::emit_nonvlmax_insn (code_for_pred_broadcast (<MODE>mode),
- riscv_vector::SCALAR_MOVE_OP, ops, CONST1_RTX (Pmode));
+ riscv_vector::SCALAR_MOVE_MERGED_OP, ops, CONST1_RTX (Pmode));
}
else
{
SCALAR_MOVE_OP = HAS_DEST_P | HAS_MASK_P | USE_ONE_TRUE_MASK_P | HAS_MERGE_P
| USE_VUNDEF_MERGE_P | TDEFAULT_POLICY_P | MDEFAULT_POLICY_P
| UNARY_OP_P,
+
+ SCALAR_MOVE_MERGED_OP = HAS_DEST_P | HAS_MASK_P | USE_ONE_TRUE_MASK_P
+ | HAS_MERGE_P | TDEFAULT_POLICY_P | MDEFAULT_POLICY_P
+ | UNARY_OP_P,
};
enum vlmul_type
--- /dev/null
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-options "-O3 -Wno-psabi" } */
+
+#define TEST_VAL 2
+
+typedef long long vl_t __attribute__((vector_size(2 * sizeof (long long))));
+
+void init_vl (vl_t *u)
+{
+ vl_t t;
+ long long *p = (long long *)&t;
+
+ p[0] = p[1] = TEST_VAL;
+
+ *u = t;
+}
+
+int
+main ()
+{
+ vl_t vl = {};
+
+ init_vl (&vl);
+
+ if (vl[0] != TEST_VAL || vl[1] != TEST_VAL)
+ __builtin_abort ();
+
+ return 0;
+}