Fix the bug of the rvv bool mode size by the adjustment.
Besides the mode precision (aka bit size [1, 2, 4, 8, 16, 32, 64])
of the vbool*_t, the mode size (aka byte size) will be adjusted to
[1, 1, 1, 1, 2, 4, 8] according to the rvv spec 1.0 isa. The
adjustment will provide correct information for the underlying
redundant instruction elimiation.
Given the below sample code:
{
vbool1_t v1 = *(vbool1_t*)in;
vbool64_t v2 = *(vbool64_t*)in;
*(vbool1_t*)(out + 100) = v1;
*(vbool64_t*)(out + 200) = v2;
}
Before the size adjustment:
csrr t0,vlenb
slli t1,t0,1
csrr a3,vlenb
sub sp,sp,t1
slli a4,a3,1
add a4,a4,sp
addi a2,a1,100
vsetvli a5,zero,e8,m8,ta,ma
sub a3,a4,a3
vlm.v v24,0(a0)
vsm.v v24,0(a2)
vsm.v v24,0(a3)
addi a1,a1,200
csrr t0,vlenb
vsetvli a4,zero,e8,mf8,ta,ma
slli t1,t0,1
vlm.v v24,0(a3)
vsm.v v24,0(a1)
add sp,sp,t1
jr ra
After the size adjustment:
addi a3,a1,100
vsetvli a4,zero,e8,m8,ta,ma
addi a1,a1,200
vlm.v v24,0(a0)
vsm.v v24,0(a3)
vsetvli a5,zero,e8,mf8,ta,ma
vlm.v v24,0(a0)
vsm.v v24,0(a1)
ret
Additionally, the size adjust cannot cover all possible combinations
of the vbool*_t code pattern like above. We will take a look into it
in another patches.
PR 108185
PR 108654
gcc/ChangeLog:
PR target/108654
PR target/108185
* config/riscv/riscv-modes.def (ADJUST_BYTESIZE): Adjust size
for vector mask modes.
* config/riscv/riscv.cc (riscv_v_adjust_bytesize): New.
* config/riscv/riscv.h (riscv_v_adjust_bytesize): New.
gcc/testsuite/ChangeLog:
PR target/108654
PR target/108185
* gcc.target/riscv/rvv/base/pr108185-1.c: Update.
* gcc.target/riscv/rvv/base/pr108185-2.c: Ditto.
* gcc.target/riscv/rvv/base/pr108185-3.c: Ditto.
Signed-off-by: Pan Li <pan2.li@intel.com>
Co-authored-by: Ju-Zhe Zhong <juzhe.zhong@rivai.ai>
ADJUST_ALIGNMENT (VNx32BI, 1);
ADJUST_ALIGNMENT (VNx64BI, 1);
-ADJUST_BYTESIZE (VNx1BI, riscv_vector_chunks * riscv_bytes_per_vector_chunk);
-ADJUST_BYTESIZE (VNx2BI, riscv_vector_chunks * riscv_bytes_per_vector_chunk);
-ADJUST_BYTESIZE (VNx4BI, riscv_vector_chunks * riscv_bytes_per_vector_chunk);
-ADJUST_BYTESIZE (VNx8BI, riscv_vector_chunks * riscv_bytes_per_vector_chunk);
-ADJUST_BYTESIZE (VNx16BI, riscv_vector_chunks * riscv_bytes_per_vector_chunk);
-ADJUST_BYTESIZE (VNx32BI, riscv_vector_chunks * riscv_bytes_per_vector_chunk);
-ADJUST_BYTESIZE (VNx64BI, riscv_v_adjust_nunits (VNx64BImode, 8));
+ADJUST_BYTESIZE (VNx1BI, riscv_v_adjust_bytesize (VNx1BImode, 1));
+ADJUST_BYTESIZE (VNx2BI, riscv_v_adjust_bytesize (VNx2BImode, 1));
+ADJUST_BYTESIZE (VNx4BI, riscv_v_adjust_bytesize (VNx4BImode, 1));
+ADJUST_BYTESIZE (VNx8BI, riscv_v_adjust_bytesize (VNx8BImode, 1));
+ADJUST_BYTESIZE (VNx16BI, riscv_v_adjust_bytesize (VNx16BImode, 2));
+ADJUST_BYTESIZE (VNx32BI, riscv_v_adjust_bytesize (VNx32BImode, 4));
+ADJUST_BYTESIZE (VNx64BI, riscv_v_adjust_bytesize (VNx64BImode, 8));
ADJUST_PRECISION (VNx1BI, riscv_v_adjust_precision (VNx1BImode, 1));
ADJUST_PRECISION (VNx2BI, riscv_v_adjust_precision (VNx2BImode, 2));
return scale;
}
+/* Call from ADJUST_BYTESIZE in riscv-modes.def. Return the correct
+ BYTE size for corresponding machine_mode. */
+
+poly_int64
+riscv_v_adjust_bytesize (machine_mode mode, int scale)
+{
+ if (riscv_v_ext_vector_mode_p (mode))
+ {
+ poly_uint16 mode_size = GET_MODE_SIZE (mode);
+
+ if (maybe_eq (mode_size, (uint16_t)-1))
+ mode_size = riscv_vector_chunks * scale;
+
+ if (known_gt (mode_size, BYTES_PER_RISCV_VECTOR))
+ mode_size = BYTES_PER_RISCV_VECTOR;
+
+ return mode_size;
+ }
+
+ return scale;
+}
+
/* Call from ADJUST_PRECISION in riscv-modes.def. Return the correct
PRECISION size for corresponding machine_mode. */
extern poly_uint16 riscv_vector_chunks;
extern poly_int64 riscv_v_adjust_nunits (enum machine_mode, int);
extern poly_int64 riscv_v_adjust_precision (enum machine_mode, int);
+extern poly_int64 riscv_v_adjust_bytesize (enum machine_mode, int);
/* The number of bits and bytes in a RVV vector. */
#define BITS_PER_RISCV_VECTOR (poly_uint16 (riscv_vector_chunks * riscv_bytes_per_vector_chunk * 8))
#define BYTES_PER_RISCV_VECTOR (poly_uint16 (riscv_vector_chunks * riscv_bytes_per_vector_chunk))
/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf4,\s*ta,\s*ma} 1 } } */
/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf8,\s*ta,\s*ma} 1 } } */
/* { dg-final { scan-assembler-times {vlm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
-/* { dg-final { scan-assembler-times {vsm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 18 } } */
+/* { dg-final { scan-assembler-times {vsm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf4,\s*ta,\s*ma} 1 } } */
/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf8,\s*ta,\s*ma} 1 } } */
/* { dg-final { scan-assembler-times {vlm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
-/* { dg-final { scan-assembler-times {vsm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 17 } } */
+/* { dg-final { scan-assembler-times {vsm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf4,\s*ta,\s*ma} 1 } } */
/* { dg-final { scan-assembler-times {vsetvli\s+[a-x][0-9]+,\s*zero,\s*e8,\s*mf8,\s*ta,\s*ma} 1 } } */
/* { dg-final { scan-assembler-times {vlm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */
-/* { dg-final { scan-assembler-times {vsm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 16 } } */
+/* { dg-final { scan-assembler-times {vsm\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 12 } } */