unsigned int autovectorize_vector_modes (vec<machine_mode> *, bool);
bool cmp_lmul_le_one (machine_mode);
bool cmp_lmul_gt_one (machine_mode);
-bool vls_mode_valid_p (machine_mode);
+bool vls_mode_valid_p (machine_mode, bool allow_up_to_lmul_8 = true);
bool vlmax_avl_type_p (rtx_insn *);
bool has_vl_op (rtx_insn *);
bool tail_agnostic_p (rtx_insn *);
machine_mode mode;
while (size > 0 && get_vector_mode (QImode, size).exists (&mode))
{
- if (vls_mode_valid_p (mode))
+ if (vls_mode_valid_p (mode, /*allow_up_to_lmul_8=*/false))
modes->safe_push (mode);
i++;
Then we can have the condition for VLS mode in fixed-vlmax, aka:
PRECISION (VLSmode) < VLEN / (64 / PRECISION(VLS_inner_mode)). */
bool
-vls_mode_valid_p (machine_mode vls_mode)
+vls_mode_valid_p (machine_mode vls_mode, bool allow_up_to_lmul_8)
{
if (!TARGET_VECTOR || TARGET_XTHEADVECTOR)
return false;
if (rvv_vector_bits == RVV_VECTOR_BITS_SCALABLE)
{
- if (GET_MODE_CLASS (vls_mode) != MODE_VECTOR_BOOL
- && !ordered_p (TARGET_MAX_LMUL * BITS_PER_RISCV_VECTOR,
- GET_MODE_PRECISION (vls_mode)))
- /* We enable VLS modes which are aligned with TARGET_MAX_LMUL and
- BITS_PER_RISCV_VECTOR.
-
- e.g. When TARGET_MAX_LMUL = 1 and BITS_PER_RISCV_VECTOR = (128,128).
- We enable VLS modes have fixed size <= 128bit. Since ordered_p is
- false between VLA modes with size = (128, 128) bits and VLS mode
- with size = 128 bits, we will end up with multiple ICEs in
- middle-end generic codes. */
- return false;
- return true;
+ if (GET_MODE_CLASS (vls_mode) == MODE_VECTOR_BOOL)
+ return true;
+ if (allow_up_to_lmul_8)
+ return true;
+ /* We enable VLS modes which are aligned with TARGET_MAX_LMUL and
+ BITS_PER_RISCV_VECTOR.
+
+ e.g. when TARGET_MAX_LMUL = 1 and BITS_PER_RISCV_VECTOR = (128,128),
+ we enable VLS modes that have a fixed size <= 128 bits. Since ordered_p
+ is false between VLA modes with size = (128, 128) bits and a VLS mode
+ with size = 128 bits, we will end up with multiple ICEs in
+ middle-end generic code. */
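+ /* For example (assuming a zvl128b target, i.e. BITS_PER_RISCV_VECTOR =
+ (128, 128)): when ALLOW_UP_TO_LMUL_8 is false (the auto-vectorizer
+ path) and -mrvv-max-lmul=m2, TARGET_MAX_LMUL * BITS_PER_RISCV_VECTOR
+ is (256, 256), so a 256-bit VLS mode is ordered against it and stays
+ valid, while a 512-bit VLS mode is not ordered and is rejected. */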
+ return ordered_p (TARGET_MAX_LMUL * BITS_PER_RISCV_VECTOR,
+ GET_MODE_PRECISION (vls_mode));
}
if (rvv_vector_bits == RVV_VECTOR_BITS_ZVL)
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -mrvv-max-lmul=m2 -fdump-tree-optimized" } */
+
+void foo(int * restrict a, int *b, int *c)
+{
+ for (int i=0;i<32;++i)
+ a[i] = b[i] + c[i];
+}
+
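+/* A sizing sketch (assuming the 128-bit minimum VLEN implied by rv64gcv):
+   -mrvv-max-lmul=m2 caps the auto-vectorizer's VLS modes at 2 * 128 = 256
+   bits, i.e. 8 e32 elements at a time, which is what the vsetivli below
+   checks for.  */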
+/* Make sure -mrvv-max-lmul still constrains the auto-vectorizer for VLS
+   types.  */
+/* { dg-final { scan-assembler {vsetivli\s+zero,8,e32,m2,t[au],m[au]} } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -mrvv-max-lmul=m1 -fdump-tree-optimized" } */
+
+typedef long long int64x8_t __attribute__((vector_size(64)));
+
+int64x8_t foo(int64x8_t a, int64x8_t b)
+{
+ return a + b;
+}
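+
+/* A sizing sketch (again assuming a 128-bit minimum VLEN): int64x8_t is
+   8 * 64 = 512 bits, which needs LMUL 4.  vls_mode_valid_p defaults
+   allow_up_to_lmul_8 to true, so this GNU vector type can still use m4
+   even under -mrvv-max-lmul=m1.  */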
+/* Make sure we can use up to LMUL 4 to process int64x8_t at once rather
+   than breaking it into four LMUL 1 operations.  */
+/* { dg-final { scan-assembler {vsetivli\s+zero,8,e64,m4,t[au],m[au]} } } */