dump_printf_loc (MSG_NOTE, vect_location,
"Original vector body cost = %d\n", body_cost);
+ /* If we know we have a single partial vector iteration, cap the VF
+ to the number of scalar iterations for costing purposes. */
+ if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
+ {
+ auto niters = LOOP_VINFO_INT_NITERS (loop_vinfo);
+ if (niters < estimated_vf && dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "Scalar loop iterates at most %wd times. Capping VF "
+ " from %d to %wd\n", niters, estimated_vf, niters);
+
+ estimated_vf = MIN (estimated_vf, niters);
+ }
+
fractional_cost scalar_cycles_per_iter
= scalar_ops.min_cycles_per_iter () * estimated_vf;
}
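/* Illustrative sketch (not part of the patch): with the cap above, a loop
   whose trip count is known to be, say, 9 no longer has its vector body cost
   weighed against an estimated VF of 16 scalar iterations that never run.
   The helper below mirrors the MIN (estimated_vf, niters) step under those
   assumed numbers; the name capped_vf_for_costing is hypothetical and exists
   only for this example.  */

static inline int
capped_vf_for_costing (int estimated_vf, long long niters)
{
  /* Same effect as: estimated_vf = MIN (estimated_vf, niters);  */
  return niters < estimated_vf ? (int) niters : estimated_vf;
}

/* e.g. capped_vf_for_costing (16, 9) == 9, so scalar_cycles_per_iter is
   scaled by 9 scalar iterations rather than 16.  */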
#define TEST_ALL(T) \
- T (int16_t, int8_t, 7) \
- T (int32_t, int8_t, 3) \
- T (int32_t, int16_t, 3) \
- T (int64_t, int8_t, 5) \
- T (int64_t, int16_t, 5) \
- T (int64_t, int32_t, 5)
+ T (int16_t, int8_t, 70) \
+ T (int32_t, int8_t, 30) \
+ T (int32_t, int16_t, 30) \
+ T (int64_t, int8_t, 50) \
+ T (int64_t, int16_t, 50) \
+ T (int64_t, int32_t, 50)
TEST_ALL (DEF_LOOP)
}
#define TEST_ALL(T) \
- T (int16_t, int8_t, 7) \
- T (int32_t, int8_t, 3) \
- T (int32_t, int16_t, 3) \
- T (int64_t, int8_t, 5) \
- T (int64_t, int16_t, 5) \
- T (int64_t, int32_t, 5)
+ T (int16_t, int8_t, 70) \
+ T (int32_t, int8_t, 30) \
+ T (int32_t, int16_t, 30) \
+ T (int64_t, int8_t, 50) \
+ T (int64_t, int16_t, 50) \
+ T (int64_t, int32_t, 50)
TEST_ALL (DEF_LOOP)
}
#define TEST_ALL(T) \
- T (int32_t, uint16_t, 0xff, 3) \
+ T (int32_t, uint16_t, 0xff, 30) \
\
- T (int64_t, uint16_t, 0xff, 5) \
- T (int64_t, uint32_t, 0xff, 5) \
- T (int64_t, uint32_t, 0xffff, 5)
+ T (int64_t, uint16_t, 0xff, 50) \
+ T (int64_t, uint32_t, 0xff, 50) \
+ T (int64_t, uint32_t, 0xffff, 50)
TEST_ALL (DEF_LOOP)
}
#define TEST_ALL(T) \
- T (int32_t, uint16_t, 0xff, 3) \
+ T (int32_t, uint16_t, 0xff, 30) \
\
- T (int64_t, uint16_t, 0xff, 5) \
- T (int64_t, uint32_t, 0xff, 5) \
- T (int64_t, uint32_t, 0xffff, 5)
+ T (int64_t, uint16_t, 0xff, 50) \
+ T (int64_t, uint32_t, 0xff, 50) \
+ T (int64_t, uint32_t, 0xffff, 50)
TEST_ALL (DEF_LOOP)
}
#define TEST_ALL(T) \
- T (int32_t, uint16_t, 0xff, 3) \
+ T (int32_t, uint16_t, 0xff, 30) \
\
- T (int64_t, uint16_t, 0xff, 5) \
- T (int64_t, uint32_t, 0xff, 5) \
- T (int64_t, uint32_t, 0xffff, 5)
+ T (int64_t, uint16_t, 0xff, 50) \
+ T (int64_t, uint32_t, 0xff, 50) \
+ T (int64_t, uint32_t, 0xffff, 50)
TEST_ALL (DEF_LOOP)
int * __restrict__ g, int * __restrict__ h)
{
int i = 0;
- for (i = 0; i < 3; i++)
+ for (i = 0; i < 30; i++)
{
a[i] += i;
b[i] += i;
{ \
if (which) \
{ \
- for (int i = 0; i < 7; ++i) \
+ for (int i = 0; i < 70; ++i) \
x1[i] += VAL; \
consumer (x1); \
- for (int i = 0; i < 7; ++i) \
+ for (int i = 0; i < 70; ++i) \
x2[i] -= VAL; \
consumer (x2); \
} \
else \
{ \
- for (int i = 0; i < 7; ++i) \
+ for (int i = 0; i < 70; ++i) \
x3[i] &= VAL; \
consumer (x3); \
} \
- for (int i = 0; i < 7; ++i) \
+ for (int i = 0; i < 70; ++i) \
x4[i] |= VAL; \
consumer (x4); \
}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-additional-options "-march=armv9-a -Ofast -fdump-tree-vect-details" } */
+
+void
+foo (char *restrict a, int *restrict b, int *restrict c, int n)
+{
+ for (int i = 0; i < 9; i++)
+ {
+ int res = c[i];
+ int t = b[i];
+ if (a[i] != 0)
+ res = t;
+ c[i] = res;
+ }
+}
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-additional-options "-march=armv9-a -Ofast -fdump-tree-vect-details" } */
+
+void
+foo (char *restrict a, int *restrict b, int *restrict c, int n, int stride)
+{
+ if (stride <= 1)
+ return;
+
+ for (int i = 0; i < 9; i++)
+ {
+ int res = c[i];
+ int t = b[i*stride];
+ if (a[i] != 0)
+ res = t;
+ c[i] = res;
+ }
+}
+
+/* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" } } */