--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -mtune=generic-ooo -ffast-math" } */
+/* Define reduc_plus_TYPE: a noinline sum of the first N elements of A.  */
+#define DEF_REDUC_PLUS(TYPE) \
+ TYPE __attribute__ ((noinline, noclone)) \
+ reduc_plus_##TYPE (TYPE *__restrict a, int n) \
+ { \
+ TYPE r = 0; \
+ for (int i = 0; i < n; ++i) \
+ r += a[i]; \
+ return r; \
+ }
+
+#define TEST_PLUS(T) T (int) T (float)
+
+TEST_PLUS (DEF_REDUC_PLUS)
+
+/* { dg-final { scan-assembler-not {vsetivli\s+zero,\s*4} } } */
/* { dg-do compile } */
-/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -mtune=generic-ooo" } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -ffast-math" } */
#define DEF_REDUC_PLUS(TYPE) \
TYPE __attribute__ ((noinline, noclone)) \
return r; \
}
-#define TEST_PLUS(T) T (int)
+#define TEST_PLUS(T) T (int) T (float)
TEST_PLUS (DEF_REDUC_PLUS)
unsigned int length_update_cost = 0;
if (LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo))
- /* For decrement IV style, we use a single SELECT_VL since
- beginning to calculate the number of elements need to be
- processed in current iteration, and a SHIFT operation to
- compute the next memory address instead of adding vectorization
- factor. */
- length_update_cost = 2;
+ /* For decrement IV style, each only needs a single SELECT_VL
+ or MIN at the beginning to calculate the number of elements
+ that need to be processed in the current iteration. */
+ length_update_cost = 1;
else
/* For increment IV stype, Each may need two MINs and one MINUS to
update lengths in body for next iteration. */