--- /dev/null
+/* { dg-do compile } */
+/* { dg-additional-options "-mfma" { target { x86_64-*-* i?86-*-* } } } */
+
+double f(double x[], long n) /* Returns the sum of squares of the first 2*n elements of x (n >= 0 expected).  */
+{
+ double r0 = 0, r1 = 0; /* Two independent accumulators: r0 for x[0] of each pair, r1 for x[1].  */
+ for (; n; x += 2, n--) { /* Each iteration consumes one pair of adjacent elements.  */
+ r0 = __builtin_fma(x[0], x[0], r0); /* The accumulator is the addend (third operand) of the FMA.  */
+ r1 = __builtin_fma(x[1], x[1], r1);
+ }
+ return r0 + r1; /* Combine the two partial sums.  */
+}
+
+/* We should vectorize this as SLP reduction. */
+/* { dg-final { scan-tree-dump "loop vectorized using 16 byte vectors and unroll factor 1" "vect" { target { x86_64-*-* i?86-*-* } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-additional-options "-ffp-contract=on" } */
+/* { dg-additional-options "-mfma" { target { x86_64-*-* i?86-*-* } } } */
+
+static double muladd(double x, double y, double z) /* Returns x * y + z.  */
+{
+ return x * y + z; /* Kept as one expression; this file is compiled with -ffp-contract=on, which may contract it into an FMA.  */
+}
+double g(double x[], long n) /* Returns the sum of squares of the first 2*n elements of x (n >= 0 expected).  */
+{
+ double r0 = 0, r1 = 0; /* Two independent accumulators: r0 for x[0] of each pair, r1 for x[1].  */
+ for (; n; x += 2, n--) { /* Each iteration consumes one pair of adjacent elements.  */
+ r0 = muladd(x[0], x[0], r0); /* The accumulator is passed as the addend z of muladd.  */
+ r1 = muladd(x[1], x[1], r1);
+ }
+ return r0 + r1; /* Combine the two partial sums.  */
+}
+
+/* We should vectorize this as SLP reduction. */
+/* { dg-final { scan-tree-dump "loop vectorized using 16 byte vectors and unroll factor 1" "vect" { target { x86_64-*-* i?86-*-* } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-additional-options "-ffast-math" } */
+/* { dg-additional-options "-mfma" { target { x86_64-*-* i?86-*-* } } } */
+
+double f(double x[], long n) /* Returns the sum of squares of the first 2*n elements of x (n >= 0 expected).  */
+{
+ double r0 = 0, r1 = 0; /* Two independent accumulators: r0 for x[0] of each pair, r1 for x[1].  */
+ for (; n; x += 2, n--) { /* Each iteration consumes one pair of adjacent elements.  */
+ r0 = __builtin_fma(x[0], x[0], r0); /* The accumulator is the addend (third operand) of the FMA.  */
+ r1 = __builtin_fma(x[1], x[1], r1);
+ }
+ return r0 + r1; /* Combine the two partial sums.  */
+}
+
+/* We should vectorize this as an SLP reduction; with -ffast-math a higher VF is possible. */
+/* { dg-final { scan-tree-dump "optimized: loop vectorized" "vect" { target { x86_64-*-* i?86-*-* } } } } */
if (op.ops[2] == op.ops[opi])
neg = ! neg;
}
+ /* For an FMA whose addend (operand 2) is on the reduction chain,
+ the reduction code is PLUS_EXPR. */
+ else if (op.code == IFN_FMA && opi == 2)
+ op.code = PLUS_EXPR;
if (CONVERT_EXPR_CODE_P (op.code)
&& tree_nop_conversion_p (op.type, TREE_TYPE (op.ops[0])))
;
"in-order reduction chain without SLP.\n");
return false;
}
+ /* Code generation doesn't support function calls other
+ than .COND_*. */
+ if (!op.code.is_tree_code ()
+ && !(op.code.is_internal_fn ()
+ && conditional_internal_fn_code (internal_fn (op.code))
+ != ERROR_MARK))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "in-order reduction chain operation not "
+ "supported.\n");
+ return false;
+ }
STMT_VINFO_REDUC_TYPE (reduc_info)
= reduction_type = FOLD_LEFT_REDUCTION;
}