;; FMA (fused multiply-add) patterns
(define_expand "fmasf4"
- [(set (match_operand:SF 0 "fp_arith_reg_operand" "")
- (fma:SF (match_operand:SF 1 "fp_arith_reg_operand" "")
- (match_operand:SF 2 "fp_arith_reg_operand" "")
- (match_operand:SF 3 "fp_arith_reg_operand" "")))]
+ [(set (match_operand:SF 0 "fp_arith_reg_operand")
+ (fma:SF (match_operand:SF 1 "fp_arith_reg_operand")
+ (match_operand:SF 2 "fp_arith_reg_operand")
+ (match_operand:SF 3 "fp_arith_reg_operand")))]
"TARGET_SH2E || TARGET_SHMEDIA_FPU"
{
if (TARGET_SH2E)
"fmac.s %1, %2, %0"
[(set_attr "type" "fparith_media")])
+;; For some cases such as 'a * b + a' the FMA pattern is not generated by
+;; previous transformations.  If FMA is generally allowed (contraction is not
+;; off), let combine match plus/mult here; it is then split into fma:SF.
+(define_insn_and_split "*fmasf4"
+ [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f")
+ (plus:SF (mult:SF (match_operand:SF 1 "fp_arith_reg_operand" "%w")  ; '%': commutative with operand 2
+ (match_operand:SF 2 "fp_arith_reg_operand" "f"))
+ (match_operand:SF 3 "arith_reg_operand" "0")))  ; "0": addend is tied to operand 0
+ (use (match_operand:PSI 4 "fpscr_operand"))]
+ "TARGET_SH2E && flag_fp_contract_mode != FP_CONTRACT_OFF"
+ "fmac %1,%2,%0"
+ "&& can_create_pseudo_p ()"  ; split condition, ANDed with the insn condition above
+ [(parallel [(set (match_dup 0)
+ (fma:SF (match_dup 1) (match_dup 2) (match_dup 3)))
+ (use (match_dup 4))])]
+{
+ /* Change 'b * a + a' into 'a * b + a' (swap the mult operands when the
+ second one is the addend's register); better for register allocation. */
+ if (REGNO (operands[2]) == REGNO (operands[3]))
+ {
+ rtx tmp = operands[1];
+ operands[1] = operands[2];
+ operands[2] = tmp;
+ }
+}
+ [(set_attr "type" "fp")
+ (set_attr "fp_mode" "single")])
+
+(define_insn "*fmasf4_media"  ; plus/mult form of FMA for TARGET_SHMEDIA_FPU; plain insn, no split
+ [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f")
+ (plus:SF (mult:SF (match_operand:SF 1 "fp_arith_reg_operand" "%f")  ; '%': commutative with operand 2
+ (match_operand:SF 2 "fp_arith_reg_operand" "f"))
+ (match_operand:SF 3 "fp_arith_reg_operand" "0")))]  ; "0": addend is tied to operand 0
+ "TARGET_SHMEDIA_FPU && flag_fp_contract_mode != FP_CONTRACT_OFF"  ; only when contraction is not off
+ "fmac.s %1, %2, %0"
+ [(set_attr "type" "fparith_media")])
+
(define_expand "divsf3"
[(set (match_operand:SF 0 "arith_reg_operand" "")
(div:SF (match_operand:SF 1 "arith_reg_operand" "")
--- /dev/null
+/* Verify that the fmac insn is used for the expressions 'a * b + a' and
+ 'a * a + a'.
+ This assumes that the default compiler setting is -ffp-contract=fast. */
+/* { dg-do compile { target "sh*-*-*" } } */
+/* { dg-options "-O1" } */
+/* { dg-skip-if "" { "sh*-*-*" } { "-m1" "-m2" "-m3" "-m4al" "*nofpu" "-m4-340*" "-m4-400*" "-m4-500*" "-m5*" } { "" } } */
+/* { dg-final { scan-assembler-times "fmac" 2 } } */
+
+float
+test_00 (float a, float b)
+{
+ return a * b + a; /* The addend is also the first multiplicand.  */
+}
+
+float
+test_01 (float a)
+{
+ return a * a + a; /* Both multiplicands and the addend are the same value.  */
+}
--- /dev/null
+/* Verify that the fmac insn is used for the expressions 'a * b + a' and
+ 'a * a + a' when -ffast-math is specified. */
+/* { dg-do compile { target "sh*-*-*" } } */
+/* { dg-options "-O1 -ffast-math" } */
+/* { dg-skip-if "" { "sh*-*-*" } { "-m1" "-m2" "-m3" "-m4al" "*nofpu" "-m4-340*" "-m4-400*" "-m4-500*" "-m5*" } { "" } } */
+/* { dg-final { scan-assembler-times "fmac" 2 } } */
+
+float
+test_00 (float a, float b)
+{
+ return a * b + a; /* The addend is also the first multiplicand.  */
+}
+
+float
+test_01 (float a)
+{
+ return a * a + a; /* Both multiplicands and the addend are the same value.  */
+}