The IRA combine_and_move pass, which runs when the scheduler is disabled,
aggressively combines moves. The movsf/movdf patterns accept all FP immediates
because they rely on a later split pattern. However, splits do not run during
IRA, so the result is extra literal loads. To avoid this, split such immediates
early during expand and block creation of FP immediates that would need this
split. Mark a few testcases that rely on late splitting as xfail.
For example:

double f(void) { return 128.0; }

compiled with -O2 -fno-schedule-insns gives:

	adrp	x0, .LC0
	ldr	d0, [x0, #:lo12:.LC0]
	ret

After the patch:

	mov	x0, 4638707616191610880
	fmov	d0, x0
	ret
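
Constants that are already valid fmov or movi immediates are not affected and
keep their current code. As a minimal illustration (not part of the patch,
assuming default -O2 codegen), a representable constant such as 1.0 should
still be emitted as a single fmov:

double g(void) { return 1.0; }

	fmov	d0, 1.0e+0
	ret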
Passes bootstrap & regress, OK for commit?
gcc:
* config/aarch64/aarch64.md (mov<mode>): Split FP immediates that
need it early during expand.
(movhf_aarch64): Use aarch64_valid_fp_move.
(movsf_aarch64): Likewise.
(movdf_aarch64): Likewise.
(GPF_HF define_split): Remove.
* config/aarch64/aarch64.cc (aarch64_valid_fp_move): New function.
* config/aarch64/aarch64-protos.h (aarch64_valid_fp_move): New.
gcc/testsuite:
* gcc.target/aarch64/dbl_mov_immediate_1.c: Add xfail for -0.0.
* gcc.target/aarch64/fmul_scvtf_1.c: Fixup test cases, add xfail,
reduce duplication.
opt_machine_mode aarch64_v128_mode (scalar_mode);
opt_machine_mode aarch64_full_sve_mode (scalar_mode);
bool aarch64_can_const_movi_rtx_p (rtx x, machine_mode mode);
+bool aarch64_valid_fp_move (rtx, rtx, machine_mode);
bool aarch64_const_vec_all_same_int_p (rtx, HOST_WIDE_INT);
bool aarch64_const_vec_all_same_in_range_p (rtx, HOST_WIDE_INT,
HOST_WIDE_INT);
return aarch64_simd_valid_mov_imm (v_op);
}
+/* Return TRUE if moving SRC to DST of mode MODE is a valid FP move.  */
+bool
+aarch64_valid_fp_move (rtx dst, rtx src, machine_mode mode)
+{
+ if (!TARGET_FLOAT)
+ return false;
+
+ if (aarch64_reg_or_fp_zero (src, mode))
+ return true;
+
+ if (!register_operand (dst, mode))
+ return false;
+
+ if (MEM_P (src))
+ return true;
+
+ if (!DECIMAL_FLOAT_MODE_P (mode))
+ {
+ if (aarch64_can_const_movi_rtx_p (src, mode)
+ || aarch64_float_const_representable_p (src)
+ || aarch64_float_const_zero_rtx_p (src))
+ return true;
+
+ /* Block FP immediates which are split during expand. */
+ if (aarch64_float_const_rtx_p (src))
+ return false;
+ }
+
+ return can_create_pseudo_p ();
+}
/* Return the fixed registers used for condition codes. */
&& ! (GET_CODE (operands[1]) == CONST_DOUBLE
&& aarch64_float_const_zero_rtx_p (operands[1])))
operands[1] = force_reg (<MODE>mode, operands[1]);
+
+ if (!DECIMAL_FLOAT_MODE_P (<MODE>mode)
+ && GET_CODE (operands[1]) == CONST_DOUBLE
+ && can_create_pseudo_p ()
+ && !aarch64_can_const_movi_rtx_p (operands[1], <MODE>mode)
+ && !aarch64_float_const_representable_p (operands[1])
+ && !aarch64_float_const_zero_rtx_p (operands[1])
+ && aarch64_float_const_rtx_p (operands[1]))
+ {
+ unsigned HOST_WIDE_INT ival;
+ bool res = aarch64_reinterpret_float_as_int (operands[1], &ival);
+ gcc_assert (res);
+
+ machine_mode intmode
+ = int_mode_for_size (GET_MODE_BITSIZE (<MODE>mode), 0).require ();
+ rtx tmp = gen_reg_rtx (intmode);
+ emit_move_insn (tmp, gen_int_mode (ival, intmode));
+ emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
+ DONE;
+ }
}
)
(define_insn "*mov<mode>_aarch64"
[(set (match_operand:HFBF 0 "nonimmediate_operand")
(match_operand:HFBF 1 "general_operand"))]
- "TARGET_FLOAT && (register_operand (operands[0], <MODE>mode)
- || aarch64_reg_or_fp_zero (operands[1], <MODE>mode))"
+ "aarch64_valid_fp_move (operands[0], operands[1], <MODE>mode)"
{@ [ cons: =0 , 1 ; attrs: type , arch ]
[ w , Y ; neon_move , simd ] movi\t%0.4h, #0
[ w , ?rY ; f_mcr , fp16 ] fmov\t%h0, %w1
(define_insn "*mov<mode>_aarch64"
[(set (match_operand:SFD 0 "nonimmediate_operand")
(match_operand:SFD 1 "general_operand"))]
- "TARGET_FLOAT && (register_operand (operands[0], <MODE>mode)
- || aarch64_reg_or_fp_zero (operands[1], <MODE>mode))"
+ "aarch64_valid_fp_move (operands[0], operands[1], <MODE>mode)"
{@ [ cons: =0 , 1 ; attrs: type , arch ]
[ w , Y ; neon_move , simd ] movi\t%0.2s, #0
[ w , ?rY ; f_mcr , * ] fmov\t%s0, %w1
(define_insn "*mov<mode>_aarch64"
[(set (match_operand:DFD 0 "nonimmediate_operand")
(match_operand:DFD 1 "general_operand"))]
- "TARGET_FLOAT && (register_operand (operands[0], <MODE>mode)
- || aarch64_reg_or_fp_zero (operands[1], <MODE>mode))"
+ "aarch64_valid_fp_move (operands[0], operands[1], <MODE>mode)"
{@ [ cons: =0 , 1 ; attrs: type , arch ]
[ w , Y ; neon_move , simd ] movi\t%d0, #0
[ w , ?rY ; f_mcr , * ] fmov\t%d0, %x1
}
)
-(define_split
- [(set (match_operand:GPF_HF 0 "nonimmediate_operand")
- (match_operand:GPF_HF 1 "const_double_operand"))]
- "can_create_pseudo_p ()
- && !aarch64_can_const_movi_rtx_p (operands[1], <MODE>mode)
- && !aarch64_float_const_representable_p (operands[1])
- && !aarch64_float_const_zero_rtx_p (operands[1])
- && aarch64_float_const_rtx_p (operands[1])"
- [(const_int 0)]
- {
- unsigned HOST_WIDE_INT ival;
- if (!aarch64_reinterpret_float_as_int (operands[1], &ival))
- FAIL;
-
- rtx tmp = gen_reg_rtx (<FCVT_TARGET>mode);
- emit_move_insn (tmp, gen_int_mode (ival, <FCVT_TARGET>mode));
- emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
- DONE;
- }
-)
-
(define_insn "*mov<mode>_aarch64"
[(set (match_operand:TFD 0
"nonimmediate_operand" "=w,w,?r ,w ,?r,w,?w,w,m,?r,m ,m")
/* { dg-final { scan-assembler-times "mov\tx\[0-9\]+, 25838523252736" 1 } } */
/* { dg-final { scan-assembler-times "movk\tx\[0-9\]+, 0x40fe, lsl 48" 1 } } */
-/* { dg-final { scan-assembler-times "mov\tx\[0-9\]+, -9223372036854775808" 0 } } */
-/* { dg-final { scan-assembler-times {movi\tv[0-9]+.4s, #?0} 1 } } */
-/* { dg-final { scan-assembler-times {fneg\tv[0-9]+.2d, v[0-9]+.2d} 1 } } */
-/* { dg-final { scan-assembler-times "fmov\td\[0-9\]+, x\[0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-times "mov\tx\[0-9\]+, -9223372036854775808" 0 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {movi\tv[0-9]+.4s, #?0} 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {fneg\tv[0-9]+.2d, v[0-9]+.2d} 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "fmov\td\[0-9\]+, x\[0-9\]+" 1 { xfail *-*-* } } } */
return ((double) x)/(1lu << __a); \
}
-FUNC_DEFS (4)
- /* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], w\[0-9\]*.*#4" 1 } } */
- /* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], w\[0-9\]*.*#4" 1 } } */
- /* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], x\[0-9\]*.*#4" 1 } } */
- /* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], x\[0-9\]*.*#4" 1 } } */
+FUNC_DEFS (2)
+ /* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], w\[0-9\]*.*#2" 1 } } */
+ /* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], w\[0-9\]*.*#2" 1 } } */
+ /* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], x\[0-9\]*.*#2" 1 } } */
+ /* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], x\[0-9\]*.*#2" 1 } } */
-FUNC_DEFD (4)
- /* { dg-final { scan-assembler-times "scvtf\td\[0-9\], w\[0-9\]*.*#4" 1 } } */
- /* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], w\[0-9\]*.*#4" 1 } } */
- /* { dg-final { scan-assembler-times "scvtf\td\[0-9\], x\[0-9\]*.*#4" 1 } } */
- /* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], x\[0-9\]*.*#4" 1 } } */
-
-FUNC_DEFS (8)
- /* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], w\[0-9\]*.*#8" 1 } } */
- /* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], w\[0-9\]*.*#8" 1 } } */
- /* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], x\[0-9\]*.*#8" 1 } } */
- /* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], x\[0-9\]*.*#8" 1 } } */
-
-FUNC_DEFD (8)
- /* { dg-final { scan-assembler-times "scvtf\td\[0-9\], w\[0-9\]*.*#8" 1 } } */
- /* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], w\[0-9\]*.*#8" 1 } } */
- /* { dg-final { scan-assembler-times "scvtf\td\[0-9\], x\[0-9\]*.*#8" 1 } } */
- /* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], x\[0-9\]*.*#8" 1 } } */
-
-FUNC_DEFS (16)
- /* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], w\[0-9\]*.*#16" 1 } } */
- /* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], w\[0-9\]*.*#16" 1 } } */
- /* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], x\[0-9\]*.*#16" 1 } } */
- /* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], x\[0-9\]*.*#16" 1 } } */
-
-FUNC_DEFD (16)
- /* { dg-final { scan-assembler-times "scvtf\td\[0-9\], w\[0-9\]*.*#16" 1 } } */
- /* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], w\[0-9\]*.*#16" 1 } } */
- /* { dg-final { scan-assembler-times "scvtf\td\[0-9\], x\[0-9\]*.*#16" 1 } } */
- /* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], x\[0-9\]*.*#16" 1 } } */
+FUNC_DEFD (2)
+ /* { dg-final { scan-assembler-times "scvtf\td\[0-9\], w\[0-9\]*.*#2" 1 } } */
+ /* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], w\[0-9\]*.*#2" 1 } } */
+ /* { dg-final { scan-assembler-times "scvtf\td\[0-9\], x\[0-9\]*.*#2" 1 } } */
+ /* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], x\[0-9\]*.*#2" 1 } } */
FUNC_DEFS (32)
- /* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], w\[0-9\]*.*#32" 1 } } */
- /* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], w\[0-9\]*.*#32" 1 } } */
- /* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], x\[0-9\]*.*#32" 1 } } */
- /* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], x\[0-9\]*.*#32" 1 } } */
+ /* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], w\[0-9\]*.*#32" 1 { xfail *-*-* } } } */
+ /* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], w\[0-9\]*.*#32" 1 { xfail *-*-* } } } */
+ /* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], x\[0-9\]*.*#32" 1 { xfail *-*-* } } } */
+ /* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], x\[0-9\]*.*#32" 1 { xfail *-*-* } } } */
FUNC_DEFD (32)
- /* { dg-final { scan-assembler-times "scvtf\td\[0-9\], w\[0-9\]*.*#32" 1 } } */
- /* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], w\[0-9\]*.*#32" 1 } } */
- /* { dg-final { scan-assembler-times "scvtf\td\[0-9\], x\[0-9\]*.*#32" 1 } } */
- /* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], x\[0-9\]*.*#32" 1 } } */
+ /* { dg-final { scan-assembler-times "scvtf\td\[0-9\], w\[0-9\]*.*#32" 1 { xfail *-*-* } } } */
+ /* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], w\[0-9\]*.*#32" 1 { xfail *-*-* } } } */
+ /* { dg-final { scan-assembler-times "scvtf\td\[0-9\], x\[0-9\]*.*#32" 1 { xfail *-*-* } } } */
+ /* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], x\[0-9\]*.*#32" 1 { xfail *-*-* } } } */
#define FUNC_TESTS(__a, __b) \
do \
for (i = 0; i < 32; i ++)
{
- FUNC_TESTS (4, i);
- FUNC_TESTS (8, i);
- FUNC_TESTS (16, i);
+ FUNC_TESTS (2, i);
FUNC_TESTS (32, i);
- FUNC_TESTD (4, i);
- FUNC_TESTD (8, i);
- FUNC_TESTD (16, i);
+ FUNC_TESTD (2, i);
FUNC_TESTD (32, i);
}
return 0;