;; ARM ldrd/strd peephole optimizations.
;;
-;; Copyright (C) 2013-2017 Free Software Foundation, Inc.
+;; Copyright (C) 2013-2020 Free Software Foundation, Inc.
;;
;; Written by Greta Yorsh <greta.yorsh@arm.com>
;; The following peephole optimizations identify consecutive memory
;; accesses, and try to rearrange the operands to enable generation of
;; ldrd/strd.
+;;
+;; In many cases they behave in the same way that patterns in ldmstm.md behave,
+;; but there is extra logic in gen_operands_ldrd_strd to try and ensure the
+;; registers used are an (r<N>, r<N + 1>) pair where N is even.
;; Match two SImode loads from memory (operands 2 and 3) into general
;; registers (operands 0 and 1) when TARGET_LDRD holds.  The replacement
;; is a single PARALLEL containing both loads; the peephole FAILs when
;; gen_operands_ldrd_strd rejects the operands.
(define_peephole2 ; ldrd
[(set (match_operand:SI 0 "arm_general_register_operand" "")
- (match_operand:SI 2 "memory_operand" ""))
+ (match_operand:SI 2 "memory_operand" ""))
(set (match_operand:SI 1 "arm_general_register_operand" "")
- (match_operand:SI 3 "memory_operand" ""))]
+ (match_operand:SI 3 "memory_operand" ""))]
"TARGET_LDRD"
- [(const_int 0)]
+ [(parallel [(set (match_dup 0) (match_dup 2))
+ (set (match_dup 1) (match_dup 3))])]
{
if (!gen_operands_ldrd_strd (operands, true, false, false))
FAIL;
- else if (TARGET_ARM)
- {
- /* In ARM state, the destination registers of LDRD/STRD must be
- consecutive. We emit DImode access. */
- operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
- operands[2] = adjust_address (operands[2], DImode, 0);
- /* Emit [(set (match_dup 0) (match_dup 2))] */
- emit_insn (gen_rtx_SET (operands[0], operands[2]));
- DONE;
- }
- else if (TARGET_THUMB2)
- {
- /* Emit the pattern:
- [(parallel [(set (match_dup 0) (match_dup 2))
- (set (match_dup 1) (match_dup 3))])] */
- rtx t1 = gen_rtx_SET (operands[0], operands[2]);
- rtx t2 = gen_rtx_SET (operands[1], operands[3]);
- emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
- DONE;
- }
})
;; Store counterpart of the ldrd peephole: registers (operands 0 and 1)
;; are stored to memory (operands 2 and 3).  The replacement is a single
;; PARALLEL containing both stores; the peephole FAILs when
;; gen_operands_ldrd_strd rejects the operands.
(define_peephole2 ; strd
(set (match_operand:SI 3 "memory_operand" "")
(match_operand:SI 1 "arm_general_register_operand" ""))]
"TARGET_LDRD"
- [(const_int 0)]
+ [(parallel [(set (match_dup 2) (match_dup 0))
+ (set (match_dup 3) (match_dup 1))])]
{
if (!gen_operands_ldrd_strd (operands, false, false, false))
FAIL;
- else if (TARGET_ARM)
- {
- /* In ARM state, the destination registers of LDRD/STRD must be
- consecutive. We emit DImode access. */
- operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
- operands[2] = adjust_address (operands[2], DImode, 0);
- /* Emit [(set (match_dup 2) (match_dup 0))] */
- emit_insn (gen_rtx_SET (operands[2], operands[0]));
- DONE;
- }
- else if (TARGET_THUMB2)
- {
- /* Emit the pattern:
- [(parallel [(set (match_dup 2) (match_dup 0))
- (set (match_dup 3) (match_dup 1))])] */
- rtx t1 = gen_rtx_SET (operands[2], operands[0]);
- rtx t2 = gen_rtx_SET (operands[3], operands[1]);
- emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
- DONE;
- }
})
;; The following peepholes reorder registers to enable LDRD/STRD.
;; Input order matched here: constant move into operand 0, store of
;; operand 0 to memory operand 2, constant move into operand 1, store of
;; operand 1 to memory operand 3.  The replacement emits both constant
;; moves first and then a PARALLEL of the two stores.  The peephole FAILs
;; when gen_operands_ldrd_strd rejects the operands.
(define_peephole2 ; strd of constants
[(set (match_operand:SI 0 "arm_general_register_operand" "")
- (match_operand:SI 4 "const_int_operand" ""))
+ (match_operand:SI 4 "const_int_operand" ""))
(set (match_operand:SI 2 "memory_operand" "")
- (match_dup 0))
+ (match_dup 0))
(set (match_operand:SI 1 "arm_general_register_operand" "")
- (match_operand:SI 5 "const_int_operand" ""))
+ (match_operand:SI 5 "const_int_operand" ""))
(set (match_operand:SI 3 "memory_operand" "")
- (match_dup 1))]
+ (match_dup 1))]
"TARGET_LDRD"
- [(const_int 0)]
+ [(set (match_dup 0) (match_dup 4))
+ (set (match_dup 1) (match_dup 5))
+ (parallel [(set (match_dup 2) (match_dup 0))
+ (set (match_dup 3) (match_dup 1))])]
{
if (!gen_operands_ldrd_strd (operands, false, true, false))
FAIL;
- else if (TARGET_ARM)
- {
- rtx tmp = gen_rtx_REG (DImode, REGNO (operands[0]));
- operands[2] = adjust_address (operands[2], DImode, 0);
- /* Emit the pattern:
- [(set (match_dup 0) (match_dup 4))
- (set (match_dup 1) (match_dup 5))
- (set (match_dup 2) tmp)] */
- emit_insn (gen_rtx_SET (operands[0], operands[4]));
- emit_insn (gen_rtx_SET (operands[1], operands[5]));
- emit_insn (gen_rtx_SET (operands[2], tmp));
- DONE;
- }
- else if (TARGET_THUMB2)
- {
- /* Emit the pattern:
- [(set (match_dup 0) (match_dup 4))
- (set (match_dup 1) (match_dup 5))
- (parallel [(set (match_dup 2) (match_dup 0))
- (set (match_dup 3) (match_dup 1))])] */
- emit_insn (gen_rtx_SET (operands[0], operands[4]));
- emit_insn (gen_rtx_SET (operands[1], operands[5]));
- rtx t1 = gen_rtx_SET (operands[2], operands[0]);
- rtx t2 = gen_rtx_SET (operands[3], operands[1]);
- emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
- DONE;
- }
})
;; Input order matched here: constant moves into operands 0 and 1 first,
;; then the stores of operand 0 to memory operand 2 and of operand 1 to
;; memory operand 3.  The replacement keeps both constant moves and turns
;; the two stores into one PARALLEL.  The peephole FAILs when
;; gen_operands_ldrd_strd rejects the operands.
(define_peephole2 ; strd of constants
[(set (match_operand:SI 0 "arm_general_register_operand" "")
- (match_operand:SI 4 "const_int_operand" ""))
+ (match_operand:SI 4 "const_int_operand" ""))
(set (match_operand:SI 1 "arm_general_register_operand" "")
- (match_operand:SI 5 "const_int_operand" ""))
+ (match_operand:SI 5 "const_int_operand" ""))
(set (match_operand:SI 2 "memory_operand" "")
- (match_dup 0))
+ (match_dup 0))
(set (match_operand:SI 3 "memory_operand" "")
- (match_dup 1))]
+ (match_dup 1))]
"TARGET_LDRD"
- [(const_int 0)]
+ [(set (match_dup 0) (match_dup 4))
+ (set (match_dup 1) (match_dup 5))
+ (parallel [(set (match_dup 2) (match_dup 0))
+ (set (match_dup 3) (match_dup 1))])]
{
if (!gen_operands_ldrd_strd (operands, false, true, false))
FAIL;
- else if (TARGET_ARM)
- {
- rtx tmp = gen_rtx_REG (DImode, REGNO (operands[0]));
- operands[2] = adjust_address (operands[2], DImode, 0);
- /* Emit the pattern
- [(set (match_dup 0) (match_dup 4))
- (set (match_dup 1) (match_dup 5))
- (set (match_dup 2) tmp)] */
- emit_insn (gen_rtx_SET (operands[0], operands[4]));
- emit_insn (gen_rtx_SET (operands[1], operands[5]));
- emit_insn (gen_rtx_SET (operands[2], tmp));
- DONE;
- }
- else if (TARGET_THUMB2)
- {
- /* Emit the pattern:
- [(set (match_dup 0) (match_dup 4))
- (set (match_dup 1) (match_dup 5))
- (parallel [(set (match_dup 2) (match_dup 0))
- (set (match_dup 3) (match_dup 1))])] */
- emit_insn (gen_rtx_SET (operands[0], operands[4]));
- emit_insn (gen_rtx_SET (operands[1], operands[5]));
- rtx t1 = gen_rtx_SET (operands[2], operands[0]);
- rtx t2 = gen_rtx_SET (operands[3], operands[1]);
- emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
- DONE;
- }
})
;; The following two peephole optimizations are only relevant for ARM state.
;; Match two SImode loads followed by a commutative binary operation whose
;; inputs (operands 6 and 7) are the two loaded registers, in either order.
;; The condition also requires, for each loaded register, that
;; peep2_reg_dead_p (3, reg) holds or that the register is the result
;; register (operand 4).  The replacement is a PARALLEL of the two loads
;; followed by the operation; the peephole FAILs when
;; gen_operands_ldrd_strd rejects the operands.
(define_peephole2 ; swap the destination registers of two loads
; before a commutative operation.
[(set (match_operand:SI 0 "arm_general_register_operand" "")
- (match_operand:SI 2 "memory_operand" ""))
+ (match_operand:SI 2 "memory_operand" ""))
(set (match_operand:SI 1 "arm_general_register_operand" "")
- (match_operand:SI 3 "memory_operand" ""))
+ (match_operand:SI 3 "memory_operand" ""))
(set (match_operand:SI 4 "arm_general_register_operand" "")
- (match_operator:SI 5 "commutative_binary_operator"
+ (match_operator:SI 5 "commutative_binary_operator"
[(match_operand 6 "arm_general_register_operand" "")
(match_operand 7 "arm_general_register_operand" "") ]))]
"TARGET_LDRD && TARGET_ARM
&& ( ((rtx_equal_p(operands[0], operands[6])) && (rtx_equal_p(operands[1], operands[7])))
- ||((rtx_equal_p(operands[0], operands[7])) && (rtx_equal_p(operands[1], operands[6]))))
+ ||((rtx_equal_p(operands[0], operands[7])) && (rtx_equal_p(operands[1], operands[6]))))
&& (peep2_reg_dead_p (3, operands[0]) || rtx_equal_p (operands[0], operands[4]))
&& (peep2_reg_dead_p (3, operands[1]) || rtx_equal_p (operands[1], operands[4]))"
- [(set (match_dup 0) (match_dup 2))
+ [(parallel [(set (match_dup 0) (match_dup 2))
+ (set (match_dup 1) (match_dup 3))])
(set (match_dup 4) (match_op_dup 5 [(match_dup 6) (match_dup 7)]))]
- {
- if (!gen_operands_ldrd_strd (operands, true, false, true))
- {
- FAIL;
- }
- else
- {
- operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
- operands[2] = adjust_address (operands[2], DImode, 0);
- }
- }
-)
+{
+ if (!gen_operands_ldrd_strd (operands, true, false, true))
+ FAIL;
+})
;; Variant in which the commutative operation sits inside a PARALLEL; the
;; replacement's PARALLEL carries a clobber of the CC_REGNUM register.
;; The two loads are replaced by a PARALLEL of both loads, and the
;; peephole FAILs when gen_operands_ldrd_strd rejects the operands.
(define_peephole2 ; swap the destination registers of two loads
; before a commutative operation that sets the flags.
[(set (match_operand:SI 0 "arm_general_register_operand" "")
- (match_operand:SI 2 "memory_operand" ""))
+ (match_operand:SI 2 "memory_operand" ""))
(set (match_operand:SI 1 "arm_general_register_operand" "")
- (match_operand:SI 3 "memory_operand" ""))
+ (match_operand:SI 3 "memory_operand" ""))
(parallel
[(set (match_operand:SI 4 "arm_general_register_operand" "")
(match_operator:SI 5 "commutative_binary_operator"
||((rtx_equal_p(operands[0], operands[7])) && (rtx_equal_p(operands[1], operands[6]))))
&& (peep2_reg_dead_p (3, operands[0]) || rtx_equal_p (operands[0], operands[4]))
&& (peep2_reg_dead_p (3, operands[1]) || rtx_equal_p (operands[1], operands[4]))"
- [(set (match_dup 0) (match_dup 2))
+ [(parallel [(set (match_dup 0) (match_dup 2))
+ (set (match_dup 1) (match_dup 3))])
(parallel
[(set (match_dup 4)
(match_op_dup 5 [(match_dup 6) (match_dup 7)]))
(clobber (reg:CC CC_REGNUM))])]
- {
- if (!gen_operands_ldrd_strd (operands, true, false, true))
- {
- FAIL;
- }
- else
- {
- operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
- operands[2] = adjust_address (operands[2], DImode, 0);
- }
- }
-)
+{
+ if (!gen_operands_ldrd_strd (operands, true, false, true))
+ FAIL;
+})
;; TODO: Handle LDRD/STRD with writeback:
;; (a) memory operands can be POST_INC, POST_DEC, PRE_MODIFY, POST_MODIFY
;; (b) Patterns may be followed by an update of the base address.
+
+
+;; Insns matching the LDRD/STRD patterns that will get created by the above
+;; peepholes.
+;; Their conditions call valid_operands_ldrd_strd () rather than
+;; gen_operands_ldrd_strd () so that the operands are not changed.
;; Recognizes a PARALLEL of two SImode loads (memory operands 2 and 3 into
;; registers 0 and 1).  The condition requires TARGET_LDRD, TARGET_ARM,
;; reload_completed and valid_operands_ldrd_strd (operands, true).
;; The output code builds a DImode register from the register number of
;; operand 0 and a DImode view of memory operand 2, then hands both to
;; output_move_double.  The "length" attribute is
;; arm_count_ldrdstrd_insns (operands, true) * 4 and "ce_count" is that
;; length divided by 4.
+(define_insn "*arm_ldrd"
+ [(parallel [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (match_operand:SI 2 "memory_operand" "m"))
+ (set (match_operand:SI 1 "s_register_operand" "=rk")
+ (match_operand:SI 3 "memory_operand" "m"))])]
+ "TARGET_LDRD && TARGET_ARM && reload_completed
+ && valid_operands_ldrd_strd (operands, true)"
+ {
+ rtx op[2];
+ op[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
+ op[1] = adjust_address (operands[2], DImode, 0);
+ return output_move_double (op, true, NULL);
+ }
+ [(set (attr "length")
+ (symbol_ref "arm_count_ldrdstrd_insns (operands, true) * 4"))
+ (set (attr "ce_count") (symbol_ref "get_attr_length (insn) / 4"))
+ (set_attr "type" "load_8")
+ (set_attr "predicable" "yes")]
+)
+
;; Recognizes a PARALLEL of two SImode stores (registers 0 and 1 into
;; memory operands 2 and 3).  The condition requires TARGET_LDRD,
;; TARGET_ARM, reload_completed and
;; valid_operands_ldrd_strd (operands, false).
;; The output code builds a DImode view of memory operand 2 as the
;; destination and a DImode register from the register number of
;; operand 0 as the source, then hands both to output_move_double.
;; The "length" attribute is arm_count_ldrdstrd_insns (operands, false) * 4
;; and "ce_count" is that length divided by 4.
+(define_insn "*arm_strd"
+ [(parallel [(set (match_operand:SI 2 "memory_operand" "=m")
+ (match_operand:SI 0 "s_register_operand" "r"))
+ (set (match_operand:SI 3 "memory_operand" "=m")
+ (match_operand:SI 1 "s_register_operand" "rk"))])]
+ "TARGET_LDRD && TARGET_ARM && reload_completed
+ && valid_operands_ldrd_strd (operands, false)"
+ {
+ rtx op[2];
+ op[0] = adjust_address (operands[2], DImode, 0);
+ op[1] = gen_rtx_REG (DImode, REGNO (operands[0]));
+ return output_move_double (op, true, NULL);
+ }
+ [(set (attr "length")
+ (symbol_ref "arm_count_ldrdstrd_insns (operands, false) * 4"))
+ (set (attr "ce_count") (symbol_ref "get_attr_length (insn) / 4"))
+ (set_attr "type" "store_8")
+ (set_attr "predicable" "yes")]
+)