const0_rtx);
})
+;; For APX NDD PLUS/MINUS/LOGIC
+;; Like cmpelim optimized pattern.
+;; Reduce an extra mov instruction like
+;; decl (%rdi), %eax
+;; mov %eax, (%rdi)
+;; to
+;; decl (%rdi)
+(define_peephole2
+  [(parallel [(set (reg FLAGS_REG)
+                   (compare (match_operator:SWI 2 "plusminuslogic_operator"
+                              [(match_operand:SWI 0 "memory_operand")
+                               (match_operand:SWI 1 "<nonmemory_operand>")])
+                            (const_int 0)))
+              (set (match_operand:SWI 3 "register_operand") (match_dup 2))])
+   (set (match_dup 0) (match_dup 3))]
+  "TARGET_APX_NDD
+   && (TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+   /* The NDD destination only carries the result to the store, so it
+      must not be needed afterwards and must not take part in the
+      address of the memory operand.  */
+   && peep2_reg_dead_p (2, operands[3])
+   && !reg_overlap_mentioned_p (operands[3], operands[0])
+   /* PLUS and MINUS are checked against CCGOCmode, every other
+      operator accepted by the predicate against CCNOmode.  */
+   && ix86_match_ccmode (peep2_next_insn (0),
+                         (GET_CODE (operands[2]) != PLUS
+                          && GET_CODE (operands[2]) != MINUS)
+                         ? CCNOmode : CCGOCmode)"
+  [(parallel [(set (match_dup 4) (match_dup 6))
+              (set (match_dup 0) (match_dup 5))])]
+{
+  /* Reuse the flags destination of the matched arithmetic insn so the
+     replacement compare gets the same CC mode.  */
+  rtx flags_set = XVECEXP (PATTERN (peep2_next_insn (0)), 0, 0);
+  operands[4] = SET_DEST (flags_set);
+
+  /* Rebuild the operation with the same code and mode; it is used as
+     the source of the store back to the memory operand.  */
+  enum rtx_code op_code = GET_CODE (operands[2]);
+  machine_mode op_mode = GET_MODE (operands[2]);
+  operands[5] = gen_rtx_fmt_ee (op_code, op_mode,
+                                copy_rtx (operands[0]), operands[1]);
+
+  /* Flags are set from comparing that result against zero.  */
+  operands[6] = gen_rtx_COMPARE (GET_MODE (operands[4]),
+                                 copy_rtx (operands[5]), const0_rtx);
+})
+
;; Likewise for instances where we have a lea pattern.
(define_peephole2
[(set (match_operand:SWI 0 "register_operand")
const0_rtx);
})
+;; For APX NDD XOR
+;; Reduce 2 mov and 1 cmp instruction.
+;; from
+;; movq (%rdi), %rax
+;; xorq %rsi, %rax, %rdx
+;; movq %rdx, (%rdi)
+;; cmpq %rsi, %rax
+;; jne
+;; to
+;; xorq %rsi, (%rdi)
+;; jne
+;; Operand roles: 0 = register loaded from memory, 1 = the memory
+;; location, 2 = XOR source, 3 = XOR register input (same register as 0),
+;; 4 = NDD destination that is stored back, 5/6 = compare operands.
+(define_peephole2
+  [(set (match_operand:SWI 0 "register_operand")
+        (match_operand:SWI 1 "memory_operand"))
+   (parallel [(set (match_operand:SWI 4 "register_operand")
+                   (xor:SWI (match_operand:SWI 3 "register_operand")
+                            (match_operand:SWI 2 "<nonmemory_operand>")))
+              (clobber (reg:CC FLAGS_REG))])
+   (set (match_dup 1) (match_dup 4))
+   (set (reg:CCZ FLAGS_REG)
+        (compare:CCZ (match_operand:SWI 5 "register_operand")
+                     (match_operand:SWI 6 "<nonmemory_operand>")))]
+  "TARGET_APX_NDD
+   && (TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+   /* The XOR must consume the value that was just loaded.  */
+   && REGNO (operands[3]) == REGNO (operands[0])
+   /* The compare must be between the loaded value and the XOR source,
+      in either operand order; for CCZ that is the same test as
+      comparing the XOR result against zero.  */
+   && (rtx_equal_p (operands[0], operands[5])
+       ? rtx_equal_p (operands[2], operands[6])
+       : rtx_equal_p (operands[2], operands[5])
+         && rtx_equal_p (operands[0], operands[6]))
+   /* Both temporaries disappear, so neither may be needed afterwards.  */
+   && peep2_reg_dead_p (3, operands[4])
+   && peep2_reg_dead_p (4, operands[0])
+   && !reg_overlap_mentioned_p (operands[0], operands[1])
+   /* The NDD destination is written between the load and the store.
+      If it took part in the address of operand 1 the two accesses
+      would not refer to the same location, although match_dup 1
+      matches structurally.  */
+   && !reg_overlap_mentioned_p (operands[4], operands[1])
+   && !reg_overlap_mentioned_p (operands[0], operands[2])
+   /* In QImode the XOR source must be an immediate or a register
+      accepted by any_QIreg_operand.  */
+   && (<MODE>mode != QImode
+       || immediate_operand (operands[2], QImode)
+       || any_QIreg_operand (operands[2], QImode))"
+  [(parallel [(set (match_dup 7) (match_dup 9))
+              (set (match_dup 1) (match_dup 8))])]
+{
+  /* Flags destination of the matched compare (fourth insn).  */
+  operands[7] = SET_DEST (PATTERN (peep2_next_insn (3)));
+  /* XOR applied directly to the memory operand.  */
+  operands[8] = gen_rtx_XOR (<MODE>mode, copy_rtx (operands[1]),
+                             operands[2]);
+  /* Flags come from comparing that XOR result against zero.  */
+  operands[9]
+    = gen_rtx_COMPARE (GET_MODE (operands[7]),
+                       copy_rtx (operands[8]),
+                       const0_rtx);
+})
+
(define_peephole2
[(set (match_operand:SWI12 0 "register_operand")
(match_operand:SWI12 1 "memory_operand"))
const0_rtx);
})
+;; For APX NDD XOR
+;; Reduce 2 mov and 1 cmp instruction.
+;; from
+;; movb (%rdi), %al
+;; xorl %esi, %eax, %edx
+;; movb %dl, (%rdi)
+;; cmpb %sil, %al
+;; jne
+;; to
+;; xorb %sil, (%rdi)
+;; jne
+;; Operand roles: 0 = narrow register loaded from memory, 1 = the memory
+;; location, 2 = SImode XOR source, 3 = SImode XOR register input (same
+;; register number as 0), 4 = SImode NDD destination, 5 = narrow register
+;; stored back (same register number as 4), 6/7 = compare operands.
+(define_peephole2
+  [(set (match_operand:SWI12 0 "register_operand")
+        (match_operand:SWI12 1 "memory_operand"))
+   (parallel [(set (match_operand:SI 4 "register_operand")
+                   (xor:SI (match_operand:SI 3 "register_operand")
+                           (match_operand:SI 2 "<nonmemory_operand>")))
+              (clobber (reg:CC FLAGS_REG))])
+   (set (match_dup 1) (match_operand:SWI12 5 "register_operand"))
+   (set (reg:CCZ FLAGS_REG)
+        (compare:CCZ (match_operand:SWI12 6 "register_operand")
+                     (match_operand:SWI12 7 "<nonmemory_operand>")))]
+  "TARGET_APX_NDD
+   && (TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+   /* The XOR must consume the loaded register, and the store must
+      write back the register the XOR produced.  */
+   && REGNO (operands[3]) == REGNO (operands[0])
+   && REGNO (operands[5]) == REGNO (operands[4])
+   /* The compare must be between the loaded value and the XOR source,
+      in either operand order.  Registers are matched by number
+      because the XOR source is SImode while the compare operands are
+      in the narrow mode.  */
+   && (rtx_equal_p (operands[0], operands[6])
+       ? (REG_P (operands[2])
+          ? REG_P (operands[7]) && REGNO (operands[2]) == REGNO (operands[7])
+          : rtx_equal_p (operands[2], operands[7]))
+       : (rtx_equal_p (operands[0], operands[7])
+          && REG_P (operands[2])
+          && REGNO (operands[2]) == REGNO (operands[6])))
+   /* Both temporaries disappear, so neither may be needed afterwards.  */
+   && peep2_reg_dead_p (3, operands[5])
+   && peep2_reg_dead_p (4, operands[0])
+   && !reg_overlap_mentioned_p (operands[0], operands[1])
+   /* The NDD destination is written between the load and the store.
+      If it took part in the address of operand 1 the two accesses
+      would not refer to the same location, although match_dup 1
+      matches structurally.  */
+   && !reg_overlap_mentioned_p (operands[4], operands[1])
+   && !reg_overlap_mentioned_p (operands[0], operands[2])
+   /* In QImode the XOR source must be an immediate or a register
+      accepted by any_QIreg_operand.  */
+   && (<MODE>mode != QImode
+       || immediate_operand (operands[2], SImode)
+       || any_QIreg_operand (operands[2], SImode))"
+  [(parallel [(set (match_dup 8) (match_dup 10))
+              (set (match_dup 1) (match_dup 9))])]
+{
+  /* Flags destination of the matched compare (fourth insn).  */
+  operands[8] = SET_DEST (PATTERN (peep2_next_insn (3)));
+  /* XOR in the narrow mode, applied directly to the memory operand; the
+     SImode source is narrowed with gen_lowpart.  */
+  operands[9] = gen_rtx_XOR (<MODE>mode, copy_rtx (operands[1]),
+                             gen_lowpart (<MODE>mode, operands[2]));
+  /* Flags come from comparing that XOR result against zero.  */
+  operands[10]
+    = gen_rtx_COMPARE (GET_MODE (operands[8]),
+                       copy_rtx (operands[9]),
+                       const0_rtx);
+})
+
;; Attempt to optimize away memory stores of values the memory already
;; has. See PR79593.
(define_peephole2