enum attr_unit unit = get_attr_unit (insn);
int loadcost;
+ /* TODO: On znver5 complex addressing modes have
+ greater latency. */
if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
loadcost = 4;
else
return TARGET_FUSE_CMP_AND_BRANCH;
}
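+/* Return true if insns MOV and ALU can be macro-fused: MOV must be a
+   reg-reg move and ALU an arithmetic, logic or shift instruction that
+   consumes the move destination.  For example the pair
+
+	movl	%esi, %eax
+	addl	$1, %eax
+
+   is accepted.  */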
+static bool
+ix86_fuse_mov_alu_p (rtx_insn *mov, rtx_insn *alu)
+{
+  /* Validate mov:
+      - it should be a reg-reg move with opcode 0x89 or 0x8B.  */
+ rtx set1 = PATTERN (mov);
+ if (GET_CODE (set1) != SET
+ || !GENERAL_REG_P (SET_SRC (set1))
+ || !GENERAL_REG_P (SET_DEST (set1)))
+ return false;
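+  /* REG is the move destination that the ALU instruction must consume.  */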
+ rtx reg = SET_DEST (set1);
+  /*  - it should be a 16, 32 or 64 bit move, i.e. opcode 0x89 or 0x8B
+	rather than the byte variants 0x88/0x8A.  */
+ if (!INTEGRAL_MODE_P (GET_MODE (reg))
+ || GET_MODE_SIZE (GET_MODE (reg)) < 2
+ || GET_MODE_SIZE (GET_MODE (reg)) > 8)
+ return false;
+  /* Validate ALU.  On x86 an ALU instruction is a PARALLEL of the
+     arithmetic SET and a set or clobber of the flags register.  */
+ if (GET_CODE (PATTERN (alu)) != PARALLEL)
+ return false;
+ rtx set2 = XVECEXP (PATTERN (alu), 0, 0);
+ if (GET_CODE (set2) != SET)
+ return false;
+  /* Match one of:
+      ADD ADC AND XOR OR SUB SBB INC DEC NOT SAL SHL SHR SAR.
+     We may also add an insn attribute to handle some of the sporadic
+     cases where we output these with different RTX expressions.  */
+
+ if (GET_CODE (SET_SRC (set2)) != PLUS
+ && GET_CODE (SET_SRC (set2)) != MINUS
+ && GET_CODE (SET_SRC (set2)) != XOR
+ && GET_CODE (SET_SRC (set2)) != AND
+ && GET_CODE (SET_SRC (set2)) != IOR
+ && GET_CODE (SET_SRC (set2)) != NOT
+ && GET_CODE (SET_SRC (set2)) != ASHIFT
+ && GET_CODE (SET_SRC (set2)) != ASHIFTRT
+ && GET_CODE (SET_SRC (set2)) != LSHIFTRT)
+ return false;
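+  /* Extract the operands; NOT is unary, so it has no second operand.  */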
+ rtx op0 = XEXP (SET_SRC (set2), 0);
+ rtx op1 = GET_CODE (SET_SRC (set2)) != NOT ? XEXP (SET_SRC (set2), 1) : NULL;
+  /* One of the operands must be the move destination register; swap it
+     into op0.  */
+ if (op1 && (!REG_P (op0) || REGNO (op0) != REGNO (reg)))
+ std::swap (op0, op1);
+  if (!REG_P (op0) || REGNO (op0) != REGNO (reg))
+ return false;
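+  /* A non-register second operand must be an immediate that the ALU
+     instruction can encode directly.  */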
+ if (op1
+ && !REG_P (op1)
+ && !x86_64_immediate_operand (op1, VOIDmode))
+ return false;
+  /* Only one of the two operands may be the move destination.  */
+ if (op1 && REG_P (op1) && REGNO (op1) == REGNO (reg))
+ return false;
+ return true;
+}
+
/* Check whether the current microarchitecture supports macro fusion
   for the insn pair "CONDGEN + CONDJMP".  Refer to the
   "Intel Architectures Optimization Reference Manual".  */
bool
ix86_macro_fusion_pair_p (rtx_insn *condgen, rtx_insn *condjmp)
{
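+  /* A reg-reg move followed by an ALU instruction consuming its
+     destination can also be fused.  */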
+ if (TARGET_FUSE_MOV_AND_ALU
+ && ix86_fuse_mov_alu_p (condgen, condjmp))
+ return true;
rtx src, dest;
enum rtx_code ccode;
rtx compare_set = NULL_RTX, test_if, cond;
jump instruction when the alu instruction produces the CCFLAG consumed by
the conditional jump instruction. */
DEF_TUNE (X86_TUNE_FUSE_ALU_AND_BRANCH, "fuse_alu_and_branch",
- m_SANDYBRIDGE | m_CORE_AVX2 | m_GENERIC)
+ m_SANDYBRIDGE | m_CORE_AVX2 | m_GENERIC | m_ZNVER5)
+/* X86_TUNE_FUSE_MOV_AND_ALU: Fuse a reg-reg mov with the alu instruction
+   that uses its destination.  The alu must be one of ADD, ADC, AND, XOR,
+   OR, SUB, SBB, INC, DEC, NOT, SAL, SHL, SHR, SAR.  */
+DEF_TUNE (X86_TUNE_FUSE_MOV_AND_ALU, "fuse_mov_and_alu", m_ZNVER5)
/*****************************************************************************/
/* Function prologue, epilogue and function calling sequences. */