if (!optimize
|| optimize_function_for_size_p (cfun)
- || !TARGET_BRANCH_PREDICTION_HINTS)
+ || (!TARGET_BRANCH_PREDICTION_HINTS_NOT_TAKEN
+ && !TARGET_BRANCH_PREDICTION_HINTS_TAKEN))
return;
x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
int pred_val = profile_probability::from_reg_br_prob_note
(XINT (x, 0)).to_reg_br_prob_base ();
- if (pred_val < REG_BR_PROB_BASE * 45 / 100
- || pred_val > REG_BR_PROB_BASE * 55 / 100)
- {
- bool taken = pred_val > REG_BR_PROB_BASE / 2;
- bool cputaken
- = final_forward_branch_p (current_output_insn) == 0;
-
- /* Emit hints only in the case default branch prediction
- heuristics would fail. */
- if (taken != cputaken)
- {
- /* We use 3e (DS) prefix for taken branches and
- 2e (CS) prefix for not taken branches. */
- if (taken)
- fputs ("ds ; ", file);
- else
- fputs ("cs ; ", file);
- }
- }
+ bool taken = pred_val > REG_BR_PROB_BASE / 2;
+ /* We use 3e (DS) prefix for taken branches and
+ 2e (CS) prefix for not taken branches. */
+ if (taken && TARGET_BRANCH_PREDICTION_HINTS_TAKEN)
+ fputs ("ds ; ", file);
+ else if (!taken && TARGET_BRANCH_PREDICTION_HINTS_NOT_TAKEN)
+ fputs ("cs ; ", file);
}
return;
}
#define TARGET_ZERO_EXTEND_WITH_AND \
ix86_tune_features[X86_TUNE_ZERO_EXTEND_WITH_AND]
#define TARGET_UNROLL_STRLEN ix86_tune_features[X86_TUNE_UNROLL_STRLEN]
-#define TARGET_BRANCH_PREDICTION_HINTS \
- ix86_tune_features[X86_TUNE_BRANCH_PREDICTION_HINTS]
+#define TARGET_BRANCH_PREDICTION_HINTS_NOT_TAKEN \
+ ix86_tune_features[X86_TUNE_BRANCH_PREDICTION_HINTS_NOT_TAKEN]
+#define TARGET_BRANCH_PREDICTION_HINTS_TAKEN \
+ ix86_tune_features[X86_TUNE_BRANCH_PREDICTION_HINTS_TAKEN]
#define TARGET_DOUBLE_WITH_ADD ix86_tune_features[X86_TUNE_DOUBLE_WITH_ADD]
#define TARGET_USE_SAHF ix86_tune_features[X86_TUNE_USE_SAHF]
#define TARGET_MOVX ix86_tune_features[X86_TUNE_MOVX]
DEF_TUNE (X86_TUNE_AVOID_VECTOR_DECODE, "avoid_vector_decode",
m_K8)
+/* X86_TUNE_BRANCH_PREDICTION_HINTS_TAKEN: Starting with the Redwood Cove
+   microarchitecture, if the predictor has no stored information about a
+   branch and the branch carries the Intel(R) SSE2 branch-taken hint
+   (i.e., instruction prefix 3EH), then when the codec decodes the branch it
+   flips the branch's prediction from not-taken to taken. It then flushes the
+   pipeline in front of it and steers this pipeline to fetch the taken path
+   of the branch. */
+DEF_TUNE (X86_TUNE_BRANCH_PREDICTION_HINTS_TAKEN, "branch_prediction_hints_taken", m_NONE)
+
/*****************************************************************************/
/* This never worked well before. */
/*****************************************************************************/
-/* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
+/* X86_TUNE_BRANCH_PREDICTION_HINTS_NOT_TAKEN: Branch hints were put in P4 based
on simulation result. But after P4 was made, no performance benefit
was observed with branch hints. It also increases the code size.
As a result, icc never generates branch hints. */
-DEF_TUNE (X86_TUNE_BRANCH_PREDICTION_HINTS, "branch_prediction_hints", m_NONE)
+DEF_TUNE (X86_TUNE_BRANCH_PREDICTION_HINTS_NOT_TAKEN, "branch_prediction_hints_not_taken", m_NONE)
/* X86_TUNE_QIMODE_MATH: Enable use of 8bit arithmetic. */
DEF_TUNE (X86_TUNE_QIMODE_MATH, "qimode_math", m_ALL)