#include "expmed.h"
#include "recog.h"
#include "regset.h"
+#include "regs.h"
#include "df.h"
#include "expr.h"
#include "memmodel.h"
rtx load_mem);
void avoid_store_forwarding (basic_block);
void update_stats (function *);
+
+private:
+ /* Per-insn live-out hard-register sets for the current BB. Populated
+ lazily on the first candidate with bit-insert side-effect clobbers
+ (so aarch64 bfi pays nothing). Cleared on each avoid_store_forwarding
+ entry. */
+ hash_map<rtx_insn *, HARD_REG_SET> m_bb_live_after;
+
+ void compute_bb_live_after (basic_block bb);
};
/* Return a bit insertion sequence that would make DEST have the correct value
return insns;
}
+/* note_stores callback: record hard regs clobbered (not set) by an insn,
+ to capture side-effect clobbers (e.g. flags) without the intended dest. */
+
+static void
+record_hard_reg_clobbers (rtx x, const_rtx pat, void *data)
+{
+ if (GET_CODE (pat) == CLOBBER && REG_P (x) && HARD_REGISTER_P (x))
+ add_to_hard_reg_set ((HARD_REG_SET *) data, GET_MODE (x), REGNO (x));
+}
+
+/* Populate m_bb_live_after with the hard registers live immediately
+ after each real insn in BB. */
+
+void
+store_forwarding_analyzer::compute_bb_live_after (basic_block bb)
+{
+ auto_bitmap live;
+ df_simulate_initialize_backwards (bb, live);
+ rtx_insn *scan;
+ FOR_BB_INSNS_REVERSE (bb, scan)
+ if (INSN_P (scan))
+ {
+ HARD_REG_SET hrs;
+ REG_SET_TO_HARD_REG_SET (hrs, live);
+ m_bb_live_after.put (scan, hrs);
+ df_simulate_one_insn_backwards (bb, scan, live);
+ }
+}
+
/* Return true iff a store to STORE_MEM would write to a sub-region of bytes
from what LOAD_MEM would read. If true also store the relative byte offset
of the store within the load to OFF_VAL. */
it->store_saved_value_insn = insn2;
}
+ /* Reject if the bit-insert sequences clobber a hard register live at
+ the insertion point (e.g. shift/and/or on x86 clobber flags, which
+ would break carry chains). Done before the target cost query so
+ we skip cost work on candidates we would reject anyway. */
+ HARD_REG_SET clobbered_regs;
+ CLEAR_HARD_REG_SET (clobbered_regs);
+ FOR_EACH_VEC_ELT (stores, i, it)
+ for (rtx_insn *ins = it->bits_insert_insns; ins; ins = NEXT_INSN (ins))
+ note_stores (ins, record_hard_reg_clobbers, &clobbered_regs);
+
+ if (!hard_reg_set_empty_p (clobbered_regs))
+ {
+ if (m_bb_live_after.is_empty ())
+ compute_bb_live_after (BLOCK_FOR_INSN (load_insn));
+
+ const HARD_REG_SET *live_at_insert = m_bb_live_after.get (load_insn);
+ if (live_at_insert
+ && hard_reg_set_intersect_p (clobbered_regs, *live_at_insert))
+ {
+ if (dump_file)
+ fprintf (dump_file,
+ "Not transformed: bit-insert clobbers live hard reg.\n");
+ return false;
+ }
+ }
+
if (load_elim)
total_cost -= insn_cost (load_insn, true);
df_insn_rescan (load_insn);
if (load_elim)
- delete_insn (load_insn);
+ {
+ /* Prevent a dangling rtx_insn * key after delete_insn. */
+ m_bb_live_after.remove (load_insn);
+ delete_insn (load_insn);
+ }
return true;
}
if (!optimize_bb_for_speed_p (bb))
return;
+ m_bb_live_after.empty ();
+
auto_vec<store_fwd_info, 8> store_exprs;
rtx_insn *insn;
unsigned int insn_cnt = 0;