/* Generic fusion is only ok for 32 bit operand (but see asm_comp).
** Fusing unaligned memory operands is ok on x86 (except for SIMD types).
*/
- if (irt_isint(ir->t) || irt_isaddr(ir->t)) {
+ if ((irt_isint(ir->t) || irt_isaddr(ir->t)) &&
+ noconflict(as, ref, IR_XSTORE)) {
asm_fusexref(as, IR(ir->op1), xallow);
return RID_MRM;
}
emit_mrm(as, xo, dest, RID_MRM);
}
-static void asm_fstore(ASMState *as, IRIns *ir)
+static void asm_fxstore(ASMState *as, IRIns *ir)
{
RegSet allow = RSET_GPR;
Reg src = RID_NONE;
src = ra_alloc1(as, ir->op2, allow8);
rset_clear(allow, src);
}
- asm_fusefref(as, IR(ir->op1), allow);
+ if (ir->o == IR_FSTORE)
+ asm_fusefref(as, IR(ir->op1), allow);
+ else
+ asm_fusexref(as, IR(ir->op1), allow);
+ /* No alignment check is needed -- unaligned stores are ok on x86. */
if (ra_hasreg(src)) {
x86Op xo;
switch (irt_type(ir->t)) {
case IR_SLOAD: asm_sload(as, ir); break;
case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
- case IR_FSTORE: asm_fstore(as, ir); break;
+ case IR_FSTORE: case IR_XSTORE: asm_fxstore(as, ir); break;
/* Allocations. */
case IR_SNEW: asm_snew(as, ir); break;
_(HSTORE, S , ref, ref) \
_(USTORE, S , ref, ref) \
_(FSTORE, S , ref, ref) \
+ _(XSTORE, S , ref, ref) \
\
/* Allocations. */ \
_(SNEW, N , ref, ref) /* CSE is ok, so not marked as A. */ \
LJ_STATIC_ASSERT((int)IR_HLOAD + IRDELTA_L2S == (int)IR_HSTORE);
LJ_STATIC_ASSERT((int)IR_ULOAD + IRDELTA_L2S == (int)IR_USTORE);
LJ_STATIC_ASSERT((int)IR_FLOAD + IRDELTA_L2S == (int)IR_FSTORE);
+LJ_STATIC_ASSERT((int)IR_XLOAD + IRDELTA_L2S == (int)IR_XSTORE);
/* -- Named IR literals --------------------------------------------------- */
LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_hload(jit_State *J);
LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_uload(jit_State *J);
LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_fload(jit_State *J);
+LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_xload(jit_State *J);
LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_tab_len(jit_State *J);
LJ_FUNC int LJ_FASTCALL lj_opt_fwd_href_nokey(jit_State *J);
LJ_FUNC int LJ_FASTCALL lj_opt_fwd_tptr(jit_State *J, IRRef lim);
return NEXTFOLD;
}
-/* CSE for XLOAD depends on the type, but not on the IRXLOAD_* flags. */
LJFOLD(XLOAD any any)
-LJFOLDF(fwd_xload)
-{
- IRRef ref = J->chain[IR_XLOAD];
- IRRef op1 = fins->op1;
- while (ref > op1) {
- if (IR(ref)->op1 == op1 && irt_sametype(IR(ref)->t, fins->t))
- return ref;
- ref = IR(ref)->prev;
- }
- return EMITFOLD;
-}
+LJFOLDX(lj_opt_fwd_xload)
/* -- Write barriers ------------------------------------------------------ */
return EMITFOLD; /* Otherwise we have a conflict or simply no match. */
}
+/* -- XLOAD forwarding ---------------------------------------------------- */
+
+/* NYI: Alias analysis for XLOAD/XSTORE.
+** Placeholder: all three arguments are unused and every pair of references
+** is reported as ALIAS_MAY.
+*/
+static AliasRet aa_xref(jit_State *J, IRIns *refa, IRIns *refb)
+{
+ UNUSED(J); UNUSED(refa); UNUSED(refb);
+ return ALIAS_MAY;
+}
+
+/* XLOAD forwarding.
+** Handles the XLOAD instruction held in fins. Returns, in order of
+** preference: op2 of an XSTORE for which aa_xref() reports ALIAS_MUST,
+** the ref of an existing XLOAD with the same op1 and the same type, or
+** the result of lj_ir_emit(J).
+**
+** NOTE(review): the ALIAS_MUST path returns store->op2 without comparing
+** its type against fins->t, whereas the CSE loop checks irt_sametype().
+** That path cannot be taken while aa_xref() always returns ALIAS_MAY;
+** confirm a type check is added once aa_xref() can return ALIAS_MUST.
+*/
+TRef LJ_FASTCALL lj_opt_fwd_xload(jit_State *J)
+{
+ IRRef xref = fins->op1; /* op1 of the load being folded. */
+ IRRef lim = xref; /* Search limit. */
+ IRIns *xr = IR(xref);
+ IRRef ref;
+
+ if ((fins->op2 & IRXLOAD_READONLY))
+ goto cselim; /* Skip the store search and go straight to CSE. */
+
+ /* Search for conflicting stores. */
+ ref = J->chain[IR_XSTORE];
+ while (ref > xref) { /* Only chain entries with a ref above xref. */
+ IRIns *store = IR(ref);
+ switch (aa_xref(J, xr, IR(store->op1))) {
+ case ALIAS_NO: break; /* Continue searching. */
+ case ALIAS_MAY: lim = ref; goto cselim; /* Limit search for load. */
+ case ALIAS_MUST: return store->op2; /* Store forwarding. */
+ }
+ ref = store->prev;
+ }
+
+cselim:
+ /* Try to find a matching load. Below the conflicting store, if any. */
+ ref = J->chain[IR_XLOAD];
+ while (ref > lim) { /* lim is xref, or the ref of the ALIAS_MAY store. */
+ /* CSE for XLOAD depends on the type, but not on the IRXLOAD_* flags. */
+ if (IR(ref)->op1 == fins->op1 && irt_sametype(IR(ref)->t, fins->t))
+ return ref;
+ ref = IR(ref)->prev;
+ }
+ return lj_ir_emit(J); /* No reusable load found: emit the instruction. */
+}
+
/* -- Forwarding of lj_tab_len -------------------------------------------- */
/* This is rather simplistic right now, but better than nothing. */